Took care of Sattam's comments from our code review.
git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_indexes@522 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-storage-am-invertedindex/pom.xml b/hyracks-storage-am-invertedindex/pom.xml
index 15d13dc..34f1135 100644
--- a/hyracks-storage-am-invertedindex/pom.xml
+++ b/hyracks-storage-am-invertedindex/pom.xml
@@ -59,13 +59,13 @@
<type>jar</type>
<scope>compile</scope>
</dependency>
- <dependency>
- <groupId>edu.uci.ics.fuzzyjoin</groupId>
- <artifactId>fuzzyjoin-core</artifactId>
- <version>0.0.3-SNAPSHOT</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.fuzzyjoin</groupId>
+ <artifactId>fuzzyjoin-core</artifactId>
+ <version>0.0.3</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedIndexOperatorDescriptorHelper.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedIndexOperatorDescriptorHelper.java
new file mode 100644
index 0000000..b3afe4a
--- /dev/null
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedIndexOperatorDescriptorHelper.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.invertedindex.api;
+
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTrait;
+import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
+import edu.uci.ics.hyracks.storage.am.common.dataflow.ITreeIndexOperatorDescriptorHelper;
+import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexRegistryProvider;
+import edu.uci.ics.hyracks.storage.am.invertedindex.impls.InvertedIndex;
+
+public interface IInvertedIndexOperatorDescriptorHelper extends ITreeIndexOperatorDescriptorHelper {
+ public IFileSplitProvider getInvIndexFileSplitProvider();
+
+ public IBinaryComparatorFactory[] getInvIndexComparatorFactories();
+
+ public ITypeTrait[] getInvIndexTypeTraits();
+
+ public IIndexRegistryProvider<InvertedIndex> getInvIndexRegistryProvider();
+}
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedIndexSearchModifier.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedIndexSearchModifier.java
index b3819a2..8f7a3e3 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedIndexSearchModifier.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedIndexSearchModifier.java
@@ -1,8 +1,24 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.hyracks.storage.am.invertedindex.api;
import java.util.List;
public interface IInvertedIndexSearchModifier {
int getOccurrenceThreshold(List<IInvertedListCursor> invListCursors);
- int getPrefixLists(List<IInvertedListCursor> invListCursors);
+
+ int getPrefixLists(List<IInvertedListCursor> invListCursors);
}
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedIndexSearcher.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedIndexSearcher.java
index 9a591d0..36d64ea 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedIndexSearcher.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedIndexSearcher.java
@@ -22,9 +22,14 @@
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
public interface IInvertedIndexSearcher {
- public void search(IInvertedIndexResultCursor resultCursor, ITupleReference queryTuple, int queryFieldIndex, IInvertedIndexSearchModifier searchModifier) throws Exception;
+ public void search(IInvertedIndexResultCursor resultCursor, ITupleReference queryTuple, int queryFieldIndex,
+ IInvertedIndexSearchModifier searchModifier) throws Exception;
+
public IFrameTupleAccessor createResultFrameTupleAccessor();
+
public ITupleReference createResultTupleReference();
+
public List<ByteBuffer> getResultBuffers();
- public int getNumValidResultBuffers();
+
+ public int getNumValidResultBuffers();
}
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedListBuilder.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedListBuilder.java
index 02b57d2..aaaef56 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedListBuilder.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedListBuilder.java
@@ -1,17 +1,32 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.hyracks.storage.am.invertedindex.api;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
-public interface IInvertedListBuilder {
- public boolean startNewList(ITupleReference tuple, int numTokenFields);
-
- // returns true if successfully appended
- // returns false if not enough space in targetBuf
- public boolean appendElement(ITupleReference tuple, int numTokenFields, int numElementFields);
-
- public void setTargetBuffer(byte[] targetBuf, int startPos);
-
- public int getListSize();
-
- public int getPos();
+public interface IInvertedListBuilder {
+ public boolean startNewList(ITupleReference tuple, int numTokenFields);
+
+ // returns true if successfully appended
+ // returns false if not enough space in targetBuf
+ public boolean appendElement(ITupleReference tuple, int numTokenFields, int numElementFields);
+
+ public void setTargetBuffer(byte[] targetBuf, int startPos);
+
+ public int getListSize();
+
+ public int getPos();
}
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedListCursor.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedListCursor.java
index f03bdfc..9435f3c 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedListCursor.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedListCursor.java
@@ -1,3 +1,18 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.hyracks.storage.am.invertedindex.api;
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
@@ -7,27 +22,35 @@
public interface IInvertedListCursor extends Comparable<IInvertedListCursor> {
void reset(int startPageId, int endPageId, int startOff, int numElements);
-
+
void pinPagesSync() throws HyracksDataException;
+
void pinPagesAsync() throws HyracksDataException;
+
void unpinPages() throws HyracksDataException;
-
+
boolean hasNext();
+
void next();
-
+
ITupleReference getTuple();
-
+
// getters
int getNumElements();
+
int getStartPageId();
+
int getEndPageId();
+
int getStartOff();
-
+
// jump to a specific element
void positionCursor(int elementIx);
- boolean containsKey(ITupleReference searchTuple, MultiComparator invListCmp);
-
+
+ boolean containsKey(ITupleReference searchTuple, MultiComparator invListCmp);
+
// for debugging
- String printInvList(ISerializerDeserializer[] serdes) throws HyracksDataException;
- String printCurrentElement(ISerializerDeserializer[] serdes) throws HyracksDataException;
+ String printInvList(ISerializerDeserializer[] serdes) throws HyracksDataException;
+
+ String printCurrentElement(ISerializerDeserializer[] serdes) throws HyracksDataException;
}
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/AbstractInvertedIndexOperatorDescriptor.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/AbstractInvertedIndexOperatorDescriptor.java
index 8cc0bd7..0d0c5bf 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/AbstractInvertedIndexOperatorDescriptor.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/AbstractInvertedIndexOperatorDescriptor.java
@@ -25,6 +25,7 @@
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrameFactory;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexRegistryProvider;
import edu.uci.ics.hyracks.storage.am.common.dataflow.ITreeIndexOpHelperFactory;
+import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedIndexOperatorDescriptorHelper;
import edu.uci.ics.hyracks.storage.am.invertedindex.impls.InvertedIndex;
import edu.uci.ics.hyracks.storage.common.IStorageManagerInterface;
@@ -44,10 +45,10 @@
protected final ITypeTrait[] btreeTypeTraits;
protected final IBinaryComparatorFactory[] btreeComparatorFactories;
protected final ITreeIndexOpHelperFactory opHelperFactory;
-
+
// inverted index
protected final IFileSplitProvider invIndexFileSplitProvider;
- protected final IIndexRegistryProvider<InvertedIndex> invIndexRegistryProvider;
+ protected final IIndexRegistryProvider<InvertedIndex> invIndexRegistryProvider;
protected final ITypeTrait[] invIndexTypeTraits;
protected final IBinaryComparatorFactory[] invIndexComparatorFactories;
@@ -56,8 +57,7 @@
IFileSplitProvider btreeFileSplitProvider, IIndexRegistryProvider<ITreeIndex> treeIndexRegistryProvider,
ITreeIndexFrameFactory interiorFrameFactory, ITreeIndexFrameFactory leafFrameFactory,
ITypeTrait[] btreeTypeTraits, IBinaryComparatorFactory[] btreeComparatorFactories, float btreeFillFactor,
- ITreeIndexOpHelperFactory opHelperFactory,
- IFileSplitProvider invIndexFileSplitProvider,
+ ITreeIndexOpHelperFactory opHelperFactory, IFileSplitProvider invIndexFileSplitProvider,
IIndexRegistryProvider<InvertedIndex> invIndexRegistryProvider, ITypeTrait[] invIndexTypeTraits,
IBinaryComparatorFactory[] invIndexComparatorFactories) {
super(spec, inputArity, outputArity);
@@ -143,7 +143,7 @@
public ITypeTrait[] getInvIndexTypeTraits() {
return invIndexTypeTraits;
}
-
+
@Override
public ITreeIndexOpHelperFactory getTreeIndexOpHelperFactory() {
return opHelperFactory;
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/IInvertedIndexOperatorDescriptorHelper.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/IInvertedIndexOperatorDescriptorHelper.java
deleted file mode 100644
index 762a86c..0000000
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/IInvertedIndexOperatorDescriptorHelper.java
+++ /dev/null
@@ -1,18 +0,0 @@
-package edu.uci.ics.hyracks.storage.am.invertedindex.dataflow;
-
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-import edu.uci.ics.hyracks.api.dataflow.value.ITypeTrait;
-import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.storage.am.common.dataflow.ITreeIndexOperatorDescriptorHelper;
-import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexRegistryProvider;
-import edu.uci.ics.hyracks.storage.am.invertedindex.impls.InvertedIndex;
-
-public interface IInvertedIndexOperatorDescriptorHelper extends ITreeIndexOperatorDescriptorHelper {
- public IFileSplitProvider getInvIndexFileSplitProvider();
-
- public IBinaryComparatorFactory[] getInvIndexComparatorFactories();
-
- public ITypeTrait[] getInvIndexTypeTraits();
-
- public IIndexRegistryProvider<InvertedIndex> getInvIndexRegistryProvider();
-}
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexBulkLoadOperatorDescriptor.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexBulkLoadOperatorDescriptor.java
index 2aa0e0a..d003580 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexBulkLoadOperatorDescriptor.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexBulkLoadOperatorDescriptor.java
@@ -44,13 +44,13 @@
IIndexRegistryProvider<ITreeIndex> treeIndexRegistryProvider, ITreeIndexFrameFactory interiorFrameFactory,
ITreeIndexFrameFactory leafFrameFactory, ITypeTrait[] btreeTypeTraits,
IBinaryComparatorFactory[] btreeComparatorFactories, float btreeFillFactor,
- ITreeIndexOpHelperFactory opHelperFactory,
- IFileSplitProvider invIndexFileSplitProvider,
+ ITreeIndexOpHelperFactory opHelperFactory, IFileSplitProvider invIndexFileSplitProvider,
IIndexRegistryProvider<InvertedIndex> invIndexRegistryProvider, ITypeTrait[] invIndexTypeTraits,
IBinaryComparatorFactory[] invIndexComparatorFactories, IInvertedListBuilder invListBuilder) {
- super(spec, 1, 0, null, storageManager, btreeFileSplitProvider, treeIndexRegistryProvider, interiorFrameFactory,
- leafFrameFactory, btreeTypeTraits, btreeComparatorFactories, btreeFillFactor,
- opHelperFactory, invIndexFileSplitProvider, invIndexRegistryProvider, invIndexTypeTraits, invIndexComparatorFactories);
+ super(spec, 1, 0, null, storageManager, btreeFileSplitProvider, treeIndexRegistryProvider,
+ interiorFrameFactory, leafFrameFactory, btreeTypeTraits, btreeComparatorFactories, btreeFillFactor,
+ opHelperFactory, invIndexFileSplitProvider, invIndexRegistryProvider, invIndexTypeTraits,
+ invIndexComparatorFactories);
this.fieldPermutation = fieldPermutation;
this.btreeFillFactor = btreeFillFactor;
this.invListBuilder = invListBuilder;
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexBulkLoadOperatorNodePushable.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexBulkLoadOperatorNodePushable.java
index 060f166..4969124 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexBulkLoadOperatorNodePushable.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexBulkLoadOperatorNodePushable.java
@@ -31,22 +31,23 @@
public class InvertedIndexBulkLoadOperatorNodePushable extends AbstractUnaryInputSinkOperatorNodePushable {
private final TreeIndexOpHelper treeIndexOpHelper;
private float btreeFillFactor;
-
- private final InvertedIndexOpHelper invIndexOpHelper;
+
+ private final InvertedIndexOpHelper invIndexOpHelper;
protected final IInvertedListBuilder invListBuilder;
private InvertedIndex.BulkLoadContext bulkLoadCtx;
private final IHyracksStageletContext ctx;
-
+
private FrameTupleAccessor accessor;
private PermutingFrameTupleReference tuple = new PermutingFrameTupleReference();
-
+
private IRecordDescriptorProvider recordDescProvider;
-
+
public InvertedIndexBulkLoadOperatorNodePushable(AbstractInvertedIndexOperatorDescriptor opDesc,
IHyracksStageletContext ctx, int partition, int[] fieldPermutation, float btreeFillFactor,
IInvertedListBuilder invListBuilder, IRecordDescriptorProvider recordDescProvider) {
- treeIndexOpHelper = opDesc.getTreeIndexOpHelperFactory().createTreeIndexOpHelper(opDesc, ctx, partition, IndexHelperOpenMode.CREATE);
+ treeIndexOpHelper = opDesc.getTreeIndexOpHelperFactory().createTreeIndexOpHelper(opDesc, ctx, partition,
+ IndexHelperOpenMode.CREATE);
invIndexOpHelper = new InvertedIndexOpHelper(opDesc, ctx, partition, IndexHelperOpenMode.CREATE);
this.btreeFillFactor = btreeFillFactor;
this.recordDescProvider = recordDescProvider;
@@ -61,22 +62,23 @@
.getOperatorDescriptor();
RecordDescriptor recDesc = recordDescProvider.getInputRecordDescriptor(opDesc.getOperatorId(), 0);
accessor = new FrameTupleAccessor(treeIndexOpHelper.getHyracksStageletContext().getFrameSize(), recDesc);
-
+
// btree
try {
treeIndexOpHelper.init();
- treeIndexOpHelper.getTreeIndex().open(treeIndexOpHelper.getIndexFileId());
+ treeIndexOpHelper.getTreeIndex().open(treeIndexOpHelper.getIndexFileId());
} catch (Exception e) {
// cleanup in case of failure
treeIndexOpHelper.deinit();
throw new HyracksDataException(e);
}
-
+
// inverted index
try {
invIndexOpHelper.init();
- invIndexOpHelper.getInvIndex().open(invIndexOpHelper.getInvIndexFileId());
- bulkLoadCtx = invIndexOpHelper.getInvIndex().beginBulkLoad(invListBuilder, ctx.getFrameSize(), btreeFillFactor);
+ invIndexOpHelper.getInvIndex().open(invIndexOpHelper.getInvIndexFileId());
+ bulkLoadCtx = invIndexOpHelper.getInvIndex().beginBulkLoad(invListBuilder, ctx.getFrameSize(),
+ btreeFillFactor);
} catch (Exception e) {
// cleanup in case of failure
invIndexOpHelper.deinit();
@@ -95,7 +97,7 @@
}
@Override
- public void close() throws HyracksDataException {
+ public void close() throws HyracksDataException {
try {
invIndexOpHelper.getInvIndex().endBulkLoad(bulkLoadCtx);
} finally {
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexOpHelper.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexOpHelper.java
index 4beefcd..c16cfcd 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexOpHelper.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexOpHelper.java
@@ -25,30 +25,31 @@
import edu.uci.ics.hyracks.storage.am.common.dataflow.IndexHelperOpenMode;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IndexRegistry;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
+import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedIndexOperatorDescriptorHelper;
import edu.uci.ics.hyracks.storage.am.invertedindex.impls.InvertedIndex;
import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
public final class InvertedIndexOpHelper {
-
- private InvertedIndex invIndex;
- private int invIndexFileId = -1;
- private int partition;
+
+ private InvertedIndex invIndex;
+ private int invIndexFileId = -1;
+ private int partition;
private IInvertedIndexOperatorDescriptorHelper opDesc;
private IHyracksStageletContext ctx;
- private IndexHelperOpenMode mode;
+ private IndexHelperOpenMode mode;
- public InvertedIndexOpHelper(IInvertedIndexOperatorDescriptorHelper opDesc, final IHyracksStageletContext ctx, int partition,
- IndexHelperOpenMode mode) {
+ public InvertedIndexOpHelper(IInvertedIndexOperatorDescriptorHelper opDesc, final IHyracksStageletContext ctx,
+ int partition, IndexHelperOpenMode mode) {
this.opDesc = opDesc;
this.ctx = ctx;
this.mode = mode;
this.partition = partition;
}
- public void init() throws HyracksDataException {
+ public void init() throws HyracksDataException {
IBufferCache bufferCache = opDesc.getStorageManager().getBufferCache(ctx);
IFileMapProvider fileMapProvider = opDesc.getStorageManager().getFileMapProvider(ctx);
IFileSplitProvider fileSplitProvider = opDesc.getInvIndexFileSplitProvider();
@@ -56,82 +57,81 @@
FileReference f = fileSplitProvider.getFileSplits()[partition].getLocalFile();
boolean fileIsMapped = fileMapProvider.isMapped(f);
- switch (mode) {
-
- case OPEN: {
- if (!fileIsMapped) {
- throw new HyracksDataException(
- "Trying to open inverted index from unmapped file " + f.toString());
- }
- }
- break;
+ switch (mode) {
- case CREATE:
- case ENLIST: {
- if (!fileIsMapped) {
- bufferCache.createFile(f);
- }
- }
- break;
-
- }
-
+ case OPEN: {
+ if (!fileIsMapped) {
+ throw new HyracksDataException("Trying to open inverted index from unmapped file " + f.toString());
+ }
+ }
+ break;
+
+ case CREATE:
+ case ENLIST: {
+ if (!fileIsMapped) {
+ bufferCache.createFile(f);
+ }
+ }
+ break;
+
+ }
+
int fileId = fileMapProvider.lookupFileId(f);
try {
- bufferCache.openFile(fileId);
- } catch(HyracksDataException e) {
- // revert state of buffer cache since file failed to open
- if(!fileIsMapped) {
- bufferCache.deleteFile(fileId);
- }
- throw e;
+ bufferCache.openFile(fileId);
+ } catch (HyracksDataException e) {
+ // revert state of buffer cache since file failed to open
+ if (!fileIsMapped) {
+ bufferCache.deleteFile(fileId);
+ }
+ throw e;
}
-
- // only set btreeFileId member when openFile() succeeds,
+
+ // only set btreeFileId member when openFile() succeeds,
// otherwise deinit() will try to close the file that failed to open
- invIndexFileId = fileId;
+ invIndexFileId = fileId;
IndexRegistry<InvertedIndex> invIndexRegistry = opDesc.getInvIndexRegistryProvider().getRegistry(ctx);
invIndex = invIndexRegistry.get(invIndexFileId);
if (invIndex == null) {
- // create new inverted index and register it
+ // create new inverted index and register it
invIndexRegistry.lock();
- try {
- // check if inverted index has already been registered by another thread
- invIndex = invIndexRegistry.get(invIndexFileId);
- if (invIndex == null) {
- // this thread should create and register the inverted index
+ try {
+ // check if inverted index has already been registered by
+ // another thread
+ invIndex = invIndexRegistry.get(invIndexFileId);
+ if (invIndex == null) {
+ // this thread should create and register the inverted index
- IBinaryComparator[] comparators = new IBinaryComparator[opDesc
- .getInvIndexComparatorFactories().length];
- for (int i = 0; i < opDesc.getInvIndexComparatorFactories().length; i++) {
- comparators[i] = opDesc.getInvIndexComparatorFactories()[i]
- .createBinaryComparator();
- }
+ IBinaryComparator[] comparators = new IBinaryComparator[opDesc.getInvIndexComparatorFactories().length];
+ for (int i = 0; i < opDesc.getInvIndexComparatorFactories().length; i++) {
+ comparators[i] = opDesc.getInvIndexComparatorFactories()[i].createBinaryComparator();
+ }
- MultiComparator cmp = new MultiComparator(opDesc
- .getInvIndexTypeTraits(), comparators);
-
- // assumes btree has already been registered
- IFileSplitProvider btreeFileSplitProvider = opDesc.getTreeIndexFileSplitProvider();
- IndexRegistry<ITreeIndex> treeIndexRegistry = opDesc.getTreeIndexRegistryProvider().getRegistry(ctx);
- FileReference btreeFile = btreeFileSplitProvider.getFileSplits()[partition].getLocalFile();
- boolean btreeFileIsMapped = fileMapProvider.isMapped(btreeFile);
- if(!btreeFileIsMapped) {
- throw new HyracksDataException("Trying to create inverted index, but associated BTree file has not been mapped");
- }
- int btreeFileId = fileMapProvider.lookupFileId(f);
- BTree btree = (BTree)treeIndexRegistry.get(btreeFileId);
-
- invIndex = new InvertedIndex(bufferCache, btree, cmp);
- invIndex.open(invIndexFileId);
- invIndexRegistry.register(invIndexFileId, invIndex);
- }
- } finally {
- invIndexRegistry.unlock();
- }
- }
- }
+ MultiComparator cmp = new MultiComparator(opDesc.getInvIndexTypeTraits(), comparators);
+
+ // assumes btree has already been registered
+ IFileSplitProvider btreeFileSplitProvider = opDesc.getTreeIndexFileSplitProvider();
+ IndexRegistry<ITreeIndex> treeIndexRegistry = opDesc.getTreeIndexRegistryProvider()
+ .getRegistry(ctx);
+ FileReference btreeFile = btreeFileSplitProvider.getFileSplits()[partition].getLocalFile();
+ boolean btreeFileIsMapped = fileMapProvider.isMapped(btreeFile);
+ if (!btreeFileIsMapped) {
+ throw new HyracksDataException(
+ "Trying to create inverted index, but associated BTree file has not been mapped");
+ }
+ int btreeFileId = fileMapProvider.lookupFileId(f);
+ BTree btree = (BTree) treeIndexRegistry.get(btreeFileId);
+
+ invIndex = new InvertedIndex(bufferCache, btree, cmp);
+ invIndex.open(invIndexFileId);
+ invIndexRegistry.register(invIndexFileId, invIndex);
+ }
+ } finally {
+ invIndexRegistry.unlock();
+ }
+ }
+ }
public void deinit() throws HyracksDataException {
if (invIndexFileId != -1) {
@@ -140,19 +140,19 @@
}
}
- public InvertedIndex getInvIndex() {
- return invIndex;
- }
-
+ public InvertedIndex getInvIndex() {
+ return invIndex;
+ }
+
public IHyracksStageletContext getHyracksStageletContext() {
return ctx;
}
- public ITreeIndexOperatorDescriptorHelper getOperatorDescriptor() {
- return opDesc;
- }
-
- public int getInvIndexFileId() {
- return invIndexFileId;
- }
+ public ITreeIndexOperatorDescriptorHelper getOperatorDescriptor() {
+ return opDesc;
+ }
+
+ public int getInvIndexFileId() {
+ return invIndexFileId;
+ }
}
\ No newline at end of file
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeElementInvertedListBuilder.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeElementInvertedListBuilder.java
index d141db0..03fc9a1 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeElementInvertedListBuilder.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeElementInvertedListBuilder.java
@@ -1,61 +1,79 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.hyracks.storage.am.invertedindex.impls;
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTrait;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedListBuilder;
-public class FixedSizeElementInvertedListBuilder implements IInvertedListBuilder {
- private final int listElementSize;
- private int listSize = 0;
-
- private byte[] targetBuf;
- private int pos;
-
- public FixedSizeElementInvertedListBuilder(ITypeTrait[] invListFields) {
- int tmp = 0;
- for(int i = 0; i < invListFields.length; i++) {
- tmp += invListFields[i].getStaticallyKnownDataLength();
- }
- listElementSize = tmp;
- }
-
- @Override
- public boolean startNewList(ITupleReference tuple, int tokenField) {
- if(pos + listElementSize > targetBuf.length) return false;
- else {
- listSize = 0;
- return true;
- }
- }
-
- @Override
- public boolean appendElement(ITupleReference tuple, int numTokenFields, int numElementFields) {
- if(pos + listElementSize > targetBuf.length) return false;
-
- for(int i = 0; i < numElementFields; i++) {
- int field = numTokenFields + i;
- System.arraycopy(tuple.getFieldData(field), tuple.getFieldStart(field), targetBuf, pos, tuple.getFieldLength(field));
- }
-
- listSize++;
- pos += listElementSize;
-
- return true;
- }
-
- @Override
- public void setTargetBuffer(byte[] targetBuf, int startPos) {
- this.targetBuf = targetBuf;
- this.pos = startPos;
- }
+public class FixedSizeElementInvertedListBuilder implements IInvertedListBuilder {
+ private final int listElementSize;
+ private int listSize = 0;
- @Override
- public int getListSize() {
- return listSize;
- }
+ private byte[] targetBuf;
+ private int pos;
- @Override
- public int getPos() {
- return pos;
- }
+ public FixedSizeElementInvertedListBuilder(ITypeTrait[] invListFields) {
+ int tmp = 0;
+ for (int i = 0; i < invListFields.length; i++) {
+ tmp += invListFields[i].getStaticallyKnownDataLength();
+ }
+ listElementSize = tmp;
+ }
+
+ @Override
+ public boolean startNewList(ITupleReference tuple, int tokenField) {
+ if (pos + listElementSize > targetBuf.length)
+ return false;
+ else {
+ listSize = 0;
+ return true;
+ }
+ }
+
+ @Override
+ public boolean appendElement(ITupleReference tuple, int numTokenFields, int numElementFields) {
+ if (pos + listElementSize > targetBuf.length)
+ return false;
+
+ for (int i = 0; i < numElementFields; i++) {
+ int field = numTokenFields + i;
+ System.arraycopy(tuple.getFieldData(field), tuple.getFieldStart(field), targetBuf, pos,
+ tuple.getFieldLength(field));
+ }
+
+ listSize++;
+ pos += listElementSize;
+
+ return true;
+ }
+
+ @Override
+ public void setTargetBuffer(byte[] targetBuf, int startPos) {
+ this.targetBuf = targetBuf;
+ this.pos = startPos;
+ }
+
+ @Override
+ public int getListSize() {
+ return listSize;
+ }
+
+ @Override
+ public int getPos() {
+ return pos;
+ }
}
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeElementInvertedListCursor.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeElementInvertedListCursor.java
index 9ed0ee1..f7ef56e 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeElementInvertedListCursor.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeElementInvertedListCursor.java
@@ -21,99 +21,98 @@
private final int elementSize;
private int currentElementIx;
private int currentOff;
- private int currentPageIx;
-
+ private int currentPageIx;
+
private int startPageId;
private int endPageId;
private int startOff;
private int numElements;
-
+
private final FixedSizeTupleReference tuple;
private ICachedPage[] pages = new ICachedPage[10];
private int[] elementIndexes = new int[10];
-
+
public FixedSizeElementInvertedListCursor(IBufferCache bufferCache, int fileId, ITypeTrait[] invListFields) {
this.bufferCache = bufferCache;
this.fileId = fileId;
this.currentElementIx = 0;
- this.currentPageIx = 0;
-
+ this.currentPageIx = 0;
+
int tmp = 0;
- for(int i = 0; i < invListFields.length; i++) {
+ for (int i = 0; i < invListFields.length; i++) {
tmp += invListFields[i].getStaticallyKnownDataLength();
}
elementSize = tmp;
this.currentOff = -elementSize;
this.tuple = new FixedSizeTupleReference(invListFields);
}
-
+
@Override
public boolean hasNext() {
- if(currentElementIx < numElements) return true;
- else return false;
+ if (currentElementIx < numElements)
+ return true;
+ else
+ return false;
}
@Override
public void next() {
- //System.out.println("NEXT: " + currentOff + " " + elementSize + " " + bufferCache.getPageSize());
- if(currentOff + elementSize >= bufferCache.getPageSize()) {
+ if (currentOff + elementSize >= bufferCache.getPageSize()) {
currentPageIx++;
- currentOff = 0;
- }
- else {
+ currentOff = 0;
+ } else {
currentOff += elementSize;
}
-
+
currentElementIx++;
tuple.reset(pages[currentPageIx].getBuffer().array(), currentOff);
}
@Override
public void pinPagesAsync() {
-
-
+ // TODO: implement
}
@Override
- public void pinPagesSync() throws HyracksDataException {
+ public void pinPagesSync() throws HyracksDataException {
int pix = 0;
- for(int i = startPageId; i <= endPageId; i++) {
+ for (int i = startPageId; i <= endPageId; i++) {
pages[pix] = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, i), false);
pages[pix].acquireReadLatch();
pix++;
- }
+ }
}
@Override
- public void unpinPages() throws HyracksDataException {
+ public void unpinPages() throws HyracksDataException {
int numPages = endPageId - startPageId + 1;
- for(int i = 0; i < numPages; i++) {
+ for (int i = 0; i < numPages; i++) {
pages[i].releaseReadLatch();
bufferCache.unpin(pages[i]);
- }
+ }
}
-
+
@Override
public void positionCursor(int elementIx) {
int numPages = endPageId - startPageId + 1;
-
- currentPageIx = binarySearch(elementIndexes, 0, numPages, elementIx);
- if(currentPageIx < 0) {
- throw new IndexOutOfBoundsException("Requested index: " + elementIx + " from array with numElements: " + numElements);
+
+ currentPageIx = binarySearch(elementIndexes, 0, numPages, elementIx);
+ if (currentPageIx < 0) {
+ throw new IndexOutOfBoundsException("Requested index: " + elementIx + " from array with numElements: "
+ + numElements);
}
-
- if(currentPageIx == 0) {
+
+ if (currentPageIx == 0) {
currentOff = startOff + elementIx * elementSize;
- }
- else {
- int relativeElementIx = elementIx - elementIndexes[currentPageIx-1] - 1;
+ } else {
+ int relativeElementIx = elementIx - elementIndexes[currentPageIx - 1] - 1;
currentOff = relativeElementIx * elementSize;
}
-
+
currentElementIx = elementIx;
tuple.reset(pages[currentPageIx].getBuffer().array(), currentOff);
}
-
+
@Override
public boolean containsKey(ITupleReference searchTuple, MultiComparator invListCmp) {
int mid;
@@ -128,14 +127,14 @@
end = mid - 1;
} else if (cmp > 0) {
begin = mid + 1;
- } else {
+ } else {
return true;
}
}
-
+
return false;
- }
-
+ }
+
@Override
public void reset(int startPageId, int endPageId, int startOff, int numElements) {
this.startPageId = startPageId;
@@ -145,97 +144,100 @@
this.currentElementIx = 0;
this.currentPageIx = 0;
this.currentOff = startOff - elementSize;
-
+
int numPages = endPageId - startPageId + 1;
- if(numPages > pages.length) {
+ if (numPages > pages.length) {
pages = new ICachedPage[endPageId - startPageId + 1];
elementIndexes = new int[endPageId - startPageId + 1];
- }
-
+ }
+
// fill elementIndexes
// first page
int cumulElements = (bufferCache.getPageSize() - startOff) / elementSize;
- elementIndexes[0] = cumulElements-1;
-
- // middle, full pages
- for(int i = 1; i < numPages-1; i++) {
- elementIndexes[i] = elementIndexes[i-1] + (bufferCache.getPageSize() / elementSize);
+ elementIndexes[0] = cumulElements - 1;
+
+ // middle, full pages
+ for (int i = 1; i < numPages - 1; i++) {
+ elementIndexes[i] = elementIndexes[i - 1] + (bufferCache.getPageSize() / elementSize);
}
-
+
// last page
- elementIndexes[numPages-1] = numElements-1;
- }
-
+ elementIndexes[numPages - 1] = numElements - 1;
+ }
+
@Override
- public String printInvList(ISerializerDeserializer[] serdes) throws HyracksDataException {
+ public String printInvList(ISerializerDeserializer[] serdes) throws HyracksDataException {
int oldCurrentOff = currentOff;
int oldCurrentPageId = currentPageIx;
int oldCurrentElementIx = currentElementIx;
-
+
currentOff = startOff - elementSize;
currentPageIx = 0;
currentElementIx = 0;
-
+
StringBuilder strBuilder = new StringBuilder();
-
+
int count = 0;
- while(hasNext()) {
+ while (hasNext()) {
next();
- count++;
-
- // System.out.println(offset + " " + currentElementIx);
-
- for(int i = 0; i < tuple.getFieldCount(); i++) {
- ByteArrayInputStream inStream = new ByteArrayInputStream(tuple.getFieldData(i), tuple.getFieldStart(i), tuple.getFieldLength(i));
- DataInput dataIn = new DataInputStream(inStream);
+ count++;
+ for (int i = 0; i < tuple.getFieldCount(); i++) {
+ ByteArrayInputStream inStream = new ByteArrayInputStream(tuple.getFieldData(i), tuple.getFieldStart(i),
+ tuple.getFieldLength(i));
+ DataInput dataIn = new DataInputStream(inStream);
Object o = serdes[i].deserialize(dataIn);
strBuilder.append(o.toString());
- if(i+1 < tuple.getFieldCount()) strBuilder.append(",");
+ if (i + 1 < tuple.getFieldCount())
+ strBuilder.append(",");
}
strBuilder.append(" ");
}
- //System.out.println("PRINT COUNT: " + count);
-
+
// reset previous state
currentOff = oldCurrentOff;
currentPageIx = oldCurrentPageId;
currentElementIx = oldCurrentElementIx;
-
- return strBuilder.toString();
- }
-
- public String printCurrentElement(ISerializerDeserializer[] serdes) throws HyracksDataException {
- StringBuilder strBuilder = new StringBuilder();
- for(int i = 0; i < tuple.getFieldCount(); i++) {
- ByteArrayInputStream inStream = new ByteArrayInputStream(tuple.getFieldData(i), tuple.getFieldStart(i), tuple.getFieldLength(i));
- DataInput dataIn = new DataInputStream(inStream);
- Object o = serdes[i].deserialize(dataIn);
- strBuilder.append(o.toString());
- if(i+1 < tuple.getFieldCount()) strBuilder.append(",");
- }
+
return strBuilder.toString();
}
-
+
+ public String printCurrentElement(ISerializerDeserializer[] serdes) throws HyracksDataException {
+ StringBuilder strBuilder = new StringBuilder();
+ for (int i = 0; i < tuple.getFieldCount(); i++) {
+ ByteArrayInputStream inStream = new ByteArrayInputStream(tuple.getFieldData(i), tuple.getFieldStart(i),
+ tuple.getFieldLength(i));
+ DataInput dataIn = new DataInputStream(inStream);
+ Object o = serdes[i].deserialize(dataIn);
+ strBuilder.append(o.toString());
+ if (i + 1 < tuple.getFieldCount())
+ strBuilder.append(",");
+ }
+ return strBuilder.toString();
+ }
+
private int binarySearch(int[] arr, int arrStart, int arrLength, int key) {
int mid;
int begin = arrStart;
int end = arrStart + arrLength - 1;
while (begin <= end) {
- mid = (begin + end) / 2;
+ mid = (begin + end) / 2;
int cmp = (key - arr[mid]);
if (cmp < 0) {
end = mid - 1;
} else if (cmp > 0) {
begin = mid + 1;
- } else {
+ } else {
return mid;
}
}
-
- if(begin > arr.length - 1) return -1;
- if(key < arr[begin]) return begin;
- else return -1;
+
+ if (begin > arr.length - 1)
+ return -1;
+ if (key < arr[begin])
+ return begin;
+ else
+ return -1;
}
@Override
@@ -270,9 +272,9 @@
public ICachedPage getPage() {
return pages[currentPageIx];
}
-
+
@Override
- public ITupleReference getTuple() {
+ public ITupleReference getTuple() {
return tuple;
}
}
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeFrameTupleAccessor.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeFrameTupleAccessor.java
index 1c3c9e4..9858eb0 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeFrameTupleAccessor.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeFrameTupleAccessor.java
@@ -1,3 +1,18 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.hyracks.storage.am.invertedindex.impls;
import java.nio.ByteBuffer;
@@ -10,27 +25,27 @@
private final int frameSize;
private ByteBuffer buffer;
-
+
private final ITypeTrait[] fields;
private final int[] fieldStartOffsets;
private final int tupleSize;
-
+
public FixedSizeFrameTupleAccessor(int frameSize, ITypeTrait[] fields) {
this.frameSize = frameSize;
this.fields = fields;
this.fieldStartOffsets = new int[fields.length];
this.fieldStartOffsets[0] = 0;
- for(int i = 1; i < fields.length; i++) {
- fieldStartOffsets[i] = fieldStartOffsets[i-1] + fields[i-1].getStaticallyKnownDataLength();
+ for (int i = 1; i < fields.length; i++) {
+ fieldStartOffsets[i] = fieldStartOffsets[i - 1] + fields[i - 1].getStaticallyKnownDataLength();
}
-
+
int tmp = 0;
- for(int i = 0; i < fields.length; i++) {
+ for (int i = 0; i < fields.length; i++) {
tmp += fields[i].getStaticallyKnownDataLength();
}
tupleSize = tmp;
}
-
+
@Override
public ByteBuffer getBuffer() {
return buffer;
@@ -68,7 +83,7 @@
@Override
public int getTupleEndOffset(int tupleIndex) {
- return getFieldEndOffset(tupleIndex, fields.length-1);
+ return getFieldEndOffset(tupleIndex, fields.length - 1);
}
@Override
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeFrameTupleAppender.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeFrameTupleAppender.java
index 7d95584..edc2304 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeFrameTupleAppender.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeFrameTupleAppender.java
@@ -1,3 +1,18 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.hyracks.storage.am.invertedindex.impls;
import java.nio.ByteBuffer;
@@ -12,17 +27,17 @@
private final int tupleSize;
private ByteBuffer buffer;
private int tupleCount;
- private int tupleDataEndOffset;
+ private int tupleDataEndOffset;
public FixedSizeFrameTupleAppender(int frameSize, ITypeTrait[] fields) {
this.frameSize = frameSize;
int tmp = 0;
- for(int i = 0; i < fields.length; i++) {
+ for (int i = 0; i < fields.length; i++) {
tmp += fields[i].getStaticallyKnownDataLength();
}
tupleSize = tmp;
}
-
+
public void reset(ByteBuffer buffer, boolean clear) {
this.buffer = buffer;
if (clear) {
@@ -31,76 +46,77 @@
tupleDataEndOffset = 0;
}
}
-
+
public boolean append(byte[] bytes, int offset) {
if (tupleDataEndOffset + tupleSize + TUPLE_COUNT_SIZE <= frameSize) {
System.arraycopy(bytes, offset, buffer.array(), tupleDataEndOffset, tupleSize);
tupleDataEndOffset += tupleSize;
- tupleCount++;
+ tupleCount++;
return true;
}
return false;
- }
-
+ }
+
public boolean append(byte[] bytes, int offset, int length) {
if (tupleDataEndOffset + length + TUPLE_COUNT_SIZE <= frameSize) {
System.arraycopy(bytes, offset, buffer.array(), tupleDataEndOffset, length);
- tupleDataEndOffset += length;
+ tupleDataEndOffset += length;
return true;
}
return false;
}
-
+
public boolean append(int fieldValue) {
if (tupleDataEndOffset + 4 + TUPLE_COUNT_SIZE <= frameSize) {
- buffer.putInt(tupleDataEndOffset, fieldValue);
+ buffer.putInt(tupleDataEndOffset, fieldValue);
tupleDataEndOffset += 4;
- tupleCount++;
+ tupleCount++;
return true;
}
return false;
}
-
+
public boolean append(long fieldValue) {
if (tupleDataEndOffset + 8 + TUPLE_COUNT_SIZE <= frameSize) {
- buffer.putLong(tupleDataEndOffset, fieldValue);
+ buffer.putLong(tupleDataEndOffset, fieldValue);
tupleDataEndOffset += 8;
- tupleCount++;
+ tupleCount++;
return true;
}
return false;
}
-
+
public boolean append(char fieldValue) {
if (tupleDataEndOffset + 2 + TUPLE_COUNT_SIZE <= frameSize) {
- buffer.putLong(tupleDataEndOffset, fieldValue);
+ buffer.putLong(tupleDataEndOffset, fieldValue);
tupleDataEndOffset += 2;
- tupleCount++;
+ tupleCount++;
return true;
}
return false;
}
-
+
public boolean append(byte fieldValue) {
if (tupleDataEndOffset + 1 + TUPLE_COUNT_SIZE <= frameSize) {
- buffer.put(tupleDataEndOffset, fieldValue);
+ buffer.put(tupleDataEndOffset, fieldValue);
tupleDataEndOffset += 1;
- tupleCount++;
+ tupleCount++;
return true;
}
return false;
- }
-
+ }
+
// returns true if an entire tuple fits
// returns false otherwise
public boolean hasSpace() {
return tupleDataEndOffset + tupleSize + TUPLE_COUNT_SIZE <= frameSize;
}
-
+
public void incrementTupleCount(int count) {
- buffer.putInt(FrameHelper.getTupleCountOffset(frameSize), buffer.getInt(FrameHelper.getTupleCountOffset(frameSize)) + count);
+ buffer.putInt(FrameHelper.getTupleCountOffset(frameSize),
+ buffer.getInt(FrameHelper.getTupleCountOffset(frameSize)) + count);
}
-
+
public int getTupleCount() {
return tupleCount;
}
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeTupleReference.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeTupleReference.java
index 0ddde07..248b81e 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeTupleReference.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeTupleReference.java
@@ -1,3 +1,18 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.hyracks.storage.am.invertedindex.impls;
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTrait;
@@ -8,22 +23,22 @@
private final ITypeTrait[] typeTraits;
private final int[] fieldStartOffsets;
private byte[] data;
- private int startOff;
-
+ private int startOff;
+
public FixedSizeTupleReference(ITypeTrait[] typeTraits) {
this.typeTraits = typeTraits;
this.fieldStartOffsets = new int[typeTraits.length];
this.fieldStartOffsets[0] = 0;
- for(int i = 1; i < typeTraits.length; i++) {
- fieldStartOffsets[i] = fieldStartOffsets[i-1] + typeTraits[i-1].getStaticallyKnownDataLength();
+ for (int i = 1; i < typeTraits.length; i++) {
+ fieldStartOffsets[i] = fieldStartOffsets[i - 1] + typeTraits[i - 1].getStaticallyKnownDataLength();
}
}
-
+
public void reset(byte[] data, int startOff) {
this.data = data;
this.startOff = startOff;
}
-
+
@Override
public int getFieldCount() {
return typeTraits.length;
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/InvertedIndex.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/InvertedIndex.java
index fc3cf15..9eab110 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/InvertedIndex.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/InvertedIndex.java
@@ -1,3 +1,18 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.hyracks.storage.am.invertedindex.impls;
import java.nio.ByteBuffer;
@@ -24,6 +39,14 @@
import edu.uci.ics.hyracks.storage.common.buffercache.ICachedPage;
import edu.uci.ics.hyracks.storage.common.file.BufferedFileHandle;
+/**
+ * An inverted index consists of two files: 1. a file storing (paginated)
+ * inverted lists 2. a BTree-file mapping from tokens to inverted lists
+ *
+ * Implemented features: bulk loading and searching (based on T-Occurrence) Not
+ * implemented features: updates (insert/update/delete) Limitations: a query
+ * cannot exceed the size of a Hyracks frame
+ */
public class InvertedIndex {
private BTree btree;
@@ -32,8 +55,8 @@
private int fileId;
private final MultiComparator invListCmp;
private final int numTokenFields;
- private final int numInvListKeys;
-
+ private final int numInvListKeys;
+
public InvertedIndex(IBufferCache bufferCache, BTree btree, MultiComparator invListCmp) {
this.bufferCache = bufferCache;
this.btree = btree;
@@ -45,32 +68,26 @@
public void open(int fileId) {
this.fileId = fileId;
}
-
+
public void close() {
- this.fileId = -1;
+ this.fileId = -1;
}
-
- public BulkLoadContext beginBulkLoad(IInvertedListBuilder invListBuilder, int hyracksFrameSize, float btreeFillFactor) throws HyracksDataException {
+
+ public BulkLoadContext beginBulkLoad(IInvertedListBuilder invListBuilder, int hyracksFrameSize,
+ float btreeFillFactor) throws HyracksDataException {
BulkLoadContext ctx = new BulkLoadContext(invListBuilder, hyracksFrameSize, btreeFillFactor);
ctx.init(rootPageId, fileId);
return ctx;
}
-
- // ASSUMPTIONS:
- // the first btree.getMultiComparator().getKeyFieldCount() fields in tuple are btree keys (e.g., a string token)
- // the next invListCmp.getKeyFieldCount() fields in tuple are keys of the inverted list (e.g., primary key)
+
+ // ASSUMPTIONS:
+ // the first btree.getMultiComparator().getKeyFieldCount() fields in tuple
+ // are btree keys (e.g., a string token)
+ // the next invListCmp.getKeyFieldCount() fields in tuple are keys of the
+ // inverted list (e.g., primary key)
// key fields of inverted list are fixed size
- public void bulkLoadAddTuple(BulkLoadContext ctx, ITupleReference tuple)
- throws HyracksDataException {
-
- // debug
- //UTF8StringSerializerDeserializer serde = UTF8StringSerializerDeserializer.INSTANCE;
- //ByteArrayInputStream inStream = new ByteArrayInputStream(tuple.getFieldData(tokenField), tuple.getFieldStart(tokenField), tuple
- // .getFieldLength(tokenField));
- //DataInput dataIn = new DataInputStream(inStream);
- //Object o = serde.deserialize(dataIn);
- //System.out.println(o.toString());
-
+ public void bulkLoadAddTuple(BulkLoadContext ctx, ITupleReference tuple) throws HyracksDataException {
+
// first inverted list, copy token to baaos and start new list
if (ctx.currentInvListTokenBaaos.size() == 0) {
ctx.currentInvListStartPageId = ctx.currentPageId;
@@ -79,8 +96,8 @@
// remember current token
ctx.currentInvListTokenBaaos.reset();
for (int i = 0; i < numTokenFields; i++) {
- ctx.currentInvListTokenBaaos.write(tuple.getFieldData(i), tuple.getFieldStart(i), tuple
- .getFieldLength(i));
+ ctx.currentInvListTokenBaaos.write(tuple.getFieldData(i), tuple.getFieldStart(i),
+ tuple.getFieldLength(i));
}
if (!ctx.invListBuilder.startNewList(tuple, numTokenFields)) {
@@ -90,22 +107,22 @@
throw new IllegalStateException("Failed to create first inverted list.");
}
}
- }
-
+ }
+
// create new inverted list?
ctx.currentInvListToken.reset(ctx.currentInvListTokenBaaos.getByteArray(), 0);
if (ctx.tokenCmp.compare(tuple, ctx.currentInvListToken) != 0) {
-
+
// create entry in btree for last inverted list
createAndInsertBTreeTuple(ctx);
// remember new token
ctx.currentInvListTokenBaaos.reset();
for (int i = 0; i < numTokenFields; i++) {
- ctx.currentInvListTokenBaaos.write(tuple.getFieldData(i), tuple.getFieldStart(i), tuple
- .getFieldLength(i));
+ ctx.currentInvListTokenBaaos.write(tuple.getFieldData(i), tuple.getFieldStart(i),
+ tuple.getFieldLength(i));
}
-
+
// start new list
if (!ctx.invListBuilder.startNewList(tuple, numTokenFields)) {
ctx.pinNextPage();
@@ -118,7 +135,7 @@
ctx.currentInvListStartPageId = ctx.currentPageId;
ctx.currentInvListStartOffset = ctx.invListBuilder.getPos();
}
-
+
// append to current inverted list
if (!ctx.invListBuilder.appendElement(tuple, numTokenFields, numInvListKeys)) {
ctx.pinNextPage();
@@ -128,41 +145,45 @@
"Failed to append element to inverted list after switching to a new page.");
}
}
- }
-
- public boolean openCursor(ITreeIndexCursor btreeCursor, RangePredicate btreePred, BTreeOpContext btreeOpCtx, IInvertedListCursor invListCursor) throws Exception {
- btree.search(btreeCursor, btreePred, btreeOpCtx);
-
+ }
+
+ public boolean openCursor(ITreeIndexCursor btreeCursor, RangePredicate btreePred, BTreeOpContext btreeOpCtx,
+ IInvertedListCursor invListCursor) throws Exception {
+ btree.search(btreeCursor, btreePred, btreeOpCtx);
+
boolean ret = false;
- if(btreeCursor.hasNext()) {
-
+ if (btreeCursor.hasNext()) {
+
btreeCursor.next();
ITupleReference frameTuple = btreeCursor.getTuple();
-
- // TODO: hardcoded mapping of btree fields
- int startPageId = IntegerSerializerDeserializer.getInt(frameTuple.getFieldData(1), frameTuple.getFieldStart(1));
- int endPageId = IntegerSerializerDeserializer.getInt(frameTuple.getFieldData(2), frameTuple.getFieldStart(2));
- int startOff = IntegerSerializerDeserializer.getInt(frameTuple.getFieldData(3), frameTuple.getFieldStart(3));
- int numElements = IntegerSerializerDeserializer.getInt(frameTuple.getFieldData(4), frameTuple.getFieldStart(4));
-
+
+ // hardcoded mapping of btree fields
+ int startPageId = IntegerSerializerDeserializer.getInt(frameTuple.getFieldData(1),
+ frameTuple.getFieldStart(1));
+ int endPageId = IntegerSerializerDeserializer.getInt(frameTuple.getFieldData(2),
+ frameTuple.getFieldStart(2));
+ int startOff = IntegerSerializerDeserializer
+ .getInt(frameTuple.getFieldData(3), frameTuple.getFieldStart(3));
+ int numElements = IntegerSerializerDeserializer.getInt(frameTuple.getFieldData(4),
+ frameTuple.getFieldStart(4));
+
invListCursor.reset(startPageId, endPageId, startOff, numElements);
ret = true;
- }
- else {
+ } else {
invListCursor.reset(0, 0, 0, 0);
}
-
+
btreeCursor.close();
btreeCursor.reset();
-
+
return ret;
}
-
+
public void createAndInsertBTreeTuple(BulkLoadContext ctx) throws HyracksDataException {
// build tuple
ctx.btreeTupleBuilder.reset();
- ctx.btreeTupleBuilder.addField(ctx.currentInvListTokenBaaos.getByteArray(), 0, ctx.currentInvListTokenBaaos
- .size());
+ ctx.btreeTupleBuilder.addField(ctx.currentInvListTokenBaaos.getByteArray(), 0,
+ ctx.currentInvListTokenBaaos.size());
ctx.btreeTupleBuilder.addField(IntegerSerializerDeserializer.INSTANCE, ctx.currentInvListStartPageId);
ctx.btreeTupleBuilder.addField(IntegerSerializerDeserializer.INSTANCE, ctx.currentPageId);
ctx.btreeTupleBuilder.addField(IntegerSerializerDeserializer.INSTANCE, ctx.currentInvListStartOffset);
@@ -173,7 +194,7 @@
ctx.btreeTupleAppender.append(ctx.btreeTupleBuilder.getFieldEndOffsets(), ctx.btreeTupleBuilder.getByteArray(),
0, ctx.btreeTupleBuilder.getSize());
- // reset tuple reference
+ // reset tuple reference
ctx.btreeFrameTupleReference.reset(ctx.btreeFrameTupleAccessor, 0);
btree.bulkLoadAddTuple(ctx.btreeBulkLoadCtx, ctx.btreeFrameTupleReference);
@@ -189,19 +210,19 @@
public IBufferCache getBufferCache() {
return bufferCache;
}
-
+
public int getInvListsFileId() {
return fileId;
}
-
+
public MultiComparator getInvListElementCmp() {
return invListCmp;
}
-
+
public BTree getBTree() {
return btree;
}
-
+
public final class BulkLoadContext {
private final ByteBuffer btreeTupleBuffer;
private final ArrayTupleBuilder btreeTupleBuilder;
@@ -214,33 +235,35 @@
private int currentInvListStartPageId;
private int currentInvListStartOffset;
private final ByteArrayAccessibleOutputStream currentInvListTokenBaaos = new ByteArrayAccessibleOutputStream();
- private final FixedSizeTupleReference currentInvListToken = new FixedSizeTupleReference(invListCmp.getTypeTraits());
-
+ private final FixedSizeTupleReference currentInvListToken = new FixedSizeTupleReference(
+ invListCmp.getTypeTraits());
+
private int currentPageId;
private ICachedPage currentPage;
private final IInvertedListBuilder invListBuilder;
- private final MultiComparator tokenCmp;
-
+ private final MultiComparator tokenCmp;
+
public BulkLoadContext(IInvertedListBuilder invListBuilder, int hyracksFrameSize, float btreeFillFactor) {
this.invListBuilder = invListBuilder;
this.tokenCmp = btree.getMultiComparator();
this.btreeTupleBuffer = ByteBuffer.allocate(hyracksFrameSize);
this.btreeTupleBuilder = new ArrayTupleBuilder(tokenCmp.getFieldCount());
this.btreeTupleAppender = new FrameTupleAppender(hyracksFrameSize);
- // TODO: dummy record descriptor, serde never used anyway, only need
- // correct number of fields
- // tuple contains (token, start page, end page, start offset, num elements)
+ // TODO: serde never used, only need correct number of fields
+ // tuple contains (token, start page, end page, start offset, num
+ // elements)
RecordDescriptor recDesc = new RecordDescriptor(new ISerializerDeserializer[] {
IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
+ IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+ IntegerSerializerDeserializer.INSTANCE });
this.btreeFrameTupleAccessor = new FrameTupleAccessor(hyracksFrameSize, recDesc);
this.btreeFillFactor = btreeFillFactor;
}
public void init(int startPageId, int fileId) throws HyracksDataException {
- btreeBulkLoadCtx = btree.beginBulkLoad(BTree.DEFAULT_FILL_FACTOR, btree.getLeafFrameFactory().createFrame(),
- btree.getInteriorFrameFactory().createFrame(), btree.getFreePageManager().getMetaDataFrameFactory()
- .createFrame());
+ btreeBulkLoadCtx = btree.beginBulkLoad(BTree.DEFAULT_FILL_FACTOR,
+ btree.getLeafFrameFactory().createFrame(), btree.getInteriorFrameFactory().createFrame(), btree
+ .getFreePageManager().getMetaDataFrameFactory().createFrame());
currentPageId = startPageId;
currentPage = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, currentPageId), true);
currentPage.acquireWriteLatch();
@@ -262,5 +285,5 @@
currentPage = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, currentPageId), true);
currentPage.acquireWriteLatch();
}
- };
+ };
}
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/InvertedIndexException.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/InvertedIndexException.java
index 5ba852a..494ac0c 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/InvertedIndexException.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/InvertedIndexException.java
@@ -1,8 +1,24 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.hyracks.storage.am.invertedindex.impls;
-public class InvertedIndexException extends Exception {
- private static final long serialVersionUID = 1L;
- public InvertedIndexException(String msg) {
- super(msg);
- }
+public class InvertedIndexException extends Exception {
+ private static final long serialVersionUID = 1L;
+
+ public InvertedIndexException(String msg) {
+ super(msg);
+ }
}
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/OccurrenceThresholdPanicException.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/OccurrenceThresholdPanicException.java
index 4711fb6..b0e737c 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/OccurrenceThresholdPanicException.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/OccurrenceThresholdPanicException.java
@@ -1,9 +1,24 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.hyracks.storage.am.invertedindex.impls;
public class OccurrenceThresholdPanicException extends InvertedIndexException {
- private static final long serialVersionUID = 1L;
-
- public OccurrenceThresholdPanicException(String msg) {
- super(msg);
- }
+ private static final long serialVersionUID = 1L;
+
+ public OccurrenceThresholdPanicException(String msg) {
+ super(msg);
+ }
}
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/SearchResultCursor.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/SearchResultCursor.java
index f1fe841..5a7230c 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/SearchResultCursor.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/SearchResultCursor.java
@@ -30,13 +30,13 @@
private FixedSizeTupleReference resultTuple;
private int numResultBuffers;
private int currentBufferIndex = 0;
- private int tupleIndex = 0;
-
+ private int tupleIndex = 0;
+
public SearchResultCursor(IFrameTupleAccessor fta, ITupleReference resultTuple) {
this.fta = fta;
- this.resultTuple = (FixedSizeTupleReference)resultTuple;
+ this.resultTuple = (FixedSizeTupleReference) resultTuple;
}
-
+
@Override
public boolean hasNext() {
if (currentBufferIndex < numResultBuffers && tupleIndex < fta.getTupleCount())
@@ -46,16 +46,16 @@
}
@Override
- public void next() {
- resultTuple.reset(fta.getBuffer().array(), fta.getTupleStartOffset(tupleIndex));
+ public void next() {
+ resultTuple.reset(fta.getBuffer().array(), fta.getTupleStartOffset(tupleIndex));
tupleIndex++;
- if(tupleIndex >= fta.getTupleCount()) {
- if(currentBufferIndex + 1 < numResultBuffers) {
+ if (tupleIndex >= fta.getTupleCount()) {
+ if (currentBufferIndex + 1 < numResultBuffers) {
currentBufferIndex++;
fta.reset(resultBuffers.get(currentBufferIndex));
tupleIndex = 0;
}
- }
+ }
}
@Override
@@ -64,12 +64,12 @@
}
@Override
- public void reset(IInvertedIndexSearcher invIndexSearcher) {
+ public void reset(IInvertedIndexSearcher invIndexSearcher) {
currentBufferIndex = 0;
tupleIndex = 0;
resultBuffers = invIndexSearcher.getResultBuffers();
numResultBuffers = invIndexSearcher.getNumValidResultBuffers();
- if(numResultBuffers > 0) {
+ if (numResultBuffers > 0) {
fta.reset(resultBuffers.get(0));
}
}
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcher.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcher.java
index 41d831f..5585856 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcher.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcher.java
@@ -15,6 +15,9 @@
package edu.uci.ics.hyracks.storage.am.invertedindex.impls;
+import java.io.ByteArrayInputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.ByteBuffer;
@@ -52,243 +55,239 @@
public class TOccurrenceSearcher implements IInvertedIndexSearcher {
- protected final IHyracksStageletContext ctx;
- protected final FixedSizeFrameTupleAppender resultFrameTupleApp;
- protected final FixedSizeFrameTupleAccessor resultFrameTupleAcc;
- protected final FixedSizeTupleReference resultTuple;
- protected final int invListKeyLength;
- protected int currentNumResults;
+ protected final IHyracksStageletContext ctx;
+ protected final FixedSizeFrameTupleAppender resultFrameTupleApp;
+ protected final FixedSizeFrameTupleAccessor resultFrameTupleAcc;
+ protected final FixedSizeTupleReference resultTuple;
+ protected final int invListKeyLength;
+ protected int currentNumResults;
- protected List<ByteBuffer> newResultBuffers = new ArrayList<ByteBuffer>();
- protected List<ByteBuffer> prevResultBuffers = new ArrayList<ByteBuffer>();
- protected List<ByteBuffer> swap = null;
- protected int maxResultBufIdx = 0;
+ protected List<ByteBuffer> newResultBuffers = new ArrayList<ByteBuffer>();
+ protected List<ByteBuffer> prevResultBuffers = new ArrayList<ByteBuffer>();
+ protected List<ByteBuffer> swap = null;
+ protected int maxResultBufIdx = 0;
- protected final ITreeIndexFrame leafFrame;
- protected final ITreeIndexFrame interiorFrame;
- protected final ITreeIndexCursor btreeCursor;
- protected final FrameTupleReference searchKey = new FrameTupleReference();
- protected final RangePredicate btreePred = new RangePredicate(true, null, null, true, true, null, null);
- protected final BTreeOpContext btreeOpCtx;
+ protected final ITreeIndexFrame leafFrame;
+ protected final ITreeIndexFrame interiorFrame;
+ protected final ITreeIndexCursor btreeCursor;
+ protected final FrameTupleReference searchKey = new FrameTupleReference();
+ protected final RangePredicate btreePred = new RangePredicate(true, null, null, true, true, null, null);
+ protected final BTreeOpContext btreeOpCtx;
- protected RecordDescriptor queryTokenRecDesc = new RecordDescriptor(
- new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
- protected ArrayTupleBuilder queryTokenBuilder = new ArrayTupleBuilder(queryTokenRecDesc.getFields().length);
- protected DataOutput queryTokenDos = queryTokenBuilder.getDataOutput();
- protected FrameTupleAppender queryTokenAppender;
- protected ByteBuffer queryTokenFrame;
+ protected RecordDescriptor queryTokenRecDesc = new RecordDescriptor(
+ new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
+ protected ArrayTupleBuilder queryTokenBuilder = new ArrayTupleBuilder(queryTokenRecDesc.getFields().length);
+ protected DataOutput queryTokenDos = queryTokenBuilder.getDataOutput();
+ protected FrameTupleAppender queryTokenAppender;
+ protected ByteBuffer queryTokenFrame;
- protected final InvertedIndex invIndex;
- protected final IBinaryTokenizer queryTokenizer;
- protected final ITypeTrait[] invListFieldsWithCount;
- protected int occurrenceThreshold;
+ protected final InvertedIndex invIndex;
+ protected final IBinaryTokenizer queryTokenizer;
+ protected final ITypeTrait[] invListFieldsWithCount;
+ protected int occurrenceThreshold;
- protected final int cursorCacheSize = 10;
- protected List<IInvertedListCursor> invListCursorCache = new ArrayList<IInvertedListCursor>(cursorCacheSize);
- protected List<IInvertedListCursor> invListCursors = new ArrayList<IInvertedListCursor>(cursorCacheSize);
+ protected final int cursorCacheSize = 10;
+ protected List<IInvertedListCursor> invListCursorCache = new ArrayList<IInvertedListCursor>(cursorCacheSize);
+ protected List<IInvertedListCursor> invListCursors = new ArrayList<IInvertedListCursor>(cursorCacheSize);
- public TOccurrenceSearcher(IHyracksStageletContext ctx, InvertedIndex invIndex, IBinaryTokenizer queryTokenizer) {
- this.ctx = ctx;
- this.invIndex = invIndex;
- this.queryTokenizer = queryTokenizer;
+ public TOccurrenceSearcher(IHyracksStageletContext ctx, InvertedIndex invIndex, IBinaryTokenizer queryTokenizer) {
+ this.ctx = ctx;
+ this.invIndex = invIndex;
+ this.queryTokenizer = queryTokenizer;
- leafFrame = invIndex.getBTree().getLeafFrameFactory().createFrame();
- interiorFrame = invIndex.getBTree().getInteriorFrameFactory().createFrame();
+ leafFrame = invIndex.getBTree().getLeafFrameFactory().createFrame();
+ interiorFrame = invIndex.getBTree().getInteriorFrameFactory().createFrame();
- btreeCursor = new BTreeRangeSearchCursor((IBTreeLeafFrame)leafFrame);
- ITypeTrait[] invListFields = invIndex.getInvListElementCmp().getTypeTraits();
- invListFieldsWithCount = new TypeTrait[invListFields.length + 1];
- int tmp = 0;
- for(int i = 0; i < invListFields.length; i++) {
- invListFieldsWithCount[i] = invListFields[i];
- tmp += invListFields[i].getStaticallyKnownDataLength();
- }
- // using an integer for counting occurrences
- invListFieldsWithCount[invListFields.length] = new TypeTrait(4);
- invListKeyLength = tmp;
+ btreeCursor = new BTreeRangeSearchCursor((IBTreeLeafFrame) leafFrame);
+ ITypeTrait[] invListFields = invIndex.getInvListElementCmp().getTypeTraits();
+ invListFieldsWithCount = new TypeTrait[invListFields.length + 1];
+ int tmp = 0;
+ for (int i = 0; i < invListFields.length; i++) {
+ invListFieldsWithCount[i] = invListFields[i];
+ tmp += invListFields[i].getStaticallyKnownDataLength();
+ }
+ // using an integer for counting occurrences
+ invListFieldsWithCount[invListFields.length] = new TypeTrait(4);
+ invListKeyLength = tmp;
- btreeOpCtx = invIndex.getBTree().createOpContext(IndexOp.SEARCH, leafFrame,
- interiorFrame, null);
+ btreeOpCtx = invIndex.getBTree().createOpContext(IndexOp.SEARCH, leafFrame, interiorFrame, null);
- resultFrameTupleApp = new FixedSizeFrameTupleAppender(ctx.getFrameSize(), invListFieldsWithCount);
- resultFrameTupleAcc = new FixedSizeFrameTupleAccessor(ctx.getFrameSize(), invListFieldsWithCount);
- resultTuple = new FixedSizeTupleReference(invListFieldsWithCount);
- newResultBuffers.add(ctx.allocateFrame());
- prevResultBuffers.add(ctx.allocateFrame());
+ resultFrameTupleApp = new FixedSizeFrameTupleAppender(ctx.getFrameSize(), invListFieldsWithCount);
+ resultFrameTupleAcc = new FixedSizeFrameTupleAccessor(ctx.getFrameSize(), invListFieldsWithCount);
+ resultTuple = new FixedSizeTupleReference(invListFieldsWithCount);
+ newResultBuffers.add(ctx.allocateFrame());
+ prevResultBuffers.add(ctx.allocateFrame());
- MultiComparator searchCmp = invIndex.getBTree().getMultiComparator();
- btreePred.setLowKeyComparator(searchCmp);
- btreePred.setHighKeyComparator(searchCmp);
- btreePred.setLowKey(searchKey, true);
- btreePred.setHighKey(searchKey, true);
+ MultiComparator searchCmp = invIndex.getBTree().getMultiComparator();
+ btreePred.setLowKeyComparator(searchCmp);
+ btreePred.setHighKeyComparator(searchCmp);
+ btreePred.setLowKey(searchKey, true);
+ btreePred.setHighKey(searchKey, true);
- // pre-create cursor objects
- for (int i = 0; i < cursorCacheSize; i++) {
- invListCursorCache.add(new FixedSizeElementInvertedListCursor(invIndex.getBufferCache(), invIndex
- .getInvListsFileId(), invIndex.getInvListElementCmp().getTypeTraits()));
- }
+ // pre-create cursor objects
+ for (int i = 0; i < cursorCacheSize; i++) {
+ invListCursorCache.add(new FixedSizeElementInvertedListCursor(invIndex.getBufferCache(), invIndex
+ .getInvListsFileId(), invIndex.getInvListElementCmp().getTypeTraits()));
+ }
- queryTokenAppender = new FrameTupleAppender(ctx.getFrameSize());
- queryTokenFrame = ctx.allocateFrame();
+ queryTokenAppender = new FrameTupleAppender(ctx.getFrameSize());
+ queryTokenFrame = ctx.allocateFrame();
- currentNumResults = 0;
- }
+ currentNumResults = 0;
+ }
- public void reset() {
- for(ByteBuffer b : newResultBuffers) {
- resultFrameTupleApp.reset(b, true);
- }
- for(ByteBuffer b : prevResultBuffers) {
- resultFrameTupleApp.reset(b, true);
- }
- currentNumResults = 0;
- }
+ public void reset() {
+ for (ByteBuffer b : newResultBuffers) {
+ resultFrameTupleApp.reset(b, true);
+ }
+ for (ByteBuffer b : prevResultBuffers) {
+ resultFrameTupleApp.reset(b, true);
+ }
+ currentNumResults = 0;
+ }
- public void search(IInvertedIndexResultCursor resultCursor, ITupleReference queryTuple, int queryFieldIndex, IInvertedIndexSearchModifier searchModifier) throws Exception {
+ public void search(IInvertedIndexResultCursor resultCursor, ITupleReference queryTuple, int queryFieldIndex,
+ IInvertedIndexSearchModifier searchModifier) throws Exception {
- queryTokenAppender.reset(queryTokenFrame, true);
- queryTokenizer.reset(queryTuple.getFieldData(queryFieldIndex), queryTuple.getFieldStart(queryFieldIndex),
- queryTuple.getFieldLength(queryFieldIndex));
- while (queryTokenizer.hasNext()) {
- queryTokenizer.next();
+ queryTokenAppender.reset(queryTokenFrame, true);
+ queryTokenizer.reset(queryTuple.getFieldData(queryFieldIndex), queryTuple.getFieldStart(queryFieldIndex),
+ queryTuple.getFieldLength(queryFieldIndex));
- queryTokenBuilder.reset();
- try {
- IToken token = queryTokenizer.getToken();
- token.serializeToken(queryTokenDos);
- queryTokenBuilder.addFieldEndOffset();
- // WARNING: assuming one frame is big enough to hold all tokens
- queryTokenAppender.append(queryTokenBuilder.getFieldEndOffsets(), queryTokenBuilder.getByteArray(), 0,
- queryTokenBuilder.getSize());
- } catch (IOException e) {
- throw new HyracksDataException(e);
- }
- }
+ while (queryTokenizer.hasNext()) {
+ queryTokenizer.next();
+ queryTokenBuilder.reset();
+ try {
+ IToken token = queryTokenizer.getToken();
+ token.serializeToken(queryTokenDos);
+ queryTokenBuilder.addFieldEndOffset();
+ // WARNING: assuming one frame is big enough to hold all tokens
+ queryTokenAppender.append(queryTokenBuilder.getFieldEndOffsets(), queryTokenBuilder.getByteArray(), 0,
+ queryTokenBuilder.getSize());
+ } catch (IOException e) {
+ throw new HyracksDataException(e);
+ }
+ }
- FrameTupleAccessor queryTokenAccessor = new FrameTupleAccessor(ctx.getFrameSize(), queryTokenRecDesc);
- queryTokenAccessor.reset(queryTokenFrame);
- int numQueryTokens = queryTokenAccessor.getTupleCount();
+ FrameTupleAccessor queryTokenAccessor = new FrameTupleAccessor(ctx.getFrameSize(), queryTokenRecDesc);
+ queryTokenAccessor.reset(queryTokenFrame);
+ int numQueryTokens = queryTokenAccessor.getTupleCount();
- // expand cursor cache if necessary
- if (numQueryTokens > invListCursorCache.size()) {
- int diff = numQueryTokens - invListCursorCache.size();
- for (int i = 0; i < diff; i++) {
- invListCursorCache.add(new FixedSizeElementInvertedListCursor(invIndex.getBufferCache(), invIndex
- .getInvListsFileId(), invIndex.getInvListElementCmp().getTypeTraits()));
- }
- }
+ // expand cursor cache if necessary
+ if (numQueryTokens > invListCursorCache.size()) {
+ int diff = numQueryTokens - invListCursorCache.size();
+ for (int i = 0; i < diff; i++) {
+ invListCursorCache.add(new FixedSizeElementInvertedListCursor(invIndex.getBufferCache(), invIndex
+ .getInvListsFileId(), invIndex.getInvListElementCmp().getTypeTraits()));
+ }
+ }
- invListCursors.clear();
- for (int i = 0; i < numQueryTokens; i++) {
- searchKey.reset(queryTokenAccessor, i);
- invIndex.openCursor(btreeCursor, btreePred, btreeOpCtx, invListCursorCache.get(i));
- invListCursors.add(invListCursorCache.get(i));
- }
-
- occurrenceThreshold = searchModifier.getOccurrenceThreshold(invListCursors);
-
- // TODO: deal with panic cases properly
- if(occurrenceThreshold <= 0) {
- throw new OccurrenceThresholdPanicException("Merge Threshold is <= 0. Failing Search.");
- }
+ invListCursors.clear();
+ for (int i = 0; i < numQueryTokens; i++) {
+ searchKey.reset(queryTokenAccessor, i);
+ invIndex.openCursor(btreeCursor, btreePred, btreeOpCtx, invListCursorCache.get(i));
+ invListCursors.add(invListCursorCache.get(i));
+ }
- int numPrefixLists = searchModifier.getPrefixLists(invListCursors);
- maxResultBufIdx = mergePrefixLists(numPrefixLists, numQueryTokens);
- maxResultBufIdx = mergeSuffixLists(numPrefixLists, numQueryTokens, maxResultBufIdx);
+ occurrenceThreshold = searchModifier.getOccurrenceThreshold(invListCursors);
- resultCursor.reset(this);
+ // TODO: deal with panic cases properly
+ if (occurrenceThreshold <= 0) {
+ throw new OccurrenceThresholdPanicException("Merge Threshold is <= 0. Failing Search.");
+ }
- //printNewResults(maxResultBufIdx);
- }
+ int numPrefixLists = searchModifier.getPrefixLists(invListCursors);
+ maxResultBufIdx = mergePrefixLists(numPrefixLists, numQueryTokens);
+ maxResultBufIdx = mergeSuffixLists(numPrefixLists, numQueryTokens, maxResultBufIdx);
- protected int mergePrefixLists(int numPrefixTokens, int numQueryTokens) throws IOException {
- int maxPrevBufIdx = 0;
- for(int i = 0; i < numPrefixTokens; i++) {
- swap = prevResultBuffers;
- prevResultBuffers = newResultBuffers;
- newResultBuffers = swap;
- currentNumResults = 0;
+ resultCursor.reset(this);
+ }
- invListCursors.get(i).pinPagesSync();
- maxPrevBufIdx = mergePrefixList(invListCursors.get(i), prevResultBuffers, maxPrevBufIdx, newResultBuffers);
- invListCursors.get(i).unpinPages();
+ protected int mergePrefixLists(int numPrefixTokens, int numQueryTokens) throws IOException {
+ int maxPrevBufIdx = 0;
+ for (int i = 0; i < numPrefixTokens; i++) {
+ swap = prevResultBuffers;
+ prevResultBuffers = newResultBuffers;
+ newResultBuffers = swap;
+ currentNumResults = 0;
- //printNewResults(maxPrevBufIdx);
- }
+ invListCursors.get(i).pinPagesSync();
+ maxPrevBufIdx = mergePrefixList(invListCursors.get(i), prevResultBuffers, maxPrevBufIdx, newResultBuffers);
+ invListCursors.get(i).unpinPages();
+ }
+ return maxPrevBufIdx;
+ }
- return maxPrevBufIdx;
- }
+ protected int mergeSuffixLists(int numPrefixTokens, int numQueryTokens, int maxPrevBufIdx) throws IOException {
+ for (int i = numPrefixTokens; i < numQueryTokens; i++) {
+ swap = prevResultBuffers;
+ prevResultBuffers = newResultBuffers;
+ newResultBuffers = swap;
- protected int mergeSuffixLists(int numPrefixTokens, int numQueryTokens, int maxPrevBufIdx) throws IOException {
- for(int i = numPrefixTokens; i < numQueryTokens; i++) {
- swap = prevResultBuffers;
- prevResultBuffers = newResultBuffers;
- newResultBuffers = swap;
+ invListCursors.get(i).pinPagesSync();
+ int numInvListElements = invListCursors.get(i).getNumElements();
+ // should we binary search the next list or should we sort-merge it?
+ if (currentNumResults * Math.log(numInvListElements) < currentNumResults + numInvListElements) {
+ maxPrevBufIdx = mergeSuffixListProbe(invListCursors.get(i), prevResultBuffers, maxPrevBufIdx,
+ newResultBuffers, i, numQueryTokens);
+ } else {
+ maxPrevBufIdx = mergeSuffixListScan(invListCursors.get(i), prevResultBuffers, maxPrevBufIdx,
+ newResultBuffers, i, numQueryTokens);
+ }
+ invListCursors.get(i).unpinPages();
+ }
+ return maxPrevBufIdx;
+ }
- invListCursors.get(i).pinPagesSync();
- int numInvListElements = invListCursors.get(i).getNumElements();
- // should we binary search the next list or should we sort-merge it?
- if(currentNumResults * Math.log(numInvListElements) < currentNumResults + numInvListElements) {
- //System.out.println("PROBING LIST: " + i);
- maxPrevBufIdx = mergeSuffixListProbe(invListCursors.get(i), prevResultBuffers, maxPrevBufIdx, newResultBuffers, i, numQueryTokens);
- }
- else {
- //System.out.println("SCANNING LIST: " + i);
- maxPrevBufIdx = mergeSuffixListScan(invListCursors.get(i), prevResultBuffers, maxPrevBufIdx, newResultBuffers, i, numQueryTokens);
- }
- invListCursors.get(i).unpinPages();
- }
- return maxPrevBufIdx;
- }
+ protected int mergeSuffixListProbe(IInvertedListCursor invListCursor, List<ByteBuffer> prevResultBuffers,
+ int maxPrevBufIdx, List<ByteBuffer> newResultBuffers, int invListIx, int numQueryTokens) throws IOException {
- protected int mergeSuffixListProbe(IInvertedListCursor invListCursor, List<ByteBuffer> prevResultBuffers, int maxPrevBufIdx, List<ByteBuffer> newResultBuffers, int invListIx, int numQueryTokens) throws IOException {
+ int newBufIdx = 0;
+ ByteBuffer newCurrentBuffer = newResultBuffers.get(0);
- int newBufIdx = 0;
- ByteBuffer newCurrentBuffer = newResultBuffers.get(0);
+ int prevBufIdx = 0;
+ ByteBuffer prevCurrentBuffer = prevResultBuffers.get(0);
- int prevBufIdx = 0;
- ByteBuffer prevCurrentBuffer = prevResultBuffers.get(0);
+ int resultTidx = 0;
- int resultTidx = 0;
+ currentNumResults = 0;
- currentNumResults = 0;
+ MultiComparator invListCmp = invIndex.getInvListElementCmp();
- MultiComparator invListCmp = invIndex.getInvListElementCmp();
+ resultFrameTupleAcc.reset(prevCurrentBuffer);
+ resultFrameTupleApp.reset(newCurrentBuffer, true);
- resultFrameTupleAcc.reset(prevCurrentBuffer);
- resultFrameTupleApp.reset(newCurrentBuffer, true);
+ while (resultTidx < resultFrameTupleAcc.getTupleCount()) {
- while(resultTidx < resultFrameTupleAcc.getTupleCount()) {
+ resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
+ int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0),
+ resultTuple.getFieldStart(resultTuple.getFieldCount() - 1));
- resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
- int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(resultTuple.getFieldCount()-1));
+ if (invListCursor.containsKey(resultTuple, invListCmp)) {
+ count++;
+ newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
+ } else {
+ if (count + numQueryTokens - invListIx > occurrenceThreshold) {
+ newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
+ }
+ }
- if(invListCursor.containsKey(resultTuple, invListCmp)) {
- count++;
- newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
- }
- else {
- if(count + numQueryTokens - invListIx > occurrenceThreshold) {
- newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
- }
- }
+ resultTidx++;
+ if (resultTidx >= resultFrameTupleAcc.getTupleCount()) {
+ prevBufIdx++;
+ if (prevBufIdx <= maxPrevBufIdx) {
+ prevCurrentBuffer = prevResultBuffers.get(prevBufIdx);
+ resultFrameTupleAcc.reset(prevCurrentBuffer);
+ resultTidx = 0;
+ }
+ }
+ }
- resultTidx++;
- if (resultTidx >= resultFrameTupleAcc.getTupleCount()) {
- prevBufIdx++;
- if (prevBufIdx <= maxPrevBufIdx) {
- prevCurrentBuffer = prevResultBuffers.get(prevBufIdx);
- resultFrameTupleAcc.reset(prevCurrentBuffer);
- resultTidx = 0;
- }
- }
- }
+ return newBufIdx;
+ }
- return newBufIdx;
- }
-
- protected int mergeSuffixListScan(IInvertedListCursor invListCursor, List<ByteBuffer> prevResultBuffers, int maxPrevBufIdx, List<ByteBuffer> newResultBuffers, int invListIx, int numQueryTokens) throws IOException {
- int newBufIdx = 0;
+ protected int mergeSuffixListScan(IInvertedListCursor invListCursor, List<ByteBuffer> prevResultBuffers,
+ int maxPrevBufIdx, List<ByteBuffer> newResultBuffers, int invListIx, int numQueryTokens) throws IOException {
+ int newBufIdx = 0;
ByteBuffer newCurrentBuffer = newResultBuffers.get(0);
int prevBufIdx = 0;
@@ -296,7 +295,7 @@
boolean advanceCursor = true;
boolean advancePrevResult = false;
- int resultTidx = 0;
+ int resultTidx = 0;
MultiComparator invListCmp = invIndex.getInvListElementCmp();
@@ -304,31 +303,34 @@
resultFrameTupleApp.reset(newCurrentBuffer, true);
int invListTidx = 0;
- int invListNumTuples = invListCursor.getNumElements();
+ int invListNumTuples = invListCursor.getNumElements();
- if(invListCursor.hasNext()) invListCursor.next();
+ if (invListCursor.hasNext())
+ invListCursor.next();
- while(invListTidx < invListNumTuples && resultTidx < resultFrameTupleAcc.getTupleCount()) {
-
+ while (invListTidx < invListNumTuples && resultTidx < resultFrameTupleAcc.getTupleCount()) {
+
ITupleReference invListTuple = invListCursor.getTuple();
- resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
+ resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
int cmp = invListCmp.compare(invListTuple, resultTuple);
if (cmp == 0) {
- int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(resultTuple.getFieldCount()-1)) + 1;
- newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
+ int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0),
+ resultTuple.getFieldStart(resultTuple.getFieldCount() - 1)) + 1;
+ newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
advanceCursor = true;
advancePrevResult = true;
} else {
- if (cmp < 0) {
+ if (cmp < 0) {
advanceCursor = true;
advancePrevResult = false;
} else {
- int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(resultTuple.getFieldCount()-1));
- if(count + numQueryTokens - invListIx > occurrenceThreshold) {
- newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
- }
+ int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0),
+ resultTuple.getFieldStart(resultTuple.getFieldCount() - 1));
+ if (count + numQueryTokens - invListIx > occurrenceThreshold) {
+ newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
+ }
advanceCursor = false;
advancePrevResult = true;
}
@@ -344,23 +346,24 @@
resultTidx = 0;
}
}
- }
-
- if(advanceCursor) {
- invListTidx++;
- invListCursor.next();
- }
+ }
+
+ if (advanceCursor) {
+ invListTidx++;
+ invListCursor.next();
+ }
}
-
+
// append remaining elements from previous result set
- while(resultTidx < resultFrameTupleAcc.getTupleCount()) {
-
- resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
-
- int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(resultTuple.getFieldCount()-1));
- if(count + numQueryTokens - invListIx > occurrenceThreshold) {
- newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
- }
+ while (resultTidx < resultFrameTupleAcc.getTupleCount()) {
+
+ resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
+
+ int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0),
+ resultTuple.getFieldStart(resultTuple.getFieldCount() - 1));
+ if (count + numQueryTokens - invListIx > occurrenceThreshold) {
+ newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
+ }
resultTidx++;
if (resultTidx >= resultFrameTupleAcc.getTupleCount()) {
@@ -374,9 +377,10 @@
}
return newBufIdx;
- }
-
- protected int mergePrefixList(IInvertedListCursor invListCursor, List<ByteBuffer> prevResultBuffers, int maxPrevBufIdx, List<ByteBuffer> newResultBuffers) throws IOException {
+ }
+
+ protected int mergePrefixList(IInvertedListCursor invListCursor, List<ByteBuffer> prevResultBuffers,
+ int maxPrevBufIdx, List<ByteBuffer> newResultBuffers) throws IOException {
int newBufIdx = 0;
ByteBuffer newCurrentBuffer = newResultBuffers.get(0);
@@ -385,7 +389,7 @@
boolean advanceCursor = true;
boolean advancePrevResult = false;
- int resultTidx = 0;
+ int resultTidx = 0;
MultiComparator invListCmp = invIndex.getInvListElementCmp();
@@ -393,20 +397,21 @@
resultFrameTupleApp.reset(newCurrentBuffer, true);
int invListTidx = 0;
- int invListNumTuples = invListCursor.getNumElements();
+ int invListNumTuples = invListCursor.getNumElements();
- if(invListCursor.hasNext()) invListCursor.next();
+ if (invListCursor.hasNext())
+ invListCursor.next();
- while(invListTidx < invListNumTuples && resultTidx < resultFrameTupleAcc.getTupleCount()) {
-
+ while (invListTidx < invListNumTuples && resultTidx < resultFrameTupleAcc.getTupleCount()) {
+
ITupleReference invListTuple = invListCursor.getTuple();
-
- resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
+ resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
int cmp = invListCmp.compare(invListTuple, resultTuple);
if (cmp == 0) {
- int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(resultTuple.getFieldCount()-1)) + 1;
- newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
+ int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0),
+ resultTuple.getFieldStart(resultTuple.getFieldCount() - 1)) + 1;
+ newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
advanceCursor = true;
advancePrevResult = true;
} else {
@@ -416,7 +421,8 @@
advanceCursor = true;
advancePrevResult = false;
} else {
- int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(resultTuple.getFieldCount()-1));
+ int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0),
+ resultTuple.getFieldStart(resultTuple.getFieldCount() - 1));
newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
advanceCursor = false;
advancePrevResult = true;
@@ -433,28 +439,29 @@
resultTidx = 0;
}
}
- }
-
- if(advanceCursor) {
- invListTidx++;
- invListCursor.next();
- }
+ }
+
+ if (advanceCursor) {
+ invListTidx++;
+ invListCursor.next();
+ }
}
- // append remaining new elements from inverted list
- while(invListTidx < invListNumTuples) {
+ // append remaining new elements from inverted list
+ while (invListTidx < invListNumTuples) {
ITupleReference invListTuple = invListCursor.getTuple();
newBufIdx = appendTupleToNewResults(invListTuple, 1, newBufIdx);
invListTidx++;
- invListCursor.next();
+ invListCursor.next();
}
// append remaining elements from previous result set
- while(resultTidx < resultFrameTupleAcc.getTupleCount()) {
-
- resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
+ while (resultTidx < resultFrameTupleAcc.getTupleCount()) {
- int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(resultTuple.getFieldCount()-1));
+ resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
+
+ int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0),
+ resultTuple.getFieldStart(resultTuple.getFieldCount() - 1));
newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
resultTidx++;
@@ -468,236 +475,72 @@
}
}
- return newBufIdx;
+ return newBufIdx;
}
- protected int appendTupleToNewResults(ITupleReference tuple, int newCount, int newBufIdx) throws IOException {
- ByteBuffer newCurrentBuffer = newResultBuffers.get(newBufIdx);
+ protected int appendTupleToNewResults(ITupleReference tuple, int newCount, int newBufIdx) throws IOException {
+ ByteBuffer newCurrentBuffer = newResultBuffers.get(newBufIdx);
- if (!resultFrameTupleApp.hasSpace()) {
- newBufIdx++;
- if (newBufIdx >= newResultBuffers.size()) {
- newResultBuffers.add(ctx.allocateFrame());
- }
- newCurrentBuffer = newResultBuffers.get(newBufIdx);
- resultFrameTupleApp.reset(newCurrentBuffer, true);
- }
+ if (!resultFrameTupleApp.hasSpace()) {
+ newBufIdx++;
+ if (newBufIdx >= newResultBuffers.size()) {
+ newResultBuffers.add(ctx.allocateFrame());
+ }
+ newCurrentBuffer = newResultBuffers.get(newBufIdx);
+ resultFrameTupleApp.reset(newCurrentBuffer, true);
+ }
- // append key
- if (!resultFrameTupleApp.append(tuple.getFieldData(0), tuple.getFieldStart(0), invListKeyLength) ) {
- throw new IllegalStateException();
- }
+ // append key
+ if (!resultFrameTupleApp.append(tuple.getFieldData(0), tuple.getFieldStart(0), invListKeyLength)) {
+ throw new IllegalStateException();
+ }
- // append new count
- if (!resultFrameTupleApp.append(newCount) ) {
- throw new IllegalStateException();
- }
+ // append new count
+ if (!resultFrameTupleApp.append(newCount)) {
+ throw new IllegalStateException();
+ }
- resultFrameTupleApp.incrementTupleCount(1);
+ resultFrameTupleApp.incrementTupleCount(1);
- currentNumResults++;
+ currentNumResults++;
- return newBufIdx;
- }
+ return newBufIdx;
+ }
- public IFrameTupleAccessor createResultFrameTupleAccessor() {
- return new FixedSizeFrameTupleAccessor(ctx.getFrameSize(), invListFieldsWithCount);
- }
+ public IFrameTupleAccessor createResultFrameTupleAccessor() {
+ return new FixedSizeFrameTupleAccessor(ctx.getFrameSize(), invListFieldsWithCount);
+ }
- public ITupleReference createResultTupleReference() {
- return new FixedSizeTupleReference(invListFieldsWithCount);
- }
+ public ITupleReference createResultTupleReference() {
+ return new FixedSizeTupleReference(invListFieldsWithCount);
+ }
- @Override
- public List<ByteBuffer> getResultBuffers() {
- return newResultBuffers;
- }
+ @Override
+ public List<ByteBuffer> getResultBuffers() {
+ return newResultBuffers;
+ }
- @Override
- public int getNumValidResultBuffers() {
- return maxResultBufIdx + 1;
- }
+ @Override
+ public int getNumValidResultBuffers() {
+ return maxResultBufIdx + 1;
+ }
- public int getOccurrenceThreshold() {
- return occurrenceThreshold;
- }
+ public int getOccurrenceThreshold() {
+ return occurrenceThreshold;
+ }
- public void printNewResults(int maxResultBufIdx) {
- StringBuffer strBuffer = new StringBuffer();
- for(int i = 0; i <= maxResultBufIdx; i++) {
- ByteBuffer testBuf = newResultBuffers.get(i);
- resultFrameTupleAcc.reset(testBuf);
- for(int j = 0; j < resultFrameTupleAcc.getTupleCount(); j++) {
- strBuffer.append(IntegerSerializerDeserializer.getInt(resultFrameTupleAcc.getBuffer().array(), resultFrameTupleAcc.getFieldStartOffset(j, 0)) + ",");
- strBuffer.append(IntegerSerializerDeserializer.getInt(resultFrameTupleAcc.getBuffer().array(), resultFrameTupleAcc.getFieldStartOffset(j, 1)) + " ");
- }
- }
- System.out.println(strBuffer.toString());
- }
-
-
-
- // older slower code
- /*
- protected int mergeSuffixListScan(IInvertedListCursor invListCursor, List<ByteBuffer> prevResultBuffers, int maxPrevBufIdx, List<ByteBuffer> newResultBuffers, int invListIx, int numQueryTokens) throws IOException {
-
- int newBufIdx = 0;
- ByteBuffer newCurrentBuffer = newResultBuffers.get(0);
-
- int prevBufIdx = 0;
- ByteBuffer prevCurrentBuffer = prevResultBuffers.get(0);
-
- boolean advanceCursor = true;
- boolean advancePrevResult = false;
- int resultTidx = 0;
-
- MultiComparator invListCmp = invIndex.getInvListElementCmp();
-
- resultFrameTupleAcc.reset(prevCurrentBuffer);
- resultFrameTupleApp.reset(newCurrentBuffer, true);
-
- ITupleReference invListTuple = null;
-
- int invListTidx = 0;
- int invListNumTuples = invListCursor.getNumElements();
-
- if(invListCursor.hasNext()) invListCursor.next();
-
- while(invListTidx < invListNumTuples || resultTidx < resultFrameTupleAcc.getTupleCount()) {
-
- invListTuple = null;
-
- int cmp = 0;
- if(invListTidx >= invListNumTuples) {
- resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
- cmp = 1;
- } else if(resultTidx >= resultFrameTupleAcc.getTupleCount()) {
- invListTuple = invListCursor.getTuple();
- cmp = -1;
- } else {
- invListTuple = invListCursor.getTuple();
- resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
- cmp = invListCmp.compare(invListTuple, resultTuple);
- }
-
- if (cmp == 0) {
- int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(resultTuple.getFieldCount()-1)) + 1;
- newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
- advanceCursor = true;
- advancePrevResult = true;
- } else {
- if (cmp < 0) {
- advanceCursor = true;
- advancePrevResult = false;
- } else {
- int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(resultTuple.getFieldCount()-1));
- if(count + numQueryTokens - invListIx > occurrenceThreshold) {
- newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
- }
- advanceCursor = false;
- advancePrevResult = true;
- }
- }
-
- if (advancePrevResult) {
- resultTidx++;
- if (resultTidx >= resultFrameTupleAcc.getTupleCount()) {
- prevBufIdx++;
- if (prevBufIdx <= maxPrevBufIdx) {
- prevCurrentBuffer = prevResultBuffers.get(prevBufIdx);
- resultFrameTupleAcc.reset(prevCurrentBuffer);
- resultTidx = 0;
- }
- }
- }
-
- if(advanceCursor) {
- invListTidx++;
- invListCursor.next();
- }
- }
-
- return newBufIdx;
- }
-
- protected int mergePrefixList(IInvertedListCursor invListCursor, List<ByteBuffer> prevResultBuffers, int maxPrevBufIdx, List<ByteBuffer> newResultBuffers) throws IOException {
- int newBufIdx = 0;
- ByteBuffer newCurrentBuffer = newResultBuffers.get(0);
-
- int prevBufIdx = 0;
- ByteBuffer prevCurrentBuffer = prevResultBuffers.get(0);
-
- boolean advanceCursor = true;
- boolean advancePrevResult = false;
- int resultTidx = 0;
-
- MultiComparator invListCmp = invIndex.getInvListElementCmp();
-
- resultFrameTupleAcc.reset(prevCurrentBuffer);
- resultFrameTupleApp.reset(newCurrentBuffer, true);
-
- ITupleReference invListTuple = null;
-
- int invListTidx = 0;
- int invListNumTuples = invListCursor.getNumElements();
-
- if(invListCursor.hasNext()) invListCursor.next();
-
- while(invListTidx < invListNumTuples || resultTidx < resultFrameTupleAcc.getTupleCount()) {
-
- invListTuple = null;
-
- int cmp = 0;
- if(invListTidx >= invListNumTuples) {
- resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
- cmp = 1;
- } else if(resultTidx >= resultFrameTupleAcc.getTupleCount()) {
- invListTuple = invListCursor.getTuple();
- cmp = -1;
- } else {
- invListTuple = invListCursor.getTuple();
- resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
- cmp = invListCmp.compare(invListTuple, resultTuple);
- }
-
- if (cmp == 0) {
- int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(resultTuple.getFieldCount()-1)) + 1;
- newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
- advanceCursor = true;
- advancePrevResult = true;
- } else {
- if (cmp < 0) {
- int count = 1;
- newBufIdx = appendTupleToNewResults(invListTuple, count, newBufIdx);
- advanceCursor = true;
- advancePrevResult = false;
- } else {
- int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(resultTuple.getFieldCount()-1));
- newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
- advanceCursor = false;
- advancePrevResult = true;
- }
- }
-
- if (advancePrevResult) {
- resultTidx++;
- if (resultTidx >= resultFrameTupleAcc.getTupleCount()) {
- prevBufIdx++;
- if (prevBufIdx <= maxPrevBufIdx) {
- prevCurrentBuffer = prevResultBuffers.get(prevBufIdx);
- resultFrameTupleAcc.reset(prevCurrentBuffer);
- resultTidx = 0;
- }
- }
- }
-
- if(advanceCursor) {
- invListTidx++;
- invListCursor.next();
- }
- }
-
- return newBufIdx;
- }
-*/
+ public void printNewResults(int maxResultBufIdx) {
+ StringBuffer strBuffer = new StringBuffer();
+ for (int i = 0; i <= maxResultBufIdx; i++) {
+ ByteBuffer testBuf = newResultBuffers.get(i);
+ resultFrameTupleAcc.reset(testBuf);
+ for (int j = 0; j < resultFrameTupleAcc.getTupleCount(); j++) {
+ strBuffer.append(IntegerSerializerDeserializer.getInt(resultFrameTupleAcc.getBuffer().array(),
+ resultFrameTupleAcc.getFieldStartOffset(j, 0)) + ",");
+ strBuffer.append(IntegerSerializerDeserializer.getInt(resultFrameTupleAcc.getBuffer().array(),
+ resultFrameTupleAcc.getFieldStartOffset(j, 1)) + " ");
+ }
+ }
+ System.out.println(strBuffer.toString());
+ }
}
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcherSuffixProbeOnly.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcherSuffixProbeOnly.java
index 4a4e91c..18b870b 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcherSuffixProbeOnly.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcherSuffixProbeOnly.java
@@ -1,3 +1,18 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.hyracks.storage.am.invertedindex.impls;
import java.io.IOException;
@@ -16,50 +31,52 @@
IBinaryTokenizer queryTokenizer) {
super(ctx, invIndex, queryTokenizer);
}
-
+
protected int mergeSuffixLists(int numPrefixTokens, int numQueryTokens, int maxPrevBufIdx) throws IOException {
- for(int i = numPrefixTokens; i < numQueryTokens; i++) {
+ for (int i = numPrefixTokens; i < numQueryTokens; i++) {
swap = prevResultBuffers;
prevResultBuffers = newResultBuffers;
newResultBuffers = swap;
currentNumResults = 0;
-
+
invListCursors.get(i).pinPagesSync();
- maxPrevBufIdx = mergeSuffixListProbe(invListCursors.get(i), prevResultBuffers, maxPrevBufIdx, newResultBuffers, i, numQueryTokens);
- invListCursors.get(i).unpinPages();
- }
- return maxPrevBufIdx;
+ maxPrevBufIdx = mergeSuffixListProbe(invListCursors.get(i), prevResultBuffers, maxPrevBufIdx,
+ newResultBuffers, i, numQueryTokens);
+ invListCursors.get(i).unpinPages();
+ }
+ return maxPrevBufIdx;
}
-
- protected int mergeSuffixListProbe(IInvertedListCursor invListCursor, List<ByteBuffer> prevResultBuffers, int maxPrevBufIdx, List<ByteBuffer> newResultBuffers, int invListIx, int numQueryTokens) throws IOException {
-
+
+ protected int mergeSuffixListProbe(IInvertedListCursor invListCursor, List<ByteBuffer> prevResultBuffers,
+ int maxPrevBufIdx, List<ByteBuffer> newResultBuffers, int invListIx, int numQueryTokens) throws IOException {
+
int newBufIdx = 0;
ByteBuffer newCurrentBuffer = newResultBuffers.get(0);
int prevBufIdx = 0;
ByteBuffer prevCurrentBuffer = prevResultBuffers.get(0);
-
- int resultTidx = 0;
-
+
+ int resultTidx = 0;
+
MultiComparator invListCmp = invIndex.getInvListElementCmp();
-
+
resultFrameTupleAcc.reset(prevCurrentBuffer);
resultFrameTupleApp.reset(newCurrentBuffer, true);
-
- while(resultTidx < resultFrameTupleAcc.getTupleCount()) {
-
- resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
- int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(resultTuple.getFieldCount()-1));
- if(invListCursor.containsKey(resultTuple, invListCmp)) {
+ while (resultTidx < resultFrameTupleAcc.getTupleCount()) {
+
+ resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
+ int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0),
+ resultTuple.getFieldStart(resultTuple.getFieldCount() - 1));
+
+ if (invListCursor.containsKey(resultTuple, invListCmp)) {
count++;
newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
- }
- else {
- if(count + numQueryTokens - invListIx > occurrenceThreshold) {
+ } else {
+ if (count + numQueryTokens - invListIx > occurrenceThreshold) {
newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
}
- }
+ }
resultTidx++;
if (resultTidx >= resultFrameTupleAcc.getTupleCount()) {
@@ -71,7 +88,7 @@
}
}
}
-
+
return newBufIdx;
}
}
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcherSuffixScanOnly.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcherSuffixScanOnly.java
index 8db1b91..604c68d 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcherSuffixScanOnly.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcherSuffixScanOnly.java
@@ -1,3 +1,18 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.hyracks.storage.am.invertedindex.impls;
import java.io.IOException;
@@ -17,59 +32,64 @@
IBinaryTokenizer queryTokenizer) {
super(ctx, invIndex, queryTokenizer);
}
-
+
protected int mergeSuffixLists(int numPrefixTokens, int numQueryTokens, int maxPrevBufIdx) throws IOException {
- for(int i = numPrefixTokens; i < numQueryTokens; i++) {
+ for (int i = numPrefixTokens; i < numQueryTokens; i++) {
swap = prevResultBuffers;
prevResultBuffers = newResultBuffers;
newResultBuffers = swap;
currentNumResults = 0;
-
+
invListCursors.get(i).pinPagesSync();
- maxPrevBufIdx = mergeSuffixListScan(invListCursors.get(i), prevResultBuffers, maxPrevBufIdx, newResultBuffers, i, numQueryTokens);
- invListCursors.get(i).unpinPages();
- }
- return maxPrevBufIdx;
+ maxPrevBufIdx = mergeSuffixListScan(invListCursors.get(i), prevResultBuffers, maxPrevBufIdx,
+ newResultBuffers, i, numQueryTokens);
+ invListCursors.get(i).unpinPages();
+ }
+ return maxPrevBufIdx;
}
-
- protected int mergeSuffixListScan(IInvertedListCursor invListCursor, List<ByteBuffer> prevResultBuffers, int maxPrevBufIdx, List<ByteBuffer> newResultBuffers, int invListIx, int numQueryTokens) throws IOException {
-
+
+ protected int mergeSuffixListScan(IInvertedListCursor invListCursor, List<ByteBuffer> prevResultBuffers,
+ int maxPrevBufIdx, List<ByteBuffer> newResultBuffers, int invListIx, int numQueryTokens) throws IOException {
+
int newBufIdx = 0;
ByteBuffer newCurrentBuffer = newResultBuffers.get(0);
int prevBufIdx = 0;
ByteBuffer prevCurrentBuffer = prevResultBuffers.get(0);
-
+
boolean advanceCursor = true;
boolean advancePrevResult = false;
- int resultTidx = 0;
-
+ int resultTidx = 0;
+
MultiComparator invListCmp = invIndex.getInvListElementCmp();
-
+
resultFrameTupleAcc.reset(prevCurrentBuffer);
resultFrameTupleApp.reset(newCurrentBuffer, true);
-
- while(invListCursor.hasNext() && resultTidx < resultFrameTupleAcc.getTupleCount()) {
-
- if(advanceCursor) invListCursor.next();
-
+
+ while (invListCursor.hasNext() && resultTidx < resultFrameTupleAcc.getTupleCount()) {
+
+ if (advanceCursor)
+ invListCursor.next();
+
ITupleReference invListTuple = invListCursor.getTuple();
-
- resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
-
+
+ resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
+
int cmp = invListCmp.compare(invListTuple, resultTuple);
if (cmp == 0) {
- int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(resultTuple.getFieldCount()-1)) + 1;
- newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
+ int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0),
+ resultTuple.getFieldStart(resultTuple.getFieldCount() - 1)) + 1;
+ newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
advanceCursor = true;
advancePrevResult = true;
} else {
- if (cmp < 0) {
+ if (cmp < 0) {
advanceCursor = true;
advancePrevResult = false;
} else {
- int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(resultTuple.getFieldCount()-1));
- if(count + numQueryTokens - invListIx > occurrenceThreshold) {
+ int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0),
+ resultTuple.getFieldStart(resultTuple.getFieldCount() - 1));
+ if (count + numQueryTokens - invListIx > occurrenceThreshold) {
newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
}
advanceCursor = false;
@@ -87,16 +107,16 @@
resultTidx = 0;
}
}
- }
+ }
}
-
+
// append remaining elements from previous result set
- //if(resultTidx < resultFrameTupleAcc.getTupleCount()) System.out.println("APPENDING FROM RESULTS");
- while(resultTidx < resultFrameTupleAcc.getTupleCount()) {
-
- int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(resultTuple.getFieldCount()-1));
+ while (resultTidx < resultFrameTupleAcc.getTupleCount()) {
+
+ int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0),
+ resultTuple.getFieldStart(resultTuple.getFieldCount() - 1));
newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
-
+
resultTidx++;
if (resultTidx >= resultFrameTupleAcc.getTupleCount()) {
prevBufIdx++;
@@ -107,7 +127,7 @@
}
}
}
-
+
return newBufIdx;
}
}
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchmodifiers/ConjunctiveSearchModifier.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchmodifiers/ConjunctiveSearchModifier.java
index 1f1d32c..55945be 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchmodifiers/ConjunctiveSearchModifier.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchmodifiers/ConjunctiveSearchModifier.java
@@ -1,3 +1,18 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.hyracks.storage.am.invertedindex.searchmodifiers;
import java.util.Collections;
@@ -12,7 +27,7 @@
public int getOccurrenceThreshold(List<IInvertedListCursor> invListCursors) {
return invListCursors.size();
}
-
+
@Override
public int getPrefixLists(List<IInvertedListCursor> invListCursors) {
Collections.sort(invListCursors);
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchmodifiers/EditDistanceSearchModifier.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchmodifiers/EditDistanceSearchModifier.java
index 19481af..ac109b6 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchmodifiers/EditDistanceSearchModifier.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchmodifiers/EditDistanceSearchModifier.java
@@ -1,3 +1,18 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.hyracks.storage.am.invertedindex.searchmodifiers;
import java.util.Collections;
@@ -10,20 +25,20 @@
private int gramLength;
private int edThresh;
-
+
public EditDistanceSearchModifier(int gramLength, int edThresh) {
this.gramLength = gramLength;
this.edThresh = edThresh;
}
-
+
@Override
- public int getOccurrenceThreshold(List<IInvertedListCursor> invListCursors) {
+ public int getOccurrenceThreshold(List<IInvertedListCursor> invListCursors) {
return invListCursors.size() - edThresh * gramLength;
}
@Override
public int getPrefixLists(List<IInvertedListCursor> invListCursors) {
- Collections.sort(invListCursors);
+ Collections.sort(invListCursors);
return invListCursors.size() - getOccurrenceThreshold(invListCursors) + 1;
}
@@ -41,5 +56,5 @@
public void setEdThresh(int edThresh) {
this.edThresh = edThresh;
- }
+ }
}
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchmodifiers/JaccardSearchModifier.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchmodifiers/JaccardSearchModifier.java
index 72298e3..f42841c 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchmodifiers/JaccardSearchModifier.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchmodifiers/JaccardSearchModifier.java
@@ -1,3 +1,18 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.hyracks.storage.am.invertedindex.searchmodifiers;
import java.util.Collections;
@@ -7,16 +22,16 @@
import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedIndexSearchModifier;
public class JaccardSearchModifier implements IInvertedIndexSearchModifier {
-
+
private float jaccThresh;
-
+
public JaccardSearchModifier(float jaccThresh) {
this.jaccThresh = jaccThresh;
}
-
+
@Override
public int getOccurrenceThreshold(List<IInvertedListCursor> invListCursors) {
- return (int) Math.floor((float) invListCursors.size() * jaccThresh);
+ return (int) Math.floor((float) invListCursors.size() * jaccThresh);
}
@Override