Merged hyracks_dev_next -r 1287 into trunk
git-svn-id: https://hyracks.googlecode.com/svn/trunk@1288 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks/hyracks-storage-am-invertedindex/.settings/org.eclipse.jdt.core.prefs b/hyracks/hyracks-storage-am-invertedindex/.settings/org.eclipse.jdt.core.prefs
deleted file mode 100644
index 1e91fb3..0000000
--- a/hyracks/hyracks-storage-am-invertedindex/.settings/org.eclipse.jdt.core.prefs
+++ /dev/null
@@ -1,6 +0,0 @@
-#Thu May 05 14:44:24 PDT 2011
-eclipse.preferences.version=1
-org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
-org.eclipse.jdt.core.compiler.compliance=1.6
-org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
-org.eclipse.jdt.core.compiler.source=1.6
diff --git a/hyracks/hyracks-storage-am-invertedindex/.settings/org.maven.ide.eclipse.prefs b/hyracks/hyracks-storage-am-invertedindex/.settings/org.maven.ide.eclipse.prefs
deleted file mode 100644
index e5e549a..0000000
--- a/hyracks/hyracks-storage-am-invertedindex/.settings/org.maven.ide.eclipse.prefs
+++ /dev/null
@@ -1,9 +0,0 @@
-#Thu May 05 14:44:18 PDT 2011
-activeProfiles=
-eclipse.preferences.version=1
-fullBuildGoals=process-test-resources
-includeModules=false
-resolveWorkspaceProjects=true
-resourceFilterGoals=process-resources resources\:testResources
-skipCompilerPlugin=true
-version=1
diff --git a/hyracks/hyracks-storage-am-invertedindex/pom.xml b/hyracks/hyracks-storage-am-invertedindex/pom.xml
index c9bbe20..32ce735 100644
--- a/hyracks/hyracks-storage-am-invertedindex/pom.xml
+++ b/hyracks/hyracks-storage-am-invertedindex/pom.xml
@@ -2,12 +2,12 @@
<modelVersion>4.0.0</modelVersion>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-invertedindex</artifactId>
- <version>0.1.9-SNAPSHOT</version>
+ <version>0.2.0-SNAPSHOT</version>
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks</artifactId>
- <version>0.1.9-SNAPSHOT</version>
+ <version>0.2.0-SNAPSHOT</version>
</parent>
<build>
@@ -27,35 +27,35 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-common</artifactId>
- <version>0.1.9-SNAPSHOT</version>
+ <version>0.2.0-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-common</artifactId>
- <version>0.1.9-SNAPSHOT</version>
+ <version>0.2.0-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-std</artifactId>
- <version>0.1.9-SNAPSHOT</version>
+ <version>0.2.0-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-nc</artifactId>
- <version>0.1.9-SNAPSHOT</version>
+ <version>0.2.0-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-btree</artifactId>
- <version>0.1.9-SNAPSHOT</version>
+ <version>0.2.0-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedIndexOperatorDescriptorHelper.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedIndexOperatorDescriptor.java
similarity index 60%
rename from hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedIndexOperatorDescriptorHelper.java
rename to hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedIndexOperatorDescriptor.java
index b3afe4a..1dc753e 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedIndexOperatorDescriptorHelper.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedIndexOperatorDescriptor.java
@@ -16,18 +16,14 @@
package edu.uci.ics.hyracks.storage.am.invertedindex.api;
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-import edu.uci.ics.hyracks.api.dataflow.value.ITypeTrait;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.storage.am.common.dataflow.ITreeIndexOperatorDescriptorHelper;
-import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexRegistryProvider;
-import edu.uci.ics.hyracks.storage.am.invertedindex.impls.InvertedIndex;
+import edu.uci.ics.hyracks.storage.am.common.dataflow.ITreeIndexOperatorDescriptor;
-public interface IInvertedIndexOperatorDescriptorHelper extends ITreeIndexOperatorDescriptorHelper {
- public IFileSplitProvider getInvIndexFileSplitProvider();
+public interface IInvertedIndexOperatorDescriptor extends ITreeIndexOperatorDescriptor {
+ public IBinaryComparatorFactory[] getInvListsComparatorFactories();
- public IBinaryComparatorFactory[] getInvIndexComparatorFactories();
-
- public ITypeTrait[] getInvIndexTypeTraits();
-
- public IIndexRegistryProvider<InvertedIndex> getInvIndexRegistryProvider();
+ public ITypeTraits[] getInvListsTypeTraits();
+
+ public IFileSplitProvider getInvListsFileSplitProvider();
}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedIndexSearchModifierFactory.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedIndexSearchModifierFactory.java
new file mode 100644
index 0000000..92770d6
--- /dev/null
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedIndexSearchModifierFactory.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.invertedindex.api;
+
+import java.io.Serializable;
+
+public interface IInvertedIndexSearchModifierFactory extends Serializable {
+ public IInvertedIndexSearchModifier createSearchModifier();
+}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/AbstractInvertedIndexOperatorDescriptor.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/AbstractInvertedIndexOperatorDescriptor.java
index 554f0d0..eab266d 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/AbstractInvertedIndexOperatorDescriptor.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/AbstractInvertedIndexOperatorDescriptor.java
@@ -16,79 +16,86 @@
package edu.uci.ics.hyracks.storage.am.invertedindex.dataflow;
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-import edu.uci.ics.hyracks.api.dataflow.value.ITypeTrait;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.storage.am.common.api.IPrimitiveValueProviderFactory;
-import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndex;
+import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeNSMInteriorFrameFactory;
+import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeNSMLeafFrameFactory;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrameFactory;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexTupleWriterFactory;
+import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndex;
+import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexRegistryProvider;
-import edu.uci.ics.hyracks.storage.am.common.dataflow.ITreeIndexOpHelperFactory;
-import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedIndexOperatorDescriptorHelper;
-import edu.uci.ics.hyracks.storage.am.invertedindex.impls.InvertedIndex;
+import edu.uci.ics.hyracks.storage.am.common.tuples.TypeAwareTupleWriterFactory;
+import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedIndexOperatorDescriptor;
+import edu.uci.ics.hyracks.storage.am.invertedindex.util.InvertedIndexUtils;
import edu.uci.ics.hyracks.storage.common.IStorageManagerInterface;
public abstract class AbstractInvertedIndexOperatorDescriptor extends AbstractSingleActivityOperatorDescriptor
- implements IInvertedIndexOperatorDescriptorHelper {
+ implements IInvertedIndexOperatorDescriptor {
private static final long serialVersionUID = 1L;
- // general
+ // General.
protected final IStorageManagerInterface storageManager;
+ protected final IIndexRegistryProvider<IIndex> indexRegistryProvider;
- // btree
- protected final IFileSplitProvider btreeFileSplitProvider;
- protected final IIndexRegistryProvider<ITreeIndex> treeIndexRegistryProvider;
- protected final ITreeIndexFrameFactory interiorFrameFactory;
- protected final ITreeIndexFrameFactory leafFrameFactory;
- protected final ITypeTrait[] btreeTypeTraits;
+ // Btree.
+ protected final ITreeIndexFrameFactory btreeInteriorFrameFactory;
+ protected final ITreeIndexFrameFactory btreeLeafFrameFactory;
+ protected final ITypeTraits[] btreeTypeTraits;
protected final IBinaryComparatorFactory[] btreeComparatorFactories;
- protected final ITreeIndexOpHelperFactory opHelperFactory;
+ protected final IIndexDataflowHelperFactory btreeDataflowHelperFactory;
+ protected final IFileSplitProvider btreeFileSplitProvider;
- // inverted index
- protected final IFileSplitProvider invIndexFileSplitProvider;
- protected final IIndexRegistryProvider<InvertedIndex> invIndexRegistryProvider;
- protected final ITypeTrait[] invIndexTypeTraits;
- protected final IBinaryComparatorFactory[] invIndexComparatorFactories;
+ // Inverted index.
+ protected final ITypeTraits[] invListsTypeTraits;
+ protected final IBinaryComparatorFactory[] invListComparatorFactories;
+ protected final IFileSplitProvider invListsFileSplitProvider;
public AbstractInvertedIndexOperatorDescriptor(JobSpecification spec, int inputArity, int outputArity,
RecordDescriptor recDesc, IStorageManagerInterface storageManager,
- IFileSplitProvider btreeFileSplitProvider, IIndexRegistryProvider<ITreeIndex> treeIndexRegistryProvider,
- ITreeIndexFrameFactory interiorFrameFactory, ITreeIndexFrameFactory leafFrameFactory,
- ITypeTrait[] btreeTypeTraits, IBinaryComparatorFactory[] btreeComparatorFactories, float btreeFillFactor,
- ITreeIndexOpHelperFactory opHelperFactory, IFileSplitProvider invIndexFileSplitProvider,
- IIndexRegistryProvider<InvertedIndex> invIndexRegistryProvider, ITypeTrait[] invIndexTypeTraits,
- IBinaryComparatorFactory[] invIndexComparatorFactories) {
+ IFileSplitProvider btreeFileSplitProvider, IFileSplitProvider invListsFileSplitProvider,
+ IIndexRegistryProvider<IIndex> indexRegistryProvider, ITypeTraits[] tokenTypeTraits,
+ IBinaryComparatorFactory[] tokenComparatorFactories, ITypeTraits[] invListsTypeTraits,
+ IBinaryComparatorFactory[] invListComparatorFactories,
+ IIndexDataflowHelperFactory btreeDataflowHelperFactory) {
super(spec, inputArity, outputArity);
- // general
+ // General.
this.storageManager = storageManager;
+ this.indexRegistryProvider = indexRegistryProvider;
- // btree
+ // Btree.
+ this.btreeTypeTraits = InvertedIndexUtils.getBTreeTypeTraits(tokenTypeTraits);
+ ITreeIndexTupleWriterFactory tupleWriterFactory = new TypeAwareTupleWriterFactory(btreeTypeTraits);
+ this.btreeInteriorFrameFactory = new BTreeNSMInteriorFrameFactory(tupleWriterFactory);
+ this.btreeLeafFrameFactory = new BTreeNSMLeafFrameFactory(tupleWriterFactory);
+ this.btreeComparatorFactories = tokenComparatorFactories;
+ this.btreeDataflowHelperFactory = btreeDataflowHelperFactory;
this.btreeFileSplitProvider = btreeFileSplitProvider;
- this.treeIndexRegistryProvider = treeIndexRegistryProvider;
- this.interiorFrameFactory = interiorFrameFactory;
- this.leafFrameFactory = leafFrameFactory;
- this.btreeTypeTraits = btreeTypeTraits;
- this.btreeComparatorFactories = btreeComparatorFactories;
- this.opHelperFactory = opHelperFactory;
- // inverted index
- this.invIndexFileSplitProvider = invIndexFileSplitProvider;
- this.invIndexRegistryProvider = invIndexRegistryProvider;
- this.invIndexTypeTraits = invIndexTypeTraits;
- this.invIndexComparatorFactories = invIndexComparatorFactories;
+ // Inverted index.
+ this.invListsTypeTraits = invListsTypeTraits;
+ this.invListComparatorFactories = invListComparatorFactories;
+ this.invListsFileSplitProvider = invListsFileSplitProvider;
- if (outputArity > 0)
+ if (outputArity > 0) {
recordDescriptors[0] = recDesc;
+ }
}
@Override
- public IFileSplitProvider getTreeIndexFileSplitProvider() {
+ public IFileSplitProvider getFileSplitProvider() {
return btreeFileSplitProvider;
}
+
+ @Override
+ public IFileSplitProvider getInvListsFileSplitProvider() {
+ return invListsFileSplitProvider;
+ }
@Override
public IBinaryComparatorFactory[] getTreeIndexComparatorFactories() {
@@ -96,23 +103,18 @@
}
@Override
- public IPrimitiveValueProviderFactory[] getTreeIndexValueProviderFactories() {
- return null;
- }
-
- @Override
- public ITypeTrait[] getTreeIndexTypeTraits() {
+ public ITypeTraits[] getTreeIndexTypeTraits() {
return btreeTypeTraits;
}
@Override
public ITreeIndexFrameFactory getTreeIndexInteriorFactory() {
- return interiorFrameFactory;
+ return btreeInteriorFrameFactory;
}
@Override
public ITreeIndexFrameFactory getTreeIndexLeafFactory() {
- return leafFrameFactory;
+ return btreeLeafFrameFactory;
}
@Override
@@ -121,37 +123,27 @@
}
@Override
- public IIndexRegistryProvider<ITreeIndex> getTreeIndexRegistryProvider() {
- return treeIndexRegistryProvider;
- }
-
- @Override
public RecordDescriptor getRecordDescriptor() {
return recordDescriptors[0];
}
@Override
- public IIndexRegistryProvider<InvertedIndex> getInvIndexRegistryProvider() {
- return invIndexRegistryProvider;
+ public IBinaryComparatorFactory[] getInvListsComparatorFactories() {
+ return invListComparatorFactories;
}
@Override
- public IBinaryComparatorFactory[] getInvIndexComparatorFactories() {
- return invIndexComparatorFactories;
+ public ITypeTraits[] getInvListsTypeTraits() {
+ return invListsTypeTraits;
}
@Override
- public IFileSplitProvider getInvIndexFileSplitProvider() {
- return invIndexFileSplitProvider;
+ public IIndexRegistryProvider<IIndex> getIndexRegistryProvider() {
+ return indexRegistryProvider;
}
-
+
@Override
- public ITypeTrait[] getInvIndexTypeTraits() {
- return invIndexTypeTraits;
+ public IIndexDataflowHelperFactory getIndexDataflowHelperFactory() {
+ return btreeDataflowHelperFactory;
}
-
- @Override
- public ITreeIndexOpHelperFactory getTreeIndexOpHelperFactory() {
- return opHelperFactory;
- }
-}
+}
\ No newline at end of file
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/BinaryTokenizerOperatorDescriptor.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/BinaryTokenizerOperatorDescriptor.java
index 83246d6..ffb94e7 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/BinaryTokenizerOperatorDescriptor.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/BinaryTokenizerOperatorDescriptor.java
@@ -15,12 +15,11 @@
package edu.uci.ics.hyracks.storage.am.invertedindex.dataflow;
-import edu.uci.ics.hyracks.api.context.IHyracksStageletContext;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.dataflow.IOperatorNodePushable;
import edu.uci.ics.hyracks.api.dataflow.value.IRecordDescriptorProvider;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.api.job.IOperatorEnvironment;
import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.IBinaryTokenizerFactory;
@@ -30,26 +29,26 @@
private static final long serialVersionUID = 1L;
private final IBinaryTokenizerFactory tokenizerFactory;
- // fields that will be tokenized
+ // Fields that will be tokenized.
private final int[] tokenFields;
- // operator will emit these projected fields for each token, e.g., as
+ // Operator will append these key fields to each token, e.g., as
// payload for an inverted list
- // WARNING: too many projected fields can cause significant data blowup
- private final int[] projFields;
+ // WARNING: too many key fields can cause significant data blowup.
+ private final int[] keyFields;
public BinaryTokenizerOperatorDescriptor(JobSpecification spec, RecordDescriptor recDesc,
- IBinaryTokenizerFactory tokenizerFactory, int[] tokenFields, int[] projFields) {
+ IBinaryTokenizerFactory tokenizerFactory, int[] tokenFields, int[] keyFields) {
super(spec, 1, 1);
this.tokenizerFactory = tokenizerFactory;
this.tokenFields = tokenFields;
- this.projFields = projFields;
+ this.keyFields = keyFields;
recordDescriptors[0] = recDesc;
}
@Override
- public IOperatorNodePushable createPushRuntime(IHyracksStageletContext ctx, IOperatorEnvironment env,
- IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
+ public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider,
+ int partition, int nPartitions) throws HyracksDataException {
return new BinaryTokenizerOperatorNodePushable(ctx, recordDescProvider.getInputRecordDescriptor(odId, 0),
- recordDescriptors[0], tokenizerFactory.createTokenizer(), tokenFields, projFields);
+ recordDescriptors[0], tokenizerFactory.createTokenizer(), tokenFields, keyFields);
}
}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/BinaryTokenizerOperatorNodePushable.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/BinaryTokenizerOperatorNodePushable.java
index 0647f45..d00bea6 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/BinaryTokenizerOperatorNodePushable.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/BinaryTokenizerOperatorNodePushable.java
@@ -19,7 +19,7 @@
import java.io.IOException;
import java.nio.ByteBuffer;
-import edu.uci.ics.hyracks.api.context.IHyracksStageletContext;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
@@ -32,7 +32,7 @@
public class BinaryTokenizerOperatorNodePushable extends AbstractUnaryInputUnaryOutputOperatorNodePushable {
- private final IHyracksStageletContext ctx;
+ private final IHyracksTaskContext ctx;
private final IBinaryTokenizer tokenizer;
private final int[] tokenFields;
private final int[] projFields;
@@ -45,7 +45,7 @@
private FrameTupleAppender appender;
private ByteBuffer writeBuffer;
- public BinaryTokenizerOperatorNodePushable(IHyracksStageletContext ctx, RecordDescriptor inputRecDesc,
+ public BinaryTokenizerOperatorNodePushable(IHyracksTaskContext ctx, RecordDescriptor inputRecDesc,
RecordDescriptor outputRecDesc, IBinaryTokenizer tokenizer, int[] tokenFields, int[] projFields) {
this.ctx = ctx;
this.tokenizer = tokenizer;
@@ -59,10 +59,11 @@
public void open() throws HyracksDataException {
accessor = new FrameTupleAccessor(ctx.getFrameSize(), inputRecDesc);
writeBuffer = ctx.allocateFrame();
- builder = new ArrayTupleBuilder(outputRecDesc.getFields().length);
+ builder = new ArrayTupleBuilder(outputRecDesc.getFieldCount());
builderDos = builder.getDataOutput();
appender = new FrameTupleAppender(ctx.getFrameSize());
appender.reset(writeBuffer, true);
+ writer.open();
}
@Override
@@ -119,6 +120,7 @@
}
@Override
- public void flush() throws HyracksDataException {
+ public void fail() throws HyracksDataException {
+ writer.fail();
}
}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexBulkLoadOperatorDescriptor.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexBulkLoadOperatorDescriptor.java
index d003580..e91aa08 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexBulkLoadOperatorDescriptor.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexBulkLoadOperatorDescriptor.java
@@ -15,20 +15,16 @@
package edu.uci.ics.hyracks.storage.am.invertedindex.dataflow;
-import edu.uci.ics.hyracks.api.context.IHyracksStageletContext;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.dataflow.IOperatorNodePushable;
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.IRecordDescriptorProvider;
-import edu.uci.ics.hyracks.api.dataflow.value.ITypeTrait;
-import edu.uci.ics.hyracks.api.job.IOperatorEnvironment;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndex;
-import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrameFactory;
+import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndex;
+import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexRegistryProvider;
-import edu.uci.ics.hyracks.storage.am.common.dataflow.ITreeIndexOpHelperFactory;
-import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedListBuilder;
-import edu.uci.ics.hyracks.storage.am.invertedindex.impls.InvertedIndex;
import edu.uci.ics.hyracks.storage.common.IStorageManagerInterface;
public class InvertedIndexBulkLoadOperatorDescriptor extends AbstractInvertedIndexOperatorDescriptor {
@@ -36,30 +32,22 @@
private static final long serialVersionUID = 1L;
private final int[] fieldPermutation;
- private final float btreeFillFactor;
- private final IInvertedListBuilder invListBuilder;
- public InvertedIndexBulkLoadOperatorDescriptor(JobSpecification spec, IStorageManagerInterface storageManager,
- int[] fieldPermutation, IFileSplitProvider btreeFileSplitProvider,
- IIndexRegistryProvider<ITreeIndex> treeIndexRegistryProvider, ITreeIndexFrameFactory interiorFrameFactory,
- ITreeIndexFrameFactory leafFrameFactory, ITypeTrait[] btreeTypeTraits,
- IBinaryComparatorFactory[] btreeComparatorFactories, float btreeFillFactor,
- ITreeIndexOpHelperFactory opHelperFactory, IFileSplitProvider invIndexFileSplitProvider,
- IIndexRegistryProvider<InvertedIndex> invIndexRegistryProvider, ITypeTrait[] invIndexTypeTraits,
- IBinaryComparatorFactory[] invIndexComparatorFactories, IInvertedListBuilder invListBuilder) {
- super(spec, 1, 0, null, storageManager, btreeFileSplitProvider, treeIndexRegistryProvider,
- interiorFrameFactory, leafFrameFactory, btreeTypeTraits, btreeComparatorFactories, btreeFillFactor,
- opHelperFactory, invIndexFileSplitProvider, invIndexRegistryProvider, invIndexTypeTraits,
- invIndexComparatorFactories);
+ public InvertedIndexBulkLoadOperatorDescriptor(JobSpecification spec, int[] fieldPermutation,
+ IStorageManagerInterface storageManager, IFileSplitProvider btreeFileSplitProvider,
+ IFileSplitProvider invListsFileSplitProvider, IIndexRegistryProvider<IIndex> indexRegistryProvider,
+ ITypeTraits[] tokenTypeTraits, IBinaryComparatorFactory[] tokenComparatorFactories,
+ ITypeTraits[] invListsTypeTraits, IBinaryComparatorFactory[] invListComparatorFactories,
+ IIndexDataflowHelperFactory btreeDataflowHelperFactory) {
+ super(spec, 1, 0, null, storageManager, btreeFileSplitProvider, invListsFileSplitProvider,
+ indexRegistryProvider, tokenTypeTraits, tokenComparatorFactories, invListsTypeTraits,
+ invListComparatorFactories, btreeDataflowHelperFactory);
this.fieldPermutation = fieldPermutation;
- this.btreeFillFactor = btreeFillFactor;
- this.invListBuilder = invListBuilder;
}
@Override
- public IOperatorNodePushable createPushRuntime(IHyracksStageletContext ctx, IOperatorEnvironment env,
+ public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) {
- return new InvertedIndexBulkLoadOperatorNodePushable(this, ctx, partition, fieldPermutation, btreeFillFactor,
- invListBuilder, recordDescProvider);
+ return new InvertedIndexBulkLoadOperatorNodePushable(this, ctx, partition, fieldPermutation, recordDescProvider);
}
}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexBulkLoadOperatorNodePushable.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexBulkLoadOperatorNodePushable.java
index 4969124..9dcabb4 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexBulkLoadOperatorNodePushable.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexBulkLoadOperatorNodePushable.java
@@ -16,27 +16,28 @@
import java.nio.ByteBuffer;
-import edu.uci.ics.hyracks.api.context.IHyracksStageletContext;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.dataflow.value.IRecordDescriptorProvider;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
import edu.uci.ics.hyracks.dataflow.std.base.AbstractUnaryInputSinkOperatorNodePushable;
-import edu.uci.ics.hyracks.storage.am.common.dataflow.IndexHelperOpenMode;
+import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
+import edu.uci.ics.hyracks.storage.am.common.api.PageAllocationException;
import edu.uci.ics.hyracks.storage.am.common.dataflow.PermutingFrameTupleReference;
-import edu.uci.ics.hyracks.storage.am.common.dataflow.TreeIndexOpHelper;
+import edu.uci.ics.hyracks.storage.am.common.dataflow.TreeIndexDataflowHelper;
import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedListBuilder;
+import edu.uci.ics.hyracks.storage.am.invertedindex.impls.FixedSizeElementInvertedListBuilder;
import edu.uci.ics.hyracks.storage.am.invertedindex.impls.InvertedIndex;
public class InvertedIndexBulkLoadOperatorNodePushable extends AbstractUnaryInputSinkOperatorNodePushable {
- private final TreeIndexOpHelper treeIndexOpHelper;
- private float btreeFillFactor;
-
- private final InvertedIndexOpHelper invIndexOpHelper;
- protected final IInvertedListBuilder invListBuilder;
+ private final TreeIndexDataflowHelper btreeDataflowHelper;
+ private final InvertedIndexDataflowHelper invIndexDataflowHelper;
+ private final IInvertedListBuilder invListBuilder;
+ private InvertedIndex invIndex;
private InvertedIndex.BulkLoadContext bulkLoadCtx;
- private final IHyracksStageletContext ctx;
+ private final IHyracksTaskContext ctx;
private FrameTupleAccessor accessor;
private PermutingFrameTupleReference tuple = new PermutingFrameTupleReference();
@@ -44,45 +45,49 @@
private IRecordDescriptorProvider recordDescProvider;
public InvertedIndexBulkLoadOperatorNodePushable(AbstractInvertedIndexOperatorDescriptor opDesc,
- IHyracksStageletContext ctx, int partition, int[] fieldPermutation, float btreeFillFactor,
- IInvertedListBuilder invListBuilder, IRecordDescriptorProvider recordDescProvider) {
- treeIndexOpHelper = opDesc.getTreeIndexOpHelperFactory().createTreeIndexOpHelper(opDesc, ctx, partition,
- IndexHelperOpenMode.CREATE);
- invIndexOpHelper = new InvertedIndexOpHelper(opDesc, ctx, partition, IndexHelperOpenMode.CREATE);
- this.btreeFillFactor = btreeFillFactor;
+ IHyracksTaskContext ctx, int partition, int[] fieldPermutation, IRecordDescriptorProvider recordDescProvider) {
+ btreeDataflowHelper = (TreeIndexDataflowHelper) opDesc.getIndexDataflowHelperFactory()
+ .createIndexDataflowHelper(opDesc, ctx, partition, true);
+ invIndexDataflowHelper = new InvertedIndexDataflowHelper(btreeDataflowHelper, opDesc, ctx, partition, true);
this.recordDescProvider = recordDescProvider;
this.ctx = ctx;
- this.invListBuilder = invListBuilder;
+ this.invListBuilder = new FixedSizeElementInvertedListBuilder(opDesc.getInvListsTypeTraits());
tuple.setFieldPermutation(fieldPermutation);
}
@Override
public void open() throws HyracksDataException {
- AbstractInvertedIndexOperatorDescriptor opDesc = (AbstractInvertedIndexOperatorDescriptor) treeIndexOpHelper
+ AbstractInvertedIndexOperatorDescriptor opDesc = (AbstractInvertedIndexOperatorDescriptor) btreeDataflowHelper
.getOperatorDescriptor();
RecordDescriptor recDesc = recordDescProvider.getInputRecordDescriptor(opDesc.getOperatorId(), 0);
- accessor = new FrameTupleAccessor(treeIndexOpHelper.getHyracksStageletContext().getFrameSize(), recDesc);
+ accessor = new FrameTupleAccessor(btreeDataflowHelper.getHyracksTaskContext().getFrameSize(), recDesc);
- // btree
+ // BTree.
try {
- treeIndexOpHelper.init();
- treeIndexOpHelper.getTreeIndex().open(treeIndexOpHelper.getIndexFileId());
+ btreeDataflowHelper.init();
} catch (Exception e) {
- // cleanup in case of failure
- treeIndexOpHelper.deinit();
- throw new HyracksDataException(e);
+ // Cleanup in case of failure.
+ btreeDataflowHelper.deinit();
+ if (e instanceof HyracksDataException) {
+ throw (HyracksDataException) e;
+ } else {
+ throw new HyracksDataException(e);
+ }
}
- // inverted index
+ // Inverted Index.
try {
- invIndexOpHelper.init();
- invIndexOpHelper.getInvIndex().open(invIndexOpHelper.getInvIndexFileId());
- bulkLoadCtx = invIndexOpHelper.getInvIndex().beginBulkLoad(invListBuilder, ctx.getFrameSize(),
- btreeFillFactor);
+ invIndexDataflowHelper.init();
+ invIndex = (InvertedIndex) invIndexDataflowHelper.getIndex();
+ bulkLoadCtx = invIndex.beginBulkLoad(invListBuilder, ctx.getFrameSize(), BTree.DEFAULT_FILL_FACTOR);
} catch (Exception e) {
- // cleanup in case of failure
- invIndexOpHelper.deinit();
- throw new HyracksDataException(e);
+ // Cleanup in case of failure.
+ invIndexDataflowHelper.deinit();
+ if (e instanceof HyracksDataException) {
+ throw (HyracksDataException) e;
+ } else {
+ throw new HyracksDataException(e);
+ }
}
}
@@ -92,20 +97,30 @@
int tupleCount = accessor.getTupleCount();
for (int i = 0; i < tupleCount; i++) {
tuple.reset(accessor, i);
- invIndexOpHelper.getInvIndex().bulkLoadAddTuple(bulkLoadCtx, tuple);
+ try {
+ invIndex.bulkLoadAddTuple(bulkLoadCtx, tuple);
+ } catch (PageAllocationException e) {
+ throw new HyracksDataException(e);
+ }
}
}
@Override
public void close() throws HyracksDataException {
try {
- invIndexOpHelper.getInvIndex().endBulkLoad(bulkLoadCtx);
+ invIndex.endBulkLoad(bulkLoadCtx);
+ } catch (PageAllocationException e) {
+ throw new HyracksDataException(e);
} finally {
- treeIndexOpHelper.deinit();
+ try {
+ btreeDataflowHelper.deinit();
+ } finally {
+ invIndexDataflowHelper.deinit();
+ }
}
}
@Override
- public void flush() throws HyracksDataException {
+ public void fail() throws HyracksDataException {
}
}
\ No newline at end of file
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexDataflowHelper.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexDataflowHelper.java
new file mode 100644
index 0000000..71717e4
--- /dev/null
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexDataflowHelper.java
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.storage.am.invertedindex.dataflow;
+
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.io.FileReference;
+import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
+import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
+import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndex;
+import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexOperatorDescriptor;
+import edu.uci.ics.hyracks.storage.am.common.dataflow.IndexDataflowHelper;
+import edu.uci.ics.hyracks.storage.am.common.dataflow.TreeIndexDataflowHelper;
+import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
+import edu.uci.ics.hyracks.storage.am.common.util.IndexUtils;
+import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedIndexOperatorDescriptor;
+import edu.uci.ics.hyracks.storage.am.invertedindex.impls.InvertedIndex;
+
+/**
+ * Dataflow helper that creates an {@link InvertedIndex} backed by the BTree of an accompanying
+ * {@link TreeIndexDataflowHelper}.
+ */
+public final class InvertedIndexDataflowHelper extends IndexDataflowHelper {
+    private final TreeIndexDataflowHelper btreeDataflowHelper;
+
+    public InvertedIndexDataflowHelper(TreeIndexDataflowHelper btreeDataflowHelper, IIndexOperatorDescriptor opDesc,
+            IHyracksTaskContext ctx, int partition, boolean createIfNotExists) {
+        super(opDesc, ctx, partition, createIfNotExists);
+        this.btreeDataflowHelper = btreeDataflowHelper;
+    }
+
+    /** Returns the local file of this partition's split from the inverted-lists file split provider. */
+    public FileReference getFilereference() {
+        IFileSplitProvider invListsSplits = ((AbstractInvertedIndexOperatorDescriptor) opDesc)
+                .getInvListsFileSplitProvider();
+        return invListsSplits.getFileSplits()[partition].getLocalFile();
+    }
+
+    /** Creates the inverted index from the buffer cache, the BTree, and the inverted-list traits and comparator. */
+    @Override
+    public IIndex createIndexInstance() throws HyracksDataException {
+        IInvertedIndexOperatorDescriptor descriptor = (IInvertedIndexOperatorDescriptor) opDesc;
+        MultiComparator invListCmp = IndexUtils.createMultiComparator(descriptor.getInvListsComparatorFactories());
+        // Assumes btreeDataflowHelper.init() has already been called.
+        BTree btreeIndex = (BTree) btreeDataflowHelper.getIndex();
+        return new InvertedIndex(opDesc.getStorageManager().getBufferCache(ctx), btreeIndex,
+                descriptor.getInvListsTypeTraits(), invListCmp);
+    }
+}
\ No newline at end of file
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexOpHelper.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexOpHelper.java
deleted file mode 100644
index c16cfcd..0000000
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexOpHelper.java
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- * Copyright 2009-2010 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hyracks.storage.am.invertedindex.dataflow;
-
-import edu.uci.ics.hyracks.api.context.IHyracksStageletContext;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.api.io.FileReference;
-import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
-import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndex;
-import edu.uci.ics.hyracks.storage.am.common.dataflow.ITreeIndexOperatorDescriptorHelper;
-import edu.uci.ics.hyracks.storage.am.common.dataflow.IndexHelperOpenMode;
-import edu.uci.ics.hyracks.storage.am.common.dataflow.IndexRegistry;
-import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
-import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedIndexOperatorDescriptorHelper;
-import edu.uci.ics.hyracks.storage.am.invertedindex.impls.InvertedIndex;
-import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
-import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
-
-public final class InvertedIndexOpHelper {
-
- private InvertedIndex invIndex;
- private int invIndexFileId = -1;
- private int partition;
-
- private IInvertedIndexOperatorDescriptorHelper opDesc;
- private IHyracksStageletContext ctx;
-
- private IndexHelperOpenMode mode;
-
- public InvertedIndexOpHelper(IInvertedIndexOperatorDescriptorHelper opDesc, final IHyracksStageletContext ctx,
- int partition, IndexHelperOpenMode mode) {
- this.opDesc = opDesc;
- this.ctx = ctx;
- this.mode = mode;
- this.partition = partition;
- }
-
- public void init() throws HyracksDataException {
- IBufferCache bufferCache = opDesc.getStorageManager().getBufferCache(ctx);
- IFileMapProvider fileMapProvider = opDesc.getStorageManager().getFileMapProvider(ctx);
- IFileSplitProvider fileSplitProvider = opDesc.getInvIndexFileSplitProvider();
-
- FileReference f = fileSplitProvider.getFileSplits()[partition].getLocalFile();
- boolean fileIsMapped = fileMapProvider.isMapped(f);
-
- switch (mode) {
-
- case OPEN: {
- if (!fileIsMapped) {
- throw new HyracksDataException("Trying to open inverted index from unmapped file " + f.toString());
- }
- }
- break;
-
- case CREATE:
- case ENLIST: {
- if (!fileIsMapped) {
- bufferCache.createFile(f);
- }
- }
- break;
-
- }
-
- int fileId = fileMapProvider.lookupFileId(f);
- try {
- bufferCache.openFile(fileId);
- } catch (HyracksDataException e) {
- // revert state of buffer cache since file failed to open
- if (!fileIsMapped) {
- bufferCache.deleteFile(fileId);
- }
- throw e;
- }
-
- // only set btreeFileId member when openFile() succeeds,
- // otherwise deinit() will try to close the file that failed to open
- invIndexFileId = fileId;
- IndexRegistry<InvertedIndex> invIndexRegistry = opDesc.getInvIndexRegistryProvider().getRegistry(ctx);
- invIndex = invIndexRegistry.get(invIndexFileId);
- if (invIndex == null) {
-
- // create new inverted index and register it
- invIndexRegistry.lock();
- try {
- // check if inverted index has already been registered by
- // another thread
- invIndex = invIndexRegistry.get(invIndexFileId);
- if (invIndex == null) {
- // this thread should create and register the inverted index
-
- IBinaryComparator[] comparators = new IBinaryComparator[opDesc.getInvIndexComparatorFactories().length];
- for (int i = 0; i < opDesc.getInvIndexComparatorFactories().length; i++) {
- comparators[i] = opDesc.getInvIndexComparatorFactories()[i].createBinaryComparator();
- }
-
- MultiComparator cmp = new MultiComparator(opDesc.getInvIndexTypeTraits(), comparators);
-
- // assumes btree has already been registered
- IFileSplitProvider btreeFileSplitProvider = opDesc.getTreeIndexFileSplitProvider();
- IndexRegistry<ITreeIndex> treeIndexRegistry = opDesc.getTreeIndexRegistryProvider()
- .getRegistry(ctx);
- FileReference btreeFile = btreeFileSplitProvider.getFileSplits()[partition].getLocalFile();
- boolean btreeFileIsMapped = fileMapProvider.isMapped(btreeFile);
- if (!btreeFileIsMapped) {
- throw new HyracksDataException(
- "Trying to create inverted index, but associated BTree file has not been mapped");
- }
- int btreeFileId = fileMapProvider.lookupFileId(f);
- BTree btree = (BTree) treeIndexRegistry.get(btreeFileId);
-
- invIndex = new InvertedIndex(bufferCache, btree, cmp);
- invIndex.open(invIndexFileId);
- invIndexRegistry.register(invIndexFileId, invIndex);
- }
- } finally {
- invIndexRegistry.unlock();
- }
- }
- }
-
- public void deinit() throws HyracksDataException {
- if (invIndexFileId != -1) {
- IBufferCache bufferCache = opDesc.getStorageManager().getBufferCache(ctx);
- bufferCache.closeFile(invIndexFileId);
- }
- }
-
- public InvertedIndex getInvIndex() {
- return invIndex;
- }
-
- public IHyracksStageletContext getHyracksStageletContext() {
- return ctx;
- }
-
- public ITreeIndexOperatorDescriptorHelper getOperatorDescriptor() {
- return opDesc;
- }
-
- public int getInvIndexFileId() {
- return invIndexFileId;
- }
-}
\ No newline at end of file
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexSearchOperatorDescriptor.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexSearchOperatorDescriptor.java
new file mode 100644
index 0000000..bf9899f
--- /dev/null
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexSearchOperatorDescriptor.java
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.invertedindex.dataflow;
+
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.IOperatorNodePushable;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.IRecordDescriptorProvider;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.job.JobSpecification;
+import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
+import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndex;
+import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
+import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexRegistryProvider;
+import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedIndexSearchModifier;
+import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedIndexSearchModifierFactory;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.IBinaryTokenizer;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.IBinaryTokenizerFactory;
+import edu.uci.ics.hyracks.storage.common.IStorageManagerInterface;
+
+/**
+ * Operator descriptor whose push runtime is an {@link InvertedIndexSearchOperatorNodePushable} configured with a
+ * query field, a query tokenizer factory, and a search modifier factory.
+ */
+public class InvertedIndexSearchOperatorDescriptor extends AbstractInvertedIndexOperatorDescriptor {
+    private static final long serialVersionUID = 1L;
+
+    private final int queryField;
+    private final IBinaryTokenizerFactory queryTokenizerFactory;
+    private final IInvertedIndexSearchModifierFactory searchModifierFactory;
+
+    public InvertedIndexSearchOperatorDescriptor(JobSpecification spec,
+            int queryField, IStorageManagerInterface storageManager, IFileSplitProvider btreeFileSplitProvider,
+            IFileSplitProvider invListsFileSplitProvider, IIndexRegistryProvider<IIndex> indexRegistryProvider,
+            ITypeTraits[] tokenTypeTraits, IBinaryComparatorFactory[] tokenComparatorFactories,
+            ITypeTraits[] invListsTypeTraits, IBinaryComparatorFactory[] invListComparatorFactories,
+            IIndexDataflowHelperFactory btreeDataflowHelperFactory, IBinaryTokenizerFactory queryTokenizerFactory,
+            IInvertedIndexSearchModifierFactory searchModifierFactory, RecordDescriptor recDesc) {
+        super(spec, 1, 1, recDesc, storageManager, btreeFileSplitProvider, invListsFileSplitProvider,
+                indexRegistryProvider, tokenTypeTraits, tokenComparatorFactories, invListsTypeTraits,
+                invListComparatorFactories, btreeDataflowHelperFactory);
+        this.searchModifierFactory = searchModifierFactory;
+        this.queryTokenizerFactory = queryTokenizerFactory;
+        this.queryField = queryField;
+    }
+
+    /** Creates a tokenizer and a search modifier from the factories and passes them to a new search pushable. */
+    @Override
+    public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
+            IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
+        IBinaryTokenizer queryTokenizer = queryTokenizerFactory.createTokenizer();
+        IInvertedIndexSearchModifier modifier = searchModifierFactory.createSearchModifier();
+        return new InvertedIndexSearchOperatorNodePushable(this, ctx, partition, queryField, modifier,
+                queryTokenizer, recordDescProvider);
+    }
+}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexSearchOperatorNodePushable.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexSearchOperatorNodePushable.java
new file mode 100644
index 0000000..ceaff1e
--- /dev/null
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/InvertedIndexSearchOperatorNodePushable.java
@@ -0,0 +1,215 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.invertedindex.dataflow;
+
+import java.io.DataOutput;
+import java.nio.ByteBuffer;
+
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.IRecordDescriptorProvider;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import edu.uci.ics.hyracks.dataflow.common.comm.util.FrameUtils;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.FrameTupleReference;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractUnaryInputUnaryOutputOperatorNodePushable;
+import edu.uci.ics.hyracks.storage.am.common.dataflow.TreeIndexDataflowHelper;
+import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedIndexResultCursor;
+import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedIndexSearchModifier;
+import edu.uci.ics.hyracks.storage.am.invertedindex.impls.InvertedIndex;
+import edu.uci.ics.hyracks.storage.am.invertedindex.impls.OccurrenceThresholdPanicException;
+import edu.uci.ics.hyracks.storage.am.invertedindex.impls.SearchResultCursor;
+import edu.uci.ics.hyracks.storage.am.invertedindex.impls.TOccurrenceSearcher;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.IBinaryTokenizer;
+
+/**
+ * Push runtime that, for each input tuple, searches the inverted index with a {@link TOccurrenceSearcher} and writes
+ * the fields of every result tuple to the output writer.
+ */
+public class InvertedIndexSearchOperatorNodePushable extends AbstractUnaryInputUnaryOutputOperatorNodePushable {
+    private final TreeIndexDataflowHelper btreeDataflowHelper;
+    private final InvertedIndexDataflowHelper invIndexDataflowHelper;
+    private final IHyracksTaskContext ctx;
+    private final int queryField;
+    private FrameTupleAccessor accessor;
+    private FrameTupleReference tuple;
+    private final IRecordDescriptorProvider recordDescProvider;
+    private InvertedIndex invIndex;
+
+    private final IInvertedIndexSearchModifier searchModifier;
+    private final IBinaryTokenizer queryTokenizer;
+    private TOccurrenceSearcher searcher;
+    private IInvertedIndexResultCursor resultCursor;
+
+    private ByteBuffer writeBuffer;
+    private FrameTupleAppender appender;
+    private ArrayTupleBuilder tb;
+    private DataOutput dos;
+
+    private final AbstractInvertedIndexOperatorDescriptor opDesc;
+
+    public InvertedIndexSearchOperatorNodePushable(AbstractInvertedIndexOperatorDescriptor opDesc,
+            IHyracksTaskContext ctx, int partition, int queryField, IInvertedIndexSearchModifier searchModifier,
+            IBinaryTokenizer queryTokenizer, IRecordDescriptorProvider recordDescProvider) {
+        this.opDesc = opDesc;
+        btreeDataflowHelper = (TreeIndexDataflowHelper) opDesc.getIndexDataflowHelperFactory()
+                .createIndexDataflowHelper(opDesc, ctx, partition, false);
+        invIndexDataflowHelper = new InvertedIndexDataflowHelper(btreeDataflowHelper, opDesc, ctx, partition, false);
+        this.ctx = ctx;
+        this.queryField = queryField;
+        this.searchModifier = searchModifier;
+        this.queryTokenizer = queryTokenizer;
+        this.recordDescProvider = recordDescProvider;
+    }
+
+    /**
+     * Initializes the BTree helper and then the inverted-index helper, allocates the output frame and the search
+     * state, and opens the output writer.
+     *
+     * @throws HyracksDataException
+     *             if a helper fails to initialize; the helpers touched so far are deinitialized first.
+     */
+    @Override
+    public void open() throws HyracksDataException {
+        RecordDescriptor recDesc = recordDescProvider.getInputRecordDescriptor(opDesc.getOperatorId(), 0);
+        accessor = new FrameTupleAccessor(btreeDataflowHelper.getHyracksTaskContext().getFrameSize(), recDesc);
+        tuple = new FrameTupleReference();
+        // BTree.
+        try {
+            btreeDataflowHelper.init();
+        } catch (Exception e) {
+            // Cleanup in case of failure.
+            btreeDataflowHelper.deinit();
+            throw toHyracksDataException(e);
+        }
+        // Inverted Index.
+        try {
+            invIndexDataflowHelper.init();
+            invIndex = (InvertedIndex) invIndexDataflowHelper.getIndex();
+        } catch (Exception e) {
+            // Cleanup in case of failure. The BTree helper was initialized successfully above, so it is
+            // deinitialized too instead of being left initialized after a failed open().
+            try {
+                invIndexDataflowHelper.deinit();
+            } finally {
+                btreeDataflowHelper.deinit();
+            }
+            throw toHyracksDataException(e);
+        }
+
+        writeBuffer = btreeDataflowHelper.getHyracksTaskContext().allocateFrame();
+        tb = new ArrayTupleBuilder(opDesc.getInvListsTypeTraits().length);
+        dos = tb.getDataOutput();
+        appender = new FrameTupleAppender(btreeDataflowHelper.getHyracksTaskContext().getFrameSize());
+        appender.reset(writeBuffer, true);
+
+        searcher = new TOccurrenceSearcher(ctx, invIndex, queryTokenizer);
+        resultCursor = new SearchResultCursor(searcher.createResultFrameTupleAccessor(),
+                searcher.createResultTupleReference());
+
+        writer.open();
+    }
+
+    /**
+     * Drains the result cursor, copying every field of each result tuple into the output frame; a full frame is
+     * flushed to the writer and the tuple is appended again to the emptied frame.
+     */
+    private void writeSearchResults() throws Exception {
+        // Loop-invariant: number of fields per inverted-list element.
+        int invListFields = opDesc.getInvListsTypeTraits().length;
+        while (resultCursor.hasNext()) {
+            resultCursor.next();
+            tb.reset();
+            ITupleReference invListElement = resultCursor.getTuple();
+            for (int i = 0; i < invListFields; i++) {
+                dos.write(invListElement.getFieldData(i), invListElement.getFieldStart(i),
+                        invListElement.getFieldLength(i));
+                tb.addFieldEndOffset();
+            }
+            if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
+                FrameUtils.flushFrame(writeBuffer, writer);
+                appender.reset(writeBuffer, true);
+                if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
+                    throw new IllegalStateException("Search result tuple does not fit into an empty frame");
+                }
+            }
+        }
+    }
+
+    /**
+     * Runs one search per tuple of the incoming frame and writes the results of each search to the output.
+     *
+     * @throws HyracksDataException
+     *             if searching or writing fails; an existing HyracksDataException is rethrown unwrapped.
+     */
+    @Override
+    public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
+        accessor.reset(buffer);
+        int tupleCount = accessor.getTupleCount();
+        try {
+            for (int i = 0; i < tupleCount; i++) {
+                tuple.reset(accessor, i);
+                try {
+                    // Reset the searcher once per query tuple.
+                    searcher.reset();
+                    searcher.search(resultCursor, tuple, queryField, searchModifier);
+                    writeSearchResults();
+                } catch (OccurrenceThresholdPanicException e) {
+                    // Ignore panic cases for now.
+                }
+            }
+        } catch (Exception e) {
+            throw toHyracksDataException(e);
+        }
+    }
+
+    /** Forwards the failure notification to the output writer. */
+    @Override
+    public void fail() throws HyracksDataException {
+        writer.fail();
+    }
+
+    /**
+     * Flushes any buffered result tuples, closes the output writer, and deinitializes both index helpers even if
+     * flushing or closing fails.
+     */
+    @Override
+    public void close() throws HyracksDataException {
+        try {
+            if (appender.getTupleCount() > 0) {
+                FrameUtils.flushFrame(writeBuffer, writer);
+            }
+            writer.close();
+        } finally {
+            try {
+                btreeDataflowHelper.deinit();
+            } finally {
+                invIndexDataflowHelper.deinit();
+            }
+        }
+    }
+
+    /** Returns {@code e} itself when it already is a HyracksDataException; otherwise wraps it in one. */
+    private static HyracksDataException toHyracksDataException(Exception e) {
+        if (e instanceof HyracksDataException) {
+            return (HyracksDataException) e;
+        }
+        return new HyracksDataException(e);
+    }
+}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeElementInvertedListBuilder.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeElementInvertedListBuilder.java
index 03fc9a1..643c105 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeElementInvertedListBuilder.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeElementInvertedListBuilder.java
@@ -15,7 +15,7 @@
package edu.uci.ics.hyracks.storage.am.invertedindex.impls;
-import edu.uci.ics.hyracks.api.dataflow.value.ITypeTrait;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedListBuilder;
@@ -26,10 +26,10 @@
private byte[] targetBuf;
private int pos;
- public FixedSizeElementInvertedListBuilder(ITypeTrait[] invListFields) {
+ public FixedSizeElementInvertedListBuilder(ITypeTraits[] invListFields) {
int tmp = 0;
for (int i = 0; i < invListFields.length; i++) {
- tmp += invListFields[i].getStaticallyKnownDataLength();
+ tmp += invListFields[i].getFixedLength();
}
listElementSize = tmp;
}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeElementInvertedListCursor.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeElementInvertedListCursor.java
index f7ef56e..4158019 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeElementInvertedListCursor.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeElementInvertedListCursor.java
@@ -5,7 +5,7 @@
import java.io.DataInputStream;
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
-import edu.uci.ics.hyracks.api.dataflow.value.ITypeTrait;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
@@ -32,7 +32,7 @@
private ICachedPage[] pages = new ICachedPage[10];
private int[] elementIndexes = new int[10];
- public FixedSizeElementInvertedListCursor(IBufferCache bufferCache, int fileId, ITypeTrait[] invListFields) {
+ public FixedSizeElementInvertedListCursor(IBufferCache bufferCache, int fileId, ITypeTraits[] invListFields) {
this.bufferCache = bufferCache;
this.fileId = fileId;
this.currentElementIx = 0;
@@ -40,7 +40,7 @@
int tmp = 0;
for (int i = 0; i < invListFields.length; i++) {
- tmp += invListFields[i].getStaticallyKnownDataLength();
+ tmp += invListFields[i].getFixedLength();
}
elementSize = tmp;
this.currentOff = -elementSize;
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeFrameTupleAccessor.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeFrameTupleAccessor.java
index 9858eb0..cbedc45 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeFrameTupleAccessor.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeFrameTupleAccessor.java
@@ -19,29 +19,29 @@
import edu.uci.ics.hyracks.api.comm.FrameHelper;
import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
-import edu.uci.ics.hyracks.api.dataflow.value.ITypeTrait;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
public class FixedSizeFrameTupleAccessor implements IFrameTupleAccessor {
private final int frameSize;
private ByteBuffer buffer;
- private final ITypeTrait[] fields;
+ private final ITypeTraits[] fields;
private final int[] fieldStartOffsets;
private final int tupleSize;
- public FixedSizeFrameTupleAccessor(int frameSize, ITypeTrait[] fields) {
+ public FixedSizeFrameTupleAccessor(int frameSize, ITypeTraits[] fields) {
this.frameSize = frameSize;
this.fields = fields;
this.fieldStartOffsets = new int[fields.length];
this.fieldStartOffsets[0] = 0;
for (int i = 1; i < fields.length; i++) {
- fieldStartOffsets[i] = fieldStartOffsets[i - 1] + fields[i - 1].getStaticallyKnownDataLength();
+ fieldStartOffsets[i] = fieldStartOffsets[i - 1] + fields[i - 1].getFixedLength();
}
int tmp = 0;
for (int i = 0; i < fields.length; i++) {
- tmp += fields[i].getStaticallyKnownDataLength();
+ tmp += fields[i].getFixedLength();
}
tupleSize = tmp;
}
@@ -58,12 +58,12 @@
@Override
public int getFieldEndOffset(int tupleIndex, int fIdx) {
- return getTupleStartOffset(tupleIndex) + fieldStartOffsets[fIdx] + fields[fIdx].getStaticallyKnownDataLength();
+ return getTupleStartOffset(tupleIndex) + fieldStartOffsets[fIdx] + fields[fIdx].getFixedLength();
}
@Override
public int getFieldLength(int tupleIndex, int fIdx) {
- return fields[fIdx].getStaticallyKnownDataLength();
+ return fields[fIdx].getFixedLength();
}
@Override
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeFrameTupleAppender.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeFrameTupleAppender.java
index edc2304..489ec2e 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeFrameTupleAppender.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeFrameTupleAppender.java
@@ -18,7 +18,7 @@
import java.nio.ByteBuffer;
import edu.uci.ics.hyracks.api.comm.FrameHelper;
-import edu.uci.ics.hyracks.api.dataflow.value.ITypeTrait;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
public class FixedSizeFrameTupleAppender {
@@ -29,11 +29,11 @@
private int tupleCount;
private int tupleDataEndOffset;
- public FixedSizeFrameTupleAppender(int frameSize, ITypeTrait[] fields) {
+ public FixedSizeFrameTupleAppender(int frameSize, ITypeTraits[] fields) {
this.frameSize = frameSize;
int tmp = 0;
for (int i = 0; i < fields.length; i++) {
- tmp += fields[i].getStaticallyKnownDataLength();
+ tmp += fields[i].getFixedLength();
}
tupleSize = tmp;
}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeTupleReference.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeTupleReference.java
index 248b81e..0656d69 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeTupleReference.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeTupleReference.java
@@ -15,22 +15,22 @@
package edu.uci.ics.hyracks.storage.am.invertedindex.impls;
-import edu.uci.ics.hyracks.api.dataflow.value.ITypeTrait;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
public class FixedSizeTupleReference implements ITupleReference {
- private final ITypeTrait[] typeTraits;
+ private final ITypeTraits[] typeTraits;
private final int[] fieldStartOffsets;
private byte[] data;
private int startOff;
- public FixedSizeTupleReference(ITypeTrait[] typeTraits) {
+ public FixedSizeTupleReference(ITypeTraits[] typeTraits) {
this.typeTraits = typeTraits;
this.fieldStartOffsets = new int[typeTraits.length];
this.fieldStartOffsets[0] = 0;
for (int i = 1; i < typeTraits.length; i++) {
- fieldStartOffsets[i] = fieldStartOffsets[i - 1] + typeTraits[i - 1].getStaticallyKnownDataLength();
+ fieldStartOffsets[i] = fieldStartOffsets[i - 1] + typeTraits[i - 1].getFixedLength();
}
}
@@ -51,7 +51,7 @@
@Override
public int getFieldLength(int fIdx) {
- return typeTraits[fIdx].getStaticallyKnownDataLength();
+ return typeTraits[fIdx].getFixedLength();
}
@Override
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/InvertedIndex.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/InvertedIndex.java
index 9eab110..986e57b 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/InvertedIndex.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/InvertedIndex.java
@@ -18,6 +18,7 @@
import java.nio.ByteBuffer;
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
@@ -28,10 +29,13 @@
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
-import edu.uci.ics.hyracks.storage.am.btree.impls.BTreeOpContext;
import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexBulkLoadContext;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexAccessor;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexCursor;
+import edu.uci.ics.hyracks.storage.am.common.api.PageAllocationException;
+import edu.uci.ics.hyracks.storage.am.common.api.TreeIndexException;
+import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndex;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedListBuilder;
import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedListCursor;
@@ -47,34 +51,41 @@
* implemented features: updates (insert/update/delete) Limitations: a query
* cannot exceed the size of a Hyracks frame
*/
-public class InvertedIndex {
+public class InvertedIndex implements IIndex {
private BTree btree;
private int rootPageId = 0;
private IBufferCache bufferCache;
private int fileId;
+ private final ITypeTraits[] invListTypeTraits;
private final MultiComparator invListCmp;
private final int numTokenFields;
private final int numInvListKeys;
- public InvertedIndex(IBufferCache bufferCache, BTree btree, MultiComparator invListCmp) {
+ public InvertedIndex(IBufferCache bufferCache, BTree btree, ITypeTraits[] invListTypeTraits, MultiComparator invListCmp) {
this.bufferCache = bufferCache;
this.btree = btree;
this.invListCmp = invListCmp;
+ this.invListTypeTraits = invListTypeTraits;
this.numTokenFields = btree.getMultiComparator().getKeyFieldCount();
this.numInvListKeys = invListCmp.getKeyFieldCount();
}
+ @Override
public void open(int fileId) {
this.fileId = fileId;
}
+ @Override
+ public void create(int indexFileId) throws HyracksDataException {
+ }
+
public void close() {
this.fileId = -1;
}
public BulkLoadContext beginBulkLoad(IInvertedListBuilder invListBuilder, int hyracksFrameSize,
- float btreeFillFactor) throws HyracksDataException {
+ float btreeFillFactor) throws HyracksDataException, TreeIndexException, PageAllocationException {
BulkLoadContext ctx = new BulkLoadContext(invListBuilder, hyracksFrameSize, btreeFillFactor);
ctx.init(rootPageId, fileId);
return ctx;
@@ -86,7 +97,7 @@
// the next invListCmp.getKeyFieldCount() fields in tuple are keys of the
// inverted list (e.g., primary key)
// key fields of inverted list are fixed size
- public void bulkLoadAddTuple(BulkLoadContext ctx, ITupleReference tuple) throws HyracksDataException {
+ public void bulkLoadAddTuple(BulkLoadContext ctx, ITupleReference tuple) throws HyracksDataException, PageAllocationException {
// first inverted list, copy token to baaos and start new list
if (ctx.currentInvListTokenBaaos.size() == 0) {
@@ -147,9 +158,9 @@
}
}
- public boolean openCursor(ITreeIndexCursor btreeCursor, RangePredicate btreePred, BTreeOpContext btreeOpCtx,
+ public boolean openCursor(ITreeIndexCursor btreeCursor, RangePredicate btreePred, ITreeIndexAccessor btreeAccessor,
IInvertedListCursor invListCursor) throws Exception {
- btree.search(btreeCursor, btreePred, btreeOpCtx);
+ btreeAccessor.search(btreeCursor, btreePred);
boolean ret = false;
if (btreeCursor.hasNext()) {
@@ -179,7 +190,7 @@
return ret;
}
- public void createAndInsertBTreeTuple(BulkLoadContext ctx) throws HyracksDataException {
+ public void createAndInsertBTreeTuple(BulkLoadContext ctx) throws HyracksDataException, PageAllocationException {
// build tuple
ctx.btreeTupleBuilder.reset();
ctx.btreeTupleBuilder.addField(ctx.currentInvListTokenBaaos.getByteArray(), 0,
@@ -197,10 +208,10 @@
// reset tuple reference
ctx.btreeFrameTupleReference.reset(ctx.btreeFrameTupleAccessor, 0);
- btree.bulkLoadAddTuple(ctx.btreeBulkLoadCtx, ctx.btreeFrameTupleReference);
+ btree.bulkLoadAddTuple(ctx.btreeFrameTupleReference, ctx.btreeBulkLoadCtx);
}
- public void endBulkLoad(BulkLoadContext ctx) throws HyracksDataException {
+ public void endBulkLoad(BulkLoadContext ctx) throws HyracksDataException, PageAllocationException {
// create entry in btree for last inverted list
createAndInsertBTreeTuple(ctx);
btree.endBulkLoad(ctx.btreeBulkLoadCtx);
@@ -218,6 +229,10 @@
public MultiComparator getInvListElementCmp() {
return invListCmp;
}
+
+ public ITypeTraits[] getTypeTraits() {
+ return invListTypeTraits;
+ }
public BTree getBTree() {
return btree;
@@ -235,8 +250,7 @@
private int currentInvListStartPageId;
private int currentInvListStartOffset;
private final ByteArrayAccessibleOutputStream currentInvListTokenBaaos = new ByteArrayAccessibleOutputStream();
- private final FixedSizeTupleReference currentInvListToken = new FixedSizeTupleReference(
- invListCmp.getTypeTraits());
+ private final FixedSizeTupleReference currentInvListToken = new FixedSizeTupleReference(invListTypeTraits);
private int currentPageId;
private ICachedPage currentPage;
@@ -247,7 +261,7 @@
this.invListBuilder = invListBuilder;
this.tokenCmp = btree.getMultiComparator();
this.btreeTupleBuffer = ByteBuffer.allocate(hyracksFrameSize);
- this.btreeTupleBuilder = new ArrayTupleBuilder(tokenCmp.getFieldCount());
+ this.btreeTupleBuilder = new ArrayTupleBuilder(btree.getFieldCount());
this.btreeTupleAppender = new FrameTupleAppender(hyracksFrameSize);
// TODO: serde never used, only need correct number of fields
// tuple contains (token, start page, end page, start offset, num
@@ -260,10 +274,8 @@
this.btreeFillFactor = btreeFillFactor;
}
- public void init(int startPageId, int fileId) throws HyracksDataException {
- btreeBulkLoadCtx = btree.beginBulkLoad(BTree.DEFAULT_FILL_FACTOR,
- btree.getLeafFrameFactory().createFrame(), btree.getInteriorFrameFactory().createFrame(), btree
- .getFreePageManager().getMetaDataFrameFactory().createFrame());
+ public void init(int startPageId, int fileId) throws HyracksDataException, TreeIndexException, PageAllocationException {
+ btreeBulkLoadCtx = btree.beginBulkLoad(BTree.DEFAULT_FILL_FACTOR);
currentPageId = startPageId;
currentPage = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, currentPageId), true);
currentPage.acquireWriteLatch();
@@ -285,5 +297,5 @@
currentPage = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, currentPageId), true);
currentPage.acquireWriteLatch();
}
- };
+ }
}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcher.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcher.java
index d1fba3b..b24d416 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcher.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcher.java
@@ -22,12 +22,12 @@
import java.util.List;
import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
-import edu.uci.ics.hyracks.api.context.IHyracksStageletContext;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
-import edu.uci.ics.hyracks.api.dataflow.value.ITypeTrait;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
-import edu.uci.ics.hyracks.api.dataflow.value.TypeTrait;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.data.std.primitive.IntegerPointable;
import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
@@ -36,12 +36,11 @@
import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
import edu.uci.ics.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeLeafFrame;
-import edu.uci.ics.hyracks.storage.am.btree.impls.BTreeOpContext;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTreeRangeSearchCursor;
import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexAccessor;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexCursor;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrame;
-import edu.uci.ics.hyracks.storage.am.common.ophelpers.IndexOp;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedIndexResultCursor;
import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedIndexSearchModifier;
@@ -52,7 +51,7 @@
public class TOccurrenceSearcher implements IInvertedIndexSearcher {
- protected final IHyracksStageletContext ctx;
+ protected final IHyracksTaskContext ctx;
protected final FixedSizeFrameTupleAppender resultFrameTupleApp;
protected final FixedSizeFrameTupleAccessor resultFrameTupleAcc;
protected final FixedSizeTupleReference resultTuple;
@@ -69,25 +68,25 @@
protected final ITreeIndexCursor btreeCursor;
protected final FrameTupleReference searchKey = new FrameTupleReference();
protected final RangePredicate btreePred = new RangePredicate(true, null, null, true, true, null, null);
- protected final BTreeOpContext btreeOpCtx;
+ protected final ITreeIndexAccessor btreeAccessor;
protected RecordDescriptor queryTokenRecDesc = new RecordDescriptor(
new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
- protected ArrayTupleBuilder queryTokenBuilder = new ArrayTupleBuilder(queryTokenRecDesc.getFields().length);
+ protected ArrayTupleBuilder queryTokenBuilder = new ArrayTupleBuilder(queryTokenRecDesc.getFieldCount());
protected DataOutput queryTokenDos = queryTokenBuilder.getDataOutput();
protected FrameTupleAppender queryTokenAppender;
protected ByteBuffer queryTokenFrame;
protected final InvertedIndex invIndex;
protected final IBinaryTokenizer queryTokenizer;
- protected final ITypeTrait[] invListFieldsWithCount;
+ protected final ITypeTraits[] invListFieldsWithCount;
protected int occurrenceThreshold;
protected final int cursorCacheSize = 10;
protected List<IInvertedListCursor> invListCursorCache = new ArrayList<IInvertedListCursor>(cursorCacheSize);
protected List<IInvertedListCursor> invListCursors = new ArrayList<IInvertedListCursor>(cursorCacheSize);
- public TOccurrenceSearcher(IHyracksStageletContext ctx, InvertedIndex invIndex, IBinaryTokenizer queryTokenizer) {
+ public TOccurrenceSearcher(IHyracksTaskContext ctx, InvertedIndex invIndex, IBinaryTokenizer queryTokenizer) {
this.ctx = ctx;
this.invIndex = invIndex;
this.queryTokenizer = queryTokenizer;
@@ -95,20 +94,18 @@
leafFrame = invIndex.getBTree().getLeafFrameFactory().createFrame();
interiorFrame = invIndex.getBTree().getInteriorFrameFactory().createFrame();
- btreeCursor = new BTreeRangeSearchCursor((IBTreeLeafFrame) leafFrame);
- ITypeTrait[] invListFields = invIndex.getInvListElementCmp().getTypeTraits();
- invListFieldsWithCount = new TypeTrait[invListFields.length + 1];
+ btreeCursor = new BTreeRangeSearchCursor((IBTreeLeafFrame) leafFrame, false);
+ ITypeTraits[] invListFields = invIndex.getTypeTraits();
+ invListFieldsWithCount = new ITypeTraits[invListFields.length + 1];
int tmp = 0;
for (int i = 0; i < invListFields.length; i++) {
invListFieldsWithCount[i] = invListFields[i];
- tmp += invListFields[i].getStaticallyKnownDataLength();
+ tmp += invListFields[i].getFixedLength();
}
// using an integer for counting occurrences
- invListFieldsWithCount[invListFields.length] = new TypeTrait(4);
+ invListFieldsWithCount[invListFields.length] = IntegerPointable.TYPE_TRAITS;
invListKeyLength = tmp;
- btreeOpCtx = invIndex.getBTree().createOpContext(IndexOp.SEARCH, leafFrame, interiorFrame, null);
-
resultFrameTupleApp = new FixedSizeFrameTupleAppender(ctx.getFrameSize(), invListFieldsWithCount);
resultFrameTupleAcc = new FixedSizeFrameTupleAccessor(ctx.getFrameSize(), invListFieldsWithCount);
resultTuple = new FixedSizeTupleReference(invListFieldsWithCount);
@@ -124,12 +121,13 @@
// pre-create cursor objects
for (int i = 0; i < cursorCacheSize; i++) {
invListCursorCache.add(new FixedSizeElementInvertedListCursor(invIndex.getBufferCache(), invIndex
- .getInvListsFileId(), invIndex.getInvListElementCmp().getTypeTraits()));
+ .getInvListsFileId(), invIndex.getTypeTraits()));
}
queryTokenAppender = new FrameTupleAppender(ctx.getFrameSize());
queryTokenFrame = ctx.allocateFrame();
+ btreeAccessor = invIndex.getBTree().createAccessor();
currentNumResults = 0;
}
@@ -143,12 +141,12 @@
currentNumResults = 0;
}
- public void search(IInvertedIndexResultCursor resultCursor, ITupleReference queryTuple, int queryFieldIndex,
+ public void search(IInvertedIndexResultCursor resultCursor, ITupleReference queryTuple, int queryField,
IInvertedIndexSearchModifier searchModifier) throws Exception {
queryTokenAppender.reset(queryTokenFrame, true);
- queryTokenizer.reset(queryTuple.getFieldData(queryFieldIndex), queryTuple.getFieldStart(queryFieldIndex),
- queryTuple.getFieldLength(queryFieldIndex));
+ queryTokenizer.reset(queryTuple.getFieldData(queryField), queryTuple.getFieldStart(queryField),
+ queryTuple.getFieldLength(queryField));
while (queryTokenizer.hasNext()) {
queryTokenizer.next();
@@ -174,14 +172,14 @@
int diff = numQueryTokens - invListCursorCache.size();
for (int i = 0; i < diff; i++) {
invListCursorCache.add(new FixedSizeElementInvertedListCursor(invIndex.getBufferCache(), invIndex
- .getInvListsFileId(), invIndex.getInvListElementCmp().getTypeTraits()));
+ .getInvListsFileId(), invIndex.getTypeTraits()));
}
}
invListCursors.clear();
for (int i = 0; i < numQueryTokens; i++) {
searchKey.reset(queryTokenAccessor, i);
- invIndex.openCursor(btreeCursor, btreePred, btreeOpCtx, invListCursorCache.get(i));
+ invIndex.openCursor(btreeCursor, btreePred, btreeAccessor, invListCursorCache.get(i));
invListCursors.add(invListCursorCache.get(i));
}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcherSuffixProbeOnly.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcherSuffixProbeOnly.java
index 30d67f0..0f5439b 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcherSuffixProbeOnly.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcherSuffixProbeOnly.java
@@ -19,7 +19,7 @@
import java.nio.ByteBuffer;
import java.util.List;
-import edu.uci.ics.hyracks.api.context.IHyracksStageletContext;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedListCursor;
@@ -27,7 +27,7 @@
public class TOccurrenceSearcherSuffixProbeOnly extends TOccurrenceSearcher {
- public TOccurrenceSearcherSuffixProbeOnly(IHyracksStageletContext ctx, InvertedIndex invIndex,
+ public TOccurrenceSearcherSuffixProbeOnly(IHyracksTaskContext ctx, InvertedIndex invIndex,
IBinaryTokenizer queryTokenizer) {
super(ctx, invIndex, queryTokenizer);
}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcherSuffixScanOnly.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcherSuffixScanOnly.java
index f8bc1ab..b997dc9 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcherSuffixScanOnly.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcherSuffixScanOnly.java
@@ -19,7 +19,7 @@
import java.nio.ByteBuffer;
import java.util.List;
-import edu.uci.ics.hyracks.api.context.IHyracksStageletContext;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
@@ -28,7 +28,7 @@
public class TOccurrenceSearcherSuffixScanOnly extends TOccurrenceSearcher {
- public TOccurrenceSearcherSuffixScanOnly(IHyracksStageletContext ctx, InvertedIndex invIndex,
+ public TOccurrenceSearcherSuffixScanOnly(IHyracksTaskContext ctx, InvertedIndex invIndex,
IBinaryTokenizer queryTokenizer) {
super(ctx, invIndex, queryTokenizer);
}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchmodifiers/ConjunctiveSearchModifierFactory.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchmodifiers/ConjunctiveSearchModifierFactory.java
new file mode 100644
index 0000000..0db6008
--- /dev/null
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchmodifiers/ConjunctiveSearchModifierFactory.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.invertedindex.searchmodifiers;
+
+import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedIndexSearchModifier;
+import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedIndexSearchModifierFactory;
+
+public class ConjunctiveSearchModifierFactory implements IInvertedIndexSearchModifierFactory {
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public IInvertedIndexSearchModifier createSearchModifier() {
+ return new ConjunctiveSearchModifier();
+ }
+}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchmodifiers/EditDistanceSearchModifierFactory.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchmodifiers/EditDistanceSearchModifierFactory.java
new file mode 100644
index 0000000..128d9db
--- /dev/null
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchmodifiers/EditDistanceSearchModifierFactory.java
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.invertedindex.searchmodifiers;
+
+import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedIndexSearchModifier;
+import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedIndexSearchModifierFactory;
+
+public class EditDistanceSearchModifierFactory implements IInvertedIndexSearchModifierFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ private final int gramLength;
+ private final int edThresh;
+
+ public EditDistanceSearchModifierFactory(int gramLength, int edThresh) {
+ this.gramLength = gramLength;
+ this.edThresh = edThresh;
+ }
+
+ @Override
+ public IInvertedIndexSearchModifier createSearchModifier() {
+ return new EditDistanceSearchModifier(gramLength, edThresh);
+ }
+}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchmodifiers/JaccardSearchModifierFactory.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchmodifiers/JaccardSearchModifierFactory.java
new file mode 100644
index 0000000..bd27c03
--- /dev/null
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchmodifiers/JaccardSearchModifierFactory.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.invertedindex.searchmodifiers;
+
+import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedIndexSearchModifier;
+import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedIndexSearchModifierFactory;
+
+public class JaccardSearchModifierFactory implements IInvertedIndexSearchModifierFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ private final float jaccThresh;
+
+ public JaccardSearchModifierFactory(float jaccThresh) {
+ this.jaccThresh = jaccThresh;
+ }
+
+ @Override
+ public IInvertedIndexSearchModifier createSearchModifier() {
+ return new JaccardSearchModifier(jaccThresh);
+ }
+}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/AbstractUTF8StringBinaryTokenizer.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/AbstractUTF8StringBinaryTokenizer.java
index 02142dc..be6bfb0 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/AbstractUTF8StringBinaryTokenizer.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/AbstractUTF8StringBinaryTokenizer.java
@@ -19,7 +19,7 @@
package edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers;
-import edu.uci.ics.hyracks.dataflow.common.data.util.StringUtils;
+import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
public abstract class AbstractUTF8StringBinaryTokenizer implements IBinaryTokenizer {
@@ -63,7 +63,7 @@
if (sourceHasTypeTag) {
index++; // skip type tag
}
- utf8Length = StringUtils.getUTFLen(data, index);
+ utf8Length = UTF8StringPointable.getUTFLen(data, index);
index += 2; // skip utf8 length indicator
this.data = data;
this.length = length + start;
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/AbstractUTF8Token.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/AbstractUTF8Token.java
index 0e9038a..65afa65 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/AbstractUTF8Token.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/AbstractUTF8Token.java
@@ -16,13 +16,12 @@
*
* Author: Alexander Behm <abehm (at) ics.uci.edu>
*/
-
package edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers;
import java.io.DataOutput;
import java.io.IOException;
-import edu.uci.ics.hyracks.dataflow.common.data.util.StringUtils;
+import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
public abstract class AbstractUTF8Token implements IToken {
public static final int GOLDEN_RATIO_32 = 0x09e3779b9;
@@ -59,9 +58,9 @@
int lowerCaseUTF8Len = 0;
int pos = start;
for (int i = 0; i < size; i++) {
- char c = Character.toLowerCase(StringUtils.charAt(data, pos));
- lowerCaseUTF8Len += StringUtils.getModifiedUTF8Len(c);
- pos += StringUtils.charSize(data, pos);
+ char c = Character.toLowerCase(UTF8StringPointable.charAt(data, pos));
+ lowerCaseUTF8Len += UTF8StringPointable.getModifiedUTF8Len(c);
+ pos += UTF8StringPointable.charSize(data, pos);
}
return lowerCaseUTF8Len;
}
@@ -102,4 +101,4 @@
handleCountTypeTag(dos);
dos.writeInt(tokenCount);
}
-}
+}
\ No newline at end of file
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/AbstractUTF8TokenFactory.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/AbstractUTF8TokenFactory.java
index b883c9a..3b0b82d 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/AbstractUTF8TokenFactory.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/AbstractUTF8TokenFactory.java
@@ -20,17 +20,17 @@
package edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers;
public abstract class AbstractUTF8TokenFactory implements ITokenFactory {
- private static final long serialVersionUID = 1L;
- protected final byte tokenTypeTag;
- protected final byte countTypeTag;
+ private static final long serialVersionUID = 1L;
+ protected final byte tokenTypeTag;
+ protected final byte countTypeTag;
- public AbstractUTF8TokenFactory() {
- tokenTypeTag = -1;
- countTypeTag = -1;
- }
+ public AbstractUTF8TokenFactory() {
+ tokenTypeTag = -1;
+ countTypeTag = -1;
+ }
- public AbstractUTF8TokenFactory(byte tokenTypeTag, byte countTypeTag) {
- this.tokenTypeTag = tokenTypeTag;
- this.countTypeTag = countTypeTag;
- }
+ public AbstractUTF8TokenFactory(byte tokenTypeTag, byte countTypeTag) {
+ this.tokenTypeTag = tokenTypeTag;
+ this.countTypeTag = countTypeTag;
+ }
}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java
index 8f49c74..9dacde6 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java
@@ -19,7 +19,7 @@
package edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers;
-import edu.uci.ics.hyracks.dataflow.common.data.util.StringUtils;
+import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
public class DelimitedUTF8StringBinaryTokenizer extends AbstractUTF8StringBinaryTokenizer {
@@ -31,8 +31,8 @@
@Override
public boolean hasNext() {
// skip delimiters
- while (index < length && isSeparator(StringUtils.charAt(data, index))) {
- index += StringUtils.charSize(data, index);
+ while (index < length && isSeparator(UTF8StringPointable.charAt(data, index))) {
+ index += UTF8StringPointable.charSize(data, index);
}
return index < length;
}
@@ -45,8 +45,8 @@
public void next() {
tokenLength = 0;
int currentTokenStart = index;
- while (index < length && !isSeparator(StringUtils.charAt(data, index))) {
- index += StringUtils.charSize(data, index);
+ while (index < length && !isSeparator(UTF8StringPointable.charAt(data, index))) {
+ index += UTF8StringPointable.charSize(data, index);
tokenLength++;
}
int tokenCount = 1;
@@ -60,11 +60,12 @@
int currLength = 0;
while (currLength < tokenLength) {
// case insensitive comparison
- if (Character.toLowerCase(StringUtils.charAt(data, currentTokenStart + offset)) != Character.toLowerCase(StringUtils.charAt(data, tokenStart + offset))) {
+ if (Character.toLowerCase(UTF8StringPointable.charAt(data, currentTokenStart + offset)) != Character
+ .toLowerCase(UTF8StringPointable.charAt(data, tokenStart + offset))) {
tokenCount--;
break;
}
- offset += StringUtils.charSize(data, currentTokenStart + offset);
+ offset += UTF8StringPointable.charSize(data, currentTokenStart + offset);
currLength++;
}
}
@@ -77,4 +78,4 @@
// set token
token.reset(data, currentTokenStart, index, tokenLength, tokenCount);
}
-}
+}
\ No newline at end of file
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizerFactory.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizerFactory.java
index 894501d..4a350b3 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizerFactory.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizerFactory.java
@@ -19,22 +19,24 @@
package edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers;
-public class DelimitedUTF8StringBinaryTokenizerFactory implements IBinaryTokenizerFactory {
+public class DelimitedUTF8StringBinaryTokenizerFactory implements
+ IBinaryTokenizerFactory {
- private static final long serialVersionUID = 1L;
- private final boolean ignoreTokenCount;
- private final boolean sourceHasTypeTag;
- private final ITokenFactory tokenFactory;
+ private static final long serialVersionUID = 1L;
+ private final boolean ignoreTokenCount;
+ private final boolean sourceHasTypeTag;
+ private final ITokenFactory tokenFactory;
- public DelimitedUTF8StringBinaryTokenizerFactory(boolean ignoreTokenCount, boolean sourceHasTypeTag,
- ITokenFactory tokenFactory) {
- this.ignoreTokenCount = ignoreTokenCount;
- this.sourceHasTypeTag = sourceHasTypeTag;
- this.tokenFactory = tokenFactory;
- }
+ public DelimitedUTF8StringBinaryTokenizerFactory(boolean ignoreTokenCount,
+ boolean sourceHasTypeTag, ITokenFactory tokenFactory) {
+ this.ignoreTokenCount = ignoreTokenCount;
+ this.sourceHasTypeTag = sourceHasTypeTag;
+ this.tokenFactory = tokenFactory;
+ }
- @Override
- public IBinaryTokenizer createTokenizer() {
- return new DelimitedUTF8StringBinaryTokenizer(ignoreTokenCount, sourceHasTypeTag, tokenFactory);
- }
+ @Override
+ public IBinaryTokenizer createTokenizer() {
+ return new DelimitedUTF8StringBinaryTokenizer(ignoreTokenCount,
+ sourceHasTypeTag, tokenFactory);
+ }
}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8NGramToken.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8NGramToken.java
index 43f89c7..b7bb828 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8NGramToken.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8NGramToken.java
@@ -22,7 +22,7 @@
import java.io.DataOutput;
import java.io.IOException;
-import edu.uci.ics.hyracks.dataflow.common.data.util.StringUtils;
+import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
public class HashedUTF8NGramToken extends UTF8NGramToken {
public HashedUTF8NGramToken(byte tokenTypeTag, byte countTypeTag) {
@@ -45,9 +45,9 @@
int numRegGrams = tokenLength - numPreChars - numPostChars;
int pos = start;
for (int i = 0; i < numRegGrams; i++) {
- hash ^= Character.toLowerCase(StringUtils.charAt(data, pos));
+ hash ^= Character.toLowerCase(UTF8StringPointable.charAt(data, pos));
hash *= GOLDEN_RATIO_32;
- pos += StringUtils.charSize(data, pos);
+ pos += UTF8StringPointable.charSize(data, pos);
}
// post chars
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8NGramTokenFactory.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8NGramTokenFactory.java
index f04594a..4a87793 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8NGramTokenFactory.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8NGramTokenFactory.java
@@ -21,18 +21,18 @@
public class HashedUTF8NGramTokenFactory extends AbstractUTF8TokenFactory {
- private static final long serialVersionUID = 1L;
+ private static final long serialVersionUID = 1L;
- public HashedUTF8NGramTokenFactory() {
- super();
- }
+ public HashedUTF8NGramTokenFactory() {
+ super();
+ }
- public HashedUTF8NGramTokenFactory(byte tokenTypeTag, byte countTypeTag) {
- super(tokenTypeTag, countTypeTag);
- }
+ public HashedUTF8NGramTokenFactory(byte tokenTypeTag, byte countTypeTag) {
+ super(tokenTypeTag, countTypeTag);
+ }
- @Override
- public IToken createToken() {
- return new HashedUTF8NGramToken(tokenTypeTag, countTypeTag);
- }
+ @Override
+ public IToken createToken() {
+ return new HashedUTF8NGramToken(tokenTypeTag, countTypeTag);
+ }
}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8WordToken.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8WordToken.java
index 747b65d..42ed053 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8WordToken.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8WordToken.java
@@ -22,7 +22,7 @@
import java.io.DataOutput;
import java.io.IOException;
-import edu.uci.ics.hyracks.dataflow.common.data.util.StringUtils;
+import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
public class HashedUTF8WordToken extends UTF8WordToken {
@@ -46,10 +46,11 @@
}
int offset = 0;
for (int i = 0; i < tokenLength; i++) {
- if (StringUtils.charAt(t.getData(), t.getStart() + offset) != StringUtils.charAt(data, start + offset)) {
+ if (UTF8StringPointable.charAt(t.getData(), t.getStart() + offset) != UTF8StringPointable.charAt(data,
+ start + offset)) {
return false;
}
- offset += StringUtils.charSize(data, start + offset);
+ offset += UTF8StringPointable.charSize(data, start + offset);
}
return true;
}
@@ -67,9 +68,9 @@
int pos = start;
hash = GOLDEN_RATIO_32;
for (int i = 0; i < tokenLength; i++) {
- hash ^= Character.toLowerCase(StringUtils.charAt(data, pos));
+ hash ^= Character.toLowerCase(UTF8StringPointable.charAt(data, pos));
hash *= GOLDEN_RATIO_32;
- pos += StringUtils.charSize(data, pos);
+ pos += UTF8StringPointable.charSize(data, pos);
}
hash += tokenCount;
}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8WordTokenFactory.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8WordTokenFactory.java
index e8bbde8..318f041 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8WordTokenFactory.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8WordTokenFactory.java
@@ -21,18 +21,18 @@
public class HashedUTF8WordTokenFactory extends AbstractUTF8TokenFactory {
- private static final long serialVersionUID = 1L;
+ private static final long serialVersionUID = 1L;
- public HashedUTF8WordTokenFactory() {
- super();
- }
+ public HashedUTF8WordTokenFactory() {
+ super();
+ }
- public HashedUTF8WordTokenFactory(byte tokenTypeTag, byte countTypeTag) {
- super(tokenTypeTag, countTypeTag);
- }
+ public HashedUTF8WordTokenFactory(byte tokenTypeTag, byte countTypeTag) {
+ super(tokenTypeTag, countTypeTag);
+ }
- @Override
- public IToken createToken() {
- return new HashedUTF8WordToken(tokenTypeTag, countTypeTag);
- }
+ @Override
+ public IToken createToken() {
+ return new HashedUTF8WordToken(tokenTypeTag, countTypeTag);
+ }
}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/IBinaryTokenizer.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/IBinaryTokenizer.java
index c9087d7..05c6d0b 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/IBinaryTokenizer.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/IBinaryTokenizer.java
@@ -20,11 +20,11 @@
package edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers;
public interface IBinaryTokenizer {
- public IToken getToken();
+ public IToken getToken();
- public boolean hasNext();
+ public boolean hasNext();
- public void next();
+ public void next();
- public void reset(byte[] data, int start, int length);
+ public void reset(byte[] data, int start, int length);
}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/IBinaryTokenizerFactory.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/IBinaryTokenizerFactory.java
index e6b9286..bfe78ee 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/IBinaryTokenizerFactory.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/IBinaryTokenizerFactory.java
@@ -22,5 +22,5 @@
import java.io.Serializable;
public interface IBinaryTokenizerFactory extends Serializable {
- public IBinaryTokenizer createTokenizer();
+ public IBinaryTokenizer createTokenizer();
}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/INGramToken.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/INGramToken.java
index 183c421..befc6d2 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/INGramToken.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/INGramToken.java
@@ -20,9 +20,9 @@
package edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers;
public interface INGramToken {
- public int getNumPostChars();
+ public int getNumPostChars();
- public int getNumPreChars();
+ public int getNumPreChars();
- public void setNumPrePostChars(int numPreChars, int numPostChars);
+ public void setNumPrePostChars(int numPreChars, int numPostChars);
}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/IToken.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/IToken.java
index 0f6e34a..c1840d7 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/IToken.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/IToken.java
@@ -23,17 +23,18 @@
import java.io.IOException;
public interface IToken {
- public byte[] getData();
+ public byte[] getData();
- public int getLength();
+ public int getLength();
- public int getStart();
+ public int getStart();
- public int getTokenLength();
+ public int getTokenLength();
- public void reset(byte[] data, int start, int length, int tokenLength, int tokenCount);
+ public void reset(byte[] data, int start, int length, int tokenLength,
+ int tokenCount);
- public void serializeToken(DataOutput dos) throws IOException;
+ public void serializeToken(DataOutput dos) throws IOException;
- public void serializeTokenCount(DataOutput dos) throws IOException;
+ public void serializeTokenCount(DataOutput dos) throws IOException;
}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java
index 746ee1d..fdfc02f 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java
@@ -19,7 +19,7 @@
package edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers;
-import edu.uci.ics.hyracks.dataflow.common.data.util.StringUtils;
+import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
public class NGramUTF8StringBinaryTokenizer extends AbstractUTF8StringBinaryTokenizer {
@@ -62,7 +62,7 @@
concreteToken.setNumPrePostChars(numPreChars, numPostChars);
if (numPreChars == 0) {
- index += StringUtils.charSize(data, index);
+ index += UTF8StringPointable.charSize(data, index);
}
// compute token count
@@ -73,13 +73,14 @@
tokenCount++; // assume found
int offset = 0;
for (int j = 0; j < gramLength; j++) {
- if (Character.toLowerCase(StringUtils.charAt(data, currentTokenStart + offset)) != Character.toLowerCase(StringUtils.charAt(data, tmpIndex + offset))) {
+ if (Character.toLowerCase(UTF8StringPointable.charAt(data, currentTokenStart + offset)) != Character
+ .toLowerCase(UTF8StringPointable.charAt(data, tmpIndex + offset))) {
tokenCount--;
break;
}
- offset += StringUtils.charSize(data, tmpIndex + offset);
+ offset += UTF8StringPointable.charSize(data, tmpIndex + offset);
}
- tmpIndex += StringUtils.charSize(data, tmpIndex);
+ tmpIndex += UTF8StringPointable.charSize(data, tmpIndex);
}
}
@@ -97,7 +98,7 @@
int end = pos + utf8Length;
while (pos < end) {
numChars++;
- pos += StringUtils.charSize(data, pos);
+ pos += UTF8StringPointable.charSize(data, pos);
}
if (usePrePost) {
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8NGramToken.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8NGramToken.java
index 1b124dc..59cadc8 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8NGramToken.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8NGramToken.java
@@ -22,6 +22,7 @@
import java.io.DataOutput;
import java.io.IOException;
+import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
import edu.uci.ics.hyracks.dataflow.common.data.util.StringUtils;
public class UTF8NGramToken extends AbstractUTF8Token implements INGramToken {
@@ -67,9 +68,9 @@
int pos = start;
for (int i = 0; i < numRegChars; i++) {
- char c = Character.toLowerCase(StringUtils.charAt(data, pos));
+ char c = Character.toLowerCase(UTF8StringPointable.charAt(data, pos));
StringUtils.writeCharAsModifiedUTF8(c, dos);
- pos += StringUtils.charSize(data, pos);
+ pos += UTF8StringPointable.charSize(data, pos);
}
// post chars
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8NGramTokenFactory.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8NGramTokenFactory.java
index 7a72caa..968d8e1 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8NGramTokenFactory.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8NGramTokenFactory.java
@@ -21,19 +21,19 @@
public class UTF8NGramTokenFactory extends AbstractUTF8TokenFactory {
- private static final long serialVersionUID = 1L;
+ private static final long serialVersionUID = 1L;
- public UTF8NGramTokenFactory() {
- super();
- }
+ public UTF8NGramTokenFactory() {
+ super();
+ }
- public UTF8NGramTokenFactory(byte tokenTypeTag, byte countTypeTag) {
- super(tokenTypeTag, countTypeTag);
- }
+ public UTF8NGramTokenFactory(byte tokenTypeTag, byte countTypeTag) {
+ super(tokenTypeTag, countTypeTag);
+ }
- @Override
- public IToken createToken() {
- return new UTF8NGramToken(tokenTypeTag, countTypeTag);
- }
+ @Override
+ public IToken createToken() {
+ return new UTF8NGramToken(tokenTypeTag, countTypeTag);
+ }
}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8WordToken.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8WordToken.java
index 2a74145..97a1e12 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8WordToken.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8WordToken.java
@@ -22,6 +22,7 @@
import java.io.DataOutput;
import java.io.IOException;
+import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
import edu.uci.ics.hyracks.dataflow.common.data.util.StringUtils;
public class UTF8WordToken extends AbstractUTF8Token {
@@ -38,9 +39,9 @@
StringUtils.writeUTF8Len(tokenUTF8Len, dos);
int pos = start;
for (int i = 0; i < tokenLength; i++) {
- char c = Character.toLowerCase(StringUtils.charAt(data, pos));
+ char c = Character.toLowerCase(UTF8StringPointable.charAt(data, pos));
StringUtils.writeCharAsModifiedUTF8(c, dos);
- pos += StringUtils.charSize(data, pos);
+ pos += UTF8StringPointable.charSize(data, pos);
}
}
}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8WordTokenFactory.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8WordTokenFactory.java
index 471747b..4358254 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8WordTokenFactory.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8WordTokenFactory.java
@@ -21,19 +21,19 @@
public class UTF8WordTokenFactory extends AbstractUTF8TokenFactory {
- private static final long serialVersionUID = 1L;
+ private static final long serialVersionUID = 1L;
- public UTF8WordTokenFactory() {
- super();
- }
+ public UTF8WordTokenFactory() {
+ super();
+ }
- public UTF8WordTokenFactory(byte tokenTypeTag, byte countTypeTag) {
- super(tokenTypeTag, countTypeTag);
- }
+ public UTF8WordTokenFactory(byte tokenTypeTag, byte countTypeTag) {
+ super(tokenTypeTag, countTypeTag);
+ }
- @Override
- public IToken createToken() {
- return new UTF8WordToken(tokenTypeTag, countTypeTag);
- }
+ @Override
+ public IToken createToken() {
+ return new UTF8WordToken(tokenTypeTag, countTypeTag);
+ }
}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/util/InvertedIndexUtils.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/util/InvertedIndexUtils.java
new file mode 100644
index 0000000..a1d1f06
--- /dev/null
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/util/InvertedIndexUtils.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.invertedindex.util;
+
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
+import edu.uci.ics.hyracks.data.std.primitive.IntegerPointable;
+
+public class InvertedIndexUtils {
+ // Traits of the four fixed value fields (startPageId, endPageId, startOff, numElements), all IntegerPointable.TYPE_TRAITS; getBTreeTypeTraits() appends them after the token type traits.
+ private static final ITypeTraits[] btreeValueTypeTraits = new ITypeTraits[4];
+ static {
+ // Slot 0: startPageId
+ btreeValueTypeTraits[0] = IntegerPointable.TYPE_TRAITS;
+ // Slot 1: endPageId
+ btreeValueTypeTraits[1] = IntegerPointable.TYPE_TRAITS;
+ // Slot 2: startOff
+ btreeValueTypeTraits[2] = IntegerPointable.TYPE_TRAITS;
+ // Slot 3: numElements
+ btreeValueTypeTraits[3] = IntegerPointable.TYPE_TRAITS;
+ }
+
+ public static ITypeTraits[] getBTreeTypeTraits(ITypeTraits[] tokenTypeTraits) {
+ ITypeTraits[] btreeTypeTraits = new ITypeTraits[tokenTypeTraits.length + btreeValueTypeTraits.length];
+ // Key fields come first: copy the caller's token type traits, in order, to the front of the newly allocated array.
+ for (int i = 0; i < tokenTypeTraits.length; i++) {
+ btreeTypeTraits[i] = tokenTypeTraits[i];
+ }
+ // Value fields follow the keys: place the four fixed traits starting at index tokenTypeTraits.length.
+ for (int i = 0; i < btreeValueTypeTraits.length; i++) {
+ btreeTypeTraits[i + tokenTypeTraits.length] = btreeValueTypeTraits[i];
+ }
+ return btreeTypeTraits;
+ }
+}