Started implementing the lifecycle methods (create, activate, clear, deactivate, destroy) for the LSM inverted index.
git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_inverted_index_updates_new@1827 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
index 784aa35..ef73413 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
@@ -14,6 +14,7 @@
*/
package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.impls;
+import java.io.File;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
@@ -25,8 +26,10 @@
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeLeafFrameType;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
+import edu.uci.ics.hyracks.storage.am.btree.util.BTreeUtils;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexAccessor;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexBulkLoadContext;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexBulkLoader;
@@ -35,9 +38,11 @@
import edu.uci.ics.hyracks.storage.am.common.api.IModificationOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchPredicate;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndex;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.common.api.IndexType;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndex;
+import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponentFinalizer;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMFileManager;
@@ -46,16 +51,21 @@
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallback;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationScheduler;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndex;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexAccessor;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMMergePolicy;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMOperationTracker;
+import edu.uci.ics.hyracks.storage.am.lsm.common.freepage.InMemoryBufferCache;
import edu.uci.ics.hyracks.storage.am.lsm.common.freepage.InMemoryFreePageManager;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.BTreeFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.BlockingIOOperationCallback;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMHarness;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.impls.LSMInvertedIndexFileManager.LSMInvertedFileNameComponent;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.impls.LSMInvertedIndexFileManager.LSMRInvertedIndexFileNameComponent;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.inmemory.InMemoryInvertedIndex;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk.OnDiskInvertedIndexFactory;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk.OnDiskInvertedIndex;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk.OnDiskInvertedIndexFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexUtils;
import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
@@ -63,15 +73,15 @@
private final Logger LOGGER = Logger.getLogger(LSMInvertedIndex.class.getName());
public class LSMInvertedIndexComponent {
- private final IIndex invIndex;
+ private final IInvertedIndex invIndex;
private final BTree deleteKeysBTree;
- LSMInvertedIndexComponent(IIndex invIndex, BTree deleteKeysBTree) {
+ LSMInvertedIndexComponent(IInvertedIndex invIndex, BTree deleteKeysBTree) {
this.invIndex = invIndex;
this.deleteKeysBTree = deleteKeysBTree;
}
- public IIndex getInvIndex() {
+ public IInvertedIndex getInvIndex() {
return invIndex;
}
@@ -86,11 +96,15 @@
protected final LSMInvertedIndexComponent memComponent;
protected final IBufferCache memBufferCache;
protected final InMemoryFreePageManager memFreePageManager;
-
+ protected final IBinaryTokenizerFactory tokenizerFactory;
+ protected FileReference memDeleteKeysBTreeFile = new FileReference(new File("membtree"));
+
// On-disk components.
protected final ILSMFileManager fileManager;
// For creating inverted indexes in flush and merge.
protected final OnDiskInvertedIndexFactory diskInvIndexFactory;
+ // For creating deleted-keys BTrees in flush and merge.
+ protected final BTreeFactory diskBTreeFactory;
protected final IBufferCache diskBufferCache;
protected final IFileMapProvider diskFileMapProvider;
// List of LSMInvertedIndexComponent instances. Using Object for better sharing via
@@ -107,92 +121,210 @@
private boolean isActivated = false;
- public LSMInvertedIndex(IBufferCache memBufferCache, InMemoryFreePageManager memFreePageManager, OnDiskInvertedIndexFactory diskInvIndexFactory,
- ILSMFileManager fileManager, IFileMapProvider diskFileMapProvider, ITypeTraits[] invListTypeTraits,
+ public LSMInvertedIndex(IBufferCache memBufferCache, InMemoryFreePageManager memFreePageManager,
+ OnDiskInvertedIndexFactory diskInvIndexFactory, BTreeFactory diskBTreeFactory, ILSMFileManager fileManager,
+ IFileMapProvider diskFileMapProvider, ITypeTraits[] invListTypeTraits,
IBinaryComparatorFactory[] invListCmpFactories, ITypeTraits[] tokenTypeTraits,
- IBinaryComparatorFactory[] tokenCmpFactories, ILSMFlushController flushController,
- ILSMMergePolicy mergePolicy, ILSMOperationTracker opTracker, ILSMIOOperationScheduler ioScheduler) {
- // TODO: Finish this one properly.
- InMemoryInvertedIndex memInvIndex = null;
- BTree deleteKeysBTree = null;
+ IBinaryComparatorFactory[] tokenCmpFactories, IBinaryTokenizerFactory tokenizerFactory,
+ ILSMFlushController flushController, ILSMMergePolicy mergePolicy, ILSMOperationTracker opTracker,
+ ILSMIOOperationScheduler ioScheduler) throws IndexException {
+ InMemoryInvertedIndex memInvIndex = InvertedIndexUtils.createInMemoryBTreeInvertedindex(memBufferCache,
+ memFreePageManager, invListTypeTraits, invListCmpFactories, tokenTypeTraits, tokenCmpFactories,
+ tokenizerFactory);
+ BTree deleteKeysBTree = BTreeUtils.createBTree(memBufferCache, diskFileMapProvider, invListTypeTraits,
+ invListCmpFactories, BTreeLeafFrameType.REGULAR_NSM, memDeleteKeysBTreeFile);
memComponent = new LSMInvertedIndexComponent(memInvIndex, deleteKeysBTree);
this.memBufferCache = memBufferCache;
this.memFreePageManager = memFreePageManager;
+ this.tokenizerFactory = tokenizerFactory;
this.fileManager = fileManager;
- this.diskInvIndexFactory = diskInvIndexFactory;
+ this.diskInvIndexFactory = diskInvIndexFactory;
+ this.diskBTreeFactory = diskBTreeFactory;
this.diskBufferCache = diskInvIndexFactory.getBufferCache();
this.diskFileMapProvider = diskFileMapProvider;
this.invListTypeTraits = invListTypeTraits;
this.invListCmpFactories = invListCmpFactories;
this.tokenTypeTraits = tokenTypeTraits;
this.tokenCmpFactories = tokenCmpFactories;
-
this.lsmHarness = new LSMHarness(this, flushController, mergePolicy, opTracker, ioScheduler);
this.componentFinalizer = new InvertedIndexComponentFinalizer(diskFileMapProvider);
}
@Override
- public synchronized void create(int indexFileId) throws HyracksDataException {
+ public synchronized void create() throws HyracksDataException {
if (isActivated) {
- return;
+ throw new HyracksDataException("Failed to create the index since it is activated.");
}
-
- // TODO: What else is needed here?
- memoryInvertedIndex.create(indexFileId);
+
+ fileManager.deleteDirs();
fileManager.createDirs();
}
@Override
- public void open(int indexFileId) throws HyracksDataException {
- synchronized (this) {
- if (isOpen)
- return;
-
- isOpen = true;
- memoryInvertedIndex.open(indexFileId);
- // TODO: What else is needed here?
- // ...
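+ // TODO: Properly implement this one. The disk-component loop below still uses R-tree names (RTree, diskRTreeFactory, createDiskTree, LSMRTreeComponent) instead of the inverted-index components.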
+ public synchronized void activate() throws HyracksDataException {
+ if (isActivated) {
+ return;
}
+ ((InMemoryBufferCache) memComponent.getInvIndex().getBufferCache()).open();
+ memComponent.getInvIndex().create();
+ memComponent.getDeletedKeysBTree().create();
+ List<Object> validFileNames = fileManager.cleanupAndGetValidFiles(componentFinalizer);
+ for (Object o : validFileNames) {
+ LSMRInvertedIndexFileNameComponent component = (LSMRInvertedIndexFileNameComponent) o;
+ FileReference rtreeFile = new FileReference(new File(component.getRTreeFileName()));
+ FileReference btreeFile = new FileReference(new File(component.getBTreeFileName()));
+ RTree rtree = (RTree) createDiskTree(diskRTreeFactory, rtreeFile, false);
+ BTree btree = (BTree) createDiskTree(diskBTreeFactory, btreeFile, false);
+ LSMRTreeComponent diskComponent = new LSMRTreeComponent(rtree, btree);
+ diskComponents.add(diskComponent);
+ }
+ isActivated = true;
+ }
+
+ protected IInvertedIndex createDiskInvIndex(OnDiskInvertedIndexFactory invIndexFactory, FileReference fileRef, boolean create) throws HyracksDataException, IndexException {
+ IInvertedIndex invIndex = invIndexFactory.createIndexInstance(fileRef);
+ if (create) {
+ invIndex.create();
+ }
+ // Will be closed during cleanup of merge().
+ invIndex.activate();
+ return invIndex;
+ }
+
+ protected ITreeIndex createDiskDeletedKeysBTree(BTreeFactory btreeFactory, FileReference fileRef, boolean create) throws HyracksDataException, IndexException {
+ ITreeIndex btree = btreeFactory.createIndexInstance(fileRef);
+ if (create) {
+ btree.create();
+ }
+ // Will be closed during cleanup of merge().
+ btree.activate();
+ return btree;
+ }
+
+ @Override
+ public void clear() throws HyracksDataException {
+ if (!isActivated) {
+ throw new HyracksDataException("Failed to clear the index since it is not activated.");
+ }
+ memComponent.getInvIndex().clear();
+ memComponent.getDeletedKeysBTree().clear();
+ for (Object o : diskComponents) {
+ LSMInvertedIndexComponent component = (LSMInvertedIndexComponent) o;
+ component.getInvIndex().deactivate();
+ component.getDeletedKeysBTree().deactivate();
+ component.getInvIndex().destroy();
+ component.getDeletedKeysBTree().destroy();
+ }
+ diskComponents.clear();
}
@Override
- public void close() throws HyracksDataException {
- synchronized (this) {
- if (!isOpen) {
- return;
- }
- // TODO: What else is needed here?
- // ...
- memoryInvertedIndex.close();
- isOpen = false;
+ public synchronized void deactivate() throws HyracksDataException {
+ if (!isActivated) {
+ return;
}
+
+ isActivated = false;
+
+ BlockingIOOperationCallback blockingCallBack = new BlockingIOOperationCallback();
+ ILSMIndexAccessor accessor = (ILSMIndexAccessor) createAccessor(NoOpOperationCallback.INSTANCE,
+ NoOpOperationCallback.INSTANCE);
+ lsmHarness.getIOScheduler().scheduleOperation(accessor.createFlushOperation(blockingCallBack));
+ try {
+ blockingCallBack.waitForIO();
+ } catch (InterruptedException e) {
+ throw new HyracksDataException(e);
+ }
+
+ memComponent.getInvIndex().deactivate();
+ memComponent.getDeletedKeysBTree().deactivate();
+ memComponent.getInvIndex().destroy();
+ memComponent.getDeletedKeysBTree().destroy();
+ ((InMemoryBufferCache) memComponent.getInvIndex().getBufferCache()).close();
}
- public IIndexAccessor createAccessor() {
+ @Override
+ public synchronized void destroy() throws HyracksDataException {
+ if (isActivated) {
+ throw new HyracksDataException("Failed to destroy the index since it is activated.");
+ }
+
+ memComponent.getInvIndex().destroy();
+ memComponent.getDeletedKeysBTree().destroy();
+ for (Object o : diskComponents) {
+ LSMInvertedIndexComponent component = (LSMInvertedIndexComponent) o;
+ component.getInvIndex().destroy();
+ component.getDeletedKeysBTree().destroy();
+ }
+ fileManager.deleteDirs();
+ }
+
+ @Override
+ public IIndexAccessor createAccessor(IModificationOperationCallback modificationCallback,
+ ISearchOperationCallback searchCallback) {
+ // TODO: Make use of the operation callbacks; they are ignored for now.
return new LSMInvertedIndexAccessor(lsmHarness, createOpContext());
}
- private LSMInvertedIndexOpContext createOpContext() {
- return new LSMInvertedIndexOpContext(memoryInvertedIndex);
+ @Override
+ public void validate() throws HyracksDataException {
+ // TODO Auto-generated method stub
+
}
@Override
- public IIndexBulkLoadContext beginBulkLoad(float fillFactor) throws IndexException, HyracksDataException {
+ public long getInMemorySize() {
+ // TODO Auto-generated method stub
+ return 0;
+ }
+
+ @Override
+ public IIndexBulkLoader createBulkLoader(float fillFactor, boolean verifyInput) throws IndexException {
// TODO Auto-generated method stub
return null;
}
@Override
- public void bulkLoadAddTuple(ITupleReference tuple, IIndexBulkLoadContext ictx) throws HyracksDataException {
+ public ILSMIOOperation createMergeOperation(ILSMIOOperationCallback callback) throws HyracksDataException {
// TODO Auto-generated method stub
-
+ return null;
}
@Override
- public void endBulkLoad(IIndexBulkLoadContext ictx) throws HyracksDataException {
+ public Object merge(List<Object> mergedComponents, ILSMIOOperation operation) throws HyracksDataException,
+ IndexException {
// TODO Auto-generated method stub
+ return null;
+ }
+ @Override
+ public Object flush(ILSMIOOperation operation) throws HyracksDataException, IndexException {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ @Override
+ public ILSMFlushController getFlushController() {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ @Override
+ public ILSMOperationTracker getOperationTracker() {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ @Override
+ public ILSMIOOperationScheduler getIOScheduler() {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ private LSMInvertedIndexOpContext createOpContext() {
+ return new LSMInvertedIndexOpContext(memoryInvertedIndex);
}
@Override
@@ -211,7 +343,6 @@
// are also supported.
LSMInvertedIndexOpContext ctx = (LSMInvertedIndexOpContext) ictx;
memAccessor.insert(tuple);
-
return true;
}
@@ -315,8 +446,8 @@
@Override
public void addMergedComponent(Object newComponent, List<Object> mergedComponents) {
- diskInvertedIndexList.removeAll(mergedComponents);
- diskInvertedIndexList.addLast(newComponent);
+ diskComponents.removeAll(mergedComponents);
+ diskComponents.addLast(newComponent);
}
@Override
@@ -457,127 +588,11 @@
@Override
public List<Object> getDiskComponents() {
- return diskInvertedIndexList;
+ return diskComponents;
}
@Override
public ILSMComponentFinalizer getComponentFinalizer() {
return componentFinalizer;
}
-
- @Override
- public IInvertedListCursor createInvertedListCursor() {
- // TODO Auto-generated method stub
- return null;
- }
-
- @Override
- public void openInvertedListCursor(IInvertedListCursor listCursor, ITupleReference tupleReference)
- throws HyracksDataException, IndexException {
- // TODO Auto-generated method stub
-
- }
-
- @Override
- public IBinaryComparatorFactory[] getInvListElementCmpFactories() {
- return memoryInvertedIndex.getInvListCmpFactories();
- }
-
- @Override
- public ITypeTraits[] getTypeTraits() {
- return memoryInvertedIndex.getInvListTypeTraits();
- }
-
- @Override
- public void create() throws HyracksDataException {
- // TODO Auto-generated method stub
-
- }
-
- @Override
- public void activate() throws HyracksDataException {
- // TODO Auto-generated method stub
-
- }
-
- @Override
- public void clear() throws HyracksDataException {
- // TODO Auto-generated method stub
-
- }
-
- @Override
- public void deactivate() throws HyracksDataException {
- // TODO Auto-generated method stub
-
- }
-
- @Override
- public void destroy() throws HyracksDataException {
- // TODO Auto-generated method stub
-
- }
-
- @Override
- public IIndexAccessor createAccessor(IModificationOperationCallback modificationCallback,
- ISearchOperationCallback searchCallback) {
- // TODO Auto-generated method stub
- return null;
- }
-
- @Override
- public void validate() throws HyracksDataException {
- // TODO Auto-generated method stub
-
- }
-
- @Override
- public long getInMemorySize() {
- // TODO Auto-generated method stub
- return 0;
- }
-
- @Override
- public IIndexBulkLoader createBulkLoader(float fillFactor, boolean verifyInput) throws IndexException {
- // TODO Auto-generated method stub
- return null;
- }
-
- @Override
- public ILSMIOOperation createMergeOperation(ILSMIOOperationCallback callback) throws HyracksDataException {
- // TODO Auto-generated method stub
- return null;
- }
-
- @Override
- public Object merge(List<Object> mergedComponents, ILSMIOOperation operation) throws HyracksDataException,
- IndexException {
- // TODO Auto-generated method stub
- return null;
- }
-
- @Override
- public Object flush(ILSMIOOperation operation) throws HyracksDataException, IndexException {
- // TODO Auto-generated method stub
- return null;
- }
-
- @Override
- public ILSMFlushController getFlushController() {
- // TODO Auto-generated method stub
- return null;
- }
-
- @Override
- public ILSMOperationTracker getOperationTracker() {
- // TODO Auto-generated method stub
- return null;
- }
-
- @Override
- public ILSMIOOperationScheduler getIOScheduler() {
- // TODO Auto-generated method stub
- return null;
- }
-
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexFileManager.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexFileManager.java
index 4c893a9..213f87d 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexFileManager.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexFileManager.java
@@ -17,37 +17,53 @@
import java.io.File;
import java.io.FilenameFilter;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.control.nc.io.IOManager;
+import edu.uci.ics.hyracks.api.io.FileReference;
+import edu.uci.ics.hyracks.api.io.IIOManager;
+import edu.uci.ics.hyracks.api.io.IODeviceHandle;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndex;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponentFinalizer;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMTreeFileManager;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.TreeIndexFactory;
import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
+// TODO: Implement this one properly! For now it still manages RTree/BTree file pairs rather than inverted-index files.
public class LSMInvertedIndexFileManager extends LSMTreeFileManager {
-
- private static final String INVERTED_STRING = "i";
+ private static final String RTREE_STRING = "r";
private static final String BTREE_STRING = "b";
+ private final TreeIndexFactory<? extends ITreeIndex> rtreeFactory;
+ private final TreeIndexFactory<? extends ITreeIndex> btreeFactory;
+
private static FilenameFilter btreeFilter = new FilenameFilter() {
public boolean accept(File dir, String name) {
return !name.startsWith(".") && name.endsWith(BTREE_STRING);
}
};
-
- private static FilenameFilter invertedFilter = new FilenameFilter() {
+
+ private static FilenameFilter rtreeFilter = new FilenameFilter() {
public boolean accept(File dir, String name) {
- return !name.startsWith(".") && name.endsWith(INVERTED_STRING);
+ return !name.startsWith(".") && name.endsWith(RTREE_STRING);
}
};
-
- public LSMInvertedIndexFileManager(IOManager ioManager, IFileMapProvider fileMapProvider, String baseDir) {
- super(ioManager, fileMapProvider, baseDir);
+
+ public LSMInvertedIndexFileManager(IIOManager ioManager, IFileMapProvider fileMapProvider, FileReference file,
+ TreeIndexFactory<? extends ITreeIndex> rtreeFactory, TreeIndexFactory<? extends ITreeIndex> btreeFactory) {
+ super(ioManager, fileMapProvider, file, null);
+ this.rtreeFactory = rtreeFactory;
+ this.btreeFactory = btreeFactory;
}
@Override
public Object getRelFlushFileName() {
String baseName = (String) super.getRelFlushFileName();
- return new LSMInvertedFileNameComponent(baseName + SPLIT_STRING + INVERTED_STRING, baseName + SPLIT_STRING
+ return new LSMRInvertedIndexFileNameComponent(baseName + SPLIT_STRING + RTREE_STRING, baseName + SPLIT_STRING
+ BTREE_STRING);
}
@@ -55,127 +71,125 @@
@Override
public Object getRelMergeFileName(String firstFileName, String lastFileName) throws HyracksDataException {
String baseName = (String) super.getRelMergeFileName(firstFileName, lastFileName);
- return new LSMInvertedFileNameComponent(baseName + SPLIT_STRING + INVERTED_STRING, baseName + SPLIT_STRING
+ return new LSMRInvertedIndexFileNameComponent(baseName + SPLIT_STRING + RTREE_STRING, baseName + SPLIT_STRING
+ BTREE_STRING);
}
-// @Override
-// public List<Object> cleanupAndGetValidFiles(Object lsmComponent, ILSMComponentFinalizer componentFinalizer) throws HyracksDataException {
-// List<Object> validFiles = new ArrayList<Object>();
-// ArrayList<ComparableFileName> allInvertedFiles = new ArrayList<ComparableFileName>();
-// ArrayList<ComparableFileName> allBTreeFiles = new ArrayList<ComparableFileName>();
-// LSMInvertedComponent component = (LSMInvertedComponent) lsmComponent;
-//
-// // Gather files from all IODeviceHandles.
-// for (IODeviceHandle dev : ioManager.getIODevices()) {
-// getValidFiles(dev, btreeFilter, component.getBTree(), componentFinalizer, allBTreeFiles);
-// HashSet<String> btreeFilesSet = new HashSet<String>();
-// for (ComparableFileName cmpFileName : allBTreeFiles) {
-// int index = cmpFileName.fileName.lastIndexOf(SPLIT_STRING);
-// btreeFilesSet.add(cmpFileName.fileName.substring(0, index));
-// }
-// // List of valid Inverted files that may or may not have a BTree buddy. Will check for buddies below.
-// ArrayList<ComparableFileName> tmpAllInvertedFiles = new ArrayList<ComparableFileName>();
-// getValidFiles(dev, invertedFilter, component.getInverted(), componentFinalizer, tmpAllInvertedFiles);
-// // Look for buddy BTrees for all valid Inverteds.
-// // If no buddy is found, delete the file, otherwise add the Inverted to allInvertedFiles.
-// for (ComparableFileName cmpFileName : tmpAllInvertedFiles) {
-// int index = cmpFileName.fileName.lastIndexOf(SPLIT_STRING);
-// String file = cmpFileName.fileName.substring(0, index);
-// if (btreeFilesSet.contains(file)) {
-// allInvertedFiles.add(cmpFileName);
-// } else {
-// // Couldn't find the corresponding BTree file; thus, delete
-// // the Inverted file.
-// File invalidInvertedFile = new File(cmpFileName.fullPath);
-// invalidInvertedFile.delete();
-// }
-// }
-// }
-// // Sanity check.
-// if (allInvertedFiles.size() != allBTreeFiles.size()) {
-// throw new HyracksDataException("Unequal number of valid Inverted and BTree files found. Aborting cleanup.");
-// }
-//
-// // Trivial cases.
-// if (allInvertedFiles.isEmpty() || allBTreeFiles.isEmpty()) {
-// return validFiles;
-// }
-//
-// if (allInvertedFiles.size() == 1 && allBTreeFiles.size() == 1) {
-// validFiles.add(new LSMInvertedFileNameComponent(allInvertedFiles.get(0).fullPath, allBTreeFiles.get(0).fullPath));
-// return validFiles;
-// }
-//
-// // Sorts files names from earliest to latest timestamp.
-// Collections.sort(allInvertedFiles);
-// Collections.sort(allBTreeFiles);
-//
-// List<ComparableFileName> validComparableInvertedFiles = new ArrayList<ComparableFileName>();
-// ComparableFileName lastInverted = allInvertedFiles.get(0);
-// validComparableInvertedFiles.add(lastInverted);
-//
-// List<ComparableFileName> validComparableBTreeFiles = new ArrayList<ComparableFileName>();
-// ComparableFileName lastBTree = allBTreeFiles.get(0);
-// validComparableBTreeFiles.add(lastBTree);
-//
-// for (int i = 1; i < allInvertedFiles.size(); i++) {
-// ComparableFileName currentInverted = allInvertedFiles.get(i);
-// ComparableFileName currentBTree = allBTreeFiles.get(i);
-// // Current start timestamp is greater than last stop timestamp.
-// if (currentInverted.interval[0].compareTo(lastInverted.interval[1]) > 0
-// && currentBTree.interval[0].compareTo(lastBTree.interval[1]) > 0) {
-// validComparableInvertedFiles.add(currentInverted);
-// validComparableBTreeFiles.add(currentBTree);
-// lastInverted = currentInverted;
-// lastBTree = currentBTree;
-// } else if (currentInverted.interval[0].compareTo(lastInverted.interval[0]) >= 0
-// && currentInverted.interval[1].compareTo(lastInverted.interval[1]) <= 0
-// && currentBTree.interval[0].compareTo(lastBTree.interval[0]) >= 0
-// && currentBTree.interval[1].compareTo(lastBTree.interval[1]) <= 0) {
-// // Invalid files are completely contained in last interval.
-// File invalidInvertedFile = new File(currentInverted.fullPath);
-// invalidInvertedFile.delete();
-// File invalidBTreeFile = new File(currentBTree.fullPath);
-// invalidBTreeFile.delete();
-// } else {
-// // This scenario should not be possible.
-// throw new HyracksDataException("Found LSM files with overlapping but not contained timetamp intervals.");
-// }
-// }
-//
-// // Sort valid files in reverse lexicographical order, such that newer
-// // files come first.
-// Collections.sort(validComparableInvertedFiles, recencyCmp);
-// Collections.sort(validComparableBTreeFiles, recencyCmp);
-//
-// Iterator<ComparableFileName> invertedFileIter = validComparableInvertedFiles.iterator();
-// Iterator<ComparableFileName> btreeFileIter = validComparableBTreeFiles.iterator();
-// while (invertedFileIter.hasNext() && btreeFileIter.hasNext()) {
-// ComparableFileName cmpInvertedFileName = invertedFileIter.next();
-// ComparableFileName cmpBTreeFileName = btreeFileIter.next();
-// validFiles.add(new LSMInvertedFileNameComponent(cmpInvertedFileName.fullPath, cmpBTreeFileName.fullPath));
-// }
-//
-// return validFiles;
-// }
+ @Override
+ public List<Object> cleanupAndGetValidFiles(ILSMComponentFinalizer componentFinalizer) throws HyracksDataException {
+ List<Object> validFiles = new ArrayList<Object>();
+ ArrayList<ComparableFileName> allRTreeFiles = new ArrayList<ComparableFileName>();
+ ArrayList<ComparableFileName> allBTreeFiles = new ArrayList<ComparableFileName>();
- public class LSMInvertedFileNameComponent {
- private final String invertedFileName;
+ // Gather files from all IODeviceHandles.
+ for (IODeviceHandle dev : ioManager.getIODevices()) {
+ cleanupAndGetValidFilesInternal(dev, btreeFilter, btreeFactory, componentFinalizer, allBTreeFiles);
+ HashSet<String> btreeFilesSet = new HashSet<String>();
+ for (ComparableFileName cmpFileName : allBTreeFiles) {
+ int index = cmpFileName.fileName.lastIndexOf(SPLIT_STRING);
+ btreeFilesSet.add(cmpFileName.fileName.substring(0, index));
+ }
+ // List of valid RTree files that may or may not have a BTree buddy. Will check for buddies below.
+ ArrayList<ComparableFileName> tmpAllRTreeFiles = new ArrayList<ComparableFileName>();
+ cleanupAndGetValidFilesInternal(dev, rtreeFilter, rtreeFactory, componentFinalizer, tmpAllRTreeFiles);
+ // Look for buddy BTrees for all valid RTrees.
+ // If no buddy is found, delete the file, otherwise add the RTree to allRTreeFiles.
+ for (ComparableFileName cmpFileName : tmpAllRTreeFiles) {
+ int index = cmpFileName.fileName.lastIndexOf(SPLIT_STRING);
+ String file = cmpFileName.fileName.substring(0, index);
+ if (btreeFilesSet.contains(file)) {
+ allRTreeFiles.add(cmpFileName);
+ } else {
+ // Couldn't find the corresponding BTree file; thus, delete
+ // the RTree file.
+ File invalidRTreeFile = new File(cmpFileName.fullPath);
+ invalidRTreeFile.delete();
+ }
+ }
+ }
+ // Sanity check.
+ if (allRTreeFiles.size() != allBTreeFiles.size()) {
+ throw new HyracksDataException("Unequal number of valid RTree and BTree files found. Aborting cleanup.");
+ }
+
+ // Trivial cases.
+ if (allRTreeFiles.isEmpty() || allBTreeFiles.isEmpty()) {
+ return validFiles;
+ }
+
+ if (allRTreeFiles.size() == 1 && allBTreeFiles.size() == 1) {
+ validFiles.add(new LSMRInvertedIndexFileNameComponent(allRTreeFiles.get(0).fullPath, allBTreeFiles.get(0).fullPath));
+ return validFiles;
+ }
+
+ // Sorts files names from earliest to latest timestamp.
+ Collections.sort(allRTreeFiles);
+ Collections.sort(allBTreeFiles);
+
+ List<ComparableFileName> validComparableRTreeFiles = new ArrayList<ComparableFileName>();
+ ComparableFileName lastRTree = allRTreeFiles.get(0);
+ validComparableRTreeFiles.add(lastRTree);
+
+ List<ComparableFileName> validComparableBTreeFiles = new ArrayList<ComparableFileName>();
+ ComparableFileName lastBTree = allBTreeFiles.get(0);
+ validComparableBTreeFiles.add(lastBTree);
+
+ for (int i = 1; i < allRTreeFiles.size(); i++) {
+ ComparableFileName currentRTree = allRTreeFiles.get(i);
+ ComparableFileName currentBTree = allBTreeFiles.get(i);
+ // Current start timestamp is greater than last stop timestamp.
+ if (currentRTree.interval[0].compareTo(lastRTree.interval[1]) > 0
+ && currentBTree.interval[0].compareTo(lastBTree.interval[1]) > 0) {
+ validComparableRTreeFiles.add(currentRTree);
+ validComparableBTreeFiles.add(currentBTree);
+ lastRTree = currentRTree;
+ lastBTree = currentBTree;
+ } else if (currentRTree.interval[0].compareTo(lastRTree.interval[0]) >= 0
+ && currentRTree.interval[1].compareTo(lastRTree.interval[1]) <= 0
+ && currentBTree.interval[0].compareTo(lastBTree.interval[0]) >= 0
+ && currentBTree.interval[1].compareTo(lastBTree.interval[1]) <= 0) {
+ // Invalid files are completely contained in last interval.
+ File invalidRTreeFile = new File(currentRTree.fullPath);
+ invalidRTreeFile.delete();
+ File invalidBTreeFile = new File(currentBTree.fullPath);
+ invalidBTreeFile.delete();
+ } else {
+ // This scenario should not be possible.
+ throw new HyracksDataException("Found LSM files with overlapping but not contained timetamp intervals.");
+ }
+ }
+
+ // Sort valid files in reverse lexicographical order, such that newer
+ // files come first.
+ Collections.sort(validComparableRTreeFiles, recencyCmp);
+ Collections.sort(validComparableBTreeFiles, recencyCmp);
+
+ Iterator<ComparableFileName> rtreeFileIter = validComparableRTreeFiles.iterator();
+ Iterator<ComparableFileName> btreeFileIter = validComparableBTreeFiles.iterator();
+ while (rtreeFileIter.hasNext() && btreeFileIter.hasNext()) {
+ ComparableFileName cmpRTreeFileName = rtreeFileIter.next();
+ ComparableFileName cmpBTreeFileName = btreeFileIter.next();
+ validFiles.add(new LSMRInvertedIndexFileNameComponent(cmpRTreeFileName.fullPath, cmpBTreeFileName.fullPath));
+ }
+
+ return validFiles;
+ }
+
+ public class LSMRInvertedIndexFileNameComponent {
+ private final String rtreeFileName;
private final String btreeFileName;
- LSMInvertedFileNameComponent(String invertedFileName, String btreeFileName) {
- this.invertedFileName = invertedFileName;
+ LSMRInvertedIndexFileNameComponent(String rtreeFileName, String btreeFileName) {
+ this.rtreeFileName = rtreeFileName;
this.btreeFileName = btreeFileName;
}
- public String getInvertedFileName() {
- return invertedFileName;
+ public String getRTreeFileName() {
+ return rtreeFileName;
}
public String getBTreeFileName() {
return btreeFileName;
}
}
-
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexFactory.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexFactory.java
index 306695d..bd2605c 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexFactory.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexFactory.java
@@ -20,13 +20,13 @@
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
-import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndex;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.IndexFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListBuilder;
import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
-public class OnDiskInvertedIndexFactory extends IndexFactory<IIndex> {
+public class OnDiskInvertedIndexFactory extends IndexFactory<IInvertedIndex> {
protected final IInvertedListBuilder invListBuilder;
protected final ITypeTraits[] invListTypeTraits;
@@ -47,7 +47,7 @@
}
@Override
- public IIndex createIndexInstance(FileReference file) throws IndexException {
+ public IInvertedIndex createIndexInstance(FileReference file) throws IndexException {
FileReference btreeFile = new FileReference(new File(file.getFile().getPath() + "_btree"));
return new OnDiskInvertedIndex(bufferCache, fileMapProvider, invListBuilder, invListTypeTraits, invListCmpFactories,
tokenTypeTraits, tokenCmpFactories, btreeFile, file);