Fixed the logic used to clean up the LSM files in a directory. The previous logic caused some stale files to be treated as valid files, and as a result a sanity-check exception was thrown.
git-svn-id: https://hyracks.googlecode.com/svn/branches/fullstack_lsm_staging@3294 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeFileManager.java b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeFileManager.java
index 38766c3..8ef58b1 100644
--- a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeFileManager.java
+++ b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeFileManager.java
@@ -82,6 +82,18 @@
// Gather files from all IODeviceHandles.
for (IODeviceHandle dev : ioManager.getIODevices()) {
+ // List of valid BTree files.
+ cleanupAndGetValidFilesInternal(dev, btreeFilter, btreeFactory, allBTreeFiles);
+ HashSet<String> btreeFilesSet = new HashSet<String>();
+ for (ComparableFileName cmpFileName : allBTreeFiles) {
+ int index = cmpFileName.fileName.lastIndexOf(SPLIT_STRING);
+ btreeFilesSet.add(cmpFileName.fileName.substring(0, index));
+ }
+ validateFiles(dev, btreeFilesSet, allBloomFilterFiles, bloomFilterFilter, null);
+ }
+
+ // Gather files from all IODeviceHandles.
+ for (IODeviceHandle dev : ioManager.getIODevices()) {
cleanupAndGetValidFilesInternal(dev, bloomFilterFilter, null, allBloomFilterFiles);
HashSet<String> bloomFilterFilesSet = new HashSet<String>();
for (ComparableFileName cmpFileName : allBloomFilterFiles) {
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractLSMIndexFileManager.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractLSMIndexFileManager.java
index a84f8c9..a808143 100644
--- a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractLSMIndexFileManager.java
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractLSMIndexFileManager.java
@@ -23,6 +23,7 @@
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
+import java.util.HashSet;
import java.util.List;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
@@ -115,6 +116,23 @@
}
}
+ protected void validateFiles(IODeviceHandle dev, HashSet<String> groundTruth,
+ ArrayList<ComparableFileName> validFiles, FilenameFilter filter,
+ TreeIndexFactory<? extends ITreeIndex> treeFactory) throws HyracksDataException, IndexException {
+ ArrayList<ComparableFileName> tmpAllInvListsFiles = new ArrayList<ComparableFileName>();
+ cleanupAndGetValidFilesInternal(dev, filter, treeFactory, tmpAllInvListsFiles);
+ for (ComparableFileName cmpFileName : tmpAllInvListsFiles) {
+ int index = cmpFileName.fileName.lastIndexOf(SPLIT_STRING);
+ String file = cmpFileName.fileName.substring(0, index);
+ if (groundTruth.contains(file)) {
+ validFiles.add(cmpFileName);
+ } else {
+ File invalidFile = new File(cmpFileName.fullPath);
+ invalidFile.delete();
+ }
+ }
+ }
+
@Override
public void createDirs() {
for (IODeviceHandle dev : ioManager.getIODevices()) {
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
index c69a8df..34195ce 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
@@ -42,7 +42,6 @@
import edu.uci.ics.hyracks.storage.am.common.api.IModificationOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchPredicate;
-import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndex;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.common.api.TreeIndexException;
import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
@@ -708,14 +707,14 @@
int maxPage = invIndexComponent.getBloomFilter().getNumPages();
forceFlushDirtyPages(bufferCache, fileId, startPage, maxPage);
- ITreeIndex treeIndex = invIndex.getBTree();
// Flush inverted index second.
- forceFlushDirtyPages(treeIndex);
+ forceFlushDirtyPages(invIndex.getBTree());
forceFlushInvListsFileDirtyPages(invIndex);
+ markAsValidInternal(invIndex.getBTree());
+
// Flush deleted keys BTree.
forceFlushDirtyPages(invIndexComponent.getDeletedKeysBTree());
- // We use the dictionary BTree for marking the inverted index as valid.
- markAsValidInternal(treeIndex);
+ markAsValidInternal(invIndexComponent.getDeletedKeysBTree());
}
@Override
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexFileManager.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexFileManager.java
index 15a1633..ccba624 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexFileManager.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexFileManager.java
@@ -50,6 +50,12 @@
}
};
+ private static FilenameFilter invListFilter = new FilenameFilter() {
+ public boolean accept(File dir, String name) {
+ return !name.startsWith(".") && name.endsWith(INVLISTS_SUFFIX);
+ }
+ };
+
private static FilenameFilter deletedKeysBTreeFilter = new FilenameFilter() {
public boolean accept(File dir, String name) {
return !name.startsWith(".") && name.endsWith(DELETED_KEYS_BTREE_SUFFIX);
@@ -90,76 +96,40 @@
public List<LSMComponentFileReferences> cleanupAndGetValidFiles() throws HyracksDataException, IndexException {
List<LSMComponentFileReferences> validFiles = new ArrayList<LSMComponentFileReferences>();
ArrayList<ComparableFileName> allDictBTreeFiles = new ArrayList<ComparableFileName>();
+ ArrayList<ComparableFileName> allInvListsFiles = new ArrayList<ComparableFileName>();
ArrayList<ComparableFileName> allDeletedKeysBTreeFiles = new ArrayList<ComparableFileName>();
ArrayList<ComparableFileName> allBloomFilterFiles = new ArrayList<ComparableFileName>();
// Gather files from all IODeviceHandles.
for (IODeviceHandle dev : ioManager.getIODevices()) {
- cleanupAndGetValidFilesInternal(dev, bloomFilterFilter, null, allBloomFilterFiles);
- HashSet<String> bloomFilterFilesSet = new HashSet<String>();
- for (ComparableFileName cmpFileName : allBloomFilterFiles) {
- int index = cmpFileName.fileName.lastIndexOf(SPLIT_STRING);
- bloomFilterFilesSet.add(cmpFileName.fileName.substring(0, index));
- }
- // List of valid BTree files that may or may not have a bloom filter buddy. Will check for buddies below.
- ArrayList<ComparableFileName> tmpAllDeletedBTreeFiles = new ArrayList<ComparableFileName>();
- cleanupAndGetValidFilesInternal(dev, deletedKeysBTreeFilter, btreeFactory, tmpAllDeletedBTreeFiles);
-
- // Look for buddy bloom filters for all valid BTrees.
- // If no buddy is found, delete the file, otherwise add the BTree to allBTreeFiles.
+ cleanupAndGetValidFilesInternal(dev, deletedKeysBTreeFilter, btreeFactory, allDeletedKeysBTreeFiles);
HashSet<String> deletedKeysBTreeFilesSet = new HashSet<String>();
- for (ComparableFileName cmpFileName : tmpAllDeletedBTreeFiles) {
+ for (ComparableFileName cmpFileName : allDeletedKeysBTreeFiles) {
int index = cmpFileName.fileName.lastIndexOf(SPLIT_STRING);
- String file = cmpFileName.fileName.substring(0, index);
- if (bloomFilterFilesSet.contains(file)) {
- allDeletedKeysBTreeFiles.add(cmpFileName);
- deletedKeysBTreeFilesSet.add(cmpFileName.fileName.substring(0, index));
- } else {
- // Couldn't find the corresponding BTree file; thus, delete
- // the deleted-keys BTree file.
- // There is no need to delete the inverted-lists file corresponding to the non-existent
- // dictionary BTree, because we flush the dictionary BTree first. So if a dictionary BTree
- // file does not exists, then neither can its inverted-list file.
- File invalidDeletedKeysBTreeFile = new File(cmpFileName.fullPath);
- invalidDeletedKeysBTreeFile.delete();
- }
+ deletedKeysBTreeFilesSet.add(cmpFileName.fileName.substring(0, index));
}
- // We use the dictionary BTree of the inverted index for validation.
- // List of valid dictionary BTree files that may or may not have a deleted-keys BTree buddy. Will check for buddies below.
- ArrayList<ComparableFileName> tmpAllBTreeFiles = new ArrayList<ComparableFileName>();
- cleanupAndGetValidFilesInternal(dev, dictBTreeFilter, btreeFactory, tmpAllBTreeFiles);
- // Look for buddy deleted-keys BTrees for all valid dictionary BTrees.
- // If no buddy is found, delete the file, otherwise add the dictionary BTree to allBTreeFiles.
- for (ComparableFileName cmpFileName : tmpAllBTreeFiles) {
- int index = cmpFileName.fileName.lastIndexOf(SPLIT_STRING);
- String file = cmpFileName.fileName.substring(0, index);
- if (deletedKeysBTreeFilesSet.contains(file)) {
- allDictBTreeFiles.add(cmpFileName);
- } else {
- // Couldn't find the corresponding BTree file; thus, delete
- // the deleted-keys BTree file.
- // There is no need to delete the inverted-lists file corresponding to the non-existent
- // dictionary BTree, because we flush the dictionary BTree first. So if a dictionary BTree
- // file does not exists, then neither can its inverted-list file.
- File invalidDeletedKeysBTreeFile = new File(cmpFileName.fullPath);
- invalidDeletedKeysBTreeFile.delete();
- }
- }
+ // TODO: do we really need to validate the inverted-lists files, or is validating the dictionary BTrees enough?
+ validateFiles(dev, deletedKeysBTreeFilesSet, allInvListsFiles, invListFilter, null);
+ validateFiles(dev, deletedKeysBTreeFilesSet, allDictBTreeFiles, dictBTreeFilter, btreeFactory);
+ validateFiles(dev, deletedKeysBTreeFilesSet, allBloomFilterFiles, bloomFilterFilter, null);
}
// Sanity check.
- if (allDictBTreeFiles.size() != allDeletedKeysBTreeFiles.size()
+ if (allDictBTreeFiles.size() != allInvListsFiles.size()
+ || allDictBTreeFiles.size() != allDeletedKeysBTreeFiles.size()
|| allDictBTreeFiles.size() != allBloomFilterFiles.size()) {
throw new HyracksDataException(
- "Unequal number of valid Dictionary BTree, Deleted BTree, and Bloom Filter files found. Aborting cleanup.");
+ "Unequal number of valid Dictionary BTree, Inverted Lists, Deleted BTree, and Bloom Filter files found. Aborting cleanup.");
}
// Trivial cases.
- if (allDictBTreeFiles.isEmpty() || allDeletedKeysBTreeFiles.isEmpty() || allBloomFilterFiles.isEmpty()) {
+ if (allDictBTreeFiles.isEmpty() || allInvListsFiles.isEmpty() || allDeletedKeysBTreeFiles.isEmpty()
+ || allBloomFilterFiles.isEmpty()) {
return validFiles;
}
- if (allDictBTreeFiles.size() == 1 && allDeletedKeysBTreeFiles.size() == 1 && allBloomFilterFiles.size() == 1) {
+ if (allDictBTreeFiles.size() == 1 && allInvListsFiles.size() == 1 && allDeletedKeysBTreeFiles.size() == 1
+ && allBloomFilterFiles.size() == 1) {
validFiles.add(new LSMComponentFileReferences(allDictBTreeFiles.get(0).fileRef, allDeletedKeysBTreeFiles
.get(0).fileRef, allBloomFilterFiles.get(0).fileRef));
return validFiles;
diff --git a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFileManager.java b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFileManager.java
index e698990..851235e 100644
--- a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFileManager.java
+++ b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFileManager.java
@@ -95,50 +95,14 @@
// Gather files from all IODeviceHandles.
for (IODeviceHandle dev : ioManager.getIODevices()) {
- cleanupAndGetValidFilesInternal(dev, bloomFilterFilter, null, allBloomFilterFiles);
- HashSet<String> bloomFilterFilesSet = new HashSet<String>();
- for (ComparableFileName cmpFileName : allBloomFilterFiles) {
- int index = cmpFileName.fileName.lastIndexOf(SPLIT_STRING);
- bloomFilterFilesSet.add(cmpFileName.fileName.substring(0, index));
- }
-
- // List of valid BTree files that may or may not have a bloom filter buddy. Will check for buddies below.
- ArrayList<ComparableFileName> tmpAllBTreeFiles = new ArrayList<ComparableFileName>();
- cleanupAndGetValidFilesInternal(dev, btreeFilter, btreeFactory, tmpAllBTreeFiles);
- // Look for buddy bloom filters for all valid BTrees.
- // If no buddy is found, delete the file, otherwise add the BTree to allBTreeFiles.
+ cleanupAndGetValidFilesInternal(dev, btreeFilter, btreeFactory, allBTreeFiles);
HashSet<String> btreeFilesSet = new HashSet<String>();
- for (ComparableFileName cmpFileName : tmpAllBTreeFiles) {
+ for (ComparableFileName cmpFileName : allBTreeFiles) {
int index = cmpFileName.fileName.lastIndexOf(SPLIT_STRING);
- String file = cmpFileName.fileName.substring(0, index);
- if (bloomFilterFilesSet.contains(file)) {
- allBTreeFiles.add(cmpFileName);
- btreeFilesSet.add(cmpFileName.fileName.substring(0, index));
- } else {
- // Couldn't find the corresponding bloom filter file; thus, delete
- // the BTree file.
- File invalidBTreeFile = new File(cmpFileName.fullPath);
- invalidBTreeFile.delete();
- }
+ btreeFilesSet.add(cmpFileName.fileName.substring(0, index));
}
-
- // List of valid RTree files that may or may not have a BTree buddy. Will check for buddies below.
- ArrayList<ComparableFileName> tmpAllRTreeFiles = new ArrayList<ComparableFileName>();
- cleanupAndGetValidFilesInternal(dev, rtreeFilter, rtreeFactory, tmpAllRTreeFiles);
- // Look for buddy BTrees for all valid RTrees.
- // If no buddy is found, delete the file, otherwise add the RTree to allRTreeFiles.
- for (ComparableFileName cmpFileName : tmpAllRTreeFiles) {
- int index = cmpFileName.fileName.lastIndexOf(SPLIT_STRING);
- String file = cmpFileName.fileName.substring(0, index);
- if (btreeFilesSet.contains(file)) {
- allRTreeFiles.add(cmpFileName);
- } else {
- // Couldn't find the corresponding BTree file; thus, delete
- // the RTree file.
- File invalidRTreeFile = new File(cmpFileName.fullPath);
- invalidRTreeFile.delete();
- }
- }
+ validateFiles(dev, btreeFilesSet, allRTreeFiles, rtreeFilter, rtreeFactory);
+ validateFiles(dev, btreeFilesSet, allBloomFilterFiles, bloomFilterFilter, null);
}
// Sanity check.
if (allRTreeFiles.size() != allBTreeFiles.size() || allBTreeFiles.size() != allBloomFilterFiles.size()) {
diff --git a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesFileManager.java b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesFileManager.java
index 10b982f..6ddf766 100644
--- a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesFileManager.java
+++ b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesFileManager.java
@@ -15,112 +15,17 @@
package edu.uci.ics.hyracks.storage.am.lsm.rtree.impls;
-import java.io.File;
-import java.io.FilenameFilter;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Date;
-import java.util.List;
-
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.api.io.IIOManager;
-import edu.uci.ics.hyracks.api.io.IODeviceHandle;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndex;
-import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.AbstractLSMIndexFileManager;
-import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMComponentFileReferences;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.TreeIndexFactory;
import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
public class LSMRTreeWithAntiMatterTuplesFileManager extends AbstractLSMIndexFileManager {
- private final TreeIndexFactory<? extends ITreeIndex> rtreeFactory;
-
public LSMRTreeWithAntiMatterTuplesFileManager(IIOManager ioManager, IFileMapProvider fileMapProvider,
FileReference file, TreeIndexFactory<? extends ITreeIndex> rtreeFactory, int startIODeviceIndex) {
- super(ioManager, fileMapProvider, file, null, startIODeviceIndex);
- this.rtreeFactory = rtreeFactory;
- }
-
- @Override
- public LSMComponentFileReferences getRelFlushFileReference() {
- Date date = new Date();
- String ts = formatter.format(date);
- // Begin timestamp and end timestamp are identical since it is a flush
- return new LSMComponentFileReferences(createFlushFile(baseDir + ts + SPLIT_STRING + ts), null, null);
- }
-
- @Override
- public LSMComponentFileReferences getRelMergeFileReference(String firstFileName, String lastFileName)
- throws HyracksDataException {
- String[] firstTimestampRange = firstFileName.split(SPLIT_STRING);
- String[] lastTimestampRange = lastFileName.split(SPLIT_STRING);
- // Get the range of timestamps by taking the earliest and the latest timestamps
- return new LSMComponentFileReferences(createMergeFile(baseDir + firstTimestampRange[0] + SPLIT_STRING
- + lastTimestampRange[1]), null, null);
- }
-
- private static FilenameFilter fileNameFilter = new FilenameFilter() {
- public boolean accept(File dir, String name) {
- return !name.startsWith(".");
- }
- };
-
- @Override
- public List<LSMComponentFileReferences> cleanupAndGetValidFiles() throws HyracksDataException, IndexException {
- List<LSMComponentFileReferences> validFiles = new ArrayList<LSMComponentFileReferences>();
- ArrayList<ComparableFileName> allFiles = new ArrayList<ComparableFileName>();
-
- // Gather files from all IODeviceHandles and delete invalid files
- // There are two types of invalid files:
- // (1) The isValid flag is not set
- // (2) The file's interval is contained by some other file
- // Here, we only filter out (1).
- for (IODeviceHandle dev : ioManager.getIODevices()) {
- cleanupAndGetValidFilesInternal(dev, fileNameFilter, rtreeFactory, allFiles);
- }
-
- if (allFiles.isEmpty()) {
- return validFiles;
- }
-
- if (allFiles.size() == 1) {
- validFiles.add(new LSMComponentFileReferences(allFiles.get(0).fileRef, null, null));
- return validFiles;
- }
-
- // Sorts files names from earliest to latest timestamp.
- Collections.sort(allFiles);
-
- List<ComparableFileName> validComparableFiles = new ArrayList<ComparableFileName>();
- ComparableFileName last = allFiles.get(0);
- validComparableFiles.add(last);
- for (int i = 1; i < allFiles.size(); i++) {
- ComparableFileName current = allFiles.get(i);
- // The current start timestamp is greater than last stop timestamp so current is valid.
- if (current.interval[0].compareTo(last.interval[1]) > 0) {
- validComparableFiles.add(current);
- last = current;
- } else if (current.interval[0].compareTo(last.interval[0]) >= 0
- && current.interval[1].compareTo(last.interval[1]) <= 0) {
- // The current file is completely contained in the interval of the
- // last file. Thus the last file must contain at least as much information
- // as the current file, so delete the current file.
- current.fileRef.delete();
- } else {
- // This scenario should not be possible since timestamps are monotonically increasing.
- throw new HyracksDataException("Found LSM files with overlapping timestamp intervals, "
- + "but the intervals were not contained by another file.");
- }
- }
-
- // Sort valid files in reverse lexicographical order, such that newer files come first.
- Collections.sort(validComparableFiles, recencyCmp);
- for (ComparableFileName cmpFileName : validComparableFiles) {
- validFiles.add(new LSMComponentFileReferences(cmpFileName.fileRef, null, null));
- }
-
- return validFiles;
+ super(ioManager, fileMapProvider, file, rtreeFactory, startIODeviceIndex);
}
}