Added ILSMFileNameManager to properly handle the lifecycle of LSM files. Added its implementation for LSM-BTrees with corresponding test. Implemented LSMTree open() using its file name manager.

git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_lsm_tree@1055 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/IIndex.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/IIndex.java
index 38b275b..d404a83 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/IIndex.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/IIndex.java
@@ -38,7 +38,7 @@
      * @param indexFileId
      *            The file id backing this index.
      */
-    public void open(int indexFileId);
+    public void open(int indexFileId) throws HyracksDataException;
     
     /**
      * Closes the index.
diff --git a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/impls/LSMBTreeFileNameManager.java b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/impls/LSMBTreeFileNameManager.java
new file mode 100644
index 0000000..8e0cbe2
--- /dev/null
+++ b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/impls/LSMBTreeFileNameManager.java
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.impls;
+
+import java.text.Format;
+import java.text.SimpleDateFormat;
+import java.util.Comparator;
+import java.util.Date;
+
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMFileNameManager;
+
+public class LSMBTreeFileNameManager implements ILSMFileNameManager {
+
+    private final String baseDir;
+    private final Format formatter = new SimpleDateFormat("yyyy_MM_dd_HH_mm_ss_SSS");
+    private final Comparator<String> cmp = new FileNameComparator();
+    
+    public LSMBTreeFileNameManager(String baseDir) {
+        if (!baseDir.endsWith(System.getProperty("file.separator"))) {
+            baseDir += System.getProperty("file.separator");
+        }
+        this.baseDir = baseDir;
+    }
+    
+    @Override
+    public String getFlushFileName() {
+        Date date = new Date();        
+        // "Z" prefix to indicate flush. Relies on "Z" sorting higher than "A".
+        return baseDir + "Z" + formatter.format(date);
+    }
+
+    @Override
+    public String getMergeFileName() {
+        Date date = new Date();
+        // "A" prefix to indicate merge. Relies on "A" sorting lower than "Z".
+        return baseDir + "A" + formatter.format(date);
+    }
+
+    @Override
+    public Comparator<String> getFileNameComparator() {
+        return cmp;
+    }
+
+    /**
+     * Sorts strings in reverse lexicographical order. The way we construct the
+     * file names above guarantees that:
+     * 
+     * 1. Flushed files ("Z" prefix) sort lower than merged files ("A" prefix)
+     * 
+     * 2. Flushed files are sorted from newest to oldest (based on the timestamp
+     * string)
+     * 
+     */
+    private class FileNameComparator implements Comparator<String> {
+        @Override
+        public int compare(String a, String b) {
+            // Consciously ignoring locale.
+            return -a.compareTo(b);
+        }
+    }
+
+    @Override
+    public String getBaseDir() {
+        return baseDir;
+    }
+}
diff --git a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/impls/LSMTree.java b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/impls/LSMTree.java
index b5401f4..4d9236f 100644
--- a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/impls/LSMTree.java
+++ b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/impls/LSMTree.java
@@ -16,6 +16,9 @@
 package edu.uci.ics.hyracks.storage.am.lsm.impls;
 
 import java.io.File;
+import java.io.FilenameFilter;
+import java.util.Arrays;
+import java.util.Comparator;
 import java.util.LinkedList;
 import java.util.ListIterator;
 
@@ -38,6 +41,7 @@
 import edu.uci.ics.hyracks.storage.am.common.api.TreeIndexException;
 import edu.uci.ics.hyracks.storage.am.common.ophelpers.IndexOp;
 import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMFileNameManager;
 import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMTree;
 import edu.uci.ics.hyracks.storage.am.lsm.common.freepage.InMemoryFreePageManager;
 import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
@@ -49,7 +53,7 @@
     private final InMemoryFreePageManager memFreePageManager;    
 
     // On-disk components.
-    private final String onDiskDir;
+    private final ILSMFileNameManager fileNameManager;
     private final BTreeFactory diskBTreeFactory;
     private final IBufferCache diskBufferCache;
     private final IFileMapProvider diskFileMapProvider;    
@@ -69,7 +73,7 @@
 
     public LSMTree(IBufferCache memBufferCache, InMemoryFreePageManager memFreePageManager,
             ITreeIndexFrameFactory interiorFrameFactory, ITreeIndexFrameFactory insertLeafFrameFactory,
-            ITreeIndexFrameFactory deleteLeafFrameFactory, String onDiskDir, BTreeFactory diskBTreeFactory,
+            ITreeIndexFrameFactory deleteLeafFrameFactory, ILSMFileNameManager fileNameManager, BTreeFactory diskBTreeFactory,
             IFileMapProvider diskFileMapProvider, int fieldCount, MultiComparator cmp) {
         memBTree = new BTree(memBufferCache, fieldCount, cmp, memFreePageManager, interiorFrameFactory,
                 insertLeafFrameFactory);
@@ -85,24 +89,52 @@
         this.onDiskBTreeCount = 0;
         this.threadRefCount = 0;
         this.flushFlag = false;
-        if (!onDiskDir.endsWith(System.getProperty("file.separator"))) {
-            onDiskDir += System.getProperty("file.separator");
-        }
-        this.onDiskDir = onDiskDir;
+        this.fileNameManager = fileNameManager;
     }
 
     @Override
     public void create(int indexFileId) throws HyracksDataException {
         memBTree.create(indexFileId);
     }
-
+    
+    /**
+     * Opens LSMBTree, assuming a consistent state of the disk-resident
+     * components. In particular, registers all files in in base dir of
+     * fileNameManager as on-disk BTrees.
+     * 
+     * Example pathological scenario to explain "consistent state assumption":
+     * Suppose a merge finished, but before the original files were deleted the
+     * system crashes. We are left in a state where we have the original BTrees
+     * in addition to the merged one. We assume that prior to calling this
+     * method a separate recovery process has ensured the consistent of the
+     * disk-resident components.
+     * 
+     * @param indexFileId
+     *            Dummy file id used for in-memory BTree.
+     * @throws HyracksDataException
+     */
     @Override
-    public void open(int indexFileId) {
+    public void open(int indexFileId) throws HyracksDataException {
         memBTree.open(indexFileId);
+        File dir = new File(fileNameManager.getBaseDir());
+        FilenameFilter filter = new FilenameFilter() {
+            public boolean accept(File dir, String name) {
+                return !name.startsWith(".");
+            }
+        };
+        String[] files = dir.list(filter);
+        Comparator<String> cmp = fileNameManager.getFileNameComparator();
+        Arrays.sort(files, cmp);
+        for (String fileName : files) {
+            BTree btree = createDiskBTree(fileName, false);
+            onDiskBTrees.add(btree);
+        }
     }
 
     @Override
     public void close() {
+        onDiskBTrees.clear();
+        onDiskBTreeCount = 0;
         memBTree.close();
     }
 
@@ -162,7 +194,7 @@
         RangePredicate nullPred = new RangePredicate(true, null, null, true, true, null, null);
         ITreeIndexAccessor memBTreeAccessor = memBTree.createAccessor();
         memBTreeAccessor.search(scanCursor, nullPred);
-        BTree diskBTree = createDiskBTree();
+        BTree diskBTree = createFlushTargetBTree();
         // Bulk load the tuples from the in-memory BTree into the new disk BTree.
         IIndexBulkLoadContext bulkLoadCtx = diskBTree.beginBulkLoad(1.0f);
         try {
@@ -184,13 +216,19 @@
         memBTree.create(memBTree.getFileId());
     }
     
-    private String getNextFileName() {
-        return onDiskDir + "btree_" + onDiskBTreeCount++;
+    private BTree createFlushTargetBTree() throws HyracksDataException {
+        String fileName = fileNameManager.getFlushFileName();
+        return createDiskBTree(fileName, true);
     }
     
-    private BTree createDiskBTree() throws HyracksDataException {
-        // Register the new BTree file.
-        FileReference file = new FileReference(new File(getNextFileName()));
+    private BTree createMergeTargetBTree() throws HyracksDataException {
+        String fileName = fileNameManager.getMergeFileName();
+        return createDiskBTree(fileName, true);
+    }
+    
+    private BTree createDiskBTree(String fileName, boolean createBTree) throws HyracksDataException {
+        // Register the new BTree file.        
+        FileReference file = new FileReference(new File(fileName));
         // TODO: Delete the file during cleanup.
         diskBufferCache.createFile(file);
         int diskBTreeFileId = diskFileMapProvider.lookupFileId(file);
@@ -198,7 +236,9 @@
         diskBufferCache.openFile(diskBTreeFileId);
         // Create new BTree instance.
         BTree diskBTree = diskBTreeFactory.createBTreeInstance(diskBTreeFileId);
-        diskBTree.create(diskBTreeFileId);
+        if (createBTree) {
+            diskBTree.create(diskBTreeFileId);
+        }
         // TODO: Close the BTree during cleanup.
         diskBTree.open(diskBTreeFileId);
         return diskBTree;
@@ -267,7 +307,9 @@
         // Create a new Merged BTree, which have full fillfactor.
         // Register the BTree information into system.
         // TODO: change the naming schema for the new tree
-        FileReference file = new FileReference(new File(getNextFileName()));
+        // TODO: Alex. fix this.
+        String dummy = "abc";
+        FileReference file = new FileReference(new File(dummy));
         // TODO: Delete the file during cleanup.
         diskBufferCache.createFile(file);
         int mergedBTreeId = diskFileMapProvider.lookupFileId(file);
@@ -345,8 +387,10 @@
     }
     
     @Override
-    public IIndexBulkLoadContext beginBulkLoad(float fillFactor) throws TreeIndexException, HyracksDataException {        
-        BTree diskBTree = createDiskBTree();
+    public IIndexBulkLoadContext beginBulkLoad(float fillFactor) throws TreeIndexException, HyracksDataException {
+        // Note that by using a flush target file name, we state that the new
+        // bulk loaded tree is "newer" than any other merged tree.
+        BTree diskBTree = createFlushTargetBTree();
         LSMTreeBulkLoadContext bulkLoadCtx = new LSMTreeBulkLoadContext(diskBTree);
         bulkLoadCtx.beginBulkLoad(fillFactor);
         return bulkLoadCtx;
diff --git a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/util/LSMBTreeUtils.java b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/util/LSMBTreeUtils.java
index 38e123e..c6bb3fd 100644
--- a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/util/LSMBTreeUtils.java
+++ b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/util/LSMBTreeUtils.java
@@ -24,9 +24,11 @@
 import edu.uci.ics.hyracks.storage.am.common.frames.LIFOMetaDataFrameFactory;
 import edu.uci.ics.hyracks.storage.am.common.freepage.LinkedListFreePageManagerFactory;
 import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMFileNameManager;
 import edu.uci.ics.hyracks.storage.am.lsm.common.freepage.InMemoryBufferCache;
 import edu.uci.ics.hyracks.storage.am.lsm.common.freepage.InMemoryFreePageManager;
 import edu.uci.ics.hyracks.storage.am.lsm.impls.BTreeFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.impls.LSMBTreeFileNameManager;
 import edu.uci.ics.hyracks.storage.am.lsm.impls.LSMTree;
 import edu.uci.ics.hyracks.storage.am.lsm.tuples.LSMTypeAwareTupleWriterFactory;
 import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
@@ -47,8 +49,9 @@
                 metaFrameFactory);
         BTreeFactory btreeFactory = new BTreeFactory(diskBufferCache, freePageManagerFactory, cmp, typeTraits.length,
                 interiorFrameFactory, insertLeafFrameFactory);
+        ILSMFileNameManager fileNameManager = new LSMBTreeFileNameManager(onDiskDir);
         LSMTree lsmTree = new LSMTree(memBufferCache, memFreePageManager, interiorFrameFactory, insertLeafFrameFactory,
-                deleteLeafFrameFactory, onDiskDir, btreeFactory, diskFileMapProvider, typeTraits.length, cmp);
+                deleteLeafFrameFactory, fileNameManager, btreeFactory, diskFileMapProvider, typeTraits.length, cmp);
         return lsmTree;
     }
 }
diff --git a/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMFileNameManager.java b/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMFileNameManager.java
new file mode 100644
index 0000000..c48c48e
--- /dev/null
+++ b/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMFileNameManager.java
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.common.api;
+
+import java.util.Comparator;
+
+/**
+ * Provides file names for LSM on-disk components.
+ * 
+ * There are separate methods to get file names for merge and flush, because we
+ * need to guarantee the correct order of on-disk components (i.e., the
+ * components produced by flush are always newer than those produced by a
+ * merge).
+ * 
+ * 
+ */
+public interface ILSMFileNameManager {
+	public String getFlushFileName();
+	
+	public String getMergeFileName();
+	
+	public String getBaseDir();
+	
+	public Comparator<String> getFileNameComparator();
+}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeFileNameManagerTest.java b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeFileNameManagerTest.java
new file mode 100644
index 0000000..87ee59f
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeFileNameManagerTest.java
@@ -0,0 +1,125 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.btree;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Date;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.junit.Before;
+import org.junit.Test;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMFileNameManager;
+import edu.uci.ics.hyracks.storage.am.lsm.impls.LSMBTreeFileNameManager;
+
+public class LSMBTreeFileNameManagerTest {
+    protected final static SimpleDateFormat simpleDateFormat = new SimpleDateFormat("ddMMyy-hhmmssSS");
+    protected final static String tmpDir = System.getProperty("java.io.tmpdir");
+    protected final static String sep = System.getProperty("file.separator");
+    protected String baseDir;
+
+    @Before
+    public void setUp() throws HyracksDataException {
+        baseDir = tmpDir + sep + "lsm_btree" + simpleDateFormat.format(new Date()) + sep;
+    }
+
+    public void flushFileNamesTest(boolean testFlushFileName) throws InterruptedException {
+        ILSMFileNameManager fileNameManager = new LSMBTreeFileNameManager(baseDir);
+        LinkedList<String> fileNames = new LinkedList<String>();
+
+        int numFileNames = 100;
+        long sleepTime = 5;
+        for (int i = 0; i < numFileNames; i++) {
+            String fileName = null;
+            if (testFlushFileName) {
+                fileName = fileNameManager.getFlushFileName();
+            } else {
+                fileName = fileNameManager.getMergeFileName();
+            }
+            fileNames.addFirst(fileName);
+            Thread.sleep(sleepTime);
+        }
+
+        List<String> sortedFileNames = new ArrayList<String>();
+        sortedFileNames.addAll(fileNames);        
+
+        // Make sure the comparator sorts in the correct order (i.e., the
+        // reverse insertion order in this case).
+        Comparator<String> cmp = fileNameManager.getFileNameComparator();
+        Collections.sort(sortedFileNames, cmp);
+        for (int i = 0; i < numFileNames; i++) {
+            assertEquals(fileNames.get(i), sortedFileNames.get(i));
+        }
+    }
+
+    @Test
+    public void individualFileNamesTest() throws InterruptedException {
+        flushFileNamesTest(true);
+        flushFileNamesTest(false);
+    }
+
+    @Test
+    public void mixedFileNamesTest() throws InterruptedException {
+        ILSMFileNameManager fileNameManager = new LSMBTreeFileNameManager(baseDir);
+        List<String> fileNames = new ArrayList<String>();
+        HashSet<String> flushFileNames = new HashSet<String>();
+        HashSet<String> mergeFileNames = new HashSet<String>();
+        
+        int numFileNames = 100;
+        long sleepTime = 5;
+        for (int i = 0; i < numFileNames; i++) {
+            String fileName = null;
+            if (i % 2 == 0) {
+                fileName = fileNameManager.getFlushFileName();
+                flushFileNames.add(fileName);
+            } else {
+                fileName = fileNameManager.getMergeFileName();
+                mergeFileNames.add(fileName);
+            }
+            fileNames.add(fileName);
+            Thread.sleep(sleepTime);
+        }
+
+        // Make sure the comparator sorts in the correct order.
+        // We only need to check whether the flushed files and merged files are
+        // clustered.
+        // We verified the secondary sort order (based on timestamp) in the
+        // individualFileNamesTest().
+        Comparator<String> cmp = fileNameManager.getFileNameComparator();
+        Collections.sort(fileNames, cmp);
+        // We expect flush file names at the front.
+        int i = 0;
+        while (flushFileNames.contains(fileNames.get(i))) {
+            i++;
+        }
+        // We expect only merge file names from here on.
+        while (i < numFileNames) {
+            if (!mergeFileNames.contains(fileNames.get(i))) {
+                fail("Expected a merge file name at position: " + i);
+            }
+            i++;
+        }
+    }
+}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/perf/LSMTreeRunner.java b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/perf/LSMTreeRunner.java
index ccfabdf..b5f1324 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/perf/LSMTreeRunner.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/perf/LSMTreeRunner.java
@@ -8,7 +8,6 @@
 import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
 import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeException;
 import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexAccessor;
-import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexMetaDataFrameFactory;
 import edu.uci.ics.hyracks.storage.am.common.api.TreeIndexException;
 import edu.uci.ics.hyracks.storage.am.common.datagen.DataGenThread;
 import edu.uci.ics.hyracks.storage.am.common.datagen.TupleBatch;