[NO ISSUE][STO] Fix deadlock in buffer cache
- user model changes: no
- storage format changes: no
- interface changes: no
details:
- When two threads concurrently attempt to pin a page,
one as a new page, the other as an existing page, a deadlock
might happen.
- The deadlock is caused by the reader as a new page setting the
valid flag before the ReaderThread attempts to read the page.
- The fix is to have the reader thread ignore the read request
and have the reader as a new page notify all the other readers
that the page is valid.
Change-Id: I50cf18d96228a1ad78ce11f5258252e8d0107d86
Reviewed-on: https://asterix-gerrit.ics.uci.edu/2407
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Contrib: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Michael Blow <mblow@apache.org>
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-common/src/main/java/org/apache/hyracks/storage/common/buffercache/BufferCache.java b/hyracks-fullstack/hyracks/hyracks-storage-common/src/main/java/org/apache/hyracks/storage/common/buffercache/BufferCache.java
index 2414be0..7f915c6 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-common/src/main/java/org/apache/hyracks/storage/common/buffercache/BufferCache.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-common/src/main/java/org/apache/hyracks/storage/common/buffercache/BufferCache.java
@@ -201,43 +201,48 @@
pinSanityCheck(dpid);
}
CachedPage cPage = findPage(dpid);
- if (!newPage) {
- if (DEBUG) {
- confiscateLock.lock();
- try {
- for (CachedPage c : confiscatedPages) {
- if (c.dpid == dpid && c.confiscated.get()) {
- throw new IllegalStateException();
- }
- }
- } finally {
- confiscateLock.unlock();
- }
- }
- // Resolve race of multiple threads trying to read the page from
- // disk.
+ if (cPage.state != State.VALID) {
synchronized (cPage) {
- if (cPage.state != State.VALID) {
- try {
- // Will attempt to re-read even if previous read failed
- if (cPage.state == State.INVALID || cPage.state == State.READ_FAILED) {
- // submit request to read
- cPage.state = State.READ_REQUESTED;
- readRequests.put(cPage);
+ if (!newPage) {
+ if (DEBUG) {
+ confiscateLock.lock();
+ try {
+ for (CachedPage c : confiscatedPages) {
+ if (c.dpid == dpid && c.confiscated.get()) {
+ throw new IllegalStateException();
+ }
+ }
+ } finally {
+ confiscateLock.unlock();
}
- cPage.awaitRead();
- } catch (InterruptedException e) {
- cPage.state = State.INVALID;
- unpin(cPage);
- throw HyracksDataException.create(e);
- } catch (Throwable th) {
- unpin(cPage);
- throw HyracksDataException.create(th);
}
+ // Resolve race of multiple threads trying to read the page from
+ // disk.
+
+ if (cPage.state != State.VALID) {
+ try {
+ // Will attempt to re-read even if previous read failed
+ if (cPage.state == State.INVALID || cPage.state == State.READ_FAILED) {
+ // submit request to read
+ cPage.state = State.READ_REQUESTED;
+ readRequests.put(cPage);
+ }
+ cPage.awaitRead();
+ } catch (InterruptedException e) {
+ cPage.state = State.INVALID;
+ unpin(cPage);
+ throw HyracksDataException.create(e);
+ } catch (Throwable th) {
+ unpin(cPage);
+ throw HyracksDataException.create(th);
+ }
+ }
+
+ } else {
+ cPage.state = State.VALID;
+ cPage.notifyAll();
}
}
- } else {
- cPage.state = State.VALID;
}
pageReplacementStrategy.notifyCachePageAccess(cPage);
if (DEBUG) {
@@ -735,22 +740,24 @@
}
break;
}
- if (next.state != State.READ_REQUESTED) {
- LOGGER.log(Level.ERROR,
- "Exiting BufferCache reader thread. Took a page with state = {} out of the queue",
- next.state);
- break;
- }
- try {
- tryRead(next);
- next.state = State.VALID;
- } catch (HyracksDataException e) {
- next.readFailure = e;
- next.state = State.READ_FAILED;
- LOGGER.log(Level.WARN, "Failed to read a page", e);
- }
synchronized (next) {
- next.notifyAll();
+ if (next.state != State.VALID) {
+ if (next.state != State.READ_REQUESTED) {
+ LOGGER.log(Level.ERROR,
+ "Exiting BufferCache reader thread. Took a page with state = {} out of the queue",
+ next.state);
+ break;
+ }
+ try {
+ tryRead(next);
+ next.state = State.VALID;
+ } catch (HyracksDataException e) {
+ next.readFailure = e;
+ next.state = State.READ_FAILED;
+ LOGGER.log(Level.WARN, "Failed to read a page", e);
+ }
+ next.notifyAll();
+ }
}
}
}