001: /*-
002: * See the file LICENSE for redistribution information.
003: *
004: * Copyright (c) 2002,2008 Oracle. All rights reserved.
005: *
006: * $Id: FSyncManager.java,v 1.18.2.3 2008/01/07 15:14:13 cwl Exp $
007: */
008:
009: package com.sleepycat.je.log;
010:
011: import com.sleepycat.je.DatabaseException;
012: import com.sleepycat.je.EnvironmentStats;
013: import com.sleepycat.je.RunRecoveryException;
014: import com.sleepycat.je.StatsConfig;
015: import com.sleepycat.je.config.EnvironmentParams;
016: import com.sleepycat.je.dbi.EnvironmentImpl;
017: import com.sleepycat.je.latch.Latch;
018: import com.sleepycat.je.latch.LatchSupport;
019: import com.sleepycat.je.utilint.PropUtil;
020:
021: /*
022: * The FSyncManager ensures that only one file fsync is issued at a time, for
023: * performance optimization. The goal is to reduce the number of fsyncs issued
024: * by the system by issuing 1 fsync on behalf of a number of threads.
025: *
026: * For example, suppose these writes happen which all need to be fsynced to
027: * disk:
028: *
029: * thread 1 writes a commit record
030: * thread 2 writes a checkpoint
031: * thread 3 writes a commit record
032: * thread 4 writes a commit record
033: * thread 5 writes a checkpoint
034: *
035: * Rather than executing 5 fsyncs, which all must happen synchronously, we hope
036: * to issue fewer. How many fewer depends on timing. Note that the writes
037: * themselves are serialized and are guaranteed to run in order.
038: *
039: * For example:
040: * thread 1 wants to fsync first, no other fsync going on, will issue fsync
041: * thread 2 waits
042: * thread 3 waits
043: * thread 4 waits
044: * - before thread 5 comes, thread 1 finishes fsyncing and returns to
045: * the caller. Now another fsync can be issued that will cover threads
046: * 2,3,4. One of those threads (2, 3, 4) issues the fsync, the others
047: * block.
048: * thread 5 wants to fsync, but sees one going on, so will wait.
049: * - the fsync issued for 2,3,4 can't cover thread 5 because we're not sure
050: * if thread 5's write finished before that fsync call. Thread 5 will have
051: * to issue its own fsync.
052: *
053: * Target file
054: * -----------
055: * Note that when the buffer pool starts a new file, we fsync the previous file
056: * under the log write latch. Therefore, at any time we only have one target
057: * file to fsync, which is the current write buffer. We do this so that we
058: * don't have to coordinate between files. For example, suppose log files have
059: * 1000 bytes and a commit record is 10 bytes. An LSN of value 6/990 is in
060: * file 6 at offset 990.
061: *
062: * thread 1: logWriteLatch.acquire()
063: * write commit record to LSN 6/980
064: * logWriteLatch.release()
065: * thread 2: logWriteLatch.acquire()
066: * write commit record to LSN 6/990
067: * logWriteLatch.release()
068: * thread 3: logWriteLatch.acquire()
069: * gets 7/000 as the next LSN to use
070: * see that we flipped to a new file, so call fsync on file 6
071: * write commit record to LSN 7/000
072: * logWriteLatch.release()
073: *
074: * Thread 3 will fsync file 6 within the log write latch. That way, at any
075: * time, any non-latched fsyncs should only fsync the latest file. If we
076: * didn't do so, there's a chance that thread 3 would fsync file 7 and return to
077: * its caller before threads 1 and 2 got an fsync for file 6. That wouldn't
078: * be correct, because thread 3's commit might depend on file 6.
079: *
080: * Note that the FileManager keeps a file descriptor that corresponds to the
081: * current end of file, and that is what we fsync.
082: */
083: class FSyncManager {
084: private EnvironmentImpl envImpl;
085: private long timeout;
086:
087: /* Use as the target for a synchronization block. */
088: private Latch fsyncLatch;
089:
090: private volatile boolean fsyncInProgress;
091: private FSyncGroup nextFSyncWaiters;
092:
093: /* stats */
094: private long nFSyncRequests = 0;
095: private long nFSyncs = 0;
096: private long nTimeouts = 0;
097:
098: FSyncManager(EnvironmentImpl envImpl) throws DatabaseException {
099: timeout = PropUtil.microsToMillis(envImpl.getConfigManager()
100: .getLong(EnvironmentParams.LOG_FSYNC_TIMEOUT));
101: this .envImpl = envImpl;
102:
103: fsyncLatch = LatchSupport.makeLatch("fsyncLatch", envImpl);
104: fsyncInProgress = false;
105: nextFSyncWaiters = new FSyncGroup(timeout, envImpl);
106: }
107:
108: /**
109: * Request that this file be fsynced to disk. This thread may or may not
110: * actually execute the fsync, but will not return until a fsync has been
111: * issued and executed on behalf of its write. There is a timeout period
112: * specified by EnvironmentParam.LOG_FSYNC_TIMEOUT that ensures that no
113: * thread gets stuck here indefinitely.
114: *
115: * When a thread comes in, it will find one of two things.
116: * 1. There is no fsync going on right now. This thread should go
117: * ahead and fsync.
118: * 2. There is an active fsync, wait until it's over before
119: * starting a new fsync.
120: *
121: * When a fsync is going on, all those threads that come along are grouped
122: * together as the nextFsyncWaiters. When the current fsync is finished,
123: * one of those nextFsyncWaiters will be selected as a leader to issue the
124: * next fsync. The other members of the group will merely wait until the
125: * fsync done on their behalf is finished.
126: *
127: * When a thread finishes a fsync, it has to:
128: * 1. wake up all the threads that were waiting for its fsync call.
129: * 2. wake up one member of the next group of waiting threads (the
130: * nextFsyncWaiters) so that thread can become the new leader
131: * and issue the next fysnc call.
132: *
133: * If a non-leader member of the nextFsyncWaiters times out, it will issue
134: * its own fsync anyway, in case something happened to the leader.
135: */
136: void fsync() throws DatabaseException {
137:
138: boolean doFsync = false;
139: boolean isLeader = false;
140: boolean needToWait = false;
141: FSyncGroup inProgressGroup = null;
142: FSyncGroup myGroup = null;
143:
144: synchronized (fsyncLatch) {
145: nFSyncRequests++;
146:
147: /* Figure out if we're calling fsync or waiting. */
148: if (fsyncInProgress) {
149: needToWait = true;
150: myGroup = nextFSyncWaiters;
151: } else {
152: isLeader = true;
153: doFsync = true;
154: fsyncInProgress = true;
155: inProgressGroup = nextFSyncWaiters;
156: nextFSyncWaiters = new FSyncGroup(timeout, envImpl);
157: }
158: }
159:
160: if (needToWait) {
161:
162: /*
163: * Note that there's no problem if we miss the notify on this set
164: * of waiters. We can check state in the FSyncGroup before we begin
165: * to wait.
166: *
167: * All members of the group may return from their waitForFSync()
168: * call with the need to do a fsync, because of timeout. Only one
169: * will return as the leader.
170: */
171: int waitStatus = myGroup.waitForFsync();
172:
173: if (waitStatus == FSyncGroup.DO_LEADER_FSYNC) {
174: synchronized (fsyncLatch) {
175:
176: /*
177: * Check if there's a fsync in progress; this might happen
178: * even if you were designated the leader if a new thread
179: * came in between the point when the old leader woke you
180: * up and now. This new thread may have found that there
181: * was no fsync in progress, and may have started a fsync.
182: */
183: if (!fsyncInProgress) {
184: isLeader = true;
185: doFsync = true;
186: fsyncInProgress = true;
187: inProgressGroup = myGroup;
188: nextFSyncWaiters = new FSyncGroup(timeout,
189: envImpl);
190: }
191: }
192: } else if (waitStatus == FSyncGroup.DO_TIMEOUT_FSYNC) {
193: doFsync = true;
194: synchronized (fsyncLatch) {
195: nTimeouts++;
196: }
197: }
198: }
199:
200: if (doFsync) {
201:
202: /*
203: * There are 3 ways that this fsync gets called:
204: *
205: * 1. A thread calls sync and there is not a sync call already in
206: * progress. That thread executes fsync for itself only. Other
207: * threads requesting sync form a group of waiters.
208: *
209: * 2. A sync finishes and wakes up a group of waiters. The first
210: * waiter in the group to wake up becomes the leader. It executes
211: * sync for it's group of waiters. As above, other threads
212: * requesting sync form a new group of waiters.
213: *
214: * 3. If members of a group of waiters have timed out, they'll all
215: * just go and do their own sync for themselves.
216: */
217: executeFSync();
218:
219: synchronized (fsyncLatch) {
220: nFSyncs++;
221: if (isLeader) {
222:
223: /*
224: * Wake up the group that requested the fsync before you
225: * started. They've piggybacked off your fsync.
226: */
227: inProgressGroup.wakeupAll();
228:
229: /*
230: * Wake up a single waiter, who will become the next
231: * leader.
232: */
233: nextFSyncWaiters.wakeupOne();
234: fsyncInProgress = false;
235: }
236: }
237: }
238: }
239:
240: /*
241: * Stats.
242: */
243: long getNFSyncRequests() {
244: return nFSyncRequests;
245: }
246:
247: long getNFSyncs() {
248: return nFSyncs;
249: }
250:
251: long getNTimeouts() {
252: return nTimeouts;
253: }
254:
255: void loadStats(StatsConfig config, EnvironmentStats stats)
256: throws DatabaseException {
257:
258: stats.setNFSyncs(nFSyncs);
259: stats.setNFSyncRequests(nFSyncRequests);
260: stats.setNFSyncTimeouts(nTimeouts);
261:
262: if (config.getClear()) {
263: nFSyncs = 0;
264: nFSyncRequests = 0;
265: nTimeouts = 0;
266: }
267: }
268:
269: /**
270: * Put the fsync execution into this method so it can be overridden for
271: * testing purposes.
272: */
273: protected void executeFSync() throws DatabaseException {
274:
275: envImpl.getFileManager().syncLogEnd();
276: }
277:
278: /*
279: * Embodies a group of threads waiting for a common fsync. Note that
280: * there's no collection here; group membership is merely that the threads
281: * are all waiting on the same monitor.
282: */
283: static class FSyncGroup {
284: static int DO_TIMEOUT_FSYNC = 0;
285: static int DO_LEADER_FSYNC = 1;
286: static int NO_FSYNC_NEEDED = 2;
287:
288: private volatile boolean fsyncDone;
289: private long fsyncTimeout;
290: private boolean leaderExists;
291: private EnvironmentImpl envImpl;
292:
293: FSyncGroup(long fsyncTimeout, EnvironmentImpl envImpl) {
294: this .fsyncTimeout = fsyncTimeout;
295: fsyncDone = false;
296: leaderExists = false;
297: this .envImpl = envImpl;
298: }
299:
300: synchronized boolean getLeader() {
301: if (fsyncDone) {
302: return false;
303: } else {
304: if (leaderExists) {
305: return false;
306: } else {
307: leaderExists = true;
308: return true;
309: }
310: }
311: }
312:
313: /**
314: * Wait for either a turn to execute a fsync, or to find out that a
315: * fsync was done on your behalf.
316: *
317: * @return true if the fsync wasn't done, and this thread needs to
318: * execute a fsync when it wakes up. This may be true because it's the
319: * leader of its group, or because the wait timed out.
320: */
321: synchronized int waitForFsync() throws RunRecoveryException {
322:
323: int status = 0;
324:
325: if (!fsyncDone) {
326: long startTime = System.currentTimeMillis();
327: while (true) {
328:
329: try {
330: wait(fsyncTimeout);
331: } catch (InterruptedException e) {
332: throw new RunRecoveryException(
333: envImpl,
334: "Unexpected interrupt while waiting for fsync",
335: e);
336: }
337:
338: /*
339: * This thread was awoken either by a timeout, by a notify,
340: * or by an interrupt. Is the fsync done?
341: */
342: if (fsyncDone) {
343: /* The fsync we're waiting on is done, leave. */
344: status = NO_FSYNC_NEEDED;
345: break;
346: } else {
347:
348: /*
349: * The fsync is not done -- were we woken up to become
350: * the leader?
351: */
352: if (!leaderExists) {
353: leaderExists = true;
354: status = DO_LEADER_FSYNC;
355: break;
356: } else {
357:
358: /*
359: * We're just a waiter. See if we're timed out or
360: * have more to wait.
361: */
362: long now = System.currentTimeMillis();
363: if ((now - startTime) > fsyncTimeout) {
364: /* we timed out. */
365: status = DO_TIMEOUT_FSYNC;
366: break;
367: }
368: }
369: }
370: }
371: }
372:
373: return status;
374: }
375:
376: synchronized void wakeupAll() {
377: fsyncDone = true;
378: notifyAll();
379: }
380:
381: synchronized void wakeupOne() {
382: /* FindBugs whines here. */
383: notify();
384: }
385: }
386: }
|