summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Richard W.M. Jones <rjones@redhat.com>, 2012-07-02 16:38:19 +0100
committer: Richard W.M. Jones <rjones@redhat.com>, 2012-07-07 11:26:25 +0100
commit: 7fc03e9a45bbf87298dd98e7eb7db649e0dba4ff (patch)
tree: b096e15fe9bf05140f004feab0be7e9734e9d7a5
parent: cef7946133cef08b94814de9a1581b7c3702a3e0 (diff)
downloadlibguestfs-7fc03e9a45bbf87298dd98e7eb7db649e0dba4ff.tar.gz
libguestfs-7fc03e9a45bbf87298dd98e7eb7db649e0dba4ff.tar.xz
libguestfs-7fc03e9a45bbf87298dd98e7eb7db649e0dba4ff.zip
daemon: Run fsync on block devices after sync (RHBZ#836710).
On Linux, sync(2) does not actually issue a write barrier, thus it doesn't force a flush of the underlying hardware write cache (or qemu's disk cache in the virtual case). This can be a problem, because libguestfs relies on running sync in the appliance, followed by killing qemu (using SIGTERM). In most cases, this is fine, because killing qemu with SIGTERM should cause it to flush out the disk cache before it exits. However, we have found various bugs in qemu which cause qemu to crash while doing the flush, leaving the data unwritten (see RHBZ#836913). The solution is to issue fsync(2) to the block devices. This has a write barrier, so it ensures that qemu writes out its cache long before we get around to killing qemu. (cherry picked from commit c0a3c9ce70b98171e737e49e6dccc4457963f2ec)
-rw-r--r--  configure.ac   1
-rw-r--r--  daemon/sync.c  78
2 files changed, 79 insertions, 0 deletions
diff --git a/configure.ac b/configure.ac
index 88bfc8ec..979c3600 100644
--- a/configure.ac
+++ b/configure.ac
@@ -213,6 +213,7 @@ AC_CHECK_HEADERS([\
dnl Functions.
AC_CHECK_FUNCS([\
+ fsync \
futimens \
getxattr \
htonl \
diff --git a/daemon/sync.c b/daemon/sync.c
index fcb887e2..2338a3d4 100644
--- a/daemon/sync.c
+++ b/daemon/sync.c
@@ -23,7 +23,11 @@
#endif
#include <stdio.h>
+#include <stdlib.h>
#include <unistd.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <sys/types.h>
#include "daemon.h"
#include "actions.h"
@@ -32,6 +36,10 @@
static int sync_win32 (void);
#endif
+#ifdef HAVE_FSYNC
+static void fsync_devices (void);
+#endif
+
int
do_sync (void)
{
@@ -52,6 +60,18 @@ sync_disks (void)
{
#if defined(HAVE_SYNC)
sync ();
+
+ /* On Linux, sync(2) doesn't perform a barrier, so qemu (which may
+ * have a writeback cache, even with cache=none) will still have
+ * some unwritten data. Force the data out of any qemu caches, by
+ * calling fsync on all block devices. Note we still need the
+ * call to sync above in order to schedule the writes.
+ * Thanks to: Avi Kivity, Kevin Wolf.
+ */
+#ifdef HAVE_FSYNC
+ fsync_devices ();
+#endif
+
return 0;
#elif defined(WIN32)
return sync_win32 ();
@@ -60,6 +80,64 @@ sync_disks (void)
#endif
}
+#ifdef HAVE_FSYNC
+/* Call fsync(2) on the block devices listed under /sys/block.
+ *
+ * Each directory entry whose name matches one of the "sd", "hd", "vd"
+ * or "sr" prefixes (compared with STREQLEN over the first 2 bytes) is
+ * mapped to "/dev/<name>", opened read-only and fsync'd.  Any path for
+ * which is_root_device() returns true is skipped.
+ *
+ * This is best-effort: every failure is reported on stderr and the
+ * scan moves on to the next entry.  Nothing is returned to the caller.
+ */
+static void
+fsync_devices (void)
+{
+  DIR *dir;
+  struct dirent *d;
+  char dev_path[256];
+  int fd;
+  int len;
+
+  dir = opendir ("/sys/block");
+  if (!dir) {
+    perror ("opendir: /sys/block");
+    return;
+  }
+
+  for (;;) {
+    /* readdir returns NULL both at end-of-directory and on error.
+     * Clearing errno first lets the check after the loop tell the
+     * two cases apart.
+     */
+    errno = 0;
+    d = readdir (dir);
+    if (!d) break;
+
+    if (STREQLEN (d->d_name, "sd", 2) ||
+        STREQLEN (d->d_name, "hd", 2) ||
+        STREQLEN (d->d_name, "vd", 2) ||
+        STREQLEN (d->d_name, "sr", 2)) {
+      len = snprintf (dev_path, sizeof dev_path, "/dev/%s", d->d_name);
+
+      /* "/dev/" plus d_name can exceed dev_path.  Never open a
+       * truncated path: it could name a different device.
+       */
+      if (len < 0 || (size_t) len >= sizeof dev_path) {
+        fprintf (stderr, "fsync_devices: /dev/%s: device path too long\n",
+                 d->d_name);
+        continue;
+      }
+
+      /* Ignore the root device. */
+      if (is_root_device (dev_path))
+        continue;
+
+      fd = open (dev_path, O_RDONLY|O_CLOEXEC);
+      if (fd == -1) {
+        perror (dev_path);
+        continue;
+      }
+
+      /* fsync the device. */
+      if (verbose)
+        fprintf (stderr, "fsync %s\n", dev_path);
+
+      if (fsync (fd) == -1)
+        perror ("fsync");
+
+      if (close (fd) == -1)
+        perror ("close");
+    }
+  }
+
+  /* Check readdir didn't fail.  The loop is only left straight after
+   * readdir returned NULL, so errno still reflects that call.
+   */
+  if (errno != 0)
+    perror ("readdir: /sys/block");
+
+  /* Close the directory handle */
+  if (closedir (dir) == -1)
+    perror ("closedir");
+}
+#endif /* HAVE_FSYNC */
+
#ifdef WIN32
static int
sync_win32 (void)