[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

[Libguestfs] [PATCH] Improve zeroing and detection of zeroes.



The test script I used is also attached.

Rich.

-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
libguestfs lets you edit virtual machines.  Supports shell scripting,
bindings from many languages.  http://libguestfs.org
>From 5a50c04906828f6e99db6a9be420c84114476d39 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones redhat com>
Date: Tue, 16 Aug 2011 12:39:28 +0100
Subject: [PATCH] Improve zeroing and detection of zeroes.

This code modifies zero, zero-device, is-zero, is-zero-device.

zero and zero-device are modified so that if the blocks of the device
already contain zeroes, then we don't write zeroes.  The reason for
this is to avoid unnecessarily making the underlying storage
non-sparse or (in the qcow2 case) growing it.

is-zero and is-zero-device are modified so that zero detection is
faster.  This is a nice side effect of making the first change.

Since avoiding unnecessary zeroing involves reading the blocks before
writing them, whereas before we just blindly wrote, this can be
slower.  As you can see from the tests below, in the case where the
disk is sparse, it actually turns out to be faster, because we avoid
allocating the underlying blocks.

However in the case where the disk is non-sparse and full of existing
data, it is much slower.  There might be a case for an API flag to
adjust whether or not we perform the zero check.  I did not add this
flag because it is unlikely that the caller would have enough
information to be able to set the flag correctly.

                                (Elapsed time in seconds)
Format  Test case                 Before     After

Raw     Sparse                    16.4       5.3

        Preallocated zero         17.0       18.8

        Preallocated random       16.0       41.3

Qcow2   preallocation=off         18.7       5.6

        preallocation=metadata    17.4       5.8

The current code uses a fixed block size of 4K for reading and
writing.  I also tried the same tests with a block size of 64K but it
didn't make any significant difference.

(Thanks to Federico Simoncelli for suggesting this change)
---
 daemon/zero.c                  |   81 ++++++++++++++++++++++++++++++----------
 generator/generator_actions.ml |   10 ++++-
 2 files changed, 70 insertions(+), 21 deletions(-)

diff --git a/daemon/zero.c b/daemon/zero.c
index c8e79ae..c9f6bf7 100644
--- a/daemon/zero.c
+++ b/daemon/zero.c
@@ -28,26 +28,57 @@
 #include "daemon.h"
 #include "actions.h"
 
+/* Return true iff the buffer is all zero bytes.
+ *
+ * Note that gcc is smart enough to optimize this properly:
+ * http://stackoverflow.com/questions/1493936/faster-means-of-checking-for-an-empty-buffer-in-c/1493989#1493989
+ */
+static int
+is_zero (const char *buffer, size_t size)
+{
+  size_t i;
+
+  for (i = 0; i < size; ++i) {
+    if (buffer[i] != 0)
+      return 0;
+  }
+
+  return 1;
+}
+
+static const char zero_buf[4096];
+
 int
 do_zero (const char *device)
 {
-  int fd, i;
-  char buf[4096];
+  char buf[sizeof zero_buf];
+  int fd;
+  size_t i, offset;
 
-  fd = open (device, O_WRONLY);
+  fd = open (device, O_RDWR);
   if (fd == -1) {
     reply_with_perror ("%s", device);
     return -1;
   }
 
-  memset (buf, 0, sizeof buf);
-
   for (i = 0; i < 32; ++i) {
-    if (write (fd, buf, sizeof buf) != sizeof buf) {
-      reply_with_perror ("write: %s", device);
+    offset = i * sizeof zero_buf;
+
+    /* Check if the block is already zero before overwriting it. */
+    if (pread (fd, buf, sizeof buf, offset) != sizeof buf) {
+      reply_with_perror ("pread: %s", device);
       close (fd);
       return -1;
     }
+
+    if (!is_zero (buf, sizeof buf)) {
+      if (pwrite (fd, zero_buf, sizeof zero_buf, offset) != sizeof zero_buf) {
+        reply_with_perror ("pwrite: %s", device);
+        close (fd);
+        return -1;
+      }
+    }
+
     notify_progress ((uint64_t) i, 32);
   }
 
@@ -67,14 +98,13 @@ do_zero_device (const char *device)
     return -1;
   uint64_t size = (uint64_t) ssize;
 
-  int fd = open (device, O_WRONLY);
+  int fd = open (device, O_RDWR);
   if (fd == -1) {
     reply_with_perror ("%s", device);
     return -1;
   }
 
-  char buf[1024*1024];
-  memset (buf, 0, sizeof buf);
+  char buf[sizeof zero_buf];
 
   uint64_t pos = 0;
 
@@ -86,15 +116,28 @@ do_zero_device (const char *device)
     else
       n = (size_t) n64; /* safe because of if condition */
 
-    ssize_t r = write (fd, buf, n);
+    /* Check if the block is already zero before overwriting it. */
+    ssize_t r;
+    r = pread (fd, buf, n, pos);
     if (r == -1) {
-      reply_with_perror ("write: %s (with %" PRId64 " bytes left to write)",
-                         device, size);
+      reply_with_perror ("pread: %s at offset %" PRIu64, device, pos);
       close (fd);
       return -1;
     }
 
-    pos += r;
+    if (!is_zero (buf, sizeof buf)) {
+      r = pwrite (fd, zero_buf, n, pos);
+      if (r == -1) {
+        reply_with_perror ("pwrite: %s (with %" PRId64 " bytes left to write)",
+                           device, size);
+        close (fd);
+        return -1;
+      }
+      pos += r;
+    }
+    else
+      pos += n;
+
     notify_progress (pos, size);
   }
 
@@ -106,13 +149,11 @@ do_zero_device (const char *device)
   return 0;
 }
 
-static char zero[BUFSIZ];
-
 int
 do_is_zero (const char *path)
 {
   int fd;
-  char buf[BUFSIZ];
+  char buf[1024*1024];
   ssize_t r;
 
   CHROOT_IN;
@@ -125,7 +166,7 @@ do_is_zero (const char *path)
   }
 
   while ((r = read (fd, buf, sizeof buf)) > 0) {
-    if (memcmp (buf, zero, r) != 0) {
+    if (!is_zero (buf, r)) {
       close (fd);
       return 0;
     }
@@ -149,7 +190,7 @@ int
 do_is_zero_device (const char *device)
 {
   int fd;
-  char buf[BUFSIZ];
+  char buf[1024*1024];
   ssize_t r;
 
   fd = open (device, O_RDONLY);
@@ -159,7 +200,7 @@ do_is_zero_device (const char *device)
   }
 
   while ((r = read (fd, buf, sizeof buf)) > 0) {
-    if (memcmp (buf, zero, r) != 0) {
+    if (!is_zero (buf, r)) {
       close (fd);
       return 0;
     }
diff --git a/generator/generator_actions.ml b/generator/generator_actions.ml
index c684e35..a924d87 100644
--- a/generator/generator_actions.ml
+++ b/generator/generator_actions.ml
@@ -3006,6 +3006,10 @@ How many blocks are zeroed isn't specified (but it's I<not> enough
 to securely wipe the device).  It should be sufficient to remove
 any partition tables, filesystem superblocks and so on.
 
+If blocks are already zero, then this command avoids writing
+zeroes.  This prevents the underlying device from becoming non-sparse
+or growing unnecessarily.
+
 See also: C<guestfs_zero_device>, C<guestfs_scrub_device>,
 C<guestfs_is_zero_device>");
 
@@ -5229,7 +5233,11 @@ is not large enough.");
    "\
 This command writes zeroes over the entire C<device>.  Compare
 with C<guestfs_zero> which just zeroes the first few blocks of
-a device.");
+a device.
+
+If blocks are already zero, then this command avoids writing
+zeroes.  This prevents the underlying device from becoming non-sparse
+or growing unnecessarily.");
 
   ("txz_in", (RErr, [FileIn "tarball"; Pathname "directory"], []), 229, [Optional "xz"],
    [InitScratchFS, Always, TestOutput (
-- 
1.7.6

#!/bin/bash -

size=1G
sizemegabytes=1024
develdir=$HOME/d/libguestfs

for guestfish in "$develdir/run $develdir/fish/guestfish" /usr/bin/guestfish
do
    echo Making sure the appliance is cached before we run the test ...
    $guestfish -a /dev/null run
    $guestfish -a /dev/null run

    echo Sparse raw all zeroes test with "$guestfish" ...
    rm -f test.img test.qcow2
    truncate -s $size test.img
    du -sh test.img
    $guestfish -a test.img run : time zero-device /dev/sda
    du -sh test.img

    echo Non-sparse raw all zeroes test with "$guestfish" ...
    rm -f test.img test.qcow2
    dd if=/dev/zero of=test.img bs=1024k count=$sizemegabytes >/dev/null 2>&1
    du -sh test.img
    $guestfish -a test.img run : time zero-device /dev/sda
    du -sh test.img

    echo Non-sparse raw random data test with "$guestfish" ...
    rm -f test.img test.qcow2
    dd if=/dev/urandom of=test.img bs=1024k count=$sizemegabytes >/dev/null 2>&1
    du -sh test.img
    $guestfish -a test.img run : time zero-device /dev/sda
    du -sh test.img

    echo QCow2 no preallocation all zeroes test with "$guestfish" ...
    rm -f test.img test.qcow2
    qemu-img create -f qcow2 test.qcow2 $size >/dev/null
    du -sh test.qcow2
    $guestfish -a test.qcow2 run : time zero-device /dev/sda
    du -sh test.qcow2

    echo QCow2 with preallocation all zeroes test with "$guestfish" ...
    rm -f test.img test.qcow2
    qemu-img create -f qcow2 -o preallocation=metadata test.qcow2 $size >/dev/null
    du -sh test.qcow2
    $guestfish -a test.qcow2 run : time zero-device /dev/sda
    du -sh test.qcow2
done

[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]