[libvirt] [PATCH v2] qemu: domain I/O asynchronous handling

Michal Privoznik mprivozn at redhat.com
Mon Jun 20 10:33:29 UTC 2011


For virtio disks and interfaces, qemu allows users to enable or disable
ioeventfd feature. This means, qemu can execute domain code, while
another thread waits for I/O event. Basically, in some cases it is win,
in some loss. This feature is available via 'ioeventfd' attribute in disk
and interface <driver> element. It accepts 'on' and 'off'. Leaving this
attribute out defaults to hypervisor decision.
---
diff to v1:
-reverted to 'ioeventfd' attribute:
  https://www.redhat.com/archives/libvir-list/2011-June/msg00712.html

 docs/formatdomain.html.in                          |   34 +++++++++++++-
 docs/schemas/domain.rng                            |   14 ++++++
 src/conf/domain_conf.c                             |   49 +++++++++++++++++++-
 src/conf/domain_conf.h                             |   11 ++++
 src/libvirt_private.syms                           |    2 +
 src/qemu/qemu_capabilities.c                       |    3 +
 src/qemu/qemu_capabilities.h                       |    1 +
 src/qemu/qemu_command.c                            |   13 +++++
 tests/qemuhelptest.c                               |    3 +-
 .../qemuxml2argv-disk-ioeventfd.args               |   11 ++++
 .../qemuxml2argv-disk-ioeventfd.xml                |   50 ++++++++++++++++++++
 tests/qemuxml2argvtest.c                           |    4 ++
 12 files changed, 191 insertions(+), 4 deletions(-)
 create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-disk-ioeventfd.args
 create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-disk-ioeventfd.xml

diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
index ab39417..39e1a85 100644
--- a/docs/formatdomain.html.in
+++ b/docs/formatdomain.html.in
@@ -785,7 +785,7 @@
     </disk>
       ...
     <disk type='network'>
-      <driver name="qemu" type="raw" io="threads"/>
+      <driver name="qemu" type="raw" io="threads" ioeventfd="on"/>
       <source protocol="sheepdog" name="image_name">
         <host name="hostname" port="7000"/>
       </source>
@@ -869,6 +869,20 @@
             policies on I/O; qemu guests support "threads" and
             "native". <span class="since">Since 0.8.8</span>
           </li>
+          <li>
+            The optional <code>ioeventfd</code> attribute allows users to
+            set <a href='https://patchwork.kernel.org/patch/43390/'>
+            domain I/O asynchronous handling</a> for disk device.
+            The default is left to the discretion of the hypervisor.
+            Accepted values are "on" and "off". Enabling this allows
+            qemu to execute VM while a separate thread handles I/O.
+            Typically guests experiencing high system CPU utilization
+            during I/O will benefit from this. On the other hand,
+            on overloaded host it could increase guest I/O latency.
+            <span class="since">Since 0.9.3 (QEMU and KVM only)</span>
+            <b>In general you should leave this option alone, unless you
+            are very certain you know what you are doing.</b>
+          </li>
         </ul>
       </dd>
       <dt><code>boot</code></dt>
@@ -1649,7 +1663,7 @@ qemu-kvm -net nic,model=? /dev/null
       <source network='default'/>
       <target dev='vnet1'/>
       <model type='virtio'/>
-      <b><driver name='vhost' txmode='iothread'/></b>
+      <b><driver name='vhost' txmode='iothread' ioeventfd='on'/></b>
     </interface>
   </devices>
   ...</pre>
@@ -1700,6 +1714,22 @@ qemu-kvm -net nic,model=? /dev/null
         <b>In general you should leave this option alone, unless you
         are very certain you know what you are doing.</b>
       </dd>
+      <dt><code>ioeventfd</code></dt>
+      <dd>
+        This optional attribute allows users to set
+        <a href='https://patchwork.kernel.org/patch/43390/'>
+        domain I/O asynchronous handling</a> for interface device.
+        The default is left to the discretion of the hypervisor.
+        Accepted values are "on" and "off". Enabling this allows
+        qemu to execute VM while a separate thread handles I/O.
+        Typically guests experiencing high system CPU utilization
+        during I/O will benefit from this. On the other hand,
+        on overloaded host it could increase guest I/O latency.
+        <span class="since">Since 0.9.3 (QEMU and KVM only)</span><br/><br/>
+
+        <b>In general you should leave this option alone, unless you
+        are very certain you know what you are doing.</b>
+      </dd>
     </dl>
 
     <h5><a name="elementsNICSTargetOverride">Overriding the target element</a></h5>
diff --git a/docs/schemas/domain.rng b/docs/schemas/domain.rng
index 6de024e..891662d 100644
--- a/docs/schemas/domain.rng
+++ b/docs/schemas/domain.rng
@@ -778,6 +778,9 @@
       <optional>
         <ref name="driverIO"/>
       </optional>
+      <optional>
+        <ref name="ioeventfd"/>
+      </optional>
       <empty/>
     </element>
   </define>
@@ -817,6 +820,14 @@
       </choice>
     </attribute>
   </define>
+  <define name="ioeventfd">
+    <attribute name="ioeventfd">
+      <choice>
+        <value>on</value>
+        <value>off</value>
+      </choice>
+    </attribute>
+  </define>
   <define name="controller">
     <element name="controller">
       <choice>
@@ -1117,6 +1128,9 @@
               </choice>
             </attribute>
           </optional>
+          <optional>
+            <ref name="ioeventfd"/>
+          </optional>
           <empty/>
         </element>
       </optional>
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 5360863..2234857 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -163,6 +163,11 @@ VIR_ENUM_IMPL(virDomainDiskIo, VIR_DOMAIN_DISK_IO_LAST,
               "default",
               "native",
               "threads")
+VIR_ENUM_IMPL(virDomainIoEventFd, VIR_DOMAIN_IO_EVENT_FD_LAST,
+              "default",
+              "on",
+              "off")
+
 
 VIR_ENUM_IMPL(virDomainController, VIR_DOMAIN_CONTROLLER_TYPE_LAST,
               "ide",
@@ -2013,6 +2018,7 @@ virDomainDiskDefParseXML(virCapsPtr caps,
     char *cachetag = NULL;
     char *error_policy = NULL;
     char *iotag = NULL;
+    char *ioeventfd = NULL;
     char *devaddr = NULL;
     virStorageEncryptionPtr encryption = NULL;
     char *serial = NULL;
@@ -2128,6 +2134,7 @@ virDomainDiskDefParseXML(virCapsPtr caps,
                 cachetag = virXMLPropString(cur, "cache");
                 error_policy = virXMLPropString(cur, "error_policy");
                 iotag = virXMLPropString(cur, "io");
+                ioeventfd = virXMLPropString(cur, "ioeventfd");
             } else if (xmlStrEqual(cur->name, BAD_CAST "readonly")) {
                 def->readonly = 1;
             } else if (xmlStrEqual(cur->name, BAD_CAST "shareable")) {
@@ -2264,6 +2271,24 @@ virDomainDiskDefParseXML(virCapsPtr caps,
         }
     }
 
+    if (ioeventfd) {
+        if (def->bus != VIR_DOMAIN_DISK_BUS_VIRTIO) {
+            virDomainReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                                 _("disk ioeventfd mode supported "
+                                   "only for virtio bus"));
+            goto error;
+        }
+
+        int i;
+        if ((i = virDomainIoEventFdTypeFromString(ioeventfd)) <= 0) {
+            virDomainReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                                 _("unknown disk ioeventfd mode '%s'"),
+                                 ioeventfd);
+            goto error;
+        }
+        def->ioeventfd=i;
+    }
+
     if (devaddr) {
         if (virDomainParseLegacyDeviceAddress(devaddr,
                                               &def->info.addr.pci) < 0) {
@@ -2326,6 +2351,7 @@ cleanup:
     VIR_FREE(cachetag);
     VIR_FREE(error_policy);
     VIR_FREE(iotag);
+    VIR_FREE(ioeventfd);
     VIR_FREE(devaddr);
     VIR_FREE(serial);
     virStorageEncryptionFree(encryption);
@@ -2713,6 +2739,7 @@ virDomainNetDefParseXML(virCapsPtr caps,
     char *model = NULL;
     char *backend = NULL;
     char *txmode = NULL;
+    char *ioeventfd = NULL;
     char *filter = NULL;
     char *internal = NULL;
     char *devaddr = NULL;
@@ -2802,6 +2829,7 @@ virDomainNetDefParseXML(virCapsPtr caps,
             } else if (xmlStrEqual (cur->name, BAD_CAST "driver")) {
                 backend = virXMLPropString(cur, "name");
                 txmode = virXMLPropString(cur, "txmode");
+                ioeventfd = virXMLPropString(cur, "ioeventfd");
             } else if (xmlStrEqual (cur->name, BAD_CAST "filterref")) {
                 filter = virXMLPropString(cur, "filter");
                 VIR_FREE(filterparams);
@@ -3018,6 +3046,16 @@ virDomainNetDefParseXML(virCapsPtr caps,
             }
             def->driver.virtio.txmode = m;
         }
+        if (ioeventfd) {
+            int i;
+            if ((i = virDomainIoEventFdTypeFromString(ioeventfd)) <= 0) {
+                virDomainReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                                     _("unknown interface ioeventfd mode '%s'"),
+                                     ioeventfd);
+                goto error;
+            }
+            def->driver.virtio.ioeventfd = i;
+        }
     }
 
     if (filter != NULL) {
@@ -3057,6 +3095,7 @@ cleanup:
     VIR_FREE(model);
     VIR_FREE(backend);
     VIR_FREE(txmode);
+    VIR_FREE(ioeventfd);
     VIR_FREE(filter);
     VIR_FREE(type);
     VIR_FREE(internal);
@@ -8291,6 +8330,7 @@ virDomainDiskDefFormat(virBufferPtr buf,
     const char *cachemode = virDomainDiskCacheTypeToString(def->cachemode);
     const char *error_policy = virDomainDiskErrorPolicyTypeToString(def->error_policy);
     const char *iomode = virDomainDiskIoTypeToString(def->iomode);
+    const char *ioeventfd = virDomainIoEventFdTypeToString(def->ioeventfd);
 
     if (!type) {
         virDomainReportError(VIR_ERR_INTERNAL_ERROR,
@@ -8322,7 +8362,8 @@ virDomainDiskDefFormat(virBufferPtr buf,
                       "    <disk type='%s' device='%s'>\n",
                       type, device);
 
-    if (def->driverName || def->driverType || def->cachemode) {
+    if (def->driverName || def->driverType || def->cachemode ||
+        def->ioeventfd) {
         virBufferAsprintf(buf, "      <driver");
         if (def->driverName)
             virBufferAsprintf(buf, " name='%s'", def->driverName);
@@ -8334,6 +8375,8 @@ virDomainDiskDefFormat(virBufferPtr buf,
             virBufferAsprintf(buf, " error_policy='%s'", error_policy);
         if (def->iomode)
             virBufferAsprintf(buf, " io='%s'", iomode);
+        if (def->ioeventfd)
+            virBufferAsprintf(buf, " ioeventfd='%s'", ioeventfd);
         virBufferAsprintf(buf, "/>\n");
     }
 
@@ -8624,6 +8667,10 @@ virDomainNetDefFormat(virBufferPtr buf,
                 virBufferAsprintf(buf, " txmode='%s'",
                                   virDomainNetVirtioTxModeTypeToString(def->driver.virtio.txmode));
             }
+            if (def->driver.virtio.ioeventfd) {
+                virBufferAsprintf(buf, " ioeventfd='%s'",
+                                  virDomainIoEventFdTypeToString(def->driver.virtio.ioeventfd));
+            }
             virBufferAddLit(buf, "/>\n");
         }
     }
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index ff5c28d..994ff91 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -206,6 +206,14 @@ enum  virDomainDiskIo {
     VIR_DOMAIN_DISK_IO_LAST
 };
 
+enum virDomainIoEventFd {
+    VIR_DOMAIN_IO_EVENT_FD_DEFAULT = 0,
+    VIR_DOMAIN_IO_EVENT_FD_ON,
+    VIR_DOMAIN_IO_EVENT_FD_OFF,
+
+    VIR_DOMAIN_IO_EVENT_FD_LAST
+};
+
 /* Stores the virtual disk configuration */
 typedef struct _virDomainDiskDef virDomainDiskDef;
 typedef virDomainDiskDef *virDomainDiskDefPtr;
@@ -225,6 +233,7 @@ struct _virDomainDiskDef {
     int error_policy;
     int bootIndex;
     int iomode;
+    int ioeventfd;
     unsigned int readonly : 1;
     unsigned int shared : 1;
     virDomainDeviceInfo info;
@@ -361,6 +370,7 @@ struct _virDomainNetDef {
         struct {
             enum virDomainNetBackendType name; /* which driver backend to use */
             enum virDomainNetVirtioTxModeType txmode;
+            enum virDomainIoEventFd ioeventfd;
         } virtio;
     } driver;
     union {
@@ -1554,6 +1564,7 @@ VIR_ENUM_DECL(virDomainDiskCache)
 VIR_ENUM_DECL(virDomainDiskErrorPolicy)
 VIR_ENUM_DECL(virDomainDiskProtocol)
 VIR_ENUM_DECL(virDomainDiskIo)
+VIR_ENUM_DECL(virDomainIoEventFd)
 VIR_ENUM_DECL(virDomainController)
 VIR_ENUM_DECL(virDomainControllerModel)
 VIR_ENUM_DECL(virDomainFS)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 03d2ddb..5c8d272 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -292,6 +292,8 @@ virDomainHostdevDefFree;
 virDomainHostdevModeTypeToString;
 virDomainHostdevSubsysTypeToString;
 virDomainInputDefFree;
+virDomainIoEventFdTypeFromString;
+virDomainIoEventFdTypeToString;
 virDomainLeaseIndex;
 virDomainLeaseInsert;
 virDomainLeaseInsertPreAlloc;
diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c
index 28c89b5..ad62a07 100644
--- a/src/qemu/qemu_capabilities.c
+++ b/src/qemu/qemu_capabilities.c
@@ -121,6 +121,7 @@ VIR_ENUM_IMPL(qemuCaps, QEMU_CAPS_LAST,
               "device-qxl-vga",
 
               "pci-multifunction", /* 60 */
+              "virtio-blk-pci.ioeventfd",
     );
 
 struct qemu_feature_flags {
@@ -1207,6 +1208,8 @@ qemuCapsParseDeviceStr(const char *str, virBitmapPtr flags)
         qemuCapsSet(flags, QEMU_CAPS_VIRTIO_TX_ALG);
     if (strstr(str, "name \"qxl-vga\""))
         qemuCapsSet(flags, QEMU_CAPS_DEVICE_QXL_VGA);
+    if (strstr(str, "virtio-blk-pci.ioeventfd"))
+        qemuCapsSet(flags, QEMU_CAPS_VIRTIO_IOEVENTFD);
 
     return 0;
 }
diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h
index e6d2fa3..0b9c8be 100644
--- a/src/qemu/qemu_capabilities.h
+++ b/src/qemu/qemu_capabilities.h
@@ -96,6 +96,7 @@ enum qemuCapsFlags {
     QEMU_CAPS_VIRTIO_TX_ALG     = 58, /* -device virtio-net-pci,tx=string */
     QEMU_CAPS_DEVICE_QXL_VGA    = 59, /* Is the primary and vga campatible qxl device named qxl-vga? */
     QEMU_CAPS_PCI_MULTIFUNCTION = 60, /* -device multifunction=on|off */
+    QEMU_CAPS_VIRTIO_IOEVENTFD  = 61, /* IOeventFD feature: virtio-{net|blk}-pci.ioeventfd=on/off */
 
     QEMU_CAPS_LAST,                   /* this must always be the last item */
 };
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 6346243..f7c06f8 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -1287,6 +1287,16 @@ qemuBuildDeviceAddressStr(virBufferPtr buf,
     return 0;
 }
 
+static int
+qemuBuildIoEventFdStr(virBufferPtr buf,
+                      enum virDomainIoEventFd use,
+                      virBitmapPtr qemuCaps)
+{
+    if (use && qemuCapsGet(qemuCaps, QEMU_CAPS_VIRTIO_IOEVENTFD))
+        virBufferAsprintf(buf, ",ioeventfd=%s",
+                          virDomainIoEventFdTypeToString(use));
+    return 0;
+}
 
 #define QEMU_SERIAL_PARAM_ACCEPTED_CHARS \
   "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_"
@@ -1554,6 +1564,7 @@ qemuBuildDriveDevStr(virDomainDiskDefPtr disk,
         break;
     case VIR_DOMAIN_DISK_BUS_VIRTIO:
         virBufferAddLit(&opt, "virtio-blk-pci");
+        qemuBuildIoEventFdStr(&opt, disk->ioeventfd, qemuCaps);
         qemuBuildDeviceAddressStr(&opt, &disk->info, qemuCaps);
         break;
     case VIR_DOMAIN_DISK_BUS_USB:
@@ -1777,6 +1788,8 @@ qemuBuildNicDevStr(virDomainNetDefPtr net,
             goto error;
         }
     }
+    if (usingVirtio)
+        qemuBuildIoEventFdStr(&buf, net->driver.virtio.ioeventfd, qemuCaps);
     if (vlan == -1)
         virBufferAsprintf(&buf, ",netdev=host%s", net->info.alias);
     else
diff --git a/tests/qemuhelptest.c b/tests/qemuhelptest.c
index 327a0c7..119e771 100644
--- a/tests/qemuhelptest.c
+++ b/tests/qemuhelptest.c
@@ -475,7 +475,8 @@ mymain(void)
             QEMU_CAPS_CCID_PASSTHRU,
             QEMU_CAPS_CHARDEV_SPICEVMC,
             QEMU_CAPS_DEVICE_QXL_VGA,
-            QEMU_CAPS_VIRTIO_TX_ALG);
+            QEMU_CAPS_VIRTIO_TX_ALG,
+            QEMU_CAPS_VIRTIO_IOEVENTFD);
 
     return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
 }
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-disk-ioeventfd.args b/tests/qemuxml2argvdata/qemuxml2argv-disk-ioeventfd.args
new file mode 100644
index 0000000..c512f15
--- /dev/null
+++ b/tests/qemuxml2argvdata/qemuxml2argv-disk-ioeventfd.args
@@ -0,0 +1,11 @@
+LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test QEMU_AUDIO_DRV=none \
+/usr/bin/qemu -S -M pc-0.13 -m 1024 -smp 1 -nodefaults \
+-monitor unix:/tmp/test-monitor,server,nowait -no-acpi \
+-boot dc -device virtio-serial-pci,id=virtio-serial0,bus=pci.0,addr=0x6 \
+-drive file=/var/lib/libvirt/images/f14.img,if=none,id=drive-virtio-disk0 \
+-device virtio-blk-pci,ioeventfd=on,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 \
+-drive file=/var/lib/libvirt/Fedora-14-x86_64-Live-KDE.iso,if=none,media=cdrom,id=drive-ide0-1-0 \
+-device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0 \
+-device virtio-net-pci,tx=bh,ioeventfd=off,vlan=0,id=net0,mac=52:54:00:e5:48:58,bus=pci.0,addr=0x3 \
+-net user,vlan=0,name=hostnet0 -serial pty -usb -vnc 127.0.0.1:-809 -std-vga \
+-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-disk-ioeventfd.xml b/tests/qemuxml2argvdata/qemuxml2argv-disk-ioeventfd.xml
new file mode 100644
index 0000000..c565c9f
--- /dev/null
+++ b/tests/qemuxml2argvdata/qemuxml2argv-disk-ioeventfd.xml
@@ -0,0 +1,50 @@
+<domain type='qemu'>
+  <name>test</name>
+  <memory>1048576</memory>
+  <vcpu>1</vcpu>
+  <os>
+    <type arch='x86_64' machine='pc-0.13'>hvm</type>
+    <boot dev='cdrom'/>
+    <boot dev='hd'/>
+    <bootmenu enable='yes'/>
+  </os>
+  <clock offset='utc'/>
+  <on_poweroff>destroy</on_poweroff>
+  <on_reboot>restart</on_reboot>
+  <on_crash>restart</on_crash>
+  <devices>
+    <emulator>/usr/bin/qemu</emulator>
+    <disk type='file' device='disk'>
+      <driver name='qemu' type='qcow2' ioeventfd='on'/>
+      <source file='/var/lib/libvirt/images/f14.img'/>
+      <target dev='vda' bus='virtio'/>
+      <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/>
+    </disk>
+      <disk type='file' device='cdrom'>
+      <driver name='qemu' type='raw'/>
+      <source file='/var/lib/libvirt/Fedora-14-x86_64-Live-KDE.iso'/>
+      <target dev='hdc' bus='ide'/>
+      <readonly/>
+      <address type='drive' controller='0' bus='1' unit='0'/>
+    </disk>
+    <interface type='user'>
+      <mac address='52:54:00:e5:48:58'/>
+      <model type='virtio'/>
+      <driver name='vhost' txmode='iothread' ioeventfd='off'/>
+    </interface>
+    <controller type='virtio-serial' index='0'>
+      <address type='pci' domain='0x0000' bus='0x00' slot='0x06' function='0x0'/>
+    </controller>
+    <serial type='pty'>
+      <target port='0'/>
+    </serial>
+    <console type='pty'>
+      <target type='serial' port='0'/>
+    </console>
+    <graphics type='vnc' port='5091' autoport='no' listen='127.0.0.1'/>
+    <video>
+      <model type='vga' vram='9216' heads='1'/>
+      <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/>
+    </video>
+  </devices>
+</domain>
diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
index bd07efa..782664a 100644
--- a/tests/qemuxml2argvtest.c
+++ b/tests/qemuxml2argvtest.c
@@ -350,6 +350,10 @@ mymain(void)
     DO_TEST("disk-aio", false,
             QEMU_CAPS_DRIVE, QEMU_CAPS_DRIVE_AIO,
             QEMU_CAPS_DRIVE_CACHE_V2, QEMU_CAPS_DRIVE_FORMAT);
+    DO_TEST("disk-ioeventfd", false,
+            QEMU_CAPS_DRIVE, QEMU_CAPS_VIRTIO_IOEVENTFD,
+            QEMU_CAPS_VIRTIO_TX_ALG, QEMU_CAPS_DEVICE);
+
     DO_TEST("graphics-vnc", false, NONE);
     DO_TEST("graphics-vnc-socket", false, NONE);
 
-- 
1.7.5.rc3




More information about the libvir-list mailing list