[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

Re: [libvirt] [PATCH 5/5] qemu: Prefer VFIO for PCI device passthrough



On Fri, Sep 20, 2013 at 11:07:00AM +0200, Peter Krempa wrote:
> Prefer using VFIO (if available) to the legacy KVM device passthrough.
> 
> With this patch a PCI passthrough device without the driver configured
> will be started with VFIO if it's available on the host. If not legacy
> KVM passthrough is checked and error is reported if it's not available.
> ---
>  docs/formatdomain.html.in                          |  9 ++++----
>  src/conf/domain_conf.h                             |  2 +-
>  src/qemu/qemu_command.c                            |  2 +-
>  src/qemu/qemu_hotplug.c                            | 26 +++++++++++++++++++++-
>  src/qemu/qemu_process.c                            | 18 ++++++++++++++-
>  .../qemuxml2argv-hostdev-pci-address-device.args   |  2 +-
>  .../qemuxml2argvdata/qemuxml2argv-net-hostdev.args |  2 +-
>  tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args   |  4 ++--
>  8 files changed, 52 insertions(+), 13 deletions(-)
> 
> diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
> index 3689399..6f3f7cf 100644
> --- a/docs/formatdomain.html.in
> +++ b/docs/formatdomain.html.in
> @@ -2755,11 +2755,10 @@
>          backend, which is compatible with UEFI SecureBoot) or "kvm"
>          (for the legacy device assignment handled directly by the KVM
>          kernel module)<span class="since">Since 1.0.5 (QEMU and KVM
> -        only, requires kernel 3.6 or newer)</span>. Currently, "kvm"
> -        is the default used by libvirt when not explicitly provided,
> -        but since the two are functionally equivalent, this default
> -        could be changed in the future with no impact to domains that
> -        don't specify anything.
> +        only, requires kernel 3.6 or newer)</span>. The default, when
> +        the driver name is not explicitly specified, is to check wether
> +        VFIO is available and use it if it's the case. If VFIO is not
> +        available, the legacy "kvm" assignment is attempted.
>        </dd>
>        <dt><code>readonly</code></dt>
>        <dd>Indicates that the device is readonly, only supported by SCSI host
> diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
> index 9414ebf..43d8746 100644
> --- a/src/conf/domain_conf.h
> +++ b/src/conf/domain_conf.h
> @@ -399,7 +399,7 @@ enum virDomainHostdevSubsysType {
> 
>  /* the backend driver used for PCI hostdev devices */
>  typedef enum {
> -    VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT, /* currently kvm, could change */
> +    VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT, /* detect automaticaly, prefer VFIO */
>      VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM,    /* force legacy kvm style */
>      VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO,   /* force vfio */
> 
> diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
> index 1411533..0d7c02e 100644
> --- a/src/qemu/qemu_command.c
> +++ b/src/qemu/qemu_command.c
> @@ -5395,13 +5395,13 @@ qemuBuildPCIHostdevDevStr(virDomainDefPtr def,
> 
>      switch ((virDomainHostdevSubsysPciBackendType)
>              dev->source.subsys.u.pci.backend) {
> -    case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
>      case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM:
>          virBufferAddLit(&buf, "pci-assign");
>          if (configfd && *configfd)
>              virBufferAsprintf(&buf, ",configfd=%s", configfd);
>          break;
> 
> +    case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
>      case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO:
>          virBufferAddLit(&buf, "vfio-pci");
>          break;

The code calling this function should have translated 'DEFAULT' into
either 'KVM' or 'VFIO', so if we see 'DEFAULT' here shouldn't we
raise an error.

> diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c
> index 9320ab9..5a8fa44 100644
> --- a/src/qemu/qemu_hotplug.c
> +++ b/src/qemu/qemu_hotplug.c
> @@ -1144,6 +1144,31 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver,
>          return -1;
> 
>      switch ((virDomainHostdevSubsysPciBackendType) *backend) {
> +    case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
> +        if (qemuHostdevHostSupportsPassthroughVFIO() &&
> +            virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) {
> +            *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO;
> +            /* VFIO requires all of the guest's memory to be locked resident.
> +             * In this case, the guest's memory may already be locked, but it
> +             * doesn't hurt to "change" the limit to the same value.
> +             */
> +            if (vm->def->mem.hard_limit)
> +                virProcessSetMaxMemLock(vm->pid, vm->def->mem.hard_limit);
> +            else
> +                virProcessSetMaxMemLock(vm->pid,
> +                                        vm->def->mem.max_balloon + (1024 * 1024));
> +        } else if (qemuHostdevHostSupportsPassthroughLegacy() &&
> +                   (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_PCIDEVICE) ||
> +                    virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE))) {
> +            *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM;
> +        } else {
> +            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> +                           _("host doesn't support passthrough of "
> +                             "host PCI devices"));
> +        }
> +
> +        break;
> +
>      case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO:
>          if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) {
>              virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> @@ -1170,7 +1195,6 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver,
> 
>          break;
> 
> -    case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
>      case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM:
>          if (!qemuHostdevHostSupportsPassthroughLegacy()) {
>              virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> index e36ab99..9648aa0 100644
> --- a/src/qemu/qemu_process.c
> +++ b/src/qemu/qemu_process.c
> @@ -3730,6 +3730,23 @@ int qemuProcessStart(virConnectPtr conn,
>              }
> 
>              switch ((virDomainHostdevSubsysPciBackendType) *backend) {
> +            case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
> +                if (supportsPassthroughVFIO &&
> +                    virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) {
> +                    *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO;
> +                } else if (supportsPassthroughKVM &&
> +                           (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_PCIDEVICE) ||
> +                            virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE))) {
> +                    *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM;
> +                } else {
> +                    virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> +                                   _("host doesn't support passthrough of "
> +                                     "host PCI devices"));
> +                    goto cleanup;
> +                }
> +
> +                break;

There's alot of duplication between here & the hotplug code - can we get
this logic shared in a helper function ?

> +
>              case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO:
>                  if (!supportsPassthroughVFIO) {
>                      virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> @@ -3738,7 +3755,6 @@ int qemuProcessStart(virConnectPtr conn,
>                  }
>                  break;
> 
> -            case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
>              case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM:
>                  if (!supportsPassthroughKVM) {
>                      virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> diff --git a/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args b/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args
> index 214b246..557b733 100644
> --- a/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args
> +++ b/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args
> @@ -2,5 +2,5 @@ LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test QEMU_AUDIO_DRV=none \
>  /usr/bin/qemu -S -M \
>  pc -m 214 -smp 1 -nographic -nodefconfig -nodefaults -monitor \
>  unix:/tmp/test-monitor,server,nowait -no-acpi -boot c -usb -hda \
> -/dev/HostVG/QEMUGuest2 -device pci-assign,host=06:12.5,id=hostdev0,\
> +/dev/HostVG/QEMUGuest2 -device vfio-pci,host=06:12.5,id=hostdev0,\
>  bus=pci.0,addr=0x3 -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4
> diff --git a/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args b/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args
> index 184811b..3a3963c 100644
> --- a/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args
> +++ b/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args
> @@ -3,5 +3,5 @@ LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test QEMU_AUDIO_DRV=none \
>  -M pc -m 214 -smp 1 -nographic -nodefconfig -nodefaults -monitor \
>  unix:/tmp/test-monitor,server,nowait -no-acpi -boot c \
>  -usb -hda /dev/HostVG/QEMUGuest1 \
> --device pci-assign,host=03:07.1,id=hostdev0,bus=pci.0,addr=0x3 \
> +-device vfio-pci,host=03:07.1,id=hostdev0,bus=pci.0,addr=0x3 \
>  -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4
> diff --git a/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args b/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args
> index c850613..615047a 100644
> --- a/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args
> +++ b/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args
> @@ -7,7 +7,7 @@ unix:/tmp/test-monitor,server,nowait -no-acpi -boot c -usb -hda \
>  -net user,vlan=0,name=hostnet0 \
>  -device virtio-net-pci,vlan=1,id=net1,mac=52:54:00:24:a5:9e,bus=pci.0,addr=0x4,\
>  romfile=/etc/fake/bootrom.bin -net user,vlan=1,name=hostnet1 \
> --device pci-assign,host=06:12.5,id=hostdev0,bus=pci.0,addr=0x5,rombar=0 \
> --device pci-assign,host=06:12.6,id=hostdev1,bus=pci.0,addr=0x6,rombar=1,\
> +-device vfio-pci,host=06:12.5,id=hostdev0,bus=pci.0,addr=0x5,rombar=0 \
> +-device vfio-pci,host=06:12.6,id=hostdev1,bus=pci.0,addr=0x6,rombar=1,\
>  romfile=/etc/fake/bootrom.bin \
>  -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x7

Why is the XML here changing ? Shouldn't the test suite be stable in
what it generates regardless of what the host happens to support ?

Danielx
-- 
|: http://berrange.com      -o-    http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org              -o-             http://virt-manager.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org       -o-       http://live.gnome.org/gtk-vnc :|


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]