[libvirt] [PATCH 5/5] qemu: Prefer VFIO for PCI device passthrough
Daniel P. Berrange
berrange at redhat.com
Fri Sep 20 11:56:16 UTC 2013
On Fri, Sep 20, 2013 at 11:07:00AM +0200, Peter Krempa wrote:
> Prefer using VFIO (if available) to the legacy KVM device passthrough.
>
> With this patch a PCI passthrough device without the driver configured
> will be started with VFIO if it's available on the host. If not legacy
> KVM passthrough is checked and error is reported if it's not available.
> ---
> docs/formatdomain.html.in | 9 ++++----
> src/conf/domain_conf.h | 2 +-
> src/qemu/qemu_command.c | 2 +-
> src/qemu/qemu_hotplug.c | 26 +++++++++++++++++++++-
> src/qemu/qemu_process.c | 18 ++++++++++++++-
> .../qemuxml2argv-hostdev-pci-address-device.args | 2 +-
> .../qemuxml2argvdata/qemuxml2argv-net-hostdev.args | 2 +-
> tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args | 4 ++--
> 8 files changed, 52 insertions(+), 13 deletions(-)
>
> diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
> index 3689399..6f3f7cf 100644
> --- a/docs/formatdomain.html.in
> +++ b/docs/formatdomain.html.in
> @@ -2755,11 +2755,10 @@
> backend, which is compatible with UEFI SecureBoot) or "kvm"
> (for the legacy device assignment handled directly by the KVM
> kernel module)<span class="since">Since 1.0.5 (QEMU and KVM
> - only, requires kernel 3.6 or newer)</span>. Currently, "kvm"
> - is the default used by libvirt when not explicitly provided,
> - but since the two are functionally equivalent, this default
> - could be changed in the future with no impact to domains that
> - don't specify anything.
> + only, requires kernel 3.6 or newer)</span>. The default, when
> + the driver name is not explicitly specified, is to check wether
> + VFIO is available and use it if it's the case. If VFIO is not
> + available, the legacy "kvm" assignment is attempted.
> </dd>
> <dt><code>readonly</code></dt>
> <dd>Indicates that the device is readonly, only supported by SCSI host
> diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
> index 9414ebf..43d8746 100644
> --- a/src/conf/domain_conf.h
> +++ b/src/conf/domain_conf.h
> @@ -399,7 +399,7 @@ enum virDomainHostdevSubsysType {
>
> /* the backend driver used for PCI hostdev devices */
> typedef enum {
> - VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT, /* currently kvm, could change */
> + VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT, /* detect automaticaly, prefer VFIO */
> VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM, /* force legacy kvm style */
> VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO, /* force vfio */
>
> diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
> index 1411533..0d7c02e 100644
> --- a/src/qemu/qemu_command.c
> +++ b/src/qemu/qemu_command.c
> @@ -5395,13 +5395,13 @@ qemuBuildPCIHostdevDevStr(virDomainDefPtr def,
>
> switch ((virDomainHostdevSubsysPciBackendType)
> dev->source.subsys.u.pci.backend) {
> - case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
> case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM:
> virBufferAddLit(&buf, "pci-assign");
> if (configfd && *configfd)
> virBufferAsprintf(&buf, ",configfd=%s", configfd);
> break;
>
> + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
> case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO:
> virBufferAddLit(&buf, "vfio-pci");
> break;
The code calling this function should have translated 'DEFAULT' into
either 'KVM' or 'VFIO', so if we see 'DEFAULT' here shouldn't we
raise an error.
> diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c
> index 9320ab9..5a8fa44 100644
> --- a/src/qemu/qemu_hotplug.c
> +++ b/src/qemu/qemu_hotplug.c
> @@ -1144,6 +1144,31 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver,
> return -1;
>
> switch ((virDomainHostdevSubsysPciBackendType) *backend) {
> + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
> + if (qemuHostdevHostSupportsPassthroughVFIO() &&
> + virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) {
> + *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO;
> + /* VFIO requires all of the guest's memory to be locked resident.
> + * In this case, the guest's memory may already be locked, but it
> + * doesn't hurt to "change" the limit to the same value.
> + */
> + if (vm->def->mem.hard_limit)
> + virProcessSetMaxMemLock(vm->pid, vm->def->mem.hard_limit);
> + else
> + virProcessSetMaxMemLock(vm->pid,
> + vm->def->mem.max_balloon + (1024 * 1024));
> + } else if (qemuHostdevHostSupportsPassthroughLegacy() &&
> + (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_PCIDEVICE) ||
> + virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE))) {
> + *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM;
> + } else {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> + _("host doesn't support passthrough of "
> + "host PCI devices"));
> + }
> +
> + break;
> +
> case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO:
> if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) {
> virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> @@ -1170,7 +1195,6 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver,
>
> break;
>
> - case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
> case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM:
> if (!qemuHostdevHostSupportsPassthroughLegacy()) {
> virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> index e36ab99..9648aa0 100644
> --- a/src/qemu/qemu_process.c
> +++ b/src/qemu/qemu_process.c
> @@ -3730,6 +3730,23 @@ int qemuProcessStart(virConnectPtr conn,
> }
>
> switch ((virDomainHostdevSubsysPciBackendType) *backend) {
> + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
> + if (supportsPassthroughVFIO &&
> + virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) {
> + *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO;
> + } else if (supportsPassthroughKVM &&
> + (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_PCIDEVICE) ||
> + virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE))) {
> + *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM;
> + } else {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> + _("host doesn't support passthrough of "
> + "host PCI devices"));
> + goto cleanup;
> + }
> +
> + break;
There's alot of duplication between here & the hotplug code - can we get
this logic shared in a helper function ?
> +
> case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO:
> if (!supportsPassthroughVFIO) {
> virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> @@ -3738,7 +3755,6 @@ int qemuProcessStart(virConnectPtr conn,
> }
> break;
>
> - case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
> case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM:
> if (!supportsPassthroughKVM) {
> virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> diff --git a/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args b/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args
> index 214b246..557b733 100644
> --- a/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args
> +++ b/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args
> @@ -2,5 +2,5 @@ LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test QEMU_AUDIO_DRV=none \
> /usr/bin/qemu -S -M \
> pc -m 214 -smp 1 -nographic -nodefconfig -nodefaults -monitor \
> unix:/tmp/test-monitor,server,nowait -no-acpi -boot c -usb -hda \
> -/dev/HostVG/QEMUGuest2 -device pci-assign,host=06:12.5,id=hostdev0,\
> +/dev/HostVG/QEMUGuest2 -device vfio-pci,host=06:12.5,id=hostdev0,\
> bus=pci.0,addr=0x3 -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4
> diff --git a/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args b/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args
> index 184811b..3a3963c 100644
> --- a/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args
> +++ b/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args
> @@ -3,5 +3,5 @@ LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test QEMU_AUDIO_DRV=none \
> -M pc -m 214 -smp 1 -nographic -nodefconfig -nodefaults -monitor \
> unix:/tmp/test-monitor,server,nowait -no-acpi -boot c \
> -usb -hda /dev/HostVG/QEMUGuest1 \
> --device pci-assign,host=03:07.1,id=hostdev0,bus=pci.0,addr=0x3 \
> +-device vfio-pci,host=03:07.1,id=hostdev0,bus=pci.0,addr=0x3 \
> -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4
> diff --git a/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args b/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args
> index c850613..615047a 100644
> --- a/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args
> +++ b/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args
> @@ -7,7 +7,7 @@ unix:/tmp/test-monitor,server,nowait -no-acpi -boot c -usb -hda \
> -net user,vlan=0,name=hostnet0 \
> -device virtio-net-pci,vlan=1,id=net1,mac=52:54:00:24:a5:9e,bus=pci.0,addr=0x4,\
> romfile=/etc/fake/bootrom.bin -net user,vlan=1,name=hostnet1 \
> --device pci-assign,host=06:12.5,id=hostdev0,bus=pci.0,addr=0x5,rombar=0 \
> --device pci-assign,host=06:12.6,id=hostdev1,bus=pci.0,addr=0x6,rombar=1,\
> +-device vfio-pci,host=06:12.5,id=hostdev0,bus=pci.0,addr=0x5,rombar=0 \
> +-device vfio-pci,host=06:12.6,id=hostdev1,bus=pci.0,addr=0x6,rombar=1,\
> romfile=/etc/fake/bootrom.bin \
> -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x7
Why is the XML here changing ? Shouldn't the test suite be stable in
what it generates regardless of what the host happens to support ?
Danielx
--
|: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org -o- http://virt-manager.org :|
|: http://autobuild.org -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|
More information about the libvir-list
mailing list