[libvirt] [PATCH RFC] Add domainSave/Restore to libxl driver
Markus Groß
gross at univention.de
Tue May 24 10:15:42 UTC 2011
Quoting Jim Fehlig <jfehlig at novell.com>:
> Markus Groß wrote:
>> Am Montag 23 Mai 2011 04:30:12 schrieb Jim Fehlig:
>>
>>> Jim Fehlig wrote:
>>>
>>>> Markus Groß wrote:
>>>>
>>>>
>>>>> This patch adds save/restore functionality to the libxl driver.
>>>>>
>>>>> It is a v2 of this patch:
>>>>> https://www.redhat.com/archives/libvir-list/2011-April/msg00338.html
>>>>>
>>>>> v2:
>>>>> * header is now padded and has a version field
>>>>> * the correct restore function from libxl is used
>>>>> * only create the restore event once in libxlVmStart
>>>>>
>>>>>
>>>>>
>>>> Hi Markus,
>>>>
>>>> Finally found time to try your patch. Thanks for the patience :-).
>>>>
>>>>
>>>>
>>>>> However I ran into a reproducible segfault.
>>>>> Assume you saved a vm with:
>>>>> # virsh save domU foo.img
>>>>>
>>>>>
>>>>>
>>>> I think the problem actually lies in the save function. The domain does
>>>> not appear to be cleaned up properly. From xl's perspective after virsh
>>>> save domU foo.img
>>>>
>>>> xen33: # xl list
>>>> Name ID Mem VCPUs State
>>>> Time(s)
>>>> Domain-0 0 2023 8
>>>> r----- 330.0
>>>> (null) 11 1
>>>> 2 --pssd 27.1
>>>>
>>>> The orphaned domain disappears after libvirtd restart.
>>>>
>>> I manged to track down this problem, patch posted to xen-devel
>>>
>>> http://lists.xensource.com/archives/html/xen-devel/2011-05/msg01314.html
>>>
>>>
>>
>> Great! I attached the current version of the save/restore patch.
>> It is rebased against the current master.
>>
>
> Ok, thanks, I have some comments below.
>
>>
>>>>
>>>>
>>>>> If you restore the save image,
>>>>> destroy the vm and restore it again, a segfault occurs:
>>>>> # virsh restore foo.img
>>>>> # virsh destroy domU
>>>>> # virsh restore foo.img
>>>>> # segfault
>>>>>
>>>>>
>>> But I still see the segfault, in addition to domain not booting and
>>> consuming 100% cpu on first restore :-(. I'll look at these issues next.
>>>
>
> I've identified cause of both of these issues. For the domain not
> _restoring_ (booting doesn't make much sense on a restore) and consuming
> 100% cpu, I've posted this patch (which still needs some work)
>
> https://www.redhat.com/archives/libvir-list/2011-May/msg01450.html
>
> The segfault ended up being a bug in libxc. I've posted a fix to xen-devel
>
> http://lists.xensource.com/archives/html/xen-devel/2011-05/msg01511.html
>
>> diff --git a/src/libxl/libxl_conf.h b/src/libxl/libxl_conf.h
>> index 65110cf..e75e418 100644
>> --- a/src/libxl/libxl_conf.h
>> +++ b/src/libxl/libxl_conf.h
>> @@ -1,5 +1,6 @@
>>
>> /*---------------------------------------------------------------------------*/
>> /* Copyright (c) 2011 SUSE LINUX Products GmbH, Nuernberg, Germany.
>> + * Copyright (C) 2011 Univention GmbH.
>> *
>> * This library is free software; you can redistribute it and/or
>> * modify it under the terms of the GNU Lesser General Public
>> @@ -17,6 +18,7 @@
>> *
>> * Authors:
>> * Jim Fehlig <jfehlig at novell.com>
>> + * Markus Groß <gross at univention.de>
>> */
>>
>> /*---------------------------------------------------------------------------*/
>>
>> @@ -81,6 +83,18 @@ struct _libxlDomainObjPrivate {
>> int eventHdl;
>> };
>>
>> +#define LIBXL_SAVE_MAGIC "libvirt-xml\n \0 \r"
>> +#define LIBXL_SAVE_VERSION 1
>> +
>> +typedef struct _libxlSavefileHeader libxlSavefileHeader;
>> +typedef libxlSavefileHeader *libxlSavefileHeaderPtr;
>> +struct _libxlSavefileHeader {
>> + char magic[sizeof(LIBXL_SAVE_MAGIC)-1];
>> + uint32_t version;
>> + uint32_t xmlLen;
>> + /* 24 bytes used, pad up to 64 bytes */
>> + uint32_t unused[10];
>> +};
>>
>> # define libxlError(code, ...) \
>> virReportErrorHelper(VIR_FROM_LIBXL, code, __FILE__, \
>> diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c
>> index b2cc0e8..9bc71fd 100644
>> --- a/src/libxl/libxl_driver.c
>> +++ b/src/libxl/libxl_driver.c
>> @@ -29,6 +29,7 @@
>> #include <sys/utsname.h>
>> #include <math.h>
>> #include <libxl.h>
>> +#include <fcntl.h>
>>
>> #include "internal.h"
>> #include "logging.h"
>> @@ -60,11 +61,10 @@
>>
>> static libxlDriverPrivatePtr libxl_driver = NULL;
>>
>> -
>> /* Function declarations */
>> static int
>> -libxlVmStart(libxlDriverPrivatePtr driver,
>> - virDomainObjPtr vm, bool start_paused);
>> +libxlVmStart(libxlDriverPrivatePtr driver, virDomainObjPtr vm,
>> + bool start_paused, int restore_fd);
>>
>>
>> /* Function definitions */
>> @@ -168,7 +168,7 @@ libxlAutostartDomain(void *payload, const void
>> *name ATTRIBUTE_UNUSED,
>> virResetLastError();
>>
>> if (vm->autostart && !virDomainObjIsActive(vm) &&
>> - libxlVmStart(driver, vm, false) < 0) {
>> + libxlVmStart(driver, vm, false, -1) < 0) {
>> err = virGetLastError();
>> VIR_ERROR(_("Failed to autostart VM '%s': %s"),
>> vm->def->name,
>> @@ -369,7 +369,7 @@ static void libxlEventHandler(int watch,
>> break;
>> case SHUTDOWN_reboot:
>> libxlVmReap(driver, vm, 0, VIR_DOMAIN_SHUTOFF_SHUTDOWN);
>> - libxlVmStart(driver, vm, 0);
>> + libxlVmStart(driver, vm, 0, -1);
>> break;
>>
>
> You'll need to add a 'case SHUTDOWN_suspend' here and just call
> libxlVmCleanup().
>
>> default:
>> VIR_INFO("Unhandled shutdown_reason %d",
>> info.shutdown_reason);
>> @@ -535,8 +535,8 @@ libxlFreeMem(libxlDomainObjPrivatePtr priv,
>> libxl_domain_config *d_config)
>> * virDomainObjPtr should be locked on invocation
>> */
>> static int
>> -libxlVmStart(libxlDriverPrivatePtr driver,
>> - virDomainObjPtr vm, bool start_paused)
>> +libxlVmStart(libxlDriverPrivatePtr driver, virDomainObjPtr vm,
>> + bool start_paused, int restore_fd)
>> {
>> libxl_domain_config d_config;
>> virDomainDefPtr def = vm->def;
>> @@ -559,12 +559,23 @@ libxlVmStart(libxlDriverPrivatePtr driver,
>> goto error;
>> }
>>
>> - ret = libxl_domain_create_new(&priv->ctx, &d_config,
>> - NULL, &child_console_pid, &domid);
>> + if (restore_fd < 0)
>> + ret = libxl_domain_create_new(&priv->ctx, &d_config,
>> + NULL, &child_console_pid, &domid);
>> + else
>> + ret = libxl_domain_create_restore(&priv->ctx, &d_config, NULL,
>> + &child_console_pid, &domid,
>> + restore_fd);
>> +
>> if (ret) {
>> - libxlError(VIR_ERR_INTERNAL_ERROR,
>> - _("libxenlight failed to create new domain '%s'"),
>> - d_config.c_info.name);
>> + if (restore_fd < 0)
>> + libxlError(VIR_ERR_INTERNAL_ERROR,
>> + _("libxenlight failed to create new domain '%s'"),
>> + d_config.c_info.name);
>> + else
>> + libxlError(VIR_ERR_INTERNAL_ERROR,
>> + _("libxenlight failed to restore domain '%s'"),
>> + d_config.c_info.name);
>> goto error;
>> }
>>
>> @@ -597,7 +608,9 @@ libxlVmStart(libxlDriverPrivatePtr driver,
>> goto error;
>>
>> event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STARTED,
>> - VIR_DOMAIN_EVENT_STARTED_BOOTED);
>> + restore_fd < 0 ?
>> + VIR_DOMAIN_EVENT_STARTED_BOOTED :
>> +
>> VIR_DOMAIN_EVENT_STARTED_RESTORED);
>> libxlDomainEventQueue(driver, event);
>>
>> libxl_domain_config_destroy(&d_config);
>> @@ -1075,7 +1088,8 @@ libxlDomainCreateXML(virConnectPtr conn,
>> const char *xml,
>> goto cleanup;
>> def = NULL;
>>
>> - if (libxlVmStart(driver, vm, (flags & VIR_DOMAIN_START_PAUSED)
>> != 0) < 0) {
>> + if (libxlVmStart(driver, vm, (flags & VIR_DOMAIN_START_PAUSED) != 0,
>> + -1) < 0) {
>> virDomainRemoveInactive(&driver->domains, vm);
>> vm = NULL;
>> goto cleanup;
>> @@ -1627,6 +1641,184 @@ libxlDomainGetState(virDomainPtr dom,
>> }
>>
>> static int
>> +libxlDomainSave(virDomainPtr dom, const char *to)
>> +{
>> + libxlDriverPrivatePtr driver = dom->conn->privateData;
>> + virDomainObjPtr vm;
>> + libxlDomainObjPrivatePtr priv;
>> + virDomainEventPtr event = NULL;
>> + libxlSavefileHeader hdr;
>> + char *xml;
>> + uint32_t xml_len;
>> + int fd;
>> + int ret = -1;
>> +
>> + libxlDriverLock(driver);
>> + vm = virDomainFindByUUID(&driver->domains, dom->uuid);
>> +
>> + if (!vm) {
>> + char uuidstr[VIR_UUID_STRING_BUFLEN];
>> + virUUIDFormat(dom->uuid, uuidstr);
>> + libxlError(VIR_ERR_NO_DOMAIN,
>> + _("No domain with matching uuid '%s'"), uuidstr);
>> + goto cleanup;
>> + }
>> +
>> + if (!virDomainObjIsActive(vm)) {
>> + libxlError(VIR_ERR_OPERATION_INVALID, "%s", _("Domain is
>> not running"));
>> + goto cleanup;
>> + }
>> +
>> + priv = vm->privateData;
>> +
>> + if (virDomainObjGetState(vm, NULL) != VIR_DOMAIN_PAUSED) {
>> +
>> + if ((fd = virFileOpenAs(to, O_CREAT|O_TRUNC|O_WRONLY,
>> S_IRUSR|S_IWUSR,
>> + getuid(), getgid(), 0)) < 0) {
>> + virReportSystemError(-fd,
>> + _("Failed to create domain save
>> file '%s'"),
>> + to);
>> + goto cleanup;
>> + }
>> +
>> + if ((xml = virDomainDefFormat(vm->def, 0)) == NULL)
>> + goto cleanup;
>> + xml_len = strlen(xml) + 1;
>> +
>> + memset(&hdr, 0, sizeof(hdr));
>> + memcpy(hdr.magic, LIBXL_SAVE_MAGIC, sizeof(hdr.magic));
>> + hdr.version = LIBXL_SAVE_VERSION;
>> + hdr.xmlLen = xml_len;
>> +
>> + if (safewrite(fd, &hdr, sizeof(hdr)) != sizeof(hdr)) {
>> + libxlError(VIR_ERR_OPERATION_FAILED,
>> + _("Failed to write save file header"));
>> + goto cleanup;
>> + }
>> +
>> + if (safewrite(fd, xml, xml_len) != xml_len) {
>> + libxlError(VIR_ERR_OPERATION_FAILED,
>> + _("Failed to write xml description"));
>> + goto cleanup;
>> + }
>> +
>> + if (libxl_domain_suspend(&priv->ctx, NULL, dom->id, fd) != 0) {
>> + libxlError(VIR_ERR_INTERNAL_ERROR,
>> + _("Failed to save domain '%d' with libxenlight"),
>> + dom->id);
>> + goto cleanup;
>> + }
>> +
>> + event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED,
>> + VIR_DOMAIN_EVENT_STOPPED_SAVED);
>> +
>> + if (libxlVmReap(driver, vm, 1, VIR_DOMAIN_SHUTOFF_SAVED) != 0) {
>> + libxlError(VIR_ERR_INTERNAL_ERROR,
>> + _("Failed to destroy domain '%d'"), dom->id);
>> + goto cleanup;
>> + }
>>
>
> And here call libxl_domain_destroy() directly instead of libxlVmReap(),
> allowing the event handler to cleanup the vm.
>
> Can you make these changes and ensure all the save/restore issues are
> resolved in your environment?
>
> Regards,
> Jim
Thanks for your review. I will post a v3 of this patch tomorrow,
incorporating your suggestions.
Cheers,
Markus
>
>> +
>> + if (!vm->persistent) {
>> + virDomainRemoveInactive(&driver->domains, vm);
>> + vm = NULL;
>> + }
>> + ret = 0;
>> + }
>> +cleanup:
>> + VIR_FREE(xml);
>> + if (VIR_CLOSE(fd) < 0)
>> + virReportSystemError(errno, "%s", _("cannot close file"));
>> + if (vm)
>> + virDomainObjUnlock(vm);
>> + if (event)
>> + libxlDomainEventQueue(driver, event);
>> + libxlDriverUnlock(driver);
>> + return ret;
>> +}
>> +
>> +static int
>> +libxlDomainRestore(virConnectPtr conn, const char *from)
>> +{
>> + libxlDriverPrivatePtr driver = conn->privateData;
>> + virDomainDefPtr def = NULL;
>> + virDomainObjPtr vm = NULL;
>> + libxlSavefileHeader hdr;
>> + char *xml = NULL;
>> + int fd;
>> + int ret = -1;
>> +
>> + libxlDriverLock(driver);
>> +
>> + if ((fd = virFileOpenAs(from, O_RDONLY, 0, getuid(), getgid(),
>> 0)) < 0) {
>> + libxlError(VIR_ERR_OPERATION_FAILED,
>> + "%s", _("cannot read domain image"));
>> + goto cleanup;
>> + }
>> +
>> + if (saferead(fd, &hdr, sizeof(hdr)) != sizeof(hdr)) {
>> + libxlError(VIR_ERR_OPERATION_FAILED,
>> + "%s", _("failed to read libxl header"));
>> + goto cleanup;
>> + }
>> +
>> + if (memcmp(hdr.magic, LIBXL_SAVE_MAGIC, sizeof(hdr.magic))) {
>> + libxlError(VIR_ERR_INVALID_ARG, "%s", _("image magic is
>> incorrect"));
>> + goto cleanup;
>> + }
>> +
>> + if (hdr.version > LIBXL_SAVE_VERSION) {
>> + libxlError(VIR_ERR_OPERATION_FAILED,
>> + _("image version is not supported (%d > %d)"),
>> + hdr.version, LIBXL_SAVE_VERSION);
>> + goto cleanup;
>> + }
>> +
>> + if (hdr.xmlLen <= 0) {
>> + libxlError(VIR_ERR_OPERATION_FAILED,
>> + _("invalid XML length: %d"), hdr.xmlLen);
>> + goto cleanup;
>> + }
>> +
>> + if (VIR_ALLOC_N(xml, hdr.xmlLen) < 0) {
>> + virReportOOMError();
>> + goto cleanup;
>> + }
>> +
>> + if (saferead(fd, xml, hdr.xmlLen) != hdr.xmlLen) {
>> + libxlError(VIR_ERR_OPERATION_FAILED, "%s", _("failed to
>> read XML"));
>> + goto cleanup;
>> + }
>> +
>> + if (!(def = virDomainDefParseString(driver->caps, xml,
>> + VIR_DOMAIN_XML_INACTIVE)))
>> + goto cleanup;
>> +
>> + if (virDomainObjIsDuplicate(&driver->domains, def, 1) < 0)
>> + goto cleanup;
>> +
>> + if (!(vm = virDomainAssignDef(driver->caps, &driver->domains,
>> def, true)))
>> + goto cleanup;
>> +
>> + def = NULL;
>> +
>> + if ((ret = libxlVmStart(driver, vm, false, fd)) < 0 &&
>> + !vm->persistent) {
>> + virDomainRemoveInactive(&driver->domains, vm);
>> + vm = NULL;
>> + }
>> +
>> +cleanup:
>> + VIR_FREE(xml);
>> + virDomainDefFree(def);
>> + if (VIR_CLOSE(fd) < 0)
>> + virReportSystemError(errno, "%s", _("cannot close file"));
>> + if (vm)
>> + virDomainObjUnlock(vm);
>> + libxlDriverUnlock(driver);
>> + return ret;
>> +}
>> +
>> +static int
>> libxlDomainSetVcpusFlags(virDomainPtr dom, unsigned int nvcpus,
>> unsigned int flags)
>> {
>> @@ -2096,7 +2288,7 @@ libxlDomainCreateWithFlags(virDomainPtr dom,
>> goto cleanup;
>> }
>>
>> - ret = libxlVmStart(driver, vm, (flags & VIR_DOMAIN_START_PAUSED) != 0);
>> + ret = libxlVmStart(driver, vm, (flags &
>> VIR_DOMAIN_START_PAUSED) != 0, -1);
>>
>> cleanup:
>> if (vm)
>> @@ -2722,6 +2914,8 @@ static virDriver libxlDriver = {
>> .domainSetMemoryFlags = libxlDomainSetMemoryFlags, /* 0.9.0 */
>> .domainGetInfo = libxlDomainGetInfo, /* 0.9.0 */
>> .domainGetState = libxlDomainGetState, /* 0.9.2 */
>> + .domainSave = libxlDomainSave, /* 0.9.2 */
>> + .domainRestore = libxlDomainRestore, /* 0.9.2 */
>> .domainSetVcpus = libxlDomainSetVcpus, /* 0.9.0 */
>> .domainSetVcpusFlags = libxlDomainSetVcpusFlags, /* 0.9.0 */
>> .domainGetVcpusFlags = libxlDomainGetVcpusFlags, /* 0.9.0 */
>>
>
>
More information about the libvir-list
mailing list