[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

Re: [libvirt] [PATCH RFC] Add domainSave/Restore to libxl driver



Markus Groß wrote:
> Am Montag 23 Mai 2011 04:30:12 schrieb Jim Fehlig:
>   
>> Jim Fehlig wrote:
>>     
>>> Markus Groß wrote:
>>>   
>>>       
>>>> This patch adds save/restore functionality to the libxl driver.
>>>>
>>>> It is a v2 of this patch:
>>>> https://www.redhat.com/archives/libvir-list/2011-April/msg00338.html
>>>>
>>>> v2:
>>>> * header is now padded and has a version field
>>>> * the correct restore function from libxl is used
>>>> * only create the restore event once in libxlVmStart
>>>>   
>>>>     
>>>>         
>>> Hi Markus,
>>>
>>> Finally found time to try your patch.  Thanks for the patience :-).
>>>
>>>   
>>>       
>>>> However I ran into a reproducible segfault.
>>>> Assume you saved a vm with:
>>>> # virsh save domU foo.img
>>>>   
>>>>     
>>>>         
>>> I think the problem actually lies in the save function.  The domain does
>>> not appear to be cleaned up properly.  From xl's perspective after virsh
>>> save domU foo.img
>>>
>>> xen33: # xl list
>>> Name                                        ID   Mem VCPUs      State  
>>> Time(s)
>>> Domain-0                                  0    2023     8            
>>> r-----     330.0
>>> (null)                                         11     1         
>>> 2             --pssd      27.1
>>>
>>> The orphaned domain disappears after libvirtd restart.
>>>       
>> I manged to track down this problem, patch posted to xen-devel
>>
>> http://lists.xensource.com/archives/html/xen-devel/2011-05/msg01314.html
>>
>>     
>
> Great! I attached the current version of the save/restore patch.
> It is rebased against the current master.
>   

Ok, thanks,  I have some comments below.

>   
>>>   
>>>       
>>>> If you restore the save image,
>>>> destroy the vm and restore it again, a segfault occurs:
>>>> # virsh restore foo.img
>>>> # virsh destroy domU
>>>> # virsh restore foo.img
>>>> # segfault
>>>>     
>>>>         
>> But I still see the segfault, in addition to domain not booting and
>> consuming 100% cpu on first restore :-(.  I'll look at these issues next.
>>     

I've identified cause of both of these issues.  For the domain not
_restoring_ (booting doesn't make much sense on a restore) and consuming
100% cpu, I've posted this patch (which still needs some work)

https://www.redhat.com/archives/libvir-list/2011-May/msg01450.html

The segfault ended up being a bug in libxc.  I've posted a fix to xen-devel

http://lists.xensource.com/archives/html/xen-devel/2011-05/msg01511.html

> diff --git a/src/libxl/libxl_conf.h b/src/libxl/libxl_conf.h
> index 65110cf..e75e418 100644
> --- a/src/libxl/libxl_conf.h
> +++ b/src/libxl/libxl_conf.h
> @@ -1,5 +1,6 @@
>  /*---------------------------------------------------------------------------*/
>  /*  Copyright (c) 2011 SUSE LINUX Products GmbH, Nuernberg, Germany.
> + *  Copyright (C) 2011 Univention GmbH.
>   *
>   * This library is free software; you can redistribute it and/or
>   * modify it under the terms of the GNU Lesser General Public
> @@ -17,6 +18,7 @@
>   *
>   * Authors:
>   *     Jim Fehlig <jfehlig novell com>
> + *     Markus Groß <gross univention de>
>   */
>  /*---------------------------------------------------------------------------*/
>  
> @@ -81,6 +83,18 @@ struct _libxlDomainObjPrivate {
>      int eventHdl;
>  };
>  
> +#define LIBXL_SAVE_MAGIC "libvirt-xml\n \0 \r"
> +#define LIBXL_SAVE_VERSION 1
> +
> +typedef struct _libxlSavefileHeader libxlSavefileHeader;
> +typedef libxlSavefileHeader *libxlSavefileHeaderPtr;
> +struct _libxlSavefileHeader {
> +    char magic[sizeof(LIBXL_SAVE_MAGIC)-1];
> +    uint32_t version;
> +    uint32_t xmlLen;
> +    /* 24 bytes used, pad up to 64 bytes */
> +    uint32_t unused[10];
> +};
>  
>  # define libxlError(code, ...)                                     \
>      virReportErrorHelper(VIR_FROM_LIBXL, code, __FILE__,           \
> diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c
> index b2cc0e8..9bc71fd 100644
> --- a/src/libxl/libxl_driver.c
> +++ b/src/libxl/libxl_driver.c
> @@ -29,6 +29,7 @@
>  #include <sys/utsname.h>
>  #include <math.h>
>  #include <libxl.h>
> +#include <fcntl.h>
>  
>  #include "internal.h"
>  #include "logging.h"
> @@ -60,11 +61,10 @@
>  
>  static libxlDriverPrivatePtr libxl_driver = NULL;
>  
> -
>  /* Function declarations */
>  static int
> -libxlVmStart(libxlDriverPrivatePtr driver,
> -             virDomainObjPtr vm, bool start_paused);
> +libxlVmStart(libxlDriverPrivatePtr driver, virDomainObjPtr vm,
> +             bool start_paused, int restore_fd);
>  
>  
>  /* Function definitions */
> @@ -168,7 +168,7 @@ libxlAutostartDomain(void *payload, const void *name ATTRIBUTE_UNUSED,
>      virResetLastError();
>  
>      if (vm->autostart && !virDomainObjIsActive(vm) &&
> -        libxlVmStart(driver, vm, false) < 0) {
> +        libxlVmStart(driver, vm, false, -1) < 0) {
>          err = virGetLastError();
>          VIR_ERROR(_("Failed to autostart VM '%s': %s"),
>                    vm->def->name,
> @@ -369,7 +369,7 @@ static void libxlEventHandler(int watch,
>                  break;
>              case SHUTDOWN_reboot:
>                  libxlVmReap(driver, vm, 0, VIR_DOMAIN_SHUTOFF_SHUTDOWN);
> -                libxlVmStart(driver, vm, 0);
> +                libxlVmStart(driver, vm, 0, -1);
>                  break;
>   

You'll need to add a 'case SHUTDOWN_suspend' here and just call
libxlVmCleanup().

>              default:
>                  VIR_INFO("Unhandled shutdown_reason %d", info.shutdown_reason);
> @@ -535,8 +535,8 @@ libxlFreeMem(libxlDomainObjPrivatePtr priv, libxl_domain_config *d_config)
>   * virDomainObjPtr should be locked on invocation
>   */
>  static int
> -libxlVmStart(libxlDriverPrivatePtr driver,
> -             virDomainObjPtr vm, bool start_paused)
> +libxlVmStart(libxlDriverPrivatePtr driver, virDomainObjPtr vm,
> +             bool start_paused, int restore_fd)
>  {
>      libxl_domain_config d_config;
>      virDomainDefPtr def = vm->def;
> @@ -559,12 +559,23 @@ libxlVmStart(libxlDriverPrivatePtr driver,
>          goto error;
>      }
>  
> -    ret = libxl_domain_create_new(&priv->ctx, &d_config,
> -                                  NULL, &child_console_pid, &domid);
> +    if (restore_fd < 0)
> +        ret = libxl_domain_create_new(&priv->ctx, &d_config,
> +                                      NULL, &child_console_pid, &domid);
> +    else
> +        ret = libxl_domain_create_restore(&priv->ctx, &d_config, NULL,
> +                                          &child_console_pid, &domid,
> +                                          restore_fd);
> +
>      if (ret) {
> -        libxlError(VIR_ERR_INTERNAL_ERROR,
> -                   _("libxenlight failed to create new domain '%s'"),
> -                   d_config.c_info.name);
> +        if (restore_fd < 0)
> +            libxlError(VIR_ERR_INTERNAL_ERROR,
> +                       _("libxenlight failed to create new domain '%s'"),
> +                       d_config.c_info.name);
> +        else
> +            libxlError(VIR_ERR_INTERNAL_ERROR,
> +                       _("libxenlight failed to restore domain '%s'"),
> +                       d_config.c_info.name);
>          goto error;
>      }
>  
> @@ -597,7 +608,9 @@ libxlVmStart(libxlDriverPrivatePtr driver,
>          goto error;
>  
>      event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STARTED,
> -                                     VIR_DOMAIN_EVENT_STARTED_BOOTED);
> +                                     restore_fd < 0 ?
> +                                         VIR_DOMAIN_EVENT_STARTED_BOOTED :
> +                                         VIR_DOMAIN_EVENT_STARTED_RESTORED);
>      libxlDomainEventQueue(driver, event);
>  
>      libxl_domain_config_destroy(&d_config);
> @@ -1075,7 +1088,8 @@ libxlDomainCreateXML(virConnectPtr conn, const char *xml,
>          goto cleanup;
>      def = NULL;
>  
> -    if (libxlVmStart(driver, vm, (flags & VIR_DOMAIN_START_PAUSED) != 0) < 0) {
> +    if (libxlVmStart(driver, vm, (flags & VIR_DOMAIN_START_PAUSED) != 0,
> +                     -1) < 0) {
>          virDomainRemoveInactive(&driver->domains, vm);
>          vm = NULL;
>          goto cleanup;
> @@ -1627,6 +1641,184 @@ libxlDomainGetState(virDomainPtr dom,
>  }
>  
>  static int
> +libxlDomainSave(virDomainPtr dom, const char *to)
> +{
> +    libxlDriverPrivatePtr driver = dom->conn->privateData;
> +    virDomainObjPtr vm;
> +    libxlDomainObjPrivatePtr priv;
> +    virDomainEventPtr event = NULL;
> +    libxlSavefileHeader hdr;
> +    char *xml;
> +    uint32_t xml_len;
> +    int fd;
> +    int ret = -1;
> +
> +    libxlDriverLock(driver);
> +    vm = virDomainFindByUUID(&driver->domains, dom->uuid);
> +
> +    if (!vm) {
> +        char uuidstr[VIR_UUID_STRING_BUFLEN];
> +        virUUIDFormat(dom->uuid, uuidstr);
> +        libxlError(VIR_ERR_NO_DOMAIN,
> +                   _("No domain with matching uuid '%s'"), uuidstr);
> +        goto cleanup;
> +    }
> +
> +    if (!virDomainObjIsActive(vm)) {
> +        libxlError(VIR_ERR_OPERATION_INVALID, "%s", _("Domain is not running"));
> +        goto cleanup;
> +    }
> +
> +    priv = vm->privateData;
> +
> +    if (virDomainObjGetState(vm, NULL) != VIR_DOMAIN_PAUSED) {
> +
> +        if ((fd = virFileOpenAs(to, O_CREAT|O_TRUNC|O_WRONLY, S_IRUSR|S_IWUSR,
> +                                getuid(), getgid(), 0)) < 0) {
> +            virReportSystemError(-fd,
> +                                 _("Failed to create domain save file '%s'"),
> +                                 to);
> +            goto cleanup;
> +        }
> +
> +        if ((xml = virDomainDefFormat(vm->def, 0)) == NULL)
> +            goto cleanup;
> +        xml_len = strlen(xml) + 1;
> +
> +        memset(&hdr, 0, sizeof(hdr));
> +        memcpy(hdr.magic, LIBXL_SAVE_MAGIC, sizeof(hdr.magic));
> +        hdr.version = LIBXL_SAVE_VERSION;
> +        hdr.xmlLen = xml_len;
> +
> +        if (safewrite(fd, &hdr, sizeof(hdr)) != sizeof(hdr)) {
> +            libxlError(VIR_ERR_OPERATION_FAILED,
> +                       _("Failed to write save file header"));
> +            goto cleanup;
> +        }
> +
> +        if (safewrite(fd, xml, xml_len) != xml_len) {
> +            libxlError(VIR_ERR_OPERATION_FAILED,
> +                       _("Failed to write xml description"));
> +            goto cleanup;
> +        }
> +
> +        if (libxl_domain_suspend(&priv->ctx, NULL, dom->id, fd) != 0) {
> +            libxlError(VIR_ERR_INTERNAL_ERROR,
> +                       _("Failed to save domain '%d' with libxenlight"),
> +                       dom->id);
> +            goto cleanup;
> +        }
> +
> +        event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED,
> +                                         VIR_DOMAIN_EVENT_STOPPED_SAVED);
> +
> +        if (libxlVmReap(driver, vm, 1, VIR_DOMAIN_SHUTOFF_SAVED) != 0) {
> +            libxlError(VIR_ERR_INTERNAL_ERROR,
> +                       _("Failed to destroy domain '%d'"), dom->id);
> +            goto cleanup;
> +        }
>   

And here call libxl_domain_destroy() directly instead of libxlVmReap(),
allowing the event handler to cleanup the vm.

Can you make these changes and ensure all the save/restore issues are
resolved in your environment?

Regards,
Jim

> +
> +        if (!vm->persistent) {
> +            virDomainRemoveInactive(&driver->domains, vm);
> +            vm = NULL;
> +        }
> +        ret = 0;
> +    }
> +cleanup:
> +    VIR_FREE(xml);
> +    if (VIR_CLOSE(fd) < 0)
> +        virReportSystemError(errno, "%s", _("cannot close file"));
> +    if (vm)
> +        virDomainObjUnlock(vm);
> +    if (event)
> +        libxlDomainEventQueue(driver, event);
> +    libxlDriverUnlock(driver);
> +    return ret;
> +}
> +
> +static int
> +libxlDomainRestore(virConnectPtr conn, const char *from)
> +{
> +    libxlDriverPrivatePtr driver = conn->privateData;
> +    virDomainDefPtr def = NULL;
> +    virDomainObjPtr vm = NULL;
> +    libxlSavefileHeader hdr;
> +    char *xml = NULL;
> +    int fd;
> +    int ret = -1;
> +
> +    libxlDriverLock(driver);
> +
> +    if ((fd = virFileOpenAs(from, O_RDONLY, 0, getuid(), getgid(), 0)) < 0) {
> +        libxlError(VIR_ERR_OPERATION_FAILED,
> +                   "%s", _("cannot read domain image"));
> +        goto cleanup;
> +    }
> +
> +    if (saferead(fd, &hdr, sizeof(hdr)) != sizeof(hdr)) {
> +        libxlError(VIR_ERR_OPERATION_FAILED,
> +                   "%s", _("failed to read libxl header"));
> +        goto cleanup;
> +    }
> +
> +    if (memcmp(hdr.magic, LIBXL_SAVE_MAGIC, sizeof(hdr.magic))) {
> +        libxlError(VIR_ERR_INVALID_ARG, "%s", _("image magic is incorrect"));
> +        goto cleanup;
> +    }
> +
> +    if (hdr.version > LIBXL_SAVE_VERSION) {
> +        libxlError(VIR_ERR_OPERATION_FAILED,
> +                   _("image version is not supported (%d > %d)"),
> +                   hdr.version, LIBXL_SAVE_VERSION);
> +        goto cleanup;
> +    }
> +
> +    if (hdr.xmlLen <= 0) {
> +        libxlError(VIR_ERR_OPERATION_FAILED,
> +                   _("invalid XML length: %d"), hdr.xmlLen);
> +        goto cleanup;
> +    }
> +
> +    if (VIR_ALLOC_N(xml, hdr.xmlLen) < 0) {
> +        virReportOOMError();
> +        goto cleanup;
> +    }
> +
> +    if (saferead(fd, xml, hdr.xmlLen) != hdr.xmlLen) {
> +        libxlError(VIR_ERR_OPERATION_FAILED, "%s", _("failed to read XML"));
> +        goto cleanup;
> +    }
> +
> +    if (!(def = virDomainDefParseString(driver->caps, xml,
> +                                        VIR_DOMAIN_XML_INACTIVE)))
> +        goto cleanup;
> +
> +    if (virDomainObjIsDuplicate(&driver->domains, def, 1) < 0)
> +        goto cleanup;
> +
> +    if (!(vm = virDomainAssignDef(driver->caps, &driver->domains, def, true)))
> +        goto cleanup;
> +
> +    def = NULL;
> +
> +    if ((ret = libxlVmStart(driver, vm, false, fd)) < 0 &&
> +        !vm->persistent) {
> +        virDomainRemoveInactive(&driver->domains, vm);
> +        vm = NULL;
> +    }
> +
> +cleanup:
> +    VIR_FREE(xml);
> +    virDomainDefFree(def);
> +    if (VIR_CLOSE(fd) < 0)
> +        virReportSystemError(errno, "%s", _("cannot close file"));
> +    if (vm)
> +        virDomainObjUnlock(vm);
> +    libxlDriverUnlock(driver);
> +    return ret;
> +}
> +
> +static int
>  libxlDomainSetVcpusFlags(virDomainPtr dom, unsigned int nvcpus,
>                           unsigned int flags)
>  {
> @@ -2096,7 +2288,7 @@ libxlDomainCreateWithFlags(virDomainPtr dom,
>          goto cleanup;
>      }
>  
> -    ret = libxlVmStart(driver, vm, (flags & VIR_DOMAIN_START_PAUSED) != 0);
> +    ret = libxlVmStart(driver, vm, (flags & VIR_DOMAIN_START_PAUSED) != 0, -1);
>  
>  cleanup:
>      if (vm)
> @@ -2722,6 +2914,8 @@ static virDriver libxlDriver = {
>      .domainSetMemoryFlags = libxlDomainSetMemoryFlags, /* 0.9.0 */
>      .domainGetInfo = libxlDomainGetInfo, /* 0.9.0 */
>      .domainGetState = libxlDomainGetState, /* 0.9.2 */
> +    .domainSave = libxlDomainSave, /* 0.9.2 */
> +    .domainRestore = libxlDomainRestore, /* 0.9.2 */
>      .domainSetVcpus = libxlDomainSetVcpus, /* 0.9.0 */
>      .domainSetVcpusFlags = libxlDomainSetVcpusFlags, /* 0.9.0 */
>      .domainGetVcpusFlags = libxlDomainGetVcpusFlags, /* 0.9.0 */
>   


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]