[libvirt] [PATCH 4/1] qemu: detect incomplete save files
Daniel Veillard
veillard at redhat.com
Fri Sep 2 03:48:21 UTC 2011
On Tue, Aug 30, 2011 at 03:06:36PM -0600, Eric Blake wrote:
> Several users have reported problems with 'virsh start' failing because
> it was encountering a managed save situation where the managed save file
> was incomplete. Be more robust to this by using two different magic
> numbers, so that newer libvirt can gracefully handle an incomplete file
> differently than a complete one, while older libvirt will at least fail
> up front rather than trying to load only to have qemu fail at the end.
>
> Managed save is a convenience - it exists to preserve as much state
> as possible; if the state was not preserved, it is reasonable to just
> log that fact, then proceed with a fresh boot. On the other hand,
> user saves are under user control, so we must fail, but by making
> the failure message distinct, the user can better decide how to handle
> the situation of an incomplete save file.
>
> * src/qemu/qemu_driver.c (QEMUD_SAVE_PARTIAL): New define.
> (qemuDomainSaveInternal): Use it to mark incomplete images.
> (qemuDomainSaveImageOpen, qemuDomainObjRestore): Add parameter
> that controls what to do with partial images.
> (qemuDomainRestoreFlags, qemuDomainSaveImageGetXMLDesc)
> (qemuDomainSaveImageDefineXML, qemuDomainObjStart): Update callers.
> Based on an initial idea by Osier Yang.
> ---
>
> This should implement everything mentioned here:
> https://www.redhat.com/archives/libvir-list/2011-August/msg00854.html
>
> src/qemu/qemu_driver.c | 84 ++++++++++++++++++++++++++++++++++++++----------
> 1 files changed, 67 insertions(+), 17 deletions(-)
>
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index 9cb034b..61b33fe 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -2111,9 +2111,12 @@ cleanup:
> }
>
>
> -#define QEMUD_SAVE_MAGIC "LibvirtQemudSave"
> +#define QEMUD_SAVE_MAGIC "LibvirtQemudSave"
> +#define QEMUD_SAVE_PARTIAL "LibvirtQemudPart"
> #define QEMUD_SAVE_VERSION 2
>
> +verify(sizeof(QEMUD_SAVE_MAGIC) == sizeof(QEMUD_SAVE_PARTIAL));
> +
> enum qemud_save_formats {
> QEMUD_SAVE_FORMAT_RAW = 0,
> QEMUD_SAVE_FORMAT_GZIP = 1,
> @@ -2331,7 +2334,7 @@ qemuDomainSaveInternal(struct qemud_driver *driver, virDomainPtr dom,
> }
>
> memset(&header, 0, sizeof(header));
> - memcpy(header.magic, QEMUD_SAVE_MAGIC, sizeof(header.magic));
> + memcpy(header.magic, QEMUD_SAVE_PARTIAL, sizeof(header.magic));
> header.version = QEMUD_SAVE_VERSION;
>
> header.compressed = compressed;
> @@ -2435,12 +2438,37 @@ qemuDomainSaveInternal(struct qemud_driver *driver, virDomainPtr dom,
> bypassSecurityDriver,
> QEMU_ASYNC_JOB_SAVE) < 0)
> goto endjob;
> +
> + /* Touch up file header to mark image complete. */
> + if (bypass_cache) {
> + /* Reopen the file to touch up the header, since we aren't set
> + * up to seek backwards on directFd. The reopened fd will
> + * trigger a single page of file system cache pollution, but
> + * that's acceptable. */
> + if (VIR_CLOSE(fd) < 0) {
> + virReportSystemError(errno, _("unable to close %s"), path);
> + goto endjob;
> + }
> + if (virFileDirectFdClose(directFd) < 0)
> + goto endjob;
> + fd = qemuOpenFile(driver, path, O_WRONLY, NULL, NULL);
> + if (fd < 0)
> + goto endjob;
> + } else {
> + if (lseek(fd, 0, SEEK_SET) != 0) {
> + virReportSystemError(errno, _("unable to seek %s"), path);
> + goto endjob;
> + }
> + }
> + memcpy(header.magic, QEMUD_SAVE_MAGIC, sizeof(header.magic));
> + if (safewrite(fd, &header, sizeof(header)) != sizeof(header)) {
> + virReportSystemError(errno, _("unable to write %s"), path);
> + goto endjob;
> + }
> if (VIR_CLOSE(fd) < 0) {
> virReportSystemError(errno, _("unable to close %s"), path);
> goto endjob;
> }
> - if (virFileDirectFdClose(directFd) < 0)
> - goto endjob;
>
> ret = 0;
>
> @@ -3769,15 +3797,16 @@ cleanup:
> return ret;
> }
>
> -/* Return -1 on failure, -2 if edit was specified but xmlin does not
> - * represent any changes, and opened fd on all other success. */
> +/* Return -1 on most failures after raising error, -2 if edit was specified
> + * but xmlin does not represent any changes (no error raised), -3 if corrupt
> + * image was unlinked (no error raised), and opened fd on success. */
> static int ATTRIBUTE_NONNULL(3) ATTRIBUTE_NONNULL(4)
> qemuDomainSaveImageOpen(struct qemud_driver *driver,
> const char *path,
> virDomainDefPtr *ret_def,
> struct qemud_save_header *ret_header,
> bool bypass_cache, virFileDirectFdPtr *directFd,
> - const char *xmlin, bool edit)
> + const char *xmlin, bool edit, bool unlink_corrupt)
> {
> int fd;
> struct qemud_save_header header;
> @@ -3807,8 +3836,22 @@ qemuDomainSaveImageOpen(struct qemud_driver *driver,
> }
>
> if (memcmp(header.magic, QEMUD_SAVE_MAGIC, sizeof(header.magic)) != 0) {
> - qemuReportError(VIR_ERR_OPERATION_FAILED,
> - "%s", _("image magic is incorrect"));
> + const char *msg = _("image magic is incorrect");
> +
> + if (memcmp(header.magic, QEMUD_SAVE_PARTIAL,
> + sizeof(header.magic)) == 0) {
> + msg = _("save image is incomplete");
> + if (unlink_corrupt) {
> + if (VIR_CLOSE(fd) < 0 || unlink(path) < 0) {
> + virReportSystemError(errno,
> + _("cannot remove corrupt file: %s"),
> + path);
> + goto error;
> + }
> + return -3;
> + }
> + }
> + qemuReportError(VIR_ERR_OPERATION_FAILED, "%s", msg);
> goto error;
> }
>
> @@ -4009,7 +4052,7 @@ qemuDomainRestoreFlags(virConnectPtr conn,
>
> fd = qemuDomainSaveImageOpen(driver, path, &def, &header,
> (flags & VIR_DOMAIN_SAVE_BYPASS_CACHE) != 0,
> - &directFd, dxml, false);
> + &directFd, dxml, false, false);
> if (fd < 0)
> goto cleanup;
>
> @@ -4071,7 +4114,7 @@ qemuDomainSaveImageGetXMLDesc(virConnectPtr conn, const char *path,
> qemuDriverLock(driver);
>
> fd = qemuDomainSaveImageOpen(driver, path, &def, &header, false, NULL,
> - NULL, false);
> + NULL, false, false);
>
> if (fd < 0)
> goto cleanup;
> @@ -4102,7 +4145,7 @@ qemuDomainSaveImageDefineXML(virConnectPtr conn, const char *path,
> qemuDriverLock(driver);
>
> fd = qemuDomainSaveImageOpen(driver, path, &def, &header, false, NULL,
> - dxml, true);
> + dxml, true, false);
>
> if (fd < 0) {
> /* Check for special case of no change needed. */
> @@ -4147,6 +4190,8 @@ cleanup:
> return ret;
> }
>
> +/* Return 0 on success, 1 if incomplete saved image was silently unlinked,
> + * and -1 on failure with error raised. */
> static int
> qemuDomainObjRestore(virConnectPtr conn,
> struct qemud_driver *driver,
> @@ -4161,9 +4206,12 @@ qemuDomainObjRestore(virConnectPtr conn,
> virFileDirectFdPtr directFd = NULL;
>
> fd = qemuDomainSaveImageOpen(driver, path, &def, &header,
> - bypass_cache, &directFd, NULL, false);
> - if (fd < 0)
> + bypass_cache, &directFd, NULL, false, true);
> + if (fd < 0) {
> + if (fd == -3)
> + ret = 1;
> goto cleanup;
> + }
>
> if (STRNEQ(vm->def->name, def->name) ||
> memcmp(vm->def->uuid, def->uuid, VIR_UUID_BUFLEN)) {
> @@ -4472,10 +4520,12 @@ qemuDomainObjStart(virConnectPtr conn,
> ret = qemuDomainObjRestore(conn, driver, vm, managed_save,
> bypass_cache);
>
> - if ((ret == 0) && (unlink(managed_save) < 0))
> + if (ret == 0 && unlink(managed_save) < 0)
> VIR_WARN("Failed to remove the managed state %s", managed_save);
> -
> - goto cleanup;
> + if (ret > 0)
> + VIR_WARN("Ignoring incomplete managed state %s", managed_save);
> + else
> + goto cleanup;
> }
> }
Looks and sounds okay, ACK,
Daniel
--
Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/
daniel at veillard.com | Rpmfind RPM search engine http://rpmfind.net/
http://veillard.com/ | virtualization library http://libvirt.org/
More information about the libvir-list
mailing list