[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

[libvirt] [PATCH 1/2] migration: Make erroring out on I/O error controllable by flag



Paolo Bonzini pointed out that it's actually possible to migrate a qemu
instance that was paused due to I/O error and it will be able to work on
the destination if the storage is accessible.

This patch introduces flag VIR_MIGRATE_ABORT_ON_ERROR that cancels the
migration in case an I/O error happens while it's being performed and
allows migration without this flag. This flag can be possibly used for
other error reasons that may be introduced in the future.
---
 include/libvirt/libvirt.h.in |  1 +
 src/qemu/qemu_driver.c       |  4 ++--
 src/qemu/qemu_migration.c    | 23 ++++++++++++++---------
 src/qemu/qemu_migration.h    |  6 ++++--
 tools/virsh-domain.c         |  7 +++++++
 tools/virsh.pod              |  6 ++++--
 6 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in
index 574b970..bd75e1d 100644
--- a/include/libvirt/libvirt.h.in
+++ b/include/libvirt/libvirt.h.in
@@ -1188,6 +1188,7 @@ typedef enum {
     VIR_MIGRATE_UNSAFE            = (1 << 9), /* force migration even if it is considered unsafe */
     VIR_MIGRATE_OFFLINE           = (1 << 10), /* offline migrate */
     VIR_MIGRATE_COMPRESSED        = (1 << 11), /* compress data during migration */
+    VIR_MIGRATE_ABORT_ON_ERROR    = (1 << 12), /* abort migration on I/O errors happened during migration */
 } virDomainMigrateFlags;

 /* Domain migration. */
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index c886378..4b23bc3 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -2819,7 +2819,7 @@ qemuDomainSaveInternal(virQEMUDriverPtr driver, virDomainPtr dom,
     if (!(caps = virQEMUDriverGetCapabilities(driver, false)))
         goto cleanup;

-    if (!qemuMigrationIsAllowed(driver, vm, vm->def, false))
+    if (!qemuMigrationIsAllowed(driver, vm, vm->def, false, false))
         goto cleanup;

     if (qemuDomainObjBeginAsyncJob(driver, vm,
@@ -11663,7 +11663,7 @@ qemuDomainSnapshotCreateActiveExternal(virConnectPtr conn,
     /* do the memory snapshot if necessary */
     if (memory) {
         /* check if migration is possible */
-        if (!qemuMigrationIsAllowed(driver, vm, vm->def, false))
+        if (!qemuMigrationIsAllowed(driver, vm, vm->def, false, false))
             goto endjob;

         /* allow the migration job to be cancelled or the domain to be paused */
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 48e0d44..281467b 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -1420,7 +1420,7 @@ cleanup:
  * the fact that older servers did not do checks on the source. */
 bool
 qemuMigrationIsAllowed(virQEMUDriverPtr driver, virDomainObjPtr vm,
-                       virDomainDefPtr def, bool remote)
+                       virDomainDefPtr def, bool remote, bool abort_on_error)
 {
     int nsnapshots;
     int pauseReason;
@@ -1448,7 +1448,8 @@ qemuMigrationIsAllowed(virQEMUDriverPtr driver, virDomainObjPtr vm,
             }

             /* cancel migration if disk I/O error is emitted while migrating */
-            if (virDomainObjGetState(vm, &pauseReason) == VIR_DOMAIN_PAUSED &&
+            if (abort_on_error &&
+                virDomainObjGetState(vm, &pauseReason) == VIR_DOMAIN_PAUSED &&
                 pauseReason == VIR_DOMAIN_PAUSED_IOERROR) {
                 virReportError(VIR_ERR_OPERATION_INVALID, "%s",
                                _("cannot migrate domain with I/O error"));
@@ -1692,7 +1693,7 @@ qemuMigrationUpdateJobStatus(virQEMUDriverPtr driver,
 static int
 qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, virDomainObjPtr vm,
                                enum qemuDomainAsyncJob asyncJob,
-                               virConnectPtr dconn)
+                               virConnectPtr dconn, bool abort_on_error)
 {
     qemuDomainObjPrivatePtr priv = vm->privateData;
     const char *job;
@@ -1719,7 +1720,7 @@ qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, virDomainObjPtr vm,
         struct timespec ts = { .tv_sec = 0, .tv_nsec = 50 * 1000 * 1000ull };

         /* cancel migration if disk I/O error is emitted while migrating */
-        if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_OUT &&
+        if (abort_on_error &&
             virDomainObjGetState(vm, &pauseReason) == VIR_DOMAIN_PAUSED &&
             pauseReason == VIR_DOMAIN_PAUSED_IOERROR)
             goto cancel;
@@ -1920,6 +1921,7 @@ char *qemuMigrationBegin(virQEMUDriverPtr driver,
     qemuDomainObjPrivatePtr priv = vm->privateData;
     virCapsPtr caps = NULL;
     unsigned int cookieFlags = QEMU_MIGRATION_COOKIE_LOCKSTATE;
+    bool abort_on_error = !!(flags & VIR_MIGRATE_ABORT_ON_ERROR);

     VIR_DEBUG("driver=%p, vm=%p, xmlin=%s, dname=%s,"
               " cookieout=%p, cookieoutlen=%p, flags=%lx",
@@ -1936,7 +1938,7 @@ char *qemuMigrationBegin(virQEMUDriverPtr driver,
     if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_OUT)
         qemuMigrationJobSetPhase(driver, vm, QEMU_MIGRATION_PHASE_BEGIN3);

-    if (!qemuMigrationIsAllowed(driver, vm, NULL, true))
+    if (!qemuMigrationIsAllowed(driver, vm, NULL, true, abort_on_error))
         goto cleanup;

     if (!(flags & VIR_MIGRATE_UNSAFE) && !qemuMigrationIsSafe(vm->def))
@@ -2052,6 +2054,7 @@ qemuMigrationPrepareAny(virQEMUDriverPtr driver,
     virCapsPtr caps = NULL;
     const char *listenAddr = NULL;
     char *migrateFrom = NULL;
+    bool abort_on_error = !!(flags & VIR_MIGRATE_ABORT_ON_ERROR);

     if (virTimeMillisNow(&now) < 0)
         return -1;
@@ -2081,7 +2084,7 @@ qemuMigrationPrepareAny(virQEMUDriverPtr driver,
     if (!(caps = virQEMUDriverGetCapabilities(driver, false)))
         goto cleanup;

-    if (!qemuMigrationIsAllowed(driver, NULL, *def, true))
+    if (!qemuMigrationIsAllowed(driver, NULL, *def, true, abort_on_error))
         goto cleanup;

     /* Let migration hook filter domain XML */
@@ -2777,6 +2780,7 @@ qemuMigrationRun(virQEMUDriverPtr driver,
     unsigned long migrate_speed = resource ? resource : priv->migMaxBandwidth;
     virErrorPtr orig_err = NULL;
     unsigned int cookieFlags = 0;
+    bool abort_on_error = !!(flags & VIR_MIGRATE_ABORT_ON_ERROR);

     VIR_DEBUG("driver=%p, vm=%p, cookiein=%s, cookieinlen=%d, "
               "cookieout=%p, cookieoutlen=%p, flags=%lx, resource=%lu, "
@@ -2929,7 +2933,7 @@ qemuMigrationRun(virQEMUDriverPtr driver,

     if (qemuMigrationWaitForCompletion(driver, vm,
                                        QEMU_ASYNC_JOB_MIGRATION_OUT,
-                                       dconn) < 0)
+                                       dconn, abort_on_error) < 0)
         goto cleanup;

     /* When migration completed, QEMU will have paused the
@@ -3610,6 +3614,7 @@ qemuMigrationPerformJob(virQEMUDriverPtr driver,
     int resume = 0;
     virErrorPtr orig_err = NULL;
     virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
+    bool abort_on_error = !!(flags & VIR_MIGRATE_ABORT_ON_ERROR);

     if (qemuMigrationJobStart(driver, vm, QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
         goto cleanup;
@@ -3620,7 +3625,7 @@ qemuMigrationPerformJob(virQEMUDriverPtr driver,
         goto endjob;
     }

-    if (!qemuMigrationIsAllowed(driver, vm, NULL, true))
+    if (!qemuMigrationIsAllowed(driver, vm, NULL, true, abort_on_error))
         goto endjob;

     if (!(flags & VIR_MIGRATE_UNSAFE) && !qemuMigrationIsSafe(vm->def))
@@ -4316,7 +4321,7 @@ qemuMigrationToFile(virQEMUDriverPtr driver, virDomainObjPtr vm,
     if (rc < 0)
         goto cleanup;

-    rc = qemuMigrationWaitForCompletion(driver, vm, asyncJob, NULL);
+    rc = qemuMigrationWaitForCompletion(driver, vm, asyncJob, NULL, false);

     if (rc < 0)
         goto cleanup;
diff --git a/src/qemu/qemu_migration.h b/src/qemu/qemu_migration.h
index 22b04b4..5b21ca2 100644
--- a/src/qemu/qemu_migration.h
+++ b/src/qemu/qemu_migration.h
@@ -38,7 +38,8 @@
      VIR_MIGRATE_CHANGE_PROTECTION |            \
      VIR_MIGRATE_UNSAFE |                       \
      VIR_MIGRATE_OFFLINE |                      \
-     VIR_MIGRATE_COMPRESSED)
+     VIR_MIGRATE_COMPRESSED |                   \
+     VIR_MIGRATE_ABORT_ON_ERROR)

 enum qemuMigrationJobPhase {
     QEMU_MIGRATION_PHASE_NONE = 0,
@@ -147,7 +148,8 @@ int qemuMigrationConfirm(virQEMUDriverPtr driver,
                          int retcode);

 bool qemuMigrationIsAllowed(virQEMUDriverPtr driver, virDomainObjPtr vm,
-                            virDomainDefPtr def, bool remote);
+                            virDomainDefPtr def, bool remote,
+                            bool abort_on_error);

 int qemuMigrationToFile(virQEMUDriverPtr driver, virDomainObjPtr vm,
                         int fd, off_t offset, const char *path,
diff --git a/tools/virsh-domain.c b/tools/virsh-domain.c
index 955e882..7f9b9e3 100644
--- a/tools/virsh-domain.c
+++ b/tools/virsh-domain.c
@@ -8306,6 +8306,10 @@ static const vshCmdOptDef opts_migrate[] = {
      .type = VSH_OT_BOOL,
      .help = N_("compress repeated pages during live migration")
     },
+    {.name = "abort-on-error",
+     .type = VSH_OT_BOOL,
+     .help = N_("abort on soft errors during migration")
+    },
     {.name = "domain",
      .type = VSH_OT_DATA,
      .flags = VSH_OFLAG_REQ,
@@ -8399,6 +8403,9 @@ doMigrate(void *opaque)
         flags |= VIR_MIGRATE_OFFLINE;
     }

+    if (vshCommandOptBool(cmd, "abort-on-error"))
+        flags |= VIR_MIGRATE_ABORT_ON_ERROR;
+
     if (xmlfile &&
         virFileReadAll(xmlfile, 8192, &xml) < 0) {
         vshError(ctl, _("file '%s' doesn't exist"), xmlfile);
diff --git a/tools/virsh.pod b/tools/virsh.pod
index 405b4d2..21367d4 100644
--- a/tools/virsh.pod
+++ b/tools/virsh.pod
@@ -1043,7 +1043,8 @@ stats.
 =item B<migrate> [I<--live>] [I<--offline>] [I<--direct>] [I<--p2p> [I<--tunnelled>]]
 [I<--persistent>] [I<--undefinesource>] [I<--suspend>] [I<--copy-storage-all>]
 [I<--copy-storage-inc>] [I<--change-protection>] [I<--unsafe>] [I<--verbose>]
-[I<--compressed>] I<domain> I<desturi> [I<migrateuri>] [I<dname>]
+[I<--compressed>] [I<--abort-on-error>]
+I<domain> I<desturi> [I<migrateuri>] [I<dname>]
 [I<--timeout> B<seconds>] [I<--xml> B<file>]

 Migrate domain to another host.  Add I<--live> for live migration; <--p2p>
@@ -1066,7 +1067,8 @@ is implicitly enabled when supported by the hypervisor, but can be explicitly
 used to reject the migration if the hypervisor lacks change protection
 support.  I<--verbose> displays the progress of migration.  I<--compressed>
 activates compression of memory pages that have to be transferred repeatedly
-during live migration.
+during live migration. I<--abort-on-error> cancels the migration if a soft
+error (for example I/O error) happens during the migration.

 B<Note>: Individual hypervisors usually do not support all possible types of
 migration. For example, QEMU does not support direct migration.
-- 
1.8.2.1


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]