[libvirt] [PATCH v3 07/12] qemu_migration: Introduce qemuMigrationDriveMirror

John Ferlan jferlan at redhat.com
Mon Feb 18 16:04:56 UTC 2013


On 02/06/2013 08:52 AM, Michal Privoznik wrote:
> This function does the source part of NBD magic. It
> invokes drive-mirror on each non-shared, read-write disk
> that has a source and waits till the mirroring process
> completes. When it does we can proceed with migration.
> 
> Currently, an active waiting is done: every 500ms libvirt
> asks qemu if block-job is finished or not.  However, once
> the job finishes, qemu doesn't report its progress so we
> can only assume if the job finished successfully or not.
> The better solution would be to listen to the event which
> is sent as soon as the job finishes. The event does
> contain the result of job.
> ---
>  src/qemu/qemu_migration.c | 190 +++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 189 insertions(+), 1 deletion(-)
> 
> diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
> index 16840b2..defad6b 100644
> --- a/src/qemu/qemu_migration.c
> +++ b/src/qemu/qemu_migration.c
> @@ -1165,6 +1165,188 @@ cleanup:
>      return ret;
>  }
>  
> +/**
> + * qemuMigrationDriveMirror:
> + * @driver: qemu driver
> + * @vm: domain
> + * @mig: migration cookie
> + * @host: where are we migrating to
> + * @speed: how much should the copying be limited
> + * @migrate_flags: migrate monitor command flags
> + *
> + * Run drive-mirror to feed NBD server running on dst and
> + * wait till the process completes. On success, update
> + * @migrate_flags so we don't tell 'migrate' command to
> + * do the very same operation.
> + *
> + * Returns 0 on success (@migrate_flags updated),
> + *        -1 otherwise.
> + */
> +static int
> +qemuMigrationDriveMirror(virQEMUDriverPtr driver,
> +                         virDomainObjPtr vm,
> +                         qemuMigrationCookiePtr mig,
> +                         const char *host,
> +                         unsigned long speed,
> +                         unsigned int *migrate_flags)
> +{
> +    qemuDomainObjPrivatePtr priv = vm->privateData;
> +    int ret = -1;
> +    int mon_ret;
> +    int port;
> +    ssize_t i;

Isn't this really just a loop counter?

> +    char *diskAlias = NULL;
> +    char *nbd_dest = NULL;
> +    unsigned int mirror_flags = VIR_DOMAIN_BLOCK_REBASE_REUSE_EXT;
> +    virErrorPtr err = NULL;
> +
> +    if (!(*migrate_flags & (QEMU_MONITOR_MIGRATE_NON_SHARED_DISK |
> +                            QEMU_MONITOR_MIGRATE_NON_SHARED_INC)))
> +        return 0;
> +
> +    if (!mig->nbd) {
> +        /* Destination doesn't support NBD server.
> +         * Fall back to previous implementation.
> +         * XXX Or should we report an error here? */
> +        VIR_DEBUG("Destination doesn't support NBD server. "
> +                  "Falling back to previous implementation.");
> +        ret = 0;
> +        goto cleanup;
> +    }
> +
> +    /* steal NBD port and thus prevent its propagation back to destination */
> +    port = mig->nbd->port;
> +    mig->nbd->port = 0;
> +
> +    if (*migrate_flags & QEMU_MONITOR_MIGRATE_NON_SHARED_INC)
> +        mirror_flags |= VIR_DOMAIN_BLOCK_REBASE_SHALLOW;
> +
> +    for (i = 0; i < vm->def->ndisks; i++) {
> +        virDomainDiskDefPtr disk = vm->def->disks[i];
> +        virDomainBlockJobInfo info;
> +
> +        /* skip shared, RO and source-less disks */
> +        if (disk->shared || disk->readonly || !disk->src)
> +            continue;
> +
> +        VIR_FREE(diskAlias);
> +        VIR_FREE(nbd_dest);
> +        if ((virAsprintf(&diskAlias, "%s%s",
> +                         QEMU_DRIVE_HOST_PREFIX, disk->info.alias) < 0) ||
> +            (virAsprintf(&nbd_dest, "nbd:%s:%u:exportname=%s",
> +                         host, port, diskAlias) < 0)) {
> +            virReportOOMError();
> +            goto error;
> +        }
> +
> +        if (qemuDomainObjEnterMonitorAsync(driver, vm,
> +                                           QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
> +            goto error;
> +        mon_ret = qemuMonitorDriveMirror(priv->mon, diskAlias, nbd_dest,
> +                                         NULL, speed, mirror_flags);
> +        qemuDomainObjExitMonitorWithDriver(driver, vm);
> +
> +        if (mon_ret < 0)
> +            goto error;
> +
> +        /* wait for completion */
> +        while (true) {
> +            /* Poll every 500ms for progress & to allow cancellation */
> +            struct timespec ts = { .tv_sec = 0, .tv_nsec = 500 * 1000 * 1000ull };
> +
> +            memset(&info, 0, sizeof(info));
> +
> +            if (qemuDomainObjEnterMonitorAsync(driver, vm,
> +                                               QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
> +                goto error;
> +            if (priv->job.asyncAbort) {
> +                /* explicitly do this *after* we entered the monitor,
> +                 * as this is a critical section so we are guaranteed
> +                 * priv->job.asyncAbort will not change */
> +                qemuDomainObjExitMonitorWithDriver(driver, vm);
> +                virReportError(VIR_ERR_OPERATION_ABORTED, _("%s: %s"),
> +                               qemuDomainAsyncJobTypeToString(priv->job.asyncJob),
> +                               _("canceled by client"));
> +                goto error;
> +            }
> +            mon_ret = qemuMonitorBlockJob(priv->mon, diskAlias, NULL, 0,
> +                                          &info, BLOCK_JOB_INFO, true);
> +            qemuDomainObjExitMonitorWithDriver(driver, vm);
> +
> +            if (mon_ret < 0) {
> +                /* If there's not any job running on a block device,
> +                 * qemu won't report anything, so it is not fatal
> +                 * if we fail to query status as job may have finished
> +                 * while we were sleeping. */
> +                virReportError(VIR_ERR_OPERATION_FAILED,
> +                               _("Unable to query drive-mirror job status. "
> +                                 "Stop polling on '%s' cur:%llu end:%llu"),
> +                               diskAlias, info.cur, info.end);
> +                goto error;
> +            }
> +
> +            if (info.cur == info.end) {
> +                VIR_DEBUG("Drive mirroring of '%s' completed", diskAlias);
> +                break;
> +            }
> +
> +            /* XXX Frankly speaking, we should listen to the events,
> +             * instead of doing this. But this works for now and we
> +             * are doing something similar in migration itself anyway */
> +
> +            virObjectUnlock(vm);
> +            qemuDriverUnlock(driver);
> +
> +            nanosleep(&ts, NULL);
> +
> +            qemuDriverLock(driver);
> +            virObjectLock(vm);
> +        }
> +    }
> +
> +    /* Okay, copied. Modify migrate_flags */
> +    *migrate_flags &= ~(QEMU_MONITOR_MIGRATE_NON_SHARED_DISK |
> +                        QEMU_MONITOR_MIGRATE_NON_SHARED_INC);
> +    ret = 0;
> +
> +cleanup:
> +    VIR_FREE(diskAlias);
> +    VIR_FREE(nbd_dest);
> +    return ret;
> +
> +error:
> +    /* don't overwrite any errors */
> +    err = virSaveLastError();
> +    /* cancel any outstanding jobs */
> +    for (; i >= 0; i--) {

You could enter this loop without actually having started a job for the
current "i" value. Also, it seems to me from reading the code that the
processing is: start a job, wait for completion... start a job, wait for
completion.  Thus, is going backwards here necessary? IOW - wouldn't you
only need to cancel the "current" job?  There are a number of reasons to
get to error, a couple of which could be because of some other failure,
such as priv->job.asyncAbort and "(mon_ret < 0)" when there's a job running.

> +        virDomainDiskDefPtr disk = vm->def->disks[i];
> +
> +        /* skip shared, RO and source-less disks */
> +        if (disk->shared || disk->readonly || !disk->src)
> +            continue;
> +
> +        VIR_FREE(diskAlias);
> +        if (virAsprintf(&diskAlias, "%s%s",
> +                        QEMU_DRIVE_HOST_PREFIX, disk->info.alias) < 0) {
> +            virReportOOMError();
> +            goto error;

This could be one nasty loop

> +        }
> +        if (qemuDomainObjEnterMonitorAsync(driver, vm,
> +                                           QEMU_ASYNC_JOB_MIGRATION_OUT) == 0) {
> +            if (qemuMonitorBlockJob(priv->mon, diskAlias, NULL, 0,
> +                                    NULL, BLOCK_JOB_ABORT, true) < 0) {
> +                VIR_WARN("Unable to cancel block-job on '%s'", diskAlias);
> +            }
> +            qemuDomainObjExitMonitorWithDriver(driver, vm);
> +        } else {
> +            VIR_WARN("Unable to enter monitor. No block job cancelled");
> +        }
> +    }
> +    if (err)
> +        virSetError(err);
> +    virFreeError(err);
> +    goto cleanup;
> +}
>  
>  /* Validate whether the domain is safe to migrate.  If vm is NULL,
>   * then this is being run in the v2 Prepare stage on the destination
> @@ -2429,6 +2611,13 @@ qemuMigrationRun(virQEMUDriverPtr driver,
>      if (qemuDomainMigrateGraphicsRelocate(driver, vm, mig) < 0)
>          VIR_WARN("unable to provide data for graphics client relocation");
>  
> +    /* this will update migrate_flags on success */
> +    if (qemuMigrationDriveMirror(driver, vm, mig, spec->dest.host.name,
> +                                 migrate_speed, &migrate_flags) < 0) {
> +        /* error reported by helper func */
> +        goto cleanup;
> +    }
> +
>      /* Before EnterMonitor, since qemuMigrationSetOffline already does that */
>      if (!(flags & VIR_MIGRATE_LIVE) &&
>          virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) {
> @@ -2456,7 +2645,6 @@ qemuMigrationRun(virQEMUDriverPtr driver,
>          goto cleanup;
>      }
>  
> -
>      /* connect to the destination qemu if needed */
>      if (spec->destType == MIGRATION_DEST_CONNECT_HOST &&
>          qemuMigrationConnect(driver, vm, spec) < 0) {
> 




More information about the libvir-list mailing list