[libvirt] [PATCH] qemu: Allow graceful domain destroy
Daniel Veillard
veillard at redhat.com
Mon Aug 22 08:48:31 UTC 2011
On Sat, Aug 20, 2011 at 01:06:10PM +0200, Michal Privoznik wrote:
> This patch introduces support for destroying a domain via the 'quit'
> monitor command, which gives qemu time to flush caches and therefore
> prevents disk corruption. However, qemu can be unresponsive, so to
> avoid waiting indefinitely, we execute the command in a separate thread
> and wait a reasonable time (QEMU_QUIT_WAIT_SECONDS) on a condition. If
> we hit the timeout, qemu is qemuProcessKill'ed, which closes the
> monitor and therefore allows the thread to terminate.
>
> The synchronization between the qemu driver and the monitor-quit thread
> is done through a mutex and a condition. However, this alone is not
> enough. If a condition is signalled but nobody is listening, the signal
> is lost. To prevent this, a boolean variable is used that is set iff
> the condition would be signalled with nobody listening, or iff the
> driver is waiting on the given condition.
> ---
> include/libvirt/libvirt.h.in | 11 ++-
> src/qemu/qemu_driver.c | 132 +++++++++++++++++++++++++++++++++++++++--
> src/qemu/qemu_monitor.c | 13 ++++
> src/qemu/qemu_monitor.h | 2 +
> src/qemu/qemu_monitor_json.c | 22 +++++++
> src/qemu/qemu_monitor_json.h | 1 +
> src/qemu/qemu_monitor_text.c | 20 ++++++
> src/qemu/qemu_monitor_text.h | 1 +
> tools/virsh.c | 8 ++-
> 9 files changed, 198 insertions(+), 12 deletions(-)
>
> diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in
> index aa29fb6..fa98b8d 100644
> --- a/include/libvirt/libvirt.h.in
> +++ b/include/libvirt/libvirt.h.in
> @@ -919,10 +919,13 @@ virConnectPtr virDomainGetConnect (virDomainPtr domain);
> * Domain creation and destruction
> */
>
> -/*
> - * typedef enum {
> - * } virDomainDestroyFlagsValues;
> - */
> +
> +typedef enum {
> + VIR_DOMAIN_DESTROY_MONITOR = 1 << 0, /* In hypervisors supporting this,
> + try to send 'quit' command prior
> + to killing hypervisor */
> +} virDomainDestroyFlagsValues;
Rather than describing the flag based on the QEmu command I would
rather describe its intended effects, i.e.
VIR_DOMAIN_DESTROY_FLUSH_CACHE = 1 << 0, /* In hypervisors supporting this,
try to get cache flushed prior
to destroying the domain */
> virDomainPtr virDomainCreateXML (virConnectPtr conn,
> const char *xmlDesc,
> unsigned int flags);
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index 421a98e..6585cb4 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -1564,6 +1564,42 @@ cleanup:
> return ret;
> }
>
> +struct qemuDomainDestroyHelperData {
> + struct qemud_driver *driver;
> + virDomainObjPtr vm;
> + qemuDomainObjPrivatePtr priv;
> + virMutex lock;
> + virCond cond;
> + bool guard;
> +};
> +
> +static void
> +qemuDomainDestroyHelper(void *opaque)
> +{
> + struct qemuDomainDestroyHelperData *data = opaque;
> + struct qemud_driver *driver = data->driver;
> + virDomainObjPtr vm = data->vm;
> + qemuDomainObjPrivatePtr priv = data->priv;
> +
> + /* Job was already obtained by caller */
> + qemuDomainObjEnterMonitorWithDriver(driver, vm);
> + qemuMonitorQuit(priv->mon);
> + qemuDomainObjExitMonitorWithDriver(driver, vm);
> +
> + /* To avoid loosing signal, we need to remember
> + * we tried to send one, but nobody was waiting
> + * for it.
> + */
> + virMutexLock(&data->lock);
> + if (data->guard) {
> + virCondSignal(&data->cond);
> + } else {
> + data->guard = true;
> + }
> + virMutexUnlock(&data->lock);
> +}
> +
> +#define QEMU_QUIT_WAIT_SECONDS 5
>
> static int
> qemuDomainDestroyFlags(virDomainPtr dom,
> @@ -1574,8 +1610,13 @@ qemuDomainDestroyFlags(virDomainPtr dom,
> int ret = -1;
> virDomainEventPtr event = NULL;
> qemuDomainObjPrivatePtr priv;
> + bool use_thread = false;
> + bool use_kill = false;
> + virThread destroy_thread;
> + struct qemuDomainDestroyHelperData data;
> + unsigned long long then;
>
> - virCheckFlags(0, -1);
> + virCheckFlags(VIR_DOMAIN_DESTROY_MONITOR, -1);
>
> qemuDriverLock(driver);
> vm = virDomainFindByUUID(&driver->domains, dom->uuid);
> @@ -1590,12 +1631,14 @@ qemuDomainDestroyFlags(virDomainPtr dom,
> priv = vm->privateData;
> priv->fakeReboot = false;
>
> - /* Although qemuProcessStop does this already, there may
> - * be an outstanding job active. We want to make sure we
> - * can kill the process even if a job is active. Killing
> - * it now means the job will be released
> - */
> - qemuProcessKill(vm);
> + if (!flags) {
please test the given flag here instead of assuming the bit we know
about now.
> + /* Although qemuProcessStop does this already, there may
> + * be an outstanding job active. We want to make sure we
> + * can kill the process even if a job is active. Killing
> + * it now means the job will be released
> + */
> + qemuProcessKill(vm);
> + }
>
> if (qemuDomainObjBeginJobWithDriver(driver, vm, QEMU_JOB_DESTROY) < 0)
> goto cleanup;
> @@ -1606,6 +1649,70 @@ qemuDomainDestroyFlags(virDomainPtr dom,
> goto endjob;
> }
>
> + if (flags & VIR_DOMAIN_DESTROY_MONITOR) {
> + /* Try to shutdown domain gracefully.
> + * Send 'quit' to qemu. However, qemu can be unresponsive.
> + * Therefore create a separate thread in which we execute
> + * that monitor comand. Wait max QEMU_QUIT_WAIT_SECONDS.
> + */
> + if (virMutexInit(&data.lock) < 0) {
> + virReportOOMError();
> + goto endjob;
> + }
> +
> + if (virCondInit(&data.cond) < 0) {
> + qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("cannot initialize thread condition"));
> + virMutexDestroy(&data.lock);
> + goto endjob;
> + }
> +
> + data.driver = driver;
> + data.vm = vm;
> + data.priv = priv;
> + data.guard = false;
> +
> + if (virThreadCreate(&destroy_thread, true,
> + qemuDomainDestroyHelper, &data) < 0) {
> + virReportSystemError(errno, "%s",
> + _("unable to create destroy thread"));
> + ignore_value(virCondDestroy(&data.cond));
> + virMutexDestroy(&data.lock);
> + goto endjob;
> + }
> + use_thread = true;
> +
> + if (virTimeMs(&then) < 0)
> + goto endjob;
> +
> + then += QEMU_QUIT_WAIT_SECONDS * 1000;
> +
> + /* How synchronization with destroy thread works:
> + * Basically wait on data.cond. But because conditions
> + * does not 'remember' that they have been signalized
> + * data.guard is used. Practically, data.guard says
> + * to destroy thread if we are waiting on condition
> + * and to us whether we should even try.
> + */
> + virMutexLock(&data.lock);
> + if (!data.guard) {
> + data.guard = true;
> + if (virCondWaitUntil(&data.cond, &data.lock, then) < 0) {
> + if (errno == ETIMEDOUT) {
> + use_kill = true;
> + data.guard = false;
> + } else {
> + virMutexUnlock(&data.lock);
> + goto endjob;
> + }
> + }
> + }
> + virMutexUnlock(&data.lock);
> +
> + if (use_kill)
> + qemuProcessKill(vm);
> + }
> +
> qemuProcessStop(driver, vm, 0, VIR_DOMAIN_SHUTOFF_DESTROYED);
> event = virDomainEventNewFromObj(vm,
> VIR_DOMAIN_EVENT_STOPPED,
> @@ -1626,6 +1733,17 @@ endjob:
> vm = NULL;
>
> cleanup:
> + if (use_thread) {
> + virMutexLock(&data.lock);
> + if (!data.guard) {
> + data.guard = true;
> + ignore_value(virCondWait(&data.cond, &data.lock));
> + }
> + virMutexUnlock(&data.lock);
> + ignore_value(virCondDestroy(&data.cond));
> + virMutexDestroy(&data.lock);
> + virThreadJoin(&destroy_thread);
> + }
> if (vm)
> virDomainObjUnlock(vm);
> if (event)
> diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
> index db6107c..41b9c5c 100644
> --- a/src/qemu/qemu_monitor.c
> +++ b/src/qemu/qemu_monitor.c
> @@ -2475,3 +2475,16 @@ int qemuMonitorBlockJob(qemuMonitorPtr mon,
> ret = qemuMonitorTextBlockJob(mon, device, bandwidth, info, mode);
> return ret;
> }
> +
> +int qemuMonitorQuit(qemuMonitorPtr mon)
> +{
> + int ret;
> +
> + VIR_DEBUG("mon=%p", mon);
> +
> + if (mon->json)
> + ret = qemuMonitorJSONQuit(mon);
> + else
> + ret = qemuMonitorTextQuit(mon);
> + return ret;
> +}
> diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h
> index f241c9e..3fe6bb9 100644
> --- a/src/qemu/qemu_monitor.h
> +++ b/src/qemu/qemu_monitor.h
> @@ -475,6 +475,8 @@ int qemuMonitorBlockJob(qemuMonitorPtr mon,
> virDomainBlockJobInfoPtr info,
> int mode);
>
> +int qemuMonitorQuit(qemuMonitorPtr mon);
> +
> /**
> * When running two dd process and using <> redirection, we need a
> * shell that will not truncate files. These two strings serve that
> diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c
> index 7adfb26..1f078cd 100644
> --- a/src/qemu/qemu_monitor_json.c
> +++ b/src/qemu/qemu_monitor_json.c
> @@ -2956,3 +2956,25 @@ int qemuMonitorJSONBlockJob(qemuMonitorPtr mon,
> virJSONValueFree(reply);
> return ret;
> }
> +
> +
> +int qemuMonitorJSONQuit(qemuMonitorPtr mon)
> +{
> + int ret = -1;
> + virJSONValuePtr cmd = NULL;
> + virJSONValuePtr reply = NULL;
> +
> + cmd = qemuMonitorJSONMakeCommand("quit", NULL);
> +
> + if (!cmd)
> + return -1;
> +
> + ret =qemuMonitorJSONCommand(mon, cmd, &reply);
> +
> + if (ret == 0)
> + ret = qemuMonitorJSONCheckError(cmd, reply);
> +
> + virJSONValueFree(cmd);
> + virJSONValueFree(reply);
> + return ret;
> +}
> diff --git a/src/qemu/qemu_monitor_json.h b/src/qemu/qemu_monitor_json.h
> index 9512793..2a7df76 100644
> --- a/src/qemu/qemu_monitor_json.h
> +++ b/src/qemu/qemu_monitor_json.h
> @@ -231,4 +231,5 @@ int qemuMonitorJSONBlockJob(qemuMonitorPtr mon,
> virDomainBlockJobInfoPtr info,
> int mode);
>
> +int qemuMonitorJSONQuit(qemuMonitorPtr mon);
> #endif /* QEMU_MONITOR_JSON_H */
> diff --git a/src/qemu/qemu_monitor_text.c b/src/qemu/qemu_monitor_text.c
> index 335e39e..19c2690 100644
> --- a/src/qemu/qemu_monitor_text.c
> +++ b/src/qemu/qemu_monitor_text.c
> @@ -3046,3 +3046,23 @@ cleanup:
> VIR_FREE(reply);
> return ret;
> }
> +
> +
> +int qemuMonitorTextQuit(qemuMonitorPtr mon)
> +{
> + const char *cmd = "quit";
> + char *reply = NULL;
> + int ret = -1;
> +
> + if (qemuMonitorHMPCommand(mon, cmd, &reply) < 0) {
> + qemuReportError(VIR_ERR_INTERNAL_ERROR,
> + "%s", _("cannot run monitor command"));
> + goto cleanup;
> + }
> +
> + ret = 0;
> +
> +cleanup:
> + VIR_FREE(reply);
> + return ret;
> +}
> diff --git a/src/qemu/qemu_monitor_text.h b/src/qemu/qemu_monitor_text.h
> index b250738..9ade938 100644
> --- a/src/qemu/qemu_monitor_text.h
> +++ b/src/qemu/qemu_monitor_text.h
> @@ -224,4 +224,5 @@ int qemuMonitorTextBlockJob(qemuMonitorPtr mon,
> virDomainBlockJobInfoPtr info,
> int mode);
>
> +int qemuMonitorTextQuit(qemuMonitorPtr mon);
> #endif /* QEMU_MONITOR_TEXT_H */
> diff --git a/tools/virsh.c b/tools/virsh.c
> index f1eb4ca..0c18d8b 100644
> --- a/tools/virsh.c
> +++ b/tools/virsh.c
> @@ -2565,6 +2565,7 @@ static const vshCmdInfo info_destroy[] = {
>
> static const vshCmdOptDef opts_destroy[] = {
> {"domain", VSH_OT_DATA, VSH_OFLAG_REQ, N_("domain name, id or uuid")},
> + {"monitor", VSH_OT_BOOL, 0, N_("try graceful destroy via monitor first")},
> {NULL, 0, 0, NULL}
> };
>
> @@ -2574,6 +2575,7 @@ cmdDestroy(vshControl *ctl, const vshCmd *cmd)
> virDomainPtr dom;
> bool ret = true;
> const char *name;
> + int flags = 0;
>
> if (!vshConnectionUsability(ctl, ctl->conn))
> return false;
> @@ -2581,7 +2583,11 @@ cmdDestroy(vshControl *ctl, const vshCmd *cmd)
> if (!(dom = vshCommandOptDomain(ctl, cmd, &name)))
> return false;
>
> - if (virDomainDestroy(dom) == 0) {
> + if (vshCommandOptBool(cmd, "monitor"))
> + flags |= VIR_DOMAIN_DESTROY_MONITOR;
> +
> + if ((!flags && virDomainDestroy(dom) == 0) ||
> + virDomainDestroyFlags(dom, flags) == 0) {
> vshPrint(ctl, _("Domain %s destroyed\n"), name);
> } else {
> vshError(ctl, _("Failed to destroy domain %s"), name);
> --
> 1.7.3.4
This sounds right, but the devil lies in the details; I would add far
more debugging in the thread and around the synchronization, as this is
information we would need if something goes really wrong.
Daniel
--
Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/
daniel at veillard.com | Rpmfind RPM search engine http://rpmfind.net/
http://veillard.com/ | virtualization library http://libvirt.org/
More information about the libvir-list
mailing list