[libvirt] [PATCH] Keep track of guest paused state after disk IO errors

Daniel P. Berrange berrange at redhat.com
Tue Mar 16 17:24:38 UTC 2010


With the QMP mode monitor, it is possible to get a notification
when the guest is paused indirectly (e.g. as a result of a disk IO
error). This patch enables such reporting and, when receiving an
error, updates libvirt's view of the guest to indicate that it is
now paused. It also emits an event

  VIR_DOMAIN_EVENT_SUSPENDED

with a detail of:

  VIR_DOMAIN_EVENT_SUSPENDED_IOERROR

NB this patch does not make the error behaviour configurable. It
just copes with the pause transition, if QEMU is set up to do that
by default (current upstream now works this way for disks).

* include/libvirt/libvirt.h.in: Add VIR_DOMAIN_EVENT_SUSPENDED_IOERROR
* src/qemu/qemu_driver.c: Update VM state to paused when IO error
  occurs
* src/qemu/qemu_monitor.c, src/qemu/qemu_monitor.h,
  src/qemu/qemu_monitor_json.c: Wire up handlers for disk IO errors
---
 include/libvirt/libvirt.h.in |    1 +
 src/qemu/qemu_driver.c       |   42 ++++++++++++++++++++++++++++++++++++++++++
 src/qemu/qemu_monitor.c      |   18 ++++++++++++++++++
 src/qemu/qemu_monitor.h      |   16 ++++++++++++++++
 src/qemu/qemu_monitor_json.c |   34 ++++++++++++++++++++++++++++++++++
 5 files changed, 111 insertions(+), 0 deletions(-)

diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in
index 260505e..b7a6922 100644
--- a/include/libvirt/libvirt.h.in
+++ b/include/libvirt/libvirt.h.in
@@ -1361,6 +1361,7 @@ typedef enum {
 typedef enum {
     VIR_DOMAIN_EVENT_SUSPENDED_PAUSED = 0,   /* Normal suspend due to admin pause */
     VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED = 1, /* Suspended for offline migration */
+    VIR_DOMAIN_EVENT_SUSPENDED_IOERROR = 2,  /* Suspended due to a disk I/O error */
 } virDomainEventSuspendedDetailType;
 
 /**
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 8766ca2..ae19097 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -824,9 +824,51 @@ cleanup:
     return ret;
 }
 
+
+static int
+qemuHandleDiskIOError(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
+                      virDomainObjPtr vm,
+                      int action,
+                      const char *diskalias ATTRIBUTE_UNUSED)
+{
+    struct qemud_driver *driver = qemu_driver;
+    virDomainEventPtr event = NULL;
+
+    VIR_DEBUG("Received IO error on %p '%s': action=%d disk=%s", vm, vm->def->name, action, diskalias);
+
+    if (action != QEMU_MONITOR_DISK_IO_ERROR_STOP)
+        return 0;
+
+    virDomainObjLock(vm);
+
+    if (action == QEMU_MONITOR_DISK_IO_ERROR_STOP &&
+        vm->state == VIR_DOMAIN_RUNNING) {
+        VIR_DEBUG("Transitioned guest %s to paused state", vm->def->name);
+
+        vm->state = VIR_DOMAIN_PAUSED;
+        event = virDomainEventNewFromObj(vm,
+                                         VIR_DOMAIN_EVENT_SUSPENDED,
+                                         VIR_DOMAIN_EVENT_SUSPENDED_IOERROR);
+
+        if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0)
+            VIR_WARN("Unable to save status on vm %s after IO error", vm->def->name);
+    }
+
+    virDomainObjUnlock(vm);
+
+    if (event) {
+        qemuDriverLock(driver);
+        qemuDomainEventQueue(driver, event);
+        qemuDriverUnlock(driver);
+    }
+    return 0;
+}
+
+
 static qemuMonitorCallbacks monitorCallbacks = {
     .eofNotify = qemuHandleMonitorEOF,
     .diskSecretLookup = findVolumeQcowPassphrase,
+    .diskIOError = qemuHandleDiskIOError,
 };
 
 static int
diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
index b88532c..7205001 100644
--- a/src/qemu/qemu_monitor.c
+++ b/src/qemu/qemu_monitor.c
@@ -791,6 +791,24 @@ int qemuMonitorEmitStop(qemuMonitorPtr mon)
 }
 
 
+int qemuMonitorEmitDiskIOError(qemuMonitorPtr mon,
+                               int action,
+                               const char *deviceAlias)
+{
+    int ret = -1;
+    VIR_DEBUG("mon=%p action=%d deviceAlias=%s", mon, action, deviceAlias);
+
+    qemuMonitorRef(mon);
+    qemuMonitorUnlock(mon);
+    if (mon->cb && mon->cb->diskIOError)
+        ret = mon->cb->diskIOError(mon, mon->vm, action, deviceAlias);
+    qemuMonitorLock(mon);
+    qemuMonitorUnref(mon);
+
+    return ret;
+}
+
+
 int qemuMonitorSetCapabilities(qemuMonitorPtr mon)
 {
     int ret;
diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h
index 0ac3957..75a2853 100644
--- a/src/qemu/qemu_monitor.h
+++ b/src/qemu/qemu_monitor.h
@@ -60,6 +60,14 @@ struct _qemuMonitorMessage {
     void *passwordOpaque;
 };
 
+typedef enum {
+    QEMU_MONITOR_DISK_IO_ERROR_STOP,
+    QEMU_MONITOR_DISK_IO_ERROR_REPORT,
+    QEMU_MONITOR_DISK_IO_ERROR_IGNORE,
+
+    QEMU_MONITOR_DISK_IO_ERROR_LAST
+} qemuMonitorDiskIOErrorAction;
+
 typedef struct _qemuMonitorCallbacks qemuMonitorCallbacks;
 typedef qemuMonitorCallbacks *qemuMonitorCallbacksPtr;
 struct _qemuMonitorCallbacks {
@@ -86,6 +94,11 @@ struct _qemuMonitorCallbacks {
                            virDomainObjPtr vm);
     int (*domainStop)(qemuMonitorPtr mon,
                       virDomainObjPtr vm);
+
+    int (*diskIOError)(qemuMonitorPtr mon,
+                       virDomainObjPtr vm,
+                       int action,
+                       const char *diskAlias);
 };
 
 
@@ -122,6 +135,9 @@ int qemuMonitorEmitShutdown(qemuMonitorPtr mon);
 int qemuMonitorEmitReset(qemuMonitorPtr mon);
 int qemuMonitorEmitPowerdown(qemuMonitorPtr mon);
 int qemuMonitorEmitStop(qemuMonitorPtr mon);
+int qemuMonitorEmitDiskIOError(qemuMonitorPtr mon,
+                               int action,
+                               const char *deviceAlias);
 
 int qemuMonitorStartCPUs(qemuMonitorPtr mon,
                          virConnectPtr conn);
diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c
index 7b45594..8b3cda1 100644
--- a/src/qemu/qemu_monitor_json.c
+++ b/src/qemu/qemu_monitor_json.c
@@ -49,6 +49,7 @@ static void qemuMonitorJSONHandleShutdown(qemuMonitorPtr mon, virJSONValuePtr da
 static void qemuMonitorJSONHandleReset(qemuMonitorPtr mon, virJSONValuePtr data);
 static void qemuMonitorJSONHandlePowerdown(qemuMonitorPtr mon, virJSONValuePtr data);
 static void qemuMonitorJSONHandleStop(qemuMonitorPtr mon, virJSONValuePtr data);
+static void qemuMonitorJSONHandleDiskIOError(qemuMonitorPtr mon, virJSONValuePtr data);
 
 struct {
     const char *type;
@@ -58,6 +59,7 @@ struct {
     { "RESET", qemuMonitorJSONHandleReset, },
     { "POWERDOWN", qemuMonitorJSONHandlePowerdown, },
     { "STOP", qemuMonitorJSONHandleStop, },
+    { "BLOCK_IO_ERROR", qemuMonitorJSONHandleDiskIOError, },
 };
 
 
@@ -496,6 +498,38 @@ static void qemuMonitorJSONHandleStop(qemuMonitorPtr mon, virJSONValuePtr data A
 }
 
 
+VIR_ENUM_DECL(qemuMonitorDiskIOErrorAction)
+VIR_ENUM_IMPL(qemuMonitorDiskIOErrorAction, QEMU_MONITOR_DISK_IO_ERROR_LAST,
+              "stop", "report", "ignore");
+
+
+static void qemuMonitorJSONHandleDiskIOError(qemuMonitorPtr mon, virJSONValuePtr data)
+{
+    const char *device;
+    const char *action;
+    int actionID;
+
+    /* Throughout here we try our best to carry on upon errors,
+       since it's important to get as much info as possible out
+       to the application */
+
+    if ((action = virJSONValueObjectGetString(data, "action")) == NULL) {
+        VIR_WARN0("Missing action in disk io error event");
+        action = "ignore";
+    }
+
+    if ((device = virJSONValueObjectGetString(data, "device")) == NULL) {
+        VIR_WARN0("missing device in disk io error event");
+    }
+
+    if ((actionID = qemuMonitorDiskIOErrorActionTypeFromString(action)) < 0) {
+        VIR_WARN("unknown disk io error action '%s'", action);
+        actionID = QEMU_MONITOR_DISK_IO_ERROR_IGNORE;
+    }
+
+    qemuMonitorEmitDiskIOError(mon, actionID, device);
+}
+
 int
 qemuMonitorJSONSetCapabilities(qemuMonitorPtr mon)
 {
-- 
1.6.2.5




More information about the libvir-list mailing list