[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

[libvirt] patch: allow disk cache mode to be specified in a domain's xml definition.



We have found certain application scenarios where
overriding of the default qemu host cache mode
provides a substantial improvement in guest
performance.  In particular, disabling host caching
of the file/dev backing a guest drive.  A summary
of performance metrics may be found below.

Attached is a simple patch which adds general
support for a "cache=*" attribute to be passed
to qemu from libvirt as part of a xml <driver>
element.  It is parsed in a domain's xml definition
as well  as being generated during output of the
same.  For example:

   <disk type='file' device='disk'>
     <source file='/guest_roots/fc8.root'/>
     <target dev='hda'/>
     <driver name='qemu' type='qwerty' cache='none'/>
   </disk>

where both the existing "type=*" and added "cache=*"
attributes are optional and independent.

Note this is only intended to serve as an internal
hook allowing override of qemu's default cache
behavior.  At the present it is not proposed as an
end-user documented/visible feature.

-john


Mark Wagner wrote:
This data is provided is provided by the Red Hat Performance
team. It is intended to support the request for adding the
required functionality to libvirt to allow for setting a
storage parameter to control the caching behavior of QEMU.
This value would be passed on the QEMU command line.


Our testing of commercial databases in a larger, Enterprise
configuration (MSA, 16 cores, etc)  has shown that the default
QEMU caching behavior is not always the best. This set of data
compares the drop off in performance of both writethrough
and writeback cache compared to the nocache option. The main
metric is transactions per minute (tpm).  The other "axis" is
the number of simulated users (x1000)

        % nocache TPM Results
K Users  WT      WB
 10     60%      64%
 20     66%      71%
 40     68%      72%
 60     71%      75%
 80     74%      79%
100     76%      83%

From the above set of data, it is clear that the default behavior
of QEMU is the worst performer out of the three cache options for
this type of use case.  It is also clear that we at minimum, 25%
of the possible TPM performance just due to the cache setting.






--
john cooper redhat com

 domain_conf.c |   32 +++++++++++++++++++++++++-------
 domain_conf.h |   15 +++++++++++++++
 qemu_conf.c   |   32 ++++++++++++++++++++++++++++----
 3 files changed, 68 insertions(+), 11 deletions(-)
=================================================================
--- a/src/domain_conf.h
+++ b/src/domain_conf.h
@@ -80,6 +80,20 @@ enum virDomainDiskBus {
     VIR_DOMAIN_DISK_BUS_LAST
 };
 
+/* summary of all possible host open file cache modes
+ */
+typedef enum {
+    VIR_DOMAIN_DISK_CACHE_DEFAULT = 0,
+    VIR_DOMAIN_DISK_CACHE_DISABLE,
+    VIR_DOMAIN_DISK_CACHE_WRITEBACK,
+    VIR_DOMAIN_DISK_CACHE_WRITETHRU,
+    } host_cachemode;
+
+typedef struct {
+    char *tag;
+    host_cachemode idx;
+    } cache_map_t;
+
 /* Stores the virtual disk configuration */
 typedef struct _virDomainDiskDef virDomainDiskDef;
 typedef virDomainDiskDef *virDomainDiskDefPtr;
@@ -91,6 +105,7 @@ struct _virDomainDiskDef {
     char *dst;
     char *driverName;
     char *driverType;
+    host_cachemode cachemode;
     unsigned int readonly : 1;
     unsigned int shared : 1;
     int slotnum; /* pci slot number for unattach */
=================================================================
--- a/src/qemu_conf.c
+++ b/src/qemu_conf.c
@@ -616,6 +616,26 @@ qemudNetworkIfaceConnect(virConnectPtr c
     return NULL;
 }
 
+/* map from internal cache mode to qemu cache arg text
+ */ 
+static cache_map_t qemu_cache_map[] = {
+    {"none", VIR_DOMAIN_DISK_CACHE_DISABLE},
+    {"writeback", VIR_DOMAIN_DISK_CACHE_WRITEBACK},
+    {"writethrough", VIR_DOMAIN_DISK_CACHE_WRITETHRU},
+    {NULL}};
+
+/* return qemu arg text * corresponding to idx if found, NULL otherwise
+ */
+static inline char *qemu_cache_mode(host_cachemode idx)
+{
+    int i;
+
+    for (i = 0; qemu_cache_map[i].tag; ++i)
+	if (qemu_cache_map[i].idx == idx)
+            return (qemu_cache_map[i].tag);
+    return (NULL);
+}
+
 static int qemudBuildCommandLineChrDevStr(virDomainChrDefPtr dev,
                                           char *buf,
                                           int buflen)
@@ -946,6 +966,8 @@ int qemudBuildCommandLine(virConnectPtr 
             virDomainDiskDefPtr disk = vm->def->disks[i];
             int idx = virDiskNameToIndex(disk->dst);
             const char *bus = virDomainDiskQEMUBusTypeToString(disk->bus);
+            int nc;
+            char *cachemode;
 
             if (disk->bus == VIR_DOMAIN_DISK_BUS_USB) {
                 if (disk->device == VIR_DOMAIN_DISK_DEVICE_DISK) {
@@ -980,15 +1002,17 @@ int qemudBuildCommandLine(virConnectPtr 
                 break;
             }
 
-            snprintf(opt, PATH_MAX, "file=%s,if=%s,%sindex=%d%s%s",
+            nc = snprintf(opt, PATH_MAX, "file=%s,if=%s,%sindex=%d%s",
                      disk->src ? disk->src : "", bus,
                      media ? media : "",
                      idx,
                      bootable &&
                      disk->device == VIR_DOMAIN_DISK_DEVICE_DISK
-                     ? ",boot=on" : "",
-                     disk->shared && ! disk->readonly
-                     ? ",cache=off" : "");
+                     ? ",boot=on" : "");
+            cachemode = qemu_cache_mode(disk->cachemode);
+            if (cachemode || disk->shared && !disk->readonly)
+                snprintf(opt + nc, PATH_MAX - nc, ",cache=%s",
+                    cachemode ? cachemode : "off");
 
             ADD_ARG_LIT("-drive");
             ADD_ARG_LIT(opt);
=================================================================
--- a/src/domain_conf.c
+++ b/src/domain_conf.c
@@ -528,6 +528,16 @@ int virDomainDiskCompare(virDomainDiskDe
 
 
 #ifndef PROXY
+
+/* map from xml cache tag to internal cache mode
+ */
+static cache_map_t cache_map[] = {
+    {"none", VIR_DOMAIN_DISK_CACHE_DISABLE},
+    {"off", VIR_DOMAIN_DISK_CACHE_DISABLE},
+    {"writeback", VIR_DOMAIN_DISK_CACHE_WRITEBACK},
+    {"writethrough", VIR_DOMAIN_DISK_CACHE_WRITETHRU},
+    {NULL}};
+
 /* Parse the XML definition for a disk
  * @param node XML nodeset to parse for disk definition
  */
@@ -543,6 +553,8 @@ virDomainDiskDefParseXML(virConnectPtr c
     char *source = NULL;
     char *target = NULL;
     char *bus = NULL;
+    char *cachetag = NULL;
+    int i;
 
     if (VIR_ALLOC(def) < 0) {
         virDomainReportError(conn, VIR_ERR_NO_MEMORY, NULL);
@@ -592,6 +604,14 @@ virDomainDiskDefParseXML(virConnectPtr c
                        (xmlStrEqual(cur->name, BAD_CAST "driver"))) {
                 driverName = virXMLPropString(cur, "name");
                 driverType = virXMLPropString(cur, "type");
+                if (cachetag = virXMLPropString(cur, "cache")) {
+                    for (i = 0; cache_map[i].tag; ++i)
+                        if (!strcmp(cache_map[i].tag, cachetag)) {
+                            def->cachemode = cache_map[i].idx;
+                            break;
+                        }
+                VIR_FREE(cachetag);
+                }
             } else if (xmlStrEqual(cur->name, BAD_CAST "readonly")) {
                 def->readonly = 1;
             } else if (xmlStrEqual(cur->name, BAD_CAST "shareable")) {
@@ -2620,14 +2640,12 @@ virDomainDiskDefFormat(virConnectPtr con
                       type, device);
 
     if (def->driverName) {
+        virBufferVSprintf(buf, "      <driver name='%s'", def->driverName);
         if (def->driverType)
-            virBufferVSprintf(buf,
-                              "      <driver name='%s' type='%s'/>\n",
-                              def->driverName, def->driverType);
-        else
-            virBufferVSprintf(buf,
-                              "      <driver name='%s'/>\n",
-                              def->driverName);
+            virBufferVSprintf(buf, " type='%s'", def->driverType);
+        if (def->cachemode)
+            virBufferVSprintf(buf, " cache='%s'", cache_map[def->cachemode].tag);
+        virBufferVSprintf(buf, "/>\n");
     }
 
     if (def->src) {

[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]