[libvirt] [PATCH 04/10] qemu: Implement period and quota tunable XML configuration and parsing.

Wen Congyang wency at cn.fujitsu.com
Thu Jun 30 03:09:07 UTC 2011


---
 src/conf/domain_conf.c                          |  272 ++++++++++++++++++++++-
 src/conf/domain_conf.h                          |   25 ++
 src/libvirt_private.syms                        |    4 +
 src/qemu/qemu_cgroup.c                          |  131 +++++++++++
 src/qemu/qemu_cgroup.h                          |    2 +
 src/qemu/qemu_process.c                         |    4 +
 tests/qemuxml2argvdata/qemuxml2argv-cputune.xml |    2 +
 7 files changed, 438 insertions(+), 2 deletions(-)

diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 60e0318..0a1f973 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -997,6 +997,21 @@ virDomainVcpuPinDefFree(virDomainVcpuPinDefPtr *def,
     VIR_FREE(def);
 }
 
+/* Free @nvcpubw bandwidth entries and the array @def that holds them. */
+static void
+virDomainVcpuBWDefFree(virDomainVcpuBWDefPtr *def, int nvcpubw)
+{
+    int i;
+
+    /* An allocated but still empty array must be released too. */
+    if (!def)
+        return;
+
+    for (i = 0; i < nvcpubw; i++)
+        VIR_FREE(def[i]);
+    VIR_FREE(def);
+}
+
 void virDomainDefFree(virDomainDefPtr def)
 {
     unsigned int i;
@@ -1089,6 +1104,9 @@ void virDomainDefFree(virDomainDefPtr def)
 
     virCPUDefFree(def->cpu);
 
+    virDomainVcpuBWDefFree(def->cputune.vcpubw,
+                           def->cputune.nvcpubw);
+
     virDomainVcpuPinDefFree(def->cputune.vcpupin, def->cputune.nvcpupin);
 
     VIR_FREE(def->numatune.memory.nodemask);
@@ -5715,6 +5733,62 @@ error:
     goto cleanup;
 }
 
+/* Parse one <cputune>/<bandwidth> element into a new definition.  @vcpu
+ * must parse as an unsigned integer below @maxvcpus; period and quota are
+ * set to 0 when they cannot be read.  Returns NULL after reporting an error. */
+static virDomainVcpuBWDefPtr
+virDomainVcpuBWDefParseXML(const xmlNodePtr node,
+                           xmlXPathContextPtr ctxt, int maxvcpus)
+{
+    virDomainVcpuBWDefPtr def;
+    xmlNodePtr oldnode = ctxt->node;
+    unsigned int vcpuid;
+    unsigned long long period;
+    long long quota;
+    int ret;
+
+    if (VIR_ALLOC(def) < 0) {
+        virReportOOMError();
+        return NULL;
+    }
+
+    ctxt->node = node;   /* evaluate the XPath queries relative to @node */
+
+    ret = virXPathUInt("string(./@vcpu)", ctxt, &vcpuid);
+    if (ret == -2) {
+        virDomainReportError(VIR_ERR_INTERNAL_ERROR,
+                             "%s", _("vcpu id must be an unsigned integer"));
+        goto error;
+    } else if (ret == -1) {
+        virDomainReportError(VIR_ERR_INTERNAL_ERROR,
+                             "%s", _("can't parse bandwidth node"));
+        goto error;
+    }
+
+    if (vcpuid >= maxvcpus) {
+        virDomainReportError(VIR_ERR_INTERNAL_ERROR,
+                             "%s", _("vcpu id must be less than maxvcpus"));
+        goto error;
+    }
+
+    if (virXPathULongLong("string(./@period)", ctxt, &period) < 0)
+        period = 0;
+    if (virXPathLongLong("string(./@quota)", ctxt, &quota) < 0)
+        quota = 0;
+
+    def->vcpuid = vcpuid;
+    def->period = period;
+    def->quota = quota;
+
+cleanup:
+    ctxt->node = oldnode;
+    return def;
+
+error:
+    VIR_FREE(def);
+    goto cleanup;
+}
+
 
 static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps,
                                             xmlDocPtr xml,
@@ -5881,6 +5955,49 @@ static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps,
                       &def->cputune.shares) < 0)
         def->cputune.shares = 0;
 
+    if ((n = virXPathNodeSet("./cputune/bandwidth", ctxt, &nodes)) < 0)
+        goto error;
+
+    if (n > def->maxvcpus) {
+        virDomainReportError(VIR_ERR_INTERNAL_ERROR,
+                             "%s", _("bandwith nodes must be less than"
+                                     " maxvcpus"));
+        goto error;
+    }
+
+    if (n && VIR_ALLOC_N(def->cputune.vcpubw, n) < 0)
+        goto no_memory;
+
+    for (i = 0; i < n; i++) {
+        virDomainVcpuBWDefPtr vcpubw = NULL;
+        vcpubw = virDomainVcpuBWDefParseXML(nodes[i], ctxt, def->maxvcpus);
+
+        if (!vcpubw)
+            goto error;
+
+        if (virDomainVcpuBWIsDuplicate(def->cputune.vcpubw,
+                                       def->cputune.nvcpubw,
+                                       vcpubw->vcpuid)) {
+            virDomainReportError(VIR_ERR_INTERNAL_ERROR,
+                                 "%s", _("duplicate vcpubandwidth for same"
+                                         " vcpu"));
+            VIR_FREE(vcpubw);
+            goto error;
+        }
+
+        if (vcpubw->period || vcpubw->quota)
+            def->cputune.vcpubw[def->cputune.nvcpubw++] = vcpubw;
+        else
+            VIR_FREE(vcpubw);
+    }
+    if (def->cputune.nvcpubw)
+        ignore_value(VIR_REALLOC_N(def->cputune.vcpubw,
+                                   def->cputune.nvcpubw));
+    else
+        VIR_FREE(def->cputune.vcpubw);
+
+    VIR_FREE(nodes);
+
     if ((n = virXPathNodeSet("./cputune/vcpupin", ctxt, &nodes)) < 0) {
         goto error;
     }
@@ -8274,6 +8391,144 @@ virDomainVcpuPinDel(virDomainDefPtr def, int vcpu)
     return 0;
 }
 
+/* Report whether a bandwidth entry for @vcpu is already present in @def.
+ * Returns 1 when such an entry exists, 0 otherwise. */
+int
+virDomainVcpuBWIsDuplicate(virDomainVcpuBWDefPtr *def,
+                           int nvcpubw,
+                           int vcpu)
+{
+    int idx = 0;
+
+    if (def == NULL || nvcpubw == 0)
+        return 0;
+
+    while (idx < nvcpubw) {
+        if (def[idx++]->vcpuid == vcpu)
+            return 1;
+    }
+
+    return 0;
+}
+
+/* Look up the bandwidth entry for @vcpu in @def; NULL when there is none. */
+virDomainVcpuBWDefPtr
+virDomainVcpuBWFindByVcpu(virDomainVcpuBWDefPtr *def, int nvcpubw, int vcpu)
+{
+    int pos = 0;
+    virDomainVcpuBWDefPtr found = NULL;
+
+    if (def == NULL || nvcpubw == 0)
+        return NULL;
+
+    while (pos < nvcpubw && !found) {
+        if (def[pos]->vcpuid == vcpu)
+            found = def[pos];
+        pos++;
+    }
+    return found;
+}
+
+/* Set the bandwidth tunables of @vcpu in @def.
+ *
+ * @def: domain definition whose cputune bandwidth list is modified
+ * @period: period value to store for the vcpu
+ * @quota: quota value to store for the vcpu
+ * @vcpu: id of the vcpu the values apply to
+ *
+ * A period and quota of 0 mean "no setting": an existing entry for @vcpu
+ * is deleted and no new entry is created.  Otherwise the existing entry
+ * is updated in place, or a new entry is appended to the list.
+ * Entries stored here are released together with the definition.
+ *
+ * Returns 0 on success, -1 if memory allocation failed (already reported).
+ */
+int
+virDomainVcpuBWAdd(virDomainDefPtr def,
+                   unsigned long long period,
+                   long long quota,
+                   int vcpu)
+{
+    virDomainVcpuBWDefPtr vcpubw = NULL;
+
+    vcpubw = virDomainVcpuBWFindByVcpu(def->cputune.vcpubw,
+                                       def->cputune.nvcpubw, vcpu);
+
+    if (period == 0 && quota == 0) {
+        /* Nothing to store; drop a stale entry if there is one. */
+        if (vcpubw)
+            return virDomainVcpuBWDel(def, vcpu);
+        return 0;
+    }
+
+    /* Update the entry that already exists for this vcpu. */
+    if (vcpubw) {
+        vcpubw->period = period;
+        vcpubw->quota = quota;
+        return 0;
+    }
+
+    /* No entry yet: allocate one and append it to the list. */
+    if (VIR_ALLOC(vcpubw) < 0)
+        goto no_memory;
+
+    /* Grow the array through def->cputune.vcpubw itself: reallocating a
+     * local copy of the pointer would leave the definition pointing at
+     * freed memory whenever the block is moved.  The same call also
+     * creates the array when the list has been empty so far. */
+    if (VIR_REALLOC_N(def->cputune.vcpubw, def->cputune.nvcpubw + 1) < 0)
+        goto no_memory;
+
+    vcpubw->vcpuid = vcpu;
+    vcpubw->period = period;
+    vcpubw->quota = quota;
+    def->cputune.vcpubw[def->cputune.nvcpubw++] = vcpubw;
+
+    return 0;
+
+no_memory:
+    virReportOOMError();
+    VIR_FREE(vcpubw);
+    return -1;
+}
+
+/* Remove the bandwidth entry of @vcpu from @def, if one exists.
+ * Always returns 0; a missing entry is not an error. */
+int
+virDomainVcpuBWDel(virDomainDefPtr def, int vcpu)
+{
+    virDomainVcpuBWDefPtr *entries = def->cputune.vcpubw;
+    int total = def->cputune.nvcpubw;
+    int idx;
+
+    /* Locate the entry; idx == total afterwards means there is none
+     * (this also covers an empty list). */
+    for (idx = 0; idx < total; idx++) {
+        if (entries[idx]->vcpuid == vcpu)
+            break;
+    }
+    if (idx >= total)
+        return 0;
+
+    /* Release the entry and close the gap it leaves in the array. */
+    VIR_FREE(entries[idx]);
+    memmove(&entries[idx], &entries[idx + 1],
+            (total - idx - 1) * sizeof(virDomainVcpuBWDefPtr));
+
+    def->cputune.nvcpubw = total - 1;
+    if (def->cputune.nvcpubw == 0) {
+        VIR_FREE(def->cputune.vcpubw);
+        return 0;
+    }
+
+    /* Shrinking is best effort; a failure is deliberately ignored. */
+    if (VIR_REALLOC_N(def->cputune.vcpubw, def->cputune.nvcpubw) < 0) {
+        /* ignore, harmless */
+    }
+
+    return 0;
+}
+
 static int
 virDomainLifecycleDefFormat(virBufferPtr buf,
                             int type,
@@ -9553,12 +9808,24 @@ char *virDomainDefFormat(virDomainDefPtr def,
         virBufferAsprintf(&buf, " current='%u'", def->vcpus);
     virBufferAsprintf(&buf, ">%u</vcpu>\n", def->maxvcpus);
 
-    if (def->cputune.shares || def->cputune.vcpupin)
+    if (def->cputune.shares || def->cputune.vcpupin ||
+        def->cputune.vcpubw)
         virBufferAddLit(&buf, "  <cputune>\n");
 
     if (def->cputune.shares)
         virBufferAsprintf(&buf, "    <shares>%lu</shares>\n",
                           def->cputune.shares);
+    if (def->cputune.vcpubw) {
+        int i;
+        for (i = 0; i < def->cputune.nvcpubw; i++) {
+            virBufferAsprintf(&buf, "    <bandwidth vcpu='%u' ",
+                              def->cputune.vcpubw[i]->vcpuid);
+            virBufferAsprintf(&buf, "period='%llu' ",
+                              def->cputune.vcpubw[i]->period);
+            virBufferAsprintf(&buf, "quota='%lld'/>\n",
+                              def->cputune.vcpubw[i]->quota);
+        }
+    }
     if (def->cputune.vcpupin) {
         int i;
         for (i = 0; i < def->cputune.nvcpupin; i++) {
@@ -9580,7 +9847,8 @@ char *virDomainDefFormat(virDomainDefPtr def,
         }
     }
 
-    if (def->cputune.shares || def->cputune.vcpupin)
+    if (def->cputune.shares || def->cputune.vcpupin ||
+        def->cputune.vcpubw)
         virBufferAddLit(&buf, "  </cputune>\n");
 
     if (def->numatune.memory.nodemask)
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index e81977c..a2929b5 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -1108,6 +1108,14 @@ struct _virDomainVcpuPinDef {
     char *cpumask;
 };
 
+typedef struct _virDomainVcpuBWDef virDomainVcpuBWDef;
+typedef virDomainVcpuBWDef *virDomainVcpuBWDefPtr;
+struct _virDomainVcpuBWDef {    /* bandwidth (period/quota) of one vcpu */
+    int vcpuid;                 /* vcpu the settings apply to */
+    unsigned long long period;  /* 0 means not set */
+    long long quota;            /* signed; 0 means not set */
+};
+
 int virDomainVcpuPinIsDuplicate(virDomainVcpuPinDefPtr *def,
                                 int nvcpupin,
                                 int vcpu);
@@ -1116,6 +1124,14 @@ virDomainVcpuPinDefPtr virDomainVcpuPinFindByVcpu(virDomainVcpuPinDefPtr *def,
                                                   int nvcpupin,
                                                   int vcpu);
 
+int virDomainVcpuBWIsDuplicate(virDomainVcpuBWDefPtr *def,
+                               int nvcpubw,
+                               int vcpu);
+
+virDomainVcpuBWDefPtr virDomainVcpuBWFindByVcpu(virDomainVcpuBWDefPtr *def,
+                                                int nvcpubw,
+                                                int vcpu);
+
 enum virDomainNumatuneMemMode {
     VIR_DOMAIN_NUMATUNE_MEM_STRICT,
     VIR_DOMAIN_NUMATUNE_MEM_PREFERRED,
@@ -1170,6 +1186,8 @@ struct _virDomainDef {
 
     struct {
         unsigned long shares;
+        int nvcpubw;
+        virDomainVcpuBWDefPtr *vcpubw;
         int nvcpupin;
         virDomainVcpuPinDefPtr *vcpupin;
     } cputune;
@@ -1413,6 +1431,13 @@ int virDomainVcpuPinAdd(virDomainDefPtr def,
 
 int virDomainVcpuPinDel(virDomainDefPtr def, int vcpu);
 
+int virDomainVcpuBWAdd(virDomainDefPtr def,
+                       unsigned long long period,
+                       long long quota,
+                       int vcpu);
+
+int virDomainVcpuBWDel(virDomainDefPtr def, int vcpu);
+
 int virDomainDiskIndexByName(virDomainDefPtr def, const char *name);
 int virDomainDiskInsert(virDomainDefPtr def,
                         virDomainDiskDefPtr disk);
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 9b9b6ce..aad0c3a 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -371,6 +371,10 @@ virDomainTimerTickpolicyTypeFromString;
 virDomainTimerTickpolicyTypeToString;
 virDomainTimerTrackTypeFromString;
 virDomainTimerTrackTypeToString;
+virDomainVcpuBWAdd;
+virDomainVcpuBWDel;
+virDomainVcpuBWFindByVcpu;
+virDomainVcpuBWIsDuplicate;
 virDomainVcpuPinAdd;
 virDomainVcpuPinDel;
 virDomainVcpuPinFindByVcpu;
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index 1298924..201c0b8 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -24,6 +24,7 @@
 #include <config.h>
 
 #include "qemu_cgroup.h"
+#include "qemu_domain.h"
 #include "cgroup.h"
 #include "logging.h"
 #include "memory.h"
@@ -376,6 +377,136 @@ cleanup:
     return -1;
 }
 
+/* Apply the period and/or quota in @vcpubw to @cgroup.  A NULL @vcpubw or
+ * one with both values 0 is a no-op, and a value of 0 leaves that setting
+ * untouched.  If setting the quota fails, the previous period is restored.
+ * Returns 0 on success, -1 on failure with an error reported. */
+int qemuSetupCgroupVcpuBW(virCgroupPtr cgroup, virDomainVcpuBWDefPtr vcpubw)
+{
+    int rc;
+    unsigned long long old_period = 0;
+
+    if (!vcpubw || (vcpubw->period == 0 && vcpubw->quota == 0))
+        return 0;
+
+    if (vcpubw->period) {
+        /* get old period, and we can rollback if set quota failed */
+        rc = virCgroupGetCpuCfsPeriod(cgroup, &old_period);
+        if (rc < 0) {
+            virReportSystemError(-rc, "%s",
+                                 _("Unable to get cpu bandwidth period"));
+            return -1;
+        }
+
+        rc = virCgroupSetCpuCfsPeriod(cgroup, vcpubw->period);
+        if (rc < 0) {
+            virReportSystemError(-rc, "%s",
+                                 _("Unable to set cpu bandwidth period"));
+            return -1;
+        }
+    }
+
+    if (vcpubw->quota) {
+        rc = virCgroupSetCpuCfsQuota(cgroup, vcpubw->quota);
+        if (rc < 0) {
+            virReportSystemError(-rc, "%s",
+                                 _("Unable to set cpu bandwidth quota"));
+            goto cleanup;
+        }
+    }
+
+    return 0;
+
+cleanup:
+    if (vcpubw->period) {
+        rc = virCgroupSetCpuCfsPeriod(cgroup, old_period);
+        if (rc < 0)
+            virReportSystemError(-rc, "%s",
+                                 _("Unable to rollback cpu bandwidth period"));
+    }
+
+    return -1;
+}
+
+/* Put every vcpu thread of @vm into its own cgroup below the domain cgroup
+ * and apply that vcpu's bandwidth settings to it.  If the vcpu threads
+ * cannot be told apart, the last entry is applied to the domain cgroup.
+ * Returns 0 on success or when cgroups are unavailable, -1 on failure. */
+int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm)
+{
+    virCgroupPtr cgroup = NULL;
+    virCgroupPtr cgroup_vcpu = NULL;
+    qemuDomainObjPrivatePtr priv = vm->privateData;
+    int rc;
+    unsigned int i;
+    virDomainVcpuBWDefPtr *vcpubw_list = vm->def->cputune.vcpubw;
+    virDomainVcpuBWDefPtr vcpubw = NULL;
+    int nvcpubw = vm->def->cputune.nvcpubw;
+
+    if (driver->cgroup == NULL)
+        return 0; /* Not supported, so claim success */
+
+    rc = virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 0);
+    if (rc != 0) {
+        virReportSystemError(-rc, _("Unable to find cgroup for %s"),
+                             vm->def->name);
+        goto cleanup;
+    }
+
+    if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) {
+        /* Without a VCPU<->PID mapping, or when all vcpus run in the main
+         * thread, the vcpus can not be controlled one by one, so the last
+         * bandwidth entry is applied to the whole domain cgroup. */
+        if (vcpubw_list &&
+            qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
+            if (qemuSetupCgroupVcpuBW(cgroup, vcpubw_list[nvcpubw - 1]) < 0)
+                goto cleanup;
+        }
+        goto done;   /* the domain cgroup handle still has to be freed */
+    }
+
+    for (i = 0; i < priv->nvcpupids; i++) {
+        rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 1);
+        if (rc < 0) {
+            virReportSystemError(-rc,
+                                 _("Unable to create vcpu cgroup for %s(vcpu:"
+                                   " %d)"),
+                                 vm->def->name, i);
+            goto cleanup;
+        }
+
+        /* move the thread for vcpu to sub dir */
+        rc = virCgroupAddTask(cgroup_vcpu, priv->vcpupids[i]);
+        if (rc < 0) {
+            virReportSystemError(-rc,
+                                 _("unable to add vcpu %d task %d to cgroup"),
+                                 i, priv->vcpupids[i]);
+            goto cleanup;
+        }
+
+        /* Limit the vcpu's own cgroup, not the one of the whole domain. */
+        if (vcpubw_list &&
+            qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
+            vcpubw = virDomainVcpuBWFindByVcpu(vcpubw_list, nvcpubw, i);
+            if (qemuSetupCgroupVcpuBW(cgroup_vcpu, vcpubw) < 0)
+                goto cleanup;
+        }
+        virCgroupFree(&cgroup_vcpu);
+    }
+
+done:
+    virCgroupFree(&cgroup);
+    return 0;
+
+cleanup:
+    virCgroupFree(&cgroup_vcpu);
+    if (cgroup) {
+        virCgroupRemove(cgroup);
+        virCgroupFree(&cgroup);
+    }
+    return -1;
+}
+
 
 int qemuRemoveCgroup(struct qemud_driver *driver,
                      virDomainObjPtr vm,
diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h
index e8abfb4..f0a5cee 100644
--- a/src/qemu/qemu_cgroup.h
+++ b/src/qemu/qemu_cgroup.h
@@ -49,6 +49,8 @@ int qemuSetupHostUsbDeviceCgroup(usbDevice *dev,
                                  void *opaque);
 int qemuSetupCgroup(struct qemud_driver *driver,
                     virDomainObjPtr vm);
+int qemuSetupCgroupVcpuBW(virCgroupPtr cgroup, virDomainVcpuBWDefPtr vcpubw);
+int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm);
 int qemuRemoveCgroup(struct qemud_driver *driver,
                      virDomainObjPtr vm,
                      int quiet);
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 88a31a3..ce3a4bb 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -2677,6 +2677,10 @@ int qemuProcessStart(virConnectPtr conn,
     if (qemuProcessDetectVcpuPIDs(driver, vm) < 0)
         goto cleanup;
 
+    VIR_DEBUG("Setting cgroup for each VCPU(if required)");
+    if (qemuSetupCgroupForVcpu(driver, vm) < 0)
+        goto cleanup;
+
     VIR_DEBUG("Setting VCPU affinities");
     if (qemuProcessSetVcpuAffinites(conn, vm) < 0)
         goto cleanup;
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml b/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml
index 0afbadb..0a67e40 100644
--- a/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml
+++ b/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml
@@ -6,6 +6,8 @@
   <vcpu>2</vcpu>
   <cputune>
     <shares>2048</shares>
+    <bandwidth vcpu='0' period='1000000' quota='-1'/>
+    <bandwidth vcpu='1' period='1000' quota='1000'/>
     <vcpupin vcpu='0' cpuset='0'/>
     <vcpupin vcpu='1' cpuset='1'/>
   </cputune>
-- 
1.7.1




More information about the libvir-list mailing list