[libvirt] [RFC PATCH 2/2] Introduce capacity to virCapsHostNUMACellCPU to help vcpu pinning decisions

Shivaprasad G Bhat shivaprasadbhat at gmail.com
Fri Jan 29 06:36:53 UTC 2016


The cpus tag in a NUMA cell today describes each cpu's core id, socket id and
siblings list. Users rely on the siblings list to pin the vcpus belonging to
the same guest-core to threads of the same host-core. The host topology gives
a hint of the maximum number of siblings one might expect in the siblings
list.

For PPC64, these assumptions stop making sense when subcores are in use. The
core is split into subcores, and the siblings that libvirt sees in sysfs are
a list of subcores rather than of sibling threads. The real sibling threads of
a subcore are in fact offline on the host and are brought online by the KVM
scheduler in guest context.

So, the best way to achieve efficiency in this case is to pin all the guest
threads from the same guest-core to the same subcore (that is, to its primary
thread, which is the one that is online). The new parameter "capacity"
reflects the thread capacity of the core/subcore, i.e. how many vcpus from the
same guest-core can be pinned to the primary thread.

On PPC64, the capacity will be set to threads_per_subcore and the siblings
list will contain only the cpu itself. The capacity can thus be used to
indicate how many vcpus from a guest-core can be pinned to the same subcore.

On other archs, the capacity will be set to "1", indicating that each core in
the siblings list should be pinned to only once.

Signed-off-by: Shivaprasad G Bhat <sbhat at linux.vnet.ibm.com>
---
 src/conf/capabilities.c                        |    3 +-
 src/conf/capabilities.h                        |    1 +
 src/nodeinfo.c                                 |   34 +++++++++++++++++++++---
 tests/vircaps2xmldata/vircaps-basic-4-4-2G.xml |   32 +++++++++++------------
 tests/vircaps2xmltest.c                        |    1 +
 5 files changed, 50 insertions(+), 21 deletions(-)

diff --git a/src/conf/capabilities.c b/src/conf/capabilities.c
index 86ea212..62ade2e 100644
--- a/src/conf/capabilities.c
+++ b/src/conf/capabilities.c
@@ -799,7 +799,8 @@ virCapabilitiesFormatNUMATopology(virBufferPtr buf,
         virBufferAsprintf(buf, "<cpus num='%d'>\n", cells[i]->ncpus);
         virBufferAdjustIndent(buf, 2);
         for (j = 0; j < cells[i]->ncpus; j++) {
-            virBufferAsprintf(buf, "<cpu id='%d'", cells[i]->cpus[j].id);
+            virBufferAsprintf(buf, "<cpu id='%d' capacity='%d'",
+                              cells[i]->cpus[j].id, cells[i]->cpus[j].capacity);
 
             if (cells[i]->cpus[j].siblings) {
                 if (!(siblings = virBitmapFormat(cells[i]->cpus[j].siblings)))
diff --git a/src/conf/capabilities.h b/src/conf/capabilities.h
index 1754b13..94aa729 100644
--- a/src/conf/capabilities.h
+++ b/src/conf/capabilities.h
@@ -90,6 +90,7 @@ typedef struct _virCapsHostNUMACellCPU virCapsHostNUMACellCPU;
 typedef virCapsHostNUMACellCPU *virCapsHostNUMACellCPUPtr;
 struct _virCapsHostNUMACellCPU {
     unsigned int id;
+    unsigned int capacity;
     unsigned int socket_id;
     unsigned int core_id;
     virBitmapPtr siblings;
diff --git a/src/nodeinfo.c b/src/nodeinfo.c
index 531e0ee..dd2b205 100644
--- a/src/nodeinfo.c
+++ b/src/nodeinfo.c
@@ -1175,7 +1175,6 @@ linuxParseCPUmap(int max_cpuid, const char *path)
     return NULL;
 }
 
-
 static virBitmapPtr
 virNodeGetSiblingsList(const char *dir, int cpu_id)
 {
@@ -1873,6 +1872,7 @@ nodeCapsInitNUMAFake(const char *sysfs_prefix,
                 if (virNodeGetCpuValue(cpupath, id, "online", 1)) {
 #endif
                     cpus[cid].id = id;
+                    cpus[cid].capacity = 1;
                     cpus[cid].socket_id = s;
                     cpus[cid].core_id = c;
                     if (!(cpus[cid].siblings = virBitmapNew(ncpus)))
@@ -1999,7 +1999,8 @@ nodeGetMemoryFake(unsigned long long *mem,
 static int
 virNodeCapsFillCPUInfo(const char *cpupath ATTRIBUTE_UNUSED,
                        int cpu_id ATTRIBUTE_UNUSED,
-                       virCapsHostNUMACellCPUPtr cpu ATTRIBUTE_UNUSED)
+                       virCapsHostNUMACellCPUPtr cpu ATTRIBUTE_UNUSED,
+                       int threads_per_subcore)
 {
 #ifdef __linux__
     int tmp;
@@ -2017,9 +2018,24 @@ virNodeCapsFillCPUInfo(const char *cpupath ATTRIBUTE_UNUSED,
 
     cpu->core_id = tmp;
 
-    if (!(cpu->siblings = virNodeGetSiblingsList(cpupath, cpu_id)))
+    cpu->capacity = 1;
+
+    if (!threads_per_subcore &&
+        !(cpu->siblings = virNodeGetSiblingsList(cpupath, cpu_id)))
         return -1;
 
+    /* The primary thread, which is online, acts in the capacity of the whole
+     * core and all its thread siblings. So, if one wants to pin the siblings
+     * for efficiency, it's better to pin to the same primary thread than to a
+     * primary thread from another subcore. So, show the siblings as self.
+     */
+    if (threads_per_subcore) {
+        cpu->capacity = threads_per_subcore;
+        if ((cpu->siblings = virBitmapNew(virNumaGetMaxCPUs())) == 0 ||
+            virBitmapSetBit(cpu->siblings, cpu_id) < 0)
+            return -1;
+    }
+
     return 0;
 #else
     virReportError(VIR_ERR_NO_SUPPORT, "%s",
@@ -2123,6 +2139,7 @@ nodeCapsInitNUMA(const char *sysfs_prefix,
     int cpu;
     bool topology_failed = false;
     int max_node;
+    int threads_per_subcore = 0;
 
     if (virAsprintf(&cpupath, "%s/cpu", prefix) < 0)
         return -1;
@@ -2132,6 +2149,14 @@ nodeCapsInitNUMA(const char *sysfs_prefix,
         goto cleanup;
     }
 
+    if ((threads_per_subcore = nodeGetThreadsPerSubcore(caps->host.arch)) < 0)
+        goto cleanup;
+
+    /* If the subcore configuration is not valid, just pretend subcores
+     * are not in use, so that the thread capacity is set to 1 */
+    if (!nodeHasValidSubcoreConfiguration(sysfs_prefix, threads_per_subcore))
+        threads_per_subcore = 0;
+
     if ((max_node = virNumaGetMaxNode()) < 0)
         goto cleanup;
 
@@ -2151,7 +2176,8 @@ nodeCapsInitNUMA(const char *sysfs_prefix,
 
         for (i = 0; i < virBitmapSize(cpumap); i++) {
             if (virBitmapIsBitSet(cpumap, i)) {
-                if (virNodeCapsFillCPUInfo(cpupath, i, cpus + cpu++) < 0) {
+                if (virNodeCapsFillCPUInfo(cpupath, i, cpus + cpu++,
+                                           threads_per_subcore) < 0) {
                     topology_failed = true;
                     virResetLastError();
                 }
diff --git a/tests/vircaps2xmldata/vircaps-basic-4-4-2G.xml b/tests/vircaps2xmldata/vircaps-basic-4-4-2G.xml
index 8694f87..c979c4b 100644
--- a/tests/vircaps2xmldata/vircaps-basic-4-4-2G.xml
+++ b/tests/vircaps2xmldata/vircaps-basic-4-4-2G.xml
@@ -16,10 +16,10 @@
             <sibling id='3' value='20'/>
           </distances>
           <cpus num='4'>
-            <cpu id='0' socket_id='0' core_id='0' siblings='0'/>
-            <cpu id='0' socket_id='0' core_id='1' siblings='0'/>
-            <cpu id='0' socket_id='0' core_id='2' siblings='0'/>
-            <cpu id='0' socket_id='0' core_id='3' siblings='0'/>
+            <cpu id='0' capacity='1' socket_id='0' core_id='0' siblings='0'/>
+            <cpu id='0' capacity='1' socket_id='0' core_id='1' siblings='0'/>
+            <cpu id='0' capacity='1' socket_id='0' core_id='2' siblings='0'/>
+            <cpu id='0' capacity='1' socket_id='0' core_id='3' siblings='0'/>
           </cpus>
         </cell>
         <cell id='1'>
@@ -31,10 +31,10 @@
             <sibling id='3' value='20'/>
           </distances>
           <cpus num='4'>
-            <cpu id='1' socket_id='1' core_id='1' siblings='1'/>
-            <cpu id='1' socket_id='1' core_id='2' siblings='1'/>
-            <cpu id='1' socket_id='1' core_id='3' siblings='1'/>
-            <cpu id='1' socket_id='1' core_id='4' siblings='1'/>
+            <cpu id='1' capacity='1' socket_id='1' core_id='1' siblings='1'/>
+            <cpu id='1' capacity='1' socket_id='1' core_id='2' siblings='1'/>
+            <cpu id='1' capacity='1' socket_id='1' core_id='3' siblings='1'/>
+            <cpu id='1' capacity='1' socket_id='1' core_id='4' siblings='1'/>
           </cpus>
         </cell>
         <cell id='2'>
@@ -46,10 +46,10 @@
             <sibling id='3' value='20'/>
           </distances>
           <cpus num='4'>
-            <cpu id='2' socket_id='2' core_id='2' siblings='2'/>
-            <cpu id='2' socket_id='2' core_id='3' siblings='2'/>
-            <cpu id='2' socket_id='2' core_id='4' siblings='2'/>
-            <cpu id='2' socket_id='2' core_id='5' siblings='2'/>
+            <cpu id='2' capacity='1' socket_id='2' core_id='2' siblings='2'/>
+            <cpu id='2' capacity='1' socket_id='2' core_id='3' siblings='2'/>
+            <cpu id='2' capacity='1' socket_id='2' core_id='4' siblings='2'/>
+            <cpu id='2' capacity='1' socket_id='2' core_id='5' siblings='2'/>
           </cpus>
         </cell>
         <cell id='3'>
@@ -61,10 +61,10 @@
             <sibling id='3' value='10'/>
           </distances>
           <cpus num='4'>
-            <cpu id='3' socket_id='3' core_id='3' siblings='3'/>
-            <cpu id='3' socket_id='3' core_id='4' siblings='3'/>
-            <cpu id='3' socket_id='3' core_id='5' siblings='3'/>
-            <cpu id='3' socket_id='3' core_id='6' siblings='3'/>
+            <cpu id='3' capacity='1' socket_id='3' core_id='3' siblings='3'/>
+            <cpu id='3' capacity='1' socket_id='3' core_id='4' siblings='3'/>
+            <cpu id='3' capacity='1' socket_id='3' core_id='5' siblings='3'/>
+            <cpu id='3' capacity='1' socket_id='3' core_id='6' siblings='3'/>
           </cpus>
         </cell>
       </cells>
diff --git a/tests/vircaps2xmltest.c b/tests/vircaps2xmltest.c
index be2bad5..18bc490 100644
--- a/tests/vircaps2xmltest.c
+++ b/tests/vircaps2xmltest.c
@@ -51,6 +51,7 @@ buildVirCapabilities(int max_cells,
 
         for (core_id = 0; core_id < max_cpus_in_cell; core_id++) {
             cell_cpus[core_id].id = id;
+            cell_cpus[core_id].capacity = 1;
             cell_cpus[core_id].socket_id = cell_id;
             cell_cpus[core_id].core_id = id + core_id;
             if (!(cell_cpus[core_id].siblings =




More information about the libvir-list mailing list