[libvirt] [PATCHv3 2/4] nodeinfo: Fix gathering of nodeinfo data structure

Peter Krempa pkrempa at redhat.com
Mon Jul 9 15:17:10 UTC 2012


This patch changes the way data to fill the nodeinfo structure are
gathered. We've gathere the test data by iterating processors an sockets
separately from nodes. The reported data was based solely on information
about core id. Problems arise when eg cores in mulit-processor machines
don't have same id's on both processors or maybe one physical processor
contains more NUMA nodes.

This patch changes the approach how we detect processors and nodes. Now
we start at enumerating nodes and for each node processors, sockets and
threads are enumerated separately. This approach provides acurate data
that comply to docs about the nodeinfo structure. This also enables to
get rid of hacks: see commits 10d9038b744a69c8d4bd29c2e8c012a097481586,
ac9dd4a676f21b5e3ca6dbe0526f2a6709072beb. (Those changes in nodeinfo.c
are efectively reverted by this patch).

This patch also changes output of one of the tests, as the processor
topology is now acquired more precisely.
---
 src/nodeinfo.c                                     |  311 ++++++++++++--------
 .../linux-nodeinfo-sysfs-test-3-cpu-x86-output.txt |    2 +-
 2 files changed, 185 insertions(+), 128 deletions(-)

diff --git a/src/nodeinfo.c b/src/nodeinfo.c
index 819f954..ae713da 100644
--- a/src/nodeinfo.c
+++ b/src/nodeinfo.c
@@ -188,38 +188,131 @@ virNodeParseSocket(const char *dir, unsigned int cpu)
     return ret;
 }

+/* parses a node entry, returning number of processors in the node and
+ * filling arguments */
 static int
-virNodeParseNode(const char *sysfs_dir)
+virNodeParseNode(const char *node, int *sockets, int *cores, int *threads)
 {
-    char *file = NULL;
-    char *possible = NULL;
-    char *tmp;
     int ret = -1;
+    int processors = 0;
+    DIR *cpudir = NULL;
+    struct dirent *cpudirent = NULL;
+    int sock_max = 0;
+    cpu_set_t sock_map;
+    int sock;
+    cpu_set_t *core_maps = NULL;
+    int core;
+    int i;
+    int siblings;
+    unsigned int cpu;
+    int online;

-    if (virAsprintf(&file, "%s/node/possible", sysfs_dir) < 0) {
-        virReportOOMError();
+    *threads = 0;
+    *cores = 0;
+    *sockets = 0;
+
+    if (!(cpudir = opendir(node))) {
+        virReportSystemError(errno, _("cannot opendir %s"), node);
         goto cleanup;
     }
-    /* Assume that a missing node/possible file implies no NUMA
-     * support, and hence all cpus belong to the same node.  */
-    if (!virFileExists(file)) {
-        ret = 1;
+
+    /* enumerate sockets in the node */
+    CPU_ZERO(&sock_map);
+    while ((cpudirent = readdir(cpudir))) {
+        if (sscanf(cpudirent->d_name, "cpu%u", &cpu) != 1)
+            continue;
+
+        /* Parse socket */
+        sock = virNodeParseSocket(node, cpu);
+        CPU_SET(sock, &sock_map);
+
+        if (sock > sock_max)
+            sock_max = sock;
+    }
+
+    if (errno) {
+        virReportSystemError(errno, _("problem reading %s"), node);
         goto cleanup;
     }
-    if (virFileReadAll(file, 1024, &possible) < 0)
+
+    sock_max++;
+
+    /* allocate cpu maps for each socket */
+    if (VIR_ALLOC_N(core_maps, sock_max) < 0) {
+        virReportOOMError();
         goto cleanup;
-    if (virStrToLong_i(possible, &tmp, 10, &ret) < 0 ||
-        (*tmp == '-' && virStrToLong_i(tmp+1, &tmp, 10, &ret) < 0) ||
-        *tmp != '\n') {
-        nodeReportError(VIR_ERR_INTERNAL_ERROR,
-                        _("failed to parse possible nodes '%s'"), possible);
+    }
+
+    for (i = 0; i < sock_max; i++)
+        CPU_ZERO(&core_maps[i]);
+
+    /* iterate over all CPU's in the node */
+    rewinddir(cpudir);
+    while ((cpudirent = readdir(cpudir))) {
+        if (sscanf(cpudirent->d_name, "cpu%u", &cpu) != 1)
+            continue;
+
+        if ((online = virNodeGetCpuValue(node, cpu, "online", true)) < 0)
+            goto cleanup;
+
+        if (!online)
+            continue;
+
+        processors++;
+
+        /* Parse socket */
+        sock = virNodeParseSocket(node, cpu);
+        if (!CPU_ISSET(sock, &sock_map)) {
+            nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                            _("CPU socket topology has changed"));
+            goto cleanup;
+        }
+
+        /* Parse core */
+# if defined(__s390__) || \
+    defined(__s390x__)
+    /* logical cpu is equivalent to a core on s390 */
+        core = cpu;
+# else
+        core = virNodeGetCpuValue(node, cpu, "topology/core_id", false);
+# endif
+
+        CPU_SET(core, &core_maps[sock]);
+
+        if (!(siblings = virNodeCountThreadSiblings(node, cpu)))
+            goto cleanup;
+
+        if (siblings > *threads)
+            *threads = siblings;
+    }
+
+    if (errno) {
+        virReportSystemError(errno, _("problem reading %s"), node);
         goto cleanup;
     }
-    ret++;
+
+    /* finalize the returned data */
+    *sockets = CPU_COUNT(&sock_map);
+
+    for (i = 0; i < sock_max; i++) {
+        if (!CPU_ISSET(i, &sock_map))
+            continue;
+
+        core = CPU_COUNT(&core_maps[i]);
+        if (core > *cores)
+            *cores = core;
+    }
+
+    ret = processors;

 cleanup:
-    VIR_FREE(file);
-    VIR_FREE(possible);
+    /* don't shadow a more serious error */
+    if (cpudir && closedir(cpudir) < 0 && ret >= 0) {
+        virReportSystemError(errno, _("problem closing %s"), node);
+        ret = -1;
+    }
+    VIR_FREE(core_maps);
+
     return ret;
 }

@@ -228,20 +321,18 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
                              virNodeInfoPtr nodeinfo)
 {
     char line[1024];
-    DIR *cpudir = NULL;
-    struct dirent *cpudirent = NULL;
-    unsigned int cpu;
-    unsigned long core, sock, cur_threads;
-    cpu_set_t core_mask;
-    cpu_set_t socket_mask;
-    int online;
+    DIR *nodedir = NULL;
+    struct dirent *nodedirent = NULL;
+    int cpus, cores, socks, threads;
+    unsigned int node;
     int ret = -1;
+    char *sysfs_nodedir = NULL;
     char *sysfs_cpudir = NULL;
-    unsigned int cpu_cores = 0;

     nodeinfo->cpus = 0;
     nodeinfo->mhz = 0;
     nodeinfo->cores = 0;
+    nodeinfo->nodes = 0;

     /* Start with parsing /proc/cpuinfo; although it tends to have
      * fewer details.  Hyperthreads are ignored at this stage.  */
@@ -254,40 +345,22 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
             char *p;
             unsigned int ui;

-            buf += 9;
+            buf += 7;
             while (*buf && c_isspace(*buf))
                 buf++;

             if (*buf != ':' || !buf[1]) {
-                nodeReportError(VIR_ERR_INTERNAL_ERROR,
-                                "%s", _("parsing cpu MHz from cpuinfo"));
+                nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                                _("parsing cpu MHz from cpuinfo"));
                 goto cleanup;
             }

-            if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0
+            if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0 &&
                 /* Accept trailing fractional part.  */
-                && (*p == '\0' || *p == '.' || c_isspace(*p)))
+                (*p == '\0' || *p == '.' || c_isspace(*p)))
                 nodeinfo->mhz = ui;
         }

-        if (STRPREFIX(buf, "cpu cores")) {
-            char *p;
-            unsigned int ui;
-
-            buf += 9;
-            while (*buf && c_isspace(*buf))
-                buf++;
-
-            if (*buf != ':' || !buf[1]) {
-                nodeReportError(VIR_ERR_INTERNAL_ERROR,
-                                "%s", _("parsing cpu cores from cpuinfo"));
-                return -1;
-            }
-
-            if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0
-                && (*p == '\0' || c_isspace(*p)))
-                cpu_cores = ui;
-         }
 # elif defined(__powerpc__) || \
       defined(__powerpc64__)
         char *buf = line;
@@ -300,14 +373,14 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
                 buf++;

             if (*buf != ':' || !buf[1]) {
-                nodeReportError(VIR_ERR_INTERNAL_ERROR,
-                                "%s", _("parsing cpu MHz from cpuinfo"));
+                nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                                _("parsing cpu MHz from cpuinfo"));
                 goto cleanup;
             }

-            if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0
+            if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0 &&
                 /* Accept trailing fractional part.  */
-                && (*p == '\0' || *p == '.' || c_isspace(*p)))
+                (*p == '\0' || *p == '.' || c_isspace(*p)))
                 nodeinfo->mhz = ui;
             /* No other interesting infos are available in /proc/cpuinfo.
              * However, there is a line identifying processor's version,
@@ -328,115 +401,99 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
     /* OK, we've parsed clock speed out of /proc/cpuinfo. Get the
      * core, node, socket, thread and topology information from /sys
      */
-    if (virAsprintf(&sysfs_cpudir, "%s/cpu", sysfs_dir) < 0) {
+    if (virAsprintf(&sysfs_nodedir, "%s/node", sysfs_dir) < 0) {
         virReportOOMError();
         goto cleanup;
     }
-    cpudir = opendir(sysfs_cpudir);
-    if (cpudir == NULL) {
-        virReportSystemError(errno, _("cannot opendir %s"), sysfs_cpudir);
-        goto cleanup;
+
+    if (!(nodedir = opendir(sysfs_nodedir))) {
+        /* the host isn't probably running a NUMA architecture */
+        goto fallback;
     }

-    CPU_ZERO(&core_mask);
-    CPU_ZERO(&socket_mask);

-    while ((cpudirent = readdir(cpudir))) {
-        if (sscanf(cpudirent->d_name, "cpu%u", &cpu) != 1)
+    while ((nodedirent = readdir(nodedir))) {
+        if (sscanf(nodedirent->d_name, "node%u", &node) != 1)
             continue;

-        online = virNodeGetCpuValue(sysfs_cpudir, cpu, "online", true);
-        if (online < 0) {
-            closedir(cpudir);
+        nodeinfo->nodes++;
+
+        if (virAsprintf(&sysfs_cpudir, "%s/node/%s",
+                        sysfs_dir, nodedirent->d_name) < 0) {
+            virReportOOMError();
             goto cleanup;
         }
-        if (!online)
-            continue;
-        nodeinfo->cpus++;

-        /* Parse core */
-# if defined(__s390__) || \
-    defined(__s390x__)
-    /* logical cpu is equivalent to a core on s390 */
-        core = cpu;
-# else
-        core = virNodeGetCpuValue(sysfs_cpudir, cpu, "topology/core_id", false);
-# endif
-        if (!CPU_ISSET(core, &core_mask)) {
-            CPU_SET(core, &core_mask);
-            nodeinfo->cores++;
-        }
+        if ((cpus = virNodeParseNode(sysfs_cpudir, &socks,
+                                     &cores, &threads)) < 0)
+            goto cleanup;

-        /* Parse socket */
-        sock = virNodeParseSocket(sysfs_cpudir, cpu);
-        if (!CPU_ISSET(sock, &socket_mask)) {
-            CPU_SET(sock, &socket_mask);
-            nodeinfo->sockets++;
-        }
+        VIR_FREE(sysfs_cpudir);

-        cur_threads = virNodeCountThreadSiblings(sysfs_cpudir, cpu);
-        if (cur_threads == 0) {
-            closedir(cpudir);
-            goto cleanup;
-        }
-        if (cur_threads > nodeinfo->threads)
-            nodeinfo->threads = cur_threads;
+        nodeinfo->cpus += cpus;
+
+        if (socks > nodeinfo->sockets)
+            nodeinfo->sockets = socks;
+
+        if (cores > nodeinfo->cores)
+            nodeinfo->cores = cores;
+
+        if (threads > nodeinfo->threads)
+            nodeinfo->threads = threads;
     }
+
     if (errno) {
-        virReportSystemError(errno,
-                             _("problem reading %s"), sysfs_cpudir);
-        closedir(cpudir);
+        virReportSystemError(errno, _("problem reading %s"), sysfs_nodedir);
         goto cleanup;
     }
-    if (closedir(cpudir) < 0) {
-        virReportSystemError(errno,
-                             _("problem closing %s"), sysfs_cpudir);
+
+    if (nodeinfo->cpus && nodeinfo->nodes)
+        goto done;
+
+fallback:
+    VIR_FREE(sysfs_cpudir);
+
+    if (virAsprintf(&sysfs_cpudir, "%s/cpu", sysfs_dir) < 0) {
+        virReportOOMError();
         goto cleanup;
     }

-    if ((nodeinfo->nodes = virNodeParseNode(sysfs_dir)) <= 0)
+    if ((cpus = virNodeParseNode(sysfs_cpudir, &socks, &cores, &threads)) < 0)
         goto cleanup;

+    nodeinfo->nodes = 1;
+    nodeinfo->cpus = cpus;
+    nodeinfo->sockets = socks;
+    nodeinfo->cores = cores;
+    nodeinfo->threads = threads;
+
+done:
     /* There should always be at least one cpu, socket, node, and thread. */
     if (nodeinfo->cpus == 0) {
-        nodeReportError(VIR_ERR_INTERNAL_ERROR,
-                        "%s", _("no CPUs found"));
+        nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no CPUs found"));
         goto cleanup;
     }
+
     if (nodeinfo->sockets == 0) {
-        nodeReportError(VIR_ERR_INTERNAL_ERROR,
-                        "%s", _("no sockets found"));
+        nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no sockets found"));
         goto cleanup;
     }
+
     if (nodeinfo->threads == 0) {
-        nodeReportError(VIR_ERR_INTERNAL_ERROR,
-                        "%s", _("no threads found"));
+        nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no threads found"));
         goto cleanup;
     }

-    /* Platform like AMD Magny Cours has two NUMA nodes each package, and
-     * the two nodes share the same core ID set, it results in the cores
-     * number calculated from sysfs is not the actual cores number. Use
-     * "cpu cores" in /proc/cpuinfo as the cores number instead in this case.
-     * More details about the problem:
-     * https://www.redhat.com/archives/libvir-list/2012-May/msg00607.html
-     */
-    if (cpu_cores && (cpu_cores > nodeinfo->cores))
-        nodeinfo->cores = cpu_cores;
-
-    /* nodeinfo->sockets is supposed to be a number of sockets per NUMA node,
-     * however if NUMA nodes are not composed of whole sockets, we just lie
-     * about the number of NUMA nodes and force apps to check capabilities XML
-     * for the actual NUMA topology.
-     */
-    if (nodeinfo->sockets % nodeinfo->nodes == 0)
-        nodeinfo->sockets /= nodeinfo->nodes;
-    else
-        nodeinfo->nodes = 1;
-
     ret = 0;

 cleanup:
+    /* don't shadow a more serious error */
+    if (nodedir && closedir(nodedir) < 0 && ret >= 0) {
+        virReportSystemError(errno, _("problem closing %s"), sysfs_nodedir);
+        ret = -1;
+    }
+
+    VIR_FREE(sysfs_nodedir);
     VIR_FREE(sysfs_cpudir);
     return ret;
 }
diff --git a/tests/nodeinfodata/linux-nodeinfo-sysfs-test-3-cpu-x86-output.txt b/tests/nodeinfodata/linux-nodeinfo-sysfs-test-3-cpu-x86-output.txt
index 333187e..0306f86 100644
--- a/tests/nodeinfodata/linux-nodeinfo-sysfs-test-3-cpu-x86-output.txt
+++ b/tests/nodeinfodata/linux-nodeinfo-sysfs-test-3-cpu-x86-output.txt
@@ -1 +1 @@
-CPUs: 48/48, MHz: 2100, Nodes: 1, Sockets: 4, Cores: 12, Threads: 1
+CPUs: 48/48, MHz: 2100, Nodes: 8, Sockets: 1, Cores: 6, Threads: 1
-- 
1.7.8.6




More information about the libvir-list mailing list