[libvirt] [PATCHv3 2/4] nodeinfo: Fix gathering of nodeinfo data structure

Michal Privoznik mprivozn at redhat.com
Tue Jul 10 10:56:43 UTC 2012


On 09.07.2012 17:17, Peter Krempa wrote:
> This patch changes the way data to fill the nodeinfo structure are
> gathered. We've gathere the test data by iterating processors an sockets
> separately from nodes. The reported data was based solely on information
> about core id. Problems arise when eg cores in mulit-processor machines
> don't have same id's on both processors or maybe one physical processor
> contains more NUMA nodes.
> 
> This patch changes the approach how we detect processors and nodes. Now
> we start at enumerating nodes and for each node processors, sockets and
> threads are enumerated separately. This approach provides acurate data
> that comply to docs about the nodeinfo structure. This also enables to
> get rid of hacks: see commits 10d9038b744a69c8d4bd29c2e8c012a097481586,
> ac9dd4a676f21b5e3ca6dbe0526f2a6709072beb. (Those changes in nodeinfo.c
> are efectively reverted by this patch).
> 
> This patch also changes output of one of the tests, as the processor
> topology is now acquired more precisely.
> ---
>  src/nodeinfo.c                                     |  311 ++++++++++++--------
>  .../linux-nodeinfo-sysfs-test-3-cpu-x86-output.txt |    2 +-
>  2 files changed, 185 insertions(+), 128 deletions(-)
> 
> diff --git a/src/nodeinfo.c b/src/nodeinfo.c
> index 819f954..ae713da 100644
> --- a/src/nodeinfo.c
> +++ b/src/nodeinfo.c
> @@ -188,38 +188,131 @@ virNodeParseSocket(const char *dir, unsigned int cpu)
>      return ret;
>  }
> 
> +/* parses a node entry, returning number of processors in the node and
> + * filling arguments */
>  static int
> -virNodeParseNode(const char *sysfs_dir)
> +virNodeParseNode(const char *node, int *sockets, int *cores, int *threads)

I'd mark these as ATTRIBUTE_NONNULL esp when ...

>  {
> -    char *file = NULL;
> -    char *possible = NULL;
> -    char *tmp;
>      int ret = -1;
> +    int processors = 0;
> +    DIR *cpudir = NULL;
> +    struct dirent *cpudirent = NULL;
> +    int sock_max = 0;
> +    cpu_set_t sock_map;
> +    int sock;
> +    cpu_set_t *core_maps = NULL;
> +    int core;
> +    int i;
> +    int siblings;
> +    unsigned int cpu;
> +    int online;
> 
> -    if (virAsprintf(&file, "%s/node/possible", sysfs_dir) < 0) {
> -        virReportOOMError();
> +    *threads = 0;
> +    *cores = 0;
> +    *sockets = 0;

... doing this.

> +
> +    if (!(cpudir = opendir(node))) {
> +        virReportSystemError(errno, _("cannot opendir %s"), node);
>          goto cleanup;
>      }
> -    /* Assume that a missing node/possible file implies no NUMA
> -     * support, and hence all cpus belong to the same node.  */
> -    if (!virFileExists(file)) {
> -        ret = 1;
> +
> +    /* enumerate sockets in the node */
> +    CPU_ZERO(&sock_map);
> +    while ((cpudirent = readdir(cpudir))) {

I guess we want reentrant version of readdir here, don't we?

> +        if (sscanf(cpudirent->d_name, "cpu%u", &cpu) != 1)
> +            continue;
> +
> +        /* Parse socket */
> +        sock = virNodeParseSocket(node, cpu);
> +        CPU_SET(sock, &sock_map);
> +
> +        if (sock > sock_max)
> +            sock_max = sock;
> +    }
> +
> +    if (errno) {

You should have reset errno before while() loop.

> +        virReportSystemError(errno, _("problem reading %s"), node);
>          goto cleanup;
>      }
> -    if (virFileReadAll(file, 1024, &possible) < 0)
> +
> +    sock_max++;
> +
> +    /* allocate cpu maps for each socket */
> +    if (VIR_ALLOC_N(core_maps, sock_max) < 0) {
> +        virReportOOMError();
>          goto cleanup;
> -    if (virStrToLong_i(possible, &tmp, 10, &ret) < 0 ||
> -        (*tmp == '-' && virStrToLong_i(tmp+1, &tmp, 10, &ret) < 0) ||
> -        *tmp != '\n') {
> -        nodeReportError(VIR_ERR_INTERNAL_ERROR,
> -                        _("failed to parse possible nodes '%s'"), possible);
> +    }
> +
> +    for (i = 0; i < sock_max; i++)
> +        CPU_ZERO(&core_maps[i]);
> +
> +    /* iterate over all CPU's in the node */
> +    rewinddir(cpudir);
> +    while ((cpudirent = readdir(cpudir))) {

Again, s/readdir/readdir_r/

> +        if (sscanf(cpudirent->d_name, "cpu%u", &cpu) != 1)
> +            continue;
> +
> +        if ((online = virNodeGetCpuValue(node, cpu, "online", true)) < 0)
> +            goto cleanup;
> +
> +        if (!online)
> +            continue;
> +
> +        processors++;
> +
> +        /* Parse socket */
> +        sock = virNodeParseSocket(node, cpu);
> +        if (!CPU_ISSET(sock, &sock_map)) {
> +            nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                            _("CPU socket topology has changed"));
> +            goto cleanup;
> +        }
> +
> +        /* Parse core */
> +# if defined(__s390__) || \
> +    defined(__s390x__)
> +    /* logical cpu is equivalent to a core on s390 */

Bad indentation.

> +        core = cpu;
> +# else
> +        core = virNodeGetCpuValue(node, cpu, "topology/core_id", false);
> +# endif
> +
> +        CPU_SET(core, &core_maps[sock]);
> +
> +        if (!(siblings = virNodeCountThreadSiblings(node, cpu)))
> +            goto cleanup;
> +
> +        if (siblings > *threads)
> +            *threads = siblings;
> +    }
> +
> +    if (errno) {
> +        virReportSystemError(errno, _("problem reading %s"), node);
>          goto cleanup;
>      }
> -    ret++;
> +
> +    /* finalize the returned data */
> +    *sockets = CPU_COUNT(&sock_map);
> +
> +    for (i = 0; i < sock_max; i++) {
> +        if (!CPU_ISSET(i, &sock_map))
> +            continue;
> +
> +        core = CPU_COUNT(&core_maps[i]);
> +        if (core > *cores)
> +            *cores = core;
> +    }
> +
> +    ret = processors;
> 
>  cleanup:
> -    VIR_FREE(file);
> -    VIR_FREE(possible);
> +    /* don't shadow a more serious error */
> +    if (cpudir && closedir(cpudir) < 0 && ret >= 0) {
> +        virReportSystemError(errno, _("problem closing %s"), node);
> +        ret = -1;
> +    }
> +    VIR_FREE(core_maps);
> +
>      return ret;
>  }
> 
> @@ -228,20 +321,18 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
>                               virNodeInfoPtr nodeinfo)
>  {
>      char line[1024];
> -    DIR *cpudir = NULL;
> -    struct dirent *cpudirent = NULL;
> -    unsigned int cpu;
> -    unsigned long core, sock, cur_threads;
> -    cpu_set_t core_mask;
> -    cpu_set_t socket_mask;
> -    int online;
> +    DIR *nodedir = NULL;
> +    struct dirent *nodedirent = NULL;
> +    int cpus, cores, socks, threads;
> +    unsigned int node;
>      int ret = -1;
> +    char *sysfs_nodedir = NULL;
>      char *sysfs_cpudir = NULL;
> -    unsigned int cpu_cores = 0;
> 
>      nodeinfo->cpus = 0;
>      nodeinfo->mhz = 0;
>      nodeinfo->cores = 0;
> +    nodeinfo->nodes = 0;
> 
>      /* Start with parsing /proc/cpuinfo; although it tends to have
>       * fewer details.  Hyperthreads are ignored at this stage.  */
> @@ -254,40 +345,22 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
>              char *p;
>              unsigned int ui;
> 
> -            buf += 9;
> +            buf += 7;
>              while (*buf && c_isspace(*buf))
>                  buf++;
> 
>              if (*buf != ':' || !buf[1]) {
> -                nodeReportError(VIR_ERR_INTERNAL_ERROR,
> -                                "%s", _("parsing cpu MHz from cpuinfo"));
> +                nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                                _("parsing cpu MHz from cpuinfo"));
>                  goto cleanup;
>              }
> 
> -            if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0
> +            if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0 &&
>                  /* Accept trailing fractional part.  */
> -                && (*p == '\0' || *p == '.' || c_isspace(*p)))
> +                (*p == '\0' || *p == '.' || c_isspace(*p)))
>                  nodeinfo->mhz = ui;
>          }
> 
> -        if (STRPREFIX(buf, "cpu cores")) {
> -            char *p;
> -            unsigned int ui;
> -
> -            buf += 9;
> -            while (*buf && c_isspace(*buf))
> -                buf++;
> -
> -            if (*buf != ':' || !buf[1]) {
> -                nodeReportError(VIR_ERR_INTERNAL_ERROR,
> -                                "%s", _("parsing cpu cores from cpuinfo"));
> -                return -1;
> -            }
> -
> -            if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0
> -                && (*p == '\0' || c_isspace(*p)))
> -                cpu_cores = ui;
> -         }
>  # elif defined(__powerpc__) || \
>        defined(__powerpc64__)
>          char *buf = line;
> @@ -300,14 +373,14 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
>                  buf++;
> 
>              if (*buf != ':' || !buf[1]) {
> -                nodeReportError(VIR_ERR_INTERNAL_ERROR,
> -                                "%s", _("parsing cpu MHz from cpuinfo"));
> +                nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                                _("parsing cpu MHz from cpuinfo"));
>                  goto cleanup;
>              }
> 
> -            if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0
> +            if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0 &&
>                  /* Accept trailing fractional part.  */
> -                && (*p == '\0' || *p == '.' || c_isspace(*p)))
> +                (*p == '\0' || *p == '.' || c_isspace(*p)))
>                  nodeinfo->mhz = ui;
>              /* No other interesting infos are available in /proc/cpuinfo.
>               * However, there is a line identifying processor's version,
> @@ -328,115 +401,99 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
>      /* OK, we've parsed clock speed out of /proc/cpuinfo. Get the
>       * core, node, socket, thread and topology information from /sys
>       */
> -    if (virAsprintf(&sysfs_cpudir, "%s/cpu", sysfs_dir) < 0) {
> +    if (virAsprintf(&sysfs_nodedir, "%s/node", sysfs_dir) < 0) {
>          virReportOOMError();
>          goto cleanup;
>      }
> -    cpudir = opendir(sysfs_cpudir);
> -    if (cpudir == NULL) {
> -        virReportSystemError(errno, _("cannot opendir %s"), sysfs_cpudir);
> -        goto cleanup;
> +
> +    if (!(nodedir = opendir(sysfs_nodedir))) {
> +        /* the host isn't probably running a NUMA architecture */
> +        goto fallback;
>      }
> 
> -    CPU_ZERO(&core_mask);
> -    CPU_ZERO(&socket_mask);
> 
> -    while ((cpudirent = readdir(cpudir))) {
> -        if (sscanf(cpudirent->d_name, "cpu%u", &cpu) != 1)
> +    while ((nodedirent = readdir(nodedir))) {
> +        if (sscanf(nodedirent->d_name, "node%u", &node) != 1)
>              continue;
> 
> -        online = virNodeGetCpuValue(sysfs_cpudir, cpu, "online", true);
> -        if (online < 0) {
> -            closedir(cpudir);
> +        nodeinfo->nodes++;
> +
> +        if (virAsprintf(&sysfs_cpudir, "%s/node/%s",
> +                        sysfs_dir, nodedirent->d_name) < 0) {
> +            virReportOOMError();
>              goto cleanup;
>          }
> -        if (!online)
> -            continue;
> -        nodeinfo->cpus++;
> 
> -        /* Parse core */
> -# if defined(__s390__) || \
> -    defined(__s390x__)
> -    /* logical cpu is equivalent to a core on s390 */
> -        core = cpu;
> -# else
> -        core = virNodeGetCpuValue(sysfs_cpudir, cpu, "topology/core_id", false);
> -# endif
> -        if (!CPU_ISSET(core, &core_mask)) {
> -            CPU_SET(core, &core_mask);
> -            nodeinfo->cores++;
> -        }
> +        if ((cpus = virNodeParseNode(sysfs_cpudir, &socks,
> +                                     &cores, &threads)) < 0)
> +            goto cleanup;
> 
> -        /* Parse socket */
> -        sock = virNodeParseSocket(sysfs_cpudir, cpu);
> -        if (!CPU_ISSET(sock, &socket_mask)) {
> -            CPU_SET(sock, &socket_mask);
> -            nodeinfo->sockets++;
> -        }
> +        VIR_FREE(sysfs_cpudir);
> 
> -        cur_threads = virNodeCountThreadSiblings(sysfs_cpudir, cpu);
> -        if (cur_threads == 0) {
> -            closedir(cpudir);
> -            goto cleanup;
> -        }
> -        if (cur_threads > nodeinfo->threads)
> -            nodeinfo->threads = cur_threads;
> +        nodeinfo->cpus += cpus;
> +
> +        if (socks > nodeinfo->sockets)
> +            nodeinfo->sockets = socks;
> +
> +        if (cores > nodeinfo->cores)
> +            nodeinfo->cores = cores;
> +
> +        if (threads > nodeinfo->threads)
> +            nodeinfo->threads = threads;
>      }
> +
>      if (errno) {
> -        virReportSystemError(errno,
> -                             _("problem reading %s"), sysfs_cpudir);
> -        closedir(cpudir);
> +        virReportSystemError(errno, _("problem reading %s"), sysfs_nodedir);
>          goto cleanup;
>      }
> -    if (closedir(cpudir) < 0) {
> -        virReportSystemError(errno,
> -                             _("problem closing %s"), sysfs_cpudir);
> +
> +    if (nodeinfo->cpus && nodeinfo->nodes)
> +        goto done;
> +
> +fallback:
> +    VIR_FREE(sysfs_cpudir);
> +
> +    if (virAsprintf(&sysfs_cpudir, "%s/cpu", sysfs_dir) < 0) {
> +        virReportOOMError();
>          goto cleanup;
>      }
> 
> -    if ((nodeinfo->nodes = virNodeParseNode(sysfs_dir)) <= 0)
> +    if ((cpus = virNodeParseNode(sysfs_cpudir, &socks, &cores, &threads)) < 0)
>          goto cleanup;
> 
> +    nodeinfo->nodes = 1;
> +    nodeinfo->cpus = cpus;
> +    nodeinfo->sockets = socks;
> +    nodeinfo->cores = cores;
> +    nodeinfo->threads = threads;
> +
> +done:
>      /* There should always be at least one cpu, socket, node, and thread. */
>      if (nodeinfo->cpus == 0) {
> -        nodeReportError(VIR_ERR_INTERNAL_ERROR,
> -                        "%s", _("no CPUs found"));
> +        nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no CPUs found"));
>          goto cleanup;
>      }
> +
>      if (nodeinfo->sockets == 0) {
> -        nodeReportError(VIR_ERR_INTERNAL_ERROR,
> -                        "%s", _("no sockets found"));
> +        nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no sockets found"));
>          goto cleanup;
>      }
> +
>      if (nodeinfo->threads == 0) {
> -        nodeReportError(VIR_ERR_INTERNAL_ERROR,
> -                        "%s", _("no threads found"));
> +        nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no threads found"));
>          goto cleanup;
>      }
> 
> -    /* Platform like AMD Magny Cours has two NUMA nodes each package, and
> -     * the two nodes share the same core ID set, it results in the cores
> -     * number calculated from sysfs is not the actual cores number. Use
> -     * "cpu cores" in /proc/cpuinfo as the cores number instead in this case.
> -     * More details about the problem:
> -     * https://www.redhat.com/archives/libvir-list/2012-May/msg00607.html
> -     */
> -    if (cpu_cores && (cpu_cores > nodeinfo->cores))
> -        nodeinfo->cores = cpu_cores;
> -
> -    /* nodeinfo->sockets is supposed to be a number of sockets per NUMA node,
> -     * however if NUMA nodes are not composed of whole sockets, we just lie
> -     * about the number of NUMA nodes and force apps to check capabilities XML
> -     * for the actual NUMA topology.
> -     */
> -    if (nodeinfo->sockets % nodeinfo->nodes == 0)
> -        nodeinfo->sockets /= nodeinfo->nodes;
> -    else
> -        nodeinfo->nodes = 1;
> -
>      ret = 0;
> 
>  cleanup:
> +    /* don't shadow a more serious error */
> +    if (nodedir && closedir(nodedir) < 0 && ret >= 0) {
> +        virReportSystemError(errno, _("problem closing %s"), sysfs_nodedir);
> +        ret = -1;
> +    }
> +
> +    VIR_FREE(sysfs_nodedir);
>      VIR_FREE(sysfs_cpudir);
>      return ret;
>  }
> diff --git a/tests/nodeinfodata/linux-nodeinfo-sysfs-test-3-cpu-x86-output.txt b/tests/nodeinfodata/linux-nodeinfo-sysfs-test-3-cpu-x86-output.txt
> index 333187e..0306f86 100644
> --- a/tests/nodeinfodata/linux-nodeinfo-sysfs-test-3-cpu-x86-output.txt
> +++ b/tests/nodeinfodata/linux-nodeinfo-sysfs-test-3-cpu-x86-output.txt
> @@ -1 +1 @@
> -CPUs: 48/48, MHz: 2100, Nodes: 1, Sockets: 4, Cores: 12, Threads: 1
> +CPUs: 48/48, MHz: 2100, Nodes: 8, Sockets: 1, Cores: 6, Threads: 1
> 

Otherwise looking good. ACK with switching to reentrant readdir, errno
setting before first usage and comment indentation fixed.

Michal




More information about the libvir-list mailing list