[libvirt] [PATCHv3 2/4] nodeinfo: Fix gathering of nodeinfo data structure
Michal Privoznik
mprivozn at redhat.com
Tue Jul 10 10:56:43 UTC 2012
On 09.07.2012 17:17, Peter Krempa wrote:
> This patch changes the way data to fill the nodeinfo structure are
> gathered. We've gathere the test data by iterating processors an sockets
> separately from nodes. The reported data was based solely on information
> about core id. Problems arise when eg cores in mulit-processor machines
> don't have same id's on both processors or maybe one physical processor
> contains more NUMA nodes.
>
> This patch changes the approach how we detect processors and nodes. Now
> we start at enumerating nodes and for each node processors, sockets and
> threads are enumerated separately. This approach provides acurate data
> that comply to docs about the nodeinfo structure. This also enables to
> get rid of hacks: see commits 10d9038b744a69c8d4bd29c2e8c012a097481586,
> ac9dd4a676f21b5e3ca6dbe0526f2a6709072beb. (Those changes in nodeinfo.c
> are efectively reverted by this patch).
>
> This patch also changes output of one of the tests, as the processor
> topology is now acquired more precisely.
> ---
> src/nodeinfo.c | 311 ++++++++++++--------
> .../linux-nodeinfo-sysfs-test-3-cpu-x86-output.txt | 2 +-
> 2 files changed, 185 insertions(+), 128 deletions(-)
>
> diff --git a/src/nodeinfo.c b/src/nodeinfo.c
> index 819f954..ae713da 100644
> --- a/src/nodeinfo.c
> +++ b/src/nodeinfo.c
> @@ -188,38 +188,131 @@ virNodeParseSocket(const char *dir, unsigned int cpu)
> return ret;
> }
>
> +/* parses a node entry, returning number of processors in the node and
> + * filling arguments */
> static int
> -virNodeParseNode(const char *sysfs_dir)
> +virNodeParseNode(const char *node, int *sockets, int *cores, int *threads)
I'd mark these as ATTRIBUTE_NONNULL esp when ...
> {
> - char *file = NULL;
> - char *possible = NULL;
> - char *tmp;
> int ret = -1;
> + int processors = 0;
> + DIR *cpudir = NULL;
> + struct dirent *cpudirent = NULL;
> + int sock_max = 0;
> + cpu_set_t sock_map;
> + int sock;
> + cpu_set_t *core_maps = NULL;
> + int core;
> + int i;
> + int siblings;
> + unsigned int cpu;
> + int online;
>
> - if (virAsprintf(&file, "%s/node/possible", sysfs_dir) < 0) {
> - virReportOOMError();
> + *threads = 0;
> + *cores = 0;
> + *sockets = 0;
... doing this.
> +
> + if (!(cpudir = opendir(node))) {
> + virReportSystemError(errno, _("cannot opendir %s"), node);
> goto cleanup;
> }
> - /* Assume that a missing node/possible file implies no NUMA
> - * support, and hence all cpus belong to the same node. */
> - if (!virFileExists(file)) {
> - ret = 1;
> +
> + /* enumerate sockets in the node */
> + CPU_ZERO(&sock_map);
> + while ((cpudirent = readdir(cpudir))) {
I guess we want reentrant version of readdir here, don't we?
> + if (sscanf(cpudirent->d_name, "cpu%u", &cpu) != 1)
> + continue;
> +
> + /* Parse socket */
> + sock = virNodeParseSocket(node, cpu);
> + CPU_SET(sock, &sock_map);
> +
> + if (sock > sock_max)
> + sock_max = sock;
> + }
> +
> + if (errno) {
You should have reset errno before while() loop.
> + virReportSystemError(errno, _("problem reading %s"), node);
> goto cleanup;
> }
> - if (virFileReadAll(file, 1024, &possible) < 0)
> +
> + sock_max++;
> +
> + /* allocate cpu maps for each socket */
> + if (VIR_ALLOC_N(core_maps, sock_max) < 0) {
> + virReportOOMError();
> goto cleanup;
> - if (virStrToLong_i(possible, &tmp, 10, &ret) < 0 ||
> - (*tmp == '-' && virStrToLong_i(tmp+1, &tmp, 10, &ret) < 0) ||
> - *tmp != '\n') {
> - nodeReportError(VIR_ERR_INTERNAL_ERROR,
> - _("failed to parse possible nodes '%s'"), possible);
> + }
> +
> + for (i = 0; i < sock_max; i++)
> + CPU_ZERO(&core_maps[i]);
> +
> + /* iterate over all CPU's in the node */
> + rewinddir(cpudir);
> + while ((cpudirent = readdir(cpudir))) {
Again, s/readdir/readdir_r/
> + if (sscanf(cpudirent->d_name, "cpu%u", &cpu) != 1)
> + continue;
> +
> + if ((online = virNodeGetCpuValue(node, cpu, "online", true)) < 0)
> + goto cleanup;
> +
> + if (!online)
> + continue;
> +
> + processors++;
> +
> + /* Parse socket */
> + sock = virNodeParseSocket(node, cpu);
> + if (!CPU_ISSET(sock, &sock_map)) {
> + nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("CPU socket topology has changed"));
> + goto cleanup;
> + }
> +
> + /* Parse core */
> +# if defined(__s390__) || \
> + defined(__s390x__)
> + /* logical cpu is equivalent to a core on s390 */
Bad indentation.
> + core = cpu;
> +# else
> + core = virNodeGetCpuValue(node, cpu, "topology/core_id", false);
> +# endif
> +
> + CPU_SET(core, &core_maps[sock]);
> +
> + if (!(siblings = virNodeCountThreadSiblings(node, cpu)))
> + goto cleanup;
> +
> + if (siblings > *threads)
> + *threads = siblings;
> + }
> +
> + if (errno) {
> + virReportSystemError(errno, _("problem reading %s"), node);
> goto cleanup;
> }
> - ret++;
> +
> + /* finalize the returned data */
> + *sockets = CPU_COUNT(&sock_map);
> +
> + for (i = 0; i < sock_max; i++) {
> + if (!CPU_ISSET(i, &sock_map))
> + continue;
> +
> + core = CPU_COUNT(&core_maps[i]);
> + if (core > *cores)
> + *cores = core;
> + }
> +
> + ret = processors;
>
> cleanup:
> - VIR_FREE(file);
> - VIR_FREE(possible);
> + /* don't shadow a more serious error */
> + if (cpudir && closedir(cpudir) < 0 && ret >= 0) {
> + virReportSystemError(errno, _("problem closing %s"), node);
> + ret = -1;
> + }
> + VIR_FREE(core_maps);
> +
> return ret;
> }
>
> @@ -228,20 +321,18 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
> virNodeInfoPtr nodeinfo)
> {
> char line[1024];
> - DIR *cpudir = NULL;
> - struct dirent *cpudirent = NULL;
> - unsigned int cpu;
> - unsigned long core, sock, cur_threads;
> - cpu_set_t core_mask;
> - cpu_set_t socket_mask;
> - int online;
> + DIR *nodedir = NULL;
> + struct dirent *nodedirent = NULL;
> + int cpus, cores, socks, threads;
> + unsigned int node;
> int ret = -1;
> + char *sysfs_nodedir = NULL;
> char *sysfs_cpudir = NULL;
> - unsigned int cpu_cores = 0;
>
> nodeinfo->cpus = 0;
> nodeinfo->mhz = 0;
> nodeinfo->cores = 0;
> + nodeinfo->nodes = 0;
>
> /* Start with parsing /proc/cpuinfo; although it tends to have
> * fewer details. Hyperthreads are ignored at this stage. */
> @@ -254,40 +345,22 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
> char *p;
> unsigned int ui;
>
> - buf += 9;
> + buf += 7;
> while (*buf && c_isspace(*buf))
> buf++;
>
> if (*buf != ':' || !buf[1]) {
> - nodeReportError(VIR_ERR_INTERNAL_ERROR,
> - "%s", _("parsing cpu MHz from cpuinfo"));
> + nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("parsing cpu MHz from cpuinfo"));
> goto cleanup;
> }
>
> - if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0
> + if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0 &&
> /* Accept trailing fractional part. */
> - && (*p == '\0' || *p == '.' || c_isspace(*p)))
> + (*p == '\0' || *p == '.' || c_isspace(*p)))
> nodeinfo->mhz = ui;
> }
>
> - if (STRPREFIX(buf, "cpu cores")) {
> - char *p;
> - unsigned int ui;
> -
> - buf += 9;
> - while (*buf && c_isspace(*buf))
> - buf++;
> -
> - if (*buf != ':' || !buf[1]) {
> - nodeReportError(VIR_ERR_INTERNAL_ERROR,
> - "%s", _("parsing cpu cores from cpuinfo"));
> - return -1;
> - }
> -
> - if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0
> - && (*p == '\0' || c_isspace(*p)))
> - cpu_cores = ui;
> - }
> # elif defined(__powerpc__) || \
> defined(__powerpc64__)
> char *buf = line;
> @@ -300,14 +373,14 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
> buf++;
>
> if (*buf != ':' || !buf[1]) {
> - nodeReportError(VIR_ERR_INTERNAL_ERROR,
> - "%s", _("parsing cpu MHz from cpuinfo"));
> + nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("parsing cpu MHz from cpuinfo"));
> goto cleanup;
> }
>
> - if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0
> + if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0 &&
> /* Accept trailing fractional part. */
> - && (*p == '\0' || *p == '.' || c_isspace(*p)))
> + (*p == '\0' || *p == '.' || c_isspace(*p)))
> nodeinfo->mhz = ui;
> /* No other interesting infos are available in /proc/cpuinfo.
> * However, there is a line identifying processor's version,
> @@ -328,115 +401,99 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
> /* OK, we've parsed clock speed out of /proc/cpuinfo. Get the
> * core, node, socket, thread and topology information from /sys
> */
> - if (virAsprintf(&sysfs_cpudir, "%s/cpu", sysfs_dir) < 0) {
> + if (virAsprintf(&sysfs_nodedir, "%s/node", sysfs_dir) < 0) {
> virReportOOMError();
> goto cleanup;
> }
> - cpudir = opendir(sysfs_cpudir);
> - if (cpudir == NULL) {
> - virReportSystemError(errno, _("cannot opendir %s"), sysfs_cpudir);
> - goto cleanup;
> +
> + if (!(nodedir = opendir(sysfs_nodedir))) {
> + /* the host isn't probably running a NUMA architecture */
> + goto fallback;
> }
>
> - CPU_ZERO(&core_mask);
> - CPU_ZERO(&socket_mask);
>
> - while ((cpudirent = readdir(cpudir))) {
> - if (sscanf(cpudirent->d_name, "cpu%u", &cpu) != 1)
> + while ((nodedirent = readdir(nodedir))) {
> + if (sscanf(nodedirent->d_name, "node%u", &node) != 1)
> continue;
>
> - online = virNodeGetCpuValue(sysfs_cpudir, cpu, "online", true);
> - if (online < 0) {
> - closedir(cpudir);
> + nodeinfo->nodes++;
> +
> + if (virAsprintf(&sysfs_cpudir, "%s/node/%s",
> + sysfs_dir, nodedirent->d_name) < 0) {
> + virReportOOMError();
> goto cleanup;
> }
> - if (!online)
> - continue;
> - nodeinfo->cpus++;
>
> - /* Parse core */
> -# if defined(__s390__) || \
> - defined(__s390x__)
> - /* logical cpu is equivalent to a core on s390 */
> - core = cpu;
> -# else
> - core = virNodeGetCpuValue(sysfs_cpudir, cpu, "topology/core_id", false);
> -# endif
> - if (!CPU_ISSET(core, &core_mask)) {
> - CPU_SET(core, &core_mask);
> - nodeinfo->cores++;
> - }
> + if ((cpus = virNodeParseNode(sysfs_cpudir, &socks,
> + &cores, &threads)) < 0)
> + goto cleanup;
>
> - /* Parse socket */
> - sock = virNodeParseSocket(sysfs_cpudir, cpu);
> - if (!CPU_ISSET(sock, &socket_mask)) {
> - CPU_SET(sock, &socket_mask);
> - nodeinfo->sockets++;
> - }
> + VIR_FREE(sysfs_cpudir);
>
> - cur_threads = virNodeCountThreadSiblings(sysfs_cpudir, cpu);
> - if (cur_threads == 0) {
> - closedir(cpudir);
> - goto cleanup;
> - }
> - if (cur_threads > nodeinfo->threads)
> - nodeinfo->threads = cur_threads;
> + nodeinfo->cpus += cpus;
> +
> + if (socks > nodeinfo->sockets)
> + nodeinfo->sockets = socks;
> +
> + if (cores > nodeinfo->cores)
> + nodeinfo->cores = cores;
> +
> + if (threads > nodeinfo->threads)
> + nodeinfo->threads = threads;
> }
> +
> if (errno) {
> - virReportSystemError(errno,
> - _("problem reading %s"), sysfs_cpudir);
> - closedir(cpudir);
> + virReportSystemError(errno, _("problem reading %s"), sysfs_nodedir);
> goto cleanup;
> }
> - if (closedir(cpudir) < 0) {
> - virReportSystemError(errno,
> - _("problem closing %s"), sysfs_cpudir);
> +
> + if (nodeinfo->cpus && nodeinfo->nodes)
> + goto done;
> +
> +fallback:
> + VIR_FREE(sysfs_cpudir);
> +
> + if (virAsprintf(&sysfs_cpudir, "%s/cpu", sysfs_dir) < 0) {
> + virReportOOMError();
> goto cleanup;
> }
>
> - if ((nodeinfo->nodes = virNodeParseNode(sysfs_dir)) <= 0)
> + if ((cpus = virNodeParseNode(sysfs_cpudir, &socks, &cores, &threads)) < 0)
> goto cleanup;
>
> + nodeinfo->nodes = 1;
> + nodeinfo->cpus = cpus;
> + nodeinfo->sockets = socks;
> + nodeinfo->cores = cores;
> + nodeinfo->threads = threads;
> +
> +done:
> /* There should always be at least one cpu, socket, node, and thread. */
> if (nodeinfo->cpus == 0) {
> - nodeReportError(VIR_ERR_INTERNAL_ERROR,
> - "%s", _("no CPUs found"));
> + nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no CPUs found"));
> goto cleanup;
> }
> +
> if (nodeinfo->sockets == 0) {
> - nodeReportError(VIR_ERR_INTERNAL_ERROR,
> - "%s", _("no sockets found"));
> + nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no sockets found"));
> goto cleanup;
> }
> +
> if (nodeinfo->threads == 0) {
> - nodeReportError(VIR_ERR_INTERNAL_ERROR,
> - "%s", _("no threads found"));
> + nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no threads found"));
> goto cleanup;
> }
>
> - /* Platform like AMD Magny Cours has two NUMA nodes each package, and
> - * the two nodes share the same core ID set, it results in the cores
> - * number calculated from sysfs is not the actual cores number. Use
> - * "cpu cores" in /proc/cpuinfo as the cores number instead in this case.
> - * More details about the problem:
> - * https://www.redhat.com/archives/libvir-list/2012-May/msg00607.html
> - */
> - if (cpu_cores && (cpu_cores > nodeinfo->cores))
> - nodeinfo->cores = cpu_cores;
> -
> - /* nodeinfo->sockets is supposed to be a number of sockets per NUMA node,
> - * however if NUMA nodes are not composed of whole sockets, we just lie
> - * about the number of NUMA nodes and force apps to check capabilities XML
> - * for the actual NUMA topology.
> - */
> - if (nodeinfo->sockets % nodeinfo->nodes == 0)
> - nodeinfo->sockets /= nodeinfo->nodes;
> - else
> - nodeinfo->nodes = 1;
> -
> ret = 0;
>
> cleanup:
> + /* don't shadow a more serious error */
> + if (nodedir && closedir(nodedir) < 0 && ret >= 0) {
> + virReportSystemError(errno, _("problem closing %s"), sysfs_nodedir);
> + ret = -1;
> + }
> +
> + VIR_FREE(sysfs_nodedir);
> VIR_FREE(sysfs_cpudir);
> return ret;
> }
> diff --git a/tests/nodeinfodata/linux-nodeinfo-sysfs-test-3-cpu-x86-output.txt b/tests/nodeinfodata/linux-nodeinfo-sysfs-test-3-cpu-x86-output.txt
> index 333187e..0306f86 100644
> --- a/tests/nodeinfodata/linux-nodeinfo-sysfs-test-3-cpu-x86-output.txt
> +++ b/tests/nodeinfodata/linux-nodeinfo-sysfs-test-3-cpu-x86-output.txt
> @@ -1 +1 @@
> -CPUs: 48/48, MHz: 2100, Nodes: 1, Sockets: 4, Cores: 12, Threads: 1
> +CPUs: 48/48, MHz: 2100, Nodes: 8, Sockets: 1, Cores: 6, Threads: 1
>
Otherwise looking good. ACK with switching to reentrant readdir, errno
setting before first usage and comment indentation fixed.
Michal
More information about the libvir-list
mailing list