[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

Re: [libvirt] [PATCH] nodeinfo: skip offline CPUs



On Tue, Aug 10, 2010 at 03:44:37PM -0600, Eric Blake wrote:
> https://bugzilla.redhat.com/622515 - When hot-unplugging CPUs,
> libvirt failed to start a guest that had been pinned to CPUs that
> were still online, because it was failing to read status from
> unrelated offline CPUs.

 Argh, yes that's a nasty problem !

> Tested on a dual-core laptop, where I also discovered that, per
> http://www.cyberciti.biz/files/linux-kernel/Documentation/cpu-hotplug.txt,
> /sys/devices/system/cpu/cpu0/online does not exist on systems where it
> cannot be hot-unplugged.
> 
> * src/nodeinfo.c (linuxNodeInfoCPUPopulate): Ignore CPUs that are
> currently offline.  Detect readdir failure.
> (parse_socket): Move guts...
> (get_cpu_value): ...to new function, shared with...
> (cpu_online): New function.
> ---
>  src/nodeinfo.c |  109 +++++++++++++++++++++++++++++++++++++------------------
>  1 files changed, 73 insertions(+), 36 deletions(-)
> 
> diff --git a/src/nodeinfo.c b/src/nodeinfo.c
> index 5ec1bcf..6286aa2 100644
> --- a/src/nodeinfo.c
> +++ b/src/nodeinfo.c
> @@ -23,6 +23,7 @@
> 
>  #include <config.h>
> 
> +#include <stdbool.h>
>  #include <stdio.h>
>  #include <string.h>
>  #include <stdlib.h>
> @@ -60,6 +61,58 @@
>  int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
>                               virNodeInfoPtr nodeinfo);
> 
> +/* Return the positive decimal contents of the given
> + * CPU_SYS_PATH/cpu%u/FILE, or -1 on error.  If MISSING_OK and the
> + * file could not be found, return 1 instead of an error; this is
> + * because some machines cannot hot-unplug cpu0.  */
> +static int get_cpu_value(unsigned int cpu, const char *file, bool missing_ok)
> +{
> +    char *path;
> +    FILE *pathfp;
> +    char value_str[1024];
> +    char *tmp;
> +    int value = -1;
> +
> +    if (virAsprintf(&path, CPU_SYS_PATH "/cpu%u/%s", cpu, file) < 0) {
> +        virReportOOMError();
> +        return -1;
> +    }
> +
> +    pathfp = fopen(path, "r");
> +    if (pathfp == NULL) {
> +        if (missing_ok && errno == ENOENT)
> +            value = 1;
> +        else
> +            virReportSystemError(errno, _("cannot open %s"), path);
> +        goto cleanup;
> +    }
> +
> +    if (fgets(value_str, sizeof(value_str), pathfp) == NULL) {
> +        virReportSystemError(errno, _("cannot read from %s"), path);
> +        goto cleanup;
> +    }
> +    if (virStrToLong_i(value_str, &tmp, 10, &value) < 0) {
> +        nodeReportError(VIR_ERR_INTERNAL_ERROR,
> +                        _("could not convert '%s' to an integer"),
> +                        value_str);
> +        goto cleanup;
> +    }
> +
> +cleanup:
> +    if (pathfp)
> +        fclose(pathfp);
> +    VIR_FREE(path);
> +
> +    return value;
> +}
> +
> +/* Check if CPU is online via CPU_SYS_PATH/cpu%u/online.  Return 1 if online,
> +   0 if offline, and -1 on error.  */
> +static int cpu_online(unsigned int cpu)
> +{
> +    return get_cpu_value(cpu, "online", cpu == 0);
> +}
> +
>  static unsigned long count_thread_siblings(unsigned int cpu)
>  {
>      unsigned long ret = 0;
> @@ -106,41 +159,7 @@ cleanup:
> 
>  static int parse_socket(unsigned int cpu)
>  {
> -    char *path;
> -    FILE *pathfp;
> -    char socket_str[1024];
> -    char *tmp;
> -    int socket = -1;
> -
> -    if (virAsprintf(&path, CPU_SYS_PATH "/cpu%u/topology/physical_package_id",
> -                    cpu) < 0) {
> -        virReportOOMError();
> -        return -1;
> -    }
> -
> -    pathfp = fopen(path, "r");
> -    if (pathfp == NULL) {
> -        virReportSystemError(errno, _("cannot open %s"), path);
> -        VIR_FREE(path);
> -        return -1;
> -    }
> -
> -    if (fgets(socket_str, sizeof(socket_str), pathfp) == NULL) {
> -        virReportSystemError(errno, _("cannot read from %s"), path);
> -        goto cleanup;
> -    }
> -    if (virStrToLong_i(socket_str, &tmp, 10, &socket) < 0) {
> -        nodeReportError(VIR_ERR_INTERNAL_ERROR,
> -                        _("could not convert '%s' to an integer"),
> -                        socket_str);
> -        goto cleanup;
> -    }
> -
> -cleanup:
> -    fclose(pathfp);
> -    VIR_FREE(path);
> -
> -    return socket;
> +    return get_cpu_value(cpu, "topology/physical_package_id", false);
>  }
> 
>  int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
> @@ -153,6 +172,8 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
>      unsigned long cur_threads;
>      int socket;
>      unsigned long long socket_mask = 0;
> +    unsigned int remaining;
> +    int online;
> 
>      nodeinfo->cpus = 0;
>      nodeinfo->mhz = 0;
> @@ -222,15 +243,25 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
>      /* OK, we've parsed what we can out of /proc/cpuinfo.  Get the socket
>       * and thread information from /sys
>       */
> +    remaining = nodeinfo->cpus;
>      cpudir = opendir(CPU_SYS_PATH);
>      if (cpudir == NULL) {
>          virReportSystemError(errno, _("cannot opendir %s"), CPU_SYS_PATH);
>          return -1;
>      }
> -    while ((cpudirent = readdir(cpudir))) {
> +    while (errno = 0, remaining && (cpudirent = readdir(cpudir))) {
>          if (sscanf(cpudirent->d_name, "cpu%u", &cpu) != 1)
>              continue;
> 
> +        online = cpu_online(cpu);
> +        if (online < 0) {
> +            closedir(cpudir);
> +            return -1;
> +        }
> +        if (!online)
> +            continue;
> +        remaining--;
> +
>          socket = parse_socket(cpu);
>          if (socket < 0) {
>              closedir(cpudir);
> @@ -249,6 +280,12 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
>          if (cur_threads > nodeinfo->threads)
>              nodeinfo->threads = cur_threads;
>      }
> +    if (errno) {
> +        virReportSystemError(errno,
> +                             _("problem reading %s"), CPU_SYS_PATH);
> +        closedir(cpudir);
> +        return -1;
> +    }
> 
>      closedir(cpudir);
> 

  ACK, that looks fine !

    thanks !

Daniel

-- 
Daniel Veillard      | libxml Gnome XML XSLT toolkit  http://xmlsoft.org/
daniel veillard com  | Rpmfind RPM search engine http://rpmfind.net/
http://veillard.com/ | virtualization library  http://libvirt.org/


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]