[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

Re: [libvirt] [PATCH 1/4] virDomainIOError public API and remote protocol



On 23.01.2012 14:30, Jiri Denemark wrote:
> We already provide ways to detect when a domain has been paused as a
> result of I/O error, but there was no way of getting the exact error or
> even the device that experienced it.  This new API may be used for both.
> ---
>  include/libvirt/libvirt.h.in |   19 +++++++++++++++
>  src/driver.h                 |    6 ++++
>  src/libvirt.c                |   53 ++++++++++++++++++++++++++++++++++++++++++
>  src/libvirt_public.syms      |    5 ++++
>  src/remote/remote_driver.c   |    1 +
>  src/remote/remote_protocol.x |   13 +++++++++-
>  src/remote_protocol-structs  |    9 +++++++
>  7 files changed, 105 insertions(+), 1 deletions(-)
> 
> diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in
> index 958e5a6..2c3423a 100644
> --- a/include/libvirt/libvirt.h.in
> +++ b/include/libvirt/libvirt.h.in
> @@ -3746,6 +3746,25 @@ int virConnectSetKeepAlive(virConnectPtr conn,
>                             int interval,
>                             unsigned int count);
>  
> +/**
> + * virDomainIoError:
> + *
> + * Disk I/O error.
> + */
> +typedef enum {
> +    VIR_DOMAIN_IOERROR_NONE     = 0, /* no error */
> +    VIR_DOMAIN_IOERROR_NO_SPACE = 1, /* no space left on the device */
> +    VIR_DOMAIN_IOERROR_UNSPEC   = 2, /* unspecified I/O error */

Just cosmetic, since this is expected to be extended one day, I'd like
to see _UNSPEC either at the beginning or at the end, but not in the
middle. However, the latter is not possible as it's API change.
But I can live with this thought.

> +
> +#ifdef VIR_ENUM_SENTINELS
> +    VIR_DOMAIN_IOERROR_LAST
> +#endif
> +} virDomainIoError;
> +
> +int virDomainGetIoError(virDomainPtr dom,
> +                        const char *dev,
> +                        unsigned int flags);
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/src/driver.h b/src/driver.h
> index 24636a4..1dd3760 100644
> --- a/src/driver.h
> +++ b/src/driver.h
> @@ -794,6 +794,11 @@ typedef int
>                                    int *nparams,
>                                    unsigned int flags);
>  
> +typedef int
> +    (*virDrvDomainGetIoError)(virDomainPtr dom,
> +                              const char *dev,
> +                              unsigned int flags);
> +
>  /**
>   * _virDriver:
>   *
> @@ -962,6 +967,7 @@ struct _virDriver {
>      virDrvNodeSuspendForDuration nodeSuspendForDuration;
>      virDrvDomainSetBlockIoTune domainSetBlockIoTune;
>      virDrvDomainGetBlockIoTune domainGetBlockIoTune;
> +    virDrvDomainGetIoError domainGetIoError;
>  };
>  
>  typedef int
> diff --git a/src/libvirt.c b/src/libvirt.c
> index 7b8adf7..99edca3 100644
> --- a/src/libvirt.c
> +++ b/src/libvirt.c
> @@ -17882,3 +17882,56 @@ error:
>      virDispatchError(dom->conn);
>      return -1;
>  }
> +
> +/**
> + * virDomainGetIoError:
> + * @dom: a domain object
> + * @dev: disk device to be inspected or NULL
> + * @flags: extra flags; not used yet, so callers should always pass 0
> + *
> + * The @disk parameter is either the device target (the dev attribute of
> + * target subelement), such as "vda", or NULL, in which case all disks will be
> + * inspected for errors. If only one disk experienced an I/O error, that error
> + * will be returned. However, if there are more disks with I/O errors, this
> + * function will fail and return -2, requiring the caller to check every
> + * device explicitly.
> + *
> + * Returns -2 if @dev is NULL and there are multiple disks with errors, -1 if
> + * the function fails to do its job, the I/O error (virDomainIoError) observed
> + * on the specified disk (or any disk if @dev is NULL). Namely, 0 is returned
> + * when there is no error on the disk.
> + */
> +int
> +virDomainGetIoError(virDomainPtr dom,
> +                    const char *dev,
> +                    unsigned int flags)

Do we want to narrow this only for disks? Maybe some day hypervisors
will report IO errors on other devices as well (haven't tried to find
out if they do now days, though). However, if we restrict this only for
disks, I'd change the name, so it is obvious that it's disk-only, e.g.
virDomainGetDiskIOError, so we can have this name reserved for future.
But if you take the other way of letting this API to report IO errors
for any device, I am perfectly comfortable with disk-only implementation
for now.


> diff --git a/src/libvirt_public.syms b/src/libvirt_public.syms
> index 4ca7216..d778fdc 100644
> --- a/src/libvirt_public.syms
> +++ b/src/libvirt_public.syms
> @@ -516,4 +516,9 @@ LIBVIRT_0.9.9 {
>          virDomainSetNumaParameters;
>  } LIBVIRT_0.9.8;
>  
> +LIBVIRT_0.9.10 {
> +    global:
> +        virDomainGetIoError;
> +} LIBVIRT_0.9.9;
> +
>  # .... define new API here using predicted next version number ....
> diff --git a/src/remote/remote_driver.c b/src/remote/remote_driver.c
> index e28840b..d89f45b 100644
> --- a/src/remote/remote_driver.c
> +++ b/src/remote/remote_driver.c
> @@ -4750,6 +4750,7 @@ static virDriver remote_driver = {
>      .domainGetBlockIoTune = remoteDomainGetBlockIoTune, /* 0.9.8 */
>      .domainSetNumaParameters = remoteDomainSetNumaParameters, /* 0.9.9 */
>      .domainGetNumaParameters = remoteDomainGetNumaParameters, /* 0.9.9 */
> +    .domainGetIoError = remoteDomainGetIoError, /* 0.9.10 */
>  };
>  
>  static virNetworkDriver network_driver = {
> diff --git a/src/remote/remote_protocol.x b/src/remote/remote_protocol.x
> index 514b0cc..b1635fb 100644
> --- a/src/remote/remote_protocol.x
> +++ b/src/remote/remote_protocol.x
> @@ -2349,6 +2349,16 @@ struct remote_node_suspend_for_duration_args {
>      unsigned int flags;
>  };
>  
> +struct remote_domain_get_io_error_args {
> +    remote_nonnull_domain dom;
> +    remote_string dev;
> +    unsigned int flags;
> +};
> +
> +struct remote_domain_get_io_error_ret {
> +    int result;
> +};
> +
>  
>  /*----- Protocol. -----*/
>  
> @@ -2654,7 +2664,8 @@ enum remote_procedure {
>      REMOTE_PROC_DOMAIN_SET_NUMA_PARAMETERS = 254, /* autogen autogen */
>      REMOTE_PROC_DOMAIN_GET_NUMA_PARAMETERS = 255, /* skipgen skipgen */
>      REMOTE_PROC_DOMAIN_SET_INTERFACE_PARAMETERS = 256, /* autogen autogen */
> -    REMOTE_PROC_DOMAIN_GET_INTERFACE_PARAMETERS = 257 /* skipgen skipgen */
> +    REMOTE_PROC_DOMAIN_GET_INTERFACE_PARAMETERS = 257, /* skipgen skipgen */
> +    REMOTE_PROC_DOMAIN_GET_IO_ERROR = 258 /* autogen autogen */
>  
>      /*
>       * Notice how the entries are grouped in sets of 10 ?
> diff --git a/src/remote_protocol-structs b/src/remote_protocol-structs
> index 2758315..1438485 100644
> --- a/src/remote_protocol-structs
> +++ b/src/remote_protocol-structs
> @@ -1832,6 +1832,14 @@ struct remote_node_suspend_for_duration_args {
>          uint64_t                   duration;
>          u_int                      flags;
>  };
> +struct remote_domain_get_io_error_args {
> +        remote_nonnull_domain      dom;
> +        remote_string              dev;
> +        u_int                      flags;
> +};
> +struct remote_domain_get_io_error_ret {
> +        int                        result;
> +};
>  enum remote_procedure {
>          REMOTE_PROC_OPEN = 1,
>          REMOTE_PROC_CLOSE = 2,
> @@ -2090,4 +2098,5 @@ enum remote_procedure {
>          REMOTE_PROC_DOMAIN_GET_NUMA_PARAMETERS = 255,
>          REMOTE_PROC_DOMAIN_SET_INTERFACE_PARAMETERS = 256,
>          REMOTE_PROC_DOMAIN_GET_INTERFACE_PARAMETERS = 257,
> +        REMOTE_PROC_DOMAIN_GET_IO_ERROR = 258,
>  };

This will need rebasing.

ACK, no matter which way you take.

Michal


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]