[libvirt] [PATCH] nodedev: add RDMA and tx-udp_tnl-segmentation NIC capabilities

John Ferlan jferlan at redhat.com
Tue Jul 14 14:35:32 UTC 2015



On 06/18/2015 04:45 AM, Moshe Levi wrote:
> Adding functionality to libvirt that will allow
> it query the interface for the availability of RDMA and
> tx-udp_tnl-segmentation Offloading NIC capabilities

Any chance for a shorter name?  Eg txudptnl  (

> 
> Here is an example of the feature XML definition:
> 
> <device>
> <name>net_eth4_90_e2_ba_5e_a5_45</name>
>   <path>/sys/devices/pci0000:00/0000:00:03.0/0000:08:00.1/net/eth4</path>
>   <parent>pci_0000_08_00_1</parent>
>   <capability type='net'>
>     <interface>eth4</interface>
>     <address>90:e2:ba:5e:a5:45</address>
>     <link speed='10000' state='up'/>
>     <feature name='rx'/>
>     <feature name='tx'/>
>     <feature name='sg'/>
>     <feature name='tso'/>
>     <feature name='gso'/>
>     <feature name='gro'/>
>     <feature name='rxvlan'/>
>     <feature name='txvlan'/>
>     <feature name='rxhash'/>
>     <feature name='rdma'/>
>     <feature name='tx-udp_tnl-segmentation'/>
>     <capability type='80203'/>
>   </capability>
> </device>
> ---
>  docs/formatnode.html.in |    2 +
>  src/conf/device_conf.c  |    4 +-
>  src/conf/device_conf.h  |    2 +
>  src/util/virnetdev.c    |   97 ++++++++++++++++++++++++++++++++++++++--------
>  src/util/virnetdev.h    |    1 +
>  5 files changed, 88 insertions(+), 18 deletions(-)
> 

Any chance to have some sort of test?  Check out commit id 'c9027d8f4'
for an example when other features were added

> diff --git a/docs/formatnode.html.in b/docs/formatnode.html.in
> index 3ff1bef..9b32dd1 100644
> --- a/docs/formatnode.html.in
> +++ b/docs/formatnode.html.in
> @@ -199,6 +199,8 @@
>                      <dt><code>txvlan</code></dt><dd>tx-vlan-offload</dd>
>                      <dt><code>ntuple</code></dt><dd>ntuple-filters</dd>
>                      <dt><code>rxhash</code></dt><dd>receive-hashing</dd>
> +                    <dt><code>rdma</code></dt><dd>remote-direct-memory-access</dd>
> +                    <dt><code>tx-udp_tnl-segmentation</code></dt><dd>tx-udp-tunnel-segmentation</dd>

Here's where the shorter name would be used inside the <code>, but keep
the longer name inside the <dd>

>                  </dl>
>                </dd>
>                <dt><code>capability</code></dt>
> diff --git a/src/conf/device_conf.c b/src/conf/device_conf.c
> index 98808e2..8e8d557 100644
> --- a/src/conf/device_conf.c
> +++ b/src/conf/device_conf.c
> @@ -51,7 +51,9 @@ VIR_ENUM_IMPL(virNetDevFeature,
>                "rxvlan",
>                "txvlan",
>                "ntuple",
> -              "rxhash")
> +              "rxhash",
> +              "rdma",
> +              "tx-udp_tnl-segmentation")

shorter name

>  
>  int virDevicePCIAddressIsValid(virDevicePCIAddressPtr addr)
>  {
> diff --git a/src/conf/device_conf.h b/src/conf/device_conf.h
> index 7ea90f6..07298c9 100644
> --- a/src/conf/device_conf.h
> +++ b/src/conf/device_conf.h
> @@ -74,6 +74,8 @@ typedef enum {
>      VIR_NET_DEV_FEAT_TXVLAN,
>      VIR_NET_DEV_FEAT_NTUPLE,
>      VIR_NET_DEV_FEAT_RXHASH,
> +    VIR_NET_DEV_FEAT_RDMA,
> +    VIR_NET_DEV_FEAT_TX_UDP_TNL_SEGMENTATION,

shorter name

>      VIR_NET_DEV_FEAT_LAST
>  } virNetDevFeature;
>  
> diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c
> index e4fcd81..3086616 100644
> --- a/src/util/virnetdev.c
> +++ b/src/util/virnetdev.c
> @@ -87,6 +87,14 @@ VIR_LOG_INIT("util.netdev");
>  # define VIR_IFF_ALLMULTI 0
>  #endif
>  
> +#define RESOURCE_FILE_LEN 4096
> +#define TX_UDP_TNL 25
> +#define GFEATURES_SIZE 2
> +#define FEATURE_WORD(blocks, index, field)  ((blocks)[(index) / 32U].field)
> +#define FEATURE_FIELD_FLAG(index)      (1U << (index) % 32U)
> +#define FEATURE_BIT_IS_SET(blocks, index, field)        \
> +    (FEATURE_WORD(blocks, index, field) & FEATURE_FIELD_FLAG(index))
> +
>  typedef enum {
>      VIR_MCAST_TYPE_INDEX_TOKEN,
>      VIR_MCAST_TYPE_NAME_TOKEN,
> @@ -2943,6 +2951,58 @@ int virNetDevGetRxFilter(const char *ifname,
>      return ret;
>  }
>  
> +
> +/**
> + * virNetDevRDMAFeature
> + * This function checks for the availability of RDMA feature
> + * and add it to bitmap
> + *
> + * @ifname: name of the interface
> + * @out: add RDMA feature if exist to bitmap
> + *
> + * Returns 0 on success, -1 on failure.
> + */
> +static int
> +virNetDevRDMAFeature(const char *ifname,
> +                     virBitmapPtr *out)
> +{
> +    char *eth_devpath = NULL;
> +    char *ib_devpath = NULL;
> +    char *eth_res_buf = NULL;
> +    char *ib_res_buf = NULL;
> +    struct dirent *dp;
> +
> +    DIR *dirp = opendir(SYSFS_INFINIBAND_DIR);

The extra space should be between dirp and if (!dirp) not between dp and
dirp...

Also leak dirp.

Add an "int ret = -1" and use standard "goto cleanup;" type logic on
failures in order to clean up anything allocated.

So the construct/code would be:

    DIR *dirp = NULL;

    if (!(dirp = opendir(SYSFS_INFINIBAND_DIR))) {

> +    if (dirp == NULL) {
> +        virReportSystemError(errno,
> +                             _("Failed to opendir path '%s'"),
> +                             SYSFS_INFINIBAND_DIR);
> +        return -1;
           goto cleanup;
> +    }
> +
> +    if (virAsprintf(&eth_devpath, SYSFS_NET_DIR "%s/device/resource", ifname) < 0)

eth_devpath would be leaked

> +        return -1;
> +    if (!virFileExists(eth_devpath))
> +        return 0;

     ret = 0 ???
     goto cleanup;

It's a success to not have the file?  Wouldn't that make it difficult
for the caller to determine if something existed and/or whether the
bitmap was updated?

If that doesn't matter then fine, but still have proper exit syntax


> +    if (virFileReadAll(eth_devpath, RESOURCE_FILE_LEN, &eth_res_buf) < 0)

eth_res_buf would be leaked

> +        return -1;

goto cleanup;

> +    while (virDirRead(dirp, &dp, SYSFS_INFINIBAND_DIR) > 0) {
> +        if (STREQ(dp->d_name, ".") ||
> +            STREQ(dp->d_name, ".."))
> +            continue;

Consider instead:

    if (dp->d_name[0] == '.')
        continue;

> +
> +        if (virAsprintf(&ib_devpath, SYSFS_INFINIBAND_DIR "%s/device/resource", dp->d_name) < 0)
> +            continue;

ib_devpath is leaked

> +        if (virFileReadAll(ib_devpath, RESOURCE_FILE_LEN, &ib_res_buf) < 0)

ib_res_buf is leaked multiple times

        VIR_FREE(ib_devpath);

> +            continue;
> +        if  (STREQ(eth_res_buf, ib_res_buf)) {

              ^
Extra space

> +            ignore_value(virBitmapSetBit(*out, VIR_NET_DEV_FEAT_RDMA));
> +            break;
> +        }
           VIR_FREE(ib_res_buf);
> +    }
> +    return 0;

here you would have:
    ret = 0;

 cleanup:
    closedir(dirp);
    VIR_FREE(eth_devpath);
    VIR_FREE(ib_devpath);
    VIR_FREE(eth_res_buf);
    VIR_FREE(ib_res_buf);
    return ret;

> +}
> +
>  #if defined(SIOCETHTOOL) && defined(HAVE_STRUCT_IFREQ)
>  
>  /**
> @@ -2952,12 +3012,10 @@ int virNetDevGetRxFilter(const char *ifname,
>   * @ifname: name of the interface
>   * @cmd: reference to an ethtool command structure

Would this parameter need an adjustment to indicate either command or
feature structure?

>   *
> - * Returns 0 on success, -1 on failure.

hrmph - looks like this was wrong ... 0 if not found, 1 on success, and
-1 on failure...

I'm 50/50 on the switch from int to void... Of course the name of the
function to me implies a value return

>   */
> -static int
> -virNetDevFeatureAvailable(const char *ifname, struct ethtool_value *cmd)
> +static void
> +virNetDevFeatureAvailable(const char *ifname, void *cmd)
>  {
> -    int ret = -1;
>      int sock = -1;
>      virIfreq ifr;
>  
> @@ -2969,8 +3027,7 @@ virNetDevFeatureAvailable(const char *ifname, struct ethtool_value *cmd)
>  
>      memset(&ifr, 0, sizeof(ifr));
>      strcpy(ifr.ifr_name, ifname);
> -    ifr.ifr_data = (void*) cmd;
> -
> +    ifr.ifr_data = cmd;
>      if (ioctl(sock, SIOCETHTOOL, &ifr) != 0) {
>          switch (errno) {
>              case EPERM:
> @@ -2988,12 +3045,9 @@ virNetDevFeatureAvailable(const char *ifname, struct ethtool_value *cmd)
>          }
>      }
>  
> -    ret = cmd->data > 0 ? 1: 0;
>   cleanup:
>      if (sock)
>          VIR_FORCE_CLOSE(sock);
> -
> -    return ret;
>  }
>  
>  
> @@ -3013,7 +3067,7 @@ virNetDevGetFeatures(const char *ifname,
>  {
>      size_t i = -1;
>      struct ethtool_value cmd = { 0 };
> -
> +    struct ethtool_gfeatures g_cmd = { 0 };
>      struct elem{
>          const int cmd;
>          const virNetDevFeature feat;
> @@ -3037,7 +3091,8 @@ virNetDevGetFeatures(const char *ifname,
>  
>      for (i = 0; i < ARRAY_CARDINALITY(cmds); i++) {
>          cmd.cmd = cmds[i].cmd;
> -        if (virNetDevFeatureAvailable(ifname, &cmd))
> +        virNetDevFeatureAvailable(ifname, &cmd);
> +        if (cmd.data > 0)

Could just go with cmd.data unless it could be negative...

>              ignore_value(virBitmapSetBit(*out, cmds[i].feat));
>      }
>  
> @@ -3061,14 +3116,22 @@ virNetDevGetFeatures(const char *ifname,
>      };
>  
>      cmd.cmd = ETHTOOL_GFLAGS;
> -    if (virNetDevFeatureAvailable(ifname, &cmd)) {
> -        for (j = 0; j < ARRAY_CARDINALITY(flags); j++) {
> -            if (cmd.data & flags[j].cmd)
> -                ignore_value(virBitmapSetBit(*out, flags[j].feat));
> +    virNetDevFeatureAvailable(ifname, &cmd);
> +        if (cmd.data > 0) {
> +            for (j = 0; j < ARRAY_CARDINALITY(flags); j++) {
> +                if (cmd.data & flags[j].cmd)
> +                    ignore_value(virBitmapSetBit(*out, flags[j].feat));
> +            }

There are 4 extra spaces for each of the previous 5 lines...

Also I suppose you could just use "if (cmd.data)", unless of course it
could be negative...

>          }
> -    }
> -# endif

What would happen if some arch didn't have ETHTOOL_GFEATURES? or is that
assumed due to opening "#if"?

#if defined(SIOCETHTOOL) && defined(HAVE_STRUCT_IFREQ)


> +    g_cmd.cmd = ETHTOOL_GFEATURES;
> +    g_cmd.size = GFEATURES_SIZE;
> +    virNetDevFeatureAvailable(ifname, &g_cmd);
> +    if FEATURE_BIT_IS_SET(g_cmd.features, TX_UDP_TNL, active)

This looks odd... should be "if (FEATURE_BIT...))

> +        ignore_value(virBitmapSetBit(*out, VIR_NET_DEV_FEAT_TX_UDP_TNL_SEGMENTATION));

Long name...

Looking forward to a V2

John

>  
> +# endif
> +    if (virNetDevRDMAFeature(ifname, out))
> +        return -1;
>      return 0;
>  }
>  #else
> diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h
> index 190b70e..fff881c 100644
> --- a/src/util/virnetdev.h
> +++ b/src/util/virnetdev.h
> @@ -210,6 +210,7 @@ int virNetDevGetRcvAllMulti(const char *ifname, bool *receive)
>      ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK;
>  
>  # define SYSFS_NET_DIR "/sys/class/net/"
> +# define SYSFS_INFINIBAND_DIR "/sys/class/infiniband/"
>  int virNetDevSysfsFile(char **pf_sysfs_device_link,
>                         const char *ifname,
>                         const char *file)
> 




More information about the libvir-list mailing list