[libvirt] [PATCHv2] SRIOV NIC offload feature discovery

James Chapman james.p.chapman at intel.com
Mon Feb 23 15:38:29 UTC 2015


Add functionality to libvirt that allows it to
query the ethtool interface for the availability
of certain NIC HW offload features.

Here is an example of the feature XML definition:

<device>
  <name>net_eth4_90_e2_ba_5e_a5_45</name>
  <path>/sys/devices/pci0000:00/0000:00:03.0/0000:08:00.1/net/eth4</path>
  <parent>pci_0000_08_00_1</parent>
  <capability type='net'>
    <interface>eth4</interface>
    <address>90:e2:ba:5e:a5:45</address>
    <link speed='10000' state='up'/>
    <feature name='rx'/>
    <feature name='tx'/>
    <feature name='sg'/>
    <feature name='tso'/>
    <feature name='gso'/>
    <feature name='gro'/>
    <feature name='rxvlan'/>
    <feature name='txvlan'/>
    <feature name='rxhash'/>
    <capability type='80203'/>
  </capability>
</device>
---
 docs/formatnode.html.in                           |  18 +++
 docs/schemas/nodedev.rng                          |  14 +++
 src/conf/device_conf.h                            |   6 +
 src/conf/node_device_conf.c                       |  45 ++++++-
 src/conf/node_device_conf.h                       |   2 +
 src/libvirt_private.syms                          |   1 +
 src/node_device/node_device_udev.c                |   4 +
 src/util/virnetdev.c                              | 143 ++++++++++++++++++++++
 src/util/virnetdev.h                              |   7 ++
 tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml |   9 ++
 tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml |   9 ++
 11 files changed, 257 insertions(+), 1 deletion(-)

diff --git a/docs/formatnode.html.in b/docs/formatnode.html.in
index b820a34..ba9a0f8 100644
--- a/docs/formatnode.html.in
+++ b/docs/formatnode.html.in
@@ -183,6 +183,24 @@
                 link. So far, the whole element is just for output,
                 not setting.
               </dd>
+              <dt><code>feature</code></dt>
+              <dd>If present, the hw offloads supported by this network
+                interface. Possible features are:
+                <dl>
+                    <dt><code>rx</code></dt><dd>rx-checksumming</dd>
+                    <dt><code>tx</code></dt><dd>tx-checksumming</dd>
+                    <dt><code>sg</code></dt><dd>scatter-gather</dd>
+                    <dt><code>tso</code></dt><dd>tcp-segmentation-offload</dd>
+                    <dt><code>ufo</code></dt><dd>udp-fragmentation-offload</dd>
+                    <dt><code>gso</code></dt><dd>generic-segmentation-offload</dd>
+                    <dt><code>gro</code></dt><dd>generic-receive-offload</dd>
+                    <dt><code>lro</code></dt><dd>large-receive-offload</dd>
+                    <dt><code>rxvlan</code></dt><dd>rx-vlan-offload</dd>
+                    <dt><code>txvlan</code></dt><dd>tx-vlan-offload</dd>
+                    <dt><code>ntuple</code></dt><dd>ntuple-filters</dd>
+                    <dt><code>rxhash</code></dt><dd>receive-hashing</dd>
+                </dl>
+              </dd>
               <dt><code>capability</code></dt>
               <dd>A network protocol exposed by the device, where the
                 attribute <code>type</code> can be "80203" for IEEE
diff --git a/docs/schemas/nodedev.rng b/docs/schemas/nodedev.rng
index 13c5402..744dccd 100644
--- a/docs/schemas/nodedev.rng
+++ b/docs/schemas/nodedev.rng
@@ -275,10 +275,24 @@
     <ref name="link-speed-state"/>
 
     <zeroOrMore>
+      <element name='feature'>
+        <attribute name='name'>
+          <ref name='netfeaturename'/>
+        </attribute>
+      </element>
+    </zeroOrMore>
+
+    <zeroOrMore>
       <ref name='subcapnet'/>
     </zeroOrMore>
   </define>
 
+  <define name='netfeaturename'>
+    <data type='string'>
+      <param name='pattern'>[a-zA-Z\-_]+</param>
+    </data>
+  </define>
+
   <define name='subcapnet'>
     <element name='capability'>
       <choice>
diff --git a/src/conf/device_conf.h b/src/conf/device_conf.h
index 7256cdc..091f2f0 100644
--- a/src/conf/device_conf.h
+++ b/src/conf/device_conf.h
@@ -62,6 +62,12 @@ struct _virInterfaceLink {
     unsigned int speed;      /* link speed in Mbits per second */
 };
 
+typedef struct _virDevFeature virDevFeature;
+typedef virDevFeature *virDevFeaturePtr;
+struct _virDevFeature {
+   char *name;             /* device feature */
+};
+
 int virDevicePCIAddressIsValid(virDevicePCIAddressPtr addr);
 
 int virDevicePCIAddressParseXML(xmlNodePtr node,
diff --git a/src/conf/node_device_conf.c b/src/conf/node_device_conf.c
index a728a00..76b53f0 100644
--- a/src/conf/node_device_conf.c
+++ b/src/conf/node_device_conf.c
@@ -437,6 +437,12 @@ char *virNodeDeviceDefFormat(const virNodeDeviceDef *def)
                 virBufferEscapeString(&buf, "<address>%s</address>\n",
                                   data->net.address);
             virInterfaceLinkFormat(&buf, &data->net.lnk);
+            if (data->net.features) {
+                for (i = 0; i < data->net.nfeatures; i++) {
+                    virBufferAsprintf(&buf, "<feature name='%s'/>\n",
+                            data->net.features[i].name);
+                }
+            }
             if (data->net.subtype != VIR_NODE_DEV_CAP_NET_LAST) {
                 const char *subtyp =
                     virNodeDevNetCapTypeToString(data->net.subtype);
@@ -927,8 +933,10 @@ virNodeDevCapNetParseXML(xmlXPathContextPtr ctxt,
                          union _virNodeDevCapData *data)
 {
     xmlNodePtr orignode, lnk;
-    int ret = -1;
+    size_t i = -1;
+    int ret = -1, n = -1;
     char *tmp;
+    xmlNodePtr *nodes = NULL;
 
     orignode = ctxt->node;
     ctxt->node = node;
@@ -943,6 +951,38 @@ virNodeDevCapNetParseXML(xmlXPathContextPtr ctxt,
 
     data->net.address = virXPathString("string(./address[1])", ctxt);
 
+    if ((n = virXPathNodeSet("./feature", ctxt, &nodes)) < 0)
+        goto out;
+
+    if (n > 0) {
+        if (VIR_RESIZE_N(data->net.features, data->net.nfeatures, data->net.nfeatures, n) < 0)
+            goto out;
+        data->net.nfeatures = n;
+    }
+
+    for (i = 0; i < n; i++) {
+        char *name;
+        size_t j;
+
+        if (!(name = virXMLPropString(nodes[i], "name")) || *name == 0) {
+            VIR_FREE(name);
+            virReportError(VIR_ERR_XML_ERROR, "%s",
+                    _("Invalid NIC offload feature name"));
+            goto out;
+        }
+        data->net.features[i].name = name;
+
+        for (j = 0; j < i; j++) {
+            if (STREQ(name, data->net.features[j].name)) {
+                virReportError(VIR_ERR_XML_ERROR,
+                        _("NIC offload feature '%s' specified more than once"),
+                        name);
+                VIR_FREE(name);
+                goto out;
+            }
+        }
+    }
+
     data->net.subtype = VIR_NODE_DEV_CAP_NET_LAST;
 
     tmp = virXPathString("string(./capability/@type)", ctxt);
@@ -1679,6 +1719,9 @@ void virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps)
     case VIR_NODE_DEV_CAP_NET:
         VIR_FREE(data->net.ifname);
         VIR_FREE(data->net.address);
+        for (i = 0; i < data->net.nfeatures; i++)
+            VIR_FREE(data->net.features[i].name);
+        VIR_FREE(data->net.features);
         break;
     case VIR_NODE_DEV_CAP_SCSI_HOST:
         VIR_FREE(data->scsi_host.wwnn);
diff --git a/src/conf/node_device_conf.h b/src/conf/node_device_conf.h
index fd5d179..918523a 100644
--- a/src/conf/node_device_conf.h
+++ b/src/conf/node_device_conf.h
@@ -141,6 +141,8 @@ struct _virNodeDevCapsDef {
             char *ifname;
             virInterfaceLink lnk;
             virNodeDevNetCapType subtype;  /* LAST -> no subtype */
+            size_t nfeatures;
+            virDevFeaturePtr features;
         } net;
         struct {
             unsigned int host;
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index c156b40..aa86560 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -1668,6 +1668,7 @@ virNetDevAddRoute;
 virNetDevClearIPAddress;
 virNetDevDelMulti;
 virNetDevExists;
+virNetDevGetFeatures;
 virNetDevGetIndex;
 virNetDevGetIPv4Address;
 virNetDevGetLinkInfo;
diff --git a/src/node_device/node_device_udev.c b/src/node_device/node_device_udev.c
index 03c7a0b..349733f 100644
--- a/src/node_device/node_device_udev.c
+++ b/src/node_device/node_device_udev.c
@@ -719,6 +719,10 @@ static int udevProcessNetworkInterface(struct udev_device *device,
     if (virNetDevGetLinkInfo(data->net.ifname, &data->net.lnk) < 0)
         goto out;
 
+    if (virNetDevGetFeatures(data->net.ifname, &data->net.features,
+                &data->net.nfeatures) < 0)
+        goto out;
+
     ret = 0;
 
  out:
diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c
index 2a0eb43..ab347c5 100644
--- a/src/util/virnetdev.c
+++ b/src/util/virnetdev.c
@@ -2728,3 +2728,146 @@ int virNetDevGetRxFilter(const char *ifname,
     *filter = fil;
     return ret;
 }
+
+#if defined(SIOCETHTOOL) && defined(HAVE_STRUCT_IFREQ)
+
+/**
+ * virNetDevFeatureAvailable
+ * This function checks for the availability of a network device feature
+ *
+ * @ifname: name of the interface
+ * @cmd: reference to an ethtool command structure
+ *
+ * Returns 1 if @cmd's data is non-zero after the query, 0 if it is zero, -1 on error.
+ */
+static int
+virNetDevFeatureAvailable(const char *ifname, struct ethtool_value *cmd)
+{
+    int ret = -1;
+    int sock = -1;
+    virIfreq ifr;
+
+    sock = socket(AF_LOCAL, SOCK_DGRAM, 0);
+    if (sock < 0) {
+        virReportSystemError(errno, "%s", _("Cannot open control socket"));
+        goto cleanup;
+    }
+
+    strcpy(ifr.ifr_name, ifname);
+    ifr.ifr_data = (void*) cmd;
+
+    if (ioctl(sock, SIOCETHTOOL, &ifr) != 0) {
+        switch (errno) {
+            case EPERM:
+                VIR_DEBUG(("virNetDevFeatureAvailable ioctl: permission denied"));
+                break;
+            case EINVAL:
+                VIR_DEBUG(("virNetDevFeatureAvailable ioctl: invalid request"));
+                break;
+            case EOPNOTSUPP:
+                VIR_DEBUG(("virNetDevFeatureAvailable ioctl: request not supported"));
+                break;
+            default:
+                virReportSystemError(errno, "%s", _("virNetDevFeatureAvailable ioctl error"));
+                goto cleanup;
+        }
+    }
+
+    ret = cmd->data > 0 ? 1: 0;
+ cleanup:
+    if (sock)
+        VIR_FORCE_CLOSE(sock);
+
+    return ret;
+}
+
+
+/**
+ * virNetDevGetFeatures:
+ * This function gets the NIC offload features available for ifname
+ *
+ * @ifname: name of the interface
+ * @features: network device feature structures
+ * @nfeatures: number of features available
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int
+virNetDevGetFeatures(const char *ifname,
+                     virDevFeaturePtr *features,
+                     size_t *nfeatures)
+{
+    int ret = -1;
+    size_t i = -1;
+    size_t j = -1;
+    struct ethtool_value cmd;
+    virDevFeaturePtr tmpfeature = NULL;
+
+    struct elem{
+        const char *name;
+        const int cmd;
+    };
+    /* legacy ethtool getters */
+    struct elem cmds[] = {
+        {"rx",     ETHTOOL_GRXCSUM},
+        {"tx",     ETHTOOL_GTXCSUM },
+        {"sg",     ETHTOOL_GSG},
+        {"tso",    ETHTOOL_GTSO},
+        {"gso",    ETHTOOL_GGSO},
+        {"gro",    ETHTOOL_GGRO},
+    };
+    /* ethtool masks */
+    struct elem flags[] = {
+        {"lro",    ETH_FLAG_LRO},
+        {"rxvlan", ETH_FLAG_RXVLAN},
+        {"txvlan", ETH_FLAG_TXVLAN},
+        {"ntuple", ETH_FLAG_NTUPLE},
+        {"rxhash", ETH_FLAG_RXHASH},
+    };
+
+    for (i = 0; i < ARRAY_CARDINALITY(cmds); i++) {
+        cmd.cmd = cmds[i].cmd;
+        if (virNetDevFeatureAvailable(ifname, &cmd)) {
+            if (VIR_ALLOC(tmpfeature) < 0)
+                goto cleanup;
+            if ((ret = VIR_STRDUP(tmpfeature->name, cmds[i].name)) != 1)
+                goto cleanup;
+            if (VIR_APPEND_ELEMENT(*features, *nfeatures, *tmpfeature) < 0)
+                goto cleanup;
+        }
+    }
+
+    cmd.cmd = ETHTOOL_GFLAGS;
+    for (j = 0; j < ARRAY_CARDINALITY(flags); j++) {
+        if (virNetDevFeatureAvailable(ifname, &cmd)) {
+            if (cmd.data & (flags[j].cmd)) {
+                if (VIR_ALLOC(tmpfeature) < 0)
+                    goto cleanup;
+                if ((ret = VIR_STRDUP(tmpfeature->name, flags[j].name)) != 1)
+                    goto cleanup;
+                if (VIR_APPEND_ELEMENT(*features, *nfeatures, *tmpfeature) < 0)
+                    goto cleanup;
+            }
+        }
+    }
+
+    ret = 0;
+ cleanup:
+
+    return ret;
+
+}
+#else
+int
+virNetDevGetFeatures(const char *ifname,
+                     virDevFeaturePtr *features,
+                     size_t *nfeatures)
+{
+    VIR_DEBUG("Getting network device features on %s is not implemented on this platform",
+              ifname);
+    *features = NULL;
+    *nfeatures = 0;
+
+    return 0;
+}
+#endif
diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h
index de8b480..22ef1a2 100644
--- a/src/util/virnetdev.h
+++ b/src/util/virnetdev.h
@@ -31,6 +31,8 @@
 # include "virpci.h"
 # include "device_conf.h"
 
+# include <linux/ethtool.h>
+typedef struct ethtool_cmd virEthCmd;
 # ifdef HAVE_STRUCT_IFREQ
 typedef struct ifreq virIfreq;
 # else
@@ -182,6 +184,11 @@ int virNetDevGetVirtualFunctionInfo(const char *vfname, char **pfname,
                                     int *vf)
     ATTRIBUTE_NONNULL(1);
 
+int virNetDevGetFeatures(const char *ifname,
+                     virDevFeaturePtr *features,
+                     size_t *nfeatures)
+    ATTRIBUTE_NONNULL(1) ATTRIBUTE_RETURN_CHECK;
+
 int virNetDevGetLinkInfo(const char *ifname,
                          virInterfaceLinkPtr lnk)
     ATTRIBUTE_NONNULL(1);
diff --git a/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml b/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml
index 970ccca..2a34fed 100644
--- a/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml
+++ b/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml
@@ -4,6 +4,15 @@
   <capability type='net'>
     <interface>eth0</interface>
     <address>00:13:02:b9:f9:d3</address>
+    <feature name='rx'/>
+    <feature name='tx'/>
+    <feature name='sg'/>
+    <feature name='tso'/>
+    <feature name='gso'/>
+    <feature name='gro'/>
+    <feature name='rxvlan'/>
+    <feature name='txvlan'/>
+    <feature name='rxhash'/>
     <capability type='80211'/>
   </capability>
 </device>
diff --git a/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml b/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml
index 741c959..81d398c 100644
--- a/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml
+++ b/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml
@@ -4,6 +4,15 @@
   <capability type='net'>
     <interface>eth1</interface>
     <address>00:15:58:2f:e9:55</address>
+    <feature name='rx'/>
+    <feature name='tx'/>
+    <feature name='sg'/>
+    <feature name='tso'/>
+    <feature name='gso'/>
+    <feature name='gro'/>
+    <feature name='rxvlan'/>
+    <feature name='txvlan'/>
+    <feature name='rxhash'/>
     <capability type='80203'/>
   </capability>
 </device>
-- 
1.9.1




More information about the libvir-list mailing list