[libvirt] [PATCH v2 6/8] bandwidth: Create network bandwidth (un)plug functions

Michal Privoznik mprivozn at redhat.com
Tue Dec 11 10:42:45 UTC 2012


On 11.12.2012 09:52, Laine Stump wrote:
> On 12/04/2012 02:19 PM, Michal Privoznik wrote:
>> Network should be notified if we plug in or unplug an
>> interface, so it can perform some action, e.g. set/unset
>> network part of QoS. However, we are doing this in very
>> early stage, so iface->ifname isn't filled in yet. So
>> whenever we want to report an error, we must use a different
>> identifier, e.g. the MAC address.
>> ---
>>  src/Makefile.am             |    7 +-
>>  src/conf/domain_conf.h      |    1 +
>>  src/conf/network_conf.c     |   21 ++++
>>  src/conf/network_conf.h     |    4 +
>>  src/libvirt_network.syms    |   13 +++
>>  src/libvirt_private.syms    |    8 --
>>  src/network/bridge_driver.c |  223 ++++++++++++++++++++++++++++++++++++++++++-
>>  7 files changed, 266 insertions(+), 11 deletions(-)
>>  create mode 100644 src/libvirt_network.syms
>>
>> diff --git a/src/Makefile.am b/src/Makefile.am
>> index 01cb995..04378d1 100644
>> --- a/src/Makefile.am
>> +++ b/src/Makefile.am
>> @@ -1373,6 +1373,10 @@ if WITH_ATOMIC_OPS_PTHREAD
>>  USED_SYM_FILES += libvirt_atomic.syms
>>  endif
>>  
>> +if WITH_NETWORK
>> +USED_SYM_FILES += libvirt_network.syms
>> +endif
> 
> Was this necessary for some reason, or did you just decide it made
> things more organized?

Just more organized.

> 
>> +
>>  EXTRA_DIST += \
>>    libvirt_public.syms		\
>>    libvirt_private.syms		\
>> @@ -1386,7 +1390,8 @@ EXTRA_DIST += \
>>    libvirt_sasl.syms		\
>>    libvirt_vmx.syms		\
>>    libvirt_xenxs.syms	\
>> -  libvirt_libssh2.syms
>> +  libvirt_libssh2.syms	\
>> +  libvirt_network.syms
>>  
>>  GENERATED_SYM_FILES = libvirt.syms libvirt.def libvirt_qemu.def
>>  
>> diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
>> index 4ab15e9..b4d149b 100644
>> --- a/src/conf/domain_conf.h
>> +++ b/src/conf/domain_conf.h
>> @@ -807,6 +807,7 @@ struct _virDomainActualNetDef {
>>      virNetDevVPortProfilePtr virtPortProfile;
>>      virNetDevBandwidthPtr bandwidth;
>>      virNetDevVlan vlan;
>> +    unsigned int class_id; /* class ID for bandwidth 'floor' */
>>  };
>>  
>>  /* Stores the virtual network interface configuration */
>> diff --git a/src/conf/network_conf.c b/src/conf/network_conf.c
>> index 2cb9e81..441ccdc 100644
>> --- a/src/conf/network_conf.c
>> +++ b/src/conf/network_conf.c
>> @@ -47,6 +47,9 @@
>>  
>>  #define MAX_BRIDGE_ID 256
>>  #define VIR_FROM_THIS VIR_FROM_NETWORK
>> +#define NEXT_FREE_CLASS_ID 3
>> +/* currently, /sbin/tc implementation allows up 16 bits for minor class size */
>> +#define CLASS_ID_BITMAP_SIZE (1<<16)
>>  
>>  VIR_ENUM_IMPL(virNetworkForward,
>>                VIR_NETWORK_FORWARD_LAST,
>> @@ -319,10 +322,28 @@ virNetworkAssignDef(virNetworkObjListPtr nets,
>>      virNetworkObjLock(network);
>>      network->def = def;
>>  
>> +    if (!(network->class_id = virBitmapNew(CLASS_ID_BITMAP_SIZE))) {
>> +        virReportOOMError();
>> +        goto error;
>> +    }
>> +
>> +    if (virBitmapSetBit(network->class_id, 0) < 0 ||
>> +        virBitmapSetBit(network->class_id, 1) < 0 ||
>> +        virBitmapSetBit(network->class_id, 2) < 0) {
>> +        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>> +                       _("unable to initialize class id bitmap"));
>> +        goto error;
> 
> Those are all guaranteed to be successful (as long as
> CLASS_ID_BITMAP_SIZE > 3) and it looks like most uses of virBitmapSetBit
> are inside ignore_value(); this looks like another good candidate to do
> that - it will eliminate one more message that needs translating :-)

Yeah, I was thinking about that as another possibility but had chosen
this one. Can't recall why :)

> 
>> +    }
>> +
>> +    network->def = def;
>>      nets->objs[nets->count] = network;
>>      nets->count++;
>>  
>>      return network;
>> +error:
>> +    virNetworkObjUnlock(network);
>> +    virNetworkObjFree(network);
>> +    return NULL;
>>  
>>  }
>>  
>> diff --git a/src/conf/network_conf.h b/src/conf/network_conf.h
>> index 3e46304..20f8a51 100644
>> --- a/src/conf/network_conf.h
>> +++ b/src/conf/network_conf.h
>> @@ -38,6 +38,7 @@
>>  # include "virnetdevvlan.h"
>>  # include "virmacaddr.h"
>>  # include "device_conf.h"
>> +# include "bitmap.h"
>>  
>>  enum virNetworkForwardType {
>>      VIR_NETWORK_FORWARD_NONE   = 0,
>> @@ -221,6 +222,9 @@ struct _virNetworkObj {
>>  
>>      virNetworkDefPtr def; /* The current definition */
>>      virNetworkDefPtr newDef; /* New definition to activate at shutdown */
>> +
>> +    virBitmapPtr class_id; /* bitmap of class IDs for QoS */
>> +    unsigned long long floor_sum; /* sum of all 'floor'-s of attached NICs */
>>  };
>>  
>>  typedef struct _virNetworkObjList virNetworkObjList;
>> diff --git a/src/libvirt_network.syms b/src/libvirt_network.syms
>> new file mode 100644
>> index 0000000..fc8d415
>> --- /dev/null
>> +++ b/src/libvirt_network.syms
>> @@ -0,0 +1,13 @@
>> +#
>> +# Network-specific symbols
>> +#
>> +
>> +# virnetdevbandwidth.h
>> +virNetDevBandwidthClear;
>> +virNetDevBandwidthCopy;
>> +virNetDevBandwidthEqual;
>> +virNetDevBandwidthFree;
>> +virNetDevBandwidthSet;
>> +virNetDevBandwidthPlug;
>> +virNetDevBandwidthUnplug;
>> +virNetDevBandwidthUpdateRate;
>> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
>> index 41e2629..f5648a0 100644
>> --- a/src/libvirt_private.syms
>> +++ b/src/libvirt_private.syms
>> @@ -1497,14 +1497,6 @@ virNetDevSetOnline;
>>  virNetDevValidateConfig;
>>  
>>  
>> -# virnetdevbandwidth.h
>> -virNetDevBandwidthClear;
>> -virNetDevBandwidthCopy;
>> -virNetDevBandwidthEqual;
>> -virNetDevBandwidthFree;
>> -virNetDevBandwidthSet;
>> -
>> -
> 
> I don't really like that this movement of existing symbols into a new
> symbol file was mixed into this patch. Also, I'm pretty sure this isn't
> the intended use of the libvirt_xxx.syms files - what should be in this
> file are symbols that are *defined* if --with-network is passed to
> configure (i.e. if WITH_NETWORK is defined). What you have listed here
> are symbols that are *always* defined, but currently happen to only be
> *used* if WITH_NETWORK is defined.

D'oh.

> 
> 
>>  # virnetdevbridge.h
>>  virNetDevBridgeAddPort;
>>  virNetDevBridgeCreate;
>> diff --git a/src/network/bridge_driver.c b/src/network/bridge_driver.c
>> index 63be5e2..15cbc87 100644
>> --- a/src/network/bridge_driver.c
>> +++ b/src/network/bridge_driver.c
>> @@ -121,6 +121,11 @@ static int networkShutdownNetworkExternal(struct network_driver *driver,
>>  static void networkReloadIptablesRules(struct network_driver *driver);
>>  static void networkRefreshDaemons(struct network_driver *driver);
>>  
>> +static int networkPlugBandwidth(virNetworkObjPtr net,
>> +                                virDomainNetDefPtr iface);
>> +static int networkUnplugBandwidth(virNetworkObjPtr net,
>> +                                  virDomainNetDefPtr iface);
>> +
>>  static struct network_driver *driverState = NULL;
>>  
>>  static char *
>> @@ -3469,6 +3474,7 @@ networkAllocateActualDevice(virDomainNetDefPtr iface)
>>      enum virDomainNetType actualType = iface->type;
>>      virNetworkObjPtr network = NULL;
>>      virNetworkDefPtr netdef = NULL;
>> +    virNetDevBandwidthPtr bandwidth = NULL;
>>      virPortGroupDefPtr portgroup = NULL;
>>      virNetDevVPortProfilePtr virtport = iface->virtPortProfile;
>>      virNetDevVlanPtr vlan = NULL;
>> @@ -3507,7 +3513,13 @@ networkAllocateActualDevice(virDomainNetDefPtr iface)
>>       * (already in NetDef). Otherwise, if there is bandwidth info in
>>       * the portgroup, fill that into the ActualDef.
>>       */
>> -    if (portgroup && !iface->bandwidth) {
>> +
>> +    if (iface->bandwidth)
>> +        bandwidth = iface->bandwidth;
>> +    else if (portgroup && portgroup->bandwidth)
>> +        bandwidth = portgroup->bandwidth;
>> +
>> +    if (bandwidth) {
>>          if (!iface->data.network.actual
>>              && (VIR_ALLOC(iface->data.network.actual) < 0)) {
>>              virReportOOMError();
>> @@ -3515,7 +3527,7 @@ networkAllocateActualDevice(virDomainNetDefPtr iface)
>>          }
>>  
>>          if (virNetDevBandwidthCopy(&iface->data.network.actual->bandwidth,
>> -                                   portgroup->bandwidth) < 0)
>> +                                   bandwidth) < 0)
>>              goto error;
>>      }
>>  
>> @@ -3528,6 +3540,10 @@ networkAllocateActualDevice(virDomainNetDefPtr iface)
>>          */
>>          if (iface->data.network.actual)
>>              iface->data.network.actual->type = VIR_DOMAIN_NET_TYPE_NETWORK;
>> +
>> +        if (networkPlugBandwidth(network, iface) < 0)
>> +            goto error;
>> +
>>      } else if ((netdef->forwardType == VIR_NETWORK_FORWARD_BRIDGE) &&
>>                 netdef->bridge) {
>>  
>> @@ -4039,6 +4055,12 @@ networkReleaseActualDevice(virDomainNetDefPtr iface)
>>      }
>>      netdef = network->def;
>>  
>> +    if ((netdef->forwardType == VIR_NETWORK_FORWARD_NONE ||
>> +        netdef->forwardType == VIR_NETWORK_FORWARD_NAT ||
>> +        netdef->forwardType == VIR_NETWORK_FORWARD_ROUTE) &&
>> +        networkUnplugBandwidth(network, iface) < 0)
>> +        goto error;
> 
> It's not listed in the coding guidelines, but I like to use braces when
> the condition is multiline too.
> 
>> +
>>      if ((!iface->data.network.actual) ||
>>          ((actualType != VIR_DOMAIN_NET_TYPE_DIRECT) &&
>>           (actualType != VIR_DOMAIN_NET_TYPE_HOSTDEV))) {
>> @@ -4242,3 +4264,200 @@ cleanup:
>>  error:
>>      goto cleanup;
>>  }
>> +
>> +/**
>> + * networkCheckBandwidth:
>> + * @net: network QoS
>> + * @iface: interface QoS
>> + * @new_rate: new rate for non guaranteed class
>> + *
>> + * Returns: -1 if plugging would overcommit network QoS
>> + *           0 if plugging is safe (@new_rate updated)
>> + *           1 if no QoS is set (@new_rate untouched)
>> + */
>> +static int
>> +networkCheckBandwidth(virNetworkObjPtr net,
>> +                      virDomainNetDefPtr iface,
>> +                      unsigned long long *new_rate)
>> +{
>> +    int ret = -1;
>> +    virNetDevBandwidthPtr netBand = net->def->bandwidth;
>> +    virNetDevBandwidthPtr ifaceBand = iface->bandwidth;
>> +    unsigned long long tmp_floor_sum = net->floor_sum;
>> +    unsigned long long tmp_new_rate = 0;
>> +    char ifmac[VIR_MAC_STRING_BUFLEN];
>> +
>> +    if (!ifaceBand || !ifaceBand->in || !ifaceBand->in->floor ||
>> +        !netBand || !netBand->in)
>> +        return 1;
>> +
>> +    virMacAddrFormat(&iface->mac, ifmac);
>> +
>> +    tmp_new_rate = netBand->in->average;
>> +    tmp_floor_sum += ifaceBand->in->floor;
>> +
>> +    /* check against peak */
>> +    if (netBand->in->peak) {
>> +        tmp_new_rate = netBand->in->peak;
>> +        if (tmp_floor_sum > netBand->in->peak) {
>> +            virReportError(VIR_ERR_OPERATION_INVALID,
>> +                           _("Cannot plug '%s' interface into '%s' because it "
>> +                             "would overcommit 'peak' on network '%s'"),
>> +                           ifmac,
>> +                           net->def->bridge,
>> +                           net->def->name);
>> +            goto cleanup;
>> +        }
>> +    } else if (tmp_floor_sum > netBand->in->average) {
>> +        /* tmp_floor_sum can be between 'average' and 'peak' iff 'peak' is set.
>> +         * Otherwise, tmp_floor_sum must be below 'average'. */
>> +        virReportError(VIR_ERR_OPERATION_INVALID,
>> +                       _("Cannot plug '%s' interface into '%s' because it "
>> +                         "would overcommit 'average' on network '%s'"),
>> +                       ifmac,
>> +                       net->def->bridge,
>> +                       net->def->name);
>> +        goto cleanup;
>> +    }
>> +
>> +    *new_rate = tmp_new_rate;
>> +    ret = 0;
>> +
>> +cleanup:
>> +    return ret;
>> +}
>> +
>> +/**
>> + * networkNextClassID:
>> + * @net: network object
>> + *
>> + * Find next free class ID. @net is supposed
>> + * to be locked already. If there is a free ID,
>> + * it is marked as used and returned.
>> + *
>> + * Returns next free class ID or -1 if none is available.
>> + */
>> +static ssize_t
>> +networkNextClassID(virNetworkObjPtr net)
>> +{
>> +    size_t ret = 0;
>> +    bool is_set = false;
>> +
>> +    while (virBitmapGetBit(net->class_id, ret, &is_set) == 0 && is_set)
>> +        ret++;
>> +
>> +    if (is_set || virBitmapSetBit(net->class_id, ret) < 0)
>> +        return -1;
>> +
>> +    return ret;
>> +}
>> +
>> +static int
>> +networkPlugBandwidth(virNetworkObjPtr net,
>> +                     virDomainNetDefPtr iface)
>> +{
>> +    int ret = -1;
>> +    int plug_ret;
>> +    unsigned long long new_rate = 0;
>> +    ssize_t class_id = 0;
>> +    char ifmac[VIR_MAC_STRING_BUFLEN];
>> +
>> +    /* XXX iface->ifname is not filled in, it gets allocated later,
>> +     * but not in this function. Use MAC in error messages instead */
> 
> I don't know that this comment is even needed - ifname is anyway some
> name unknown to the user, and if there's an error it will no longer be
> valid anyway. Even if it was already filled in, using MAC address in the
> error messages is still a better idea.
> 
>> +    if ((plug_ret = networkCheckBandwidth(net, iface, &new_rate)) < 0) {
>> +        /* helper reported error */
>> +        goto cleanup;
>> +    }
>> +
>> +    if (plug_ret > 0) {
>> +        /* no QoS needs to be set; claim success */
>> +        ret = 0;
>> +        goto cleanup;
>> +    }
>> +
>> +    virMacAddrFormat(&iface->mac, ifmac);
>> +    if (iface->type != VIR_DOMAIN_NET_TYPE_NETWORK ||
>> +        !iface->data.network.actual) {
>> +        virReportError(VIR_ERR_INTERNAL_ERROR,
>> +                       _("Cannot set bandwidth on interface '%s' of type %d"),
>> +                       ifmac, iface->type);
>> +        goto cleanup;
>> +    }
>> +
>> +    /* generate new class_id */
>> +    if ((class_id = networkNextClassID(net)) < 0) {
>> +        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>> +                       _("Could not generate next class ID"));
>> +        goto cleanup;
>> +    }
>> +
>> +    plug_ret = virNetDevBandwidthPlug(net->def->bridge,
>> +                                      net->def->bandwidth,
>> +                                      &iface->mac,
>> +                                      iface->bandwidth,
>> +                                      class_id);
>> +    if (plug_ret < 0) {
>> +        ignore_value(virNetDevBandwidthUnplug(net->def->bridge, class_id));
>> +        goto cleanup;
>> +    }
>> +
>> +    if (plug_ret > 0) {
>> +        /* Well, this is embarrassing. The networkCheckBandwidth helper
>> +         * says we need to set a QoS, but virNetDevBandwidthPlug says
>> +         * we don't need to. It's almost certainly a bug then. */
>> +        VIR_WARN("Something strange happened. You may want to upgrade libvirt");
> 
> I realize this should never happen, but is it really okay to just return
> success?
> 
> Also, the message is incredibly vague.

It is indeed. But since it would never be printed out (it's just a warn)
I guess it's okay. Moreover, each error message produces a log entry
with appropriate line number, so a libvirt devel immediately knows where
the error was produced, right?
If we don't want to return success, we need to produce a meaningful
error message then. If somebody has an idea ...
Or as you say in 4/8 - I could drop the (plug_ret > 0) completely and
start to believe in correctness of parts of our subsystems.

> 
>> +        ret = 0;
>> +        goto cleanup;
>> +    }
>> +
>> +    /* QoS was set, generate new class ID */
>> +    iface->data.network.actual->class_id = class_id;
>> +    /* update sum of 'floor'-s of attached NICs */
>> +    net->floor_sum += iface->bandwidth->in->floor;
>> +    /* update rate for non guaranteed NICs */
>> +    new_rate -= net->floor_sum;
>> +    if (virNetDevBandwidthUpdateRate(net->def->bridge, "1:2",
>> +                                     net->def->bandwidth, new_rate) < 0)
>> +        VIR_WARN("Unable to update rate for 1:2 class on %s bridge",
>> +                 net->def->bridge);
> 
> Likewise, is there a reason this is just a warning and not an error?

This should be the part that solves the upgrading issue you mention in
3/8. That is, if we fail to upgrade 1:2 class it is likely there is no
such class (okay, we could have failed because of OOM, pipe() may have
failed, whatever. Don't focus on these for now). So we shouldn't fail.
Or maybe we should distinguish between 'class 1:2' is not there and
other fail cases.

> 
>> +
>> +    ret = 0;
>> +
>> +cleanup:
>> +    return ret;
>> +}
>> +
>> +static int
>> +networkUnplugBandwidth(virNetworkObjPtr net,
>> +                       virDomainNetDefPtr iface)
>> +{
>> +    int ret = 0;
>> +    unsigned long long new_rate;
>> +
>> +    if (iface->data.network.actual &&
>> +        iface->data.network.actual->class_id) {
>> +        /* we must remove class from bridge */
>> +        new_rate = net->def->bandwidth->in->average;
>> +
>> +        if (net->def->bandwidth->in->peak > 0)
>> +            new_rate = net->def->bandwidth->in->peak;
>> +
>> +        ret = virNetDevBandwidthUnplug(net->def->bridge,
>> +                                       iface->data.network.actual->class_id);
>> +        if (ret < 0)
>> +            goto cleanup;
>> +        /* update sum of 'floor'-s of attached NICs */
>> +        net->floor_sum -= iface->bandwidth->in->floor;
>> +        /* update rate for non guaranteed NICs */
>> +        new_rate -= net->floor_sum;
>> +        if (virNetDevBandwidthUpdateRate(net->def->bridge, "1:2",
>> +                                         net->def->bandwidth, new_rate) < 0)
>> +            VIR_WARN("Unable to update rate for 1:2 class on %s bridge",
>> +                     net->def->bridge);
> 
> same
> 
>> +        /* no class is associated any longer */
>> +        iface->data.network.actual->class_id = 0;
>> +    }
>> +
>> +cleanup:
>> +    return ret;
>> +}
> 
> The other items are minor and wouldn't take away from an ACK, but I
> think the movement of virNetDevBandwidth* symbols into
> libvirt_network.syms was wrong.
> 
> --
> libvir-list mailing list
> libvir-list at redhat.com
> https://www.redhat.com/mailman/listinfo/libvir-list
> 




More information about the libvir-list mailing list