[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

[libvirt] [RFC] [Patch] Support for Linux macvtap device



Hello!

 The attached patch provides support for the Linux macvtap device for
Qemu by passing a file descriptor to Qemu on the command line, similar
to how it is done with a regular tap device. I have modified the network
XML code to understand a definition such as the following:

<network>
  <name>vepanet</name>
  <uuid>4ebd5168-6321-4757-8397-f6e83484f402</uuid>
  <extbridge mode='vepa' dev='eth0'/>
</network>


This XML indicates the device that links to the external bridge, here
eth0, and the mode this device is supposed to be set into, here 'vepa'.
If 'extbridge' is found in the XML, all other XML elements that have
been used so far, i.e., bridge, ip, dhcp, etc., are ignored.
The above network description can then be referenced from the virtual
machine definition using the typical interface description of type
'network':

    <interface type='network'>
      <source network='vepanet'/>
      <model type='virtio'/>
    </interface>


The above network XML holds the necessary parameters to open a macvtap
device. On the command line, one would issue the following command,
using a patched version of the 'ip' command, to achieve the same thing
that libvirt now does internally:

ip link add link eth0 <optional name of if> type macvtap mode vepa

This then creates the network interface, e.g., macvtap12, along with
entries in /sys/class/net/macvtap12/, where the content of the ifindex
file provides the integer needed to build the name of the /dev/tap%d
device. That device is opened and the resulting file descriptor is
passed to qemu via a command line argument.

Some details:

In libvirt I first check whether the user has provided an interface
name explicitly. If no name was given, or if the given name follows the
macvtap%d template but is now taken (e.g., by another VM), I search for
an unused interface name following the template macvtap%d. Once that
has succeeded, I follow the path through the filesystem to open the
corresponding /dev/tap%d.
Unlike the regular tap device, where the network interface disappears
once the corresponding file descriptor is closed, the macvtap device
needs explicit tear-down. So, when a VM terminates, I delete all
macvtap-type interfaces that have the same MAC address as an interface
of the terminating VM.


Some further background on this device was recently given here:
https://www.redhat.com/archives/libvir-list/2010-January/msg00721.html

The macvtap patch for Linux was posted here:

http://lkml.org/lkml/2009/12/3/239

I'd be curious about comments on the code.

Signed-off-by: Stefan Berger <stefanb us ibm com>

Index: libvirt/src/qemu/qemu_conf.c
===================================================================
--- libvirt.orig/src/qemu/qemu_conf.c
+++ libvirt/src/qemu/qemu_conf.c
@@ -52,6 +52,7 @@
 #include "nodeinfo.h"
 #include "logging.h"
 #include "network.h"
+#include "macvtap.h"
 #include "cpu/cpu.h"
 
 #define VIR_FROM_THIS VIR_FROM_QEMU
@@ -1384,6 +1385,34 @@ int qemudExtractVersion(virConnectPtr co
 }
 
 
+/**
+ * qemudPhysIfaceConnect:
+ * @conn: connection used for error reporting
+ * @net: interface definition; on success its ifname is replaced by the
+ *       name of the macvtap interface that was actually created
+ * @linkdev: host device the macvtap interface is linked to
+ * @brmode: macvtap mode string, passed through to openMacvtapTap()
+ *
+ * Asks delMacvtapByMACAddress() to remove interfaces that already carry
+ * the MAC address of @net, then opens a new macvtap tap device.
+ *
+ * Returns the result of openMacvtapTap() (a file descriptor on success,
+ * a negative value on failure), or -1 if built without WITH_MACVTAP.
+ */
+static int
+qemudPhysIfaceConnect(virConnectPtr conn,
+                      virDomainNetDefPtr net,
+                      char *linkdev,
+                      char *brmode)
+{
+    int rc;
+#if defined(WITH_MACVTAP)
+    char *res_ifname = NULL;
+    delMacvtapByMACAddress(conn, net->mac);
+    rc = openMacvtapTap(conn, net->ifname, net->mac, linkdev, brmode,
+                        &res_ifname);
+    /* 0 is a valid file descriptor, so test for >= 0; only take over the
+     * new name when one was actually returned. */
+    if (rc >= 0 && res_ifname) {
+        VIR_FREE(net->ifname);
+        net->ifname = res_ifname;
+    } else {
+        /* do not leak a name handed back together with a failure */
+        VIR_FREE(res_ifname);
+    }
+#else
+    (void)net;
+    (void)linkdev;
+    (void)brmode;
+    qemudReportError(conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                     "%s", _("No support for macvtap device"));
+    rc = -1;
+#endif
+    return rc;
+}
+
+
 int
 qemudNetworkIfaceConnect(virConnectPtr conn,
                          struct qemud_driver *driver,
@@ -1395,6 +1424,7 @@ qemudNetworkIfaceConnect(virConnectPtr c
     int tapfd = -1;
     int vnet_hdr = 0;
     int template_ifname = 0;
+    char *brmode = NULL;
 
     if (net->type == VIR_DOMAIN_NET_TYPE_NETWORK) {
         virNetworkPtr network = virNetworkLookupByName(conn,
@@ -1402,6 +1432,15 @@ qemudNetworkIfaceConnect(virConnectPtr c
         if (!network)
             return -1;
 
+        if (virNetworkGetExtbridgeData(network, &brname, &brmode) == 0) {
+            tapfd = qemudPhysIfaceConnect(conn, net,
+                                          brname,
+                                          brmode);
+            VIR_FREE(brname);
+            VIR_FREE(brmode);
+            return tapfd;
+        }
+
         brname = virNetworkGetBridgeName(network);
 
         virNetworkFree(network);
Index: libvirt/src/util/macvtap.h
===================================================================
--- /dev/null
+++ libvirt/src/util/macvtap.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2010 IBM Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+ *
+ * Authors:
+ *     Stefan Berger <stefanb us ibm com>
+ */
+
+#ifndef __UTIL_MACVTAP_H__
+#define __UTIL_MACVTAP_H__
+
+#include <config.h>
+
+#if defined(WITH_MACVTAP)
+
+#include "internal.h"
+
+/*
+ * Create a macvtap interface linked to 'linkdev' and open the matching
+ * /dev/tap<ifindex> device.
+ *
+ * ifname     : requested interface name, or NULL to pick a "macvtap%d" name
+ * macaddress : MAC address for the new interface
+ * linkdev    : name of the device the macvtap interface is attached to
+ * mode       : one of the MACVTAP_MODE_*_STR strings, or NULL
+ * res_ifname : receives a strdup()'ed copy of the name of the interface in
+ *              use; may be NULL on failure. The caller frees it.
+ *
+ * Returns the file descriptor of the opened tap device, or a negative
+ * value on failure.
+ */
+int openMacvtapTap(virConnectPtr conn,
+                   const char *ifname,
+                   const unsigned char *macaddress,
+                   const char *linkdev,
+                   const char *mode,
+                   char **res_ifname);
+
+/*
+ * Walk the interfaces listed in /proc/net/dev and try to bring down and
+ * delete each one whose hardware address equals the 6 bytes at
+ * 'macaddress'. Returns -1 if /proc/net/dev cannot be opened.
+ */
+int delMacvtapByMACAddress(virConnectPtr conn,
+                           const unsigned char *macaddress);
+
+#endif /* WITH_MACVTAP */
+
+#define MACVTAP_MODE_PRIVATE_STR  "private"
+#define MACVTAP_MODE_VEPA_STR     "vepa"
+#define MACVTAP_MODE_BRIDGE_STR   "bridge"
+
+
+#endif
Index: libvirt/src/Makefile.am
===================================================================
--- libvirt.orig/src/Makefile.am
+++ libvirt/src/Makefile.am
@@ -55,6 +55,7 @@ UTIL_SOURCES =							\
 		util/ebtables.c util/ebtables.h			\
 		util/json.c util/json.h				\
 		util/logging.c util/logging.h			\
+		util/macvtap.c util/macvtap.h			\
 		util/memory.c util/memory.h			\
 		util/pci.c util/pci.h				\
 		util/processinfo.c util/processinfo.h		\
@@ -784,12 +785,15 @@ if WITH_LINUX
 USED_SYM_FILES += libvirt_linux.syms
 endif
 
+USED_SYM_FILES += libvirt_macvtap.syms
+
 EXTRA_DIST += \
   libvirt_public.syms		\
   libvirt_private.syms		\
   libvirt_driver_modules.syms	\
   libvirt_bridge.syms		\
-  libvirt_linux.syms
+  libvirt_linux.syms		\
+  libvirt_macvtap.syms
 
 BUILT_SOURCES = libvirt.syms
 
Index: libvirt/src/util/macvtap.c
===================================================================
--- /dev/null
+++ libvirt/src/util/macvtap.c
@@ -0,0 +1,664 @@
+/*
+ * Copyright (C) 2010 IBM Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+ *
+ * Authors:
+ *     Stefan Berger <stefanb us ibm com>
+ */
+
+#include <config.h>
+
+#if defined(WITH_MACVTAP)
+
+#include <stdio.h>
+#include <errno.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+
+#include <linux/if.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/if_tun.h>
+
+#include "util.h"
+#include "macvtap.h"
+#include "virterror_internal.h"
+
+#define VIR_FROM_THIS VIR_FROM_NONE
+
+#define ReportError(conn, code, fmt...)                                      \
+        virReportErrorHelper(conn, VIR_FROM_NONE, code, __FILE__,          \
+                               __FUNCTION__, __LINE__, fmt)
+
+#define MACVTAP_NAME_PREFIX	"macvtap"
+#define MACVTAP_NAME_PATTERN	"macvtap%d"
+
+/*
+ * Open a raw NETLINK_ROUTE socket. On failure a system error is reported
+ * through conn and the negative value from socket() is returned.
+ */
+static int nlOpen(virConnectPtr conn)
+{
+    int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+
+    if (sock >= 0)
+        return sock;
+
+    virReportSystemError(conn, errno,
+                         "%s",_("cannot open netlink socket"));
+    return sock;
+}
+
+
+/* Close a socket obtained from nlOpen(); the result of close() is ignored. */
+static void nlClose(int fd)
+{
+    (void)close(fd);
+}
+
+
+/*
+ * Send one netlink request to the kernel and read back one datagram.
+ *
+ * conn       : connection used for error reporting
+ * nlmsg      : fully built request; NLM_F_ACK is added to its flags
+ * respbuf    : buffer receiving (at most *respbuflen bytes of) the reply
+ * respbuflen : in: capacity of respbuf; out: number of bytes copied
+ *
+ * A fresh socket is opened and closed for every call. Only a single
+ * recvfrom() result of up to 1024 bytes is handed back.
+ *
+ * Returns 0 on success, -1 on failure (an error has been reported).
+ */
+static
+int nlComm(virConnectPtr conn,
+           struct nlmsghdr *nlmsg,
+           char *respbuf, int *respbuflen)
+{
+    struct sockaddr_nl peer = {
+            .nl_family = AF_NETLINK,
+            .nl_pid    = 0,
+            .nl_groups = 0,
+    };
+    char inbuf[1024];
+    ssize_t got;
+    size_t ncopy;
+    int ret = -1;
+    int sock = nlOpen(conn);
+
+    if (sock < 0)
+        return -1;
+
+    nlmsg->nlmsg_flags |= NLM_F_ACK;
+
+    got = sendto(sock, (void *)nlmsg, nlmsg->nlmsg_len, 0,
+                 (struct sockaddr *)&peer, sizeof(peer));
+    if (got < 0) {
+        virReportSystemError(conn, errno,
+                             "%s", _("cannot send to netlink socket"));
+        goto cleanup;
+    }
+
+    memset(inbuf, 0x0, sizeof(inbuf));
+
+    /* retry the receive for as long as it is merely interrupted */
+    do {
+        socklen_t peerlen = sizeof(peer);
+        got = recvfrom(sock, &inbuf, sizeof(inbuf), 0,
+                       (struct sockaddr *)&peer, &peerlen);
+    } while (got < 0 && (errno == EAGAIN || errno == EINTR));
+
+    if (got < 0) {
+        virReportSystemError(conn, errno, "%s",
+                             _("error receiving from netlink socket"));
+        goto cleanup;
+    }
+
+    /* hand back no more than the caller's buffer can hold */
+    ncopy = (got < *respbuflen) ? got : *respbuflen;
+    memcpy(respbuf, inbuf, ncopy);
+    *respbuflen = ncopy;
+    ret = 0;
+
+cleanup:
+    nlClose(sock);
+    return ret;
+}
+
+
+/*
+ * Build a single routing attribute at the start of 'buffer'.
+ *
+ * buffer  : scratch space the attribute is written into
+ * bufsize : capacity of buffer in bytes
+ * type    : attribute type stored in rta_type
+ * data    : payload, may be NULL when datalen is 0
+ * datalen : payload length in bytes
+ *
+ * Returns a pointer to the attribute (aliasing buffer), or NULL if the
+ * attribute does not fit. The buffer is left untouched in that case.
+ */
+static struct rtattr *
+rtattrCreate(char *buffer, int bufsize, int type,
+             const void *data, int datalen)
+{
+    struct rtattr *r = (struct rtattr *)buffer;
+
+    /* Validate before writing anything: the header itself must fit, and
+     * the total length must be representable in the 16 bit rta_len. */
+    if (datalen < 0 || bufsize < 0 ||
+        RTA_LENGTH(datalen) > (size_t)bufsize ||
+        RTA_LENGTH(datalen) > (unsigned short)-1)
+        return NULL;
+
+    r->rta_type = type;
+    r->rta_len  = RTA_LENGTH(datalen);
+    /* empty (nested container) attributes are created with data == NULL */
+    if (datalen > 0)
+        memcpy(RTA_DATA(r), data, datalen);
+    return r;
+}
+
+
+/*
+ * Initialise a netlink message header with the given flags and message
+ * type; the length is set to that of a header without payload.
+ */
+static void
+nlInit(struct nlmsghdr *nlm, int flags, int type)
+{
+    nlm->nlmsg_type = type;
+    nlm->nlmsg_flags = flags;
+    nlm->nlmsg_len = NLMSG_LENGTH(0);
+}
+
+
+/* Round the message length up to the next netlink alignment boundary. */
+static void
+nlAlign(struct nlmsghdr *nlm)
+{
+    const unsigned int padded = NLMSG_ALIGN(nlm->nlmsg_len);
+
+    nlm->nlmsg_len = padded;
+}
+
+
+/*
+ * Append datalen bytes to the end of a netlink message.
+ *
+ * nlm     : message under construction; its nlmsg_len is advanced
+ * totlen  : total capacity of the buffer holding nlm
+ * data    : bytes to append
+ * datalen : number of bytes to append
+ *
+ * The message length is aligned before and after the copy.
+ *
+ * Returns the address the data was copied to, or NULL if the aligned
+ * data would not fit into totlen bytes.
+ */
+static void *
+nlAppend(struct nlmsghdr *nlm, int totlen, const void *data, int datalen)
+{
+    char *dst;
+
+    nlAlign(nlm);
+
+    if (nlm->nlmsg_len + NLMSG_ALIGN(datalen) > totlen)
+        return NULL;
+
+    dst = (char *)nlm + nlm->nlmsg_len;
+    memcpy(dst, data, datalen);
+
+    nlm->nlmsg_len += datalen;
+    nlAlign(nlm);
+
+    return dst;
+}
+
+
+/*
+ * Look up the interface index of a network interface by name.
+ *
+ * conn   : connection for error reporting; pass NULL to suppress reports
+ * ifname : name of the interface
+ * idx    : receives the interface index on success
+ *
+ * Returns 0 on success, otherwise a positive errno-style value: the errno
+ * of socket() if no socket could be created, EINVAL if the name does not
+ * fit into an ifreq, ENODEV if the SIOCGIFINDEX ioctl fails.
+ */
+static int
+getIfIndex(virConnectPtr conn,
+           const char *ifname,
+           int *idx)
+{
+    struct ifreq req;
+    int ret = 0;
+    int sock = socket(PF_PACKET, SOCK_DGRAM, 0);
+
+    if (sock < 0)
+        return errno;
+
+    if (!virStrncpy(req.ifr_name, ifname, strlen(ifname),
+                    sizeof(req.ifr_name))) {
+        if (conn)
+            ReportError(conn, VIR_ERR_INTERNAL_ERROR,
+                        _("invalid interface name %s"),
+                        ifname);
+        ret = EINVAL;
+    } else if (ioctl(sock, SIOCGIFINDEX, &req) < 0) {
+        if (conn)
+            ReportError(conn, VIR_ERR_INTERNAL_ERROR,
+                        _("interface %s does not exist"),
+                        ifname);
+        ret = ENODEV;
+    } else {
+        *idx = req.ifr_ifindex;
+    }
+
+    close(sock);
+
+    return ret;
+}
+
+
+/*
+ * Bring a network interface up or down.
+ *
+ * name : name of the interface
+ * up   : non-zero to set IFF_UP, zero to clear it
+ *
+ * The flags are only written back if they actually change.
+ *
+ * Returns 0 on success, otherwise a positive errno-style value (EINVAL if
+ * the name does not fit into an ifreq). No error is reported here.
+ */
+static int
+ifUp(const char *name, int up)
+{
+    struct ifreq ifr;
+    int rc = 0;
+    int flags;
+    int fd = socket(PF_PACKET, SOCK_DGRAM, 0);
+
+    if (fd < 0)
+        return errno;
+
+    memset(&ifr, 0, sizeof(ifr));
+
+    /* virStrncpy() refuses over-long names instead of silently leaving
+     * ifr_name without a terminating NUL as strncpy() would. */
+    if (virStrncpy(ifr.ifr_name, name, strlen(name),
+                   sizeof(ifr.ifr_name)) == NULL) {
+        rc = EINVAL;
+        goto err_exit;
+    }
+
+    if (ioctl(fd, SIOCGIFFLAGS, &ifr) < 0) {
+        rc = errno;
+        goto err_exit;
+    }
+
+    flags = up ? (ifr.ifr_flags | IFF_UP) : (ifr.ifr_flags & ~IFF_UP);
+
+    if (ifr.ifr_flags != flags) {
+        ifr.ifr_flags = flags;
+
+        if (ioctl(fd, SIOCSIFFLAGS, &ifr) < 0)
+            rc = errno;
+    }
+
+err_exit:
+    close(fd);
+    return rc;
+}
+
+
+/*
+ * Create a new link (network interface) via an RTM_NEWLINK request.
+ *
+ * conn         : connection used for error reporting
+ * type         : link kind, sent as IFLA_INFO_KIND (e.g. "macvtap")
+ * macaddress   : hardware address for the new interface (IFLA_ADDRESS)
+ * macaddrsize  : number of bytes at macaddress
+ * ifname       : name for the new interface, or NULL to send none
+ * srcdev       : name of the lower device; its index is sent as IFLA_LINK
+ * macvlan_mode : if > 0, sent as IFLA_MACVLAN_MODE inside IFLA_INFO_DATA
+ *                (only when the kernel headers define IFLA_MACVLAN_MAX)
+ * retry        : set to 1 if the kernel answered EEXIST, i.e. the caller
+ *                may retry with a different name; 0 otherwise
+ *
+ * Returns 0 on success, -1 on failure. All failures except EEXIST are
+ * reported through conn.
+ */
+static int
+link_add(virConnectPtr conn,
+         const char *type,
+         const unsigned char *macaddress, int macaddrsize,
+         const char *ifname,
+         const char *srcdev,
+         uint32_t macvlan_mode,
+         int *retry)
+{
+    char nlmsgbuf[1024], recvbuf[1024];
+    struct nlmsghdr *nlm = (struct nlmsghdr *)nlmsgbuf, *resp;
+    struct nlmsgerr *err;
+    char rtattbuf[256];
+    struct rtattr *rta, *rta1;
+    struct ifinfomsg i = { .ifi_family = AF_UNSPEC };
+    int ifindex;
+    int recvbuflen = sizeof(recvbuf);
+
+    if (getIfIndex(conn, srcdev, &ifindex) != 0)
+        return -1;
+
+    *retry = 0;
+
+    memset(&nlmsgbuf, 0, sizeof(nlmsgbuf));
+
+    nlInit(nlm, NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL, RTM_NEWLINK);
+
+    if (!nlAppend(nlm, sizeof(nlmsgbuf), &i, sizeof(i)))
+        goto buffer_too_small;
+
+    /* IFLA_LINKINFO is a container; rta1 remembers where it starts so
+     * that its length can be patched once the nested attribute is in. */
+    rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_LINKINFO, NULL, 0);
+    if (!rta)
+        goto buffer_too_small;
+
+    if (!(rta1 = nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len)))
+        goto buffer_too_small;
+
+    rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_INFO_KIND,
+                       type, strlen(type));
+    if (!rta)
+        goto buffer_too_small;
+
+    if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len))
+        goto buffer_too_small;
+
+    rta1->rta_len = (char *)nlm + nlm->nlmsg_len - (char *)rta1;
+
+    rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_LINK,
+                       &ifindex, sizeof(ifindex));
+    if (!rta)
+        goto buffer_too_small;
+
+    if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len))
+        goto buffer_too_small;
+
+    rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_ADDRESS,
+                       macaddress, macaddrsize);
+    if (!rta)
+        goto buffer_too_small;
+
+    if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len))
+        goto buffer_too_small;
+
+    if (ifname) {
+        rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_IFNAME,
+                           ifname, strlen(ifname) + 1);
+        if (!rta)
+            goto buffer_too_small;
+
+        if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len))
+            goto buffer_too_small;
+    }
+
+    if (macvlan_mode > 0) {
+#ifdef IFLA_MACVLAN_MAX
+        /* second container: IFLA_INFO_DATA { IFLA_MACVLAN_MODE } */
+        rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_INFO_DATA,
+                           NULL, 0);
+        if (!rta)
+            goto buffer_too_small;
+
+        if (!(rta1 = nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len)))
+            goto buffer_too_small;
+
+        rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_MACVLAN_MODE,
+                           &macvlan_mode, sizeof(macvlan_mode));
+        if (!rta)
+            goto buffer_too_small;
+
+        if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len))
+            goto buffer_too_small;
+
+        rta1->rta_len = (char *)nlm + nlm->nlmsg_len - (char *)rta1;
+#else
+#warning You must update linux/if_link.h to have IFLA_MACVLAN_MODE defined
+#endif
+    }
+
+    if (nlComm(conn, nlm, recvbuf, &recvbuflen) < 0)
+        return -1;
+
+    if (recvbuflen < NLMSG_LENGTH(0))
+        goto malformed_resp;
+
+    resp = (struct nlmsghdr *)recvbuf;
+
+    switch (resp->nlmsg_type) {
+    case NLMSG_ERROR:
+        err = (struct nlmsgerr *)NLMSG_DATA(resp);
+        /* the error payload must both be announced and actually received */
+        if (resp->nlmsg_len < NLMSG_LENGTH(sizeof(*err)) ||
+            (size_t)recvbuflen < NLMSG_LENGTH(sizeof(*err)))
+            goto malformed_resp;
+
+        /* error == 0 is the positive acknowledgement requested through
+         * NLM_F_ACK, not a rejection */
+        switch (-err->error) {
+
+        case 0:
+            break;
+
+        case EEXIST:
+            *retry = 1;
+            return -1;
+
+        default:
+            virReportSystemError(conn, -err->error,
+                                 _("error creating %s type of interface"),
+                                 type);
+            return -1;
+        }
+        break;
+
+    case NLMSG_DONE:
+        break;
+
+    default:
+        goto malformed_resp;
+    }
+
+    return 0;
+
+malformed_resp:
+    ReportError(conn, VIR_ERR_INTERNAL_ERROR,
+                _("malformed netlink response message"));
+    return -1;
+
+buffer_too_small:
+    ReportError(conn, VIR_ERR_INTERNAL_ERROR,
+                _("internal buffer is too small"));
+    return -1;
+}
+
+
+/*
+ * Delete a link (network interface) via an RTM_DELLINK request.
+ *
+ * conn : connection used for error reporting
+ * type : link kind, sent as IFLA_INFO_KIND
+ * name : name of the interface to delete, sent as IFLA_IFNAME
+ *
+ * Returns 0 on success, -1 on failure.
+ *
+ * NOTE: the function is currently short-circuited and always returns -1
+ * without talking to the kernel (see the "if (1)" below); everything
+ * after that statement is unreachable for now.
+ */
+static int
+link_del(virConnectPtr conn,
+         const char *type,
+         const char *name)
+{
+    char nlmsgbuf[1024], recvbuf[1024];
+    struct nlmsghdr *nlm = (struct nlmsghdr *)nlmsgbuf, *resp;
+    struct nlmsgerr *err;
+    char rtattbuf[256];
+    struct rtattr *rta, *rta1;
+    struct ifinfomsg ifinfo = { .ifi_family = AF_UNSPEC };
+    int recvbuflen = sizeof(recvbuf);
+
+    // due to a bug in the driver, don't run the rest for now...
+    if (1)
+         return -1;
+
+    memset(&nlmsgbuf, 0, sizeof(nlmsgbuf));
+
+    /* NOTE(review): NLM_F_CREATE | NLM_F_EXCL look copied from link_add();
+     * confirm whether they are intended for a delete request. */
+    nlInit(nlm, NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL, RTM_DELLINK);
+
+    if (!nlAppend(nlm, sizeof(nlmsgbuf), &ifinfo, sizeof(ifinfo)))
+        goto buffer_too_small;
+
+    /* IFLA_LINKINFO container; rta1 marks its start so the length can be
+     * patched after the nested IFLA_INFO_KIND has been appended */
+    rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_LINKINFO, NULL, 0);
+    if (!rta)
+        goto buffer_too_small;
+
+    if (!(rta1 = nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len)))
+        goto buffer_too_small;
+
+    rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_INFO_KIND,
+                       type, strlen(type));
+    if (!rta)
+        goto buffer_too_small;
+
+    if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len))
+        goto buffer_too_small;
+
+    rta1->rta_len = (char *)nlm + nlm->nlmsg_len - (char *)rta1;
+
+    rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_IFNAME,
+                       name, strlen(name)+1);
+    if (!rta)
+        goto buffer_too_small;
+
+    if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len))
+        goto buffer_too_small;
+
+    if (nlComm(conn, nlm, recvbuf, &recvbuflen) < 0)
+        return -1;
+
+    if (recvbuflen < NLMSG_LENGTH(0))
+        goto malformed_resp;
+
+    resp = (struct nlmsghdr *)recvbuf;
+    switch (resp->nlmsg_type) {
+    case NLMSG_ERROR:
+        err = (struct nlmsgerr *)NLMSG_DATA(resp);
+        if (resp->nlmsg_len < NLMSG_LENGTH(sizeof(*err)))
+            goto malformed_resp;
+
+        /* NOTE(review): this prints to stdout for every NLMSG_ERROR reply,
+         * including error == 0, which is handled as success below. */
+        printf("%s: Netlink request rejected by kernel. error = %d.\n",
+               __FUNCTION__, -err->error);
+        switch (-err->error) {
+        case 0:
+        break;
+
+        default:
+            virReportSystemError(conn, -err->error,
+                                 _("error destroying %s interface"),
+                                 name);
+            return -1;
+        }
+    break;
+
+    case NLMSG_DONE:
+    break;
+
+    default:
+        goto malformed_resp;
+    }
+
+    return 0;
+
+malformed_resp:
+    ReportError(conn, VIR_ERR_INTERNAL_ERROR,
+                _("malformed netlink response message"));
+    return -1;
+
+buffer_too_small:
+    ReportError(conn, VIR_ERR_INTERNAL_ERROR,
+                _("internal buffer is too small"));
+    return -1;
+}
+
+
+/*
+ * Translate a macvtap mode string into the kernel's MACVLAN_MODE_* value.
+ *
+ * Returns 0 for a NULL or unknown string, and always 0 when the kernel
+ * headers do not define IFLA_MACVLAN_MAX.
+ */
+static uint32_t
+modeFromString(const char *mode_str)
+{
+#ifdef IFLA_MACVLAN_MAX
+    static const struct {
+        const char *name;
+        uint32_t value;
+    } modes[] = {
+        { MACVTAP_MODE_PRIVATE_STR, MACVLAN_MODE_PRIVATE },
+        { MACVTAP_MODE_VEPA_STR,    MACVLAN_MODE_VEPA },
+        { MACVTAP_MODE_BRIDGE_STR,  MACVLAN_MODE_BRIDGE },
+    };
+    size_t n;
+#endif
+
+    if (mode_str == NULL)
+        return 0;
+
+#ifdef IFLA_MACVLAN_MAX
+    for (n = 0; n < sizeof(modes) / sizeof(modes[0]); n++) {
+        if (strcmp(mode_str, modes[n].name) == 0)
+            return modes[n].value;
+    }
+#endif
+
+    return 0;
+}
+
+/**
+ * openMacvtapTap:
+ * @conn: connection used for error reporting
+ * @tgifname: requested interface name, or NULL to have one generated
+ * @macaddress: 6 byte MAC address for the new interface
+ * @linkdev: name of the device the macvtap interface is linked to
+ * @mode_str: one of the MACVTAP_MODE_*_STR strings, or NULL
+ * @res_ifname: receives a newly allocated copy of the name of the
+ *              interface that was created; NULL on failure. The caller
+ *              must free it.
+ *
+ * Creates a macvtap interface, brings it up and opens the /dev/tap%d
+ * device whose number is the interface's ifindex as read from sysfs.
+ *
+ * If @tgifname names an existing interface and starts with "macvtap", a
+ * new "macvtap%d" name is generated instead; any other existing name is
+ * an error.
+ *
+ * Returns the file descriptor of the tap device, or -1 on failure. On
+ * failure after the interface was created, link_del() is called for it.
+ */
+int
+openMacvtapTap(virConnectPtr conn,
+               const char *tgifname,
+               const unsigned char *macaddress,
+               const char *linkdev,
+               const char *mode_str,
+               char **res_ifname)
+{
+    const char *type = "macvtap";
+    int c, rc;
+    char ifname[IFNAMSIZ];
+    int retries, do_retry = 0;
+    char path[256];
+    FILE *file;
+    int ifindex;
+    char tapname[50];
+    uint32_t mode = modeFromString(mode_str);
+    const char *cr_ifname = NULL;
+
+    *res_ifname = NULL;
+
+    if (tgifname) {
+        if(getIfIndex(NULL, tgifname, &ifindex) == 0) {
+            if (strncmp(MACVTAP_NAME_PREFIX,
+                        tgifname,
+                        strlen(MACVTAP_NAME_PREFIX)) == 0) {
+                goto create_name;
+            }
+            /* the lookup succeeded, so errno holds nothing meaningful */
+            virReportSystemError(conn, EEXIST,
+                                 _("Interface %s already exists"), tgifname);
+            return -1;
+        }
+        cr_ifname = tgifname;
+        /* pass the real MAC length; a zero-length address attribute does
+         * not assign the MAC to the new interface */
+        rc = link_add(conn, type, macaddress, 6, tgifname, linkdev,
+                      mode, &do_retry);
+        if (rc)
+            return -1;
+    } else {
+create_name:
+        retries = 5;
+        cr_ifname = NULL;
+        for (c = 0; c < 255; c++) {
+            snprintf(ifname, sizeof(ifname), MACVTAP_NAME_PATTERN, c);
+            if (getIfIndex(NULL, ifname, &ifindex) != ENODEV)
+                continue;
+
+            rc = link_add(conn, type, macaddress, 6, ifname, linkdev,
+                          mode, &do_retry);
+            if (rc == 0) {
+                cr_ifname = ifname;
+                break;
+            }
+
+            /* someone else grabbed the name in the meantime */
+            if (do_retry && --retries)
+                continue;
+            return -1;
+        }
+        if (!cr_ifname) {
+            /* every candidate name is in use; do not touch an interface
+             * that was not created here */
+            ReportError(conn, VIR_ERR_INTERNAL_ERROR,
+                        "%s", _("no unused macvtap interface name found"));
+            return -1;
+        }
+    }
+
+    rc = ifUp(cr_ifname, 1);
+    if (rc != 0) {
+        /* ifUp() returns the error code; errno may be stale by now */
+        virReportSystemError(conn, rc,
+                             _("cannot 'up' interface %s"), cr_ifname);
+        rc = -1;
+        goto link_del_exit;
+    }
+
+    snprintf(path, sizeof(path), "/sys/class/net/%s/ifindex", cr_ifname);
+    file = fopen(path, "r");
+
+    if (!file) {
+        virReportSystemError(conn, errno,
+                             _("cannot open macvtap file %s to determine "
+                               "interface index"), path);
+        rc = -1;
+        goto link_del_exit;
+    }
+
+    if (fscanf(file, "%d", &ifindex) != 1) {
+        virReportSystemError(conn, errno,
+                             "%s",_("cannot determine macvtap's tap device "
+                             "interface index"));
+        fclose(file);
+        rc = -1;
+        goto link_del_exit;
+    }
+    fclose(file);
+
+    snprintf(tapname, sizeof(tapname), "/dev/tap%d", ifindex);
+
+    retries = 10;
+    while (1) {
+        // may need to wait for udev to be done
+        rc = open(tapname, O_RDWR);
+        if (rc < 0 && --retries) {
+            usleep(20000);
+            continue;
+        }
+        break;
+    }
+
+    if (rc < 0) {
+        virReportSystemError(conn, errno,
+                             _("cannot open macvtap tap device %s"),
+                             tapname);
+        rc = -1;
+        goto link_del_exit;
+    }
+
+    *res_ifname = strdup(cr_ifname);
+    if (!*res_ifname) {
+        virReportSystemError(conn, ENOMEM, "%s",
+                             _("cannot copy macvtap interface name"));
+        close(rc);
+        rc = -1;
+        goto link_del_exit;
+    }
+
+    /* success: the interface must stay alive for as long as the fd is used */
+    return rc;
+
+link_del_exit:
+    link_del(conn, type, cr_ifname);
+
+    return rc;
+}
+
+
+/**
+ * delMacvtapByMACAddress:
+ * @conn: connection used for error reporting
+ * @macaddress: 6 byte MAC address to look for
+ *
+ * Reads the interface names from /proc/net/dev and, for every interface
+ * whose hardware address equals @macaddress, brings the interface down
+ * and calls link_del() on it. When a deletion succeeds, the file position
+ * is moved back to the start of the line just processed.
+ *
+ * Returns 0 once the list has been walked, -1 if /proc/net/dev or the
+ * helper socket could not be opened (an error has been reported).
+ */
+int
+delMacvtapByMACAddress(virConnectPtr conn,
+                       const unsigned char *macaddress)
+{
+    struct ifreq ifr;
+    FILE *file;
+    char *ifname, *pos;
+    char buffer[1024];
+    long oldpos = 0;
+    int sock;
+    int rc = -1;
+
+    file = fopen("/proc/net/dev", "r");
+
+    if (!file) {
+        virReportSystemError(conn, errno, "%s",
+                             _("cannot open file to read network interfaces "
+                             "from"));
+        return -1;
+    }
+
+    sock = socket(AF_INET, SOCK_DGRAM, 0);
+    if (sock < 0) {
+        virReportSystemError(conn, errno, "%s",
+                             _("cannot open socket"));
+        goto sock_err;
+    }
+
+    while (NULL != (ifname = fgets(buffer, sizeof(buffer), file))) {
+        /* names are right-aligned in their column, so short ones are
+         * preceded by more than one blank */
+        while (isspace((unsigned char)ifname[0]))
+            ifname++;
+        if ((pos = strchr(ifname, ':')) != NULL) {
+            pos[0] = 0;
+            memset(&ifr, 0, sizeof(ifr));
+            /* skip names that do not fit rather than querying a
+             * truncated, unterminated one */
+            if (virStrncpy(ifr.ifr_name, ifname, strlen(ifname),
+                           sizeof(ifr.ifr_name)) != NULL &&
+                ioctl(sock, SIOCGIFHWADDR, (char *)&ifr) >= 0 &&
+                memcmp(&ifr.ifr_hwaddr.sa_data[0], macaddress, 6) == 0) {
+                ifUp(ifname, 0);
+                if (link_del(conn, "macvtap", ifname) == 0)
+                    fseek(file, oldpos, SEEK_SET);
+            }
+        }
+        oldpos = ftell(file);
+    }
+
+    rc = 0;
+
+    close(sock);
+sock_err:
+    fclose(file);
+
+    return rc;
+}
+
+#endif
Index: libvirt/src/libvirt_macvtap.syms
===================================================================
--- /dev/null
+++ libvirt/src/libvirt_macvtap.syms
@@ -0,0 +1,5 @@
+#
+
+# macvtap.h
+openMacvtapTap;
+delMacvtapByMACAddress;
Index: libvirt/src/qemu/qemu_driver.c
===================================================================
--- libvirt.orig/src/qemu/qemu_driver.c
+++ libvirt/src/qemu/qemu_driver.c
@@ -75,6 +75,7 @@
 #include "libvirt_internal.h"
 #include "xml.h"
 #include "cpu/cpu.h"
+#include "macvtap.h"
 
 
 #define VIR_FROM_THIS VIR_FROM_QEMU
@@ -2793,6 +2794,8 @@ static void qemudShutdownVMDaemon(virCon
     int retries = 0;
     qemuDomainObjPrivatePtr priv = vm->privateData;
     virErrorPtr orig_err;
+    virDomainDefPtr def;
+    int i;
 
     if (!virDomainObjIsActive(vm))
         return;
@@ -2804,8 +2807,7 @@ static void qemudShutdownVMDaemon(virCon
     orig_err = virSaveLastError();
 
     if (driver->macFilter) {
-        int i;
-        virDomainDefPtr def = vm->def;
+        def = vm->def;
         for (i = 0 ; i < def->nnets ; i++) {
             virDomainNetDefPtr net = def->nets[i];
             if (net->ifname == NULL)
@@ -2819,6 +2821,17 @@ static void qemudShutdownVMDaemon(virCon
         }
     }
 
+#if defined(WITH_MACVTAP)
+    def = vm->def;
+    for (i = 0; i < def->nnets ; i++) {
+        virDomainNetDefPtr net = def->nets[i];
+        if (net->type == VIR_DOMAIN_NET_TYPE_NETWORK) {
+            // cannot call into the network driver...
+            delMacvtapByMACAddress(conn, net->mac);
+        }
+    }
+#endif
+
     if (virKillProcess(vm->pid, 0) == 0 &&
         virKillProcess(vm->pid, SIGTERM) < 0)
         virReportSystemError(conn, errno,
Index: libvirt/src/conf/network_conf.c
===================================================================
--- libvirt.orig/src/conf/network_conf.c
+++ libvirt/src/conf/network_conf.c
@@ -43,6 +43,7 @@
 #include "util.h"
 #include "buf.h"
 #include "c-ctype.h"
+#include "macvtap.h"
 
 #define MAX_BRIDGE_ID 256
 #define VIR_FROM_THIS VIR_FROM_NETWORK
@@ -53,6 +54,14 @@ VIR_ENUM_IMPL(virNetworkForward,
               VIR_NETWORK_FORWARD_LAST,
               "none", "nat", "route" )
 
+VIR_ENUM_DECL(virNetworkBridgeMode)
+
+VIR_ENUM_IMPL(virNetworkBridgeMode,
+              VIR_NETWORK_BRIDGEMODE_LAST,
+              MACVTAP_MODE_BRIDGE_STR,
+              MACVTAP_MODE_VEPA_STR,
+              MACVTAP_MODE_PRIVATE_STR)
+
 #define virNetworkReportError(conn, code, fmt...)                            \
         virReportErrorHelper(conn, VIR_FROM_NETWORK, code, __FILE__,       \
                                __FUNCTION__, __LINE__, fmt)
@@ -96,6 +105,8 @@ void virNetworkDefFree(virNetworkDefPtr 
         return;
 
     VIR_FREE(def->name);
+    VIR_FREE(def->bridgeMode);
+    VIR_FREE(def->linkDev);
     VIR_FREE(def->bridge);
     VIR_FREE(def->forwardDev);
     VIR_FREE(def->ipAddress);
@@ -425,6 +436,28 @@ virNetworkDefParseXML(virConnectPtr conn
         VIR_FREE(tmp);
     }
 
+    if (virXPathBoolean(conn, "count(./extbridge) > 0", ctxt)) {
+        tmp = virXPathString(conn, "string(./extbridge[1]/@mode)", ctxt);
+        if (tmp) {
+            def->bridgeMode = tmp;
+            if (virNetworkBridgeModeTypeFromString(tmp) < 0) {
+                virNetworkReportError(conn, VIR_ERR_INTERNAL_ERROR,
+                                      _("unknown bridging mode '%s'"), tmp);
+                goto error;
+            }
+        }
+
+        def->linkDev = virXPathString(conn, "string(./extbridge[1]/@dev)", ctxt);
+        if (!def->linkDev) {
+            virNetworkReportError(conn, VIR_ERR_INTERNAL_ERROR,
+                                  "%s",
+                                  _("device to connect to external bridge "
+                                  "missing"));
+            goto error;
+        }
+        return def;
+    }
+
     /* Parse network domain information */
     def->domain = virXPathString(conn, "string(./domain[1]/@name)", ctxt);
 
@@ -664,12 +697,21 @@ char *virNetworkDefFormat(virConnectPtr 
         }
     }
 
-    virBufferAddLit(&buf, "  <bridge");
-    if (def->bridge)
-        virBufferEscapeString(&buf, " name='%s'", def->bridge);
-    virBufferVSprintf(&buf, " stp='%s' delay='%ld' />\n",
-                      def->stp ? "on" : "off",
-                      def->delay);
+    if (def->linkDev) {
+        virBufferAddLit(&buf, "  <extbridge");
+        if (def->bridgeMode)
+            virBufferEscapeString(&buf, " mode='%s'",
+                                  def->bridgeMode);
+        virBufferEscapeString(&buf, " dev='%s'/>\n",
+                              def->linkDev);
+    } else {
+        virBufferAddLit(&buf, "  <bridge");
+        if (def->bridge)
+            virBufferEscapeString(&buf, " name='%s'", def->bridge);
+        virBufferVSprintf(&buf, " stp='%s' delay='%ld' />\n",
+                          def->stp ? "on" : "off",
+                          def->delay);
+    }
 
     if (def->domain)
         virBufferVSprintf(&buf, "  <domain name='%s'/>\n", def->domain);
@@ -1010,6 +1052,9 @@ int virNetworkSetBridgeName(virConnectPt
 
     int ret = -1;
 
+    if (def->linkDev)
+        return 0;
+
     if (def->bridge && !strstr(def->bridge, "%d")) {
         /* We may want to skip collision detection in this case (ex. when
          * loading configs at daemon startup, so the network is at least
Index: libvirt/src/conf/network_conf.h
===================================================================
--- libvirt.orig/src/conf/network_conf.h
+++ libvirt/src/conf/network_conf.h
@@ -40,6 +40,14 @@ enum virNetworkForwardType {
     VIR_NETWORK_FORWARD_LAST,
 };
 
+/* Modes accepted for the 'mode' attribute of a network's <extbridge>
+ * element. NOTE(review): the order presumably has to match the string
+ * list passed to VIR_ENUM_IMPL(virNetworkBridgeMode, ...) in
+ * network_conf.c -- confirm when adding entries. */
+enum virNetworkBridgeModeType {
+    VIR_NETWORK_BRIDGEMODE_BRIDGE  = 0,
+    VIR_NETWORK_BRIDGEMODE_VEPA,
+    VIR_NETWORK_BRIDGEMODE_PRIVATE,
+
+    VIR_NETWORK_BRIDGEMODE_LAST,
+};
+
 typedef struct _virNetworkDHCPRangeDef virNetworkDHCPRangeDef;
 typedef virNetworkDHCPRangeDef *virNetworkDHCPRangeDefPtr;
 struct _virNetworkDHCPRangeDef {
@@ -67,6 +75,9 @@ struct _virNetworkDef {
     unsigned long delay;   /* Bridge forward delay (ms) */
     unsigned int stp :1; /* Spanning tree protocol */
 
+    char *bridgeMode;
+    char *linkDev;
+
     int forwardType;    /* One of virNetworkForwardType constants */
     char *forwardDev;   /* Destination device for forwarding */
 
Index: libvirt/src/driver.h
===================================================================
--- libvirt.orig/src/driver.h
+++ libvirt/src/driver.h
@@ -496,7 +496,10 @@ typedef int
         (*virDrvNetworkIsActive)(virNetworkPtr net);
 typedef int
         (*virDrvNetworkIsPersistent)(virNetworkPtr net);
-
+typedef int
+        (*virDrvNetworkGetExtbridgeData)(virNetworkPtr net,
+                                         char **linkdev,     /* out */
+                                         char **bridgemode); /* out */
 
 
 typedef struct _virNetworkDriver virNetworkDriver;
@@ -533,6 +536,7 @@ struct _virNetworkDriver {
         virDrvNetworkSetAutostart	networkSetAutostart;
         virDrvNetworkIsActive           networkIsActive;
         virDrvNetworkIsPersistent       networkIsPersistent;
+        virDrvNetworkGetExtbridgeData   networkGetExtbridgeData;
 };
 
 /*-------*/
Index: libvirt/src/libvirt.c
===================================================================
--- libvirt.orig/src/libvirt.c
+++ libvirt/src/libvirt.c
@@ -6034,6 +6034,48 @@ error:
     return NULL;
 }
 
+
+/**
+ * virNetworkGetExtbridgeData:
+ * @network: a network object
+ * @linkdev: non-NULL pointer where the name of the interface to connect to
+ *           the external bridge is returned
+ * @brmode: non-NULL pointer where the mode of the external bridge is returned
+ *
+ * Returns 0 in case an external bridge has been configured, -1 otherwise
+ */
+int
+virNetworkGetExtbridgeData(virNetworkPtr network,
+                           char **linkdev, char **brmode)
+{
+    virConnectPtr conn;
+    DEBUG("network=%p, linkdev=%p, brmode=%p", network, linkdev, brmode);
+
+    virResetLastError();
+
+    if (!VIR_IS_CONNECTED_NETWORK(network)) {
+        virLibNetworkError(NULL, VIR_ERR_INVALID_NETWORK, __FUNCTION__);
+        virDispatchError(NULL);
+        return -1;
+    }
+    conn = network->conn;
+
+    if (!linkdev || !brmode) {   /* drivers store through both pointers */
+        virLibNetworkError(network, VIR_ERR_INVALID_ARG, __FUNCTION__);
+        virDispatchError(conn);
+        return -1;
+    }
+
+    if (conn->networkDriver && conn->networkDriver->networkGetExtbridgeData)
+        return conn->networkDriver->networkGetExtbridgeData(network, linkdev,
+                                                            brmode);
+
+    virLibConnError (conn, VIR_ERR_NO_SUPPORT, __FUNCTION__);
+    virDispatchError(conn);
+    return -1;
+}
+
+
 /**
  * virNetworkGetAutostart:
  * @network: a network object
Index: libvirt/src/libvirt_public.syms
===================================================================
--- libvirt.orig/src/libvirt_public.syms
+++ libvirt/src/libvirt_public.syms
@@ -347,6 +347,7 @@ LIBVIRT_0.7.5 {
     global:
         virConnectCompareCPU;
         virDomainMemoryStats;
+        virNetworkGetExtbridgeData;
 } LIBVIRT_0.7.3;
 
 # .... define new API here using predicted next version number ....
Index: libvirt/src/network/bridge_driver.c
===================================================================
--- libvirt.orig/src/network/bridge_driver.c
+++ libvirt/src/network/bridge_driver.c
@@ -901,6 +901,11 @@ static int networkStartNetworkDaemon(vir
         return -1;
     }
 
+    if (network->def->linkDev) {
+        network->active = 1;
+        return 0;
+    }
+
     if ((err = brAddBridge(driver->brctl, network->def->bridge))) {
         virReportSystemError(conn, err,
                              _("cannot create bridge '%s'"),
@@ -1555,6 +1560,46 @@ cleanup:
 }
 
 
+/* Hand out malloc'ed copies of the extbridge link device and bridge mode of
+ * @net; the caller frees them. *bridgeMode is NULL if no mode is configured.
+ * Returns 0 on success; -1, outputs untouched, on error or if no extbridge. */
+static int networkGetExtbridgeData(virNetworkPtr net,
+                                   char **linkDev, char **bridgeMode) {
+    int rc = -1;
+    struct network_driver *driver = net->conn->networkPrivateData;
+    virNetworkObjPtr network;
+    char *dev = NULL, *mode = NULL;
+
+    networkDriverLock(driver);
+    network = virNetworkFindByUUID(&driver->networks, net->uuid);
+    networkDriverUnlock(driver);
+
+    if (!network) {
+        networkReportError(net->conn, NULL, net, VIR_ERR_INVALID_NETWORK,
+                           "%s", _("no network with matching id"));
+        goto cleanup;
+    }
+    if (!network->def->linkDev)     /* no external bridge configured */
+        goto cleanup;
+
+    if (!(dev = strdup(network->def->linkDev)) ||
+        (network->def->bridgeMode &&
+         !(mode = strdup(network->def->bridgeMode)))) {
+        virReportOOMError(net->conn);
+        free(dev);                  /* do not leak the first copy */
+        goto cleanup;
+    }
+    *linkDev = dev;
+    *bridgeMode = mode;             /* always defined, possibly NULL */
+    rc = 0;
+
+cleanup:
+    if (network)
+        virNetworkObjUnlock(network);
+    return rc;
+}
+
+
 static virNetworkDriver networkDriver = {
     "Network",
     networkOpenNetwork, /* open */
@@ -1576,6 +1621,7 @@ static virNetworkDriver networkDriver = 
     networkSetAutostart, /* networkSetAutostart */
     networkIsActive,
     networkIsPersistent,
+    networkGetExtbridgeData,
 };
 
 static virStateDriver networkStateDriver = {
Index: libvirt/src/test/test_driver.c
===================================================================
--- libvirt.orig/src/test/test_driver.c
+++ libvirt/src/test/test_driver.c
@@ -5261,6 +5261,7 @@ static virNetworkDriver testNetworkDrive
     testNetworkSetAutostart, /* networkSetAutostart */
     testNetworkIsActive, /* networkIsActive */
     testNetworkIsPersistent, /* networkIsPersistent */
+    NULL, /* networkGetExtbridgeData */
 };
 
 static virInterfaceDriver testInterfaceDriver = {
Index: libvirt/include/libvirt/libvirt.h.in
===================================================================
--- libvirt.orig/include/libvirt/libvirt.h.in
+++ libvirt/include/libvirt/libvirt.h.in
@@ -952,6 +952,9 @@ int                     virNetworkGetAut
                                                  int *autostart);
 int                     virNetworkSetAutostart  (virNetworkPtr network,
                                                  int autostart);
+int                     virNetworkGetExtbridgeData(virNetworkPtr network,
+                                                   char **linkdev,
+                                                   char **brmode);
 
 /*
  * Physical host interface configuration API
Index: libvirt/configure.in
===================================================================
--- libvirt.orig/configure.in
+++ libvirt/configure.in
@@ -1827,6 +1827,19 @@ AC_ARG_WITH([qemu-group],
 AC_DEFINE_UNQUOTED([QEMU_USER], ["$QEMU_USER"], [QEMU user account])
 AC_DEFINE_UNQUOTED([QEMU_GROUP], ["$QEMU_GROUP"], [QEMU group account])
 
+# macvtap device support: off unless configure is run with --with-macvtap
+AC_ARG_WITH([macvtap],
+  AC_HELP_STRING([--with-macvtap],[enable macvtap device @<:@default=no@:>@]),
+  [with_macvtap=${withval}],
+  [with_macvtap=no])
+
+if test "$with_macvtap" = "yes" ; then
+    AC_DEFINE_UNQUOTED([WITH_MACVTAP], 1, [whether macvtap support is enabled])
+fi
+AM_CONDITIONAL([WITH_MACVTAP], [test "$with_macvtap" = "yes"])
+
+
+
 # Only COPYING.LIB is under version control, yet COPYING
 # is included as part of the distribution tarball.
 # Copy one to the other, but only if this is a srcdir-build.
@@ -1878,6 +1891,7 @@ AC_MSG_NOTICE([  Remote: $with_remote])
 AC_MSG_NOTICE([ Network: $with_network])
 AC_MSG_NOTICE([Libvirtd: $with_libvirtd])
 AC_MSG_NOTICE([   netcf: $with_netcf])
+AC_MSG_NOTICE([ macvtap: $with_macvtap])
 AC_MSG_NOTICE([])
 AC_MSG_NOTICE([Storage Drivers])
 AC_MSG_NOTICE([])
Index: libvirt/python/generator.py
===================================================================
--- libvirt.orig/python/generator.py
+++ libvirt/python/generator.py
@@ -170,6 +170,7 @@ skipped_types = {
 #    'int *': "usually a return type",
      'virConnectDomainEventCallback': "No function types in python",
      'virEventAddHandleFunc': "No function types in python",
+     'char **': "char ** output parameters are not supported",
 }
 
 #######################################################################

[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]