[libvirt] PATCH: Allow LXC to use private /dev/pts instance

Daniel P. Berrange berrange at redhat.com
Mon Apr 20 10:33:09 UTC 2009


On Fri, Apr 17, 2009 at 09:39:19AM -0500, Serge E. Hallyn wrote:
> Quoting Daniel P. Berrange (berrange at redhat.com):
> > > Calling unshare(CLONE_NEWNS) will not prevent the host OS from
> > > seeing the new /dev/pts if / was MS_SHARED.  That isn't taken
> > > care of anywhere else for this process's namespace, is it?
> > 
> > Yeah, so this is the place where I think we must still have a difference
> > in our host setups. I'm testing this patch on a Fedora 11 host, and with
> > my current code, the new /dev/pts is not visible in the host.
> 
> Well I haven't tested your patch as is, was just looking at the code.
> My pivot_root patch did a remount --make-slave, but I think it is only
> for the container itself, whereas your patch here affects the driver
> so it hasn't yet hit that remount, right?
> 
> > So I can only assume this means my host /  is *not* MS_SHARED, while
> 
> If on your F11 host you look at
> 
> 	cat /proc/self/mountinfo
> 
> do the top lines show / as being shared?  (Mine does)

No, all the F11 installs I have just show '-'  and I can't find any
sign of something in the init process which makes them shared, so
perhaps its some other software you've since installed ?

For sake of testing though I've run 'mount --make-rshared /' on one
of my systems and can now reproduce the behaviour you describe with
the mount appearing for all processes.

> 
> > yours is. I'm struggling to find out why this is different because
> > I'm testing on an Fedora 11 up2date system. 
> 
> It's possible this is just something that has been changed since.
> 
> > Anyway, would it be sufficiently to add in a call
> > 
> >         if (mount("", "/", NULL, MS_PRIVATE|MS_REC, NULL) < 0) {
> >             virReportSystemError(NULL, errno, "%s",
> >                                  _("failed to make root private"));
> >             goto cleanup;
> >         }
> 
> Maybe the best thing to do would be:
> 
> >         if (mount("", "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
> >             virReportSystemError(NULL, errno, "%s",
> >                                  _("failed to make root slave"));
> >             goto cleanup;
> >         }
> >         if (mount("", "/", NULL, MS_SHARED|MS_REC, NULL) < 0) {
> >             virReportSystemError(NULL, errno, "%s",
> >                                  _("failed to make root shared"));
> >             goto cleanup;
> >         }
> 
> So we are making it slave (so it will receive mounts from the host
> still), then shared (so the rest of the container will start out
> shared).  That, or just turn it SLAVE and leave it like that.

This patch attached now just makes it MS_SLAVE.  There's no need for the
extra SHARED flag, since the only process libvirt_lxc spawns is the 'init'
process inside the container and that immediately makes its own root
private.

Daniel

diff -r fadb4a5b251a src/domain_conf.c
--- a/src/domain_conf.c	Mon Apr 20 10:53:47 2009 +0100
+++ b/src/domain_conf.c	Mon Apr 20 11:00:58 2009 +0100
@@ -3856,6 +3858,21 @@ const char *virDomainDefDefaultEmulator(
     return emulator;
 }
 
+virDomainFSDefPtr virDomainGetRootFilesystem(virDomainDefPtr def)
+{
+    int i;
+
+    for (i = 0 ; i < def->nfss ; i++) {
+        if (def->fss[i]->type != VIR_DOMAIN_FS_TYPE_MOUNT)
+            continue;
+
+        if (STREQ(def->fss[i]->dst, "/"))
+            return def->fss[i];
+    }
+
+    return NULL;
+}
+
 
 void virDomainObjLock(virDomainObjPtr obj)
 {
diff -r fadb4a5b251a src/domain_conf.h
--- a/src/domain_conf.h	Mon Apr 20 10:53:47 2009 +0100
+++ b/src/domain_conf.h	Mon Apr 20 11:00:58 2009 +0100
@@ -636,6 +636,8 @@ const char *virDomainDefDefaultEmulator(
                                         virDomainDefPtr def,
                                         virCapsPtr caps);
 
+virDomainFSDefPtr virDomainGetRootFilesystem(virDomainDefPtr def);
+
 void virDomainObjLock(virDomainObjPtr obj);
 void virDomainObjUnlock(virDomainObjPtr obj);
 
diff -r fadb4a5b251a src/libvirt_private.syms
--- a/src/libvirt_private.syms	Mon Apr 20 10:53:47 2009 +0100
+++ b/src/libvirt_private.syms	Mon Apr 20 11:00:58 2009 +0100
@@ -79,6 +79,7 @@ virDomainDiskQSort;
 virDomainFindByID;
 virDomainFindByName;
 virDomainFindByUUID;
+virDomainGetRootFilesystem;
 virDomainGraphicsTypeFromString;
 virDomainGraphicsDefFree;
 virDomainInputDefFree;
diff -r fadb4a5b251a src/lxc_container.c
--- a/src/lxc_container.c	Mon Apr 20 10:53:47 2009 +0100
+++ b/src/lxc_container.c	Mon Apr 20 11:00:58 2009 +0100
@@ -308,7 +308,7 @@ static int lxcContainerPivotRoot(virDoma
 
     /* Create a tmpfs root since old and new roots must be
      * on separate filesystems */
-    if (mount("", oldroot, "tmpfs", 0, NULL) < 0) {
+    if (mount("tmprootfs", oldroot, "tmpfs", 0, NULL) < 0) {
         virReportSystemError(NULL, errno,
                              _("failed to mount empty tmpfs at %s"),
                              oldroot);
@@ -338,15 +338,9 @@ static int lxcContainerPivotRoot(virDoma
 
     /* Now we chroot into the tmpfs, then pivot into the
      * root->src bind-mounted onto '/new' */
-    if (chroot(oldroot) < 0) {
-        virReportSystemError(NULL, errno, "%s",
-                             _("failed to chroot into tmpfs"));
-        goto err;
-    }
-
-    if (chdir("/new") < 0) {
-        virReportSystemError(NULL, errno, "%s",
-                             _("failed to chdir into /new on tmpfs"));
+    if (chdir(newroot) < 0) {
+        virReportSystemError(NULL, errno,
+                             _("failed to chroot into %s"), newroot);
         goto err;
     }
 
@@ -362,12 +356,6 @@ static int lxcContainerPivotRoot(virDoma
     if (chdir("/") < 0)
         goto err;
 
-    if (umount2(".oldroot", MNT_DETACH) < 0) {
-        virReportSystemError(NULL, errno, "%s",
-                             _("failed to lazily unmount old root"));
-        goto err;
-    }
-
     ret = 0;
 
 err:
@@ -377,10 +365,64 @@ err:
     return ret;
 }
 
+
+static int lxcContainerMountBasicFS(virDomainFSDefPtr root)
+{
+    const struct {
+        const char *src;
+        const char *dst;
+        const char *type;
+    } mnts[] = {
+        { "/dev", "/dev", "tmpfs" },
+        { "/proc", "/proc", "proc" },
+        { "/sys", "/sys", "sysfs" },
+#if WITH_SELINUX
+        { "none", "/selinux", "selinuxfs" },
+#endif
+    };
+    int i, rc;
+    char *devpts;
+
+    if (virAsprintf(&devpts, "/.oldroot%s/dev/pts", root->src) < 0) {
+        virReportOOMError(NULL);
+        return -1;
+    }
+
+    for (i = 0 ; i < ARRAY_CARDINALITY(mnts) ; i++) {
+        if (virFileMakePath(mnts[i].dst) < 0) {
+            virReportSystemError(NULL, errno,
+                                 _("failed to mkdir %s"),
+                                 mnts[i].src);
+            return -1;
+        }
+        if (mount(mnts[i].src, mnts[i].dst, mnts[i].type, 0, NULL) < 0) {
+            virReportSystemError(NULL, errno,
+                                 _("failed to mount %s on %s"),
+                                 mnts[i].type, mnts[i].type);
+            return -1;
+        }
+    }
+
+    if ((rc = virFileMakePath("/dev/pts") < 0)) {
+        virReportSystemError(NULL, rc, "%s",
+                             _("cannot create /dev/pts"));
+        return -1;
+    }
+
+    VIR_DEBUG("Trying to move %s to %s", devpts, "/dev/pts");
+    if ((rc = mount(devpts, "/dev/pts", NULL, MS_MOVE, NULL)) < 0) {
+        virReportSystemError(NULL, errno, "%s",
+                             _("failed to mount /dev/pts in container"));
+        return -1;
+    }
+    VIR_FREE(devpts);
+
+    return 0;
+}
+
 static int lxcContainerPopulateDevices(void)
 {
     int i;
-    int rc;
     const struct {
         int maj;
         int min;
@@ -395,33 +437,6 @@ static int lxcContainerPopulateDevices(v
         { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_URANDOM, 0666, "/dev/urandom" },
     };
 
-    if ((rc = virFileMakePath("/dev")) < 0) {
-        virReportSystemError(NULL, rc, "%s",
-                             _("cannot create /dev/"));
-        return -1;
-    }
-    if (mount("none", "/dev", "tmpfs", 0, NULL) < 0) {
-        virReportSystemError(NULL, errno, "%s",
-                             _("failed to mount /dev tmpfs"));
-        return -1;
-    }
-    /* Move old devpts into container, since we have to
-       connect to the master ptmx which was opened in
-       the parent.
-       XXX This sucks, we need to figure out how to get our
-       own private devpts for isolation
-    */
-    if ((rc = virFileMakePath("/dev/pts") < 0)) {
-        virReportSystemError(NULL, rc, "%s",
-                             _("cannot create /dev/pts"));
-        return -1;
-    }
-    if (mount("devpts", "/dev/pts", "devpts", 0, NULL) < 0) {
-        virReportSystemError(NULL, errno, "%s",
-                             _("failed to mount /dev/pts in container"));
-        return -1;
-    }
-
     /* Populate /dev/ with a few important bits */
     for (i = 0 ; i < ARRAY_CARDINALITY(devs) ; i++) {
         dev_t dev = makedev(devs[i].maj, devs[i].min);
@@ -434,6 +449,23 @@ static int lxcContainerPopulateDevices(v
         }
     }
 
+    if (access("/dev/pts/ptmx", W_OK) == 0) {
+        if (symlink("/dev/pts/ptmx", "/dev/ptmx") < 0) {
+            virReportSystemError(NULL, errno, "%s",
+                                 _("failed to create symlink /dev/ptmx to /dev/pts/ptmx"));
+            return -1;
+        }
+    } else {
+        dev_t dev = makedev(LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX);
+        if (mknod("/dev/ptmx", 0, dev) < 0 ||
+            chmod("/dev/ptmx", 0666)) {
+            virReportSystemError(NULL, errno, "%s",
+                                 _("failed to make device /dev/ptmx"));
+            return -1;
+        }
+    }
+
+
     return 0;
 }
 
@@ -493,6 +525,7 @@ static int lxcContainerUnmountOldFS(void
         return -1;
     }
     while (getmntent_r(procmnt, &mntent, mntbuf, sizeof(mntbuf)) != NULL) {
+        VIR_DEBUG("Got %s", mntent.mnt_dir);
         if (!STRPREFIX(mntent.mnt_dir, "/.oldroot"))
             continue;
 
@@ -513,6 +546,7 @@ static int lxcContainerUnmountOldFS(void
           lxcContainerChildMountSort);
 
     for (i = 0 ; i < nmounts ; i++) {
+        VIR_DEBUG("Umount %s", mounts[i]);
         if (umount(mounts[i]) < 0) {
             virReportSystemError(NULL, errno,
                                  _("failed to unmount '%s'"),
@@ -534,22 +568,23 @@ static int lxcContainerUnmountOldFS(void
 static int lxcContainerSetupPivotRoot(virDomainDefPtr vmDef,
                                       virDomainFSDefPtr root)
 {
+    /* Gives us a private root, leaving all parent OS mounts on /.oldroot */
     if (lxcContainerPivotRoot(root) < 0)
         return -1;
 
-    if (virFileMakePath("/proc") < 0 ||
-        mount("none", "/proc", "proc", 0, NULL) < 0) {
-        virReportSystemError(NULL, errno, "%s",
-                             _("failed to mount /proc"));
+    /* Mounts the core /proc, /sys, /dev, /dev/pts filesystems */
+    if (lxcContainerMountBasicFS(root) < 0)
         return -1;
-    }
 
+    /* Populates device nodes in /dev/ */
     if (lxcContainerPopulateDevices() < 0)
         return -1;
 
+    /* Sets up any non-root mounts from guest config */
     if (lxcContainerMountNewFS(vmDef) < 0)
         return -1;
 
+    /* Gets rid of all remaining mounts from host OS, including /.oldroot itself */
     if (lxcContainerUnmountOldFS() < 0)
         return -1;
 
@@ -595,18 +630,9 @@ static int lxcContainerSetupExtraMounts(
     return 0;
 }
 
-static int lxcContainerSetupMounts(virDomainDefPtr vmDef)
+static int lxcContainerSetupMounts(virDomainDefPtr vmDef,
+                                   virDomainFSDefPtr root)
 {
-    int i;
-    virDomainFSDefPtr root = NULL;
-
-    for (i = 0 ; i < vmDef->nfss ; i++) {
-        if (vmDef->fss[i]->type != VIR_DOMAIN_FS_TYPE_MOUNT)
-            continue;
-        if (STREQ(vmDef->fss[i]->dst, "/"))
-            root = vmDef->fss[i];
-    }
-
     if (root)
         return lxcContainerSetupPivotRoot(vmDef, root);
     else
@@ -630,6 +656,8 @@ static int lxcContainerChild( void *data
     lxc_child_argv_t *argv = data;
     virDomainDefPtr vmDef = argv->config;
     int ttyfd;
+    char *ttyPath;
+    virDomainFSDefPtr root;
 
     if (NULL == vmDef) {
         lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
@@ -637,16 +665,28 @@ static int lxcContainerChild( void *data
         return -1;
     }
 
-    if (lxcContainerSetupMounts(vmDef) < 0)
-        return -1;
+    root = virDomainGetRootFilesystem(vmDef);
 
-    ttyfd = open(argv->ttyPath, O_RDWR|O_NOCTTY);
+    if (root) {
+        if (virAsprintf(&ttyPath, "%s%s", root->src, argv->ttyPath) < 0) {
+            virReportOOMError(NULL);
+            return -1;
+        }
+    } else {
+        if (!(ttyPath = strdup(argv->ttyPath))) {
+            virReportOOMError(NULL);
+            return -1;
+        }
+    }
+
+    ttyfd = open(ttyPath, O_RDWR|O_NOCTTY);
     if (ttyfd < 0) {
         virReportSystemError(NULL, errno,
-                             _("failed to open %s"),
-                             argv->ttyPath);
+                             _("failed to open tty %s"),
+                             ttyPath);
         return -1;
     }
+    VIR_FREE(ttyPath);
 
     if (lxcContainerSetStdio(argv->monitor, ttyfd) < 0) {
         close(ttyfd);
@@ -654,6 +694,9 @@ static int lxcContainerChild( void *data
     }
     close(ttyfd);
 
+    if (lxcContainerSetupMounts(vmDef, root) < 0)
+        return -1;
+
     /* Wait for interface devices to show up */
     if (lxcContainerWaitForContinue(argv->monitor) < 0)
         return -1;
diff -r fadb4a5b251a src/lxc_container.h
--- a/src/lxc_container.h	Mon Apr 20 10:53:47 2009 +0100
+++ b/src/lxc_container.h	Mon Apr 20 11:00:58 2009 +0100
@@ -39,6 +39,7 @@ enum {
 
 #define LXC_DEV_MAJ_TTY     5
 #define LXC_DEV_MIN_CONSOLE 1
+#define LXC_DEV_MIN_PTMX    2
 
 #define LXC_DEV_MAJ_PTY     136
 
diff -r fadb4a5b251a src/lxc_controller.c
--- a/src/lxc_controller.c	Mon Apr 20 10:53:47 2009 +0100
+++ b/src/lxc_controller.c	Mon Apr 20 11:00:58 2009 +0100
@@ -33,6 +33,7 @@
 #include <fcntl.h>
 #include <signal.h>
 #include <getopt.h>
+#include <sys/mount.h>
 
 #include "virterror_internal.h"
 #include "logging.h"
@@ -426,6 +427,13 @@ static int lxcControllerCleanupInterface
     return 0;
 }
 
+#ifndef MS_REC
+#define MS_REC          16384
+#endif
+
+#ifndef MS_SLAVE
+#define MS_SLAVE              (1<<19)
+#endif
 
 static int
 lxcControllerRun(virDomainDefPtr def,
@@ -440,6 +448,9 @@ lxcControllerRun(virDomainDefPtr def,
     int containerPty;
     char *containerPtyPath;
     pid_t container = -1;
+    virDomainFSDefPtr root;
+    char *devpts = NULL;
+    char *devptmx = NULL;
 
     if (socketpair(PF_UNIX, SOCK_STREAM, 0, control) < 0) {
         virReportSystemError(NULL, errno, "%s",
@@ -447,14 +458,91 @@ lxcControllerRun(virDomainDefPtr def,
         goto cleanup;
     }
 
-    if (virFileOpenTty(&containerPty,
-                       &containerPtyPath,
-                       0) < 0) {
-        virReportSystemError(NULL, errno, "%s",
-                             _("failed to allocate tty"));
-        goto cleanup;
+    root = virDomainGetRootFilesystem(def);
+
+    /*
+     * If doing a chroot style setup, we need to prepare
+     * a private /dev/pts for the child now, which they
+     * will later move into position.
+     *
+     * This is complex because 'virsh console' needs to
+     * use /dev/pts from the host OS, and the guest OS
+     * needs to use /dev/pts from the guest.
+     *
+     * This means that we (libvirt_lxc) need to see and
+     * use both /dev/pts instances. We're running in the
+     * host OS context though and don't want to expose
+     * the guest OS /dev/pts there.
+     *
+     * Thus we call unshare(CLONE_NS) so that we can see
+     * the guest's new /dev/pts, without it becoming
+     * visible to the host OS. We also put the root FS
+     * into slave mode, just in case it was currently
+     * marked as shared
+     */
+    if (root) {
+        VIR_DEBUG0("Setting up private /dev/pts");
+        if (unshare(CLONE_NEWNS) < 0) {
+            virReportSystemError(NULL, errno, "%s",
+                                 _("cannot unshare mount namespace"));
+            goto cleanup;
+        }
+
+        if (mount("", "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
+            virReportSystemError(NULL, errno, "%s",
+                                 _("failed to switch root mount into slave mode"));
+            goto cleanup;
+        }
+
+        if (virAsprintf(&devpts, "%s/dev/pts", root->src) < 0 ||
+            virAsprintf(&devptmx, "%s/dev/pts/ptmx", root->src) < 0) {
+            virReportOOMError(NULL);
+            goto cleanup;
+        }
+
+        if (virFileMakePath(devpts) < 0) {
+            virReportSystemError(NULL, errno,
+                                 _("failed to make path %s"),
+                                 devpts);
+            goto cleanup;
+        }
+
+        VIR_DEBUG("Mouting 'devpts' on %s", devpts);
+        if (mount("devpts", devpts, "devpts", 0, "newinstance,ptmxmode=0666") < 0) {
+            virReportSystemError(NULL, errno,
+                                 _("failed to mount devpts on %s"),
+                                 devpts);
+            goto cleanup;
+        }
+
+        if (access(devptmx, R_OK) < 0) {
+            VIR_WARN0("kernel does not support private devpts, using shared devpts");
+            VIR_FREE(devptmx);
+        }
     }
 
+    if (devptmx) {
+        VIR_DEBUG("Opening tty on private %s", devptmx);
+        if (virFileOpenTtyAt(devptmx,
+                             &containerPty,
+                             &containerPtyPath,
+                             0) < 0) {
+            virReportSystemError(NULL, errno, "%s",
+                                 _("failed to allocate tty"));
+            goto cleanup;
+        }
+    } else {
+        VIR_DEBUG0("Opening tty on shared /dev/ptmx");
+        if (virFileOpenTty(&containerPty,
+                           &containerPtyPath,
+                           0) < 0) {
+            virReportSystemError(NULL, errno, "%s",
+                                 _("failed to allocate tty"));
+            goto cleanup;
+        }
+    }
+
+
     if (lxcSetContainerResources(def) < 0)
         goto cleanup;
 
@@ -476,6 +564,8 @@ lxcControllerRun(virDomainDefPtr def,
     rc = lxcControllerMain(monitor, client, appPty, containerPty);
 
 cleanup:
+    VIR_FREE(devptmx);
+    VIR_FREE(devpts);
     if (control[0] != -1)
         close(control[0]);
     if (control[1] != -1)
diff -r fadb4a5b251a src/util.c
--- a/src/util.c	Mon Apr 20 10:53:47 2009 +0100
+++ b/src/util.c	Mon Apr 20 11:00:58 2009 +0100
@@ -1050,14 +1050,25 @@ int virFileBuildPath(const char *dir,
 }
 
 
-#ifdef __linux__
 int virFileOpenTty(int *ttymaster,
                    char **ttyName,
                    int rawmode)
 {
+    return virFileOpenTtyAt("/dev/ptmx",
+                            ttymaster,
+                            ttyName,
+                            rawmode);
+}
+
+#ifdef __linux__
+int virFileOpenTtyAt(const char *ptmx,
+                     int *ttymaster,
+                     char **ttyName,
+                     int rawmode)
+{
     int rc = -1;
 
-    if ((*ttymaster = posix_openpt(O_RDWR|O_NOCTTY|O_NONBLOCK)) < 0)
+    if ((*ttymaster = open(ptmx, O_RDWR|O_NOCTTY|O_NONBLOCK)) < 0)
         goto cleanup;
 
     if (unlockpt(*ttymaster) < 0)
@@ -1100,9 +1111,10 @@ cleanup:
 
 }
 #else
-int virFileOpenTty(int *ttymaster ATTRIBUTE_UNUSED,
-                   char **ttyName ATTRIBUTE_UNUSED,
-                   int rawmode ATTRIBUTE_UNUSED)
+int virFileOpenTtyAt(const char *ptmx ATTRIBUTE_UNUSED,
+                     int *ttymaster ATTRIBUTE_UNUSED,
+                     char **ttyName ATTRIBUTE_UNUSED,
+                     int rawmode ATTRIBUTE_UNUSED)
 {
     return -1;
 }
diff -r fadb4a5b251a src/util.h
--- a/src/util.h	Mon Apr 20 10:53:47 2009 +0100
+++ b/src/util.h	Mon Apr 20 11:00:58 2009 +0100
@@ -103,6 +103,10 @@ int virFileBuildPath(const char *dir,
 int virFileOpenTty(int *ttymaster,
                    char **ttyName,
                    int rawmode);
+int virFileOpenTtyAt(const char *ptmx,
+                     int *ttymaster,
+                     char **ttyName,
+                     int rawmode);
 
 char* virFilePid(const char *dir,
                  const char *name);



-- 
|: Red Hat, Engineering, London   -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|




More information about the libvir-list mailing list