[libvirt] PATCH: Allow LXC to use private /dev/pts instance
Daniel P. Berrange
berrange at redhat.com
Mon Apr 20 10:33:09 UTC 2009
On Fri, Apr 17, 2009 at 09:39:19AM -0500, Serge E. Hallyn wrote:
> Quoting Daniel P. Berrange (berrange at redhat.com):
> > > Calling unshare(CLONE_NEWNS) will not prevent the host OS from
> > > seeing the new /dev/pts if / was MS_SHARED. That isn't taken
> > > care of anywhere else for this process's namespace, is it?
> >
> > Yeah, so this is the place where I think we must still have a difference
> > in our host setups. I'm testing this patch on a Fedora 11 host, and with
> > my current code, the new /dev/pts is not visible in the host.
>
> Well I haven't tested your patch as is, was just looking at the code.
> My pivot_root patch did a remount --make-slave, but I think it is only
> for the container itself, whereas your patch here affects the driver
> so it hasn't yet hit that remount, right?
>
> > So I can only assume this means my host / is *not* MS_SHARED, while
>
> If on your F11 host you look at
>
> cat /proc/self/mountinfo
>
> do the top lines show / as being shared? (Mine does)
No, all the F11 installs I have just show '-' and I can't find any
sign of something in the init process which makes them shared, so
perhaps its some other software you've since installed ?
For sake of testing though I've run 'mount --make-rshared /' on one
of my systems and can now reproduce the behaviour you describe with
the mount appearing for all processes.
>
> > yours is. I'm struggling to find out why this is different because
> > I'm testing on an Fedora 11 up2date system.
>
> It's possible this is just something that has been changed since.
>
> > Anyway, would it be sufficiently to add in a call
> >
> > if (mount("", "/", NULL, MS_PRIVATE|MS_REC, NULL) < 0) {
> > virReportSystemError(NULL, errno, "%s",
> > _("failed to make root private"));
> > goto cleanup;
> > }
>
> Maybe the best thing to do would be:
>
> > if (mount("", "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
> > virReportSystemError(NULL, errno, "%s",
> > _("failed to make root slave"));
> > goto cleanup;
> > }
> > if (mount("", "/", NULL, MS_SHARED|MS_REC, NULL) < 0) {
> > virReportSystemError(NULL, errno, "%s",
> > _("failed to make root shared"));
> > goto cleanup;
> > }
>
> So we are making it slave (so it will receive mounts from the host
> still), then shared (so the rest of the container will start out
> shared). That, or just turn it SLAVE and leave it like that.
This patch attached now just makes it MS_SLAVE. There's no need for the
extra SHARED flag, since the only process libvirt_lxc spawns is the 'init'
process inside the container and that immediately makes its own root
private.
Daniel
diff -r fadb4a5b251a src/domain_conf.c
--- a/src/domain_conf.c Mon Apr 20 10:53:47 2009 +0100
+++ b/src/domain_conf.c Mon Apr 20 11:00:58 2009 +0100
@@ -3856,6 +3858,21 @@ const char *virDomainDefDefaultEmulator(
return emulator;
}
+virDomainFSDefPtr virDomainGetRootFilesystem(virDomainDefPtr def)
+{
+ int i;
+
+ for (i = 0 ; i < def->nfss ; i++) {
+ if (def->fss[i]->type != VIR_DOMAIN_FS_TYPE_MOUNT)
+ continue;
+
+ if (STREQ(def->fss[i]->dst, "/"))
+ return def->fss[i];
+ }
+
+ return NULL;
+}
+
void virDomainObjLock(virDomainObjPtr obj)
{
diff -r fadb4a5b251a src/domain_conf.h
--- a/src/domain_conf.h Mon Apr 20 10:53:47 2009 +0100
+++ b/src/domain_conf.h Mon Apr 20 11:00:58 2009 +0100
@@ -636,6 +636,8 @@ const char *virDomainDefDefaultEmulator(
virDomainDefPtr def,
virCapsPtr caps);
+virDomainFSDefPtr virDomainGetRootFilesystem(virDomainDefPtr def);
+
void virDomainObjLock(virDomainObjPtr obj);
void virDomainObjUnlock(virDomainObjPtr obj);
diff -r fadb4a5b251a src/libvirt_private.syms
--- a/src/libvirt_private.syms Mon Apr 20 10:53:47 2009 +0100
+++ b/src/libvirt_private.syms Mon Apr 20 11:00:58 2009 +0100
@@ -79,6 +79,7 @@ virDomainDiskQSort;
virDomainFindByID;
virDomainFindByName;
virDomainFindByUUID;
+virDomainGetRootFilesystem;
virDomainGraphicsTypeFromString;
virDomainGraphicsDefFree;
virDomainInputDefFree;
diff -r fadb4a5b251a src/lxc_container.c
--- a/src/lxc_container.c Mon Apr 20 10:53:47 2009 +0100
+++ b/src/lxc_container.c Mon Apr 20 11:00:58 2009 +0100
@@ -308,7 +308,7 @@ static int lxcContainerPivotRoot(virDoma
/* Create a tmpfs root since old and new roots must be
* on separate filesystems */
- if (mount("", oldroot, "tmpfs", 0, NULL) < 0) {
+ if (mount("tmprootfs", oldroot, "tmpfs", 0, NULL) < 0) {
virReportSystemError(NULL, errno,
_("failed to mount empty tmpfs at %s"),
oldroot);
@@ -338,15 +338,9 @@ static int lxcContainerPivotRoot(virDoma
/* Now we chroot into the tmpfs, then pivot into the
* root->src bind-mounted onto '/new' */
- if (chroot(oldroot) < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("failed to chroot into tmpfs"));
- goto err;
- }
-
- if (chdir("/new") < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("failed to chdir into /new on tmpfs"));
+ if (chdir(newroot) < 0) {
+ virReportSystemError(NULL, errno,
+ _("failed to chroot into %s"), newroot);
goto err;
}
@@ -362,12 +356,6 @@ static int lxcContainerPivotRoot(virDoma
if (chdir("/") < 0)
goto err;
- if (umount2(".oldroot", MNT_DETACH) < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("failed to lazily unmount old root"));
- goto err;
- }
-
ret = 0;
err:
@@ -377,10 +365,64 @@ err:
return ret;
}
+
+static int lxcContainerMountBasicFS(virDomainFSDefPtr root)
+{
+ const struct {
+ const char *src;
+ const char *dst;
+ const char *type;
+ } mnts[] = {
+ { "/dev", "/dev", "tmpfs" },
+ { "/proc", "/proc", "proc" },
+ { "/sys", "/sys", "sysfs" },
+#if WITH_SELINUX
+ { "none", "/selinux", "selinuxfs" },
+#endif
+ };
+ int i, rc;
+ char *devpts;
+
+ if (virAsprintf(&devpts, "/.oldroot%s/dev/pts", root->src) < 0) {
+ virReportOOMError(NULL);
+ return -1;
+ }
+
+ for (i = 0 ; i < ARRAY_CARDINALITY(mnts) ; i++) {
+ if (virFileMakePath(mnts[i].dst) < 0) {
+ virReportSystemError(NULL, errno,
+ _("failed to mkdir %s"),
+ mnts[i].src);
+ return -1;
+ }
+ if (mount(mnts[i].src, mnts[i].dst, mnts[i].type, 0, NULL) < 0) {
+ virReportSystemError(NULL, errno,
+ _("failed to mount %s on %s"),
+ mnts[i].type, mnts[i].type);
+ return -1;
+ }
+ }
+
+ if ((rc = virFileMakePath("/dev/pts") < 0)) {
+ virReportSystemError(NULL, rc, "%s",
+ _("cannot create /dev/pts"));
+ return -1;
+ }
+
+ VIR_DEBUG("Trying to move %s to %s", devpts, "/dev/pts");
+ if ((rc = mount(devpts, "/dev/pts", NULL, MS_MOVE, NULL)) < 0) {
+ virReportSystemError(NULL, errno, "%s",
+ _("failed to mount /dev/pts in container"));
+ return -1;
+ }
+ VIR_FREE(devpts);
+
+ return 0;
+}
+
static int lxcContainerPopulateDevices(void)
{
int i;
- int rc;
const struct {
int maj;
int min;
@@ -395,33 +437,6 @@ static int lxcContainerPopulateDevices(v
{ LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_URANDOM, 0666, "/dev/urandom" },
};
- if ((rc = virFileMakePath("/dev")) < 0) {
- virReportSystemError(NULL, rc, "%s",
- _("cannot create /dev/"));
- return -1;
- }
- if (mount("none", "/dev", "tmpfs", 0, NULL) < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("failed to mount /dev tmpfs"));
- return -1;
- }
- /* Move old devpts into container, since we have to
- connect to the master ptmx which was opened in
- the parent.
- XXX This sucks, we need to figure out how to get our
- own private devpts for isolation
- */
- if ((rc = virFileMakePath("/dev/pts") < 0)) {
- virReportSystemError(NULL, rc, "%s",
- _("cannot create /dev/pts"));
- return -1;
- }
- if (mount("devpts", "/dev/pts", "devpts", 0, NULL) < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("failed to mount /dev/pts in container"));
- return -1;
- }
-
/* Populate /dev/ with a few important bits */
for (i = 0 ; i < ARRAY_CARDINALITY(devs) ; i++) {
dev_t dev = makedev(devs[i].maj, devs[i].min);
@@ -434,6 +449,23 @@ static int lxcContainerPopulateDevices(v
}
}
+ if (access("/dev/pts/ptmx", W_OK) == 0) {
+ if (symlink("/dev/pts/ptmx", "/dev/ptmx") < 0) {
+ virReportSystemError(NULL, errno, "%s",
+ _("failed to create symlink /dev/ptmx to /dev/pts/ptmx"));
+ return -1;
+ }
+ } else {
+ dev_t dev = makedev(LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX);
+ if (mknod("/dev/ptmx", 0, dev) < 0 ||
+ chmod("/dev/ptmx", 0666)) {
+ virReportSystemError(NULL, errno, "%s",
+ _("failed to make device /dev/ptmx"));
+ return -1;
+ }
+ }
+
+
return 0;
}
@@ -493,6 +525,7 @@ static int lxcContainerUnmountOldFS(void
return -1;
}
while (getmntent_r(procmnt, &mntent, mntbuf, sizeof(mntbuf)) != NULL) {
+ VIR_DEBUG("Got %s", mntent.mnt_dir);
if (!STRPREFIX(mntent.mnt_dir, "/.oldroot"))
continue;
@@ -513,6 +546,7 @@ static int lxcContainerUnmountOldFS(void
lxcContainerChildMountSort);
for (i = 0 ; i < nmounts ; i++) {
+ VIR_DEBUG("Umount %s", mounts[i]);
if (umount(mounts[i]) < 0) {
virReportSystemError(NULL, errno,
_("failed to unmount '%s'"),
@@ -534,22 +568,23 @@ static int lxcContainerUnmountOldFS(void
static int lxcContainerSetupPivotRoot(virDomainDefPtr vmDef,
virDomainFSDefPtr root)
{
+ /* Gives us a private root, leaving all parent OS mounts on /.oldroot */
if (lxcContainerPivotRoot(root) < 0)
return -1;
- if (virFileMakePath("/proc") < 0 ||
- mount("none", "/proc", "proc", 0, NULL) < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("failed to mount /proc"));
+ /* Mounts the core /proc, /sys, /dev, /dev/pts filesystems */
+ if (lxcContainerMountBasicFS(root) < 0)
return -1;
- }
+ /* Populates device nodes in /dev/ */
if (lxcContainerPopulateDevices() < 0)
return -1;
+ /* Sets up any non-root mounts from guest config */
if (lxcContainerMountNewFS(vmDef) < 0)
return -1;
+ /* Gets rid of all remaining mounts from host OS, including /.oldroot itself */
if (lxcContainerUnmountOldFS() < 0)
return -1;
@@ -595,18 +630,9 @@ static int lxcContainerSetupExtraMounts(
return 0;
}
-static int lxcContainerSetupMounts(virDomainDefPtr vmDef)
+static int lxcContainerSetupMounts(virDomainDefPtr vmDef,
+ virDomainFSDefPtr root)
{
- int i;
- virDomainFSDefPtr root = NULL;
-
- for (i = 0 ; i < vmDef->nfss ; i++) {
- if (vmDef->fss[i]->type != VIR_DOMAIN_FS_TYPE_MOUNT)
- continue;
- if (STREQ(vmDef->fss[i]->dst, "/"))
- root = vmDef->fss[i];
- }
-
if (root)
return lxcContainerSetupPivotRoot(vmDef, root);
else
@@ -630,6 +656,8 @@ static int lxcContainerChild( void *data
lxc_child_argv_t *argv = data;
virDomainDefPtr vmDef = argv->config;
int ttyfd;
+ char *ttyPath;
+ virDomainFSDefPtr root;
if (NULL == vmDef) {
lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
@@ -637,16 +665,28 @@ static int lxcContainerChild( void *data
return -1;
}
- if (lxcContainerSetupMounts(vmDef) < 0)
- return -1;
+ root = virDomainGetRootFilesystem(vmDef);
- ttyfd = open(argv->ttyPath, O_RDWR|O_NOCTTY);
+ if (root) {
+ if (virAsprintf(&ttyPath, "%s%s", root->src, argv->ttyPath) < 0) {
+ virReportOOMError(NULL);
+ return -1;
+ }
+ } else {
+ if (!(ttyPath = strdup(argv->ttyPath))) {
+ virReportOOMError(NULL);
+ return -1;
+ }
+ }
+
+ ttyfd = open(ttyPath, O_RDWR|O_NOCTTY);
if (ttyfd < 0) {
virReportSystemError(NULL, errno,
- _("failed to open %s"),
- argv->ttyPath);
+ _("failed to open tty %s"),
+ ttyPath);
return -1;
}
+ VIR_FREE(ttyPath);
if (lxcContainerSetStdio(argv->monitor, ttyfd) < 0) {
close(ttyfd);
@@ -654,6 +694,9 @@ static int lxcContainerChild( void *data
}
close(ttyfd);
+ if (lxcContainerSetupMounts(vmDef, root) < 0)
+ return -1;
+
/* Wait for interface devices to show up */
if (lxcContainerWaitForContinue(argv->monitor) < 0)
return -1;
diff -r fadb4a5b251a src/lxc_container.h
--- a/src/lxc_container.h Mon Apr 20 10:53:47 2009 +0100
+++ b/src/lxc_container.h Mon Apr 20 11:00:58 2009 +0100
@@ -39,6 +39,7 @@ enum {
#define LXC_DEV_MAJ_TTY 5
#define LXC_DEV_MIN_CONSOLE 1
+#define LXC_DEV_MIN_PTMX 2
#define LXC_DEV_MAJ_PTY 136
diff -r fadb4a5b251a src/lxc_controller.c
--- a/src/lxc_controller.c Mon Apr 20 10:53:47 2009 +0100
+++ b/src/lxc_controller.c Mon Apr 20 11:00:58 2009 +0100
@@ -33,6 +33,7 @@
#include <fcntl.h>
#include <signal.h>
#include <getopt.h>
+#include <sys/mount.h>
#include "virterror_internal.h"
#include "logging.h"
@@ -426,6 +427,13 @@ static int lxcControllerCleanupInterface
return 0;
}
+#ifndef MS_REC
+#define MS_REC 16384
+#endif
+
+#ifndef MS_SLAVE
+#define MS_SLAVE (1<<19)
+#endif
static int
lxcControllerRun(virDomainDefPtr def,
@@ -440,6 +448,9 @@ lxcControllerRun(virDomainDefPtr def,
int containerPty;
char *containerPtyPath;
pid_t container = -1;
+ virDomainFSDefPtr root;
+ char *devpts = NULL;
+ char *devptmx = NULL;
if (socketpair(PF_UNIX, SOCK_STREAM, 0, control) < 0) {
virReportSystemError(NULL, errno, "%s",
@@ -447,14 +458,91 @@ lxcControllerRun(virDomainDefPtr def,
goto cleanup;
}
- if (virFileOpenTty(&containerPty,
- &containerPtyPath,
- 0) < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("failed to allocate tty"));
- goto cleanup;
+ root = virDomainGetRootFilesystem(def);
+
+ /*
+ * If doing a chroot style setup, we need to prepare
+ * a private /dev/pts for the child now, which they
+ * will later move into position.
+ *
+ * This is complex because 'virsh console' needs to
+ * use /dev/pts from the host OS, and the guest OS
+ * needs to use /dev/pts from the guest.
+ *
+ * This means that we (libvirt_lxc) need to see and
+ * use both /dev/pts instances. We're running in the
+ * host OS context though and don't want to expose
+ * the guest OS /dev/pts there.
+ *
+ * Thus we call unshare(CLONE_NS) so that we can see
+ * the guest's new /dev/pts, without it becoming
+ * visible to the host OS. We also put the root FS
+ * into slave mode, just in case it was currently
+ * marked as shared
+ */
+ if (root) {
+ VIR_DEBUG0("Setting up private /dev/pts");
+ if (unshare(CLONE_NEWNS) < 0) {
+ virReportSystemError(NULL, errno, "%s",
+ _("cannot unshare mount namespace"));
+ goto cleanup;
+ }
+
+ if (mount("", "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
+ virReportSystemError(NULL, errno, "%s",
+ _("failed to switch root mount into slave mode"));
+ goto cleanup;
+ }
+
+ if (virAsprintf(&devpts, "%s/dev/pts", root->src) < 0 ||
+ virAsprintf(&devptmx, "%s/dev/pts/ptmx", root->src) < 0) {
+ virReportOOMError(NULL);
+ goto cleanup;
+ }
+
+ if (virFileMakePath(devpts) < 0) {
+ virReportSystemError(NULL, errno,
+ _("failed to make path %s"),
+ devpts);
+ goto cleanup;
+ }
+
+ VIR_DEBUG("Mouting 'devpts' on %s", devpts);
+ if (mount("devpts", devpts, "devpts", 0, "newinstance,ptmxmode=0666") < 0) {
+ virReportSystemError(NULL, errno,
+ _("failed to mount devpts on %s"),
+ devpts);
+ goto cleanup;
+ }
+
+ if (access(devptmx, R_OK) < 0) {
+ VIR_WARN0("kernel does not support private devpts, using shared devpts");
+ VIR_FREE(devptmx);
+ }
}
+ if (devptmx) {
+ VIR_DEBUG("Opening tty on private %s", devptmx);
+ if (virFileOpenTtyAt(devptmx,
+ &containerPty,
+ &containerPtyPath,
+ 0) < 0) {
+ virReportSystemError(NULL, errno, "%s",
+ _("failed to allocate tty"));
+ goto cleanup;
+ }
+ } else {
+ VIR_DEBUG0("Opening tty on shared /dev/ptmx");
+ if (virFileOpenTty(&containerPty,
+ &containerPtyPath,
+ 0) < 0) {
+ virReportSystemError(NULL, errno, "%s",
+ _("failed to allocate tty"));
+ goto cleanup;
+ }
+ }
+
+
if (lxcSetContainerResources(def) < 0)
goto cleanup;
@@ -476,6 +564,8 @@ lxcControllerRun(virDomainDefPtr def,
rc = lxcControllerMain(monitor, client, appPty, containerPty);
cleanup:
+ VIR_FREE(devptmx);
+ VIR_FREE(devpts);
if (control[0] != -1)
close(control[0]);
if (control[1] != -1)
diff -r fadb4a5b251a src/util.c
--- a/src/util.c Mon Apr 20 10:53:47 2009 +0100
+++ b/src/util.c Mon Apr 20 11:00:58 2009 +0100
@@ -1050,14 +1050,25 @@ int virFileBuildPath(const char *dir,
}
-#ifdef __linux__
int virFileOpenTty(int *ttymaster,
char **ttyName,
int rawmode)
{
+ return virFileOpenTtyAt("/dev/ptmx",
+ ttymaster,
+ ttyName,
+ rawmode);
+}
+
+#ifdef __linux__
+int virFileOpenTtyAt(const char *ptmx,
+ int *ttymaster,
+ char **ttyName,
+ int rawmode)
+{
int rc = -1;
- if ((*ttymaster = posix_openpt(O_RDWR|O_NOCTTY|O_NONBLOCK)) < 0)
+ if ((*ttymaster = open(ptmx, O_RDWR|O_NOCTTY|O_NONBLOCK)) < 0)
goto cleanup;
if (unlockpt(*ttymaster) < 0)
@@ -1100,9 +1111,10 @@ cleanup:
}
#else
-int virFileOpenTty(int *ttymaster ATTRIBUTE_UNUSED,
- char **ttyName ATTRIBUTE_UNUSED,
- int rawmode ATTRIBUTE_UNUSED)
+int virFileOpenTtyAt(const char *ptmx ATTRIBUTE_UNUSED,
+ int *ttymaster ATTRIBUTE_UNUSED,
+ char **ttyName ATTRIBUTE_UNUSED,
+ int rawmode ATTRIBUTE_UNUSED)
{
return -1;
}
diff -r fadb4a5b251a src/util.h
--- a/src/util.h Mon Apr 20 10:53:47 2009 +0100
+++ b/src/util.h Mon Apr 20 11:00:58 2009 +0100
@@ -103,6 +103,10 @@ int virFileBuildPath(const char *dir,
int virFileOpenTty(int *ttymaster,
char **ttyName,
int rawmode);
+int virFileOpenTtyAt(const char *ptmx,
+ int *ttymaster,
+ char **ttyName,
+ int rawmode);
char* virFilePid(const char *dir,
const char *name);
--
|: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :|
|: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :|
|: http://autobuild.org -o- http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|
More information about the libvir-list
mailing list