[libvirt] PATCH: 4/4: Add pivot_root support to container
Daniel P. Berrange
berrange at redhat.com
Thu Aug 28 09:24:36 UTC 2008
On Wed, Aug 27, 2008 at 09:58:58PM +0100, Daniel P. Berrange wrote:
> On Wed, Aug 27, 2008 at 01:32:13PM -0700, Dan Smith wrote:
> > DB> +static int lxcContainerMountNewFS(virDomainDefPtr vmDef)
> > DB> +{
> > DB> + virDomainFSDefPtr tmp;
....
> > DB> + return -1;
> >
> > Shouldn't this be "return 0"? AFAICT, this means this function will
> > always fail and thus any domain with a root target will fail to start.
> > If I change this to "return 0" I'm able to start such guests, with
> > properly pivoted roots.
>
> Yes, it clearly should be return 0.
>
> > On a more general note, it seems like there are a lot of places where
> > failures trigger a "return -1" that rolls completely up the stack with
> > no error information getting logged. Since you have the excellent
> > per-container logging functionality, can we increase the verbosity a
> > little so that there is some way to diagnose where things are failing?
> > Thus far, I've just started sprinkling fprintf()'s into the code until I
> > start to narrow things down. I'd be glad to help with that after this
> > goes in.
>
> The newly improved virExec() API which LXC now uses ensures that libvirt's
> error callback is reset to the default in child processes. This means that
> any call to virRaiseError in the child is turned into a fprintf(stderr...).
> We also wire the container stdout/err to /var/log/libvirt/lxc/NAME.log
> So if everything is operating as I expect, all the lxcError() calls in
> this code should result in log messages being written out to /var/log.
Of course in this particular scenario you would never have had a log
message because this should never have been a 'return -1'. I found a
couple of other places with missing lxcError() calls. So here's an
updated patch
diff -r 4d49719aa768 src/lxc_container.c
--- a/src/lxc_container.c Wed Aug 27 22:19:33 2008 +0100
+++ b/src/lxc_container.c Wed Aug 27 22:21:15 2008 +0100
@@ -1,10 +1,12 @@
/*
* Copyright IBM Corp. 2008
+ * Copyright Red Hat 2008
*
* lxc_container.c: file description
*
* Authors:
* David L. Leskovec <dlesko at linux.vnet.ibm.com>
+ * Daniel P. Berrange <berrange at redhat.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -26,10 +28,18 @@
#include <fcntl.h>
#include <limits.h>
#include <stdlib.h>
+#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mount.h>
#include <sys/wait.h>
#include <unistd.h>
+#include <mntent.h>
+
+/* Yes, we want linux private one, for _syscall2() macro */
+#include <linux/unistd.h>
+
+/* For MS_MOVE */
+#include <linux/fs.h>
#include "lxc_container.h"
#include "util.h"
@@ -103,23 +113,15 @@
*
* Returns 0 on success or -1 in case of error
*/
-static int lxcContainerSetStdio(int control, const char *ttyPath)
+static int lxcContainerSetStdio(int control, int ttyfd)
{
int rc = -1;
- int ttyfd;
int open_max, i;
if (setsid() < 0) {
lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
_("setsid failed: %s"), strerror(errno));
- goto error_out;
- }
-
- ttyfd = open(ttyPath, O_RDWR|O_NOCTTY);
- if (ttyfd < 0) {
- lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
- _("open(%s) failed: %s"), ttyPath, strerror(errno));
- goto error_out;
+ goto cleanup;
}
if (ioctl(ttyfd, TIOCSCTTY, NULL) < 0) {
@@ -156,9 +158,6 @@
rc = 0;
cleanup:
- close(ttyfd);
-
-error_out:
return rc;
}
@@ -221,6 +220,7 @@
return 0;
}
+
/**
* lxcEnableInterfaces:
* @vm: Pointer to vm structure
@@ -251,6 +251,285 @@
return rc;
}
+
+//_syscall2(int, pivot_root, char *, newroot, const char *, oldroot)
+extern int pivot_root(const char * new_root,const char * put_old);
+
+static int lxcContainerChildMountSort(const void *a, const void *b)
+{
+ const char **sa = (const char**)a;
+ const char **sb = (const char**)b;
+
+ /* Delibrately reversed args - we need to unmount deepest
+ children first */
+ return strcmp(*sb, *sa);
+}
+
+static int lxcContainerPivotRoot(virDomainFSDefPtr root)
+{
+ char *oldroot;
+
+ /* First step is to ensure the new root itself is
+ a mount point */
+ if (mount(root->src, root->src, NULL, MS_BIND, NULL) < 0) {
+ lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+ _("failed to bind new root %s: %s"),
+ root->src, strerror(errno));
+ return -1;
+ }
+
+ if (asprintf(&oldroot, "%s/.oldroot", root->src) < 0) {
+ oldroot = NULL;
+ lxcError(NULL, NULL, VIR_ERR_NO_MEMORY, NULL);
+ return -1;
+ }
+
+ if (virFileMakePath(oldroot) < 0) {
+ VIR_FREE(oldroot);
+ lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+ _("failed to create %s: %s"),
+ oldroot, strerror(errno));
+ return -1;
+ }
+
+ /* The old root directory will live at /.oldroot after
+ * this and will soon be unmounted completely */
+ if (pivot_root(root->src, oldroot) < 0) {
+ VIR_FREE(oldroot);
+ lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+ _("failed to pivot root %s to %s: %s"),
+ oldroot, root->src, strerror(errno));
+ return -1;
+ }
+ VIR_FREE(oldroot);
+
+ /* CWD is undefined after pivot_root, so go to / */
+ if (chdir("/") < 0) {
+ return -1;
+ }
+
+ return 0;
+}
+
+static int lxcContainerPopulateDevices(void)
+{
+ int i;
+ const struct {
+ int maj;
+ int min;
+ mode_t mode;
+ const char *path;
+ } devs[] = {
+ { 1, 3, 0666, "/dev/null" },
+ { 1, 5, 0666, "/dev/zero" },
+ { 1, 7, 0666, "/dev/full" },
+ { 5, 1, 0600, "/dev/console" },
+ { 1, 8, 0666, "/dev/random" },
+ { 1, 9, 0666, "/dev/urandom" },
+ };
+
+ if (virFileMakePath("/dev") < 0 ||
+ mount("none", "/dev", "tmpfs", 0, NULL) < 0) {
+ lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+ _("failed to mount /dev tmpfs for container: %s"),
+ strerror(errno));
+ return -1;
+ }
+ /* Move old devpts into container, since we have to
+ connect to the master ptmx which was opened in
+ the parent.
+ XXX This sucks, we need to figure out how to get our
+ own private devpts for isolation
+ */
+ if (virFileMakePath("/dev/pts") < 0 ||
+ mount("/.oldroot/dev/pts", "/dev/pts", NULL,
+ MS_MOVE, NULL) < 0) {
+ lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+ _("failed to move /dev/pts into container: %s"),
+ strerror(errno));
+ return -1;
+ }
+
+ /* Populate /dev/ with a few important bits */
+ for (i = 0 ; i < ARRAY_CARDINALITY(devs) ; i++) {
+ dev_t dev = makedev(devs[i].maj, devs[i].min);
+ if (mknod(devs[i].path, 0, dev) < 0 ||
+ chmod(devs[i].path, devs[i].mode)) {
+ lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+ _("failed to make device %s: %s"),
+ devs[i].path, strerror(errno));
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+
+static int lxcContainerMountNewFS(virDomainDefPtr vmDef)
+{
+ virDomainFSDefPtr tmp;
+
+ /* Pull in rest of container's mounts */
+ for (tmp = vmDef->fss; tmp; tmp = tmp->next) {
+ char *src;
+ if (STREQ(tmp->dst, "/"))
+ continue;
+ // XXX fix
+ if (tmp->type != VIR_DOMAIN_FS_TYPE_MOUNT)
+ continue;
+
+ if (asprintf(&src, "/.oldroot/%s", tmp->src) < 0) {
+ lxcError(NULL, NULL, VIR_ERR_NO_MEMORY, NULL);
+ return -1;
+ }
+
+ if (virFileMakePath(tmp->dst) < 0 ||
+ mount(src, tmp->dst, NULL, MS_BIND, NULL) < 0) {
+ VIR_FREE(src);
+ lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+ _("failed to mount %s at %s for container: %s"),
+ tmp->src, tmp->dst, strerror(errno));
+ return -1;
+ }
+ VIR_FREE(src);
+ }
+
+ return 0;
+}
+
+
+static int lxcContainerUnmountOldFS(void)
+{
+ struct mntent *mntent;
+ char **mounts = NULL;
+ int nmounts = 0;
+ FILE *procmnt;
+ int i;
+
+ if (!(procmnt = setmntent("/proc/mounts", "r"))) {
+ lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+ _("failed to read /proc/mounts: %s"),
+ strerror(errno));
+ return -1;
+ }
+ while ((mntent = getmntent(procmnt)) != NULL) {
+ if (!STRPREFIX(mntent->mnt_dir, "/.oldroot"))
+ continue;
+
+ if (VIR_REALLOC_N(mounts, nmounts+1) < 0) {
+ endmntent(procmnt);
+ lxcError(NULL, NULL, VIR_ERR_NO_MEMORY, NULL);
+ return -1;
+ }
+ if (!(mounts[nmounts++] = strdup(mntent->mnt_dir))) {
+ endmntent(procmnt);
+ lxcError(NULL, NULL, VIR_ERR_NO_MEMORY, NULL);
+ return -1;
+ }
+ }
+ endmntent(procmnt);
+
+ qsort(mounts, nmounts, sizeof(mounts[0]),
+ lxcContainerChildMountSort);
+
+ for (i = 0 ; i < nmounts ; i++) {
+ if (umount(mounts[i]) < 0) {
+ lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+ _("failed to unmount %s: %s"),
+ mounts[i], strerror(errno));
+ return -1;
+ }
+ VIR_FREE(mounts[i]);
+ }
+ VIR_FREE(mounts);
+
+ return 0;
+}
+
+
+/* Got a FS mapped to /, we're going the pivot_root
+ * approach to do a better-chroot-than-chroot
+ * this is based on this thread http://lkml.org/lkml/2008/3/5/29
+ */
+static int lxcContainerSetupPivotRoot(virDomainDefPtr vmDef,
+ virDomainFSDefPtr root)
+{
+ if (lxcContainerPivotRoot(root) < 0)
+ return -1;
+
+ if (virFileMakePath("/proc") < 0 ||
+ mount("none", "/proc", "proc", 0, NULL) < 0) {
+ lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+ _("failed to mount /proc for container: %s"),
+ strerror(errno));
+ return -1;
+ }
+
+ if (lxcContainerPopulateDevices() < 0)
+ return -1;
+
+ if (lxcContainerMountNewFS(vmDef) < 0)
+ return -1;
+
+ if (lxcContainerUnmountOldFS() < 0)
+ return -1;
+
+ return 0;
+}
+
+/* Nothing mapped to /, we're using the main root,
+ but with extra stuff mapped in */
+static int lxcContainerSetupExtraMounts(virDomainDefPtr vmDef)
+{
+ virDomainFSDefPtr tmp;
+
+ for (tmp = vmDef->fss; tmp; tmp = tmp->next) {
+ // XXX fix to support other mount types
+ if (tmp->type != VIR_DOMAIN_FS_TYPE_MOUNT)
+ continue;
+
+ if (mount(tmp->src,
+ tmp->dst,
+ NULL,
+ MS_BIND,
+ NULL) < 0) {
+ lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+ _("failed to mount %s at %s for container: %s"),
+ tmp->src, tmp->dst, strerror(errno));
+ return -1;
+ }
+ }
+
+ /* mount /proc */
+ if (mount("lxcproc", "/proc", "proc", 0, NULL) < 0) {
+ lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+ _("failed to mount /proc for container: %s"),
+ strerror(errno));
+ return -1;
+ }
+
+ return 0;
+}
+
+static int lxcContainerSetupMounts(virDomainDefPtr vmDef)
+{
+ virDomainFSDefPtr tmp;
+ virDomainFSDefPtr root = NULL;
+
+ for (tmp = vmDef->fss; tmp && !root; tmp = tmp->next) {
+ if (tmp->type != VIR_DOMAIN_FS_TYPE_MOUNT)
+ continue;
+ if (STREQ(tmp->dst, "/"))
+ root = tmp;
+ }
+
+ if (root)
+ return lxcContainerSetupPivotRoot(vmDef, root);
+ else
+ return lxcContainerSetupExtraMounts(vmDef);
+}
+
/**
* lxcChild:
* @argv: Pointer to container arguments
@@ -265,11 +544,9 @@
*/
static int lxcContainerChild( void *data )
{
- int rc = -1;
lxc_child_argv_t *argv = data;
virDomainDefPtr vmDef = argv->config;
- virDomainFSDefPtr curMount;
- int i;
+ int ttyfd;
if (NULL == vmDef) {
lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
@@ -277,37 +554,21 @@
return -1;
}
- /* handle the bind mounts first before doing anything else that may */
- /* then access those mounted dirs */
- curMount = vmDef->fss;
- for (i = 0; curMount; curMount = curMount->next) {
- // XXX fix
- if (curMount->type != VIR_DOMAIN_FS_TYPE_MOUNT)
- continue;
- rc = mount(curMount->src,
- curMount->dst,
- NULL,
- MS_BIND,
- NULL);
- if (0 != rc) {
- lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
- _("failed to mount %s at %s for container: %s"),
- curMount->src, curMount->dst, strerror(errno));
- return -1;
- }
- }
+ if (lxcContainerSetupMounts(vmDef) < 0)
+ return -1;
- /* mount /proc */
- rc = mount("lxcproc", "/proc", "proc", 0, NULL);
- if (0 != rc) {
+ ttyfd = open(argv->ttyPath, O_RDWR|O_NOCTTY);
+ if (ttyfd < 0) {
lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
- _("failed to mount /proc for container: %s"),
- strerror(errno));
+ _("open(%s) failed: %s"), argv->ttyPath, strerror(errno));
return -1;
}
- if (lxcContainerSetStdio(argv->monitor, argv->ttyPath) < 0)
+ if (lxcContainerSetStdio(argv->monitor, ttyfd) < 0) {
+ close(ttyfd);
return -1;
+ }
+ close(ttyfd);
/* Wait for interface devices to show up */
if (lxcContainerWaitForContinue(argv->monitor) < 0)
diff -r 4d49719aa768 src/util.c
--- a/src/util.c Wed Aug 27 22:19:33 2008 +0100
+++ b/src/util.c Wed Aug 27 22:21:15 2008 +0100
@@ -616,13 +616,11 @@
if (!(p = strrchr(parent, '/')))
return EINVAL;
- if (p == parent)
- return EPERM;
-
- *p = '\0';
-
- if ((err = virFileMakePath(parent)))
- return err;
+ if (p != parent) {
+ *p = '\0';
+ if ((err = virFileMakePath(parent)))
+ return err;
+ }
if (mkdir(path, 0777) < 0 && errno != EEXIST)
return errno;
Daniel
--
|: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :|
|: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :|
|: http://autobuild.org -o- http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|
More information about the libvir-list
mailing list