[libvirt] [PATCH RFC 3/5] lxc: adds checkpoint and restore helper functions

Daniel P. Berrange berrange at redhat.com
Fri Jul 22 08:51:18 UTC 2016


On Thu, Jul 21, 2016 at 03:37:25PM +0000, Katerina Koukiou wrote:
> This patch adds some helper functions for checkpointing/restoring
> linux containers. We use CRIU binary.
> 
> Signed-off-by: Katerina Koukiou <k.koukiou at gmail.com>
> ---
>  po/POTFILES.in     |   1 +
>  src/Makefile.am    |   3 +-
>  src/lxc/lxc_criu.c | 273 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  src/lxc/lxc_criu.h |  34 +++++++
>  4 files changed, 310 insertions(+), 1 deletion(-)
>  create mode 100644 src/lxc/lxc_criu.c
>  create mode 100644 src/lxc/lxc_criu.h
> 
> diff --git a/po/POTFILES.in b/po/POTFILES.in
> index a6b6c9c..718b11d 100644
> --- a/po/POTFILES.in
> +++ b/po/POTFILES.in
> @@ -95,6 +95,7 @@ src/lxc/lxc_cgroup.c
>  src/lxc/lxc_conf.c
>  src/lxc/lxc_container.c
>  src/lxc/lxc_controller.c
> +src/lxc/lxc_criu.c
>  src/lxc/lxc_domain.c
>  src/lxc/lxc_driver.c
>  src/lxc/lxc_fuse.c
> diff --git a/src/Makefile.am b/src/Makefile.am
> index 78c493c..64a7680 100644
> --- a/src/Makefile.am
> +++ b/src/Makefile.am
> @@ -750,7 +750,8 @@ LXC_DRIVER_SOURCES =						\
>  		lxc/lxc_process.c lxc/lxc_process.h		\
>  		lxc/lxc_fuse.c lxc/lxc_fuse.h			\
>  		lxc/lxc_native.c lxc/lxc_native.h		\
> -		lxc/lxc_driver.c lxc/lxc_driver.h
> +		lxc/lxc_driver.c lxc/lxc_driver.h		\
> +		lxc/lxc_criu.c lxc/lxc_criu.h
>  
>  LXC_CONTROLLER_SOURCES =					\
>  		$(LXC_MONITOR_PROTOCOL_GENERATED)		\
> diff --git a/src/lxc/lxc_criu.c b/src/lxc/lxc_criu.c
> new file mode 100644
> index 0000000..6944223
> --- /dev/null
> +++ b/src/lxc/lxc_criu.c
> @@ -0,0 +1,273 @@
> +/*
> + * lxc_criu.c: wrapper functions for CRIU C API to be used for lxc migration
> + *
> + * Copyright (C) 2016 Katerina Koukiou
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library.  If not, see
> + * <http://www.gnu.org/licenses/>.
> + *
> + * Author: Katerina Koukiou <k.koukiou at gmail.com>
> + */
> +
> +#include <config.h>
> +
> +#include <fcntl.h>
> +#include <sys/stat.h>
> +#include <sys/mount.h>
> +
> +#include "virobject.h"
> +#include "virerror.h"
> +#include "virlog.h"
> +#include "virfile.h"
> +#include "vircommand.h"
> +#include "virstring.h"
> +#include "viralloc.h"
> +
> +#include "lxc_domain.h"
> +#include "lxc_driver.h"
> +#include "lxc_criu.h"
> +
> +#define VIR_FROM_THIS VIR_FROM_LXC
> +
> +VIR_LOG_INIT("lxc.lxc_criu");
> +
> +#ifdef CRIU
> +int lxcCriuDump(virLXCDriverPtr driver ATTRIBUTE_UNUSED,
> +                virDomainObjPtr vm,
> +                const char *checkpointdir)

For dumping a container we should be creating a single file
containing all the data, not creating multiple files spread
across a directory. Take a look at what we do with the QEMU
driver where we have a magic header, then the XML description
and then the actual dumped state from QEMU. We should do the
same kind of thing with LXC.

> +{
> +    int fd;
> +    int ret = -1;
> +    virLXCDomainObjPrivatePtr priv;
> +    virCommandPtr cmd;
> +    struct stat sb;
> +    char *path = NULL;
> +    char *tty_info_path = NULL;
> +    char *ttyinfo = NULL;
> +    int status;
> +
> +    if (virFileMakePath(checkpointdir) < 0) {
> +        virReportSystemError(errno,
> +                             _("Failed to mkdir %s"), checkpointdir);
> +        return -1;
> +    }
> +
> +    fd = open(checkpointdir, O_DIRECTORY);
> +    if (fd < 0) {
> +        virReportSystemError(errno,
> +                             _("Failed to open directory %s"), checkpointdir);
> +        return -1;
> +    }
> +
> +    cmd = virCommandNew(CRIU);
> +    virCommandAddArg(cmd, "dump");
> +
> +    virCommandAddArgList(cmd, "--images-dir", checkpointdir, NULL);
> +
> +    virCommandAddArgList(cmd, "--log-file", "dump.log", NULL);
> +
> +    virCommandAddArgList(cmd, "-vvvv", NULL);
> +
> +    priv = vm->privateData;
> +    virCommandAddArg(cmd, "--tree");
> +    virCommandAddArgFormat(cmd, "%d", priv->initpid);
> +
> +    virCommandAddArgList(cmd, "--tcp-established", "--file-locks",
> +                              "--link-remap", "--force-irmap", NULL);
> +
> +    virCommandAddArgList(cmd, "--manage-cgroup", NULL);
> +
> +    virCommandAddArgList(cmd, "--enable-external-sharing",
> +                              "--enable-external-masters", NULL);
> +
> +    virCommandAddArgList(cmd, "--enable-fs", "hugetlbfs",
> +                              "--enable-fs", "tracefs", NULL);
> +
> +    /* Add support for FUSE */
> +    virCommandAddArgList(cmd, "--ext-mount-map", "/proc/meminfo:fuse", NULL);
> +    virCommandAddArgList(cmd, "--ghost-limit", "10000000", NULL);
> +
> +    virCommandAddArgList(cmd, "--ext-mount-map", "/dev/console:console", NULL);
> +    virCommandAddArgList(cmd, "--ext-mount-map", "/dev/tty1:tty1", NULL);
> +    virCommandAddArgList(cmd, "--ext-mount-map", "auto", NULL);
> +
> +    /* The master pair of the /dev/pts device lives outside from what is dumped
> +     * inside the libvirt-lxc process. Add the slave pair as an external tty
> +     * otherwise criu will fail.
> +     */
> +    if (virAsprintf(&path, "/proc/%d/root/dev/pts/0", priv->initpid) < 0)
> +        goto cleanup;
> +
> +    if (stat(path, &sb) < 0) {
> +        virReportSystemError(errno,
> +                             _("Unable to stat %s"), path);
> +        goto cleanup;
> +    }
> +
> +    if (virAsprintf(&tty_info_path, "%s/tty.info", checkpointdir) < 0)
> +        goto cleanup;
> +
> +    if (virAsprintf(&ttyinfo, "tty[%x:%x]",
> +                   (unsigned int)sb.st_rdev, (unsigned int)sb.st_dev) < 0)
> +        goto cleanup;
> +
> +    if (virFileWriteStr(tty_info_path, ttyinfo, 0666) < 0) {
> +        virReportError(VIR_ERR_INTERNAL_ERROR,
> +                       _("Failed to write tty info to %s"), tty_info_path);
> +        goto cleanup;
> +    }
> +
> +    VIR_DEBUG("tty.info: tty[%x:%x]",
> +             (unsigned int)sb.st_dev, (unsigned int)sb.st_rdev);
> +    virCommandAddArg(cmd, "--external");
> +    virCommandAddArgFormat(cmd, "tty[%x:%x]",
> +                          (unsigned int)sb.st_rdev, (unsigned int)sb.st_dev);
> +
> +    VIR_DEBUG("About to checkpoint domain %s (pid = %d)",
> +              vm->def->name, priv->initpid);
> +    virCommandRawStatus(cmd);
> +    if (virCommandRun(cmd, &status) < 0)
> +        goto cleanup;
> +
> +    ret = 0;
> +
> + cleanup:
> +    VIR_FORCE_CLOSE(fd);
> +    VIR_FREE(path);
> +    VIR_FREE(tty_info_path);
> +    VIR_FREE(ttyinfo);
> +
> +    if (ret < 0)
> +        return ret;
> +    return status;
> +}
> +
> +int lxcCriuRestore(virDomainDefPtr def, int restorefd,
> +                   int ttyfd)
> +{
> +    int ret = -1;
> +    virCommandPtr cmd;
> +    char *ttyinfo = NULL;
> +    char *inheritfd = NULL;
> +    char *tty_info_path = NULL;
> +    char *checkpointfd = NULL;
> +    char *checkpointdir = NULL;
> +    char *rootfs_mount = NULL;
> +
> +    cmd = virCommandNew(CRIU);
> +    virCommandAddArg(cmd, "restore");
> +
> +    if (virAsprintf(&checkpointfd, "/proc/self/fd/%d", restorefd) < 0) {
> +        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                       _("Failed to write checkpoint dir path"));
> +        goto cleanup;
> +    }
> +
> +    if (virFileResolveLink(checkpointfd, &checkpointdir) < 0) {
> +        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                       _("Failed to readlink checkpoint dir path"));
> +        goto cleanup;
> +    }
> +
> +    /* CRIU needs the container's root bind mounted so that it is the root of
> +     * some mount.
> +     */
> +    if (virAsprintf(&rootfs_mount, "/tmp/%s", def->name) < 0) {

If this is a directory on the host filesystem, then this is a security
flaw. You must never create predictable filenames in /tmp. Ideally files
would be under one of the private directories libvirt already uses in
/var/run/libvirt or /var/lib/libvirt


Regards,
Daniel
-- 
|: http://berrange.com      -o-    http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org              -o-             http://virt-manager.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org       -o-       http://live.gnome.org/gtk-vnc :|




More information about the libvir-list mailing list