[libvirt] [PATCH RFC 3/5] lxc: adds checkpoint and restore helper functions
Daniel P. Berrange
berrange at redhat.com
Fri Jul 22 08:51:18 UTC 2016
On Thu, Jul 21, 2016 at 03:37:25PM +0000, Katerina Koukiou wrote:
> This patch adds some helper functions for checkpointing/restoring
> linux containers. We use CRIU binary.
>
> Signed-off-by: Katerina Koukiou <k.koukiou at gmail.com>
> ---
> po/POTFILES.in | 1 +
> src/Makefile.am | 3 +-
> src/lxc/lxc_criu.c | 273 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> src/lxc/lxc_criu.h | 34 +++++++
> 4 files changed, 310 insertions(+), 1 deletion(-)
> create mode 100644 src/lxc/lxc_criu.c
> create mode 100644 src/lxc/lxc_criu.h
>
> diff --git a/po/POTFILES.in b/po/POTFILES.in
> index a6b6c9c..718b11d 100644
> --- a/po/POTFILES.in
> +++ b/po/POTFILES.in
> @@ -95,6 +95,7 @@ src/lxc/lxc_cgroup.c
> src/lxc/lxc_conf.c
> src/lxc/lxc_container.c
> src/lxc/lxc_controller.c
> +src/lxc/lxc_criu.c
> src/lxc/lxc_domain.c
> src/lxc/lxc_driver.c
> src/lxc/lxc_fuse.c
> diff --git a/src/Makefile.am b/src/Makefile.am
> index 78c493c..64a7680 100644
> --- a/src/Makefile.am
> +++ b/src/Makefile.am
> @@ -750,7 +750,8 @@ LXC_DRIVER_SOURCES = \
> lxc/lxc_process.c lxc/lxc_process.h \
> lxc/lxc_fuse.c lxc/lxc_fuse.h \
> lxc/lxc_native.c lxc/lxc_native.h \
> - lxc/lxc_driver.c lxc/lxc_driver.h
> + lxc/lxc_driver.c lxc/lxc_driver.h \
> + lxc/lxc_criu.c lxc/lxc_criu.h
>
> LXC_CONTROLLER_SOURCES = \
> $(LXC_MONITOR_PROTOCOL_GENERATED) \
> diff --git a/src/lxc/lxc_criu.c b/src/lxc/lxc_criu.c
> new file mode 100644
> index 0000000..6944223
> --- /dev/null
> +++ b/src/lxc/lxc_criu.c
> @@ -0,0 +1,273 @@
> +/*
> + * lxc_criu.c: wrapper functions for CRIU C API to be used for lxc migration
> + *
> + * Copyright (C) 2016 Katerina Koukiou
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see
> + * <http://www.gnu.org/licenses/>.
> + *
> + * Author: Katerina Koukiou <k.koukiou at gmail.com>
> + */
> +
> +#include <config.h>
> +
> +#include <fcntl.h>
> +#include <sys/stat.h>
> +#include <sys/mount.h>
> +
> +#include "virobject.h"
> +#include "virerror.h"
> +#include "virlog.h"
> +#include "virfile.h"
> +#include "vircommand.h"
> +#include "virstring.h"
> +#include "viralloc.h"
> +
> +#include "lxc_domain.h"
> +#include "lxc_driver.h"
> +#include "lxc_criu.h"
> +
> +#define VIR_FROM_THIS VIR_FROM_LXC
> +
> +VIR_LOG_INIT("lxc.lxc_criu");
> +
> +#ifdef CRIU
> +int lxcCriuDump(virLXCDriverPtr driver ATTRIBUTE_UNUSED,
> + virDomainObjPtr vm,
> + const char *checkpointdir)
For dumping a container we should be creating a single file
containing all the data, not creating multiple files spread
across a directory. Take a look at what we do with the QEMU
driver where we have a magic header, then the XML description
and then the actual dumped state from QEMU. We should do the
same kind of thing with LXC.
> +{
> + int fd;
> + int ret = -1;
> + virLXCDomainObjPrivatePtr priv;
> + virCommandPtr cmd;
> + struct stat sb;
> + char *path = NULL;
> + char *tty_info_path = NULL;
> + char *ttyinfo = NULL;
> + int status;
> +
> + if (virFileMakePath(checkpointdir) < 0) {
> + virReportSystemError(errno,
> + _("Failed to mkdir %s"), checkpointdir);
> + return -1;
> + }
> +
> + fd = open(checkpointdir, O_DIRECTORY);
> + if (fd < 0) {
> + virReportSystemError(errno,
> + _("Failed to open directory %s"), checkpointdir);
> + return -1;
> + }
> +
> + cmd = virCommandNew(CRIU);
> + virCommandAddArg(cmd, "dump");
> +
> + virCommandAddArgList(cmd, "--images-dir", checkpointdir, NULL);
> +
> + virCommandAddArgList(cmd, "--log-file", "dump.log", NULL);
> +
> + virCommandAddArgList(cmd, "-vvvv", NULL);
> +
> + priv = vm->privateData;
> + virCommandAddArg(cmd, "--tree");
> + virCommandAddArgFormat(cmd, "%d", priv->initpid);
> +
> + virCommandAddArgList(cmd, "--tcp-established", "--file-locks",
> + "--link-remap", "--force-irmap", NULL);
> +
> + virCommandAddArgList(cmd, "--manage-cgroup", NULL);
> +
> + virCommandAddArgList(cmd, "--enable-external-sharing",
> + "--enable-external-masters", NULL);
> +
> + virCommandAddArgList(cmd, "--enable-fs", "hugetlbfs",
> + "--enable-fs", "tracefs", NULL);
> +
> + /* Add support for FUSE */
> + virCommandAddArgList(cmd, "--ext-mount-map", "/proc/meminfo:fuse", NULL);
> + virCommandAddArgList(cmd, "--ghost-limit", "10000000", NULL);
> +
> + virCommandAddArgList(cmd, "--ext-mount-map", "/dev/console:console", NULL);
> + virCommandAddArgList(cmd, "--ext-mount-map", "/dev/tty1:tty1", NULL);
> + virCommandAddArgList(cmd, "--ext-mount-map", "auto", NULL);
> +
> + /* The master pair of the /dev/pts device lives outside from what is dumped
> + * inside the libvirt-lxc process. Add the slave pair as an external tty
> + * otherwise criu will fail.
> + */
> + if (virAsprintf(&path, "/proc/%d/root/dev/pts/0", priv->initpid) < 0)
> + goto cleanup;
> +
> + if (stat(path, &sb) < 0) {
> + virReportSystemError(errno,
> + _("Unable to stat %s"), path);
> + goto cleanup;
> + }
> +
> + if (virAsprintf(&tty_info_path, "%s/tty.info", checkpointdir) < 0)
> + goto cleanup;
> +
> + if (virAsprintf(&ttyinfo, "tty[%x:%x]",
> + (unsigned int)sb.st_rdev, (unsigned int)sb.st_dev) < 0)
> + goto cleanup;
> +
> + if (virFileWriteStr(tty_info_path, ttyinfo, 0666) < 0) {
> + virReportError(VIR_ERR_INTERNAL_ERROR,
> + _("Failed to write tty info to %s"), tty_info_path);
> + goto cleanup;
> + }
> +
> + VIR_DEBUG("tty.info: tty[%x:%x]",
> + (unsigned int)sb.st_dev, (unsigned int)sb.st_rdev);
> + virCommandAddArg(cmd, "--external");
> + virCommandAddArgFormat(cmd, "tty[%x:%x]",
> + (unsigned int)sb.st_rdev, (unsigned int)sb.st_dev);
> +
> + VIR_DEBUG("About to checkpoint domain %s (pid = %d)",
> + vm->def->name, priv->initpid);
> + virCommandRawStatus(cmd);
> + if (virCommandRun(cmd, &status) < 0)
> + goto cleanup;
> +
> + ret = 0;
> +
> + cleanup:
> + VIR_FORCE_CLOSE(fd);
> + VIR_FREE(path);
> + VIR_FREE(tty_info_path);
> + VIR_FREE(ttyinfo);
> +
> + if (ret < 0)
> + return ret;
> + return status;
> +}
> +
> +int lxcCriuRestore(virDomainDefPtr def, int restorefd,
> + int ttyfd)
> +{
> + int ret = -1;
> + virCommandPtr cmd;
> + char *ttyinfo = NULL;
> + char *inheritfd = NULL;
> + char *tty_info_path = NULL;
> + char *checkpointfd = NULL;
> + char *checkpointdir = NULL;
> + char *rootfs_mount = NULL;
> +
> + cmd = virCommandNew(CRIU);
> + virCommandAddArg(cmd, "restore");
> +
> + if (virAsprintf(&checkpointfd, "/proc/self/fd/%d", restorefd) < 0) {
> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("Failed to write checkpoint dir path"));
> + goto cleanup;
> + }
> +
> + if (virFileResolveLink(checkpointfd, &checkpointdir) < 0) {
> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("Failed to readlink checkpoint dir path"));
> + goto cleanup;
> + }
> +
> + /* CRIU needs the container's root bind mounted so that it is the root of
> + * some mount.
> + */
> + if (virAsprintf(&rootfs_mount, "/tmp/%s", def->name) < 0) {
If this is a directory on the host filesystem, then this is a security
flaw. You must never create predictable filenames in /tmp. Ideally files
would be under one of the private directories libvirt already uses in
/var/run/libvirt or /var/lib/libvirt.
Regards,
Daniel
--
|: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org -o- http://virt-manager.org :|
|: http://autobuild.org -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|
More information about the libvir-list
mailing list