[libvirt] [RFC PATCH 3/4] lxc: Add restore mode for libvirt-lxc

Radostin Stoyanov rstoyanov1 at gmail.com
Wed Apr 11 11:29:14 UTC 2018


Extend `lxcContainerStart` to support restoring a container from a file
descriptor of the directory that contains the saved state of an LXC container.

Signed-off-by: Radostin Stoyanov <rstoyanov1 at gmail.com>
---
 src/lxc/lxc_container.c  | 162 +++++++++++++++++++++++++++++++++++++++++++++--
 src/lxc/lxc_container.h  |   3 +-
 src/lxc/lxc_controller.c | 104 ++++++++++++++++++++++++++++--
 src/lxc/lxc_driver.c     |   4 +-
 src/lxc/lxc_process.c    |  23 +++++--
 src/lxc/lxc_process.h    |   1 +
 6 files changed, 280 insertions(+), 17 deletions(-)

diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
index 532fd0be0..6cd203d7f 100644
--- a/src/lxc/lxc_container.c
+++ b/src/lxc/lxc_container.c
@@ -69,6 +69,8 @@
 #include "virprocess.h"
 #include "virstring.h"
 
+#include "lxc_criu.h"
+
 #define VIR_FROM_THIS VIR_FROM_LXC
 
 VIR_LOG_INIT("lxc.lxc_container");
@@ -111,6 +113,7 @@ struct __lxc_child_argv {
     char **ttyPaths;
     int handshakefd;
     int *nsInheritFDs;
+    int restorefd;
 };
 
 static int lxcContainerMountFSBlock(virDomainFSDefPtr fs,
@@ -263,6 +266,7 @@ static virCommandPtr lxcContainerBuildInitCmd(virDomainDefPtr vmDef,
  * @ttyfd: FD of tty to set as the container console
  * @npassFDs: number of extra FDs
  * @passFDs: list of extra FDs
+ * @restorefd: FD of the container dump directory to keep open (-1 if none)
  *
  * Setup file descriptors in the container. @ttyfd is set to be
  * the container's stdin, stdout & stderr. Any FDs included in
@@ -272,7 +276,7 @@ static virCommandPtr lxcContainerBuildInitCmd(virDomainDefPtr vmDef,
  * Returns 0 on success or -1 in case of error
  */
 static int lxcContainerSetupFDs(int *ttyfd,
-                                size_t npassFDs, int *passFDs)
+                                size_t npassFDs, int *passFDs, int restorefd)
 {
     int rc = -1;
     int open_max;
@@ -368,6 +372,8 @@ static int lxcContainerSetupFDs(int *ttyfd,
     }
 
     for (fd = last_fd + 1; fd < open_max; fd++) {
+        if (fd == restorefd)
+            continue;
         int tmpfd = fd;
         VIR_MASS_CLOSE(tmpfd);
     }
@@ -1083,6 +1089,31 @@ static int lxcContainerMountFSDev(virDomainDefPtr def,
     return ret;
 }
 
+/* Relocate the mount found at "<stateDir>/<def->name>.devpts" onto
+ * /dev/pts by means of an MS_MOVE mount.
+ * Returns 0 on success, -1 on failure. */
+static int lxcContainerMountFSDevPTSRestore(virDomainDefPtr def,
+                                            const char *stateDir)
+{
+    char *devpts = NULL;
+    int rc = 0;
+
+    VIR_DEBUG("Mount /dev/pts stateDir=%s", stateDir);
+
+    if (virAsprintf(&devpts, "%s/%s.devpts", stateDir, def->name) < 0)
+        return -1;
+
+    VIR_DEBUG("Trying to move %s to /dev/pts", devpts);
+
+    if (mount(devpts, "/dev/pts", NULL, MS_MOVE, NULL) < 0) {
+        virReportSystemError(errno, _("Failed to mount %s on /dev/pts"), devpts);
+        rc = -1;
+    }
+
+    VIR_FREE(devpts);
+    return rc;
+}
+
 static int lxcContainerMountFSDevPTS(virDomainDefPtr def,
                                      const char *stateDir)
 {
@@ -2191,6 +2222,116 @@ static int lxcContainerSetHostname(virDomainDefPtr def)
     return ret;
 }
 
+/*
+ * lxcContainerChildRestore:
+ * @data: pointer to container arguments
+ *
+ * Restore-mode child entry point: performs the container-side setup,
+ * then calls lxcCriuRestore() with argv->restorefd.
+ *
+ * Returns the lxcCriuRestore() result, or -1 if an earlier step failed
+ */
+static int lxcContainerChildRestore(void *data)
+{
+    lxc_child_argv_t *argv = data;
+    virDomainDefPtr vmDef = argv->config;
+    int ttyfd = -1;
+    int ret = -1;
+    char *ttyPath = NULL;
+    virDomainFSDefPtr root;
+    char *sec_mount_options = NULL;
+    char *stateDir = NULL;
+
+    if (vmDef == NULL) {
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       "%s", _("lxcChild() passed invalid vm definition"));
+        goto cleanup;
+    }
+
+    if (lxcContainerWaitForContinue(argv->monitor) < 0) {
+        virReportSystemError(errno, "%s",
+                             _("Failed to read the container continue message"));
+        goto cleanup;
+    }
+    VIR_DEBUG("Received container continue message");
+
+    if (lxcContainerSetID(vmDef) < 0)
+        goto cleanup;
+
+    root = virDomainGetFilesystemForTarget(vmDef, "/");
+
+    if (argv->nttyPaths) {
+        const char *tty = argv->ttyPaths[0];
+        if (STRPREFIX(tty, "/dev/pts/"))
+            tty += strlen("/dev/pts/");
+        if (virAsprintf(&ttyPath, "%s/%s.devpts/%s",
+                        LXC_STATE_DIR, vmDef->name, tty) < 0)
+            goto cleanup;
+    } else {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("At least one tty is required"));
+        goto cleanup;
+    }
+
+    VIR_DEBUG("Container TTY path: %s", ttyPath);
+
+    ttyfd = open(ttyPath, O_RDWR);
+    if (ttyfd < 0) {
+        virReportSystemError(errno, _("Failed to open tty %s"), ttyPath);
+        goto cleanup;
+    }
+    VIR_DEBUG("Container TTY fd: %d", ttyfd);
+
+    if (!(sec_mount_options =
+          virSecurityManagerGetMountOptions(argv->securityDriver, vmDef)))
+        goto cleanup;
+
+    if (lxcContainerPrepareRoot(vmDef, root, sec_mount_options) < 0)
+        goto cleanup;
+
+    if (lxcContainerSendContinue(argv->handshakefd) < 0) {
+        virReportSystemError(errno, "%s",
+                            _("Failed to send continue signal to controller"));
+        goto cleanup;
+    }
+
+    VIR_DEBUG("Setting up container's std streams");
+
+    if (lxcContainerSetupFDs(&ttyfd, argv->npassFDs,
+                             argv->passFDs, argv->restorefd) < 0)
+        goto cleanup;
+
+    if (virFileResolveAllLinks(LXC_STATE_DIR, &stateDir) < 0)
+        goto cleanup;
+
+    /* Move the saved devpts onto /dev/pts; the helper reports its own
+     * error, so don't overwrite it here with a possibly stale errno. */
+    if (lxcContainerMountFSDevPTSRestore(vmDef, stateDir) < 0)
+        goto cleanup;
+
+    if (setsid() < 0)
+        virReportSystemError(errno, "%s", _("Unable to become session leader"));
+
+    VIR_DEBUG("Executing container restore criu function");
+    ret = lxcCriuRestore(vmDef, argv->restorefd, 0);
+
+ cleanup:
+    VIR_FORCE_CLOSE(argv->monitor);
+    VIR_FORCE_CLOSE(argv->handshakefd);
+    VIR_FORCE_CLOSE(ttyfd);
+    VIR_FREE(ttyPath);
+    VIR_FREE(stateDir);
+    VIR_FREE(sec_mount_options);
+
+    if (ret != 0) {
+        VIR_DEBUG("Tearing down container");
+        fprintf(stderr, _("Failure in libvirt_lxc startup: %s\n"),
+                virGetLastErrorMessage());
+    }
+
+    return ret;
+}
+
 /**
  * lxcContainerChild:
  * @data: pointer to container arguments
@@ -2322,7 +2463,7 @@ static int lxcContainerChild(void *data)
     VIR_FORCE_CLOSE(argv->handshakefd);
     VIR_FORCE_CLOSE(argv->monitor);
     if (lxcContainerSetupFDs(&ttyfd,
-                             argv->npassFDs, argv->passFDs) < 0)
+                             argv->npassFDs, argv->passFDs, -1) < 0)
         goto cleanup;
 
     /* Make init process of the container the leader of the new session.
@@ -2403,6 +2544,7 @@ virArch lxcContainerGetAlt32bitArch(virArch arch)
  * @veths: interface names
  * @control: control FD to the container
  * @ttyPath: path of tty to set as the container console
+ * @restorefd: FD of the container dump directory, or -1 for a normal start
  *
  * Starts a container process by calling clone() with the namespace flags
  *
@@ -2418,7 +2560,8 @@ int lxcContainerStart(virDomainDefPtr def,
                       int handshakefd,
                       int *nsInheritFDs,
                       size_t nttyPaths,
-                      char **ttyPaths)
+                      char **ttyPaths,
+                      int restorefd)
 {
     pid_t pid;
     int cflags;
@@ -2436,6 +2579,7 @@ int lxcContainerStart(virDomainDefPtr def,
         .ttyPaths = ttyPaths,
         .handshakefd = handshakefd,
         .nsInheritFDs = nsInheritFDs,
+        .restorefd = restorefd,
     };
 
     /* allocate a stack for the container */
@@ -2484,10 +2628,16 @@ int lxcContainerStart(virDomainDefPtr def,
         VIR_DEBUG("Inheriting a UTS namespace");
     }
 
-    VIR_DEBUG("Cloning container init process");
-    pid = clone(lxcContainerChild, stacktop, cflags, &args);
+    if (restorefd != -1) {
+        VIR_DEBUG("Cloning container process that will spawn criu restore");
+        pid = clone(lxcContainerChildRestore, stacktop, SIGCHLD, &args);
+    } else {
+        VIR_DEBUG("Cloning container init process");
+        pid = clone(lxcContainerChild, stacktop, cflags, &args);
+        VIR_DEBUG("clone() completed, new container PID is %d", pid);
+    }
+
     VIR_FREE(stack);
-    VIR_DEBUG("clone() completed, new container PID is %d", pid);
 
     if (pid < 0) {
         virReportSystemError(errno, "%s",
diff --git a/src/lxc/lxc_container.h b/src/lxc/lxc_container.h
index 641e2d460..9a6ac2073 100644
--- a/src/lxc/lxc_container.h
+++ b/src/lxc/lxc_container.h
@@ -58,7 +58,8 @@ int lxcContainerStart(virDomainDefPtr def,
                       int handshakefd,
                       int *nsInheritFDs,
                       size_t nttyPaths,
-                      char **ttyPaths);
+                      char **ttyPaths,
+                      int restorefd);
 
 int lxcContainerSetupHostdevCapsMakePath(const char *dev);
 
diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c
index 507bffda0..a5eb5e336 100644
--- a/src/lxc/lxc_controller.c
+++ b/src/lxc/lxc_controller.c
@@ -146,6 +146,8 @@ struct _virLXCController {
     virCgroupPtr cgroup;
 
     virLXCFusePtr fuse;
+
+    int restore;
 };
 
 #include "lxc_controller_dispatch.h"
@@ -1015,6 +1017,65 @@ static int lxcControllerClearCapabilities(void)
     return 0;
 }
 
+/*
+ * lxcControllerFindRestoredPid:
+ * @fd: FD of the checkpoint directory (closed before returning)
+ *
+ * Returns the PID read from <dir>/pidfile (removed afterwards) or -1
+ */
+static int
+lxcControllerFindRestoredPid(int fd)
+{
+    int initpid = 0;
+    int ret = -1;
+    char *checkpointdir = NULL;
+    char *pidfile = NULL;
+    char *checkpointfd = NULL;
+    int pidfilefd = -1; /* closed in cleanup even on early failure */
+    char c;
+
+    if (fd < 0 || virAsprintf(&checkpointfd, "/proc/self/fd/%d", fd) < 0)
+        goto cleanup;
+
+    if (virFileResolveLink(checkpointfd, &checkpointdir) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Failed to readlink checkpoint dir path"));
+        goto cleanup;
+    }
+
+    if (virAsprintf(&pidfile, "%s/pidfile", checkpointdir) < 0)
+        goto cleanup;
+
+    /* virFileOpenAs() returns -errno on failure */
+    if ((pidfilefd = virFileOpenAs(pidfile, O_RDONLY, 0, -1, -1, 0)) < 0) {
+        virReportSystemError(-pidfilefd,
+                             _("Failed to open domain's pidfile '%s'"), pidfile);
+        goto cleanup;
+    }
+
+    /* Parse leading decimal digits only; stop at anything else */
+    while (saferead(pidfilefd, &c, 1) == 1 && c >= '0' && c <= '9')
+        initpid = initpid * 10 + (c - '0');
+
+    if (virFileRemove(pidfile, -1, -1) < 0)
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Failed to delete pidfile path"));
+
+    if (initpid <= 0)
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("No valid PID in pidfile '%s'"), pidfile);
+    else
+        ret = initpid;
+
+ cleanup:
+    VIR_FORCE_CLOSE(fd);
+    VIR_FORCE_CLOSE(pidfilefd);
+    VIR_FREE(pidfile);
+    VIR_FREE(checkpointdir);
+    VIR_FREE(checkpointfd);
+    return ret;
+}
+
 static bool wantReboot;
 static virMutex lock = VIR_MUTEX_INITIALIZER;
 
@@ -2327,6 +2388,7 @@ virLXCControllerRun(virLXCControllerPtr ctrl)
     int containerhandshake[2] = { -1, -1 };
     char **containerTTYPaths = NULL;
     size_t i;
+    bool restore_mode = (ctrl->restore != -1);
 
     if (VIR_ALLOC_N(containerTTYPaths, ctrl->nconsoles) < 0)
         goto cleanup;
@@ -2383,7 +2445,8 @@ virLXCControllerRun(virLXCControllerPtr ctrl)
                                            containerhandshake[1],
                                            ctrl->nsFDs,
                                            ctrl->nconsoles,
-                                           containerTTYPaths)) < 0)
+                                           containerTTYPaths,
+                                           ctrl->restore)) < 0)
         goto cleanup;
     VIR_FORCE_CLOSE(control[1]);
     VIR_FORCE_CLOSE(containerhandshake[1]);
@@ -2395,10 +2458,10 @@ virLXCControllerRun(virLXCControllerPtr ctrl)
         for (i = 0; i < VIR_LXC_DOMAIN_NAMESPACE_LAST; i++)
             VIR_FORCE_CLOSE(ctrl->nsFDs[i]);
 
-    if (virLXCControllerSetupCgroupLimits(ctrl) < 0)
+    if (!restore_mode && virLXCControllerSetupCgroupLimits(ctrl) < 0)
         goto cleanup;
 
-    if (virLXCControllerSetupUserns(ctrl) < 0)
+    if (!restore_mode && virLXCControllerSetupUserns(ctrl) < 0)
         goto cleanup;
 
     if (virLXCControllerMoveInterfaces(ctrl) < 0)
@@ -2423,6 +2486,26 @@ virLXCControllerRun(virLXCControllerPtr ctrl)
     if (lxcControllerClearCapabilities() < 0)
         goto cleanup;
 
+    if (restore_mode) {
+        int status;
+        int ret = waitpid(-1, &status, 0);
+        VIR_DEBUG("Got sig child %d", ret);
+
+        /* There could be two cases here:
+         * 1. CRIU died because of restore error and the container is not running
+         * 2. CRIU detached itself from the running container
+         */
+        int initpid;
+        if ((initpid = lxcControllerFindRestoredPid(ctrl->restore)) < 0) {
+            virReportSystemError(errno, "%s",
+                                 _("Unable to get restored task pid"));
+            virNetDaemonQuit(ctrl->daemon);
+            goto cleanup;
+        }
+
+        ctrl->initpid = initpid;
+    }
+
     for (i = 0; i < ctrl->nconsoles; i++)
         if (virLXCControllerConsoleSetNonblocking(&(ctrl->consoles[i])) < 0)
             goto cleanup;
@@ -2466,6 +2549,7 @@ int main(int argc, char *argv[])
     int ns_fd[VIR_LXC_DOMAIN_NAMESPACE_LAST];
     int handshakeFd = -1;
     bool bg = false;
+    int restore = -1;
     const struct option options[] = {
         { "background", 0, NULL, 'b' },
         { "name",   1, NULL, 'n' },
@@ -2477,6 +2561,7 @@ int main(int argc, char *argv[])
         { "share-net", 1, NULL, 'N' },
         { "share-ipc", 1, NULL, 'I' },
         { "share-uts", 1, NULL, 'U' },
+        { "restore", 1, NULL, 'r' },
         { "help", 0, NULL, 'h' },
         { 0, 0, 0, 0 },
     };
@@ -2504,7 +2589,7 @@ int main(int argc, char *argv[])
     while (1) {
         int c;
 
-        c = getopt_long(argc, argv, "dn:v:p:m:c:s:h:S:N:I:U:",
+        c = getopt_long(argc, argv, "dn:v:p:m:c:s:h:S:N:I:U:r:",
                         options, NULL);
 
         if (c == -1)
@@ -2580,6 +2665,14 @@ int main(int argc, char *argv[])
             securityDriver = optarg;
             break;
 
+        case 'r':
+             if (virStrToLong_i(optarg, NULL, 10, &restore) < 0) {
+                fprintf(stderr, "malformed --restore argument '%s'",
+                        optarg);
+                goto cleanup;
+            }
+            break;
+
         case 'h':
         case '?':
             fprintf(stderr, "\n");
@@ -2596,6 +2689,7 @@ int main(int argc, char *argv[])
             fprintf(stderr, "  -N FD, --share-net FD\n");
             fprintf(stderr, "  -I FD, --share-ipc FD\n");
             fprintf(stderr, "  -U FD, --share-uts FD\n");
+            fprintf(stderr, "  -r FD, --restore FD\n");
             fprintf(stderr, "  -h, --help\n");
             fprintf(stderr, "\n");
             rc = 0;
@@ -2648,6 +2742,8 @@ int main(int argc, char *argv[])
     ctrl->passFDs = passFDs;
     ctrl->npassFDs = npassFDs;
 
+    ctrl->restore = restore;
+
     for (i = 0; i < VIR_LXC_DOMAIN_NAMESPACE_LAST; i++) {
         if (ns_fd[i] != -1) {
             if (!ctrl->nsFDs) {/*allocate only once */
diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
index 4f600f3df..f52085ebf 100644
--- a/src/lxc/lxc_driver.c
+++ b/src/lxc/lxc_driver.c
@@ -1125,7 +1125,7 @@ static int lxcDomainCreateWithFiles(virDomainPtr dom,
 
     ret = virLXCProcessStart(dom->conn, driver, vm,
                              nfiles, files,
-                             (flags & VIR_DOMAIN_START_AUTODESTROY),
+                             (flags & VIR_DOMAIN_START_AUTODESTROY), -1,
                              VIR_DOMAIN_RUNNING_BOOTED);
 
     if (ret == 0) {
@@ -1252,7 +1252,7 @@ lxcDomainCreateXMLWithFiles(virConnectPtr conn,
 
     if (virLXCProcessStart(conn, driver, vm,
                            nfiles, files,
-                           (flags & VIR_DOMAIN_START_AUTODESTROY),
+                           (flags & VIR_DOMAIN_START_AUTODESTROY), -1,
                            VIR_DOMAIN_RUNNING_BOOTED) < 0) {
         virDomainAuditStart(vm, "booted", false);
         virLXCDomainObjEndJob(driver, vm);
diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c
index 96041f2ec..1cd7f5bfe 100644
--- a/src/lxc/lxc_process.c
+++ b/src/lxc/lxc_process.c
@@ -118,7 +118,7 @@ virLXCProcessReboot(virLXCDriverPtr driver,
     virLXCProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_SHUTDOWN);
     vm->newDef = savedDef;
     if (virLXCProcessStart(conn, driver, vm,
-                           0, NULL, autodestroy, reason) < 0) {
+                           0, NULL, autodestroy, -1, reason) < 0) {
         VIR_WARN("Unable to handle reboot of vm %s",
                  vm->def->name);
         goto cleanup;
@@ -914,7 +914,8 @@ virLXCProcessBuildControllerCmd(virLXCDriverPtr driver,
                                 size_t nfiles,
                                 int handshakefd,
                                 int * const logfd,
-                                const char *pidfile)
+                                const char *pidfile,
+                                int restorefd)
 {
     size_t i;
     char *filterstr;
@@ -993,6 +994,12 @@ virLXCProcessBuildControllerCmd(virLXCDriverPtr driver,
     for (i = 0; i < nveths; i++)
         virCommandAddArgList(cmd, "--veth", veths[i], NULL);
 
+    if (restorefd != -1) {
+        virCommandAddArg(cmd, "--restore");
+        virCommandAddArgFormat(cmd, "%d", restorefd);
+        virCommandPassFD(cmd, restorefd, 0);
+    }
+
     virCommandPassFD(cmd, handshakefd, 0);
     virCommandDaemonize(cmd);
     virCommandSetPidFile(cmd, pidfile);
@@ -1166,6 +1173,8 @@ virLXCProcessEnsureRootFS(virDomainObjPtr vm)
  * @driver: pointer to driver structure
  * @vm: pointer to virtual machine structure
  * @autoDestroy: mark the domain for auto destruction
+ * @restorefd: file descriptor pointing to the restore directory (-1 if not
+ *             restoring)
  * @reason: reason for switching vm to running state
  *
  * Starts a vm
@@ -1177,6 +1186,7 @@ int virLXCProcessStart(virConnectPtr conn,
                        virDomainObjPtr vm,
                        unsigned int nfiles, int *files,
                        bool autoDestroy,
+                       int restorefd,
                        virDomainRunningReason reason)
 {
     int rc = -1, r;
@@ -1386,7 +1396,7 @@ int virLXCProcessStart(virConnectPtr conn,
                                                 files, nfiles,
                                                 handshakefds[1],
                                                 &logfd,
-                                                pidfile)))
+                                                pidfile, restorefd)))
         goto cleanup;
 
     /* now that we know it is about to start call the hook if present */
@@ -1494,6 +1504,9 @@ int virLXCProcessStart(virConnectPtr conn,
     if (!priv->machineName)
         goto cleanup;
 
+    if (restorefd != -1)
+        goto skip_cgroup_checks;
+
     /* We know the cgroup must exist by this synchronization
      * point so lets detect that first, since it gives us a
      * more reliable way to kill everything off if something
@@ -1510,6 +1523,8 @@ int virLXCProcessStart(virConnectPtr conn,
         goto cleanup;
     }
 
+ skip_cgroup_checks:
+
     /* And we can get the first monitor connection now too */
     if (!(priv->monitor = virLXCProcessConnectMonitor(driver, vm))) {
         /* Intentionally overwrite the real monitor error message,
@@ -1596,7 +1611,7 @@ virLXCProcessAutostartDomain(virDomainObjPtr vm,
     if (vm->autostart &&
         !virDomainObjIsActive(vm)) {
         ret = virLXCProcessStart(data->conn, data->driver, vm,
-                                 0, NULL, false,
+                                 0, NULL, false, -1,
                                  VIR_DOMAIN_RUNNING_BOOTED);
         virDomainAuditStart(vm, "booted", ret >= 0);
         if (ret < 0) {
diff --git a/src/lxc/lxc_process.h b/src/lxc/lxc_process.h
index d78cddef4..c724f31a7 100644
--- a/src/lxc/lxc_process.h
+++ b/src/lxc/lxc_process.h
@@ -29,6 +29,7 @@ int virLXCProcessStart(virConnectPtr conn,
                        virDomainObjPtr vm,
                        unsigned int nfiles, int *files,
                        bool autoDestroy,
+                       int restorefd,
                        virDomainRunningReason reason);
 int virLXCProcessStop(virLXCDriverPtr driver,
                       virDomainObjPtr vm,
-- 
2.14.3




More information about the libvir-list mailing list