[libvirt] [RFC PATCH 01/17] qemu: setup shared memory without explicit numa configuration

John Ferlan jferlan at redhat.com
Thu Aug 16 02:35:54 UTC 2018



On 07/13/2018 09:28 AM, marcandre.lureau at redhat.com wrote:
> From: Marc-André Lureau <marcandre.lureau at redhat.com>
> 
> When a domain is configured with 'shared' memory backing:
> 
>   <memoryBacking>
>     <access mode='shared'/>
>   </memoryBacking>
> 
> But no explicit NUMA configuration, let's configure a shared memory
> backend associated with default -numa.
> 
> Signed-off-by: Marc-André Lureau <marcandre.lureau at redhat.com>
> ---
>  src/qemu/qemu_command.c                       | 100 ++++++++++++------
>  .../fd-memory-no-numa-topology.args           |   4 +
>  2 files changed, 73 insertions(+), 31 deletions(-)
> 

NUMA, memory backends, and hugepages - not in my wheelhouse of
knowledge. Hopefully Michal and/or Pavel will take a look!

Is it possible someone may not want this type of thing to happen? Is
there an upside or downside to this?  What happens "today" when not
generated? And of course, what about migration concerns about
unconditionally doing this for some target migration?

> diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
> index 44ae8dcef7..f1235099b2 100644
> --- a/src/qemu/qemu_command.c
> +++ b/src/qemu/qemu_command.c
> @@ -3254,26 +3254,21 @@ qemuBuildMemoryBackendProps(virJSONValuePtr *backendProps,
>  
>  
>  static int
> -qemuBuildMemoryCellBackendStr(virDomainDefPtr def,
> -                              virQEMUDriverConfigPtr cfg,
> -                              size_t cell,
> -                              qemuDomainObjPrivatePtr priv,
> -                              virBufferPtr buf)
> +qemuBuildMemoryBackendStr(virDomainDefPtr def,
> +                          virQEMUDriverConfigPtr cfg,
> +                          const char *alias,

So the one "concern" I'd have here is some time in the future the @mem
gets allocated and handled like a real device eventually calling
virDomainDeviceInfoClear and that'd be a problem for the passed const
char * string.  Some future person's problem I guess!

> +                          int targetNode,
> +                          unsigned long long memsize,
> +                          qemuDomainObjPrivatePtr priv,
> +                          virBufferPtr buf)

As much as a long name is a pain, is this more of a :

    qemuBuildMemorySharedDefaultBackendStr

>  {
>      virJSONValuePtr props = NULL;
> -    char *alias = NULL;
> -    int ret = -1;
> -    int rc;
>      virDomainMemoryDef mem = { 0 };
> -    unsigned long long memsize = virDomainNumaGetNodeMemorySize(def->numa,
> -                                                                cell);
> -
> -    if (virAsprintf(&alias, "ram-node%zu", cell) < 0)
> -        goto cleanup;
> +    int rc, ret = -1;
>  
>      mem.size = memsize;
> -    mem.targetNode = cell;
> -    mem.info.alias = alias;
> +    mem.targetNode = targetNode;
> +    mem.info.alias = (char *)alias;
>  
>      if ((rc = qemuBuildMemoryBackendProps(&props, alias, cfg, priv->qemuCaps,
>                                            def, &mem, priv->autoNodeset, false)) < 0)> @@ -3284,9 +3279,30 @@ qemuBuildMemoryCellBackendStr(virDomainDefPtr def,
>  
>      ret = rc;
>  
> +cleanup:

Fails 'make check syntax-check' :

maint.mk: Top-level labels should be indented by one space
make: *** [cfg.mk:898: sc_require_space_before_label] Error 1

> +    virJSONValueFree(props);
> +    return ret;
> +}
> +
> +
> +static int
> +qemuBuildMemoryCellBackendStr(virDomainDefPtr def,
> +                              virQEMUDriverConfigPtr cfg,
> +                              size_t cell,
> +                              qemuDomainObjPrivatePtr priv,
> +                              virBufferPtr buf)
> +{
> +    char *alias = NULL;
> +    int ret = -1;
> +    unsigned long long memsize = virDomainNumaGetNodeMemorySize(def->numa, cell);
> +
> +    if (virAsprintf(&alias, "ram-node%zu", cell) < 0)
> +        goto cleanup;
> +
> +    ret = qemuBuildMemoryBackendStr(def, cfg, alias, cell, memsize, priv, buf);
> +
>   cleanup:
>      VIR_FREE(alias);
> -    virJSONValueFree(props);
>  
>      return ret;
>  }
> @@ -7590,6 +7606,17 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg,
>      size_t ncells = virDomainNumaGetNodeCount(def->numa);
>      const long system_page_size = virGetSystemPageSizeKB();
>      bool numa_distances = false;
> +    bool implicit = false;
> +
> +    if (ncells == 0) {
> +        if (def->mem.access == VIR_DOMAIN_MEMORY_ACCESS_SHARED) {
> +            ncells = 1;

Well, that's cheating for the subsequent for loop ;-)

> +            implicit = true;
> +        } else {
> +            ret = 0;
> +            goto cleanup;
> +        }
> +    }

So if ncells == 0 && def->mem.access != VIR_DOMAIN_MEMORY_ACCESS_SHARED,
then we return 0 without doing the subsequent code? Is that expected? Is
there something done later that may be necessary, needed, or assumed.

>  
>      if (virDomainNumatuneHasPerNodeBinding(def->numa) &&
>          !(virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_RAM) ||
> @@ -7645,14 +7672,22 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg,
>          if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_RAM) ||
>              virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE)) {
>  
> -            if ((rc = qemuBuildMemoryCellBackendStr(def, cfg, i, priv,
> -                                                    &nodeBackends[i])) < 0)
> +
> +            if (implicit)
> +                rc = qemuBuildMemoryBackendStr(def, cfg, "ram-node", -1,

Using "ram-node" where other devices use "ram-node%zu" could easily be
missed - maybe "default" or "implicit" as a prefix or postfix to make it
stand out a bit more? It's not a big deal though.

> +                                               def->mem.total_memory,
> +                                               priv, &nodeBackends[i]);
> +            else
> +                rc = qemuBuildMemoryCellBackendStr(def, cfg, i,
> +                                                   priv, &nodeBackends[i]);
> +            if (rc < 0)
>                  goto cleanup;
>  
>              if (rc == 0)
>                  needBackend = true;
>          } else {
> -            if (virDomainNumaGetNodeMemoryAccessMode(def->numa, i)) {
> +            if (implicit ||
> +                virDomainNumaGetNodeMemoryAccessMode(def->numa, i)) {
>                  virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
>                                 _("Shared memory mapping is not supported "
>                                   "with this QEMU"));
> @@ -7667,15 +7702,18 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg,
>  
>      for (i = 0; i < ncells; i++) {
>          VIR_FREE(cpumask);
> -        if (!(cpumask = virBitmapFormat(virDomainNumaGetNodeCpumask(def->numa, i))))
> -            goto cleanup;
>  
> -        if (strchr(cpumask, ',') &&
> -            !virQEMUCapsGet(qemuCaps, QEMU_CAPS_NUMA)) {
> -            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> -                           _("disjoint NUMA cpu ranges are not supported "
> -                             "with this QEMU"));
> -            goto cleanup;
> +        if (!implicit) {
> +            if (!(cpumask = virBitmapFormat(virDomainNumaGetNodeCpumask(def->numa, i))))
> +                goto cleanup;
> +
> +            if (strchr(cpumask, ',') &&
> +                !virQEMUCapsGet(qemuCaps, QEMU_CAPS_NUMA)) {
> +                virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> +                               _("disjoint NUMA cpu ranges are not supported "
> +                                 "with this QEMU"));
> +                goto cleanup;
> +            }
>          }
>  
>          if (needBackend) {
> @@ -7694,7 +7732,8 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg,
>          }
>  
>          if (needBackend)
> -            virBufferAsprintf(&buf, ",memdev=ram-node%zu", i);
> +            virBufferAsprintf(&buf, implicit ?
> +                              ",memdev=ram-node" : ",memdev=ram-node%zu", i);
>          else
>              virBufferAsprintf(&buf, ",mem=%llu",
>                                virDomainNumaGetNodeMemorySize(def->numa, i) / 1024);
> @@ -7717,7 +7756,7 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg,
>              break;
>      }
>  
> -    if (numa_distances) {
> +    if (!implicit && numa_distances) {
>          if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_NUMA_DIST)) {
>              virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
>                             _("setting NUMA distances is not "
> @@ -10303,8 +10342,7 @@ qemuBuildCommandLine(virQEMUDriverPtr driver,
>      if (qemuBuildIOThreadCommandLine(cmd, def) < 0)
>          goto error;
>  
> -    if (virDomainNumaGetNodeCount(def->numa) &&
> -        qemuBuildNumaArgStr(cfg, def, cmd, priv) < 0)
> +    if (qemuBuildNumaArgStr(cfg, def, cmd, priv) < 0)
>          goto error;
>  
>      if (qemuBuildMemoryDeviceCommandLine(cmd, cfg, def, priv) < 0)
> diff --git a/tests/qemuxml2argvdata/fd-memory-no-numa-topology.args b/tests/qemuxml2argvdata/fd-memory-no-numa-topology.args
> index bd88daaa3b..400fb39cc6 100644
> --- a/tests/qemuxml2argvdata/fd-memory-no-numa-topology.args
> +++ b/tests/qemuxml2argvdata/fd-memory-no-numa-topology.args
> @@ -11,6 +11,10 @@ QEMU_AUDIO_DRV=none \
>  -m 14336 \
>  -mem-prealloc \
>  -smp 8,sockets=8,cores=1,threads=1 \
> +-object memory-backend-file,id=ram-node,\
> +mem-path=/var/lib/libvirt/qemu/ram/libvirt/qemu/-1-instance-00000092/ram-node,\
> +share=yes,size=15032385536 \

Curious does that perhaps rather large file for mem-path get created? If
so, wow!  Seems to me something like this would need to be documented or
as noted earlier be configurable.

John

> +-numa node,nodeid=0,memdev=ram-node \
>  -uuid 126f2720-6f8e-45ab-a886-ec9277079a67 \
>  -display none \
>  -no-user-config \
> 




More information about the libvir-list mailing list