[libvirt] [PATCH v2 2/3] qemu_domain: NVLink2 device tree functions for PPC64
Daniel Henrique Barboza
danielhb413 at gmail.com
Mon Mar 4 18:25:55 UTC 2019
Just noticed that the function signatures of the new functions created
by this
patch are mostly written on one line, like:
static int read_dt_phandle(...)
whereas the Libvirt coding standard puts the return type on the first line,
followed by the function
name after a line break:
void
qemuDomainUpdateCurrentMemorySize(...)
I'll fix this to match the coding standard in the next spin.
On 3/3/19 10:23 AM, Daniel Henrique Barboza wrote:
> The NVLink2 support in QEMU implements the detection of NVLink2
> capable devices by verifying the attributes of the VFIO mem region
> QEMU allocates for the NVIDIA GPUs. To properly allocate an
> adequate amount of memLock, Libvirt needs this information before
> a QEMU instance is even created.
>
> An alternative is presented in this patch. Given a PCI device,
> we'll traverse the device tree at /proc/device-tree to check if
> the device has a NPU bridge, retrieve the node of the NVLink2 bus,
> find the memory-node that is related to the bus and see if it's a
> NVLink2 bus by inspecting its 'reg' value. This logic is contained
> inside the 'device_is_nvlink2_capable' function, which uses other
> new helper functions to navigate and fetch values from the device
> tree nodes.
>
> Signed-off-by: Daniel Henrique Barboza <danielhb413 at gmail.com>
> ---
> src/qemu/qemu_domain.c | 188 +++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 188 insertions(+)
>
> diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
> index 55578f3d19..76e1e4b161 100644
> --- a/src/qemu/qemu_domain.c
> +++ b/src/qemu/qemu_domain.c
> @@ -10331,6 +10331,194 @@ qemuDomainUpdateCurrentMemorySize(virDomainObjPtr vm)
> }
>
>
/**
 * read_dt_phandle:
 * @file: path of a device tree property file holding a phandle
 *
 * Reads the first 32-bit cell of @file and decodes it as a
 * big-endian integer (the same conversion be32toh() performs).
 *
 * Returns the phandle value, or -1 if the file cannot be opened or
 * holds fewer than 4 bytes. A short read is reported as an error
 * rather than as phandle 0, because callers only test for -1 and
 * would otherwise go on to match the bogus value 0.
 *
 * NOTE(review): the return type is int, so a cell of 0xffffffff is
 * indistinguishable from the error value.
 */
static int
read_dt_phandle(const char *file)
{
    unsigned char raw[4];
    unsigned int value;
    size_t nread;
    FILE *f;

    f = fopen(file, "rb");
    if (!f)
        return -1;

    nread = fread(raw, 1, sizeof(raw), f);
    fclose(f);

    if (nread != sizeof(raw))
        return -1;

    /* Assemble the big-endian cell byte by byte so the result does
     * not depend on host endianness or on non-standard helpers. */
    value = ((unsigned int)raw[0] << 24) |
            ((unsigned int)raw[1] << 16) |
            ((unsigned int)raw[2] << 8) |
            (unsigned int)raw[3];

    return (int)value;
}
> +
> +
/**
 * read_dt_memory_reg:
 * @file: path of a device tree 'reg' property file
 *
 * Reads the first four 32-bit cells of @file and converts each one
 * from big-endian to host byte order.
 *
 * Returns a newly allocated array of 4 values that the caller must
 * free, or NULL if the file cannot be opened, holds fewer than four
 * complete cells, or memory cannot be allocated.
 */
static unsigned int *
read_dt_memory_reg(const char *file)
{
    unsigned char raw[4 * 4];
    unsigned int *cells = NULL;
    size_t nread;
    size_t i;
    FILE *f;

    f = fopen(file, "rb");
    if (!f)
        return NULL;

    nread = fread(raw, 1, sizeof(raw), f);
    fclose(f);

    /* Anything short of four full cells is rejected; a partial read
     * must not be handed back as if it were valid data. */
    if (nread != sizeof(raw))
        return NULL;

    cells = calloc(4, sizeof(*cells));
    if (!cells)
        return NULL;

    for (i = 0; i < 4; i++) {
        const unsigned char *p = raw + i * 4;

        cells[i] = ((unsigned int)p[0] << 24) |
                   ((unsigned int)p[1] << 16) |
                   ((unsigned int)p[2] << 8) |
                   (unsigned int)p[3];
    }

    return cells;
}
> +
> +
> +/**
> + * This wrapper function receives arguments to be used in a
> + * 'find' call to retrieve the file names that matches
> + * the criteria inside the /proc/device-tree dir.
> + *
> + * A 'find' call with '-iname phandle' inside /proc/device-tree
> + * provides more than a thousand matches. Adding '-path' to
> + * narrow it down further is necessary to keep the file
> + * listing sane.
> + *
> + * The caller is responsible to free the buffer returned by
> + * this function.
> + */
> +static char *retrieve_dt_files_pattern(const char *path_pattern,
> + const char *file_pattern)
> +{
> + virCommandPtr cmd = NULL;
> + char *output = NULL;
> +
> + cmd = virCommandNew("find");
> + virCommandAddArgList(cmd, "/proc/device-tree/","-path", path_pattern,
> + "-iname", file_pattern, NULL);
> + virCommandSetOutputBuffer(cmd, &output);
> +
> + if (virCommandRun(cmd, NULL) < 0)
> + VIR_FREE(output);
> +
> + virCommandFree(cmd);
> + return output;
> +}
> +
> +
/**
 * find_dt_file_with_phandle:
 * @files: newline-separated list of file names; modified in place
 * @phandle: phandle value to look for
 *
 * Splits @files on '\n' and calls read_dt_phandle() on each entry
 * until one yields @phandle.
 *
 * Returns a pointer to the matching file name, which points inside
 * @files and must not be freed separately, or NULL if @files is
 * NULL, contains no entries, or no entry matches.
 */
static char *
find_dt_file_with_phandle(char *files, int phandle)
{
    char *line;
    char *saveptr = NULL;

    if (!files)
        return NULL;

    /* Test the token before using it: an empty listing produces no
     * token at all and must not reach read_dt_phandle(). */
    for (line = strtok_r(files, "\n", &saveptr);
         line != NULL;
         line = strtok_r(NULL, "\n", &saveptr)) {
        if (read_dt_phandle(line) == phandle)
            return line;
    }

    return NULL;
}
> +
> +
/**
 * dt_sibling_path:
 * @path: path of a file inside a device tree node directory
 * @leaf: name of another file in the same directory
 *
 * Builds "<directory of @path>/@leaf" in a newly allocated string,
 * leaving @path untouched.
 *
 * Returns the new string, which the caller must free, or NULL if
 * @path contains no '/' or the allocation fails.
 */
static char *
dt_sibling_path(const char *path, const char *leaf)
{
    const char *slash = strrchr(path, '/');
    char *sibling = NULL;

    if (!slash)
        return NULL;

    if (virAsprintf(&sibling, "%.*s/%s",
                    (int)(slash - path), path, leaf) < 0)
        return NULL;

    return sibling;
}


/**
 * device_is_nvlink2_capable:
 * @device: PCI address string of the device, such as '0004:04:00.0'
 *
 * Tells if the device is NVLink2 capable. The logic goes as follows:
 *
 * 1 - get the phandle of a nvlink of the device, reading the
 *     'ibm,npu' attribute;
 * 2 - find the device tree node of the nvlink bus using the phandle
 *     found in (1);
 * 3 - get the phandle of the memory region of the nvlink bus;
 * 4 - find the device tree node of the memory region using the
 *     phandle found in (3);
 * 5 - read the 'reg' value of the memory region. The device is
 *     considered attached to a NVLink2 bus when the third and
 *     fourth 32-bit cells (the second 64-bit value) are 0x20 and
 *     0x00.
 *
 * NOTE(review): the original comment for step 5 said '0x02 0x00'
 * while the code compared against 0x20; the code's value is kept
 * here. Confirm which one is intended.
 *
 * Returns true only if every step succeeds and the 'reg' check
 * matches, false otherwise. All temporary buffers are released on
 * every path.
 */
static bool
device_is_nvlink2_capable(const char *device)
{
    char *npu_path = NULL;
    char *node_path = NULL;
    char *files = NULL;
    char *match;
    unsigned int *reg = NULL;
    int phandle;
    bool ret = false;

    if (virAsprintf(&npu_path, "/sys/bus/pci/devices/%s/of_node/ibm,npu",
                    device) < 0)
        return false;

    /* Find phandles of nvlinks: */
    if ((phandle = read_dt_phandle(npu_path)) == -1)
        goto cleanup;

    /* Find a DT node for the phandle found */
    if (!(files = retrieve_dt_files_pattern("*device-tree/pci*", "phandle")))
        goto cleanup;

    if (!(match = find_dt_file_with_phandle(files, phandle)))
        goto cleanup;

    /* Find a phandle of the GPU memory region of the device. The
     * file found above ends with '/phandle' - the memory region
     * of the GPU ends with '/memory-region'. The new path is built
     * in its own buffer: it is longer than the matched entry, so
     * appending in place would write past the entry inside the
     * listing. */
    if (!(node_path = dt_sibling_path(match, "memory-region")))
        goto cleanup;

    if ((phandle = read_dt_phandle(node_path)) == -1)
        goto cleanup;

    VIR_FREE(node_path);
    VIR_FREE(files);

    /* Find the memory node for the phandle found above */
    if (!(files = retrieve_dt_files_pattern("*device-tree/memory*", "phandle")))
        goto cleanup;

    if (!(match = find_dt_file_with_phandle(files, phandle)))
        goto cleanup;

    /* And see its size in the second 64bit value of 'reg'. First,
     * the end of the file needs to be changed from '/phandle' to
     * '/reg' */
    if (!(node_path = dt_sibling_path(match, "reg")))
        goto cleanup;

    reg = read_dt_memory_reg(node_path);
    ret = reg && reg[2] == 0x20 && reg[3] == 0x00;

 cleanup:
    VIR_FREE(reg);
    VIR_FREE(node_path);
    VIR_FREE(files);
    VIR_FREE(npu_path);
    return ret;
}
> +
> +
> /**
> * qemuDomainGetMemLockLimitBytes:
> * @def: domain definition
More information about the libvir-list
mailing list