[libvirt] [PATCH v2 1/1] gluster: cache glfs connection object per volume
Prasanna Kalever
pkalever at redhat.com
Mon Dec 5 11:19:56 UTC 2016
On Wed, Nov 30, 2016 at 5:41 PM, Peter Krempa <pkrempa at redhat.com> wrote:
> On Wed, Nov 30, 2016 at 16:06:37 +0530, prasanna.kalever at redhat.com wrote:
>> From: Prasanna Kumar Kalever <prasanna.kalever at redhat.com>
>>
>> This patch optimizes calls to glfs_init() and friends
>>
>> Currently, a start of a VM will call 2 glfs_new/glfs_init (which will create
>> glfs object, once for stat, read headers and next to chown) and then will fork
>> qemu process which will call once again (for actual read write IO).
>>
>> Not that all, in case if we are have 4 extra attached disks, then the total
>> calls to glfs_init() and friends will be (4+1)*2 in libvirt and (4+1)*1 in
>> qemu space i.e 15 calls. Since we don't have control over qemu process as that
>> executes in a different process environment, lets do not bother much about it.
>>
>> This patch shrinks these 10 calls (i.e objects from above example) to just
>> one, by maintaining a cache of glfs objects.
>>
>> Additionally snapshot(external) scenario will further complex the situation ...
>>
>> The glfs object is shared across other only if volume name and all the
>> volfile servers match (includes hostname, transport and port number).
>> In case of hit glfs object takes a ref and on close unref happens.
>>
>> Thanks to 'Peter Krempa' for all the inputs.
>>
>> Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever at redhat.com>
>> ---
>> v2: Address review comments from Peter on v1
>> * Rebased on latest master
>> * Changes to commit msg
>> * Introduce storage API's for Register and Unregister of volume
>> * During qemu process Start/Stop and snapshot create
>> * Check Transport and Port type
>> * Atomic element add/del to list and ref counting
>> Pending: Treating IP and FQDN belong to same host
>
> In addition to dan's review:
>
> You should split the parts that add the caching and the parts that
> optimize the calls to virStorageVolInit and friends.
Yeah! agree.
>
>>
>> v1: Initial patch
>> ---
>> src/qemu/qemu_domain.c | 2 +-
>> src/qemu/qemu_domain.h | 5 +
>> src/qemu/qemu_driver.c | 29 ++++
>> src/qemu/qemu_process.c | 25 +++
>> src/storage/storage_backend_fs.c | 2 +
>> src/storage/storage_backend_gluster.c | 295 +++++++++++++++++++++++++++++++---
>> src/storage/storage_driver.c | 23 ++-
>> src/storage/storage_driver.h | 3 +
>> 8 files changed, 357 insertions(+), 27 deletions(-)
>>
>
>> diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
>> index 7650ff3..a9e38bd 100644
>> --- a/src/qemu/qemu_domain.h
>> +++ b/src/qemu/qemu_domain.h
>> @@ -591,6 +591,11 @@ bool qemuDomainDiskSourceDiffers(virDomainDiskDefPtr disk,
>> bool qemuDomainDiskChangeSupported(virDomainDiskDefPtr disk,
>> virDomainDiskDefPtr orig_disk);
>>
>> +void qemuDomainGetImageIds(virQEMUDriverConfigPtr cfg,
>> + virDomainObjPtr vm,
>> + virStorageSourcePtr src,
>> + uid_t *uid, gid_t *gid);
>> +
>> int qemuDomainStorageFileInit(virQEMUDriverPtr driver,
>> virDomainObjPtr vm,
>> virStorageSourcePtr src);
>> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
>> index 3517aa2..7cae094 100644
>> --- a/src/qemu/qemu_driver.c
>> +++ b/src/qemu/qemu_driver.c
>> @@ -14211,6 +14211,7 @@ qemuDomainSnapshotCreateSingleDiskActive(virQEMUDriverPtr driver,
>> cleanup:
>> if (need_unlink && virStorageFileUnlink(newDiskSrc))
>> VIR_WARN("unable to unlink just-created %s", source);
>> + virStorageFileDeinit(disk->src);
>> virStorageFileDeinit(newDiskSrc);
>> virStorageSourceFree(newDiskSrc);
>> virStorageSourceFree(persistDiskSrc);
>> @@ -14566,6 +14567,7 @@ qemuDomainSnapshotCreateXML(virDomainPtr domain,
>> virDomainSnapshotObjPtr snap = NULL;
>> virDomainSnapshotPtr snapshot = NULL;
>> virDomainSnapshotDefPtr def = NULL;
>> + virDomainSnapshotDefPtr refDef = NULL;
>> bool update_current = true;
>> bool redefine = flags & VIR_DOMAIN_SNAPSHOT_CREATE_REDEFINE;
>> unsigned int parse_flags = VIR_DOMAIN_SNAPSHOT_PARSE_DISKS;
>> @@ -14574,6 +14576,9 @@ qemuDomainSnapshotCreateXML(virDomainPtr domain,
>> bool align_match = true;
>> virQEMUDriverConfigPtr cfg = NULL;
>> virCapsPtr caps = NULL;
>> + unsigned int dIndex;
>> + uid_t uid;
>> + gid_t gid;
>>
>> virCheckFlags(VIR_DOMAIN_SNAPSHOT_CREATE_REDEFINE |
>> VIR_DOMAIN_SNAPSHOT_CREATE_CURRENT |
>> @@ -14690,6 +14695,19 @@ qemuDomainSnapshotCreateXML(virDomainPtr domain,
>>
>> qemuDomainObjSetAsyncJobMask(vm, QEMU_JOB_NONE);
>>
>> + for (dIndex = 0; dIndex < def->ndisks; dIndex++) {
>
> Please use 'i' for iterator.
sure
>
>> + virDomainSnapshotDiskDef disk = def->disks[dIndex];
>> +
>> + if (virStorageSourceIsEmpty(disk.src))
>> + continue;
>> +
>> + qemuDomainGetImageIds(cfg, vm, disk.src, &uid, &gid);
>> +
>> + if (virStorageVolumeRegister(disk.src, uid, gid) <0)
>> + goto cleanup;
>> + }
>> +
>> +
>> if (redefine) {
>> if (virDomainSnapshotRedefinePrep(domain, vm, &def, &snap,
>> &update_current, flags) < 0)
>> @@ -14800,6 +14818,17 @@ qemuDomainSnapshotCreateXML(virDomainPtr domain,
>> snapshot = virGetDomainSnapshot(domain, snap->def->name);
>>
>> endjob:
>> + refDef = (!snap) ? def : snap->def;
>> +
>> + for (dIndex = 0; dIndex < refDef->ndisks; dIndex++) {
>> + virDomainSnapshotDiskDef disk = refDef->disks[dIndex];
>> +
>> + if (virStorageSourceIsEmpty(disk.src))
>> + continue;
>> +
>> + virStorageVolumeUnRegister(disk.src);
>> + }
>> +
>> if (snapshot && !(flags & VIR_DOMAIN_SNAPSHOT_CREATE_NO_METADATA)) {
>> if (qemuDomainSnapshotWriteMetadata(vm, snap, driver->caps,
>> cfg->snapshotDir) < 0) {
>> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
>> index ecd7ded..20793cf 100644
>> --- a/src/qemu/qemu_process.c
>> +++ b/src/qemu/qemu_process.c
>> @@ -4612,6 +4612,9 @@ qemuProcessInit(virQEMUDriverPtr driver,
>> qemuDomainObjPrivatePtr priv = vm->privateData;
>> int stopFlags;
>> int ret = -1;
>> + unsigned int dIndex;
>> + uid_t uid;
>> + gid_t gid;
>>
>> VIR_DEBUG("vm=%p name=%s id=%d migration=%d",
>> vm, vm->def->name, vm->def->id, migration);
>> @@ -4664,6 +4667,18 @@ qemuProcessInit(virQEMUDriverPtr driver,
>> if (qemuDomainSetPrivatePaths(driver, vm) < 0)
>> goto cleanup;
>>
>> + for (dIndex = 0; dIndex < vm->def->ndisks; dIndex++) {
>
> We usually use 'i' for iterator variables.
>
>> + virDomainDiskDefPtr disk = vm->def->disks[dIndex];
>> +
>> + if (virStorageSourceIsEmpty(disk->src))
>> + continue;
>> +
>> + qemuDomainGetImageIds(cfg, vm, disk->src, &uid, &gid);
>> +
>> + if (virStorageVolumeRegister(disk->src, uid, gid) < 0)
>> + goto cleanup;
>> + }
>> +
>> ret = 0;
>>
>> cleanup:
>
> [...]
>
>> diff --git a/src/storage/storage_backend_fs.c b/src/storage/storage_backend_fs.c
>> index de0e8d5..0e03e06 100644
>> --- a/src/storage/storage_backend_fs.c
>> +++ b/src/storage/storage_backend_fs.c
>> @@ -1488,6 +1488,8 @@ virStorageFileBackendFileDeinit(virStorageSourcePtr src)
>
> [...]
>
>> +static int
>> +virStorageBackendGlusterSetPreopened(virStorageSourcePtr src,
>> + virStorageFileBackendGlusterPrivPtr priv)
>> +{
>> + unsigned int idx;
>> + virStorageBackendGlusterStatePtrPreopened entry = NULL;
>> +
>> + if (connCache == NULL && (virOnce(&glusterConnOnce,
>> + virGlusterConnAlloc) < 0))
>> + return -1;
>> +
>> + if (virGlusterGlobalError)
>> + return -1;
>> +
>> + if (!connCache->nConn) {
>> + if (virMutexInit(&connCache->lock) < 0) {
>> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>> + _("Unable to initialize mutex"));
>> + return -1;
>> + }
>> + }
>> +
>> + for (idx = 0; idx < connCache->nConn; idx++) {
>> + if (STREQ(connCache->conn[idx]->volname, src->volume))
>> + return 0;
>> + }
>> +
>> + if (VIR_ALLOC(entry) < 0)
>> + return -1;
>> +
>> + if (VIR_STRDUP(entry->volname, src->volume) < 0)
>> + goto error;
>> +
>> + if (VIR_ALLOC_N(entry->hosts, entry->nservers) < 0)
>> + goto error;
>> +
>> + entry->nservers = src->nhosts;
>> +
>> + for (idx = 0; idx < src->nhosts; idx++) {
>> + if (VIR_ALLOC(entry->hosts[idx]) < 0)
>> + goto error;
>> + if (src->hosts[idx].transport == VIR_STORAGE_NET_HOST_TRANS_UNIX) {
>> + if (VIR_STRDUP(entry->hosts[idx]->u.uds.path,
>> + src->hosts[idx].socket) < 0)
>> + goto error;
>> + entry->hosts[idx]->type = VIR_STORAGE_NET_HOST_TRANS_UNIX;
>> + } else if (src->hosts[idx].transport ==
>> + VIR_STORAGE_NET_HOST_TRANS_TCP) {
>> + if (VIR_STRDUP(entry->hosts[idx]->u.tcp.host,
>> + src->hosts[idx].name) < 0)
>> + goto error;
>> + if (VIR_STRDUP(entry->hosts[idx]->u.tcp.port,
>> + src->hosts[idx].port) < 0)
>> + goto error;
>> + entry->hosts[idx]->type = VIR_STORAGE_NET_HOST_TRANS_TCP;
>> + } else {
>> + entry->hosts[idx]->type = VIR_STORAGE_NET_HOST_TRANS_LAST;
>
> We've got virStorageSourceCopy. The cache could use those objects so
> that you don't have to copy everything manually.
I'm thinking that you are referring to virStorageNetHostDefCopy here ?
>
>> + }
>> + }
>> + entry->priv = priv;
>> +
>> + virMutexLock(&connCache->lock);
>> + virAtomicIntSet(&entry->ref, 1);
>
> You should use virObjectLockable for this as I told you in the previous
> version. It provides a mutex and reference counting and it's the way we
> do stuff in libvirt.
oh yeah. I missed it somehow, sorry.
>
>> + if (VIR_INSERT_ELEMENT(connCache->conn, -1, connCache->nConn, entry) < 0) {
>
> This is equivalent to VIR_APPEND_ELEMENT.
>
>> + virMutexUnlock(&connCache->lock);
>> + goto error;
>> + }
>> + virMutexUnlock(&connCache->lock);
>> +
>> + return 0;
>> +
>> + error:
>> + for (idx = 0; idx < entry->nservers; idx++) {
>> + if (entry->hosts[idx]->type == VIR_STORAGE_NET_HOST_TRANS_UNIX) {
>> + VIR_FREE(entry->hosts[idx]->u.uds.path);
>> + } else {
>> + VIR_FREE(entry->hosts[idx]->u.tcp.host);
>> + VIR_FREE(entry->hosts[idx]->u.tcp.port);
>> + }
>> + VIR_FREE(entry->hosts[idx]);
>
> You should create a freeing function.
got it.
>
>> + }
>> +
>> + VIR_FREE(entry->hosts);
>> + VIR_FREE(entry->volname);
>> + VIR_FREE(entry);
>> +
>> + return -1;
>> +}
>> +
>> +static glfs_t *
>> +virStorageBackendGlusterFindPreopened(virStorageSourcePtr src)
>> +{
>> + unsigned int cIdx, sIdx, nIdx; /* connectionIdx, savedIdx, newIdx */
>
> The comment explaining these variables is pointless. Also please one
> variable declaration per line.
Okay.
>
>> + bool flag = false;
>
> The name of this variable does not help deciphering what it's used for.
Okay.
>
>> +
>> + if (connCache == NULL)
>> + return NULL;
>> +
>> + virStorageBackendGlusterStatePtrPreopened entry;
>> +
>> + for (cIdx = 0; cIdx < connCache->nConn; cIdx++) {
>> + entry = connCache->conn[cIdx];
>> + if (STREQ(entry->volname, src->volume)) {
>
> By inverting a few of the conditions and using continue statements you
> can avoid deep nesting.
>
>> + if (entry->nservers == src->nhosts) {
>> + for (sIdx = 0; sIdx < entry->nservers; sIdx++) {
>> + for (nIdx = 0; nIdx < src->nhosts; nIdx++) {
>> + if (entry->hosts[sIdx]->type ==
>> + src->hosts[nIdx].transport) {
>> + if (entry->hosts[sIdx]->type
>> + == VIR_STORAGE_NET_HOST_TRANS_UNIX) {
>> + if (STREQ(entry->hosts[sIdx]->u.uds.path,
>> + src->hosts[nIdx].socket)) {
>> + flag = true;
>> + break;
>> + }
>> + } else {
>> + if (STREQ(entry->hosts[sIdx]->u.tcp.host,
>> + src->hosts[nIdx].name) &&
>> + STREQ(entry->hosts[sIdx]->u.tcp.port,
>> + src->hosts[nIdx].port)) {
>> + flag = true;
>> + break;
>> + }
>> + }
>> + }
>> + }
>> + if (!flag)
>> + return NULL;
>> + flag = false;
>> + }
>> + virAtomicIntInc(&entry->ref);
>> + return entry->priv->vol;
>> + } else {
>> + return NULL;
>> + }
>> + }
>> + }
>> + return NULL;
>> +}
>> +
>> +static void
>> +virStorageBackendGlusterClosePreopened(virStorageSourcePtr src)
>> +{
>> + virStorageFileBackendGlusterPrivPtr priv = src->drv->priv;
>> + unsigned int cIdx, hIdx; /* connectionIdx, hostIdx */
>
> The comment is rather pointless. Declare one variable per line.
Okay.
>
>> + for (cIdx = 0; cIdx < connCache->nConn; cIdx++) {
>> + if (STREQ(connCache->conn[cIdx]->volname, src->volume)) {
>> + virAtomicIntDecAndTest(&connCache->conn[cIdx]->ref);
>> + if (virAtomicIntGet(&connCache->conn[cIdx]->ref) != 0) {
>> + if (priv && priv->canonpath) {
>> + VIR_FREE(priv->canonpath);
>> + VIR_FREE(priv);
>> + src->drv->priv = NULL;
>
> All this should be replaced by a virObject destructor for your given
> cache type.
Yeah, got it.
>
>> + }
>> + return;
>> + }
>> +
>> + glfs_fini(connCache->conn[cIdx]->priv->vol);
>> +
>> + VIR_FREE(connCache->conn[cIdx]->priv->canonpath);
>> + VIR_FREE(connCache->conn[cIdx]->priv);
>> +
>> + for (hIdx = 0; hIdx < connCache->conn[cIdx]->nservers; hIdx++) {
>> + if (connCache->conn[cIdx]->hosts[hIdx]->type ==
>> + VIR_STORAGE_NET_HOST_TRANS_UNIX) {
>> + VIR_FREE(connCache->conn[cIdx]->hosts[hIdx]->u.uds.path);
>> + } else {
>> + VIR_FREE(connCache->conn[cIdx]->hosts[hIdx]->u.tcp.host);
>> + VIR_FREE(connCache->conn[cIdx]->hosts[hIdx]->u.tcp.port);
>> + }
>> + VIR_FREE(connCache->conn[cIdx]->hosts[hIdx]);
>> + }
>> +
>> + VIR_FREE(connCache->conn[cIdx]->hosts);
>> + VIR_FREE(connCache->conn[cIdx]->volname);
>> + VIR_FREE(connCache->conn[cIdx]);
>> +
>> + virMutexLock(&connCache->lock);
>> + VIR_DELETE_ELEMENT(connCache->conn, cIdx, connCache->nConn);
>> + virMutexUnlock(&connCache->lock);
>
> Doing a extracted freeing function will allow you to get rid of this
> duplication too.
Yes, now it make scene to me.
>
>> +
>> + VIR_FREE(src->drv);
>> + }
>> + }
>> +
>> + if (!connCache->conn)
>> + virMutexDestroy(&connCache->lock);
>> +}
>>
>> static void
>> virStorageBackendGlusterClose(virStorageBackendGlusterStatePtr state)
>
> [...]
>
>> @@ -612,6 +852,7 @@ virStorageFileBackendGlusterInitServer(virStorageFileBackendGlusterPrivPtr priv,
>> static int
>> virStorageFileBackendGlusterInit(virStorageSourcePtr src)
>> {
>> + int ret = 0;
>> virStorageFileBackendGlusterPrivPtr priv = NULL;
>> size_t i;
>>
>> @@ -625,6 +866,12 @@ virStorageFileBackendGlusterInit(virStorageSourcePtr src)
>> if (VIR_ALLOC(priv) < 0)
>> return -1;
>>
>> + priv->vol = virStorageBackendGlusterFindPreopened(src);
>> + if (priv->vol) {
>> + src->drv->priv = priv;
>> + return ret;
>> + }
>> +
>> VIR_DEBUG("initializing gluster storage file %p "
>> "(priv='%p' volume='%s' path='%s') as [%u:%u]",
>> src, priv, src->volume, src->path,
>> @@ -635,6 +882,10 @@ virStorageFileBackendGlusterInit(virStorageSourcePtr src)
>> goto error;
>> }
>>
>> + ret = virStorageBackendGlusterSetPreopened(src, priv);
>> + if (ret < 0)
>> + goto error;
>> +
>> for (i = 0; i < src->nhosts; i++) {
>> if (virStorageFileBackendGlusterInitServer(priv, src->hosts + i) < 0)
>> goto error;
>> @@ -648,13 +899,13 @@ virStorageFileBackendGlusterInit(virStorageSourcePtr src)
>> }
>>
>> src->drv->priv = priv;
>> + ret = 0;
>>
>> - return 0;
>> + return ret;
>
> Why do you need a ret variable if you overwrite it right before
> returning it? The above changes don't make sense.
right!
>
>>
>> error:
>> - if (priv->vol)
>> - glfs_fini(priv->vol);
>> VIR_FREE(priv);
>> + virStorageBackendGlusterClosePreopened(src);
>>
>> return -1;
>> }
>> diff --git a/src/storage/storage_driver.c b/src/storage/storage_driver.c
>> index df65807..9c3bffb 100644
>> --- a/src/storage/storage_driver.c
>> +++ b/src/storage/storage_driver.c
>
> [...]
>
>> @@ -2967,6 +2964,24 @@ virStorageFileInit(virStorageSourcePtr src)
>> return virStorageFileInitAs(src, -1, -1);
>> }
>>
>> +int
>> +virStorageVolumeRegister(virStorageSourcePtr src,
>> + uid_t uid, gid_t gid)
>> +{
>> + if (virStorageFileInitAs(src, uid, gid) < 0)
>> + return -1;
>> +
>> + src->drv->priv = NULL;
>
> What's the point of this wrapper? Also it seems wrong as you initialize
> it and overwrite the private data after that? That will leak them which
> is wrong.
>
> Ah, that is correct for the gluster driver. Note that other backends
> have private data too and this breaks it and leaks their private data.
Realized.
Thanks Peter.
--
Prasanna
>
>> + return 0;
>> +}
More information about the libvir-list
mailing list