[libvirt] [PATCH v2 1/1] gluster: cache glfs connection object per volume

Prasanna Kalever pkalever at redhat.com
Mon Dec 5 11:19:56 UTC 2016


On Wed, Nov 30, 2016 at 5:41 PM, Peter Krempa <pkrempa at redhat.com> wrote:
> On Wed, Nov 30, 2016 at 16:06:37 +0530, prasanna.kalever at redhat.com wrote:
>> From: Prasanna Kumar Kalever <prasanna.kalever at redhat.com>
>>
>> This patch optimizes calls to glfs_init() and friends
>>
>> Currently, a start of a VM will call 2 glfs_new/glfs_init (which will create
>> glfs object, once for stat, read headers and next to chown) and then will fork
>> qemu process which will call once again (for actual read write IO).
>>
>> Not that all, in case if we are have 4 extra attached disks, then the total
>> calls to glfs_init() and friends will be (4+1)*2 in libvirt and (4+1)*1 in
>> qemu space i.e 15 calls. Since we don't have control over qemu process as that
>> executes in a different process environment, lets do not bother much about it.
>>
>> This patch shrinks these 10 calls (i.e objects from above example) to just
>> one, by maintaining a cache of glfs objects.
>>
>> Additionally snapshot(external) scenario will further complex the situation ...
>>
>> The glfs object is shared across other only if volume name and all the
>> volfile servers match (includes hostname, transport and port number).
>> In case of hit glfs object takes a ref and on close unref happens.
>>
>> Thanks to 'Peter Krempa' for all the inputs.
>>
>> Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever at redhat.com>
>> ---
>> v2: Address review comments from Peter on v1
>> * Rebased on latest master
>> * Changes to commit msg
>> * Introduce storage API's for Register and Unregister of volume
>> * During qemu process Start/Stop and snapshot create
>> * Check Transport and Port type
>> * Atomic element add/del to list and ref counting
>> Pending: Treating IP and FQDN belong to same host
>
> In addition to dan's review:
>
> You should split the parts that add the caching and the parts that
> optimize the calls to virStorageVolInit and friends.

Yeah! agree.

>
>>
>> v1: Initial patch
>> ---
>>  src/qemu/qemu_domain.c                |   2 +-
>>  src/qemu/qemu_domain.h                |   5 +
>>  src/qemu/qemu_driver.c                |  29 ++++
>>  src/qemu/qemu_process.c               |  25 +++
>>  src/storage/storage_backend_fs.c      |   2 +
>>  src/storage/storage_backend_gluster.c | 295 +++++++++++++++++++++++++++++++---
>>  src/storage/storage_driver.c          |  23 ++-
>>  src/storage/storage_driver.h          |   3 +
>>  8 files changed, 357 insertions(+), 27 deletions(-)
>>
>
>> diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
>> index 7650ff3..a9e38bd 100644
>> --- a/src/qemu/qemu_domain.h
>> +++ b/src/qemu/qemu_domain.h
>> @@ -591,6 +591,11 @@ bool qemuDomainDiskSourceDiffers(virDomainDiskDefPtr disk,
>>  bool qemuDomainDiskChangeSupported(virDomainDiskDefPtr disk,
>>                                     virDomainDiskDefPtr orig_disk);
>>
>> +void qemuDomainGetImageIds(virQEMUDriverConfigPtr cfg,
>> +                           virDomainObjPtr vm,
>> +                           virStorageSourcePtr src,
>> +                           uid_t *uid, gid_t *gid);
>> +
>>  int qemuDomainStorageFileInit(virQEMUDriverPtr driver,
>>                                virDomainObjPtr vm,
>>                                virStorageSourcePtr src);
>> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
>> index 3517aa2..7cae094 100644
>> --- a/src/qemu/qemu_driver.c
>> +++ b/src/qemu/qemu_driver.c
>> @@ -14211,6 +14211,7 @@ qemuDomainSnapshotCreateSingleDiskActive(virQEMUDriverPtr driver,
>>   cleanup:
>>      if (need_unlink && virStorageFileUnlink(newDiskSrc))
>>          VIR_WARN("unable to unlink just-created %s", source);
>> +    virStorageFileDeinit(disk->src);
>>      virStorageFileDeinit(newDiskSrc);
>>      virStorageSourceFree(newDiskSrc);
>>      virStorageSourceFree(persistDiskSrc);
>> @@ -14566,6 +14567,7 @@ qemuDomainSnapshotCreateXML(virDomainPtr domain,
>>      virDomainSnapshotObjPtr snap = NULL;
>>      virDomainSnapshotPtr snapshot = NULL;
>>      virDomainSnapshotDefPtr def = NULL;
>> +    virDomainSnapshotDefPtr refDef = NULL;
>>      bool update_current = true;
>>      bool redefine = flags & VIR_DOMAIN_SNAPSHOT_CREATE_REDEFINE;
>>      unsigned int parse_flags = VIR_DOMAIN_SNAPSHOT_PARSE_DISKS;
>> @@ -14574,6 +14576,9 @@ qemuDomainSnapshotCreateXML(virDomainPtr domain,
>>      bool align_match = true;
>>      virQEMUDriverConfigPtr cfg = NULL;
>>      virCapsPtr caps = NULL;
>> +    unsigned int dIndex;
>> +    uid_t uid;
>> +    gid_t gid;
>>
>>      virCheckFlags(VIR_DOMAIN_SNAPSHOT_CREATE_REDEFINE |
>>                    VIR_DOMAIN_SNAPSHOT_CREATE_CURRENT |
>> @@ -14690,6 +14695,19 @@ qemuDomainSnapshotCreateXML(virDomainPtr domain,
>>
>>      qemuDomainObjSetAsyncJobMask(vm, QEMU_JOB_NONE);
>>
>> +    for (dIndex = 0; dIndex < def->ndisks; dIndex++) {
>
> Please use 'i' for iterator.

sure

>
>> +        virDomainSnapshotDiskDef disk = def->disks[dIndex];
>> +
>> +        if (virStorageSourceIsEmpty(disk.src))
>> +            continue;
>> +
>> +        qemuDomainGetImageIds(cfg, vm, disk.src, &uid, &gid);
>> +
>> +        if (virStorageVolumeRegister(disk.src, uid, gid) <0)
>> +            goto cleanup;
>> +    }
>> +
>> +
>>      if (redefine) {
>>          if (virDomainSnapshotRedefinePrep(domain, vm, &def, &snap,
>>                                            &update_current, flags) < 0)
>> @@ -14800,6 +14818,17 @@ qemuDomainSnapshotCreateXML(virDomainPtr domain,
>>      snapshot = virGetDomainSnapshot(domain, snap->def->name);
>>
>>   endjob:
>> +    refDef = (!snap) ? def : snap->def;
>> +
>> +    for (dIndex = 0; dIndex < refDef->ndisks; dIndex++) {
>> +        virDomainSnapshotDiskDef disk = refDef->disks[dIndex];
>> +
>> +        if (virStorageSourceIsEmpty(disk.src))
>> +            continue;
>> +
>> +        virStorageVolumeUnRegister(disk.src);
>> +    }
>> +
>>      if (snapshot && !(flags & VIR_DOMAIN_SNAPSHOT_CREATE_NO_METADATA)) {
>>          if (qemuDomainSnapshotWriteMetadata(vm, snap, driver->caps,
>>                                              cfg->snapshotDir) < 0) {
>> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
>> index ecd7ded..20793cf 100644
>> --- a/src/qemu/qemu_process.c
>> +++ b/src/qemu/qemu_process.c
>> @@ -4612,6 +4612,9 @@ qemuProcessInit(virQEMUDriverPtr driver,
>>      qemuDomainObjPrivatePtr priv = vm->privateData;
>>      int stopFlags;
>>      int ret = -1;
>> +    unsigned int dIndex;
>> +    uid_t uid;
>> +    gid_t gid;
>>
>>      VIR_DEBUG("vm=%p name=%s id=%d migration=%d",
>>                vm, vm->def->name, vm->def->id, migration);
>> @@ -4664,6 +4667,18 @@ qemuProcessInit(virQEMUDriverPtr driver,
>>      if (qemuDomainSetPrivatePaths(driver, vm) < 0)
>>          goto cleanup;
>>
>> +    for (dIndex = 0; dIndex < vm->def->ndisks; dIndex++) {
>
> We usually use 'i' for iterator variables.
>
>> +        virDomainDiskDefPtr disk = vm->def->disks[dIndex];
>> +
>> +        if (virStorageSourceIsEmpty(disk->src))
>> +            continue;
>> +
>> +        qemuDomainGetImageIds(cfg, vm, disk->src, &uid, &gid);
>> +
>> +        if (virStorageVolumeRegister(disk->src, uid, gid) < 0)
>> +            goto cleanup;
>> +    }
>> +
>>      ret = 0;
>>
>>   cleanup:
>
> [...]
>
>> diff --git a/src/storage/storage_backend_fs.c b/src/storage/storage_backend_fs.c
>> index de0e8d5..0e03e06 100644
>> --- a/src/storage/storage_backend_fs.c
>> +++ b/src/storage/storage_backend_fs.c
>> @@ -1488,6 +1488,8 @@ virStorageFileBackendFileDeinit(virStorageSourcePtr src)
>
> [...]
>
>> +static int
>> +virStorageBackendGlusterSetPreopened(virStorageSourcePtr src,
>> +                                     virStorageFileBackendGlusterPrivPtr priv)
>> +{
>> +    unsigned int idx;
>> +    virStorageBackendGlusterStatePtrPreopened entry = NULL;
>> +
>> +    if (connCache == NULL && (virOnce(&glusterConnOnce,
>> +                                      virGlusterConnAlloc) < 0))
>> +        return -1;
>> +
>> +    if (virGlusterGlobalError)
>> +        return -1;
>> +
>> +    if (!connCache->nConn) {
>> +        if (virMutexInit(&connCache->lock) < 0) {
>> +            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>> +                           _("Unable to initialize mutex"));
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    for (idx = 0; idx < connCache->nConn; idx++) {
>> +        if (STREQ(connCache->conn[idx]->volname, src->volume))
>> +            return 0;
>> +    }
>> +
>> +    if (VIR_ALLOC(entry) < 0)
>> +        return -1;
>> +
>> +    if (VIR_STRDUP(entry->volname, src->volume) < 0)
>> +        goto error;
>> +
>> +    if (VIR_ALLOC_N(entry->hosts, entry->nservers) < 0)
>> +        goto error;
>> +
>> +    entry->nservers = src->nhosts;
>> +
>> +    for (idx = 0; idx < src->nhosts; idx++) {
>> +        if (VIR_ALLOC(entry->hosts[idx]) < 0)
>> +            goto error;
>> +        if (src->hosts[idx].transport == VIR_STORAGE_NET_HOST_TRANS_UNIX) {
>> +            if (VIR_STRDUP(entry->hosts[idx]->u.uds.path,
>> +                           src->hosts[idx].socket) < 0)
>> +               goto error;
>> +            entry->hosts[idx]->type = VIR_STORAGE_NET_HOST_TRANS_UNIX;
>> +        } else if (src->hosts[idx].transport ==
>> +                   VIR_STORAGE_NET_HOST_TRANS_TCP) {
>> +            if (VIR_STRDUP(entry->hosts[idx]->u.tcp.host,
>> +                           src->hosts[idx].name) < 0)
>> +                goto error;
>> +            if (VIR_STRDUP(entry->hosts[idx]->u.tcp.port,
>> +                           src->hosts[idx].port) < 0)
>> +                goto error;
>> +            entry->hosts[idx]->type = VIR_STORAGE_NET_HOST_TRANS_TCP;
>> +        } else {
>> +            entry->hosts[idx]->type = VIR_STORAGE_NET_HOST_TRANS_LAST;
>
> We've got virStorageSourceCopy. The cache could use those objects so
> that you don't have to copy everything manually.

I'm thinking that you are referring to virStorageNetHostDefCopy here ?

>
>> +        }
>> +    }
>> +    entry->priv = priv;
>> +
>> +    virMutexLock(&connCache->lock);
>> +    virAtomicIntSet(&entry->ref, 1);
>
> You should use virObjectLockable for this as I told you in the previous
> version. It provides a mutex and reference counting and it's the way we
> do stuff in libvirt.

oh yeah. I missed it somehow, sorry.

>
>> +    if (VIR_INSERT_ELEMENT(connCache->conn, -1, connCache->nConn, entry) < 0) {
>
> This is equivalent to VIR_APPEND_ELEMENT.
>
>> +        virMutexUnlock(&connCache->lock);
>> +        goto error;
>> +    }
>> +    virMutexUnlock(&connCache->lock);
>> +
>> +    return 0;
>> +
>> + error:
>> +    for (idx = 0; idx < entry->nservers; idx++) {
>> +        if (entry->hosts[idx]->type == VIR_STORAGE_NET_HOST_TRANS_UNIX) {
>> +            VIR_FREE(entry->hosts[idx]->u.uds.path);
>> +        } else {
>> +            VIR_FREE(entry->hosts[idx]->u.tcp.host);
>> +            VIR_FREE(entry->hosts[idx]->u.tcp.port);
>> +        }
>> +        VIR_FREE(entry->hosts[idx]);
>
> You should create a freeing function.

got it.

>
>> +    }
>> +
>> +    VIR_FREE(entry->hosts);
>> +    VIR_FREE(entry->volname);
>> +    VIR_FREE(entry);
>> +
>> +    return -1;
>> +}
>> +
>> +static glfs_t *
>> +virStorageBackendGlusterFindPreopened(virStorageSourcePtr src)
>> +{
>> +    unsigned int cIdx, sIdx, nIdx; /* connectionIdx, savedIdx, newIdx */
>
> The comment explaining these variables is pointless. Also please one
> variable declaration per line.

Okay.

>
>> +    bool flag = false;
>
> The name of this variable does not help deciphering what it's used for.

Okay.

>
>> +
>> +    if (connCache == NULL)
>> +        return NULL;
>> +
>> +    virStorageBackendGlusterStatePtrPreopened entry;
>> +
>> +    for (cIdx = 0; cIdx < connCache->nConn; cIdx++) {
>> +        entry = connCache->conn[cIdx];
>> +        if (STREQ(entry->volname, src->volume)) {
>
> By inverting a few of the conditions and using continue statements you
> can avoid deep nesting.
>
>> +            if (entry->nservers == src->nhosts) {
>> +                for (sIdx = 0; sIdx < entry->nservers; sIdx++) {
>> +                    for (nIdx = 0; nIdx < src->nhosts; nIdx++) {
>> +                        if (entry->hosts[sIdx]->type ==
>> +                            src->hosts[nIdx].transport) {
>> +                            if (entry->hosts[sIdx]->type
>> +                                    == VIR_STORAGE_NET_HOST_TRANS_UNIX) {
>> +                                if (STREQ(entry->hosts[sIdx]->u.uds.path,
>> +                                          src->hosts[nIdx].socket)) {
>> +                                    flag = true;
>> +                                    break;
>> +                                }
>> +                            } else {
>> +                                if (STREQ(entry->hosts[sIdx]->u.tcp.host,
>> +                                          src->hosts[nIdx].name) &&
>> +                                    STREQ(entry->hosts[sIdx]->u.tcp.port,
>> +                                          src->hosts[nIdx].port)) {
>> +                                    flag = true;
>> +                                    break;
>> +                                }
>> +                            }
>> +                        }
>> +                    }
>> +                    if (!flag)
>> +                        return NULL;
>> +                    flag = false;
>> +                }
>> +                virAtomicIntInc(&entry->ref);
>> +                return entry->priv->vol;
>> +            } else {
>> +                return NULL;
>> +            }
>> +        }
>> +    }
>> +    return NULL;
>> +}
>> +
>> +static void
>> +virStorageBackendGlusterClosePreopened(virStorageSourcePtr src)
>> +{
>> +    virStorageFileBackendGlusterPrivPtr priv = src->drv->priv;
>> +    unsigned int cIdx, hIdx; /* connectionIdx, hostIdx */
>
> The comment is rather pointless. Declare one variable per line.

Okay.

>
>> +    for (cIdx = 0; cIdx < connCache->nConn; cIdx++) {
>> +        if (STREQ(connCache->conn[cIdx]->volname, src->volume)) {
>> +            virAtomicIntDecAndTest(&connCache->conn[cIdx]->ref);
>> +            if (virAtomicIntGet(&connCache->conn[cIdx]->ref) != 0) {
>> +                if (priv && priv->canonpath) {
>> +                    VIR_FREE(priv->canonpath);
>> +                    VIR_FREE(priv);
>> +                    src->drv->priv = NULL;
>
> All this should be replaced by a virObject destructor for your given
> cache type.

Yeah, got it.

>
>> +                }
>> +                return;
>> +            }
>> +
>> +            glfs_fini(connCache->conn[cIdx]->priv->vol);
>> +
>> +            VIR_FREE(connCache->conn[cIdx]->priv->canonpath);
>> +            VIR_FREE(connCache->conn[cIdx]->priv);
>> +
>> +            for (hIdx = 0; hIdx < connCache->conn[cIdx]->nservers; hIdx++) {
>> +                if (connCache->conn[cIdx]->hosts[hIdx]->type ==
>> +                    VIR_STORAGE_NET_HOST_TRANS_UNIX) {
>> +                    VIR_FREE(connCache->conn[cIdx]->hosts[hIdx]->u.uds.path);
>> +                } else {
>> +                    VIR_FREE(connCache->conn[cIdx]->hosts[hIdx]->u.tcp.host);
>> +                    VIR_FREE(connCache->conn[cIdx]->hosts[hIdx]->u.tcp.port);
>> +                }
>> +                VIR_FREE(connCache->conn[cIdx]->hosts[hIdx]);
>> +            }
>> +
>> +            VIR_FREE(connCache->conn[cIdx]->hosts);
>> +            VIR_FREE(connCache->conn[cIdx]->volname);
>> +            VIR_FREE(connCache->conn[cIdx]);
>> +
>> +            virMutexLock(&connCache->lock);
>> +            VIR_DELETE_ELEMENT(connCache->conn, cIdx, connCache->nConn);
>> +            virMutexUnlock(&connCache->lock);
>
> Doing a extracted freeing function will allow you to get rid of this
> duplication too.

Yes, now it make scene to me.

>
>> +
>> +            VIR_FREE(src->drv);
>> +        }
>> +    }
>> +
>> +    if (!connCache->conn)
>> +        virMutexDestroy(&connCache->lock);
>> +}
>>
>>  static void
>>  virStorageBackendGlusterClose(virStorageBackendGlusterStatePtr state)
>
> [...]
>
>> @@ -612,6 +852,7 @@ virStorageFileBackendGlusterInitServer(virStorageFileBackendGlusterPrivPtr priv,
>>  static int
>>  virStorageFileBackendGlusterInit(virStorageSourcePtr src)
>>  {
>> +    int ret = 0;
>>      virStorageFileBackendGlusterPrivPtr priv = NULL;
>>      size_t i;
>>
>> @@ -625,6 +866,12 @@ virStorageFileBackendGlusterInit(virStorageSourcePtr src)
>>      if (VIR_ALLOC(priv) < 0)
>>          return -1;
>>
>> +    priv->vol = virStorageBackendGlusterFindPreopened(src);
>> +    if (priv->vol) {
>> +        src->drv->priv = priv;
>> +        return ret;
>> +    }
>> +
>>      VIR_DEBUG("initializing gluster storage file %p "
>>                "(priv='%p' volume='%s' path='%s') as [%u:%u]",
>>                src, priv, src->volume, src->path,
>> @@ -635,6 +882,10 @@ virStorageFileBackendGlusterInit(virStorageSourcePtr src)
>>          goto error;
>>      }
>>
>> +    ret = virStorageBackendGlusterSetPreopened(src, priv);
>> +    if (ret < 0)
>> +        goto error;
>> +
>>      for (i = 0; i < src->nhosts; i++) {
>>          if (virStorageFileBackendGlusterInitServer(priv, src->hosts + i) < 0)
>>              goto error;
>> @@ -648,13 +899,13 @@ virStorageFileBackendGlusterInit(virStorageSourcePtr src)
>>      }
>>
>>      src->drv->priv = priv;
>> +    ret = 0;
>>
>> -    return 0;
>> +    return ret;
>
> Why do you need a ret variable if you overwrite it right before
> returning it?  The above changes don't make sense.

right!

>
>>
>>   error:
>> -    if (priv->vol)
>> -        glfs_fini(priv->vol);
>>      VIR_FREE(priv);
>> +    virStorageBackendGlusterClosePreopened(src);
>>
>>      return -1;
>>  }
>> diff --git a/src/storage/storage_driver.c b/src/storage/storage_driver.c
>> index df65807..9c3bffb 100644
>> --- a/src/storage/storage_driver.c
>> +++ b/src/storage/storage_driver.c
>
> [...]
>
>> @@ -2967,6 +2964,24 @@ virStorageFileInit(virStorageSourcePtr src)
>>      return virStorageFileInitAs(src, -1, -1);
>>  }
>>
>> +int
>> +virStorageVolumeRegister(virStorageSourcePtr src,
>> +                         uid_t uid, gid_t gid)
>> +{
>> +    if (virStorageFileInitAs(src, uid, gid) < 0)
>> +        return -1;
>> +
>> +    src->drv->priv = NULL;
>
> What's the point of this wrapper? Also it seems wrong as you initialize
> it and overwrite the private data after that? That will leak them which
> is wrong.
>
> Ah, that is correct for the gluster driver. Note that other backends
> have private data too and this breaks it and leaks their private data.

Realized.


Thanks Peter.

--
Prasanna

>
>> +    return 0;
>> +}




More information about the libvir-list mailing list