[libvirt] [PATCH] Add copy-on-write image support

Daniel P. Berrange berrange at redhat.com
Wed Dec 10 17:58:03 UTC 2008


On Tue, Dec 09, 2008 at 05:28:57PM +0000, Miloslav Trma?? wrote:
> the attached patch adds support for copy-on-write images: creating them,
> deleting them, identifying them.

Nice - thanks for working on this feature ! I'll put a few comments
inline....

> Index: docs/formatstorage.html.in
> ===================================================================
> RCS file: /data/cvs/libvirt/docs/formatstorage.html.in,v
> retrieving revision 1.5
> diff -u -p -r1.5 formatstorage.html.in
> --- docs/formatstorage.html.in	4 Dec 2008 14:51:57 -0000	1.5
> +++ docs/formatstorage.html.in	9 Dec 2008 17:25:18 -0000
> @@ -269,6 +269,13 @@
>  	contains the MAC (eg SELinux) label string.
>  	<span class="since">Since 0.4.1</span>
>        </dd>
> +      <dt><code>cow-base</code></dt>
> +      <dd>Provides the key of a base volume.  This element is optional, and
> +        supported only in some volume types and formats.  If it is present,
> +        this volume is a copy-on-write volume that stores only changes to the
> +        base volume.  Changes to the base volume may make this volume
> +        inconsistent.
> +      </dd>

I'm not  a fan of having '-' or '_' in XML element names, so how
about naming it

     <backingstore>/some/path</backingstore>

We will also want to make use of this feature to allow LVM snapshots.
ie, creating an LVM volume with a <backingstore> element would take
an LVM snapshot of this backingstore. The 'allocation' would be the
amount of LVM storage allocated, and the 'capacity' would just match
the original volume capacity.


> Index: src/storage_backend_fs.c
> ===================================================================
> RCS file: /data/cvs/libvirt/src/storage_backend_fs.c,v
> retrieving revision 1.22
> diff -u -p -r1.22 storage_backend_fs.c
> --- src/storage_backend_fs.c	17 Nov 2008 11:19:33 -0000	1.22
> +++ src/storage_backend_fs.c	9 Dec 2008 17:25:18 -0000
> +static int
> +cowGetCowBase(virConnectPtr conn,
> +	      char **res,
> +	      const unsigned char *buf,
> +	      size_t buf_size)
> +{
> +    size_t len;
> +
> +    *res = NULL;
> +    if (buf_size < 4+4+1024)
> +	return GET_COW_BASE_INVALID;
> +    if (buf[4+4] == '\0')
> +	return GET_COW_BASE_OK;
> +
> +    len = 1024;
> +    if (VIR_ALLOC_N(*res, len + 1) < 0) {
> +	virStorageReportError(conn, VIR_ERR_NO_MEMORY,
> +			      _("copy-on-write base path"));
> +	return GET_COW_BASE_ERROR;
> +    }
> +    memcpy(*res, buf + 4+4, len);
> +    (*res)[len] = '\0';
> +    if (VIR_REALLOC_N(*res, strlen(*res) + 1) < 0) {
> +	/* Ignore failure */
> +    }
> +    return GET_COW_BASE_OK;
> +}

In general, this entire  patch has alot of whitespace damage - I
think your editor has inserted TABs instead of spaces - could you
resend with whitespace fixed as per the 'HACKING' file.


> +
> +static int
> +qcowXGetCowBase(virConnectPtr conn,
> +		char **res,
> +		const unsigned char *buf,
> +		size_t buf_size)
> +{
> +    unsigned long long offset;
> +    unsigned long size;
> +
> +    *res = NULL;
> +    if (buf_size < 4+4+8+4)
> +	return GET_COW_BASE_INVALID;
> +    offset = (((unsigned long long)buf[4+4] << 56)
> +	      | ((unsigned long long)buf[4+4+1] << 48)
> +	      | ((unsigned long long)buf[4+4+2] << 40)
> +	      | ((unsigned long long)buf[4+4+3] << 32)
> +	      | ((unsigned long long)buf[4+4+4] << 24)
> +	      | ((unsigned long long)buf[4+4+5] << 16)
> +	      | ((unsigned long long)buf[4+4+6] << 8)
> +	      | buf[4+4+7]);
> +    if (offset > buf_size)
> +	return GET_COW_BASE_INVALID;
> +    size = ((buf[4+4+8] << 24)
> +	    | (buf[4+4+8+1] << 16)
> +	    | (buf[4+4+8+2] << 8)
> +	    | buf[4+4+8+3]);
> +    if (size == 0)
> +	return GET_COW_BASE_OK;
> +    if (offset + size > buf_size || offset + size < offset)
> +	return GET_COW_BASE_INVALID;
> +    if (size + 1 == 0)
> +	return GET_COW_BASE_INVALID;

It'd be useful to have comments against each of these 'if'
checks to indicate what named header field is being checked
in each place.


> +static int
> +vmdk4GetCowBase(virConnectPtr conn,
> +		char **res,
> +		const unsigned char *buf,
> +		size_t buf_size)
> +{
> +    static const char prefix[] = "parentFileNameHint=\"";
> +
> +    char desc[20*512 + 1], *start, *end;
> +    size_t len;
> +
> +    *res = NULL;
> +    if (buf_size <= 0x200)
> +	return GET_COW_BASE_INVALID;
> +    /* FIXME? the default buf_size is not large enough to contain the complete
> +       descriptor. */
> +    len = buf_size - 0x200;


Do you know how large it needs to be ?  There's no particular reason
for the current size, except that it was big enough for our needs
at the time. We can increase it to suit....

> +
> +/**
> + * Return an absolute path corresponding to PATH, which is absolute or relative
> + * to the directory containing BASE_FILE, or NULL on error
> + */
> +static char *absolutePathFromBaseFile(const char *base_file, const char *path)
> +{
> +    size_t base_size, path_size;
> +    char *res, *p;
> +
> +    if (*path == '/')
> +	return strdup(path);
> +
> +    base_size = strlen(base_file) + 1;
> +    path_size = strlen(path) + 1;
> +    if (VIR_ALLOC_N(res, base_size - 1 + path_size) < 0)
> +	return NULL;
> +    memcpy(res, base_file, base_size);
> +    p = strrchr(res, '/');
> +    if (p != NULL)
> +	p++;
> +    else
> +	p = res;
> +    memcpy(p, path, path_size);
> +    if (VIR_REALLOC_N(res, (p + path_size) - res) < 0) {
> +	/* Ignore failure */
> +    }
> +    return res;
> +}

Seeems like we could just call libc 'realpath' function here ?

>  
> @@ -832,7 +1029,9 @@ virStorageBackendFileSystemVolCreate(vir
>  #if HAVE_QEMU_IMG
>          const char *type;
>          char size[100];
> -        const char *imgargv[7];
> +        const char *imgargv[9];
> +	size_t i;
> +	virStorageVolDefPtr cowBase;
>  
>          if ((type = virStorageVolFormatFileSystemTypeToString(vol->target.format)) == NULL) {
>              virStorageReportError(conn, VIR_ERR_INTERNAL_ERROR,
> @@ -840,17 +1039,55 @@ virStorageBackendFileSystemVolCreate(vir
>                                    vol->target.format);
>              return -1;
>          }
> +	if (vol->target.cowBase == NULL)
> +	    cowBase = NULL;
> +	else {
> +	    const struct FileTypeInfo *info;
> +
> +	    info = virStorageBackendFileSystemVolFormatToInfo(conn,
> +							      vol->target.format);
> +	    if (info == NULL) {
> +		virStorageReportError(conn, VIR_ERR_INTERNAL_ERROR,
> +				      _("unknown storage vol type %d"),
> +				      vol->target.format);
> +		return -1;
> +	    }
> +	    if (info->getCowBase == NULL) {
> +		virStorageReportError(conn, VIR_ERR_INTERNAL_ERROR,
> +				      _("copy-on-write not supported with "
> +					"format %s"), type);
> +		return -1;
> +	    }
> +	    cowBase = virStorageVolDefFindByKey(pool, vol->target.cowBase);
> +	    if (cowBase == NULL) {
> +		virStorageReportError(conn, VIR_ERR_NO_STORAGE_VOL,
> +				      _("unknown copy-on-write base volume "
> +					"key %s"), vol->target.cowBase);
> +		return -1;
> +	    }
> +	    if (cowBase->target.format != vol->target.format) {
> +		virStorageReportError(conn, VIR_ERR_INTERNAL_ERROR,
> +				      _("copy-on-write with mixed file formats "
> +					"not supported"));
> +		return -1;

IIRC there's no restricton on backing file type for QCow files. You
can even make a qcow file that's backed by a physical device. 

So we should remove the virStorageVolDefFindByKey() call and format
check, and merely do  'access(vol->target.cowBase, R_OK)' on it.

>   */
>  static int
>  virStorageBackendFileSystemVolDelete(virConnectPtr conn,
> -                                     virStoragePoolObjPtr pool ATTRIBUTE_UNUSED,
> +                                     virStoragePoolObjPtr pool,
>                                       virStorageVolDefPtr vol,
>                                       unsigned int flags ATTRIBUTE_UNUSED)
>  {
> +    size_t i;
> +
> +    for (i = 0; i < pool->volumes.count; i++) {
> +	const char *cowBase;
> +
> +	cowBase = pool->volumes.objs[i]->target.cowBase;
> +	if (cowBase != NULL && STREQ(cowBase, vol->key)) {
> +	    virStorageReportError(conn, VIR_ERR_INTERNAL_ERROR,
> +				  _("volume %s is used by volume %s"),
> +				  vol->key, pool->volumes.objs[i]->key);
> +	    return -1;
> +	}
> +    }

As a general rule libvirt does not implement policy like this, leave it
upto the caller whether they want to imeplment such policies. In addition
we can't rely on the the cow base file being part of the same storage pool
as the volume being deleted - it might not be part of any pool in fact.
IMHO, this block of code can just be omitted


Regards,
Daniel
-- 
|: Red Hat, Engineering, London   -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|




More information about the libvir-list mailing list