[dm-devel] [PATCH 1/4] dm-snapshot-rework-origin-write

Jonathan Brassow jbrassow at redhat.com
Wed Oct 7 17:39:02 UTC 2009


Reviewed-by: Jonathan Brassow <jbrassow at redhat.com>

  brassow

On Oct 5, 2009, at 2:00 PM, Mike Snitzer wrote:

> From: Mikulas Patocka <mpatocka at redhat.com>
>
> Rework writing to snapshot origin.
>
> The previous code selected one exception as "primary_pe", linked all other
> exceptions on it and used reference counting to wait until all exceptions are
> reallocated.  This didn't work with exceptions with different chunk sizes:
> https://bugzilla.redhat.com/show_bug.cgi?id=182659
>
> I removed all the complexity of exception linking and reference counting.
> Now a bio is linked to a single exception; when that exception is
> reallocated, the bio is retried and may then wait for other exceptions.
>
> Signed-off-by: Mikulas Patocka <mpatocka at redhat.com>
> Signed-off-by: Mike Snitzer <snitzer at redhat.com>
> ---
> drivers/md/dm-snap.c |  165 +++++++++++++++++--------------------------------
> 1 files changed, 57 insertions(+), 108 deletions(-)
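
To make the new flow easier to follow, here is a minimal user-space sketch of
the scheme the description above outlines.  The types and names below are
simplified stand-ins (not dm-snap's real structures), and a copy only
"completes" here when the parked bio is waiting on it:

#include <stdio.h>
#include <stdbool.h>

#define NSNAPS 3

struct snap { bool has_exception; bool copying; };
struct bio  { int retries; };

/* Returns the index of the snapshot the bio was queued on, or -1 if the
 * bio may be submitted to the origin (every exception already exists). */
static int origin_write(struct snap snaps[], int n, struct bio *bio)
{
	int i, queued_on = -1;

	for (i = 0; i < n; i++) {
		if (snaps[i].has_exception)
			continue;		/* chunk already remapped */
		if (queued_on < 0)
			queued_on = i;		/* park the bio on one exception only */
		if (!snaps[i].copying)
			snaps[i].copying = true;	/* where start_copy() would run */
	}
	return queued_on;
}

int main(void)
{
	struct snap snaps[NSNAPS] = { 0 };
	struct bio bio = { 0 };
	int parked = origin_write(snaps, NSNAPS, &bio);

	while (parked >= 0) {
		/* the copy the bio waits on completes, so its exception exists;
		 * the bio is then retried against the remaining snapshots */
		snaps[parked].has_exception = true;
		snaps[parked].copying = false;
		bio.retries++;
		parked = origin_write(snaps, NSNAPS, &bio);
	}
	printf("origin bio submitted after %d retries\n", bio.retries);
	return 0;
}

Each retry waits for at most one more snapshot's exception instead of
tracking all of them through a primary_pe reference count.
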
>
> diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
> index 668457f..b23decb 100644
> --- a/drivers/md/dm-snap.c
> +++ b/drivers/md/dm-snap.c
> @@ -125,28 +125,6 @@ struct dm_snap_pending_exception {
> 	struct bio_list origin_bios;
> 	struct bio_list snapshot_bios;
>
> -	/*
> -	 * Short-term queue of pending exceptions prior to submission.
> -	 */
> -	struct list_head list;
> -
> -	/*
> -	 * The primary pending_exception is the one that holds
> -	 * the ref_count and the list of origin_bios for a
> -	 * group of pending_exceptions.  It is always last to get freed.
> -	 * These fields get set up when writing to the origin.
> -	 */
> -	struct dm_snap_pending_exception *primary_pe;
> -
> -	/*
> -	 * Number of pending_exceptions processing this chunk.
> -	 * When this drops to zero we must complete the origin bios.
> -	 * If incrementing or decrementing this, hold pe->snap->lock for
> -	 * the sibling concerned and not pe->primary_pe->snap->lock unless
> -	 * they are the same.
> -	 */
> -	atomic_t ref_count;
> -
> 	/* Pointer back to snapshot context */
> 	struct dm_snapshot *snap;
>
> @@ -809,6 +787,28 @@ static void flush_queued_bios(struct work_struct *work)
> 	flush_bios(queued_bios);
> }
>
> +static int do_origin(struct dm_dev *origin, struct bio *bio);
> +
> +/*
> + * Retry a list of origin bios.
> + */
> +static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio)
> +{
> +	struct bio *n;
> +	int r;
> +
> +	while (bio) {
> +		n = bio->bi_next;
> +		bio->bi_next = NULL;
> +		r = do_origin(s->origin, bio);
> +		if (r == DM_MAPIO_REMAPPED)
> +			generic_make_request(bio);
> +		else
> +			BUG_ON(r != DM_MAPIO_SUBMITTED);
> +		bio = n;
> +	}
> +}
> +
> /*
>  * Error a list of buffers.
>  */
> @@ -842,39 +842,6 @@ static void __invalidate_snapshot(struct dm_snapshot *s, int err)
> 	dm_table_event(s->store->ti->table);
> }
>
> -static void get_pending_exception(struct dm_snap_pending_exception *pe)
> -{
> -	atomic_inc(&pe->ref_count);
> -}
> -
> -static struct bio *put_pending_exception(struct dm_snap_pending_exception *pe)
> -{
> -	struct dm_snap_pending_exception *primary_pe;
> -	struct bio *origin_bios = NULL;
> -
> -	primary_pe = pe->primary_pe;
> -
> -	/*
> -	 * If this pe is involved in a write to the origin and
> -	 * it is the last sibling to complete then release
> -	 * the bios for the original write to the origin.
> -	 */
> -	if (primary_pe &&
> -	    atomic_dec_and_test(&primary_pe->ref_count)) {
> -		origin_bios = bio_list_get(&primary_pe->origin_bios);
> -		free_pending_exception(primary_pe);
> -	}
> -
> -	/*
> -	 * Free the pe if it's not linked to an origin write or if
> -	 * it's not itself a primary pe.
> -	 */
> -	if (!primary_pe || primary_pe != pe)
> -		free_pending_exception(pe);
> -
> -	return origin_bios;
> -}
> -
> static void pending_complete(struct dm_snap_pending_exception *pe, int success)
> {
> 	struct dm_snap_exception *e;
> @@ -923,7 +890,8 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
>  out:
> 	remove_exception(&pe->e);
> 	snapshot_bios = bio_list_get(&pe->snapshot_bios);
> -	origin_bios = put_pending_exception(pe);
> +	origin_bios = bio_list_get(&pe->origin_bios);
> +	free_pending_exception(pe);
>
> 	up_write(&s->lock);
>
> @@ -933,7 +901,7 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
> 	else
> 		flush_bios(snapshot_bios);
>
> -	flush_bios(origin_bios);
> +	retry_origin_bios(s, origin_bios);
> }
>
> static void commit_callback(void *context, int success)
> @@ -1020,8 +988,6 @@ __find_pending_exception(struct dm_snapshot *s,
> 	pe->e.old_chunk = chunk;
> 	bio_list_init(&pe->origin_bios);
> 	bio_list_init(&pe->snapshot_bios);
> -	pe->primary_pe = NULL;
> -	atomic_set(&pe->ref_count, 0);
> 	pe->started = 0;
>
> 	if (s->store->type->prepare_exception(s->store, &pe->e)) {
> @@ -1029,7 +995,6 @@ __find_pending_exception(struct dm_snapshot *s,
> 		return NULL;
> 	}
>
> -	get_pending_exception(pe);
> 	insert_exception(&s->pending, &pe->e);
>
> 	return pe;
> @@ -1212,14 +1177,21 @@ static int snapshot_iterate_devices(struct dm_target *ti,
> /*-----------------------------------------------------------------
>  * Origin methods
>  *---------------------------------------------------------------*/
> -static int __origin_write(struct list_head *snapshots, struct bio *bio)
> +
> +/*
> + * Returns:
> + *	DM_MAPIO_REMAPPED: bio may be submitted to origin device
> + *	DM_MAPIO_SUBMITTED: bio was queued on the queue of one of the exceptions
> + */
> +
> +static int __origin_write(struct list_head *snapshots,
> +			  sector_t sector, struct bio *bio)
> {
> -	int r = DM_MAPIO_REMAPPED, first = 0;
> +	int r = DM_MAPIO_REMAPPED;
> 	struct dm_snapshot *snap;
> 	struct dm_snap_exception *e;
> -	struct dm_snap_pending_exception *pe, *next_pe, *primary_pe = NULL;
> +	struct dm_snap_pending_exception *pe, *pe_to_start = NULL;
> 	chunk_t chunk;
> -	LIST_HEAD(pe_queue);
>
> 	/* Do all the snapshots on this origin */
> 	list_for_each_entry (snap, snapshots, list) {
> @@ -1231,22 +1203,19 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
> 			goto next_snapshot;
>
> 		/* Nothing to do if writing beyond end of snapshot */
> -		if (bio->bi_sector >= dm_table_get_size(snap->store->ti->table))
> +		if (sector >= dm_table_get_size(snap->store->ti->table))
> 			goto next_snapshot;
>
> 		/*
> 		 * Remember, different snapshots can have
> 		 * different chunk sizes.
> 		 */
> -		chunk = sector_to_chunk(snap->store, bio->bi_sector);
> +		chunk = sector_to_chunk(snap->store, sector);
>
> 		/*
> 		 * Check exception table to see if block
> 		 * is already remapped in this snapshot
> 		 * and trigger an exception if not.
> -		 *
> -		 * ref_count is initialised to 1 so pending_complete()
> -		 * won't destroy the primary_pe while we're inside this loop.
> 		 */
> 		e = lookup_exception(&snap->complete, chunk);
> 		if (e)
> @@ -1276,59 +1245,39 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
> 			}
> 		}
>
> -		if (!primary_pe) {
> -			/*
> -			 * Either every pe here has same
> -			 * primary_pe or none has one yet.
> -			 */
> -			if (pe->primary_pe)
> -				primary_pe = pe->primary_pe;
> -			else {
> -				primary_pe = pe;
> -				first = 1;
> -			}
> -
> -			bio_list_add(&primary_pe->origin_bios, bio);
> +		r = DM_MAPIO_SUBMITTED;
>
> -			r = DM_MAPIO_SUBMITTED;
> -		}
> +		if (bio) {
> +			bio_list_add(&pe->origin_bios, bio);
> +			bio = NULL;
>
> -		if (!pe->primary_pe) {
> -			pe->primary_pe = primary_pe;
> -			get_pending_exception(primary_pe);
> +			if (!pe->started) {
> +				pe->started = 1;
> +				pe_to_start = pe;
> +			}
> 		}
>
> 		if (!pe->started) {
> 			pe->started = 1;
> -			list_add_tail(&pe->list, &pe_queue);
> +			start_copy(pe);
> 		}
>
>  next_snapshot:
> 		up_write(&snap->lock);
> 	}
>
> -	if (!primary_pe)
> -		return r;
> -
> 	/*
> -	 * If this is the first time we're processing this chunk and
> -	 * ref_count is now 1 it means all the pending exceptions
> -	 * got completed while we were in the loop above, so it falls to
> -	 * us here to remove the primary_pe and submit any origin_bios.
> +	 * pe_to_start is a small performance improvement:
> +	 * To avoid calling __origin_write N times for N snapshots, we start
> +	 * the snapshot where we queued the bio as the last one.
> +	 *
> + * If we start it last, it will most likely also finish last, so the
> + * exceptions in the other snapshots will already be complete when the
> + * bio is retried.
> 	 */
>
> -	if (first && atomic_dec_and_test(&primary_pe->ref_count)) {
> -		flush_bios(bio_list_get(&primary_pe->origin_bios));
> -		free_pending_exception(primary_pe);
> -		/* If we got here, pe_queue is necessarily empty. */
> -		return r;
> -	}
> -
> -	/*
> -	 * Now that we have a complete pe list we can start the copying.
> -	 */
> -	list_for_each_entry_safe(pe, next_pe, &pe_queue, list)
> -		start_copy(pe);
> +	if (pe_to_start)
> +		start_copy(pe_to_start);
>
> 	return r;
> }
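
The pe_to_start ordering above is only a heuristic.  A toy sketch of the idea,
again with made-up names, assuming completions arrive roughly in the order the
copies were started:

#include <stdio.h>

#define NSNAPS 4

int main(void)
{
	int order[NSNAPS], n = 0, i;
	int bio_snap = 0;	/* the snapshot the origin bio was queued on */

	/* start every other snapshot's copy first ... */
	for (i = 0; i < NSNAPS; i++)
		if (i != bio_snap)
			order[n++] = i;
	/* ... and the one holding the bio last */
	order[n++] = bio_snap;

	/* if completions come back in start order, the bio's retry (triggered
	 * by the last completion) finds no other exception still pending */
	for (i = 0; i < n; i++)
		printf("exception for snapshot %d completed\n", order[i]);
	printf("origin bio retried once, then submitted\n");
	return 0;
}
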
> @@ -1344,7 +1293,7 @@ static int do_origin(struct dm_dev *origin, struct bio *bio)
> 	down_read(&_origins_lock);
> 	o = __lookup_origin(origin->bdev);
> 	if (o)
> -		r = __origin_write(&o->snapshots, bio);
> +		r = __origin_write(&o->snapshots, bio->bi_sector, bio);
> 	up_read(&_origins_lock);
>
> 	return r;
> -- 
> 1.6.2.5
>



