[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

Re: [dm-devel] Re: DM configuration



On jeu, 2005-10-06 at 13:14 -0400, Kiyoshi Ueda wrote:
> Hi,
> 
> On Wed, 5 Oct 2005 15:45:05 +0200, Christophe Varoqui <christophe varoqui free fr> wrote:
> > > 1) Under all path failure condition or the device is removed from array
> > > (using management station), IO continues if "queue_if_no_path" feature
> > > is enabled. As all the requests are cached all the memory gets used out
> > > for this. Can there be any mechanism where the all path failure is
> > > detected and marked, devices can be suspended or removed after some time
> > > (user configurable). By doing this the OS stalling can be avoided when
> > > the LUN is in dead state for prolonged period of time.
> > > 
> > Wouldn't it be adequate to solve this issue in the queue_if_no_path
> > limiting framework that should be worked on (time-based, queue depth-based,
> > both, ...?)
> > 
> > After all, the admin should "multipath -f $sssu_wants_to_kill_me") before,
> > shouldn't it ? Think of mounted FS, for example ... the admin *has* to
> > do something on the system anyway.
> 
> Indeed.
> But I think that taking care of that in multipathd is also needed.
> While kernel space solution may be better to solve daemon crash
> issue and so on, user space solution will keep the kernel code simple,
> as discussed in the following thread.
> 
>   Subject: [dm-devel] queue_if_no_paths timeout handling
>   URL    : https://www.redhat.com/archives/dm-devel/2005-July/msg00035.html
> 
> The following patch adds time-based retry feature in no-path situation
> to multipathd.  Any comments are welcome.
> 
> 
> This patch adds 'no_path_retry' option to multipathd.
> 
>    o no_path_retry = "fail" is equal to 'fail_if_no_path'.
>      (i.e. I/O to the no-path map will immediately fail.)
> 
>    o no_path_retry = "queue" is equal to 'queue_if_no_path'.
>      (i.e. I/O to the no-path map will be queued until any path comes up.)
> 
>    o If no_path_retry = <n> where n is positive number,
>      then multipathd will set queue_if_no_path to the map,
>      and if the all paths are down, multipathd will turn the feature
>      off to fail_if_no_path after the checker tries <n> times for each
>      paths in the map.
> 
> Multipathd re-writes queue_if_no_path feature parameter in the map,
> if this option is specified.  But by default, this patch doesn't
> change current multipathd behaviour.
> So this patch don't break any existing configuration.
> 
Interesting. Is this patched reviewed (besides me) and agreed on ?
Does the "no_path_retry" option name raise consensus ?

> # Multipathd has "features" option, with which user can directly specify
> # a feature parameter of the table (e.g. features="1 queue_if_no_path").
> # However, no_path_retry could be considered as smarter replacement for it.
> 
I have no objection phasing out the "features" option, in favor of a
more "deterministic" solution. If you care to submit a patch.

Only one note below,
Regards,
cvaroqui

> 
> diff -rup git/libmultipath/config.h retry-multipathd/libmultipath/config.h
> --- git/libmultipath/config.h	2005-09-28 12:32:02.000000000 -0400
> +++ retry-multipathd/libmultipath/config.h	2005-10-05 18:37:33.000000000 -0400
> @@ -33,6 +33,7 @@ struct mpentry {
>  	int pgpolicy;
>  	int pgfailback;
>  	int rr_weight;
> +	int no_path_retry;
>  
>  	char * wwid;
>  	char * selector;
> @@ -56,6 +57,7 @@ struct config {
>  	int pgfailback;
>  	int remove;
>  	int rr_weight;
> +	int default_no_path_retry;
>  
the "default_" prefix can be avoided here and in the dictionnary, as the
default{} block says it all.

>  	char * dev;
>  	char * udev_dir;
> diff -rup git/libmultipath/devmapper.c retry-multipathd/libmultipath/devmapper.c
> --- git/libmultipath/devmapper.c	2005-10-04 12:20:03.000000000 -0400
> +++ retry-multipathd/libmultipath/devmapper.c	2005-10-05 18:39:36.000000000 -0400
> @@ -490,6 +490,41 @@ out:
>  	return r;
>  }
>  
> +int
> +dm_queue_if_no_path(char *mapname, int enable)
> +{
> +	int r = 1;
> +	struct dm_task *dmt;
> +	char *str;
> +
> +	if (enable)
> +		str = "queue_if_no_path\n";
> +	else
> +		str = "fail_if_no_path\n";
> +
> +	if (!(dmt = dm_task_create(DM_DEVICE_TARGET_MSG)))
> +		return 1;
> +
> +	if (!dm_task_set_name(dmt, mapname))
> +		goto out;
> +
> +	if (!dm_task_set_sector(dmt, 0))
> +		goto out;
> +
> +	if (!dm_task_set_message(dmt, str))
> +		goto out;
> +
> +	dm_task_no_open_count(dmt);
> +
> +	if (!dm_task_run(dmt))
> +		goto out;
> +
> +	r = 0;
> +out:
> +	dm_task_destroy(dmt);
> +	return r;
> +}
> +
>  static int
>  dm_groupmsg (char * msg, char * mapname, int index)
>  {
> diff -rup git/libmultipath/devmapper.h retry-multipathd/libmultipath/devmapper.h
> --- git/libmultipath/devmapper.h	2005-09-30 14:13:49.000000000 -0400
> +++ retry-multipathd/libmultipath/devmapper.h	2005-10-05 18:39:58.000000000 -0400
> @@ -10,6 +10,7 @@ int dm_flush_map (char *, char *);
>  int dm_flush_maps (char *);
>  int dm_fail_path(char * mapname, char * path);
>  int dm_reinstate(char * mapname, char * path);
> +int dm_queue_if_no_path(char *mapname, int enable);
>  int dm_switchgroup(char * mapname, int index);
>  int dm_enablegroup(char * mapname, int index);
>  int dm_disablegroup(char * mapname, int index);
> diff -rup git/libmultipath/dict.c retry-multipathd/libmultipath/dict.c
> --- git/libmultipath/dict.c	2005-09-28 12:32:02.000000000 -0400
> +++ retry-multipathd/libmultipath/dict.c	2005-10-05 18:43:51.000000000 -0400
> @@ -171,6 +171,26 @@ default_failback_handler(vector strvec)
>  	return 0;
>  }
>  
> +static int
> +def_no_path_retry_handler(vector strvec)
> +{
> +	char * buff;
> +
> +	buff = set_value(strvec);
> +	if (!buff)
> +		return 1;
> +
> +	if (!strncmp(buff, "fail", 4) || !strncmp(buff, "0", 1))
> +		conf->default_no_path_retry = NO_PATH_RETRY_FAIL;
> +	else if (!strncmp(buff, "queue", 5))
> +		conf->default_no_path_retry = NO_PATH_RETRY_QUEUE;
> +	else if ((conf->default_no_path_retry = atoi(buff)) < 1)
> +		conf->default_no_path_retry = NO_PATH_RETRY_UNDEF;
> +
> +	FREE(buff);
> +	return 0;
> +}
> +
>  /*
>   * blacklist block handlers
>   */
> @@ -579,6 +599,30 @@ mp_weight_handler(vector strvec)
>  	return 0;
>  }
>  
> +static int
> +mp_no_path_retry_handler(vector strvec)
> +{
> +	struct mpentry *mpe = VECTOR_LAST_SLOT(conf->mptable);
> +	char *buff;
> +
> +	if (!mpe)
> +		return 1;
> +
> +	buff = set_value(strvec);
> +	if (!buff)
> +		return 1;
> +
> +	if (!strncmp(buff, "fail", 4) || !strncmp(buff, "0", 1))
> +		mpe->no_path_retry = NO_PATH_RETRY_FAIL;
> +	else if (!strncmp(buff, "queue", 5))
> +		mpe->no_path_retry = NO_PATH_RETRY_QUEUE;
> +	else if ((mpe->no_path_retry = atoi(buff)) < 1)
> +		mpe->no_path_retry = NO_PATH_RETRY_UNDEF;
> +
> +	FREE(buff);
> +	return 0;
> +}
> +
>  vector
>  init_keywords(void)
>  {
> @@ -596,6 +640,7 @@ init_keywords(void)
>  	install_keyword("failback", &default_failback_handler);
>  	install_keyword("rr_min_io", &def_minio_handler);
>  	install_keyword("rr_weight", &def_weight_handler);
> +	install_keyword("default_no_path_retry", &def_no_path_retry_handler);

"default_" prefix can be avoided

>  	install_keyword_root("devnode_blacklist", &blacklist_handler);
>  	install_keyword("devnode", &ble_handler);
> @@ -626,6 +671,7 @@ init_keywords(void)
>  	install_keyword("path_selector", &mp_selector_handler);
>  	install_keyword("failback", &mp_failback_handler);
>  	install_keyword("rr_weight", &mp_weight_handler);
> +	install_keyword("no_path_retry", &mp_no_path_retry_handler);
>  	install_sublevel_end();
>  
>  	return keywords;
> diff -rup git/libmultipath/propsel.c retry-multipathd/libmultipath/propsel.c
> --- git/libmultipath/propsel.c	2005-09-28 12:32:02.000000000 -0400
> +++ retry-multipathd/libmultipath/propsel.c	2005-10-05 18:46:03.000000000 -0400
> @@ -201,3 +201,22 @@ select_getprio (struct path * pp)
>  	return 0;
>  }
>  
> +extern int
> +select_no_path_retry(struct multipath *mp)
> +{
> +	if (mp->mpe && mp->mpe->no_path_retry != NO_PATH_RETRY_UNDEF) {
> +		mp->no_path_retry = mp->mpe->no_path_retry;
> +		condlog(3, "no_path_retry = %i (controler setting)",
> +			mp->no_path_retry);
> +		return 0;
> +	}
> +	if (conf->default_no_path_retry != NO_PATH_RETRY_UNDEF) {
> +		mp->no_path_retry = conf->default_no_path_retry;
> +		condlog(3, "no_path_retry = %i (config file default)",
> +			mp->no_path_retry);
> +		return 0;
> +	}
> +	mp->no_path_retry = NO_PATH_RETRY_UNDEF;
> +	condlog(3, "no_path_retry = NONE (internal default)");
> +	return 0;
> +}
> diff -rup git/libmultipath/propsel.h retry-multipathd/libmultipath/propsel.h
> --- git/libmultipath/propsel.h	2005-08-29 09:41:06.000000000 -0400
> +++ retry-multipathd/libmultipath/propsel.h	2005-10-05 18:46:21.000000000 -0400
> @@ -8,4 +8,4 @@ int select_hwhandler (struct multipath *
>  int select_checkfn(struct path *pp);
>  int select_getuid (struct path * pp);
>  int select_getprio (struct path * pp);
> -
> +int select_no_path_retry(struct multipath *mp);
> diff -rup git/libmultipath/structs.h retry-multipathd/libmultipath/structs.h
> --- git/libmultipath/structs.h	2005-10-04 12:20:03.000000000 -0400
> +++ retry-multipathd/libmultipath/structs.h	2005-10-05 18:51:01.000000000 -0400
> @@ -14,6 +14,10 @@
>  #define SCSI_PRODUCT_SIZE	17
>  #define SCSI_REV_SIZE		5
>  
> +#define NO_PATH_RETRY_UNDEF	0
> +#define NO_PATH_RETRY_FAIL	-1
> +#define NO_PATH_RETRY_QUEUE	-2
> +
>  enum free_path_switch {
>  	KEEP_PATHS,
>  	FREE_PATHS
> @@ -115,6 +119,9 @@ struct multipath {
>  	int pgfailback;
>  	int failback_tick;
>  	int rr_weight;
> +	int nr_active;     /* current available(= not known as failed) paths */
> +	int no_path_retry; /* number of retries after all paths are down */
> +	int retry_tick;    /* remaining times for retries */
>  	unsigned long long size;
>  	vector paths;
>  	vector pg;
> diff -rup git/multipath.conf.annotated retry-multipathd/multipath.conf.annotated
> --- git/multipath.conf.annotated	2005-09-28 12:32:02.000000000 -0400
> +++ retry-multipathd/multipath.conf.annotated	2005-10-05 18:53:28.000000000 -0400
> @@ -99,6 +99,17 @@
>  #	# default : immediate
>  #	#
>  #	failback	manual
> +#
> +#	#
> +#	# name    : default_no_path_retry
> +#	# scope   : multipathd
> +#	# desc    : tell the number of retries until disable queueing, or
> +#	#           "fail" means immediate failure (no queueing),
> +#	#           "queue" means never stop queueing
> +#	# values  : queue|fail|n (>0)
> +#	# default : (null)
> +#	#
> +#	#default_no_path_retry  queue
>  #}
>  #	
>  ##
> @@ -177,6 +188,17 @@
>  #		# default : immediate
>  #		#
>  #		failback		manual
> +#
> +#		#
> +#		# name    : no_path_retry
> +#		# scope   : multipathd
> +#		# desc    : tell the number of retries until disable queueing, or
> +#		#           "fail" means immediate failure (no queueing),
> +#		#           "queue" means never stop queueing
> +#		# values  : queue|fail|n (>0)
> +#		# default : (null)
> +#		#
> +#		#no_path_retry  queue
>  #	}
>  #	multipath {
>  #		wwid	1DEC_____321816758474
> diff -rup git/multipathd/main.c retry-multipathd/multipathd/main.c
> --- git/multipathd/main.c	2005-09-28 12:32:02.000000000 -0400
> +++ retry-multipathd/multipathd/main.c	2005-10-06 11:48:47.000000000 -0400
> @@ -209,6 +209,21 @@ set_multipath_wwid (struct multipath * m
>  }
>  
>  static int
> +pathcount (struct multipath *mpp, int state)
> +{
> +	struct pathgroup *pgp;
> +	struct path *pp;
> +	int i, j;
> +	int count = 0;
> +
> +	vector_foreach_slot (mpp->pg, pgp, i)
> +		vector_foreach_slot (pgp->paths, pp, j)
> +			if (pp->state == state)
> +				count++;
> +	return count;
> +}
> +
> +static int
>  setup_multipath (struct vectors * vecs, struct multipath * mpp)
>  {
>  	int i;
> @@ -222,6 +237,14 @@ setup_multipath (struct vectors * vecs, 
>  
>  	set_paths_owner(vecs, mpp);
>  	select_pgfailback(mpp);
> +	mpp->nr_active = pathcount(mpp, PATH_UP);
> +	select_no_path_retry(mpp);
> +	if (mpp->no_path_retry != NO_PATH_RETRY_UNDEF) {
> +		if (mpp->no_path_retry == NO_PATH_RETRY_FAIL)
> +			dm_queue_if_no_path(mpp->alias, 0);
> +		else
> +			dm_queue_if_no_path(mpp->alias, 1);
> +	}
>  
>  	return 0;
>  out:
> @@ -347,6 +370,38 @@ static sigset_t unblock_sighup(void)
>  }
>  
>  /*
> + * mpp->no_path_retry:
> + *   -2 : queue_if_no_path enabled, never turned off
> + *   -1 : fail_if_no_path
> + *    0 : nothing
> + *   >0 : queue_if_no_path enabled, turned off after polling n times
> + */
> +static void
> +update_queue_mode_del_path(struct multipath *mpp)
> +{
> +	if (--mpp->nr_active == 0 && mpp->no_path_retry > 0) {
> +		/* Enter retry mode */
> +		mpp->retry_tick = mpp->no_path_retry * conf->checkint;
> +		condlog(1, "%s: Entering recovery mode: max_retries=%d",
> +			mpp->alias, mpp->no_path_retry);
> +	}
> +	condlog(2, "%s: remaining active paths: %d", mpp->alias, mpp->nr_active);
> +}
> +
> +static void
> +update_queue_mode_add_path(struct multipath *mpp)
> +{
> +	if (mpp->nr_active++ == 0 && mpp->no_path_retry > 0) {
> +		/* come back to normal mode from retry mode */
> +		mpp->retry_tick = 0;
> +		dm_queue_if_no_path(mpp->alias, 1);
> +		condlog(2, "%s: queue_if_no_path enabled", mpp->alias);
> +		condlog(1, "%s: Recovered to normal mode", mpp->alias);
> +	}
> +	condlog(2, "%s: remaining active paths: %d", mpp->alias, mpp->nr_active);
> +}
> +
> +/*
>   * returns the reschedule delay
>   * negative means *stop*
>   */
> @@ -690,6 +745,10 @@ uev_remove_path (char * devname, struct 
>  		condlog(3, "%s: not in pathvec");
>  		return 1;
>  	}
> +
> +	if (pp->mpp && pp->state == PATH_UP)
> +		update_queue_mode_del_path(pp->mpp);
> +
>  	condlog(2, "remove %s path checker", devname);
>  	i = find_slot(vecs->pathvec, (void *)pp);
>  	vector_del_slot(vecs->pathvec, i);
> @@ -817,6 +876,13 @@ reconfigure (struct vectors * vecs)
>  	vector_foreach_slot (vecs->mpvec, mpp, i) {
>  		mpp->mpe = find_mpe(mpp->wwid);
>  		set_paths_owner(vecs, mpp);
> +		select_no_path_retry(mpp);
> +		if (mpp->no_path_retry != NO_PATH_RETRY_UNDEF) {
> +			if (mpp->no_path_retry == NO_PATH_RETRY_FAIL)
> +				dm_queue_if_no_path(mpp->alias, 0);
> +			else
> +				dm_queue_if_no_path(mpp->alias, 1);
> +		}
>  	}
>  	vector_foreach_slot (vecs->pathvec, pp, i) {
>  		select_checkfn(pp);
> @@ -983,6 +1049,7 @@ fail_path (struct path * pp)
>  		 pp->dev_t, pp->mpp->alias);
>  
>  	dm_fail_path(pp->mpp->alias, pp->dev_t);
> +	update_queue_mode_del_path(pp->mpp);
>  }
>  
>  /*
> @@ -991,11 +1058,14 @@ fail_path (struct path * pp)
>  static void
>  reinstate_path (struct path * pp)
>  {
> -	if (pp->mpp) {
> -		if (dm_reinstate(pp->mpp->alias, pp->dev_t))
> -			condlog(0, "%s: reinstate failed", pp->dev_t);
> -		else
> -			condlog(2, "%s: reinstated", pp->dev_t);
> +	if (!pp->mpp)
> +		return;
> +
> +	if (dm_reinstate(pp->mpp->alias, pp->dev_t))
> +		condlog(0, "%s: reinstate failed", pp->dev_t);
> +	else {
> +		condlog(2, "%s: reinstated", pp->dev_t);
> +		update_queue_mode_add_path(pp->mpp);
>  	}
>  }
>  
> @@ -1056,6 +1126,23 @@ defered_failback_tick (vector mpvec)
>  	}
>  }
>  
> +static void
> +retry_count_tick(vector mpvec)
> +{
> +	struct multipath *mpp;
> +	int i;
> +
> +	vector_foreach_slot (mpvec, mpp, i) {
> +		if (mpp->retry_tick) {
> +			condlog(4, "%s: Retrying.. No active path", mpp->alias);
> +			if(--mpp->retry_tick == 0) {
> +				dm_queue_if_no_path(mpp->alias, 0);
> +				condlog(2, "%s: Disable queueing", mpp->alias);
> +			}
> +		}
> +	}
> +}
> +
>  static void *
>  checkerloop (void *ap)
>  {
> @@ -1201,6 +1288,7 @@ checkerloop (void *ap)
>  			}
>  		}
>  		defered_failback_tick(vecs->mpvec);
> +		retry_count_tick(vecs->mpvec);
>  
>  		if (count)
>  			count--;
> 
> Regards,
> Kiyoshi Ueda
> 
> --
> dm-devel mailing list
> dm-devel redhat com
> https://www.redhat.com/mailman/listinfo/dm-devel



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]