[Cluster-devel] [PATCH 5/5] gfs2: dlm based recovery coordination

Thu Jan 5 16:58:22 UTC 2012

Hi,

On Thu, 2012-01-05 at 10:46 -0600, David Teigland wrote:
[snip]
>  
> +static int gdlm_mount(struct gfs2_sbd *sdp, const char *table)
> +{
> +	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
> +	char cluster[GFS2_LOCKNAME_LEN];
> +	const char *fsname;
> +	uint32_t flags;
> +	int error, ops_result;
> +
> +	/*
> +	 * initialize everything
> +	 */
> +
> +	INIT_DELAYED_WORK(&sdp->sd_control_work, gfs2_control_func);
> +	spin_lock_init(&ls->ls_recover_spin);
> +	ls->ls_recover_flags = 0;
> +	ls->ls_recover_mount = 0;
> +	ls->ls_recover_start = 0;
> +	ls->ls_recover_block = 0;
> +	ls->ls_recover_size = 0;
> +	ls->ls_recover_submit = NULL;
> +	ls->ls_recover_result = NULL;
> +
> +	error = set_recover_size(sdp, NULL, 0);
> +	if (error)
> +		goto fail;
> +
> +	/*
> +	 * prepare dlm_new_lockspace args
> +	 */
> +
> +	fsname = strchr(table, ':');
> +	if (!fsname) {
> +		fs_info(sdp, "no fsname found\n");
> +		error = -EINVAL;
> +		goto fail_free;
> +	}
> +	memset(cluster, 0, sizeof(cluster));
> +	memcpy(cluster, table, strlen(table) - strlen(fsname));
> +	fsname++;
> +
> +	flags = DLM_LSFL_FS | DLM_LSFL_NEWEXCL;
> +	if (ls->ls_nodir)
> +		flags |= DLM_LSFL_NODIR;
> +
> +	/*
> +	 * create/join lockspace
> +	 */
> +
> +	error = dlm_new_lockspace(fsname, cluster, flags, GDLM_LVB_SIZE,
> +				  &gdlm_lockspace_ops, sdp, &ops_result,
> +				  &ls->ls_dlm);
> +	if (error) {
> +		fs_err(sdp, "dlm_new_lockspace error %d\n", error);
> +		goto fail_free;
> +	}
> +
> +	if (ops_result < 0) {
> +		/*
> +		 * dlm does not support ops callbacks,
> +		 * old dlm_controld/gfs_controld are used, try without ops.
> +		 */
> +		fs_info(sdp, "dlm lockspace ops not used\n");
> +		free_recover_size(ls);
> +		set_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags);
> +		return 0;
> +	}
> +
> +	if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags)) {
> +		fs_err(sdp, "dlm lockspace ops disallow jid preset\n");
> +		error = -EINVAL;
> +		goto fail_release;
> +	}
> +
> +	/*
> +	 * control_mount() uses control_lock to determine first mounter,
> +	 * and for later mounts, waits for any recoveries to be cleared.
> +	 */
> +
> +	error = control_mount(sdp);
> +	if (error) {
> +		fs_err(sdp, "mount control error %d\n", error);
> +		goto fail_release;
> +	}
> +
> +	clear_bit(SDF_NOJOURNALID, &sdp->sd_flags);
> +	smp_mb__after_clear_bit();
> +	wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID);
> +	ls->ls_first = !!test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags);
> +	return 0;
> +

This bit of code, which was correct last time you posted this patch
appears to have reverted to its previous incorrect state. ls_first must
be set before SDF_NOJOURNALID is cleared, otherwise the uninitialised
value may be read,

Steve.