[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

[Cluster-devel] cluster gfs-kernel/src/gfs/recovery.c gnbd-ker ...



CVSROOT:	/cvs/cluster
Module name:	cluster
Branch: 	RHEL50
Changes by:	bmarzins sourceware org	2006-11-21 22:01:42

Modified files:
	gfs-kernel/src/gfs: recovery.c 
	gnbd-kernel/src: gnbd.c 

Log message:
	Fix for bz215095 and bz215099.  GFS now uses fewer gfs_log_header structures,
	and dynamically allocates them.  Also, clean_journal() is declared noinline, so
	that its stack frame doesn't get added to the stack frame of gfs_recover_journal().
	
	GNBD no longer uses the siginfo structure to examine the signal that killed
	a transfer. Examining the signal is not as important as it was, because GNBD now
	only deals with signals to the gnbd_recvd process. Handling other processes'
	signals created a bug that could cause data corruption.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs-kernel/src/gfs/recovery.c.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.9&r2=1.9.4.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gnbd-kernel/src/gnbd.c.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.15&r2=1.15.4.1

--- cluster/gfs-kernel/src/gfs/recovery.c	2006/07/10 23:22:34	1.9
+++ cluster/gfs-kernel/src/gfs/recovery.c	2006/11/21 22:01:41	1.9.4.1
@@ -244,9 +244,10 @@
 gfs_find_jhead(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
 	       struct gfs_glock *gl, struct gfs_log_header *head)
 {
-	struct gfs_log_header lh1, lh_m;
+	struct gfs_log_header lh;
 	uint32_t seg1, seg2, seg_m;
 	int error;
+	uint64_t lh1_sequence;
 
 	seg1 = 0;
 	seg2 = jdesc->ji_nsegment - 1;
@@ -254,24 +255,26 @@
 	for (;;) {
 		seg_m = (seg1 + seg2) / 2;
 
-		error = find_good_lh(sdp, jdesc, gl, &seg1, &lh1, TRUE);
+		error = find_good_lh(sdp, jdesc, gl, &seg1, &lh, TRUE);
 		if (error)
 			break;
 
 		if (seg1 == seg_m) {
-			error = verify_jhead(sdp, jdesc, gl, &lh1);
+			error = verify_jhead(sdp, jdesc, gl, &lh);
 			if (unlikely(error)) 
 				printk("GFS: verify_jhead error=%d\n", error);
 			else
-				memcpy(head, &lh1, sizeof(struct gfs_log_header));
+				memcpy(head, &lh, sizeof(struct gfs_log_header));
 			break;
 		}
 
-		error = find_good_lh(sdp, jdesc, gl, &seg_m, &lh_m, FALSE);
+		lh1_sequence = lh.lh_sequence;
+
+		error = find_good_lh(sdp, jdesc, gl, &seg_m, &lh, FALSE);
 		if (error)
 			break;
 
-		if (lh1.lh_sequence <= lh_m.lh_sequence)
+		if (lh1_sequence <= lh.lh_sequence)
 			seg1 = seg_m;
 		else
 			seg2 = seg_m;
@@ -443,7 +446,7 @@
  * Returns: errno
  */
 
-static int
+static int noinline
 clean_journal(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
 	      struct gfs_glock *gl, struct gfs_log_header *head)
 {
@@ -565,7 +568,7 @@
 		    unsigned int jid, struct gfs_jindex *jdesc,
 		    int wait)
 {
-	struct gfs_log_header head;
+	struct gfs_log_header *head;
 	struct gfs_holder j_gh, t_gh;
 	unsigned long t;
 	int error;
@@ -596,16 +599,23 @@
 	printk("GFS: fsid=%s: jid=%u: Looking at journal...\n",
 	       sdp->sd_fsname, jid);
 
-	error = gfs_find_jhead(sdp, jdesc, j_gh.gh_gl, &head);
-	if (error)
+	head = kmalloc(sizeof(struct gfs_log_header), GFP_KERNEL);
+	if (!head) {
+		printk("GFS: fsid=%s jid=%u: Can't replay: Not enough memory",
+		       sdp->sd_fsname, jid);
 		goto fail_gunlock;
+	}
 
-	if (!(head.lh_flags & GFS_LOG_HEAD_UNMOUNT)) {
+	error = gfs_find_jhead(sdp, jdesc, j_gh.gh_gl, head);
+	if (error)
+		goto fail_header;
+
+	if (!(head->lh_flags & GFS_LOG_HEAD_UNMOUNT)) {
 		if (test_bit(SDF_ROFS, &sdp->sd_flags)) {
 			printk("GFS: fsid=%s: jid=%u: Can't replay: read-only FS\n",
 			       sdp->sd_fsname, jid);
 			error = -EROFS;
-			goto fail_gunlock;
+			goto fail_header;
 		}
 
 		printk("GFS: fsid=%s: jid=%u: Acquiring the transaction lock...\n",
@@ -623,7 +633,7 @@
 					  GL_NOCACHE,
 					  &t_gh);
 		if (error)
-			goto fail_gunlock;
+			goto fail_header;
 
 		if (test_bit(SDF_ROFS, &sdp->sd_flags)) {
 			printk("GFS: fsid=%s: jid=%u: Can't replay: read-only FS\n",
@@ -637,10 +647,10 @@
 
 		set_bit(GLF_DIRTY, &j_gh.gh_gl->gl_flags);
 
-		LO_BEFORE_SCAN(sdp, jid, &head, GFS_RECPASS_A1);
+		LO_BEFORE_SCAN(sdp, jid, head, GFS_RECPASS_A1);
 
 		error = foreach_descriptor(sdp, jdesc, j_gh.gh_gl,
-					   head.lh_tail, head.lh_first,
+					   head->lh_tail, head->lh_first,
 					   GFS_RECPASS_A1);
 		if (error)
 			goto fail_gunlock_tr;
@@ -649,7 +659,7 @@
 
 		gfs_replay_wait(sdp);
 
-		error = clean_journal(sdp, jdesc, j_gh.gh_gl, &head);
+		error = clean_journal(sdp, jdesc, j_gh.gh_gl, head);
 		if (error)
 			goto fail_gunlock_tr;
 
@@ -663,6 +673,8 @@
 
 	gfs_lm_recovery_done(sdp, jid, LM_RD_SUCCESS);
 
+	kfree(head);
+
 	gfs_glock_dq_uninit(&j_gh);
 
 	printk("GFS: fsid=%s: jid=%u: Done\n", sdp->sd_fsname, jid);
@@ -673,6 +685,9 @@
 	gfs_replay_wait(sdp);
 	gfs_glock_dq_uninit(&t_gh);
 
+ fail_header:
+	kfree(head);
+
  fail_gunlock:
 	gfs_glock_dq_uninit(&j_gh);
 
--- cluster/gnbd-kernel/src/gnbd.c	2006/08/11 15:18:14	1.15
+++ cluster/gnbd-kernel/src/gnbd.c	2006/11/21 22:01:42	1.15.4.1
@@ -288,7 +288,7 @@
  *  Send or receive packet.
  */
 static int sock_xmit(struct socket *sock, int send, void *buf, int size,
-		int msg_flags)
+		int msg_flags, int can_signal)
 {
 	mm_segment_t oldfs;
 	int result;
@@ -299,13 +299,12 @@
 
 	oldfs = get_fs();
 	set_fs(get_ds());
-	/* Allow interception of SIGKILL only
-	 * Don't allow other signals to interrupt the transmission */
 	spin_lock_irqsave(&current->sighand->siglock, flags);
 	oldset = current->blocked;
 	sigfillset(&current->blocked);
-	sigdelsetmask(&current->blocked, sigmask(SIGKILL) | sigmask(SIGTERM) |
-	              sigmask(SIGHUP));
+	if (can_signal)
+		sigdelsetmask(&current->blocked, sigmask(SIGKILL) |
+			      sigmask(SIGTERM) | sigmask(SIGHUP));
 	recalc_sigpending();
 	spin_unlock_irqrestore(&current->sighand->siglock, flags);
 
@@ -327,13 +326,9 @@
 		else
 			result = sock_recvmsg(sock, &msg, size, 0);
 
-		if (signal_pending(current)) {
-			siginfo_t info;
-			spin_lock_irqsave(&current->sighand->siglock, flags);
-			printk(KERN_WARNING "gnbd (pid %d: %s) got signal %d\n",
-				current->pid, current->comm, 
-				dequeue_signal(current, &current->blocked, &info));
-			spin_unlock_irqrestore(&current->sighand->siglock, flags);
+		if (can_signal && signal_pending(current)) {
+			printk(KERN_WARNING "gnbd (pid %d: %s) got signal\n",
+				current->pid, current->comm);
 			result = -EINTR;
 			break;
 		}
@@ -357,21 +352,22 @@
 }
 
 static inline int sock_send_bvec(struct socket *sock, struct bio_vec *bvec,
-		int flags)
+		int flags, int can_signal)
 {
 	int result;
 	void *kaddr = kmap(bvec->bv_page);
 	result = sock_xmit(sock, 1, kaddr + bvec->bv_offset, bvec->bv_len,
-			flags);
+			flags, can_signal);
 	kunmap(bvec->bv_page);
 	return result;
 }
 
 
-#define gnbd_send_req(dev, req) __gnbd_send_req((dev), (dev)->sock, (req))
+#define gnbd_send_req(dev, req, can_sig) \
+__gnbd_send_req((dev), (dev)->sock, (req), (can_sig))
 	
 int __gnbd_send_req(struct gnbd_device *dev, struct socket *sock,
-		struct request *req)
+		struct request *req, int can_signal)
 {
 	int result, i, flags;
 	struct gnbd_request request;
@@ -398,7 +394,8 @@
 			(unsigned long long)req->sector << 9,
 			req->nr_sectors << 9);
 	result = sock_xmit(sock, 1, &request, sizeof(request),
-			(gnbd_cmd(req) == GNBD_CMD_WRITE)? MSG_MORE: 0);
+			(gnbd_cmd(req) == GNBD_CMD_WRITE)? MSG_MORE: 0,
+			can_signal);
 	if (result < 0) {
 		printk(KERN_ERR "%s: Send control failed (result %d)\n",
 				dev->disk->disk_name, result);
@@ -420,7 +417,8 @@
 				dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n",
 						dev->disk->disk_name, req,
 						bvec->bv_len);
-				result = sock_send_bvec(sock, bvec, flags);
+				result = sock_send_bvec(sock, bvec, flags,
+							can_signal);
 				if (result < 0) {
 					printk(KERN_ERR "%s: Send data failed (result %d)\n",
 							dev->disk->disk_name,
@@ -458,7 +456,7 @@
 	int result;
 	void *kaddr = kmap(bvec->bv_page);
 	result = sock_xmit(sock, 0, kaddr + bvec->bv_offset, bvec->bv_len,
-			MSG_WAITALL);
+			MSG_WAITALL, 1);
 	kunmap(bvec->bv_page);
 	return result;
 }
@@ -494,7 +492,7 @@
 
 	BUG_ON(dev->magic != GNBD_MAGIC);
 
-	while((result = sock_xmit(sock, 0, &reply,sizeof(reply), MSG_WAITALL)) > 0){
+	while((result = sock_xmit(sock, 0, &reply,sizeof(reply), MSG_WAITALL, 1)) > 0){
 		if (ntohl(reply.magic) == GNBD_KEEP_ALIVE_MAGIC)
 			/* FIXME -- I should reset the wait time here */
 			continue;
@@ -609,7 +607,7 @@
 		list_add(&req->queuelist, &dev->queue_head);
 		spin_unlock(&dev->queue_lock);
 
-		err = gnbd_send_req(dev, req);
+		err = gnbd_send_req(dev, req, 0);
 
 		spin_lock_irq(q->queue_lock);
 		if (err)
@@ -641,7 +639,7 @@
 	printk("resending requests\n");
 	list_for_each(tmp, &dev->queue_head) {
 		req = list_entry(tmp, struct request, queuelist);
-		err = __gnbd_send_req(dev, sock, req);
+		err = __gnbd_send_req(dev, sock, req, 1);
 
 		if (err){
 			printk("failed trying to resend request (%d)\n", err);
@@ -705,7 +703,7 @@
 		/* There is no one using the device, you can disconnect it */
 		if (dev->sock == NULL)
 			return -ENOTCONN;
-		gnbd_send_req(dev, &shutdown_req);
+		gnbd_send_req(dev, &shutdown_req, 1);
                 return 0;
 	case GNBD_CLEAR_QUE:
 		if (down_interruptible(&dev->do_it_lock))
@@ -782,7 +780,7 @@
 			list_add(&ping_req.queuelist, &dev->queue_head);
 		}
 		spin_unlock(&dev->queue_lock);
-		gnbd_send_req(dev, &ping_req); /* ignore the errors */
+		gnbd_send_req(dev, &ping_req, 1); /* ignore the errors */
 		return 0;
 	case GNBD_PRINT_DEBUG:
 		printk(KERN_INFO "%s: next = %p, prev = %p, head = %p\n",


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]