[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

[Cluster-devel] cluster gfs-kernel/src/gfs/recovery.c gnbd-ker ...



CVSROOT:	/cvs/cluster
Module name:	cluster
Changes by:	bmarzins sourceware org	2006-11-10 22:17:39

Modified files:
	gfs-kernel/src/gfs: recovery.c 
	gnbd-kernel/src: gnbd.c 

Log message:
	fix for bz215095 & 215099.
	
	for 215099, gnbd now only handles signals in sock_xmit() when it is called by
	the gnbd_recvd process. Otherwise, it simply blocks the signals until it
	completes the IO. This keeps gnbd from sending partial requests to the
	server, which can lead to data corruption.
	
	for 215095, the gfs function clean_journal() now uses the noinline attribute,
	gfs_find_jhead() only uses one struct gfs_log_header, and gfs_recover_journal() dynamically allocates its struct gfs_log_header, all to conserve stack space.
	
	In the gnbd function sock_xmit(), you no longer get the signal info, so
	gnbd_recvd cannot print which signal it received, but it saves over 120 bytes
	of stack space.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs-kernel/src/gfs/recovery.c.diff?cvsroot=cluster&r1=1.9&r2=1.10
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gnbd-kernel/src/gnbd.c.diff?cvsroot=cluster&r1=1.15&r2=1.16

--- cluster/gfs-kernel/src/gfs/recovery.c	2006/07/10 23:22:34	1.9
+++ cluster/gfs-kernel/src/gfs/recovery.c	2006/11/10 22:17:38	1.10
@@ -244,9 +244,10 @@
 gfs_find_jhead(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
 	       struct gfs_glock *gl, struct gfs_log_header *head)
 {
-	struct gfs_log_header lh1, lh_m;
+	struct gfs_log_header lh;
 	uint32_t seg1, seg2, seg_m;
 	int error;
+	uint64_t lh1_sequence;
 
 	seg1 = 0;
 	seg2 = jdesc->ji_nsegment - 1;
@@ -254,24 +255,26 @@
 	for (;;) {
 		seg_m = (seg1 + seg2) / 2;
 
-		error = find_good_lh(sdp, jdesc, gl, &seg1, &lh1, TRUE);
+		error = find_good_lh(sdp, jdesc, gl, &seg1, &lh, TRUE);
 		if (error)
 			break;
 
 		if (seg1 == seg_m) {
-			error = verify_jhead(sdp, jdesc, gl, &lh1);
+			error = verify_jhead(sdp, jdesc, gl, &lh);
 			if (unlikely(error)) 
 				printk("GFS: verify_jhead error=%d\n", error);
 			else
-				memcpy(head, &lh1, sizeof(struct gfs_log_header));
+				memcpy(head, &lh, sizeof(struct gfs_log_header));
 			break;
 		}
 
-		error = find_good_lh(sdp, jdesc, gl, &seg_m, &lh_m, FALSE);
+		lh1_sequence = lh.lh_sequence;
+
+		error = find_good_lh(sdp, jdesc, gl, &seg_m, &lh, FALSE);
 		if (error)
 			break;
 
-		if (lh1.lh_sequence <= lh_m.lh_sequence)
+		if (lh1_sequence <= lh.lh_sequence)
 			seg1 = seg_m;
 		else
 			seg2 = seg_m;
@@ -443,7 +446,7 @@
  * Returns: errno
  */
 
-static int
+static int noinline
 clean_journal(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
 	      struct gfs_glock *gl, struct gfs_log_header *head)
 {
@@ -565,7 +568,7 @@
 		    unsigned int jid, struct gfs_jindex *jdesc,
 		    int wait)
 {
-	struct gfs_log_header head;
+	struct gfs_log_header *head;
 	struct gfs_holder j_gh, t_gh;
 	unsigned long t;
 	int error;
@@ -596,16 +599,23 @@
 	printk("GFS: fsid=%s: jid=%u: Looking at journal...\n",
 	       sdp->sd_fsname, jid);
 
-	error = gfs_find_jhead(sdp, jdesc, j_gh.gh_gl, &head);
-	if (error)
+	head = kmalloc(sizeof(struct gfs_log_header), GFP_KERNEL);
+	if (!head) {
+		printk("GFS: fsid=%s jid=%u: Can't replay: Not enough memory",
+		       sdp->sd_fsname, jid);
 		goto fail_gunlock;
+	}
 
-	if (!(head.lh_flags & GFS_LOG_HEAD_UNMOUNT)) {
+	error = gfs_find_jhead(sdp, jdesc, j_gh.gh_gl, head);
+	if (error)
+		goto fail_header;
+
+	if (!(head->lh_flags & GFS_LOG_HEAD_UNMOUNT)) {
 		if (test_bit(SDF_ROFS, &sdp->sd_flags)) {
 			printk("GFS: fsid=%s: jid=%u: Can't replay: read-only FS\n",
 			       sdp->sd_fsname, jid);
 			error = -EROFS;
-			goto fail_gunlock;
+			goto fail_header;
 		}
 
 		printk("GFS: fsid=%s: jid=%u: Acquiring the transaction lock...\n",
@@ -623,7 +633,7 @@
 					  GL_NOCACHE,
 					  &t_gh);
 		if (error)
-			goto fail_gunlock;
+			goto fail_header;
 
 		if (test_bit(SDF_ROFS, &sdp->sd_flags)) {
 			printk("GFS: fsid=%s: jid=%u: Can't replay: read-only FS\n",
@@ -637,10 +647,10 @@
 
 		set_bit(GLF_DIRTY, &j_gh.gh_gl->gl_flags);
 
-		LO_BEFORE_SCAN(sdp, jid, &head, GFS_RECPASS_A1);
+		LO_BEFORE_SCAN(sdp, jid, head, GFS_RECPASS_A1);
 
 		error = foreach_descriptor(sdp, jdesc, j_gh.gh_gl,
-					   head.lh_tail, head.lh_first,
+					   head->lh_tail, head->lh_first,
 					   GFS_RECPASS_A1);
 		if (error)
 			goto fail_gunlock_tr;
@@ -649,7 +659,7 @@
 
 		gfs_replay_wait(sdp);
 
-		error = clean_journal(sdp, jdesc, j_gh.gh_gl, &head);
+		error = clean_journal(sdp, jdesc, j_gh.gh_gl, head);
 		if (error)
 			goto fail_gunlock_tr;
 
@@ -663,6 +673,8 @@
 
 	gfs_lm_recovery_done(sdp, jid, LM_RD_SUCCESS);
 
+	kfree(head);
+
 	gfs_glock_dq_uninit(&j_gh);
 
 	printk("GFS: fsid=%s: jid=%u: Done\n", sdp->sd_fsname, jid);
@@ -673,6 +685,9 @@
 	gfs_replay_wait(sdp);
 	gfs_glock_dq_uninit(&t_gh);
 
+ fail_header:
+	kfree(head);
+
  fail_gunlock:
 	gfs_glock_dq_uninit(&j_gh);
 
--- cluster/gnbd-kernel/src/gnbd.c	2006/08/11 15:18:14	1.15
+++ cluster/gnbd-kernel/src/gnbd.c	2006/11/10 22:17:39	1.16
@@ -288,7 +288,7 @@
  *  Send or receive packet.
  */
 static int sock_xmit(struct socket *sock, int send, void *buf, int size,
-		int msg_flags)
+		int msg_flags, int can_signal)
 {
 	mm_segment_t oldfs;
 	int result;
@@ -299,13 +299,12 @@
 
 	oldfs = get_fs();
 	set_fs(get_ds());
-	/* Allow interception of SIGKILL only
-	 * Don't allow other signals to interrupt the transmission */
 	spin_lock_irqsave(&current->sighand->siglock, flags);
 	oldset = current->blocked;
 	sigfillset(&current->blocked);
-	sigdelsetmask(&current->blocked, sigmask(SIGKILL) | sigmask(SIGTERM) |
-	              sigmask(SIGHUP));
+	if (can_signal)
+		sigdelsetmask(&current->blocked, sigmask(SIGKILL) |
+			      sigmask(SIGTERM) | sigmask(SIGHUP));
 	recalc_sigpending();
 	spin_unlock_irqrestore(&current->sighand->siglock, flags);
 
@@ -327,13 +326,9 @@
 		else
 			result = sock_recvmsg(sock, &msg, size, 0);
 
-		if (signal_pending(current)) {
-			siginfo_t info;
-			spin_lock_irqsave(&current->sighand->siglock, flags);
-			printk(KERN_WARNING "gnbd (pid %d: %s) got signal %d\n",
-				current->pid, current->comm, 
-				dequeue_signal(current, &current->blocked, &info));
-			spin_unlock_irqrestore(&current->sighand->siglock, flags);
+		if (can_signal && signal_pending(current)) {
+			printk(KERN_WARNING "gnbd (pid %d: %s) got signal\n",
+				current->pid, current->comm);
 			result = -EINTR;
 			break;
 		}
@@ -357,21 +352,22 @@
 }
 
 static inline int sock_send_bvec(struct socket *sock, struct bio_vec *bvec,
-		int flags)
+		int flags, int can_signal)
 {
 	int result;
 	void *kaddr = kmap(bvec->bv_page);
 	result = sock_xmit(sock, 1, kaddr + bvec->bv_offset, bvec->bv_len,
-			flags);
+			flags, can_signal);
 	kunmap(bvec->bv_page);
 	return result;
 }
 
 
-#define gnbd_send_req(dev, req) __gnbd_send_req((dev), (dev)->sock, (req))
+#define gnbd_send_req(dev, req, can_sig) \
+__gnbd_send_req((dev), (dev)->sock, (req), (can_sig))
 	
 int __gnbd_send_req(struct gnbd_device *dev, struct socket *sock,
-		struct request *req)
+		struct request *req, int can_signal)
 {
 	int result, i, flags;
 	struct gnbd_request request;
@@ -398,7 +394,8 @@
 			(unsigned long long)req->sector << 9,
 			req->nr_sectors << 9);
 	result = sock_xmit(sock, 1, &request, sizeof(request),
-			(gnbd_cmd(req) == GNBD_CMD_WRITE)? MSG_MORE: 0);
+			(gnbd_cmd(req) == GNBD_CMD_WRITE)? MSG_MORE: 0,
+			can_signal);
 	if (result < 0) {
 		printk(KERN_ERR "%s: Send control failed (result %d)\n",
 				dev->disk->disk_name, result);
@@ -420,7 +417,8 @@
 				dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n",
 						dev->disk->disk_name, req,
 						bvec->bv_len);
-				result = sock_send_bvec(sock, bvec, flags);
+				result = sock_send_bvec(sock, bvec, flags,
+							can_signal);
 				if (result < 0) {
 					printk(KERN_ERR "%s: Send data failed (result %d)\n",
 							dev->disk->disk_name,
@@ -458,7 +456,7 @@
 	int result;
 	void *kaddr = kmap(bvec->bv_page);
 	result = sock_xmit(sock, 0, kaddr + bvec->bv_offset, bvec->bv_len,
-			MSG_WAITALL);
+			MSG_WAITALL, 1);
 	kunmap(bvec->bv_page);
 	return result;
 }
@@ -494,7 +492,7 @@
 
 	BUG_ON(dev->magic != GNBD_MAGIC);
 
-	while((result = sock_xmit(sock, 0, &reply,sizeof(reply), MSG_WAITALL)) > 0){
+	while((result = sock_xmit(sock, 0, &reply,sizeof(reply), MSG_WAITALL, 1)) > 0){
 		if (ntohl(reply.magic) == GNBD_KEEP_ALIVE_MAGIC)
 			/* FIXME -- I should reset the wait time here */
 			continue;
@@ -609,7 +607,7 @@
 		list_add(&req->queuelist, &dev->queue_head);
 		spin_unlock(&dev->queue_lock);
 
-		err = gnbd_send_req(dev, req);
+		err = gnbd_send_req(dev, req, 0);
 
 		spin_lock_irq(q->queue_lock);
 		if (err)
@@ -641,7 +639,7 @@
 	printk("resending requests\n");
 	list_for_each(tmp, &dev->queue_head) {
 		req = list_entry(tmp, struct request, queuelist);
-		err = __gnbd_send_req(dev, sock, req);
+		err = __gnbd_send_req(dev, sock, req, 1);
 
 		if (err){
 			printk("failed trying to resend request (%d)\n", err);
@@ -705,7 +703,7 @@
 		/* There is no one using the device, you can disconnect it */
 		if (dev->sock == NULL)
 			return -ENOTCONN;
-		gnbd_send_req(dev, &shutdown_req);
+		gnbd_send_req(dev, &shutdown_req, 1);
                 return 0;
 	case GNBD_CLEAR_QUE:
 		if (down_interruptible(&dev->do_it_lock))
@@ -782,7 +780,7 @@
 			list_add(&ping_req.queuelist, &dev->queue_head);
 		}
 		spin_unlock(&dev->queue_lock);
-		gnbd_send_req(dev, &ping_req); /* ignore the errors */
+		gnbd_send_req(dev, &ping_req, 1); /* ignore the errors */
 		return 0;
 	case GNBD_PRINT_DEBUG:
 		printk(KERN_INFO "%s: next = %p, prev = %p, head = %p\n",


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]