[Cluster-devel] cluster/group/gfs_controld main.c plock.c

teigland at sourceware.org
Tue Nov 14 21:31:03 UTC 2006


CVSROOT:	/cvs/cluster
Module name:	cluster
Branch: 	RHEL50
Changes by:	teigland at sourceware.org	2006-11-14 21:31:00

Modified files:
	group/gfs_controld: main.c plock.c 

Log message:
	Add plock rate limit option -l <limit>; a limit of 0 means no limit.
	If a limit is set, gfs_controld will send no more than <limit> plock
	operations (multicast messages) per second.
	
	Given a limit of 10, one file system where plocks are used, and a program
	that does a tight loop of fcntl lock/unlock operations, the maximum number
	of loop iterations per second would be 5, since each iteration generates
	two plock operations (one lock, one unlock).  If eight nodes were all
	doing this, there would be 80 network multicasts per second in total
	across the cluster.
	
	We also record in the debug log the volume of plock messages accepted
	locally and received from the network.  A log entry is written for every
	1000 locally accepted plock operations and for every 1000 operations
	received from the network.
	
	The default plock rate limit is now 10 instead of 0.
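
For readers skimming the diff, here is a minimal standalone sketch of the
mechanism: once <limit> operations have been accepted within the current
second, further requests are refused with -EBUSY until the second rolls
over (main.c then polls the plocks fd with a 100ms timeout instead of
blocking).  This is an illustration only; the identifiers below
(plock_rate_check, rate_limit, accepted_count, rate_last) are invented
for the sketch and are not the actual gfs_controld symbols, which appear
in the patch itself.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/time.h>

static uint32_t rate_limit = 10;     /* -l <limit>, 0 means unlimited */
static uint32_t accepted_count;      /* plock ops accepted so far */
static struct timeval rate_last;     /* start of the current 1s window */

/* Return 0 if another plock op may be accepted now, -EBUSY if the
   caller should back off until the current second has expired. */
static int plock_rate_check(void)
{
	struct timeval now;

	if (rate_limit && accepted_count &&
	    !(accepted_count % rate_limit)) {
		gettimeofday(&now, NULL);
		/* still inside the same second as the last full batch */
		if (now.tv_sec - rate_last.tv_sec <= 0)
			return -EBUSY;
		rate_last = now;
	}
	accepted_count++;
	return 0;
}

int main(void)
{
	int accepted = 0, deferred = 0, i;

	gettimeofday(&rate_last, NULL);

	for (i = 0; i < 25; i++) {
		if (plock_rate_check() == -EBUSY)
			deferred++;	/* gfs_controld would retry after poll() */
		else
			accepted++;
	}

	/* with a limit of 10, expect roughly "accepted 10 deferred 15" */
	printf("accepted %d deferred %d\n", accepted, deferred);
	return 0;
}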

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/main.c.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.18&r2=1.18.4.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/plock.c.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.25&r2=1.25.4.1

--- cluster/group/gfs_controld/main.c	2006/10/20 19:32:52	1.18
+++ cluster/group/gfs_controld/main.c	2006/11/14 21:30:59	1.18.4.1
@@ -12,9 +12,11 @@
 
 #include "lock_dlm.h"
 
-#define OPTION_STRING			"DPhVwp"
+#define OPTION_STRING			"DPhVwpl:"
 #define LOCKFILE_NAME			"/var/run/gfs_controld.pid"
 
+#define DEFAULT_PLOCK_RATE_LIMIT 10
+
 struct client {
 	int fd;
 	char type[32];
@@ -32,11 +34,13 @@
 static int groupd_fd;
 static int uevent_fd;
 static int plocks_fd;
+static int plocks_ci;
 
 extern struct list_head mounts;
 extern struct list_head withdrawn_mounts;
 int no_withdraw;
 int no_plock;
+uint32_t plock_rate_limit = DEFAULT_PLOCK_RATE_LIMIT;
 
 
 int do_write(int fd, void *buf, size_t count)
@@ -158,6 +162,18 @@
 	client[ci].mg = NULL;
 }
 
+static void client_ignore(int ci, int fd)
+{
+	pollfd[ci].fd = -1;
+	pollfd[ci].events = 0;
+}
+
+static void client_back(int ci, int fd)
+{
+	pollfd[ci].fd = fd;
+	pollfd[ci].events = POLLIN;
+}
+
 int client_send(int ci, char *buf, int len)
 {
 	return do_write(client[ci].fd, buf, len);
@@ -401,7 +417,7 @@
 
 int loop(void)
 {
-	int rv, i, f;
+	int rv, i, f, error, poll_timeout = -1, ignore_plocks_fd = 0;
 
 	rv = listen_fd = setup_listen();
 	if (rv < 0)
@@ -431,12 +447,12 @@
 	rv = plocks_fd = setup_plocks();
 	if (rv < 0)
 		goto out;
-	client_add(plocks_fd);
+	plocks_ci = client_add(plocks_fd);
 
 	log_debug("setup done");
 
 	for (;;) {
-		rv = poll(pollfd, client_maxi + 1, -1);
+		rv = poll(pollfd, client_maxi + 1, poll_timeout);
 		if (rv < 0)
 			log_error("poll error %d errno %d", rv, errno);
 
@@ -463,9 +479,15 @@
 					process_cpg();
 				else if (pollfd[i].fd == uevent_fd)
 					process_uevent();
-				else if (pollfd[i].fd == plocks_fd)
-					process_plocks();
-				else
+				else if (pollfd[i].fd == plocks_fd) {
+					error = process_plocks();
+					if (error == -EBUSY) {
+						client_ignore(plocks_ci,
+							      plocks_fd);
+						ignore_plocks_fd = 1;
+						poll_timeout = 100;
+					}
+				} else
 					process_client(i);
 			}
 
@@ -482,6 +504,18 @@
 				}
 				client_dead(i);
 			}
+
+			/* check if our plock rate limit has expired so we
+			   can start taking more local plock requests again */
+
+			if (ignore_plocks_fd) {
+				error = process_plocks();
+				if (error != -EBUSY) {
+					client_back(plocks_ci, plocks_fd);
+					ignore_plocks_fd = 0;
+					poll_timeout = -1;
+				}
+			}
 		}
 	}
 	rv = 0;
@@ -560,6 +594,11 @@
 	printf("Options:\n");
 	printf("\n");
 	printf("  -D	       Enable debugging code and don't fork\n");
+	printf("  -P	       Enable plock debugging\n");
+	printf("  -p	       Disable plocks\n");
+	printf("  -l <limit>   Limit the rate of plock operations\n");
+	printf("               Default is %d, set to 0 for no limit\n", DEFAULT_PLOCK_RATE_LIMIT);
+	printf("  -w	       Disable withdraw\n");
 	printf("  -h	       Print this help, then exit\n");
 	printf("  -V	       Print program version information, then exit\n");
 }
@@ -586,6 +625,10 @@
 			plock_debug_opt = 1;
 			break;
 
+		case 'l':
+			plock_rate_limit = atoi(optarg);
+			break;
+
 		case 'p':
 			no_plock = 1;
 			break;
--- cluster/group/gfs_controld/plock.c	2006/11/03 15:33:46	1.25
+++ cluster/group/gfs_controld/plock.c	2006/11/14 21:30:59	1.25.4.1
@@ -18,6 +18,7 @@
 #include <sys/ioctl.h>
 #include <sys/stat.h>
 #include <sys/utsname.h>
+#include <sys/time.h>
 #include <netinet/in.h>
 #include <arpa/inet.h>
 #include <net/if.h>
@@ -49,6 +50,14 @@
 extern int message_flow_control_on;
 extern int no_plock;
 
+extern uint32_t plock_rate_limit;
+uint32_t plock_read_count;
+uint32_t plock_recv_count;
+uint32_t plock_rate_delays;
+struct timeval plock_read_time;
+struct timeval plock_recv_time;
+struct timeval plock_rate_last;
+
 static SaCkptHandleT ckpt_handle;
 static SaCkptCallbacksT callbacks = { 0, 0 };
 static SaVersionT version = { 'B', 1, 1 };
@@ -276,6 +285,13 @@
 	SaAisErrorT err;
 	int rv;
 
+	plock_read_count = 0;
+	plock_recv_count = 0;
+	plock_rate_delays = 0;
+	gettimeofday(&plock_read_time, NULL);
+	gettimeofday(&plock_recv_time, NULL);
+	gettimeofday(&plock_rate_last, NULL);
+
 	if (no_plock)
 		goto control;
 
@@ -300,6 +316,7 @@
 	struct mountgroup *mg;
 	struct gdlm_plock_info info;
 	struct gdlm_header *hd;
+	struct timeval now;
 	char *buf;
 	int len, rv;
 
@@ -307,6 +324,19 @@
 	if (message_flow_control_on)
 		return 0;
 
+	/* do we want to do something a little more accurate than tv_sec? */
+
+	/* limit plock rate within one second */
+	if (plock_rate_limit && plock_read_count &&
+	    !(plock_read_count % plock_rate_limit)) {
+		gettimeofday(&now, NULL);
+		if (now.tv_sec - plock_rate_last.tv_sec <= 0) {
+			plock_rate_delays++;
+			return -EBUSY;
+		}
+		plock_rate_last = now;
+	}
+
 	memset(&info, 0, sizeof(info));
 
 	rv = read(control_fd, &info, sizeof(info));
@@ -331,6 +361,18 @@
 		  info.nodeid, info.pid, info.owner,
 		  info.wait);
 
+	/* report plock rate and any delays since the last report */
+	plock_read_count++;
+	if (!(plock_read_count % 1000)) {
+		gettimeofday(&now, NULL);
+		log_group(mg, "plock_read_count %u time %us delays %u",
+			  plock_read_count,
+			  (unsigned) (now.tv_sec - plock_read_time.tv_sec),
+			  plock_rate_delays);
+		plock_read_time = now;
+		plock_rate_delays = 0;
+	}
+
 	len = sizeof(struct gdlm_header) + sizeof(struct gdlm_plock_info);
 	buf = malloc(len);
 	if (!buf) {
@@ -878,6 +920,7 @@
 {
 	struct gdlm_plock_info info;
 	struct gdlm_header *hd = (struct gdlm_header *) buf;
+	struct timeval now;
 	int rv = 0;
 
 	memcpy(&info, buf + sizeof(struct gdlm_header), sizeof(info));
@@ -892,6 +935,14 @@
 		  info.nodeid, info.pid, info.owner,
 		  info.wait);
 
+	plock_recv_count++;
+	if (!(plock_recv_count % 1000)) {
+		gettimeofday(&now, NULL);
+		log_group(mg, "plock_recv_count %u time %us", plock_recv_count,
+			  (unsigned) (now.tv_sec - plock_recv_time.tv_sec));
+		plock_recv_time = now;
+	}
+
 	if (info.optype == GDLM_PLOCK_OP_GET && from != our_nodeid)
 		return;
 



