[Cluster-devel] cluster/cman man/qdisk.5 qdisk/disk.h qdisk/di ...

Tue Jan 23 17:57:08 UTC 2007

CVSROOT:	/cvs/cluster
Module name:	cluster
Branch: 	RHEL4
Changes by:	lhh at sourceware.org	2007-01-23 17:57:07

Modified files:
	cman/man       : qdisk.5 
	cman/qdisk     : disk.h disk_util.c main.c 

Log message:
	Use /proc/uptime by default instead of gettimeofday(2) for internal timings to avoid problems when the clock is reset by NTP

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/man/qdisk.5.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.2&r2=1.1.2.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/disk.h.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.4&r2=1.1.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/disk_util.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.1&r2=1.1.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/main.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.5&r2=1.1.2.6

--- cluster/cman/man/qdisk.5	2007/01/22 22:50:38	1.1.2.2
+++ cluster/cman/man/qdisk.5	2007/01/23 17:57:07	1.1.2.3
@@ -294,6 +294,16 @@
 disconnected from the SAN.  The default for this value is 0 (off).
 
 .in 9
+\fIuse_uptime\fP\fB="\fP1\fB"\fP
+.in 12
+If this parameter is set to 1 (on), qdiskd will use values from
+/proc/uptime for internal timings.  This is a bit less precise
+than \fBgettimeofday(2)\fP, but the benefit is that changing the 
+system clock will not affect qdiskd's behavior - even if \fBparanoid\fP
+is enabled.  If set to 0, qdiskd will use \fBgettimeofday(2)\fP, which
+is more precise.  The default for this value is 1 (on / use uptime).
+
+.in 9
 \fIdevice\fP\fB="\fP/dev/sda1\fB"\fP
 .in 12
 This is the device the quorum daemon will use.  This device must be the
@@ -412,4 +422,4 @@
 for more details.
 
 .SH "SEE ALSO"
-mkqdisk(8), qdiskd(8), cman(5), syslog.conf(5)
+mkqdisk(8), qdiskd(8), cman(5), syslog.conf(5), gettimeofday(2)
--- cluster/cman/qdisk/disk.h	2007/01/22 22:50:38	1.1.2.4
+++ cluster/cman/qdisk/disk.h	2007/01/23 17:57:07	1.1.2.5
@@ -71,7 +71,8 @@
 	RF_STOP_CMAN = 0x2,
 	RF_DEBUG = 0x4,
 	RF_PARANOID = 0x8,
-	RF_ALLOW_KILL = 0x10
+	RF_ALLOW_KILL = 0x10,
+	RF_UPTIME = 0x20
 } run_flag_t;
 
 
--- cluster/cman/qdisk/disk_util.c	2006/05/18 14:52:49	1.1.2.1
+++ cluster/cman/qdisk/disk_util.c	2007/01/23 17:57:07	1.1.2.2
@@ -37,20 +37,66 @@
 #include <time.h>
 
 
-static inline void
+inline void
 _diff_tv(struct timeval *dest, struct timeval *start, struct timeval *end)
 {
-	        dest->tv_sec = end->tv_sec - start->tv_sec;
-	        dest->tv_usec = end->tv_usec - start->tv_usec;
+	dest->tv_sec = end->tv_sec - start->tv_sec;
+	dest->tv_usec = end->tv_usec - start->tv_usec;
 
-		if (dest->tv_usec < 0) {
-			dest->tv_usec += 1000000;
-			dest->tv_sec--;
-		}
+	if (dest->tv_usec < 0) {
+		dest->tv_usec += 1000000;
+		dest->tv_sec--;
+	}
 }
 
 
 /**
+ *
+ * Grab the uptime from /proc/uptime.
+ * 
+ * @param tv		Timeval struct to store time in.  The sec
+ * 			field contains seconds, the usec field 
+ * 			contains the hundredths-of-seconds (converted
+ * 			to micro-seconds)
+ * @return		-1 on failure, 0 on success.
+ */
+static inline int
+getuptime(struct timeval *tv)
+{
+	FILE *fp;
+	struct timeval junk;
+	int rv;
+	
+	fp = fopen("/proc/uptime","r");
+	if (!fp)
+		return -1;
+
+	rv = fscanf(fp,"%ld.%ld %ld.%ld\n", &tv->tv_sec, &tv->tv_usec,
+		    &junk.tv_sec, &junk.tv_usec);
+	fclose(fp);
+	
+	if (rv != 4) {
+		return -1;
+	}
+	
+	tv->tv_usec *= 10000;
+	
+	return 0;
+}
+
+
+inline int
+get_time(struct timeval *tv, int use_uptime)
+{
+	if (use_uptime) {
+		return getuptime(tv);
+	} else {
+		return gettimeofday(tv, NULL);
+	}
+}
+
+ 
+/**
   Update write times and calculate a new average time
  */
 void
@@ -147,7 +193,7 @@
 		ps.ps_arg = 0;
 	}
 
-	if (gettimeofday(&start, NULL) < 0)
+	if (get_time(&start, ctx->qc_flags&RF_UPTIME) < 0)
 		utime_ok = 0;
 	swab_status_block_t(&ps);
 	if (qdisk_write(ctx->qc_fd, qdisk_nodeid_offset(nid), &ps,
@@ -155,7 +201,7 @@
 		printf("Error writing node ID block %d\n", nid);
 		return -1;
 	}
-	if (utime_ok && (gettimeofday(&end, NULL) < 0))
+	if (utime_ok && (get_time(&end, ctx->qc_flags&RF_UPTIME) < 0))
 		utime_ok = 0;
 
 	if (utime_ok) {
--- cluster/cman/qdisk/main.c	2007/01/22 22:50:38	1.1.2.5
+++ cluster/cman/qdisk/main.c	2007/01/23 17:57:07	1.1.2.6
@@ -62,6 +62,10 @@
 int clear_bit(uint8_t *mask, uint32_t bitidx, uint32_t masklen);
 int set_bit(uint8_t *mask, uint32_t bitidx, uint32_t masklen);
 int is_bit_set(uint8_t *mask, uint32_t bitidx, uint32_t masklen);
+inline int get_time(struct timeval *tv, int use_uptime);
+inline void _diff_tv(struct timeval *dest, struct timeval *start,
+		     struct timeval *end);
+
 static int _running = 0;
 
 
@@ -711,18 +715,6 @@
 }
 
 
-static inline void
-_diff_tv(struct timeval *dest, struct timeval *start, struct timeval *end)
-{
-	dest->tv_sec = end->tv_sec - start->tv_sec;
-	dest->tv_usec = end->tv_usec - start->tv_usec;
-
-	if (dest->tv_usec < 0) {
-		dest->tv_usec += 1000000;
-		dest->tv_sec--;
-	}
-}
-
 
 #define _print_tv(val) \
 	printf("%s: %d.%06d\n", #val, (int)((val)->tv_sec), \
@@ -821,7 +813,7 @@
 	_running = 1;
 	while (_running) {
 		/* XXX this was getuptime() in clumanager */
-		gettimeofday(&oldtime, NULL);
+		get_time(&oldtime, (ctx->qc_flags&RF_UPTIME));
 		
 		/* Read everyone else's status */
 		read_node_blocks(ctx, ni, max);
@@ -985,7 +977,7 @@
 
 		/* Cycle. We could time the loop and sleep
 		   usleep(interval-looptime), but this is fine for now.*/
-		gettimeofday(&newtime, NULL);
+		get_time(&newtime, ctx->qc_flags&RF_UPTIME);
 		_diff_tv(&diff, &oldtime, &newtime);
 		
 		/*
@@ -1066,7 +1058,8 @@
 	ctx->qc_interval = 1;
 	ctx->qc_tko = 10;
 	ctx->qc_scoremin = 0;
-	ctx->qc_flags = RF_REBOOT | RF_ALLOW_KILL; /* | RF_STOP_CMAN;*/
+	ctx->qc_flags = RF_REBOOT | RF_ALLOW_KILL | RF_UPTIME;
+			/* | RF_STOP_CMAN;*/
 	ctx->qc_sched = SCHED_RR;
 	ctx->qc_sched_prio = 1;
 
@@ -1228,6 +1221,20 @@
 		free(val);
 	}
 
+	/*
+	 * Get flag to see if we're supposed to use /proc/uptime instead of
+	 * gettimeofday(2)
+	 */
+	/* default = on (RF_UPTIME is set in the default flags), so, 0 to turn off */
+	snprintf(query, sizeof(query), "/cluster/quorumd/@use_uptime");
+	if (ccs_get(ccsfd, query, &val) == 0) {
+		if (!atoi(val))
+			ctx->qc_flags &= ~RF_UPTIME;
+		else
+			ctx->qc_flags |= RF_UPTIME;
+		free(val);
+	}
+
 	*cfh = configure_heuristics(ccsfd, h, maxh);
 
 	clulog(LOG_DEBUG,