[Cluster-devel] [PATCH] cman: improve cman/qdisk interactions
Fabio M. Di Nitto
fdinitto at redhat.com
Wed Sep 7 13:10:25 UTC 2011
- libcman/cman: add new quorum API call to update name and votes of a quorum device
- cman: simplify common code to free quorum_device infrastructure and handle quorum recalculation
- cman: do better logging/error reports of the quorum API usage
- cman: use strdup instead of malloc+strcpy (code is more readable)
- libcman: perform better error checking in register_quorum_device/update_quorum_device
- Allow qdisk to update device name in cman using a new libcman quorum API call
- Perform slightly better error checking of some update operations
Resolves: rhbz#735917
Signed-off-by: Fabio M. Di Nitto <fdinitto at redhat.com>
---
cman/daemon/cnxman-socket.h | 1 +
cman/daemon/commands.c | 138 +++++++++++++++++++++++++++++++++----------
cman/lib/libcman.c | 19 +++++-
cman/lib/libcman.h | 4 +
cman/qdisk/main.c | 28 ++++++++-
5 files changed, 153 insertions(+), 37 deletions(-)
diff --git a/cman/daemon/cnxman-socket.h b/cman/daemon/cnxman-socket.h
index e8b7378..d243b40 100644
--- a/cman/daemon/cnxman-socket.h
+++ b/cman/daemon/cnxman-socket.h
@@ -32,6 +32,7 @@
#define CMAN_CMD_REG_QUORUMDEV 0x800000b5
#define CMAN_CMD_UNREG_QUORUMDEV 0x800000b6
#define CMAN_CMD_POLL_QUORUMDEV 0x800000b7
+#define CMAN_CMD_UPDATE_QUORUMDEV 0x800000b8
#define CMAN_CMD_TRY_SHUTDOWN 0x800000bb
#define CMAN_CMD_SHUTDOWN_REPLY 0x000000bc
#define CMAN_CMD_UPDATE_FENCE_INFO 0x800000bd
diff --git a/cman/daemon/commands.c b/cman/daemon/commands.c
index 2948952..567ff96 100644
--- a/cman/daemon/commands.c
+++ b/cman/daemon/commands.c
@@ -1080,27 +1080,69 @@ static int do_cmd_try_shutdown(struct connection *con, char *cmdbuf)
return 0;
}
+static void free_quorum_device(void)
+{
+ if (!quorum_device)
+ return;
+
+ if (quorum_device->name)
+ free(quorum_device->name);
+
+ free(quorum_device);
+
+ quorum_device = NULL;
+
+ return;
+}
+
+
+static void quorum_device_update_votes(int votes)
+{
+ int oldvotes;
+
+ /* Update votes even if it existed before */
+ oldvotes = quorum_device->votes;
+ quorum_device->votes = votes;
+
+ /* If it is a member and votes decreased, recalculate quorum */
+ if (quorum_device->state == NODESTATE_MEMBER &&
+ oldvotes != votes) {
+ recalculate_quorum(1, 0);
+ }
+}
+
static int do_cmd_register_quorum_device(char *cmdbuf, int *retlen)
{
int votes;
- int oldvotes;
char *name = cmdbuf+sizeof(int);
- if (!ais_running)
+ if (!ais_running) {
+ log_printf(LOG_ERR, "unable to register quorum device: corosync is not running\n");
return -ENOTCONN;
+ }
- if (!we_are_a_cluster_member)
+ if (!we_are_a_cluster_member) {
+ log_printf(LOG_ERR, "unable to register quorum device: this node is not part of a cluster\n");
return -ENOENT;
+ }
- if (strlen(name) > MAX_CLUSTER_MEMBER_NAME_LEN)
+ if (strlen(name) > MAX_CLUSTER_MEMBER_NAME_LEN) {
+ log_printf(LOG_ERR, "unable to register quorum device: name is too long\n");
+ /* this should probably return -E2BIG? */
return -EINVAL;
+ }
/* Allow re-registering of a quorum device if the name is the same */
- if (quorum_device && strcmp(name, quorum_device->name))
- return -EBUSY;
+ if (quorum_device && strcmp(name, quorum_device->name)) {
+ log_printf(LOG_ERR, "unable to re-register quorum device: device names do not match\n");
+ log_printf(LOG_DEBUG, "memb: old name: %s new name: %s\n", quorum_device->name, name);
+ return -EBUSY;
+ }
- if (find_node_by_name(name))
- return -EALREADY;
+ if (find_node_by_name(name)) {
+ log_printf(LOG_ERR, "unable to register quorum device: a node with the same name (%s) already exists\n", name);
+ return -EALREADY;
+ }
memcpy(&votes, cmdbuf, sizeof(int));
@@ -1108,18 +1150,19 @@ static int do_cmd_register_quorum_device(char *cmdbuf, int *retlen)
if (!quorum_device)
{
quorum_device = malloc(sizeof(struct cluster_node));
- if (!quorum_device)
+ if (!quorum_device) {
+ log_printf(LOG_ERR, "unable to register quorum device: not enough memory\n");
return -ENOMEM;
+ }
memset(quorum_device, 0, sizeof(struct cluster_node));
- quorum_device->name = malloc(strlen(name) + 1);
+ quorum_device->name = strdup(name);
if (!quorum_device->name) {
- free(quorum_device);
- quorum_device = NULL;
+ log_printf(LOG_ERR, "unable to register quorum device: not enough memory\n");
+ free_quorum_device();
return -ENOMEM;
}
- strcpy(quorum_device->name, name);
quorum_device->state = NODESTATE_DEAD;
gettimeofday(&quorum_device->join_time, NULL);
@@ -1132,34 +1175,63 @@ static int do_cmd_register_quorum_device(char *cmdbuf, int *retlen)
log_printf(LOG_INFO, "quorum device re-registered\n");
}
- /* Update votes even if it existed before */
- oldvotes = quorum_device->votes;
- quorum_device->votes = votes;
+ quorum_device_update_votes(votes);
- /* If it is a member and votes decreased, recalculate quorum */
- if (quorum_device->state == NODESTATE_MEMBER &&
- oldvotes != votes) {
- recalculate_quorum(1, 0);
+ return 0;
+}
+
+static int do_cmd_unregister_quorum_device(char *cmdbuf, int *retlen)
+{
+ if (!quorum_device) {
+ log_printf(LOG_DEBUG, "memb: failed to unregister a non existing quorum device\n");
+ return -EINVAL;
}
- return 0;
+ if (quorum_device->state == NODESTATE_MEMBER) {
+ log_printf(LOG_DEBUG, "memb: failed to unregister: quorum device still active.\n");
+ return -EBUSY;
+ }
+
+ free_quorum_device();
+
+ log_printf(LOG_INFO, "quorum device unregistered\n");
+ return 0;
}
-static int do_cmd_unregister_quorum_device(char *cmdbuf, int *retlen)
+static int do_cmd_update_quorum_device(char *cmdbuf, int *retlen)
{
- if (!quorum_device)
- return -EINVAL;
+ int votes, ret = 0;
+ char *name = cmdbuf+sizeof(int);
- if (quorum_device->state == NODESTATE_MEMBER)
- return -EBUSY;
+ if (!quorum_device) {
+ log_printf(LOG_DEBUG, "memb: failed to update a non-existing quorum device\n");
+ return -EINVAL;
+ }
- free(quorum_device->name);
- free(quorum_device);
+ memcpy(&votes, cmdbuf, sizeof(int));
- quorum_device = NULL;
+ /* allow name change of the quorum device */
+ if (quorum_device && strcmp(name, quorum_device->name)) {
+ char *newname = NULL;
+ char *oldname = NULL;
- log_printf(LOG_INFO, "quorum device unregistered\n");
- return 0;
+ log_printf(LOG_DEBUG, "memb: old name: %s new name: %s\n", quorum_device->name, name);
+ newname = strdup(name);
+ if (!newname) {
+ log_printf(LOG_ERR, "memb: unable to update quorum device name: out of memory\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+ log_printf(LOG_INFO, "quorum device name changed to %s\n", name);
+ oldname = quorum_device->name;
+ quorum_device->name = newname;
+ free(oldname);
+ }
+
+out:
+ quorum_device_update_votes(votes);
+
+ return ret;
}
static int reload_config(int new_version, int should_broadcast)
@@ -1560,6 +1632,10 @@ int process_command(struct connection *con, int cmd, char *cmdbuf,
err = do_cmd_unregister_quorum_device(cmdbuf, retlen);
break;
+ case CMAN_CMD_UPDATE_QUORUMDEV:
+ err = do_cmd_update_quorum_device(cmdbuf, retlen);
+ break;
+
case CMAN_CMD_POLL_QUORUMDEV:
err = do_cmd_poll_quorum_device(cmdbuf, retlen);
break;
diff --git a/cman/lib/libcman.c b/cman/lib/libcman.c
index daaad07..a89c731 100644
--- a/cman/lib/libcman.c
+++ b/cman/lib/libcman.c
@@ -1002,14 +1002,15 @@ int cman_replyto_shutdown(cman_handle_t handle, int yesno)
return 0;
}
-
-int cman_register_quorum_device(cman_handle_t handle, char *name, int votes)
+static int cman_set_quorum_device(cman_handle_t handle,
+ int ops,
+ char *name, int votes)
{
struct cman_handle *h = (struct cman_handle *)handle;
char buf[strlen(name)+1 + sizeof(int)];
VALIDATE_HANDLE(h);
- if (strlen(name) > MAX_CLUSTER_MEMBER_NAME_LEN)
+ if ((!name) || (strlen(name) > MAX_CLUSTER_MEMBER_NAME_LEN) || (votes < 0))
{
errno = EINVAL;
return -1;
@@ -1017,7 +1018,12 @@ int cman_register_quorum_device(cman_handle_t handle, char *name, int votes)
memcpy(buf, &votes, sizeof(int));
strcpy(buf+sizeof(int), name);
- return info_call(h, CMAN_CMD_REG_QUORUMDEV, buf, strlen(name)+1+sizeof(int), NULL, 0);
+ return info_call(h, ops, buf, strlen(name)+1+sizeof(int), NULL, 0);
+}
+
+int cman_register_quorum_device(cman_handle_t handle, char *name, int votes)
+{
+ return cman_set_quorum_device(handle, CMAN_CMD_REG_QUORUMDEV, name, votes);
}
int cman_unregister_quorum_device(cman_handle_t handle)
@@ -1053,6 +1059,11 @@ int cman_get_quorum_device(cman_handle_t handle, struct cman_qdev_info *info)
return ret;
}
+int cman_update_quorum_device(cman_handle_t handle, char *name, int votes)
+{
+ return cman_set_quorum_device(handle, CMAN_CMD_UPDATE_QUORUMDEV, name, votes);
+}
+
int cman_get_fenceinfo(cman_handle_t handle, int nodeid, uint64_t *time, int *fenced, char *agent)
{
struct cman_handle *h = (struct cman_handle *)handle;
diff --git a/cman/lib/libcman.h b/cman/lib/libcman.h
index feb10a2..9f97875 100644
--- a/cman/lib/libcman.h
+++ b/cman/lib/libcman.h
@@ -420,6 +420,9 @@ int cman_barrier_delete(cman_handle_t handle, const char *name);
/*
* Add your own quorum device here, needs an admin socket
*
+ * register_quorum and update_quorum arguments are mandatory.
+ * name has to be a valid null-terminated string and votes >= 0.
+ *
* After creating a quorum device you will need to call 'poll_quorum_device'
* at least once every (default) 10 seconds (this can be changed in CCS)
* otherwise it will time-out and the cluster will lose its vote.
@@ -428,6 +431,7 @@ int cman_register_quorum_device(cman_handle_t handle, char *name, int votes);
int cman_unregister_quorum_device(cman_handle_t handle);
int cman_poll_quorum_device(cman_handle_t handle, int isavailable);
int cman_get_quorum_device(cman_handle_t handle, struct cman_qdev_info *info);
+int cman_update_quorum_device(cman_handle_t handle, char *name, int votes);
/*
* Sets the dirty bit inside cman. This indicates that the node has
diff --git a/cman/qdisk/main.c b/cman/qdisk/main.c
index c1598fa..2f0c2ca 100644
--- a/cman/qdisk/main.c
+++ b/cman/qdisk/main.c
@@ -690,6 +690,17 @@ register_device(qd_ctx *ctx)
ctx->qc_votes : 0);
}
+static int
+update_device(qd_ctx *ctx)
+{
+ return cman_update_quorum_device(
+ ctx->qc_cman_admin,
+ (ctx->qc_flags&RF_CMAN_LABEL) ?
+ ctx->qc_cman_label : ctx->qc_device,
+ (!(ctx->qc_flags & RF_MASTER_WINS) ||
+ ctx->qc_status == S_MASTER) ?
+ ctx->qc_votes : 0);
+}
static int
adjust_votes(qd_ctx *ctx)
@@ -2119,9 +2130,22 @@ main(int argc, char **argv)
if (!_running)
goto out;
-
+
/* This registers the quorum device */
- register_device(&ctx);
+ ret = register_device(&ctx);
+ if (ret) {
+ if (errno == EBUSY) {
+ logt_print(LOG_NOTICE, "quorum device is already registered, updating\n");
+ ret = update_device(&ctx);
+ if (ret) {
+ logt_print(LOG_ERR, "DEBUG: unable to update quorum device info\n");
+ goto out;
+ }
+ } else {
+ logt_print(LOG_ERR, "Unable to register quorum device!\n");
+ goto out;
+ }
+ }
io_nanny_start(ch_user, ctx.qc_tko * ctx.qc_interval);
--
1.7.4.4
More information about the Cluster-devel
mailing list