People sometimes think they have a use case
for influencing which node will be the DC.

Sometimes it is latency (certain CLI commands work faster
when run on the DC), sometimes they add a "mostly quorum"
node which may not be quite up to the task of being DC.


Completely prohibiting a node from becoming DC would
mean it cannot even be cleanly shut down (with 1.0.x, no MCP),
or act on its own resources under certain no-quorum policies.

So here is a patch I have been asked to present for discussion,
against Pacemaker 1.0, that introduces a "dc-prio" configuration
parameter, which will add some skew to the election algorithm.


Open questions:
 * does it make sense at all?

 * election algorithm compatibility, stability:
   will the election be correct if some nodes have this patch
   and some don't?

 * How can it be improved so that a node with dc-prio=0 will
   "give up" its DC-role as soon as there is at least one other node
   with dc-prio > 0?

        Lars


--- ./crmd/election.c.orig      2011-05-11 11:36:05.577329600 +0200
+++ ./crmd/election.c   2011-05-12 13:49:04.671484200 +0200
@@ -29,6 +29,7 @@
 GHashTable *voted = NULL;
 uint highest_born_on = -1;
 static int current_election_id = 1;
+static int our_dc_prio = -1;
 
 /*     A_ELECTION_VOTE */
 void
@@ -55,6 +56,20 @@
                        break;
        }
 
+       if (our_dc_prio < 0) {
+                       char * dc_prio_str = getenv("HA_dc_prio");
+
+                       if (dc_prio_str == NULL) {
+                               our_dc_prio = 1;
+                       } else {
+                               our_dc_prio = atoi(dc_prio_str);
+                       }
+       }
+
+       if (!our_dc_prio) {
+               not_voting = TRUE;
+       }
+
        if(not_voting == FALSE) {
                if(is_set(fsa_input_register, R_STARTING)) {
                        not_voting = TRUE;
@@ -72,12 +87,13 @@
        }
        
        vote = create_request(
-               CRM_OP_VOTE, NULL, NULL,
+               our_dc_prio?CRM_OP_VOTE:CRM_OP_NOVOTE, NULL, NULL,
                CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
 
        current_election_id++;
        crm_xml_add(vote, F_CRM_ELECTION_OWNER, fsa_our_uuid);
        crm_xml_add_int(vote, F_CRM_ELECTION_ID, current_election_id);
+       crm_xml_add_int(vote, F_CRM_DC_PRIO, our_dc_prio);
 
        send_cluster_message(NULL, crm_msg_crmd, vote, TRUE);
        free_xml(vote);
@@ -188,6 +204,7 @@
                       fsa_data_t *msg_data)
 {
        int election_id = -1;
+       int your_dc_prio = 1;
        int log_level = LOG_INFO;
        gboolean done = FALSE;
        gboolean we_loose = FALSE;
@@ -216,6 +233,17 @@
        your_version   = crm_element_value(vote->msg, F_CRM_VERSION);
        election_owner = crm_element_value(vote->msg, F_CRM_ELECTION_OWNER);
        crm_element_value_int(vote->msg, F_CRM_ELECTION_ID, &election_id);
+       crm_element_value_int(vote->msg, F_CRM_DC_PRIO, &your_dc_prio);
+
+       if (our_dc_prio < 0) {
+               char * dc_prio_str = getenv("HA_dc_prio");
+
+               if (dc_prio_str == NULL) {
+                       our_dc_prio = 1;
+               } else {
+                       our_dc_prio = atoi(dc_prio_str);
+               }
+       }
 
        CRM_CHECK(vote_from != NULL, vote_from = fsa_our_uname);
        
@@ -269,6 +297,13 @@
            reason = "Recorded";
            done = TRUE;
            
+       } else if(our_dc_prio < your_dc_prio) {
+           reason = "DC Prio";
+           we_loose = TRUE;
+
+       } else if(our_dc_prio > your_dc_prio) {
+           reason = "DC Prio";
+
        } else if(compare_version(your_version, CRM_FEATURE_SET) < 0) {
            reason = "Version";
            we_loose = TRUE;
@@ -328,6 +363,7 @@
 
                crm_xml_add(novote, F_CRM_ELECTION_OWNER, election_owner);
                crm_xml_add_int(novote, F_CRM_ELECTION_ID, election_id);
+               crm_xml_add_int(novote, F_CRM_DC_PRIO, our_dc_prio);
                
                send_cluster_message(vote_from, crm_msg_crmd, novote, TRUE);
                free_xml(novote);
--- ./include/crm/msg_xml.h.orig        2011-05-11 18:22:08.061726000 +0200
+++ ./include/crm/msg_xml.h     2011-05-11 18:24:17.405132000 +0200
@@ -32,6 +32,7 @@
 #define F_CRM_ORIGIN                   "origin"
 #define F_CRM_JOIN_ID                  "join_id"
 #define F_CRM_ELECTION_ID              "election-id"
+#define F_CRM_DC_PRIO                  "dc-prio"
 #define F_CRM_ELECTION_OWNER           "election-owner"
 #define F_CRM_TGRAPH                   "crm-tgraph"
 #define F_CRM_TGRAPH_INPUT             "crm-tgraph-in"
--- ./lib/ais/plugin.c.orig     2011-05-11 11:29:38.496116000 +0200
+++ ./lib/ais/plugin.c  2011-05-11 17:28:32.385425300 +0200
@@ -421,6 +421,9 @@
     get_config_opt(pcmk_api, local_handle, "use_logd", &value, "no");
     pcmk_env.use_logd = value;
 
+    get_config_opt(pcmk_api, local_handle, "dc_prio", &value, "1");
+    pcmk_env.dc_prio = value;
+
     get_config_opt(pcmk_api, local_handle, "use_mgmtd", &value, "no");
     if(ais_get_boolean(value) == FALSE) {
        int lpc = 0;
@@ -584,6 +587,7 @@
     pcmk_env.logfile  = NULL;
     pcmk_env.use_logd = "false";
     pcmk_env.syslog   = "daemon";
+    pcmk_env.dc_prio = "1";
 
     if(cs_uid != root_uid) {
        ais_err("Corosync must be configured to start as 'root',"
--- ./lib/ais/utils.c.orig      2011-05-11 11:27:08.460183200 +0200
+++ ./lib/ais/utils.c   2011-05-11 17:29:09.182064800 +0200
@@ -171,6 +171,7 @@
        setenv("HA_logfacility",        pcmk_env.syslog,   1);
        setenv("HA_LOGFACILITY",        pcmk_env.syslog,   1);
        setenv("HA_use_logd",           pcmk_env.use_logd, 1);
+       setenv("HA_dc_prio",            pcmk_env.dc_prio,  1);
        if(pcmk_env.logfile) {
            setenv("HA_debugfile", pcmk_env.logfile, 1);
        }
--- ./lib/ais/utils.h.orig      2011-05-11 11:26:12.757414700 +0200
+++ ./lib/ais/utils.h   2011-05-11 17:36:34.194841700 +0200
@@ -226,6 +226,7 @@
        const char *syslog;
        const char *logfile;
        const char *use_logd;
+       const char *dc_prio;
 };
 
 extern struct pcmk_env_s pcmk_env;



-- 
: Lars Ellenberg
: LINBIT | Your Way to High Availability
: DRBD/HA support and consulting http://www.linbit.com

DRBD® and LINBIT® are registered trademarks of LINBIT, Austria.

_______________________________________________
Pacemaker mailing list: Pacemaker@oss.clusterlabs.org
http://oss.clusterlabs.org/mailman/listinfo/pacemaker

Project Home: http://www.clusterlabs.org
Getting started: http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf
Bugs: http://bugs.clusterlabs.org

Reply via email to