Here's the patch. I've set it up to go into the 1.5 series, but not 1.4, as that
series is being closed out. Please let me know if this solves the problem for you.


Modified: orte/mca/ras/gridengine/ras_gridengine_module.c
==============================================================================
--- orte/mca/ras/gridengine/ras_gridengine_module.c     (original)
+++ orte/mca/ras/gridengine/ras_gridengine_module.c     2012-03-15 13:45:50 EDT (Thu, 15 Mar 2012)
@@ -64,6 +64,8 @@
    int rc;
    FILE *fp;
    orte_node_t *node;
+    opal_list_item_t *item;
+    bool found;

    /* show the Grid Engine's JOB_ID */
    if (mca_ras_gridengine_component.show_jobid ||
@@ -92,22 +94,36 @@
        queue = strtok_r(NULL, " \n", &tok);
        arch = strtok_r(NULL, " \n", &tok);

-        /* create a new node entry */
-        node = OBJ_NEW(orte_node_t);
-        if (NULL == node) {
-            fclose(fp);
-            return ORTE_ERR_OUT_OF_RESOURCE;
+        /* see if we already have this node */
+        found = false;
+        for (item = opal_list_get_first(nodelist);
+             item != opal_list_get_end(nodelist);
+             item = opal_list_get_next(item)) {
+            node = (orte_node_t*)item;
+            if (0 == strcmp(ptr, node->name)) {
+                /* just add the slots */
+                node->slots += (int)strtol(num, (char **)NULL, 10);
+                found = true;
+                break;
+            }
+        }
+        if (!found) {
+            /* create a new node entry */
+            node = OBJ_NEW(orte_node_t);
+            if (NULL == node) {
+                fclose(fp);
+                return ORTE_ERR_OUT_OF_RESOURCE;
+            }
+            node->name = strdup(ptr);
+            node->state = ORTE_NODE_STATE_UP;
+            node->slots_inuse = 0;
+            node->slots_max = 0;
+            node->slots = (int)strtol(num, (char **)NULL, 10);
+            opal_output(mca_ras_gridengine_component.verbose,
+                        "ras:gridengine: %s: PE_HOSTFILE shows slots=%d",
+                        node->name, node->slots);
+            opal_list_append(nodelist, &node->super);
        }
-        node->name = strdup(ptr);
-        node->state = ORTE_NODE_STATE_UP;
-        node->slots_inuse = 0;
-        node->slots_max = 0;
-        node->slots = (int)strtol(num, (char **)NULL, 10);
-        opal_output(mca_ras_gridengine_component.verbose,
-            "ras:gridengine: %s: PE_HOSTFILE shows slots=%d",
-            node->name, node->slots);
-        opal_list_append(nodelist, &node->super);
-
    } /* finished reading the $PE_HOSTFILE */

cleanup:

On Mar 15, 2012, at 11:41 AM, Joshua Baker-LePain wrote:

> On Thu, 15 Mar 2012 at 11:38am, Ralph Castain wrote
> 
>> No, I'll fix the parser as we should be able to run anyway. Just can't 
>> guarantee which queue the job will end up in, but at least it -will- run.
> 
> Makes sense to me.  Thanks!
> 
> -- 
> Joshua Baker-LePain
> QB3 Shared Cluster Sysadmin
> UCSF
> _______________________________________________
> users mailing list
> us...@open-mpi.org
> http://www.open-mpi.org/mailman/listinfo.cgi/users


Reply via email to