Here's the patch: I've set it up to go into 1.5, but not 1.4 as that series is being closed out. Please let me know if this solves the problem for you.
Modified: orte/mca/ras/gridengine/ras_gridengine_module.c
==============================================================================
--- orte/mca/ras/gridengine/ras_gridengine_module.c (original)
+++ orte/mca/ras/gridengine/ras_gridengine_module.c 2012-03-15 13:45:50 EDT (Thu, 15 Mar 2012)
@@ -64,6 +64,8 @@
     int rc;
     FILE *fp;
     orte_node_t *node;
+    opal_list_item_t *item;
+    bool found;
 
     /* show the Grid Engine's JOB_ID */
     if (mca_ras_gridengine_component.show_jobid ||
@@ -92,22 +94,36 @@
         queue = strtok_r(NULL, " \n", &tok);
         arch = strtok_r(NULL, " \n", &tok);
 
-        /* create a new node entry */
-        node = OBJ_NEW(orte_node_t);
-        if (NULL == node) {
-            fclose(fp);
-            return ORTE_ERR_OUT_OF_RESOURCE;
+        /* see if we already have this node */
+        found = false;
+        for (item = opal_list_get_first(nodelist);
+             item != opal_list_get_end(nodelist);
+             item = opal_list_get_next(item)) {
+            node = (orte_node_t*)item;
+            if (0 == strcmp(ptr, node->name)) {
+                /* just add the slots */
+                node->slots += (int)strtol(num, (char **)NULL, 10);
+                found = true;
+                break;
+            }
+        }
+        if (!found) {
+            /* create a new node entry */
+            node = OBJ_NEW(orte_node_t);
+            if (NULL == node) {
+                fclose(fp);
+                return ORTE_ERR_OUT_OF_RESOURCE;
+            }
+            node->name = strdup(ptr);
+            node->state = ORTE_NODE_STATE_UP;
+            node->slots_inuse = 0;
+            node->slots_max = 0;
+            node->slots = (int)strtol(num, (char **)NULL, 10);
+            opal_output(mca_ras_gridengine_component.verbose,
+                        "ras:gridengine: %s: PE_HOSTFILE shows slots=%d",
+                        node->name, node->slots);
+            opal_list_append(nodelist, &node->super);
         }
-        node->name = strdup(ptr);
-        node->state = ORTE_NODE_STATE_UP;
-        node->slots_inuse = 0;
-        node->slots_max = 0;
-        node->slots = (int)strtol(num, (char **)NULL, 10);
-        opal_output(mca_ras_gridengine_component.verbose,
-                    "ras:gridengine: %s: PE_HOSTFILE shows slots=%d",
-                    node->name, node->slots);
-        opal_list_append(nodelist, &node->super);
-
     } /* finished reading the $PE_HOSTFILE */
 
 cleanup:

On Mar 15, 2012, at 11:41 AM, Joshua Baker-LePain wrote:

> On Thu, 15 Mar 2012 at 11:38am, Ralph Castain wrote
>
>> No, I'll fix the parser as we should be able to run anyway. Just can't
>> guarantee which queue the job will end up in, but at least it -will- run.
>
> Makes sense to me. Thanks!
>
> --
> Joshua Baker-LePain
> QB3 Shared Cluster Sysadmin
> UCSF
> _______________________________________________
> users mailing list
> us...@open-mpi.org
> http://www.open-mpi.org/mailman/listinfo.cgi/users