The attached patch (against 4.0.2) should fix it. I'll prepare a PR to
fix this upstream.

Brice


Le 27/11/2019 à 00:41, Brice Goglin via users a écrit :
> It looks like NUMA is broken, while other levels such as SOCKET and
> L3CACHE work fine. A quick look in opal_hwloc_base_get_relative_locality()
> and friends tells me that those functions were not properly updated for
> the hwloc 2.0 NUMA changes. I'll try to understand what's going on tomorrow.
>
> Rebuilding OMPI with an external hwloc 1.11.x might avoid the issue in
> the meantime.
>
> Beware that splitting on NUMA might become meaningless on some platforms
> in the future (there are already some x86 platforms where some NUMA
> nodes are attached to the Packages while others are attached to each
> half of the same Packages).
>
> Brice
>
>
> Le 26/11/2019 à 23:12, Hatem Elshazly via users a écrit :
>> Hello,
>>
>>
>> I'm trying to split the world communicator by NUMA using
>> MPI_Comm_split_type. I expected to get as many sub-communicators as
>> there are NUMA nodes, but what I get is as many sub-communicators as
>> there are MPI processes, each containing a single process.
>>
>>
>> Attached is a reproducer. I tried it using version 4.0.2 built
>> with GNU 9.2.0 on Skylake and Haswell machines, and both behave
>> similarly.
>>
>>
>> Can anyone explain why it behaves like that? Is this expected,
>> or am I confusing something?
>>
>>
>> Thanks in advance,
>>
>> Hatem
>>
>> Junior Researcher -- Barcelona Supercomputing Center (BSC)
>>
>>
>>
>> http://bsc.es/disclaimer
diff --git a/opal/mca/hwloc/base/hwloc_base_util.c b/opal/mca/hwloc/base/hwloc_base_util.c
index ba26ba0ac6..daf2fa2064 100644
--- a/opal/mca/hwloc/base/hwloc_base_util.c
+++ b/opal/mca/hwloc/base/hwloc_base_util.c
@@ -1215,16 +1215,84 @@ int opal_hwloc_base_cpu_list_parse(const char *slot_str,
     return OPAL_SUCCESS;
 }
 
+static void opal_hwloc_base_get_relative_locality_by_depth(hwloc_topology_t topo, unsigned d,
+                                                           hwloc_cpuset_t loc1, hwloc_cpuset_t loc2,
+                                                           opal_hwloc_locality_t *locality, bool *shared)
+{
+    unsigned width, w;
+    hwloc_obj_t obj;
+    int sect1, sect2;
+
+    /* get the width of the topology at this depth */
+    width = hwloc_get_nbobjs_by_depth(topo, d);
+
+    /* scan all objects at this depth to see if
+     * our locations overlap with them
+     */
+    for (w=0; w < width; w++) {
+        /* get the object at this depth/index */
+        obj = hwloc_get_obj_by_depth(topo, d, w);
+        /* see if our locations intersect with the cpuset for this obj */
+        sect1 = hwloc_bitmap_intersects(obj->cpuset, loc1);
+        sect2 = hwloc_bitmap_intersects(obj->cpuset, loc2);
+        /* if both intersect, then we share this level */
+        if (sect1 && sect2) {
+            *shared = true;
+            switch(obj->type) {
+            case HWLOC_OBJ_NODE:
+                *locality |= OPAL_PROC_ON_NUMA;
+                break;
+            case HWLOC_OBJ_SOCKET:
+                *locality |= OPAL_PROC_ON_SOCKET;
+                break;
+#if HWLOC_API_VERSION < 0x20000
+            case HWLOC_OBJ_CACHE:
+                if (3 == obj->attr->cache.depth) {
+                    *locality |= OPAL_PROC_ON_L3CACHE;
+                } else if (2 == obj->attr->cache.depth) {
+                    *locality |= OPAL_PROC_ON_L2CACHE;
+                } else {
+                    *locality |= OPAL_PROC_ON_L1CACHE;
+                }
+                break;
+#else
+            case HWLOC_OBJ_L3CACHE:
+                *locality |= OPAL_PROC_ON_L3CACHE;
+                break;
+            case HWLOC_OBJ_L2CACHE:
+                *locality |= OPAL_PROC_ON_L2CACHE;
+                break;
+            case HWLOC_OBJ_L1CACHE:
+                *locality |= OPAL_PROC_ON_L1CACHE;
+                break;
+#endif
+            case HWLOC_OBJ_CORE:
+                *locality |= OPAL_PROC_ON_CORE;
+                break;
+            case HWLOC_OBJ_PU:
+                *locality |= OPAL_PROC_ON_HWTHREAD;
+                break;
+            default:
+                /* just ignore it */
+                break;
+            }
+            break;
+        }
+        /* otherwise, we don't share this
+         * object - but we still might share another object
+         * on this level, so we have to keep searching
+         */
+    }
+}
+
 opal_hwloc_locality_t opal_hwloc_base_get_relative_locality(hwloc_topology_t topo,
                                                             char *cpuset1, char *cpuset2)
 {
     opal_hwloc_locality_t locality;
-    hwloc_obj_t obj;
-    unsigned depth, d, width, w;
+    hwloc_cpuset_t loc1, loc2;
+    unsigned depth, d;
     bool shared;
     hwloc_obj_type_t type;
-    int sect1, sect2;
-    hwloc_cpuset_t loc1, loc2;
 
     /* start with what we know - they share a node on a cluster
      * NOTE: we may alter that latter part as hwloc's ability to
@@ -1265,66 +1333,8 @@ opal_hwloc_locality_t opal_hwloc_base_get_relative_locality(hwloc_topology_t top
             HWLOC_OBJ_PU != type) {
             continue;
         }
-        /* get the width of the topology at this depth */
-        width = hwloc_get_nbobjs_by_depth(topo, d);
+        opal_hwloc_base_get_relative_locality_by_depth(topo, d, loc1, loc2, &locality, &shared);
 
-        /* scan all objects at this depth to see if
-         * our locations overlap with them
-         */
-        for (w=0; w < width; w++) {
-            /* get the object at this depth/index */
-            obj = hwloc_get_obj_by_depth(topo, d, w);
-            /* see if our locations intersect with the cpuset for this obj */
-            sect1 = hwloc_bitmap_intersects(obj->cpuset, loc1);
-            sect2 = hwloc_bitmap_intersects(obj->cpuset, loc2);
-            /* if both intersect, then we share this level */
-            if (sect1 && sect2) {
-                shared = true;
-                switch(obj->type) {
-                case HWLOC_OBJ_NODE:
-                    locality |= OPAL_PROC_ON_NUMA;
-                    break;
-                case HWLOC_OBJ_SOCKET:
-                    locality |= OPAL_PROC_ON_SOCKET;
-                    break;
-#if HWLOC_API_VERSION < 0x20000
-                case HWLOC_OBJ_CACHE:
-                    if (3 == obj->attr->cache.depth) {
-                        locality |= OPAL_PROC_ON_L3CACHE;
-                    } else if (2 == obj->attr->cache.depth) {
-                        locality |= OPAL_PROC_ON_L2CACHE;
-                    } else {
-                        locality |= OPAL_PROC_ON_L1CACHE;
-                    }
-                    break;
-#else
-                case HWLOC_OBJ_L3CACHE:
-                    locality |= OPAL_PROC_ON_L3CACHE;
-                    break;
-                case HWLOC_OBJ_L2CACHE:
-                    locality |= OPAL_PROC_ON_L2CACHE;
-                    break;
-                case HWLOC_OBJ_L1CACHE:
-                    locality |= OPAL_PROC_ON_L1CACHE;
-                    break;
-#endif
-                case HWLOC_OBJ_CORE:
-                    locality |= OPAL_PROC_ON_CORE;
-                    break;
-                case HWLOC_OBJ_PU:
-                    locality |= OPAL_PROC_ON_HWTHREAD;
-                    break;
-                default:
-                    /* just ignore it */
-                    break;
-                }
-                break;
-            }
-            /* otherwise, we don't share this
-             * object - but we still might share another object
-             * on this level, so we have to keep searching
-             */
-        }
         /* if we spanned the entire width without finding
          * a point of intersection, then no need to go
          * deeper
@@ -1333,6 +1343,9 @@ opal_hwloc_locality_t opal_hwloc_base_get_relative_locality(hwloc_topology_t top
             break;
         }
     }
+#if HWLOC_API_VERSION >= 0x20000
+    opal_hwloc_base_get_relative_locality_by_depth(topo, HWLOC_TYPE_DEPTH_NUMANODE, loc1, loc2, &locality, &shared);
+#endif
 
     opal_output_verbose(5, opal_hwloc_base_framework.framework_output,
                         "locality: %s",
@@ -2063,12 +2076,40 @@ char* opal_hwloc_base_get_topo_signature(hwloc_topology_t topo)
     return sig;
 }
 
+static int opal_hwloc_base_get_locality_string_by_depth(hwloc_topology_t topo,
+                                                        int d,
+                                                        hwloc_cpuset_t cpuset,
+                                                        hwloc_cpuset_t result)
+{
+    hwloc_obj_t obj;
+    unsigned width, w;
+
+    /* get the width of the topology at this depth */
+    width = hwloc_get_nbobjs_by_depth(topo, d);
+    if (0 == width) {
+        return -1;
+    }
+
+    /* scan all objects at this depth to see if
+     * the location overlaps with them
+     */
+    for (w=0; w < width; w++) {
+        /* get the object at this depth/index */
+        obj = hwloc_get_obj_by_depth(topo, d, w);
+        /* see if the location intersects with it */
+        if (hwloc_bitmap_intersects(obj->cpuset, cpuset)) {
+            hwloc_bitmap_set(result, w);
+        }
+    }
+
+    return 0;
+}
+
 char* opal_hwloc_base_get_locality_string(hwloc_topology_t topo,
                                           char *bitmap)
 {
-    hwloc_obj_t obj;
     char *locality=NULL, *tmp, *t2;
-    unsigned depth, d, width, w;
+    unsigned depth, d;
     hwloc_cpuset_t cpuset, result;
     hwloc_obj_type_t type;
 
@@ -2111,28 +2152,15 @@ char* opal_hwloc_base_get_locality_string(hwloc_topology_t topo,
             continue;
         }
 
-        /* get the width of the topology at this depth */
-        width = hwloc_get_nbobjs_by_depth(topo, d);
-        if (0 == width) {
+        if (opal_hwloc_base_get_locality_string_by_depth(topo, d, cpuset, result) < 0) {
             continue;
         }
 
-        /* scan all objects at this depth to see if
-         * the location overlaps with them
-         */
-        for (w=0; w < width; w++) {
-            /* get the object at this depth/index */
-            obj = hwloc_get_obj_by_depth(topo, d, w);
-            /* see if the location intersects with it */
-            if (hwloc_bitmap_intersects(obj->cpuset, cpuset)) {
-                hwloc_bitmap_set(result, w);
-            }
-        }
         /* it should be impossible, but allow for the possibility
          * that we came up empty at this depth */
         if (!hwloc_bitmap_iszero(result)) {
             hwloc_bitmap_list_asprintf(&tmp, result);
-            switch(obj->type) {
+            switch(type) {
                 case HWLOC_OBJ_NODE:
                     asprintf(&t2, "%sNM%s:", (NULL == locality) ? "" : locality, tmp);
                     if (NULL != locality) {
@@ -2217,6 +2245,24 @@ char* opal_hwloc_base_get_locality_string(hwloc_topology_t topo,
         }
         hwloc_bitmap_zero(result);
     }
+
+#if HWLOC_API_VERSION >= 0x20000
+    if (opal_hwloc_base_get_locality_string_by_depth(topo, HWLOC_TYPE_DEPTH_NUMANODE, cpuset, result) == 0) {
+        /* it should be impossible, but allow for the possibility
+         * that we came up empty at this depth */
+        if (!hwloc_bitmap_iszero(result)) {
+            hwloc_bitmap_list_asprintf(&tmp, result);
+            asprintf(&t2, "%sNM%s:", (NULL == locality) ? "" : locality, tmp);
+            if (NULL != locality) {
+                free(locality);
+            }
+            locality = t2;
+            free(tmp);
+        }
+        hwloc_bitmap_zero(result);
+    }
+#endif
+
     hwloc_bitmap_free(result);
     hwloc_bitmap_free(cpuset);
 

Reply via email to