Hi Vijay,
On 18/07/17 12:41, vijay.kil...@gmail.com wrote:
From: Vijaya Kumar K <vijaya.ku...@cavium.com>
Parse distance-matrix and fetch node distance information.
Store distance information in node_distance[].
Register dt_node_distance() function pointer with
the ARM numa code. This approach can be later used for
ACPI.
After my comment on v1, I was expecting to see a link to the binding in
the commit message...
Signed-off-by: Vijaya Kumar K <vijaya.ku...@cavium.com>
---
v3: - Moved __node_distance() declaration to common
header file
- Use device_tree_node_compatible() instead of
device_tree_node_matches()
- Dropped xen/errno.h inclusion
---
xen/arch/arm/numa/dt_numa.c | 131 ++++++++++++++++++++++++++++++++++++++++++++
xen/arch/arm/numa/numa.c | 22 ++++++++
xen/include/asm-arm/numa.h | 2 +
xen/include/asm-x86/numa.h | 1 -
xen/include/xen/numa.h | 3 +
5 files changed, 158 insertions(+), 1 deletion(-)
diff --git a/xen/arch/arm/numa/dt_numa.c b/xen/arch/arm/numa/dt_numa.c
index 84030e7..46c0346 100644
--- a/xen/arch/arm/numa/dt_numa.c
+++ b/xen/arch/arm/numa/dt_numa.c
@@ -23,6 +23,48 @@
#include <xen/numa.h>
#include <asm/setup.h>
+static uint8_t node_distance[MAX_NUMNODES][MAX_NUMNODES];
On v1, you said that you will look at allocating node_distance on the
fly. So why it is not done?
You can give a look at alloc_boot_pages(...).
+
+static uint8_t dt_node_distance(nodeid_t nodea, nodeid_t nodeb)
+{
+ if ( nodea >= MAX_NUMNODES || nodeb >= MAX_NUMNODES )
+ return nodea == nodeb ? LOCAL_DISTANCE : REMOTE_DISTANCE;
Do we really expect dt_node_distance to be called with wrong node?
Looking at the ACPI code, they don't check that... So likely this should
be an ASSERT(...).
+
+ return node_distance[nodea][nodeb];
+}
+
+static int dt_numa_set_distance(uint32_t nodea, uint32_t nodeb,
I think this should be __init.
+ uint32_t distance)
+{
+ /* node_distance is uint8_t. Ensure distance is less than 255 */
+ if ( nodea >= MAX_NUMNODES || nodeb >= MAX_NUMNODES || distance > 255 )
+ return -EINVAL;
+
+ node_distance[nodea][nodeb] = distance;
+
+ return 0;
+}
+
+void init_dt_numa_distance(void)
Ditto.
+{
+ int i, j;
+
+ for ( i = 0; i < MAX_NUMNODES; i++ )
+ {
+ for ( j = 0; j < MAX_NUMNODES; j++ )
+ {
+ /*
+ * Initialize distance 10 for local distance and
+ * 20 for remote distance.
+ */
+ if ( i == j )
+ node_distance[i][j] = LOCAL_DISTANCE;
+ else
+ node_distance[i][j] = REMOTE_DISTANCE;
+ }
+ }
+}
+
/*
* Even though we connect cpus to numa domains later in SMP
* init, we need to know the node ids now for all cpus.
@@ -58,6 +100,76 @@ static int __init dt_numa_process_cpu_node(const void *fdt)
return 0;
}
+static int __init dt_numa_parse_distance_map(const void *fdt, int node,
+ const char *name,
+ uint32_t address_cells,
+ uint32_t size_cells)
+{
+ const struct fdt_property *prop;
+ const __be32 *matrix;
+ int entry_count, len, i;
+
+ printk(XENLOG_INFO "NUMA: parsing numa-distance-map\n");
+
+ prop = fdt_get_property(fdt, node, "distance-matrix", &len);
+ if ( !prop )
+ {
+ printk(XENLOG_WARNING
s/XENLOG_WARNING/XENLOG_INFO/ because numa-distance-map is not mandatory.
+ "NUMA: No distance-matrix property in distance-map\n");
+
+ return -EINVAL;
If I am reading correctly the binding, the distance-matrix is not
mandatory. If it is not present, you should use a default matrix. But
here you will disable NUMA completely.
+ }
+
+ if ( len % sizeof(uint32_t) != 0 )
+ {
+ printk(XENLOG_WARNING
+ "distance-matrix in node is not a multiple of u32\n");
+
+ return -EINVAL;
+ }
+
+ entry_count = len / sizeof(uint32_t);
+ if ( entry_count <= 0 )
+ {
+ printk(XENLOG_WARNING "NUMA: Invalid distance-matrix\n");
+
+ return -EINVAL;
+ }
+
+ matrix = (const __be32 *)prop->data;
+ for ( i = 0; i + 2 < entry_count; i += 3 )
It would be easier to read if entry_count is the number of triplet. E.g
entry_count = (len / sizeof(uint32_t)) / 3;
for ( i = 0; i < entry_count; i++ )
+ {
+ uint32_t nodea, nodeb, distance;
+
+ nodea = dt_read_number(matrix, 1);
+ matrix++;
nodea = dt_next_cell(1, &matrix) will do the increment for you.
+ nodeb = dt_read_number(matrix, 1);
+ matrix++;
Ditto.
+ distance = dt_read_number(matrix, 1);
+ matrix++;
Ditto.
+
+ if ( dt_numa_set_distance(nodea, nodeb, distance) )
+ {
+ printk(XENLOG_WARNING
+ "NUMA: node-id out of range in distance matrix for [node%d ->
node%d]\n",
s/%d/%u/
+ nodea, nodeb);
+ return -EINVAL;
+
+ }
+ printk(XENLOG_INFO "NUMA: distance[node%d -> node%d] = %d\n",
+ nodea, nodeb, distance);
+
+ /*
+ * Set default distance of node B->A same as A->B.
+ * No need to check for return value of numa_set_distance.
+ */
+ if ( nodeb > nodea )
Mind explaining this if in the comment?
+ dt_numa_set_distance(nodeb, nodea, distance);
+ }
+
+ return 0;
+}
+
void __init dt_numa_process_memory_node(uint32_t nid, paddr_t start,
paddr_t size)
{
@@ -90,11 +202,30 @@ void __init dt_numa_process_memory_node(uint32_t nid,
paddr_t start,
return;
}
+static int __init dt_numa_scan_distance_node(const void *fdt, int node,
+ const char *name, int depth,
+ uint32_t address_cells,
+ uint32_t size_cells, void *data)
+{
+ if ( device_tree_node_compatible(fdt, node, "numa-distance-map-v1") )
+ return dt_numa_parse_distance_map(fdt, node, name, address_cells,
+ size_cells);
+
+ return 0;
+}
+
int __init dt_numa_init(void)
{
int ret;
ret = dt_numa_process_cpu_node((void *)device_tree_flattened);
+ if ( ret )
+ return ret;
+
+ ret = device_tree_for_each_node((void *)device_tree_flattened,
Why do you need the cast?
+ dt_numa_scan_distance_node, NULL);
+ if ( !ret )
+ register_node_distance(&dt_node_distance);
return ret;
}
diff --git a/xen/arch/arm/numa/numa.c b/xen/arch/arm/numa/numa.c
index 8227361..c00b92c 100644
--- a/xen/arch/arm/numa/numa.c
+++ b/xen/arch/arm/numa/numa.c
@@ -18,10 +18,30 @@
#include <xen/ctype.h>
#include <xen/nodemask.h>
#include <xen/numa.h>
+#include <asm/acpi.h>
I don't understand why you include asm/acpi.h with no code using ACPI at
the moment...
+
+static uint8_t (*node_distance_fn)(nodeid_t a, nodeid_t b);
void numa_failed(void)
{
numa_off = true;
+ init_dt_numa_distance();
Why do you need to initialize init_dt_numa_distance when it has failed?
The array will never be used in that case.
+ node_distance_fn = NULL;
+}
+
+uint8_t __node_distance(nodeid_t a, nodeid_t b)
+{
+ if ( node_distance_fn != NULL);
+ return node_distance_fn(a, b);
+
+ return a == b ? LOCAL_DISTANCE : REMOTE_DISTANCE;
+}
+
+EXPORT_SYMBOL(__node_distance);
Please drop EXPORT_SYMBOL, this is not used by Xen and only here when
the code is imported from Linux.
+
+void register_node_distance(uint8_t (fn)(nodeid_t a, nodeid_t b))
+{
+ node_distance_fn = fn;
}
void __init numa_init(void)
@@ -29,6 +49,8 @@ void __init numa_init(void)
int ret = 0;
nodes_clear(processor_nodes_parsed);
+ init_dt_numa_distance();
This should go in dt_numa_init and would avoid to export it.
+
if ( numa_off )
goto no_numa;
diff --git a/xen/include/asm-arm/numa.h b/xen/include/asm-arm/numa.h
index 36cd782..d1dc83a 100644
--- a/xen/include/asm-arm/numa.h
+++ b/xen/include/asm-arm/numa.h
@@ -4,6 +4,8 @@
typedef uint8_t nodeid_t;
void dt_numa_process_memory_node(uint32_t nid, paddr_t start, paddr_t size);
+void register_node_distance(uint8_t (fn)(nodeid_t a, nodeid_t b));
+void init_dt_numa_distance(void);
#ifdef CONFIG_NUMA
void numa_init(void);
diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h
index d8a0a44..ca0a2a6 100644
--- a/xen/include/asm-x86/numa.h
+++ b/xen/include/asm-x86/numa.h
@@ -18,7 +18,6 @@ extern nodeid_t apicid_to_node[];
extern void init_cpu_to_node(void);
void srat_parse_regions(paddr_t addr);
-extern uint8_t __node_distance(nodeid_t a, nodeid_t b);
unsigned int arch_get_dma_bitsize(void);
#endif
diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h
index 110d5dc..10ef4c4 100644
--- a/xen/include/xen/numa.h
+++ b/xen/include/xen/numa.h
@@ -6,6 +6,8 @@
#include <asm/numa.h>
#define NUMA_NO_NODE 0xFF
+#define LOCAL_DISTANCE 10
+#define REMOTE_DISTANCE 20
I would add DEFAULT in each name. Probably LOCAL_DEFAULT_DISTANCE and
REMOVE_LOCAL_DISTANCE.
#define NUMA_NO_DISTANCE 0xFF
#define MAX_NUMNODES NR_NODES
@@ -70,6 +72,7 @@ int numa_add_memblk(nodeid_t nodeid, paddr_t start, uint64_t
size);
int get_num_node_memblks(void);
bool arch_sanitize_nodes_memory(void);
void numa_failed(void);
+uint8_t __node_distance(nodeid_t a, nodeid_t b);
#else
static inline void numa_add_cpu(int cpu) { }
static inline void numa_set_node(int cpu, nodeid_t node) { }
Cheers,
--
Julien Grall
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel