On 22.08.2022 04:58, Wei Chen wrote:
> --- a/xen/common/numa.c
> +++ b/xen/common/numa.c
> @@ -13,6 +13,21 @@
>  #include <xen/sched.h>
>  #include <xen/softirq.h>
>  
> +static nodemask_t __initdata processor_nodes_parsed;
> +static nodemask_t __initdata memory_nodes_parsed;
> +static struct node __initdata nodes[MAX_NUMNODES];
> +
> +static int __ro_after_init num_node_memblks;

unsigned int?

> @@ -36,6 +51,308 @@ bool numa_disabled(void)
>      return numa_off || arch_numa_disabled(false);
>  }
>  
> +void __init numa_set_processor_nodes_parsed(nodeid_t node)
> +{
> +    node_set(node, processor_nodes_parsed);
> +}
> +
> +unsigned int valid_numa_range(paddr_t start, paddr_t end, nodeid_t node)

bool (and then true/false below)?

> +{
> +    unsigned int i;
> +
> +    for ( i = 0; i < num_node_memblks; i++ )
> +    {
> +        struct node *nd = &node_memblk_range[i];

const?

> +        if ( nd->start <= start && nd->end >= end &&
> +             memblk_nodeid[i] == node )
> +            return 1;
> +    }
> +
> +    return 0;
> +}
> +
> +static
> +enum conflicts __init conflicting_memblks(nodeid_t nid, paddr_t start,

May I ask that you re-flow this to either

static enum conflicts __init
conflicting_memblks(nodeid_t nid, paddr_t start,

or

static enum conflicts __init conflicting_memblks(
    nodeid_t nid, paddr_t start,

?

> +                                          paddr_t end, paddr_t nd_start,
> +                                          paddr_t nd_end, unsigned int 
> *mblkid)
> +{
> +    unsigned int i;
> +
> +    /*
> +     * Scan all recorded nodes' memory blocks to check conflicts:
> +     * Overlap or interleave.
> +     */
> +    for ( i = 0; i < num_node_memblks; i++ )
> +    {
> +        struct node *nd = &node_memblk_range[i];

const?

> +bool __init numa_memblks_available(void)
> +{
> +    return num_node_memblks < NR_NODE_MEMBLKS;
> +}

This is kind of clumsy, but I have no better suggestion.

> +/*
> + * This function will be called by NUMA memory affinity initialization to
> + * update NUMA node's memory range. In this function, we assume all memory
> + * regions belonging to a single node are in one chunk. Holes (or MMIO
> + * ranges) between them will be included in the node.
> + *
> + * So in numa_update_node_memblks, if there are multiple banks for each
> + * node, start and end are stretched to cover the holes between them, and
> + * it works as long as memory banks of different NUMA nodes don't interleave.
> + */
> +int __init numa_update_node_memblks(nodeid_t node, unsigned int arch_nid,

The function only ever returns 0 or -EINVAL - please consider switching
to "bool" return type.

> +                                    paddr_t start, paddr_t size,
> +                                    const char *prefix,
> +                                    bool hotplug)
> +{
> +    unsigned int i;
> +    paddr_t end = start + size;
> +    paddr_t nd_start = start;
> +    paddr_t nd_end = end;
> +    struct node *nd = &nodes[node];
> +
> +    /*
> +     * For the node that already has some memory blocks, we will
> +     * expand the node memory range temporarily to check memory
> +     * interleaves with other nodes. We will not use this node
> +     * temp memory range to check overlaps, because it will mask
> +     * the overlaps in same node.
> +     *
> +     * Node with 0 bytes memory doesn't need this expandsion.
> +     */
> +    if ( nd->start != nd->end )
> +    {
> +        if ( nd_start > nd->start )
> +            nd_start = nd->start;
> +
> +        if ( nd_end < nd->end )
> +            nd_end = nd->end;
> +    }
> +
> +    /* It is fine to add this area to the nodes data it will be used later*/

Please adjust style here.

> +    switch ( conflicting_memblks(node, start, end, nd_start, nd_end, &i) )
> +    {
> +    case OVERLAP:
> +        if ( memblk_nodeid[i] == node )
> +        {
> +            bool mismatch = !(hotplug) != !test_bit(i, memblk_hotplug);
> +
> +            printk("%sNUMA: %s %u [%"PRIpaddr", %"PRIpaddr"] overlaps with 
> itself [%"PRIpaddr", %"PRIpaddr"]\n",
> +                   mismatch ? KERN_ERR : KERN_WARNING, prefix,
> +                   arch_nid, start, end - 1,
> +                   node_memblk_range[i].start, node_memblk_range[i].end - 1);
> +            if ( mismatch )
> +                return -EINVAL;
> +            break;
> +        }
> +
> +        printk(KERN_ERR
> +               "NUMA: %s %u [%"PRIpaddr", %"PRIpaddr"] overlaps with %s %u 
> [%"PRIpaddr", %"PRIpaddr"]\n",
> +               prefix, arch_nid, start, end - 1, prefix,
> +               numa_node_to_arch_nid(memblk_nodeid[i]),
> +               node_memblk_range[i].start, node_memblk_range[i].end - 1);
> +        return -EINVAL;
> +
> +
> +    case INTERLEAVE:
> +        printk(KERN_ERR
> +               "NUMA: %s %u: [%"PRIpaddr", %"PRIpaddr"] interleaves with %s 
> %u memblk [%"PRIpaddr", %"PRIpaddr"]\n",
> +               prefix, arch_nid, nd_start, nd_end - 1,
> +               prefix, numa_node_to_arch_nid(memblk_nodeid[i]),
> +               node_memblk_range[i].start, node_memblk_range[i].end - 1);
> +        return -EINVAL;
> +
> +    case NO_CONFLICT:
> +        break;
> +    }
> +
> +    if ( !hotplug )
> +    {
> +        node_set(node, memory_nodes_parsed);
> +        nd->start = nd_start;
> +        nd->end = nd_end;
> +    }
> +
> +    if ( strcasecmp("Node", prefix) )
> +        printk(KERN_INFO "NUMA: Node %u %s %u [%"PRIpaddr", 
> %"PRIpaddr"]%s\n",
> +               node, prefix, arch_nid, start, end - 1,
> +               hotplug ? " (hotplug)" : "");
> +    else
> +        printk(KERN_INFO "NUMA: Node %u [%"PRIpaddr", %"PRIpaddr"]%s\n",
> +               node, start, end - 1, hotplug ? " (hotplug)" : "");

Hmm, if I'm not mistaken one of the two printk()s and hence also one of
the two format strings will be dead code / data on every archiecture.
I wonder if we don't want to have a HAS_NUMA_FW_NODE_ID (name subject
to improvment) Kconfig setting to avoid such. I could imagine this to
become relevant also in other code.

> +static int __init numa_scan_nodes(paddr_t start, paddr_t end)

This function returns only 0 or -1, i.e. is even more of a candidate
for having "bool" return type than numa_update_node_memblks().

> +{
> +    unsigned int i;
> +    nodemask_t all_nodes_parsed;
> +
> +    /* First clean up the node list */
> +    for ( i = 0; i < MAX_NUMNODES; i++ )
> +        cutoff_node(i, start, end);
> +
> +    /* When numa is on with good firmware, we can do numa scan nodes. */
> +    if ( arch_numa_disabled(true) )
> +        return -1;
> +
> +    if ( !nodes_cover_memory() )
> +    {
> +        numa_fw_bad();
> +        return -1;
> +    }
> +
> +    memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
> +                                       memblk_nodeid);
> +
> +    if ( memnode_shift < 0 )
> +    {
> +        printk(KERN_ERR
> +               "NUMA: No NUMA node hash function found. Contact 
> maintainer\n");
> +        numa_fw_bad();
> +        return -1;
> +    }
> +
> +    nodes_or(all_nodes_parsed, memory_nodes_parsed, processor_nodes_parsed);
> +
> +    /* Finally register nodes */
> +    for_each_node_mask( i, all_nodes_parsed )
> +    {
> +        paddr_t size = nodes[i].end - nodes[i].start;
> +
> +        if ( size == 0 )
> +            printk(KERN_INFO "NUMA: node %u has no memory\n", i);
> +
> +        setup_node_bootmem(i, nodes[i].start, nodes[i].end);
> +    }

May I suggest to eliminate "size" at this occasion, for being used
only once and rather not helping readability (imo at least)?

> --- a/xen/include/xen/mm.h
> +++ b/xen/include/xen/mm.h
> @@ -58,6 +58,8 @@
>  #include <xen/perfc.h>
>  #include <public/memory.h>
>  
> +extern paddr_t mem_hotplug;
> +
>  struct page_info;
>  
>  void put_page(struct page_info *);

I'm sorry, but I guess this may go about anywhere in the file, but not
right at the top. I would probably have put it ahead of npfec_kind_t
or right after dom_cow.

Jan

Reply via email to