> +/* Nodes with one or more EPC sections. */
> +static nodemask_t sgx_numa_mask;

I'd also add that this is for optimization only.

> +/* Array of lists of EPC sections for each NUMA node. */
> +struct list_head *sgx_numa_nodes;

I'd much prefer:

/*
 * Array with one list_head for each possible NUMA node.  Each
 * list contains every struct sgx_epc_section which is on that
 * node.
 */

Otherwise, it's hard to imagine what this structure looks like.

>  /*
>   * These variables are part of the state of the reclaimer, and must be 
> accessed
>   * with sgx_reclaimer_lock acquired.
> @@ -473,6 +479,26 @@ static struct sgx_epc_page 
> *__sgx_alloc_epc_page_from_section(struct sgx_epc_sec
>       return page;
>  }
>  
> +static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
> +{
> +     struct sgx_epc_section *section;
> +     struct sgx_epc_page *page;
> +
> +     if (WARN_ON_ONCE(nid < 0 || nid >= MAX_NUMNODES))
> +             return NULL;
> +
> +     if (!node_isset(nid, sgx_numa_mask))
> +             return NULL;
> +
> +     list_for_each_entry(section, &sgx_numa_nodes[nid], section_list) {
> +             page = __sgx_alloc_epc_page_from_section(section);
> +             if (page)
> +                     return page;
> +     }
> +
> +     return NULL;
> +}
> +
>  /**
>   * __sgx_alloc_epc_page() - Allocate an EPC page
>   *
> @@ -485,13 +511,17 @@ static struct sgx_epc_page 
> *__sgx_alloc_epc_page_from_section(struct sgx_epc_sec
>   */
>  struct sgx_epc_page *__sgx_alloc_epc_page(void)
>  {
> +     int current_nid = numa_node_id();
>       struct sgx_epc_section *section;
>       struct sgx_epc_page *page;
>       int i;
>  
> +     page = __sgx_alloc_epc_page_from_node(current_nid);
> +     if (page)
> +             return page;

Comments, please.

        /* Try to allocate EPC from the current node, first: */

then:

        /* Search all EPC sections, ignoring locality: */

>       for (i = 0; i < sgx_nr_epc_sections; i++) {
>               section = &sgx_epc_sections[i];
> -
>               page = __sgx_alloc_epc_page_from_section(section);
>               if (page)
>                       return page;

This still has the problem that it exerts too much pressure on the
low-numbered sgx_epc_sections[].  If a node's sections are full, it
always tries to go after sgx_epc_sections[0].

It can be in another patch, but I think the *minimal* thing we can do
here for a NUMA allocator is to try to at least balance the allocations.

Instead of having a for-each-section loop, I'd make it for-each-node ->
for-each-section.  Something like:

        for (i = 0; i < num_possible_nodes(); i++) {
                nid = (numa_node_id() + i) % num_possible_nodes();

                if (!node_isset(nid, sgx_numa_mask))
                        continue;

                list_for_each_entry(section, &sgx_numa_nodes[nid],
                                    section_list) {
                        page = __sgx_alloc_epc_page_from_section(section);
                        if (page)
                                return page;
                }
        }
        
Then you have a single loop instead of a "try local then a fall back".

Also, the "(numa_node_id() + i) % num_possible_nodes()" step for picking
the next node might be able to use next_online_node() instead.

> @@ -665,8 +695,12 @@ static bool __init sgx_page_cache_init(void)
>  {
>       u32 eax, ebx, ecx, edx, type;
>       u64 pa, size;
> +     int nid;
>       int i;
>  
> +     nodes_clear(sgx_numa_mask);
> +     sgx_numa_nodes = kmalloc_array(MAX_NUMNODES, sizeof(*sgx_numa_nodes), 
> GFP_KERNEL);

MAX_NUMNODES will always be the largest compile-time constant.  That's
4k, IIRC.  num_possible_nodes() might be as small as 1 if NUMA is off.

>       for (i = 0; i < ARRAY_SIZE(sgx_epc_sections); i++) {
>               cpuid_count(SGX_CPUID, i + SGX_CPUID_EPC, &eax, &ebx, &ecx, 
> &edx);
>  
> @@ -690,6 +724,22 @@ static bool __init sgx_page_cache_init(void)
>               }
>  
>               sgx_nr_epc_sections++;
> +
> +             nid = numa_map_to_online_node(phys_to_target_node(pa));
> +
> +             if (nid == NUMA_NO_NODE) {
> +                     pr_err(FW_BUG "unable to map EPC section %d to online 
> node.\n", nid);
> +                     nid = 0;

Could we dump out the physical address there?  I think that's even more
informative than a section number.

> +             } else if (WARN_ON_ONCE(nid < 0 || nid >= MAX_NUMNODES)) {
> +                     nid = 0;
> +             }

I'm not sure we really need to check for these.  If we're worried about
the firmware returning these, I'd expect numa_map_to_online_node() to
sanity check them for us.

> +             if (!node_isset(nid, sgx_numa_mask)) {
> +                     INIT_LIST_HEAD(&sgx_numa_nodes[nid]);
> +                     node_set(nid, sgx_numa_mask);
> +             }
> +
> +             list_add_tail(&sgx_epc_sections[i].section_list, 
> &sgx_numa_nodes[nid]);
>       }
>  
>       if (!sgx_nr_epc_sections) {
> diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h
> index 5fa42d143feb..4bc31bc4bacf 100644
> --- a/arch/x86/kernel/cpu/sgx/sgx.h
> +++ b/arch/x86/kernel/cpu/sgx/sgx.h
> @@ -45,6 +45,7 @@ struct sgx_epc_section {
>       spinlock_t lock;
>       struct list_head page_list;
>       unsigned long free_cnt;
> +     struct list_head section_list;

Maybe name this numa_section_list.

Reply via email to