Hi Marc,

On 05/06/2016 09:22 AM, Marc Zyngier wrote:
> Hi Shanker,
>
> Thanks for putting this together. Comments below:
>
> On Fri, 6 May 2016 08:43:36 -0500
> Shanker Donthineni <shank...@codeaurora.org> wrote:
>
>> Since device IDs are extremely sparse, the single, a.k.a flat table is
>> not sufficient for the following two reasons.
>>
>> 1) According to ARM-GIC spec, ITS hw can access maximum of 256(pages)*
>>    64K(pageszie) bytes. In the best case, it supports upto DEVid=21
>>    sparse with minimum device table entry size 8bytes.
>>
>> 2) The maximum memory size that is possible without memblock depends on
>>    MAX_ORDER. 4MB on 4K page size kernel with default MAX_ORDER, so it
>>    supports DEVid range 19bits.
>>
>> The two-level device table feature brings us two advantages, the first
>> is a very high possibility of supporting upto 32bit sparse, and the
>> second one is the best utilization of memory allocation.
>>
>> The feature is enabled automatically during driver probe if the flat
>> table is not adequate and the hardware is capable of two-level table
>> walk.
>>
>> Signed-off-by: Shanker Donthineni <shank...@codeaurora.org>
>> ---
>>
>> This patch is based on Marc Zyngier's branch 
>> https://git.kernel.org/cgit/linux/kernel/git/maz/arm-platforms.git/log/?h=irq/irqchip-4.7
>>
>>  drivers/irqchip/irq-gic-v3-its.c   | 109 
>> ++++++++++++++++++++++++++++++++-----
>>  include/linux/irqchip/arm-gic-v3.h |   2 +
>>  2 files changed, 97 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/irqchip/irq-gic-v3-its.c 
>> b/drivers/irqchip/irq-gic-v3-its.c
>> index 6bd881b..caeb105 100644
>> --- a/drivers/irqchip/irq-gic-v3-its.c
>> +++ b/drivers/irqchip/irq-gic-v3-its.c
>> @@ -55,13 +55,14 @@ struct its_collection {
>>  };
>>  
>>  /*
>> - * The ITS_BASER structure - contains memory information and cached
>> - * value of BASER register configuration.
>> + * The ITS_BASER structure - contains memory information, cached value
>> + * of BASER register configuration and ITS page size in bytes.
>>   */
>>  struct its_baser {
>>      void            *base;
>>      u64             val;
>>      u32             order;
>> +    u32             psz;
>>  };
>>  
>>  /*
>> @@ -853,6 +854,8 @@ static int its_alloc_tables(const char *node_name, 
>> struct its_node *its)
>>              u64 type = GITS_BASER_TYPE(val);
>>              u64 entry_size = GITS_BASER_ENTRY_SIZE(val);
>>              int order = get_order(psz);
>> +            bool tried_indirect = false;
>> +            u64 indirect = 0;
>>              int alloc_pages;
>>              u64 tmp;
>>              void *base;
>> @@ -877,11 +880,8 @@ static int its_alloc_tables(const char *node_name, 
>> struct its_node *its)
>>                       */
>>                      order = max(get_order((1UL << ids) * entry_size),
>>                                  order);
>> -                    if (order >= MAX_ORDER) {
>> +                    if (order >= MAX_ORDER)
>>                              order = MAX_ORDER - 1;
>> -                            pr_warn("%s: Device Table too large, reduce its 
>> page order to %u\n",
>> -                                    node_name, order);
>> -                    }
>>              }
>>  
>>  retry_alloc_baser:
>> @@ -889,8 +889,6 @@ retry_alloc_baser:
>>              if (alloc_pages > GITS_BASER_PAGES_MAX) {
>>                      alloc_pages = GITS_BASER_PAGES_MAX;
>>                      order = get_order(GITS_BASER_PAGES_MAX * psz);
>> -                    pr_warn("%s: Device Table too large, reduce its page 
>> order to %u (%u pages)\n",
>> -                            node_name, order, alloc_pages);
>>              }
>>  
>>              base = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
>> @@ -924,6 +922,7 @@ retry_baser:
>>  
>>              val |= alloc_pages - 1;
>>              its->tables[i].val = val;
>> +            its->tables[i].psz = psz;
>>  
>>              writeq_relaxed(val, its->base + GITS_BASER + i * 8);
>>              tmp = readq_relaxed(its->base + GITS_BASER + i * 8);
>> @@ -963,6 +962,49 @@ retry_baser:
>>                      }
>>              }
>>  
>> +            /*
>> +             * Attempt enabling Indirection table walk since we were unable
>> +             * to allocate enough memory for flat device table to cover
>> +             * whole DevID sparse that hw reporting, and ITS hardware has
>> +             * capablity of supporting two-level table.
>> +             */
>> +            if ((type == GITS_BASER_TYPE_DEVICE) &&
>> +                (!tried_indirect) &&
>> +                (PAGE_ORDER_TO_SIZE(order) < (entry_size << ids))) {
> Why not try to use indirect tables if they are available, irrespective
> of the size of the required allocation? From a memory usage point of
> view, it is likely to always be a gain, unless your id space is so
> small that it fits in a single page. And with PCIe, you know for sure
> that the ID space is extremely sparse, hence indirect tables are an
> immediate win.
>
> Do you see any value in using the indirect table as a last resort?
As such there is no strong reason other than saving one read
cycle penalty for accessing the second level table.

I need your advice, should I enable this feature always or
if the memory (ids * entry_size) requirement is more than
some threshold?


>> +
>> +                    val |= GITS_BASER_INDIRECT;
>> +                    writeq_relaxed(val, its->base + GITS_BASER + i * 8);
>> +                    tmp = readq_relaxed(its->base + GITS_BASER + i * 8);
>> +                    tried_indirect = true;
>> +
>> +                    /* Does ITS HW supports Indirect table walk? */
>> +                    if (!(tmp & GITS_BASER_INDIRECT)) {
>> +                            pr_warn("%s: Device Table too large, reduce 
>> order to %u\n",
>> +                                    node_name, order);
>> +                            goto retry_baser;
>> +                    }
>> +
>> +                    /*
>> +                     * According to GIC spec, the size of the Level2 table
>> +                     * is equal to ITS page size which is 'psz'. For
>> +                     * computing Level1 table1 size, subtract DevID bits
>> +                     * that covers Level2 table from 'ids' which is reported
>> +                     * by ITS hardware times Level1 table entry size 8bytes.
>> +                     *
>> +                     *   L2-DevIDs = ilog2(psz / entry_size)
>> +                     *      L1-DevIDs = ids (reported by HW) - L2-DevIDs
>> +                     * Table1 size = L1-DevIDs * 8Bytes
>> +                     */
>> +                    ids -= ilog2(psz / entry_size);
>> +                    order = get_order(GITS_LEVEL1_ENTRY_SIZE << ids);
>> +
>> +                    /* Limit Level1 table size to MAX_ORDER-1 */
>> +                    order = min(order, MAX_ORDER - 1);
>> +                    indirect = GITS_BASER_INDIRECT;
>> +                    its->tables[type].base = NULL;
>> +                    goto retry_alloc_baser;
>> +            }
>> +
> Please take this opportunity to split this function into something
> manageable. It is really getting out of control. Something to directly
> deal with the device table seems in order.

Sure, I'll post a separate patch for this change.
>>              if (val != tmp) {
>>                      pr_err("ITS: %s: GITS_BASER%d doesn't stick: %lx %lx\n",
>>                             node_name, i,
>> @@ -971,10 +1013,12 @@ retry_baser:
>>                      goto out_free;
>>              }
>>  
>> -            pr_info("ITS: allocated %d %s @%lx (psz %dK, shr %d)\n",
>> -                    (int)(PAGE_ORDER_TO_SIZE(order) / entry_size),
>> +            pr_info("ITS: allocated %d %s @%lx (%s, psz %dK, shr %d)\n",
>> +                    (int)(PAGE_ORDER_TO_SIZE(order) /
>> +                    (indirect ? GITS_LEVEL1_ENTRY_SIZE : entry_size)),
>>                      its_base_type_string[type],
>>                      (unsigned long)virt_to_phys(base),
>> +                    indirect ? "indirect" : "flat",
>>                      psz / SZ_1K, (int)shr >> GITS_BASER_SHAREABILITY_SHIFT);
>>      }
>>  
>> @@ -1149,6 +1193,46 @@ static struct its_device *its_find_device(struct 
>> its_node *its, u32 dev_id)
>>      return its_dev;
>>  }
>>  
>> +static bool its_alloc_device_table(struct its_baser *baser, u32 dev_id)
>> +{
>> +    u32 entry_size = GITS_BASER_ENTRY_SIZE(baser->val);
>> +    u64 *table = baser->base;
>> +    struct page *page;
>> +    u32 psz = SZ_64K;
>> +    u32 idx;
>> +
>> +    /* Don't allow 'dev_id' that exceeds single, flat table limit */
>> +    if (!(baser->val & GITS_BASER_INDIRECT)) {
>> +            if (dev_id >= (PAGE_ORDER_TO_SIZE(baser->order) / entry_size))
>> +                    return false;
>> +            return true;
>> +    }
>> +
>> +    /* Compute Level1 table index & check if that exceeds table range */
>> +    idx = dev_id >> ilog2(baser->psz / entry_size);
>> +    if (idx >= (PAGE_ORDER_TO_SIZE(baser->order) / GITS_LEVEL1_ENTRY_SIZE))
>> +            return false;
>> +
>> +    /* Allocate memory for Level2 table */
>> +    if (!table[idx]) {
>> +            page = alloc_pages_exact(psz, GFP_KERNEL | __GFP_ZERO);
>> +            if (!page)
>> +                    return false;
>> +
>> +            table[idx] = page_to_phys(page) | GITS_BASER_VALID;
> Be careful, this must be written in LE format. If you're running a BE
> kernel, you're in for a nasty surprise...

Sorry, I didn't think about breaking BE machines. I'll fix it.
>> +
>> +            /* Flush memory to PoC if hardware dosn't support coherency */
>> +            if (!(baser->val & GITS_BASER_SHAREABILITY_MASK)) {
>> +                    __flush_dcache_area(table + idx, 
>> GITS_LEVEL1_ENTRY_SIZE);
>> +                    __flush_dcache_area(page_address(page), psz);
>> +            }
>> +            /* Ensure updated table contents are visible to ITS hardware */
>> +            dsb(sy);
>> +    }
>> +
>> +    return true;
>> +}
>> +
>>  static struct its_baser *its_get_baser(struct its_node *its, u32 type)
>>  {
>>      int i;
>> @@ -1176,11 +1260,8 @@ static struct its_device *its_create_device(struct 
>> its_node *its, u32 dev_id,
>>      int sz;
>>  
>>      baser = its_get_baser(its, GITS_BASER_TYPE_DEVICE);
>> -
>> -    /* Don't allow 'dev_id' that exceeds single, flat table limit */
>>      if (baser) {
>> -            if (dev_id >= (PAGE_ORDER_TO_SIZE(baser->order) /
>> -                          GITS_BASER_ENTRY_SIZE(baser->val)))
>> +            if (!its_alloc_device_table(baser, dev_id))
>>                      return NULL;
>>      } else if (ilog2(dev_id) >= its->device_ids)
>>              return NULL;
> Please fold these checks into its_alloc_device_table so that it takes
> an its_node as a first parameter instead of an its_baser. That will make
> the code a bit more readable.

I'll fix it in the next patch.
>> diff --git a/include/linux/irqchip/arm-gic-v3.h 
>> b/include/linux/irqchip/arm-gic-v3.h
>> index 9e6fdd3..df404ea 100644
>> --- a/include/linux/irqchip/arm-gic-v3.h
>> +++ b/include/linux/irqchip/arm-gic-v3.h
>> @@ -187,6 +187,7 @@
>>  #define GITS_TYPER_PTA                      (1UL << 19)
>>  
>>  #define GITS_CBASER_VALID           (1UL << 63)
>> +#define GITS_BASER_INDIRECT         (1UL << 62)
>>  #define GITS_CBASER_nCnB            (0UL << 59)
>>  #define GITS_CBASER_nC                      (1UL << 59)
>>  #define GITS_CBASER_RaWt            (2UL << 59)
>> @@ -238,6 +239,7 @@
>>  #define GITS_BASER_TYPE_RESERVED6   6
>>  #define GITS_BASER_TYPE_RESERVED7   7
>>  
>> +#define GITS_LEVEL1_ENTRY_SIZE              (8UL)
>>  /*
>>   * ITS commands
>>   */
>
> Thanks,
>
>       M.

-- 
Shanker Donthineni
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project

Reply via email to