Hi Marc,
On 03/20/2017 05:14 AM, Shanker Donthineni wrote: > Hi Marc, > > > On 03/17/2017 10:33 AM, Marc Zyngier wrote: >> On 17/03/17 14:18, Shanker Donthineni wrote: >>> Hi Marc, >>> >>> >>> On 03/17/2017 08:50 AM, Marc Zyngier wrote: >>>> On 07/03/17 14:25, Shanker Donthineni wrote: >>>>> We are always allocating extra 255Bytes of memory to handle ITE >>>>> physical address alignment requirement. The kmalloc() satisfies >>>>> the ITE alignment since the ITS driver is requesting a minimum >>>>> size of ITS_ITT_ALIGN bytes. >>>>> >>>>> Let's try to allocate the exact amount of memory that is required >>>>> for ITEs to avoid wastage. >>>>> >>>>> Signed-off-by: Shanker Donthineni <shank...@codeaurora.org> >>>>> ---Hi >>>>> v2: removed 'Change-Id: Ia8084189833f2081ff13c392deb5070c46a64038' from >>>>> commit. >>>>> v3: changed from IITE to ITE. >>>>> >>>>> drivers/irqchip/irq-gic-v3-its.c | 7 ++++++- >>>>> 1 file changed, 6 insertions(+), 1 deletion(-) >>>>> >>>>> diff --git a/drivers/irqchip/irq-gic-v3-its.c >>>>> b/drivers/irqchip/irq-gic-v3-its.c >>>>> index 86bd428..5aeca78 100644 >>>>> --- a/drivers/irqchip/irq-gic-v3-its.c >>>>> +++ b/drivers/irqchip/irq-gic-v3-its.c >>>>> @@ -1329,8 +1329,13 @@ static struct its_device *its_create_device(struct >>>>> its_node *its, u32 dev_id, >>>>> */ >>>>> nr_ites = max(2UL, roundup_pow_of_two(nvecs)); >>>>> sz = nr_ites * its->ite_size; >>>>> - sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1; >>>>> + sz = max(sz, ITS_ITT_ALIGN); >>>>> itt = kzalloc(sz, GFP_KERNEL); >>>>> + if (itt && !IS_ALIGNED(virt_to_phys(itt), ITS_ITT_ALIGN)) { >>>>> + kfree(itt); >>>>> + itt = kzalloc(sz + ITS_ITT_ALIGN - 1, GFP_KERNEL); >>>>> + } >>>>> + >>>> Is this really worth the complexity? Are you aware of a system where the >>>> accumulation of overallocation actually shows up as being an issue? >>> As such there is no issue with over allocation. Actually this change masked >>> QDF2400 bug 'iirqchip/gicv3-its: Add workaround for QDF2400 ITS erratum >>> 0065' till now, found and fixed recently while looking at the code for >>> possible memory optimizations. >>> >>>> If you want to be absolutely exact in your allocation, then I'd suggest >>>> doing it all the time, and have a proper dedicated allocator that always >>>> do the right thing, without a wasteful fallback like you still have here. >>> We don't need to fallbak, and it can be removed safely. Looking for >>> your suggestion. should I implement a dedicated allocator or remove >>> fallbak for simpler code? >> Are you saying that kmalloc is guaranteed to give us something that is >> 256 byte aligned? If so, why do we test for alignment (with free + >> over-allocate if it fails)? > I've verified on my system kmalloc() is always allocating memory with > 256bytes alignment. kmalloc() uses the generic slab caches available in the > kernel to allocate memory based on the input size. > >> I'd rather have only one way of allocating the ITT. Either we always >> overallocate in order to guarantee right alignment (and my personal view >> is that for most system, this doesn't matter at all), or we create our >> own allocator. The issue with the latter is that we don't really have a >> good story for allocating arrays of objects with a given alignment >> (kmem_cache_* only deals with single objects). > Adding a dedicated function to allocate memory is preferable but need pull a > few of lines of code. > > diff --git a/drivers/irqchip/irq-gic-v3-its.c > b/drivers/irqchip/irq-gic-v3-its.c > index a27a074..f0125e5 100644 > --- a/drivers/irqchip/irq-gic-v3-its.c > +++ b/drivers/irqchip/irq-gic-v3-its.c > @@ -90,6 +90,8 @@ struct its_node { > u32 ite_size; > u32 device_ids; > int numa_node; > + struct page *ite_page; > + u32 ite_psz; > }; > > #define ITS_ITT_ALIGN SZ_256 > @@ -266,7 +268,6 @@ static struct its_collection *its_build_mapd_cmd(struct > its_cmd_block *cmd, > u8 size = ilog2(desc->its_mapd_cmd.dev->nr_ites); > > itt_addr = virt_to_phys(desc->its_mapd_cmd.dev->itt); > - itt_addr = ALIGN(itt_addr, ITS_ITT_ALIGN); > > its_encode_cmd(cmd, GITS_CMD_MAPD); > its_encode_devid(cmd, desc->its_mapd_cmd.dev->device_id); > @@ -1319,6 +1320,42 @@ static bool its_alloc_device_table(struct its_node > *its, u32 dev_id) > return true; > } > > +static void *its_alloc_memory_ites(struct its_node *its, int nr_ites) > +{ > + unsigned long flags; > + struct page *page; > + void *ite; > + u32 size; > + > + size = ALIGN(nr_ites * its->ite_size, ITS_ITT_ALIGN); > + raw_spin_lock_irqsave(&its->lock, flags); > + > + /* Try to reuse the current page if enough space is available */ > + if (size > its->ite_psz) { > + /* Allocate a new compound page with minimum order 1 */ > + page = alloc_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO, > + max(get_order(size), 1)); > + if (!page) { > + raw_spin_unlock_irqrestore(&its->lock, flags); > + return NULL; > + } > + > + /* Free current page, decrement page count */ > + if (its->ite_page) > + put_page(its->ite_page); > + its->ite_psz = PAGE_ORDER_TO_SIZE(compound_order(page)); > + its->ite_page = page; > + } > + > + get_page(its->ite_page); /* increment page count */ > + its->ite_psz -= size; /* update free space */ > + ite = page_address(its->ite_page) + its->ite_psz; > + raw_spin_unlock_irqrestore(&its->lock, flags); > + gic_flush_dcache_to_poc(ite, size); > + > + return ite; > +} > + > static struct its_device *its_create_device(struct its_node *its, u32 dev_id, > int nvecs) > { > @@ -1330,7 +1367,6 @@ static struct its_device *its_create_device(struct > its_node *its, u32 dev_id, > int lpi_base; > int nr_lpis; > int nr_ites; > - int sz; > > if (!its_alloc_device_table(its, dev_id)) > return NULL; > @@ -1342,22 +1378,22 @@ static struct its_device *its_create_device(struct > its_node *its, u32 dev_id, > * express an ITT with a single entry. > */ > nr_ites = max(2UL, roundup_pow_of_two(nvecs)); > - sz = nr_ites * its->ite_size; > - sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1; > - itt = kzalloc(sz, GFP_KERNEL); > + itt = its_alloc_memory_ites(its, nr_ites); > + if (!itt) > + return NULL; > + > lpi_map = its_lpi_alloc_chunks(nvecs, &lpi_base, &nr_lpis); > if (lpi_map) > col_map = kzalloc(sizeof(*col_map) * nr_lpis, GFP_KERNEL); > > - if (!dev || !itt || !lpi_map || !col_map) { > + if (!dev || !lpi_map || !col_map) { > kfree(dev); > - kfree(itt); > + put_page(virt_to_page(itt)); > kfree(lpi_map); > kfree(col_map); > return NULL; > } > > - gic_flush_dcache_to_poc(itt, sz); > > dev->its = its; > dev->itt = itt; > @@ -1386,7 +1422,7 @@ static void its_free_device(struct its_device *its_dev) > raw_spin_lock_irqsave(&its_dev->its->lock, flags); > list_del(&its_dev->entry); > raw_spin_unlock_irqrestore(&its_dev->its->lock, flags); > - kfree(its_dev->itt); > + put_page(virt_to_page(its_dev->itt)); > kfree(its_dev); > } > > > This patch is not urgent, if you want we can revisit it at later time. diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 86bd428..5aeca78 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1329,8 +1329,13 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, */ nr_ites = max(2UL, roundup_pow_of_two(nvecs)); sz = nr_ites * its->ite_size; - sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1; + sz = max(sz, ITS_ITT_ALIGN); itt = kzalloc(sz, GFP_KERNEL); + if (itt && !IS_ALIGNED(virt_to_phys(itt), ITS_ITT_ALIGN)) { + kfree(itt); + itt = kzalloc(sz + ITS_ITT_ALIGN - 1, GFP_KERNEL); + } + lpi_map = its_lpi_alloc_chunks(nvecs, &lpi_base, &nr_lpis); if (lpi_map) col_map = kzalloc(sizeof(*col_map) * nr_lpis, GFP_KERNEL); -- Shanker Donthineni Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc. Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.