[PATCH -next 1/2] drm/bridge: Use PTR_ERR_OR_ZERO() to simplify code

2023-08-22 Thread Jinjie Ruan
Return PTR_ERR_OR_ZERO() instead of return 0 or PTR_ERR() to
simplify code.

Signed-off-by: Jinjie Ruan 
---
 drivers/gpu/drm/bridge/tc358762.c | 10 ++
 drivers/gpu/drm/bridge/tc358764.c |  5 +
 2 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/bridge/tc358762.c 
b/drivers/gpu/drm/bridge/tc358762.c
index 46198af9eebb..49fd60a08c1c 100644
--- a/drivers/gpu/drm/bridge/tc358762.c
+++ b/drivers/gpu/drm/bridge/tc358762.c
@@ -240,19 +240,13 @@ static int tc358762_parse_dt(struct tc358762 *ctx)
 
/* Reset GPIO is optional */
ctx->reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW);
-   if (IS_ERR(ctx->reset_gpio))
-   return PTR_ERR(ctx->reset_gpio);
-
-   return 0;
+   return PTR_ERR_OR_ZERO(ctx->reset_gpio);
 }
 
 static int tc358762_configure_regulators(struct tc358762 *ctx)
 {
ctx->regulator = devm_regulator_get(ctx->dev, "vddc");
-   if (IS_ERR(ctx->regulator))
-   return PTR_ERR(ctx->regulator);
-
-   return 0;
+   return PTR_ERR_OR_ZERO(ctx->regulator);
 }
 
 static int tc358762_probe(struct mipi_dsi_device *dsi)
diff --git a/drivers/gpu/drm/bridge/tc358764.c 
b/drivers/gpu/drm/bridge/tc358764.c
index deccb3995022..738ea6e11712 100644
--- a/drivers/gpu/drm/bridge/tc358764.c
+++ b/drivers/gpu/drm/bridge/tc358764.c
@@ -319,10 +319,7 @@ static int tc358764_parse_dt(struct tc358764 *ctx)
}
 
ctx->next_bridge = devm_drm_of_get_bridge(dev, dev->of_node, 1, 0);
-   if (IS_ERR(ctx->next_bridge))
-   return PTR_ERR(ctx->next_bridge);
-
-   return 0;
+   return PTR_ERR_OR_ZERO(ctx->next_bridge);
 }
 
 static int tc358764_configure_regulators(struct tc358764 *ctx)
-- 
2.34.1



[PATCH -next 0/2] drm: Use PTR_ERR_OR_ZERO() to simplify code

2023-08-22 Thread Jinjie Ruan
PTR_ERR_OR_ZERO() returns the error code within @ptr if it is
an error pointer, otherwise it returns 0. So use it to simplify code.

Jinjie Ruan (2):
  drm/bridge: Use PTR_ERR_OR_ZERO() to simplify code
  drm/tegra: Use PTR_ERR_OR_ZERO() to simplify code

 drivers/gpu/drm/bridge/tc358762.c | 10 ++
 drivers/gpu/drm/bridge/tc358764.c |  5 +
 drivers/gpu/drm/tegra/drm.c   |  5 +
 drivers/gpu/drm/tegra/gem.c   |  5 +
 4 files changed, 5 insertions(+), 20 deletions(-)

-- 
2.34.1



[PATCH -next 2/2] drm/tegra: Use PTR_ERR_OR_ZERO() to simplify code

2023-08-22 Thread Jinjie Ruan
Return PTR_ERR_OR_ZERO() instead of return 0 or PTR_ERR() to
simplify code.

Signed-off-by: Jinjie Ruan 
---
 drivers/gpu/drm/tegra/drm.c | 5 +
 drivers/gpu/drm/tegra/gem.c | 5 +
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index ff36171c8fb7..4e29d76da1be 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -354,10 +354,7 @@ static int tegra_gem_create(struct drm_device *drm, void 
*data,
 
bo = tegra_bo_create_with_handle(file, drm, args->size, args->flags,
 &args->handle);
-   if (IS_ERR(bo))
-   return PTR_ERR(bo);
-
-   return 0;
+   return PTR_ERR_OR_ZERO(bo);
 }
 
 static int tegra_gem_mmap(struct drm_device *drm, void *data,
diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index a4023163493d..11ef0f8cb1e1 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -533,10 +533,7 @@ int tegra_bo_dumb_create(struct drm_file *file, struct 
drm_device *drm,
 
bo = tegra_bo_create_with_handle(file, drm, args->size, 0,
 &args->handle);
-   if (IS_ERR(bo))
-   return PTR_ERR(bo);
-
-   return 0;
+   return PTR_ERR_OR_ZERO(bo);
 }
 
 static vm_fault_t tegra_bo_fault(struct vm_fault *vmf)
-- 
2.34.1



Re: [PATCH v14 0/6] drm/imx: Introduce i.MX8qm/qxp DPU DRM

2023-08-22 Thread Marcel Ziswiler
Hi Liu Ying

On Tue, 2023-08-22 at 05:36 +0000, Ying Liu wrote:
> Hi,
> 
> > On Friday, January 6, 2023 1:50 PM Ying Liu wrote:
> > 
> > Hi,
> > 
> > 
> > This is the v14 series to introduce i.MX8qm/qxp Display Processing Unit(DPU)
> > DRM support.

[snip]

> This patch series has been submitted for a quite long period of time.
> 
> Anything I can do to have it landed ?

Well, may it be tested? Are all the missing pieces there now?

Thanks!

> Regards,
> Liu Ying

Cheers

Marcel


Re: [PATCH RFC v5 02/10] drm: Introduce solid fill DRM plane property

2023-08-22 Thread Pekka Paalanen
On Mon, 21 Aug 2023 17:30:21 +0300
Dmitry Baryshkov  wrote:

> On Fri, 18 Aug 2023 at 16:55, Pekka Paalanen  wrote:
> >
> > On Fri, 18 Aug 2023 14:03:14 +0300
> > Dmitry Baryshkov  wrote:
> >  
> > > On 18/08/2023 13:51, Pekka Paalanen wrote:  
> > > > On Fri, 4 Aug 2023 16:59:00 +0300
> > > > Dmitry Baryshkov  wrote:
> > > >  
> > > >> On Fri, 4 Aug 2023 at 16:44, Sebastian Wick 
> > > >>  wrote:  
> > > >>>
> > > >>> On Fri, Aug 4, 2023 at 3:27 PM Dmitry Baryshkov
> > > >>>  wrote:  
> > > 
> > >  On Fri, 28 Jul 2023 at 20:03, Jessica Zhang 
> > >   wrote:  
> > > >
> > > > Document and add support for solid_fill property to drm_plane. In
> > > > addition, add support for setting and getting the values for 
> > > > solid_fill.
> > > >
> > > > To enable solid fill planes, userspace must assign a property blob 
> > > > to
> > > > the "solid_fill" plane property containing the following 
> > > > information:
> > > >
> > > > struct drm_mode_solid_fill {
> > > >  u32 version;
> > > >  u32 r, g, b;
> > > > };
> > > >
> > > > Signed-off-by: Jessica Zhang 
> > > > ---
> > > >   drivers/gpu/drm/drm_atomic_state_helper.c |  9 +
> > > >   drivers/gpu/drm/drm_atomic_uapi.c | 55 
> > > > +++
> > > >   drivers/gpu/drm/drm_blend.c   | 30 +
> > > >   include/drm/drm_blend.h   |  1 +
> > > >   include/drm/drm_plane.h   | 35 
> > > > 
> > > >   include/uapi/drm/drm_mode.h   | 24 ++
> > > >   6 files changed, 154 insertions(+)
> > > >  
> > > 
> > >  [skipped most of the patch]  
> >
> > ...
> >  
> > > >>> Maybe another COLOR_FILL enum value
> > > >>> with alpha might be better? Maybe just doing the alpha via the alpha
> > > >>> property is good enough.  
> > > >>
> > > >> One of our customers has a use case for setting the opaque solid fill,
> > > >> while keeping the plane's alpha intact.  
> > > >
> > > > Could you explain more about why they must keep plane alpha intact
> > > > instead of reprogramming everything with atomic? Is there some
> > > > combination that just cannot reach the same end result via userspace
> > > > manipulation of the solid fill values with plane alpha?
> > > >
> > > > Or is it a matter of userspace architecture where you have independent
> > > > components responsible for different KMS property values?  
> >  
> > > The latter one. The goal is to be able to switch between pixel sources
> > > without touching any additional properties (including plane's alpha 
> > > value).  
> >
> > Sorry, but that does not seem like a good justification for KMS UAPI
> > design.
> >
> > It is even in conflict with how atomic KMS UAPI was designed to work:
> > collect all your changes into a single commit, and push it at once.
> > Here we are talking about separate components changing the different
> > properties of the same KMS plane even. If you want to change both plane
> > opacity and contents, does it mean you need two refresh cycles, one at
> > a time? Could the two components be even racing with each other,
> > stalling each other randomly?  
> 
> Most likely I was not verbose enough.
> 
> We want to setup the blending scene, including the FB and the solid
> fill properties for the plane. FB is set up in the RGBA format, each
> pixel having its own alpha value in addition to the plane's alpha
> value. Then under certain circumstances, the plane gets hidden by the
> solid fill (think of a curtain). We do not want to touch the global
> scene setup (including plane alpha value), just switch the curtain on
> and off.
> I think this plays good enough with the defined plane blending rules,
> where one can use pre-multiplied blending mode or use coverage
> blending mode.

Right, that's what I understood. But this does complicate the KMS UAPI
for something that is well possible and feasible without the added
complication as well.

Is there a hardware or driver reason to avoid touching the global scene
setup? Does something in the hardware or driver work more optimally
that way?

Personally I'd favour simpler UAPI with more complex userspace for
maintainability and testing reasons. I'd also favour UAPI that exposes
common hardware features instead of design driven by userspace
process-internal architecture. There does not seem to be any
functionality or performance reasons to justify adding alpha channel to
the solid fill color.

OTOH, do we know of hardware that does not have separate alpha for the
fill color?

Do we know of hardware that can only do opaque solid fills, meaning no
alpha in the fill color nor for the plane?

What about hardware that has no plane alpha, but does have fill color
alpha?

If the plane has an alpha property, then drivers could implement fill
color alpha by combining the two alpha values before program

Re: [PATCH v2 0/5] use refcount+RCU method to implement lockless slab shrink (part 1)

2023-08-22 Thread Qi Zheng




On 2023/8/17 19:23, Qi Zheng wrote:

Hi all,

To make reviewing and updating easier, I've chosen to split the previous
patchset[1] into the following three parts:

part 1: some cleanups and preparations
part 2: introduce new APIs and convert all shrinkers to use these
part 3: implement lockless slab shrink

This series is the part 1 and is based on the next-20230815.

Comments and suggestions are welcome.

[1]. 
https://lore.kernel.org/lkml/20230807110936.21819-1-zhengqi.a...@bytedance.com/

Thanks,
Qi

Changelog in part 1 v1 -> part 1 v2:
  - fix compilation warning in [PATCH 1/5]
  - rename synchronize_shrinkers() to ttm_pool_synchronize_shrinkers()
(pointed by Christian König)
  - collect Reviewed-by

Changelog in v4 -> part 1 v1:
  - split from the previous large patchset
  - fix comment format in [PATCH v4 01/48] (pointed by Muchun Song)
  - change to use kzalloc_node() and fix typo in [PATCH v4 44/48]
(pointed by Dave Chinner)
  - collect Reviewed-bys
  - rebase onto the next-20230815


Hi all,

Any more comments and suggestions for this part 1? Or can this part
be merged first (just some cleanups and preparations)? Or should I
post the part 2 and part 3 first for everyone to review together?

Either one is fine for me. :)

Thanks,
Qi



Qi Zheng (5):
   mm: move some shrinker-related function declarations to mm/internal.h
   mm: vmscan: move shrinker-related code into a separate file
   mm: shrinker: remove redundant shrinker_rwsem in debugfs operations
   drm/ttm: introduce pool_shrink_rwsem
   mm: shrinker: add a secondary array for shrinker_info::{map,
 nr_deferred}

  drivers/gpu/drm/ttm/ttm_pool.c |  17 +-
  include/linux/memcontrol.h |  12 +-
  include/linux/shrinker.h   |  37 +-
  mm/Makefile|   4 +-
  mm/internal.h  |  28 ++
  mm/shrinker.c  | 751 +
  mm/shrinker_debug.c|  18 +-
  mm/vmscan.c| 701 --
  8 files changed, 818 insertions(+), 750 deletions(-)
  create mode 100644 mm/shrinker.c



RE: [PATCH v14 0/6] drm/imx: Introduce i.MX8qm/qxp DPU DRM

2023-08-22 Thread Ying Liu

On Tuesday, August 22, 2023 3:20 PM Marcel Ziswiler 
 wrote:
> 
> Hi Liu Ying

Hi Marcel,

> 
> On Tue, 2023-08-22 at 05:36 +0000, Ying Liu wrote:
> > Hi,
> >
> > > On Friday, January 6, 2023 1:50 PM Ying Liu wrote:
> > >
> > > Hi,
> > >
> > >
> > > This is the v14 series to introduce i.MX8qm/qxp Display Processing
> Unit(DPU)
> > > DRM support.
> 
> [snip]
> 
> > This patch series has been submitted for a quite long period of time.
> >
> > Anything I can do to have it landed ?
> 
> Well, may it be tested? Are all the missing pieces there now?

I've tested this series on i.MX8qm MEK and i.MX8qxp MEK with LVDS
panel and LVDS to HDMI.

To test LVDS displays, compatible strings "fsl,imx8qm-lvds-csr" and
"fsl,imx8qxp-mipi-lvds-csr" need to be added in simple-pm-bus.c by
patch [1] and a clock patch [2] is needed if you use relatively new
SCU firmware in bootloader.

Aside from that, I just wrote/changed device tree.
See attached DT files for LVDS panel test. 

[1] https://www.spinics.net/lists/kernel/msg4664520.html
[2] https://pastebin.mozilla.org/LaXEeoY9

Regards,
Liu Ying

> 
> Thanks!
> 
> > Regards,
> > Liu Ying
> 
> Cheers
> 
> Marcel


for_marcel.tgz
Description: for_marcel.tgz


Re: [PATCH v14 0/6] drm/imx: Introduce i.MX8qm/qxp DPU DRM

2023-08-22 Thread mrip...@kernel.org
Hi,

On Tue, Aug 22, 2023 at 05:36:14AM +0000, Ying Liu wrote:
> Hi,
> 
> > On Friday, January 6, 2023 1:50 PM Ying Liu wrote:
> > 
> > Hi,
> > 
> > 
> > This is the v14 series to introduce i.MX8qm/qxp Display Processing Unit(DPU)
> > DRM support.
> > 
> > DPU is comprised of a blit engine for 2D graphics, a display controller
> > and a command sequencer.  Outside of DPU, optional prefetch engines can
> > fetch data from memory prior to some DPU fetchunits of blit engine and
> > display controller.  The pre-fetchers support linear formats and Vivante
> > GPU tile formats.
> > 
> > Reference manual can be found at:
> > https://www.nxp.com/webapp/Download?colCode=IMX8DQXPRM
> > 
> > 
> > This patch set adds kernel modesetting support for the display controller 
> > part.
> > It supports two CRTCs per display controller, several planes, prefetch
> > engines and some properties of CRTC and plane.  Currently, the registers of
> > the controller are accessed without command sequencer involved, instead just
> > by
> > using CPU.  DRM connectors would be created from the DPU KMS driver.
> > 
> > 
> > Patch 1 ~ 3 add dt-bindings for DPU and prefetch engines.
> > Patch 4 is a minor improvement of a macro to suppress warning as the KMS
> > driver
> > uses it.
> > Patch 5 introduces the DPU DRM support.
> > Patch 6 updates MAINTAINERS.
> > 
> > Welcome comments, thanks.
> > 
> > v13->v14:
> > * Rebase the patch series to the latest drm-misc-next branch(v6.1-rc6 
> > based).
> > * Include drm_fbdev_generic.h in dpu_drv.c due to the rebase.
> > * Fix dpu drm driver suspend/resume by properly get drm device through
> >   dev_get_drvdata().
> > * Use pm_ptr() macro for dpu core driver PM operations.
> > * Use pm_sleep_ptr() macro for dpu drm driver PM operations.
> > * Use DEFINE_SIMPLE_DEV_PM_OPS() macro to define dpu drm driver PM
> > operations,
> >   instead of SIMPLE_DEV_PM_OPS().
> > * Update year of Copyright.
> > * Add SoC series name 'i.MX8'/'IMX8'/'imx8' to dpu driver module description,
> >   Kconfig name, dpu driver names and dpu driver object name.
> > 
> > v12->v13:
> > * Drop 'drm->irq_enabled = true;' from patch 5/6 to fix a potential build
> >   break reported by 'kernel test robot '.  drm->irq_enabled
> >   should not be used by imx-dpu drm as it is only used by legacy drivers
> >   with userspace modesetting.
> > 
> > v11->v12:
> > * Rebase the series upon v6.1-rc1.
> > * Minor update on Kconfigs, struct names and macro names for patch 5/6
> >   due to the rebase.
> > 
> > v10->v11:
> > * Rebase the series upon v6.0-rc1.
> > * Include drm_blend.h and drm_framebuffer.h in dpu-kms.c and dpu-
> > plane.c
> >   to fix build errors due to the rebase.
> > * Fix a checkpatch warning for dpu-crtc.c.
> > * Properly use dev_err_probe() to return its return value directly where
> >   possible.
> > 
> > v9->v10:
> > * Rebase the series upon v5.18-rc1.
> > * Make 'checkpatch.pl --strict' happier for patch 5/6.
> > * Add Rob's R-b tag on patch 3/6.
> > * Add Laurentiu's R-b tag on patch 5/6.
> > * Add Laurentiu's A-b tag on patch 6/6.
> > 
> > v8->v9:
> > * Use drm_atomic_get_new_plane_state() in dpu_plane_atomic_update()
> > for
> >   patch 5/6. (Laurentiu)
> > * Drop getting DPU DT alias ID for patch 5/6, as it is unused.
> > * Reference 'interrupts-extended' schema instead of 'interrupts' for patch
> > 3/6
> >   to require an additional DPR interrupt(r_rtram_stall) because the 
> > reference
> >   manual does mention it, though the driver doesn't get/use it for now.
> >   Reference 'interrupt-names' schema to define the two DPR interrupt names
> > -
> >   'dpr_wrap' and 'r_rtram_stall'.  Accordingly, patch 5/6 gets the 
> > 'dpr_wrap'
> >   interrupt by name.
> > * Drop Rob's R-b tag on patch 3/6, as review is needed.
> > 
> > v7->v8:
> > * Rebase this series up onto the latest drm-misc-next branch, due to DRM
> > plane
> >   helper functions API change(atomic_check and atomic_update) from DRM
> > atomic
> >   core.  So, dpu_plane_atomic_check() and dpu_plane_atomic_update() are
> > updated
> >   accordingly in patch 5/6.  Also, rename plane->state variables and 
> > relevant
> >   DPU plane state variables in those two functions to reflect they are new
> >   states, like the patch 'drm: Rename plane->state variables in atomic 
> > update
> >   and disable' recently landed in drm-misc-next.
> > * Replace drm_gem_fb_prepare_fb() with
> > drm_gem_plane_helper_prepare_fb() in
> >   patch 5/6, due to DRM core API change.
> > * Improve DPR burst length for GPU standard tile and 32bpp GPU super tile in
> >   patch 5/6 to align with the latest version of internal HW documentation.
> > 
> > v6->v7:
> > * Fix return value of dpu_get_irqs() if platform_get_irq() fails. 
> > (Laurentiu)
> > * Use the function array dpu_irq_handler[] to store individual DPU irq
> > handlers.
> >   (Laurentiu)
> > * Call get/put() hooks directly to get/put DPU fetchunits for DPU plane 
> > groups.
> >   (Laurentiu)
> > * Shorten the names of indi

Re: [PATCH 3/4] drm/xe/vm: Perform accounting of userptr pinned pages

2023-08-22 Thread Thomas Hellström



On 8/20/23 05:43, Matthew Brost wrote:

On Fri, Aug 18, 2023 at 05:08:44PM +0200, Thomas Hellström wrote:

Account these pages against RLIMIT_MEMLOCK following how RDMA does this
with CAP_IPC_LOCK bypassing the limit.

Signed-off-by: Thomas Hellström 

Patch LGTM but nits on naming + possible assert.


---
  drivers/gpu/drm/xe/xe_vm.c | 43 --
  1 file changed, 41 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index ecbcad696b60..d9c000689002 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -34,6 +34,33 @@
  
  #define TEST_VM_ASYNC_OPS_ERROR
  
+/*

+ * Perform userptr PIN accounting against RLIMIT_MEMLOCK for now, similarly
+ * to how RDMA does this.
+ */
+static int xe_vma_mlock_alloc(struct xe_vma *vma, unsigned long num_pages)
+{

xe_vma_userptr_mlock_alloc? or maybe even xe_vma_userptr_mlock_reserve?


+   unsigned long lock_limit, new_pinned;
+   struct mm_struct *mm = vma->userptr.notifier.mm;
+

This could be a candidate to use the new assert macros to ensure that the vma
is a userptr + pinned? Not sure if that has merged yet.


+   if (!can_do_mlock())
+   return -EPERM;
+
+   lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+   new_pinned = atomic64_add_return(num_pages, &mm->pinned_vm);
+   if (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)) {
+   atomic64_sub(num_pages, &mm->pinned_vm);
+   return -ENOMEM;
+   }
+
+   return 0;
+}
+
+static void xe_vma_mlock_free(struct xe_vma *vma, unsigned long num_pages)
+{

xe_vma_userptr_mlock_free? or maybe even xe_vma_userptr_mlock_release?

Same for the assert here.

Anyways, I'll leave addressing these nits up to you, with that:
Reviewed-by: Matthew Brost 


OK, thanks. I'll take a look at addressing those.




+   atomic64_sub(num_pages, &vma->userptr.notifier.mm->pinned_vm);
+}
+
  /**
   * xe_vma_userptr_check_repin() - Advisory check for repin needed
   * @vma: The userptr vma
@@ -89,9 +116,17 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma)
!read_only);
pages = vma->userptr.pinned_pages;
} else {
+   if (xe_vma_is_pinned(vma)) {
+   ret = xe_vma_mlock_alloc(vma, num_pages);
+   if (ret)
+   return ret;
+   }
+
pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
-   if (!pages)
-   return -ENOMEM;
+   if (!pages) {
+   ret = -ENOMEM;
+   goto out_account;
+   }
}
  
  	pinned = ret = 0;

@@ -187,6 +222,9 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma)
  mm_closed:
kvfree(pages);
vma->userptr.pinned_pages = NULL;
+out_account:
+   if (xe_vma_is_pinned(vma))
+   xe_vma_mlock_free(vma, num_pages);
return ret;
  }
  
@@ -1004,6 +1042,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma)

unpin_user_pages_dirty_lock(vma->userptr.pinned_pages,
vma->userptr.num_pinned,
!read_only);
+   xe_vma_mlock_free(vma, xe_vma_size(vma) >> PAGE_SHIFT);
kvfree(vma->userptr.pinned_pages);
}
  
--

2.41.0



Re: [RFC v1 1/3] mm/mmu_notifier: Add a new notifier for mapping updates (new pages)

2023-08-22 Thread Alistair Popple


"Kasireddy, Vivek"  writes:

> Hi Alistair,
>
>> >> > > > No, adding HMM_PFN_REQ_WRITE still doesn't help in fixing the
>> issue.
>> >> > > > Although, I do not have THP enabled (or built-in), shmem does not
>> evict
>> >> > > > the pages after hole punch as noted in the comment in
>> >> shmem_fallocate():
>> >> > >
>> >> > > This is the source of all your problems.
>> >> > >
>> >> > > Things that are mm-centric are supposed to track the VMAs and
>> changes
>> >> to
>> >> > > the PTEs. If you do something in userspace and it doesn't cause the
>> >> > > CPU page tables to change then it certainly shouldn't cause any mmu
>> >> > > notifiers or hmm_range_fault changes.
>> >> > I am not doing anything out of the blue in the userspace. I think the
>> >> behavior
>> >> > I am seeing with shmem (where an invalidation event
>> >> (MMU_NOTIFY_CLEAR)
>> >> > does occur because of a hole punch but the PTEs don't really get
>> updated)
>> >> > can arguably be considered an optimization.
>> >>
>> >> Your explanations don't make sense.
>> >>
>> >> If MMU_NOTIFY_CLEAR was sent but the PTEs were left present then:
>> >>
>> >> > > There should still be an invalidation notifier at some point when the
>> >> > > CPU tables do eventually change, whenever that is. Missing that
>> >> > > notification would be a bug.
>> >> > I clearly do not see any notification getting triggered (from both
>> >> shmem_fault()
>> >> > and hugetlb_fault()) when the PTEs do get updated as the hole is 
>> >> > refilled
>> >> > due to writes. Are you saying that there needs to be an invalidation
>> event
>> >> > (MMU_NOTIFY_CLEAR?) dispatched at this point?
>> >>
>> >> You don't get to get shmem_fault in the first place.
>> > What I am observing is that even after MMU_NOTIFY_CLEAR (hole punch)
>> is sent,
>> > hmm_range_fault() finds that the PTEs associated with the hole are still
>> pte_present().
>> > I think it remains this way as long as there are reads on the hole. Once
>> there are
>> > writes, it triggers shmem_fault() which results in PTEs getting updated but
>> without
>> > any notification.
>> 
>> Oh wait, this is shmem. The read from hmm_range_fault() (assuming you
>> specified HMM_PFN_REQ_FAULT) will trigger shmem_fault() due to the
>> missing PTE. 
> When running one of the udmabuf subtests (introduced in the third patch of
> this series), I see that MMU_NOTIFY_CLEAR is sent when a hole is punched.
> As a response, hmm_range_fault() is called from the udmabuf invalidate 
> callback,

Actually I'm surprised that works. If you've set up an interval notifier
and are updating the notifier sequence numbers correctly I would expect
hmm_range_fault() to return -EBUSY until
mmu_notifier_invalidate_range_end() is called.

It might be helpful to post the code you're testing with somewhere but
are you calling mmu_interval_read_begin() to start the critical section
and mmu_interval_set_seq() to update the sequence in another notifier?
I'm not at all convinced calling hmm_range_fault() from a notifier can
be made to work though.

> to walk over the PTEs associated with the hole. When this happens, I noticed 
> that
> the below function returns HMM_PFN_VALID | HMM_PFN_WRITE for all the
> PTEs associated with the hole. 
> static inline unsigned long pte_to_hmm_pfn_flags(struct hmm_range *range,
>  pte_t pte)
> {
> if (pte_none(pte) || !pte_present(pte) || pte_protnone(pte))
> return 0;
> return pte_write(pte) ? (HMM_PFN_VALID | HMM_PFN_WRITE) : 
> HMM_PFN_VALID;
> }
>
> As a result, hmm_pte_need_fault() always returns 0 and shmem_fault()
> never gets triggered despite specifying HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE.
> And, the set of PFNs returned by hmm_range_fault() are the same ones
> that existed before the hole was punched.
>
>> Subsequent writes will just upgrade PTE permissions
>> assuming the read didn't map them RW to begin with. If you want to
>> actually see the hole with hmm_range_fault() don't specify
>> HMM_PFN_REQ_FAULT (or _WRITE).
>> 
>> >>
>> >> If they were marked non-present during the CLEAR then the shadow side
>> >> remains non-present until it gets its own fault.
>> >>
>> >> If they were made non-present without an invalidation then that is a
>> >> bug.
>> >>
>> >> > > hmm_range_fault() is the correct API to use if you are working with
>> >> > > notifiers. Do not hack something together using pin_user_pages.
>> >>
>> >> > I noticed that hmm_range_fault() does not seem to be working as
>> expected
>> >> > given that it gets stuck(hangs) while walking hugetlb pages.
>> >>
>> >> You are the first to report that, it sounds like a serious bug. Please
>> >> try to fix it.
>> >>
>> >> > Regardless, as I mentioned above, the lack of notification when PTEs
>> >> > do get updated due to writes is the crux of the issue
>> >> > here. Therefore, AFAIU, triggering an invalidation event or some
>> >> > other kind of notification would help in fixing this issue.
>>

Re: [PATCH 1/1] drm/fourcc: Add documentation about software color conversion.

2023-08-22 Thread Pekka Paalanen
On Mon, 21 Aug 2023 17:55:33 +0200
Maxime Ripard  wrote:

> Hi Pekka,
> 
> Thanks for answering
> 
> On Fri, Aug 18, 2023 at 04:24:15PM +0300, Pekka Paalanen wrote:
> > On Thu, 10 Aug 2023 09:45:27 +0200
> > Maxime Ripard  wrote:  
> > > On Mon, Aug 07, 2023 at 03:45:15PM +0200, Jocelyn Falempe wrote:  
> > > > After discussions on IRC, the consensus is that the DRM drivers should
> > > > not do software color conversion, and only advertise the supported 
> > > > formats.
> > > > Update the doc accordingly so that the rule and exceptions are clear for
> > > > everyone.
> > > > 
> > > > Signed-off-by: Jocelyn Falempe 
> > > > ---
> > > >  include/uapi/drm/drm_fourcc.h | 7 +++
> > > >  1 file changed, 7 insertions(+)
> > > > 
> > > > diff --git a/include/uapi/drm/drm_fourcc.h 
> > > > b/include/uapi/drm/drm_fourcc.h
> > > > index 8db7fd3f743e..00a29152da9f 100644
> > > > --- a/include/uapi/drm/drm_fourcc.h
> > > > +++ b/include/uapi/drm/drm_fourcc.h
> > > > @@ -38,6 +38,13 @@ extern "C" {
> > > >   * fourcc code, a Format Modifier may optionally be provided, in order 
> > > > to
> > > >   * further describe the buffer's format - for example tiling or 
> > > > compression.
> > > >   *
> > > > + * DRM drivers should not do software color conversion, and only 
> > > > advertise the
> > > > + * format they support in hardware. But there are two exceptions:
> > > 
> > > I would do a bullet list here:
> > > https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html#lists-and-quote-like-blocks
> > >   
> > > > + * The first is to support XRGB8888 if the hardware doesn't support 
> > > > it, because
> > > > + * it's the de facto standard for userspace applications.
> > > 
> > > We can also provide a bit more context here, something like:
> > > 
> > > All drivers must support XRGB8888, even if the hardware cannot support
> > > it. This has become the de-facto standard and a lot of user-space assume
> > > it will be present.
> > >   
> > > > + * The second is to drop the unused bits when sending the data to the 
> > > > hardware,
> > > > + * to improve the bandwidth, like dropping the "X" in XRGB8888.
> > > 
> > > I think it can be made a bit more generic, with something like:
> > > 
> > > Any driver is free to modify its internal representation of the format,
> > > as long as it doesn't alter the visible content in any way. An example
> > > would be to drop the padding component from a format to save some memory
> > > bandwidth.  
> >
> > to my understanding and desire, the rule to not "fake" pixel format
> > support is strictly related to performance. When a KMS client does a
> > page flip, it usually does not expect a massive amount of CPU or GPU
> > work to occur just because of the flip. A name for such work is "copy",
> > referring to any kind of copying of large amounts of pixel data,
> > including a format conversion or not.  
> 
> Should we add that to the suggested documentation that it shouldn't
> degrade performance and shouldn't be something that the userspace can
> notice?

I would let Sima (or Simon Ser) answer that, and verify my
understanding too.

> > This is especially important with GPU rendering and hardware video
> > playback systems, where any such copy could destroy the usability of
> > the whole system. This is the main reason why KMS must not do any
> > expensive processing unexpectedly (as in, not documented in UAPI).
> > Doing any kind of copy could cause a vblank to be missed, ruining
> > display timings.
> > 
> > I believe the above is the spirit of the rule.  
> 
> That's totally reasonable to me :)
> 
> > Then there will be exceptions. I'd like to think that everything below
> > (except for XRGB8888) can be derived from the above with common sense
> > - that's what I did.
> > 
> > XRGB8888 support is the prime exception. I suspect it originates from
> > the legacy KMS UAPI, and the practise that XRGB8888 has been widely
> > supported always. This makes it plausible for userspace to exist that
> > cannot produce any other format. Hence, it is good to support XRGB8888
> > through a conversion (copy) in the kernel for dumb buffers (that is,
> > for software rendered framebuffers). I would be very hesitant to extend
> > this exception to GPU rendered buffers, but OTOH if you have a GPU,
> > presumably you also have a display controller capable of scanning out
> > what the GPU renders, so you wouldn't even consider copying under the
> > hood.
> > 
> > DRM devices that cannot directly scan out buffers at all are a whole
> > category of exceptions. They include USB display adapters (literal USB,
> > not USB-C alt mode), perhaps networked and wireless displays, VKMS
> > which does everything in software, and so on. They simply have to
> > process the bulk pixel data with a CPU one way or another, and
> > hopefully they make use of damage rectangles to minimise the work.
> > 
> > Old-school special cursor planes may have been using special pixel
> > formats that may 

Re: [PATCH 2/4] drm/xe/vm: Implement userptr page pinning

2023-08-22 Thread Thomas Hellström



On 8/20/23 06:06, Matthew Brost wrote:

On Fri, Aug 18, 2023 at 05:08:43PM +0200, Thomas Hellström wrote:

Implement pinning of userptrs between VM_BIND and VM_UNBIND, which will
facilitate avoiding long hangs on non-preemptible workloads. But don't
hook it up to userspace just yet.

Signed-off-by: Thomas Hellström 
---
  drivers/gpu/drm/xe/xe_vm.c   | 76 ++--
  drivers/gpu/drm/xe/xe_vm.h   |  9 
  drivers/gpu/drm/xe/xe_vm_types.h | 12 +
  3 files changed, 74 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 8bf7f62e6548..ecbcad696b60 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -74,10 +74,6 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma)
if (notifier_seq == vma->userptr.notifier_seq)
return 0;
  
-	pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);

-   if (!pages)
-   return -ENOMEM;
-
if (vma->userptr.sg) {
dma_unmap_sgtable(xe->drm.dev,
  vma->userptr.sg,
@@ -87,6 +83,17 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma)
vma->userptr.sg = NULL;
}
  
+	if (vma->userptr.pinned_pages) {

+   unpin_user_pages_dirty_lock(vma->userptr.pinned_pages,
+   vma->userptr.num_pinned,
+   !read_only);
+   pages = vma->userptr.pinned_pages;

This implies that we can repin already pinned pages, I don't think that
should be possible, right? We shouldn't call this function twice nor
should the retry loop be triggered - both of these cases require an
invalidation to occur which shouldn't be possible if the pages are
pinned. So we likely should have a warning if vma->userptr.pinned_pages is
set, right?


Good catch. Currently since we still allow the userptr sequence number 
to be bumped, the next exec will release the old pages and pin new pages 
(which may be the same), but the GPU might still be accessing the old 
pages. Need to make sure this doesn't happen.


/Thomas


+   } else {
+   pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
+   if (!pages)
+   return -ENOMEM;
+   }
+
pinned = ret = 0;
if (in_kthread) {
if (!mmget_not_zero(vma->userptr.notifier.mm)) {
@@ -97,11 +104,18 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma)
}
  
  	while (pinned < num_pages) {

-   ret = get_user_pages_fast(xe_vma_userptr(vma) +
- pinned * PAGE_SIZE,
- num_pages - pinned,
- read_only ? 0 : FOLL_WRITE,
- &pages[pinned]);
+   if (xe_vma_is_pinned(vma))
+   ret = pin_user_pages_fast(xe_vma_userptr(vma) +
+ pinned * PAGE_SIZE,
+ num_pages - pinned,
+ read_only ? 0 : FOLL_WRITE,
+ &pages[pinned]);
+   else
+   ret = get_user_pages_fast(xe_vma_userptr(vma) +
+ pinned * PAGE_SIZE,
+ num_pages - pinned,
+ read_only ? 0 : FOLL_WRITE,
+ &pages[pinned]);
if (ret < 0) {
if (in_kthread)
ret = 0;
@@ -137,19 +151,24 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma)
if (ret)
goto out_free_sg;
  
-	for (i = 0; i < pinned; ++i) {

-   if (!read_only) {
-   lock_page(pages[i]);
-   set_page_dirty(pages[i]);
-   unlock_page(pages[i]);
+   if (!xe_vma_is_pinned(vma)) {
+   for (i = 0; i < pinned; ++i) {
+   if (!read_only) {
+   lock_page(pages[i]);
+   set_page_dirty(pages[i]);
+   unlock_page(pages[i]);
+   }
+
+   mark_page_accessed(pages[i]);
}
  
-		mark_page_accessed(pages[i]);

+   release_pages(pages, pinned);
+   kvfree(pages);
+   } else {
+   vma->userptr.pinned_pages = pages;
+   vma->userptr.num_pinned = pinned;
}
  
-	release_pages(pages, pinned);

-   kvfree(pages);
-
vma->userptr.notifier_seq = notifier_seq;
if (xe_vma_userptr_check_repin(vma) == -EAGAIN)
goto retry;
@@ -160,9 +179,14 @@ int xe_vma_userptr_pin_pages(struct x

RE: [PATCH v14 0/6] drm/imx: Introduce i.MX8qm/qxp DPU DRM

2023-08-22 Thread Ying Liu


On Tuesday, August 22, 2023 4:06 PM Maxime  wrote:
> 
> Hi,

Hi,

> 
> On Tue, Aug 22, 2023 at 05:36:14AM +, Ying Liu wrote:
> > Hi,
> >
> > > On Friday, January 6, 2023 1:50 PM Ying Liu wrote:
> > >
> > > Hi,
> > >
> > >
> > > This is the v14 series to introduce i.MX8qm/qxp Display Processing
> Unit(DPU)
> > > DRM support.
> > >
> > > DPU is comprised of a blit engine for 2D graphics, a display controller
> > > and a command sequencer.  Outside of DPU, optional prefetch engines
> can
> > > fetch data from memory prior to some DPU fetchunits of blit engine and
> > > display controller.  The pre-fetchers support linear formats and Vivante
> > > GPU tile formats.
> > >
> > > Reference manual can be found at:
> > > https://www.nxp.com/webapp/Download?colCode=IMX8DQXPRM
> > >
> > >
> > > This patch set adds kernel modesetting support for the display controller
> part.
> > > It supports two CRTCs per display controller, several planes, prefetch
> > > engines and some properties of CRTC and plane.  Currently, the registers
> of
> > > the controller are accessed without command sequencer involved, instead
> just
> > > by
> > > using CPU.  DRM connectors would be created from the DPU KMS driver.
> > >
> > >
> > > Patch 1 ~ 3 add dt-bindings for DPU and prefetch engines.
> > > Patch 4 is a minor improvement of a macro to suppress warning as the
> KMS
> > > driver
> > > uses it.
> > > Patch 5 introduces the DPU DRM support.
> > > Patch 6 updates MAINTAINERS.
> > >
> > > Welcome comments, thanks.
> > >
> > > v13->v14:
> > > * Rebase the patch series to the latest drm-misc-next branch(v6.1-rc6
> based).
> > > * Include drm_fbdev_generic.h in dpu_drv.c due to the rebase.
> > > * Fix dpu drm driver suspend/resume by properly get drm device through
> > >   dev_get_drvdata().
> > > * Use pm_ptr() macro for dpu core driver PM operations.
> > > * Use pm_sleep_ptr() macro for dpu drm driver PM operations.
> > > * Use DEFINE_SIMPLE_DEV_PM_OPS() macro to define dpu drm driver
> PM
> > > operations,
> > >   instead of SIMPLE_DEV_PM_OPS().
> > > * Update year of Copyright.
> > > * Add SoC series name 'i.MX8'/'IMX8'/'imx8' to dpu driver module
> decription,
> > >   Kconfig name, dpu driver names and dpu driver object name.
> > >
> > > v12->v13:
> > > * Drop 'drm->irq_enabled = true;' from patch 5/6 to fix a potential build
> > >   break reported by 'kernel test robot '.  drm-
> >irq_enabled
> > >   should not be used by imx-dpu drm as it is only used by legacy drivers
> > >   with userspace modesetting.
> > >
> > > v11->v12:
> > > * Rebase the series upon v6.1-rc1.
> > > * Minor update on Kconfigs, struct names and macro names for patch 5/6
> > >   due to the rebase.
> > >
> > > v10->v11:
> > > * Rebase the series upon v6.0-rc1.
> > > * Include drm_blend.h and drm_framebuffer.h in dpu-kms.c and dpu-
> > > plane.c
> > >   to fix build errors due to the rebase.
> > > * Fix a checkpatch warning for dpu-crtc.c.
> > > * Properly use dev_err_probe() to return its return value directly where
> > >   possible.
> > >
> > > v9->v10:
> > > * Rebase the series upon v5.18-rc1.
> > > * Make 'checkpatch.pl --strict' happier for patch 5/6.
> > > * Add Rob's R-b tag on patch 3/6.
> > > * Add Laurentiu's R-b tag on patch 5/6.
> > > * Add Laurentiu's A-b tag on patch 6/6.
> > >
> > > v8->v9:
> > > * Use drm_atomic_get_new_plane_state() in dpu_plane_atomic_update()
> > > for
> > >   patch 5/6. (Laurentiu)
> > > * Drop getting DPU DT alias ID for patch 5/6, as it is unused.
> > > * Reference 'interrupts-extended' schema instead of 'interrupts' for patch
> > > 3/6
> > >   to require an additional DPR interrupt(r_rtram_stall) because the
> reference
> > >   manual does mention it, though the driver doesn't get/use it for now.
> > >   Reference 'interrupt-names' schema to define the two DPR interrupt
> names
> > > -
> > >   'dpr_wrap' and 'r_rtram_stall'.  Accordingly, patch 5/6 gets the
> 'dpr_wrap'
> > >   interrupt by name.
> > > * Drop Rob's R-b tag on patch 3/6, as review is needed.
> > >
> > > v7->v8:
> > > * Rebase this series up onto the latest drm-misc-next branch, due to DRM
> > > plane
> > >   helper functions API change(atomic_check and atomic_update) from
> DRM
> > > atomic
> > >   core.  So, dpu_plane_atomic_check() and dpu_plane_atomic_update()
> are
> > > updated
> > >   accordingly in patch 5/6.  Also, rename plane->state variables and
> relevant
> > >   DPU plane state variables in those two functions to reflect they are new
> > >   states, like the patch 'drm: Rename plane->state variables in atomic
> update
> > >   and disable' recently landed in drm-misc-next.
> > > * Replace drm_gem_fb_prepare_fb() with
> > > drm_gem_plane_helper_prepare_fb() in
> > >   patch 5/6, due to DRM core API change.
> > > * Improve DPR burst length for GPU standard tile and 32bpp GPU super
> tile in
> > >   patch 5/6 to align with the latest version of internal HW documentation.
> > >
> > > v6->v7:
> > > * Fix return value of dpu_get_irqs() if platform

Re: [PATCH 4/4] drm/xe/uapi: Support pinning of userptr vmas

2023-08-22 Thread Thomas Hellström



On 8/20/23 05:54, Matthew Brost wrote:

On Fri, Aug 18, 2023 at 05:08:45PM +0200, Thomas Hellström wrote:

Support pinning of vmas using XE_VM_BIND_FLAG_PIN, initially for userptr
only. Pinned memory becomes accounted against RLIMIT_MEMLOCK and processes
with CAP_IPC_LOCK will not apply the limit. This is pretty similar to
mlock()'ing userptr memory with the added benefit that the driver is
aware and can ignore some actions in the MMU invalidation notifier.

This will initially become useful for compute VMs on hardware without
mid-thread-preemption capability since with pinned pages, the MMU
invalidation notifier never tries to preempt a running compute kernel.

If that were the only usage we could restrict this to a flag that always
pins userptr VMAs on compute VMs on such hardware, but there are
indications that this may become needed in other situations as well.

 From a more general point of view, the usage pattern of a system may be
such that in most cases it only ever runs a single workload per system
and then the sysadmin would want to configure the system to allow
extensive pinning for performance reasons.

Hence we might want to extend the pinning capability to bo-backed VMAs
as well. How that pinning will be accounted remains an open question, but
building on the current drm CGROUP work would be an option.

Signed-off-by: Thomas Hellström 

Patch LGTM but a few comments that are currently out of scope but want
to get out there for future work.


---
  drivers/gpu/drm/xe/xe_vm.c   | 33 +---
  drivers/gpu/drm/xe/xe_vm_types.h |  2 ++
  include/uapi/drm/xe_drm.h| 18 +
  3 files changed, 46 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index d9c000689002..3832f1f21def 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -936,6 +936,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
u64 start, u64 end,
bool read_only,
bool is_null,
+   bool pin,
u8 tile_mask)
  {
struct xe_vma *vma;
@@ -967,6 +968,8 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
vma->gpuva.flags |= XE_VMA_READ_ONLY;
if (is_null)
vma->gpuva.flags |= DRM_GPUVA_SPARSE;
+   if (pin)
+   vma->gpuva.flags |= XE_VMA_PINNED;
  
  	if (tile_mask) {

vma->tile_mask = tile_mask;
@@ -2367,6 +2370,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo 
*bo,
op->map.read_only =
operation & XE_VM_BIND_FLAG_READONLY;
op->map.is_null = operation & XE_VM_BIND_FLAG_NULL;
+   op->map.pin = operation & XE_VM_BIND_FLAG_PIN;
}
break;
case XE_VM_BIND_OP_UNMAP:
@@ -2431,7 +2435,8 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo 
*bo,
  }
  
  static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,

- u8 tile_mask, bool read_only, bool is_null)
+ u8 tile_mask, bool read_only, bool is_null,
+ bool pin)
  {
struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
struct xe_vma *vma;
@@ -2447,7 +2452,7 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct 
drm_gpuva_op_map *op,
}
vma = xe_vma_create(vm, bo, op->gem.offset,
op->va.addr, op->va.addr +
-   op->va.range - 1, read_only, is_null,
+   op->va.range - 1, read_only, is_null, pin,
tile_mask);
if (bo)
xe_bo_unlock(bo, &ww);
@@ -2562,7 +2567,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, 
struct xe_exec_queue *q,
  
  vma = new_vma(vm, &op->base.map,

  op->tile_mask, op->map.read_only,
- op->map.is_null);
+ op->map.is_null, op->map.pin);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto free_fence;
@@ -2587,10 +2592,13 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, 
struct xe_exec_queue *q,
bool is_null =
op->base.remap.unmap->va->flags 
&
DRM_GPUVA_SPARSE;
+   bool pin =
+   op->base.remap.unmap->va->flags 
&
+   XE_VMA_PINNED;

We probably should move the read_only, is_null, 

[PATCH v14 RESEND 0/6] drm/imx: Introduce i.MX8qm/qxp DPU DRM

2023-08-22 Thread Liu Ying
Hi,


This is the v14 series to introduce i.MX8qm/qxp Display Processing Unit(DPU)
DRM support.

DPU is comprised of a blit engine for 2D graphics, a display controller
and a command sequencer.  Outside of DPU, optional prefetch engines can
fetch data from memory prior to some DPU fetchunits of blit engine and
display controller.  The pre-fetchers support linear formats and Vivante
GPU tile formats.

Reference manual can be found at:
https://www.nxp.com/webapp/Download?colCode=IMX8DQXPRM


This patch set adds kernel modesetting support for the display controller part.
It supports two CRTCs per display controller, several planes, prefetch
engines and some properties of CRTC and plane.  Currently, the registers of
the controller are accessed without command sequencer involved, instead just by
using CPU.  DRM connectors would be created from the DPU KMS driver.


Patch 1 ~ 3 add dt-bindings for DPU and prefetch engines.
Patch 4 is a minor improvement of a macro to suppress warning as the KMS driver
uses it.
Patch 5 introduces the DPU DRM support.
Patch 6 updates MAINTAINERS.

Welcome comments, thanks.

v13->v14:
* Rebase the patch series to the latest drm-misc-next branch(v6.1-rc6 based).
* Include drm_fbdev_generic.h in dpu_drv.c due to the rebase.
* Fix dpu drm driver suspend/resume by properly get drm device through
  dev_get_drvdata().
* Use pm_ptr() macro for dpu core driver PM operations.
* Use pm_sleep_ptr() macro for dpu drm driver PM operations.
* Use DEFINE_SIMPLE_DEV_PM_OPS() macro to define dpu drm driver PM operations,
  instead of SIMPLE_DEV_PM_OPS().
* Update year of Copyright.
* Add SoC series name 'i.MX8'/'IMX8'/'imx8' to dpu driver module description,
  Kconfig name, dpu driver names and dpu driver object name.
* Resend based on the latest drm-misc-next branch.

v12->v13:
* Drop 'drm->irq_enabled = true;' from patch 5/6 to fix a potential build
  break reported by 'kernel test robot '.  drm->irq_enabled
  should not be used by imx-dpu drm as it is only used by legacy drivers
  with userspace modesetting.

v11->v12:
* Rebase the series upon v6.1-rc1.
* Minor update on Kconfigs, struct names and macro names for patch 5/6
  due to the rebase.

v10->v11:
* Rebase the series upon v6.0-rc1.
* Include drm_blend.h and drm_framebuffer.h in dpu-kms.c and dpu-plane.c
  to fix build errors due to the rebase.
* Fix a checkpatch warning for dpu-crtc.c.
* Properly use dev_err_probe() to return its return value directly where
  possible.

v9->v10:
* Rebase the series upon v5.18-rc1.
* Make 'checkpatch.pl --strict' happier for patch 5/6.
* Add Rob's R-b tag on patch 3/6.
* Add Laurentiu's R-b tag on patch 5/6.
* Add Laurentiu's A-b tag on patch 6/6.

v8->v9:
* Use drm_atomic_get_new_plane_state() in dpu_plane_atomic_update() for
  patch 5/6. (Laurentiu)
* Drop getting DPU DT alias ID for patch 5/6, as it is unused.
* Reference 'interrupts-extended' schema instead of 'interrupts' for patch 3/6
  to require an additional DPR interrupt(r_rtram_stall) because the reference
  manual does mention it, though the driver doesn't get/use it for now.
  Reference 'interrupt-names' schema to define the two DPR interrupt names -
  'dpr_wrap' and 'r_rtram_stall'.  Accordingly, patch 5/6 gets the 'dpr_wrap'
  interrupt by name.
* Drop Rob's R-b tag on patch 3/6, as review is needed.

v7->v8:
* Rebase this series up onto the latest drm-misc-next branch, due to DRM plane
  helper functions API change(atomic_check and atomic_update) from DRM atomic
  core.  So, dpu_plane_atomic_check() and dpu_plane_atomic_update() are updated
  accordingly in patch 5/6.  Also, rename plane->state variables and relevant
  DPU plane state variables in those two functions to reflect they are new
  states, like the patch 'drm: Rename plane->state variables in atomic update
  and disable' recently landed in drm-misc-next.
* Replace drm_gem_fb_prepare_fb() with drm_gem_plane_helper_prepare_fb() in
  patch 5/6, due to DRM core API change.
* Improve DPR burst length for GPU standard tile and 32bpp GPU super tile in
  patch 5/6 to align with the latest version of internal HW documentation.

v6->v7:
* Fix return value of dpu_get_irqs() if platform_get_irq() fails. (Laurentiu)
* Use the function array dpu_irq_handler[] to store individual DPU irq handlers.
  (Laurentiu)
* Call get/put() hooks directly to get/put DPU fetchunits for DPU plane groups.
  (Laurentiu)
* Shorten the names of individual DPU irq handlers by using DPU unit abbrev
  names to make writing dpu_irq_handler[] easier.
* Add Rob's R-b tag back on DPU dt-binding patch as change in v6 was reviewed.

v5->v6:
* Use graph schema in the DPU dt-binding.
* Do not use macros where possible in the DPU DRM driver. (Laurentiu)
* Break dpu_plane_atomic_check() into some smaller functions. (Laurentiu)
* Address some minor comments from Laurentiu on the DPU DRM driver.
* Add dpu_crtc_err() helper macro in the DPU DRM driver to tell dmesg
  which CRTC generates error.
* Drop calling dev_set_drvdata() from

[PATCH v14 RESEND 1/6] dt-bindings: display: imx: Add i.MX8qxp/qm DPU binding

2023-08-22 Thread Liu Ying
This patch adds bindings for i.MX8qxp/qm Display Processing Unit.

Reviewed-by: Rob Herring 
Signed-off-by: Liu Ying 
---
v7->v14:
* No change.

v6->v7:
* Add Rob's R-b tag back.

v5->v6:
* Use graph schema. So, drop Rob's R-b tag as review is needed.

v4->v5:
* No change.

v3->v4:
* Improve compatible property by using enum instead of oneOf+const. (Rob)
* Add Rob's R-b tag.

v2->v3:
* No change.

v1->v2:
* Fix yamllint warnings.
* Require bypass0 and bypass1 clocks for both i.MX8qxp and i.MX8qm, as the
  display controller subsystem spec does say that they exist.
* Use new dt binding way to add clocks in the example.
* Trivial tweaks for the example.

 .../bindings/display/imx/fsl,imx8qxp-dpu.yaml | 387 ++
 1 file changed, 387 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dpu.yaml

diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dpu.yaml 
b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dpu.yaml
new file mode 100644
index ..6b05c586cd9d
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dpu.yaml
@@ -0,0 +1,387 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx8qxp-dpu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qm/qxp Display Processing Unit
+
+maintainers:
+  - Liu Ying 
+
+description: |
+  The Freescale i.MX8qm/qxp Display Processing Unit(DPU) is comprised of two
+  main components that include a blit engine for 2D graphics accelerations
+  and a display controller for display output processing, as well as a command
+  sequencer.
+
+properties:
+  compatible:
+enum:
+  - fsl,imx8qxp-dpu
+  - fsl,imx8qm-dpu
+
+  reg:
+maxItems: 1
+
+  interrupts:
+items:
+  - description: |
+  store9 shadow load interrupt(blit engine)
+  - description: |
+  store9 frame complete interrupt(blit engine)
+  - description: |
+  store9 sequence complete interrupt(blit engine)
+  - description: |
+  extdst0 shadow load interrupt
+  (display controller, content stream 0)
+  - description: |
+  extdst0 frame complete interrupt
+  (display controller, content stream 0)
+  - description: |
+  extdst0 sequence complete interrupt
+  (display controller, content stream 0)
+  - description: |
+  extdst4 shadow load interrupt
+  (display controller, safety stream 0)
+  - description: |
+  extdst4 frame complete interrupt
+  (display controller, safety stream 0)
+  - description: |
+  extdst4 sequence complete interrupt
+  (display controller, safety stream 0)
+  - description: |
+  extdst1 shadow load interrupt
+  (display controller, content stream 1)
+  - description: |
+  extdst1 frame complete interrupt
+  (display controller, content stream 1)
+  - description: |
+  extdst1 sequence complete interrupt
+  (display controller, content stream 1)
+  - description: |
+  extdst5 shadow load interrupt
+  (display controller, safety stream 1)
+  - description: |
+  extdst5 frame complete interrupt
+  (display controller, safety stream 1)
+  - description: |
+  extdst5 sequence complete interrupt
+  (display controller, safety stream 1)
+  - description: |
+  disengcfg0 shadow load interrupt
+  (display controller, display stream 0)
+  - description: |
+  disengcfg0 frame complete interrupt
+  (display controller, display stream 0)
+  - description: |
+  disengcfg0 sequence complete interrupt
+  (display controller, display stream 0)
+  - description: |
+  framegen0 programmable interrupt0
+  (display controller, display stream 0)
+  - description: |
+  framegen0 programmable interrupt1
+  (display controller, display stream 0)
+  - description: |
+  framegen0 programmable interrupt2
+  (display controller, display stream 0)
+  - description: |
+  framegen0 programmable interrupt3
+  (display controller, display stream 0)
+  - description: |
+  signature0 shadow load interrupt
+  (display controller, display stream 0)
+  - description: |
+  signature0 measurement valid interrupt
+  (display controller, display stream 0)
+  - description: |
+  signature0 error condition interrupt
+  (display controller, display stream 0)
+  - description: |
+  disengcfg1 shadow load interrupt
+  (display controller, display stream 1)
+  - description: |
+  disengcfg1 frame complete interrupt
+  (display controller, display stream 1)
+  - description: |
+  diseng

[PATCH v14 RESEND 2/6] dt-bindings: display: imx: Add i.MX8qxp/qm PRG binding

2023-08-22 Thread Liu Ying
This patch adds bindings for i.MX8qxp/qm Display Prefetch Resolve Gasket.

Reviewed-by: Rob Herring 
Signed-off-by: Liu Ying 
---
v4->v14:
* No change.

v3->v4:
* Improve compatible property by using enum instead of oneOf+const. (Rob)
* Add Rob's R-b tag.

v2->v3:
* No change.

v1->v2:
* Use new dt binding way to add clocks in the example.

 .../bindings/display/imx/fsl,imx8qxp-prg.yaml | 60 +++
 1 file changed, 60 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-prg.yaml

diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-prg.yaml 
b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-prg.yaml
new file mode 100644
index ..3ff46e0d4e73
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-prg.yaml
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx8qxp-prg.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qm/qxp Display Prefetch Resolve Gasket
+
+maintainers:
+  - Liu Ying 
+
+description: |
+  The i.MX8qm/qxp Prefetch Resolve Gasket (PRG) is a gasket interface between
+  RTRAM controller and Display Controller.  The main function is to convert
+  the AXI interface to the RTRAM interface, which includes re-mapping the
+  ARADDR to a RTRAM address.
+
+properties:
+  compatible:
+enum:
+  - fsl,imx8qxp-prg
+  - fsl,imx8qm-prg
+
+  reg:
+maxItems: 1
+
+  clocks:
+items:
+  - description: rtram clock
+  - description: apb clock
+
+  clock-names:
+items:
+  - const: rtram
+  - const: apb
+
+  power-domains:
+maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - power-domains
+
+additionalProperties: false
+
+examples:
+  - |
+#include 
+#include 
+prg@5604 {
+compatible = "fsl,imx8qxp-prg";
+reg = <0x5604 0x1>;
+clocks = <&dc0_prg0_lpcg IMX_LPCG_CLK_0>,
+ <&dc0_prg0_lpcg IMX_LPCG_CLK_4>;
+clock-names = "rtram", "apb";
+power-domains = <&pd IMX_SC_R_DC_0>;
+};
-- 
2.37.1



[PATCH v14 RESEND 4/6] drm/atomic: Avoid unused-but-set-variable warning on for_each_old_plane_in_state

2023-08-22 Thread Liu Ying
Artificially use 'plane' and 'old_plane_state' to avoid 'not used' warning.
The precedent has already been set by other macros in the same file.

Acked-by: Daniel Vetter 
Signed-off-by: Liu Ying 
---
v6->v14:
* No change.

v5->v6:
* Fix commit message typo - s/Artifically/Artificially/

v4->v5:
* No change.

v3->v4:
* Add Daniel's A-b tag.

v2->v3:
* Add a missing blank line.

v1->v2:
* No change.

 include/drm/drm_atomic.h | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h
index cf8e1220a4ac..33fcc38a1dc9 100644
--- a/include/drm/drm_atomic.h
+++ b/include/drm/drm_atomic.h
@@ -954,7 +954,10 @@ void drm_state_dump(struct drm_device *dev, struct 
drm_printer *p);
 (__i)++)   \
for_each_if ((__state)->planes[__i].ptr &&  \
 ((plane) = (__state)->planes[__i].ptr, \
- (old_plane_state) = 
(__state)->planes[__i].old_state, 1))
+ (void)(plane) /* Only to avoid 
unused-but-set-variable warning */, \
+ (old_plane_state) = 
(__state)->planes[__i].old_state, \
+ (void)(old_plane_state) /* Only to avoid 
unused-but-set-variable warning */, 1))
+
 /**
  * for_each_new_plane_in_state - iterate over all planes in an atomic update
  * @__state: &struct drm_atomic_state pointer
-- 
2.37.1



[PATCH v14 RESEND 3/6] dt-bindings: display: imx: Add i.MX8qxp/qm DPR channel binding

2023-08-22 Thread Liu Ying
This patch adds bindings for i.MX8qxp/qm Display Prefetch Resolve Channel.

Reviewed-by: Rob Herring 
Signed-off-by: Liu Ying 
---
v10->v14:
* No change.

v9->v10:
* Add Rob's R-b tag.

v8->v9:
* Reference 'interrupts-extended' schema instead of 'interrupts' to require
  an additional interrupt(r_rtram_stall) because the reference manual does
  mention it, though the driver doesn't get/use it for now.
  Reference 'interrupt-names' schema to define the two interrupt names -
  'dpr_wrap' and 'r_rtram_stall'.
* Drop Rob's R-b tag, as review is needed.

v4->v8:
* No change.

v3->v4:
* Improve compatible property by using enum instead of oneOf+const. (Rob)
* Add Rob's R-b tag.

v2->v3:
* No change.

v1->v2:
* Use new dt binding way to add clocks in the example.

 .../display/imx/fsl,imx8qxp-dprc.yaml | 100 ++
 1 file changed, 100 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dprc.yaml

diff --git 
a/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dprc.yaml 
b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dprc.yaml
new file mode 100644
index ..bd94254c1288
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dprc.yaml
@@ -0,0 +1,100 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx8qxp-dprc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qm/qxp Display Prefetch Resolve Channel
+
+maintainers:
+  - Liu Ying 
+
+description: |
+  The i.MX8qm/qxp Display Prefetch Resolve Channel(DPRC) is an engine which
+  fetches display data before the display pipeline needs the data to drive
+  pixels in the active display region.  This data is transformed, or resolved,
+  from a variety of tiled buffer formats into linear format, if needed.
+  The DPR works with a double bank memory structure.  This memory structure is
+  implemented in the Resolve Tile Memory(RTRAM) and the banks are referred to
+  as A and B.  Each bank is either 4 or 8 lines high depending on the source
+  frame buffer format.
+
+properties:
+  compatible:
+enum:
+  - fsl,imx8qxp-dpr-channel
+  - fsl,imx8qm-dpr-channel
+
+  reg:
+maxItems: 1
+
+  interrupts-extended:
+items:
+  - description: DPR wrap interrupt
+  - description: |
+  'r_rtram_stall' interrupt which indicates relevant i.MX8qm/qxp
+  Prefetch Resolve Gasket(PRG) or PRGs are forcing an underflow
+  condition in the RTRAM.
+
+  interrupt-names:
+items:
+  - const: dpr_wrap
+  - const: r_rtram_stall
+
+  clocks:
+items:
+  - description: apb clock
+  - description: b clock
+  - description: rtram clock
+
+  clock-names:
+items:
+  - const: apb
+  - const: b
+  - const: rtram
+
+  fsl,sc-resource:
+$ref: /schemas/types.yaml#/definitions/uint32
+description: The SCU resource ID associated with this DPRC instance.
+
+  fsl,prgs:
+$ref: /schemas/types.yaml#/definitions/phandle-array
+description: |
+  List of phandle which points to PRG or PRGs associated with
+  this DPRC instance.
+
+  power-domains:
+maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts-extended
+  - interrupt-names
+  - clocks
+  - clock-names
+  - fsl,sc-resource
+  - fsl,prgs
+  - power-domains
+
+additionalProperties: false
+
+examples:
+  - |
+#include 
+#include 
+#include 
+dpr-channel@5610 {
+compatible = "fsl,imx8qxp-dpr-channel";
+reg = <0x5610 0x1>;
+interrupts-extended = <&gic GIC_SPI 51 IRQ_TYPE_LEVEL_HIGH>,
+  <&dc0_irqsteer 324>;
+interrupt-names = "dpr_wrap", "r_rtram_stall";
+clocks = <&dc0_dpr1_lpcg IMX_LPCG_CLK_4>,
+ <&dc0_dpr1_lpcg IMX_LPCG_CLK_5>,
+ <&dc0_rtram1_lpcg IMX_LPCG_CLK_0>;
+clock-names = "apb", "b", "rtram";
+fsl,sc-resource = ;
+fsl,prgs = <&dc0_prg4>, <&dc0_prg5>;
+power-domains = <&pd IMX_SC_R_DC_0>;
+};
-- 
2.37.1



[PATCH v14 RESEND 6/6] MAINTAINERS: add maintainer for i.MX8qxp DPU DRM driver

2023-08-22 Thread Liu Ying
Add myself as the maintainer of the i.MX8qxp DPU DRM driver.

Acked-by: Laurentiu Palcu 
Signed-off-by: Liu Ying 
---
v11->v14:
* No change.

v10->v11:
* Rebase upon v6.0-rc1.

v9->v10:
* Add Laurentiu's A-b tag.

v1->v9:
* No change.

 MAINTAINERS | 9 +
 1 file changed, 9 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 6308efa121e1..dc0a0d15edad 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6918,6 +6918,15 @@ F:   
Documentation/devicetree/bindings/display/bridge/fsl,imx8qxp-pixel-link.yaml
 F: 
Documentation/devicetree/bindings/display/bridge/fsl,imx8qxp-pxl2dpi.yaml
 F: drivers/gpu/drm/bridge/imx/
 
+DRM DRIVERS FOR FREESCALE i.MX8QXP
+M: Liu Ying 
+L: dri-devel@lists.freedesktop.org
+S: Maintained
+F: Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dprc.yaml
+F: Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dpu.yaml
+F: Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-prg.yaml
+F: drivers/gpu/drm/imx/dpu/
+
 DRM DRIVERS FOR GMA500 (Poulsbo, Moorestown and derivative chipsets)
 M: Patrik Jakobsson 
 L: dri-devel@lists.freedesktop.org
-- 
2.37.1



Re: [PATCH v2 1/9] drm/sched: Convert drm scheduler to use a work queue rather than kthread

2023-08-22 Thread Christian König

Am 21.08.23 um 21:07 schrieb Danilo Krummrich:

On 8/21/23 20:12, Christian König wrote:

Am 21.08.23 um 20:01 schrieb Danilo Krummrich:

On 8/21/23 16:07, Christian König wrote:

Am 18.08.23 um 13:58 schrieb Danilo Krummrich:

[SNIP]

I only see two possible outcomes:
1. You return an -EBUSY (or similar) error code indicating that the 
hw can't receive more commands.

2. Wait on previously pushed commands to be executed.
(3. Your driver crashes because you accidentally overwrite stuff in 
the ring buffer which is still being executed. I just assume that's 
prevented).


Resolution #1 with -EBUSY is actually something the UAPI should 
not do, because your UAPI then depends on the specific timing of 
submissions which is a really bad idea.


Resolution #2 is usually bad because it forces the hw to run dry 
between submission and so degrade performance.


I agree, that is a good reason for at least limiting the maximum 
job size to half of the ring size.


However, there could still be cases where two subsequent jobs are 
submitted with just a single IB, which as is would still block 
subsequent jobs to be pushed to the ring although there is still 
plenty of space. Depending on the (CPU) scheduler latency, such a 
case can let the HW run dry as well.


Yeah, that was intentionally not done as well. The crux here is 
that the more you push to the hw the worse the scheduling 
granularity becomes. It's just that neither Xe nor Nouveau relies 
that much on the scheduling granularity at all (because of hw queues).


But Xe doesn't seem to need that feature and I would still try to 
avoid it because the more you have pushed to the hw the harder it 
is to get going again after a reset.




Surely, we could just continue to decrease the maximum job size even 
further, but this would result in further overhead on user and 
kernel for larger IB counts. Tracking the actual job size seems to 
be the better solution for drivers where the job size can vary 
over a rather huge range.


I strongly disagree on that. A larger ring buffer is trivial to 
allocate 


That sounds like a workaround to me. The problem, in the case above, 
isn't that the ring buffer does not have enough space, the problem 
is that we account for the maximum job size although the actual job 
size is much smaller. And enabling the scheduler to track the actual 
job size is trivial as well.


That's what I agree on, so far I just didn't see the reason for doing 
it but at least a few reasons for not doing it.




and if userspace submissions are so small that the scheduler can't 
keep up submitting them then your ring buffer size is your smallest 
problem.


In other words the submission overhead will completely kill your 
performance and you should probably consider stuffing more into a 
single submission.


I fully agree and that is also the reason why I want to keep the 
maximum job size as large as possible.


However, afaik with Vulkan it's the applications themselves deciding 
when and with how many command buffers a queue is submitted (@Faith: 
please correct me if I'm wrong). Hence, why not optimize for this 
case as well? It's not that it would make another case worse, right?


As I said it does make both the scheduling granularity as well as the 
reset behavior worse.


As you already mentioned Nouveau (and XE) don't really rely much on 
scheduling granularity. For Nouveau, the same is true for the reset 
behavior; if things go south the channel is killed anyway. Userspace 
would just request a new ring in this case.


Hence, I think Nouveau would profit from accounting the actual job 
size. And at the same time, other drivers having a benefit of always 
accounting for the maximum job size would still do so, by default.


Arbitrary ratios of how much the job size contributes to the ring 
being considered as full would also be possible.


That would indeed be rather interesting since for a bunch of drivers the 
limiting part is not the ring buffer size, but rather the utilization of 
engines.


But no idea how to properly design that. You would have multiple values 
to check instead of just one.


Christian.



- Danilo



In general I think we should try to push just enough work to the 
hardware to keep it busy and not as much as possible.


So as long as nobody from userspace comes and says we absolutely need 
to optimize this use case I would rather not do it.


Regards,
Christian.



- Danilo



Regards,
Christian.



- Danilo












Re: using gpu's to accelerate the linux kernel

2023-08-22 Thread Raj J Putari
nice read!

i was thinking of a kernel module that does stuff like offload some
work to the gpu.. like we can have like gpuaccel.ko that does stuff
like wrap gpu calls to stuff like compiles or low level stuff like
heavy computes, just looked up a few apis and it looks like opencl and
cuda are meant for 3d computation, so some way to access the gpus
compute internals would take some hacking, not sure if its possible

it would be awesome if we can offload some compilation from stuff like
cc and c++ to the gpu, if the technology is available (maybe with
amd?)


On Mon, Aug 21, 2023 at 7:21 AM Enrico Weigelt, metux IT consult
 wrote:
>
> On 27.04.23 12:51, Raj J Putari wrote:
>
> > id write it but im an amatuer and i dont have time to read the kernel
> > source and experiment, we're talking about nvidia and amd video cards
> > assisting in processing heavy data.
>
> obviously not w/ NVidia (except for some old, already reverse-engineered
> gpus), since Nvidia is doing all they can hiding the necessary specs
> to write drivers from us.
>
> Forget about Nvidia. Never ever waste a single penny on that.
>
> > lets say youre compiling a kernel, you can write optimizations into
> > the kernel through a cuda module and offload cpu data directly to the
> > gpu using opencl or cuda or what amd supplies,
>
> cuda, opencl, etc, are *userland* *library* APIs. They don't work inside
> the kernel. One had to write something similar *inside* the kernel
> (which is working very differently from userland). Also consider that
> the most complex stuff (eg. creating command streams) is done in
> userland (eg. mesa's pipe drivers, ...), the kernel is just responsible
> for some more lowlevel things like buffer management, modesetting, etc.
>
>
> If you wanna go that route, you'd have to create something like Mesa's
> Gallium inside the kernel. Besides that this is a pretty huge task
> (and you'd have to reimplement lots of drivers), you'd also have to
> find a way to get a good performance when calling from userland (note
> that syscalls, even ioctls, etc, are much more expensive than just
> plain library function calls inside the same process). Probably comes
> down to using some bytecode (tgsi ?) and loading it somewhat similar
> to bpf.
>
>
> Assuming that's really up and running one day, it indeed could solve
> other problems, eg. clear separation between containers and hosts
> (for now, containers still needs the userland parts of gpu drivers
> for the corresponding host hardware).
>
> But be warned: this is a huge endavour, *a lot* work to do and hard
> to get it right.
>
>
> OTOH, I'm yet sceptical whether there's much practical use cases for
> using GPUs by the kernel *itself*. What exactly do you have in mind
> here ?
>
>
> --mtx
>
> --
> ---
> Hinweis: unverschlüsselte E-Mails können leicht abgehört und manipuliert
> werden ! Für eine vertrauliche Kommunikation senden Sie bitte ihren
> GPG/PGP-Schlüssel zu.
> ---
> Enrico Weigelt, metux IT consult
> Free software and Linux embedded engineering
> i...@metux.net -- +49-151-27565287


[PATCH] gpu: drm: i915: fix documentation style

2023-08-22 Thread Ricardo B. Marliere
This patch fixes the following sphinx warnings in the htmldocs make target:

Documentation/gpu/i915:546: ./drivers/gpu/drm/i915/gt/uc/intel_huc.c:29: ERROR: 
Unexpected indentation.
Documentation/gpu/i915:546: ./drivers/gpu/drm/i915/gt/uc/intel_huc.c:30: 
WARNING: Block quote ends without a blank line; unexpected unindent.
Documentation/gpu/i915:546: ./drivers/gpu/drm/i915/gt/uc/intel_huc.c:35: 
WARNING: Bullet list ends without a blank line; unexpected unindent.

Signed-off-by: Ricardo B. Marliere 
---
 drivers/gpu/drm/i915/gt/uc/intel_huc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index ddd146265beb..fa70defcb5b2 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -26,6 +26,7 @@
  * The kernel driver is only responsible for loading the HuC firmware and
  * triggering its security authentication. This is done differently depending
  * on the platform:
+ *
  * - older platforms (from Gen9 to most Gen12s): the load is performed via DMA
  *   and the authentication via GuC
  * - DG2: load and authentication are both performed via GSC.
@@ -33,6 +34,7 @@
  *   not-DG2 older platforms), while the authentication is done in 2-steps,
  *   a first auth for clear-media workloads via GuC and a second one for all
  *   workloads via GSC.
+ *
  * On platforms where the GuC does the authentication, to correctly do so the
  * HuC binary must be loaded before the GuC one.
  * Loading the HuC is optional; however, not using the HuC might negatively
-- 
2.40.1



Re: [PATCH v2 03/12] drm/bridge: tc358768: Fix bit updates

2023-08-22 Thread Maxim Schwalm
Hi Tomi,

On 16.08.23 13:25, Tomi Valkeinen wrote:
> The driver has a few places where it does:
> 
> if (thing_is_enabled_in_config)
>   update_thing_bit_in_hw()
> 
> This means that if the thing is _not_ enabled, the bit never gets
> cleared. This affects the h/vsyncs and continuous DSI clock bits.
> 
> Fix the driver to always update the bit.
> 
> Fixes: ff1ca6397b1d ("drm/bridge: Add tc358768 driver")
> Signed-off-by: Tomi Valkeinen 
> ---
>  drivers/gpu/drm/bridge/tc358768.c | 13 +++--
>  1 file changed, 7 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/bridge/tc358768.c 
> b/drivers/gpu/drm/bridge/tc358768.c
> index bc97a837955b..b668f77673c3 100644
> --- a/drivers/gpu/drm/bridge/tc358768.c
> +++ b/drivers/gpu/drm/bridge/tc358768.c
> @@ -794,8 +794,8 @@ static void tc358768_bridge_pre_enable(struct drm_bridge 
> *bridge)
>   val |= BIT(i + 1);
>   tc358768_write(priv, TC358768_HSTXVREGEN, val);
>  
> - if (!(mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS))
> - tc358768_write(priv, TC358768_TXOPTIONCNTRL, 0x1);
> + tc358768_write(priv, TC358768_TXOPTIONCNTRL,
> +(mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS) ? 0 : 
> BIT(0));
>  
>   /* TXTAGOCNT[26:16] RXTASURECNT[10:0] */
>   val = tc358768_to_ns((lptxcnt + 1) * dsibclk_nsk * 4);
> @@ -861,11 +861,12 @@ static void tc358768_bridge_pre_enable(struct 
> drm_bridge *bridge)
>   tc358768_write(priv, TC358768_DSI_HACT, hact);
>  
>   /* VSYNC polarity */
> - if (!(mode->flags & DRM_MODE_FLAG_NVSYNC))
> - tc358768_update_bits(priv, TC358768_CONFCTL, BIT(5), BIT(5));
> + tc358768_update_bits(priv, TC358768_CONFCTL, BIT(5),
> +  (mode->flags & DRM_MODE_FLAG_PVSYNC) ? BIT(5) : 0);
> +
>   /* HSYNC polarity */
> - if (mode->flags & DRM_MODE_FLAG_PHSYNC)
> - tc358768_update_bits(priv, TC358768_PP_MISC, BIT(0), BIT(0));
> + tc358768_update_bits(priv, TC358768_PP_MISC, BIT(0),
> +  (mode->flags & DRM_MODE_FLAG_PHSYNC) ? BIT(0) : 0);
>  
>   /* Start DSI Tx */
>   tc358768_write(priv, TC358768_DSI_START, 0x1);
> 

shouldn't the last patch of this series be moved before this one?
Currently, this patch will still lead to a temporary regression until
patch #12 is applied.

Best regards,
Maxim




[PATCH v1] drivers: gpu: drm: i915: intel_huc: fix formatting warnings

2023-08-22 Thread Nikita B
Fix formatting warnings when running "make htmldocs":
./drivers/gpu/drm/i915/gt/uc/intel_huc.c:29: WARNING: Unexpected indentation.
./drivers/gpu/drm/i915/gt/uc/intel_huc.c:30: WARNING: Block quote ends without 
a blank line; unexpected unindent.
./drivers/gpu/drm/i915/gt/uc/intel_huc.c:35: WARNING: Bullet list ends without 
a blank line; unexpected unindent.

Signed-off-by: Nikita B 
---
 drivers/gpu/drm/i915/gt/uc/intel_huc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index ddd146265beb..fa70defcb5b2 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -26,6 +26,7 @@
  * The kernel driver is only responsible for loading the HuC firmware and
  * triggering its security authentication. This is done differently depending
  * on the platform:
+ *
  * - older platforms (from Gen9 to most Gen12s): the load is performed via DMA
  *   and the authentication via GuC
  * - DG2: load and authentication are both performed via GSC.
@@ -33,6 +34,7 @@
  *   not-DG2 older platforms), while the authentication is done in 2-steps,
  *   a first auth for clear-media workloads via GuC and a second one for all
  *   workloads via GSC.
+ *
  * On platforms where the GuC does the authentication, to correctly do so the
  * HuC binary must be loaded before the GuC one.
  * Loading the HuC is optional; however, not using the HuC might negatively
-- 
2.34.1



[PATCH] Remove the parameter not described warning

2023-08-22 Thread Vinayak Hegde
Signed-off-by: Vinayak Hegde 
---
 include/uapi/linux/sync_file.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/linux/sync_file.h b/include/uapi/linux/sync_file.h
index 7e42a5b7558b..ff0a931833e2 100644
--- a/include/uapi/linux/sync_file.h
+++ b/include/uapi/linux/sync_file.h
@@ -56,7 +56,7 @@ struct sync_fence_info {
  * @name:  name of fence
  * @status:status of fence. 1: signaled 0:active <0:error
  * @flags: sync_file_info flags
- * @num_fences number of fences in the sync_file
+ * @num_fences:number of fences in the sync_file
  * @pad:   padding for 64-bit alignment, should always be zero
  * @sync_fence_info: pointer to array of struct &sync_fence_info with all
  *  fences in the sync_file
-- 
2.34.1



Re: using gpu's to accelerate the linux kernel

2023-08-22 Thread Enrico Weigelt, metux IT consult

On 27.04.23 12:51, Raj J Putari wrote:


id write it but im an amatuer and i dont have time to read the kernel
source and experiment, we're talking about nvidia and amd video cards
assisting in processing heavy data.


obviously not w/ NVidia (except for some old, already reverse-engineered
gpus), since Nvidia is doing all they can hiding the necessary specs
to write drivers from us.

Forget about Nvidia. Never ever waste a single penny on that.


lets say youre compiling a kernel, you can write optimizations into
the kernel through a cuda module and offload cpu data directly to the
gpu using opencl or cuda or what amd supplies,


cuda, opencl, etc, are *userland* *library* APIs. They don't work inside
the kernel. One had to write something similar *inside* the kernel
(which is working very differently from userland). Also consider that
the most complex stuff (eg. creating command streams) is done in
userland (eg. mesa's pipe drivers, ...), the kernel is just responsible
for some more lowlevel things like buffer management, modesetting, etc.


If you wanna go that route, you'd have to create something like Mesa's
Gallium inside the kernel. Besides that this is a pretty huge task
(and you'd have to reimplement lots of drivers), you'd also have to
find a way to get a good performance when calling from userland (note
that syscalls, even ioctls, etc, are much more expensive than just
plain library function calls inside the same process). Probably comes
down to using some bytecode (tgsi ?) and loading it somewhat similar
to bpf.


Assuming that's really up and running one day, it indeed could solve
other problems, eg. clear separation between containers and hosts
(for now, containers still needs the userland parts of gpu drivers
for the corresponding host hardware).

But be warned: this is a huge endeavour, *a lot* of work to do and hard
to get it right.


OTOH, I'm yet sceptical whether there's much practical use cases for
using GPUs by the kernel *itself*. What exactly do you have in mind
here ?


--mtx

--
---
Hinweis: unverschlüsselte E-Mails können leicht abgehört und manipuliert
werden ! Für eine vertrauliche Kommunikation senden Sie bitte ihren
GPG/PGP-Schlüssel zu.
---
Enrico Weigelt, metux IT consult
Free software and Linux embedded engineering
i...@metux.net -- +49-151-27565287


Re: [PATCH] drm/prime: Support page array >= 4GB

2023-08-22 Thread Christian König




Am 21.08.23 um 22:02 schrieb Philip Yang:

Without unsigned long typecast, the size is passed in as zero if page
array size >= 4GB, nr_pages >= 0x100000, then sg list converted will
have the first and the last chunk lost.


Good catch, but I'm not sure if this is enough to make it work.

In addition to that, I don't think we have a use case for BOs > 4GiB.

Christian.



Signed-off-by: Philip Yang 
---
  drivers/gpu/drm/drm_prime.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index f924b8b4ab6b..2630ad2e504d 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -830,7 +830,7 @@ struct sg_table *drm_prime_pages_to_sg(struct drm_device 
*dev,
if (max_segment == 0)
max_segment = UINT_MAX;
err = sg_alloc_table_from_pages_segment(sg, pages, nr_pages, 0,
-   nr_pages << PAGE_SHIFT,
+   (unsigned long)nr_pages << 
PAGE_SHIFT,
max_segment, GFP_KERNEL);
if (err) {
kfree(sg);




Re: [PATCH v2 1/9] drm/sched: Convert drm scheduler to use a work queue rather than kthread

2023-08-22 Thread Christian König

Am 21.08.23 um 21:46 schrieb Faith Ekstrand:
On Mon, Aug 21, 2023 at 1:13 PM Christian König 
 wrote:


[SNIP]
So as long as nobody from userspace comes and says we absolutely
need to
optimize this use case I would rather not do it.


This is a place where nouveau's needs are legitimately different from 
AMD or Intel, I think.  NVIDIA's command streamer model is very 
different from AMD and Intel.  On AMD and Intel, each EXEC turns into 
a single small packet (on the order of 16B) which kicks off a command 
buffer.  There may be a bit of cache management or something around it 
but that's it.  From there, it's userspace's job to make one command 
buffer chain to another until it's finally done and then do a 
"return", whatever that looks like.


NVIDIA's model is much more static.  Each packet in the HW/FW ring is 
an address and a size and that much data is processed and then it 
grabs the next packet and processes. The result is that, if we use 
multiple buffers of commands, there's no way to chain them together.  
We just have to pass the whole list of buffers to the kernel.


So far that is actually completely identical to what AMD has.

A single EXEC ioctl / job may have 500 such addr+size packets 
depending on how big the command buffer is.


And that is what I don't understand. Why would you need hundreds of such 
addr+size packets?


This is basically identical to what AMD has (well on newer hw there is 
an extension in the CP packets to JUMP/CALL subsequent IBs, but this 
isn't widely used as far as I know).


Previously the limit was something like 4 which we extended to because 
Bas came up with similar requirements for the AMD side from RADV.


But essentially those approaches with hundreds of IBs don't sound like 
a good idea to me.


It gets worse on pre-Turing hardware where we have to split the batch 
for every single DrawIndirect or DispatchIndirect.


Lest you think NVIDIA is just crazy here, it's a perfectly reasonable 
model if you assume that userspace is feeding the firmware.  When 
that's happening, you just have a userspace thread that sits there and 
feeds the ringbuffer with whatever is next and you can marshal as much 
data through as you want. Sure, it'd be nice to have a 2nd level batch 
thing that gets launched from the FW ring and has all the individual 
launch commands but it's not at all necessary.


What does that mean from a gpu_scheduler PoV? Basically, it means a 
variable packet size.


What does this mean for implementation? IDK.  One option would be to 
teach the scheduler about actual job sizes. Another would be to 
virtualize it and have another layer underneath the scheduler that 
does the actual feeding of the ring. Another would be to decrease the 
job size somewhat and then have the front-end submit as many jobs as 
it needs to service userspace and only put the out-fences on the last 
job. All the options kinda suck.


Yeah, agree. The job size Danilo suggested is still the least painful.

Christian.



~Faith


[PATCH] accel/ivpu/40xx: Fix buttress interrupt handling

2023-08-22 Thread Stanislaw Gruszka
From: Karol Wachowski 

Buttress spec requires that the interrupt status is cleared at
the source first (before clearing MTL_BUTTRESS_INTERRUPT_STAT),
that implies that we have to mask out the global interrupt while
handling buttress interrupts.

Fixes: 79cdc56c4a54 ("accel/ivpu: Add initial support for VPU 4")
Signed-off-by: Karol Wachowski 
Signed-off-by: Stanislaw Gruszka 
---
 drivers/accel/ivpu/ivpu_hw_40xx.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c 
b/drivers/accel/ivpu/ivpu_hw_40xx.c
index 34626d66fa10..00c5dbbe6847 100644
--- a/drivers/accel/ivpu/ivpu_hw_40xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_40xx.c
@@ -1046,7 +1046,8 @@ static irqreturn_t ivpu_hw_40xx_irqb_handler(struct 
ivpu_device *vdev, int irq)
if (status == 0)
return IRQ_NONE;
 
-   REGB_WR32(VPU_40XX_BUTTRESS_INTERRUPT_STAT, status);
+   /* Disable global interrupt before handling local buttress interrupts */
+   REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x1);
 
if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE, status))
ivpu_dbg(vdev, IRQ, "FREQ_CHANGE");
@@ -1092,6 +1093,12 @@ static irqreturn_t ivpu_hw_40xx_irqb_handler(struct 
ivpu_device *vdev, int irq)
schedule_recovery = true;
}
 
+   /* This must be done after interrupts are cleared at the source. */
+   REGB_WR32(VPU_40XX_BUTTRESS_INTERRUPT_STAT, status);
+
+   /* Re-enable global interrupt */
+   REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x0);
+
if (schedule_recovery)
ivpu_pm_schedule_recovery(vdev);
 
-- 
2.25.1



Re: [PATCH v6 5/6] drm: Refuse to async flip with atomic prop changes

2023-08-22 Thread Sebastian Wick
On Tue, Aug 15, 2023 at 03:57:09PM -0300, André Almeida wrote:
> Given that prop changes may lead to modesetting, which would defeat the
> fast path of the async flip, refuse any atomic prop change for async
> flips in atomic API. The only exceptions are the framebuffer ID to flip
> to and the mode ID, that could be referring to an identical mode.

FYI, the solid fill series adds an enum drm_plane_pixel_source and a
new solid fill pixel source. Changing the solid fill color would be
effectively the same as changing the FB_ID. On the other hand, changing
the FB_ID no longer necessarily results in an update when the pixel
source is set to solid fill.

> Signed-off-by: André Almeida 
> ---
> v5: no changes
> v4: new patch
> ---
>  drivers/gpu/drm/drm_atomic_helper.c |  5 +++
>  drivers/gpu/drm/drm_atomic_uapi.c   | 52 +++--
>  drivers/gpu/drm/drm_crtc_internal.h |  2 +-
>  drivers/gpu/drm/drm_mode_object.c   |  2 +-
>  4 files changed, 56 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_atomic_helper.c 
> b/drivers/gpu/drm/drm_atomic_helper.c
> index 292e38eb6218..b34e3104afd1 100644
> --- a/drivers/gpu/drm/drm_atomic_helper.c
> +++ b/drivers/gpu/drm/drm_atomic_helper.c
> @@ -629,6 +629,11 @@ drm_atomic_helper_check_modeset(struct drm_device *dev,
>   WARN_ON(!drm_modeset_is_locked(&crtc->mutex));
>  
>   if (!drm_mode_equal(&old_crtc_state->mode, 
> &new_crtc_state->mode)) {
> + if (new_crtc_state->async_flip) {
> + drm_dbg_atomic(dev, "[CRTC:%d:%s] no mode 
> changes allowed during async flip\n",
> +crtc->base.id, crtc->name);
> + return -EINVAL;
> + }
>   drm_dbg_atomic(dev, "[CRTC:%d:%s] mode changed\n",
>  crtc->base.id, crtc->name);
>   new_crtc_state->mode_changed = true;
> diff --git a/drivers/gpu/drm/drm_atomic_uapi.c 
> b/drivers/gpu/drm/drm_atomic_uapi.c
> index a15121e75a0a..6c423a7e8c7b 100644
> --- a/drivers/gpu/drm/drm_atomic_uapi.c
> +++ b/drivers/gpu/drm/drm_atomic_uapi.c
> @@ -1006,13 +1006,28 @@ int drm_atomic_connector_commit_dpms(struct 
> drm_atomic_state *state,
>   return ret;
>  }
>  
> +static int drm_atomic_check_prop_changes(int ret, uint64_t old_val, uint64_t 
> prop_value,
> +  struct drm_property *prop)
> +{
> + if (ret != 0 || old_val != prop_value) {
> + drm_dbg_atomic(prop->dev,
> +"[PROP:%d:%s] No prop can be changed during 
> async flip\n",
> +prop->base.id, prop->name);
> + return -EINVAL;
> + }
> +
> + return 0;
> +}
> +
>  int drm_atomic_set_property(struct drm_atomic_state *state,
>   struct drm_file *file_priv,
>   struct drm_mode_object *obj,
>   struct drm_property *prop,
> - uint64_t prop_value)
> + uint64_t prop_value,
> + bool async_flip)
>  {
>   struct drm_mode_object *ref;
> + uint64_t old_val;
>   int ret;
>  
>   if (!drm_property_change_valid_get(prop, prop_value, &ref))
> @@ -1029,6 +1044,13 @@ int drm_atomic_set_property(struct drm_atomic_state 
> *state,
>   break;
>   }
>  
> + if (async_flip) {
> + ret = drm_atomic_connector_get_property(connector, 
> connector_state,
> + prop, &old_val);
> + ret = drm_atomic_check_prop_changes(ret, old_val, 
> prop_value, prop);
> + break;
> + }
> +
>   ret = drm_atomic_connector_set_property(connector,
>   connector_state, file_priv,
>   prop, prop_value);
> @@ -1037,6 +1059,7 @@ int drm_atomic_set_property(struct drm_atomic_state 
> *state,
>   case DRM_MODE_OBJECT_CRTC: {
>   struct drm_crtc *crtc = obj_to_crtc(obj);
>   struct drm_crtc_state *crtc_state;
> + struct drm_mode_config *config = &crtc->dev->mode_config;
>  
>   crtc_state = drm_atomic_get_crtc_state(state, crtc);
>   if (IS_ERR(crtc_state)) {
> @@ -1044,6 +1067,18 @@ int drm_atomic_set_property(struct drm_atomic_state 
> *state,
>   break;
>   }
>  
> + /*
> +  * We allow mode_id changes here for async flips, because we
> +  * check later on drm_atomic_helper_check_modeset() callers if
> +  * there are modeset changes or they are equal
> +  */
> + if (async_flip && prop != config->prop_mode_id) {
> + ret = drm_atomic_crtc_get_property(crtc, crtc_state,
> +  

TODO list task: Replace drm_detect_hdmi_monitor() with drm_display_info.is_hdmi

2023-08-22 Thread Sharq Mohammad
Hello All,

I am a usual kernel developer, and wanted to contribute to the open source.
I saw a small TODO list in the DRM graphics subsystem, with some tasks.
So, just wanted to ask, is anyone working on the task:
*Replace drm_detect_hdmi_monitor() with drm_display_info.is_hdmi*

It's on the TODO list.

Thanks and regards,
Sharique


Re: [PATCH v6 6/6] drm/doc: Define KMS atomic state set

2023-08-22 Thread Michel Dänzer
On 8/21/23 22:02, André Almeida wrote:
> Em 17/08/2023 07:37, Michel Dänzer escreveu:
>> On 8/15/23 20:57, André Almeida wrote:
>>> From: Pekka Paalanen 
>>>
>>> Specify how the atomic state is maintained between userspace and
>>> kernel, plus the special case for async flips.
>>>
>>> Signed-off-by: Pekka Paalanen 
>>> Signed-off-by: André Almeida 
>>
>> [...]
>>
>>> +An atomic commit with the flag DRM_MODE_PAGE_FLIP_ASYNC is allowed to
>>> +effectively change only the FB_ID property on any planes. No-operation 
>>> changes
>>> +are ignored as always. [...]
>>
>> During the hackfest in Brno, it was mentioned that a commit which re-sets 
>> the same FB_ID could actually have an effect with VRR: It could trigger 
>> scanout of the next frame before vertical blank has reached its maximum 
>> duration. Some kind of mechanism is required for this in order to allow user 
>> space to perform low frame rate compensation.
>>
> 
> I believe the documentation already addresses that sending redundant 
> information may not lead to the desired behavior during an async flip. Do you 
> think adding a note about using the same FB_ID would be helpful?

Maybe not.


-- 
Earthling Michel Dänzer|  https://redhat.com
Libre software enthusiast  | Mesa and Xwayland developer



Re: [PATCH v7] drm/doc: Document DRM device reset expectations

2023-08-22 Thread Sebastian Wick
On Fri, Aug 18, 2023 at 05:06:42PM -0300, André Almeida wrote:
> Create a section that specifies how to deal with DRM device resets for
> kernel and userspace drivers.
> 
> Signed-off-by: André Almeida 
> 
> ---
> 
> v7 changes:
>  - s/application/graphical API contex/ in the robustness part (Michel)
>  - Grammar fixes (Randy)
> 
> v6: 
> https://lore.kernel.org/lkml/20230815185710.159779-1-andrealm...@igalia.com/
> 
> v6 changes:
>  - Due to substantial changes in the content, dropped Pekka's Acked-by
>  - Grammar fixes (Randy)
>  - Add paragraph about disabling device resets
>  - Add note about integrating reset tracking in drm/sched
>  - Add note that KMD should return failure for contexts affected by
>resets and UMD should check for this
>  - Add note about lack of consensus around what to do about non-robust
>apps
> 
> v5: 
> https://lore.kernel.org/dri-devel/20230627132323.115440-1-andrealm...@igalia.com/
> ---
>  Documentation/gpu/drm-uapi.rst | 77 ++
>  1 file changed, 77 insertions(+)
> 
> diff --git a/Documentation/gpu/drm-uapi.rst b/Documentation/gpu/drm-uapi.rst
> index 65fb3036a580..3694bdb977f5 100644
> --- a/Documentation/gpu/drm-uapi.rst
> +++ b/Documentation/gpu/drm-uapi.rst
> @@ -285,6 +285,83 @@ for GPU1 and GPU2 from different vendors, and a third 
> handler for
>  mmapped regular files. Threads cause additional pain with signal
>  handling as well.
>  
> +Device reset
> +
> +
> +The GPU stack is really complex and is prone to errors, from hardware bugs,
> +faulty applications and everything in between the many layers. Some errors
> +require resetting the device in order to make the device usable again. This
> +section describes the expectations for DRM and usermode drivers when a
> +device resets and how to propagate the reset status.
> +
> +Device resets can not be disabled without tainting the kernel, which can 
> lead to
> +hanging the entire kernel through shrinkers/mmu_notifiers. Userspace role in
> +device resets is to propagate the message to the application and apply any
> +special policy for blocking guilty applications, if any. Corollary is that
> +debugging a hung GPU context require hardware support to be able to preempt 
> such
> +a GPU context while it's stopped.
> +
> +Kernel Mode Driver
> +--
> +
> +The KMD is responsible for checking if the device needs a reset, and to 
> perform
> +it as needed. Usually a hang is detected when a job gets stuck executing. KMD
> +should keep track of resets, because userspace can query any time about the
> +reset status for a specific context. This is needed to propagate to the rest 
> of
> +the stack that a reset has happened. Currently, this is implemented by each
> +driver separately, with no common DRM interface. Ideally this should be 
> properly
> +integrated at DRM scheduler to provide a common ground for all drivers. 
> After a
> +reset, KMD should reject new command submissions for affected contexts.
> +
> +User Mode Driver
> +----------------
> +
> +After command submission, UMD should check if the submission was accepted or
> +rejected. After a reset, KMD should reject submissions, and UMD can issue an
> +ioctl to the KMD to check the reset status, and this can be checked more 
> often
> +if the UMD requires it. After detecting a reset, UMD will then proceed to 
> report
> +it to the application using the appropriate API error code, as explained in 
> the
> +section below about robustness.
> +
> +Robustness
> +----------
> +
> +The only way to try to keep a graphical API context working after a reset is 
> if
> +it complies with the robustness aspects of the graphical API that it is 
> using.
> +
> +Graphical APIs provide ways to applications to deal with device resets. 
> However,
> +there is no guarantee that the app will use such features correctly, and a
> +userspace that doesn't support robust interfaces (like a non-robust
> +OpenGL context or API without any robustness support like libva) leave the
> +robustness handling entirely to the userspace driver. There is no strong
> +community consensus on what the userspace driver should do in that case,
> +since all reasonable approaches have some clear downsides.
> +
> +OpenGL
> +~~~~~~
> +
> +Apps using OpenGL should use the available robust interfaces, like the
> +extension ``GL_ARB_robustness`` (or ``GL_EXT_robustness`` for OpenGL ES). 
> This
> +interface tells if a reset has happened, and if so, all the context state is
> +considered lost and the app proceeds by creating new ones. There's no 
> consensus
> +on what to do to if robustness is not in use.
> +
> +Vulkan
> +~~~~~~
> +
> +Apps using Vulkan should check for ``VK_ERROR_DEVICE_LOST`` for submissions.
> +This error code means, among other things, that a device reset has happened 
> and
> +it needs to recreate the contexts to keep going.
> +
> +Reporting causes of resets
> +--------------------------
> +
> +Apart from propagating the reset through

Re: [PATCH v2 07/34] drm/amd/display: explicitly define EOTF and inverse EOTF

2023-08-22 Thread Pekka Paalanen
On Thu, 10 Aug 2023 15:02:47 -0100
Melissa Wen  wrote:

> Instead of relying on color block names to get the transfer function
> intention regarding encoding pixel's luminance, define supported
> Electro-Optical Transfer Functions (EOTFs) and inverse EOTFs, that
> includes pure gamma or standardized transfer functions.
> 
> Suggested-by: Harry Wentland 
> Signed-off-by: Melissa Wen 
> ---
>  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 19 +++--
>  .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 69 +++
>  2 files changed, 67 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> index c749c9cb3d94..f6251ed89684 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> @@ -718,14 +718,21 @@ extern const struct amdgpu_ip_block_version dm_ip_block;
>  
>  enum amdgpu_transfer_function {
>   AMDGPU_TRANSFER_FUNCTION_DEFAULT,
> - AMDGPU_TRANSFER_FUNCTION_SRGB,
> - AMDGPU_TRANSFER_FUNCTION_BT709,
> - AMDGPU_TRANSFER_FUNCTION_PQ,
> + AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF,
> + AMDGPU_TRANSFER_FUNCTION_BT709_EOTF,
> + AMDGPU_TRANSFER_FUNCTION_PQ_EOTF,
>   AMDGPU_TRANSFER_FUNCTION_LINEAR,
>   AMDGPU_TRANSFER_FUNCTION_UNITY,
> - AMDGPU_TRANSFER_FUNCTION_GAMMA22,
> - AMDGPU_TRANSFER_FUNCTION_GAMMA24,
> - AMDGPU_TRANSFER_FUNCTION_GAMMA26,
> + AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF,
> + AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF,
> + AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF,
> + AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF,
> + AMDGPU_TRANSFER_FUNCTION_BT709_INV_EOTF,
> + AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF,
> + AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF,
> + AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF,
> + AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF,
> +AMDGPU_TRANSFER_FUNCTION_COUNT
>  };
>  
>  struct dm_plane_state {
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> index 56ce008b9095..cc2187c0879a 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> @@ -85,18 +85,59 @@ void amdgpu_dm_init_color_mod(void)
>  }
>  
>  #ifdef AMD_PRIVATE_COLOR
> -static const struct drm_prop_enum_list amdgpu_transfer_function_enum_list[] 
> = {
> - { AMDGPU_TRANSFER_FUNCTION_DEFAULT, "Default" },
> - { AMDGPU_TRANSFER_FUNCTION_SRGB, "sRGB" },
> - { AMDGPU_TRANSFER_FUNCTION_BT709, "BT.709" },
> - { AMDGPU_TRANSFER_FUNCTION_PQ, "PQ (Perceptual Quantizer)" },
> - { AMDGPU_TRANSFER_FUNCTION_LINEAR, "Linear" },
> - { AMDGPU_TRANSFER_FUNCTION_UNITY, "Unity" },
> - { AMDGPU_TRANSFER_FUNCTION_GAMMA22, "Gamma 2.2" },
> - { AMDGPU_TRANSFER_FUNCTION_GAMMA24, "Gamma 2.4" },
> - { AMDGPU_TRANSFER_FUNCTION_GAMMA26, "Gamma 2.6" },
> +static const char * const
> +amdgpu_transfer_function_names[] = {
> + [AMDGPU_TRANSFER_FUNCTION_DEFAULT]  = "Default",
> + [AMDGPU_TRANSFER_FUNCTION_LINEAR]   = "Linear",

Hi,

if the below is identity, then what is linear? Is there a coefficient
(multiplier) somewhere? Offset?

> + [AMDGPU_TRANSFER_FUNCTION_UNITY]= "Unity",

Should "Unity" be called "Identity"?

Doesn't unity mean that the output is always 1.0 regardless of input?

> + [AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF]= "sRGB EOTF",
> + [AMDGPU_TRANSFER_FUNCTION_BT709_EOTF]   = "BT.709 EOTF",

BT.709 says about "Overall opto-electronic transfer characteristics at
source":

In typical production practice the encoding function of image
sources is adjusted so that the final picture has the desired
look, as viewed on a reference monitor having the reference
decoding function of Recommendation ITU-R BT.1886, in the
reference viewing environment defined in Recommendation ITU-R
BT.2035.

IOW, typically people tweak the encoding function instead of using
BT.709 OETF as is, which means that inverting the BT.709 OETF produces
something slightly unknown. The note about BT.1886 means that that
something is also not quite how it's supposed to be turned into light.

Should this enum item be "BT.709 inverse OETF" and respectively below a
"BT.709 OETF"?

What curve does the hardware actually implement?

The others seem fine to me.


Thanks,
pq

> + [AMDGPU_TRANSFER_FUNCTION_PQ_EOTF]  = "PQ EOTF",
> + [AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF] = "Gamma 2.2 EOTF",
> + [AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF] = "Gamma 2.4 EOTF",
> + [AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF] = "Gamma 2.6 EOTF",
> + [AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF]= "sRGB inv_EOTF",
> + [AMDGPU_TRANSFER_FUNCTION_BT709_INV_EOTF]   = "BT.709 inv_EOTF",
> + [AMDGPU_TRAN

[RFC]: shmem fd for non-DMA buffer sharing cross drivers

2023-08-22 Thread Hsia-Jun Li

Hello

I would like to introduce a usage of SHMEM similar to DMA-buf; its major
purpose is sharing metadata, or serving as a plain container, across
drivers.


We need to exchange some sort of metadata between drivers, such as dynamic
HDR data between video4linux2 and DRM. Or the graphics frame buffer may be
too complex to be described with a plain per-plane DMA-buf fd.
An issue between DRM and V4L2 is that DRM can only support 4 planes
while V4L2 supports 8. It would be pretty hard for DRM to extend its
interface to support those 4 extra planes, as that would lead to revisions
of many standards such as Vulkan and EGL.


Also, there is no reason to consume a device's memory for content
that the device can't read, or to waste an IOMMU entry on such data.
Usually, such metadata would be values that should be written to a
hardware's registers; a 4KiB page would hold 1024 32-bit register values.


Still, I have some problems with SHMEM:
1. I don't want userspace to modify the contents of the SHMEM allocated
by the kernel; is there a way to enforce that?

2. Should I create a helper function for installing the SHMEM file as a fd?

--
Hsia-Jun(Randy) Li


Re: [PATCH 4/6] dt-bindings: net: microchip: Allow nvmem-cell usage

2023-08-22 Thread Alexander Stein
Am Montag, 21. August 2023, 19:14:39 CEST schrieb Rob Herring:
> On Thu, 10 Aug 2023 16:44:49 +0200, Alexander Stein wrote:
> > MAC address can be provided by a nvmem-cell, thus allow referencing a
> > source for the address. Fixes the warning:
> > arch/arm/boot/dts/nxp/imx/imx6q-mba6a.dtb: ethernet@1: 'nvmem-cell-names',
> > 
> >  'nvmem-cells' do not match any of the regexes: 'pinctrl-[0-9]+'
> >  From schema: Documentation/devicetree/bindings/net/microchip,lan95xx.yaml
> > 
> > Signed-off-by: Alexander Stein 
> > ---
> > 
> >  Documentation/devicetree/bindings/net/microchip,lan95xx.yaml | 2 ++
> >  1 file changed, 2 insertions(+)
> 
> Reviewed-by: Rob Herring 

Thanks. But while reading your comment on patch 3, I'm wondering if
additionalProperties should be changed to unevaluatedProperties here as well.
This way local-mac-address and mac-address can be removed as well, as they are
already defined in ethernet-controller.yaml.

Best regards,
Alexander
-- 
TQ-Systems GmbH | Mühlstraße 2, Gut Delling | 82229 Seefeld, Germany
Amtsgericht München, HRB 105018
Geschäftsführer: Detlef Schneider, Rüdiger Stahl, Stefan Schneider
http://www.tq-group.com/




[CI 1/2] drm: Add an HPD poll helper to reschedule the poll work

2023-08-22 Thread Imre Deak
Add a helper to reschedule drm_mode_config::output_poll_work after
polling has been enabled for a connector (and needing a reschedule,
since previously polling was disabled for all connectors and hence
output_poll_work was not running).

This is needed by the next patch fixing HPD polling on i915.

CC: sta...@vger.kernel.org # 6.4+
Cc: Dmitry Baryshkov 
Cc: dri-devel@lists.freedesktop.org
Reviewed-by: Jouni Högander 
Reviewed-by: Dmitry Baryshkov 
Signed-off-by: Imre Deak 
---
 drivers/gpu/drm/drm_probe_helper.c | 68 --
 include/drm/drm_probe_helper.h |  1 +
 2 files changed, 47 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/drm_probe_helper.c 
b/drivers/gpu/drm/drm_probe_helper.c
index 2fb9bf901a2cc..3f479483d7d80 100644
--- a/drivers/gpu/drm/drm_probe_helper.c
+++ b/drivers/gpu/drm/drm_probe_helper.c
@@ -262,6 +262,26 @@ static bool drm_kms_helper_enable_hpd(struct drm_device 
*dev)
 }
 
 #define DRM_OUTPUT_POLL_PERIOD (10*HZ)
+static void reschedule_output_poll_work(struct drm_device *dev)
+{
+   unsigned long delay = DRM_OUTPUT_POLL_PERIOD;
+
+   if (dev->mode_config.delayed_event)
+   /*
+* FIXME:
+*
+* Use short (1s) delay to handle the initial delayed event.
+* This delay should not be needed, but Optimus/nouveau will
+* fail in a mysterious way if the delayed event is handled as
+* soon as possible like it is done in
+* drm_helper_probe_single_connector_modes() in case the poll
+* was enabled before.
+*/
+   delay = HZ;
+
+   schedule_delayed_work(&dev->mode_config.output_poll_work, delay);
+}
+
 /**
  * drm_kms_helper_poll_enable - re-enable output polling.
  * @dev: drm_device
@@ -279,37 +299,41 @@ static bool drm_kms_helper_enable_hpd(struct drm_device 
*dev)
  */
 void drm_kms_helper_poll_enable(struct drm_device *dev)
 {
-   bool poll = false;
-   unsigned long delay = DRM_OUTPUT_POLL_PERIOD;
-
if (!dev->mode_config.poll_enabled || !drm_kms_helper_poll ||
dev->mode_config.poll_running)
return;
 
-   poll = drm_kms_helper_enable_hpd(dev);
-
-   if (dev->mode_config.delayed_event) {
-   /*
-* FIXME:
-*
-* Use short (1s) delay to handle the initial delayed event.
-* This delay should not be needed, but Optimus/nouveau will
-* fail in a mysterious way if the delayed event is handled as
-* soon as possible like it is done in
-* drm_helper_probe_single_connector_modes() in case the poll
-* was enabled before.
-*/
-   poll = true;
-   delay = HZ;
-   }
-
-   if (poll)
-   schedule_delayed_work(&dev->mode_config.output_poll_work, 
delay);
+   if (drm_kms_helper_enable_hpd(dev) ||
+   dev->mode_config.delayed_event)
+   reschedule_output_poll_work(dev);
 
dev->mode_config.poll_running = true;
 }
 EXPORT_SYMBOL(drm_kms_helper_poll_enable);
 
+/**
+ * drm_kms_helper_poll_reschedule - reschedule the output polling work
+ * @dev: drm_device
+ *
+ * This function reschedules the output polling work, after polling for a
+ * connector has been enabled.
+ *
+ * Drivers must call this helper after enabling polling for a connector by
+ * setting %DRM_CONNECTOR_POLL_CONNECT / %DRM_CONNECTOR_POLL_DISCONNECT flags
+ * in drm_connector::polled. Note that after disabling polling by clearing 
these
+ * flags for a connector will stop the output polling work automatically if
+ * the polling is disabled for all other connectors as well.
+ *
+ * The function can be called only after polling has been enabled by calling
+ * drm_kms_helper_poll_init() / drm_kms_helper_poll_enable().
+ */
+void drm_kms_helper_poll_reschedule(struct drm_device *dev)
+{
+   if (dev->mode_config.poll_running)
+   reschedule_output_poll_work(dev);
+}
+EXPORT_SYMBOL(drm_kms_helper_poll_reschedule);
+
 static enum drm_connector_status
 drm_helper_probe_detect_ctx(struct drm_connector *connector, bool force)
 {
diff --git a/include/drm/drm_probe_helper.h b/include/drm/drm_probe_helper.h
index 4977e0ab72dbb..fad3c4003b2b5 100644
--- a/include/drm/drm_probe_helper.h
+++ b/include/drm/drm_probe_helper.h
@@ -25,6 +25,7 @@ void drm_kms_helper_connector_hotplug_event(struct 
drm_connector *connector);
 
 void drm_kms_helper_poll_disable(struct drm_device *dev);
 void drm_kms_helper_poll_enable(struct drm_device *dev);
+void drm_kms_helper_poll_reschedule(struct drm_device *dev);
 bool drm_kms_helper_is_poll_worker(void);
 
 enum drm_mode_status drm_crtc_helper_mode_valid_fixed(struct drm_crtc *crtc,
-- 
2.37.2



[CI 2/2] drm/i915: Fix HPD polling, reenabling the output poll work as needed

2023-08-22 Thread Imre Deak
After the commit in the Fixes: line below, HPD polling stopped working
on i915, since after that change calling drm_kms_helper_poll_enable()
doesn't restart drm_mode_config::output_poll_work if the work was
stopped (no connectors needing polling) and enabling polling for a
connector (during runtime suspend or detecting an HPD IRQ storm).

After the above change calling drm_kms_helper_poll_enable() is a nop
after it's been called already and polling for some connectors was
disabled/re-enabled.

Fix this by calling drm_kms_helper_poll_reschedule() added in the
previous patch instead, which reschedules the work whenever expected.

Fixes: d33a54e3991d ("drm/probe_helper: sort out poll_running vs poll_enabled")
CC: sta...@vger.kernel.org # 6.4+
Cc: Dmitry Baryshkov 
Cc: dri-devel@lists.freedesktop.org
Reviewed-by: Jouni Högander 
Signed-off-by: Imre Deak 
---
 drivers/gpu/drm/i915/display/intel_hotplug.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c 
b/drivers/gpu/drm/i915/display/intel_hotplug.c
index e3ca192eb569c..e8562f6f8bb44 100644
--- a/drivers/gpu/drm/i915/display/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/display/intel_hotplug.c
@@ -212,7 +212,7 @@ intel_hpd_irq_storm_switch_to_polling(struct 
drm_i915_private *dev_priv)
 
/* Enable polling and queue hotplug re-enabling. */
if (hpd_disabled) {
-   drm_kms_helper_poll_enable(&dev_priv->drm);
+   drm_kms_helper_poll_reschedule(&dev_priv->drm);
mod_delayed_work(dev_priv->unordered_wq,
 &dev_priv->display.hotplug.reenable_work,
 msecs_to_jiffies(HPD_STORM_REENABLE_DELAY));
@@ -727,7 +727,7 @@ static void i915_hpd_poll_init_work(struct work_struct 
*work)
drm_connector_list_iter_end(&conn_iter);
 
if (enabled)
-   drm_kms_helper_poll_enable(&dev_priv->drm);
+   drm_kms_helper_poll_reschedule(&dev_priv->drm);
 
mutex_unlock(&dev_priv->drm.mode_config.mutex);
 
-- 
2.37.2



[PATCH AUTOSEL 6.4 07/11] drm/amd/pm: skip the RLC stop when S0i3 suspend for SMU v13.0.4/11

2023-08-22 Thread Sasha Levin
From: Tim Huang 

[ Upstream commit 730d44e1fa306a20746ad4a85da550662aed9daa ]

For SMU v13.0.4/11, driver does not need to stop RLC for S0i3,
the firmwares will handle that properly.

Signed-off-by: Tim Huang 
Reviewed-by: Mario Limonciello 
Signed-off-by: Alex Deucher 
Signed-off-by: Sasha Levin 
---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index ea03e8d9a3f6c..818379276a582 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1573,9 +1573,9 @@ static int smu_disable_dpms(struct smu_context *smu)
 
/*
 * For SMU 13.0.4/11, PMFW will handle the features disablement properly
-* for gpu reset case. Driver involvement is unnecessary.
+* for gpu reset and S0i3 cases. Driver involvement is unnecessary.
 */
-   if (amdgpu_in_reset(adev)) {
+   if (amdgpu_in_reset(adev) || adev->in_s0ix) {
switch (adev->ip_versions[MP1_HWIP][0]) {
case IP_VERSION(13, 0, 4):
case IP_VERSION(13, 0, 11):
-- 
2.40.1



[PATCH AUTOSEL 6.4 09/11] drm/amdkfd: ignore crat by default

2023-08-22 Thread Sasha Levin
From: Alex Deucher 

[ Upstream commit a6dea2d64ff92851e68cd4e20a35f6534286e016 ]

We are dropping the IOMMUv2 path, so no need to enable this.
It's often buggy on consumer platforms anyway.

Reviewed-by: Felix Kuehling 
Acked-by: Christian König 
Tested-by: Mike Lothian 
Signed-off-by: Alex Deucher 
Signed-off-by: Sasha Levin 
---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 475e470273540..ee0cc35d68a84 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1543,11 +1543,7 @@ static bool kfd_ignore_crat(void)
if (ignore_crat)
return true;
 
-#ifndef KFD_SUPPORT_IOMMU_V2
ret = true;
-#else
-   ret = false;
-#endif
 
return ret;
 }
-- 
2.40.1



[PATCH AUTOSEL 6.4 08/11] drm/amdgpu: Match against exact bootloader status

2023-08-22 Thread Sasha Levin
From: Lijo Lazar 

[ Upstream commit d3de41ee5febe5c2d9989fe9810bce2bb54a3a8e ]

On PSP v13.x ASICs, boot loader will set only the MSB to 1 and clear the
least significant bits for any command submission. Hence match against
the exact register value, otherwise a register value of all 0xFFs also
could falsely indicate that boot loader is ready. Also, from PSP v13.0.6
and newer, bits[7:0] will be used to indicate command error status.

Signed-off-by: Lijo Lazar 
Acked-by: Alex Deucher 
Signed-off-by: Alex Deucher 
Signed-off-by: Sasha Levin 
---
 drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index caee76ab71105..92f2ee412908d 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -136,14 +136,15 @@ static int psp_v13_0_wait_for_bootloader(struct 
psp_context *psp)
int ret;
int retry_loop;
 
+   /* Wait for bootloader to signify that it is ready having bit 31 of
+* C2PMSG_35 set to 1. All other bits are expected to be cleared.
+* If there is an error in processing command, bits[7:0] will be set.
+* This is applicable for PSP v13.0.6 and newer.
+*/
for (retry_loop = 0; retry_loop < 10; retry_loop++) {
-   /* Wait for bootloader to signify that is
-   ready having bit 31 of C2PMSG_35 set to 1 */
-   ret = psp_wait_for(psp,
-  SOC15_REG_OFFSET(MP0, 0, 
regMP0_SMN_C2PMSG_35),
-  0x80000000,
-  0x80000000,
-  false);
+   ret = psp_wait_for(
+   psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
+   0x80000000, 0xffffffff, false);
 
if (ret == 0)
return 0;
-- 
2.40.1



[PATCH AUTOSEL 6.4 10/11] drm/amdkfd: disable IOMMUv2 support for KV/CZ

2023-08-22 Thread Sasha Levin
From: Alex Deucher 

[ Upstream commit 616f92d188ee7142a95a52068efdbea82645f859 ]

Use the dGPU path instead.  There were a lot of platform
issues with IOMMU in general on these chips due to windows
not enabling IOMMU at the time.  The dGPU path has been
used for a long time with newer APUs and works fine.  This
also paves the way to simplify the driver significantly.

v2: use the dGPU queue manager functions

Reviewed-by: Felix Kuehling 
Acked-by: Christian König 
Tested-by: Mike Lothian 
Signed-off-by: Alex Deucher 
Signed-off-by: Sasha Levin 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c   | 6 --
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 8 +---
 2 files changed, 1 insertion(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 00f528eb98126..9c8197573dee7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -224,10 +224,6 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
asic_type != CHIP_TONGA)
kfd->device_info.supports_cwsr = true;
 
-   if (asic_type == CHIP_KAVERI ||
-   asic_type == CHIP_CARRIZO)
-   kfd->device_info.needs_iommu_device = true;
-
if (asic_type != CHIP_HAWAII && !vf)
kfd->device_info.needs_pci_atomics = true;
}
@@ -240,7 +236,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, 
bool vf)
uint32_t gfx_target_version = 0;
 
switch (adev->asic_type) {
-#ifdef KFD_SUPPORT_IOMMU_V2
 #ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_KAVERI:
gfx_target_version = 7;
@@ -253,7 +248,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, 
bool vf)
if (!vf)
f2g = &gfx_v8_kfd2kgd;
break;
-#endif
 #ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_HAWAII:
gfx_target_version = 70001;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 7a95698d83f73..c73417e79745e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2335,18 +2335,12 @@ struct device_queue_manager 
*device_queue_manager_init(struct kfd_dev *dev)
}
 
switch (dev->adev->asic_type) {
-   case CHIP_CARRIZO:
-   device_queue_manager_init_vi(&dqm->asic_ops);
-   break;
-
case CHIP_KAVERI:
-   device_queue_manager_init_cik(&dqm->asic_ops);
-   break;
-
case CHIP_HAWAII:
device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
break;
 
+   case CHIP_CARRIZO:
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_POLARIS10:
-- 
2.40.1



[PATCH AUTOSEL 6.4 11/11] drm/amdkfd: disable IOMMUv2 support for Raven

2023-08-22 Thread Sasha Levin
From: Alex Deucher 

[ Upstream commit 091ae5473f96ced844af6ba39b94757359b12348 ]

Use the dGPU path instead.  There were a lot of platform
issues with IOMMU in general on these chips due to windows
not enabling IOMMU at the time.  The dGPU path has been
used for a long time with newer APUs and works fine.  This
also paves the way to simplify the driver significantly.

Reviewed-by: Felix Kuehling 
Acked-by: Christian König 
Tested-by: Mike Lothian 
Signed-off-by: Alex Deucher 
Signed-off-by: Sasha Levin 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 7 ---
 1 file changed, 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 9c8197573dee7..224e057d2dbbf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -185,11 +185,6 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
 
kfd_device_info_set_event_interrupt_class(kfd);
 
-   /* Raven */
-   if (gc_version == IP_VERSION(9, 1, 0) ||
-   gc_version == IP_VERSION(9, 2, 2))
-   kfd->device_info.needs_iommu_device = true;
-
if (gc_version < IP_VERSION(11, 0, 0)) {
/* Navi2x+, Navi1x+ */
if (gc_version == IP_VERSION(10, 3, 6))
@@ -283,7 +278,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, 
bool vf)
gfx_target_version = 9;
f2g = &gfx_v9_kfd2kgd;
break;
-#ifdef KFD_SUPPORT_IOMMU_V2
/* Raven */
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 2, 2):
@@ -291,7 +285,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, 
bool vf)
if (!vf)
f2g = &gfx_v9_kfd2kgd;
break;
-#endif
/* Vega12 */
case IP_VERSION(9, 2, 1):
gfx_target_version = 90004;
-- 
2.40.1



[PATCH AUTOSEL 6.1 06/10] drm/amd/pm: skip the RLC stop when S0i3 suspend for SMU v13.0.4/11

2023-08-22 Thread Sasha Levin
From: Tim Huang 

[ Upstream commit 730d44e1fa306a20746ad4a85da550662aed9daa ]

For SMU v13.0.4/11, driver does not need to stop RLC for S0i3,
the firmwares will handle that properly.

Signed-off-by: Tim Huang 
Reviewed-by: Mario Limonciello 
Signed-off-by: Alex Deucher 
Signed-off-by: Sasha Levin 
---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index d191ff52d4f06..a664a0a284784 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1562,9 +1562,9 @@ static int smu_disable_dpms(struct smu_context *smu)
 
/*
 * For SMU 13.0.4/11, PMFW will handle the features disablement properly
-* for gpu reset case. Driver involvement is unnecessary.
+* for gpu reset and S0i3 cases. Driver involvement is unnecessary.
 */
-   if (amdgpu_in_reset(adev)) {
+   if (amdgpu_in_reset(adev) || adev->in_s0ix) {
switch (adev->ip_versions[MP1_HWIP][0]) {
case IP_VERSION(13, 0, 4):
case IP_VERSION(13, 0, 11):
-- 
2.40.1



[PATCH AUTOSEL 6.1 07/10] drm/amdgpu: Match against exact bootloader status

2023-08-22 Thread Sasha Levin
From: Lijo Lazar 

[ Upstream commit d3de41ee5febe5c2d9989fe9810bce2bb54a3a8e ]

On PSP v13.x ASICs, boot loader will set only the MSB to 1 and clear the
least significant bits for any command submission. Hence match against
the exact register value, otherwise a register value of all 0xFFs also
could falsely indicate that boot loader is ready. Also, from PSP v13.0.6
and newer, bits[7:0] will be used to indicate command error status.

Signed-off-by: Lijo Lazar 
Acked-by: Alex Deucher 
Signed-off-by: Alex Deucher 
Signed-off-by: Sasha Levin 
---
 drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index 8c5fa4b7b68a2..c7cb30efe43de 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -147,14 +147,15 @@ static int psp_v13_0_wait_for_bootloader(struct 
psp_context *psp)
int ret;
int retry_loop;
 
+   /* Wait for bootloader to signify that it is ready having bit 31 of
+* C2PMSG_35 set to 1. All other bits are expected to be cleared.
+* If there is an error in processing command, bits[7:0] will be set.
+* This is applicable for PSP v13.0.6 and newer.
+*/
for (retry_loop = 0; retry_loop < 10; retry_loop++) {
-   /* Wait for bootloader to signify that is
-   ready having bit 31 of C2PMSG_35 set to 1 */
-   ret = psp_wait_for(psp,
-  SOC15_REG_OFFSET(MP0, 0, 
regMP0_SMN_C2PMSG_35),
-  0x80000000,
-  0x80000000,
-  false);
+   ret = psp_wait_for(
+   psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
+   0x80000000, 0xffffffff, false);
 
if (ret == 0)
return 0;
-- 
2.40.1



[PATCH AUTOSEL 6.1 08/10] drm/amdkfd: ignore crat by default

2023-08-22 Thread Sasha Levin
From: Alex Deucher 

[ Upstream commit a6dea2d64ff92851e68cd4e20a35f6534286e016 ]

We are dropping the IOMMUv2 path, so no need to enable this.
It's often buggy on consumer platforms anyway.

Reviewed-by: Felix Kuehling 
Acked-by: Christian König 
Tested-by: Mike Lothian 
Signed-off-by: Alex Deucher 
Signed-off-by: Sasha Levin 
---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index e45c6bc8d10bb..a9fa4772b2d35 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1543,11 +1543,7 @@ static bool kfd_ignore_crat(void)
if (ignore_crat)
return true;
 
-#ifndef KFD_SUPPORT_IOMMU_V2
ret = true;
-#else
-   ret = false;
-#endif
 
return ret;
 }
-- 
2.40.1



[PATCH AUTOSEL 6.1 09/10] drm/amdkfd: disable IOMMUv2 support for KV/CZ

2023-08-22 Thread Sasha Levin
From: Alex Deucher 

[ Upstream commit 616f92d188ee7142a95a52068efdbea82645f859 ]

Use the dGPU path instead.  There were a lot of platform
issues with IOMMU in general on these chips due to windows
not enabling IOMMU at the time.  The dGPU path has been
used for a long time with newer APUs and works fine.  This
also paves the way to simplify the driver significantly.

v2: use the dGPU queue manager functions

Reviewed-by: Felix Kuehling 
Acked-by: Christian König 
Tested-by: Mike Lothian 
Signed-off-by: Alex Deucher 
Signed-off-by: Sasha Levin 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c   | 6 --
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 8 +---
 2 files changed, 1 insertion(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 27820f0a282d1..4cc5debdd119b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -216,10 +216,6 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
asic_type != CHIP_TONGA)
kfd->device_info.supports_cwsr = true;
 
-   if (asic_type == CHIP_KAVERI ||
-   asic_type == CHIP_CARRIZO)
-   kfd->device_info.needs_iommu_device = true;
-
if (asic_type != CHIP_HAWAII && !vf)
kfd->device_info.needs_pci_atomics = true;
}
@@ -233,7 +229,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, 
bool vf)
uint32_t gfx_target_version = 0;
 
switch (adev->asic_type) {
-#ifdef KFD_SUPPORT_IOMMU_V2
 #ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_KAVERI:
gfx_target_version = 7;
@@ -246,7 +241,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, 
bool vf)
if (!vf)
f2g = &gfx_v8_kfd2kgd;
break;
-#endif
 #ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_HAWAII:
gfx_target_version = 70001;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index c06ada0844ba1..5616a722578f5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2335,18 +2335,12 @@ struct device_queue_manager 
*device_queue_manager_init(struct kfd_dev *dev)
}
 
switch (dev->adev->asic_type) {
-   case CHIP_CARRIZO:
-   device_queue_manager_init_vi(&dqm->asic_ops);
-   break;
-
case CHIP_KAVERI:
-   device_queue_manager_init_cik(&dqm->asic_ops);
-   break;
-
case CHIP_HAWAII:
device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
break;
 
+   case CHIP_CARRIZO:
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_POLARIS10:
-- 
2.40.1



[PATCH AUTOSEL 6.1 10/10] drm/amdkfd: disable IOMMUv2 support for Raven

2023-08-22 Thread Sasha Levin
From: Alex Deucher 

[ Upstream commit 091ae5473f96ced844af6ba39b94757359b12348 ]

Use the dGPU path instead.  There were a lot of platform
issues with IOMMU in general on these chips due to windows
not enabling IOMMU at the time.  The dGPU path has been
used for a long time with newer APUs and works fine.  This
also paves the way to simplify the driver significantly.

Reviewed-by: Felix Kuehling 
Acked-by: Christian König 
Tested-by: Mike Lothian 
Signed-off-by: Alex Deucher 
Signed-off-by: Sasha Levin 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 7 ---
 1 file changed, 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 4cc5debdd119b..af18378e58d9f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -185,11 +185,6 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
 
kfd_device_info_set_event_interrupt_class(kfd);
 
-   /* Raven */
-   if (gc_version == IP_VERSION(9, 1, 0) ||
-   gc_version == IP_VERSION(9, 2, 2))
-   kfd->device_info.needs_iommu_device = true;
-
if (gc_version < IP_VERSION(11, 0, 0)) {
/* Navi2x+, Navi1x+ */
if (gc_version == IP_VERSION(10, 3, 6))
@@ -287,7 +282,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, 
bool vf)
gfx_target_version = 9;
f2g = &gfx_v9_kfd2kgd;
break;
-#ifdef KFD_SUPPORT_IOMMU_V2
/* Raven */
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 2, 2):
@@ -295,7 +289,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, 
bool vf)
if (!vf)
f2g = &gfx_v9_kfd2kgd;
break;
-#endif
/* Vega12 */
case IP_VERSION(9, 2, 1):
gfx_target_version = 90004;
-- 
2.40.1



[PATCH AUTOSEL 5.15 5/6] drm/amdgpu: Match against exact bootloader status

2023-08-22 Thread Sasha Levin
From: Lijo Lazar 

[ Upstream commit d3de41ee5febe5c2d9989fe9810bce2bb54a3a8e ]

On PSP v13.x ASICs, boot loader will set only the MSB to 1 and clear the
least significant bits for any command submission. Hence match against
the exact register value, otherwise a register value of all 0xFFs also
could falsely indicate that boot loader is ready. Also, from PSP v13.0.6
and newer, bits[7:0] will be used to indicate command error status.

Signed-off-by: Lijo Lazar 
Acked-by: Alex Deucher 
Signed-off-by: Alex Deucher 
Signed-off-by: Sasha Levin 
---
 drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index 47a500f64db20..bcf356df1ef33 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -101,14 +101,15 @@ static int psp_v13_0_wait_for_bootloader(struct 
psp_context *psp)
int ret;
int retry_loop;
 
+   /* Wait for bootloader to signify that it is ready having bit 31 of
+* C2PMSG_35 set to 1. All other bits are expected to be cleared.
+* If there is an error in processing command, bits[7:0] will be set.
+* This is applicable for PSP v13.0.6 and newer.
+*/
for (retry_loop = 0; retry_loop < 10; retry_loop++) {
-   /* Wait for bootloader to signify that is
-   ready having bit 31 of C2PMSG_35 set to 1 */
-   ret = psp_wait_for(psp,
-  SOC15_REG_OFFSET(MP0, 0, 
regMP0_SMN_C2PMSG_35),
-  0x80000000,
-  0x80000000,
-  false);
+   ret = psp_wait_for(
+   psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
+   0x80000000, 0xffffffff, false);
 
if (ret == 0)
return 0;
-- 
2.40.1



[PATCH AUTOSEL 5.15 6/6] drm/amdkfd: ignore crat by default

2023-08-22 Thread Sasha Levin
From: Alex Deucher 

[ Upstream commit a6dea2d64ff92851e68cd4e20a35f6534286e016 ]

We are dropping the IOMMUv2 path, so no need to enable this.
It's often buggy on consumer platforms anyway.

Reviewed-by: Felix Kuehling 
Acked-by: Christian König 
Tested-by: Mike Lothian 
Signed-off-by: Alex Deucher 
Signed-off-by: Sasha Levin 
---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index e574aa32a111d..46dfd9baeb013 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1523,11 +1523,7 @@ static bool kfd_ignore_crat(void)
if (ignore_crat)
return true;
 
-#ifndef KFD_SUPPORT_IOMMU_V2
ret = true;
-#else
-   ret = false;
-#endif
 
return ret;
 }
-- 
2.40.1



[PATCH AUTOSEL 5.10 3/3] drm/amdkfd: ignore crat by default

2023-08-22 Thread Sasha Levin
From: Alex Deucher 

[ Upstream commit a6dea2d64ff92851e68cd4e20a35f6534286e016 ]

We are dropping the IOMMUv2 path, so no need to enable this.
It's often buggy on consumer platforms anyway.

Reviewed-by: Felix Kuehling 
Acked-by: Christian König 
Tested-by: Mike Lothian 
Signed-off-by: Alex Deucher 
Signed-off-by: Sasha Levin 
---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 86b4dadf772e3..61fea0d268b96 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -749,11 +749,7 @@ static bool kfd_ignore_crat(void)
if (ignore_crat)
return true;
 
-#ifndef KFD_SUPPORT_IOMMU_V2
ret = true;
-#else
-   ret = false;
-#endif
 
return ret;
 }
-- 
2.40.1



RE: Implement svm without BO concept in xe driver

2023-08-22 Thread Ruhl, Michael J
>-Original Message-
>From: Felix Kuehling 
>Sent: Monday, August 21, 2023 4:57 PM
>To: Zeng, Oak ; Dave Airlie 
>Cc: Brost, Matthew ; Thomas Hellström
>; Philip Yang ;
>Welty, Brian ; dri-devel@lists.freedesktop.org;
>Christian König ; Vishwanathapura, Niranjana
>; intel...@lists.freedesktop.org;
>Ruhl, Michael J 
>Subject: Re: Implement svm without BO concept in xe driver
>
>
>On 2023-08-21 15:41, Zeng, Oak wrote:
>>> I have thought about emulating BO allocation APIs on top of system SVM.
>>> This was in the context of KFD where memory management is not tied into
>>> command submissions APIs, which would add a whole other layer of
>>> complexity. The main unsolved (unsolvable?) problem I ran into was, that
>>> there is no way to share SVM memory as DMABufs. So there is no good
>way
>>> to support applications that expect to share memory in that way.
>> Great point. I also discussed the dmabuf thing with Mike (cc'ed). dmabuf is a
>particular technology created specially for the BO driver (and other driver) to
>share buffer b/t devices. Hmm/system SVM doesn't need this technology:
>malloc'ed memory by the nature is already shared b/t different devices (in
>one process) and CPU. We just can simply submit GPU kernel to all devices
>with malloc'ed memory and let kmd decide the memory placement (such as
>map in place or migrate). No need of buffer export/import in hmm/system
>SVM world.
>
>I disagree. DMABuf can be used for sharing memory between processes. And
>it can be used for sharing memory with 3rd-party devices via PCIe P2P
>(e.g. a Mellanox NIC). You cannot easily do that with malloc'ed memory.
>POSIX IPC requires that you know that you'll be sharing the memory at
>allocation time. It adds overhead. And because it's file-backed, it's
>currently incompatible with migration. And HMM currently doesn't have a
>solution for P2P. Any access by a different device causes a migration to
>system memory.

Hey Oak,

I think we were discussing this solution in the context of using the P2P_DMA
feature.  This has an allocation path and device-to-device capabilities.

Mike


>Regards,
>   Felix
>
>
>>
>> So yes from buffer sharing perspective, the design philosophy is also very
>different.
>>
>> Thanks,
>> Oak
>>


Re: [PATCH 3/3] drm/amd/display: drop unused count variable in create_eml_sink()

2023-08-22 Thread Jani Nikula
On Wed, 17 May 2023, Hamza Mahfooz  wrote:
> Since we are only interested in having
> drm_edid_override_connector_update() update the value of
> connector->edid_blob_ptr, we don't care about its return value here.
> So, drop count.
>
> Fixes: 068553e14f86 ("drm/amd/display: assign edid_blob_ptr with edid from 
> debugfs")

The *real* problems with that commit are:

1) It uses drm_edid_override_connector_update() *at all*. Its
   documentation says:

Only to be used from drm_helper_probe_single_connector_modes()
as a fallback for when DDC probe failed during drm_get_edid()
and caused the override/firmware EDID to be skipped.

2) It messes with edid_blob_ptr directly. All drivers should stop doing
   that. It just complicates all the logic in the overrides and the
   property updates.


BR,
Jani.


> Reported-by: kernel test robot 
> Signed-off-by: Hamza Mahfooz 
> ---
>  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 14b296e1d0f6..5a2d04f47276 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -6396,9 +6396,8 @@ static void create_eml_sink(struct amdgpu_dm_connector 
> *aconnector)
>   /* if connector->edid_override valid, pass
>* it to edid_override to edid_blob_ptr
>*/
> - int count;
>  
> - count = drm_edid_override_connector_update(&aconnector->base);
> + drm_edid_override_connector_update(&aconnector->base);
>  
>   if (!aconnector->base.edid_blob_ptr) {
>   DRM_ERROR("No EDID firmware found on connector: %s 
> ,forcing to OFF!\n",

-- 
Jani Nikula, Intel Open Source Graphics Center


Re: [PATCH v2 08/34] drm/amd/display: document AMDGPU pre-defined transfer functions

2023-08-22 Thread Pekka Paalanen
On Thu, 10 Aug 2023 15:02:48 -0100
Melissa Wen  wrote:

> Brief documentation about pre-defined transfer function usage on AMD
> display driver and standardized EOTFs and inverse EOTFs.
> 
> Co-developed-by: Harry Wentland 
> Signed-off-by: Harry Wentland 
> Signed-off-by: Melissa Wen 
> ---
>  .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 39 +++++++++++++++++++++++++++++++++++++++
>  1 file changed, 39 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> index cc2187c0879a..7f13bcdaf016 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> @@ -85,6 +85,45 @@ void amdgpu_dm_init_color_mod(void)
>  }
>  
>  #ifdef AMD_PRIVATE_COLOR
> +/* Pre-defined Transfer Functions (TF)
> + *
> + * AMD driver supports pre-defined mathematical functions for transferring
> + * between encoded values and optical/linear space. Depending on HW color 
> caps,
> + * ROMs and curves built by the AMD color module support these transforms.
> + *
> + * The driver-specific color implementation exposes properties for 
> pre-blending
> + * degamma TF, shaper TF (before 3D LUT), and blend(dpp.ogam) TF and
> + * post-blending regamma (mpc.ogam) TF. However, only pre-blending degamma
> + * supports ROM curves. AMD color module uses pre-defined coefficients to 
> build
> + * curves for the other blocks. What can be done by each color block is
> + * described by struct dpp_color_caps and struct mpc_color_caps.
> + *
> + * AMD driver-specific color API exposes the following pre-defined transfer
> + * functions:
> + *
> + * - Linear/Unity: linear/identity relationship between pixel value and
> + *   luminance value;

I asked about linear/unity on the previous patch.

> + * - Gamma 2.2, Gamma 2.4, Gamma 2.6: pure gamma functions;

I'd explain these as pure power functions. Gamma function is
something completely different:
https://en.wikipedia.org/wiki/Gamma_function

> + * - sRGB: 2.4 gamma with small initial linear section as standardized by IEC
> + *   61966-2-1:1999;

I'd leave out the mention of "2.4 gamma". Yes, the value of the gamma
parameter is 2.4, but the curve is actually an approximation of the
pure 2.2 power function suitable for integer arithmetic[1].

One could call it "The piece-wise transfer function from IEC ...".

[1] https://www.w3.org/Graphics/Color/sRGB.html

> + * - BT.709 (BT.1886): 2.4 gamma with differences in the dark end of the 
> scale.
> + *   Used in HD-TV and standardized by ITU-R BT.1886;

BT.1886 has two more parameters (a.k.a contrast and brightness). What
are their values?

It's also quite different from BT.709 inverse OETF. BT.1886 uses
exponent 2.4 while inverse of BT.709 OETF has exponent approximately
2.22. This difference is intentional and accounts for shooting vs.
viewing environment differences.

Either the curve comes from BT.709 or BT.1886. Which one is it?

Would be nice to spell out the mathematical formula in these cases.

> + * - PQ (Perceptual Quantizer): used for HDR display, allows luminance range
> + *   capability of 0 to 10,000 nits; standardized by SMPTE ST 2084.

Right, but since we are working on numbers here,
is the PQ EOTF [0, 1] -> [0, 10000] or [0, 1]?


Thanks,
pq

> + *
> + * In the driver-specific API, color block names attached to TF properties
> + * suggest the intention regarding non-linear encoding pixel's luminance
> + * values. As some newer encodings don't use gamma curve, we make encoding 
> and
> + * decoding explicit by defining an enum list of transfer functions supported
> + * in terms of EOTF and inverse EOTF, where:
> + *
> + * - EOTF (electro-optical transfer function): is the transfer function to go
> + *   from the encoded value to an optical (linear) value. De-gamma functions
> + *   traditionally do this.
> + * - Inverse EOTF (simply the inverse of the EOTF): is usually intended to go
> + *   from an optical/linear space (which might have been used for blending)
> + *   back to the encoded values. Gamma functions traditionally do this. 
> + */
>  static const char * const
>  amdgpu_transfer_function_names[] = {
>   [AMDGPU_TRANSFER_FUNCTION_DEFAULT]  = "Default",



Re: [PATCH v14 RESEND 1/6] dt-bindings: display: imx: Add i.MX8qxp/qm DPU binding

2023-08-22 Thread Maxime Ripard
Hi,

On Tue, Aug 22, 2023 at 04:59:44PM +0800, Liu Ying wrote:
> This patch adds bindings for i.MX8qxp/qm Display Processing Unit.
> 
> Reviewed-by: Rob Herring 
> Signed-off-by: Liu Ying 
> ---
> v7->v14:
> * No change.
> 
> v6->v7:
> * Add Rob's R-b tag back.
> 
> v5->v6:
> * Use graph schema. So, drop Rob's R-b tag as review is needed.
> 
> v4->v5:
> * No change.
> 
> v3->v4:
> * Improve compatible property by using enum instead of oneOf+const. (Rob)
> * Add Rob's R-b tag.
> 
> v2->v3:
> * No change.
> 
> v1->v2:
> * Fix yamllint warnings.
> * Require bypass0 and bypass1 clocks for both i.MX8qxp and i.MX8qm, as the
>   display controller subsystem spec does say that they exist.
> * Use new dt binding way to add clocks in the example.
> * Trivial tweaks for the example.
> 
>  .../bindings/display/imx/fsl,imx8qxp-dpu.yaml | 387 ++
>  1 file changed, 387 insertions(+)
>  create mode 100644 
> Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dpu.yaml
> 
> diff --git 
> a/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dpu.yaml 
> b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dpu.yaml
> new file mode 100644
> index 000000000000..6b05c586cd9d
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dpu.yaml
> @@ -0,0 +1,387 @@
> +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
> +%YAML 1.2
> +---
> +$id: http://devicetree.org/schemas/display/imx/fsl,imx8qxp-dpu.yaml#
> +$schema: http://devicetree.org/meta-schemas/core.yaml#
> +
> +title: Freescale i.MX8qm/qxp Display Processing Unit
> +
> +maintainers:
> +  - Liu Ying 
> +
> +description: |
> +  The Freescale i.MX8qm/qxp Display Processing Unit(DPU) is comprised of two
> +  main components that include a blit engine for 2D graphics accelerations
> +  and a display controller for display output processing, as well as a 
> command
> +  sequencer.
> +
> +properties:
> +  compatible:
> +enum:
> +  - fsl,imx8qxp-dpu
> +  - fsl,imx8qm-dpu
> +
> +  reg:
> +maxItems: 1
> +
> +  interrupts:
> +items:
> +  - description: |
> +  store9 shadow load interrupt(blit engine)
> +  - description: |
> +  store9 frame complete interrupt(blit engine)
> +  - description: |
> +  store9 sequence complete interrupt(blit engine)
> +  - description: |
> +  extdst0 shadow load interrupt
> +  (display controller, content stream 0)
> +  - description: |
> +  extdst0 frame complete interrupt
> +  (display controller, content stream 0)
> +  - description: |
> +  extdst0 sequence complete interrupt
> +  (display controller, content stream 0)
> +  - description: |
> +  extdst4 shadow load interrupt
> +  (display controller, safety stream 0)
> +  - description: |
> +  extdst4 frame complete interrupt
> +  (display controller, safety stream 0)
> +  - description: |
> +  extdst4 sequence complete interrupt
> +  (display controller, safety stream 0)
> +  - description: |
> +  extdst1 shadow load interrupt
> +  (display controller, content stream 1)
> +  - description: |
> +  extdst1 frame complete interrupt
> +  (display controller, content stream 1)
> +  - description: |
> +  extdst1 sequence complete interrupt
> +  (display controller, content stream 1)
> +  - description: |
> +  extdst5 shadow load interrupt
> +  (display controller, safety stream 1)
> +  - description: |
> +  extdst5 frame complete interrupt
> +  (display controller, safety stream 1)
> +  - description: |
> +  extdst5 sequence complete interrupt
> +  (display controller, safety stream 1)
> +  - description: |
> +  disengcfg0 shadow load interrupt
> +  (display controller, display stream 0)
> +  - description: |
> +  disengcfg0 frame complete interrupt
> +  (display controller, display stream 0)
> +  - description: |
> +  disengcfg0 sequence complete interrupt
> +  (display controller, display stream 0)
> +  - description: |
> +  framegen0 programmable interrupt0
> +  (display controller, display stream 0)
> +  - description: |
> +  framegen0 programmable interrupt1
> +  (display controller, display stream 0)
> +  - description: |
> +  framegen0 programmable interrupt2
> +  (display controller, display stream 0)
> +  - description: |
> +  framegen0 programmable interrupt3
> +  (display controller, display stream 0)
> +  - description: |
> +  signature0 shadow load interrupt
> +  (display controller, display stream 0)
> +  - description: |
> +  signature0 measurement valid interrupt
> +  (display controller, display stream 0)
> +  - description: |
> +  signature0 err

[PATCH v2 1/1] drm/bridge: Silence error messages upon probe deferral

2023-08-22 Thread Alexander Stein
When -EPROBE_DEFER is returned do not raise an error, but silently return
this error instead. Fixes errors like this:
[drm:drm_bridge_attach] *ERROR* failed to attach bridge 
/soc@0/bus@30800000/mipi-dsi@30a00000 to encoder None-34: -517
[drm:drm_bridge_attach] *ERROR* failed to attach bridge 
/soc@0/bus@30800000/mipi-dsi@30a00000 to encoder None-34: -517

Signed-off-by: Alexander Stein 
---
Changes in v2:
* Adjust the indentation

Considering Laurent's input, IMHO -517 should not occur when using the
component framework, e.g. drivers/gpu/drm/mcde/mcde_drv.c. This warrants
printing an error only if it is not a deferred probe.
dev_err_probe() sounds reasonable, but that is something which should be done
in drivers. It is also arguable whether this message should be "hidden"
within a debug statement.

 drivers/gpu/drm/drm_bridge.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c
index 39e68e45bb124..132180a03c0eb 100644
--- a/drivers/gpu/drm/drm_bridge.c
+++ b/drivers/gpu/drm/drm_bridge.c
@@ -352,13 +352,15 @@ int drm_bridge_attach(struct drm_encoder *encoder, struct 
drm_bridge *bridge,
bridge->encoder = NULL;
list_del(&bridge->chain_node);
 
+   if (ret != -EPROBE_DEFER) {
 #ifdef CONFIG_OF
-   DRM_ERROR("failed to attach bridge %pOF to encoder %s: %d\n",
- bridge->of_node, encoder->name, ret);
+   DRM_ERROR("failed to attach bridge %pOF to encoder %s: %d\n",
+   bridge->of_node, encoder->name, ret);
 #else
-   DRM_ERROR("failed to attach bridge to encoder %s: %d\n",
- encoder->name, ret);
+   DRM_ERROR("failed to attach bridge to encoder %s: %d\n",
+   encoder->name, ret);
 #endif
+   }
 
return ret;
 }
-- 
2.34.1



Re: [PATCH v2 09/34] drm/amd/display: add plane HDR multiplier driver-specific property

2023-08-22 Thread Pekka Paalanen
On Thu, 10 Aug 2023 15:02:49 -0100
Melissa Wen  wrote:

> From: Joshua Ashton 
> 
> Multiplier to 'gain' the plane. When PQ is decoded using the fixed func
> transfer function to the internal FP16 fb, 1.0 -> 80 nits (on AMD at
> least) When sRGB is decoded, 1.0 -> 1.0.  Therefore, 1.0 multiplier = 80
> nits for SDR content. So if you want, 203 nits for SDR content, pass in
> (203.0 / 80.0).

Does this mean that the fixed-function PQ EOTF is
actually [0, 128] -> [0, 1]?

How do you decode an integer pixel format into [0, 128] so it can be
fed through PQ EOTF?

And how do blocks after the PQ EOTF deal with the [0, 1] domain,
when any other EOTF would produce [0, 1]?


Thanks,
pq

> 
> Signed-off-by: Joshua Ashton 
> Co-developed-by: Melissa Wen 
> Signed-off-by: Melissa Wen 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h   |  4 ++++
>  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h  | 14 ++++++++++++++
>  .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c|  6 ++++++
>  .../drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c| 13 +++++++++++++
>  4 files changed, 37 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
> index 6ef958a14e16..66bae0eed80c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
> @@ -359,6 +359,10 @@ struct amdgpu_mode_info {
>* to go from scanout/encoded values to linear values.
>*/
>   struct drm_property *plane_degamma_tf_property;
> + /**
> +  * @plane_hdr_mult_property:
> +  */
> + struct drm_property *plane_hdr_mult_property;
>  };
>  
>  #define AMDGPU_MAX_BL_LEVEL 0xFF
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> index f6251ed89684..44f17ac11a5f 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> @@ -54,6 +54,9 @@
>  #define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_IEEE_REGISTRATION_ID 0x1A
>  #define AMD_VSDB_VERSION_3_FEATURECAP_REPLAYMODE 0x40
>  #define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_VERSION_3 0x3
> +
> +#define AMDGPU_HDR_MULT_DEFAULT (0x100000000LL)
> +
>  /*
>  #include "include/amdgpu_dal_power_if.h"
>  #include "amdgpu_dm_irq.h"
> @@ -755,6 +758,17 @@ struct dm_plane_state {
>* linearize.
>*/
>   enum amdgpu_transfer_function degamma_tf;
> + /**
> +  * @hdr_mult:
> +  *
> +  * Multiplier to 'gain' the plane.  When PQ is decoded using the fixed
> +  * func transfer function to the internal FP16 fb, 1.0 -> 80 nits (on
> +  * AMD at least). When sRGB is decoded, 1.0 -> 1.0, obviously.
> +  * Therefore, 1.0 multiplier = 80 nits for SDR content.  So if you
> +  * want, 203 nits for SDR content, pass in (203.0 / 80.0).  Format is
> +  * S31.32 sign-magnitude.
> +  */
> + __u64 hdr_mult;
>  };
>  
>  struct dm_crtc_state {
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> index 7f13bcdaf016..b891aaf5f7c1 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> @@ -203,6 +203,12 @@ amdgpu_dm_create_color_properties(struct amdgpu_device 
> *adev)
>   return -ENOMEM;
>   adev->mode_info.plane_degamma_tf_property = prop;
>  
> + prop = drm_property_create_range(adev_to_drm(adev),
> +  0, "AMD_PLANE_HDR_MULT", 0, U64_MAX);
> + if (!prop)
> + return -ENOMEM;
> + adev->mode_info.plane_hdr_mult_property = prop;
> +
>   return 0;
>  }
>  #endif
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
> index 0a955abb1abf..ab7f0332c431 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
> @@ -1331,6 +1331,7 @@ static void dm_drm_plane_reset(struct drm_plane *plane)
>  
>   __drm_atomic_helper_plane_reset(plane, &amdgpu_state->base);
>   amdgpu_state->degamma_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
> + amdgpu_state->hdr_mult = AMDGPU_HDR_MULT_DEFAULT;
>  }
>  
>  static struct drm_plane_state *
> @@ -1354,6 +1355,7 @@ dm_drm_plane_duplicate_state(struct drm_plane *plane)
>   drm_property_blob_get(dm_plane_state->degamma_lut);
>  
>   dm_plane_state->degamma_tf = old_dm_plane_state->degamma_tf;
> + dm_plane_state->hdr_mult = old_dm_plane_state->hdr_mult;
>  
>   return &dm_plane_state->base;
>  }
> @@ -1450,6 +1452,10 @@ dm_atomic_plane_attach_color_mgmt_properties(struct 
> amdgpu_display_manager *dm,
>  
> dm->adev->mode_info.plane_degamma_tf_property,
>  AMDGPU_TRANSFER_FUNCTION_DEFAULT);
>  

[PATCH 1/4] Revert "drm/amd/display: drop unused count variable in create_eml_sink()"

2023-08-22 Thread Jani Nikula
This reverts commit 8789989b476b5f3bb0bf1a63b5223f6e76cfd13d.

Dependency for reverting the next commit cleanly.

Cc: Alex Deucher 
Cc: Alex Hung 
Cc: Chao-kai Wang 
Cc: Daniel Wheeler 
Cc: Harry Wentland 
Cc: Hersen Wu 
Cc: Leo Li 
Cc: Rodrigo Siqueira 
Cc: Wenchieh Chien 
Cc: David Airlie 
Cc: Daniel Vetter 
Signed-off-by: Jani Nikula 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 268cb99a4c4b..3e132438bc13 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -6373,8 +6373,9 @@ static void create_eml_sink(struct amdgpu_dm_connector 
*aconnector)
/* if connector->edid_override valid, pass
 * it to edid_override to edid_blob_ptr
 */
+   int count;
 
-   drm_edid_override_connector_update(&aconnector->base);
+   count = drm_edid_override_connector_update(&aconnector->base);
 
if (!aconnector->base.edid_blob_ptr) {
DRM_ERROR("No EDID firmware found on connector: %s 
,forcing to OFF!\n",
-- 
2.39.2



[PATCH 2/4] Revert "drm/amd/display: assign edid_blob_ptr with edid from debugfs"

2023-08-22 Thread Jani Nikula
This reverts commit 550e5d23f14784e2a625c25fe0c9d498589c9256.

drm_edid_override_connector_update() is *not* supposed to be used by
drivers directly.

From the documentation:

  Only to be used from drm_helper_probe_single_connector_modes() as a
  fallback for when DDC probe failed during drm_get_edid() and caused
  the override/firmware EDID to be skipped.

It's impossible to unify firmware and override EDID handling and property
updates if drivers mess with this directly.

Cc: Alex Deucher 
Cc: Alex Hung 
Cc: Chao-kai Wang 
Cc: Daniel Wheeler 
Cc: Harry Wentland 
Cc: Hersen Wu 
Cc: Leo Li 
Cc: Rodrigo Siqueira 
Cc: Wenchieh Chien 
Cc: David Airlie 
Cc: Daniel Vetter 
Signed-off-by: Jani Nikula 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c   | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 3e132438bc13..c7c1260b7b6e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -6370,20 +6370,11 @@ static void create_eml_sink(struct amdgpu_dm_connector 
*aconnector)
struct edid *edid;
 
if (!aconnector->base.edid_blob_ptr) {
-   /* if connector->edid_override valid, pass
-* it to edid_override to edid_blob_ptr
-*/
-   int count;
-
-   count = drm_edid_override_connector_update(&aconnector->base);
-
-   if (!aconnector->base.edid_blob_ptr) {
-   DRM_ERROR("No EDID firmware found on connector: %s 
,forcing to OFF!\n",
-   aconnector->base.name);
+   DRM_ERROR("No EDID firmware found on connector: %s ,forcing to 
OFF!\n",
+   aconnector->base.name);
 
-   aconnector->base.force = DRM_FORCE_OFF;
-   return;
-   }
+   aconnector->base.force = DRM_FORCE_OFF;
+   return;
}
 
edid = (struct edid *) aconnector->base.edid_blob_ptr->data;
-- 
2.39.2



[PATCH 3/4] Revert "drm/amd/display: mark amdgpu_dm_connector_funcs_force static"

2023-08-22 Thread Jani Nikula
This reverts commit dae343b343ff741d727312b2a9b03d86e64b31c5.

Dependency for reverting the next commit cleanly.

Cc: Alex Deucher 
Cc: Alex Hung 
Cc: Chao-kai Wang 
Cc: Daniel Wheeler 
Cc: Harry Wentland 
Cc: Hersen Wu 
Cc: Leo Li 
Cc: Rodrigo Siqueira 
Cc: Wenchieh Chien 
Cc: David Airlie 
Cc: Daniel Vetter 
Signed-off-by: Jani Nikula 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index c7c1260b7b6e..adfe2fcb915c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -6317,7 +6317,7 @@ amdgpu_dm_connector_late_register(struct drm_connector 
*connector)
return 0;
 }
 
-static void amdgpu_dm_connector_funcs_force(struct drm_connector *connector)
+void amdgpu_dm_connector_funcs_force(struct drm_connector *connector)
 {
struct amdgpu_dm_connector *aconnector = 
to_amdgpu_dm_connector(connector);
struct dc_link *dc_link = aconnector->dc_link;
-- 
2.39.2



[PATCH 4/4] Revert "drm/amd/display: implement force function in amdgpu_dm_connector_funcs"

2023-08-22 Thread Jani Nikula
This reverts commit 0ba4a784a14592abed41873e339eab78ceb6e230.

drm_edid_override_connector_update() is *not* supposed to be used by
drivers directly.

From the documentation:

  Only to be used from drm_helper_probe_single_connector_modes() as a
  fallback for when DDC probe failed during drm_get_edid() and caused
  the override/firmware EDID to be skipped.

It's impossible to unify firmware and override EDID handling and property
updates if drivers mess with this directly.

Cc: Alex Deucher 
Cc: Alex Hung 
Cc: Chao-kai Wang 
Cc: Daniel Wheeler 
Cc: Harry Wentland 
Cc: Hersen Wu 
Cc: Leo Li 
Cc: Rodrigo Siqueira 
Cc: Wenchieh Chien 
Cc: David Airlie 
Cc: Daniel Vetter 
Signed-off-by: Jani Nikula 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 28 +--
 1 file changed, 1 insertion(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index adfe2fcb915c..25151085508f 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -6317,31 +6317,6 @@ amdgpu_dm_connector_late_register(struct drm_connector 
*connector)
return 0;
 }
 
-void amdgpu_dm_connector_funcs_force(struct drm_connector *connector)
-{
-   struct amdgpu_dm_connector *aconnector = 
to_amdgpu_dm_connector(connector);
-   struct dc_link *dc_link = aconnector->dc_link;
-   struct dc_sink *dc_em_sink = aconnector->dc_em_sink;
-   struct edid *edid;
-
-   if (!connector->edid_override)
-   return;
-
-   drm_edid_override_connector_update(&aconnector->base);
-   edid = aconnector->base.edid_blob_ptr->data;
-   aconnector->edid = edid;
-
-   /* Update emulated (virtual) sink's EDID */
-   if (dc_em_sink && dc_link) {
-   memset(&dc_em_sink->edid_caps, 0, sizeof(struct dc_edid_caps));
-   memmove(dc_em_sink->dc_edid.raw_edid, edid, (edid->extensions + 
1) * EDID_LENGTH);
-   dm_helpers_parse_edid_caps(
-   dc_link,
-   &dc_em_sink->dc_edid,
-   &dc_em_sink->edid_caps);
-   }
-}
-
 static const struct drm_connector_funcs amdgpu_dm_connector_funcs = {
.reset = amdgpu_dm_connector_funcs_reset,
.detect = amdgpu_dm_connector_detect,
@@ -6352,8 +6327,7 @@ static const struct drm_connector_funcs 
amdgpu_dm_connector_funcs = {
.atomic_set_property = amdgpu_dm_connector_atomic_set_property,
.atomic_get_property = amdgpu_dm_connector_atomic_get_property,
.late_register = amdgpu_dm_connector_late_register,
-   .early_unregister = amdgpu_dm_connector_unregister,
-   .force = amdgpu_dm_connector_funcs_force
+   .early_unregister = amdgpu_dm_connector_unregister
 };
 
 static int get_modes(struct drm_connector *connector)
-- 
2.39.2



[PATCH 0/4] drm/amd/display: stop using drm_edid_override_connector_update()

2023-08-22 Thread Jani Nikula
Over the past years I've been trying to unify the override and firmware
EDID handling as well as EDID property updates. It won't work if drivers
do their own random things.

BR,
Jani.


Cc: Alex Deucher 
Cc: Alex Hung 
Cc: Chao-kai Wang 
Cc: Daniel Wheeler 
Cc: Harry Wentland 
Cc: Hersen Wu 
Cc: Leo Li 
Cc: Rodrigo Siqueira 
Cc: Wenchieh Chien 
Cc: David Airlie 
Cc: Daniel Vetter 

Jani Nikula (4):
  Revert "drm/amd/display: drop unused count variable in
create_eml_sink()"
  Revert "drm/amd/display: assign edid_blob_ptr with edid from debugfs"
  Revert "drm/amd/display: mark amdgpu_dm_connector_funcs_force static"
  Revert "drm/amd/display: implement force function in
amdgpu_dm_connector_funcs"

 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 44 +++
 1 file changed, 5 insertions(+), 39 deletions(-)

-- 
2.39.2



Re: [PATCH 4/4] Revert "drm/amd/display: implement force function in amdgpu_dm_connector_funcs"

2023-08-22 Thread Jani Nikula
On Tue, 22 Aug 2023, Jani Nikula  wrote:
> This reverts commit 0ba4a784a14592abed41873e339eab78ceb6e230.
>
> drm_edid_override_connector_update() is *not* supposed to be used by
> drivers directly.
>
> From the documentation:
>
>   Only to be used from drm_helper_probe_single_connector_modes() as a
>   fallback for when DDC probe failed during drm_get_edid() and caused
>   the override/firmware EDID to be skipped.
>
> It's impossible to unify firmware and override EDID handling and property
> updates if drivers mess with this directly.
>
> Cc: Alex Deucher 
> Cc: Alex Hung 
> Cc: Chao-kai Wang 
> Cc: Daniel Wheeler 
> Cc: Harry Wentland 
> Cc: Hersen Wu 
> Cc: Leo Li 
> Cc: Rodrigo Siqueira 
> Cc: Wenchieh Chien 
> Cc: David Airlie 
> Cc: Daniel Vetter 
> Signed-off-by: Jani Nikula 
> ---
>  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 28 +--
>  1 file changed, 1 insertion(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index adfe2fcb915c..25151085508f 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -6317,31 +6317,6 @@ amdgpu_dm_connector_late_register(struct drm_connector 
> *connector)
>   return 0;
>  }
>  
> -void amdgpu_dm_connector_funcs_force(struct drm_connector *connector)
> -{
> - struct amdgpu_dm_connector *aconnector = 
> to_amdgpu_dm_connector(connector);
> - struct dc_link *dc_link = aconnector->dc_link;
> - struct dc_sink *dc_em_sink = aconnector->dc_em_sink;
> - struct edid *edid;
> -
> - if (!connector->edid_override)
> - return;

This one too, also documented:

/**
 * @edid_override: Override EDID set via debugfs.
 *
 * Do not modify or access outside of the drm_edid_override_* family of
 * functions.
 */

> -
> - drm_edid_override_connector_update(&aconnector->base);
> - edid = aconnector->base.edid_blob_ptr->data;
> - aconnector->edid = edid;
> -
> - /* Update emulated (virtual) sink's EDID */
> - if (dc_em_sink && dc_link) {
> - memset(&dc_em_sink->edid_caps, 0, sizeof(struct dc_edid_caps));
> - memmove(dc_em_sink->dc_edid.raw_edid, edid, (edid->extensions + 
> 1) * EDID_LENGTH);
> - dm_helpers_parse_edid_caps(
> - dc_link,
> - &dc_em_sink->dc_edid,
> - &dc_em_sink->edid_caps);
> - }
> -}
> -
>  static const struct drm_connector_funcs amdgpu_dm_connector_funcs = {
>   .reset = amdgpu_dm_connector_funcs_reset,
>   .detect = amdgpu_dm_connector_detect,
> @@ -6352,8 +6327,7 @@ static const struct drm_connector_funcs 
> amdgpu_dm_connector_funcs = {
>   .atomic_set_property = amdgpu_dm_connector_atomic_set_property,
>   .atomic_get_property = amdgpu_dm_connector_atomic_get_property,
>   .late_register = amdgpu_dm_connector_late_register,
> - .early_unregister = amdgpu_dm_connector_unregister,
> - .force = amdgpu_dm_connector_funcs_force
> + .early_unregister = amdgpu_dm_connector_unregister
>  };
>  
>  static int get_modes(struct drm_connector *connector)

-- 
Jani Nikula, Intel Open Source Graphics Center


Re: [PATCH v2 19/34] drm/amd/display: decouple steps for mapping CRTC degamma to DC plane

2023-08-22 Thread Pekka Paalanen
On Thu, 10 Aug 2023 15:02:59 -0100
Melissa Wen  wrote:

> The next patch adds pre-blending degamma to AMD color mgmt pipeline, but
> pre-blending degamma caps (DPP) is currently in use to provide DRM CRTC
> atomic degamma or implicit degamma on legacy gamma. Detach degamma usage
> regarding CRTC color properties to manage plane and CRTC color
> correction combinations.
> 
> Reviewed-by: Harry Wentland 
> Signed-off-by: Melissa Wen 
> ---
>  .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 59 +--
>  1 file changed, 41 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> index 68e9f2c62f2e..74eb02655d96 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> @@ -764,20 +764,9 @@ int amdgpu_dm_update_crtc_color_mgmt(struct 
> dm_crtc_state *crtc)
>   return 0;
>  }
>  
> -/**
> - * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane.
> - * @crtc: amdgpu_dm crtc state
> - * @dc_plane_state: target DC surface
> - *
> - * Update the underlying dc_stream_state's input transfer function (ITF) in
> - * preparation for hardware commit. The transfer function used depends on
> - * the preparation done on the stream for color management.
> - *
> - * Returns:
> - * 0 on success. -ENOMEM if mem allocation fails.
> - */
> -int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
> -   struct dc_plane_state *dc_plane_state)
> +static int
> +map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
> +  struct dc_plane_state *dc_plane_state)
>  {
>   const struct drm_color_lut *degamma_lut;
>   enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB;
> @@ -800,8 +789,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct 
> dm_crtc_state *crtc,
>   &degamma_size);
>   ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES);
>  
> - dc_plane_state->in_transfer_func->type =
> - TF_TYPE_DISTRIBUTED_POINTS;
> + dc_plane_state->in_transfer_func->type = 
> TF_TYPE_DISTRIBUTED_POINTS;
>  
>   /*
>* This case isn't fully correct, but also fairly
> @@ -837,7 +825,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct 
> dm_crtc_state *crtc,
>  degamma_lut, degamma_size);
>   if (r)
>   return r;
> - } else if (crtc->cm_is_degamma_srgb) {
> + } else {
>   /*
>* For legacy gamma support we need the regamma input
>* in linear space. Assume that the input is sRGB.
> @@ -847,8 +835,43 @@ int amdgpu_dm_update_plane_color_mgmt(struct 
> dm_crtc_state *crtc,
>  
>   if (tf != TRANSFER_FUNCTION_SRGB &&
>   !mod_color_calculate_degamma_params(NULL,
> - dc_plane_state->in_transfer_func, NULL, false))
> + 
> dc_plane_state->in_transfer_func,
> + NULL, false))
>   return -ENOMEM;
> + }
> +
> + return 0;
> +}
> +
> +/**
> + * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane.
> + * @crtc: amdgpu_dm crtc state
> + * @dc_plane_state: target DC surface
> + *
> + * Update the underlying dc_stream_state's input transfer function (ITF) in
> + * preparation for hardware commit. The transfer function used depends on
> + * the preparation done on the stream for color management.
> + *
> + * Returns:
> + * 0 on success. -ENOMEM if mem allocation fails.
> + */
> +int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
> +   struct dc_plane_state *dc_plane_state)
> +{
> + bool has_crtc_cm_degamma;
> + int ret;
> +
> + has_crtc_cm_degamma = (crtc->cm_has_degamma || 
> crtc->cm_is_degamma_srgb);
> + if (has_crtc_cm_degamma){
> + /* AMD HW doesn't have post-blending degamma caps. When DRM
> +  * CRTC atomic degamma is set, we maps it to DPP degamma block
> +  * (pre-blending) or, on legacy gamma, we use DPP degamma to
> +  * linearize (implicit degamma) from sRGB/BT709 according to
> +  * the input space.

Uhh, you can't just move degamma before blending if KMS userspace
wants it after blending. That would be incorrect behaviour. If you
can't implement it correctly, reject it.

I hope that magical unexpected linearization is not done with atomic,
either.

Or maybe this is all a lost cause, and only the new color-op pipeline
UAPI will actually work across drivers.


Thanks,
pq

> +  */
> + ret = map_crtc_degamma_to_dc_plane(crtc, dc_plane_state);
> + if (ret)
> + r

Re: [PATCH v2 3/7] drm/amdgpu: Add new function to put GPU power profile

2023-08-22 Thread Yadav, Arvind

Hi Lijo,

The *_set function will set the GPU power profile, and the *_put function
will schedule the smu_delayed_work task after a 100 ms delay. This
smu_delayed_work task will clear a GPU power profile if no new jobs are
scheduled within 100 ms. But if any new job comes within 100 ms, then the
*_workload_profile_set function will cancel this work and set the GPU
power profile based on preferences.

Please see the below case.

case 1 - only same profile jobs run. It will take 100ms to clear the 
profile once all jobs complete.


                                       wl = VIDEO <100ms>
workload _|`|

Jobs (VIDEO) |```|__|```|___||___


Case 2 - two jobs with two different profiles. The job1 profile will be set,
but when job2 arrives the workload will be moved to the higher profile.

                 wl = VIDEO  ->    wl = COMPUTE     
  <100ms>
workload 
___|``|


Jobs (VIDEO) ___|```|__|```|___||___||___

Jobs (COMPUTE) __|```|___||___||_



Case 3 - two jobs with two different profiles. The job1 profile will be set,
but when job2 arrives the workload will not be moved to the lower profile.
Only when the compute job completes will it move to the lower profile.


                                     wl = COMPUTE 
->   wl = VIDEO  <100ms>
workload 
_|``|


Jobs (COMPUTE)    |```|__|```|___||___||___

Jobs (VIDEO) ___|```|___||___||___||___

On 8/22/2023 10:21 AM, Lazar, Lijo wrote:



On 8/21/2023 12:17 PM, Arvind Yadav wrote:

This patch adds a function which will clear the GPU
power profile after job finished.

This is how it works:
- The scheduler will set the GPU power profile based on ring_type.
- The scheduler will clear the GPU power profile once the job has finished.
- Here, the *_workload_profile_set function will set the GPU
   power profile and the *_workload_profile_put function will
   schedule the smu_delayed_work task after 100ms delay. This
   smu_delayed_work task will clear a GPU power profile if any
   new jobs are not scheduled within 100 ms. But if any new job
   comes within 100ms then the *_workload_profile_set function
   will cancel this work and set the GPU power profile based on
   preferences.

v2:
- Splitting workload_profile_set and workload_profile_put
   into two separate patches.
- Addressed review comment.

Cc: Shashank Sharma 
Cc: Christian Koenig 
Cc: Alex Deucher 
Signed-off-by: Arvind Yadav 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c  | 97 +++
  drivers/gpu/drm/amd/include/amdgpu_workload.h |  3 +
  2 files changed, 100 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c

index e661cc5b3d92..6367eb88a44d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
@@ -24,6 +24,9 @@
    #include "amdgpu.h"
  +/* 100 millsecond timeout */
+#define SMU_IDLE_TIMEOUT    msecs_to_jiffies(100)
+
  static enum PP_SMC_POWER_PROFILE
  ring_to_power_profile(uint32_t ring_type)
  {
@@ -59,6 +62,80 @@ amdgpu_power_profile_set(struct amdgpu_device *adev,
  return ret;
  }
  +static int
+amdgpu_power_profile_clear(struct amdgpu_device *adev,
+   enum PP_SMC_POWER_PROFILE profile)
+{
+    int ret = amdgpu_dpm_switch_power_profile(adev, profile, false);
+
+    if (!ret) {
+    /* Clear the bit for the submitted workload profile */
+    adev->smu_workload.submit_workload_status &= ~(1 << profile);
+    }
+
+    return ret;
+}
+
+static void
+amdgpu_power_profile_idle_work_handler(struct work_struct *work)
+{
+
+    struct amdgpu_smu_workload *workload = container_of(work,
+  struct amdgpu_smu_workload,
+  smu_delayed_work.work);
+    struct amdgpu_device *adev = workload->adev;
+    bool reschedule = false;
+    int index  = fls(workload->submit_workload_status);
+    int ret;
+
+    mutex_lock(&workload->workload_lock);
+    for (; index > 0; index--) {


Why not use for_each_set_bit?


We only clear the profiles that we have set. We clear the higher
profile first, then the lower ones.






+    int val = atomic_read(&workload->power_profile_ref[index]);
+
+    if (val) {
+    reschedule = true;


Why do you need to do reschedule? For each put(), a schedule is 
called. If refcount is not zero, that means some other job has already 
set the profile. It is supposed to call put() and at that time, this 
job will be run to clear it anyway, right?



Yes, I have already received this comment, and I am going to remove this.
Noted.


+    } else {
+    if (workload->submit_workload_status &
+    (1 << index)) {
+    ret = amdgpu_

Re: [PATCH v2 4/7] drm/amdgpu: Add suspend function to clear the GPU power profile.

2023-08-22 Thread Yadav, Arvind



On 8/22/2023 12:01 PM, Lazar, Lijo wrote:



On 8/21/2023 12:17 PM, Arvind Yadav wrote:

This patch adds a suspend function that will clear the GPU
power profile before going into suspend state.

v2:
- Add the new suspend function based on review comment.

Cc: Shashank Sharma 
Cc: Christian Koenig 
Cc: Alex Deucher 
Signed-off-by: Arvind Yadav 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |  2 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c  | 23 +++
  drivers/gpu/drm/amd/include/amdgpu_workload.h |  2 ++
  3 files changed, 27 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index cd3bf641b630..3b70e657b439 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4212,6 +4212,8 @@ int amdgpu_device_suspend(struct drm_device 
*dev, bool fbcon)

    amdgpu_ras_suspend(adev);
  +    amdgpu_workload_profile_suspend(adev);
+
  amdgpu_device_ip_suspend_phase1(adev);
    if (!adev->in_s0ix)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c

index 6367eb88a44d..44ca8e986984 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
@@ -174,6 +174,29 @@ void amdgpu_workload_profile_set(struct 
amdgpu_device *adev,

  mutex_unlock(&workload->workload_lock);
  }
  +void amdgpu_workload_profile_suspend(struct amdgpu_device *adev)
+{
+    struct amdgpu_smu_workload *workload = &adev->smu_workload;
+    int ret;
+
+    mutex_lock(&workload->workload_lock);
+ cancel_delayed_work_sync(&workload->smu_delayed_work);


Another deadlock candidate. Between fini() and suspend(), the only 
difference probably could be initialization status. If so, just use a 
helper that is used during fini() and suspend().


Before going to suspend(), we need to cancel the work and clear all the
profiles, but in fini() we are destroying the mutex. Also, it will be
called when we are unloading everything.


~Arvind


Thanks,
Lijo


+
+    /* Clear all the set GPU power profile*/
+    for (int index = fls(workload->submit_workload_status);
+ index > 0; index--) {
+    if (workload->submit_workload_status & (1 << index)) {
+ atomic_set(&workload->power_profile_ref[index], 0);
+    ret = amdgpu_power_profile_clear(adev, index);
+    if (ret)
+    DRM_WARN("Failed to clear power profile %s, err = 
%d\n",

+ amdgpu_workload_mode_name[index], ret);
+    }
+    }
+    workload->submit_workload_status = 0;
+    mutex_unlock(&workload->workload_lock);
+}
+
  void amdgpu_workload_profile_init(struct amdgpu_device *adev)
  {
  adev->smu_workload.adev = adev;
diff --git a/drivers/gpu/drm/amd/include/amdgpu_workload.h 
b/drivers/gpu/drm/amd/include/amdgpu_workload.h

index ee1f87257f2d..0acd8769ec52 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_workload.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_workload.h
@@ -52,6 +52,8 @@ void amdgpu_workload_profile_put(struct 
amdgpu_device *adev,

  void amdgpu_workload_profile_set(struct amdgpu_device *adev,
   uint32_t ring_type);
  +void amdgpu_workload_profile_suspend(struct amdgpu_device *adev);
+
  void amdgpu_workload_profile_init(struct amdgpu_device *adev);
    void amdgpu_workload_profile_fini(struct amdgpu_device *adev);


Re: [PATCH v1 0/3] udmabuf: Add support for page migration out of movable zone or CMA

2023-08-22 Thread Jason Gunthorpe
On Tue, Aug 22, 2023 at 05:36:56AM +, Kasireddy, Vivek wrote:
> Hi Jason,
> 
> > > This patch series adds support for migrating pages associated with
> > > a udmabuf out of the movable zone or CMA to avoid breaking features
> > > such as memory hotunplug.
> > >
> > > The first patch exports check_and_migrate_movable_pages() function
> > > out of GUP so that the udmabuf driver can leverage it for page
> > > migration that is done as part of the second patch. The last patch
> > > adds two new udmabuf selftests to verify data coherency after
> > > page migration.
> > 
> > Please don't do this. If you want to do what GUP does then call
> > GUP. udmabuf is not so special that it needs to open code its own
> > weird version of it.
> We can't call GUP directly as explained in the first patch of this series:
> "For drivers that would like to migrate pages out of the movable
> zone (or CMA) in order to pin them (longterm) for DMA, using
> check_and_migrate_movable_pages() directly provides a convenient
> option instead of duplicating similar checks (e.g, checking
> the folios for zone, hugetlb, etc) and calling migrate_pages()
> directly.
> 
> Ideally, a driver is expected to call pin_user_pages(FOLL_LONGTERM)
> to migrate and pin the pages for longterm DMA but there are
> situations where the GUP APIs cannot be used directly for
> various reasons (e.g, when the VMA or start addr cannot be
> easily determined but the relevant pages are available)."
> 
> Given the current (model and) UAPI (udmabuf_create), the userspace
> only shares (memfd, offset, size) values that we use to find the
> relevant pages and pin them (by doing get_page()). Since the goal
> is to also migrate these pages, I think we have the following options:

This seems like a bad choice of uAPI - we don't have any kernel
support for pinning from a memfd. If you want this then you have to
build this as generic code, not open code it into udmabuf.

> - Leverage check_and_migrate_movable_pages(); but this function
>   needs to be exported from GUP.

GUP has many behaviors, we keep adding more, these functions should
not leak out of the mm core code into drivers.
 
> - Iterate over all the pages (in udmabuf) to check for 
> folio_is_longterm_pinnable()
>   and call migrate_pages() eventually. This requires changes only to
>   the udmabuf driver but we'd be duplicating much of the functionality
>   provided by check_and_migrate_movable_pages().

Definitely not

> - Call pin_user_pages_fast(FOLL_LONGTERM) from udmabuf driver. In
>   order to do this, we have to first unpin all pages and iterate over all
>   the VMAs of the VMM to identify the Guest RAM VMA and then use
>   page_address_in_vma() to find the start addr of the ranges and then
>   call GUP. Although this approach is feasible, it feels a bit convoluted.

Userspace should have told the kernel where the memfd is mapped.
 
> - Add a new udmabuf UAPI to have userspace share (start addr, len) values
>   so that the udmabuf driver can directly call GUP APIs. But this means all
>   the current users of udmabuf such as Qemu, CrosVM, etc, need to be
>   updated to use the new UAPI. 

There you go
 
> - Add a new API to the backing store/allocator to longterm-pin the page.
>   For example, something along the lines of shmem_pin_mapping_page_longterm()
>   for shmem as suggested by Daniel. A similar one needs to be added for
>   hugetlbfs as well.

This may also be reasonable.

Jason


Re: [RFC PATCH v2 06/11] page-pool: add device memory support

2023-08-22 Thread Jesper Dangaard Brouer




On 22/08/2023 08.05, Mina Almasry wrote:

On Sat, Aug 19, 2023 at 2:51 AM Jesper Dangaard Brouer
 wrote:


On 10/08/2023 03.57, Mina Almasry wrote:

Overload the LSB of struct page* to indicate that it's a page_pool_iov.

Refactor mm calls on struct page * into helpers, and add page_pool_iov
handling on those helpers. Modify callers of these mm APIs with calls to
these helpers instead.



I don't like this approach.
This is adding code to the PP (page_pool) fast-path in multiple places.

I've not had time to run my usual benchmarks, which are here:

https://github.com/netoptimizer/prototype-kernel/blob/master/kernel/lib/bench_page_pool_simple.c



I ported over this benchmark to my tree and ran it, my results:



What CPU is this and GHz?  (I guess 2.6 GHz based on results).

(It looks like this CPU is more efficient, in instructions per cycle, than
my E5-1650 v4 @ 3.60GHz).



net-next @ b44693495af8
https://pastebin.com/raw/JuU7UQXe

+ Jakub's memory-provider APIs:
https://pastebin.com/raw/StMBhetn

+ devmem TCP changes:
https://pastebin.com/raw/mY1L6U4r



Only a single cycle slowdown for "page_pool01_fast_path".
From 10 cycles to 11 cycles.


+ intentional regression just to make sure the benchmark is working:
https://pastebin.com/raw/wqWhcJdG

I don't seem to be able to detect a regression with this series as-is,
but I'm not that familiar with the test and may be doing something
wrong or misinterpreting the results. Does this look ok to you?



The performance results are better than I expected.  The small
regression from 10 cycles to 11 cycles is actually 10%, but I expect
with some likely/unlikely instrumentation we can "likely" remove this again.

So, this change actually looks acceptable from a performance PoV.
I still think this page_pool_iov is very invasive to page_pool, but
maybe it is better to hide this "ugliness" inside page_pool.

The test primarily tests the fast-path, and you also add "if" statements to
all the DMA operations, which are not part of this benchmark.  Perhaps we
can add unlikely statements, or inspect (objdump) the ASM to check that the
code prioritizes the original page-based "provider".



But I'm sure it will affect performance.



Guess, I was wrong ;-)

--Jesper



Regardless of performance, this approach of using ptr-LSB-bits to hide
that page-pointers are not really struct-pages feels like force-feeding
a solution just to use the page_pool APIs.



In areas where struct page* is dereferenced, add a check for special
handling of page_pool_iov.

The memory providers producing page_pool_iov can set the LSB on the
struct page* returned to the page pool.

Note that instead of overloading the LSB of page pointers, we can
instead define a new union between struct page & struct page_pool_iov and
compact it in a new type. However, we'd need to implement the code churn
to modify the page_pool & drivers to use this new type. For this POC
that is not implemented (feedback welcome).



I've said before, that I prefer multiplexing on page->pp_magic.
For your page_pool_iov the layout would have to match the offset of
pp_magic, to do this. (And if insisting on using PP infra the refcnt
would also need to align).

On the allocation side, all drivers already use a driver helper
page_pool_dev_alloc_pages() or we could add another (better named)
helper to multiplex between other types of allocators, e.g. a devmem
allocator.

On free/return/recycle the functions napi_pp_put_page or skb_pp_recycle
could multiplex on pp_magic and call another API.  The API could be an
extension to PP helpers, but it could also be a devmap allocator helper.

IMHO forcing/piggy-backing everything into page_pool is not the right
solution.  I really think netstack needs to support different allocator
types. The page pool has been leading the way, yes, but perhaps it is
time to add an API layer that e.g. could be named netmem, that gives us
the multiplexing between allocators.  In that process some of the page_pool
APIs would be lifted out as common blocks and others would remain.

--Jesper


I have a sample implementation of adding a new page_pool_token type
in the page_pool to give a general idea here:
https://github.com/torvalds/linux/commit/3a7628700eb7fd02a117db036003bca50779608d

Full branch here:
https://github.com/torvalds/linux/compare/master...mina:linux:tcpdevmem-pp-tokens

(In the branches above, page_pool_iov is called devmem_slice).

Could also add static_branch to speed up the checks in page_pool_iov
memory providers are being used.

Signed-off-by: Mina Almasry 
---
   include/net/page_pool.h | 74 ++-
   net/core/page_pool.c| 85 -
   2 files changed, 131 insertions(+), 28 deletions(-)

diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 537eb36115ed..f08ca230d68e 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -282,6 +282,64 @@ static inline struct page_pool_iov 
*page_to_page_pool_iov(struct page *

Re: [PATCH v2 31/34] drm/amd/display: set stream gamut remap matrix to MPC for DCN301

2023-08-22 Thread Pekka Paalanen
On Thu, 10 Aug 2023 15:03:11 -0100
Melissa Wen  wrote:

> dc->caps.color.mpc.gamut_remap says there is a post-blending color block
> for gamut remap matrix for DCN3 HW family and newer versions. However,
> those drivers still follow DCN10 programming that remap stream
> gamut_remap_matrix to DPP (pre-blending).

That's ok only as long as CRTC degamma is pass-through. Blending itself
is a linear operation, so it doesn't matter if a matrix is applied to
the blending result or to all blending inputs. But you cannot move a
matrix operation to the other side of a non-linear operation, and you
cannot move a non-linear operation across blending.


Thanks,
pq

> To enable pre-blending and post-blending gamut_remap matrix supports at
> the same time, set stream gamut_remap to MPC and plane gamut_remap to
> DPP for DCN301 that support both.
> 
> It was tested using IGT KMS color tests for DRM CRTC CTM property and it
> preserves test results.
> 
> Signed-off-by: Melissa Wen 
> ---
>  .../drm/amd/display/dc/dcn30/dcn30_hwseq.c| 37 +++
>  .../drm/amd/display/dc/dcn30/dcn30_hwseq.h|  3 ++
>  .../drm/amd/display/dc/dcn301/dcn301_init.c   |  2 +-
>  3 files changed, 41 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c 
> b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
> index 4cd4ae07d73d..4fb4e9ec03f1 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
> @@ -186,6 +186,43 @@ bool dcn30_set_input_transfer_func(struct dc *dc,
>   return result;
>  }
>  
> +void dcn30_program_gamut_remap(struct pipe_ctx *pipe_ctx)
> +{
> + int i = 0;
> + struct dpp_grph_csc_adjustment dpp_adjust;
> + struct mpc_grph_gamut_adjustment mpc_adjust;
> + int mpcc_id = pipe_ctx->plane_res.hubp->inst;
> + struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc;
> +
> + memset(&dpp_adjust, 0, sizeof(dpp_adjust));
> + dpp_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS;
> +
> + if (pipe_ctx->plane_state &&
> + pipe_ctx->plane_state->gamut_remap_matrix.enable_remap == true) {
> + dpp_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW;
> + for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++)
> + dpp_adjust.temperature_matrix[i] =
> + 
> pipe_ctx->plane_state->gamut_remap_matrix.matrix[i];
> + }
> +
> + 
> pipe_ctx->plane_res.dpp->funcs->dpp_set_gamut_remap(pipe_ctx->plane_res.dpp,
> + &dpp_adjust);
> +
> + memset(&mpc_adjust, 0, sizeof(mpc_adjust));
> + mpc_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS;
> +
> + if (pipe_ctx->top_pipe == NULL) {
> + if (pipe_ctx->stream->gamut_remap_matrix.enable_remap == true) {
> + mpc_adjust.gamut_adjust_type = 
> GRAPHICS_GAMUT_ADJUST_TYPE_SW;
> + for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++)
> + mpc_adjust.temperature_matrix[i] =
> + 
> pipe_ctx->stream->gamut_remap_matrix.matrix[i];
> + }
> + }
> +
> + mpc->funcs->set_gamut_remap(mpc, mpcc_id, &mpc_adjust);
> +}
> +
>  bool dcn30_set_output_transfer_func(struct dc *dc,
>   struct pipe_ctx *pipe_ctx,
>   const struct dc_stream_state *stream)
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h 
> b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h
> index a24a8e33a3d2..cb34ca932a5f 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h
> +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h
> @@ -58,6 +58,9 @@ bool dcn30_set_blend_lut(struct pipe_ctx *pipe_ctx,
>  bool dcn30_set_input_transfer_func(struct dc *dc,
>   struct pipe_ctx *pipe_ctx,
>   const struct dc_plane_state *plane_state);
> +
> +void dcn30_program_gamut_remap(struct pipe_ctx *pipe_ctx);
> +
>  bool dcn30_set_output_transfer_func(struct dc *dc,
>   struct pipe_ctx *pipe_ctx,
>   const struct dc_stream_state *stream);
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c 
> b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c
> index 257df8660b4c..81fd50ee97c3 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c
> @@ -33,7 +33,7 @@
>  #include "dcn301_init.h"
>  
>  static const struct hw_sequencer_funcs dcn301_funcs = {
> - .program_gamut_remap = dcn10_program_gamut_remap,
> + .program_gamut_remap = dcn30_program_gamut_remap,
>   .init_hw = dcn10_init_hw,
>   .power_down_on_boot = dcn10_power_down_on_boot,
>   .apply_ctx_to_hw = dce110_apply_ctx_to_hw,



Re: [PATCH -next 2/2] drm/tegra: Use PTR_ERR_OR_ZERO() to simplify code

2023-08-22 Thread Mikko Perttunen

On 8/22/23 10:15, Jinjie Ruan wrote:

Return PTR_ERR_OR_ZERO() instead of return 0 or PTR_ERR() to
simplify code.

Signed-off-by: Jinjie Ruan 
---
  drivers/gpu/drm/tegra/drm.c | 5 +
  drivers/gpu/drm/tegra/gem.c | 5 +
  2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index ff36171c8fb7..4e29d76da1be 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -354,10 +354,7 @@ static int tegra_gem_create(struct drm_device *drm, void 
*data,
  
  	bo = tegra_bo_create_with_handle(file, drm, args->size, args->flags,

 &args->handle);
-   if (IS_ERR(bo))
-   return PTR_ERR(bo);
-
-   return 0;
+   return PTR_ERR_OR_ZERO(bo);
  }
  
  static int tegra_gem_mmap(struct drm_device *drm, void *data,

diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index a4023163493d..11ef0f8cb1e1 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -533,10 +533,7 @@ int tegra_bo_dumb_create(struct drm_file *file, struct 
drm_device *drm,
  
  	bo = tegra_bo_create_with_handle(file, drm, args->size, 0,

 &args->handle);
-   if (IS_ERR(bo))
-   return PTR_ERR(bo);
-
-   return 0;
+   return PTR_ERR_OR_ZERO(bo);
  }
  
  static vm_fault_t tegra_bo_fault(struct vm_fault *vmf)


NAK. See 
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?h=next-20230822&id=b784c77075023e1a71bc06e6b4f711acb99e9c73


Mikko


Re: [PATCH v4 3/3] usb: typec: nb7vpq904m: switch to DRM_AUX_BRIDGE

2023-08-22 Thread Greg Kroah-Hartman
On Thu, Aug 17, 2023 at 05:55:16PM +0300, Dmitry Baryshkov wrote:
> Switch to using the new DRM_AUX_BRIDGE helper to create the
> transparent DRM bridge device instead of handcoding corresponding
> functionality.
> 
> Reviewed-by: Heikki Krogerus 
> Signed-off-by: Dmitry Baryshkov 
> ---
>  drivers/usb/typec/mux/Kconfig  |  2 +-
>  drivers/usb/typec/mux/nb7vpq904m.c | 44 ++
>  2 files changed, 3 insertions(+), 43 deletions(-)

Just take this through the drm tree:

Acked-by: Greg Kroah-Hartman 


Re: [PATCH v2 2/7] drm/amdgpu: Add new function to set GPU power profile

2023-08-22 Thread Yadav, Arvind



On 8/22/2023 11:55 AM, Lazar, Lijo wrote:



On 8/21/2023 12:17 PM, Arvind Yadav wrote:

This patch adds a function which will change the GPU
power profile based on a submitted job. This can optimize
the power performance when the workload is on.

v2:
- Splitting workload_profile_set and workload_profile_put
   into two separate patches.
- Addressed review comment.

Cc: Shashank Sharma 
Cc: Christian Koenig 
Cc: Alex Deucher 
Signed-off-by: Arvind Yadav 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c  | 56 +++
  drivers/gpu/drm/amd/include/amdgpu_workload.h |  3 +
  2 files changed, 59 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c

index 32166f482f77..e661cc5b3d92 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
@@ -24,6 +24,62 @@
    #include "amdgpu.h"
  +static enum PP_SMC_POWER_PROFILE
+ring_to_power_profile(uint32_t ring_type)
+{
+    switch (ring_type) {
+    case AMDGPU_RING_TYPE_GFX:
+    return PP_SMC_POWER_PROFILE_FULLSCREEN3D;
+    case AMDGPU_RING_TYPE_COMPUTE:
+    return PP_SMC_POWER_PROFILE_COMPUTE;
+    case AMDGPU_RING_TYPE_UVD:
+    case AMDGPU_RING_TYPE_VCE:
+    case AMDGPU_RING_TYPE_UVD_ENC:
+    case AMDGPU_RING_TYPE_VCN_DEC:
+    case AMDGPU_RING_TYPE_VCN_ENC:
+    case AMDGPU_RING_TYPE_VCN_JPEG:
+    return PP_SMC_POWER_PROFILE_VIDEO;
+    default:
+    return PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
+    }
+}
+
+static int
+amdgpu_power_profile_set(struct amdgpu_device *adev,
+ enum PP_SMC_POWER_PROFILE profile)
+{
+    int ret = amdgpu_dpm_switch_power_profile(adev, profile, true);
+


You don't need to interact with FW for every set() call. Only send the 
message if workload_status doesn't have the profile set or the refcount is 
zero. Otherwise, you only need to increment the refcount.

Noted.
Thank You,
~Arvind


Thanks,
Lijo


+    if (!ret) {
+    /* Set the bit for the submitted workload profile */
+    adev->smu_workload.submit_workload_status |= (1 << profile);
+ atomic_inc(&adev->smu_workload.power_profile_ref[profile]);
+    }
+
+    return ret;
+}
+
+void amdgpu_workload_profile_set(struct amdgpu_device *adev,
+ uint32_t ring_type)
+{
+    struct amdgpu_smu_workload *workload = &adev->smu_workload;
+    enum PP_SMC_POWER_PROFILE profile = 
ring_to_power_profile(ring_type);

+    int ret;
+
+    if (profile == PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT)
+    return;
+
+    mutex_lock(&workload->workload_lock);
+
+    ret = amdgpu_power_profile_set(adev, profile);
+    if (ret) {
+    DRM_WARN("Failed to set workload profile to %s, error = %d\n",
+ amdgpu_workload_mode_name[profile], ret);
+    }
+
+    mutex_unlock(&workload->workload_lock);
+}
+
  void amdgpu_workload_profile_init(struct amdgpu_device *adev)
  {
  adev->smu_workload.adev = adev;
diff --git a/drivers/gpu/drm/amd/include/amdgpu_workload.h 
b/drivers/gpu/drm/amd/include/amdgpu_workload.h

index 5d0f068422d4..5022f28fc2f9 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_workload.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_workload.h
@@ -46,6 +46,9 @@ static const char * const 
amdgpu_workload_mode_name[] = {

  "Window3D"
  };
  +void amdgpu_workload_profile_set(struct amdgpu_device *adev,
+ uint32_t ring_type);
+
  void amdgpu_workload_profile_init(struct amdgpu_device *adev);
    void amdgpu_workload_profile_fini(struct amdgpu_device *adev);


Re: [PATCH -next 2/2] drm/tegra: Use PTR_ERR_OR_ZERO() to simplify code

2023-08-22 Thread Ruan Jinjie



On 2023/8/22 20:32, Mikko Perttunen wrote:
> On 8/22/23 10:15, Jinjie Ruan wrote:
>> Return PTR_ERR_OR_ZERO() instead of returning 0 or PTR_ERR() to
>> simplify the code.
>>
>> Signed-off-by: Jinjie Ruan 
>> ---
>>   drivers/gpu/drm/tegra/drm.c | 5 +
>>   drivers/gpu/drm/tegra/gem.c | 5 +
>>   2 files changed, 2 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
>> index ff36171c8fb7..4e29d76da1be 100644
>> --- a/drivers/gpu/drm/tegra/drm.c
>> +++ b/drivers/gpu/drm/tegra/drm.c
>> @@ -354,10 +354,7 @@ static int tegra_gem_create(struct drm_device
>> *drm, void *data,
>>     bo = tegra_bo_create_with_handle(file, drm, args->size,
>> args->flags,
>>    &args->handle);
>> -    if (IS_ERR(bo))
>> -    return PTR_ERR(bo);
>> -
>> -    return 0;
>> +    return PTR_ERR_OR_ZERO(bo);
>>   }
>>     static int tegra_gem_mmap(struct drm_device *drm, void *data,
>> diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
>> index a4023163493d..11ef0f8cb1e1 100644
>> --- a/drivers/gpu/drm/tegra/gem.c
>> +++ b/drivers/gpu/drm/tegra/gem.c
>> @@ -533,10 +533,7 @@ int tegra_bo_dumb_create(struct drm_file *file,
>> struct drm_device *drm,
>>     bo = tegra_bo_create_with_handle(file, drm, args->size, 0,
>>    &args->handle);
>> -    if (IS_ERR(bo))
>> -    return PTR_ERR(bo);
>> -
>> -    return 0;
>> +    return PTR_ERR_OR_ZERO(bo);
>>   }
>>     static vm_fault_t tegra_bo_fault(struct vm_fault *vmf)
> 
> NAK. See
> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?h=next-20230822&id=b784c77075023e1a71bc06e6b4f711acb99e9c73

Thank you! It is right.

> 
> Mikko


Re: [PATCH v2 3/7] drm/amdgpu: Add new function to put GPU power profile

2023-08-22 Thread Lazar, Lijo




On 8/22/2023 5:41 PM, Yadav, Arvind wrote:

Hi Lijo,

The *_set function sets the GPU power profile, and the *_put function
schedules the smu_delayed_work task after a 100 ms delay. This
smu_delayed_work task clears the GPU power profile if no new jobs are
scheduled within 100 ms. But if a new job arrives within 100 ms, the
*_workload_profile_set function cancels this work and sets the GPU power
profile based on preferences.

Please see the below case.

Case 1 - only jobs with the same profile run. It will take 100 ms to clear
the profile once all jobs complete.


                                        wl = VIDEO <100ms>
workload _|`|

Jobs (VIDEO) |```|__|```|___||___


Case 2 - two jobs with two different profiles. The job1 profile will be set,
but when job2 arrives it will be moved to the higher profile.

                  wl = VIDEO  ->    wl = COMPUTE   <100ms>
workload 
___|``|


Jobs (VIDEO) ___|```|__|```|___||___||___

Jobs (COMPUTE) __|```|___||___||_



Case 3 - two jobs with two different profiles. The job1 profile will be set,
but when job2 arrives it will not be moved to the lower profile. Only when
compute job2 completes will it move to the lower profile.


                                      wl = COMPUTE 
->   wl = VIDEO  <100ms>
workload 
_|``| 



Jobs (COMPUTE)    |```|__|```|___||___||___

Jobs (VIDEO) ___|```|___||___||___||___



The swsmu layer maintains a workload mask based on priority. So once you 
have set the mask, until you unset it (i.e., when refcount = 0), the mask 
will be set in the lower layer. The swsmu layer will take care of requesting 
the highest priority from FW. I don't think that needs to be repeated at this 
level.


At this layer, all you need is to refcount the requests and make the 
request.


When the refcount of a profile becomes non-zero (one time only), place one 
request for that profile. As the swsmu layer maintains the workload mask, it 
will also take the new profile into consideration while requesting 
the one with the highest priority.


When refcount of a profile becomes zero, place a request to clear it. 
This is controlled by your idle work. As I see, it keeps an additional 
100ms tolerance before placing a clear request. In that way, there is no 
need to cancel that work.


Inside idle work handler -
Loop through the profiles that are set and clear those profiles whose 
refcount is zero.


Thus if a job starts during the 100ms delay, idle work won't see the ref 
count as zero and then it won't place a request to clear out that profile.



On 8/22/2023 10:21 AM, Lazar, Lijo wrote:



On 8/21/2023 12:17 PM, Arvind Yadav wrote:

This patch adds a function which will clear the GPU
power profile after job finished.

This is how it works:
- The scheduler will set the GPU power profile based on ring_type.
- The scheduler will clear the GPU power profile once the job has finished.
- Here, the *_workload_profile_set function will set the GPU
   power profile and the *_workload_profile_put function will
   schedule the smu_delayed_work task after 100ms delay. This
   smu_delayed_work task will clear a GPU power profile if any
   new jobs are not scheduled within 100 ms. But if any new job
   comes within 100ms then the *_workload_profile_set function
   will cancel this work and set the GPU power profile based on
   preferences.

v2:
- Splitting workload_profile_set and workload_profile_put
   into two separate patches.
- Addressed review comment.

Cc: Shashank Sharma 
Cc: Christian Koenig 
Cc: Alex Deucher 
Signed-off-by: Arvind Yadav 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c  | 97 +++
  drivers/gpu/drm/amd/include/amdgpu_workload.h |  3 +
  2 files changed, 100 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c

index e661cc5b3d92..6367eb88a44d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
@@ -24,6 +24,9 @@
    #include "amdgpu.h"
  +/* 100 millsecond timeout */
+#define SMU_IDLE_TIMEOUT    msecs_to_jiffies(100)
+
  static enum PP_SMC_POWER_PROFILE
  ring_to_power_profile(uint32_t ring_type)
  {
@@ -59,6 +62,80 @@ amdgpu_power_profile_set(struct amdgpu_device *adev,
  return ret;
  }
  +static int
+amdgpu_power_profile_clear(struct amdgpu_device *adev,
+   enum PP_SMC_POWER_PROFILE profile)
+{
+    int ret = amdgpu_dpm_switch_power_profile(adev, profile, false);
+
+    if (!ret) {
+    /* Clear the bit for the submitted workload profile */
+    adev->smu_workload.submit_workload_status &= ~(1 << profile);
+    }
+
+    return ret;
+}
+
+static 

Re: TODO list task: Replace drm_detect_hdmi_monitor() with drm_display_info.is_hdmi

2023-08-22 Thread Jani Nikula
On Tue, 22 Aug 2023, Sharq Mohammad  wrote:
> Hello All,
>
> I am a regular kernel developer, and want to contribute to open source.
> I saw a small TODO list in the DRM graphics subsystem, with some tasks.
> So, just wanted to ask, is anyone working on the task:
> *Replace drm_detect_hdmi_monitor() with drm_display_info.is_hdmi*
>
> It's on the TODO list.

Yeah, I've got a branch:

https://gitlab.freedesktop.org/jani/linux/-/commits/drm-edid-is-hdmi-has-audio

BR,
Jani.


>
> Thanks and regards,
> Sharique

-- 
Jani Nikula, Intel Open Source Graphics Center


Re: [PATCH v2 4/7] drm/amdgpu: Add suspend function to clear the GPU power profile.

2023-08-22 Thread Lazar, Lijo




On 8/22/2023 5:52 PM, Yadav, Arvind wrote:


On 8/22/2023 12:01 PM, Lazar, Lijo wrote:



On 8/21/2023 12:17 PM, Arvind Yadav wrote:

This patch adds a suspend function that will clear the GPU
power profile before going into suspend state.

v2:
- Add the new suspend function based on review comment.

Cc: Shashank Sharma 
Cc: Christian Koenig 
Cc: Alex Deucher 
Signed-off-by: Arvind Yadav 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |  2 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c  | 23 +++
  drivers/gpu/drm/amd/include/amdgpu_workload.h |  2 ++
  3 files changed, 27 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index cd3bf641b630..3b70e657b439 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4212,6 +4212,8 @@ int amdgpu_device_suspend(struct drm_device 
*dev, bool fbcon)

    amdgpu_ras_suspend(adev);
  +    amdgpu_workload_profile_suspend(adev);
+
  amdgpu_device_ip_suspend_phase1(adev);
    if (!adev->in_s0ix)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c

index 6367eb88a44d..44ca8e986984 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
@@ -174,6 +174,29 @@ void amdgpu_workload_profile_set(struct 
amdgpu_device *adev,

  mutex_unlock(&workload->workload_lock);
  }
  +void amdgpu_workload_profile_suspend(struct amdgpu_device *adev)
+{
+    struct amdgpu_smu_workload *workload = &adev->smu_workload;
+    int ret;
+
+    mutex_lock(&workload->workload_lock);
+ cancel_delayed_work_sync(&workload->smu_delayed_work);


Another deadlock candidate. Between fini() and suspend(), the only 
difference probably could be initialization status. If so, just use a 
helper that is used during fini() and suspend().


Before going to suspend(), we need to cancel the work and clear all the 
profiles, but in fini() we are destroying the mutex. Also, it will be 
called when we are unloading everything.




What I meant is for both suspend/fini, you need to cancel any work 
scheduled, clear refcounts and set the profile back to default profile. 
Keep this in a helper and reuse.


Thanks,
Lijo


~Arvind


Thanks,
Lijo


+
+    /* Clear all the set GPU power profile*/
+    for (int index = fls(workload->submit_workload_status);
+ index > 0; index--) {
+    if (workload->submit_workload_status & (1 << index)) {
+ atomic_set(&workload->power_profile_ref[index], 0);
+    ret = amdgpu_power_profile_clear(adev, index);
+    if (ret)
+    DRM_WARN("Failed to clear power profile %s, err = 
%d\n",

+ amdgpu_workload_mode_name[index], ret);
+    }
+    }
+    workload->submit_workload_status = 0;
+    mutex_unlock(&workload->workload_lock);
+}
+
  void amdgpu_workload_profile_init(struct amdgpu_device *adev)
  {
  adev->smu_workload.adev = adev;
diff --git a/drivers/gpu/drm/amd/include/amdgpu_workload.h 
b/drivers/gpu/drm/amd/include/amdgpu_workload.h

index ee1f87257f2d..0acd8769ec52 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_workload.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_workload.h
@@ -52,6 +52,8 @@ void amdgpu_workload_profile_put(struct 
amdgpu_device *adev,

  void amdgpu_workload_profile_set(struct amdgpu_device *adev,
   uint32_t ring_type);
  +void amdgpu_workload_profile_suspend(struct amdgpu_device *adev);
+
  void amdgpu_workload_profile_init(struct amdgpu_device *adev);
    void amdgpu_workload_profile_fini(struct amdgpu_device *adev);


Re: [PATCH v2 4/7] drm/amdgpu: Add suspend function to clear the GPU power profile.

2023-08-22 Thread Yadav, Arvind



On 8/22/2023 6:24 PM, Lazar, Lijo wrote:



On 8/22/2023 5:52 PM, Yadav, Arvind wrote:


On 8/22/2023 12:01 PM, Lazar, Lijo wrote:



On 8/21/2023 12:17 PM, Arvind Yadav wrote:

This patch adds a suspend function that will clear the GPU
power profile before going into suspend state.

v2:
- Add the new suspend function based on review comment.

Cc: Shashank Sharma 
Cc: Christian Koenig 
Cc: Alex Deucher 
Signed-off-by: Arvind Yadav 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |  2 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c  | 23 
+++

  drivers/gpu/drm/amd/include/amdgpu_workload.h |  2 ++
  3 files changed, 27 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index cd3bf641b630..3b70e657b439 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4212,6 +4212,8 @@ int amdgpu_device_suspend(struct drm_device 
*dev, bool fbcon)

    amdgpu_ras_suspend(adev);
  +    amdgpu_workload_profile_suspend(adev);
+
  amdgpu_device_ip_suspend_phase1(adev);
    if (!adev->in_s0ix)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c

index 6367eb88a44d..44ca8e986984 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
@@ -174,6 +174,29 @@ void amdgpu_workload_profile_set(struct 
amdgpu_device *adev,

  mutex_unlock(&workload->workload_lock);
  }
  +void amdgpu_workload_profile_suspend(struct amdgpu_device *adev)
+{
+    struct amdgpu_smu_workload *workload = &adev->smu_workload;
+    int ret;
+
+    mutex_lock(&workload->workload_lock);
+ cancel_delayed_work_sync(&workload->smu_delayed_work);


Another deadlock candidate. Between fini() and suspend(), the only 
difference probably could be initialization status. If so, just use 
a helper that is used during fini() and suspend().


Before going to suspend(), we need to cancel the work and clear all 
the profiles, but in fini() we are destroying the mutex. Also, it will 
be called when we are unloading everything.




What I meant is for both suspend/fini, you need to cancel any work 
scheduled, clear refcounts and set the profile back to default 
profile. Keep this in a helper and reuse.



Noted.

Thank you,
~Arvind


Thanks,
Lijo


~Arvind


Thanks,
Lijo


+
+    /* Clear all the set GPU power profile*/
+    for (int index = fls(workload->submit_workload_status);
+ index > 0; index--) {
+    if (workload->submit_workload_status & (1 << index)) {
+ atomic_set(&workload->power_profile_ref[index], 0);
+    ret = amdgpu_power_profile_clear(adev, index);
+    if (ret)
+    DRM_WARN("Failed to clear power profile %s, err = 
%d\n",

+ amdgpu_workload_mode_name[index], ret);
+    }
+    }
+    workload->submit_workload_status = 0;
+    mutex_unlock(&workload->workload_lock);
+}
+
  void amdgpu_workload_profile_init(struct amdgpu_device *adev)
  {
  adev->smu_workload.adev = adev;
diff --git a/drivers/gpu/drm/amd/include/amdgpu_workload.h 
b/drivers/gpu/drm/amd/include/amdgpu_workload.h

index ee1f87257f2d..0acd8769ec52 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_workload.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_workload.h
@@ -52,6 +52,8 @@ void amdgpu_workload_profile_put(struct 
amdgpu_device *adev,

  void amdgpu_workload_profile_set(struct amdgpu_device *adev,
   uint32_t ring_type);
  +void amdgpu_workload_profile_suspend(struct amdgpu_device *adev);
+
  void amdgpu_workload_profile_init(struct amdgpu_device *adev);
    void amdgpu_workload_profile_fini(struct amdgpu_device *adev);


Re: [PATCH v14 RESEND 5/6] drm/imx: Introduce i.MX8qm/qxp DPU DRM

2023-08-22 Thread Maxime Ripard
Hi,

Aside from the discussion on the binding and the general architecture, I
have some comments there.

On Tue, Aug 22, 2023 at 04:59:48PM +0800, Liu Ying wrote:
> +int dpu_cf_init(struct dpu_soc *dpu, unsigned int index,
> + unsigned int id, enum dpu_unit_type type,
> + unsigned long pec_base, unsigned long base)
> +{
> + struct dpu_constframe *cf;
> +
> + cf = devm_kzalloc(dpu->dev, sizeof(*cf), GFP_KERNEL);
> + if (!cf)
> + return -ENOMEM;
> +
> + dpu->cf_priv[index] = cf;

You can't store structures related to KMS in a device managed structure.
The DRM KMS device will stick around (and be accessible from userspace)
after the device has been removed, until the last application closes its
file descriptor to the device.

This can be checked by enabling KASAN and manually unbinding the driver
through sysfs.

> + cf->pec_base = devm_ioremap(dpu->dev, pec_base, SZ_16);
> + if (!cf->pec_base)
> + return -ENOMEM;
> +
> + cf->base = devm_ioremap(dpu->dev, base, SZ_32);
> + if (!cf->base)
> + return -ENOMEM;

For the same reason, you need to protect any access to a device managed
resource (so clocks, registers, regulators, etc.) by a call to
drm_dev_enter/drm_dev_exit and you need to call drm_dev_unplug instead
of drm_dev_unregister.

> +static int dpu_crtc_pm_runtime_get_sync(struct dpu_crtc *dpu_crtc)
> +{
> + int ret;
> +
> + ret = pm_runtime_get_sync(dpu_crtc->dev->parent);
> + if (ret < 0) {
> + pm_runtime_put_noidle(dpu_crtc->dev->parent);
> + dpu_crtc_err(&dpu_crtc->base,
> +  "failed to get parent device RPM sync: %d\n", ret);
> + }
> +
> + return ret;
> +}

That's pm_runtime_resume_and_get.

> +static int dpu_crtc_pm_runtime_put(struct dpu_crtc *dpu_crtc)
> +{
> + int ret;
> +
> + ret = pm_runtime_put(dpu_crtc->dev->parent);
> + if (ret < 0)
> + dpu_crtc_err(&dpu_crtc->base,
> +  "failed to put parent device RPM: %d\n", ret);
> +
> + return ret;
> +}
> +
> +static void dpu_crtc_mode_set_nofb(struct drm_crtc *crtc)
> +{
> + struct dpu_crtc *dpu_crtc = to_dpu_crtc(crtc);
> + struct drm_display_mode *adj = &crtc->state->adjusted_mode;
> + enum dpu_link_id cf_link;
> +
> + dpu_crtc_dbg(crtc, "mode " DRM_MODE_FMT "\n", DRM_MODE_ARG(adj));
> +
> + /* request power-on when we start to set mode for CRTC */
> + dpu_crtc_pm_runtime_get_sync(dpu_crtc);

From the drm_crtc_helper_funcs documentation:

"""
 * Note that the display pipe is completely off when this function is
 * called. Atomic drivers which need hardware to be running before they
 * program the new display mode (e.g. because they implement runtime PM)
 * should not use this hook. This is because the helper library calls
 * this hook only once per mode change and not every time the display
 * pipeline is suspended using either DPMS or the new "ACTIVE" property.
 * Which means register values set in this callback might get reset when
 * the CRTC is suspended, but not restored.  Such drivers should instead
 * move all their CRTC setup into the @atomic_enable callback.
"""

> +static void dpu_crtc_atomic_enable(struct drm_crtc *crtc,
> +struct drm_atomic_state *state)
> +{
> + struct dpu_crtc *dpu_crtc = to_dpu_crtc(crtc);
> + unsigned long flags;
> +
> + drm_crtc_vblank_on(crtc);
> +
> + enable_irq(dpu_crtc->dec_shdld_irq);
> + enable_irq(dpu_crtc->ed_cont_shdld_irq);
> + enable_irq(dpu_crtc->ed_safe_shdld_irq);
> +
> + dpu_fg_enable_clock(dpu_crtc->fg);
> + dpu_ed_pec_sync_trigger(dpu_crtc->ed_cont);
> + dpu_ed_pec_sync_trigger(dpu_crtc->ed_safe);
> + if (crtc->state->gamma_lut)
> + dpu_crtc_set_gammacor(dpu_crtc);
> + else
> + dpu_crtc_disable_gammacor(dpu_crtc);
> + dpu_fg_shdtokgen(dpu_crtc->fg);
> +
> + /* don't relinquish CPU until TCON is set to operation mode */
> + local_irq_save(flags);
> + preempt_disable();
> + dpu_fg_enable(dpu_crtc->fg);

That's super fishy. You shouldn't need that, at all. What is going on
there?

> +
> + /*
> +  * TKT320590:

Those are NXP internal references as far as I can tell. They
shouldn't be here.

> +  * Turn TCON into operation mode as soon as the first dumb
> +  * frame is generated by DPU(we don't relinquish CPU to ensure
> +  * this).  This makes DPR/PRG be able to evade the frame.
> +  */
> + DPU_CRTC_WAIT_FOR_FRAMEGEN_FRAME_CNT_MOVING(dpu_crtc->fg);
> + dpu_tcon_set_operation_mode(dpu_crtc->tcon);
> + local_irq_restore(flags);
> + preempt_enable();
> +
> + DPU_CRTC_WAIT_FOR_COMPLETION_TIMEOUT(ed_safe_shdld_done);
> + DPU_CRTC_WAIT_FOR_COMPLETION_TIMEOUT(ed_cont_shdld_done);
> + DPU_CRTC_WAIT_FOR_COMPLETION_TIMEOUT(dec_shdld_done);
> +

Re: [Linaro-mm-sig] [PATCH v2] dma-buf/sw_sync: Avoid recursive lock during fence signal

2023-08-22 Thread Christian König

Am 18.08.23 um 16:59 schrieb Rob Clark:

From: Rob Clark 

If a signal callback releases the sw_sync fence, that will trigger a
deadlock as the timeline_fence_release recurses onto the fence->lock
(used both for signaling and the timeline tree).

To avoid that, temporarily hold an extra reference to the signalled
fences until after we drop the lock.

(This is an alternative implementation of 
https://patchwork.kernel.org/patch/11664717/
which avoids some potential UAF issues with the original patch.)

v2: Remove now obsolete comment, use list_move_tail() and
 list_del_init()

Reported-by: Bas Nieuwenhuizen 
Fixes: d3c6dd1fb30d ("dma-buf/sw_sync: Synchronize signal vs syncpt free")
Signed-off-by: Rob Clark 


Reviewed-by: Christian König 


---
  drivers/dma-buf/sw_sync.c | 18 +-
  1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
index 63f0aeb66db6..f0a35277fd84 100644
--- a/drivers/dma-buf/sw_sync.c
+++ b/drivers/dma-buf/sw_sync.c
@@ -191,6 +191,7 @@ static const struct dma_fence_ops timeline_fence_ops = {
   */
  static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
  {
+   LIST_HEAD(signalled);
struct sync_pt *pt, *next;
  
  	trace_sync_timeline(obj);

@@ -203,21 +204,20 @@ static void sync_timeline_signal(struct sync_timeline 
*obj, unsigned int inc)
if (!timeline_fence_signaled(&pt->base))
break;
  
-		list_del_init(&pt->link);

+   dma_fence_get(&pt->base);
+
+   list_move_tail(&pt->link, &signalled);
rb_erase(&pt->node, &obj->pt_tree);
  
-		/*

-* A signal callback may release the last reference to this
-* fence, causing it to be freed. That operation has to be
-* last to avoid a use after free inside this loop, and must
-* be after we remove the fence from the timeline in order to
-* prevent deadlocking on timeline->lock inside
-* timeline_fence_release().
-*/
dma_fence_signal_locked(&pt->base);
}
  
  	spin_unlock_irq(&obj->lock);

+
+   list_for_each_entry_safe(pt, next, &signalled, link) {
+   list_del_init(&pt->link);
+   dma_fence_put(&pt->base);
+   }
  }
  
  /**




[PATCH] drm/mediatek: Add spinlock for setting vblank event in atomic_begin

2023-08-22 Thread Jason-JH . Lin
Add spinlock protection to avoid race condition on vblank event
between mtk_drm_crtc_atomic_begin() and mtk_drm_finish_page_flip().

Fixes: 119f5173628a ("drm/mediatek: Add DRM Driver for Mediatek SoC MT8173.")
Signed-off-by: Jason-JH.Lin 
---
 drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c 
b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
index d40142842f85..128a672fe3c9 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
@@ -746,6 +746,9 @@ static void mtk_drm_crtc_atomic_begin(struct drm_crtc *crtc,
  crtc);
struct mtk_crtc_state *mtk_crtc_state = to_mtk_crtc_state(crtc_state);
struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc);
+   unsigned long flags;
+
+   spin_lock_irqsave(&crtc->dev->event_lock, flags);
 
if (mtk_crtc->event && mtk_crtc_state->base.event)
DRM_ERROR("new event while there is still a pending event\n");
@@ -756,6 +759,8 @@ static void mtk_drm_crtc_atomic_begin(struct drm_crtc *crtc,
mtk_crtc->event = mtk_crtc_state->base.event;
mtk_crtc_state->base.event = NULL;
}
+
+   spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
 }
 
 static void mtk_drm_crtc_atomic_flush(struct drm_crtc *crtc,
-- 
2.18.0



Re: [Intel-gfx] [PATCH] drm/display/dp: Fix the DP DSC Receiver cap size

2023-08-22 Thread Jani Nikula
On Fri, 18 Aug 2023, Ankit Nautiyal  wrote:
> DP DSC Receiver Capabilities are exposed via DPCD 60h-6Fh.
> Fix the DSC RECEIVER CAP SIZE accordingly.
>
> Fixes: ffddc4363c28 ("drm/dp: Add DP DSC DPCD receiver capability size define 
> and missing SHIFT")
> Cc: Anusha Srivatsa 
> Cc: Manasi Navare 
> Cc:  # v5.0+
>
> Signed-off-by: Ankit Nautiyal 
> Reviewed-by: Stanislav Lisovskiy 

Thanks for the patch and review, pushed to drm-misc-fixes.

BR,
Jani.

> ---
>  include/drm/display/drm_dp.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/include/drm/display/drm_dp.h b/include/drm/display/drm_dp.h
> index 02f2ac4dd2df..e69cece404b3 100644
> --- a/include/drm/display/drm_dp.h
> +++ b/include/drm/display/drm_dp.h
> @@ -1537,7 +1537,7 @@ enum drm_dp_phy {
>  
>  #define DP_BRANCH_OUI_HEADER_SIZE0xc
>  #define DP_RECEIVER_CAP_SIZE 0xf
> -#define DP_DSC_RECEIVER_CAP_SIZE0xf
> +#define DP_DSC_RECEIVER_CAP_SIZE0x10 /* DSC Capabilities 0x60 
> through 0x6F */
>  #define EDP_PSR_RECEIVER_CAP_SIZE2
>  #define EDP_DISPLAY_CTL_CAP_SIZE 3
>  #define DP_LTTPR_COMMON_CAP_SIZE 8

-- 
Jani Nikula, Intel Open Source Graphics Center


Re: [PATCH] drm/dp_mst: Fix NULL deref in get_mst_branch_device_by_guid_helper()

2023-08-22 Thread Radosław Biernacki
śr., 16 sie 2023 o 11:08 Lukasz Majczak  napisał(a):
>
> czw., 3 sie 2023 o 11:23 Lukasz Majczak  napisał(a):
> >
> > Check mgr->mst_primary, before passing it to
> > the get_mst_branch_device_by_guid_helper(), otherwise NULL dereference
> > may occur in the call to memcmp() and cause:
> >
> > [12579.365869] BUG: kernel NULL pointer dereference, address: 
> > 0049
> > [12579.365878] #PF: supervisor read access in kernel mode
> > [12579.365880] #PF: error_code(0x) - not-present page
> > [12579.365882] PGD 0 P4D 0
> > [12579.365887] Oops:  [#1] PREEMPT SMP NOPTI
> > ...
> > [12579.365895] Workqueue: events_long drm_dp_mst_up_req_work
> > [12579.365899] RIP: 0010:memcmp+0xb/0x29
> > [12579.365921] Call Trace:
> > [12579.365927] get_mst_branch_device_by_guid_helper+0x22/0x64
> > [12579.365930] drm_dp_mst_up_req_work+0x137/0x416
> > [12579.365933] process_one_work+0x1d0/0x419
> > [12579.365935] worker_thread+0x11a/0x289
> > [12579.365938] kthread+0x13e/0x14f
> > [12579.365941] ? process_one_work+0x419/0x419
> > [12579.365943] ? kthread_blkcg+0x31/0x31
> > [12579.365946] ret_from_fork+0x1f/0x30
> >
> > Similar check is done in e.g: drm_dp_mst_topology_get_mstb_validated().
> >
> > Fixes: 5e93b8208d3c ("drm/dp/mst: move GUID storage from mgr, port to only 
> > mst branch")
> > Cc:  # 4.14+
> > Signed-off-by: Lukasz Majczak 
> > ---
> >  drivers/gpu/drm/display/drm_dp_mst_topology.c | 16 
> >  1 file changed, 8 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c 
> > b/drivers/gpu/drm/display/drm_dp_mst_topology.c
> > index ed96cfcfa304..703cd97b1d11 100644
> > --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
> > +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
> > @@ -2595,19 +2595,19 @@ static struct drm_dp_mst_branch *
> >  drm_dp_get_mst_branch_device_by_guid(struct drm_dp_mst_topology_mgr *mgr,
> >  const uint8_t *guid)
> >  {
> > -   struct drm_dp_mst_branch *mstb;
> > +   struct drm_dp_mst_branch *mstb = NULL;
> > int ret;
> >
> > /* find the port by iterating down */
> > mutex_lock(&mgr->lock);
> > -
> > -   mstb = get_mst_branch_device_by_guid_helper(mgr->mst_primary, guid);
> > -   if (mstb) {
> > -   ret = drm_dp_mst_topology_try_get_mstb(mstb);
> > -   if (!ret)
> > -   mstb = NULL;
> > +   if (mgr->mst_primary) {

One suggestion that just came to mind:
get_mst_branch_device_by_guid_helper() is a recursive function.
This check could be moved inside that function as its first line.
That way there would be a single NULL check: the similar one that steps
over NULL elements in the recursive call could be removed, NULL would
become an acceptable parameter value, and there would be no need to
check for it here.

> > +   mstb = 
> > get_mst_branch_device_by_guid_helper(mgr->mst_primary, guid);
> > +   if (mstb) {
> > +   ret = drm_dp_mst_topology_try_get_mstb(mstb);
> > +   if (!ret)
> > +   mstb = NULL;
> > +   }
> > }
> > -
> > mutex_unlock(&mgr->lock);
> > return mstb;
> >  }
> > --
> > 2.41.0.640.ga95def55d0-goog
> >
> Hi,
>
> Is there anything more I should do regarding these changes?
>
> Best regards,
> Lukasz


Re: [PATCH v5] drm/i915: Avoid circular locking dependency when flush delayed work on gt reset

2023-08-22 Thread Daniel Vetter
On Fri, Aug 11, 2023 at 11:20:11AM -0700, Zhanjun Dong wrote:
> This attempts to avoid circular locking dependency between flush delayed
> work and intel_gt_reset.
> When intel_gt_reset was called, task will hold a lock.
> To cancel delayed work here, the _sync version will also acquire a lock,
> which might trigger the possible circular locking dependency warning.
> When intel_gt_reset called, reset_in_progress flag will be set, add code
> to check the flag, call async version if reset is in progress.
> 
> Signed-off-by: Zhanjun Dong 
> Cc: John Harrison 
> Cc: Andi Shyti 
> Cc: Daniel Vetter 
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 11 ++-
>  1 file changed, 10 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index a0e3ef1c65d2..600388c849f7 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -1359,7 +1359,16 @@ static void guc_enable_busyness_worker(struct 
> intel_guc *guc)
>  
>  static void guc_cancel_busyness_worker(struct intel_guc *guc)
>  {
> - cancel_delayed_work_sync(&guc->timestamp.work);
> + /*
> +  * When intel_gt_reset was called, task will hold a lock.
> +  * To cacel delayed work here, the _sync version will also acquire a 
> lock, which might
> +  * trigger the possible cirular locking dependency warning.

This is not even close to a locking bugfix. Consider this a formal nack,
because the issue here is not even close to "needs more comments to
explain what's going on".
-Daniel

> +  * Check the reset_in_progress flag, call async verion if reset is in 
> progress.
> +  */
> + if (guc_to_gt(guc)->uc.reset_in_progress)
> + cancel_delayed_work(&guc->timestamp.work);
> + else
> + cancel_delayed_work_sync(&guc->timestamp.work);
>  }
>  
>  static void __reset_guc_busyness_stats(struct intel_guc *guc)
> -- 
> 2.34.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [PATCH v4 2/3] phy: qcom: qmp-combo: switch to DRM_AUX_BRIDGE

2023-08-22 Thread Vinod Koul
On 17-08-23, 17:55, Dmitry Baryshkov wrote:
> Switch to using the new DRM_AUX_BRIDGE helper to create the
> transparent DRM bridge device instead of handcoding corresponding
> functionality.

Acked-by: Vinod Koul 

-- 
~Vinod


Re: [PATCH v8 2/7] phy: Add HDMI configuration options

2023-08-22 Thread Vinod Koul
On 17-08-23, 13:05, Dmitry Baryshkov wrote:
> On 08/08/2023 11:32, Sandor Yu wrote:
> > Allow HDMI PHYs to be configured through the generic
> > functions through a custom structure added to the generic union.
> > 
> > The parameters added here are based on HDMI PHY
> > implementation practices.  The current set of parameters
> > should cover the potential users.
> > 
> > Signed-off-by: Sandor Yu 
> > ---
> >   include/linux/phy/phy-hdmi.h | 24 
> >   include/linux/phy/phy.h  |  7 ++-
> >   2 files changed, 30 insertions(+), 1 deletion(-)
> >   create mode 100644 include/linux/phy/phy-hdmi.h
> 
> I think this looks good now, thank you!
> 
> Reviewed-by: Dmitry Baryshkov 

Should this go thru drm or phy...?

> 
> -- 
> With best wishes
> Dmitry

-- 
~Vinod


Re: [PATCH v4 43/48] drm/ttm: introduce pool_shrink_rwsem

2023-08-22 Thread Daniel Vetter
On Mon, Aug 07, 2023 at 07:09:31PM +0800, Qi Zheng wrote:
> Currently, the synchronize_shrinkers() is only used by TTM pool. It only
> requires that no shrinkers run in parallel.
> 
> After we use RCU+refcount method to implement the lockless slab shrink,
> we can not use shrinker_rwsem or synchronize_rcu() to guarantee that all
> shrinker invocations have seen an update before freeing memory.
> 
> So we introduce a new pool_shrink_rwsem to implement a private
> synchronize_shrinkers(), so as to achieve the same purpose.
> 
> Signed-off-by: Qi Zheng 
> Reviewed-by: Muchun Song 

On the 5 drm patches (I counted 2 ttm and 3 drivers) for merging through
some other tree (since I'm assuming that's how this will land):

Acked-by: Daniel Vetter 

> ---
>  drivers/gpu/drm/ttm/ttm_pool.c | 15 +++
>  include/linux/shrinker.h   |  2 --
>  mm/shrinker.c  | 15 ---
>  3 files changed, 15 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
> index c9c9618c0dce..38b4c280725c 100644
> --- a/drivers/gpu/drm/ttm/ttm_pool.c
> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> @@ -74,6 +74,7 @@ static struct ttm_pool_type global_dma32_uncached[MAX_ORDER 
> + 1];
>  static spinlock_t shrinker_lock;
>  static struct list_head shrinker_list;
>  static struct shrinker *mm_shrinker;
> +static DECLARE_RWSEM(pool_shrink_rwsem);
>  
>  /* Allocate pages of size 1 << order with the given gfp_flags */
>  static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t 
> gfp_flags,
> @@ -317,6 +318,7 @@ static unsigned int ttm_pool_shrink(void)
>   unsigned int num_pages;
>   struct page *p;
>  
> + down_read(&pool_shrink_rwsem);
>   spin_lock(&shrinker_lock);
>   pt = list_first_entry(&shrinker_list, typeof(*pt), shrinker_list);
>   list_move_tail(&pt->shrinker_list, &shrinker_list);
> @@ -329,6 +331,7 @@ static unsigned int ttm_pool_shrink(void)
>   } else {
>   num_pages = 0;
>   }
> + up_read(&pool_shrink_rwsem);
>  
>   return num_pages;
>  }
> @@ -572,6 +575,18 @@ void ttm_pool_init(struct ttm_pool *pool, struct device 
> *dev,
>  }
>  EXPORT_SYMBOL(ttm_pool_init);
>  
> +/**
> + * synchronize_shrinkers - Wait for all running shrinkers to complete.
> + *
> + * This is useful to guarantee that all shrinker invocations have seen an
> + * update, before freeing memory, similar to rcu.
> + */
> +static void synchronize_shrinkers(void)
> +{
> + down_write(&pool_shrink_rwsem);
> + up_write(&pool_shrink_rwsem);
> +}
> +
>  /**
>   * ttm_pool_fini - Cleanup a pool
>   *
> diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
> index c55c07c3f0cb..025c8070dd86 100644
> --- a/include/linux/shrinker.h
> +++ b/include/linux/shrinker.h
> @@ -103,8 +103,6 @@ struct shrinker *shrinker_alloc(unsigned int flags, const 
> char *fmt, ...);
>  void shrinker_register(struct shrinker *shrinker);
>  void shrinker_free(struct shrinker *shrinker);
>  
> -extern void synchronize_shrinkers(void);
> -
>  #ifdef CONFIG_SHRINKER_DEBUG
>  extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker,
> const char *fmt, ...);
> diff --git a/mm/shrinker.c b/mm/shrinker.c
> index 3ab301ff122d..a27779ed3798 100644
> --- a/mm/shrinker.c
> +++ b/mm/shrinker.c
> @@ -650,18 +650,3 @@ void shrinker_free(struct shrinker *shrinker)
>   kfree(shrinker);
>  }
>  EXPORT_SYMBOL_GPL(shrinker_free);
> -
> -/**
> - * synchronize_shrinkers - Wait for all running shrinkers to complete.
> - *
> - * This is equivalent to calling unregister_shrink() and register_shrinker(),
> - * but atomically and with less overhead. This is useful to guarantee that 
> all
> - * shrinker invocations have seen an update, before freeing memory, similar 
> to
> - * rcu.
> - */
> -void synchronize_shrinkers(void)
> -{
> - down_write(&shrinker_rwsem);
> - up_write(&shrinker_rwsem);
> -}
> -EXPORT_SYMBOL(synchronize_shrinkers);
> -- 
> 2.30.2
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [PATCH 1/3] drm/buddy: Fix contiguous memory allocation issues

2023-08-22 Thread Arunpravin Paneer Selvam



On 21/08/23 10:46, Matthew Auld wrote:

Hi,

On 21/08/2023 11:14, Arunpravin Paneer Selvam wrote:

Contiguous requests are currently implemented such that
the size is rounded up to a power of 2 and the corresponding order
block is picked from the freelist.

In addition to the older method, the new method will round down
the size to a power of 2 and pick the corresponding order block
from the freelist. For the remaining size, we traverse the
tree and try to allocate either from the freelist block's buddy
or from the peer block. If the remaining size from the peer/buddy
block is not free, we pick the next freelist block and repeat
the same method.

Moved contiguous/alignment size computation part and trim
function to the drm buddy manager.


I think we should also mention somewhere what issue this is trying to 
solve. IIUC the roundup_power_of_two() might in some cases trigger 
-ENOSPC even though there might be enough free space, and so to help 
with that we introduce a try harder mechanism.
Yes, we are trying to solve the above issue. I will add the problem 
statement to the commit description.




Signed-off-by: Arunpravin Paneer Selvam 


---
  drivers/gpu/drm/drm_buddy.c | 253 ++--
  include/drm/drm_buddy.h |   6 +-
  2 files changed, 248 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 7098f125b54a..220f60c08a03 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -569,6 +569,197 @@ static int __drm_buddy_alloc_range(struct 
drm_buddy *mm,

  return __alloc_range(mm, &dfs, start, size, blocks);
  }
  +static int __alloc_contiguous_block_from_buddy(struct drm_buddy *mm,
+   u64 size,
+   u64 min_block_size,
+   struct drm_buddy_block *block,
+   struct list_head *blocks)
+{
+    struct drm_buddy_block *buddy, *parent = NULL;
+    u64 start, offset = 0;
+    LIST_HEAD(dfs);
+    int err;
+
+    if (!block)
+    return -EINVAL;
+
+    buddy = __get_buddy(block);
+    if (!buddy)
+    return -ENOSPC;
+
+    if (drm_buddy_block_is_allocated(buddy))
+    return -ENOSPC;
+
+    parent = block->parent;
+    if (!parent)
+    return -ENOSPC;
+
+    if (block->parent->right == block) {
+    u64 remaining;
+
+    /* Compute the leftover size for allocation */
+    remaining = max((size - drm_buddy_block_size(mm, buddy)),
+    min_block_size);
+    if (!IS_ALIGNED(remaining, min_block_size))
+    remaining = round_up(remaining, min_block_size);
+
+    /* Check if remaining size is greater than buddy block size */
+    if (drm_buddy_block_size(mm, buddy) < remaining)
+    return -ENOSPC;
+
+    offset = drm_buddy_block_size(mm, buddy) - remaining;
+    }
+
+    list_add(&parent->tmp_link, &dfs);
+    start = drm_buddy_block_offset(parent) + offset;
+
+    err = __alloc_range(mm, &dfs, start, size, blocks);
+    if (err)
+    return -ENOSPC;
+
+    return 0;
+}
+
+static int __alloc_contiguous_block_from_peer(struct drm_buddy *mm,
+  u64 size,
+  u64 min_block_size,
+  struct drm_buddy_block *block,
+  struct list_head *blocks)
+{
+    struct drm_buddy_block *first, *peer, *tmp;
+    struct drm_buddy_block *parent = NULL;
+    u64 start, offset = 0;
+    unsigned int order;
+    LIST_HEAD(dfs);
+    int err;
+
+    if (!block)
+    return -EINVAL;
+
+    order = drm_buddy_block_order(block);
+    /* Add freelist block to dfs list */
+    list_add(&block->tmp_link, &dfs);
+
+    tmp = block;
+    parent = block->parent;
+    while (parent) {
+    if (block->parent->left == block) {
+    if (parent->left != tmp) {
+    peer = parent->left;
+    break;
+    }
+    } else {
+    if (parent->right != tmp) {
+    peer = parent->right;
+    break;
+    }
+    }
+
+    tmp = parent;
+    parent = tmp->parent;
+    }
+
+    if (!parent)
+    return -ENOSPC;
+
+    do {
+    if (drm_buddy_block_is_allocated(peer))
+    return -ENOSPC;
+    /* Exit loop if peer block order is equal to block order */
+    if (drm_buddy_block_order(peer) == order)
+    break;
+
+    if (drm_buddy_block_is_split(peer)) {
+    /* Traverse down to the block order level */
+    if (block->parent->left == block)
+    peer = peer->right;
+    else
+    peer = peer->left;
+    } else {
+    break;
+    }
+    } while (1);
+
+    if (block->parent->left == block) {
+    u64 remaining;
+
+    /* Compute the leftover size for allocation */
+    remaining = max((size - drm_buddy_block_size(mm, block)),
+    min_block_size);
+    if (!IS_ALIGNED(rem

Re: TODO list task: Replace drm_detect_hdmi_monitor() with drm_display_info.is_hdmi

2023-08-22 Thread Sharique Mohammad
Ok. So it is already completed.
I have to find something else...

Thanks and regards,
Sharique

Am Di., 22. Aug. 2023 um 14:46 Uhr schrieb Jani Nikula <
jani.nik...@linux.intel.com>:

> On Tue, 22 Aug 2023, Sharq Mohammad  wrote:
> > Hello All,
> >
> > I am a usual kernel developer, and wanted to contribute to the open
> source.
> > I saw a small TODO list in the DRM graphics subsystem, with some tasks.
> > So, just wanted to ask, is anyone working on the task:
> > *Replace drm_detect_hdmi_monitor() with drm_display_info.is_hdmi*
> >
> > Its on the TODO list.
>
> Yeah, I've got branch
>
>
> https://gitlab.freedesktop.org/jani/linux/-/commits/drm-edid-is-hdmi-has-audio
>
> BR,
> Jani.
>
>
> >
> > Thanks and regards,
> > Sharique
>
> --
> Jani Nikula, Intel Open Source Graphics Center
>


Re: [PATCH RFC 00/13] drm/connector: Create HDMI Connector infrastructure

2023-08-22 Thread Daniel Vetter
On Mon, Aug 14, 2023 at 03:56:12PM +0200, Maxime Ripard wrote:
> Hi,
> 
> Here's a series that creates a subclass of drm_connector specifically
> targeted at HDMI controllers.
> 
> The idea behind this series came from a recent discussion on IRC during
> which we discussed infoframes generation of i915 vs everything else. 
> 
> Infoframes generation code still requires some decent boilerplate, with
> each driver doing some variation of it.
> 
> In parallel, while working on vc4, we ended up converting a lot of i915
> logic (mostly around format / bpc selection, and scrambler setup) to
> apply on top of a driver that relies only on helpers.
> 
> While currently sitting in the vc4 driver, none of that logic actually
> relies on any driver or hardware-specific behaviour.
> 
> The only missing piece to make it shareable is a bunch of extra
> variables stored in a state (current bpc, format, RGB range selection,
> etc.).
> 
> Thus, I decided to create some generic subclass of drm_connector to
> address HDMI connectors, with a bunch of helpers that will take care of
> all the "HDMI Spec" related code. Scrambler setup is missing at the
> moment but can easily be plugged in.
> 
> Last week, Hans Verkuil also expressed interest in retrieving the
> infoframes generated from userspace to create an infoframe-decode tool.
> This series thus leverages the infoframe generation code to expose it
> through debugfs.
> 
> This entire series is only build-tested at the moment. Let me know what
> you think,
> Maxime

I think the idea overall makes sense, and we probably need it to roll out
actual hdmi support to all the hdmi drivers we have. But there's the
eternal issue of "C sucks at multiple inheritance".

Which means if you have a driver that already subclasses drm_connector for
its own needs, it de facto cannot, or only under some serious pains, use
this. Which is kinda why in practice we tend to not subclass, but stuff
subclass fields into a named sub-structure. So essentially struct
drm_connector.hdmi and struct drm_connector_state.hdmi instead of
drm_hdmi_connector and drm_hdmi_connector_state. The helper functions to
set it all up would all still be roughly the same. It's less typesafe, but
I think the gain in practical use is worth it (e.g. you could probably make
i915 use the helpers, which with the approach here is practically
impossible).

The only other nit is that we probably want to put some of the hdmi
properties into struct drm_mode_config because there's no reason to have
per-connector valid values.

Also, it might be really good if you can find a co-conspirator who also
wants to use this in their driver, then with some i915 extracting we'd
have three, which should ensure the helper api is solid.

Cheers, Sima


> 
> Signed-off-by: Maxime Ripard 
> ---
> Maxime Ripard (13):
>   drm/connector: Introduce an HDMI connector
>   drm/connector: hdmi: Create a custom state
>   drm/connector: hdmi: Add Broadcast RGB property
>   drm/connector: hdmi: Add helper to get the RGB range
>   drm/connector: hdmi: Add output BPC to the connector state
>   drm/connector: hdmi: Add support for output format
>   drm/connector: hdmi: Calculate TMDS character rate
>   drm/connector: hdmi: Add custom hook to filter TMDS character rate
>   drm/connector: hdmi: Compute bpc and format automatically
>   drm/connector: hdmi: Add Infoframes generation
>   drm/connector: hdmi: Create Infoframe DebugFS entries
>   drm/vc4: hdmi: Create destroy state implementation
>   drm/vc4: hdmi: Switch to HDMI connector
> 
>  drivers/gpu/drm/Makefile |1 +
>  drivers/gpu/drm/drm_hdmi_connector.c | 1112 
> ++
>  drivers/gpu/drm/vc4/vc4_hdmi.c   |  720 --
>  drivers/gpu/drm/vc4/vc4_hdmi.h   |   37 +-
>  drivers/gpu/drm/vc4/vc4_hdmi_phy.c   |4 +-
>  include/drm/drm_connector.h  |  256 
>  6 files changed, 1508 insertions(+), 622 deletions(-)
> ---
> base-commit: 5d0c230f1de8c7515b6567d9afba1f196fb4e2f4
> change-id: 20230814-kms-hdmi-connector-state-616787e67927
> 
> Best regards,
> -- 
> Maxime Ripard 
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [PATCH v4 0/3] drm: simplify support for transparent DRM bridges

2023-08-22 Thread Laurent Pinchart
Hi Dmitry,

Thank you for the patches.

On Thu, Aug 17, 2023 at 05:55:13PM +0300, Dmitry Baryshkov wrote:
> Supporting DP/USB-C can result in a chain of several transparent
> bridges (PHY, redrivers, mux, etc). This results in drivers having
> similar boilerplate code for such bridges.

What do you mean by transparent bridge here ? Bridges are a DRM concept,
and as far as I can tell, a PHY isn't a bridge. Why does it need to be
handled as one, especially if it's completely transparent ?

> Next, these drivers are susceptible to -EPROBE_DEFER loops: the next
> bridge can either be probed from the bridge->attach callback, when it is
> too late to return -EPROBE_DEFER, or from the probe() callback, when the
> next bridge might not yet be available, because it depends on the
> resources provided by the probing device.

Can't device links help avoiding defer probing in those cases ?

> Last, but not least, this results in the internal knowledge of DRM
> subsystem slowly diffusing into other subsystems, like PHY or USB/TYPEC.

Why so ? The PHY subsystem should provide a PHY, without considering
what subsystem it will be used by. This patch series seems to me to
actually create this DRM dependency in other subsystems, which I don't
think is a very good idea. Resources should be registered in their own
subsystem with the appropriate API, not in a way that is tied to a
particular consumer.

> To solve all these issues, define a separate DRM helper, which creates
> separate aux device just for the bridge. During probe such aux device
> doesn't result in the EPROBE_DEFER loops. Instead it allows the device
> drivers to probe properly, according to the actual resource
> dependencies. The bridge auxdevs are then probed when the next bridge
> becomes available, sparing drivers from drm_bridge_attach() returning
> -EPROBE_DEFER.

I'm not thrilled :-( Let's discuss the questions above first.

> Proposed merge strategy: immutable branch with the drm commit, which is
> then merged into PHY and USB subsystems together with the corresponding
> patch.
> 
> Changes since v3:
>  - Moved bridge driver to gpu/drm/bridge (Neil Armstrong)
>  - Renamed it to aux-bridge (since there is already a simple_bridge driver)
>  - Made CONFIG_OF mandatory for this driver (Neil Armstrong)
>  - Added missing kfree and ida_free (Dan Carpenter)
> 
> Changes since v2:
>  - ifdef'ed bridge->of_node access (LKP)
> 
> Changes since v1:
>  - Added EXPORT_SYMBOL_GPL / MODULE_LICENSE / etc. to drm_simple_bridge
> 
> Dmitry Baryshkov (3):
>   drm/bridge: add transparent bridge helper
>   phy: qcom: qmp-combo: switch to DRM_AUX_BRIDGE
>   usb: typec: nb7vpq904m: switch to DRM_AUX_BRIDGE
> 
>  drivers/gpu/drm/bridge/Kconfig|   9 ++
>  drivers/gpu/drm/bridge/Makefile   |   1 +
>  drivers/gpu/drm/bridge/aux-bridge.c   | 132 ++
>  drivers/phy/qualcomm/Kconfig  |   2 +-
>  drivers/phy/qualcomm/phy-qcom-qmp-combo.c |  44 +---
>  drivers/usb/typec/mux/Kconfig |   2 +-
>  drivers/usb/typec/mux/nb7vpq904m.c|  44 +---
>  include/drm/bridge/aux-bridge.h   |  19 
>  8 files changed, 167 insertions(+), 86 deletions(-)
>  create mode 100644 drivers/gpu/drm/bridge/aux-bridge.c
>  create mode 100644 include/drm/bridge/aux-bridge.h

-- 
Regards,

Laurent Pinchart


RE: [PATCH v5] drm/i915: Avoid circular locking dependency when flush delayed work on gt reset

2023-08-22 Thread Dong, Zhanjun


> -Original Message-
> From: Daniel Vetter 
> Sent: August 22, 2023 9:51 AM
> To: Dong, Zhanjun 
> Cc: intel-...@lists.freedesktop.org; dri-devel@lists.freedesktop.org; 
> Harrison,
> John C ; Andi Shyti ;
> Daniel Vetter 
> Subject: Re: [PATCH v5] drm/i915: Avoid circular locking dependency when
> flush delayed work on gt reset
> 
> On Fri, Aug 11, 2023 at 11:20:11AM -0700, Zhanjun Dong wrote:
> > This attempts to avoid circular locking dependency between flush delayed
> > work and intel_gt_reset.
> > When intel_gt_reset was called, task will hold a lock.
> > To cancel delayed work here, the _sync version will also acquire a lock,
> > which might trigger the possible circular locking dependency warning.
> > When intel_gt_reset called, reset_in_progress flag will be set, add code
> > to check the flag, call async version if reset is in progress.
> >
> > Signed-off-by: Zhanjun Dong 
> > Cc: John Harrison 
> > Cc: Andi Shyti 
> > Cc: Daniel Vetter 
> > ---
> >  drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 11 ++-
> >  1 file changed, 10 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > index a0e3ef1c65d2..600388c849f7 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > @@ -1359,7 +1359,16 @@ static void guc_enable_busyness_worker(struct
> intel_guc *guc)
> >
> >  static void guc_cancel_busyness_worker(struct intel_guc *guc)
> >  {
> > -   cancel_delayed_work_sync(&guc->timestamp.work);
> > +   /*
> > +* When intel_gt_reset was called, task will hold a lock.
> > +* To cacel delayed work here, the _sync version will also acquire a 
> > lock,
> which might
> > +* trigger the possible cirular locking dependency warning.
> 
> This is not even close to a locking bugfix. Consider this a formal nack,
> because the issue here is not even close to "needs more comments to
> explain what's going on".
> -Daniel

The purpose of the comment here is to explain the locking issue condition.
> 
> > +* Check the reset_in_progress flag, call async verion if reset is in
> progress.


The comment here explains that the flag is checked to avoid the locking condition.
The reset process is not expected to complete in a short time; other than
that, did we miss anything?

> > +*/
> > +   if (guc_to_gt(guc)->uc.reset_in_progress)
> > +   cancel_delayed_work(&guc->timestamp.work);
> > +   else
> > +   cancel_delayed_work_sync(&guc->timestamp.work);
> >  }
> >
> >  static void __reset_guc_busyness_stats(struct intel_guc *guc)
> > --
> > 2.34.1
> >
> 
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch


Re: [PATCH v4 0/3] drm: simplify support for transparent DRM bridges

2023-08-22 Thread Laurent Pinchart
On Tue, Aug 22, 2023 at 05:17:37PM +0300, Laurent Pinchart wrote:
> Hi Dmitry,
> 
> Thank you for the patches.
> 
> On Thu, Aug 17, 2023 at 05:55:13PM +0300, Dmitry Baryshkov wrote:
> > Supporting DP/USB-C can result in a chain of several transparent
> > bridges (PHY, redrivers, mux, etc). This results in drivers having
> > similar boilerplate code for such bridges.
> 
> What do you mean by transparent bridge here ? Bridges are a DRM concept,
> and as far as I can tell, a PHY isn't a bridge. Why does it need to be
> handled as one, especially if it's completely transparent ?
> 
> > Next, these drivers are susceptible to -EPROBE_DEFER loops: the next
> > bridge can either be probed from the bridge->attach callback, when it is
> > too late to return -EPROBE_DEFER, or from the probe() callback, when the
> > next bridge might not yet be available, because it depends on the
> > resources provided by the probing device.
> 
> Can't device links help avoiding defer probing in those cases ?
> 
> > Last, but not least, this results in the internal knowledge of DRM
> > subsystem slowly diffusing into other subsystems, like PHY or USB/TYPEC.
> 
> Why so ? The PHY subsystem should provide a PHY, without considering
> what subsystem it will be used by. This patch series seems to me to
> actually create this DRM dependency in other subsystems,

I was wrong on this one, there are indeed existing drm_bridge instances
in drivers/usb/ and drivers/phy/. That's certainly not nice. Why do we
even need drm_bridge there, why can't the PHYs be acquired by their
consumers in DRM (and anywhere else) using the PHY API ?

> which I don't
> think is a very good idea. Resources should be registered in their own
> subsystem with the appropriate API, not in a way that is tied to a
> particular consumer.
> 
> > To solve all these issues, define a separate DRM helper, which creates
> > separate aux device just for the bridge. During probe such aux device
> > doesn't result in the EPROBE_DEFER loops. Instead it allows the device
> > drivers to probe properly, according to the actual resource
> > dependencies. The bridge auxdevs are then probed when the next bridge
> > becomes available, sparing drivers from drm_bridge_attach() returning
> > -EPROBE_DEFER.
> 
> I'm not thrilled :-( Let's discuss the questions above first.
> 
> > Proposed merge strategy: immutable branch with the drm commit, which is
> > then merged into PHY and USB subsystems together with the corresponding
> > patch.
> > 
> > Changes since v3:
> >  - Moved bridge driver to gpu/drm/bridge (Neil Armstrong)
> >  - Renamed it to aux-bridge (since there is already a simple_bridge driver)
> >  - Made CONFIG_OF mandatory for this driver (Neil Armstrong)
> >  - Added missing kfree and ida_free (Dan Carpenter)
> > 
> > Changes since v2:
> >  - ifdef'ed bridge->of_node access (LKP)
> > 
> > Changes since v1:
> >  - Added EXPORT_SYMBOL_GPL / MODULE_LICENSE / etc. to drm_simple_bridge
> > 
> > Dmitry Baryshkov (3):
> >   drm/bridge: add transparent bridge helper
> >   phy: qcom: qmp-combo: switch to DRM_AUX_BRIDGE
> >   usb: typec: nb7vpq904m: switch to DRM_AUX_BRIDGE
> > 
> >  drivers/gpu/drm/bridge/Kconfig|   9 ++
> >  drivers/gpu/drm/bridge/Makefile   |   1 +
> >  drivers/gpu/drm/bridge/aux-bridge.c   | 132 ++
> >  drivers/phy/qualcomm/Kconfig  |   2 +-
> >  drivers/phy/qualcomm/phy-qcom-qmp-combo.c |  44 +---
> >  drivers/usb/typec/mux/Kconfig |   2 +-
> >  drivers/usb/typec/mux/nb7vpq904m.c|  44 +---
> >  include/drm/bridge/aux-bridge.h   |  19 
> >  8 files changed, 167 insertions(+), 86 deletions(-)
> >  create mode 100644 drivers/gpu/drm/bridge/aux-bridge.c
> >  create mode 100644 include/drm/bridge/aux-bridge.h

-- 
Regards,

Laurent Pinchart


Re: [PATCH v4 0/3] drm: simplify support for transparent DRM bridges

2023-08-22 Thread Neil Armstrong

On 22/08/2023 16:19, Laurent Pinchart wrote:

On Tue, Aug 22, 2023 at 05:17:37PM +0300, Laurent Pinchart wrote:

Hi Dmitry,

Thank you for the patches.

On Thu, Aug 17, 2023 at 05:55:13PM +0300, Dmitry Baryshkov wrote:

Supporting DP/USB-C can result in a chain of several transparent
bridges (PHY, redrivers, mux, etc). This results in drivers having
similar boilerplate code for such bridges.


What do you mean by transparent bridge here ? Bridges are a DRM concept,
and as far as I can tell, a PHY isn't a bridge. Why does it need to be
handled as one, especially if it's completely transparent ?


Next, these drivers are susceptible to -EPROBE_DEFER loops: the next
bridge can either be probed from the bridge->attach callback, when it is
too late to return -EPROBE_DEFER, or from the probe() callback, when the
next bridge might not yet be available, because it depends on the
resources provided by the probing device.


Can't device links help avoiding defer probing in those cases ?


Last, but not least, this results in the internal knowledge of DRM
subsystem slowly diffusing into other subsystems, like PHY or USB/TYPEC.


Why so ? The PHY subsystem should provide a PHY, without considering
what subsystem it will be used by. This patch series seems to me to
actually create this DRM dependency in other subsystems,


I was wrong on this one, there are indeed existing drm_bridge instances
in drivers/usb/ and drivers/phy/. That's certainly not nice. Why do we
even need drm_bridge there, why can't the PHYs be acquired by their
consumers in DRM (and anywhere else) using the PHY API ?


Because with USB-C Altmode/USB4/Thunderbolt, DisplayPort is one of the
data streams handled by PHYs, USB-C PD manager, re-timers, SBU muxes...
and all this must be coordinated with the display controller and can
be considered as bridges between the DP controller and the USB-C connector.

As of today, it has been handled by OOB events on Intel & AMD, but the entirety
of the USB-C chain is handled in firmware, so this scales.
When we need to describe the entire USB-C data stream chain as port/endpoint
in DT, OOB handling doesn't work anymore since we need to sync the entire
USB-C chain (muxes, switches, retimers, phys...) handled by Linux before
starting the DP stream.

Neil




which I don't
think is a very good idea. Resources should be registered in their own
subsystem with the appropriate API, not in a way that is tied to a
particular consumer.


To solve all these issues, define a separate DRM helper, which creates
separate aux device just for the bridge. During probe such aux device
doesn't result in the EPROBE_DEFER loops. Instead it allows the device
drivers to probe properly, according to the actual resource
dependencies. The bridge auxdevs are then probed when the next bridge
becomes available, sparing drivers from drm_bridge_attach() returning
-EPROBE_DEFER.


I'm not thrilled :-( Let's discuss the questions above first.


Proposed merge strategy: immutable branch with the drm commit, which is
then merged into PHY and USB subsystems together with the corresponding
patch.

Changes since v3:
  - Moved bridge driver to gpu/drm/bridge (Neil Armstrong)
  - Renamed it to aux-bridge (since there is already a simple_bridge driver)
  - Made CONFIG_OF mandatory for this driver (Neil Armstrong)
  - Added missing kfree and ida_free (Dan Carpenter)

Changes since v2:
  - ifdef'ed bridge->of_node access (LKP)

Changes since v1:
  - Added EXPORT_SYMBOL_GPL / MODULE_LICENSE / etc. to drm_simple_bridge

Dmitry Baryshkov (3):
   drm/bridge: add transparent bridge helper
   phy: qcom: qmp-combo: switch to DRM_AUX_BRIDGE
   usb: typec: nb7vpq904m: switch to DRM_AUX_BRIDGE

  drivers/gpu/drm/bridge/Kconfig|   9 ++
  drivers/gpu/drm/bridge/Makefile   |   1 +
  drivers/gpu/drm/bridge/aux-bridge.c   | 132 ++
  drivers/phy/qualcomm/Kconfig  |   2 +-
  drivers/phy/qualcomm/phy-qcom-qmp-combo.c |  44 +---
  drivers/usb/typec/mux/Kconfig |   2 +-
  drivers/usb/typec/mux/nb7vpq904m.c|  44 +---
  include/drm/bridge/aux-bridge.h   |  19 
  8 files changed, 167 insertions(+), 86 deletions(-)
  create mode 100644 drivers/gpu/drm/bridge/aux-bridge.c
  create mode 100644 include/drm/bridge/aux-bridge.h






Re: [PATCH v5] drm/i915: Avoid circular locking dependency when flush delayed work on gt reset

2023-08-22 Thread Daniel Vetter
On Tue, Aug 22, 2023 at 02:14:28PM +, Dong, Zhanjun wrote:
> 
> 
> > -Original Message-
> > From: Daniel Vetter 
> > Sent: August 22, 2023 9:51 AM
> > To: Dong, Zhanjun 
> > Cc: intel-...@lists.freedesktop.org; dri-devel@lists.freedesktop.org; 
> > Harrison,
> > John C ; Andi Shyti ;
> > Daniel Vetter 
> > Subject: Re: [PATCH v5] drm/i915: Avoid circular locking dependency when
> > flush delayed work on gt reset
> > 
> > On Fri, Aug 11, 2023 at 11:20:11AM -0700, Zhanjun Dong wrote:
> > > This attempts to avoid circular locking dependency between flush delayed
> > > work and intel_gt_reset.
> > > When intel_gt_reset was called, task will hold a lock.
> > > To cancel delayed work here, the _sync version will also acquire a lock,
> > > which might trigger the possible circular locking dependency warning.
> > > When intel_gt_reset called, reset_in_progress flag will be set, add code
> > > to check the flag, call async version if reset is in progress.
> > >
> > > Signed-off-by: Zhanjun Dong 
> > > Cc: John Harrison 
> > > Cc: Andi Shyti 
> > > Cc: Daniel Vetter 
> > > ---
> > >  drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 11 ++-
> > >  1 file changed, 10 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > > index a0e3ef1c65d2..600388c849f7 100644
> > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > > @@ -1359,7 +1359,16 @@ static void guc_enable_busyness_worker(struct
> > intel_guc *guc)
> > >
> > >  static void guc_cancel_busyness_worker(struct intel_guc *guc)
> > >  {
> > > - cancel_delayed_work_sync(&guc->timestamp.work);
> > > + /*
> > > +  * When intel_gt_reset was called, task will hold a lock.
> > > +  * To cacel delayed work here, the _sync version will also acquire a 
> > > lock,
> > which might
> > > +  * trigger the possible cirular locking dependency warning.
> > 
> > This is not even close to a locking bugfix. Consider this a formal nack,
> > because the issue here is not even close to "needs more comments to
> > explain what's going on".
> > -Daniel
> 
> The purpose of the comment here is to explain the locking issue condition.
> > 
> > > +  * Check the reset_in_progress flag, call async verion if reset is in
> > progress.
> 
> 
> The comment here explains the check of the flag to avoid the locking condition.
> The reset process is not considered to be complete in a short time; other than
> that, did we miss anything?

Either the _sync is not needed at all, in which case you need to explain why.
Which this patch doesn't. And if the _sync isn't needed, then it's
probably not needed in all/most cases?

Or the _sync is needed, and in that case you just replace a potential
deadlock scenario with a potential race condition.

In neither case should this patch here be merged.
-Daniel

> 
> > > +  */
> > > + if (guc_to_gt(guc)->uc.reset_in_progress)
> > > + cancel_delayed_work(&guc->timestamp.work);
> > > + else
> > > + cancel_delayed_work_sync(&guc->timestamp.work);
> > >  }
> > >
> > >  static void __reset_guc_busyness_stats(struct intel_guc *guc)
> > > --
> > > 2.34.1
> > >
> > 
> > --
> > Daniel Vetter
> > Software Engineer, Intel Corporation
> > http://blog.ffwll.ch

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [PATCH RFC 00/13] drm/connector: Create HDMI Connector infrastructure

2023-08-22 Thread Maxime Ripard
Hi,

On Tue, Aug 22, 2023 at 04:16:08PM +0200, Daniel Vetter wrote:
> On Mon, Aug 14, 2023 at 03:56:12PM +0200, Maxime Ripard wrote:
> > Here's a series that creates a subclass of drm_connector specifically
> > targeted at HDMI controllers.
> > 
> > The idea behind this series came from a recent discussion on IRC during
> > which we discussed infoframes generation of i915 vs everything else. 
> > 
> > Infoframes generation code still requires some decent boilerplate, with
> > each driver doing some variation of it.
> > 
> > In parallel, while working on vc4, we ended up converting a lot of i915
> > logic (mostly around format / bpc selection, and scrambler setup) to
> > apply on top of a driver that relies only on helpers.
> > 
> > While currently sitting in the vc4 driver, none of that logic actually
> > relies on any driver or hardware-specific behaviour.
> > 
> > The only missing piece to make it shareable is a bunch of extra
> > variables stored in a state (current bpc, format, RGB range selection,
> > etc.).
> > 
> > Thus, I decided to create some generic subclass of drm_connector to
> > address HDMI connectors, with a bunch of helpers that will take care of
> > all the "HDMI Spec" related code. Scrambler setup is missing at the
> > moment but can easily be plugged in.
> > 
> > Last week, Hans Verkuil also expressed interest in retrieving the
> > infoframes generated from userspace to create an infoframe-decode tool.
> > This series thus leverages the infoframe generation code to expose it
> > through debugfs.
> > 
> > This entire series is only build-tested at the moment. Let me know what
> > you think,
>
> I think the idea overall makes sense, we probably need it to roll out
> actual hdmi support to all the hdmi drivers we have. But there's the
> eternal issue of "C sucks at multiple inheritance".
> 
> Which means if you have a driver that subclasses drm_connector already for
> its driver needs it de facto cannot, or only under some serious pains, use
> this.

That's what vc4 is doing, and it went fine I think? it was mostly a
matter of subclassing drm_hdmi_connector instead of drm_connector, and
adjusting the various pointers and accessors here and there.

It does create a fairly big diffstat, but nothing too painful.

> Which is kinda why in practice we tend to not subclass, but stuff
> subclass fields into a named sub-structure. So essentially struct
> drm_connector.hdmi and struct drm_connector_state.hdmi instead of
> drm_hdmi_connector and drm_hdmi_connector_state. The helper functions to
> set it all up would all still be the same roughly. It's less typesafe but
> I think the gain in practical use (like you could make i915 use the
> helpers probably, which with this approach here is practically
> impossible).

Ack.

> The only other nit is that we probably want to put some of the hdmi
> properties into struct drm_mode_config because there's no reason to have
> per-connector valid values.

What property would you want to move?

> Also, it might be really good if you can find a co-conspirator who also
> wants to use this in their driver, then with some i915 extracting we'd
> have three, which should ensure the helper api is solid.

I can convert sunxi (old) HDMI driver if needed. I'm not sure how
helpful it would be since it doesn't support bpc > 8, but it could be a
nice showcase still for "simple" HDMI controllers.

Maxime


signature.asc
Description: PGP signature


Re: [PATCH RFC 00/13] drm/connector: Create HDMI Connector infrastructure

2023-08-22 Thread Daniel Vetter
On Tue, Aug 22, 2023 at 04:35:55PM +0200, Maxime Ripard wrote:
> Hi,
> 
> On Tue, Aug 22, 2023 at 04:16:08PM +0200, Daniel Vetter wrote:
> > On Mon, Aug 14, 2023 at 03:56:12PM +0200, Maxime Ripard wrote:
> > > Here's a series that creates a subclass of drm_connector specifically
> > > targeted at HDMI controllers.
> > > 
> > > The idea behind this series came from a recent discussion on IRC during
> > > which we discussed infoframes generation of i915 vs everything else. 
> > > 
> > > Infoframes generation code still requires some decent boilerplate, with
> > > each driver doing some variation of it.
> > > 
> > > In parallel, while working on vc4, we ended up converting a lot of i915
> > > logic (mostly around format / bpc selection, and scrambler setup) to
> > > apply on top of a driver that relies only on helpers.
> > > 
> > > While currently sitting in the vc4 driver, none of that logic actually
> > > relies on any driver or hardware-specific behaviour.
> > > 
> > > The only missing piece to make it shareable is a bunch of extra
> > > variables stored in a state (current bpc, format, RGB range selection,
> > > etc.).
> > > 
> > > Thus, I decided to create some generic subclass of drm_connector to
> > > address HDMI connectors, with a bunch of helpers that will take care of
> > > all the "HDMI Spec" related code. Scrambler setup is missing at the
> > > moment but can easily be plugged in.
> > > 
> > > Last week, Hans Verkuil also expressed interest in retrieving the
> > > infoframes generated from userspace to create an infoframe-decode tool.
> > > This series thus leverages the infoframe generation code to expose it
> > > through debugfs.
> > > 
> > > This entire series is only build-tested at the moment. Let me know what
> > > you think,
> >
> > I think the idea overall makes sense, we probably need it to roll out
> > actual hdmi support to all the hdmi drivers we have. But there's the
> > eternal issue of "C sucks at multiple inheritance".
> > 
> > Which means if you have a driver that subclasses drm_connector already for
> > its driver needs it de facto cannot, or only under some serious pains, use
> > this.
> 
> That's what vc4 is doing, and it went fine I think? it was mostly a
> matter of subclassing drm_hdmi_connector instead of drm_connector, and
> adjusting the various pointers and accessors here and there.
> 
> It does create a fairly big diffstat, but nothing too painful.

Yeah it's the massive churn that's the pain for refactoring existing
bigger drivers.

Plus what do you do when you both need a hdmi connector and a dp connector
(or a writeback connector).

> > Which is kinda why in practice we tend to not subclass, but stuff
> > subclass fields into a named sub-structure. So essentially struct
> > drm_connector.hdmi and struct drm_connector_state.hdmi instead of
> > drm_hdmi_connector and drm_hdmi_connector_state. The helper functions to
> > set it all up would all still be the same roughly. It's less typesafe but
> > I think the gain in practical use (like you could make i915 use the
> > helpers probably, which with this approach here is practically
> > impossible).
> 
> Ack.
> 
> > The only other nit is that we probably want to put some of the hdmi
> > properties into struct drm_mode_config because there's no reason to have
> > per-connector valid values.
> 
> What property would you want to move?

The rgb broadcast property looked very much like it's connector invariant.
Just the one I noticed, I didn't check all the others.

> > Also, it might be really good if you can find a co-conspirator who also
> > wants to use this in their driver, then with some i915 extracting we'd
> > have three, which should ensure the helper api is solid.
> 
> I can convert sunxi (old) HDMI driver if needed. I'm not sure how
> helpful it would be since it doesn't support bpc > 8, but it could be a
> nice showcase still for "simple" HDMI controllers.

Yeah that might be good. Or perhaps poke Rob Clark whether msm is
interested and someone could do a conversion for dpu5 or so?

Cheers, Sima
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [PATCH RFC 00/13] drm/connector: Create HDMI Connector infrastructure

2023-08-22 Thread Jani Nikula
On Tue, 22 Aug 2023, Maxime Ripard  wrote:
> Hi,
>
> On Tue, Aug 22, 2023 at 04:16:08PM +0200, Daniel Vetter wrote:
>> On Mon, Aug 14, 2023 at 03:56:12PM +0200, Maxime Ripard wrote:
>> > Here's a series that creates a subclass of drm_connector specifically
>> > targeted at HDMI controllers.
>> > 
>> > The idea behind this series came from a recent discussion on IRC during
>> > which we discussed infoframes generation of i915 vs everything else. 
>> > 
>> > Infoframes generation code still requires some decent boilerplate, with
>> > each driver doing some variation of it.
>> > 
>> > In parallel, while working on vc4, we ended up converting a lot of i915
>> > logic (mostly around format / bpc selection, and scrambler setup) to
>> > apply on top of a driver that relies only on helpers.
>> > 
>> > While currently sitting in the vc4 driver, none of that logic actually
>> > relies on any driver or hardware-specific behaviour.
>> > 
>> > The only missing piece to make it shareable is a bunch of extra
>> > variables stored in a state (current bpc, format, RGB range selection,
>> > etc.).
>> > 
>> > Thus, I decided to create some generic subclass of drm_connector to
>> > address HDMI connectors, with a bunch of helpers that will take care of
>> > all the "HDMI Spec" related code. Scrambler setup is missing at the
>> > moment but can easily be plugged in.
>> > 
>> > Last week, Hans Verkuil also expressed interest in retrieving the
>> > infoframes generated from userspace to create an infoframe-decode tool.
>> > This series thus leverages the infoframe generation code to expose it
>> > through debugfs.
>> > 
>> > This entire series is only build-tested at the moment. Let me know what
>> > you think,
>>
>> I think the idea overall makes sense, we probably need it to roll out
>> actual hdmi support to all the hdmi drivers we have. But there's the
>> eternal issue of "C sucks at multiple inheritance".
>> 
>> Which means if you have a driver that subclasses drm_connector already for
>> its driver needs it de facto cannot, or only under some serious pains, use
>> this.
>
> That's what vc4 is doing, and it went fine I think? it was mostly a
> matter of subclassing drm_hdmi_connector instead of drm_connector, and
> adjusting the various pointers and accessors here and there.
>
> It does create a fairly big diffstat, but nothing too painful.

The main pain point is not the diffstat per se, but that *all* casts to
subclass need to check what the connector type is before doing
so. You'll also get fun NULL conditions that you need to check and
handle if the type isn't what you'd like it to be.

Currently i915 can just assume all drm_connectors it encounters are
intel_connectors that it created, always.

Basically this has blocked the writeback connector stuff for a few years
now in i915, because writeback forces a different subclassing, and what
should be a small change in i915 turns into huge churn.

BR,
Jani.


>
>> Which is kinda why in practice we tend to not subclass, but stuff
>> subclass fields into a named sub-structure. So essentially struct
>> drm_connector.hdmi and struct drm_connector_state.hdmi instead of
>> drm_hdmi_connector and drm_hdmi_connector_state. The helper functions to
>> set it all up would all still be the same roughly. It's less typesafe but
>> I think the gain in practical use (like you could make i915 use the
>> helpers probably, which with this approach here is practically
>> impossible).
>
> Ack.
>
>> The only other nit is that we probably want to put some of the hdmi
>> properties into struct drm_mode_config because there's no reason to have
>> per-connector valid values.
>
> What property would you want to move?
>
>> Also, it might be really good if you can find a co-conspirator who also
>> wants to use this in their driver, then with some i915 extracting we'd
>> have three, which should ensure the helper api is solid.
>
> I can convert sunxi (old) HDMI driver if needed. I'm not sure how
> helpful it would be since it doesn't support bpc > 8, but it could be a
> nice showcase still for "simple" HDMI controllers.
>
> Maxime

-- 
Jani Nikula, Intel Open Source Graphics Center


Re: [PATCH RFC 00/13] drm/connector: Create HDMI Connector infrastructure

2023-08-22 Thread Daniel Vetter
On Tue, Aug 22, 2023 at 05:51:39PM +0300, Jani Nikula wrote:
> On Tue, 22 Aug 2023, Maxime Ripard  wrote:
> > Hi,
> >
> > On Tue, Aug 22, 2023 at 04:16:08PM +0200, Daniel Vetter wrote:
> >> On Mon, Aug 14, 2023 at 03:56:12PM +0200, Maxime Ripard wrote:
> >> > Here's a series that creates a subclass of drm_connector specifically
> >> > targeted at HDMI controllers.
> >> > 
> >> > The idea behind this series came from a recent discussion on IRC during
> >> > which we discussed infoframes generation of i915 vs everything else. 
> >> > 
> >> > Infoframes generation code still requires some decent boilerplate, with
> >> > each driver doing some variation of it.
> >> > 
> >> > In parallel, while working on vc4, we ended up converting a lot of i915
> >> > logic (mostly around format / bpc selection, and scrambler setup) to
> >> > apply on top of a driver that relies only on helpers.
> >> > 
> >> > While currently sitting in the vc4 driver, none of that logic actually
> >> > relies on any driver or hardware-specific behaviour.
> >> > 
> >> > The only missing piece to make it shareable is a bunch of extra
> >> > variables stored in a state (current bpc, format, RGB range selection,
> >> > etc.).
> >> > 
> >> > Thus, I decided to create some generic subclass of drm_connector to
> >> > address HDMI connectors, with a bunch of helpers that will take care of
> >> > all the "HDMI Spec" related code. Scrambler setup is missing at the
> >> > moment but can easily be plugged in.
> >> > 
> >> > Last week, Hans Verkuil also expressed interest in retrieving the
> >> > infoframes generated from userspace to create an infoframe-decode tool.
> >> > This series thus leverages the infoframe generation code to expose it
> >> > through debugfs.
> >> > 
> >> > This entire series is only build-tested at the moment. Let me know what
> >> > you think,
> >>
> >> I think the idea overall makes sense, we probably need it to roll out
> >> actual hdmi support to all the hdmi drivers we have. But there's the
> >> eternal issue of "C sucks at multiple inheritance".
> >> 
> >> Which means if you have a driver that subclasses drm_connector already for
> >> its driver needs it de facto cannot, or only under some serious pains, use
> >> this.
> >
> > That's what vc4 is doing, and it went fine I think? it was mostly a
> > matter of subclassing drm_hdmi_connector instead of drm_connector, and
> > adjusting the various pointers and accessors here and there.
> >
> > It does create a fairly big diffstat, but nothing too painful.
> 
> The main pain point is not the diffstat per se, but that *all* casts to
> subclass need to check what the connector type is before doing
> so. You'll also get fun NULL conditions that you need to check and
> handle if the type isn't what you'd like it to be.
> 
> Currently i915 can just assume all drm_connectors it encounters are
> intel_connectors that it created, always.
> 
> Basically this has blocked the writeback connector stuff for a few years
> now in i915, because writeback forces a different subclassing, and what
> should be a small change in i915 turns into huge churn.

Yeah after the writeback experience I'm heavily leaning towards "this was
a mistake".

For writeback we could refactor it I think by just moving it all (which I
hope isn't too much churn), and then removing the then empty types (which
is where the big churn kicks in, so maybe just add that to gpu/todo.rst).

Cheers, Sima

> 
> BR,
> Jani.
> 
> 
> >
> >> Which is kinda why in practice we tend to not subclass, but stuff
> >> subclass fields into a named sub-structure. So essentially struct
> >> drm_connector.hdmi and struct drm_connector_state.hdmi instead of
> >> drm_hdmi_connector and drm_hdmi_connector_state. The helper functions to
> >> set it all up would all still be the same roughly. It's less typesafe but
> >> I think the gain in practical use (like you could make i915 use the
> >> helpers probably, which with this approach here is practically
> >> impossible).
> >
> > Ack.
> >
> >> The only other nit is that we probably want to put some of the hdmi
> >> properties into struct drm_mode_config because there's no reason to have
> >> per-connector valid values.
> >
> > What property would you want to move?
> >
> >> Also, it might be really good if you can find a co-conspirator who also
> >> wants to use this in their driver, then with some i915 extracting we'd
> >> have three, which should ensure the helper api is solid.
> >
> > I can convert sunxi (old) HDMI driver if needed. I'm not sure how
> > helpful it would be since it doesn't support bpc > 8, but it could be a
> > nice showcase still for "simple" HDMI controllers.
> >
> > Maxime
> 
> -- 
> Jani Nikula, Intel Open Source Graphics Center

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


RE: [PATCH AUTOSEL 6.4 10/11] drm/amdkfd: disable IOMMUv2 support for KV/CZ

2023-08-22 Thread Deucher, Alexander
[Public]

> -Original Message-
> From: Sasha Levin 
> Sent: Tuesday, August 22, 2023 7:36 AM
> To: linux-ker...@vger.kernel.org; sta...@vger.kernel.org
> Cc: Deucher, Alexander ; Kuehling, Felix
> ; Koenig, Christian ;
> Mike Lothian ; Sasha Levin ; Pan,
> Xinhui ; airl...@gmail.com; dan...@ffwll.ch; amd-
> g...@lists.freedesktop.org; dri-devel@lists.freedesktop.org
> Subject: [PATCH AUTOSEL 6.4 10/11] drm/amdkfd: disable IOMMUv2
> support for KV/CZ
>
> From: Alex Deucher 
>
> [ Upstream commit 616f92d188ee7142a95a52068efdbea82645f859 ]
>
> Use the dGPU path instead.  There were a lot of platform issues with IOMMU
> in general on these chips due to windows not enabling IOMMU at the time.
> The dGPU path has been used for a long time with newer APUs and works
> fine.  This also paves the way to simplify the driver significantly.
>
> v2: use the dGPU queue manager functions

This is not needed for stable.

Alex

>
> Reviewed-by: Felix Kuehling 
> Acked-by: Christian König 
> Tested-by: Mike Lothian 
> Signed-off-by: Alex Deucher 
> Signed-off-by: Sasha Levin 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c   | 6 --
>  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 8 +---
>  2 files changed, 1 insertion(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 00f528eb98126..9c8197573dee7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -224,10 +224,6 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
>   asic_type != CHIP_TONGA)
>   kfd->device_info.supports_cwsr = true;
>
> - if (asic_type == CHIP_KAVERI ||
> - asic_type == CHIP_CARRIZO)
> - kfd->device_info.needs_iommu_device = true;
> -
>   if (asic_type != CHIP_HAWAII && !vf)
>   kfd->device_info.needs_pci_atomics = true;
>   }
> @@ -240,7 +236,6 @@ struct kfd_dev *kgd2kfd_probe(struct
> amdgpu_device *adev, bool vf)
>   uint32_t gfx_target_version = 0;
>
>   switch (adev->asic_type) {
> -#ifdef KFD_SUPPORT_IOMMU_V2
>  #ifdef CONFIG_DRM_AMDGPU_CIK
>   case CHIP_KAVERI:
>   gfx_target_version = 7;
> @@ -253,7 +248,6 @@ struct kfd_dev *kgd2kfd_probe(struct
> amdgpu_device *adev, bool vf)
>   if (!vf)
>   f2g = &gfx_v8_kfd2kgd;
>   break;
> -#endif
>  #ifdef CONFIG_DRM_AMDGPU_CIK
>   case CHIP_HAWAII:
>   gfx_target_version = 70001;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 7a95698d83f73..c73417e79745e 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -2335,18 +2335,12 @@ struct device_queue_manager
> *device_queue_manager_init(struct kfd_dev *dev)
>   }
>
>   switch (dev->adev->asic_type) {
> - case CHIP_CARRIZO:
> - device_queue_manager_init_vi(&dqm->asic_ops);
> - break;
> -
>   case CHIP_KAVERI:
> - device_queue_manager_init_cik(&dqm->asic_ops);
> - break;
> -
>   case CHIP_HAWAII:
>   device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
>   break;
>
> + case CHIP_CARRIZO:
>   case CHIP_TONGA:
>   case CHIP_FIJI:
>   case CHIP_POLARIS10:
> --
> 2.40.1



  1   2   >