RE: [EXT] [PATCH -next] drm/fsl-dcu: Remove unnecessary print function dev_err()

2022-03-07 Thread Alison Wang
Acked-by: Alison Wang 


Best Regards,
Alison Wang


-----Original Message-----
From: Yang Li  
Sent: 2022年3月3日 10:30
To: Alison Wang 
Cc: ste...@agner.ch; airl...@linux.ie; dan...@ffwll.ch; 
dri-devel@lists.freedesktop.org; linux-ker...@vger.kernel.org; Yang Li 
; Abaci Robot 
Subject: [EXT] [PATCH -next] drm/fsl-dcu: Remove unnecessary print function 
dev_err()

Caution: EXT Email

The print function dev_err() is redundant because platform_get_irq() already 
prints an error.

Eliminate the following coccicheck warning:
./drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c:277:2-9: line 277 is redundant 
because platform_get_irq() already prints an error

Reported-by: Abaci Robot 
Signed-off-by: Yang Li 
---
 drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c 
b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
index 7a503bf08d0f..20895ea79739 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
@@ -273,10 +273,8 @@ static int fsl_dcu_drm_probe(struct platform_device *pdev)
}

fsl_dev->irq = platform_get_irq(pdev, 0);
-   if (fsl_dev->irq < 0) {
-   dev_err(dev, "failed to get irq\n");
+   if (fsl_dev->irq < 0)
return fsl_dev->irq;
-   }

fsl_dev->regmap = devm_regmap_init_mmio(dev, base,
&fsl_dcu_regmap_config);
--[Alison Wang] r
2.20.1.7.g153144c



[PATCH RFC] mm: Add f_ops->populate()

2022-03-07 Thread Jarkko Sakkinen
Sometimes you might want to use MAP_POPULATE to ask a device driver to
initialize the device memory in some specific manner. SGX driver can use
this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
page in the address range.

Add f_ops->populate() with the same parameters as f_ops->mmap() and make
it conditionally called inside call_mmap(). Update call sites
accordingly.

Signed-off-by: Jarkko Sakkinen 
---
 arch/mips/kernel/vdso.c|  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c |  2 +-
 fs/coda/file.c |  2 +-
 fs/overlayfs/file.c|  2 +-
 include/linux/fs.h | 10 --
 include/linux/mm.h |  2 +-
 ipc/shm.c  |  2 +-
 mm/mmap.c  | 10 +-
 mm/nommu.c |  4 ++--
 9 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
index 3d0cf471f2fe..89f3f3da9abd 100644
--- a/arch/mips/kernel/vdso.c
+++ b/arch/mips/kernel/vdso.c
@@ -102,7 +102,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, 
int uses_interp)
base = mmap_region(NULL, STACK_TOP, PAGE_SIZE,
VM_READ | VM_EXEC |
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
-   0, NULL);
+   0, NULL, false);
if (IS_ERR_VALUE(base)) {
ret = base;
goto out;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c 
b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 1b526039a60d..4c71f64d6a79 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -107,7 +107,7 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, 
struct vm_area_struct *
if (!obj->base.filp)
return -ENODEV;
 
-   ret = call_mmap(obj->base.filp, vma);
+   ret = call_mmap(obj->base.filp, vma, false);
if (ret)
return ret;
 
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 29dd87be2fb8..e14f312fdbf8 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -173,7 +173,7 @@ coda_file_mmap(struct file *coda_file, struct 
vm_area_struct *vma)
spin_unlock(&cii->c_lock);
 
vma->vm_file = get_file(host_file);
-   ret = call_mmap(vma->vm_file, vma);
+   ret = call_mmap(vma->vm_file, vma, false);
 
if (ret) {
/* if call_mmap fails, our caller will put host_file so we
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index fa125feed0ff..b963a9397e80 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -503,7 +503,7 @@ static int ovl_mmap(struct file *file, struct 
vm_area_struct *vma)
vma_set_file(vma, realfile);
 
old_cred = ovl_override_creds(file_inode(file)->i_sb);
-   ret = call_mmap(vma->vm_file, vma);
+   ret = call_mmap(vma->vm_file, vma, false);
revert_creds(old_cred);
ovl_file_accessed(file);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e2d892b201b0..fb90284e1c82 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1993,6 +1993,7 @@ struct file_operations {
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
+   int (*populate)(struct file *, struct vm_area_struct *);
unsigned long mmap_supported_flags;
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *, fl_owner_t id);
@@ -2074,9 +2075,14 @@ static inline ssize_t call_write_iter(struct file *file, 
struct kiocb *kio,
return file->f_op->write_iter(kio, iter);
 }
 
-static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
+static inline int call_mmap(struct file *file, struct vm_area_struct *vma, 
bool do_populate)
 {
-   return file->f_op->mmap(file, vma);
+   int ret = file->f_op->mmap(file, vma);
+
+   if (!ret && do_populate)
+   ret = file->f_op->populate(file, vma);
+
+   return ret;
 }
 
 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 213cc569b192..6c8c036f423b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2683,7 +2683,7 @@ extern unsigned long get_unmapped_area(struct file *, 
unsigned long, unsigned lo
 
 extern unsigned long mmap_region(struct file *file, unsigned long addr,
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
-   struct list_head *uf);
+   struct list_head *uf, bool do_populate);
 extern unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot, unsigned long flags,
u

Re: Report 2 in ext4 and journal based on v5.17-rc1

2022-03-07 Thread Joel Fernandes
On Sat, Mar 05, 2022 at 11:15:38PM +0900, Byungchul Park wrote:
> On Fri, Mar 04, 2022 at 10:26:23PM -0500, Theodore Ts'o wrote:
> > On Fri, Mar 04, 2022 at 09:42:37AM +0900, Byungchul Park wrote:
> > > 
> > > All contexts waiting for any of the events in the circular dependency
> > > chain will be definitely stuck if there is a circular dependency as I
> > > explained. So we need another wakeup source to break the circle. In
> > > ext4 code, you might have the wakeup source for breaking the circle.
> > > 
> > > What I agreed with is:
> > > 
> > >The case that 1) the circular dependency is inevitable 2) there is
> > >another wakeup source for breaking the circle and 3) the duration
> > >in sleep is short enough, should be acceptable.
> > > 
> > > Sounds good?
> > 
> > These dependencies are part of every single ext4 metadata update,
> > and if there were any unnecessary sleeps, this would be a major
> > performance gap, and this is a very well studied part of ext4.
> > 
> > There are some places where we sleep, sure.  In some case
> > start_this_handle() needs to wait for a commit to complete, and the
> > commit thread might need to sleep for I/O to complete.  But the moment
> > the thing that we're waiting for is complete, we wake up all of the
> > processes on the wait queue.  But in the case where we wait for I/O
> > complete, that wakeup is coming from the device driver, when it
> > receives the I/O completion interrupt from the hard drive.  Is
> > that considered an "external source"?  Maybe DEPT doesn't recognize
> > that this is certain to happen just as day follows the night?  (Well,
> > maybe the I/O completion interrupt might not happen if the disk drive
> > bursts into flames --- but then, you've got bigger problems. :-)
> 
> Almost all you've been blaming at Dept are totally non-sense. Based on
> what you're saying, I'm convinced that you don't understand how Dept
> works even 1%. You don't even try to understand it before blame.
> 
> You don't have to understand and support it. But I can't respond to you
> if you keep saying silly things that way.

Byungchul, other than ext4 have there been any DEPT reports that other
subsystem maintainers agree were valid usecases?

Regarding false-positives, just to note lockdep is not without its share of
false-positives. Just that (as you know), the signal-to-noise ratio should be
high for it to be useful. I've put up with lockdep's false positives just
because it occasionally saves me from catastrophe.

> > In any case, if DEPT is going to report these "circular dependencies
> > as bugs that MUST be fixed", it's going to be pure noise and I will
> > ignore all DEPT reports, and will push back on having Lockdep replaced
> 
> Dept is going to be improved so that what you are concerning about won't
> be reported.

Yeah I am looking forward to learning more about it however I was wondering
about the following: lockdep can already be used for modeling "resource
acquire/release" and "resource wait" semantics that are unrelated to locks,
like we do in mm reclaim. I am wondering why we cannot just use those existing
lockdep mechanisms for the wait/wake usecases (assuming that we can agree
that circular dependencies related to wait/wake are a bad thing). Or perhaps
there's a reason why Peter Zijlstra did not use lockdep for wait/wake
dependencies (such as multiple wake sources), considering he wrote a lot of
that code?

Keep kicking ass brother, you're doing great.

Thanks,

 Joel



Re: Report 2 in ext4 and journal based on v5.17-rc1

2022-03-07 Thread Reimar Döffinger
Hi,
Sorry to butt in as an outsider, but this seems like a shockingly disrespectful 
discussion for such a wide CC list.
I don't want to make rules how you discuss things (I very rarely contribute), 
and I see the value in a frank discussion, but maybe you could continue with a 
reduced CC list?
I find it unlikely that I am the only one who could do without this.

Best regards,
Reimar Döffinger

> On 5 Mar 2022, at 15:55, Byungchul Park  wrote:
> 
> On Fri, Mar 04, 2022 at 10:40:35PM -0500, Theodore Ts'o wrote:
>> On Fri, Mar 04, 2022 at 12:20:02PM +0900, Byungchul Park wrote:
>>> 
>>> I found a point that the two wait channels don't lead a deadlock in
>>> some cases thanks to Jan Kara. I will fix it so that Dept won't
>>> complain it.
>> 
>> I sent my last (admittedly cranky) message before you sent this.  I'm
>> glad you finally understood Jan's explanation.  I was trying to tell
> 
> Not finally. I've understood him whenever he tried to tell me something.
> 
>> you the same thing, but apparently I failed to communicate in a
> 
> I don't think so. Your point and Jan's point are different. All he has
> said make sense. But yours does not.
> 
>> sufficiently clear manner.  In any case, what Jan described is a
>> fundamental part of how wait queues work, and I'm kind of amazed that
>> you were able to implement DEPT without understanding it.  (But maybe
> 
> Of course, it was possible because all that Dept has to know for basic
> work is wait and event. The subtle things like what Jan told me help
> Dept be better.
> 
>> that is why some of the DEPT reports were completely incomprehensible
> 
> It's because you are blinded to blame at it without understanding how
> Dept works at all. I will fix those that must be fixed. Don't worry.
> 
>> to me; I couldn't interpret why in the world DEPT was saying there was
>> a problem.)
> 
> I can tell you if you really want to understand why. But I can't if you
> are like this.
> 
>> In any case, the thing I would ask is a little humility.  We regularly
>> use lockdep, and we run a huge number of stress tests, throughout each
>> development cycle.
> 
> Sure.
> 
>> So if DEPT is issuing lots of reports about apparently circular
>> dependencies, please try to be open to the thought that the fault is
> 
> No one was convinced that Dept doesn't have a fault. I think your
> worries are too much.
> 
>> in DEPT, and don't try to argue with maintainers that their code MUST
>> be buggy --- but since you don't understand our code, and DEPT must be
> 
> No one argued that their code must be buggy, either. So I don't think
> you have to worry about what's never happened.
> 
>> theoretically perfect, that it is up to the Maintainers to prove to
>> you that their code is correct.
>> 
>> I am going to gently suggest that it is at least as likely, if not
>> more likely, that the failure is in DEPT or your understanding of what
> 
> No doubt. I already think so. But it doesn't mean that I have to keep
> quiet without discussing to improve Dept. I will keep improving Dept in
> a reasonable way.
> 
>> how kernel wait channels and locking works.  After all, why would it
>> be that we haven't found these problems via our other QA practices?
> 
> Let's talk more once you understand how Dept works at least 10%. Or I
> think we cannot talk in a productive way.
> 



Re: [PATCH] drm: Drop commas after SoC match table sentinels

2022-03-07 Thread Neil Armstrong

On 03/03/2022 13:44, Geert Uytterhoeven wrote:

It does not make sense to have a comma after a sentinel, as any new
elements must be added before the sentinel.

Signed-off-by: Geert Uytterhoeven 
---
  drivers/gpu/drm/bridge/nwl-dsi.c  | 2 +-
  drivers/gpu/drm/meson/meson_drv.c | 2 +-
  2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/bridge/nwl-dsi.c b/drivers/gpu/drm/bridge/nwl-dsi.c
index 5abb5ec3de467458..846df1ee7a2888cb 100644
--- a/drivers/gpu/drm/bridge/nwl-dsi.c
+++ b/drivers/gpu/drm/bridge/nwl-dsi.c
@@ -1151,7 +1151,7 @@ MODULE_DEVICE_TABLE(of, nwl_dsi_dt_ids);
  static const struct soc_device_attribute nwl_dsi_quirks_match[] = {
{ .soc_id = "i.MX8MQ", .revision = "2.0",
  .data = (void *)E11418_HS_MODE_QUIRK },
-   { /* sentinel. */ },
+   { /* sentinel. */ }
  };
  
  static int nwl_dsi_probe(struct platform_device *pdev)

diff --git a/drivers/gpu/drm/meson/meson_drv.c 
b/drivers/gpu/drm/meson/meson_drv.c
index 26aeaf0ab86ef932..35eaa669e8fe0c2a 100644
--- a/drivers/gpu/drm/meson/meson_drv.c
+++ b/drivers/gpu/drm/meson/meson_drv.c
@@ -167,7 +167,7 @@ static const struct meson_drm_soc_attr 
meson_drm_soc_attrs[] = {
},
.attrs = (const struct soc_device_attribute []) {
{ .soc_id = "GXL (S805*)", },
-   { /* sentinel */ },
+   { /* sentinel */ }
}
},
  };


Reviewed-by: Neil Armstrong 

Tell me if you want me to apply it to drm-misc-next.

Thanks,
Neil


Re: [PATCH] drm: Drop commas after SoC match table sentinels

2022-03-07 Thread Geert Uytterhoeven
Hi Neil,

On Mon, Mar 7, 2022 at 9:30 AM Neil Armstrong  wrote:
> On 03/03/2022 13:44, Geert Uytterhoeven wrote:
> > It does not make sense to have a comma after a sentinel, as any new
> > elements must be added before the sentinel.
> >
> > Signed-off-by: Geert Uytterhoeven 

> Reviewed-by: Neil Armstrong 

Thank you!

> Tell me if you want me to apply it to drm-misc-next.

Do I have other options? ;-)
Thanks in advance!

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds


Re: [PATCH] drm/msm/gpu: Fix crash on devices without devfreq support

2022-03-07 Thread Naresh Kamboju
Hi Rob,

On Sun, 20 Feb 2022 at 00:02, Rob Clark  wrote:
>
> From: Rob Clark 
>
> Avoid going down devfreq paths on devices where devfreq is not
> initialized.
>
> Reported-by: Linux Kernel Functional Testing 
> Reported-by: Anders Roxell 
> Signed-off-by: Rob Clark 

I have tested this patch and the reported kernel crash is fixed [1].

Tested-by: Linux Kernel Functional Testing 

> ---
>  drivers/gpu/drm/msm/msm_gpu_devfreq.c | 31 +--
>  1 file changed, 25 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c 
> b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
> index 9bf319be11f6..26a3669a97b3 100644
> --- a/drivers/gpu/drm/msm/msm_gpu_devfreq.c
> +++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
> @@ -83,12 +83,17 @@ static struct devfreq_dev_profile msm_devfreq_profile = {
>  static void msm_devfreq_boost_work(struct kthread_work *work);
>  static void msm_devfreq_idle_work(struct kthread_work *work);
>
> +static bool has_devfreq(struct msm_gpu *gpu)
> +{
> +   return !!gpu->funcs->gpu_busy;
> +}
> +
>  void msm_devfreq_init(struct msm_gpu *gpu)
>  {
> struct msm_gpu_devfreq *df = &gpu->devfreq;
>
> /* We need target support to do devfreq */
> -   if (!gpu->funcs->gpu_busy)
> +   if (!has_devfreq(gpu))
> return;
>
> dev_pm_qos_add_request(&gpu->pdev->dev, &df->idle_freq,
> @@ -149,6 +154,9 @@ void msm_devfreq_cleanup(struct msm_gpu *gpu)
>  {
> struct msm_gpu_devfreq *df = &gpu->devfreq;
>
> +   if (!has_devfreq(gpu))
> +   return;
> +
> devfreq_cooling_unregister(gpu->cooling);
> dev_pm_qos_remove_request(&df->boost_freq);
> dev_pm_qos_remove_request(&df->idle_freq);
> @@ -156,16 +164,24 @@ void msm_devfreq_cleanup(struct msm_gpu *gpu)
>
>  void msm_devfreq_resume(struct msm_gpu *gpu)
>  {
> -   gpu->devfreq.busy_cycles = 0;
> -   gpu->devfreq.time = ktime_get();
> +   struct msm_gpu_devfreq *df = &gpu->devfreq;
>
> -   devfreq_resume_device(gpu->devfreq.devfreq);
> +   if (!has_devfreq(gpu))
> +   return;
> +
> +   df->busy_cycles = 0;
> +   df->time = ktime_get();
> +
> +   devfreq_resume_device(df->devfreq);
>  }
>
>  void msm_devfreq_suspend(struct msm_gpu *gpu)
>  {
> struct msm_gpu_devfreq *df = &gpu->devfreq;
>
> +   if (!has_devfreq(gpu))
> +   return;
> +
> devfreq_suspend_device(df->devfreq);
>
> cancel_idle_work(df);
> @@ -185,6 +201,9 @@ void msm_devfreq_boost(struct msm_gpu *gpu, unsigned 
> factor)
> struct msm_gpu_devfreq *df = &gpu->devfreq;
> uint64_t freq;
>
> +   if (!has_devfreq(gpu))
> +   return;
> +
> freq = get_freq(gpu);
> freq *= factor;
>
> @@ -207,7 +226,7 @@ void msm_devfreq_active(struct msm_gpu *gpu)
> struct devfreq_dev_status status;
> unsigned int idle_time;
>
> -   if (!df->devfreq)
> +   if (!has_devfreq(gpu))
> return;
>
> /*
> @@ -253,7 +272,7 @@ void msm_devfreq_idle(struct msm_gpu *gpu)
>  {
> struct msm_gpu_devfreq *df = &gpu->devfreq;
>
> -   if (!df->devfreq)
> +   if (!has_devfreq(gpu))
> return;
>
> msm_hrtimer_queue_work(&df->idle_work, ms_to_ktime(1),
> --
> 2.34.1


--
Linaro LKFT
https://lkft.linaro.org

[1] https://lkft.validation.linaro.org/scheduler/job/4664600#L1894


Re: [PATCH v12 03/23] dt-bindings: mediatek: add ethdr definition for mt8195

2022-03-07 Thread AngeloGioacchino Del Regno

Il 07/03/22 03:33, Nancy.Lin ha scritto:

Hi Angelo,

Thanks for the review.

On Wed, 2022-03-02 at 11:13 +0100, AngeloGioacchino Del Regno wrote:

Il 22/02/22 11:07, Nancy.Lin ha scritto:

Add vdosys1 ETHDR definition.

Signed-off-by: Nancy.Lin 
Reviewed-by: Chun-Kuang Hu 
---
   .../display/mediatek/mediatek,ethdr.yaml  | 147
++
   1 file changed, 147 insertions(+)
   create mode 100644
Documentation/devicetree/bindings/display/mediatek/mediatek,ethdr.y
aml

diff --git
a/Documentation/devicetree/bindings/display/mediatek/mediatek,ethdr
.yaml
b/Documentation/devicetree/bindings/display/mediatek/mediatek,ethdr
.yaml
new file mode 100644
index ..131eed5eeeb7
--- /dev/null
+++
b/Documentation/devicetree/bindings/display/mediatek/mediatek,ethdr
.yaml
@@ -0,0 +1,147 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id:
https://urldefense.com/v3/__http://devicetree.org/schemas/display/mediatek/mediatek,ethdr.yaml*__;Iw!!CTRNKA9wMg0ARbw!y6qWSq3epOM61tyVt6ijw6CRNaNmcK382oF8TQ-a57UAcXvx8o1yIPd7pTKakQkU$
  
+$schema:

https://urldefense.com/v3/__http://devicetree.org/meta-schemas/core.yaml*__;Iw!!CTRNKA9wMg0ARbw!y6qWSq3epOM61tyVt6ijw6CRNaNmcK382oF8TQ-a57UAcXvx8o1yIPd7pTQVqP2v$
  
+

+title: Mediatek Ethdr Device Tree Bindings
+
+maintainers:
+  - Chun-Kuang Hu 
+  - Philipp Zabel 
+
+description: |
+  ETHDR is designed for HDR video and graphics conversion in the
external display path.
+  It handles multiple HDR input types and performs tone mapping,
color space/color
+  format conversion, and then combine different layers, output the
required HDR or
+  SDR signal to the subsequent display path. This engine is
composed of two video
+  frontends, two graphic frontends, one video backend and a mixer.
ETHDR has two
+  DMA function blocks, DS and ADL. These two function blocks read
the pre-programmed
+  registers from DRAM and set them to HW in the v-blanking period.
+
+properties:
+  compatible:
+items:
+  - const: mediatek,mt8195-disp-ethdr
+  reg:
+maxItems: 7
+  reg-names:
+items:
+  - const: mixer
+  - const: vdo_fe0
+  - const: vdo_fe1
+  - const: gfx_fe0
+  - const: gfx_fe1
+  - const: vdo_be
+  - const: adl_ds
+  interrupts:
+minItems: 1
+  iommus:
+description: The compatible property is DMA function blocks.
+  Should point to the respective IOMMU block with master port
as argument,
+  see
Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml for
+  details.
+minItems: 1
+maxItems: 2
+  clocks:
+items:
+  - description: mixer clock
+  - description: video frontend 0 clock
+  - description: video frontend 1 clock
+  - description: graphic frontend 0 clock
+  - description: graphic frontend 1 clock
+  - description: video backend clock
+  - description: autodownload and menuload clock
+  - description: video frontend 0 async clock
+  - description: video frontend 1 async clock
+  - description: graphic frontend 0 async clock
+  - description: graphic frontend 1 async clock
+  - description: video backend async clock
+  - description: ethdr top clock
+  clock-names:
+items:
+  - const: mixer
+  - const: vdo_fe0
+  - const: vdo_fe1
+  - const: gfx_fe0
+  - const: gfx_fe1
+  - const: vdo_be
+  - const: adl_ds
+  - const: vdo_fe0_async
+  - const: vdo_fe1_async
+  - const: gfx_fe0_async
+  - const: gfx_fe1_async
+  - const: vdo_be_async
+  - const: ethdr_top
+  power-domains:
+maxItems: 1
+  resets:
+maxItems: 5
+  mediatek,gce-client-reg:
+$ref: /schemas/types.yaml#/definitions/phandle-array
+description: The register of display function block to be set
by gce.
+  There are 4 arguments in this property, gce node, subsys id,
offset and
+  register size. The subsys id is defined in the gce header of
each chips
+  include/include/dt-bindings/gce/-gce.h, mapping to the
register of
+  display function block.
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - interrupts
+  - power-domains
+
+additionalProperties: false
+
+examples:
+  - |


Please fix inclusions and address/size cells.

Thanks,
Angelo



Because the patch has been applied to mediatek-drm-next [1].
I saw that you have a patch for fixing this issue [2]. I think I will
just remove this patch in the next revision. Do you think this is OK?

[1]
https://git.kernel.org/pub/scm/linux/kernel/git/chunkuang.hu/linux.git/log/?h=mediatek-drm-next
[2]
https://patchwork.kernel.org/project/linux-mediatek/patch/20220304095458.12409-4-
angelogioacchino.delre...@collabora.com/

Best,
Nancy



Yes, this is ok.

Thank you,
Angelo


Re: [PATCH v2 2/2] drm/i915/dg2: Add debugfs to control global preemption setting

2022-03-07 Thread Tvrtko Ursulin



On 04/03/2022 23:46, Matt Roper wrote:

From: Akeem G Abodunrin 

Since DG2 and beyond only support global preemption enable/disable (see
Wa_14015141709), userspace no longer has a way to control preemption on
a per-context basis.  Preemption is globally enabled by default, but the
UMD teams have requested that we provide a debugfs interface that can be
used to query and adjust the system-wide preemption setting for
development and bug reporting purposes.


I guess most distros enable debugfs; does anyone know for sure? Otherwise 
the bug reporting use case could be questionable.


And UMD acks would be desirable here I'd say.


v2 (MattR):
  - Split debugfs out into a separate patch.  (Jani)
  - Add the hardware update/query as facilities in intel_gt.c and just
call them from the debugfs code.  (Jani)
  - Add register to GuC's save/restore list so that the value will
persist across resets.  (Tvrtko)
  - Place under per-GT debugfs rather than i915 debugfs.  (MattR)
  - Only register debugfs entries on platforms subject to Wa_14015141709,
and only on platforms that have an RCS engine.  (MattR/Tvrtko)

Cc: Matt Roper 
Cc: Prathap Kumar Valsan 
Cc: John Harrison 
Cc: Joonas Lahtinen 
Cc: Jani Nikula 
Cc: Tvrtko Ursulin 
Signed-off-by: Akeem G Abodunrin 
Signed-off-by: Matt Roper 
---
  drivers/gpu/drm/i915/gt/intel_gt.c | 50 ++
  drivers/gpu/drm/i915/gt/intel_gt.h |  3 ++
  drivers/gpu/drm/i915/gt/intel_gt_debugfs.c | 31 ++
  drivers/gpu/drm/i915/gt/intel_gt_regs.h|  3 ++
  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c |  7 +++
  5 files changed, 94 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index 8a2483ccbfb9..90bdebd8d267 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -1045,3 +1045,53 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
mutex_unlock(&gt->tlb_invalidate_lock);
  }
+
+/**
+ * intel_gt_get_global_preemption - return whether the global preemption
+ * setting is enabled in hardware
+ * @gt: GT structure
+ *
+ * Returns the hardware's global 'preemption enabled' setting.  Only relevant
+ * on newer platforms that lack per-context preemption control (and only on
+ * GTs that have a render engine).
+ *
+ * Returns 1 if preemption is enabled, 0 if disabled.
+ */
+u64 intel_gt_get_global_preemption(struct intel_gt *gt)
+{
+   intel_wakeref_t wakeref;
+   u32 val;
+
+   drm_WARN_ON(&gt->i915->drm, GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 
55));
+   drm_WARN_ON(&gt->i915->drm, RCS_MASK(gt) == 0);
+
+   with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
+   val = intel_uncore_read(gt->uncore, 
GEN12_VFG_PREEMPTION_CHICKEN);
+
+   return !(val & GEN12_VFG_PREEMPT_CHICKEN_DISABLE);
+}
+
+/**
+ * intel_gt_set_global_preemption - adjust global preemption enabled setting
+ * @gt: GT structure
+ * @val: desired preemption setting
+ *
+ * Enables global preemption if @val is non-zero, otherwise disables it.  Only
+ * relevant on newer platforms that lack per-context preemption control (and
+ * only on GTs that have a render engine).
+ *
+ * Returns 1 if preemption is enabled, 0 if disabled.
+ */
+void intel_gt_set_global_preemption(struct intel_gt *gt, u64 val)
+{
+   intel_wakeref_t wakeref;
+   u32 tmp = val ?
+   _MASKED_BIT_DISABLE(GEN12_VFG_PREEMPT_CHICKEN_DISABLE) :
+   _MASKED_BIT_ENABLE(GEN12_VFG_PREEMPT_CHICKEN_DISABLE);
+
+   drm_WARN_ON(&gt->i915->drm, GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 
55));
+   drm_WARN_ON(&gt->i915->drm, RCS_MASK(gt) == 0);


Bike shedding territory, but as long as these checks are present in 
release builds, it would be possible to return an error and propagate to 
debugfs caller/return. Not saying to do it just thinking out loud.



+
+   with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
+   intel_uncore_write(gt->uncore, GEN12_VFG_PREEMPTION_CHICKEN, 
tmp);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h 
b/drivers/gpu/drm/i915/gt/intel_gt.h
index 0f571c8ee22b..63a599a1bf6d 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -94,4 +94,7 @@ void intel_gt_watchdog_work(struct work_struct *work);
  
  void intel_gt_invalidate_tlbs(struct intel_gt *gt);
  
+u64 intel_gt_get_global_preemption(struct intel_gt *gt);

+void intel_gt_set_global_preemption(struct intel_gt *gt, u64 val);


Bool based would be nicer unless there is some reason for 64-bits in the 
future.


Regards,

Tvrtko


+
  #endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c 
b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c
index f103664b71d4..d851e3f80877 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c
@@ -6,6 +6,7 @@
  #include 
  
  #include "i915_drv.h"

+#in

[PATCH] drm/omap: fix NULL but dereferenced coccicheck error

2022-03-07 Thread Wan Jiabing
Fix the following coccicheck warning:
./drivers/gpu/drm/omapdrm/omap_overlay.c:89:22-25: ERROR: r_ovl is NULL
but dereferenced.

Here should be ovl->idx rather than r_ovl->idx.

Signed-off-by: Wan Jiabing 
---
 drivers/gpu/drm/omapdrm/omap_overlay.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/omapdrm/omap_overlay.c 
b/drivers/gpu/drm/omapdrm/omap_overlay.c
index 10730c9b2752..b0bc9ad2ef73 100644
--- a/drivers/gpu/drm/omapdrm/omap_overlay.c
+++ b/drivers/gpu/drm/omapdrm/omap_overlay.c
@@ -86,7 +86,7 @@ int omap_overlay_assign(struct drm_atomic_state *s, struct 
drm_plane *plane,
r_ovl = omap_plane_find_free_overlay(s->dev, overlay_map,
 caps, fourcc);
if (!r_ovl) {
-   overlay_map[r_ovl->idx] = NULL;
+   overlay_map[ovl->idx] = NULL;
*overlay = NULL;
return -ENOMEM;
}
-- 
2.35.1



Re: [PATCH RFC 0/3] MAP_POPULATE for device memory

2022-03-07 Thread David Hildenbrand
On 06.03.22 06:32, Jarkko Sakkinen wrote:
> For device memory (aka VM_IO | VM_PFNMAP) MAP_POPULATE does nothing. Allow
> to use that for initializing the device memory by providing a new callback
> f_ops->populate() for the purpose.
> 
> SGX patches are provided to show the callback in context.
> 
> An obvious alternative is a ioctl but it is less elegant and requires
> two syscalls (mmap + ioctl) per memory range, instead of just one
> (mmap).

What about extending MADV_POPULATE_READ | MADV_POPULATE_WRITE to support
VM_IO | VM_PFNMAP (as well?) ?


-- 
Thanks,

David / dhildenb



Re: [PATCH] drm: Drop commas after SoC match table sentinels

2022-03-07 Thread Neil Armstrong
Hi,

On Thu, 3 Mar 2022 13:44:56 +0100, Geert Uytterhoeven wrote:
> It does not make sense to have a comma after a sentinel, as any new
> elements must be added before the sentinel.
> 
> 

Thanks, Applied to https://anongit.freedesktop.org/git/drm/drm-misc.git 
(drm-misc-next)

[1/1] drm: Drop commas after SoC match table sentinels
  
https://cgit.freedesktop.org/drm/drm-misc/commit/?id=f6e68388443ff50088e224b3a75090bdc0403be6

-- 
Neil


Re: [PATCH 7/8] drm/i915: fixup the initial fb base on DG1

2022-03-07 Thread Matthew Auld

On 04/03/2022 19:33, Ville Syrjälä wrote:

On Fri, Mar 04, 2022 at 05:23:32PM +, Matthew Auld wrote:

The offset we get looks to be the exact start of DSM, but the
initial_plane_vma expects the address to be relative.

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
---
  .../drm/i915/display/intel_plane_initial.c| 22 +++
  1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c 
b/drivers/gpu/drm/i915/display/intel_plane_initial.c
index f797fcef18fc..b39d3a8dfe45 100644
--- a/drivers/gpu/drm/i915/display/intel_plane_initial.c
+++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c
@@ -56,10 +56,24 @@ initial_plane_vma(struct drm_i915_private *i915,
if (!mem || plane_config->size == 0)
return NULL;
  
-	base = round_down(plane_config->base,

- I915_GTT_MIN_ALIGNMENT);
-   size = round_up(plane_config->base + plane_config->size,
-   mem->min_page_size);
+   base = plane_config->base;
+   if (IS_DGFX(i915)) {
+   /*
+* On discrete the base address should be somewhere in LMEM, but
+* depending on the size of LMEM the base address might
+* intersect with the start of DSM, like on DG1, in which case
+* we need the relative address. In such cases we might also
+* need to choose between inital fb vs fbc, if space is limited.
+*
+* On future discrete HW, like DG2, we should be able to just
+* allocate directly from LMEM, due to larger LMEM size.
+*/
+   if (base >= i915->dsm.start)
+   base -= i915->dsm.start;


Subsequent code expects the object to actually be inside stolen.
If that is not the case we should just give up.


Thanks for taking a look at this. Is that subsequent code outside 
initial_plane_vma()? In the next patch this is now using LMEM directly 
for dg2. Would that blow up somewhere else?




The fact that we fail to confirm any of that on integrated
parts has always bugged me, but not enough to actually do
anything about it. Such a check would be somewhat more involved
since we'd have to look at the PTEs. But on discrete sounds like
we can get away with a trivial check.


Which PTEs? Is this for the below GGTT bind? I would have assumed that 
the create_at/for_preallocated would simply refuse to allocate the pages 
if the requested range is outside the regions usable range? Or maybe 
there is more going on behind the scenes here?





+   }
+
+   size = roundup(base + plane_config->size, mem->min_page_size);
+   base = round_down(base, I915_GTT_MIN_ALIGNMENT);
size -= base;
  
  	/*

--
2.34.1




Re: [PATCH RFC v2] mm: Add f_ops->populate()

2022-03-07 Thread Jarkko Sakkinen
On Sun, Mar 06, 2022 at 03:41:54PM -0800, Dave Hansen wrote:
> On 3/6/22 15:24, Andrew Morton wrote:
> > On Sun,  6 Mar 2022 05:26:55 +0200 Jarkko Sakkinen  
> > wrote:
> > 
> >> Sometimes you might want to use MAP_POPULATE to ask a device driver to
> >> initialize the device memory in some specific manner. SGX driver can use
> >> this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
> >> page in the address range.
> > Why is this useful?  Please fully describe the benefit to kernel users.
> > Convince us that the benefit justifies the code churn, maintenance
> > cost and larger kernel footprint.
> 
> In short: page faults stink.  The core kernel has lots of ways of
> avoiding page faults like madvise(MADV_WILLNEED) or mmap(MAP_POPULATE).
>  But, those only work on normal RAM that the core mm manages.
> 
> SGX is weird.  SGX memory is managed outside the core mm.  It doesn't
> have a 'struct page' and get_user_pages() doesn't work on it.  Its VMAs
> are marked with VM_IO.  So, none of the existing methods for avoiding
> page faults work on SGX memory.
> 
> This essentially helps extend existing "normal RAM" kernel ABIs to work
> for avoiding faults for SGX too.  SGX users want to enjoy all of the
> benefits of a delayed allocation policy (better resource use,
> overcommit, NUMA affinity) but without the cost of millions of faults.
> 
> That said, this isn't how I would have implemented it.  I probably would
> have hooked in to populate_vma_page_range() or its callers.

The exact implementation path is not the driver in this. I'm open to
better options. The point of these patches is more to show an issue
rather than a solution, and they do carry RFC because of that.

Hooking into populate_vma_page_range() does sound like a better idea,
because then it would be nicely embedded into __mm_populate() and
other functionality that calls that function.

But e.g. in __mm_populate() anything with (VM_IO | VM_PFNMAP) gets
filtered out and never reaches that function.

I don't know how unorthodox that'd be, but could we perhaps have a VM
flag for SGX?

BR, Jarkko


Re: [PATCH v7 15/24] drm/rockchip: dw_hdmi: add default 594Mhz clk for 4K@60hz

2022-03-07 Thread Andy Yan

Hi:

 I tested the 24 patches applied on Linux-5.17-rc5 on an
rk3568-evb1-v10 board with a Sony XR-75z9j HDMI TV.


4K doesn't work; the TV shows no signal.

1080P can work.

On 2/25/22 15:51, Sascha Hauer wrote:

From: Nickey Yang 

add 594Mhz configuration parameters in rockchip_phy_config

Signed-off-by: Nickey Yang 
Signed-off-by: Sascha Hauer 
---

Notes:
 Changes since v3:
 - new patch

  drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c 
b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
index e97ba072a097b..03cda7229e559 100644
--- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
@@ -192,6 +192,7 @@ static const struct dw_hdmi_phy_config 
rockchip_phy_config[] = {
{ 7425,  0x8009, 0x0004, 0x0272},
{ 14850, 0x802b, 0x0004, 0x028d},
{ 29700, 0x8039, 0x0005, 0x028d},
+   { 59400, 0x8039, 0x, 0x019d},
{ ~0UL,  0x, 0x, 0x}
  };
  


Re: [PATCH v7 22/24] drm: rockchip: Add VOP2 driver

2022-03-07 Thread Andy Yan

Hi Sascha:

On 2/25/22 15:51, Sascha Hauer wrote:

From: Andy Yan 

The VOP2 unit is found on Rockchip SoCs beginning with rk3566/rk3568.
It replaces the VOP unit found in the older Rockchip SoCs.

This driver has been derived from the downstream Rockchip Kernel and
heavily modified:

- All nonstandard DRM properties have been removed
- dropped struct vop2_plane_state and pass around less data between
   functions
- Dropped all DRM_FORMAT_* not known on upstream
- rework register access to get rid of excessively used macros
- Drop all waiting for framesyncs

The driver is tested with HDMI and MIPI-DSI display on a RK3568-EVB
board. Overlay support is tested with the modetest utility. AFBC support
on the cluster windows is tested with weston-simple-dmabuf-egl on
weston using the (yet to be upstreamed) panfrost driver support.



When running Weston 10.0.0:

 # export XDG_RUNTIME_DIR=/tmp
 # weston --backend=drm-backend.so --use-pixma --tty=2 
--continue=without-input


I got the following error:

drm_atomic_check_only [PLANE:31:Smart0-win0] CRTC set but no FB

...atomic core check failed.


weston: [atomic] couldn't commit new state: Resource temporarily unavailable

repaint-flush failed: Resource temporarily unavailable

There is no display on HDMI.

I have to mask the "start with plane disabled" logic in weston to 
get it to run correctly.


 -1066,14 +1066,14 @@ drm_pending_state_apply_atomic(struct 
drm_pending_state *pending_state,

    break;
    }

-   if (b->state_invalid) {
+   if (0/*b->state_invalid*/) {
    struct weston_head *head_base;
    struct drm_head *head;
    struct drm_crtc *crtc;
    uint32_t connector_id;
    int err;


I am not sure if this is a drm driver or weston problem.



Signed-off-by: Andy Yan 
Signed-off-by: Sascha Hauer 
---

Notes:
 Changes since v6:
 - Drop device tree parsing during runtime
 - Fix typo in Kconfig help text
 
 Changes since v5:

 - consistently use u8/u16/u32 rather than uint8_t/uint16_t/uint32_t
 - Use spin_lock rather than spin_lock_irqsave
 - replace printk with drm_dbg
 - break some overlong lines
 
 Changes since v4:

 - Avoid stack frame overflow by not allocating big array on the stack
 
 Changes since v3:

 - Sort includes
 - fix typos
 - Drop spinlock
 - Use regmap_set_bits()/regmap_clear_bits()
 - simplify vop2_scale_factor()
 - simplify vop2_afbc_transform_offset()
 
 Changes since v4:

 - Sort nodes alphabetically
 
 Changes since v3:

 - Fix HDMI connector type
 
 Changes since v4:

 - Add Robs Ack
 
 Changes since v3:

 - Bring back gamma_lut regs
 - Drop redundant _vop suffix from clock names
 
 Changes since v5:

 - Drop unnecessary #size-cells/#address-cells from nodes with only single 
endpoint
 
 Changes since v5:

 - consistently use u8/u16/u32 rather than uint8_t/uint16_t/uint32_t
 - Use spin_lock rather than spin_lock_irqsave
 - replace printk with drm_dbg
 - break some overlong lines
 
 Changes since v4:

 - Avoid stack frame overflow by not allocating big array on the stack
 
 Changes since v3:

 - Sort includes
 - fix typos
 - Drop spinlock
 - Use regmap_set_bits()/regmap_clear_bits()
 - simplify vop2_scale_factor()
 - simplify vop2_afbc_transform_offset()
 
 Changes since v4:

 - Sort nodes alphabetically
 
 Changes since v3:

 - Fix HDMI connector type

  drivers/gpu/drm/rockchip/Kconfig |6 +
  drivers/gpu/drm/rockchip/Makefile|1 +
  drivers/gpu/drm/rockchip/rockchip_drm_drv.c  |1 +
  drivers/gpu/drm/rockchip/rockchip_drm_drv.h  |6 +-
  drivers/gpu/drm/rockchip/rockchip_drm_fb.c   |2 +
  drivers/gpu/drm/rockchip/rockchip_drm_vop.h  |   15 +
  drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 2686 ++
  drivers/gpu/drm/rockchip/rockchip_drm_vop2.h |  477 
  drivers/gpu/drm/rockchip/rockchip_vop2_reg.c |  281 ++
  9 files changed, 3474 insertions(+), 1 deletion(-)
  create mode 100644 drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
  create mode 100644 drivers/gpu/drm/rockchip/rockchip_drm_vop2.h
  create mode 100644 drivers/gpu/drm/rockchip/rockchip_vop2_reg.c

diff --git a/drivers/gpu/drm/rockchip/Kconfig b/drivers/gpu/drm/rockchip/Kconfig
index b9b156308460a..f033971103610 100644
--- a/drivers/gpu/drm/rockchip/Kconfig
+++ b/drivers/gpu/drm/rockchip/Kconfig
@@ -28,6 +28,12 @@ config ROCKCHIP_VOP
  This selects support for the VOP driver. You should enable it
  on all older SoCs up to RK3399.
  
+config ROCKCHIP_VOP2

+   bool "Rockchip VOP2 driver"
+   help
+ This selects support for the VOP2 driver. You should enable it
+ on all newer SoCs beginning from RK3568.
+
  config ROCKCHIP_ANALOGIX_DP
bool "Rockchip spec

[PATCH] omapfb: Add missing of_node_put() in dvic_probe_of

2022-03-07 Thread Miaoqian Lin
The device_node pointer is returned by of_parse_phandle()  with refcount
incremented. We should use of_node_put() on it when done.

Fixes: f76ee892a99e ("omapfb: copy omapdss & displays for omapfb")
Signed-off-by: Miaoqian Lin 
---
 drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c 
b/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c
index 2fa436475b40..c8ad3ef42bd3 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c
@@ -246,6 +246,7 @@ static int dvic_probe_of(struct platform_device *pdev)
adapter_node = of_parse_phandle(node, "ddc-i2c-bus", 0);
if (adapter_node) {
adapter = of_get_i2c_adapter_by_node(adapter_node);
+   of_node_put(adapter_node);
if (adapter == NULL) {
dev_err(&pdev->dev, "failed to parse ddc-i2c-bus\n");
omap_dss_put_device(ddata->in);
-- 
2.17.1



Re: [PATCH v7 22/24] drm: rockchip: Add VOP2 driver

2022-03-07 Thread Sascha Hauer
Hi Andy,

On Mon, Mar 07, 2022 at 08:18:08PM +0800, Andy Yan wrote:
> Hi Sascha:
> 
> On 2/25/22 15:51, Sascha Hauer wrote:
> > From: Andy Yan 
> > 
> > The VOP2 unit is found on Rockchip SoCs beginning with rk3566/rk3568.
> > It replaces the VOP unit found in the older Rockchip SoCs.
> > 
> > This driver has been derived from the downstream Rockchip Kernel and
> > heavily modified:
> > 
> > - All nonstandard DRM properties have been removed
> > - dropped struct vop2_plane_state and pass around less data between
> >functions
> > - Dropped all DRM_FORMAT_* not known on upstream
> > - rework register access to get rid of excessively used macros
> > - Drop all waiting for framesyncs
> > 
> > The driver is tested with HDMI and MIPI-DSI display on a RK3568-EVB
> > board. Overlay support is tested with the modetest utility. AFBC support
> > on the cluster windows is tested with weston-simple-dmabuf-egl on
> > weston using the (yet to be upstreamed) panfrost driver support.
> 
> 
> When run a weston 10.0.0:

I used weston 9.0.90 during testing. I'll try to reproduce the issue
with weston 10.

Could you maybe have a look at the HCLK issue we are discussing? This
thread could use some input from someone who has contact to the hardware
guys.

Regards,
  Sascha


-- 
Pengutronix e.K.   | |
Steuerwalder Str. 21   | http://www.pengutronix.de/  |
31137 Hildesheim, Germany  | Phone: +49-5121-206917-0|
Amtsgericht Hildesheim, HRA 2686   | Fax:   +49-5121-206917- |


[PATCH] drm/selftests: missing error code in igt_buddy_alloc_smoke()

2022-03-07 Thread Dan Carpenter
Set the error code to -ENOMEM if drm_random_order() fails.

Fixes: e6ff5ef81170 ("drm/selftests: add drm buddy smoke testcase")
Signed-off-by: Dan Carpenter 
---
 drivers/gpu/drm/selftests/test-drm_buddy.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/selftests/test-drm_buddy.c 
b/drivers/gpu/drm/selftests/test-drm_buddy.c
index fa997f89522b..6eba590afa9b 100644
--- a/drivers/gpu/drm/selftests/test-drm_buddy.c
+++ b/drivers/gpu/drm/selftests/test-drm_buddy.c
@@ -488,8 +488,10 @@ static int igt_buddy_alloc_smoke(void *arg)
}
 
order = drm_random_order(mm.max_order + 1, &prng);
-   if (!order)
+   if (!order) {
+   err = -ENOMEM;
goto out_fini;
+   }
 
for (i = 0; i <= mm.max_order; ++i) {
struct drm_buddy_block *block;
-- 
2.20.1



Re: [PATCH 5.15 000/262] 5.15.27-rc1 review

2022-03-07 Thread Naresh Kamboju
On Mon, 7 Mar 2022 at 15:07, Greg Kroah-Hartman
 wrote:
>
> This is the start of the stable review cycle for the 5.15.27 release.
> There are 262 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
>
> Responses should be made by Wed, 09 Mar 2022 09:16:25 +0000.
> Anything received after that time might be too late.
>
> The whole patch series can be found in one patch at:
> 
> https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.15.27-rc1.gz
> or in the git tree and branch at:
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-5.15.y
> and the diffstat can be found below.
>
> thanks,
>
> greg k-h


Following build errors/warnings noticed on arm64.


arch/arm64/net/bpf_jit_comp.c: In function 'build_insn':
arch/arm64/net/bpf_jit_comp.c:791:21: error: implicit declaration of
function 'bpf_pseudo_func' [-Werror=implicit-function-declaration]
  791 | if (bpf_pseudo_func(insn))
  | ^~~
cc1: some warnings being treated as errors


drivers/gpu/drm/mediatek/mtk_dsi.c: In function 'mtk_dsi_host_attach':
drivers/gpu/drm/mediatek/mtk_dsi.c:858:28: error: implicit declaration
of function 'devm_drm_of_get_bridge'; did you mean
'devm_drm_panel_bridge_add'? [-Werror=implicit-function-declaration]
  858 | dsi->next_bridge = devm_drm_of_get_bridge(dev,
dev->of_node, 0, 0);
  |^~
  |devm_drm_panel_bridge_add
drivers/gpu/drm/mediatek/mtk_dsi.c:858:26: warning: assignment to
'struct drm_bridge *' from 'int' makes pointer from integer without a
cast [-Wint-conversion]
  858 | dsi->next_bridge = devm_drm_of_get_bridge(dev,
dev->of_node, 0, 0);
  |  ^
cc1: some warnings being treated as errors

Reported-by: Linux Kernel Functional Testing 

Build log [1].

--
Linaro LKFT
https://lkft.linaro.org

[1] https://builds.tuxbuild.com/263ZKyWWLLcPGRbiZwIEZw3wvXX/


Re: [PATCH RFC v2] mm: Add f_ops->populate()

2022-03-07 Thread Jarkko Sakkinen
On Sun, Mar 06, 2022 at 03:24:56PM -0800, Andrew Morton wrote:
> On Sun,  6 Mar 2022 05:26:55 +0200 Jarkko Sakkinen  wrote:
> 
> > Sometimes you might want to use MAP_POPULATE to ask a device driver to
> > initialize the device memory in some specific manner. SGX driver can use
> > this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
> > page in the address range.
> 
> Why is this useful?  Please fully describe the benefit to kernel users.
> Convince us that the benefit justifies the code churn, maintenance
> cost and larger kernel footprint.
> 
> Do we know of any other drivers which might use this?

Brutal honesty: I don't know if any other drivers would use this, but
I would not be surprised if they did. The need for this might
very well be "masked" by ioctl APIs.  I was first proposing an ioctl
for this but Dave suggested to at least try out this route.

> > Add f_ops->populate() with the same parameters as f_ops->mmap() and make
> > it conditionally called inside call_mmap(). Update call sites
> > accodingly.
> 
> spello

Thanks, I noticed that but did not want to spam with a new version just
because of that :-)

> 
> > -static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
> > +static inline int call_mmap(struct file *file, struct vm_area_struct *vma, 
> > bool do_populate)
> >  {
> > -   return file->f_op->mmap(file, vma);
> > +   int ret = file->f_op->mmap(file, vma);
> > +
> > +   if (!ret && do_populate && file->f_op->populate)
> > +   ret = file->f_op->populate(file, vma);
> > +
> > +   return ret;
> >  }
> 
> Should this still be inlined?

I think it might make sense at least to have call_mmap_populate() and
mmap_region_populate() instead of putting that boolean parameter to every
flow (based on Greg's feedback). But only if this implementation approach
is used in the first place.

As said, I chose to use RFC to pinpoint a bottleneck for us, not claiming
that this would be the best possible way to work around it.

BR, Jarkko


Re: [PATCH v7 22/24] drm: rockchip: Add VOP2 driver

2022-03-07 Thread Daniel Stone
Hi Andy,

On Mon, 7 Mar 2022 at 12:18, Andy Yan  wrote:
> On 2/25/22 15:51, Sascha Hauer wrote:
> > The VOP2 unit is found on Rockchip SoCs beginning with rk3566/rk3568.
> > It replaces the VOP unit found in the older Rockchip SoCs.
> >
> > This driver has been derived from the downstream Rockchip Kernel and
> > heavily modified:
> >
> > - All nonstandard DRM properties have been removed
> > - dropped struct vop2_plane_state and pass around less data between
> >functions
> > - Dropped all DRM_FORMAT_* not known on upstream
> > - rework register access to get rid of excessively used macros
> > - Drop all waiting for framesyncs
> >
> > The driver is tested with HDMI and MIPI-DSI display on a RK3568-EVB
> > board. Overlay support is tested with the modetest utility. AFBC support
> > on the cluster windows is tested with weston-simple-dmabuf-egl on
> > weston using the (yet to be upstreamed) panfrost driver support.
>
> When run a weston 10.0.0:
>
>   # export XDG_RUNTIME_DIR=/tmp
>   # weston --backend=drm-backend.so --use-pixma --tty=2
> --continue=without-input
>
> I got the following error:
>
> drm_atomic_check_only [PLANE:31:Smart0-win0] CRTC set but no FB

Can you please start Weston with --logger-scopes=log,drm-backend and
attach the output?

Cheers,
Daniel


Re: [PATCH RFC 1/3] mm: Add f_ops->populate()

2022-03-07 Thread Jarkko Sakkinen
On Sun, Mar 06, 2022 at 10:43:31PM +0000, Matthew Wilcox wrote:
> On Sun, Mar 06, 2022 at 07:02:57PM +0200, Jarkko Sakkinen wrote:
> > So can I conclude from this that in general having populate available for
> > device memory is something horrid, or just the implementation path?
> 
> You haven't even attempted to explain what the problem is you're trying
> to solve.  You've shown up with some terrible code and said "Hey, is
> this a good idea".  No, no, it's not.

The problem is that in order to add memory to an enclave, which is
essentially a reserved address range in the process's virtual address
space, there are two steps:

1. Host side (kernel) does ENCLS[EAUG] to request a new page to be
   added to the enclave.
2. Enclave accepts request with ENCLU[EACCEPT] or ENCLU[EACCEPTCOPY].

In the current SGX2 patch set this is taken care of by the page fault
handler. I.e. the enclave calls ENCLU[EACCEPT] for an empty address
and the #PF handler then does EAUG for a single page.

So if you want to process a batch of pages this generates O(n)
round-trips.

So if there was a way to pre-do a batch of EAUGs, that would allow
loading data to the enclave without page faults happening
constantly.

One solution for this is simply to add an ioctl:

https://lore.kernel.org/linux-sgx/yilrbgltebu8c...@iki.fi/T/#m195ec84bf85614a140abeee245c5118c22ace8f3

But in practice when you wanted to use it, you would set up the
parameters so that they match the mmap() range. So for a practical
user space API, having mmap() take care of this would be a much
leaner option.

BR, Jarkko


Re: [PATCH] omapfb: Add missing of_node_put() in dvic_probe_of

2022-03-07 Thread Helge Deller
On 3/7/22 13:38, Miaoqian Lin wrote:
> The device_node pointer is returned by of_parse_phandle()  with refcount
> incremented. We should use of_node_put() on it when done.
>
> Fixes: f76ee892a99e ("omapfb: copy omapdss & displays for omapfb")
> Signed-off-by: Miaoqian Lin 

applied to the fbdev for-next tree.
Thanks!
Helge

> ---
>  drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c 
> b/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c
> index 2fa436475b40..c8ad3ef42bd3 100644
> --- a/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c
> +++ b/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c
> @@ -246,6 +246,7 @@ static int dvic_probe_of(struct platform_device *pdev)
>   adapter_node = of_parse_phandle(node, "ddc-i2c-bus", 0);
>   if (adapter_node) {
>   adapter = of_get_i2c_adapter_by_node(adapter_node);
> + of_node_put(adapter_node);
>   if (adapter == NULL) {
>   dev_err(&pdev->dev, "failed to parse ddc-i2c-bus\n");
>   omap_dss_put_device(ddata->in);



Re: [PATCH RFC 1/3] mm: Add f_ops->populate()

2022-03-07 Thread Jarkko Sakkinen
On Mon, Mar 07, 2022 at 03:16:57PM +0200, Jarkko Sakkinen wrote:
> On Sun, Mar 06, 2022 at 10:43:31PM +0000, Matthew Wilcox wrote:
> > On Sun, Mar 06, 2022 at 07:02:57PM +0200, Jarkko Sakkinen wrote:
> > > So can I conclude from this that in general having populate available for
> > > device memory is something horrid, or just the implementation path?
> > 
> > You haven't even attempted to explain what the problem is you're trying
> > to solve.  You've shown up with some terrible code and said "Hey, is
> > this a good idea".  No, no, it's not.
> 
> The problem is that in order to include memory to enclave, which is
> essentially a reserved address range processes virtual address space
> there's two steps into it:
> 
> 1. Host side (kernel) does ENCLS[EAUG] to request a new page to be
>added to the enclave.
> 2. Enclave accepts request with ENCLU[EACCEPT] or ENCLU[EACCEPTCOPY].
> 
> In the current SGX2 patch set this taken care by the page fault
> handler. I.e. the enclave calls ENCLU[EACCEPT] for an empty address
> and the #PF handler then does EAUG for a single page.
> 
> So if you want to process a batch of pages this generates O(n)
> round-trips.
> 
> So if there was a way pre-do a batch of EAUG's, that would allow
> to load data to the enclave without causing page faults happening
> constantly.
> 
> One solution for this simply add ioctl:
> 
> https://lore.kernel.org/linux-sgx/yilrbgltebu8c...@iki.fi/T/#m195ec84bf85614a140abeee245c5118c22ace8f3
> 
> But in practice when you wanted to use it, you would setup the
> parameters so that they match the mmap() range. So for pratical
> user space API having mmap() take care of this would be much more
> lean option.

For something like Graphene [1] the lazy #PF based option is probably
the way to go. For the wasm runtime that we're doing in Enarx [2] we get
better performance by having something like this. I.e. most of the time
we take as much as we use.

[1] https://github.com/gramineproject/graphene
[2] https://enarx.dev/

BR, Jarkko


Re: [PATCH RFC 0/3] MAP_POPULATE for device memory

2022-03-07 Thread Jarkko Sakkinen
On Sun, Mar 06, 2022 at 11:48:26PM -0800, Christoph Hellwig wrote:
> On Sun, Mar 06, 2022 at 11:33:02AM +0000, Matthew Wilcox wrote:
> > On Sun, Mar 06, 2022 at 07:32:04AM +0200, Jarkko Sakkinen wrote:
> > > For device memory (aka VM_IO | VM_PFNMAP) MAP_POPULATE does nothing. Allow
> > > to use that for initializing the device memory by providing a new callback
> > > f_ops->populate() for the purpose.
> > 
> > As I said, NAK.
> 
> Agreed.  This is an amazingly bad interface.

So what would you suggest to sort out the issue? I'm happy to go with
ioctl if nothing else is acceptable.

BR, Jarkko


[PATCH] drm/msm/adreno: fix cast in adreno_get_param()

2022-03-07 Thread Dan Carpenter
These casts need to happen before the shift.  The only time it would
matter would be if "rev.core" is >= 128.  In that case the sign bit
would be extended and we do not want that.

Fixes: afab9d91d872 ("drm/msm/adreno: Expose speedbin to userspace")
Signed-off-by: Dan Carpenter 
---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 15c8997b7251..f7b3f6d266a9 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -244,10 +244,10 @@ int adreno_get_param(struct msm_gpu *gpu, struct 
msm_file_private *ctx,
*value = !adreno_is_a650_family(adreno_gpu) ? 0x10 : 0;
return 0;
case MSM_PARAM_CHIP_ID:
-   *value = (uint64_t) adreno_gpu->rev.patchid |
-   (uint64_t) (adreno_gpu->rev.minor << 8) |
-   (uint64_t) (adreno_gpu->rev.major << 16) |
-   (uint64_t) (adreno_gpu->rev.core << 24);
+   *value =  (uint64_t)adreno_gpu->rev.patchid |
+((uint64_t)adreno_gpu->rev.minor << 8) |
+((uint64_t)adreno_gpu->rev.major << 16) |
+((uint64_t)adreno_gpu->rev.core  << 24);
if (!adreno_gpu->info->revn)
*value |= ((uint64_t) adreno_gpu->speedbin) << 32;
return 0;
-- 
2.20.1



[PATCH v3 0/6] drm/i915/ttm: Evict and restore of compressed object

2022-03-07 Thread Ramalingam C
On Xe-HP and later devices, we use dedicated compression control
state (CCS) stored in local memory for each surface, to support
the 3D and media compression formats.

The memory required for the CCS of the entire local memory is
1/256 of the local memory size. So before the kernel
boot, the required memory is reserved for the CCS data and a
secure register will be programmed with the CCS base address

So when we allocate an object in local memory we don't need to explicitly
allocate the space for ccs data. But when we evict the obj into the smem,
to hold the compression related data along with the obj we need smem
space of obj_size + (obj_size/256).

Hence when we create smem for an obj with lmem placement possibility we
create with the extra space.

When we are swapping out the local memory obj on a flat-ccs capable platform,
we need to capture the ccs data too along with main memory and we need to
restore it when we are swapping in the content.

When lmem object is swapped into a smem obj, smem obj will
have the extra pages required to hold the ccs data corresponding to the
lmem main memory. So main memory of lmem will be copied into the initial
pages of the smem and then ccs data corresponding to the main memory
will be copied to the subsequent pages of smem.

Swapin happens exactly in reverse order. First main memory of lmem is
restored from the smem's initial pages and the ccs data will be restored
from the subsequent pages of smem.

Extracting and restoring the CCS data is done through a special cmd called
XY_CTRL_SURF_COPY_BLT

v3:
  Fast_Clear_0 is used for clearing the ccs data on obj allocation [Thomas]
  Migration of main memory and ccs data are done in single request [Thomas]
  Small optimization patch is added for the migration loop
  CCS clearing is split into two patches.

Test-with: 20220307121042.23287-4-ramalinga...@intel.com

Ramalingam C (6):
  drm/i915/gt: Use XY_FAST_COLOR_BLT to clear obj on graphics ver 12+
  drm/i915/gt: Clear compress metadata for Flat-ccs objects
  drm/ttm: Add a parameter to add extra pages into ttm_tt
  drm/i915/gem: Add extra pages in ttm_tt for ccs data
  drm/i915/gt: Optimize the migration loop
  drm/i915/migrate: Evict and restore the flatccs capable lmem obj

 drivers/gpu/drm/drm_gem_vram_helper.c|   2 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c  |  23 +-
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  23 ++
 drivers/gpu/drm/i915/gt/intel_migrate.c  | 390 +--
 drivers/gpu/drm/qxl/qxl_ttm.c|   2 +-
 drivers/gpu/drm/ttm/ttm_agp_backend.c|   2 +-
 drivers/gpu/drm/ttm/ttm_tt.c |  12 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c   |   2 +-
 include/drm/ttm/ttm_tt.h |   4 +-
 9 files changed, 421 insertions(+), 39 deletions(-)

-- 
2.20.1



[PATCH v3 1/6] drm/i915/gt: Use XY_FAST_COLOR_BLT to clear obj on graphics ver 12+

2022-03-07 Thread Ramalingam C
XY_FAST_COLOR_BLT cmd is faster than the older XY_COLOR_BLT. Hence for
clearing (Zero out) the pages of the newly allocated object, faster cmd
is used.

Signed-off-by: Ramalingam C 
Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  5 ++
 drivers/gpu/drm/i915/gt/intel_migrate.c  | 51 +---
 2 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h 
b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index d112ffd56418..925e55b6a94f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -205,6 +205,11 @@
 
 #define COLOR_BLT_CMD  (2 << 29 | 0x40 << 22 | (5 - 2))
 #define XY_COLOR_BLT_CMD   (2 << 29 | 0x50 << 22)
+#define XY_FAST_COLOR_BLT_CMD  (2 << 29 | 0x44 << 22)
+#define   XY_FAST_COLOR_BLT_DEPTH_32   (2 << 19)
+#define   XY_FAST_COLOR_BLT_DW 16
+#define   XY_FAST_COLOR_BLT_MOCS_MASK  GENMASK(27, 21)
+#define   XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31
 #define SRC_COPY_BLT_CMD   (2 << 29 | 0x43 << 22)
 #define GEN9_XY_FAST_COPY_BLT_CMD  (2 << 29 | 0x42 << 22)
 #define XY_SRC_COPY_BLT_CMD(2 << 29 | 0x53 << 22)
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c 
b/drivers/gpu/drm/i915/gt/intel_migrate.c
index 20444d6ceb3c..cb68f7bf6b28 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -16,6 +16,8 @@ struct insert_pte_data {
 };
 
 #define CHUNK_SZ SZ_8M /* ~1ms at 8GiB/s preemption delay */
+#define GET_CCS_BYTES(i915, size)  (HAS_FLAT_CCS(i915) ? \
+DIV_ROUND_UP(size, 
NUM_BYTES_PER_CCS_BYTE) : 0)
 
 static bool engine_supports_migration(struct intel_engine_cs *engine)
 {
@@ -614,20 +616,56 @@ intel_context_migrate_copy(struct intel_context *ce,
return err;
 }
 
-static int emit_clear(struct i915_request *rq, u64 offset, int size, u32 value)
+static int emit_clear(struct i915_request *rq, u64 offset, int size,
+ u32 value, bool is_lmem)
 {
-   const int ver = GRAPHICS_VER(rq->engine->i915);
-   u32 *cs;
+   struct drm_i915_private *i915 = rq->engine->i915;
+   int mocs = rq->engine->gt->mocs.uc_index << 1;
+   const int ver = GRAPHICS_VER(i915);
+   u32 *cs, mem_type = 0;
+   int ring_sz;
 
GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);
 
offset += (u64)rq->engine->instance << 32;
 
-   cs = intel_ring_begin(rq, ver >= 8 ? 8 : 6);
+   if (ver >= 12)
+   ring_sz = 16;
+   else if (ver >= 8)
+   ring_sz = 8;
+   else
+   ring_sz = 6;
+
+   if (!is_lmem)
+   mem_type = 1 << XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT;
+
+   cs = intel_ring_begin(rq, ring_sz);
if (IS_ERR(cs))
return PTR_ERR(cs);
 
-   if (ver >= 8) {
+   if (ver >= 12) {
+   *cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
+   (XY_FAST_COLOR_BLT_DW - 2);
+   *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) |
+   (PAGE_SIZE - 1);
+   *cs++ = 0;
+   *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
+   *cs++ = lower_32_bits(offset);
+   *cs++ = upper_32_bits(offset);
+   *cs++ = mem_type;
+   /* BG7 */
+   *cs++ = value;
+   *cs++ = 0;
+   *cs++ = 0;
+   *cs++ = 0;
+   /* BG11 */
+   *cs++ = 0;
+   *cs++ = 0;
+   /* BG13 */
+   *cs++ = 0;
+   *cs++ = 0;
+   *cs++ = 0;
+   } else if (ver >= 8) {
*cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
*cs++ = 0;
@@ -645,7 +683,6 @@ static int emit_clear(struct i915_request *rq, u64 offset, 
int size, u32 value)
*cs++ = lower_32_bits(offset);
*cs++ = value;
}
-
intel_ring_advance(rq, cs);
return 0;
 }
@@ -711,7 +748,7 @@ intel_context_migrate_clear(struct intel_context *ce,
if (err)
goto out_rq;
 
-   err = emit_clear(rq, offset, len, value);
+   err = emit_clear(rq, offset, len, value, is_lmem);
 
/* Arbitration is re-enabled between requests. */
 out_rq:
-- 
2.20.1



[PATCH v3 2/6] drm/i915/gt: Clear compress metadata for Flat-ccs objects

2022-03-07 Thread Ramalingam C
Xe-HP and later devices support Flat CCS, which reserves a portion of
the device memory to store compression metadata. While clearing a
device memory buffer object we also need to clear the associated
CCS buffer.

XY_FAST_COLOR_BLT cmd provides an option to clear the ccs metadata
corresponding to the main memory that is cleared. So on Flat-CCS capable
platforms we use this option to clear the CCS metadata along with main
memory.

v2: Fixed issues with platform naming [Lucas]
v3: Rebased [Ram]
Used the round_up funcs [Bob]
v4: Fixed ccs blk calculation [Ram]
Added Kdoc on flat-ccs.
v5: GENMASK is used [Matt]
mocs fix [Matt]
Comments Fix [Matt]
Flush address programming [Ram]
v6: FLUSH_DW is fixed
Few coding style fix
v7: Adopting the XY_FAST_COLOR_BLT [Thomas]

Signed-off-by: Ramalingam C 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  3 ++
 drivers/gpu/drm/i915/gt/intel_migrate.c  | 39 ++--
 2 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h 
b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 925e55b6a94f..34cead49f35e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -207,8 +207,11 @@
 #define XY_COLOR_BLT_CMD   (2 << 29 | 0x50 << 22)
 #define XY_FAST_COLOR_BLT_CMD  (2 << 29 | 0x44 << 22)
 #define   XY_FAST_COLOR_BLT_DEPTH_32   (2 << 19)
+#define   FAST_CLEAR_0 (2 << 12)
 #define   XY_FAST_COLOR_BLT_DW 16
 #define   XY_FAST_COLOR_BLT_MOCS_MASK  GENMASK(27, 21)
+#define   XY_FAST_COLOR_BLT_AUX_MASK   GENMASK(20, 18)
+#define   XY_FAST_COLOR_BLT_AUX_CCS_E  5
 #define   XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31
 #define SRC_COPY_BLT_CMD   (2 << 29 | 0x43 << 22)
 #define GEN9_XY_FAST_COPY_BLT_CMD  (2 << 29 | 0x42 << 22)
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c 
b/drivers/gpu/drm/i915/gt/intel_migrate.c
index cb68f7bf6b28..05262f1b438e 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -469,6 +469,34 @@ static bool wa_1209644611_applies(int ver, u32 size)
return height % 4 == 3 && height <= 8;
 }
 
+/**
+ * DOC: Flat-CCS - Memory compression for Local memory
+ *
+ * On Xe-HP and later devices, we use dedicated compression control state (CCS)
+ * stored in local memory for each surface, to support the 3D and media
+ * compression formats.
+ *
+ * The memory required for the CCS of the entire local memory is 1/256 of the
+ * local memory size. So before the kernel boot, the required memory is 
reserved
+ * for the CCS data and a secure register will be programmed with the CCS base
+ * address.
+ *
+ * Flat CCS data needs to be cleared when a lmem object is allocated.
+ * And CCS data can be copied in and out of CCS region through
+ * XY_CTRL_SURF_COPY_BLT. CPU can't access the CCS data directly.
+ *
+ * When we exhaust the lmem, if the object's placements support smem, then we 
can
+ * directly decompress the compressed lmem object into smem and start using it
+ * from smem itself.
+ *
+ * But when we need to swapout the compressed lmem object into a smem region
+ * though objects' placement doesn't support smem, then we copy the lmem 
content
+ * as it is into smem region along with ccs data (using XY_CTRL_SURF_COPY_BLT).
+ * When the object is referred, lmem content will be swaped in along with
+ * restoration of the CCS data (using XY_CTRL_SURF_COPY_BLT) at corresponding
+ * location.
+ */
+
 static int emit_copy(struct i915_request *rq,
 u32 dst_offset, u32 src_offset, int size)
 {
@@ -621,8 +649,8 @@ static int emit_clear(struct i915_request *rq, u64 offset, 
int size,
 {
struct drm_i915_private *i915 = rq->engine->i915;
int mocs = rq->engine->gt->mocs.uc_index << 1;
+   u32 *cs, spl_mode = 0, aux = 0, mem_type = 0;
const int ver = GRAPHICS_VER(i915);
-   u32 *cs, mem_type = 0;
int ring_sz;
 
GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);
@@ -644,10 +672,15 @@ static int emit_clear(struct i915_request *rq, u64 
offset, int size,
return PTR_ERR(cs);
 
if (ver >= 12) {
+   if (HAS_FLAT_CCS(i915)) {
+   spl_mode = FAST_CLEAR_0;
+   aux = FIELD_PREP(XY_FAST_COLOR_BLT_AUX_MASK,
+XY_FAST_COLOR_BLT_AUX_CCS_E);
+   }
*cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
-   (XY_FAST_COLOR_BLT_DW - 2);
+   spl_mode | (XY_FAST_COLOR_BLT_DW - 2);
*cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) |
-   (PAGE_SIZE - 1);
+   (PAGE_SIZE - 1) | aux;
*cs++ = 0;
*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
*cs++ = lower_32_bits(offset);

[PATCH v3 3/6] drm/ttm: Add a parameter to add extra pages into ttm_tt

2022-03-07 Thread Ramalingam C
Add a parameter called "extra_pages" for ttm_tt_init, to indicate that
driver needs extra pages in ttm_tt.

v2:
  Used imperative wording [Thomas and Christian]

Signed-off-by: Ramalingam C 
cc: Christian Koenig 
cc: Hellstrom Thomas 
Reviewed-by: Thomas Hellstrom 
Reviewed-by: Christian Konig 
---
 drivers/gpu/drm/drm_gem_vram_helper.c  |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c|  2 +-
 drivers/gpu/drm/qxl/qxl_ttm.c  |  2 +-
 drivers/gpu/drm/ttm/ttm_agp_backend.c  |  2 +-
 drivers/gpu/drm/ttm/ttm_tt.c   | 12 +++-
 drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c |  2 +-
 include/drm/ttm/ttm_tt.h   |  4 +++-
 7 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c 
b/drivers/gpu/drm/drm_gem_vram_helper.c
index dc7f938bfff2..123045b58fec 100644
--- a/drivers/gpu/drm/drm_gem_vram_helper.c
+++ b/drivers/gpu/drm/drm_gem_vram_helper.c
@@ -867,7 +867,7 @@ static struct ttm_tt *bo_driver_ttm_tt_create(struct 
ttm_buffer_object *bo,
if (!tt)
return NULL;
 
-   ret = ttm_tt_init(tt, bo, page_flags, ttm_cached);
+   ret = ttm_tt_init(tt, bo, page_flags, ttm_cached, 0);
if (ret < 0)
goto err_ttm_tt_init;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 45cc5837ce00..1a8262f5f692 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -283,7 +283,7 @@ static struct ttm_tt *i915_ttm_tt_create(struct 
ttm_buffer_object *bo,
i915_tt->is_shmem = true;
}
 
-   ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, caching);
+   ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, caching, 0);
if (ret)
goto err_free;
 
diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index b2e33d5ba5d0..52156b54498f 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -113,7 +113,7 @@ static struct ttm_tt *qxl_ttm_tt_create(struct 
ttm_buffer_object *bo,
ttm = kzalloc(sizeof(struct ttm_tt), GFP_KERNEL);
if (ttm == NULL)
return NULL;
-   if (ttm_tt_init(ttm, bo, page_flags, ttm_cached)) {
+   if (ttm_tt_init(ttm, bo, page_flags, ttm_cached, 0)) {
kfree(ttm);
return NULL;
}
diff --git a/drivers/gpu/drm/ttm/ttm_agp_backend.c 
b/drivers/gpu/drm/ttm/ttm_agp_backend.c
index 6ddc16f0fe2b..d27691f2e451 100644
--- a/drivers/gpu/drm/ttm/ttm_agp_backend.c
+++ b/drivers/gpu/drm/ttm/ttm_agp_backend.c
@@ -134,7 +134,7 @@ struct ttm_tt *ttm_agp_tt_create(struct ttm_buffer_object 
*bo,
agp_be->mem = NULL;
agp_be->bridge = bridge;
 
-   if (ttm_tt_init(&agp_be->ttm, bo, page_flags, ttm_write_combined)) {
+   if (ttm_tt_init(&agp_be->ttm, bo, page_flags, ttm_write_combined, 0)) {
kfree(agp_be);
return NULL;
}
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index d234aab800a0..1a66d9fc589a 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -134,9 +134,10 @@ void ttm_tt_destroy(struct ttm_device *bdev, struct ttm_tt 
*ttm)
 static void ttm_tt_init_fields(struct ttm_tt *ttm,
   struct ttm_buffer_object *bo,
   uint32_t page_flags,
-  enum ttm_caching caching)
+  enum ttm_caching caching,
+  unsigned long extra_pages)
 {
-   ttm->num_pages = PAGE_ALIGN(bo->base.size) >> PAGE_SHIFT;
+   ttm->num_pages = (PAGE_ALIGN(bo->base.size) >> PAGE_SHIFT) + 
extra_pages;
ttm->caching = ttm_cached;
ttm->page_flags = page_flags;
ttm->dma_address = NULL;
@@ -146,9 +147,10 @@ static void ttm_tt_init_fields(struct ttm_tt *ttm,
 }
 
 int ttm_tt_init(struct ttm_tt *ttm, struct ttm_buffer_object *bo,
-   uint32_t page_flags, enum ttm_caching caching)
+   uint32_t page_flags, enum ttm_caching caching,
+   unsigned long extra_pages)
 {
-   ttm_tt_init_fields(ttm, bo, page_flags, caching);
+   ttm_tt_init_fields(ttm, bo, page_flags, caching, extra_pages);
 
if (ttm_tt_alloc_page_directory(ttm)) {
pr_err("Failed allocating page table\n");
@@ -180,7 +182,7 @@ int ttm_sg_tt_init(struct ttm_tt *ttm, struct 
ttm_buffer_object *bo,
 {
int ret;
 
-   ttm_tt_init_fields(ttm, bo, page_flags, caching);
+   ttm_tt_init_fields(ttm, bo, page_flags, caching, 0);
 
if (page_flags & TTM_TT_FLAG_EXTERNAL)
ret = ttm_sg_tt_alloc_page_directory(ttm);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
index b84ecc6d6611..4e3938e62c08 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_

[PATCH v3 5/6] drm/i915/gt: Optimize the migration loop

2022-03-07 Thread Ramalingam C
Move the static calculations out of the loop.

Signed-off-by: Ramalingam C 
---
 drivers/gpu/drm/i915/gt/intel_migrate.c | 34 -
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c 
b/drivers/gpu/drm/i915/gt/intel_migrate.c
index 05262f1b438e..24e0e73e4a90 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -556,6 +556,7 @@ intel_context_migrate_copy(struct intel_context *ce,
   struct i915_request **out)
 {
struct sgt_dma it_src = sg_sgt(src), it_dst = sg_sgt(dst);
+   u32 src_offset, dst_offset;
struct i915_request *rq;
int err;
 
@@ -564,8 +565,20 @@ intel_context_migrate_copy(struct intel_context *ce,
 
GEM_BUG_ON(ce->ring->size < SZ_64K);
 
+   src_offset = 0;
+   dst_offset = CHUNK_SZ;
+   if (HAS_64K_PAGES(ce->engine->i915)) {
+   GEM_BUG_ON(!src_is_lmem && !dst_is_lmem);
+
+   src_offset = 0;
+   dst_offset = 0;
+   if (src_is_lmem)
+   src_offset = CHUNK_SZ;
+   if (dst_is_lmem)
+   dst_offset = 2 * CHUNK_SZ;
+   }
+
do {
-   u32 src_offset, dst_offset;
int len;
 
rq = i915_request_create(ce);
@@ -593,19 +606,6 @@ intel_context_migrate_copy(struct intel_context *ce,
if (err)
goto out_rq;
 
-   src_offset = 0;
-   dst_offset = CHUNK_SZ;
-   if (HAS_64K_PAGES(ce->engine->i915)) {
-   GEM_BUG_ON(!src_is_lmem && !dst_is_lmem);
-
-   src_offset = 0;
-   dst_offset = 0;
-   if (src_is_lmem)
-   src_offset = CHUNK_SZ;
-   if (dst_is_lmem)
-   dst_offset = 2 * CHUNK_SZ;
-   }
-
len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem,
   src_offset, CHUNK_SZ);
if (len <= 0) {
@@ -615,12 +615,10 @@ intel_context_migrate_copy(struct intel_context *ce,
 
err = emit_pte(rq, &it_dst, dst_cache_level, dst_is_lmem,
   dst_offset, len);
-   if (err < 0)
-   goto out_rq;
-   if (err < len) {
+   if (err < len)
err = -EINVAL;
+   if (err < 0)
goto out_rq;
-   }
 
err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
if (err)
-- 
2.20.1



[PATCH v3 4/6] drm/i915/gem: Add extra pages in ttm_tt for ccs data

2022-03-07 Thread Ramalingam C
On Xe-HP and later devices, dedicated compression control state (CCS)
stored in local memory is used for each surface, to support the
3D and media compression formats.

The memory required for the CCS of the entire local memory is 1/256 of
the local memory size. So before the kernel boot, the required memory
is reserved for the CCS data and a secure register will be programmed
with the CCS base address

So when an object is allocated in local memory, there is no need to
explicitly allocate the space for ccs data. But when the obj is evicted
into smem, extra space is needed in smem to hold the compression related
data along with the obj, i.e. obj_size + (obj_size/256).

Hence when smem pages are allocated for an obj with lmem placement
possibility, we create it with the extra pages required for the ccs data
for the obj size.

v2:
  Used imperative wording [Thomas]

Signed-off-by: Ramalingam C 
cc: Christian Koenig 
cc: Hellstrom Thomas 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 23 ++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 1a8262f5f692..c7a36861c38d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -20,6 +20,7 @@
 #include "gem/i915_gem_ttm.h"
 #include "gem/i915_gem_ttm_move.h"
 #include "gem/i915_gem_ttm_pm.h"
+#include "gt/intel_gpu_commands.h"
 
 #define I915_TTM_PRIO_PURGE 0
 #define I915_TTM_PRIO_NO_PAGES  1
@@ -255,12 +256,27 @@ static const struct i915_refct_sgt_ops tt_rsgt_ops = {
.release = i915_ttm_tt_release
 };
 
+static inline bool
+i915_gem_object_has_lmem_placement(struct drm_i915_gem_object *obj)
+{
+   int i;
+
+   for (i = 0; i < obj->mm.n_placements; i++)
+   if (obj->mm.placements[i]->type == INTEL_MEMORY_LOCAL)
+   return true;
+
+   return false;
+}
+
 static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
 uint32_t page_flags)
 {
+   struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915),
+bdev);
struct ttm_resource_manager *man =
ttm_manager_type(bo->bdev, bo->resource->mem_type);
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+   unsigned long ccs_pages = 0;
enum ttm_caching caching;
struct i915_ttm_tt *i915_tt;
int ret;
@@ -283,7 +299,12 @@ static struct ttm_tt *i915_ttm_tt_create(struct 
ttm_buffer_object *bo,
i915_tt->is_shmem = true;
}
 
-   ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, caching, 0);
+   if (HAS_FLAT_CCS(i915) && i915_gem_object_has_lmem_placement(obj))
+   ccs_pages = DIV_ROUND_UP(DIV_ROUND_UP(bo->base.size,
+ NUM_BYTES_PER_CCS_BYTE),
+PAGE_SIZE);
+
+   ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, caching, ccs_pages);
if (ret)
goto err_free;
 
-- 
2.20.1



[PATCH v3 6/6] drm/i915/migrate: Evict and restore the flatccs capable lmem obj

2022-03-07 Thread Ramalingam C
When we are swapping out the local memory obj on a flat-ccs capable platform,
we need to capture the ccs data too along with main memory and we need to
restore it when we are swapping in the content.

When lmem object is swapped into a smem obj, smem obj will
have the extra pages required to hold the ccs data corresponding to the
lmem main memory. So main memory of lmem will be copied into the initial
pages of the smem and then ccs data corresponding to the main memory
will be copied to the subsequent pages of smem. ccs data is 1/256 of
lmem size.

Swapin happens exactly in reverse order. First main memory of lmem is
restored from the smem's initial pages and the ccs data will be restored
from the subsequent pages of smem.

Extracting and restoring the CCS data is done through a special cmd called
XY_CTRL_SURF_COPY_BLT

v2: Fixing the ccs handling
v3: Handle the ccs data at same loop as main memory [Thomas]

Signed-off-by: Ramalingam C 
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  15 +
 drivers/gpu/drm/i915/gt/intel_migrate.c  | 274 ++-
 2 files changed, 285 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h 
b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 34cead49f35e..fa428a67620e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -153,8 +153,10 @@
 #define   MI_FLUSH_DW_PROTECTED_MEM_EN (1 << 22)
 #define   MI_FLUSH_DW_STORE_INDEX  (1<<21)
 #define   MI_INVALIDATE_TLB(1<<18)
+#define   MI_FLUSH_DW_CCS  (1<<16)
 #define   MI_FLUSH_DW_OP_STOREDW   (1<<14)
 #define   MI_FLUSH_DW_OP_MASK  (3<<14)
+#define   MI_FLUSH_DW_LLC  (1<<9)
 #define   MI_FLUSH_DW_NOTIFY   (1<<8)
 #define   MI_INVALIDATE_BSD(1<<7)
 #define   MI_FLUSH_DW_USE_GTT  (1<<2)
@@ -203,6 +205,19 @@
 #define GFX_OP_DRAWRECT_INFO ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3))
 #define GFX_OP_DRAWRECT_INFO_I965  ((0x7900<<16)|0x2)
 
+#define XY_CTRL_SURF_INSTR_SIZE5
+#define MI_FLUSH_DW_SIZE   3
+#define XY_CTRL_SURF_COPY_BLT  ((2 << 29) | (0x48 << 22) | 3)
+#define   SRC_ACCESS_TYPE_SHIFT21
+#define   DST_ACCESS_TYPE_SHIFT20
+#define   CCS_SIZE_MASKGENMASK(17, 8)
+#define   XY_CTRL_SURF_MOCS_MASK   GENMASK(31, 25)
+#define   NUM_CCS_BYTES_PER_BLOCK  256
+#define   NUM_BYTES_PER_CCS_BYTE   256
+#define   NUM_CCS_BLKS_PER_XFER1024
+#define   INDIRECT_ACCESS  0
+#define   DIRECT_ACCESS1
+
 #define COLOR_BLT_CMD  (2 << 29 | 0x40 << 22 | (5 - 2))
 #define XY_COLOR_BLT_CMD   (2 << 29 | 0x50 << 22)
 #define XY_FAST_COLOR_BLT_CMD  (2 << 29 | 0x44 << 22)
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c 
b/drivers/gpu/drm/i915/gt/intel_migrate.c
index 24e0e73e4a90..6d2181725d76 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -497,6 +497,120 @@ static bool wa_1209644611_applies(int ver, u32 size)
  * location.
  */
 
+static inline u32 *i915_flush_dw(u32 *cmd, u32 flags)
+{
+   *cmd++ = MI_FLUSH_DW | flags;
+   *cmd++ = 0;
+   *cmd++ = 0;
+
+   return cmd;
+}
+
+static u32 calc_ctrl_surf_instr_size(struct drm_i915_private *i915, int size)
+{
+   u32 num_cmds, num_blks, total_size;
+
+   if (!GET_CCS_BYTES(i915, size))
+   return 0;
+
+   /*
+* XY_CTRL_SURF_COPY_BLT transfers CCS in 256 byte
+* blocks. one XY_CTRL_SURF_COPY_BLT command can
+* transfer upto 1024 blocks.
+*/
+   num_blks = DIV_ROUND_UP(GET_CCS_BYTES(i915, size),
+   NUM_CCS_BYTES_PER_BLOCK);
+   num_cmds = DIV_ROUND_UP(num_blks, NUM_CCS_BLKS_PER_XFER);
+   total_size = XY_CTRL_SURF_INSTR_SIZE * num_cmds;
+
+   /*
+* Adding a flush before and after XY_CTRL_SURF_COPY_BLT
+*/
+   total_size += 2 * MI_FLUSH_DW_SIZE;
+
+   return total_size;
+}
+
+static u32 *_i915_ctrl_surf_copy_blt(u32 *cmd, u64 src_addr, u64 dst_addr,
+u8 src_mem_access, u8 dst_mem_access,
+int src_mocs, int dst_mocs,
+u32 ccs_blocks)
+{
+   /*
+* The XY_CTRL_SURF_COPY_BLT instruction is used to copy the CCS
+* data in and out of the CCS region.
+*
+* We can copy at most 1024 blocks of 256 bytes using one
+* XY_CTRL_SURF_COPY_BLT instruction.
+*
+* In case we need to copy more than 1024 blocks, we need to add
+* another instruction to the same batch buffer.
+*
+* 1024 blocks of 256 bytes of CCS represent a total 256KB of CCS.
+*
+* 256 KB of CCS represents 256 * 256 KB = 64 MB of LMEM.
+*/
+   do {
+   int blks_per_

Re: [PATCH RFC 0/3] MAP_POPULATE for device memory

2022-03-07 Thread Jarkko Sakkinen
On Mon, Mar 07, 2022 at 11:12:44AM +0100, David Hildenbrand wrote:
> On 06.03.22 06:32, Jarkko Sakkinen wrote:
> > For device memory (aka VM_IO | VM_PFNMAP) MAP_POPULATE does nothing. Allow
> > to use that for initializing the device memory by providing a new callback
> > f_ops->populate() for the purpose.
> > 
> > SGX patches are provided to show the callback in context.
> > 
> > An obvious alternative is a ioctl but it is less elegant and requires
> > two syscalls (mmap + ioctl) per memory range, instead of just one
> > (mmap).
> 
> What about extending MADV_POPULATE_READ | MADV_POPULATE_WRITE to support
> VM_IO | VM_PFNMAP (as well?) ?

What would be a proper point to bind that behaviour? For mmap/mprotect it'd
be probably populate_vma_page_range() because that would span both mmap()
and mprotect() (Dave's suggestion in this thread).

For MAP_POPULATE I did not have hard proof to show that it would be used
by other drivers but for madvise() you can find at least a few ioctl
based implementations:

$ git grep -e madv --and \( -e ioc \)  drivers/
drivers/gpu/drm/i915/gem/i915_gem_ioctls.h:int i915_gem_madvise_ioctl(struct 
drm_device *dev, void *data,
drivers/gpu/drm/i915/i915_driver.c: DRM_IOCTL_DEF_DRV(I915_GEM_MADVISE, 
i915_gem_madvise_ioctl, DRM_RENDER_ALLOW),
drivers/gpu/drm/i915/i915_gem.c:i915_gem_madvise_ioctl(struct drm_device *dev, 
void *data,
drivers/gpu/drm/msm/msm_drv.c:static int msm_ioctl_gem_madvise(struct 
drm_device *dev, void *data,
drivers/gpu/drm/msm/msm_drv.c:  DRM_IOCTL_DEF_DRV(MSM_GEM_MADVISE,  
msm_ioctl_gem_madvise,  DRM_RENDER_ALLOW),
drivers/gpu/drm/panfrost/panfrost_drv.c:static int 
panfrost_ioctl_madvise(struct drm_device *dev, void *data,
drivers/gpu/drm/vc4/vc4_drv.c:  DRM_IOCTL_DEF_DRV(VC4_GEM_MADVISE, 
vc4_gem_madvise_ioctl, DRM_RENDER_ALLOW),
drivers/gpu/drm/vc4/vc4_drv.h:int vc4_gem_madvise_ioctl(struct drm_device *dev, 
void *data,
drivers/gpu/drm/vc4/vc4_gem.c:int vc4_gem_madvise_ioctl(struct drm_device *dev, 
void *data,

IMHO this also provides a supporting argument for MAP_POPULATE, and yeah, I
agree that for a consistent implementation, both madvise() and MAP_POPULATE
should work.

> -- 
> Thanks,
> 
> David / dhildenb

BR, Jarkko


Re: [PATCH v3 1/6] drm/i915/gt: Use XY_FASR_COLOR_BLT to clear obj on graphics ver 12+

2022-03-07 Thread Hellstrom, Thomas
Hi, Ram.

Typo in patch title FASR/FAST

On Mon, 2022-03-07 at 19:10 +0530, Ramalingam C wrote:
> XY_FAST_COLOR_BLT cmd is faster than the older XY_COLOR_BLT. Hence
> for
> clearing (Zero out) the pages of the newly allocated object, faster
> cmd
> is used.
> 
> Signed-off-by: Ramalingam C 
> Signed-off-by: Chris Wilson 
> ---
>  drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  5 ++
>  drivers/gpu/drm/i915/gt/intel_migrate.c  | 51 +-
> --
>  2 files changed, 49 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> index d112ffd56418..925e55b6a94f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> @@ -205,6 +205,11 @@
>  
>  #define COLOR_BLT_CMD  (2 << 29 | 0x40 << 22 | (5 -
> 2))
>  #define XY_COLOR_BLT_CMD   (2 << 29 | 0x50 << 22)
> +#define XY_FAST_COLOR_BLT_CMD  (2 << 29 | 0x44 << 22)
> +#define   XY_FAST_COLOR_BLT_DEPTH_32   (2 << 19)
> +#define   XY_FAST_COLOR_BLT_DW 16
> +#define   XY_FAST_COLOR_BLT_MOCS_MASK  GENMASK(27, 21)
> +#define   XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31
>  #define SRC_COPY_BLT_CMD   (2 << 29 | 0x43 << 22)
>  #define GEN9_XY_FAST_COPY_BLT_CMD  (2 << 29 | 0x42 << 22)
>  #define XY_SRC_COPY_BLT_CMD(2 << 29 | 0x53 << 22)
> diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c
> b/drivers/gpu/drm/i915/gt/intel_migrate.c
> index 20444d6ceb3c..cb68f7bf6b28 100644
> --- a/drivers/gpu/drm/i915/gt/intel_migrate.c
> +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
> @@ -16,6 +16,8 @@ struct insert_pte_data {
>  };
>  
>  #define CHUNK_SZ SZ_8M /* ~1ms at 8GiB/s preemption delay */
> +#define GET_CCS_BYTES(i915, size)  (HAS_FLAT_CCS(i915) ? \
> +    DIV_ROUND_UP(size,
> NUM_BYTES_PER_CCS_BYTE) : 0)
>  
>  static bool engine_supports_migration(struct intel_engine_cs
> *engine)
>  {
> @@ -614,20 +616,56 @@ intel_context_migrate_copy(struct intel_context
> *ce,
> return err;
>  }
>  
> -static int emit_clear(struct i915_request *rq, u64 offset, int size,
> u32 value)
> +static int emit_clear(struct i915_request *rq, u64 offset, int size,
> + u32 value, bool is_lmem)
>  {
> -   const int ver = GRAPHICS_VER(rq->engine->i915);
> -   u32 *cs;
> +   struct drm_i915_private *i915 = rq->engine->i915;
> +   int mocs = rq->engine->gt->mocs.uc_index << 1;
> +   const int ver = GRAPHICS_VER(i915);
> +   u32 *cs, mem_type = 0;
> +   int ring_sz;
>  
> GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);
>  
> offset += (u64)rq->engine->instance << 32;
>  
> -   cs = intel_ring_begin(rq, ver >= 8 ? 8 : 6);
> +   if (ver >= 12)
> +   ring_sz = 16;

Noting that DG1 doesn't use more than 11 dwords? Doesn't matter much I
guess if we pad with NOP.

> +   else if (ver >= 8)
> +   ring_sz = 8;
> +   else
> +   ring_sz = 6;
> +
> +   if (!is_lmem)
> +   mem_type = 1 << XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT;

Should we use the MEM_TYPE macros so it becomes clearer what we're
doing? 

Also does DG1 support the mocs and mem_type fields? If not should we
set these to 0 for relevant hardware?

> +
> +   cs = intel_ring_begin(rq, ring_sz);
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>  
> -   if (ver >= 8) {
> +   if (ver >= 12) {
> +   *cs++ = XY_FAST_COLOR_BLT_CMD |
> XY_FAST_COLOR_BLT_DEPTH_32 |
> +   (XY_FAST_COLOR_BLT_DW - 2);
> +   *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs)
> |
> +   (PAGE_SIZE - 1);
> +   *cs++ = 0;
> +   *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
> +   *cs++ = lower_32_bits(offset);
> +   *cs++ = upper_32_bits(offset);
> +   *cs++ = mem_type;
> +   /* BG7 */
> +   *cs++ = value;
> +   *cs++ = 0;
> +   *cs++ = 0;
> +   *cs++ = 0;
> +   /* BG11 */
> +   *cs++ = 0;
> +   *cs++ = 0;
> +   /* BG13 */
> +   *cs++ = 0;
> +   *cs++ = 0;
> +   *cs++ = 0;
> +   } else if (ver >= 8) {
> *cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
> *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY |
> PAGE_SIZE;
> *cs++ = 0;
> @@ -645,7 +683,6 @@ static int emit_clear(struct i915_request *rq,
> u64 offset, int size, u32 value)
> *cs++ = lower_32_bits(offset);
> *cs++ = value;
> }
> -
> intel_ring_advance(rq, cs);
> return 0;
>  }
> @@ -711,7 +748,7 @@ intel_context_migrate_clear(struct intel_context
> *ce,
> if (err)
> goto out_rq;
>  
> -   err = emit_cle

Re: [PATCH v3 2/6] drm/i915/gt: Clear compress metadata for Flat-ccs objects

2022-03-07 Thread Hellstrom, Thomas
On Mon, 2022-03-07 at 19:10 +0530, Ramalingam C wrote:
> Xe-HP and latest devices support Flat CCS which reserved a portion of
> the device memory to store compression metadata, during the clearing
> of
> device memory buffer object we also need to clear the associated
> CCS buffer.
> 
> XY_FAST_COLOR_BLT cmd provides a option to clear the ccs metadata
> corresponding to the main memory that is cleared. So on Flat-CCS
> capable
> platform we use this option to clear the CCS meta data along with
> main
> memory.
> 
> v2: Fixed issues with platform naming [Lucas]
> v3: Rebased [Ram]
>     Used the round_up funcs [Bob]
> v4: Fixed ccs blk calculation [Ram]
>     Added Kdoc on flat-ccs.
> v5: GENMASK is used [Matt]
>     mocs fix [Matt]
>     Comments Fix [Matt]
>     Flush address programming [Ram]
> v6: FLUSH_DW is fixed
>     Few coding style fix
> v7: Adopting the XY_FAST_COLOR_BLT (Thomas]
> 
> Signed-off-by: Ramalingam C 
> Signed-off-by: Ayaz A Siddiqui 
> ---
>  drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  3 ++
>  drivers/gpu/drm/i915/gt/intel_migrate.c  | 39
> ++--
>  2 files changed, 39 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> index 925e55b6a94f..34cead49f35e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> @@ -207,8 +207,11 @@
>  #define XY_COLOR_BLT_CMD   (2 << 29 | 0x50 << 22)
>  #define XY_FAST_COLOR_BLT_CMD  (2 << 29 | 0x44 << 22)
>  #define   XY_FAST_COLOR_BLT_DEPTH_32   (2 << 19)
> +#define   FAST_CLEAR_0 (2 << 12)
>  #define   XY_FAST_COLOR_BLT_DW 16
>  #define   XY_FAST_COLOR_BLT_MOCS_MASK  GENMASK(27, 21)
> +#define   XY_FAST_COLOR_BLT_AUX_MASK   GENMASK(20, 18)
> +#define   XY_FAST_COLOR_BLT_AUX_CCS_E  5
>  #define   XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31
>  #define SRC_COPY_BLT_CMD   (2 << 29 | 0x43 << 22)
>  #define GEN9_XY_FAST_COPY_BLT_CMD  (2 << 29 | 0x42 << 22)
> diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c
> b/drivers/gpu/drm/i915/gt/intel_migrate.c
> index cb68f7bf6b28..05262f1b438e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_migrate.c
> +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
> @@ -469,6 +469,34 @@ static bool wa_1209644611_applies(int ver, u32
> size)
> return height % 4 == 3 && height <= 8;
>  }
>  
> +/**
> + * DOC: Flat-CCS - Memory compression for Local memory
> + *
> + * On Xe-HP and later devices, we use dedicated compression control
> state (CCS)
> + * stored in local memory for each surface, to support the 3D and
> media
> + * compression formats.
> + *
> + * The memory required for the CCS of the entire local memory is
> 1/256 of the
> + * local memory size. So before the kernel boot, the required memory
> is reserved
> + * for the CCS data and a secure register will be programmed with
> the CCS base
> + * address.
> + *
> + * Flat CCS data needs to be cleared when a lmem object is
> allocated.
> + * And CCS data can be copied in and out of CCS region through
> + * XY_CTRL_SURF_COPY_BLT. CPU can't access the CCS data directly.
> + *
> + * When we exhaust the lmem, if the object's placements support
> smem, then we can
> + * directly decompress the compressed lmem object into smem and
> start using it
> + * from smem itself.
> + *
> + * But when we need to swapout the compressed lmem object into a
> smem region
> + * though objects' placement doesn't support smem, then we copy the
> lmem content
> + * as it is into smem region along with ccs data (using
> XY_CTRL_SURF_COPY_BLT).
> + * When the object is referred, lmem content will be swaped in along
> with
> + * restoration of the CCS data (using XY_CTRL_SURF_COPY_BLT) at
> corresponding
> + * location.
> + */
> +
>  static int emit_copy(struct i915_request *rq,
>  u32 dst_offset, u32 src_offset, int size)
>  {
> @@ -621,8 +649,8 @@ static int emit_clear(struct i915_request *rq,
> u64 offset, int size,
>  {
> struct drm_i915_private *i915 = rq->engine->i915;
> int mocs = rq->engine->gt->mocs.uc_index << 1;
> +   u32 *cs, spl_mode = 0, aux = 0, mem_type = 0;
> const int ver = GRAPHICS_VER(i915);
> -   u32 *cs, mem_type = 0;
> int ring_sz;
>  
> GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);
> @@ -644,10 +672,15 @@ static int emit_clear(struct i915_request *rq,
> u64 offset, int size,
> return PTR_ERR(cs);
>  
> if (ver >= 12) {
> +   if (HAS_FLAT_CCS(i915)) {
> +   spl_mode = FAST_CLEAR_0;
> +   aux = FIELD_PREP(XY_FAST_COLOR_BLT_AUX_MASK,

Did you have a chance to verify that this actually works, and whether
setting aux will clear just the CCS data or both CCS & main DATA?

If so,
Reviewed-by: Thomas Hellström 



> +   
> XY_FAST_COLOR_BLT_AUX_CCS_E);
> +   }
>  

Re: [PATCH] simpldrm: Enable boot time VESA graphic mode selection.

2022-03-07 Thread Javier Martinez Canillas
On 3/4/22 21:47, Javier Martinez Canillas wrote:
> Hello Thomas,
> 
> On 3/4/22 21:00, Thomas Zimmermann wrote:
>> Hi,
>>
>> I've merged the patches into drm-misc-fixes. Thanks a lot to both of you.
>>
> 
> Ard already picked these through the efi tree:
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git/log/
>

I talked with Ard and he kindly dropped these patches from the efi
tree. So everything is fine.
 
>> Best regards
>> Thomas
>>

-- 
Best regards,

Javier Martinez Canillas
Linux Engineering
Red Hat



Re: [PATCH RFC 0/3] MAP_POPULATE for device memory

2022-03-07 Thread David Hildenbrand
On 07.03.22 15:22, Jarkko Sakkinen wrote:
> On Mon, Mar 07, 2022 at 11:12:44AM +0100, David Hildenbrand wrote:
>> On 06.03.22 06:32, Jarkko Sakkinen wrote:
>>> For device memory (aka VM_IO | VM_PFNMAP) MAP_POPULATE does nothing. Allow
>>> to use that for initializing the device memory by providing a new callback
>>> f_ops->populate() for the purpose.
>>>
>>> SGX patches are provided to show the callback in context.
>>>
>>> An obvious alternative is a ioctl but it is less elegant and requires
>>> two syscalls (mmap + ioctl) per memory range, instead of just one
>>> (mmap).
>>
>> What about extending MADV_POPULATE_READ | MADV_POPULATE_WRITE to support
>> VM_IO | VM_PFNMAP (as well?) ?
> 
> What would be a proper point to bind that behaviour? For mmap/mprotect it'd
> be probably populate_vma_page_range() because that would span both mmap()
> and mprotect() (Dave's suggestion in this thread).

MADV_POPULATE_* ends up in faultin_vma_page_range(), right next to
populate_vma_page_range(). So it might require a similar way to hook
into the driver I guess.

> 
> For MAP_POPULATE I did not have hard proof to show that it would be used
> by other drivers but for madvice() you can find at least a few ioctl
> based implementations:
> 
> $ git grep -e madv --and \( -e ioc \)  drivers/
> drivers/gpu/drm/i915/gem/i915_gem_ioctls.h:int i915_gem_madvise_ioctl(struct 
> drm_device *dev, void *data,
> drivers/gpu/drm/i915/i915_driver.c: DRM_IOCTL_DEF_DRV(I915_GEM_MADVISE, 
> i915_gem_madvise_ioctl, DRM_RENDER_ALLOW),
> drivers/gpu/drm/i915/i915_gem.c:i915_gem_madvise_ioctl(struct drm_device 
> *dev, void *data,
> drivers/gpu/drm/msm/msm_drv.c:static int msm_ioctl_gem_madvise(struct 
> drm_device *dev, void *data,
> drivers/gpu/drm/msm/msm_drv.c:  DRM_IOCTL_DEF_DRV(MSM_GEM_MADVISE,  
> msm_ioctl_gem_madvise,  DRM_RENDER_ALLOW),
> drivers/gpu/drm/panfrost/panfrost_drv.c:static int 
> panfrost_ioctl_madvise(struct drm_device *dev, void *data,
> drivers/gpu/drm/vc4/vc4_drv.c:  DRM_IOCTL_DEF_DRV(VC4_GEM_MADVISE, 
> vc4_gem_madvise_ioctl, DRM_RENDER_ALLOW),
> drivers/gpu/drm/vc4/vc4_drv.h:int vc4_gem_madvise_ioctl(struct drm_device 
> *dev, void *data,
> drivers/gpu/drm/vc4/vc4_gem.c:int vc4_gem_madvise_ioctl(struct drm_device 
> *dev, void *data,
> 
> IMHO this also provides supportive claim for MAP_POPULATE, and yeah, I
> agree that to be consistent implementation, both madvice() and MAP_POPULATE
> should work.

MADV_POPULATE_WRITE + MADV_DONTNEED/FALLOC_FL_PUNCH_HOLE is one way to
dynamically manage memory consumption inside a sparse memory mapping
(preallocate/populate via MADV_POPULATE_WRITE, discard via
MADV_DONTNEED/FALLOC_FL_PUNCH_HOLE).  Extending that whole mechanism to
deal with VM_IO | VM_PFNMAP mappings as well could be interesting.

At least I heard about some ideas where we might want to dynamically
expose memory to a VM (via virtio-mem) inside a sparse memory mapping,
and the memory in that sparse memory mapping is provided from a
dedicated memory pool managed by a device driver -- not just using
ordinary anonymous/file/hugetlb memory as we do right now.

Now, this is certainly stuff for the future, I just wanted to mention it.

-- 
Thanks,

David / dhildenb



[PATCH] drm: remove min_order BUG_ON check

2022-03-07 Thread Arunpravin
Move BUG_ON(order < min_order) outside the do..while
loop, as it triggers when running the Unigine Heaven benchmark.

Unigine Heaven issues buffer allocation requests where, for
example, 161 pages are required and the alignment request
is 128. To allocate the remaining 33 pages, the loop continues
iterating and finds an order value of 5, which, when
compared with min_order = 7, triggers the
BUG_ON(). To avoid this problem, place the BUG_ON
check outside of the do..while loop.

Signed-off-by: Arunpravin 
---
 drivers/gpu/drm/drm_buddy.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 72f52f293249..ed94c56b720f 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -669,10 +669,11 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
order = fls(pages) - 1;
min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
 
+   BUG_ON(order < min_order);
+
do {
order = min(order, (unsigned int)fls(pages) - 1);
BUG_ON(order > mm->max_order);
-   BUG_ON(order < min_order);
 
do {
if (flags & DRM_BUDDY_RANGE_ALLOCATION)

base-commit: 8025c79350b90e5a8029234d433578f12abbae2b
-- 
2.25.1



Re: [PATCH RFC v2] mm: Add f_ops->populate()

2022-03-07 Thread Matthew Wilcox
On Sun, Mar 06, 2022 at 03:41:54PM -0800, Dave Hansen wrote:
> In short: page faults stink.  The core kernel has lots of ways of
> avoiding page faults like madvise(MADV_WILLNEED) or mmap(MAP_POPULATE).
>  But, those only work on normal RAM that the core mm manages.
> 
> SGX is weird.  SGX memory is managed outside the core mm.  It doesn't
> have a 'struct page' and get_user_pages() doesn't work on it.  Its VMAs
> are marked with VM_IO.  So, none of the existing methods for avoiding
> page faults work on SGX memory.
> 
> This essentially helps extend existing "normal RAM" kernel ABIs to work
> for avoiding faults for SGX too.  SGX users want to enjoy all of the
> benefits of a delayed allocation policy (better resource use,
> overcommit, NUMA affinity) but without the cost of millions of faults.

We have a mechanism for dynamically reducing the number of page faults
already; it's just buried in the page cache code.  You have vma->vm_file,
which contains a file_ra_state.  You can use this to track where
recent faults have been and grow the size of the region you fault in
per page fault.  You don't have to (indeed probably don't want to) use
the same algorithm as the page cache, but the _principle_ is the same --
were recent speculative faults actually used; should we grow the number
of pages actually faulted in, or is this a random sparse workload where
we want to allocate individual pages.

Don't rely on the user to ask.  They don't know.


Re: [PATCH] drm/selftests: missing error code in igt_buddy_alloc_smoke()

2022-03-07 Thread Arunpravin
Reviewed-by: Arunpravin 

On 07/03/22 6:24 pm, Dan Carpenter wrote:
> Set the error code to -ENOMEM if drm_random_order() fails.
> 
> Fixes: e6ff5ef81170 ("drm/selftests: add drm buddy smoke testcase")
> Signed-off-by: Dan Carpenter 
> ---
>  drivers/gpu/drm/selftests/test-drm_buddy.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/selftests/test-drm_buddy.c 
> b/drivers/gpu/drm/selftests/test-drm_buddy.c
> index fa997f89522b..6eba590afa9b 100644
> --- a/drivers/gpu/drm/selftests/test-drm_buddy.c
> +++ b/drivers/gpu/drm/selftests/test-drm_buddy.c
> @@ -488,8 +488,10 @@ static int igt_buddy_alloc_smoke(void *arg)
>   }
>  
>   order = drm_random_order(mm.max_order + 1, &prng);
> - if (!order)
> + if (!order) {
> + err = -ENOMEM;
>   goto out_fini;
> + }
>  
>   for (i = 0; i <= mm.max_order; ++i) {
>   struct drm_buddy_block *block;
> 


Re: [Intel-gfx] [PATCH] drm: remove min_order BUG_ON check

2022-03-07 Thread Jani Nikula
On Mon, 07 Mar 2022, Arunpravin  wrote:
> place BUG_ON(order < min_order) outside do..while
> loop as it fails Unigine Heaven benchmark.
>
> Unigine Heaven has buffer allocation requests for
> example required pages are 161 and alignment request
> is 128. To allocate the remaining 33 pages, continues
> the iteration to find the order value which is 5 and
> when it compares with min_order = 7, enables the
> BUG_ON(). To avoid this problem, placed the BUG_ON
> check outside of do..while loop.

How about turning these BUG_ON()s to WARN_ON()s with an error return?
What's the point in oopsing?

BR,
Jani.


>
> Signed-off-by: Arunpravin 
> ---
>  drivers/gpu/drm/drm_buddy.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index 72f52f293249..ed94c56b720f 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -669,10 +669,11 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>   order = fls(pages) - 1;
>   min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
>  
> + BUG_ON(order < min_order);
> +
>   do {
>   order = min(order, (unsigned int)fls(pages) - 1);
>   BUG_ON(order > mm->max_order);
> - BUG_ON(order < min_order);
>  
>   do {
>   if (flags & DRM_BUDDY_RANGE_ALLOCATION)
>
> base-commit: 8025c79350b90e5a8029234d433578f12abbae2b

-- 
Jani Nikula, Intel Open Source Graphics Center


Re: [Freedreno] [PATCH] drm/msm/adreno: fix cast in adreno_get_param()

2022-03-07 Thread Akhil P Oommen

On 3/7/2022 7:01 PM, Dan Carpenter wrote:

These casts need to happen before the shift.  The only time it would
matter would be if "rev.core" is >= 128.  In that case the sign bit
would be extended and we do not want that.

Fixes: afab9d91d872 ("drm/msm/adreno: Expose speedbin to userspace")
Signed-off-by: Dan Carpenter 
---
  drivers/gpu/drm/msm/adreno/adreno_gpu.c | 8 
  1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 15c8997b7251..f7b3f6d266a9 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -244,10 +244,10 @@ int adreno_get_param(struct msm_gpu *gpu, struct 
msm_file_private *ctx,
*value = !adreno_is_a650_family(adreno_gpu) ? 0x10 : 0;
return 0;
case MSM_PARAM_CHIP_ID:
-   *value = (uint64_t) adreno_gpu->rev.patchid |
-   (uint64_t) (adreno_gpu->rev.minor << 8) |
-   (uint64_t) (adreno_gpu->rev.major << 16) |
-   (uint64_t) (adreno_gpu->rev.core << 24);
+   *value =  (uint64_t)adreno_gpu->rev.patchid |
+((uint64_t)adreno_gpu->rev.minor << 8) |
+((uint64_t)adreno_gpu->rev.major << 16) |
+((uint64_t)adreno_gpu->rev.core  << 24);
if (!adreno_gpu->info->revn)
*value |= ((uint64_t) adreno_gpu->speedbin) << 32;
return 0;

Reviewed-by: Akhil P Oommen 

-Akhil


Re: [PATCH 0/6] Remove usage of list iterator past the loop body

2022-03-07 Thread Dan Carpenter
Updating this API is risky because some places rely on the old behavior
and not all of them have been updated.  Here are some additional places
you might want to change.

drivers/usb/host/uhci-q.c:466 link_async() warn: iterator used outside loop: 
'pqh'
drivers/infiniband/core/mad.c:968 ib_get_rmpp_segment() warn: iterator used 
outside loop: 'mad_send_wr->cur_seg'
drivers/opp/debugfs.c:208 opp_migrate_dentry() warn: iterator used outside 
loop: 'new_dev'
drivers/staging/greybus/audio_codec.c:602 gbcodec_mute_stream() warn: iterator 
used outside loop: 'module'
drivers/staging/media/atomisp/pci/atomisp_acc.c:508 
atomisp_acc_load_extensions() warn: iterator used outside loop: 'acc_fw'
drivers/perf/thunderx2_pmu.c:814 tx2_uncore_pmu_init_dev() warn: iterator used 
outside loop: 'rentry'
drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c:111 
nvkm_control_mthd_pstate_attr() warn: iterator used outside loop: 'pstate'
drivers/gpu/drm/panfrost/panfrost_mmu.c:203 panfrost_mmu_as_get() warn: 
iterator used outside loop: 'lru_mmu'
drivers/media/usb/uvc/uvc_v4l2.c:885 uvc_ioctl_enum_input() warn: iterator used 
outside loop: 'iterm'
drivers/media/usb/uvc/uvc_v4l2.c:896 uvc_ioctl_enum_input() warn: iterator used 
outside loop: 'iterm'
drivers/scsi/dc395x.c:3596 device_alloc() warn: iterator used outside loop: 'p'
drivers/net/ethernet/mellanox/mlx4/alloc.c:379 __mlx4_alloc_from_zone() warn: 
iterator used outside loop: 'curr_node'
fs/ocfs2/dlm/dlmdebug.c:573 lockres_seq_start() warn: iterator used outside 
loop: 'res'

This patchset fixes 3 bugs.  Initially when it's merged it's probably
going to introduce some bugs because there are likely other places which
rely on the old behavior.

In an ideal world, with the new API the compiler would warn about
uninitialized variables, but unfortunately that warning is disabled by
default so we still have to rely on kbuild/Clang/Smatch to find the
bugs.

But hopefully the new API encourages people to write clearer code so it
prevents bugs in the long run.

regards,
dan carpenter



Re: [PATCH] drm/msm/a6xx: Fix missing ARRAY_SIZE() check

2022-03-07 Thread Akhil P Oommen

On 3/5/2022 11:04 PM, Rob Clark wrote:

From: Rob Clark 

Fixes: f6d62d091cfd ("drm/msm/a6xx: add support for Adreno 660 GPU")
Signed-off-by: Rob Clark 
Reviewed-by: Dmitry Baryshkov 
---
  drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 12 
  1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 02b47977b5c3..83c31b2ad865 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -683,19 +683,23 @@ static void a6xx_set_cp_protect(struct msm_gpu *gpu)
  {
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
const u32 *regs = a6xx_protect;
-   unsigned i, count = ARRAY_SIZE(a6xx_protect), count_max = 32;
-
-   BUILD_BUG_ON(ARRAY_SIZE(a6xx_protect) > 32);
-   BUILD_BUG_ON(ARRAY_SIZE(a650_protect) > 48);
+   unsigned i, count, count_max;
  
  	if (adreno_is_a650(adreno_gpu)) {

regs = a650_protect;
count = ARRAY_SIZE(a650_protect);
count_max = 48;
+   BUILD_BUG_ON(ARRAY_SIZE(a650_protect) > 48);
} else if (adreno_is_a660_family(adreno_gpu)) {
regs = a660_protect;
count = ARRAY_SIZE(a660_protect);
count_max = 48;
+   BUILD_BUG_ON(ARRAY_SIZE(a660_protect) > 48);
+   } else {
+   regs = a6xx_protect;
+   count = ARRAY_SIZE(a6xx_protect);
+   count_max = 32;
+   BUILD_BUG_ON(ARRAY_SIZE(a6xx_protect) > 32);
}
  
  	/*

Reviewed-by: Akhil P Oommen 

-Akhil.


Re: [PATCH 0/4] drm/msm: Clear perf counters across context switch

2022-03-07 Thread Akhil P Oommen

On 3/4/2022 6:22 AM, Rob Clark wrote:

From: Rob Clark 

Some clever folks figured out a way to use performance counters as a
side-channel[1].  But, other than the special case of using the perf
counters for system profiling, we can reset the counters across context
switches to protect against this.

This series introduces a SYSPROF param which a sufficiently privileged
userspace (like Mesa's pps-producer, which already must run as root) can
use to opt out, and makes the default behavior to reset counters on context
switches.

[1] https://dl.acm.org/doi/pdf/10.1145/3503222.3507757

Rob Clark (4):
   drm/msm: Update generated headers
   drm/msm: Add SET_PARAM ioctl
   drm/msm: Add SYSPROF param (v2)
   drm/msm/a6xx: Zap counters across context switch

  drivers/gpu/drm/msm/adreno/a2xx.xml.h |  26 +-
  drivers/gpu/drm/msm/adreno/a2xx_gpu.c |   1 +
  drivers/gpu/drm/msm/adreno/a3xx.xml.h |  30 +-
  drivers/gpu/drm/msm/adreno/a3xx_gpu.c |   1 +
  drivers/gpu/drm/msm/adreno/a4xx.xml.h | 112 ++-
  drivers/gpu/drm/msm/adreno/a4xx_gpu.c |   1 +
  drivers/gpu/drm/msm/adreno/a5xx.xml.h |  63 +-
  drivers/gpu/drm/msm/adreno/a5xx_gpu.c |   1 +
  drivers/gpu/drm/msm/adreno/a6xx.xml.h | 674 +++---
  drivers/gpu/drm/msm/adreno/a6xx_gmu.xml.h |  26 +-
  drivers/gpu/drm/msm/adreno/a6xx_gpu.c |  30 +
  .../gpu/drm/msm/adreno/adreno_common.xml.h|  31 +-
  drivers/gpu/drm/msm/adreno/adreno_gpu.c   |  14 +
  drivers/gpu/drm/msm/adreno/adreno_gpu.h   |   2 +
  drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h   |  46 +-
  drivers/gpu/drm/msm/disp/mdp4/mdp4.xml.h  |  37 +-
  drivers/gpu/drm/msm/disp/mdp5/mdp5.xml.h  |  37 +-
  drivers/gpu/drm/msm/disp/mdp_common.xml.h |  37 +-
  drivers/gpu/drm/msm/dsi/dsi.xml.h |  37 +-
  drivers/gpu/drm/msm/dsi/dsi_phy_10nm.xml.h|  37 +-
  drivers/gpu/drm/msm/dsi/dsi_phy_14nm.xml.h|  37 +-
  drivers/gpu/drm/msm/dsi/dsi_phy_20nm.xml.h|  37 +-
  drivers/gpu/drm/msm/dsi/dsi_phy_28nm.xml.h|  37 +-
  .../gpu/drm/msm/dsi/dsi_phy_28nm_8960.xml.h   |  37 +-
  drivers/gpu/drm/msm/dsi/dsi_phy_5nm.xml.h | 480 -
  drivers/gpu/drm/msm/dsi/dsi_phy_7nm.xml.h |  43 +-
  drivers/gpu/drm/msm/dsi/mmss_cc.xml.h |  37 +-
  drivers/gpu/drm/msm/dsi/sfpb.xml.h|  37 +-
  drivers/gpu/drm/msm/hdmi/hdmi.xml.h   |  37 +-
  drivers/gpu/drm/msm/hdmi/qfprom.xml.h |  37 +-
  drivers/gpu/drm/msm/msm_drv.c |  28 +
  drivers/gpu/drm/msm/msm_gpu.c |   2 +
  drivers/gpu/drm/msm/msm_gpu.h |  29 +
  drivers/gpu/drm/msm/msm_submitqueue.c |  39 +
  include/uapi/drm/msm_drm.h|  28 +-
  35 files changed, 1058 insertions(+), 1130 deletions(-)
  delete mode 100644 drivers/gpu/drm/msm/dsi/dsi_phy_5nm.xml.h



For the whole series except " drm/msm: Update generated headers",

Reviewed-by: Akhil P Oommen 

-Akhil.



Re: [PATCH 5.15 000/262] 5.15.27-rc1 review

2022-03-07 Thread Greg Kroah-Hartman
On Mon, Mar 07, 2022 at 06:30:18PM +0530, Naresh Kamboju wrote:
> On Mon, 7 Mar 2022 at 15:07, Greg Kroah-Hartman
>  wrote:
> >
> > This is the start of the stable review cycle for the 5.15.27 release.
> > There are 262 patches in this series, all will be posted as a response
> > to this one.  If anyone has any issues with these being applied, please
> > let me know.
> >
> > Responses should be made by Wed, 09 Mar 2022 09:16:25 +0000.
> > Anything received after that time might be too late.
> >
> > The whole patch series can be found in one patch at:
> > 
> > https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.15.27-rc1.gz
> > or in the git tree and branch at:
> > 
> > git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> > linux-5.15.y
> > and the diffstat can be found below.
> >
> > thanks,
> >
> > greg k-h
> 
> 
> Following build errors/warnings noticed on arm64.
> 
> 
> arch/arm64/net/bpf_jit_comp.c: In function 'build_insn':
> arch/arm64/net/bpf_jit_comp.c:791:21: error: implicit declaration of
> function 'bpf_pseudo_func' [-Werror=implicit-function-declaration]
>   791 | if (bpf_pseudo_func(insn))
>   | ^~~
> cc1: some warnings being treated as errors

Found this one, now dropped.



RE: [PATCH 0/6] Remove usage of list iterator past the loop body

2022-03-07 Thread David Laight
From: Dan Carpenter
> Sent: 07 March 2022 15:01
> 
> Updating this API is risky because some places rely on the old behavior
> and not all of them have been updated.  Here are some additional places
> you might want to change.

I really can't help thinking that trying to merge this patch is
actually impossible.
It affects far too many different parts of the tree.

Since (I believe) this is a doubly linked list with forwards and
backwards pointers that point to a 'node' (not that there is a
nice comment to that effect in the header - and there are lots of
ways to do linked lists) the 'head' pretty much has to be a 'node'.

I'd write the following new defines (but I might be using
the old names here):

list_first(head, field) First item, NULL if empty.
list_last(head, field) Last item, NULL if empty.
list_next(head, item, field) Item after 'item', NULL if last.
list_prev(head, item, field) Item before 'item', NULL if first.

You get (something like):
#define list_first(head, field) \
head->next == &head ? NULL : list_item(head->next, field)
(probably needs typeof(item) from somewhere).

The iterator loop is then just:
#define loop_iterate(item, head, field) \
for (item = list_first(head, field); item; \
item = list_next(head, item, field))

I'm not sure, but making the 'head' be a structure that contains
a single member that is a 'node' might help type checking.

Then all the code that uses the current defines can slowly be
moved over (probably a couple of releases) before the existing
defines are deleted.

That should simplify all the open-coded search loops that are
just as likely to be buggy (possibly more so).

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, 
UK
Registration No: 1397386 (Wales)



Re: [RFC PATCH] drm/panel: simple: panel-dpi: use bus-format to set bpc and bus_format

2022-03-07 Thread Max Krummenacher
On Wed, Mar 2, 2022 at 5:22 PM Marek Vasut  wrote:
>
> On 3/2/22 15:21, Maxime Ripard wrote:
> > Hi,
>
> Hi,
>
> > Please try to avoid top posting
Sorry.

> >
> > On Wed, Feb 23, 2022 at 04:25:19PM +0100, Max Krummenacher wrote:
> >> The goal here is to set the element bus_format in the struct
> >> panel_desc. This is an enum with the possible values defined in
> >> include/uapi/linux/media-bus-format.h.
> >>
> >> The enum values are not constructed in a way that you could calculate
> >> the value from color channel width/shift/mapping/whatever. You rather
> >> would have to check if the combination of color channel
> >> width/shift/mapping/whatever maps to an existing value and otherwise
> >> EINVAL out.
> >>
> >> I don't see the value in having yet another way of how this
> >> information can be specified and then having to write a more
> >> complicated parser which maps the dt data to bus_format.
> >
> > Generally speaking, sending an RFC without explicitly stating what you
> > want a comment on isn't very efficient.
>
> Isn't that what RFC stands for -- Request For Comment ?

I hoped that the link to the original discussion was enough.

panel-simple used to have a finite number of hardcoded panels selected
by their compatible.
The following patchsets added a compatible 'panel-dpi' which should
allow to specify the panel in the device tree with timing etc.
  
https://patchwork.kernel.org/project/dri-devel/patch/20200216181513.28109-6-...@ravnborg.org/
In the same release cycle part of it got reverted:
  
https://patchwork.kernel.org/project/dri-devel/patch/20200314153047.2486-3-...@ravnborg.org/
With this it is no longer possible to set bus_format.

The explanation what makes the use of a property "data-mapping" not a
suitable way in that revert
is a bit vague.

The RFC revert of the revert
  
https://patchwork.kernel.org/project/dri-devel/patch/20220201110717.3585-1-cniederma...@dh-electronics.com/
tried to get feedback on what would be a way forward. This RFC tries the
same by offering a possible solution, in case
the property name and/or the rather short strings of the original are
the reason why it is not suitable.

So the requested comments would be about what was not good enough with
'data-mapping' and what would be a way forward.

Especially since in my limited view it is not clear why in panel-lvds
'data-mapping' is used to state how the bits representing colour are
mapped to the 21 or 28 possible bit position in the LVDS lanes vs.
here where we want to say how the bits representing colour are mapped
to the 16/18/24 lines of the parallel interface would need a different
binding pattern.

>
> > That being said, what I (and I can only assume Marek) don't like is the
> > string encoding. Especially when the similar bus-type property uses a
> > integer with the various available bus options we have.
>
> Right, the string encoding isn't good.
>
> > Having an integer, with a set of defines that you would map to the
> > proper MEDIA_BUS_* would be more efficient and more elegant.

I'll have a look at that.

> >
> > That being said, the first question that needs to be answered is why
> > does this have to be in the DT in the first place?

The way I understand the compatible panel-dpi, it should allow to fill
a 'struct panel_desc'
with data provided by the device tree rather than having the info
hardcoded in the C source.
The missing element is bus_format which currently is kept at 0.

>
> Because panel-simple panel-dpi , you may need to specify the bus format
> between the last bridge and the panel .

Exactly.

Max


Re: [PATCH RFC v2] mm: Add f_ops->populate()

2022-03-07 Thread Dave Hansen
On 3/7/22 03:27, Jarkko Sakkinen wrote:
> But e.g. in __mm_populate() anything with (VM_IO | VM_PFNMAP) gets
> filtered out and never reach that function.
> 
> I don't know how unorthodox that'd be but could we perhaps have a VM
> flag for SGX?

SGX only works on a subset of the chips from one vendor on one
architecture.  That doesn't seem worth burning a VM flag.


Re: [PATCH v3 4/6] drm/i915/gem: Add extra pages in ttm_tt for ccs data

2022-03-07 Thread Matthew Auld

On 07/03/2022 13:40, Ramalingam C wrote:

On Xe-HP and later devices, dedicated compression control state (CCS)
stored in local memory is used for each surface, to support the
3D and media compression formats.

The memory required for the CCS of the entire local memory is 1/256 of
the local memory size. So before the kernel boot, the required memory
is reserved for the CCS data and a secure register will be programmed
with the CCS base address

So when an object is allocated in local memory, we don't need to explicitly
allocate the space for ccs data. But when the obj is evicted into
smem, extra space is needed in smem to hold the compression-related data
along with the obj, i.e. obj_size + (obj_size/256).

Hence when smem pages are allocated for an obj with lmem placement
possibility, we create it with the extra pages required for the ccs data for
the obj size.

v2:
   Used imperative wording [Thomas]

Signed-off-by: Ramalingam C 
cc: Christian Koenig 
cc: Hellstrom Thomas 
Reviewed-by: Thomas Hellström 
---
  drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 23 ++-
  1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 1a8262f5f692..c7a36861c38d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -20,6 +20,7 @@
  #include "gem/i915_gem_ttm.h"
  #include "gem/i915_gem_ttm_move.h"
  #include "gem/i915_gem_ttm_pm.h"
+#include "gt/intel_gpu_commands.h"
  
  #define I915_TTM_PRIO_PURGE 0

  #define I915_TTM_PRIO_NO_PAGES  1
@@ -255,12 +256,27 @@ static const struct i915_refct_sgt_ops tt_rsgt_ops = {
.release = i915_ttm_tt_release
  };
  
+static inline bool

+i915_gem_object_has_lmem_placement(struct drm_i915_gem_object *obj)
+{
+   int i;
+
+   for (i = 0; i < obj->mm.n_placements; i++)
+   if (obj->mm.placements[i]->type == INTEL_MEMORY_LOCAL)
+   return true;
+
+   return false;
+}
+
  static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
 uint32_t page_flags)
  {
+   struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915),
+bdev);
struct ttm_resource_manager *man =
ttm_manager_type(bo->bdev, bo->resource->mem_type);
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+   unsigned long ccs_pages = 0;
enum ttm_caching caching;
struct i915_ttm_tt *i915_tt;
int ret;
@@ -283,7 +299,12 @@ static struct ttm_tt *i915_ttm_tt_create(struct 
ttm_buffer_object *bo,
i915_tt->is_shmem = true;
}
  
-	ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, caching, 0);

+   if (HAS_FLAT_CCS(i915) && i915_gem_object_has_lmem_placement(obj))
+   ccs_pages = DIV_ROUND_UP(DIV_ROUND_UP(bo->base.size,
+ NUM_BYTES_PER_CCS_BYTE),
+PAGE_SIZE);


Did you figure out how to handle the case where we have LMEM + SMEM, and 
are unable to place the object into LMEM, and then it just ends up being 
kept in SMEM? AFAIK the vm.insert_entries code has always just assumed 
that the vma sg_table size is the same as the vma->size, and so will 
happily create PTEs for the hidden ccs page(s), which might corrupt the 
users vm, depending on the exact layout.


Also it looks like the _shmem_writeback() call should now use 
ttm_tt->num_pages, instead of the object size?



+
+   ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, caching, ccs_pages);
if (ret)
goto err_free;
  


[RESEND PATCH] drm/doc: Clarify what ioctls can be used on render nodes

2022-03-07 Thread Jeffrey Hugo
The documentation for render nodes indicates that only "PRIME-related"
ioctls are valid on render nodes, but the documentation does not clarify
what that means.  If the reader is not familiar with PRIME, they may
believe this to be only the ioctls with "PRIME" in the name and not other
ioctls such as set of syncobj ioctls.  Clarify the situation for the
reader by referencing where the reader will find a current list of valid
ioctls.

Signed-off-by: Jeffrey Hugo 
Acked-by: Pekka Paalanen 
---

I was confused by this when reading the documentation.  Now that I have
figured out what the documentation means, I would like to add a clarification
for the next reader which would have helped me.

 Documentation/gpu/drm-uapi.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/gpu/drm-uapi.rst b/Documentation/gpu/drm-uapi.rst
index 199afb5..ce47b42 100644
--- a/Documentation/gpu/drm-uapi.rst
+++ b/Documentation/gpu/drm-uapi.rst
@@ -148,7 +148,9 @@ clients together with the legacy drmAuth authentication 
procedure.
 If a driver advertises render node support, DRM core will create a
 separate render node called renderD. There will be one render node
 per device. No ioctls except PRIME-related ioctls will be allowed on
-this node. Especially GEM_OPEN will be explicitly prohibited. Render
+this node. Especially GEM_OPEN will be explicitly prohibited. For a
+complete list of driver-independent ioctls that can be used on render
+nodes, see the ioctls marked DRM_RENDER_ALLOW in drm_ioctl.c  Render
 nodes are designed to avoid the buffer-leaks, which occur if clients
 guess the flink names or mmap offsets on the legacy interface.
 Additionally to this basic interface, drivers must mark their
-- 
2.7.4



Re: [PATCH RFC v2] mm: Add f_ops->populate()

2022-03-07 Thread Jarkko Sakkinen
On Mon, Mar 07, 2022 at 02:37:48PM +0000, Matthew Wilcox wrote:
> On Sun, Mar 06, 2022 at 03:41:54PM -0800, Dave Hansen wrote:
> > In short: page faults stink.  The core kernel has lots of ways of
> > avoiding page faults like madvise(MADV_WILLNEED) or mmap(MAP_POPULATE).
> >  But, those only work on normal RAM that the core mm manages.
> > 
> > SGX is weird.  SGX memory is managed outside the core mm.  It doesn't
> > have a 'struct page' and get_user_pages() doesn't work on it.  Its VMAs
> > are marked with VM_IO.  So, none of the existing methods for avoiding
> > page faults work on SGX memory.
> > 
> > This essentially helps extend existing "normal RAM" kernel ABIs to work
> > for avoiding faults for SGX too.  SGX users want to enjoy all of the
> > benefits of a delayed allocation policy (better resource use,
> > overcommit, NUMA affinity) but without the cost of millions of faults.
> 
> We have a mechanism for dynamically reducing the number of page faults
> already; it's just buried in the page cache code.  You have vma->vm_file,
> which contains a file_ra_state.  You can use this to track where
> recent faults have been and grow the size of the region you fault in
> per page fault.  You don't have to (indeed probably don't want to) use
> the same algorithm as the page cache, but the _principle_ is the same --
> were recent speculative faults actually used; should we grow the number
> of pages actually faulted in, or is this a random sparse workload where
> we want to allocate individual pages.
> 
> Don't rely on the user to ask.  They don't know.

This sounds like a possibility. I'll need to study it properly first
though. Thank you for pointing this out.

BR, Jarkko


Re: [PATCH RFC v2] mm: Add f_ops->populate()

2022-03-07 Thread Jarkko Sakkinen
On Mon, Mar 07, 2022 at 07:29:22AM -0800, Dave Hansen wrote:
> On 3/7/22 03:27, Jarkko Sakkinen wrote:
> > But e.g. in __mm_populate() anything with (VM_IO | VM_PFNMAP) gets
> > filtered out and never reach that function.
> > 
> > I don't know how unorthodox that'd be but could we perhaps have a VM
> > flag for SGX?
> 
> SGX only works on a subset of the chips from one vendor on one
> architecture.  That doesn't seem worth burning a VM flag.

What do you think of Matthew's idea of using ra_state for prediction?

BR, Jarkko


[PATCH v1 0/2] Revert vendor property from anx7625 bindings

2022-03-07 Thread Robert Foss
An issue[1] related to how the V4L2_FWNODE_BUS_TYPE_PARALLEL flag is mis-used
was found in recent addition to the anx7625 driver.

In order to not introduce this issue into the ABI, let's revert the changes
to the anx7625 dt-binding related to this.

[1] https://lore.kernel.org/all/yitruicikyxs3...@pendragon.ideasonboard.com/

Robert Foss (2):
  Revert "dt-bindings:drm/bridge:anx7625:add vendor define"
  Revert "arm64: dts: mt8183: jacuzzi: Fix bus properties in anx's DSI
endpoint"

 .../display/bridge/analogix,anx7625.yaml  | 65 +--
 .../dts/mediatek/mt8183-kukui-jacuzzi.dtsi|  2 -
 2 files changed, 2 insertions(+), 65 deletions(-)

-- 
2.32.0



[PATCH v1 1/2] Revert "dt-bindings:drm/bridge:anx7625:add vendor define"

2022-03-07 Thread Robert Foss
This reverts commit a43661e7e819b100e1f833a35018560a1d9abb39.
---
 .../display/bridge/analogix,anx7625.yaml  | 65 +--
 1 file changed, 2 insertions(+), 63 deletions(-)

diff --git 
a/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml 
b/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
index 1d3e88daca041..ab48ab2f4240d 100644
--- a/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
@@ -43,70 +43,14 @@ properties:
   vdd33-supply:
 description: Regulator that provides the supply 3.3V power.
 
-  analogix,lane0-swing:
-$ref: /schemas/types.yaml#/definitions/uint8-array
-minItems: 1
-maxItems: 20
-description:
-  an array of swing register setting for DP tx lane0 PHY.
-  Registers 0~9 are Swing0_Pre0, Swing1_Pre0, Swing2_Pre0,
-  Swing3_Pre0, Swing0_Pre1, Swing1_Pre1, Swing2_Pre1, Swing0_Pre2,
-  Swing1_Pre2, Swing0_Pre3, they are for [Boost control] and
-  [Swing control] setting.
-  Registers 0~9, bit 3:0 is [Boost control], these bits control
-  post cursor manual, increase the [Boost control] to increase
-  Pre-emphasis value.
-  Registers 0~9, bit 6:4 is [Swing control], these bits control
-  swing manual, increase [Swing control] setting to add Vp-p value
-  for each Swing, Pre.
-  Registers 10~19 are Swing0_Pre0, Swing1_Pre0, Swing2_Pre0,
-  Swing3_Pre0, Swing0_Pre1, Swing1_Pre1, Swing2_Pre1, Swing0_Pre2,
-  Swing1_Pre2, Swing0_Pre3, they are for [R select control] and
-  [R Termination control] setting.
-  Registers 10~19, bit 4:0 is [R select control], these bits are
-  compensation manual, increase it can enhance IO driven strength
-  and Vp-p.
-  Registers 10~19, bit 5:6 is [R termination control], these bits
-  adjust 50ohm impedance of DP tx termination. 00:55 ohm,
-  01:50 ohm(default), 10:45 ohm, 11:40 ohm.
-
-  analogix,lane1-swing:
-$ref: /schemas/types.yaml#/definitions/uint8-array
-minItems: 1
-maxItems: 20
-description:
-  an array of swing register setting for DP tx lane1 PHY.
-  DP TX lane1 swing register setting same with lane0
-  swing, please refer lane0-swing property description.
-
-  analogix,audio-enable:
-type: boolean
-description: let the driver enable audio HDMI codec function or not.
-
   ports:
 $ref: /schemas/graph.yaml#/properties/ports
 
 properties:
   port@0:
-$ref: /schemas/graph.yaml#/$defs/port-base
-unevaluatedProperties: false
+$ref: /schemas/graph.yaml#/properties/port
 description:
-  MIPI DSI/DPI input.
-
-properties:
-  endpoint:
-$ref: /schemas/media/video-interfaces.yaml#
-type: object
-additionalProperties: false
-
-properties:
-  remote-endpoint: true
-
-  bus-type:
-enum: [1, 5]
-default: 1
-
-  data-lanes: true
+  Video port for MIPI DSI input.
 
   port@1:
 $ref: /schemas/graph.yaml#/properties/port
@@ -143,9 +87,6 @@ examples:
 vdd10-supply = <&pp1000_mipibrdg>;
 vdd18-supply = <&pp1800_mipibrdg>;
 vdd33-supply = <&pp3300_mipibrdg>;
-analogix,audio-enable;
-analogix,lane0-swing = /bits/ 8 <0x14 0x54 0x64 0x74>;
-analogix,lane1-swing = /bits/ 8 <0x14 0x54 0x64 0x74>;
 
 ports {
 #address-cells = <1>;
@@ -155,8 +96,6 @@ examples:
 reg = <0>;
 anx7625_in: endpoint {
 remote-endpoint = <&mipi_dsi>;
-bus-type = <5>;
-data-lanes = <0 1 2 3>;
 };
 };
 
-- 
2.32.0



[PATCH v1 2/2] Revert "arm64: dts: mt8183: jacuzzi: Fix bus properties in anx's DSI endpoint"

2022-03-07 Thread Robert Foss
This reverts commit 32568ae37596b529628ac09b875f4874e614f63f.
---
 arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi 
b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi
index e8f133dc96b95..8f7bf33f607da 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi
@@ -171,8 +171,6 @@ port@0 {
 
anx7625_in: endpoint {
remote-endpoint = <&dsi_out>;
-   bus-type = <5>;
-   data-lanes = <0 1 2 3>;
};
};
 
-- 
2.32.0



[PATCH 0/3] Move FPU related code from DCN3.1x drivers to DML folder

2022-03-07 Thread Melissa Wen
This series moves FPU code from DCN 3.1x drivers to dml/dcn31 folder to
isolate FPU operations. For this, it creates dcn31_fpu files to centralize
FPU operations and structs from dcn31x drivers, that include:
- _vcs_dpi_ip_params_st and _vcs_dpi_soc_bounding_box_st structs
- dcn31x_update_bw_bounding_box() functions
- dcn31_calculate_wm_and_dlg_fp()

Also, it adds dc_assert_fp_enabled() in public dml-fpu functions, as required,
and I've checked if their calls are properly wrapped by DC_FP_START/END (and
removed when inside dml/fpu files too).

Melissa Wen (3):
  drm/amd/display: move FPU related code from dcn31 to dml/dcn31 folder
  drm/amd/display: move FPU related code from dcn315 to dml/dcn31 folder
  drm/amd/display: move FPU related code from dcn316 to dml/dcn31 folder

 drivers/gpu/drm/amd/display/dc/dcn31/Makefile |  26 -
 .../drm/amd/display/dc/dcn31/dcn31_resource.c | 355 +--
 .../drm/amd/display/dc/dcn31/dcn31_resource.h |   4 +-
 .../gpu/drm/amd/display/dc/dcn315/Makefile|  26 -
 .../amd/display/dc/dcn315/dcn315_resource.c   | 232 +
 .../amd/display/dc/dcn315/dcn315_resource.h   |   3 +
 .../gpu/drm/amd/display/dc/dcn316/Makefile|  26 -
 .../amd/display/dc/dcn316/dcn316_resource.c   | 231 +
 .../amd/display/dc/dcn316/dcn316_resource.h   |   3 +
 drivers/gpu/drm/amd/display/dc/dml/Makefile   |   2 +
 .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c  | 863 ++
 .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.h  |  44 +
 12 files changed, 921 insertions(+), 894 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h

-- 
2.34.1



[PATCH 2/3] drm/amd/display: move FPU related code from dcn315 to dml/dcn31 folder

2022-03-07 Thread Melissa Wen
Moves related structs and dcn315_update_bw_bounding_box from dcn315
driver code to dml/dcn31_fpu that centralizes FPU code for DCN 3.1x.

Signed-off-by: Melissa Wen 
---
 .../gpu/drm/amd/display/dc/dcn315/Makefile|  26 --
 .../amd/display/dc/dcn315/dcn315_resource.c   | 232 +-
 .../amd/display/dc/dcn315/dcn315_resource.h   |   3 +
 .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c  | 228 +
 .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.h  |   3 +
 5 files changed, 235 insertions(+), 257 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/Makefile 
b/drivers/gpu/drm/amd/display/dc/dcn315/Makefile
index c831ad46e81c..59381d24800b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn315/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn315/Makefile
@@ -25,32 +25,6 @@
 
 DCN315 = dcn315_resource.o
 
-ifdef CONFIG_X86
-CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o := -msse
-endif
-
-ifdef CONFIG_PPC64
-CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifeq ($(call cc-ifversion, -lt, 0701, y), y)
-IS_OLD_GCC = 1
-endif
-CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o += -mhard-float
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o += 
-mpreferred-stack-boundary=4
-else
-CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o += -msse2
-endif
-endif
-
 AMD_DAL_DCN315 = $(addprefix $(AMDDALPATH)/dc/dcn315/,$(DCN315))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_DCN315)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c
index 756fec81b9ad..51a712958dbd 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c
@@ -66,6 +66,7 @@
 #include "virtual/virtual_stream_encoder.h"
 #include "dce110/dce110_resource.h"
 #include "dml/display_mode_vba.h"
+#include "dml/dcn31/dcn31_fpu.h"
 #include "dcn31/dcn31_dccg.h"
 #include "dcn10/dcn10_resource.h"
 #include "dcn31/dcn31_panel_cntl.h"
@@ -133,158 +134,9 @@
 
 #include "link_enc_cfg.h"
 
-#define DC_LOGGER_INIT(logger)
-
-#define DCN3_15_DEFAULT_DET_SIZE 192
 #define DCN3_15_MAX_DET_SIZE 384
-#define DCN3_15_MIN_COMPBUF_SIZE_KB 128
 #define DCN3_15_CRB_SEGMENT_SIZE_KB 64
 
-struct _vcs_dpi_ip_params_st dcn3_15_ip = {
-   .gpuvm_enable = 1,
-   .gpuvm_max_page_table_levels = 1,
-   .hostvm_enable = 1,
-   .hostvm_max_page_table_levels = 2,
-   .rob_buffer_size_kbytes = 64,
-   .det_buffer_size_kbytes = DCN3_15_DEFAULT_DET_SIZE,
-   .min_comp_buffer_size_kbytes = DCN3_15_MIN_COMPBUF_SIZE_KB,
-   .config_return_buffer_size_in_kbytes = 1024,
-   .compressed_buffer_segment_size_in_kbytes = 64,
-   .meta_fifo_size_in_kentries = 32,
-   .zero_size_buffer_entries = 512,
-   .compbuf_reserved_space_64b = 256,
-   .compbuf_reserved_space_zs = 64,
-   .dpp_output_buffer_pixels = 2560,
-   .opp_output_buffer_lines = 1,
-   .pixel_chunk_size_kbytes = 8,
-   .meta_chunk_size_kbytes = 2,
-   .min_meta_chunk_size_bytes = 256,
-   .writeback_chunk_size_kbytes = 8,
-   .ptoi_supported = false,
-   .num_dsc = 3,
-   .maximum_dsc_bits_per_component = 10,
-   .dsc422_native_support = false,
-   .is_line_buffer_bpp_fixed = true,
-   .line_buffer_fixed_bpp = 49,
-   .line_buffer_size_bits = 789504,
-   .max_line_buffer_lines = 12,
-   .writeback_interface_buffer_size_kbytes = 90,
-   .max_num_dpp = 4,
-   .max_num_otg = 4,
-   .max_num_hdmi_frl_outputs = 1,
-   .max_num_wb = 1,
-   .max_dchub_pscl_bw_pix_per_clk = 4,
-   .max_pscl_lb_bw_pix_per_clk = 2,
-   .max_lb_vscl_bw_pix_per_clk = 4,
-   .max_vscl_hscl_bw_pix_per_clk = 4,
-   .max_hscl_ratio = 6,
-   .max_vscl_ratio = 6,
-   .max_hscl_taps = 8,
-   .max_vscl_taps = 8,
-   .dpte_buffer_size_in_pte_reqs_luma = 64,
-   .dpte_buffer_size_in_pte_reqs_chroma = 34,
-   .dispclk_ramp_margin_percent = 1,
-   .max_inter_dcn_tile_repeaters = 9,
-   .cursor_buffer_size = 16,
-   .cursor_chunk_size = 2,
-   .writeback_line_buffer_buffer_size = 0,
-   .writeback_min_hscl_ratio = 1,
-   .writeback_min_vscl_ratio = 1,
-   .writeback_max_hscl_ratio = 1,
-   .writeback_max_vscl_ratio = 1,
-   .writeback_max_hscl_taps = 1,
-   .writeback_max_vscl_taps = 1,
-   .dppclk_delay_subtotal = 46,
-   .dppclk_delay_scl = 50,
-   .dppclk_delay_scl_lb_only = 16,
-   .dppclk_delay_cnvc_formatter = 27,
-   .dppclk_delay_cnvc_cursor = 6,
-   .dispclk_delay_subtotal = 119,
-   .dynamic_metadata_vm_enabled = false,
-   .odm_combine_4to1_supported = false,
-   .dcc_supported = true,
-};
-
-struct _vcs_dpi_soc_

[PATCH 1/3] drm/amd/display: move FPU related code from dcn31 to dml/dcn31 folder

2022-03-07 Thread Melissa Wen
Creates FPU files in dml/dcn31 folder to centralize FPU operations
from 3.1x drivers and moves all FPU-associated code from dcn31 driver
to there. It includes the struct _vcs_dpi_ip_params_st and
_vcs_dpi_soc_bounding_box_st and functions:

- dcn31_calculate_wm_and_dlg_fp()
- dcn31_update_bw_bounding_box()

It also adds dc_assert_fp_enabled() to them and drops DC_FP_START/END inside
the functions that were moved to the dml folder, as required.

Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/display/dc/dcn31/Makefile |  26 --
 .../drm/amd/display/dc/dcn31/dcn31_resource.c | 355 +--
 .../drm/amd/display/dc/dcn31/dcn31_resource.h |   4 +-
 drivers/gpu/drm/amd/display/dc/dml/Makefile   |   2 +
 .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c  | 406 ++
 .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.h  |  39 ++
 6 files changed, 451 insertions(+), 381 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h

diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile 
b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
index d20e3b8ccc30..ec041e3cda30 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
@@ -15,32 +15,6 @@ DCN31 = dcn31_resource.o dcn31_hubbub.o dcn31_hwseq.o 
dcn31_init.o dcn31_hubp.o
dcn31_apg.o dcn31_hpo_dp_stream_encoder.o dcn31_hpo_dp_link_encoder.o \
dcn31_afmt.o dcn31_vpg.o
 
-ifdef CONFIG_X86
-CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o := -msse
-endif
-
-ifdef CONFIG_PPC64
-CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifeq ($(call cc-ifversion, -lt, 0701, y), y)
-IS_OLD_GCC = 1
-endif
-CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -mhard-float
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -mpreferred-stack-boundary=4
-else
-CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -msse2
-endif
-endif
-
 AMD_DAL_DCN31 = $(addprefix $(AMDDALPATH)/dc/dcn31/,$(DCN31))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_DCN31)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
index e8f38f4a9378..0e51ac029c8a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
@@ -65,6 +65,7 @@
 #include "virtual/virtual_stream_encoder.h"
 #include "dce110/dce110_resource.h"
 #include "dml/display_mode_vba.h"
+#include "dml/dcn31/dcn31_fpu.h"
 #include "dcn31/dcn31_dccg.h"
 #include "dcn10/dcn10_resource.h"
 #include "dcn31_panel_cntl.h"
@@ -102,152 +103,6 @@
 
 #define DC_LOGGER_INIT(logger)
 
-#define DCN3_1_DEFAULT_DET_SIZE 384
-
-struct _vcs_dpi_ip_params_st dcn3_1_ip = {
-   .gpuvm_enable = 1,
-   .gpuvm_max_page_table_levels = 1,
-   .hostvm_enable = 1,
-   .hostvm_max_page_table_levels = 2,
-   .rob_buffer_size_kbytes = 64,
-   .det_buffer_size_kbytes = DCN3_1_DEFAULT_DET_SIZE,
-   .config_return_buffer_size_in_kbytes = 1792,
-   .compressed_buffer_segment_size_in_kbytes = 64,
-   .meta_fifo_size_in_kentries = 32,
-   .zero_size_buffer_entries = 512,
-   .compbuf_reserved_space_64b = 256,
-   .compbuf_reserved_space_zs = 64,
-   .dpp_output_buffer_pixels = 2560,
-   .opp_output_buffer_lines = 1,
-   .pixel_chunk_size_kbytes = 8,
-   .meta_chunk_size_kbytes = 2,
-   .min_meta_chunk_size_bytes = 256,
-   .writeback_chunk_size_kbytes = 8,
-   .ptoi_supported = false,
-   .num_dsc = 3,
-   .maximum_dsc_bits_per_component = 10,
-   .dsc422_native_support = false,
-   .is_line_buffer_bpp_fixed = true,
-   .line_buffer_fixed_bpp = 48,
-   .line_buffer_size_bits = 789504,
-   .max_line_buffer_lines = 12,
-   .writeback_interface_buffer_size_kbytes = 90,
-   .max_num_dpp = 4,
-   .max_num_otg = 4,
-   .max_num_hdmi_frl_outputs = 1,
-   .max_num_wb = 1,
-   .max_dchub_pscl_bw_pix_per_clk = 4,
-   .max_pscl_lb_bw_pix_per_clk = 2,
-   .max_lb_vscl_bw_pix_per_clk = 4,
-   .max_vscl_hscl_bw_pix_per_clk = 4,
-   .max_hscl_ratio = 6,
-   .max_vscl_ratio = 6,
-   .max_hscl_taps = 8,
-   .max_vscl_taps = 8,
-   .dpte_buffer_size_in_pte_reqs_luma = 64,
-   .dpte_buffer_size_in_pte_reqs_chroma = 34,
-   .dispclk_ramp_margin_percent = 1,
-   .max_inter_dcn_tile_repeaters = 8,
-   .cursor_buffer_size = 16,
-   .cursor_chunk_size = 2,
-   .writeback_line_buffer_buffer_size = 0,
-   .writeback_min_hscl_ratio = 1,
-   .writeback_min_vscl_ratio = 1,
-   .writeback_max_hscl_ratio = 1,
-   .writeback_max_vscl_ratio = 1,
-   .writeback_max_hscl_taps = 1,
-  

[PATCH 3/3] drm/amd/display: move FPU related code from dcn316 to dml/dcn31 folder

2022-03-07 Thread Melissa Wen
Moves FPU-related structs and dcn316_update_bw_bounding_box from the dcn316
driver to dml/dcn31, which centralizes FPU operations for DCN 3.1x.

Signed-off-by: Melissa Wen 
---
 .../gpu/drm/amd/display/dc/dcn316/Makefile|  26 --
 .../amd/display/dc/dcn316/dcn316_resource.c   | 231 +-
 .../amd/display/dc/dcn316/dcn316_resource.h   |   3 +
 .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c  | 229 +
 .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.h  |   2 +
 5 files changed, 235 insertions(+), 256 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/Makefile 
b/drivers/gpu/drm/amd/display/dc/dcn316/Makefile
index cd87b687c5e2..819d44a9439b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn316/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn316/Makefile
@@ -25,32 +25,6 @@
 
 DCN316 = dcn316_resource.o
 
-ifdef CONFIG_X86
-CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o := -msse
-endif
-
-ifdef CONFIG_PPC64
-CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifeq ($(call cc-ifversion, -lt, 0701, y), y)
-IS_OLD_GCC = 1
-endif
-CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o += -mhard-float
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o += 
-mpreferred-stack-boundary=4
-else
-CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o += -msse2
-endif
-endif
-
 AMD_DAL_DCN316 = $(addprefix $(AMDDALPATH)/dc/dcn316/,$(DCN316))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_DCN316)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c
index 90c17c44dd7c..1e451d069bc3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c
@@ -66,6 +66,7 @@
 #include "virtual/virtual_stream_encoder.h"
 #include "dce110/dce110_resource.h"
 #include "dml/display_mode_vba.h"
+#include "dml/dcn31/dcn31_fpu.h"
 #include "dcn31/dcn31_dccg.h"
 #include "dcn10/dcn10_resource.h"
 #include "dcn31/dcn31_panel_cntl.h"
@@ -123,157 +124,10 @@
 
 #include "link_enc_cfg.h"
 
-#define DC_LOGGER_INIT(logger)
-
-#define DCN3_16_DEFAULT_DET_SIZE 192
 #define DCN3_16_MAX_DET_SIZE 384
 #define DCN3_16_MIN_COMPBUF_SIZE_KB 128
 #define DCN3_16_CRB_SEGMENT_SIZE_KB 64
 
-struct _vcs_dpi_ip_params_st dcn3_16_ip = {
-   .gpuvm_enable = 1,
-   .gpuvm_max_page_table_levels = 1,
-   .hostvm_enable = 1,
-   .hostvm_max_page_table_levels = 2,
-   .rob_buffer_size_kbytes = 64,
-   .det_buffer_size_kbytes = DCN3_16_DEFAULT_DET_SIZE,
-   .config_return_buffer_size_in_kbytes = 1024,
-   .compressed_buffer_segment_size_in_kbytes = 64,
-   .meta_fifo_size_in_kentries = 32,
-   .zero_size_buffer_entries = 512,
-   .compbuf_reserved_space_64b = 256,
-   .compbuf_reserved_space_zs = 64,
-   .dpp_output_buffer_pixels = 2560,
-   .opp_output_buffer_lines = 1,
-   .pixel_chunk_size_kbytes = 8,
-   .meta_chunk_size_kbytes = 2,
-   .min_meta_chunk_size_bytes = 256,
-   .writeback_chunk_size_kbytes = 8,
-   .ptoi_supported = false,
-   .num_dsc = 3,
-   .maximum_dsc_bits_per_component = 10,
-   .dsc422_native_support = false,
-   .is_line_buffer_bpp_fixed = true,
-   .line_buffer_fixed_bpp = 48,
-   .line_buffer_size_bits = 789504,
-   .max_line_buffer_lines = 12,
-   .writeback_interface_buffer_size_kbytes = 90,
-   .max_num_dpp = 4,
-   .max_num_otg = 4,
-   .max_num_hdmi_frl_outputs = 1,
-   .max_num_wb = 1,
-   .max_dchub_pscl_bw_pix_per_clk = 4,
-   .max_pscl_lb_bw_pix_per_clk = 2,
-   .max_lb_vscl_bw_pix_per_clk = 4,
-   .max_vscl_hscl_bw_pix_per_clk = 4,
-   .max_hscl_ratio = 6,
-   .max_vscl_ratio = 6,
-   .max_hscl_taps = 8,
-   .max_vscl_taps = 8,
-   .dpte_buffer_size_in_pte_reqs_luma = 64,
-   .dpte_buffer_size_in_pte_reqs_chroma = 34,
-   .dispclk_ramp_margin_percent = 1,
-   .max_inter_dcn_tile_repeaters = 8,
-   .cursor_buffer_size = 16,
-   .cursor_chunk_size = 2,
-   .writeback_line_buffer_buffer_size = 0,
-   .writeback_min_hscl_ratio = 1,
-   .writeback_min_vscl_ratio = 1,
-   .writeback_max_hscl_ratio = 1,
-   .writeback_max_vscl_ratio = 1,
-   .writeback_max_hscl_taps = 1,
-   .writeback_max_vscl_taps = 1,
-   .dppclk_delay_subtotal = 46,
-   .dppclk_delay_scl = 50,
-   .dppclk_delay_scl_lb_only = 16,
-   .dppclk_delay_cnvc_formatter = 27,
-   .dppclk_delay_cnvc_cursor = 6,
-   .dispclk_delay_subtotal = 119,
-   .dynamic_metadata_vm_enabled = false,
-   .odm_combine_4to1_supported = false,
-   .dcc_supported = true,
-};
-
-struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = {
-   /*TODO: correct disp

Re: [PATCH v12 3/4] drm/bridge: anx7625: add MIPI DPI input feature

2022-03-07 Thread Robert Foss
On Mon, 7 Mar 2022 at 07:12, Chen-Yu Tsai  wrote:
>
> On Sun, Mar 06, 2022 at 07:13:30PM +0200, Laurent Pinchart wrote:
> > Hello Xin,
> >
> > (Question for Rob below, and I'm afraid this is urgent as we need to
> > merge a fix in v5.17).
> >
> > On Fri, Nov 05, 2021 at 11:19:03AM +0800, Xin Ji wrote:
> > > The basic anx7625 driver only support MIPI DSI rx signal input.
> > > This patch add MIPI DPI rx input configuration support, after apply
> > > this patch, the driver can support DSI rx or DPI rx by adding
> > > 'bus-type' in DT.
> > >
> > > Reviewed-by: Robert Foss 
> > > Signed-off-by: Xin Ji 
> > > ---
> > >  drivers/gpu/drm/bridge/analogix/anx7625.c | 247 --
> > >  drivers/gpu/drm/bridge/analogix/anx7625.h |  18 +-
> > >  2 files changed, 205 insertions(+), 60 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c 
> > > b/drivers/gpu/drm/bridge/analogix/anx7625.c
> > > index f48e91134c20..f7c3386c8929 100644
> > > --- a/drivers/gpu/drm/bridge/analogix/anx7625.c
> > > +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c
>
> [...]
>
> > >  static int anx7625_parse_dt(struct device *dev,
> > > struct anx7625_platform_data *pdata)
> > >  {
> > > -   struct device_node *np = dev->of_node;
> > > +   struct device_node *np = dev->of_node, *ep0;
> > > struct drm_panel *panel;
> > > int ret;
> > > +   int bus_type, mipi_lanes;
> > > +
> > > +   anx7625_get_swing_setting(dev, pdata);
> > >
> > > +   pdata->is_dpi = 1; /* default dpi mode */
> > > pdata->mipi_host_node = of_graph_get_remote_node(np, 0, 0);
> > > if (!pdata->mipi_host_node) {
> > > DRM_DEV_ERROR(dev, "fail to get internal panel.\n");
> > > return -ENODEV;
> > > }
> > >
> > > -   DRM_DEV_DEBUG_DRIVER(dev, "found dsi host node.\n");
> > > +   bus_type = V4L2_FWNODE_BUS_TYPE_PARALLEL;
> > > +   mipi_lanes = MAX_LANES_SUPPORT;
> > > +   ep0 = of_graph_get_endpoint_by_regs(np, 0, 0);
> > > +   if (ep0) {
> > > +   if (of_property_read_u32(ep0, "bus-type", &bus_type))
> > > +   bus_type = 0;
> > > +
> > > +   mipi_lanes = of_property_count_u32_elems(ep0, "data-lanes");
> > > +   }
> > > +
> > > +   if (bus_type == V4L2_FWNODE_BUS_TYPE_PARALLEL) /* bus type is 
> > > Parallel(DSI) */
> >
> > This is not correct *at all*. V4L2_FWNODE_BUS_TYPE_PARALLEL has nothing
> > to do with DSI. DSI stands for Digital *Serial* Interface. If anything,
> > the V4L2_FWNODE_BUS_TYPE_PARALLEL type would map better to DPI, even if
> > it's not an exact match.
> >
> > This patch has landed in v5.17-rc1, along with the corresponding
> > bindings. As DT bindings are an ABI, we should really fix this before
> > v5.17 is released. There is no DSI bus types defined in DT, and adding
> > one as a fix so late in the v5.17-rc cycle seems a bit of a stretch to
> > me (unless Rob disagrees).
> >
> > It would seem best to revert this series and the corresponding bindings,
> > and retry in v5.18.
>
> There is a DT patch using this property that is already queued up for 5.17
> in the soc tree:
>
> https://lore.kernel.org/all/20220214200507.2500693-1-nfrapr...@collabora.com/
>
> merged here:
>
> http://git.kernel.org/soc/soc/c/32568ae37596b529628ac09b875f4874e614f63f
>
> We will need to revert that one as well.

I just submitted a series reverting the dt-binding change + the
related commit to "mt8183: jacuzzi".
Can I get a quick r-b/a-b in order to get this into v5.17?

https://lore.kernel.org/all/20220307154558.2505734-3-robert.f...@linaro.org/

>
> ChenYu


Re: [PATCH RFC 0/3] MAP_POPULATE for device memory

2022-03-07 Thread Jarkko Sakkinen
On Mon, Mar 07, 2022 at 03:33:52PM +0100, David Hildenbrand wrote:
> On 07.03.22 15:22, Jarkko Sakkinen wrote:
> > On Mon, Mar 07, 2022 at 11:12:44AM +0100, David Hildenbrand wrote:
> >> On 06.03.22 06:32, Jarkko Sakkinen wrote:
> >>> For device memory (aka VM_IO | VM_PFNMAP) MAP_POPULATE does nothing. Allow
> >>> to use that for initializing the device memory by providing a new callback
> >>> f_ops->populate() for the purpose.
> >>>
> >>> SGX patches are provided to show the callback in context.
> >>>
> >>> An obvious alternative is a ioctl but it is less elegant and requires
> >>> two syscalls (mmap + ioctl) per memory range, instead of just one
> >>> (mmap).
> >>
> >> What about extending MADV_POPULATE_READ | MADV_POPULATE_WRITE to support
> >> VM_IO | VM_PFNMAP (as well?) ?
> > 
> > What would be a proper point to bind that behaviour? For mmap/mprotect it'd
> > be probably populate_vma_page_range() because that would span both mmap()
> > and mprotect() (Dave's suggestion in this thread).
> 
> MADV_POPULATE_* ends up in faultin_vma_page_range(), right next to
> populate_vma_page_range(). So it might require a similar way to hook
> into the driver I guess.
> 
> > 
> > For MAP_POPULATE I did not have hard proof to show that it would be used
> > by other drivers but for madvise() you can find at least a few ioctl
> > based implementations:
> > 
> > $ git grep -e madv --and \( -e ioc \)  drivers/
> > drivers/gpu/drm/i915/gem/i915_gem_ioctls.h:int 
> > i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
> > drivers/gpu/drm/i915/i915_driver.c: DRM_IOCTL_DEF_DRV(I915_GEM_MADVISE, 
> > i915_gem_madvise_ioctl, DRM_RENDER_ALLOW),
> > drivers/gpu/drm/i915/i915_gem.c:i915_gem_madvise_ioctl(struct drm_device 
> > *dev, void *data,
> > drivers/gpu/drm/msm/msm_drv.c:static int msm_ioctl_gem_madvise(struct 
> > drm_device *dev, void *data,
> > drivers/gpu/drm/msm/msm_drv.c:  DRM_IOCTL_DEF_DRV(MSM_GEM_MADVISE,  
> > msm_ioctl_gem_madvise,  DRM_RENDER_ALLOW),
> > drivers/gpu/drm/panfrost/panfrost_drv.c:static int 
> > panfrost_ioctl_madvise(struct drm_device *dev, void *data,
> > drivers/gpu/drm/vc4/vc4_drv.c:  DRM_IOCTL_DEF_DRV(VC4_GEM_MADVISE, 
> > vc4_gem_madvise_ioctl, DRM_RENDER_ALLOW),
> > drivers/gpu/drm/vc4/vc4_drv.h:int vc4_gem_madvise_ioctl(struct drm_device 
> > *dev, void *data,
> > drivers/gpu/drm/vc4/vc4_gem.c:int vc4_gem_madvise_ioctl(struct drm_device 
> > *dev, void *data,
> > 
> > IMHO this also provides a supportive claim for MAP_POPULATE, and yeah, I
> > agree that to be a consistent implementation, both madvise() and MAP_POPULATE
> > should work.
> 
> MADV_POPULATE_WRITE + MADV_DONTNEED/FALLOC_FL_PUNCH_HOLE is one way to
> dynamically manage memory consumption inside a sparse memory mapping
> (preallocate/populate via MADV_POPULATE_WRITE, discard via
> MADV_DONTNEED/FALLOC_FL_PUNCH_HOLE).  Extending that whole mechanism to
> deal with VM_IO | VM_PFNMAP mappings as well could be interesting.
> 
> At least I heard about some ideas where we might want to dynamically
> expose memory to a VM (via virtio-mem) inside a sparse memory mapping,
> and the memory in that sparse memory mapping is provided from a
> dedicated memory pool managed by a device driver -- not just using
> ordinary anonymous/file/hugetlb memory as we do right now.
> 
> Now, this is certainly stuff for the future, I just wanted to mention it.

For SGX purposes I'm now studying the possibility of using ra_state to get
an idea of where to do "prefetching" (EAUGs) in batches, as it is something
that would not require any intrusive changes to mm, but thank you for
sharing this. Looking into implementing this properly is the 2nd option,
if that does not work out.

> -- 
> Thanks,
> 
> David / dhildenb

BR, Jarkko


Re: [PATCH v1 0/2] Revert vendor property from anx7625 bindings

2022-03-07 Thread Laurent Pinchart
Hi Rob,

Thank you for the patch.

On Mon, Mar 07, 2022 at 04:45:56PM +0100, Robert Foss wrote:
> An issue[1] related to how the V4L2_FWNODE_BUS_TYPE_PARALLEL flag is mis-used
> was found in recent addition to the anx7625 driver.
> 
> In order to not introduce this issue into the ABI, let's revert the changes
> to the anx7625 dt-binding related to this.
> 
> [1] https://lore.kernel.org/all/yitruicikyxs3...@pendragon.ideasonboard.com/
> 
> Robert Foss (2):
>   Revert "dt-bindings:drm/bridge:anx7625:add vendor define"
>   Revert "arm64: dts: mt8183: jacuzzi: Fix bus properties in anx's DSI
> endpoint"

If this is enough to avoid the wrong bus-type becoming an ABI, even if
the corresponding driver support isn't reverted, then, for the whole
series,

Reviewed-by: Laurent Pinchart 

>  .../display/bridge/analogix,anx7625.yaml  | 65 +--
>  .../dts/mediatek/mt8183-kukui-jacuzzi.dtsi|  2 -
>  2 files changed, 2 insertions(+), 65 deletions(-)

-- 
Regards,

Laurent Pinchart


Re: [PATCH] drm: remove min_order BUG_ON check

2022-03-07 Thread Christian König

Am 07.03.22 um 15:37 schrieb Arunpravin:

Place BUG_ON(order < min_order) outside the do..while
loop, as it makes the Unigine Heaven benchmark fail.

Unigine Heaven issues buffer allocation requests where,
for example, the required number of pages is 161 and the
requested alignment is 128. To allocate the remaining 33
pages, the loop continues iterating and finds an order
value of 5; when this is compared with min_order = 7, the
BUG_ON() triggers. To avoid this problem, place the BUG_ON
check outside of the do..while loop.


Well, using BUG_ON() sounds like the wrong approach in the first place.

A BUG_ON() is only justified if it prevents further data corruption,
e.g. when you detect a reference count overflow or similar.


In all other cases you should trigger a WARN_ON() and abort the 
operation with -EINVAL if possible.


Regards,
Christian.



Signed-off-by: Arunpravin 
---
  drivers/gpu/drm/drm_buddy.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 72f52f293249..ed94c56b720f 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -669,10 +669,11 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
order = fls(pages) - 1;
min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
  
+	BUG_ON(order < min_order);

+
do {
order = min(order, (unsigned int)fls(pages) - 1);
BUG_ON(order > mm->max_order);
-   BUG_ON(order < min_order);
  
  		do {

if (flags & DRM_BUDDY_RANGE_ALLOCATION)

base-commit: 8025c79350b90e5a8029234d433578f12abbae2b




RE: a null dereference at [v7, 9/9] drm/omap: Add a 'right overlay' to plane state

2022-03-07 Thread Weiguo Li
> 
> From: Benoit Parrot 
> 
> If the drm_plane has a source width that's greater than the max width
> supported by a single hw overlay, then we assign a 'r_overlay' to it in
> omap_plane_atomic_check().
> 
> Both overlays should have the capabilities required to handle the source
> framebuffer. The only parameters that vary between the left and right
> hwoverlays are the src_w, crtc_w, src_x and crtc_x as we just even chop
> the fb into left and right halves.
> 
> We also take care of not creating odd width size when dealing with YUV
> formats.
> 
> Since both halves need to be 'appear' side by side the zpos is
> recalculated when dealing with dual overlay cases so that the other
> planes zpos is consistent.
> 
> Depending on user space usage it is possible that on occasion the number
> of requested planes exceeds the numbers of overlays required to display
> them. In that case a failure would be returned for the plane that cannot
> be handled at that time. It is up to user space to make sure the H/W
> resource are not over-subscribed.
> 
> Signed-off-by: Benoit Parrot 
> Signed-off-by: Neil Armstrong 
> ---
>  drivers/gpu/drm/omapdrm/omap_drv.c |  98 -
>  drivers/gpu/drm/omapdrm/omap_fb.c  |  33 ++-
>  drivers/gpu/drm/omapdrm/omap_fb.h  |   4 +-
>  drivers/gpu/drm/omapdrm/omap_overlay.c |  23 -
>  drivers/gpu/drm/omapdrm/omap_overlay.h |   3 +-
>  drivers/gpu/drm/omapdrm/omap_plane.c   | 117 +++--
>  drivers/gpu/drm/omapdrm/omap_plane.h   |   1 +
>  7 files changed, 267 insertions(+), 12 deletions(-)
> 
> ...
> 
> diff --git a/drivers/gpu/drm/omapdrm/omap_overlay.c 
> b/drivers/gpu/drm/omapdrm/omap_overlay.c
> index afb2b44fdf86..10730c9b2752 100644
> --- a/drivers/gpu/drm/omapdrm/omap_overlay.c
> +++ b/drivers/gpu/drm/omapdrm/omap_overlay.c
> @@ -67,12 +67,13 @@  omap_plane_find_free_overlay(struct drm_device *dev, 
> struct drm_plane *hwoverlay
>   * next global overlay_map to be enabled when atomic transaction is valid.
>   */
>  int omap_overlay_assign(struct drm_atomic_state *s, struct drm_plane *plane,
> - u32 caps, u32 fourcc, struct omap_hw_overlay **overlay)
> + u32 caps, u32 fourcc, struct omap_hw_overlay **overlay,
> + struct omap_hw_overlay **r_overlay)
>  {
>   /* Get the global state of the current atomic transaction */
>   struct omap_global_state *state = omap_get_global_state(s);
>   struct drm_plane **overlay_map = state->hwoverlay_to_plane;
> - struct omap_hw_overlay *ovl;
> + struct omap_hw_overlay *ovl, *r_ovl;
>  
>   ovl = omap_plane_find_free_overlay(s->dev, overlay_map, caps, fourcc);
>   if (!ovl)
> @@ -81,8 +82,26 @@  int omap_overlay_assign(struct drm_atomic_state *s, 
> struct drm_plane *plane,
>   overlay_map[ovl->idx] = plane;
>   *overlay = ovl;
>  
> + if (r_overlay) {
> + r_ovl = omap_plane_find_free_overlay(s->dev, overlay_map,
> +  caps, fourcc);
> + if (!r_ovl) {
> + overlay_map[r_ovl->idx] = NULL;

Hi,

  There is a null dereference of "r_ovl->idx" when "r_ovl" is NULL, in the inner if clause.




Re: [PATCH RFC 0/3] MAP_POPULATE for device memory

2022-03-07 Thread Jarkko Sakkinen
On Mon, Mar 07, 2022 at 07:56:53AM -0800, Christoph Hellwig wrote:
> On Mon, Mar 07, 2022 at 03:29:35PM +0200, Jarkko Sakkinen wrote:
> > So what would you suggest to sort out the issue? I'm happy to go with
> > ioctl if nothing else is acceptable.
> 
> Plenty of drivers treat all mmaps as if MAP_POPULATE was specified,
> typically by using (io_)remap_pfn_range.  Is there any reason to only
> optionally have the pre-fault semantics for sgx?  If not this should
> be really simple.  And if we have a real need for it to be optional
> we'll just need to find a sane way to pass that information to ->mmap.

Dave, what if mmap() would just unconditionally EAUG after initialization?

It's an option, yes.

BR, Jarkko


RE: [PATCH v4 4/4] arm64/dts/qcom/sm8250: remove assigned-clock-rate property for mdp clk

2022-03-07 Thread Vinod Polimera
> WARNING: This email originated from outside of Qualcomm. Please be wary
> of any links or attachments, and do not enable macros.
> 
> On Sat, 5 Mar 2022 at 00:49, Doug Anderson 
> wrote:
> > On Thu, Mar 3, 2022 at 4:16 PM Dmitry Baryshkov
> >  wrote:
> > >
> > > On Fri, 4 Mar 2022 at 02:56, Stephen Boyd 
> wrote:
> > > >
> > > > Quoting Dmitry Baryshkov (2022-03-03 15:50:50)
> > > > > On Thu, 3 Mar 2022 at 12:40, Vinod Polimera
>  wrote:
> > > > > >
> > > > > > Kernel clock driver assumes that initial rate is the
> > > > > > max rate for that clock and was not allowing it to scale
> > > > > > beyond the assigned clock value.
> > > > > >
> > > > > > Drop the assigned clock rate property and vote on the mdp clock as
> per
> > > > > > calculated value during the usecase.
> > > > > >
> > > > > > Fixes: 7c1dffd471("arm64: dts: qcom: sm8250.dtsi: add display
> system nodes")
> > > > >
> > > > > Please remove the Fixes tags from all commits. Otherwise the
> patches
> > > > > might be picked up into earlier kernels, which do not have a patch
> > > > > adding a vote on the MDP clock.
> > > >
> > > > What patch is that? The Fixes tag could point to that commit.
> > >
> > > Please correct me if I'm wrong.
> > > Currently the dtsi enforces bumping the MDP clock when the mdss
> device
> > > is being probed and when the dpu device is being probed.
> > > Later during the DPU lifetime the core_perf would change the clock's
> > > rate as it sees fit according to the CRTC requirements.
> >
> > "Currently" means _before_ ${SUBJECT} patch lands, right? Since
> > ${SUBJECT} patch is removing the bump to max.
> 
> Yes. 'Before this patch'.
> 
> >
> >
> > > However, it would happen only during
> > > dpu_crtc_atomic_flush(); before we call this function, the MDP clock
> > > is left in an undetermined state. The power rails controlled by the
> > > opp table are also left in an undetermined state.
> > >
> > > I suppose that during the dpu_bind we should bump the clock to the max
> > > possible freq and let dpu_core_perf handle it afterwards.
> >
> > Definitely feels like setting the clock to something predictable during
> > the initial probe makes sense. If it's just for the initial probe then
> > setting it to max (based on the opp table) seems fine.
> 
> Vinod, could you please implement it?
> 
> > I think an
> > earlier version of this series set it to max every time we did runtime
> > resume. We'd have to have a good reason to do that.
> 
> Yes, this is correct. Based on the comments I had the impression that
> there was a suggestion that the place for the calls was wrong. Most
> probably I was instead projecting my own thoughts.
> 
I discussed this internally with the team. Traditionally, an mdp clk vote during
probe/bind is required when the display is turned on in the bootloader and persists
until the first update in the kernel. As, on Chromebooks, the timing engine is turned
off during depthcharge exit and there is no display transition from the
bootloader to the kernel, the mdp clk can be voted based on the calculated value
during the framework update and does not require a vote during probe/bind.

Thanks,
Vinod.
> --
> With best wishes
> Dmitry


[PATCH v2] drm/mode: Improve drm_mode_fb_cmd2 documentation

2022-03-07 Thread Geert Uytterhoeven
From: Geert Uytterhoeven 

Fix various grammar mistakes in the kerneldoc comments documenting the
drm_mode_fb_cmd2 structure:
  - s/is/are/,
  - s/8 bit/8-bit/.

Signed-off-by: Geert Uytterhoeven 
Acked-by: Sam Ravnborg 
---
v2:
  - Add Acked-by,
  - Rebase on top of commit a3574119826d9a4e ("drm: document struct
drm_mode_fb_cmd2") in drm-next.
---
 include/uapi/drm/drm_mode.h | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 0a0d56a6158e6327..fa953309d9ce5775 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -675,11 +675,11 @@ struct drm_mode_fb_cmd {
  *   fetch metadata about an existing frame-buffer.
  *
  * In case of planar formats, this struct allows up to 4 buffer objects with
- * offsets and pitches per plane. The pitch and offset order is dictated by the
- * format FourCC as defined by ``drm_fourcc.h``, e.g. NV12 is described as:
+ * offsets and pitches per plane. The pitch and offset order are dictated by
+ * the format FourCC as defined by ``drm_fourcc.h``, e.g. NV12 is described as:
  *
- * YUV 4:2:0 image with a plane of 8 bit Y samples followed by an
- * interleaved U/V plane containing 8 bit 2x2 subsampled colour difference
+ * YUV 4:2:0 image with a plane of 8-bit Y samples followed by an
+ * interleaved U/V plane containing 8-bit 2x2 subsampled colour difference
  * samples.
  *
  * So it would consist of a Y plane at ``offsets[0]`` and a UV plane at
-- 
2.25.1



Re: [PATCH] drm/selftests: missing error code in igt_buddy_alloc_smoke()

2022-03-07 Thread Christian König

Pushed to drm-misc-next. Just one nit below.

Am 07.03.22 um 15:54 schrieb Arunpravin:

Reviewed-by:Arunpravin 


Some people are picky about using the full name here.

And I think we should volunteer you for maintaining that stuff :)

So you might want to get commit rights for drm-misc-next as well.

Christian.



On 07/03/22 6:24 pm, Dan Carpenter wrote:

Set the error code to -ENOMEM if drm_random_order() fails.

Fixes: e6ff5ef81170 ("drm/selftests: add drm buddy smoke testcase")
Signed-off-by: Dan Carpenter 
---
  drivers/gpu/drm/selftests/test-drm_buddy.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/selftests/test-drm_buddy.c 
b/drivers/gpu/drm/selftests/test-drm_buddy.c
index fa997f89522b..6eba590afa9b 100644
--- a/drivers/gpu/drm/selftests/test-drm_buddy.c
+++ b/drivers/gpu/drm/selftests/test-drm_buddy.c
@@ -488,8 +488,10 @@ static int igt_buddy_alloc_smoke(void *arg)
}
  
  	order = drm_random_order(mm.max_order + 1, &prng);

-   if (!order)
+   if (!order) {
+   err = -ENOMEM;
goto out_fini;
+   }
  
  	for (i = 0; i <= mm.max_order; ++i) {

struct drm_buddy_block *block;





Re: [PATCH 5.15 000/262] 5.15.27-rc1 review

2022-03-07 Thread Greg Kroah-Hartman
On Mon, Mar 07, 2022 at 06:30:18PM +0530, Naresh Kamboju wrote:
> drivers/gpu/drm/mediatek/mtk_dsi.c: In function 'mtk_dsi_host_attach':
> drivers/gpu/drm/mediatek/mtk_dsi.c:858:28: error: implicit declaration
> of function 'devm_drm_of_get_bridge'; did you mean
> 'devm_drm_panel_bridge_add'? [-Werror=implicit-function-declaration]
>   858 | dsi->next_bridge = devm_drm_of_get_bridge(dev,
> dev->of_node, 0, 0);
>   |^~
>   |devm_drm_panel_bridge_add
> drivers/gpu/drm/mediatek/mtk_dsi.c:858:26: warning: assignment to
> 'struct drm_bridge *' from 'int' makes pointer from integer without a
> cast [-Wint-conversion]
>   858 | dsi->next_bridge = devm_drm_of_get_bridge(dev,
> dev->of_node, 0, 0);
>   |  ^
> cc1: some warnings being treated as errors

Offending commit now dropped, thanks.


greg k-h


Re: [PATCH v2] drm/mode: Improve drm_mode_fb_cmd2 documentation

2022-03-07 Thread Simon Ser
Reviewed-by: Simon Ser 


Re: [PATCH v1 2/2] Revert "arm64: dts: mt8183: jacuzzi: Fix bus properties in anx's DSI endpoint"

2022-03-07 Thread Robert Foss
Signed-off-by: Robert Foss 

On Mon, 7 Mar 2022 at 16:46, Robert Foss  wrote:
>
> This reverts commit 32568ae37596b529628ac09b875f4874e614f63f.
> ---
>  arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi | 2 --
>  1 file changed, 2 deletions(-)
>
> diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi 
> b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi
> index e8f133dc96b95..8f7bf33f607da 100644
> --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi
> +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi
> @@ -171,8 +171,6 @@ port@0 {
>
> anx7625_in: endpoint {
> remote-endpoint = <&dsi_out>;
> -   bus-type = <5>;
> -   data-lanes = <0 1 2 3>;
> };
> };
>
> --
> 2.32.0
>


Re: [PATCH v1 1/2] Revert "dt-bindings:drm/bridge:anx7625:add vendor define"

2022-03-07 Thread Robert Foss
On Mon, 7 Mar 2022 at 16:46, Robert Foss  wrote:
>
> This reverts commit a43661e7e819b100e1f833a35018560a1d9abb39.
> ---
>  .../display/bridge/analogix,anx7625.yaml  | 65 +--
>  1 file changed, 2 insertions(+), 63 deletions(-)
>
> diff --git 
> a/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml 
> b/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
> index 1d3e88daca041..ab48ab2f4240d 100644
> --- a/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
> +++ b/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
> @@ -43,70 +43,14 @@ properties:
>vdd33-supply:
>  description: Regulator that provides the supply 3.3V power.
>
> -  analogix,lane0-swing:
> -$ref: /schemas/types.yaml#/definitions/uint8-array
> -minItems: 1
> -maxItems: 20
> -description:
> -  an array of swing register setting for DP tx lane0 PHY.
> -  Registers 0~9 are Swing0_Pre0, Swing1_Pre0, Swing2_Pre0,
> -  Swing3_Pre0, Swing0_Pre1, Swing1_Pre1, Swing2_Pre1, Swing0_Pre2,
> -  Swing1_Pre2, Swing0_Pre3, they are for [Boost control] and
> -  [Swing control] setting.
> -  Registers 0~9, bit 3:0 is [Boost control], these bits control
> -  post cursor manual, increase the [Boost control] to increase
> -  Pre-emphasis value.
> -  Registers 0~9, bit 6:4 is [Swing control], these bits control
> -  swing manual, increase [Swing control] setting to add Vp-p value
> -  for each Swing, Pre.
> -  Registers 10~19 are Swing0_Pre0, Swing1_Pre0, Swing2_Pre0,
> -  Swing3_Pre0, Swing0_Pre1, Swing1_Pre1, Swing2_Pre1, Swing0_Pre2,
> -  Swing1_Pre2, Swing0_Pre3, they are for [R select control] and
> -  [R Termination control] setting.
> -  Registers 10~19, bit 4:0 is [R select control], these bits are
> -  compensation manual, increase it can enhance IO driven strength
> -  and Vp-p.
> -  Registers 10~19, bit 5:6 is [R termination control], these bits
> -  adjust 50ohm impedance of DP tx termination. 00:55 ohm,
> -  01:50 ohm(default), 10:45 ohm, 11:40 ohm.
> -
> -  analogix,lane1-swing:
> -$ref: /schemas/types.yaml#/definitions/uint8-array
> -minItems: 1
> -maxItems: 20
> -description:
> -  an array of swing register setting for DP tx lane1 PHY.
> -  DP TX lane1 swing register setting same with lane0
> -  swing, please refer lane0-swing property description.
> -
> -  analogix,audio-enable:
> -type: boolean
> -description: let the driver enable audio HDMI codec function or not.
> -
>ports:
>  $ref: /schemas/graph.yaml#/properties/ports
>
>  properties:
>port@0:
> -$ref: /schemas/graph.yaml#/$defs/port-base
> -unevaluatedProperties: false
> +$ref: /schemas/graph.yaml#/properties/port
>  description:
> -  MIPI DSI/DPI input.
> -
> -properties:
> -  endpoint:
> -$ref: /schemas/media/video-interfaces.yaml#
> -type: object
> -additionalProperties: false
> -
> -properties:
> -  remote-endpoint: true
> -
> -  bus-type:
> -enum: [1, 5]
> -default: 1
> -
> -  data-lanes: true
> +  Video port for MIPI DSI input.
>
>port@1:
>  $ref: /schemas/graph.yaml#/properties/port
> @@ -143,9 +87,6 @@ examples:
>  vdd10-supply = <&pp1000_mipibrdg>;
>  vdd18-supply = <&pp1800_mipibrdg>;
>  vdd33-supply = <&pp3300_mipibrdg>;
> -analogix,audio-enable;
> -analogix,lane0-swing = /bits/ 8 <0x14 0x54 0x64 0x74>;
> -analogix,lane1-swing = /bits/ 8 <0x14 0x54 0x64 0x74>;
>
>  ports {
>  #address-cells = <1>;
> @@ -155,8 +96,6 @@ examples:
>  reg = <0>;
>  anx7625_in: endpoint {
>  remote-endpoint = <&mipi_dsi>;
> -bus-type = <5>;
> -data-lanes = <0 1 2 3>;
>  };
>  };
>
> --
> 2.32.0
>

Signed-off-by: Robert Foss 


Re: [PATCH 00/10] drm/gma500: Various cleanups to GEM code

2022-03-07 Thread Patrik Jakobsson
On Sun, Mar 6, 2022 at 9:36 PM Thomas Zimmermann  wrote:
>
> Refactor and simplify various parts of the memory management. This
> includes locking, initialization and finalizer functions, and code
> organization.
>
> Tested on Atom N2800 hardware.

Hi Thomas, nice cleanups!

All patches are:
Acked-by: Patrik Jakobsson 

>
> Thomas Zimmermann (10):
>   drm/gma500: Remove struct psb_gem_object.npage
>   drm/gma500: Acquire reservation lock for GEM objects
>   drm/gma500: Move GTT locking into GTT helpers
>   drm/gma500: Remove struct psb_gtt.sem sempahore
>   drm/gma500: Move GTT setup and restoration into helper funtions
>   drm/gma500: Move GTT resume logic out of psb_gtt_init()
>   drm/gma500: Cleanup GTT uninit and error handling
>   drm/gma500: Split GTT init/resume/fini into GTT and GEM functions
>   drm/gma500: Inline psb_gtt_restore()
>   drm/gma500: Move GEM memory management functions to gem.c
>
>  drivers/gpu/drm/gma500/gem.c | 161 --
>  drivers/gpu/drm/gma500/gem.h |  13 +-
>  drivers/gpu/drm/gma500/gma_display.c |   8 +-
>  drivers/gpu/drm/gma500/gtt.c | 239 +--
>  drivers/gpu/drm/gma500/gtt.h |   8 +-
>  drivers/gpu/drm/gma500/power.c   |   5 +-
>  drivers/gpu/drm/gma500/psb_drv.c |  13 +-
>  drivers/gpu/drm/gma500/psb_drv.h |   1 -
>  8 files changed, 296 insertions(+), 152 deletions(-)
>
>
> base-commit: 710a019ad85e96e66f7d75ee7f4733cdd8a2b0d0
> prerequisite-patch-id: c2b2f08f0eccc9f5df0c0da49fa1d36267deb11d
> prerequisite-patch-id: c67e5d886a47b7d0266d81100837557fda34cb24
> prerequisite-patch-id: 6e1032c6302461624f33194c8b8f37103a3fa6ef
> --
> 2.35.1
>


Re: [PATCH v4 4/4] arm64/dts/qcom/sm8250: remove assigned-clock-rate property for mdp clk

2022-03-07 Thread Dmitry Baryshkov
On Mon, 7 Mar 2022 at 19:05, Vinod Polimera  wrote:
>
> > WARNING: This email originated from outside of Qualcomm. Please be wary
> > of any links or attachments, and do not enable macros.
> >
> > On Sat, 5 Mar 2022 at 00:49, Doug Anderson 
> > wrote:
> > > On Thu, Mar 3, 2022 at 4:16 PM Dmitry Baryshkov
> > >  wrote:
> > > >
> > > > On Fri, 4 Mar 2022 at 02:56, Stephen Boyd 
> > wrote:
> > > > >
> > > > > Quoting Dmitry Baryshkov (2022-03-03 15:50:50)
> > > > > > On Thu, 3 Mar 2022 at 12:40, Vinod Polimera
> >  wrote:
> > > > > > >
> > > > > > > Kernel clock driver assumes that initial rate is the
> > > > > > > max rate for that clock and was not allowing it to scale
> > > > > > > beyond the assigned clock value.
> > > > > > >
> > > > > > > Drop the assigned clock rate property and vote on the mdp clock as
> > per
> > > > > > > calculated value during the usecase.
> > > > > > >
> > > > > > > Fixes: 7c1dffd471("arm64: dts: qcom: sm8250.dtsi: add display
> > system nodes")
> > > > > >
> > > > > > Please remove the Fixes tags from all commits. Otherwise the
> > patches
> > > > > > might be picked up into earlier kernels, which do not have a patch
> > > > > > adding a vote on the MDP clock.
> > > > >
> > > > > What patch is that? The Fixes tag could point to that commit.
> > > >
> > > > Please correct me if I'm wrong.
> > > > Currently the dtsi enforces bumping the MDP clock when the mdss
> > device
> > > > is being probed and when the dpu device is being probed.
> > > > Later during the DPU lifetime the core_perf would change the clock's
> > > > rate as it sees fit according to the CRTC requirements.
> > >
> > > "Currently" means _before_ ${SUBJECT} patch lands, right? Since
> > > ${SUBJECT} patch is removing the bump to max.
> >
> > Yes. 'Before this patch'.
> >
> > >
> > >
> > > > > However it would happen only during
> > > > > dpu_crtc_atomic_flush(); before we call this function, the MDP clock
> > > > is left in the undetermined state. The power rails controlled by the
> > > > opp table are left in the undetermined state.
> > > >
> > > > I suppose that during the dpu_bind we should bump the clock to the max
> > > > possible freq and let dpu_core_perf handle it afterwards.
> > >
> > > > Definitely feels like setting the clock to something predictable during
> > > the initial probe makes sense. If it's just for the initial probe then
> > > setting it to max (based on the opp table) seems fine.
> >
> > Vinod, could you please implement it?
> >
> > > I think an
> > > earlier version of this series set it to max every time we did runtime
> > > resume. We'd have to have a good reason to do that.
> >
> > Yes, this is correct. Based on the comments I had the impression that
> > there was a suggestion that the place for the calls was wrong. Most
> > probably I was instead projecting my own thoughts.
> >
> I had discussed internally with the team. Traditionally, mdp clk vote during
> probe/bind is required when display is turned on in bootloader and persists
> till first update in kernel.

Not each and every board has a display setup in the bootloader. For
example the RB5 I have here doesn't support setting up the display.
Not to mention that we should tell Linux, which vote is cast,
otherwise the .sync_state can turn respective votes off.

> As in chromebook, timing engine will be turned
> off during depthcharge exit and as there is no display transition from
> bootloader to kernel, mdp clk can be voted based on the calculated value
> during framework update and does not require a vote during probe/bind.

Generally Linux should not depend on the bootloader setup. You can not
be sure. What if we kexec next kernel?

-- 
With best wishes
Dmitry


[PATCH v2] drm/vmwgfx: Implement MSI/MSI-X support for IRQs

2022-03-07 Thread Zack Rusin
From: Zack Rusin 

SVGAv3 deprecates legacy interrupts and adds support for MSI/MSI-X. With
MSI the driver visible side remains largely unchanged but with MSI-X
each interrupt gets delivered on its own vector.

Add support for MSI/MSI-X while preserving the old functionality for
SVGAv2. Code between the SVGAv2 and SVGAv3 is exactly the same, only
the number of available vectors changes, in particular between legacy
and MSI-X interrupts.

Signed-off-by: Zack Rusin 
Reviewed-by: Martin Krastev 
Reviewed-by: Maaz Mombasawala 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |  2 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h |  9 -
 drivers/gpu/drm/vmwgfx/vmwgfx_irq.c | 55 +
 3 files changed, 58 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index f43afd56915e..791f9a5f3868 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -980,7 +980,7 @@ static int vmw_driver_load(struct vmw_private *dev_priv, 
u32 pci_id)
}
 
if (dev_priv->capabilities & SVGA_CAP_IRQMASK) {
-   ret = vmw_irq_install(&dev_priv->drm, pdev->irq);
+   ret = vmw_irq_install(dev_priv);
if (ret != 0) {
drm_err(&dev_priv->drm,
"Failed installing irq: %d\n", ret);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 12eb4de41036..be19aa6e1f13 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -65,6 +65,11 @@
 #define VMWGFX_PCI_ID_SVGA2  0x0405
 #define VMWGFX_PCI_ID_SVGA3  0x0406
 
+/*
+ * This has to match get_count_order(SVGA_IRQFLAG_MAX)
+ */
+#define VMWGFX_MAX_NUM_IRQS 6
+
 /*
  * Perhaps we should have sysfs entries for these.
  */
@@ -532,6 +537,8 @@ struct vmw_private {
bool has_mob;
spinlock_t hw_lock;
bool assume_16bpp;
+   u32 irqs[VMWGFX_MAX_NUM_IRQS];
+   u32 num_irq_vectors;
 
enum vmw_sm_type sm_type;
 
@@ -1158,7 +1165,7 @@ bool vmw_cmd_describe(const void *buf, u32 *size, char 
const **cmd);
  * IRQs and wating - vmwgfx_irq.c
  */
 
-extern int vmw_irq_install(struct drm_device *dev, int irq);
+extern int vmw_irq_install(struct vmw_private *dev_priv);
 extern void vmw_irq_uninstall(struct drm_device *dev);
 extern bool vmw_seqno_passed(struct vmw_private *dev_priv,
uint32_t seqno);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
index fe4732bf2c9d..086e69a130d4 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
@@ -300,6 +300,7 @@ void vmw_irq_uninstall(struct drm_device *dev)
struct vmw_private *dev_priv = vmw_priv(dev);
struct pci_dev *pdev = to_pci_dev(dev->dev);
uint32_t status;
+   u32 i;
 
if (!(dev_priv->capabilities & SVGA_CAP_IRQMASK))
return;
@@ -309,20 +310,62 @@ void vmw_irq_uninstall(struct drm_device *dev)
status = vmw_irq_status_read(dev_priv);
vmw_irq_status_write(dev_priv, status);
 
-   free_irq(pdev->irq, dev);
+   for (i = 0; i < dev_priv->num_irq_vectors; ++i)
+   free_irq(dev_priv->irqs[i], dev);
+
+   pci_free_irq_vectors(pdev);
+   dev_priv->num_irq_vectors = 0;
 }
 
 /**
  * vmw_irq_install - Install the irq handlers
  *
- * @dev:  Pointer to the drm device.
- * @irq:  The irq number.
+ * @dev_priv:  Pointer to the vmw_private device.
  * Return:  Zero if successful. Negative number otherwise.
  */
-int vmw_irq_install(struct drm_device *dev, int irq)
+int vmw_irq_install(struct vmw_private *dev_priv)
 {
+   struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
+   struct drm_device *dev = &dev_priv->drm;
+   int ret;
+   int nvec;
+   int i = 0;
+
+   BUILD_BUG_ON((SVGA_IRQFLAG_MAX >> VMWGFX_MAX_NUM_IRQS) != 1);
+   BUG_ON(VMWGFX_MAX_NUM_IRQS != get_count_order(SVGA_IRQFLAG_MAX));
+
+   nvec = pci_alloc_irq_vectors(pdev, 1, VMWGFX_MAX_NUM_IRQS,
+PCI_IRQ_ALL_TYPES);
+
+   if (nvec <= 0) {
+   drm_err(&dev_priv->drm,
+   "IRQ's are unavailable, nvec: %d\n", nvec);
+   ret = nvec;
+   goto done;
+   }
+
vmw_irq_preinstall(dev);
 
-   return request_threaded_irq(irq, vmw_irq_handler, vmw_thread_fn,
-   IRQF_SHARED, VMWGFX_DRIVER_NAME, dev);
+   for (i = 0; i < nvec; ++i) {
+   ret = pci_irq_vector(pdev, i);
+   if (ret < 0) {
+   drm_err(&dev_priv->drm,
+   "failed getting irq vector: %d\n", ret);
+   goto done;
+   }
+   dev_priv->irqs[i] = ret;
+
+   ret = request_threaded_irq(dev_priv->irqs[i], vmw_irq_handler, 
vm

Re: [PATCH v1 1/2] Revert "dt-bindings:drm/bridge:anx7625:add vendor define"

2022-03-07 Thread Rob Herring
On Mon, Mar 07, 2022 at 04:45:57PM +0100, Robert Foss wrote:
> This reverts commit a43661e7e819b100e1f833a35018560a1d9abb39.

S-o-b and reason for the revert?

> ---
>  .../display/bridge/analogix,anx7625.yaml  | 65 +--
>  1 file changed, 2 insertions(+), 63 deletions(-)
> 
> diff --git 
> a/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml 
> b/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
> index 1d3e88daca041..ab48ab2f4240d 100644
> --- a/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
> +++ b/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
> @@ -43,70 +43,14 @@ properties:
>vdd33-supply:
>  description: Regulator that provides the supply 3.3V power.
>  
> -  analogix,lane0-swing:
> -$ref: /schemas/types.yaml#/definitions/uint8-array
> -minItems: 1
> -maxItems: 20
> -description:
> -  an array of swing register setting for DP tx lane0 PHY.
> -  Registers 0~9 are Swing0_Pre0, Swing1_Pre0, Swing2_Pre0,
> -  Swing3_Pre0, Swing0_Pre1, Swing1_Pre1, Swing2_Pre1, Swing0_Pre2,
> -  Swing1_Pre2, Swing0_Pre3, they are for [Boost control] and
> -  [Swing control] setting.
> -  Registers 0~9, bit 3:0 is [Boost control], these bits control
> -  post cursor manual, increase the [Boost control] to increase
> -  Pre-emphasis value.
> -  Registers 0~9, bit 6:4 is [Swing control], these bits control
> -  swing manual, increase [Swing control] setting to add Vp-p value
> -  for each Swing, Pre.
> -  Registers 10~19 are Swing0_Pre0, Swing1_Pre0, Swing2_Pre0,
> -  Swing3_Pre0, Swing0_Pre1, Swing1_Pre1, Swing2_Pre1, Swing0_Pre2,
> -  Swing1_Pre2, Swing0_Pre3, they are for [R select control] and
> -  [R Termination control] setting.
> -  Registers 10~19, bit 4:0 is [R select control], these bits are
> -  compensation manual, increase it can enhance IO driven strength
> -  and Vp-p.
> -  Registers 10~19, bit 5:6 is [R termination control], these bits
> -  adjust 50ohm impedance of DP tx termination. 00:55 ohm,
> -  01:50 ohm(default), 10:45 ohm, 11:40 ohm.
> -
> -  analogix,lane1-swing:
> -$ref: /schemas/types.yaml#/definitions/uint8-array
> -minItems: 1
> -maxItems: 20
> -description:
> -  an array of swing register setting for DP tx lane1 PHY.
> -  DP TX lane1 swing register setting same with lane0
> -  swing, please refer lane0-swing property description.

These apply to the DP side, so no need to revert this part.

> -
> -  analogix,audio-enable:
> -type: boolean
> -description: let the driver enable audio HDMI codec function or not.
> -

Not sure on this one...

>ports:
>  $ref: /schemas/graph.yaml#/properties/ports
>  
>  properties:
>port@0:
> -$ref: /schemas/graph.yaml#/$defs/port-base
> -unevaluatedProperties: false
> +$ref: /schemas/graph.yaml#/properties/port
>  description:
> -  MIPI DSI/DPI input.
> -
> -properties:
> -  endpoint:
> -$ref: /schemas/media/video-interfaces.yaml#
> -type: object
> -additionalProperties: false
> -
> -properties:
> -  remote-endpoint: true
> -
> -  bus-type:
> -enum: [1, 5]

I think the error here is really 1 should be 4 which corresponds to 
D-PHY which is used by both CSI and DSI. Otherwise, I don't really see 
the issue with bus-type being shared between CSI and DSI.

> -default: 1
> -
> -  data-lanes: true
> +  Video port for MIPI DSI input.
>  
>port@1:
>  $ref: /schemas/graph.yaml#/properties/port
> @@ -143,9 +87,6 @@ examples:
>  vdd10-supply = <&pp1000_mipibrdg>;
>  vdd18-supply = <&pp1800_mipibrdg>;
>  vdd33-supply = <&pp3300_mipibrdg>;
> -analogix,audio-enable;
> -analogix,lane0-swing = /bits/ 8 <0x14 0x54 0x64 0x74>;
> -analogix,lane1-swing = /bits/ 8 <0x14 0x54 0x64 0x74>;
>  
>  ports {
>  #address-cells = <1>;
> @@ -155,8 +96,6 @@ examples:
>  reg = <0>;
>  anx7625_in: endpoint {
>  remote-endpoint = <&mipi_dsi>;
> -bus-type = <5>;
> -data-lanes = <0 1 2 3>;
>  };
>  };
>  
> -- 
> 2.32.0
> 
> 


Re: [PATCH] drm: remove min_order BUG_ON check

2022-03-07 Thread Matthew Auld

On 07/03/2022 14:37, Arunpravin wrote:

place BUG_ON(order < min_order) outside do..while
loop as it fails Unigine Heaven benchmark.

Unigine Heaven has buffer allocation requests for
example required pages are 161 and alignment request
is 128. To allocate the remaining 33 pages, the loop continues
iterating and finds an order value of 5; when this is
compared with min_order = 7, the BUG_ON() triggers.
To avoid this problem, placed the BUG_ON
check outside of do..while loop.

Signed-off-by: Arunpravin 
---
  drivers/gpu/drm/drm_buddy.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 72f52f293249..ed94c56b720f 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -669,10 +669,11 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
order = fls(pages) - 1;
min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
  
+	BUG_ON(order < min_order);


Isn't the issue that we are allowing a size that is not aligned to the 
requested min_page_size? Should we not fix the caller (and throw a normal 
error here), or perhaps add the round_up() here instead?


i.e. if someone does:

alloc_blocks(mm, 0, end, 4096, 1<<16, &blocks, flags);

This will still trigger the BUG_ON() even if we move it out of the loop, 
AFAICT.



+
do {
order = min(order, (unsigned int)fls(pages) - 1);
BUG_ON(order > mm->max_order);
-   BUG_ON(order < min_order);
  
  		do {

if (flags & DRM_BUDDY_RANGE_ALLOCATION)

base-commit: 8025c79350b90e5a8029234d433578f12abbae2b


Re: [PATCH v1 2/2] Revert "arm64: dts: mt8183: jacuzzi: Fix bus properties in anx's DSI endpoint"

2022-03-07 Thread Rob Herring
On Mon, Mar 07, 2022 at 04:45:58PM +0100, Robert Foss wrote:
> This reverts commit 32568ae37596b529628ac09b875f4874e614f63f.
> ---
>  arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi | 2 --
>  1 file changed, 2 deletions(-)
> 
> diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi 
> b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi
> index e8f133dc96b95..8f7bf33f607da 100644
> --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi
> +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi
> @@ -171,8 +171,6 @@ port@0 {
>  
>   anx7625_in: endpoint {
>   remote-endpoint = <&dsi_out>;
> - bus-type = <5>;
> - data-lanes = <0 1 2 3>;

Well, this was clearly wrong. Connected to a DSI output, but parallel 
interface with lanes...

We should have a schema to disallow this combination.

Rob


Re: [PATCH] drm/selftests: missing error code in igt_buddy_alloc_smoke()

2022-03-07 Thread Dan Carpenter
On Mon, Mar 07, 2022 at 05:14:59PM +0100, Christian König wrote:
> Pushed to drm-misc-next. Just one nit below.
> 
> Am 07.03.22 um 15:54 schrieb Arunpravin:
> > Reviewed-by:Arunpravin 
> 
> Some people are picky about using the full name here.
> 

Signed-off-by is like signing a legal document to say that you have not
stolen anyone else's copyrighted works (SCO lawsuit).  So it should be
whatever name you use to sign legal documents.

regards,
dan carpenter



Re: [PATCH v12 1/6] drm: Add arch arm64 for drm_clflush_virt_range

2022-03-07 Thread Michael Cheng

Ah Thanks for the great feedback!

@Lucas or @Matt, could you please chime in?

Michael Cheng

On 2022-03-02 11:10 a.m., Robin Murphy wrote:

On 2022-03-02 15:55, Michael Cheng wrote:

Thanks for the feedback Robin!

Sorry, my choice of words wasn't great; what I meant is that I want to 
understand how ARM flushes a range of dcache for device drivers, rather 
than to find an exact equivalent of x86 clflush.


I believe the concern is if the CPU writes an update, that update 
might only be sitting in the CPU cache and never make it to device 
memory where the device can see it; there are specific places that we 
are supposed to flush the CPU caches to make sure our updates are 
visible to the hardware.


Ah, OK, if it's more about ordering, and it's actually write buffers 
rather than caches that you care about flushing, then we might be a 
lot safer, phew!


For a very simple overview, in a case where the device itself needs to 
observe memory writes in the correct order, e.g.:


data_descriptor.valid = 1;

clflush(&data_descriptor);

command_descriptor.data = &data_descriptor

writel(/* control register to read command to then read data */)

then dma_wmb() between the first two writes should be the right tool 
to ensure that the device does not observe the command update while 
the data update is still sat somewhere in a CPU write buffer.


If you want a slightly stronger notion that, at a given point, all 
prior writes have actually been issued and should now be visible 
(rather than just that they won't become visible in the wrong order 
whenever they do), then wmb() should suffice on arm64.


Note that with arm64 memory types, a Non-Cacheable mapping of DRAM 
for a non-coherent DMA mapping, or of VRAM in a prefetchable BAR, can 
still be write-buffered, so barriers still matter even when actual 
cache maintenance ops don't (and as before if you're trying to perform 
cache maintenance outside the DMA API then you've already lost 
anyway). MMIO registers should be mapped as Device memory via 
ioremap(), which is not bufferable, hence the barrier implicit in 
writel() effectively pushes out any prior buffered writes ahead of a 
register write, which is why we don't need to worry about this most of 
the time.


This is only a very rough overview, though, and I'm not familiar 
enough with x86 semantics, your hardware, or the exact use-case to be 
able to say whether barriers alone are anywhere near the right answer 
or not.


Robin.



+Matt Roper

Matt, Lucas, any feedback here?

On 2022-03-02 4:49 a.m., Robin Murphy wrote:

On 2022-02-25 19:27, Michael Cheng wrote:

Hi Robin,

[ +arm64 maintainers for their awareness, which would have been a 
good thing to do from the start ]


  * Thanks for adding the arm64 maintainers, and sorry I didn't rope
    them in sooner.

Why does i915 need to ensure the CPU's instruction cache is 
coherent with its data cache? Is it a self-modifying driver?


  * Also thanks for pointing this out. Initially I was using
    dcache_clean_inval_poc, which seems to be equivalent to what
    x86 is doing for dcache flushing, but it was giving me build
    errors since it's not on the global list of kernel symbols. And
    after revisiting the documentation for caches_clean_inval_pou, it
    won't fly for what we are trying to do. Moving forward, what would
    you (or someone in the ARM community) suggest we do? Could it be
    possible to export dcache_clean_inval_poc as a global symbol?


Unlikely, unless something with a legitimate need for CPU-centric 
cache maintenance like kexec or CPU hotplug ever becomes modular.


In the case of a device driver, it's not even the basic issues of 
assuming to find direct equivalents to x86 semantics in other CPU 
architectures, or effectively reinventing parts of the DMA API, it's 
even bigger than that. Once you move from being integrated in a 
single vendor's system architecture to being on a discrete card, you 
fundamentally *no longer have any control over cache coherency*. 
Whether the host CPU architecture happens to be AArch64, RISC-V, or 
whatever doesn't really matter, you're at the mercy of 3rd-party 
PCIe and interconnect IP vendors, and SoC integrators. You'll find 
yourself in systems where PCIe simply cannot snoop any caches, where 
you'd better have the correct DMA API calls in place to have any 
hope of even the most basic functionality working properly; you'll 
find yourself in systems where even if the PCIe root complex claims 
to support No Snoop, your uncached traffic will still end up 
snooping stale data that got prefetched back into caches you thought 
you'd invalidated; you'll find yourself in systems where your memory 
attributes may or may not get forcibly rewritten by an IOMMU 
depending on the kernel config and/or command line.


It's not about simply finding a substitute for clflush, it's that 
the reasons you have for using clflush in the first place can no 
longer be assumed to be valid.


Robin.


On 2022-

Re: [PATCH v1 1/2] Revert "dt-bindings:drm/bridge:anx7625:add vendor define"

2022-03-07 Thread Robert Foss
On Mon, 7 Mar 2022 at 17:38, Rob Herring  wrote:
>
> On Mon, Mar 07, 2022 at 04:45:57PM +0100, Robert Foss wrote:
> > This reverts commit a43661e7e819b100e1f833a35018560a1d9abb39.
>
> S-o-b and reason for the revert?
>
> > ---
> >  .../display/bridge/analogix,anx7625.yaml  | 65 +--
> >  1 file changed, 2 insertions(+), 63 deletions(-)
> >
> > diff --git 
> > a/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml 
> > b/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
> > index 1d3e88daca041..ab48ab2f4240d 100644
> > --- a/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
> > +++ b/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
> > @@ -43,70 +43,14 @@ properties:
> >vdd33-supply:
> >  description: Regulator that provides the supply 3.3V power.
> >
> > -  analogix,lane0-swing:
> > -$ref: /schemas/types.yaml#/definitions/uint8-array
> > -minItems: 1
> > -maxItems: 20
> > -description:
> > -  an array of swing register setting for DP tx lane0 PHY.
> > -  Registers 0~9 are Swing0_Pre0, Swing1_Pre0, Swing2_Pre0,
> > -  Swing3_Pre0, Swing0_Pre1, Swing1_Pre1, Swing2_Pre1, Swing0_Pre2,
> > -  Swing1_Pre2, Swing0_Pre3, they are for [Boost control] and
> > -  [Swing control] setting.
> > -  Registers 0~9, bit 3:0 is [Boost control], these bits control
> > -  post cursor manual, increase the [Boost control] to increase
> > -  Pre-emphasis value.
> > -  Registers 0~9, bit 6:4 is [Swing control], these bits control
> > -  swing manual, increase [Swing control] setting to add Vp-p value
> > -  for each Swing, Pre.
> > -  Registers 10~19 are Swing0_Pre0, Swing1_Pre0, Swing2_Pre0,
> > -  Swing3_Pre0, Swing0_Pre1, Swing1_Pre1, Swing2_Pre1, Swing0_Pre2,
> > -  Swing1_Pre2, Swing0_Pre3, they are for [R select control] and
> > -  [R Termination control] setting.
> > -  Registers 10~19, bit 4:0 is [R select control], these bits are
> > -  compensation manual, increase it can enhance IO driven strength
> > -  and Vp-p.
> > -  Registers 10~19, bit 5:6 is [R termination control], these bits
> > -  adjust 50ohm impedance of DP tx termination. 00:55 ohm,
> > -  01:50 ohm(default), 10:45 ohm, 11:40 ohm.
> > -
> > -  analogix,lane1-swing:
> > -$ref: /schemas/types.yaml#/definitions/uint8-array
> > -minItems: 1
> > -maxItems: 20
> > -description:
> > -  an array of swing register setting for DP tx lane1 PHY.
> > -  DP TX lane1 swing register setting same with lane0
> > -  swing, please refer lane0-swing property description.
>
> These apply to the DP side, so no need to revert this part.

Ack.

>
> > -
> > -  analogix,audio-enable:
> > -type: boolean
> > -description: let the driver enable audio HDMI codec function or not.
> > -
>
> Not sure on this one...

These additions are independent from my reading of this, would you
like a v2 with only the bus-type related changes reverted?

>
> >ports:
> >  $ref: /schemas/graph.yaml#/properties/ports
> >
> >  properties:
> >port@0:
> > -$ref: /schemas/graph.yaml#/$defs/port-base
> > -unevaluatedProperties: false
> > +$ref: /schemas/graph.yaml#/properties/port
> >  description:
> > -  MIPI DSI/DPI input.
> > -
> > -properties:
> > -  endpoint:
> > -$ref: /schemas/media/video-interfaces.yaml#
> > -type: object
> > -additionalProperties: false
> > -
> > -properties:
> > -  remote-endpoint: true
> > -
> > -  bus-type:
> > -enum: [1, 5]
>
> I think the error here is really 1 should be 4 which corresponds to
> D-PHY which is used by both CSI and DSI. Otherwise, I don't really see
> the issue with bus-type being shared between CSI and DSI.

I think that would be a correct solution. And ignoring everything
else, the range of this property is something that should be fixed.

But that would mean that CPI (camera parallel interface) and DPI
(display parallel interface) would share the
V4L2_FWNODE_BUS_TYPE_PARALLEL enum. I think that would be perfectly
functional, but it is not what V4L2_FWNODE_BUS_TYPE_PARALLEL is
documented to represent. As far as I can see it's only intended to
represent CPI.

Instead of having V4L2_FWNODE_BUS_TYPE_PARALLEL represent two
standards, I think they should be split. And possibly
V4L2_FWNODE_BUS_TYPE_PARALLEL should be renamed for CPI, but that is a
separate story. This would provide for the neatest and most legible
solution. If this solution is implemented, this range would be
incorrect. Additionally the snippet reverted in 2/2 of this series
would no longer be valid.

As it stands V4L2_FWNODE_BUS_TYPE_PARALLEL was used to represent DPI
due to not being caught in the review process.

>
> > -default: 1
> > -
> > -  data-lanes: true
> > +  

Re: [PATCH v5 0/3] Update VMware maintainer entries

2022-03-07 Thread Srivatsa S. Bhat
[+virtualization list, which I forgot to CC when posting v5]

Hi Thomas, other x86 maintainers,

On 2/25/22 2:23 PM, Srivatsa S. Bhat wrote:
> This series updates a few maintainer entries for VMware-maintained
> subsystems and cleans up references to VMware's private mailing lists
> to make it clear that they are effectively email-aliases to reach out
> to reviewers.
> 

Since this patchset got ACKs from the relevant subsystem maintainers,
would you mind taking them through your tree, if this looks good to
you too?

Thank you!

Regards,
Srivatsa

> Changes from v4->v5:
> - Add Alexey as reviewer for paravirt ops.
> 
> Changes from v3->v4:
> - Remove Cc: sta...@vger.kernel.org from patches 1 and 2.
> 
> Changes from v1->v3:
> - Add Zack as the named maintainer for vmmouse driver
> - Use R: to denote email-aliases for VMware reviewers
> 
> Regards,
> Srivatsa
> VMware Photon OS
> 
> ---
> 
> Srivatsa S. Bhat (VMware) (3):
>   MAINTAINERS: Update maintainers for paravirt ops and VMware hypervisor 
> interface
>   MAINTAINERS: Add Zack as maintainer of vmmouse driver
>   MAINTAINERS: Mark VMware mailing list entries as email aliases
> 
> 
>  MAINTAINERS | 31 ++-
>  1 file changed, 18 insertions(+), 13 deletions(-)
> 




Re: [PATCH 7/8] drm/i915: fixup the initial fb base on DG1

2022-03-07 Thread Ville Syrjälä
On Mon, Mar 07, 2022 at 10:32:36AM +, Matthew Auld wrote:
> On 04/03/2022 19:33, Ville Syrjälä wrote:
> > On Fri, Mar 04, 2022 at 05:23:32PM +, Matthew Auld wrote:
> >> The offset we get looks to be the exact start of DSM, but the
> >> initial_plane_vma expects the address to be relative.
> >>
> >> Signed-off-by: Matthew Auld 
> >> Cc: Thomas Hellström 
> >> ---
> >>   .../drm/i915/display/intel_plane_initial.c| 22 +++
> >>   1 file changed, 18 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c 
> >> b/drivers/gpu/drm/i915/display/intel_plane_initial.c
> >> index f797fcef18fc..b39d3a8dfe45 100644
> >> --- a/drivers/gpu/drm/i915/display/intel_plane_initial.c
> >> +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c
> >> @@ -56,10 +56,24 @@ initial_plane_vma(struct drm_i915_private *i915,
> >>if (!mem || plane_config->size == 0)
> >>return NULL;
> >>   
> >> -  base = round_down(plane_config->base,
> >> -I915_GTT_MIN_ALIGNMENT);
> >> -  size = round_up(plane_config->base + plane_config->size,
> >> -  mem->min_page_size);
> >> +  base = plane_config->base;
> >> +  if (IS_DGFX(i915)) {
> >> +  /*
> >> +   * On discrete the base address should be somewhere in LMEM, but
> >> +   * depending on the size of LMEM the base address might
> >> +   * intersect with the start of DSM, like on DG1, in which case
> >> +   * we need the relative address. In such cases we might also
> >> +   * need to choose between inital fb vs fbc, if space is limited.
> >> +   *
> >> +   * On future discrete HW, like DG2, we should be able to just
> >> +   * allocate directly from LMEM, due to larger LMEM size.
> >> +   */
> >> +  if (base >= i915->dsm.start)
> >> +  base -= i915->dsm.start;
> > 
> > Subsequent code expects the object to actually be inside stolen.
> > If that is not the case we should just give up.
> 
> Thanks for taking a look at this. Is that subsequent code outside 
> initial_plane_vma()? In the next patch this is now using LMEM directly 
> for dg2. Would that blow up somewhere else?

It uses i915_gem_object_create_stolen_for_preallocated() which assumes
the stuff is inside stolen.

> > The fact that we fail to confirm any of that on integrated
> > parts has always bugged me, but not enough to actually do
> > anything about it. Such a check would be somewhat more involved
> > since we'd have to look at the PTEs. But on discrete sounds like
> > we can get away with a trivial check.
> 
> Which PTEs?

The PTEs the plane is actually using. We have no idea where they
actually point to and just assume they represent a 1:1 mapping of
stolen.

I suppose with lmem we'll just start assuming a 1:1 mapping of
the whole lmem rather than just stolen.

-- 
Ville Syrjälä
Intel


Re: [PATCH v1 1/2] Revert "dt-bindings:drm/bridge:anx7625:add vendor define"

2022-03-07 Thread Laurent Pinchart
On Mon, Mar 07, 2022 at 05:57:47PM +0100, Robert Foss wrote:
> On Mon, 7 Mar 2022 at 17:38, Rob Herring  wrote:
> >
> > On Mon, Mar 07, 2022 at 04:45:57PM +0100, Robert Foss wrote:
> > > This reverts commit a43661e7e819b100e1f833a35018560a1d9abb39.
> >
> > S-o-b and reason for the revert?
> >
> > > ---
> > >  .../display/bridge/analogix,anx7625.yaml  | 65 +--
> > >  1 file changed, 2 insertions(+), 63 deletions(-)
> > >
> > > diff --git 
> > > a/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml 
> > > b/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
> > > index 1d3e88daca041..ab48ab2f4240d 100644
> > > --- 
> > > a/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
> > > +++ 
> > > b/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
> > > @@ -43,70 +43,14 @@ properties:
> > >vdd33-supply:
> > >  description: Regulator that provides the supply 3.3V power.
> > >
> > > -  analogix,lane0-swing:
> > > -$ref: /schemas/types.yaml#/definitions/uint8-array
> > > -minItems: 1
> > > -maxItems: 20
> > > -description:
> > > -  an array of swing register setting for DP tx lane0 PHY.
> > > -  Registers 0~9 are Swing0_Pre0, Swing1_Pre0, Swing2_Pre0,
> > > -  Swing3_Pre0, Swing0_Pre1, Swing1_Pre1, Swing2_Pre1, Swing0_Pre2,
> > > -  Swing1_Pre2, Swing0_Pre3, they are for [Boost control] and
> > > -  [Swing control] setting.
> > > -  Registers 0~9, bit 3:0 is [Boost control], these bits control
> > > -  post cursor manual, increase the [Boost control] to increase
> > > -  Pre-emphasis value.
> > > -  Registers 0~9, bit 6:4 is [Swing control], these bits control
> > > -  swing manual, increase [Swing control] setting to add Vp-p value
> > > -  for each Swing, Pre.
> > > -  Registers 10~19 are Swing0_Pre0, Swing1_Pre0, Swing2_Pre0,
> > > -  Swing3_Pre0, Swing0_Pre1, Swing1_Pre1, Swing2_Pre1, Swing0_Pre2,
> > > -  Swing1_Pre2, Swing0_Pre3, they are for [R select control] and
> > > -  [R Termination control] setting.
> > > -  Registers 10~19, bit 4:0 is [R select control], these bits are
> > > -  compensation manual, increase it can enhance IO driven strength
> > > -  and Vp-p.
> > > -  Registers 10~19, bit 5:6 is [R termination control], these bits
> > > -  adjust 50ohm impedance of DP tx termination. 00:55 ohm,
> > > -  01:50 ohm(default), 10:45 ohm, 11:40 ohm.
> > > -
> > > -  analogix,lane1-swing:
> > > -$ref: /schemas/types.yaml#/definitions/uint8-array
> > > -minItems: 1
> > > -maxItems: 20
> > > -description:
> > > -  an array of swing register setting for DP tx lane1 PHY.
> > > -  DP TX lane1 swing register setting same with lane0
> > > -  swing, please refer lane0-swing property description.
> >
> > These apply to the DP side, so no need to revert this part.
> 
> Ack.
> 
> >
> > > -
> > > -  analogix,audio-enable:
> > > -type: boolean
> > > -description: let the driver enable audio HDMI codec function or not.
> > > -
> >
> > Not sure on this one...
> 
> These additions are independent from my reading of this, would you
> like a v2 with only the bus-type related changes reverted?
> 
> >
> > >ports:
> > >  $ref: /schemas/graph.yaml#/properties/ports
> > >
> > >  properties:
> > >port@0:
> > > -$ref: /schemas/graph.yaml#/$defs/port-base
> > > -unevaluatedProperties: false
> > > +$ref: /schemas/graph.yaml#/properties/port
> > >  description:
> > > -  MIPI DSI/DPI input.
> > > -
> > > -properties:
> > > -  endpoint:
> > > -$ref: /schemas/media/video-interfaces.yaml#
> > > -type: object
> > > -additionalProperties: false
> > > -
> > > -properties:
> > > -  remote-endpoint: true
> > > -
> > > -  bus-type:
> > > -enum: [1, 5]
> >
> > I think the error here is really 1 should be 4 which corresponds to
> > D-PHY which is used by both CSI and DSI. Otherwise, I don't really see
> > the issue with bus-type being shared between CSI and DSI.
> 
> I think that would be a correct solution. And ignoring everything
> else, the range of this property is something that should be fixed.
> 
> But that would mean that CPI (camera parallel interface) and DPI
> (display parallel interface) would share the
> V4L2_FWNODE_BUS_TYPE_PARALLEL enum. I think that would be perfectly
> functional, but it is not what V4L2_FWNODE_BUS_TYPE_PARALLEL is
> documented to represent. As far as I can see it's only intended to
> represent CPI.

Are you aware of any standard documenting camera parallel interfaces
with separate sync signals ? I was looking for that the other day, and
couldn't find much. CPI seems to be an old MIPI standard, but I couldn't
find any public document, I'm not sure if it actually matches.

Another common parallel interface in SoCs 

Re: [PATCH v1 1/2] Revert "dt-bindings:drm/bridge:anx7625:add vendor define"

2022-03-07 Thread Rob Herring
On Mon, Mar 7, 2022 at 11:11 AM Laurent Pinchart
 wrote:
>
> On Mon, Mar 07, 2022 at 05:57:47PM +0100, Robert Foss wrote:
> > On Mon, 7 Mar 2022 at 17:38, Rob Herring  wrote:
> > >
> > > On Mon, Mar 07, 2022 at 04:45:57PM +0100, Robert Foss wrote:
> > > > This reverts commit a43661e7e819b100e1f833a35018560a1d9abb39.
> > >
> > > S-o-b and reason for the revert?
> > >
> > > > ---
> > > >  .../display/bridge/analogix,anx7625.yaml  | 65 +--
> > > >  1 file changed, 2 insertions(+), 63 deletions(-)
> > > >
> > > > diff --git 
> > > > a/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
> > > >  
> > > > b/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
> > > > index 1d3e88daca041..ab48ab2f4240d 100644
> > > > --- 
> > > > a/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
> > > > +++ 
> > > > b/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
> > > > @@ -43,70 +43,14 @@ properties:
> > > >vdd33-supply:
> > > >  description: Regulator that provides the supply 3.3V power.
> > > >
> > > > -  analogix,lane0-swing:
> > > > -$ref: /schemas/types.yaml#/definitions/uint8-array
> > > > -minItems: 1
> > > > -maxItems: 20
> > > > -description:
> > > > -  an array of swing register setting for DP tx lane0 PHY.
> > > > -  Registers 0~9 are Swing0_Pre0, Swing1_Pre0, Swing2_Pre0,
> > > > -  Swing3_Pre0, Swing0_Pre1, Swing1_Pre1, Swing2_Pre1, Swing0_Pre2,
> > > > -  Swing1_Pre2, Swing0_Pre3, they are for [Boost control] and
> > > > -  [Swing control] setting.
> > > > -  Registers 0~9, bit 3:0 is [Boost control], these bits control
> > > > -  post cursor manual, increase the [Boost control] to increase
> > > > -  Pre-emphasis value.
> > > > -  Registers 0~9, bit 6:4 is [Swing control], these bits control
> > > > -  swing manual, increase [Swing control] setting to add Vp-p value
> > > > -  for each Swing, Pre.
> > > > -  Registers 10~19 are Swing0_Pre0, Swing1_Pre0, Swing2_Pre0,
> > > > -  Swing3_Pre0, Swing0_Pre1, Swing1_Pre1, Swing2_Pre1, Swing0_Pre2,
> > > > -  Swing1_Pre2, Swing0_Pre3, they are for [R select control] and
> > > > -  [R Termination control] setting.
> > > > -  Registers 10~19, bit 4:0 is [R select control], these bits are
> > > > -  compensation manual, increase it can enhance IO driven strength
> > > > -  and Vp-p.
> > > > -  Registers 10~19, bit 5:6 is [R termination control], these bits
> > > > -  adjust 50ohm impedance of DP tx termination. 00:55 ohm,
> > > > -  01:50 ohm(default), 10:45 ohm, 11:40 ohm.
> > > > -
> > > > -  analogix,lane1-swing:
> > > > -$ref: /schemas/types.yaml#/definitions/uint8-array
> > > > -minItems: 1
> > > > -maxItems: 20
> > > > -description:
> > > > -  an array of swing register setting for DP tx lane1 PHY.
> > > > -  DP TX lane1 swing register setting same with lane0
> > > > -  swing, please refer lane0-swing property description.
> > >
> > > These apply to the DP side, so no need to revert this part.
> >
> > Ack.
> >
> > >
> > > > -
> > > > -  analogix,audio-enable:
> > > > -type: boolean
> > > > -description: let the driver enable audio HDMI codec function or 
> > > > not.
> > > > -
> > >
> > > Not sure on this one...
> >
> > These additions are independent from my reading of this, would you
> > like a v2 with only the bus-type related changes reverted?

Yes.

> >
> > >
> > > >ports:
> > > >  $ref: /schemas/graph.yaml#/properties/ports
> > > >
> > > >  properties:
> > > >port@0:
> > > > -$ref: /schemas/graph.yaml#/$defs/port-base
> > > > -unevaluatedProperties: false
> > > > +$ref: /schemas/graph.yaml#/properties/port
> > > >  description:
> > > > -  MIPI DSI/DPI input.
> > > > -
> > > > -properties:
> > > > -  endpoint:
> > > > -$ref: /schemas/media/video-interfaces.yaml#
> > > > -type: object
> > > > -additionalProperties: false
> > > > -
> > > > -properties:
> > > > -  remote-endpoint: true
> > > > -
> > > > -  bus-type:
> > > > -enum: [1, 5]
> > >
> > > I think the error here is really 1 should be 4 which corresponds to
> > > D-PHY which is used by both CSI and DSI. Otherwise, I don't really see
> > > the issue with bus-type being shared between CSI and DSI.
> >
> > I think that would be a correct solution. And ignoring everything
> > else, the range of this property is something that should be fixed.
> >
> > But that would mean that CPI (camera parallel interface) and DPI
> > (display parallel interface) would share the
> > V4L2_FWNODE_BUS_TYPE_PARALLEL enum. I think that would be perfectly
> > functional, but it is not what V4L2_FWNODE_BUS_TYPE_PARALLEL is
> > documented to represent. As far as I can see it's only intended to
> > represent CPI.
>
> Are you aware 

[PATCH v2 0/4] drm/bridge: ti-sn65dsi86: Support non-eDP DisplayPort connectors

2022-03-07 Thread Kieran Bingham
Implement support for non eDP connectors on the TI-SN65DSI86 bridge, and
provide IRQ based hotplug detect to identify when the connector is
present.

no-hpd is extended to be the default behaviour for non DisplayPort
connectors.

This series is based on top of José Expósito's patch [0] "drm/bridge:
ti-sn65dsi86: switch to devm_drm_of_get_bridge" and Nikita Yushchenko's
patch [1] "drm/bridge_connector: enable HPD by default if supported".

[0] 
https://lore.kernel.org/all/20220228183955.25508-1-jose.exposit...@gmail.com/
[1] 
https://lore.kernel.org/all/20211225063151.2110878-1-nikita.yo...@cogentembedded.com/

Kieran Bingham (1):
  drm/bridge: ti-sn65dsi86: Support hotplug detection

Laurent Pinchart (3):
  drm/bridge: ti-sn65dsi86: Implement bridge connector operations
  drm/bridge: ti-sn65dsi86: Make connector creation optional
  drm/bridge: ti-sn65dsi86: Support DisplayPort (non-eDP) mode

 drivers/gpu/drm/bridge/ti-sn65dsi86.c | 178 ++
 1 file changed, 156 insertions(+), 22 deletions(-)

-- 
2.32.0



[PATCH v2 3/4] drm/bridge: ti-sn65dsi86: Support DisplayPort (non-eDP) mode

2022-03-07 Thread Kieran Bingham
From: Laurent Pinchart 

Despite the SN65DSI86 being an eDP bridge, on some systems its output is
routed to a DisplayPort connector. Enable DisplayPort mode when the next
component in the display pipeline is detected as a DisplayPort
connector, and disable eDP features in that case.

Signed-off-by: Laurent Pinchart 
Reworked to set bridge type based on the next bridge/connector.
Signed-off-by: Kieran Bingham 
--
Changes since v1/RFC:
 - Rebased on top of "drm/bridge: ti-sn65dsi86: switch to
   devm_drm_of_get_bridge"
 - eDP/DP mode determined from the next bridge connector type.

 drivers/gpu/drm/bridge/ti-sn65dsi86.c | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c 
b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
index 29f5f7123ed9..461f963faa0b 100644
--- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c
+++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
@@ -60,6 +60,7 @@
 #define SN_LN_ASSIGN_REG   0x59
 #define  LN_ASSIGN_WIDTH   2
 #define SN_ENH_FRAME_REG   0x5A
+#define  ASSR_CONTROL  BIT(0)
 #define  VSTREAM_ENABLEBIT(3)
 #define  LN_POLRS_OFFSET   4
 #define  LN_POLRS_MASK 0xf0
@@ -91,6 +92,8 @@
 #define SN_DATARATE_CONFIG_REG 0x94
 #define  DP_DATARATE_MASK  GENMASK(7, 5)
 #define  DP_DATARATE(x)((x) << 5)
+#define SN_TRAINING_SETTING_REG0x95
+#define  SCRAMBLE_DISABLE  BIT(4)
 #define SN_ML_TX_MODE_REG  0x96
 #define  ML_TX_MAIN_LINK_OFF   0
 #define  ML_TX_NORMAL_MODE BIT(0)
@@ -1005,6 +1008,11 @@ static int ti_sn_link_training(struct ti_sn65dsi86 
*pdata, int dp_rate_idx,
regmap_update_bits(pdata->regmap, SN_DATARATE_CONFIG_REG,
   DP_DATARATE_MASK, DP_DATARATE(dp_rate_idx));
 
+   /* For DisplayPort, use the standard DP scrambler seed. */
+   if (pdata->bridge.type == DRM_MODE_CONNECTOR_DisplayPort)
+   regmap_update_bits(pdata->regmap, SN_ENH_FRAME_REG,
+  ASSR_CONTROL, 0);
+
/* enable DP PLL */
regmap_write(pdata->regmap, SN_PLL_ENABLE_REG, 1);
 
@@ -1016,6 +1024,11 @@ static int ti_sn_link_training(struct ti_sn65dsi86 
*pdata, int dp_rate_idx,
goto exit;
}
 
+   /* For DisplayPort, disable scrambling mode. */
+   if (pdata->bridge.type == DRM_MODE_CONNECTOR_DisplayPort)
+   regmap_update_bits(pdata->regmap, SN_TRAINING_SETTING_REG,
+  SCRAMBLE_DISABLE, SCRAMBLE_DISABLE);
+
/*
 * We'll try to link train several times.  As part of link training
 * the bridge chip will write DP_SET_POWER_D0 to DP_SET_POWER.  If
@@ -1260,7 +1273,8 @@ static int ti_sn_bridge_probe(struct auxiliary_device 
*adev,
pdata->bridge.funcs = &ti_sn_bridge_funcs;
pdata->bridge.of_node = np;
pdata->bridge.ops = DRM_BRIDGE_OP_EDID;
-   pdata->bridge.type = DRM_MODE_CONNECTOR_eDP;
+   pdata->bridge.type = pdata->next_bridge->type == 
DRM_MODE_CONNECTOR_DisplayPort
+  ? DRM_MODE_CONNECTOR_DisplayPort : 
DRM_MODE_CONNECTOR_eDP;
 
drm_bridge_add(&pdata->bridge);
 
-- 
2.32.0



[PATCH v2 2/4] drm/bridge: ti-sn65dsi86: Make connector creation optional

2022-03-07 Thread Kieran Bingham
From: Laurent Pinchart 

Now that the driver supports the connector-related bridge operations,
make the connector creation optional. This enables usage of the
sn65dsi86 with the DRM bridge connector helper.

Signed-off-by: Laurent Pinchart 
Signed-off-by: Kieran Bingham 

---
Changes since v1:
 - None

 drivers/gpu/drm/bridge/ti-sn65dsi86.c | 17 +++--
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c 
b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
index ffb6c04f6c46..29f5f7123ed9 100644
--- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c
+++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
@@ -745,11 +745,6 @@ static int ti_sn_bridge_attach(struct drm_bridge *bridge,
struct ti_sn65dsi86 *pdata = bridge_to_ti_sn65dsi86(bridge);
int ret;
 
-   if (flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR) {
-   DRM_ERROR("Fix bridge driver to make connector optional!");
-   return -EINVAL;
-   }
-
pdata->aux.drm_dev = bridge->dev;
ret = drm_dp_aux_register(&pdata->aux);
if (ret < 0) {
@@ -757,12 +752,14 @@ static int ti_sn_bridge_attach(struct drm_bridge *bridge,
return ret;
}
 
-   ret = ti_sn_bridge_connector_init(pdata);
-   if (ret < 0)
-   goto err_conn_init;
+   if (!(flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR)) {
+   ret = ti_sn_bridge_connector_init(pdata);
+   if (ret < 0)
+   goto err_conn_init;
 
-   /* We never want the next bridge to *also* create a connector: */
-   flags |= DRM_BRIDGE_ATTACH_NO_CONNECTOR;
+   /* We never want the next bridge to *also* create a connector: 
*/
+   flags |= DRM_BRIDGE_ATTACH_NO_CONNECTOR;
+   }
 
/* Attach the next bridge */
ret = drm_bridge_attach(bridge->encoder, pdata->next_bridge,
-- 
2.32.0



[PATCH v2 1/4] drm/bridge: ti-sn65dsi86: Implement bridge connector operations

2022-03-07 Thread Kieran Bingham
From: Laurent Pinchart 

Implement the bridge connector-related .get_edid() operation, and report
the related bridge capabilities and type.

Signed-off-by: Laurent Pinchart 
Reviewed-by: Stephen Boyd 
Reviewed-by: Douglas Anderson 
Signed-off-by: Kieran Bingham 
---
Changes since v1:

- The connector .get_modes() operation doesn't rely on EDID anymore,
  __ti_sn_bridge_get_edid() and ti_sn_bridge_get_edid() got merged
  together

Notes from Kieran:

RB Tags collected from:
 
https://lore.kernel.org/all/20210322030128.2283-9-laurent.pinchart+rene...@ideasonboard.com/

However this was over a year ago, so let me know if other patches now
supersede this one or otherwise invalidate this update.

 drivers/gpu/drm/bridge/ti-sn65dsi86.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c 
b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
index c55848588123..ffb6c04f6c46 100644
--- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c
+++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
@@ -1154,6 +1154,19 @@ static void ti_sn_bridge_post_disable(struct drm_bridge 
*bridge)
pm_runtime_put_sync(pdata->dev);
 }
 
+static struct edid *ti_sn_bridge_get_edid(struct drm_bridge *bridge,
+ struct drm_connector *connector)
+{
+   struct ti_sn65dsi86 *pdata = bridge_to_ti_sn65dsi86(bridge);
+   struct edid *edid;
+
+   pm_runtime_get_sync(pdata->dev);
+   edid = drm_get_edid(connector, &pdata->aux.ddc);
+   pm_runtime_put_autosuspend(pdata->dev);
+
+   return edid;
+}
+
 static const struct drm_bridge_funcs ti_sn_bridge_funcs = {
.attach = ti_sn_bridge_attach,
.detach = ti_sn_bridge_detach,
@@ -1162,6 +1175,7 @@ static const struct drm_bridge_funcs ti_sn_bridge_funcs = 
{
.enable = ti_sn_bridge_enable,
.disable = ti_sn_bridge_disable,
.post_disable = ti_sn_bridge_post_disable,
+   .get_edid = ti_sn_bridge_get_edid,
 };
 
 static void ti_sn_bridge_parse_lanes(struct ti_sn65dsi86 *pdata,
@@ -1248,6 +1262,8 @@ static int ti_sn_bridge_probe(struct auxiliary_device 
*adev,
 
pdata->bridge.funcs = &ti_sn_bridge_funcs;
pdata->bridge.of_node = np;
+   pdata->bridge.ops = DRM_BRIDGE_OP_EDID;
+   pdata->bridge.type = DRM_MODE_CONNECTOR_eDP;
 
drm_bridge_add(&pdata->bridge);
 
-- 
2.32.0



[PATCH v2 4/4] drm/bridge: ti-sn65dsi86: Support hotplug detection

2022-03-07 Thread Kieran Bingham
When the SN65DSI86 is used in DisplayPort mode, its output is likely
routed to a DisplayPort connector, which can benefit from hotplug
detection. Support it in such cases, with polling mode only for now.

The implementation is limited to the bridge operations, as the connector
operations are legacy and new users should use
DRM_BRIDGE_ATTACH_NO_CONNECTOR.

Signed-off-by: Laurent Pinchart 
Signed-off-by: Kieran Bingham 
---
Changes since v1:

- Document the no_hpd field
- Rely on the SN_HPD_DISABLE_REG default value in the HPD case
- Add a TODO comment regarding IRQ support
[Kieran]
- Fix spelling s/assrted/asserted/
- Only enable HPD on DisplayPort connector.
- Support IRQ based hotplug detect
---
 drivers/gpu/drm/bridge/ti-sn65dsi86.c | 133 +++---
 1 file changed, 120 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c 
b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
index 461f963faa0b..febb4e672ece 100644
--- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c
+++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
@@ -68,6 +68,7 @@
 #define  BPP_18_RGBBIT(0)
 #define SN_HPD_DISABLE_REG 0x5C
 #define  HPD_DISABLE   BIT(0)
+#define  HPD_DEBOUNCED_STATE   BIT(4)
 #define SN_GPIO_IO_REG 0x5E
 #define  SN_GPIO_INPUT_SHIFT   4
 #define  SN_GPIO_OUTPUT_SHIFT  0
@@ -104,10 +105,24 @@
 #define SN_PWM_EN_INV_REG  0xA5
 #define  SN_PWM_INV_MASK   BIT(0)
 #define  SN_PWM_EN_MASKBIT(1)
+#define SN_IRQ_EN_REG  0xE0
+#define  IRQ_ENBIT(0)
+#define SN_IRQ_HPD_REG 0xE6
+#define  IRQ_HPD_ENBIT(0)
+#define  IRQ_HPD_INSERTION_EN  BIT(1)
+#define  IRQ_HPD_REMOVAL_ENBIT(2)
+#define  IRQ_HPD_REPLUG_EN BIT(3)
+#define  IRQ_HPD_PLL_UNLOCK_EN BIT(5)
 #define SN_AUX_CMD_STATUS_REG  0xF4
 #define  AUX_IRQ_STATUS_AUX_RPLY_TOUT  BIT(3)
 #define  AUX_IRQ_STATUS_AUX_SHORT  BIT(5)
 #define  AUX_IRQ_STATUS_NAT_I2C_FAIL   BIT(6)
+#define SN_IRQ_HPD_STATUS_REG  0xF5
+#define  IRQ_HPD_STATUSBIT(0)
+#define  IRQ_HPD_INSERTION_STATUS  BIT(1)
+#define  IRQ_HPD_REMOVAL_STATUSBIT(2)
+#define  IRQ_HPD_REPLUG_STATUS BIT(3)
+#define  IRQ_PLL_UNLOCKBIT(5)
 
 #define MIN_DSI_CLK_FREQ_MHZ   40
 
@@ -166,6 +181,11 @@
  * @pwm_enabled:  Used to track if the PWM signal is currently enabled.
  * @pwm_pin_busy: Track if GPIO4 is currently requested for GPIO or PWM.
  * @pwm_refclk_freq: Cache for the reference clock input to the PWM.
+ *
+ * @no_hpd:   Disable hot-plug detection as instructed by device tree (used
+ *for instance for eDP panels whose HPD signal won't be 
asserted
+ *until the panel is turned on, and is thus not usable for
+ *downstream device detection).
  */
 struct ti_sn65dsi86 {
struct auxiliary_device bridge_aux;
@@ -200,6 +220,8 @@ struct ti_sn65dsi86 {
atomic_tpwm_pin_busy;
 #endif
unsigned intpwm_refclk_freq;
+
+   boolno_hpd;
 };
 
 static const struct regmap_range ti_sn65dsi86_volatile_ranges[] = {
@@ -314,23 +336,25 @@ static void ti_sn65dsi86_enable_comms(struct ti_sn65dsi86 
*pdata)
ti_sn_bridge_set_refclk_freq(pdata);
 
/*
-* HPD on this bridge chip is a bit useless.  This is an eDP bridge
-* so the HPD is an internal signal that's only there to signal that
-* the panel is done powering up.  ...but the bridge chip debounces
-* this signal by between 100 ms and 400 ms (depending on process,
-* voltage, and temperate--I measured it at about 200 ms).  One
+* As this is an eDP bridge, the output will be connected to a fixed
+* panel in most systems. HPD is in that case only an internal signal
+* to signal that the panel is done powering up. The bridge chip
+* debounces this signal by between 100 ms and 400 ms (depending on
+* process, voltage, and temperate--I measured it at about 200 ms). One
 * particular panel asserted HPD 84 ms after it was powered on meaning
 * that we saw HPD 284 ms after power on.  ...but the same panel said
 * that instead of looking at HPD you could just hardcode a delay of
-* 200 ms.  We'll assume that the panel driver will have the hardcoded
-* delay in its prepare and always disable HPD.
+* 200 ms. HPD is thus a bit useless. For this type of use cases, we'll
+* assume that the panel driver will have the hardco

Re: [PATCH v1 3/3] drm/panel : innolux-ej030na and abt-y030xx067a : add .enable and .disable

2022-03-07 Thread Christophe Branchereau
Hi Paul, it should in theory, but it doesn't work in practice: the
display doesn't like having that bit set outside of the init sequence.

Feel free to experiment if you think you can make it work though, you
should have that panel on 1 or 2 devices I think.

KR
CB

On Wed, Mar 2, 2022 at 12:22 PM Paul Cercueil  wrote:
>
> Hi Christophe,
>
> Le mar., mars 1 2022 at 16:31:22 +0100, Christophe Branchereau
>  a écrit :
> > Following the introduction of bridge_atomic_enable in the ingenic
> > drm driver, the crtc is enabled between .prepare and .enable, if
> > it exists.
> >
> > Add it so the backlight is only enabled after the crtc is, to avoid
> > graphical issues.
> >
> > Signed-off-by: Christophe Branchereau 
> > ---
> >  drivers/gpu/drm/panel/panel-abt-y030xx067a.c  | 23 --
> >  drivers/gpu/drm/panel/panel-innolux-ej030na.c | 31
> > ---
> >  2 files changed, 48 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/panel/panel-abt-y030xx067a.c
> > b/drivers/gpu/drm/panel/panel-abt-y030xx067a.c
> > index f043b484055b..b5736344e3ec 100644
> > --- a/drivers/gpu/drm/panel/panel-abt-y030xx067a.c
> > +++ b/drivers/gpu/drm/panel/panel-abt-y030xx067a.c
> > @@ -183,8 +183,6 @@ static int y030xx067a_prepare(struct drm_panel
> > *panel)
> >   goto err_disable_regulator;
> >   }
> >
> > - msleep(120);
> > -
> >   return 0;
> >
> >  err_disable_regulator:
> > @@ -202,6 +200,25 @@ static int y030xx067a_unprepare(struct drm_panel
> > *panel)
> >   return 0;
> >  }
> >
> > +static int y030xx067a_enable(struct drm_panel *panel)
> > +{
> > + if (panel->backlight) {
> > + /* Wait for the picture to be ready before enabling backlight 
> > */
> > + msleep(120);
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +static int y030xx067a_disable(struct drm_panel *panel)
> > +{
> > + struct y030xx067a *priv = to_y030xx067a(panel);
> > +
> > + regmap_clear_bits(priv->map, 0x06, REG06_XPSAVE);
>
> Shouldn't that be balanced by a regmap_set_bits() in the .enable()
> function?
>
> Cheers,
> -Paul
>
> > +
> > + return 0;
> > +}
> > +
> >  static int y030xx067a_get_modes(struct drm_panel *panel,
> >   struct drm_connector *connector)
> >  {
> > @@ -239,6 +256,8 @@ static int y030xx067a_get_modes(struct drm_panel
> > *panel,
> >  static const struct drm_panel_funcs y030xx067a_funcs = {
> >   .prepare= y030xx067a_prepare,
> >   .unprepare  = y030xx067a_unprepare,
> > + .enable = y030xx067a_enable,
> > + .disable= y030xx067a_disable,
> >   .get_modes  = y030xx067a_get_modes,
> >  };
> >
> > diff --git a/drivers/gpu/drm/panel/panel-innolux-ej030na.c
> > b/drivers/gpu/drm/panel/panel-innolux-ej030na.c
> > index c558de3f99be..6de7370185cd 100644
> > --- a/drivers/gpu/drm/panel/panel-innolux-ej030na.c
> > +++ b/drivers/gpu/drm/panel/panel-innolux-ej030na.c
> > @@ -80,8 +80,6 @@ static const struct reg_sequence
> > ej030na_init_sequence[] = {
> >   { 0x47, 0x08 },
> >   { 0x48, 0x0f },
> >   { 0x49, 0x0f },
> > -
> > - { 0x2b, 0x01 },
> >  };
> >
> >  static int ej030na_prepare(struct drm_panel *panel)
> > @@ -109,8 +107,6 @@ static int ej030na_prepare(struct drm_panel
> > *panel)
> >   goto err_disable_regulator;
> >   }
> >
> > - msleep(120);
> > -
> >   return 0;
> >
> >  err_disable_regulator:
> > @@ -128,6 +124,31 @@ static int ej030na_unprepare(struct drm_panel
> > *panel)
> >   return 0;
> >  }
> >
> > +static int ej030na_enable(struct drm_panel *panel)
> > +{
> > + struct ej030na *priv = to_ej030na(panel);
> > +
> > + /* standby off */
> > + regmap_write(priv->map, 0x2b, 0x01);
> > +
> > + if (panel->backlight) {
> > + /* Wait for the picture to be ready before enabling backlight 
> > */
> > + msleep(120);
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +static int ej030na_disable(struct drm_panel *panel)
> > +{
> > + struct ej030na *priv = to_ej030na(panel);
> > +
> > + /* standby on */
> > + regmap_write(priv->map, 0x2b, 0x00);
> > +
> > + return 0;
> > +}
> > +
> >  static int ej030na_get_modes(struct drm_panel *panel,
> >struct drm_connector *connector)
> >  {
> > @@ -165,6 +186,8 @@ static int ej030na_get_modes(struct drm_panel
> > *panel,
> >  static const struct drm_panel_funcs ej030na_funcs = {
> >   .prepare= ej030na_prepare,
> >   .unprepare  = ej030na_unprepare,
> > + .enable = ej030na_enable,
> > + .disable= ej030na_disable,
> >   .get_modes  = ej030na_get_modes,
> >  };
> >
> > --
> > 2.34.1
> >
>
>


[PATCH] drm/rockchip: remove redundant assignment to pointer connector

2022-03-07 Thread Colin Ian King
The pointer connector is being assigned a value that is never read;
it is being re-assigned in the following statement. The assignment
is redundant and can be removed.

Cleans up clang scan build warning:
drivers/gpu/drm/rockchip/rockchip_rgb.c:153:2: warning: Value stored
to 'connector' is never read [deadcode.DeadStores]

Signed-off-by: Colin Ian King 
---
 drivers/gpu/drm/rockchip/rockchip_rgb.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/rockchip/rockchip_rgb.c 
b/drivers/gpu/drm/rockchip/rockchip_rgb.c
index 2494b079489d..92a727931a49 100644
--- a/drivers/gpu/drm/rockchip/rockchip_rgb.c
+++ b/drivers/gpu/drm/rockchip/rockchip_rgb.c
@@ -150,7 +150,6 @@ struct rockchip_rgb *rockchip_rgb_init(struct device *dev,
if (ret)
goto err_free_encoder;
 
-   connector = &rgb->connector;
connector = drm_bridge_connector_init(rgb->drm_dev, encoder);
if (IS_ERR(connector)) {
DRM_DEV_ERROR(drm_dev->dev,
-- 
2.35.1



Re: [PATCH 7/8] drm/i915: fixup the initial fb base on DG1

2022-03-07 Thread Matthew Auld

On 07/03/2022 17:06, Ville Syrjälä wrote:

On Mon, Mar 07, 2022 at 10:32:36AM +, Matthew Auld wrote:

On 04/03/2022 19:33, Ville Syrjälä wrote:

On Fri, Mar 04, 2022 at 05:23:32PM +, Matthew Auld wrote:

The offset we get looks to be the exact start of DSM, but
initial_plane_vma() expects the address to be relative.

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
---
   .../drm/i915/display/intel_plane_initial.c| 22 +++
   1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c 
b/drivers/gpu/drm/i915/display/intel_plane_initial.c
index f797fcef18fc..b39d3a8dfe45 100644
--- a/drivers/gpu/drm/i915/display/intel_plane_initial.c
+++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c
@@ -56,10 +56,24 @@ initial_plane_vma(struct drm_i915_private *i915,
if (!mem || plane_config->size == 0)
return NULL;
   
-	base = round_down(plane_config->base,

- I915_GTT_MIN_ALIGNMENT);
-   size = round_up(plane_config->base + plane_config->size,
-   mem->min_page_size);
+   base = plane_config->base;
+   if (IS_DGFX(i915)) {
+   /*
+* On discrete the base address should be somewhere in LMEM, but
+* depending on the size of LMEM the base address might
+* intersect with the start of DSM, like on DG1, in which case
+* we need the relative address. In such cases we might also
+* need to choose between inital fb vs fbc, if space is limited.
+*
+* On future discrete HW, like DG2, we should be able to just
+* allocate directly from LMEM, due to larger LMEM size.
+*/
+   if (base >= i915->dsm.start)
+   base -= i915->dsm.start;


Subsequent code expects the object to actually be inside stolen.
If that is not the case we should just give up.


Thanks for taking a look at this. Is that subsequent code outside
initial_plane_vma()? In the next patch this is now using LMEM directly
for dg2. Would that blow up somewhere else?


It uses i915_gem_object_create_stolen_for_preallocated() which assumes
the stuff is inside stolen.


At the start of the series that gets ripped out and replaced with 
i915_gem_object_create_region_at(), where we can now just pass in the 
intel_memory_region, and the backend hopefully takes care of the rest.





The fact that we fail to confirm any of that on integrated
parts has always bugged me, but not enough to actually do
anything about it. Such a check would be somewhat more involved
since we'd have to look at the PTEs. But on discrete sounds like
we can get away with a trivial check.


Which PTEs?


The PTEs the plane is actually using. We have no idea where they
actually point to and just assume they represent a 1:1 mapping of
stolen.

I suppose with lmem we'll just start assuming a 1:1 mapping of
the whole lmem rather than just stolen.


So IIUC the base that we read is actually some GGTT address (I guess it
comes pre-programmed or something?), and that hopefully 1:1 maps to
stolen. Ok, so as you say, I guess we only want to subtract the
dsm.start for the physical allocation, and not the GGTT address, when
dealing with stolen lmem.






Re: [PATCH 7/8] drm/i915: fixup the initial fb base on DG1

2022-03-07 Thread Ville Syrjälä
On Mon, Mar 07, 2022 at 06:26:32PM +, Matthew Auld wrote:
> On 07/03/2022 17:06, Ville Syrjälä wrote:
> > On Mon, Mar 07, 2022 at 10:32:36AM +, Matthew Auld wrote:
> >> On 04/03/2022 19:33, Ville Syrjälä wrote:
> >>> On Fri, Mar 04, 2022 at 05:23:32PM +, Matthew Auld wrote:
>  The offset we get looks to be the exact start of DSM, but the
>  inital_plane_vma expects the address to be relative.
> 
>  Signed-off-by: Matthew Auld 
>  Cc: Thomas Hellström 
>  ---
> .../drm/i915/display/intel_plane_initial.c| 22 +++
> 1 file changed, 18 insertions(+), 4 deletions(-)
> 
>  diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c 
>  b/drivers/gpu/drm/i915/display/intel_plane_initial.c
>  index f797fcef18fc..b39d3a8dfe45 100644
>  --- a/drivers/gpu/drm/i915/display/intel_plane_initial.c
>  +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c
>  @@ -56,10 +56,24 @@ initial_plane_vma(struct drm_i915_private *i915,
>   if (!mem || plane_config->size == 0)
>   return NULL;
> 
>  -base = round_down(plane_config->base,
>  -  I915_GTT_MIN_ALIGNMENT);
>  -size = round_up(plane_config->base + plane_config->size,
>  -mem->min_page_size);
>  +base = plane_config->base;
>  +if (IS_DGFX(i915)) {
>  +/*
>  + * On discrete the base address should be somewhere in 
>  LMEM, but
>  + * depending on the size of LMEM the base address might
>  + * intersect with the start of DSM, like on DG1, in 
>  which case
>  + * we need the relative address. In such cases we might 
>  also
>  + * need to choose between inital fb vs fbc, if space is 
>  limited.
>  + *
>  + * On future discrete HW, like DG2, we should be able 
>  to just
>  + * allocate directly from LMEM, due to larger LMEM size.
>  + */
>  +if (base >= i915->dsm.start)
>  +base -= i915->dsm.start;
> >>>
> >>> Subsequent code expects the object to actually be inside stolen.
> >>> If that is not the case we should just give up.
> >>
> >> Thanks for taking a look at this. Is that subsequent code outside
> >> initial_plane_vma()? In the next patch this is now using LMEM directly
> >> for dg2. Would that blow up somewhere else?
> > 
> > It uses i915_gem_object_create_stolen_for_preallocated() which assumes
> > the stuff is inside stolen.
> 
> At the start of the series that gets ripped out and replaced with 
> i915_gem_object_create_region_at(), where we can now just pass in the 
> intel_memory_region, and the backend hopefully takes care of the rest.

Why? Is the BIOS no longer allocating its fbs from stolen?

> 
> > 
> >>> The fact that we fail to confirm any of that on integrated
> >>> parts has always bugged me, but not enough to actually do
> >>> anything about it. Such a check would be somewhat more involved
> >>> since we'd have to look at the PTEs. But on discrete sounds like
> >>> we can get away with a trivial check.
> >>
> >> Which PTEs?
> > 
> > The PTEs the plane is actually using. We have no idea where they
> > actually point to and just assume they represent a 1:1 mapping of
> > stolen.
> > 
> > I suppose with lmem we'll just start assuming a 1:1 mapping of
> > the whole lmem rather than just stolen.
> 
> So IIUC the base that we read is actually some GGTT address(I guess it 
> comes pre-programmed or something?), and that hopefully 1:1 maps to 
> stolen. Ok, so as you say, I guess we only want to subtract the 
> dsm.start for the physical allocation, and not the GGTT address, when 
> dealing with stolen lmem.
> 
> > 

-- 
Ville Syrjälä
Intel


Re: [PATCH 06/10] drm/gma500: Move GTT resume logic out of psb_gtt_init()

2022-03-07 Thread Sam Ravnborg
Hi Thomas,

One comment below.

On Sun, Mar 06, 2022 at 09:36:15PM +0100, Thomas Zimmermann wrote:
> The current implementation of psb_gtt_init() also does resume
> handling. Move the resume code into its own helper.
> 
> Signed-off-by: Thomas Zimmermann 
> ---
>  drivers/gpu/drm/gma500/gtt.c | 122 ++-
>  drivers/gpu/drm/gma500/gtt.h |   2 +-
>  drivers/gpu/drm/gma500/psb_drv.c |   2 +-
>  3 files changed, 104 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/gpu/drm/gma500/gtt.c b/drivers/gpu/drm/gma500/gtt.c
> index acd50ee26b03..43ad3ec38c80 100644
> --- a/drivers/gpu/drm/gma500/gtt.c
> +++ b/drivers/gpu/drm/gma500/gtt.c
> @@ -209,7 +209,7 @@ static void psb_gtt_populate_resources(struct 
> drm_psb_private *pdev)
>   drm_dbg(dev, "Restored %u of %u gtt ranges (%u KB)", restored, total, 
> (size / 1024));
>  }
>  
> -int psb_gtt_init(struct drm_device *dev, int resume)
> +int psb_gtt_init(struct drm_device *dev)
>  {
>   struct drm_psb_private *dev_priv = to_drm_psb_private(dev);
>   struct pci_dev *pdev = to_pci_dev(dev->dev);
> @@ -218,10 +218,8 @@ int psb_gtt_init(struct drm_device *dev, int resume)
>   struct psb_gtt *pg;
>   int ret = 0;
>  
> - if (!resume) {
> - mutex_init(&dev_priv->gtt_mutex);
> - mutex_init(&dev_priv->mmap_mutex);
> - }
> + mutex_init(&dev_priv->gtt_mutex);
> + mutex_init(&dev_priv->mmap_mutex);
>  
>   pg = &dev_priv->gtt;
>  
> @@ -290,13 +288,6 @@ int psb_gtt_init(struct drm_device *dev, int resume)
>   dev_dbg(dev->dev, "Stolen memory base 0x%x, size %luK\n",
>   dev_priv->stolen_base, vram_stolen_size / 1024);
>  
> - if (resume && (gtt_pages != pg->gtt_pages) &&
> - (stolen_size != pg->stolen_size)) {
> - dev_err(dev->dev, "GTT resume error.\n");
> - ret = -EINVAL;
> - goto out_err;
> - }
> -
>   pg->gtt_pages = gtt_pages;
>   pg->stolen_size = stolen_size;
>   dev_priv->vram_stolen_size = vram_stolen_size;
> @@ -304,19 +295,14 @@ int psb_gtt_init(struct drm_device *dev, int resume)
>   /*
>*  Map the GTT and the stolen memory area
>*/
> - if (!resume)
> - dev_priv->gtt_map = ioremap(pg->gtt_phys_start,
> - gtt_pages << PAGE_SHIFT);
> + dev_priv->gtt_map = ioremap(pg->gtt_phys_start, gtt_pages << 
> PAGE_SHIFT);
>   if (!dev_priv->gtt_map) {
>   dev_err(dev->dev, "Failure to map gtt.\n");
>   ret = -ENOMEM;
>   goto out_err;
>   }
>  
> - if (!resume)
> - dev_priv->vram_addr = ioremap_wc(dev_priv->stolen_base,
> -  stolen_size);
> -
> + dev_priv->vram_addr = ioremap_wc(dev_priv->stolen_base, stolen_size);
>   if (!dev_priv->vram_addr) {
>   dev_err(dev->dev, "Failure to map stolen base.\n");
>   ret = -ENOMEM;
> @@ -333,11 +319,107 @@ int psb_gtt_init(struct drm_device *dev, int resume)
>   return ret;
>  }
>
The below is a lot of duplicated complex code.
Can you add one more helper for this?

> +static int psb_gtt_resume(struct drm_device *dev)
> +{
> + struct drm_psb_private *dev_priv = to_drm_psb_private(dev);
> + struct pci_dev *pdev = to_pci_dev(dev->dev);
> + unsigned int gtt_pages;
> + unsigned long stolen_size, vram_stolen_size;
> + struct psb_gtt *pg;
> + int ret = 0;
> +
> + pg = &dev_priv->gtt;

static void psb_enable_gtt(..)
{
> +
> + /* Enable the GTT */
> + pci_read_config_word(pdev, PSB_GMCH_CTRL, &dev_priv->gmch_ctrl);
> + pci_write_config_word(pdev, PSB_GMCH_CTRL,
> +   dev_priv->gmch_ctrl | _PSB_GMCH_ENABLED);
> +
> + dev_priv->pge_ctl = PSB_RVDC32(PSB_PGETBL_CTL);
> + PSB_WVDC32(dev_priv->pge_ctl | _PSB_PGETBL_ENABLED, PSB_PGETBL_CTL);
> + (void) PSB_RVDC32(PSB_PGETBL_CTL);
> +
> + /* The root resource we allocate address space from */
> + dev_priv->gtt_initialized = 1;
> +
> + pg->gtt_phys_start = dev_priv->pge_ctl & PAGE_MASK;
> +
> + /*
> +  *  The video mmu has a hw bug when accessing 0x0D000.
> +  *  Make gatt start at 0x0e000,. This doesn't actually
> +  *  matter for us but may do if the video acceleration ever
> +  *  gets opened up.
> +  */
> + pg->mmu_gatt_start = 0xE000;
> +
> + pg->gtt_start = pci_resource_start(pdev, PSB_GTT_RESOURCE);
> + gtt_pages = pci_resource_len(pdev, PSB_GTT_RESOURCE) >> PAGE_SHIFT;
> + /* CDV doesn't report this. In which case the system has 64 gtt pages */
> + if (pg->gtt_start == 0 || gtt_pages == 0) {
> + dev_dbg(dev->dev, "GTT PCI BAR not initialized.\n");
> + gtt_pages = 64;
> + pg->gtt_start = dev_priv->pge_ctl;
> + }
> +
> + pg->gatt_start = pci_resource_start(pdev, PSB_GATT_RESOURCE);
> + 

Re: [Intel-gfx] [PATCH 7/8] drm/i915: fixup the initial fb base on DG1

2022-03-07 Thread Matthew Auld
On Mon, 7 Mar 2022 at 18:41, Ville Syrjälä
 wrote:
>
> On Mon, Mar 07, 2022 at 06:26:32PM +, Matthew Auld wrote:
> > On 07/03/2022 17:06, Ville Syrjälä wrote:
> > > On Mon, Mar 07, 2022 at 10:32:36AM +, Matthew Auld wrote:
> > >> On 04/03/2022 19:33, Ville Syrjälä wrote:
> > >>> On Fri, Mar 04, 2022 at 05:23:32PM +, Matthew Auld wrote:
> >  The offset we get looks to be the exact start of DSM, but the
> >  inital_plane_vma expects the address to be relative.
> > 
> >  Signed-off-by: Matthew Auld 
> >  Cc: Thomas Hellström 
> >  ---
> > .../drm/i915/display/intel_plane_initial.c| 22 
> >  +++
> > 1 file changed, 18 insertions(+), 4 deletions(-)
> > 
> >  diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c 
> >  b/drivers/gpu/drm/i915/display/intel_plane_initial.c
> >  index f797fcef18fc..b39d3a8dfe45 100644
> >  --- a/drivers/gpu/drm/i915/display/intel_plane_initial.c
> >  +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c
> >  @@ -56,10 +56,24 @@ initial_plane_vma(struct drm_i915_private *i915,
> >   if (!mem || plane_config->size == 0)
> >   return NULL;
> > 
> >  -base = round_down(plane_config->base,
> >  -  I915_GTT_MIN_ALIGNMENT);
> >  -size = round_up(plane_config->base + plane_config->size,
> >  -mem->min_page_size);
> >  +base = plane_config->base;
> >  +if (IS_DGFX(i915)) {
> >  +/*
> >  + * On discrete the base address should be somewhere 
> >  in LMEM, but
> >  + * depending on the size of LMEM the base address 
> >  might
> >  + * intersect with the start of DSM, like on DG1, in 
> >  which case
> >  + * we need the relative address. In such cases we 
> >  might also
> >  + * need to choose between inital fb vs fbc, if space 
> >  is limited.
> >  + *
> >  + * On future discrete HW, like DG2, we should be able 
> >  to just
> >  + * allocate directly from LMEM, due to larger LMEM 
> >  size.
> >  + */
> >  +if (base >= i915->dsm.start)
> >  +base -= i915->dsm.start;
> > >>>
> > >>> Subsequent code expects the object to actually be inside stolen.
> > >>> If that is not the case we should just give up.
> > >>
> > >> Thanks for taking a look at this. Is that subsequent code outside
> > >> initial_plane_vma()? In the next patch this is now using LMEM directly
> > >> for dg2. Would that blow up somewhere else?
> > >
> > > It uses i915_gem_object_create_stolen_for_preallocated() which assumes
> > > the stuff is inside stolen.
> >
> > At the start of the series that gets ripped out and replaced with
> > i915_gem_object_create_region_at(), where we can now just pass in the
> > intel_memory_region, and the backend hopefully takes care of the rest.
>
> Why? Is the BIOS no longer allocating its fbs from stolen?

On discrete, so far DSM is always just snipped off the end of lmem. On
DG1, which only has 4G lmem, the base seems to always exactly match
the DSM start (not sure if this is a fluke). However, on DG2, which has
much larger lmem size, the base is still the same IIRC, but it isn't
even close to where DSM is located on such a device. Best guess is
that we were meant to just treat the bios fb (or that part of stolen
lmem) as a part of normal lmem, which might explain why the base is not
relative to the dsm.start like on integrated?

>
> >
> > >
> > >>> The fact that we fail to confirm any of that on integrated
> > >>> parts has always bugged me, but not enough to actually do
> > >>> anything about it. Such a check would be somewhat more involved
> > >>> since we'd have to look at the PTEs. But on discrete sounds like
> > >>> we can get away with a trivial check.
> > >>
> > >> Which PTEs?
> > >
> > > The PTEs the plane is actually using. We have no idea where they
> > > actually point to and just assume they represent a 1:1 mapping of
> > > stolen.
> > >
> > > I suppose with lmem we'll just start assuming a 1:1 mapping of
> > > the whole lmem rather than just stolen.
> >
> > So IIUC the base that we read is actually some GGTT address(I guess it
> > comes pre-programmed or something?), and that hopefully 1:1 maps to
> > stolen. Ok, so as you say, I guess we only want to subtract the
> > dsm.start for the physical allocation, and not the GGTT address, when
> > dealing with stolen lmem.
> >
> > >
>
> --
> Ville Syrjälä
> Intel


Re: [PATCH v2 1/4] drm/bridge: ti-sn65dsi86: Implement bridge connector operations

2022-03-07 Thread Doug Anderson
Hi,

On Mon, Mar 7, 2022 at 10:00 AM Kieran Bingham
 wrote:
>
> From: Laurent Pinchart 
>
> Implement the bridge connector-related .get_edid() operation, and report
> the related bridge capabilities and type.
>
> Signed-off-by: Laurent Pinchart 
> Reviewed-by: Stephen Boyd 
> Reviewed-by: Douglas Anderson 
> Signed-off-by: Kieran Bingham 
> ---
> Changes since v1:
>
> - The connector .get_modes() operation doesn't rely on EDID anymore,
>   __ti_sn_bridge_get_edid() and ti_sn_bridge_get_edid() got merged
>   together
>
> Notes from Kieran:
>
> RB Tags collected from:
>  
> https://lore.kernel.org/all/20210322030128.2283-9-laurent.pinchart+rene...@ideasonboard.com/
>
> However, this was over a year ago, so let me know if other patches now
> supersede this one or otherwise invalidate this update.
>
>  drivers/gpu/drm/bridge/ti-sn65dsi86.c | 16 
>  1 file changed, 16 insertions(+)
>
> diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c 
> b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
> index c55848588123..ffb6c04f6c46 100644
> --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c
> +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
> @@ -1154,6 +1154,19 @@ static void ti_sn_bridge_post_disable(struct 
> drm_bridge *bridge)
> pm_runtime_put_sync(pdata->dev);
>  }
>
> +static struct edid *ti_sn_bridge_get_edid(struct drm_bridge *bridge,
> + struct drm_connector *connector)
> +{
> +   struct ti_sn65dsi86 *pdata = bridge_to_ti_sn65dsi86(bridge);
> +   struct edid *edid;
> +
> +   pm_runtime_get_sync(pdata->dev);
> +   edid = drm_get_edid(connector, &pdata->aux.ddc);
> +   pm_runtime_put_autosuspend(pdata->dev);
> +
> +   return edid;
> +}
> +
>  static const struct drm_bridge_funcs ti_sn_bridge_funcs = {
> .attach = ti_sn_bridge_attach,
> .detach = ti_sn_bridge_detach,
> @@ -1162,6 +1175,7 @@ static const struct drm_bridge_funcs ti_sn_bridge_funcs 
> = {
> .enable = ti_sn_bridge_enable,
> .disable = ti_sn_bridge_disable,
> .post_disable = ti_sn_bridge_post_disable,
> +   .get_edid = ti_sn_bridge_get_edid,
>  };
>
>  static void ti_sn_bridge_parse_lanes(struct ti_sn65dsi86 *pdata,
> @@ -1248,6 +1262,8 @@ static int ti_sn_bridge_probe(struct auxiliary_device 
> *adev,
>
> pdata->bridge.funcs = &ti_sn_bridge_funcs;
> pdata->bridge.of_node = np;
> +   pdata->bridge.ops = DRM_BRIDGE_OP_EDID;
> +   pdata->bridge.type = DRM_MODE_CONNECTOR_eDP;

This doesn't look right to me. In the eDP case the EDID reading is
driven by the panel.

-Doug


  1   2   >