Re: [PATCH] video: fbdev: via: check the return value of kstrdup()

2022-02-22 Thread Helge Deller
On 2/21/22 11:37, xkernel.w...@foxmail.com wrote:
> From: Xiaoke Wang 
>
> kstrdup() is a memory allocation function which can return NULL when
> some internal memory errors happen. It is better to check the return
> value of it to catch the error in time during the setup of viafb.
>
> Signed-off-by: Xiaoke Wang 

applied to fbdev for-next tree.
Thanks!
Helge

> ---
>  drivers/video/fbdev/via/viafbdev.c | 10 ++
>  1 file changed, 10 insertions(+)
>
> diff --git a/drivers/video/fbdev/via/viafbdev.c 
> b/drivers/video/fbdev/via/viafbdev.c
> index 22deb34..2d67c92 100644
> --- a/drivers/video/fbdev/via/viafbdev.c
> +++ b/drivers/video/fbdev/via/viafbdev.c
> @@ -1939,8 +1939,12 @@ static int __init viafb_setup(void)
>
>   if (!strncmp(this_opt, "viafb_mode1=", 12)) {
>   viafb_mode1 = kstrdup(this_opt + 12, GFP_KERNEL);
> + if (!viafb_mode1)
> + return -ENOMEM;
>   } else if (!strncmp(this_opt, "viafb_mode=", 11)) {
>   viafb_mode = kstrdup(this_opt + 11, GFP_KERNEL);
> + if (!viafb_mode)
> + return -ENOMEM;
>   } else if (!strncmp(this_opt, "viafb_bpp1=", 11)) {
>   if (kstrtouint(this_opt + 11, 0, &viafb_bpp1) < 0)
>   return -EINVAL;
> @@ -1969,6 +1973,8 @@ static int __init viafb_setup(void)
>   return -EINVAL;
>   } else if (!strncmp(this_opt, "viafb_active_dev=", 17)) {
>   viafb_active_dev = kstrdup(this_opt + 17, GFP_KERNEL);
> + if (!viafb_active_dev)
> + return -ENOMEM;
>   } else if (!strncmp(this_opt,
>   "viafb_display_hardware_layout=", 30)) {
>   if (kstrtoint(this_opt + 30, 0,
> @@ -1995,8 +2001,12 @@ static int __init viafb_setup(void)
>   return -EINVAL;
>   } else if (!strncmp(this_opt, "viafb_lcd_port=", 15)) {
>   viafb_lcd_port = kstrdup(this_opt + 15, GFP_KERNEL);
> + if (!viafb_lcd_port)
> + return -ENOMEM;
>   } else if (!strncmp(this_opt, "viafb_dvi_port=", 15)) {
>   viafb_dvi_port = kstrdup(this_opt + 15, GFP_KERNEL);
> + if (!viafb_dvi_port)
> + return -ENOMEM;
>   }
>   }
>   return 0;



Re: [PATCH 2/2] drm/bridge: Document the expected behaviour of DSI host controllers

2022-02-22 Thread Dave Stevenson
Hi Laurent

On Tue, 22 Feb 2022 at 06:22, Laurent Pinchart
 wrote:
>
> Hi Dave,
>
> Thank you for the patch.
>
>
> On Wed, Feb 16, 2022 at 04:59:44PM +, Dave Stevenson wrote:
> > The exact behaviour of DSI host controllers is not specified,
> > therefore define it.
> >
> > Signed-off-by: Dave Stevenson 
> > ---
> >  Documentation/gpu/drm-kms-helpers.rst |  7 +++
> >  drivers/gpu/drm/drm_bridge.c  | 38 
> > +++
> >  2 files changed, 45 insertions(+)
> >
> > diff --git a/Documentation/gpu/drm-kms-helpers.rst 
> > b/Documentation/gpu/drm-kms-helpers.rst
> > index c3ce91eecbc1..362afdb867c6 100644
> > --- a/Documentation/gpu/drm-kms-helpers.rst
> > +++ b/Documentation/gpu/drm-kms-helpers.rst
> > @@ -185,6 +185,13 @@ Bridge Helper Reference
> >  .. kernel-doc:: drivers/gpu/drm/drm_bridge.c
> > :export:
> >
> > +MIPI-DSI bridge operation
> > +-------------------------
> > +
> > +.. kernel-doc:: drivers/gpu/drm/drm_bridge.c
> > +   :doc: dsi bridge operations
> > +
> > +
> >  Bridge Connector Helper Reference
> >  ---------------------------------
> >
> > diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c
> > index 7c24e8340efa..14c2ee9e0328 100644
> > --- a/drivers/gpu/drm/drm_bridge.c
> > +++ b/drivers/gpu/drm/drm_bridge.c
> > @@ -152,6 +152,44 @@
> >   * situation when probing.
> >   */
> >
> > +/**
> > + * DOC: dsi bridge operations
> > + *
> > + * DSI host interfaces are expected to be implemented as bridges rather 
> > than
> > + * encoders, however there are a few aspects of their operation that need 
> > to
> > + * be defined in order to provide a consistent interface.
> > + *
> > + * A DSI host should keep the PHY powered down until the pre_enable op is
>
> I'd write "operation" in full everywhere to avoid mixing the two.

Ack on this and the other minor changes.

> > + * called. All lanes should be in an idle state (not LP-11) up to this 
> > point.
>
> Is the idle state LP-00 ? If so I'd state that explicitly.

I'd avoided specifying LP-00 as that is a specific state in the
transition from LP to HS (LP-11 -> LP-01 -> LP-00 -> HS).
LP-00 also implies that the line is being actively driven, whereas
this is "powered down" so potentially just has passive pull resistors
on the line.
Looking at the D-PHY spec (I happen to have 1.1 to hand), "Figure 25
Clock Lane Module State Diagram" and we're looking at the "Init
Master" state.

This would also be a point where ULPS might be implemented.

Perhaps replace with "All lanes are in an undefined idle state up to
this point" to allow for differences in hardware implementation?

  Dave

> "[...] in an idle state (LP-00, not LP-11) [...]"
>
> > + * pre_enable should initialise the PHY, set the data lanes to LP-11, and 
> > the
> > + * clock lane to either LP-11 or HS dependent on the mode_flag
>
> s/dependent/depending/ ?
>
> > + * MIPI_DSI_CLOCK_NON_CONTINUOUS.
> > + *
> > + * Ordinarily the downstream bridge DSI peripheral pre_enable will have 
> > been
> > + * called before the DSI host. If the DSI peripheral requires LP-11 and/or
> > + * the clock lane to be in HS mode prior to pre_enable, then it can set the
> > + * DRM_BRIDGE_OP_UPSTREAM_FIRST flag to request the pre_enable (and
> > + * post_disable) order to be altered to enable the DSI host first.
> > + *
> > + * Either the CRTC being enabled, or the DSI host enable op should switch 
> > the
> > + * host to actively transmitting video on the data lanes.
> > + *
> > + * The reverse also applies. The DSI host disable op or stopping the CRTC 
> > should
> > + * stop transmitting video, and the data lanes should return to the LP-11 
> > state.
> > + * The DSI host post_disable op should disable the PHY.
> > + * If the DRM_BRIDGE_OP_UPSTREAM_FIRST flag is set, then the DSI 
> > peripheral's
> > + * bridge post_disable will be called before the DSI host's post_disable.
> > + *
> > + * Whilst it is valid to call host_transfer prior to pre_enable or after
> > + * post_disable, the exact state of the lanes is undefined at this point. 
> > The
> > + * DSI host should initialise the interface, transmit the data, and then 
> > disable
> > + * the interface again.
> > + *
> > + * Ultra Low Power State (ULPS) is not explicitly supported by DRM. If
> > + * implemented, it therefore needs to be either handled entirely within 
> > the DSI
>
> s/either // (or you need an "or ..." :-))
>
> Reviewed-by: Laurent Pinchart 
>
> > + * Host driver.
> > + */
> > +
> >  static DEFINE_MUTEX(bridge_lock);
> >  static LIST_HEAD(bridge_list);
> >
>
> --
> Regards,
>
> Laurent Pinchart


Re: [PATCH v3 8/9] drm/tegra: vic: Implement get_streamid_offset

2022-02-22 Thread Mikko Perttunen

On 2/21/22 22:10, Dmitry Osipenko wrote:

21.02.2022 14:44, Mikko Perttunen пишет:

On 2/19/22 20:54, Dmitry Osipenko wrote:

19.02.2022 21:49, Dmitry Osipenko пишет:

18.02.2022 14:39, Mikko Perttunen пишет:

+static int vic_get_streamid_offset(struct tegra_drm_client *client)
+{
+    struct vic *vic = to_vic(client);
+    int err;
+
+    err = vic_load_firmware(vic);


You can't invoke vic_load_firmware() while RPM is suspended. Either
replace this with RPM get/put or do something else.


Why not, I'm not seeing any HW accesses in vic_load_firmware? Although
it looks like it might race with the vic_load_firmware call in
vic_runtime_resume which probably needs to be fixed.


It was not clear from the function's name that h/w is untouched, I read
"load" as "upload" and then looked at vic_runtime_resume(). I'd rename
vic_load_firmware() to vic_prepare_firmware_image().

And yes, technically lock is needed.


Yep, I'll consider renaming it.

Mikko


Re: Report 1 in ext4 and journal based on v5.17-rc1

2022-02-22 Thread Jan Kara
On Thu 17-02-22 20:10:03, Byungchul Park wrote:
> [7.009608] ===
> [7.009613] DEPT: Circular dependency has been detected.
> [7.009614] 5.17.0-rc1-00014-g8a599299c0cb-dirty #30 Tainted: GW
> [7.009616] ---
> [7.009617] summary
> [7.009618] ---
> [7.009618] *** DEADLOCK ***
> [7.009618]
> [7.009619] context A
> [7.009619] [S] (unknown)(&(bit_wait_table + i)->dmap:0)
> [7.009621] [W] down_write(&ei->i_data_sem:0)
> [7.009623] [E] event(&(bit_wait_table + i)->dmap:0)
> [7.009624]
> [7.009625] context B
> [7.009625] [S] down_read(&ei->i_data_sem:0)
> [7.009626] [W] wait(&(bit_wait_table + i)->dmap:0)
> [7.009627] [E] up_read(&ei->i_data_sem:0)
> [7.009628]

Looking into this I have noticed that Dept here tracks bitlocks (buffer
locks in particular) but it apparently treats locks on all buffers as one
locking class so it conflates lock on superblock buffer with a lock on
extent tree block buffer. These are vastly different locks with different
locking constraints. So to avoid false positives in filesystems we will
need to add annotations to differentiate locks on different buffers (based
on what the block is used for). Similarly how we e.g. annotate i_rwsem for
different inodes.

Honza
-- 
Jan Kara 
SUSE Labs, CR


Re: [PATCH v10 3/4] drm/lsdc: add drm driver for loongson display controller

2022-02-22 Thread Maxime Ripard
Hi,

On Sun, Feb 20, 2022 at 10:55:53PM +0800, Sui Jingfeng wrote:
> +/* lsdc_get_display_timings_from_dtb - Get display timings from the device 
> tree
> + *
> + * @np: point to the device node contain the display timings
> + * @pptim: point to where the pointer of struct display_timings is store to
> + */
> +static void lsdc_get_display_timings_from_dtb(struct device_node *np,
> +   struct display_timings **pptim)
> +{
> + struct display_timings *timings;
> +
> + if (!np)
> + return;
> +
> + timings = of_get_display_timings(np);
> + if (timings)
> + *pptim = timings;
> +}

This is not documented in your binding.

> +static int lsdc_get_connector_type(struct drm_device *ddev,
> +struct device_node *output,
> +unsigned int index)
> +{
> + const char *name;
> + int ret;
> +
> + ret = of_property_read_string(output, "connector", &name);
> + if (ret < 0)
> + return DRM_MODE_CONNECTOR_Unknown;
> +
> + if (strncmp(name, "vga-connector", 13) == 0) {
> + ret = DRM_MODE_CONNECTOR_VGA;
> + drm_info(ddev, "connector%d is VGA\n", index);
> + } else if (strncmp(name, "dvi-connector", 13) == 0) {
> + bool analog, digital;
> +
> + analog = of_property_read_bool(output, "analog");
> + digital = of_property_read_bool(output, "digital");
> +
> + if (analog && !digital)
> + ret = DRM_MODE_CONNECTOR_DVIA;
> + else if (analog && digital)
> + ret = DRM_MODE_CONNECTOR_DVII;
> + else
> + ret = DRM_MODE_CONNECTOR_DVID;
> +
> + drm_info(ddev, "connector%d is DVI\n", index);
> + } else if (strncmp(name, "virtual-connector", 17) == 0) {
> + ret = DRM_MODE_CONNECTOR_VIRTUAL;
> + drm_info(ddev, "connector%d is virtual\n", index);
> + } else if (strncmp(name, "dpi-connector", 13) == 0) {
> + ret = DRM_MODE_CONNECTOR_DPI;
> + drm_info(ddev, "connector%d is DPI\n", index);
> + } else if (strncmp(name, "hdmi-connector", 14) == 0) {
> + int res;
> + const char *hdmi_type;
> +
> + ret = DRM_MODE_CONNECTOR_HDMIA;
> +
> + res = of_property_read_string(output, "type", &hdmi_type);
> + if (res == 0 && !strcmp(hdmi_type, "b"))
> + ret = DRM_MODE_CONNECTOR_HDMIB;
> +
> + drm_info(ddev, "connector%d is HDMI, type is %s\n", index, 
> hdmi_type);
> + } else {
> + ret = DRM_MODE_CONNECTOR_Unknown;
> + drm_info(ddev, "The type of connector%d is unknown\n", index);
> + }
> +
> + return ret;
> +}

Your ports, and the fact that you're using the connector bindings, are not documented either.

> +struct lsdc_connector *lsdc_connector_init(struct lsdc_device *ldev, 
> unsigned int index)
> +{
> + struct drm_device *ddev = &ldev->drm;
> + struct device_node *np = ddev->dev->of_node;
> + struct device_node *output = NULL;
> + unsigned int connector_type = DRM_MODE_CONNECTOR_Unknown;
> + struct device_node *disp_tims_np;
> + struct lsdc_connector *lconn;
> + struct drm_connector *connector;
> + int ret;
> +
> + lconn = devm_kzalloc(ddev->dev, sizeof(*lconn), GFP_KERNEL);
> + if (!lconn)
> + return ERR_PTR(-ENOMEM);
> +
> + lconn->index = index;
> + lconn->has_disp_tim = false;
> + lconn->ddc = NULL;
> +
> + output = of_parse_phandle(np, "output-ports", index);
> + if (!output) {
> + drm_warn(ddev, "no output-ports property, please update dtb\n");
> + /*
> +  * Providing a blindly support even though no output-ports
> +  * property is provided in the dtb.
> +  */
> + goto DT_SKIPED;
> + }

output-ports is not documented either.

> + if (!of_device_is_available(output)) {
> + of_node_put(output);
> + drm_info(ddev, "connector%d is not available\n", index);
> + return NULL;
> + }
> +
> + disp_tims_np = of_get_child_by_name(output, "display-timings");
> + if (disp_tims_np) {
> + lsdc_get_display_timings_from_dtb(output, &lconn->disp_tim);
> + lconn->has_disp_tim = true;
> + of_node_put(disp_tims_np);
> + drm_info(ddev, "Found display timings provided by 
> connector%d\n", index);
> + }
> +
> + connector_type = lsdc_get_connector_type(ddev, output, index);
> +
> + if (output) {
> + of_node_put(output);
> + output = NULL;
> + }
> +
> +DT_SKIPED:
> +
> + /* Only create the i2c channel if display timing is not provided */
> + if (!lconn->has_disp_tim) {
> + const struct lsdc_chip_desc * const desc = ldev->desc;
> +
> + if (desc->have_builtin_i2c)
> +   

Re: [PATCH v3 9/9] drm/tegra: Support context isolation

2022-02-22 Thread Mikko Perttunen

On 2/21/22 22:02, Dmitry Osipenko wrote:

21.02.2022 15:06, Mikko Perttunen пишет:

On 2/19/22 20:35, Dmitry Osipenko wrote:

18.02.2022 14:39, Mikko Perttunen пишет:

+    if (context->memory_context &&
context->client->ops->get_streamid_offset) {

  ^^^

+    int offset =
context->client->ops->get_streamid_offset(context->client);
+
+    if (offset >= 0) {
+    job->context = context->memory_context;
+    job->engine_streamid_offset = offset;
+    host1x_context_get(job->context);
+    }


You should bump refcount unconditionally or you'll get refcnt underflow
on put, when offset < 0.


This refcount is intended to be dropped from 'release_job', where it's
dropped if job->context is set, which it is from this path.




+    }
+
   /*
    * job_data is now part of job reference counting, so don't
release
    * it from here.
diff --git a/drivers/gpu/drm/tegra/uapi.c b/drivers/gpu/drm/tegra/uapi.c
index 9ab9179d2026..be33da54d12c 100644
--- a/drivers/gpu/drm/tegra/uapi.c
+++ b/drivers/gpu/drm/tegra/uapi.c
@@ -33,6 +33,9 @@ static void tegra_drm_channel_context_close(struct
tegra_drm_context *context)
   struct tegra_drm_mapping *mapping;
   unsigned long id;
   +    if (context->memory_context)
+    host1x_context_put(context->memory_context);


The "if (context->memory_context &&
context->client->ops->get_streamid_offset)" above doesn't match the "if
(context->memory_context)". You'll get refcount underflow.


And this drop is for the refcount implicitly added when allocating the
memory context through host1x_context_alloc; so these two places should
be independent.

Please elaborate if I missed something.


You named context as memory_context and then have
context=context->memory_context. Please try to improve the variable
names, like drm_ctx->host1x_ctx for example.

I'm also not a big fan of the "if (ref) put(ref)" pattern. Wouldn't it be
better to move all the "if (!NULL)" checks inside of get()/put() and
make the invocations unconditional?


I agree that the naming here is confusing with different kinds of 
contexts flying around, though I would prefer not to change the original 
'struct tegra_drm_context *context' since it's used all around the code. 
But I'll try to make it clearer.


Regarding moving NULL checks inside get/put, I personally dislike that 
pattern (also with e.g. kfree) since when reading the code, it makes it 
more difficult to see that the pointer can be NULL.


Mikko


Re: [PATCH] drm/simpledrm: Add "panel orientation" property on non-upright mounted LCD panels

2022-02-22 Thread Javier Martinez Canillas
Hello Hans,

On 2/21/22 23:00, Hans de Goede wrote:
> Some devices use e.g. a portrait panel in a standard laptop casing made
> for landscape panels. efifb calls drm_get_panel_orientation_quirk() and
> sets fb_info.fbcon_rotate_hint to make fbcon rotate the console so that
> it shows up-right instead of on its side.
> 
> When switching to simpledrm to fbcon renders on its side. Call the

This sentence sounds a little off to me. Did you mean:

"the fbcon renders on its side." ?

Maybe you can say something like the following:

 When switching to simpledrm, fbcon attaches to the fbdev emulated by
 the DRM core instead. And the fb_info.fbcon_rotate_hint field is set
 by the emulation layer, if panel orientation was set for a connector.

> drm_connector_set_panel_orientation_with_quirk() helper to add
> a "panel orientation" property on devices listed in the quirk table,
> to make the fbcon (and aware userspace apps) rotate the image to
> display properly.
> 
> Cc: Javier Martinez Canillas 
> Signed-off-by: Hans de Goede 
> ---

The patch looks good to me. Thanks a lot for fixing this

Reviewed-by: Javier Martinez Canillas 

Best regards,
-- 
Javier Martinez Canillas
Linux Engineering
Red Hat



Re: [PATCH libdrm v2 00/25] Update Tegra support

2022-02-22 Thread Mikko Perttunen

On 2/21/22 22:29, Dmitry Osipenko wrote:

18.02.2022 12:31, Mikko Perttunen пишет:

On 2/17/22 21:16, Thierry Reding wrote:

...


Reviewed-by: Mikko Perttunen 

Left one cosmetic comment in the VIC4.0 patch, but overall looks OK. I
think it would be fine to have some basic tests in libdrm as well.


There is a question about who is going to use this libdrm API. Are you
going to use it in the VAAPI driver?

Grate drivers can't use this API because:

1. More features are needed
2. There is no stable API
3. It's super painful to keep all drivers and libdrm in sync from a
packaging perspective.

It's much more practical nowadays to use DRM directly, without
SoC-specific libdrm API, i.e. to bundle that SoC-specific API within the
drivers.


I'm not planning to use this in the VAAPI driver -- I don't personally 
have any use case for the libdrm API.


Mikko


Re: [PATCH 1/2] drm: Introduce DRM_BRIDGE_OP_UPSTREAM_FIRST to alter bridge init order

2022-02-22 Thread Dave Stevenson
Hi Laurent.

Thanks for the review.

On Tue, 22 Feb 2022 at 06:34, Laurent Pinchart
 wrote:
>
> Hi Dave,
>
> Thank you for the patch.
>
> On Wed, Feb 16, 2022 at 04:59:43PM +, Dave Stevenson wrote:
> > DSI sink devices typically want the DSI host powered up and configured
> > before they are powered up. pre_enable is the place this would normally
> > happen, but they are called in reverse order from panel/connector towards
> > the encoder, which is the "wrong" order.
> >
> > Add a new flag DRM_BRIDGE_OP_UPSTREAM_FIRST that any bridge can set
> > to swap the order of pre_enable (and post_disable) so that any upstream
> > bridges are called first to create the desired state.
> >
> > eg:
> > - Panel
> > - Bridge 1
> > - Bridge 2 DRM_BRIDGE_OP_UPSTREAM_FIRST
> > - Bridge 3
> > - Encoder
> > Would result in pre_enable's being called as Panel, Bridge 1, Bridge 3,
> > Bridge 2.
>
> If there was a Bridge 4 between Bridge 3 and Encoder, would it be
>
> Panel, Bridge 1, Bridge 3, Bridge 4, Bridge 2
>
> ? I'd capture that here, to be explicit.

No.
 - Panel
 - Bridge 1
 - Bridge 2 DRM_BRIDGE_OP_UPSTREAM_FIRST
 - Bridge 3
 - Bridge 4
  - Encoder
Would result in pre_enable's being called as Panel, Bridge 1, Bridge
3, Bridge 2, Bridge 4, Encoder.
ie it only swaps the order of bridges 2 & 3.

 - Panel
 - Bridge 1
 - Bridge 2 DRM_BRIDGE_OP_UPSTREAM_FIRST
 - Bridge 3 DRM_BRIDGE_OP_UPSTREAM_FIRST
 - Bridge 4
 - Encoder
Would result in pre_enable's being called as Panel, Bridge 1, Bridge
4, Bridge 3, Bridge 2, Encoder.
(Bridge 2&3 have asked for upstream to be enabled first, which means
bridge 4. Bridge 2 wants upstream enabled first, which means bridge
3).

 - Panel
 - Bridge 1
 - Bridge 2 DRM_BRIDGE_OP_UPSTREAM_FIRST
 - Bridge 3
 - Bridge 4 DRM_BRIDGE_OP_UPSTREAM_FIRST
 - Bridge 5
 - Encoder
Would result in Panel, Bridge 1, Bridge 3, Bridge 2, Bridge 5, Bridge
4, Encoder.

So we only reverse the order whilst the bridges request that they want
upstream enabled first, but we can do that multiple times within the
chain. I hope that makes sense.

> > Signed-off-by: Dave Stevenson 
> > ---
> >  drivers/gpu/drm/drm_bridge.c | 197 
> > +--
> >  include/drm/drm_bridge.h |   8 ++
> >  2 files changed, 180 insertions(+), 25 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c
> > index c96847fc0ebc..7c24e8340efa 100644
> > --- a/drivers/gpu/drm/drm_bridge.c
> > +++ b/drivers/gpu/drm/drm_bridge.c
> > @@ -522,21 +522,58 @@ EXPORT_SYMBOL(drm_bridge_chain_disable);
> >   * Calls &drm_bridge_funcs.post_disable op for all the bridges in the
> >   * encoder chain, starting from the first bridge to the last. These are 
> > called
> >   * after completing the encoder's prepare op.
>
> Missing blank line, as well as in three locations below.
>
> > + * If a bridge sets the DRM_BRIDGE_OP_UPSTREAM_FIRST, then the 
> > post_disable for
> > + * that bridge will be called before the previous one to reverse the 
> > pre_enable
> > + * calling direction.
> >   *
> >   * Note: the bridge passed should be the one closest to the encoder
> >   */
> >  void drm_bridge_chain_post_disable(struct drm_bridge *bridge)
> >  {
> >   struct drm_encoder *encoder;
> > + struct drm_bridge *next, *limit;
> >
> >   if (!bridge)
> >   return;
> >
> >   encoder = bridge->encoder;
> >   list_for_each_entry_from(bridge, &encoder->bridge_chain, chain_node) {
> > + limit = NULL;
> > +
> > + if (!list_is_last(&bridge->chain_node, 
> > &encoder->bridge_chain)) {
> > + next = list_next_entry(bridge, chain_node);
> > +
> > + if (next->ops & DRM_BRIDGE_OP_UPSTREAM_FIRST) {
> > + limit = next;
> > +
> > + list_for_each_entry_from(next, 
> > &encoder->bridge_chain,
> > +  chain_node) {
> > + if (!(next->ops &
> > + 
> > DRM_BRIDGE_OP_UPSTREAM_FIRST)) {
> > + next = list_prev_entry(next, 
> > chain_node);
> > + limit = next;
> > + break;
> > + }
> > + }
> > +
> > + list_for_each_entry_from_reverse(next, 
> > &encoder->bridge_chain,
> > +  chain_node) {
> > + if (next == bridge)
> > + break;
> > +
> > + if (next->funcs->post_disable)
> > + 
> > next->funcs->post_disable(next);
> > + }
> > + }
> > + }
> > +
> > 

[RFC PATCH] drm/panel: simple: panel-dpi: use bus-format to set bpc and bus_format

2022-02-22 Thread Max Krummenacher
Use the new property bus-format to set the enum bus_format and bpc.
Completes: commit 4a1d0dbc8332 ("drm/panel: simple: add panel-dpi support")

Signed-off-by: Max Krummenacher 

---

 drivers/gpu/drm/panel/panel-simple.c | 32 
 1 file changed, 32 insertions(+)

Relates to the discussion: 
https://lore.kernel.org/all/20220201110717.3585-1-cniederma...@dh-electronics.com/

Max

diff --git a/drivers/gpu/drm/panel/panel-simple.c 
b/drivers/gpu/drm/panel/panel-simple.c
index c5f133667a2d..5c07260de71c 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -453,6 +453,7 @@ static int panel_dpi_probe(struct device *dev,
struct panel_desc *desc;
unsigned int bus_flags;
struct videomode vm;
+   const char *format = "";
int ret;
 
np = dev->of_node;
@@ -477,6 +478,37 @@ static int panel_dpi_probe(struct device *dev,
of_property_read_u32(np, "width-mm", &desc->size.width);
of_property_read_u32(np, "height-mm", &desc->size.height);
 
+   of_property_read_string(np, "bus-format", &format);
+   if (!strcmp(format, "BGR888_1X24")) {
+   desc->bpc = 8;
+   desc->bus_format = MEDIA_BUS_FMT_BGR888_1X24;
+   } else if (!strcmp(format, "GBR888_1X24")) {
+   desc->bpc = 8;
+   desc->bus_format = MEDIA_BUS_FMT_GBR888_1X24;
+   } else if (!strcmp(format, "RBG888_1X24")) {
+   desc->bpc = 8;
+   desc->bus_format = MEDIA_BUS_FMT_RBG888_1X24;
+   } else if (!strcmp(format, "RGB444_1X12")) {
+   desc->bpc = 6;
+   desc->bus_format = MEDIA_BUS_FMT_RGB444_1X12;
+   } else if (!strcmp(format, "RGB565_1X16")) {
+   desc->bpc = 6;
+   desc->bus_format = MEDIA_BUS_FMT_RGB565_1X16;
+   } else if (!strcmp(format, "RGB666_1X18")) {
+   desc->bpc = 6;
+   desc->bus_format = MEDIA_BUS_FMT_RGB666_1X18;
+   } else if (!strcmp(format, "RGB666_1X24_CPADHI")) {
+   desc->bpc = 6;
+   desc->bus_format = MEDIA_BUS_FMT_RGB666_1X24_CPADHI;
+   } else if (!strcmp(format, "RGB888_1X24")) {
+   desc->bpc = 8;
+   desc->bus_format = MEDIA_BUS_FMT_RGB888_1X24;
+   } else {
+   dev_err(dev, "%pOF: missing or unknown bus-format property\n",
+   np);
+   return -EINVAL;
+   }
+
/* Extract bus_flags from display_timing */
bus_flags = 0;
vm.flags = timing->flags;
-- 
2.20.1



RE: [PATCH 1/3] drm/edid: parse multiple CEA extension block

2022-02-22 Thread Lee, Shawn C
On Tue, Feb 22, 2022 at 03:28:17PM +0800, Ville Syrjälä 
 wrote:
>On Tue, Feb 22, 2022 at 02:38:17PM +0800, Lee Shawn C wrote:
>> Try to find and parse more CEA ext blocks if edid->extensions is 
>> greater than one.
>> 
>> Cc: Jani Nikula 
>> Cc: Ville Syrjala 
>> Cc: Ankit Nautiyal 
>> Signed-off-by: Lee Shawn C 
>> ---
>>  drivers/gpu/drm/drm_edid.c | 75 
>> +++---
>>  1 file changed, 45 insertions(+), 30 deletions(-)
>> 
>> diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c 
>> index 12893e7be89b..3d5dbbeca7f9 100644
>> --- a/drivers/gpu/drm/drm_edid.c
>> +++ b/drivers/gpu/drm/drm_edid.c
>> @@ -4313,43 +4313,58 @@ add_cea_modes(struct drm_connector *connector, 
>> struct edid *edid)
>>  const u8 *cea = drm_find_cea_extension(edid);
>>  const u8 *db, *hdmi = NULL, *video = NULL;
>>  u8 dbl, hdmi_len, video_len = 0;
>> -int modes = 0;
>> +int modes = 0, j;
>>  
>> -if (cea && cea_revision(cea) >= 3) {
>> -int i, start, end;
>> +if (!cea)
>> +return 0;
>>  
>> -if (cea_db_offsets(cea, &start, &end))
>> -return 0;
>> +for (j = (cea - (u8 *)edid) / EDID_LENGTH; j <= edid->extensions;) {
>
>That looks rather illegible. I think we want a drm_find_cea_extension(const 
>struct edid *edid, int *ext_index) and then just loop until it stops giving us 
>stuff.
>

I will modify drm_find_cea_extension() to find out next CEA from *ext_index.

>There are also several other callers of drm_find_cea_extension().
>Why don't they require the same treatment?

My suspicion is that the original design assumed edid->extensions would be zero or one,
so only one extension block would be available.

Best regards,
Shawn

>
>> +if (cea && cea_revision(cea) >= 3) {
>> +int i, start, end;
>>  
>> -for_each_cea_db(cea, i, start, end) {
>> -db = &cea[i];
>> -dbl = cea_db_payload_len(db);
>> +if (cea_db_offsets(cea, &start, &end))
>> +continue;
>>  
>> -if (cea_db_tag(db) == VIDEO_BLOCK) {
>> -video = db + 1;
>> -video_len = dbl;
>> -modes += do_cea_modes(connector, video, dbl);
>> -} else if (cea_db_is_hdmi_vsdb(db)) {
>> -hdmi = db;
>> -hdmi_len = dbl;
>> -} else if (cea_db_is_y420vdb(db)) {
>> -const u8 *vdb420 = &db[2];
>> -
>> -/* Add 4:2:0(only) modes present in EDID */
>> -modes += do_y420vdb_modes(connector,
>> -  vdb420,
>> -  dbl - 1);
>> +for_each_cea_db(cea, i, start, end) {
>> +db = &cea[i];
>> +dbl = cea_db_payload_len(db);
>> +
>> +if (cea_db_tag(db) == VIDEO_BLOCK) {
>> +video = db + 1;
>> +video_len = dbl;
>> +modes += do_cea_modes(connector, video, 
>> dbl);
>> +} else if (cea_db_is_hdmi_vsdb(db)) {
>> +hdmi = db;
>> +hdmi_len = dbl;
>> +} else if (cea_db_is_y420vdb(db)) {
>> +const u8 *vdb420 = &db[2];
>> +
>> +/* Add 4:2:0(only) modes present in 
>> EDID */
>> +modes += do_y420vdb_modes(connector,
>> +  vdb420,
>> +  dbl - 1);
>> +}
>>  }
>>  }
>> -}
>>  
>> -/*
>> - * We parse the HDMI VSDB after having added the cea modes as we will
>> - * be patching their flags when the sink supports stereo 3D.
>> - */
>> -if (hdmi)
>> -modes += do_hdmi_vsdb_modes(connector, hdmi, hdmi_len, video,
>> -video_len);
>> +/*
>> + * We parse the HDMI VSDB after having added the cea modes as 
>> we will
>> + * be patching their flags when the sink supports stereo 3D.
>> + */
>> +if (hdmi) {
>> +modes += do_hdmi_vsdb_modes(connector, hdmi, hdmi_len, 
>> video,
>> +video_len);
>> +hdmi  = NULL;
>> +video = NULL;
>> +hdmi_len = 0;
>> +video_len = 0;
>> +}
>> +
>> +/* move to next CEA ex

Re: [PATCH 0/2] DSI host and peripheral initialisation ordering

2022-02-22 Thread Dave Stevenson
Hi Laurent.

On Tue, 22 Feb 2022 at 06:43, Laurent Pinchart
 wrote:
>
> Hello,
>
> On Fri, Feb 18, 2022 at 02:20:19PM +0100, Andrzej Hajda wrote:
> > On 16.02.2022 17:59, Dave Stevenson wrote:
> > > Hi All
> > >
> > > Hopefully I've cc'ed all those that have bashed this problem around 
> > > previously,
> > > or are otherwise linked to DRM bridges.
> > >
> > > There have been numerous discussions around how DSI support is currently 
> > > broken
> > > as it doesn't support initialising the PHY to LP-11 and potentially the 
> > > clock
> > > lane to HS prior to configuring the DSI peripheral. There is no op where 
> > > the
> > > interface is initialised but HS video isn't also being sent.
> > > Currently you have:
> > > - peripheral pre_enable (host not initialised yet)
> > > - host pre_enable
> > > - encoder enable
> > > - host enable
> > > - peripheral enable (video already running)
> > >
> > > vc4 and exynos currently implement the DSI host as an encoder, and split 
> > > the
> > > bridge_chain. This fails if you want to switch to being a bridge and/or 
> > > use
> > > atomic calls as the state of all the elements split off are not added by
> > > drm_atomic_add_encoder_bridges.
> > >
> > > dw-mipi-dsi[1] and now msm[2] use the mode_set hook to initialise the 
> > > PHY, so
> > > the bridge/panel pre_enable can send commands. In their post_disable they 
> > > then
> > > call the downstream bridge/panel post_disable op manually so that shutdown
> > > commands can be sent before shutting down the PHY. Nothing handles that 
> > > fact,
> > > so the framework then continues down the bridge chain and calls the 
> > > post_disable
> > > again, so we get unbalanced panel prepare/unprepare calls being reported 
> > > [3].
> > >
> > > There have been patches[4] proposing reversing the entire direction of
> > > pre_enable and post_disable, but that risks driving voltage into devices 
> > > that
> > > have yet to be powered up.
> > > There have been discussions about adding either a pre_pre_enable, or 
> > > adding a
> > > DSI host_op to initialise the host[5]. Both require significant reworking 
> > > to all
> > > existing drivers in moving initialisation phases.
> > > We have patches that look like they may well be addressing race 
> > > conditions in
> > > starting up a DSI peripheral[6].
> > >
> > > This patch takes a hybrid of the two: an optional reversing of the order 
> > > for
> > > specific links within the bridge chain within pre_enable and post_disable 
> > > done
> > > within the drm_bridge framework.
> > > I'm more than happy to move where the flag exists in structures 
> > > (currently as
> > > DRM_BRIDGE_OP_UPSTREAM_FIRST in drm_bridge_ops, but it isn't an op),
>
> API-wise that's my only concern, the flag should go somewhere else.

Ah, the million dollar question then - where does it go? It is only
true or false, so a bool in struct drm_bridge, same as
interlace_allowed?

I've had the realisation that this needs to be accessible from the
panel drivers so DSI panel drivers such as panel-ilitek-ili9881 can
set it too. I'll have a slight rethink over that one, but it is
probably a similar extra flag in struct drm_panel.

  Dave

> > > but does
> > > this solve the problem posed? If not, then can you describe the actual 
> > > scenario
> > > it doesn't cover?
> > > A DSI peripheral can set the flag to get the DSI host initialised first, 
> > > and
> > > therefore it has a stable LP-11 state before pre_enable. Likewise the 
> > > peripheral
> > > can still send shutdown commands prior to the DSI host being shut down in
> > > post_disable. It also handles the case where there are multiple devices 
> > > in the
> > > chain that all want their upstream bridge enabled first, so should there 
> > > be a
> > > DSI mux between host and peripheral, then it can still get the host to the
> > > correct state.
> > >
> > > An example tree is at [7] which is drm-misc-next with these patches and 
> > > then a
> > > conversion of vc4_dsi to use the atomic bridge functions (will be 
> > > upstreamed
> > > once we're over this hurdle). It is working happily with the Toshiba 
> > > TC358762 on
> > > a Raspberry Pi 7" panel.
> > > The same approach but on our vendor 5.15 tree[8] has also been tested
> > > successfully on a TI SN65DSI83 and LVDS panel.
> > >
> > > Whilst here, I've also documented the expected behaviour of DSI hosts and
> > > peripherals to aid those who come along after.
> >
> > Good summary, of multiple attempts of solving the issue (however I still
> > could add some more :) ).
>
> Definitely good, thank you very much Dave for tackling this issue.
>
> > I think the main issue is that we try to squeeze different hardware
> > protocol requirements into one quite restrictive framework - whole
> > crtc->encoder->bridges->(panel ||connector) is managed directly by drm core.
> > No place to negotiate configuration directly between players
> > (bridges/panels).
> > > This patchset slightly loosens the restrictions

Re: [RFC][PATCH] Revert "drm/panel-simple: drop use of data-mapping property"

2022-02-22 Thread Max
Am Samstag, den 19.02.2022, 09:37 + schrieb Christoph Niedermaier:
> From: Max Krummenacher [mailto:max.oss...@gmail.com]
> Sent: Wednesday, February 9, 2022 2:14 PM
> > Hi
> > 
> > Am Mittwoch, den 09.02.2022, 00:52 +0100 schrieb Marek Vasut:
> > > On 2/8/22 22:27, Christoph Niedermaier wrote:
> > > > From: Laurent Pinchart [mailto:laurent.pinch...@ideasonboard.com]
> > > > Sent: Thursday, February 3, 2022 12:46 AM
> > > > > Hi Christoph,
> > > > > 
> > > > 
> > > > Hi Laurent,
> > > > 
> > > > > On Tue, Feb 01, 2022 at 12:07:17PM +0100, Christoph Niedermaier wrote:
> > > > > > Without the data-mapping devicetree property my display won't
> > > > > > work properly. It is flickering, because the bus flags won't
> > > > > > be assigned without a defined bus format by the imx parallel
> > > > > > display driver. There was a discussion about the removal [1]
> > > > > > and an agreement that a better solution is needed, but it is
> > > > > > missing so far. So what would be the better approach?
> > > > > > 
> > > > > > [1] 
> > > > > > https://patchwork.freedesktop.org/patch/357659/?series=74705&rev=1
> > > > > > 
> > > > > > This reverts commit d021d751c14752a0266865700f6f212fab40a18c.
> > > > > > 
> > > > > > Signed-off-by: Christoph Niedermaier 
> > > > > > 
> > > > > > Cc: Marek Vasut 
> > > > > > Cc: Sam Ravnborg 
> > > > > > Cc: Laurent Pinchart 
> > > > > > Cc: Maxime Ripard 
> > > > > > Cc: Philipp Zabel 
> > > > > > Cc: David Airlie 
> > > > > > Cc: Daniel Vetter 
> > > > > > Cc: Shawn Guo 
> > > > > > Cc: Sascha Hauer 
> > > > > > Cc: Pengutronix Kernel Team 
> > > > > > Cc: Fabio Estevam 
> > > > > > Cc: NXP Linux Team 
> > > > > > Cc: linux-arm-ker...@lists.infradead.org
> > > > > > To: dri-devel@lists.freedesktop.org
> > > > > > ---
> > > > > >   drivers/gpu/drm/panel/panel-simple.c | 11 +++
> > > > > >   1 file changed, 11 insertions(+)
> > > > > > 
> > > > > > diff --git a/drivers/gpu/drm/panel/panel-simple.c 
> > > > > > b/drivers/gpu/drm/panel/panel-simple.c
> > > > > > index 3c08f9827acf..2c683d94a3f3 100644
> > > > > > --- a/drivers/gpu/drm/panel/panel-simple.c
> > > > > > +++ b/drivers/gpu/drm/panel/panel-simple.c
> > > > > > @@ -453,6 +453,7 @@ static int panel_dpi_probe(struct device *dev,
> > > > > >struct panel_desc *desc;
> > > > > >unsigned int bus_flags;
> > > > > >struct videomode vm;
> > > > > > + const char *mapping;
> > > > > >int ret;
> > > > > > 
> > > > > >np = dev->of_node;
> > > > > > @@ -477,6 +478,16 @@ static int panel_dpi_probe(struct device *dev,
> > > > > >of_property_read_u32(np, "width-mm", &desc->size.width);
> > > > > >of_property_read_u32(np, "height-mm", &desc->size.height);
> > > > > > 
> > > > > > + of_property_read_string(np, "data-mapping", &mapping);
> > > > > > + if (!strcmp(mapping, "rgb24"))
> > > > > > + desc->bus_format = MEDIA_BUS_FMT_RGB888_1X24;
> > > > > > + else if (!strcmp(mapping, "rgb565"))
> > > > > > + desc->bus_format = MEDIA_BUS_FMT_RGB565_1X16;
> > > > > > + else if (!strcmp(mapping, "bgr666"))
> > > > > > + desc->bus_format = MEDIA_BUS_FMT_RGB666_1X18;
> > > > > > + else if (!strcmp(mapping, "lvds666"))
> > > > > > + desc->bus_format = MEDIA_BUS_FMT_RGB666_1X24_CPADHI;
> > > > > 
> > > > > You're right that there's an issue, but a revert isn't the right 
> > > > > option.
> > > > > The commit you're reverting never made it in a stable release, because
> > > > > it was deemed to not be a good enough option.
> > > > > 
> > > > > First of all, any attempt to fix this should include an update to the 
> > > > > DT
> > > > > binding. Second, as this is about DPI panels, the LVDS option should 
> > > > > be
> > > > > dropped. Finally, I've shared some initial thoughts in [1], maybe you
> > > > > can reply to that e-mail to continue the discussion there ?
> > > > 
> > > > According to your thoughts in [1] you mean that the bus format should be
> > > > built out of the devicetree properties bus-width and data-shift. It 
> > > > would
> > > > be possible for evenly structured busses like RGB888_1X24 and 
> > > > RGB666_1X18,
> > > > but what about a bus like RGB565_1X16, where each color has a different
> > > > bus width? Also the order of the colors should be defined to distinguish
> > > > between busses like RGB888_1X24 and GBR888_1X24.
> > > > Are there any ideas how this can be covered?
> > > 
> > > Maybe with props like these ?
> > > 
> > > channel-width -- width of each color channel
> > > channel-shift -- shift of each color channel
> > > channel-map -- mapping of each color channel
> > > 
> > > So for RGB888
> > > channel-width = <8 8 8>;
> > > channel-shift = <0 0 0>;
> > > channel-map = "RGB"; // or something ?
> > > 
> > > For BGR565 panel connected to RGB24 scanout
> > > channel-width = <5 6 5>;
> > > channel-shift = <3 2 3>;
> > > channel-map = "BGR"; // or something ?
> > > 
> > > For BGR565 panel connect

[PATCH] video: fbdev: via: check the return value of kstrdup()

2022-02-22 Thread xkernel . wang
From: Xiaoke Wang 

kstrdup() is a memory allocation function which can return NULL when
some internal memory errors happen. It is better to check the return
value of it to catch the error in time during the setup of viafb.

Signed-off-by: Xiaoke Wang 
---
 drivers/video/fbdev/via/viafbdev.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/drivers/video/fbdev/via/viafbdev.c 
b/drivers/video/fbdev/via/viafbdev.c
index 22deb34..2d67c92 100644
--- a/drivers/video/fbdev/via/viafbdev.c
+++ b/drivers/video/fbdev/via/viafbdev.c
@@ -1939,8 +1939,12 @@ static int __init viafb_setup(void)
 
if (!strncmp(this_opt, "viafb_mode1=", 12)) {
viafb_mode1 = kstrdup(this_opt + 12, GFP_KERNEL);
+   if (!viafb_mode1)
+   return -ENOMEM;
} else if (!strncmp(this_opt, "viafb_mode=", 11)) {
viafb_mode = kstrdup(this_opt + 11, GFP_KERNEL);
+   if (!viafb_mode)
+   return -ENOMEM;
} else if (!strncmp(this_opt, "viafb_bpp1=", 11)) {
if (kstrtouint(this_opt + 11, 0, &viafb_bpp1) < 0)
return -EINVAL;
@@ -1969,6 +1973,8 @@ static int __init viafb_setup(void)
return -EINVAL;
} else if (!strncmp(this_opt, "viafb_active_dev=", 17)) {
viafb_active_dev = kstrdup(this_opt + 17, GFP_KERNEL);
+   if (!viafb_active_dev)
+   return -ENOMEM;
} else if (!strncmp(this_opt,
"viafb_display_hardware_layout=", 30)) {
if (kstrtoint(this_opt + 30, 0,
@@ -1995,8 +2001,12 @@ static int __init viafb_setup(void)
return -EINVAL;
} else if (!strncmp(this_opt, "viafb_lcd_port=", 15)) {
viafb_lcd_port = kstrdup(this_opt + 15, GFP_KERNEL);
+   if (!viafb_lcd_port)
+   return -ENOMEM;
} else if (!strncmp(this_opt, "viafb_dvi_port=", 15)) {
viafb_dvi_port = kstrdup(this_opt + 15, GFP_KERNEL);
+   if (!viafb_dvi_port)
+   return -ENOMEM;
}
}
return 0;
-- 


Re: [PATCH 1/2] drm/i915/vlv_dsi: Add DMI quirk for wrong panel modeline in BIOS on Asus TF103C

2022-02-22 Thread Javier Martinez Canillas
Hello Hans,

On 2/21/22 23:06, Hans de Goede wrote:
> Vtotal is wrong in the BIOS supplied modeline for the DSI panel on
> the Asus TF103C leading to the last line of the display being shown
> as the first line.
> 
> The factory installed Android has a hardcoded modeline in its kernel,
> causing it to not suffer from this BIOS bug;
> 
> and the Android boot-splash which uses the EFI FB which does have this bug
> has the last line all black causing the bug to not be visible.
> 
> This commit introduces a generic DMI based mechanism for doing modeline
> fixups, in case we need similar fixups on other models in the future.
> 
> Signed-off-by: Hans de Goede 
> ---
>  drivers/gpu/drm/i915/display/vlv_dsi.c | 36 ++
>  1 file changed, 36 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c 
> b/drivers/gpu/drm/i915/display/vlv_dsi.c
> index 06ef822c27bd..66f5cf32bb66 100644
> --- a/drivers/gpu/drm/i915/display/vlv_dsi.c
> +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c
> @@ -23,6 +23,7 @@
>   * Author: Jani Nikula 
>   */
>  
> +#include 
>  #include 
>  
>  #include 
> @@ -1831,6 +1832,33 @@ static void vlv_dphy_param_init(struct intel_dsi 
> *intel_dsi)
>   intel_dsi_log_params(intel_dsi);
>  }
>  
> +typedef void (*vlv_dsi_mode_fixup_func)(struct drm_connector *connector,
> + struct drm_display_mode *fixed_mode);
> +
> +/*
> + * Vtotal is wrong on the Asus TF103C leading to the last line of the display
> + * being shown as the first line. The factory installed Android has a 
> hardcoded
> + * modeline, causing it to not suffer from this BIOS bug.
> + */
> +static void vlv_dsi_asus_tf103c_mode_fixup(struct drm_connector *connector,
> +struct drm_display_mode *fixed_mode)
> +{
> + fixed_mode->vtotal = 816;
> + fixed_mode->crtc_vtotal = 816;
> +}
> +
> +static const struct dmi_system_id dmi_mode_fixup_table[] = {
> + {
> + /* Asus Transformer Pad TF103C */
> + .matches = {
> + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
> + DMI_MATCH(DMI_PRODUCT_NAME, "TF103C"),
> + },
> + .driver_data = (void *)vlv_dsi_asus_tf103c_mode_fixup,
> + },
> + { }
> +};
> +

There's nothing driver specific in this mechanism so I wonder if it would
be better to add it as a DRM helper, for other drivers to use it too.

Maybe in drivers/gpu/drm/drm_modeset_helper.c or a drm_modeset_quirks.c
like we have for drivers/gpu/drm/drm_panel_orientation_quirks.c ?

The patch looks good to me, regardless of where you decide to add it.

Reviewed-by: Javier Martinez Canillas 

Best regards,
-- 
Javier Martinez Canillas
Linux Engineering
Red Hat



Re: [Intel-gfx] [PATCH v5 5/7] drm/i915/gt: Create per-tile RC6 sysfs interface

2022-02-22 Thread Andi Shyti
Hi Tvrtko and Joonas,

> > > > > Now tiles have their own sysfs interfaces under the gt/
> > > > > directory. Because RC6 is a property that can be configured on a
> > > > > tile basis, then each tile should have its own interface
> > > > > 
> > > > > The new sysfs structure will have a similar layout for the 4 tile
> > > > > case:
> > > > > 
> > > > > /sys/.../card0
> > > > >\u251c\u2500\u2500 gt
> > > > >\u2502   \u251c\u2500\u2500 gt0
> > > > >\u2502   \u2502   \u251c\u2500\u2500 id
> > > > >\u2502   \u2502   \u251c\u2500\u2500 rc6_enable
> > > > >\u2502   \u2502   \u251c\u2500\u2500 rc6_residency_ms
> > > > >.   .   .
> > > > >.   .   .
> > > > >.   .
> > > > >\u2502   \u2514\u2500\u2500 gtN
> > > > >\u2502   \u251c\u2500\u2500 id
> > > > >\u2502   \u251c\u2500\u2500 rc6_enable
> > > > >\u2502   \u251c\u2500\u2500 rc6_residency_ms
> > > > >\u2502   .
> > > > >\u2502   .
> > > > >\u2502
> > > > >\u2514\u2500\u2500 power/-+
> > > > > \u251c\u2500\u2500 rc6_enable|Original 
> > > > > interface
> > > > > \u251c\u2500\u2500 rc6_residency_ms  +->  kept as 
> > > > > existing ABI;
> > > > > . |it multiplexes over
> > > > > . |the GTs
> > > > >  -+
> > > > > 
> > > > > The existing interfaces have been kept in their original location
> > > > > to preserve the existing ABI. They act on all the GTs: when
> > > > > reading they provide the average value from all the GTs.
> > > > 
> > > > Average feels very odd to me. I'd ask if we can get away providing an 
> > > > errno
> > > > instead? Or tile zero data?
> > 
> > Tile zero data is always wrong, in my opinion. If we have round-robin
> > scaling workloads like some media cases, part of the system load might
> > just disappear when it goes to tile 1.
> 
> I was thinking that in conjunction with deprecated log message it wouldn't
> be wrong - I mean if the route taken was to eventually retire the legacy
> files altogether.

that's a good point... do we want to treat the legacy interfaces
as an error or do we want to make them a feature? As the
discussion is turning, those interfaces are becoming a feature.
But what are we going to do with the coming interfaces?

E.g. in the future we will have the rc6_enable/disable that can
be a command, so that we will add the "_store" interface per
tile. What are we going to do with the above interfaces? Are we
going to add a multiplexed command as well?

> > When we have frequency readbacks without control, returning MAX() across
> > tiles would be the logical thing. The fact that parts of the hardware can
> > be clocked lower when one part is fully utilized is the "new feature".
> > 
> > After that we're only really left with the rc6_residency_ms. And that is
> > the tough one. I'm inclined that MIN() across tiles would be the right
> > answer. If you are fully utilizing a single tile, you should be able to
> > see it.
>  So we have MIN, AVG or SUM, or errno, or remove the file (which is just a
> different kind of errno?) to choose from. :)

in this case it would just be MIN and MAX. At the end we have
here only two types of interface: frequencies and residency_ms.
For the first type we would use 'max', for the second 'min'.

But the question holds in case we want to keep adding interfaces to
the above directories.

Andi


Re: [PATCH v5 0/5] drm: exynos: dsi: Convert drm bridge

2022-02-22 Thread Jagan Teki
Hi Marek,

On Tue, Feb 22, 2022 at 12:19 PM Jagan Teki  wrote:
>
> On Wed, Feb 2, 2022 at 9:54 PM Jagan Teki  wrote:
> >
> > Hi Marek,
> >
> > On Fri, Jan 21, 2022 at 6:14 PM Marek Szyprowski
> >  wrote:
> > >
> > > Hi Jagan,
> > >
> > > On 21.01.2022 12:40, Jagan Teki wrote:
> > > > On Fri, Jan 21, 2022 at 5:06 PM Marek Szyprowski
> > > >  wrote:
> > > >> On 17.01.2022 09:42, Jagan Teki wrote:
> > > >>> Updated series about drm bridge conversion of exynos dsi.
> > > >>>
> > > >>> Previous version can be accessible, here [1].
> > > >>>
> > > >>> Patch 1: connector reset
> > > >>>
> > > >>> Patch 2: panel_bridge API
> > > >>>
> > > >>> Patch 3: bridge conversion
> > > >>>
> > > >>> Patch 4: atomic functions
> > > >>>
> > > >>> Patch 5: DSI init in pre_enable
> > > >>>
> > > >>> Apply below patches to test on Exynos DSI:
> > > >>> - 
> > > >>> https://protect2.fireeye.com/v1/url?k=53bdf119-0c26c815-53bc7a56-000babff3563-792dc1a6b54db43e&q=1&e=9a4ea3ad-9e7d-443d-ad21-ce694a7cd352&u=https%3A%2F%2Fpatchwork.amarulasolutions.com%2Fpatch%2F1825%2F
> > > >>> - 
> > > >>> https://protect2.fireeye.com/v1/url?k=cb269ea3-94bda7af-cb2715ec-000babff3563-e6f545b4a32558ba&q=1&e=9a4ea3ad-9e7d-443d-ad21-ce694a7cd352&u=https%3A%2F%2Fpatchwork.amarulasolutions.com%2Fpatch%2F1838%2F
> > > >>>
> > > >>> [1] 
> > > >>> https://protect2.fireeye.com/v1/url?k=ee1dae12-b186971e-ee1c255d-000babff3563-83eaf8e86e67e0d9&q=1&e=9a4ea3ad-9e7d-443d-ad21-ce694a7cd352&u=https%3A%2F%2Fpatchwork.amarulasolutions.com%2Fcover%2F1826%2F
> > > >>>
> > > >>> Any inputs?
> > > >> I've tried a few times, but I am unable to find what is the base for
> > > >> this patchset. I always get a conflict around exynos_dsi_mode_set()
> > > >> function. I've tried current linux-next, drm-next, v5.16-rc1 and v5.16.
> > > > >> It looks like I must have missed applying some patch before playing 
> > > >> with
> > > >> this.
> > > >>
> > > >> I've also tried to apply only the last patch, as if I got it right, it
> > > >> is the only difference between v4 and v5 and updated 'drm: of: Lookup 
> > > >> if
> > > >> child node has panel or bridge'. In such case the board freezes during
> > > >> the drm initialization.
> > > > Please use drm-misc/drm-misc-next with below patches and then apply 
> > > > this series.
> > >
> > > > I don't have good news. It doesn't work. The last patch even breaks
> > > DSI operation:
> > >
> > > [4.520276] [drm] Exynos DRM: using 1380.decon device for DMA
> > > mapping operations
> > > [4.520578] exynos-drm exynos-drm: bound 1380.decon (ops
> > > decon_component_ops)
> > > [4.580473] exynos-drm exynos-drm: bound 1388.decon (ops
> > > decon_component_ops)
> > > [4.580726] exynos-drm exynos-drm: bound 1393.mic (ops
> > > exynos_mic_component_ops)
> > > [4.584304] exynos-dsi 1390.dsi: [drm:exynos_dsi_host_attach]
> > > Attached s6e3hf2 device
> > > [4.585141] exynos-drm exynos-drm: bound 1390.dsi (ops
> > > exynos_dsi_component_ops)
> > > [4.593189] rc_core: Couldn't load IR keymap rc-cec
> > > [4.594133] Registered IR keymap rc-empty
> > > [4.598760] rc rc0: sii8620 as /devices/virtual/rc/rc0
> > > [4.605219] input: sii8620 as /devices/virtual/rc/rc0/input1
> > > [4.610238] exynos-drm exynos-drm: bound 1397.hdmi (ops
> > > hdmi_component_ops)
> > > [4.920038] exynos-dsi 1390.dsi: xfer timed out: 39 03 00 00 f0 5a 
> > > 5a
> > > [5.024033] [ cut here ]
> > > [5.024055] [CRTC:49:crtc-0] vblank wait timed out
> > > [5.024129] WARNING: CPU: 4 PID: 151 at
> > > drivers/gpu/drm/drm_atomic_helper.c:1530
> > > drm_atomic_helper_wait_for_vblanks.part.24+0x298/0x2a8
> > > [5.024171] Modules linked in:
> > > [5.024195] CPU: 4 PID: 151 Comm: kworker/4:7 Not tainted 5.16.0-rc5+
> > > #11232
> > > [5.024219] Hardware name: Samsung TM2E board (DT)
> > > [5.024232] Workqueue: events output_poll_execute
> > > [5.024262] pstate: 6005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS
> > > BTYPE=--)
> > > [5.024285] pc : drm_atomic_helper_wait_for_vblanks.part.24+0x298/0x2a8
> > > [5.024308] lr : drm_atomic_helper_wait_for_vblanks.part.24+0x298/0x2a8
> > > [5.024327] sp : 800013b5b970
> > > [5.024340] x29: 800013b5b970 x28:  x27:
> > > 2437e400
> > > [5.024391] x26:  x25:  x24:
> > > 800011aa0c60
> > > [5.024437] x23: 0001 x22: 25113000 x21:
> > > 0001
> > > [5.024482] x20: 316fc800 x19:  x18:
> > > 
> > > [5.024526] x17: 00480a11 x16: 0028 x15:
> > > 800011b66df8
> > > [5.024571] x14:  x13: 0a74756f2064656d x12:
> > > 6974207469617720
> > > [5.024615] x11: 656820747563205b x10: 003a x9 :
> > > 7e82f035
> > > [5.024661] x8 : 800011b66df8 x7 : 800013b5b740 x6 :
> > > 0001
> > > 

Re: [PATCH 2/2] drm/i915/vlv_dsi: Add DMI quirk for wrong panel size on Lenovo Yoga Tablet 2 series

2022-02-22 Thread Javier Martinez Canillas
On 2/21/22 23:06, Hans de Goede wrote:
> On the Lenovo Yoga Tablet 2 830 / 1050 the VBT contains a bogus
> 192mm x 120mm size. This is especially a problem on the 8" 830 version
> which uses a 10:16 portrait screen whereas the bogus size is 16:10.
> 
> Add a DMI quirk to override the wrong panel size with the correct one.
> Note both the 10" 1050 models as well as the 8" 830 models use the same
> mainboard and thus the same DMI strings. The 10" 1050 uses a 1920x1200
> landscape screen, whereas the 8" 830 uses a 1200x1920 portrait screen,
> so the quirk handling uses the display resolution to detect the model.
> 
> Signed-off-by: Hans de Goede 
> ---

Reviewed-by: Javier Martinez Canillas 

Best regards,
-- 
Javier Martinez Canillas
Linux Engineering
Red Hat



Re: [PATCH 1/2] drm/i915/vlv_dsi: Add DMI quirk for wrong panel modeline in BIOS on Asus TF103C

2022-02-22 Thread Ville Syrjälä
On Mon, Feb 21, 2022 at 11:06:07PM +0100, Hans de Goede wrote:
> Vtotal is wrong in the BIOS supplied modeline for the DSI panel on

Please include both the correct and bad modelines in the commit
msg.

> the Asus TF103C leading to the last line of the display being shown
> as the first line.
> 
> The factory installed Android has a hardcoded modeline in its kernel,
> causing it to not suffer from this BIOS bug;
> 
> and the Android boot-splash which uses the EFI FB which does have this bug
> has the last line all black causing the bug to not be visible.
> 
> This commit introduces a generic DMI based mechanism for doing modeline
> fixups, in case we need similar fixups on other models in the future.
> 
> Signed-off-by: Hans de Goede 
> ---
>  drivers/gpu/drm/i915/display/vlv_dsi.c | 36 ++
>  1 file changed, 36 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c 
> b/drivers/gpu/drm/i915/display/vlv_dsi.c
> index 06ef822c27bd..66f5cf32bb66 100644
> --- a/drivers/gpu/drm/i915/display/vlv_dsi.c
> +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c
> @@ -23,6 +23,7 @@
>   * Author: Jani Nikula 
>   */
>  
> +#include 
>  #include 
>  
>  #include 
> @@ -1831,6 +1832,33 @@ static void vlv_dphy_param_init(struct intel_dsi 
> *intel_dsi)
>   intel_dsi_log_params(intel_dsi);
>  }
>  
> +typedef void (*vlv_dsi_mode_fixup_func)(struct drm_connector *connector,
> + struct drm_display_mode *fixed_mode);
> +
> +/*
> + * Vtotal is wrong on the Asus TF103C leading to the last line of the display
> + * being shown as the first line. The factory installed Android has a 
> hardcoded
> + * modeline, causing it to not suffer from this BIOS bug.
> + */
> +static void vlv_dsi_asus_tf103c_mode_fixup(struct drm_connector *connector,
> +struct drm_display_mode *fixed_mode)
> +{
> + fixed_mode->vtotal = 816;

I might prefer a full modeline here. Or maybe just vtotal-- or
something, if it's just an off by one.

> + fixed_mode->crtc_vtotal = 816;

The crtc timings should all be 0 at this point. So this looks redundant.

> +}
> +
> +static const struct dmi_system_id dmi_mode_fixup_table[] = {
> + {
> + /* Asus Transformer Pad TF103C */
> + .matches = {
> + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
> + DMI_MATCH(DMI_PRODUCT_NAME, "TF103C"),
> + },
> + .driver_data = (void *)vlv_dsi_asus_tf103c_mode_fixup,
> + },
> + { }
> +};
> +
>  void vlv_dsi_init(struct drm_i915_private *dev_priv)
>  {
>   struct drm_device *dev = &dev_priv->drm;
> @@ -1840,6 +1868,8 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv)
>   struct intel_connector *intel_connector;
>   struct drm_connector *connector;
>   struct drm_display_mode *current_mode, *fixed_mode;
> + const struct dmi_system_id *dmi_id;
> + vlv_dsi_mode_fixup_func mode_fixup;

The function pointer can go into the if block.

>   enum port port;
>   enum pipe pipe;
>  
> @@ -1968,6 +1998,12 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv)
>   goto err_cleanup_connector;
>   }
>  
> + dmi_id = dmi_first_match(dmi_mode_fixup_table);
> + if (dmi_id) {
> + mode_fixup = (vlv_dsi_mode_fixup_func)dmi_id->driver_data;
> + mode_fixup(connector, fixed_mode);
> + }
> +
>   intel_panel_init(&intel_connector->panel, fixed_mode, NULL);
>   intel_backlight_setup(intel_connector, INVALID_PIPE);
>  
> -- 
> 2.35.1

-- 
Ville Syrjälä
Intel


Re: [GIT PULL] exynos-drm-fixes

2022-02-22 Thread Inki Dae
Hi Dave,

Seems you missed this. Is there any issue?

Thanks,
Inki Dae

22. 2. 10. 20:07에 Inki Dae 이(가) 쓴 글:
> Hi Dave and Daniel,
> 
>Just two fixup series - one is to fix irq chaining issue and other is
>regressions to TE-gpio handling.
> 
> Please let me know if there is any problem.
> 
> Thanks,
> Inki Dae
> 
> The following changes since commit dfd42facf1e4ada021b939b4e19c935dcdd55566:
> 
>   Linux 5.17-rc3 (2022-02-06 12:20:50 -0800)
> 
> are available in the Git repository at:
> 
>   gitolite.kernel.org:/pub/scm/linux/kernel/git/daeinki/drm-exynos 
> tags/exynos-drm-fixes-for-v5.17-rc4
> 
> for you to fetch changes up to 38103fa72e0b70e3067fed489f8316dc5998f26c:
> 
>   drm/exynos: Search for TE-gpio in DSI panel's node (2022-02-10 19:17:22 
> +0900)
> 
> 
> Fixups
> - Make display controller drivers for Exynos series to use platform_get_irq
>   and platform_get_irq_byname functions to get the interrupt, which prevents
>   irq chaining from messed up when using hierarchical interrupt domains
>   which use "interrupts" property in the node.
> - Fix two regressions to TE-gpio handling.
> 
> 
> Lad Prabhakar (5):
>   drm/exynos/exynos7_drm_decon: Use platform_get_irq_byname() to get the 
> interrupt
>   drm/exynos: mixer: Use platform_get_irq() to get the interrupt
>   drm/exynos/exynos_drm_fimd: Use platform_get_irq_byname() to get the 
> interrupt
>   drm/exynos/fimc: Use platform_get_irq() to get the interrupt
>   drm/exynos: gsc: Use platform_get_irq() to get the interrupt
> 
> Marek Szyprowski (2):
>   drm/exynos: Don't fail if no TE-gpio is defined for DSI driver
>   drm/exynos: Search for TE-gpio in DSI panel's node
> 
>  drivers/gpu/drm/exynos/exynos7_drm_decon.c | 12 +++-
>  drivers/gpu/drm/exynos/exynos_drm_dsi.c|  6 --
>  drivers/gpu/drm/exynos/exynos_drm_fimc.c   | 13 +
>  drivers/gpu/drm/exynos/exynos_drm_fimd.c   | 13 -
>  drivers/gpu/drm/exynos/exynos_drm_gsc.c| 10 +++---
>  drivers/gpu/drm/exynos/exynos_mixer.c  | 14 ++
>  6 files changed, 25 insertions(+), 43 deletions(-)
> 


Re: [PATCH] drm/stm: ltdc: add support for CRC hashing feature

2022-02-22 Thread yannick Fertre

Hi Raphael,
thanks for the patch.

Acked-by: Yannick Fertre 

Best regards


On 2/11/22 11:46, Raphaël Gallais-Pou wrote:

From: Raphael Gallais-Pou 

This patch adds the CRC hashing feature supported by some recent hardware
versions of the LTDC. This is useful for test suites such as IGT-GPU-tools
[1] where a CRTC output frame can be compared to a test reference frame
thanks to their respective CRC hash.

[1] https://cgit.freedesktop.org/drm/igt-gpu-tools

Signed-off-by: Raphael Gallais-Pou 
---
  drivers/gpu/drm/stm/ltdc.c | 104 +++--
  drivers/gpu/drm/stm/ltdc.h |   3 ++
  2 files changed, 104 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/stm/ltdc.c b/drivers/gpu/drm/stm/ltdc.c
index 5eeb32c9c9ce..b29476aec3a1 100644
--- a/drivers/gpu/drm/stm/ltdc.c
+++ b/drivers/gpu/drm/stm/ltdc.c
@@ -77,6 +77,7 @@
  #define LTDC_CPSR 0x0044  /* Current Position Status */
  #define LTDC_CDSR 0x0048  /* Current Display Status */
  #define LTDC_EDCR 0x0060  /* External Display Control */
+#define LTDC_CCRCR 0x007C  /* Computed CRC value */
  #define LTDC_FUT  0x0090  /* Fifo underrun Threshold */
  
  /* Layer register offsets */

@@ -121,6 +122,7 @@
  
  #define GCR_LTDCEN	BIT(0)		/* LTDC ENable */

  #define GCR_DEN   BIT(16) /* Dither ENable */
+#define GCR_CRCEN  BIT(19) /* CRC ENable */
  #define GCR_PCPOL BIT(28) /* Pixel Clock POLarity-Inverted */
  #define GCR_DEPOL BIT(29) /* Data Enable POLarity-High */
  #define GCR_VSPOL BIT(30) /* Vertical Synchro POLarity-High */
@@ -227,6 +229,13 @@
  
  #define NB_PF		8		/* Max nb of HW pixel format */
  
+/*

+ * Skip the first value and the second in case CRC was enabled during
+ * the thread irq. This is to be sure CRC value is relevant for the
+ * frame.
+ */
+#define CRC_SKIP_FRAMES 2
+
  enum ltdc_pix_fmt {
PF_NONE,
/* RGB formats */
@@ -664,6 +673,26 @@ static inline void ltdc_set_ycbcr_coeffs(struct drm_plane 
*plane)
 ltdc_ycbcr2rgb_coeffs[enc][ran][1]);
  }
  
+static inline void ltdc_irq_crc_handle(struct ltdc_device *ldev,

+  struct drm_crtc *crtc)
+{
+   u32 crc;
+   int ret;
+
+   if (ldev->crc_skip_count < CRC_SKIP_FRAMES) {
+   ldev->crc_skip_count++;
+   return;
+   }
+
+   /* Get the CRC of the frame */
+   ret = regmap_read(ldev->regmap, LTDC_CCRCR, &crc);
+   if (ret)
+   return;
+
+   /* Report to DRM the CRC (hw dependent feature) */
+   drm_crtc_add_crc_entry(crtc, true, drm_crtc_accurate_vblank_count(crtc), 
&crc);
+}
+
  static irqreturn_t ltdc_irq_thread(int irq, void *arg)
  {
struct drm_device *ddev = arg;
@@ -671,9 +700,14 @@ static irqreturn_t ltdc_irq_thread(int irq, void *arg)
struct drm_crtc *crtc = drm_crtc_from_index(ddev, 0);
  
  	/* Line IRQ : trigger the vblank event */

-   if (ldev->irq_status & ISR_LIF)
+   if (ldev->irq_status & ISR_LIF) {
drm_crtc_handle_vblank(crtc);
  
+		/* Early return if CRC is not active */

+   if (ldev->crc_active)
+   ltdc_irq_crc_handle(ldev, crtc);
+   }
+
/* Save FIFO Underrun & Transfer Error status */
mutex_lock(&ldev->err_lock);
if (ldev->irq_status & ISR_FUIF)
@@ -1079,6 +1113,48 @@ static void ltdc_crtc_disable_vblank(struct drm_crtc 
*crtc)
regmap_clear_bits(ldev->regmap, LTDC_IER, IER_LIE);
  }
  
+static int ltdc_crtc_set_crc_source(struct drm_crtc *crtc, const char *source)

+{
+   struct ltdc_device *ldev = crtc_to_ltdc(crtc);
+   int ret;
+
+   DRM_DEBUG_DRIVER("\n");
+
+   if (!crtc)
+   return -ENODEV;
+
+   if (source && strcmp(source, "auto") == 0) {
+   ldev->crc_active = true;
+   ret = regmap_set_bits(ldev->regmap, LTDC_GCR, GCR_CRCEN);
+   } else if (!source) {
+   ldev->crc_active = false;
+   ret = regmap_clear_bits(ldev->regmap, LTDC_GCR, GCR_CRCEN);
+   } else {
+   ret = -EINVAL;
+   }
+
+   ldev->crc_skip_count = 0;
+   return ret;
+}
+
+static int ltdc_crtc_verify_crc_source(struct drm_crtc *crtc,
+  const char *source, size_t *values_cnt)
+{
+   DRM_DEBUG_DRIVER("\n");
+
+   if (!crtc)
+   return -ENODEV;
+
+   if (source && strcmp(source, "auto") != 0) {
+   DRM_DEBUG_DRIVER("Unknown CRC source %s for %s\n",
+source, crtc->name);
+   return -EINVAL;
+   }
+
+   *values_cnt = 1;
+   return 0;
+}
+
  static const struct drm_crtc_funcs ltdc_crtc_funcs = {
.destroy = drm_crtc_cleanup,
.set_config = drm_atomic_helper_set_config,
@@ -1091,6 +1167,20 @@ static const struct drm_crtc_funcs ltdc_crtc_funcs = {
  

Re: [PATCH 1/3] drm/edid: parse multiple CEA extension block

2022-02-22 Thread Jani Nikula
On Tue, 22 Feb 2022, Ville Syrjälä  wrote:
> On Tue, Feb 22, 2022 at 02:38:17PM +0800, Lee Shawn C wrote:
>> Try to find and parse more CEA ext blocks if edid->extensions
>> is greater than one.
>> 
>> Cc: Jani Nikula 
>> Cc: Ville Syrjala 
>> Cc: Ankit Nautiyal 
>> Signed-off-by: Lee Shawn C 
>> ---
>>  drivers/gpu/drm/drm_edid.c | 75 +++---
>>  1 file changed, 45 insertions(+), 30 deletions(-)
>> 
>> diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
>> index 12893e7be89b..3d5dbbeca7f9 100644
>> --- a/drivers/gpu/drm/drm_edid.c
>> +++ b/drivers/gpu/drm/drm_edid.c
>> @@ -4313,43 +4313,58 @@ add_cea_modes(struct drm_connector *connector, 
>> struct edid *edid)
>>  const u8 *cea = drm_find_cea_extension(edid);
>>  const u8 *db, *hdmi = NULL, *video = NULL;
>>  u8 dbl, hdmi_len, video_len = 0;
>> -int modes = 0;
>> +int modes = 0, j;
>>  
>> -if (cea && cea_revision(cea) >= 3) {
>> -int i, start, end;
>> +if (!cea)
>> +return 0;
>>  
>> -if (cea_db_offsets(cea, &start, &end))
>> -return 0;
>> +for (j = (cea - (u8 *)edid) / EDID_LENGTH; j <= edid->extensions;) {
>
> That looks rather illegible. I think we want a
> drm_find_cea_extension(const struct edid *edid, int *ext_index)
> and then just loop until it stops giving us stuff.

Neither approach takes multiple CEA blocks within DisplayID extension
into account. Or some blocks outside and some inside DisplayID
extension.

I think we're going to need abstracted EDID iteration similar to what
I've done for DisplayID iteration. We can't have all places
reimplementing the iteration like we have now.

BR,
Jani.

>
> There are also several other callers of drm_find_cea_extension().
> Why don't they require the same treatment?
>
>> +if (cea && cea_revision(cea) >= 3) {
>> +int i, start, end;
>>  
>> -for_each_cea_db(cea, i, start, end) {
>> -db = &cea[i];
>> -dbl = cea_db_payload_len(db);
>> +if (cea_db_offsets(cea, &start, &end))
>> +continue;
>>  
>> -if (cea_db_tag(db) == VIDEO_BLOCK) {
>> -video = db + 1;
>> -video_len = dbl;
>> -modes += do_cea_modes(connector, video, dbl);
>> -} else if (cea_db_is_hdmi_vsdb(db)) {
>> -hdmi = db;
>> -hdmi_len = dbl;
>> -} else if (cea_db_is_y420vdb(db)) {
>> -const u8 *vdb420 = &db[2];
>> -
>> -/* Add 4:2:0(only) modes present in EDID */
>> -modes += do_y420vdb_modes(connector,
>> -  vdb420,
>> -  dbl - 1);
>> +for_each_cea_db(cea, i, start, end) {
>> +db = &cea[i];
>> +dbl = cea_db_payload_len(db);
>> +
>> +if (cea_db_tag(db) == VIDEO_BLOCK) {
>> +video = db + 1;
>> +video_len = dbl;
>> +modes += do_cea_modes(connector, video, 
>> dbl);
>> +} else if (cea_db_is_hdmi_vsdb(db)) {
>> +hdmi = db;
>> +hdmi_len = dbl;
>> +} else if (cea_db_is_y420vdb(db)) {
>> +const u8 *vdb420 = &db[2];
>> +
>> +/* Add 4:2:0(only) modes present in 
>> EDID */
>> +modes += do_y420vdb_modes(connector,
>> +  vdb420,
>> +  dbl - 1);
>> +}
>>  }
>>  }
>> -}
>>  
>> -/*
>> - * We parse the HDMI VSDB after having added the cea modes as we will
>> - * be patching their flags when the sink supports stereo 3D.
>> - */
>> -if (hdmi)
>> -modes += do_hdmi_vsdb_modes(connector, hdmi, hdmi_len, video,
>> -video_len);
>> +/*
>> + * We parse the HDMI VSDB after having added the cea modes as 
>> we will
>> + * be patching their flags when the sink supports stereo 3D.
>> + */
>> +if (hdmi) {
>> +modes += do_hdmi_vsdb_modes(connector, hdmi, hdmi_len, 
>> video,
>> +video_len);
>> +hdmi  = NULL;
>> +video = NULL;
>> +hdmi_len = 0;
>> +  

Re: [PATCH v4 7/9] drm: vkms: Refactor the plane composer to accept new formats

2022-02-22 Thread Pekka Paalanen
On Mon, 21 Feb 2022 22:13:21 -0300
Igor Torrente  wrote:

> Hi Pekka,
> 
> On 2/21/22 06:18, Pekka Paalanen wrote:
> > On Sun, 20 Feb 2022 22:02:12 -0300
> > Igor Torrente  wrote:
> >   
> >> Hi Melissa,
> >>
> >> On 2/9/22 18:45, Melissa Wen wrote:  
> >>> On 02/08, Igor Torrente wrote:  
>  Hi Melissa,
> 
>  On 2/8/22 07:40, Melissa Wen wrote:  
> > On 01/21, Igor Torrente wrote:  
> >> Currently the blend function only accepts XRGB_8888 and ARGB_8888
> >> as a color input.
> >>
> >> This patch refactors all the functions related to the plane composition
> >> to overcome this limitation.
> >>
> >> A new internal format(`struct pixel`) is introduced to deal with all
> >> possible inputs. It consists of 16 bits fields that represent each of
> >> the channels.
> >>
> >> The pixels blend is done using this internal format. And new handlers
> >> are being added to convert a specific format to/from this internal 
> >> format.
> >>
> >> So the blend operation depends on these handlers to convert to this 
> >> common
> >> format. The blended result, if necessary, is converted to the writeback
> >> buffer format.
> >>
> >> This patch introduces three major differences to the blend function.
> >> 1 - All the planes are blended at once.
> >> 2 - The blend calculus is done as per line instead of per pixel.
> >> 3 - It is responsible for calculating the CRC and writing the writeback
> >>     buffer (if necessary).
> >>
> >> These changes allow us to allocate way less memory in the intermediate
> >> buffer to compute these operations. Because now we don't need to
> >> have the entire intermediate image lines at once, just one line is
> >> enough.
> >>
> >> | Memory consumption (output dimensions) |
> >> |:--:|
> >> |   Current  | This patch|
> >> |:--:|:-:|
> >> |   Width * Heigth   | 2 * Width |
> >>
> >> Beyond memory, we also have a minor performance benefit from all
> >> these changes. Results running the IGT tests `*kms_cursor_crc*`:
> >> 
> > First, thanks for this improvement.
> >
> > Some recent changes in kms_cursor_crc caused VKMS to fail in most test
> > cases (iirc, only size-change and alpha-opaque are passing currently).  
> 
>  I updated my igt and kernel(from drm_misc/drm-misc-next) to the latest
>  commit[1][2] and I'm getting mixed results. Sometimes most of the test
>  passes, sometimes almost nothing passes.  
> >>> hmm.. is it happening when running kms_cursor_crc? Is the results
> >>> variation random or is it possible to follow a set of steps to reproduce
> >>> it? When failing, what is the reason displayed by the log?  
> >>
> >> I investigated it a little bit and discovered that the KMS
> >> cursor(".*kms_cursor_crc*" ) are failing after the execution of
> >> writeback tests(".*kms_writeback.*").
> >>
> >> I don't know what is causing it, but they are failing while trying to
> >> commit the KMS changes.
> >>
> >> out.txt:
> >> IGT-Version: 1.26-NO-GIT (x86_64) (Linux: 5.17.0-rc2 x86_64)
> >> Stack trace:
> >> #0 ../lib/igt_core.c:1754 __igt_fail_assert()
> >> #1 ../lib/igt_kms.c:3795 do_display_commit()
> >> #2 ../lib/igt_kms.c:3901 igt_display_commit2()
> >> #3 ../tests/kms_cursor_crc.c:820 __igt_uniquereal_main814()
> >> #4 ../tests/kms_cursor_crc.c:814 main()
> >> #5 ../csu/libc-start.c:308 __libc_start_main()
> >> #6 [_start+0x2a]
> >> Subtest pipe-A-cursor-size-change: FAIL
> >>
> >> err.txt:
> >> (kms_cursor_crc:1936) igt_kms-CRITICAL: Test assertion failure function
> >> do_display_commit, file ../lib/igt_kms.c:3795:
> >> (kms_cursor_crc:1936) igt_kms-CRITICAL: Failed assertion: ret == 0
> >> (kms_cursor_crc:1936) igt_kms-CRITICAL: Last errno: 22, Invalid argument
> >> (kms_cursor_crc:1936) igt_kms-CRITICAL: error: -22 != 0
> >>  
> >>>
> >>>   From my side, only the first two subtest of kms_cursor_crc is passing
> >>> before this patch. And after your changes here, all subtests are
> >>> successful again, except those related to 32x10 cursor size (that needs
> >>> further investigation). I didn't check how the recent changes in
> >>> kms_cursor_crc affect VKMS performance on it, but I bet that clearing
> >>> the alpha channel is the reason to have the performance back.  
> >>
> >> Yeah, I also don't understand why the 32x10 cursor tests are failing.
> >>  
> > 
> > Hi,
> > 
> > are the tests putting the cursor partially outside of the CRTC area?
> > Or partially outside of primary plane area (which IIRC you used when you
> > should have used the CRTC area?)?
> > 
> > Does the writeback test forget to unlink the writeback connector? Or
> > does VKMS not handle unlinking the writeback connector?  
> 
> I don't know the answer to all these questions.

These are ju

Re: [Intel-gfx] [PATCH v3 08/11] drm/i915: Separate wakeref tracking

2022-02-22 Thread Andrzej Hajda




On 22.02.2022 08:12, Ville Syrjälä wrote:

On Tue, Feb 22, 2022 at 12:25:39AM +0100, Andrzej Hajda wrote:

-static noinline depot_stack_handle_t
+static intel_wakeref_t
  track_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
  {
-   depot_stack_handle_t stack, *stacks;
-   unsigned long flags;
-
-   if (rpm->no_wakeref_tracking)
-   return -1;
-
-   stack = __save_depot_stack();
-   if (!stack)
+   if (!rpm->available)
return -1;

Still not the same.



It was fixed but in wrong place - patch 11. I will move the change here.

Regards
Andrzej


Re: [PATCH v2] drm/panel: Select DRM_DP_HELPER for DRM_PANEL_EDP

2022-02-22 Thread Geert Uytterhoeven
On Tue, Feb 8, 2022 at 10:39 AM Geert Uytterhoeven  wrote:
> On Mon, Feb 7, 2022 at 12:31 PM Thomas Zimmermann  wrote:
> > As reported in [1], DRM_PANEL_EDP depends on DRM_DP_HELPER. Select
> > the option to fix the build failure. The error message is shown
> > below.
> >
> >   arm-linux-gnueabihf-ld: drivers/gpu/drm/panel/panel-edp.o: in function
> > `panel_edp_probe': panel-edp.c:(.text+0xb74): undefined reference to
> > `drm_panel_dp_aux_backlight'
> >   make[1]: *** [/builds/linux/Makefile:1222: vmlinux] Error 1
> >
> > The issue has been reported before, when DisplayPort helpers were
> > hidden behind the option CONFIG_DRM_KMS_HELPER. [2]
> >
> > v2:
> > * fix and expand commit description (Arnd)
> >
> > Signed-off-by: Thomas Zimmermann 
>
> Thanks for your patch!
>
> This fixes the build errors I'm seeing, so
> Tested-by: Geert Uytterhoeven 

Is this planned to be queued? This is still failing in drm-next.
Thanks!

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds


Re: [PATCH 1/3] drm/edid: parse multiple CEA extension block

2022-02-22 Thread Ville Syrjälä
On Tue, Feb 22, 2022 at 11:19:15AM +0200, Jani Nikula wrote:
> On Tue, 22 Feb 2022, Ville Syrjälä  wrote:
> > On Tue, Feb 22, 2022 at 02:38:17PM +0800, Lee Shawn C wrote:
> >> Try to find and parse more CEA ext blocks if edid->extensions
> >> is greater than one.
> >> 
> >> Cc: Jani Nikula 
> >> Cc: Ville Syrjala 
> >> Cc: Ankit Nautiyal 
> >> Signed-off-by: Lee Shawn C 
> >> ---
> >>  drivers/gpu/drm/drm_edid.c | 75 +++---
> >>  1 file changed, 45 insertions(+), 30 deletions(-)
> >> 
> >> diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
> >> index 12893e7be89b..3d5dbbeca7f9 100644
> >> --- a/drivers/gpu/drm/drm_edid.c
> >> +++ b/drivers/gpu/drm/drm_edid.c
> >> @@ -4313,43 +4313,58 @@ add_cea_modes(struct drm_connector *connector, 
> >> struct edid *edid)
> >>const u8 *cea = drm_find_cea_extension(edid);
> >>const u8 *db, *hdmi = NULL, *video = NULL;
> >>u8 dbl, hdmi_len, video_len = 0;
> >> -  int modes = 0;
> >> +  int modes = 0, j;
> >>  
> >> -  if (cea && cea_revision(cea) >= 3) {
> >> -  int i, start, end;
> >> +  if (!cea)
> >> +  return 0;
> >>  
> >> -  if (cea_db_offsets(cea, &start, &end))
> >> -  return 0;
> >> +  for (j = (cea - (u8 *)edid) / EDID_LENGTH; j <= edid->extensions;) {
> >
> > That looks rather illegible. I think we want a
> > drm_find_cea_extension(const struct edid *edid, int *ext_index)
> > and then just loop until it stops giving us stuff.
> 
> Neither approach takes multiple CEA blocks within DisplayID extension
> into account. Or some blocks outside and some inside DisplayID
> extension.
> 
> I think we're going to need abstracted EDID iteration similar to what
> I've done for DisplayID iteration. We can't have all places
> reimplementing the iteration like we have now.

Aye. We need so many layers of iteration in various places
that the whole thing is starting to resemble a Russian doll.
Following a common form should probably make that a lot more
manageable.

I've been already thinking about introducing an iterator for
the cea db stuff. But the EDID ext blocks is definitely another
target we need to look at.

And someone is going to have to figure out what are all the
ways these need to nest. I suppose the high level code
should only have to care about the deepest layer of stuff
and the iterators should take care to iterate through all
the potential containers? Eg. if the high level code wants
to look at cea dbs then it just iterates those and 
shouldn't have to care at all where they are stored.

-- 
Ville Syrjälä
Intel


Re: [Intel-gfx] [PATCH v3 08/11] drm/i915: Separate wakeref tracking

2022-02-22 Thread Ville Syrjälä
On Tue, Feb 22, 2022 at 10:28:33AM +0100, Andrzej Hajda wrote:
> 
> 
> On 22.02.2022 08:12, Ville Syrjälä wrote:
> > On Tue, Feb 22, 2022 at 12:25:39AM +0100, Andrzej Hajda wrote:
> >> -static noinline depot_stack_handle_t
> >> +static intel_wakeref_t
> >>   track_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
> >>   {
> >> -  depot_stack_handle_t stack, *stacks;
> >> -  unsigned long flags;
> >> -
> >> -  if (rpm->no_wakeref_tracking)
> >> -  return -1;
> >> -
> >> -  stack = __save_depot_stack();
> >> -  if (!stack)
> >> +  if (!rpm->available)
> >>return -1;
> > Still not the same.
> >
> 
> It was fixed but in wrong place - patch 11. I will move the change here.

Doesn't look correct there either.

-- 
Ville Syrjälä
Intel


Re: [Intel-gfx] [PATCH 1/3] drm/i915/guc: Limit scheduling properties to avoid overflow

2022-02-22 Thread Tvrtko Ursulin



On 18/02/2022 21:33, john.c.harri...@intel.com wrote:

From: John Harrison 

GuC converts the pre-emption timeout and timeslice quantum values into
clock ticks internally. That significantly reduces the point of 32bit
overflow. On current platforms, worst case scenario is approximately


Where does 32-bit come from, the GuC side? We already use 64-bits so that's
something to fix to start with. Yep...

./gt/uc/intel_guc_fwif.h:   u32 execution_quantum;

./gt/uc/intel_guc_submission.c: desc->execution_quantum = 
engine->props.timeslice_duration_ms * 1000;

./gt/intel_engine_types.h:  unsigned long timeslice_duration_ms;

timeslice_store/preempt_timeout_store:
err = kstrtoull(buf, 0, &duration);

So both kconfig and sysfs can already overflow GuC, not only because of tick 
conversion internally but because at backend level nothing was done for 
assigning 64-bit into 32-bit. Or I failed to find where it is handled.


110 seconds. Rather than allowing the user to set higher values and
then get confused by early timeouts, add limits when setting these
values.


Btw who is reviewing GuC patches these days - things have somehow gotten pretty
quiet in activity and I don't think that's due to an absence of stuff to improve
or fix? Asking since I think I noticed a few already which you posted and then
crickets on the mailing list.


Signed-off-by: John Harrison 
---
  drivers/gpu/drm/i915/gt/intel_engine_cs.c   | 15 +++
  drivers/gpu/drm/i915/gt/sysfs_engines.c | 14 ++
  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h |  9 +
  3 files changed, 38 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index e53008b4dd05..2a1e9f36e6f5 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -389,6 +389,21 @@ static int intel_engine_setup(struct intel_gt *gt, enum 
intel_engine_id id,
if (GRAPHICS_VER(i915) == 12 && engine->class == RENDER_CLASS)
engine->props.preempt_timeout_ms = 0;
  
+	/* Cap timeouts to prevent overflow inside GuC */

+   if (intel_guc_submission_is_wanted(&gt->uc.guc)) {
+   if (engine->props.timeslice_duration_ms > 
GUC_POLICY_MAX_EXEC_QUANTUM_MS) {


Hm "wanted".. There's been too much back and forth on the GuC load options over
the years to keep track.. intel_engine_uses_guc sounds like it would work and
read nicer.

And limit to class instead of applying to all engines looks like a miss.


+   drm_info(&engine->i915->drm, "Warning, clamping timeslice 
duration to %d to prevent possibly overflow\n",
+GUC_POLICY_MAX_EXEC_QUANTUM_MS);
+   engine->props.timeslice_duration_ms = 
GUC_POLICY_MAX_EXEC_QUANTUM_MS;


I am not sure logging such message during driver load is useful. Sounds more 
like a confused driver which starts with one value and then overrides itself. 
I'd just silently set the value appropriate for the active backend. Preemption 
timeout kconfig text already documents the fact timeouts can get overridden at 
runtime depending on platform+engine. So maybe just add same text to timeslice 
kconfig.


+   }
+
+   if (engine->props.preempt_timeout_ms > 
GUC_POLICY_MAX_PREEMPT_TIMEOUT_MS) {
+   drm_info(&engine->i915->drm, "Warning, clamping pre-emption 
timeout to %d to prevent possibly overflow\n",
+GUC_POLICY_MAX_PREEMPT_TIMEOUT_MS);
+   engine->props.preempt_timeout_ms = 
GUC_POLICY_MAX_PREEMPT_TIMEOUT_MS;
+   }
+   }
+
engine->defaults = engine->props; /* never to change again */
  
  	engine->context_size = intel_engine_context_size(gt, engine->class);

diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c 
b/drivers/gpu/drm/i915/gt/sysfs_engines.c
index 967031056202..f57efe026474 100644
--- a/drivers/gpu/drm/i915/gt/sysfs_engines.c
+++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c
@@ -221,6 +221,13 @@ timeslice_store(struct kobject *kobj, struct 
kobj_attribute *attr,
if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
return -EINVAL;
  
+	if (intel_uc_uses_guc_submission(&engine->gt->uc) &&

+   duration > GUC_POLICY_MAX_EXEC_QUANTUM_MS) {
+   duration = GUC_POLICY_MAX_EXEC_QUANTUM_MS;
+   drm_info(&engine->i915->drm, "Warning, clamping timeslice duration 
to %lld to prevent possibly overflow\n",
+duration);
+   }


I would suggest avoiding duplicated clamping logic. Maybe hide all the backend
logic in helpers then, like maybe:

  d = intel_engine_validate_timeslice/preempt_timeout(engine, duration);
  if (d != duration)
return -EINVAL;

Returning -EINVAL would be equivalent to existing behaviour:

if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
return -EINVAL;

That way user

Re: [Intel-gfx] [PATCH 0/3] Improve anti-pre-emption w/a for compute workloads

2022-02-22 Thread Tvrtko Ursulin



On 18/02/2022 21:33, john.c.harri...@intel.com wrote:

From: John Harrison 

Compute workloads are inherently not pre-emptible on current hardware.
Thus the pre-emption timeout was disabled as a workaround to prevent
unwanted resets. Instead, the hang detection was left to the heartbeat
and its (longer) timeout. This is undesirable with GuC submission as
the heartbeat is a full GT reset rather than a per engine reset and so
is much more destructive. Instead, just bump the pre-emption timeout


Can we have a feature request to allow asking GuC for an engine reset?

Regards,

Tvrtko


to a big value. Also, update the heartbeat to allow such a long
pre-emption delay in the final heartbeat period.

Signed-off-by: John Harrison 


John Harrison (3):
   drm/i915/guc: Limit scheduling properties to avoid overflow
   drm/i915/gt: Make the heartbeat play nice with long pre-emption
 timeouts
   drm/i915: Improve long running OCL w/a for GuC submission

  drivers/gpu/drm/i915/gt/intel_engine_cs.c | 37 +--
  .../gpu/drm/i915/gt/intel_engine_heartbeat.c  | 16 
  drivers/gpu/drm/i915/gt/sysfs_engines.c   | 14 +++
  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |  9 +
  4 files changed, 73 insertions(+), 3 deletions(-)



Re: [PATCH v3 05/11] lib/ref_tracker: __ref_tracker_dir_print improve printing

2022-02-22 Thread Andrzej Hajda




On 22.02.2022 01:08, Eric Dumazet wrote:

On Mon, Feb 21, 2022 at 3:26 PM Andrzej Hajda  wrote:

To improve readibility of ref_tracker printing following changes

readability


have been performed:
- reports are printed per stack_handle - log is more compact,
- added display name for ref_tracker_dir,
- stack trace is printed indented, in the same printk call,
- total number of references is printed every time,
- print info about dropped references.

Signed-off-by: Andrzej Hajda 
---
  include/linux/ref_tracker.h | 15 +--
  lib/ref_tracker.c   | 90 -
  2 files changed, 91 insertions(+), 14 deletions(-)

diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h
index 3e9e9df2a41f5..a2cf1f6309adb 100644
--- a/include/linux/ref_tracker.h
+++ b/include/linux/ref_tracker.h
@@ -17,12 +17,19 @@ struct ref_tracker_dir {
 booldead;
 struct list_headlist; /* List of active trackers */
 struct list_headquarantine; /* List of dead trackers */
+   charname[32];
  #endif
  };

  #ifdef CONFIG_REF_TRACKER
-static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
-   unsigned int quarantine_count)
+
+// Temporary allow two and three arguments, until consumers are converted
+#define ref_tracker_dir_init(_d, _q, args...) _ref_tracker_dir_init(_d, _q, 
##args, #_d)
+#define _ref_tracker_dir_init(_d, _q, _n, ...) __ref_tracker_dir_init(_d, _q, 
_n)
+
+static inline void __ref_tracker_dir_init(struct ref_tracker_dir *dir,
+   unsigned int quarantine_count,
+   const char *name)
  {
 INIT_LIST_HEAD(&dir->list);
 INIT_LIST_HEAD(&dir->quarantine);
@@ -31,6 +38,7 @@ static inline void ref_tracker_dir_init(struct 
ref_tracker_dir *dir,
 dir->dead = false;
 refcount_set(&dir->untracked, 1);
 refcount_set(&dir->no_tracker, 1);
+   strlcpy(dir->name, name, sizeof(dir->name));
 stack_depot_init();
  }

@@ -51,7 +59,8 @@ int ref_tracker_free(struct ref_tracker_dir *dir,
  #else /* CONFIG_REF_TRACKER */

  static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
-   unsigned int quarantine_count)
+   unsigned int quarantine_count,
+   ...)
  {
  }

diff --git a/lib/ref_tracker.c b/lib/ref_tracker.c
index 5e9f90bbf771b..ab1253fde244e 100644
--- a/lib/ref_tracker.c
+++ b/lib/ref_tracker.c
@@ -1,11 +1,16 @@
  // SPDX-License-Identifier: GPL-2.0-or-later
+
+#define pr_fmt(fmt) "ref_tracker: " fmt
+
  #include 
+#include 
  #include 
  #include 
  #include 
  #include 

  #define REF_TRACKER_STACK_ENTRIES 16
+#define STACK_BUF_SIZE 1024

  struct ref_tracker {
 struct list_headhead;   /* anchor into dir->list or 
dir->quarantine */
@@ -14,24 +19,87 @@ struct ref_tracker {
 depot_stack_handle_tfree_stack_handle;
  };

-void __ref_tracker_dir_print(struct ref_tracker_dir *dir,
-  unsigned int display_limit)
+struct ref_tracker_dir_stats {
+   int total;
+   int count;
+   struct {
+   depot_stack_handle_t stack_handle;
+   unsigned int count;
+   } stacks[];
+};
+
+static struct ref_tracker_dir_stats *
+ref_tracker_get_stats(struct ref_tracker_dir *dir, unsigned int limit)
  {
+   struct ref_tracker_dir_stats *stats;
 struct ref_tracker *tracker;
-   unsigned int i = 0;

-   lockdep_assert_held(&dir->lock);
+   stats = kmalloc(struct_size(stats, stacks, limit),
+   GFP_NOWAIT | __GFP_NOWARN);

I would be more comfortable if the allocation was done by the caller,
possibly using GFP_KERNEL and eventually kvmalloc(),
instead of under dir->lock ?


I also thought about it, but decided to leave this change to another patch
as the change can be substantial and could open another discussion.


I am not sure what you mean by 'caller' but it could be even external 
user of the API:

1. alloc data for ref_tracker_dir_stats.
2. take locks, if necessary.
3. gather stats (ref_tracker_get_stats) atomically.
4. release taken locks.
5. print stats.

This way, allocation and printing would happen outside locks.





+   if (!stats)
+   return ERR_PTR(-ENOMEM);
+   stats->total = 0;
+   stats->count = 0;

 list_for_each_entry(tracker, &dir->list, head) {
-   if (i < display_limit) {
-   pr_err("leaked reference.\n");
-   if (tracker->alloc_stack_handle)
-   stack_depot_print(tracker->alloc_stack_handle);
-   i++;
-   } else {
-   break;
+   depot_stack_handle_t stack = tracker->alloc_stack_handle;
+   int i;
+

[PATCH] drm/sched: Add device pointer to drm_gpu_scheduler

2022-02-22 Thread Jiawei Gu
Add device pointer so scheduler's printing can use
DRM_DEV_ERROR() instead, which makes life easier in multi-GPU
scenarios.

v2: amend all calls of drm_sched_init()
v3: fill dev pointer for all drm_sched_init() calls

Signed-off-by: Jiawei Gu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c |  2 +-
 drivers/gpu/drm/etnaviv/etnaviv_sched.c   |  2 +-
 drivers/gpu/drm/lima/lima_sched.c |  2 +-
 drivers/gpu/drm/msm/msm_ringbuffer.c  |  2 +-
 drivers/gpu/drm/panfrost/panfrost_job.c   |  2 +-
 drivers/gpu/drm/scheduler/sched_main.c|  9 +
 drivers/gpu/drm/v3d/v3d_sched.c   | 10 +-
 include/drm/gpu_scheduler.h   |  3 ++-
 8 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 45977a72b5dd..cd2d594d4ffc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -502,7 +502,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
 
r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
   num_hw_submission, amdgpu_job_hang_limit,
-  timeout, NULL, sched_score, ring->name);
+  timeout, NULL, sched_score, ring->name, adev->dev);
if (r) {
DRM_ERROR("Failed to create scheduler on ring %s.\n",
  ring->name);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c 
b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
index 58f593b278c1..35e5ef7dbdcc 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -195,7 +195,7 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu)
ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops,
 etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
 msecs_to_jiffies(500), NULL, NULL,
-dev_name(gpu->dev));
+dev_name(gpu->dev), gpu->dev);
if (ret)
return ret;
 
diff --git a/drivers/gpu/drm/lima/lima_sched.c 
b/drivers/gpu/drm/lima/lima_sched.c
index 5612d73f238f..8d517c8880e3 100644
--- a/drivers/gpu/drm/lima/lima_sched.c
+++ b/drivers/gpu/drm/lima/lima_sched.c
@@ -490,7 +490,7 @@ int lima_sched_pipe_init(struct lima_sched_pipe *pipe, 
const char *name)
return drm_sched_init(&pipe->base, &lima_sched_ops, 1,
  lima_job_hang_limit,
  msecs_to_jiffies(timeout), NULL,
- NULL, name);
+ NULL, name, pipe->ldev->dev);
 }
 
 void lima_sched_pipe_fini(struct lima_sched_pipe *pipe)
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c 
b/drivers/gpu/drm/msm/msm_ringbuffer.c
index 3bbf574c3bdc..367a6aaa3a20 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -89,7 +89,7 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu 
*gpu, int id,
 
ret = drm_sched_init(&ring->sched, &msm_sched_ops,
num_hw_submissions, 0, sched_timeout,
-   NULL, NULL, to_msm_bo(ring->bo)->name);
+   NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev);
if (ret) {
goto fail;
}
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c 
b/drivers/gpu/drm/panfrost/panfrost_job.c
index 908d79520853..a6925dbb6224 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -812,7 +812,7 @@ int panfrost_job_init(struct panfrost_device *pfdev)
 nentries, 0,
 msecs_to_jiffies(JOB_TIMEOUT_MS),
 pfdev->reset.wq,
-NULL, "pan_js");
+NULL, "pan_js", pfdev->dev);
if (ret) {
dev_err(pfdev->dev, "Failed to create scheduler: %d.", 
ret);
goto err_sched;
diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index f91fb31ab7a7..b81fceb0b8a2 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -491,7 +491,7 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool 
full_recovery)
if (r == -ENOENT)
drm_sched_job_done(s_job);
else if (r)
-   DRM_ERROR("fence add callback failed (%d)\n",
+   DRM_DEV_ERROR(sched->dev, "fence add callback 
failed (%d)\n",
  r);
} else
drm_sched_job_done(s_job);
@@ -957,7 +957,7 @@ static int drm_sched_main(void *param)
if (r == -ENOENT)
   

Re: [PATCH 2/2] drm/doc: add rfc section for small BAR uapi

2022-02-22 Thread Thomas Hellström



On 2/18/22 12:22, Matthew Auld wrote:

Add an entry for the new uapi needed for small BAR on DG2+.

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
Cc: Jon Bloomfield 
Cc: Daniel Vetter 
Cc: Jordan Justen 
Cc: Kenneth Graunke 
Cc: mesa-...@lists.freedesktop.org
---
  Documentation/gpu/rfc/i915_small_bar.h   | 153 +++
  Documentation/gpu/rfc/i915_small_bar.rst |  40 ++
  Documentation/gpu/rfc/index.rst  |   4 +
  3 files changed, 197 insertions(+)
  create mode 100644 Documentation/gpu/rfc/i915_small_bar.h
  create mode 100644 Documentation/gpu/rfc/i915_small_bar.rst

diff --git a/Documentation/gpu/rfc/i915_small_bar.h 
b/Documentation/gpu/rfc/i915_small_bar.h
new file mode 100644
index ..fa65835fd608
--- /dev/null
+++ b/Documentation/gpu/rfc/i915_small_bar.h
@@ -0,0 +1,153 @@
+/**
+ * struct __drm_i915_gem_create_ext - Existing gem_create behaviour, with added
+ * extension support using struct i915_user_extension.
+ *
+ * Note that in the future we want to have our buffer flags here,


Does this sentence need updating, with the flags member?



  at least for
+ * the stuff that is immutable. Previously we would have two ioctls, one to
+ * create the object with gem_create, and another to apply various parameters,
+ * however this creates some ambiguity for the params which are considered
+ * immutable. Also in general we're phasing out the various SET/GET ioctls.
+ */
+struct __drm_i915_gem_create_ext {
+   /**
+* @size: Requested size for the object.
+*
+* The (page-aligned) allocated size for the object will be returned.
+*
+* Note that for some devices we have might have further minimum
+* page-size restrictions(larger than 4K), like for device local-memory.
+* However in general the final size here should always reflect any
+* rounding up, if for example using the 
I915_GEM_CREATE_EXT_MEMORY_REGIONS
+* extension to place the object in device local-memory.
+*/
+   __u64 size;
+   /**
+* @handle: Returned handle for the object.
+*
+* Object handles are nonzero.
+*/
+   __u32 handle;
+   /**
+* @flags: Optional flags.
+*
+* Supported values:
+*
+* I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS - Signal to the kernel that
+* the object will need to be accessed via the CPU.
+*
+* Only valid when placing objects in I915_MEMORY_CLASS_DEVICE, and
+* only strictly required on platforms where only some of the device
+* memory is directly visible or mappable through the CPU, like on DG2+.
+*
+* One of the placements MUST also be I915_MEMORY_CLASS_SYSTEM, to
+* ensure we can always spill the allocation to system memory, if we
+* can't place the object in the mappable part of
+* I915_MEMORY_CLASS_DEVICE.
+*
+* Note that buffers that need to be captured with EXEC_OBJECT_CAPTURE,
+* will need to enable this hint, if the object can also be placed in
+* I915_MEMORY_CLASS_DEVICE, starting from DG2+. The execbuf call will
+* throw an error otherwise. This also means that such objects will need
+* I915_MEMORY_CLASS_SYSTEM set as a possible placement.
+*
+* Without this hint, the kernel will assume that non-mappable
+* I915_MEMORY_CLASS_DEVICE is preferred for this object. Note that the
+* kernel can still migrate the object to the mappable part, as a last
+* resort, if userspace ever CPU faults this object, but this might be
+* expensive, and so ideally should be avoided.
+*/
+#define I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS (1 << 0)
+   __u32 flags;
+   /**
+* @extensions: The chain of extensions to apply to this object.
+*
+* This will be useful in the future when we need to support several
+* different extensions, and we need to apply more than one when
+* creating the object. See struct i915_user_extension.
+*
+* If we don't supply any extensions then we get the same old gem_create
+* behaviour.
+*
+* For I915_GEM_CREATE_EXT_MEMORY_REGIONS usage see
+* struct drm_i915_gem_create_ext_memory_regions.
+*
+* For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
+* struct drm_i915_gem_create_ext_protected_content.
+*/
+#define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
+#define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1
+   __u64 extensions;
+};
+
+#define DRM_I915_QUERY_VMA_INFO5
+
+/**
+ * struct __drm_i915_query_vma_info
+ *
+ * Given a vm and GTT address, lookup the corresponding vma, returning its set
+ * of attributes.
+ *
+ * .. code-block:: C
+ *
+ * struct drm_i915_query_vma_info info = {};
+ * struct drm_i915_query_item item = {
+ * .data_ptr = (uintptr_t)&info,
+ *  

[PATCH v5 0/4] GuC HWCONFIG with documentation

2022-02-22 Thread Jordan Justen
This is John/Rodrigo's 2 patches with some minor changes, and I added
2 patches.

"drm/i915/uapi: Add query for hwconfig blob" was changed:

 * Rename DRM_I915_QUERY_HWCONFIG_TABLE to DRM_I915_QUERY_HWCONFIG_BLOB
   as requested by Joonas.

 * Reword commit message

 * I added Acked-by to this patch, but this only applies in the
   context of this version of the patchset. If my changes are
   rejected, then please *do not* add my Acked-by to the other series.

   In particular, I do not want my Acked-by on the patch if the patch
   mentions the HWCONFIG format, but is not willing to add that to the
   actual uAPI.

   I also do not want my Acked-by on it if it mentions "consolidation"
   of this data. Since we are dealing with open source projects (aside
   from GuC), this doesn't seem appropriate.

"drm/i915/uapi: Add struct drm_i915_query_hwconfig_blob_item" adds a
struct to the uAPI and documents the return value for
DRM_I915_QUERY_HWCONFIG_BLOB. (Except, keys / values are still
deferred to the PRM.)

"drm/i915/guc: Verify hwconfig blob matches supported format" does the
simple verification of the blob to make sure it matches what the uAPI
documents.

v2:
 * Fix -Werror errors.
 * Rebase to drm-intel/for-linux-next instead of
   drm-intel/for-linux-next-gt, as this seems to be what CI wants.
 * Fix u32 -> __u32.
 * Add commit message for "Verify hwconfig blob" patch as requested by
   Tvrtko.
 * Reword text added to i915_drm.h as requested by Tvrtko. (Attempting
   to indicate the overall blob ends right at the last blob item.)

v3:
 * Add several changes suggested by Tvrtko in the "Verify hwconfig
   blob", along with some tweaks to i915_drm.h from the feedback for
   the same patch.

v4:
 * Rewrite verify_hwconfig_blob() to hopefully be clearer without
   relying on comments so much, and add various suggestions from
   Michal.
 * Michal also had some suggestions in John's "drm/i915/guc: Add fetch
   of hwconfig table" patch. I held off on making any of these changes
   in this version.

v5:
 * Add many changes suggested by Michal in John's "drm/i915/guc: Add
   fetch of hwconfig table" patch.
 * Fix documentation formatting as suggested by Daniel in
   "drm/i915/uapi: Add struct drm_i915_query_hwconfig_blob_item"

John Harrison (1):
  drm/i915/guc: Add fetch of hwconfig table

Jordan Justen (2):
  drm/i915/uapi: Add struct drm_i915_query_hwconfig_blob_item
  drm/i915/guc: Verify hwconfig blob matches supported format

Rodrigo Vivi (1):
  drm/i915/uapi: Add query for hwconfig blob

 drivers/gpu/drm/i915/Makefile |   1 +
 .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  |   1 +
 .../gpu/drm/i915/gt/uc/abi/guc_errors_abi.h   |   4 +
 drivers/gpu/drm/i915/gt/uc/intel_guc.h|   3 +
 .../gpu/drm/i915/gt/uc/intel_guc_hwconfig.c   | 187 ++
 .../gpu/drm/i915/gt/uc/intel_guc_hwconfig.h   |  19 ++
 drivers/gpu/drm/i915/gt/uc/intel_uc.c |   7 +
 drivers/gpu/drm/i915/i915_pci.c   |   1 +
 drivers/gpu/drm/i915/i915_query.c |  23 +++
 drivers/gpu/drm/i915/intel_device_info.h  |   1 +
 include/uapi/drm/i915_drm.h   |  44 +
 11 files changed, 291 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
 create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.h

-- 
2.34.1



[PATCH v5 1/4] drm/i915/guc: Add fetch of hwconfig table

2022-02-22 Thread Jordan Justen
From: John Harrison 

Implement support for fetching the hardware description table from the
GuC. The call is made twice - once without a destination buffer to
query the size and then a second time to fill in the buffer.

Note that the table is only available on ADL-P and later platforms.

v5 (of Jordan's posting):
 * Various changes made by Jordan and recommended by Michal
   - Makefile ordering
   - Adjust "struct intel_guc_hwconfig hwconfig" comment
   - Set Copyright year to 2022 in intel_guc_hwconfig.c/.h
   - Drop inline from hwconfig_to_guc()
   - Replace hwconfig param with guc in __guc_action_get_hwconfig()
   - Move zero size check into guc_hwconfig_discover_size()
   - Change comment to say zero size offset/size is needed to get size
   - Add has_guc_hwconfig to devinfo and drop has_table()
   - Change drm_err to notice in __uc_init_hw() and use %pe

Cc: Michal Wajdeczko 
Signed-off-by: Rodrigo Vivi 
Signed-off-by: John Harrison 
Reviewed-by: Matthew Brost 
Acked-by: Jon Bloomfield 
Signed-off-by: Jordan Justen 
---
 drivers/gpu/drm/i915/Makefile |   1 +
 .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  |   1 +
 .../gpu/drm/i915/gt/uc/abi/guc_errors_abi.h   |   4 +
 drivers/gpu/drm/i915/gt/uc/intel_guc.h|   3 +
 .../gpu/drm/i915/gt/uc/intel_guc_hwconfig.c   | 145 ++
 .../gpu/drm/i915/gt/uc/intel_guc_hwconfig.h   |  19 +++
 drivers/gpu/drm/i915/gt/uc/intel_uc.c |   7 +
 drivers/gpu/drm/i915/i915_pci.c   |   1 +
 drivers/gpu/drm/i915/intel_device_info.h  |   1 +
 9 files changed, 182 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
 create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index e9ce09620eb5..661f1afb51d7 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -188,6 +188,7 @@ i915-y += gt/uc/intel_uc.o \
  gt/uc/intel_guc_ct.o \
  gt/uc/intel_guc_debugfs.o \
  gt/uc/intel_guc_fw.o \
+ gt/uc/intel_guc_hwconfig.o \
  gt/uc/intel_guc_log.o \
  gt/uc/intel_guc_log_debugfs.o \
  gt/uc/intel_guc_rc.o \
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h 
b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
index fe5d7d261797..4a61c819f32b 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
@@ -137,6 +137,7 @@ enum intel_guc_action {
INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009,
INTEL_GUC_ACTION_SETUP_PC_GUCRC = 0x3004,
INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000,
+   INTEL_GUC_ACTION_GET_HWCONFIG = 0x4100,
INTEL_GUC_ACTION_REGISTER_CONTEXT = 0x4502,
INTEL_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503,
INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h 
b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
index 488b6061ee89..f9e2a6aaef4a 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
@@ -8,6 +8,10 @@
 
 enum intel_guc_response_status {
INTEL_GUC_RESPONSE_STATUS_SUCCESS = 0x0,
+   INTEL_GUC_RESPONSE_NOT_SUPPORTED = 0x20,
+   INTEL_GUC_RESPONSE_NO_ATTRIBUTE_TABLE = 0x201,
+   INTEL_GUC_RESPONSE_NO_DECRYPTION_KEY = 0x202,
+   INTEL_GUC_RESPONSE_DECRYPTION_FAILED = 0x204,
INTEL_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000,
 };
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index f9240d4baa69..2058eb8c3d0c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -13,6 +13,7 @@
 #include "intel_guc_fw.h"
 #include "intel_guc_fwif.h"
 #include "intel_guc_ct.h"
+#include "intel_guc_hwconfig.h"
 #include "intel_guc_log.h"
 #include "intel_guc_reg.h"
 #include "intel_guc_slpc_types.h"
@@ -37,6 +38,8 @@ struct intel_guc {
struct intel_guc_ct ct;
/** @slpc: sub-structure containing SLPC related data and objects */
struct intel_guc_slpc slpc;
+   /** @hwconfig: data related to hardware configuration KLV blob */
+   struct intel_guc_hwconfig hwconfig;
 
/** @sched_engine: Global engine used to submit requests to GuC */
struct i915_sched_engine *sched_engine;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
new file mode 100644
index 000000000000..ad289603460c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "gt/intel_gt.h"
+#include "i915_drv.h"
+#include "i915_memcpy.h"
+#include "intel_guc_hwconfig.h"
+
+static struct intel_guc *hwconfig_to_guc(struct intel_guc_hwconfig *hwconfig)
+{
+   return container_of(hwconfig, struct in

[PATCH v5 2/4] drm/i915/uapi: Add query for hwconfig blob

2022-02-22 Thread Jordan Justen
From: Rodrigo Vivi 

The DRM_I915_QUERY_HWCONFIG_BLOB query item returns a blob of data
which it receives from the GuC software. This blob provides some
useful data about the hardware for drivers.

Although the blob is not fully documented at this time, the basic
format is an array of u32 values. The array is a simple and flexible
KLV (Key/Length/Value) formatted table. For example, it could be just:
enum device_attr { ATTR_SOME_VALUE = 0, ATTR_SOME_MASK = 1, };

  static const u32 hwconfig[] = {
  ATTR_SOME_VALUE,
  1, // Value Length in DWords
  8, // Value

  ATTR_SOME_MASK,
  3,
  0x00FFFFFFFF, 0xFFFFFFFF, 0xFF000000,
  };

The attribute ids and meaning of the values will be documented in the
Programmer Reference Manuals when released.

Cc: Tvrtko Ursulin 
Cc: Kenneth Graunke 
Cc: Michal Wajdeczko 
Cc: Slawomir Milczarek 
Cc: Joonas Lahtinen 
Signed-off-by: Rodrigo Vivi 
Signed-off-by: John Harrison 
Reviewed-by: Matthew Brost 
Acked-by: Jordan Justen 
Tested-by: Jordan Justen 
Acked-by: Jon Bloomfield 
---
 drivers/gpu/drm/i915/i915_query.c | 23 +++
 include/uapi/drm/i915_drm.h   |  1 +
 2 files changed, 24 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_query.c 
b/drivers/gpu/drm/i915/i915_query.c
index 2dfbc22857a3..195524e9a369 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -479,12 +479,35 @@ static int query_memregion_info(struct drm_i915_private 
*i915,
return total_length;
 }
 
+static int query_hwconfig_blob(struct drm_i915_private *i915,
+  struct drm_i915_query_item *query_item)
+{
+   struct intel_gt *gt = to_gt(i915);
+   struct intel_guc_hwconfig *hwconfig = &gt->uc.guc.hwconfig;
+
+   if (!hwconfig->size || !hwconfig->ptr)
+   return -ENODEV;
+
+   if (query_item->length == 0)
+   return hwconfig->size;
+
+   if (query_item->length < hwconfig->size)
+   return -EINVAL;
+
+   if (copy_to_user(u64_to_user_ptr(query_item->data_ptr),
+hwconfig->ptr, hwconfig->size))
+   return -EFAULT;
+
+   return hwconfig->size;
+}
+
 static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
struct drm_i915_query_item *query_item) 
= {
query_topology_info,
query_engine_info,
query_perf_config,
query_memregion_info,
+   query_hwconfig_blob,
 };
 
 int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 914ebd9290e5..069d2fadfbd9 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -2685,6 +2685,7 @@ struct drm_i915_query_item {
 #define DRM_I915_QUERY_ENGINE_INFO 2
 #define DRM_I915_QUERY_PERF_CONFIG  3
 #define DRM_I915_QUERY_MEMORY_REGIONS   4
+#define DRM_I915_QUERY_HWCONFIG_BLOB   5
 /* Must be kept compact -- no holes and well documented */
 
/**
-- 
2.34.1



[PATCH v5 4/4] drm/i915/guc: Verify hwconfig blob matches supported format

2022-02-22 Thread Jordan Justen
i915_drm.h now defines the format of the returned
DRM_I915_QUERY_HWCONFIG_BLOB query item. Since i915 receives this from
the black box GuC software, it should verify that the data matches
that format before sending it to user-space.

The verification makes a single simple pass through the blob contents,
so this verification step should not add a significant amount of init
time to i915.

v3:
 * Add various changes suggested by Tvrtko

v4:
 * Rewrite verify_hwconfig_blob() to hopefully be clearer without
   relying on comments so much, and add various suggestions from
   Michal.

Signed-off-by: Jordan Justen 
Acked-by: Jon Bloomfield 
---
 .../gpu/drm/i915/gt/uc/intel_guc_hwconfig.c   | 44 ++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
index ad289603460c..a844b880cbdb 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
@@ -73,9 +73,46 @@ static int guc_hwconfig_discover_size(struct 
intel_guc_hwconfig *hwconfig)
return 0;
 }
 
+static int verify_hwconfig_blob(struct intel_guc_hwconfig *hwconfig)
+{
+   struct intel_guc *guc = hwconfig_to_guc(hwconfig);
+   struct drm_device *drm = &guc_to_gt(guc)->i915->drm;
+   struct drm_i915_query_hwconfig_blob_item *item = hwconfig->ptr;
+   u64 offset = 0;
+   u64 remaining = hwconfig->size;
+   /* Everything before the data field is required */
+   u64 min_item_size = offsetof(struct drm_i915_query_hwconfig_blob_item, 
data);
+   u64 item_size;
+
+   if (!IS_ALIGNED(hwconfig->size, sizeof(u32))) {
+   drm_err(drm, "hwconfig blob size (%d) is not u32 aligned\n", 
hwconfig->size);
+   return -EINVAL;
+   }
+
+   while (offset < hwconfig->size) {
+   if (remaining < min_item_size) {
+   drm_err(drm, "hwconfig blob invalid (no room for item 
required fields at offset %lld)\n",
+   offset);
+   return -EINVAL;
+   }
+   item_size = min_item_size + sizeof(u32) * item->length;
+   if (item_size > remaining) {
+   drm_err(drm, "hwconfig blob invalid (no room for data 
array of item at offset %lld)\n",
+   offset);
+   return -EINVAL;
+   }
+   offset += item_size;
+   remaining -= item_size;
+   item = (void *)&item->data[item->length];
+   }
+
+   return 0;
+}
+
 static int guc_hwconfig_fill_buffer(struct intel_guc_hwconfig *hwconfig)
 {
struct intel_guc *guc = hwconfig_to_guc(hwconfig);
+   struct drm_device *drm = &guc_to_gt(guc)->i915->drm;
struct i915_vma *vma;
u32 ggtt_offset;
void *vaddr;
@@ -90,8 +127,13 @@ static int guc_hwconfig_fill_buffer(struct 
intel_guc_hwconfig *hwconfig)
ggtt_offset = intel_guc_ggtt_offset(guc, vma);
 
ret = __guc_action_get_hwconfig(guc, ggtt_offset, hwconfig->size);
-   if (ret >= 0)
+   if (ret >= 0) {
memcpy(hwconfig->ptr, vaddr, hwconfig->size);
+   if (verify_hwconfig_blob(hwconfig)) {
+   drm_err(drm, "Ignoring invalid hwconfig blob received 
from GuC!\n");
+   ret = -EINVAL;
+   }
+   }
 
i915_vma_unpin_and_release(&vma, I915_VMA_RELEASE_MAP);
 
-- 
2.34.1



[PATCH v5 3/4] drm/i915/uapi: Add struct drm_i915_query_hwconfig_blob_item

2022-02-22 Thread Jordan Justen
Also, document DRM_I915_QUERY_HWCONFIG_BLOB with this struct.

v3:
 * Add various changes suggested by Tvrtko

v5:
 * Fix documentation formatting and verified with `make htmldocs` as
   suggested by Daniel

Cc: Daniel Vetter 
Signed-off-by: Jordan Justen 
Acked-by: Jon Bloomfield 
Acked-by: Daniel Vetter 
---
 include/uapi/drm/i915_drm.h | 43 +
 1 file changed, 43 insertions(+)

diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 069d2fadfbd9..e44902ce8e64 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -3276,6 +3276,49 @@ struct drm_i915_gem_create_ext_protected_content {
__u32 flags;
 };
 
+/**
+ * DOC: GuC HWCONFIG blob uAPI
+ *
+ * The GuC produces a blob with information about the current device.
+ * i915 reads this blob from GuC and makes it available via this uAPI.
+ *
+ * The returned blob is a sequence of items of variable length
+ * described by struct drm_i915_query_hwconfig_blob_item.
+ *
+ * The overall blob returned by DRM_I915_QUERY_HWCONFIG_BLOB will end
+ * at the same location as the end of the final struct
+ * drm_i915_query_hwconfig_blob_item. In other words, walking through
+ * the individual items is guaranteed to eventually arrive at the
+ * exact end of the entire blob.
+ */
+
+/**
+ * struct drm_i915_query_hwconfig_blob_item - A single hwconfig item
+ * within the sequence of hwconfig items returned by
+ * DRM_I915_QUERY_HWCONFIG_BLOB.
+ *
+ * The length field gives the length of the data[] array. The length
+ * is the number of u32 items in the data[] array, and *not* the
+ * number of bytes.
+ *
+ * The key and length fields are required, so the minimum item size is
+ * 2 x u32, or 8 bytes, when the length field is 0. If the length
+ * field is 1, then the item's size is 12 bytes.
+ *
+ * The meaning of the key field and the data values are documented in
+ * the Programmer's Reference Manual.
+ */
+struct drm_i915_query_hwconfig_blob_item {
+   /** @key: Enum which defines how to interpret @data values. */
+   __u32 key;
+
+   /** @length: The number of u32 values in the @data array. */
+   __u32 length;
+
+   /** @data: Array of values with meaning defined by @key. */
+   __u32 data[];
+};
+
 /* ID of the protected content session managed by i915 when PXP is active */
 #define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf
 
-- 
2.34.1



Re: [PATCH 0/2] drm/tegra: Fix panel support on Venice 2 and Nyan

2022-02-22 Thread Dmitry Osipenko
20.12.2021 13:48, Thierry Reding пишет:
> From: Thierry Reding 
> 
> Hi,
> 
> this is an alternative proposal to fix panel support on Venice 2 and
> Nyan. Dmitry had proposed a different solution that involved reverting
> the I2C/DDC registration order and would complicate things by breaking
> the encapsulation of the driver by introducing a global (though locally
> scoped) variable[0].
> 
> This set of patches avoids that by using the recently introduced DP AUX
> bus infrastructure. The result is that the changes are actually less
> intrusive and not a step back. Instead they nicely remove the circular
> dependency that previously existed and caused these issues in the first
> place.
> 
> To be fair, this is not perfect either because it requires a device tree
> change and hence isn't technically backwards-compatible. However, given
> that the original device tree was badly broken in the first place, I
> think we can make an exception, especially since it is not generally a
> problem to update device trees on the affected devices.
> 
> Secondly, this relies on infrastructure that was introduced in v5.15 and
> therefore will be difficult to backport beyond that. However, since this
> functionality has been broken since v5.13 and all of the kernel versions
> between that and v5.15 are EOL anyway, there isn't much that we can do
> to fix the interim versions anyway.
> 
> Adding Doug and Laurent since they originally designed the AUX bus
> patches in case they see anything in here that would be objectionable.
> 
> Thierry
> 
> [0]: 
> https://lore.kernel.org/dri-devel/20211130230957.30213-1-dig...@gmail.com/
> 
> Thierry Reding (2):
>   drm/tegra: dpaux: Populate AUX bus
>   ARM: tegra: Move panels to AUX bus
> 
>  arch/arm/boot/dts/tegra124-nyan-big.dts   | 15 +--
>  arch/arm/boot/dts/tegra124-nyan-blaze.dts | 15 +--
>  arch/arm/boot/dts/tegra124-venice2.dts| 14 +++---
>  drivers/gpu/drm/tegra/Kconfig |  1 +
>  drivers/gpu/drm/tegra/dpaux.c |  7 +++
>  5 files changed, 33 insertions(+), 19 deletions(-)
> 

Will we see the v2 anytime soon?


Re: [Intel-gfx] [PATCH 1/3] drm/i915/guc: Limit scheduling properties to avoid overflow

2022-02-22 Thread Tvrtko Ursulin



On 22/02/2022 09:52, Tvrtko Ursulin wrote:


On 18/02/2022 21:33, john.c.harri...@intel.com wrote:

From: John Harrison 

GuC converts the pre-emption timeout and timeslice quantum values into
clock ticks internally. That significantly reduces the point of 32bit
overflow. On current platforms, worst case scenario is approximately


Where does 32-bit come from, the GuC side? We already use 64-bits so 
that's something to fix to start with. Yep...


./gt/uc/intel_guc_fwif.h:   u32 execution_quantum;

./gt/uc/intel_guc_submission.c: desc->execution_quantum = 
engine->props.timeslice_duration_ms * 1000;


./gt/intel_engine_types.h:  unsigned long 
timeslice_duration_ms;


timeslice_store/preempt_timeout_store:
err = kstrtoull(buf, 0, &duration);

So both kconfig and sysfs can already overflow GuC, not only because of 
tick conversion internally but because at backend level nothing was done 
for assigning 64-bit into 32-bit. Or I failed to find where it is handled.



110 seconds. Rather than allowing the user to set higher values and
then get confused by early timeouts, add limits when setting these
values.


Btw who is reviewing GuC patches these days - things have somehow gotten 
pretty quiet in activity and I don't think that's due to an absence of stuff 
to improve or fix? Asking since I think I noticed a few already which 
you posted and then crickets on the mailing list.



Signed-off-by: John Harrison 
---
  drivers/gpu/drm/i915/gt/intel_engine_cs.c   | 15 +++
  drivers/gpu/drm/i915/gt/sysfs_engines.c | 14 ++
  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h |  9 +
  3 files changed, 38 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c

index e53008b4dd05..2a1e9f36e6f5 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -389,6 +389,21 @@ static int intel_engine_setup(struct intel_gt 
*gt, enum intel_engine_id id,

  if (GRAPHICS_VER(i915) == 12 && engine->class == RENDER_CLASS)
  engine->props.preempt_timeout_ms = 0;
+    /* Cap timeouts to prevent overflow inside GuC */
+    if (intel_guc_submission_is_wanted(&gt->uc.guc)) {
+    if (engine->props.timeslice_duration_ms > 
GUC_POLICY_MAX_EXEC_QUANTUM_MS) {


Hm "wanted".. There's been too much back and forth on the GuC load 
options over the years to keep track.. intel_engine_uses_guc sounds 
like it would work and read nicer.


And limit to class instead of applying to all engines looks like a miss.


Sorry limit to class does not apply here, I confused this with the last 
patch.


Regards,

Tvrtko



+    drm_info(&engine->i915->drm, "Warning, clamping timeslice 
duration to %d to prevent possibly overflow\n",

+ GUC_POLICY_MAX_EXEC_QUANTUM_MS);
+    engine->props.timeslice_duration_ms = 
GUC_POLICY_MAX_EXEC_QUANTUM_MS;


I am not sure logging such message during driver load is useful. Sounds 
more like a confused driver which starts with one value and then 
overrides itself. I'd just silently set the value appropriate for the 
active backend. Preemption timeout kconfig text already documents the 
fact timeouts can get overridden at runtime depending on platform+engine. 
So maybe just add same text to timeslice kconfig.



+    }
+
+    if (engine->props.preempt_timeout_ms > 
GUC_POLICY_MAX_PREEMPT_TIMEOUT_MS) {
+    drm_info(&engine->i915->drm, "Warning, clamping 
pre-emption timeout to %d to prevent possibly overflow\n",

+ GUC_POLICY_MAX_PREEMPT_TIMEOUT_MS);
+    engine->props.preempt_timeout_ms = 
GUC_POLICY_MAX_PREEMPT_TIMEOUT_MS;

+    }
+    }
+
  engine->defaults = engine->props; /* never to change again */
  engine->context_size = intel_engine_context_size(gt, 
engine->class);
diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c 
b/drivers/gpu/drm/i915/gt/sysfs_engines.c

index 967031056202..f57efe026474 100644
--- a/drivers/gpu/drm/i915/gt/sysfs_engines.c
+++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c
@@ -221,6 +221,13 @@ timeslice_store(struct kobject *kobj, struct 
kobj_attribute *attr,

  if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
  return -EINVAL;
+    if (intel_uc_uses_guc_submission(&engine->gt->uc) &&
+    duration > GUC_POLICY_MAX_EXEC_QUANTUM_MS) {
+    duration = GUC_POLICY_MAX_EXEC_QUANTUM_MS;
+    drm_info(&engine->i915->drm, "Warning, clamping timeslice 
duration to %lld to prevent possibly overflow\n",

+ duration);
+    }


I would suggest avoiding duplicated clamping logic. Maybe hide all the 
backend logic in helpers then, like maybe:


   d = intel_engine_validate_timeslice/preempt_timeout(engine, duration);
   if (d != duration)
     return -EINVAL;

Returning -EINVAL would be equivalent to existing behaviour:

 if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
     return -EINVAL;

That way userspace 

Re: [PATCH v3 07/11] lib/ref_tracker: remove warnings in case of allocation failure

2022-02-22 Thread Andrzej Hajda




On 22.02.2022 00:54, Eric Dumazet wrote:

On Mon, Feb 21, 2022 at 3:26 PM Andrzej Hajda  wrote:

Library can handle allocation failures. To avoid allocation warnings
__GFP_NOWARN has been added everywhere. Moreover GFP_ATOMIC has been
replaced with GFP_NOWAIT in case of stack allocation on tracker free
call.

Signed-off-by: Andrzej Hajda 
---
  lib/ref_tracker.c | 5 +++--
  1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/lib/ref_tracker.c b/lib/ref_tracker.c
index 2ef4596b6b36f..cae4498fcfd70 100644
--- a/lib/ref_tracker.c
+++ b/lib/ref_tracker.c
@@ -189,7 +189,7 @@ int ref_tracker_alloc(struct ref_tracker_dir *dir,
 unsigned long entries[REF_TRACKER_STACK_ENTRIES];
 struct ref_tracker *tracker;
 unsigned int nr_entries;
-   gfp_t gfp_mask = gfp;
+   gfp_t gfp_mask = gfp | __GFP_NOWARN;

SGTM


 unsigned long flags;

 WARN_ON_ONCE(dir->dead);
@@ -237,7 +237,8 @@ int ref_tracker_free(struct ref_tracker_dir *dir,
 return -EEXIST;
 }
 nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
-   stack_handle = stack_depot_save(entries, nr_entries, GFP_ATOMIC);
+   stack_handle = stack_depot_save(entries, nr_entries,
+   GFP_NOWAIT | __GFP_NOWARN);

Last time I looked at this, __GFP_NOWARN was enforced in __stack_depot_save()


You are right, however I am not sure if we should count on unexpected 
(at least for me) and undocumented behavior.

Currently we do not need to rely on some hidden feature.

Regards
Andrzej




 spin_lock_irqsave(&dir->lock, flags);
 if (tracker->dead) {
--
2.25.1





Re: [PATCH v3 8/9] drm/tegra: vic: Implement get_streamid_offset

2022-02-22 Thread Dmitry Osipenko
22.02.2022 11:27, Mikko Perttunen пишет:
> On 2/21/22 22:10, Dmitry Osipenko wrote:
>> 21.02.2022 14:44, Mikko Perttunen пишет:
>>> On 2/19/22 20:54, Dmitry Osipenko wrote:
 19.02.2022 21:49, Dmitry Osipenko пишет:
> 18.02.2022 14:39, Mikko Perttunen пишет:
>> +static int vic_get_streamid_offset(struct tegra_drm_client *client)
>> +{
>> +    struct vic *vic = to_vic(client);
>> +    int err;
>> +
>> +    err = vic_load_firmware(vic);
>
> You can't invoke vic_load_firmware() while RPM is suspended. Either
> replace this with RPM get/put or do something else.
>>>
>>> Why not, I'm not seeing any HW accesses in vic_load_firmware? Although
>>> it looks like it might race with the vic_load_firmware call in
>>> vic_runtime_resume which probably needs to be fixed.
>>
>> It was not clear from the function's name that h/w is untouched, I read
>> "load" as "upload" and then looked at vic_runtime_resume(). I'd rename
>> vic_load_firmware() to vic_prepare_firmware_image().
>>
>> And yes, technically lock is needed.
> 
> Yep, I'll consider renaming it.

Looking at this all again, I'd suggest to change:

int get_streamid_offset(client)

to:

int get_streamid_offset(client, *offset)

and bail out if get_streamid_offset() returns error. It's never okay to
ignore errors.


Re: [PATCH] drm/stm: Avoid using val uninitialized in ltdc_set_ycbcr_config()

2022-02-22 Thread Philippe CORNU




On 2/7/22 8:44 PM, Nick Desaulniers wrote:

On Mon, Feb 7, 2022 at 8:53 AM Nathan Chancellor  wrote:


Clang warns:

   drivers/gpu/drm/stm/ltdc.c:625:2: warning: variable 'val' is used 
uninitialized whenever switch default is taken [-Wsometimes-uninitialized]
   default:
   ^~~
   drivers/gpu/drm/stm/ltdc.c:635:2: note: uninitialized use occurs here
   val |= LxPCR_YCEN;
   ^~~
   drivers/gpu/drm/stm/ltdc.c:600:9: note: initialize the variable 'val' to 
silence this warning
   u32 val;
  ^
   = 0
   1 warning generated.

Use a return instead of break in the default case to fix the warning.
Add an error message so that this return is not silent, which could hide
issues in the future.

Fixes: 484e72d3146b ("drm/stm: ltdc: add support of ycbcr pixel formats")
Link: https://github.com/ClangBuiltLinux/linux/issues/1575
Signed-off-by: Nathan Chancellor 
---
  drivers/gpu/drm/stm/ltdc.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/stm/ltdc.c b/drivers/gpu/drm/stm/ltdc.c
index 5eeb32c9c9ce..447ddde1786c 100644
--- a/drivers/gpu/drm/stm/ltdc.c
+++ b/drivers/gpu/drm/stm/ltdc.c
@@ -624,7 +624,8 @@ static inline void ltdc_set_ycbcr_config(struct drm_plane 
*plane, u32 drm_pix_fm
 break;
 default:
 /* RGB or not a YCbCr supported format */
-   break;
+   drm_err(plane->dev, "Unsupported pixel format: %u\n", 
drm_pix_fmt);


This is fine, but in the future you should add an explicit
#include <drm/drm_print.h>
to avoid implicit header dependencies (like the ones that Mingo is
trying to detangle) for the declaration of drm_err. `drm_vprintf`
needs it, too.

Reviewed-by: Nick Desaulniers 



Hi Nick,
and thank you for pointing this out.

Hi Nathan,
May I please ask you to update your patch, replacing drm_err(plane->dev, ...) 
with DRM_ERROR()?



Big thank you,

Philippe :-)




+   return;
 }

 /* Enable limited range */

base-commit: 542898c5aa5c6a3179dffb1d1606884a63f75fed
--
2.35.1






Re: [PATCH v3 8/9] drm/tegra: vic: Implement get_streamid_offset

2022-02-22 Thread Mikko Perttunen

On 2/22/22 12:46, Dmitry Osipenko wrote:

22.02.2022 11:27, Mikko Perttunen пишет:

On 2/21/22 22:10, Dmitry Osipenko wrote:

21.02.2022 14:44, Mikko Perttunen пишет:

On 2/19/22 20:54, Dmitry Osipenko wrote:

19.02.2022 21:49, Dmitry Osipenko пишет:

18.02.2022 14:39, Mikko Perttunen пишет:

+static int vic_get_streamid_offset(struct tegra_drm_client *client)
+{
+    struct vic *vic = to_vic(client);
+    int err;
+
+    err = vic_load_firmware(vic);


You can't invoke vic_load_firmware() while RPM is suspended. Either
replace this with RPM get/put or do something else.


Why not, I'm not seeing any HW accesses in vic_load_firmware? Although
it looks like it might race with the vic_load_firmware call in
vic_runtime_resume which probably needs to be fixed.


It was not clear from the function's name that h/w is untouched, I read
"load" as "upload" and then looked at vic_runtime_resume(). I'd rename
vic_load_firmware() to vic_prepare_firmware_image().

And yes, technically lock is needed.


Yep, I'll consider renaming it.


Looking at this all again, I'd suggest to change:

int get_streamid_offset(client)

to:

int get_streamid_offset(client, *offset)

and bail out if get_streamid_offset() returns error. It's never okay to
ignore errors.


Sure, seems reasonable. We'll still need some error code to indicate 
that context isolation isn't available for the engine and continue on in 
that case but that's better than just ignoring all of them.


Mikko


Re: [PATCH v3 8/9] drm/tegra: vic: Implement get_streamid_offset

2022-02-22 Thread Dmitry Osipenko
22.02.2022 13:54, Mikko Perttunen пишет:
> On 2/22/22 12:46, Dmitry Osipenko wrote:
>> 22.02.2022 11:27, Mikko Perttunen пишет:
>>> On 2/21/22 22:10, Dmitry Osipenko wrote:
 21.02.2022 14:44, Mikko Perttunen пишет:
> On 2/19/22 20:54, Dmitry Osipenko wrote:
>> 19.02.2022 21:49, Dmitry Osipenko пишет:
>>> 18.02.2022 14:39, Mikko Perttunen пишет:
 +static int vic_get_streamid_offset(struct tegra_drm_client
 *client)
 +{
 +    struct vic *vic = to_vic(client);
 +    int err;
 +
 +    err = vic_load_firmware(vic);
>>>
>>> You can't invoke vic_load_firmware() while RPM is suspended. Either
>>> replace this with RPM get/put or do something else.
>
> Why not, I'm not seeing any HW accesses in vic_load_firmware? Although
> it looks like it might race with the vic_load_firmware call in
> vic_runtime_resume which probably needs to be fixed.

 It was not clear from the function's name that h/w is untouched, I read
 "load" as "upload" and then looked at vic_runtime_resume(). I'd rename
 vic_load_firmware() to vic_prepare_firmware_image().

 And yes, technically lock is needed.
>>>
>>> Yep, I'll consider renaming it.
>>
>> Looking at this all again, I'd suggest to change:
>>
>> int get_streamid_offset(client)
>>
>> to:
>>
>> int get_streamid_offset(client, *offset)
>>
>> and bail out if get_streamid_offset() returns error. It's never okay to
>> ignore errors.
> 
> Sure, seems reasonable. We'll still need some error code to indicate
> that context isolation isn't available for the engine and continue on in
> that case but that's better than just ignoring all of them.

Yes, check for -EOPNOTSUPP and skip it.



Re: [Intel-gfx] [PATCH 2/3] drm/i915/gt: Make the heartbeat play nice with long pre-emption timeouts

2022-02-22 Thread Tvrtko Ursulin



On 18/02/2022 21:33, john.c.harri...@intel.com wrote:

From: John Harrison 

Compute workloads are inherantly not pre-emptible for long periods on
current hardware. As a workaround for this, the pre-emption timeout
for compute capable engines was disabled. This is undesirable with GuC
submission as it prevents per engine reset of hung contexts. Hence the
next patch will re-enable the timeout but bumped up by an order of
magnititude.


(Some typos above.)


However, the heartbeat might not respect that. Depending upon current
activity, a pre-emption to the heartbeat pulse might not even be
attempted until the last heartbeat period. Which means that only one


Might not be attempted, but could be if something is running with lower 
priority. In which case I think special casing the last heartbeat does 
not feel right because it can end up resetting the engine before it was 
intended.


Like if first heartbeat decides to preempt (the decision is backend 
specific, could be same prio + timeslicing), and preempt timeout has 
been set to heartbeat interval * 3, then 2nd heartbeat gets queued up, 
then 3rd, and so reset is triggered even before the first preempt 
timeout legitimately expires (or just as it is about to react).


Instead, how about preempt timeout is always considered when calculating 
when to emit the next heartbeat? End result would be similar to your 
patch, in terms of avoiding the direct problem, although hang detection 
would be overall longer (but more correct I think).


And it also means in the next patch you don't have to add coupling 
between preempt timeout and heartbeat to intel_engine_setup. Instead 
just some long preempt timeout would be needed. Granted, the decoupling 
argument is not super strong since then the heartbeat code has the 
coupling instead, but that still feels better to me. (Since we can say 
heartbeats only make sense on loaded engines, and so things like preempt 
timeout can legitimately be considered from there.)


Incidentally, that would be similar to a patch which Chris had a year 
ago (https://patchwork.freedesktop.org/patch/419783/?series=86841&rev=1) 
to fix some CI issue.


On a related topic, if GuC engine resets stop working when preempt 
timeout is set to zero - I think we need to somehow let the user know if 
they try to tweak it via sysfs. Perhaps go as far as -EINVAL in GuC 
mode, if i915.reset has not explicitly disabled engine resets.


Regards,

Tvrtko


period is granted for the pre-emption to occur. With the aforesaid
bump, the pre-emption timeout could be significantly larger than this
heartbeat period.

So adjust the heartbeat code to take the pre-emption timeout into
account. When it reaches the final (high priority) period, it now
ensures the delay before hitting reset is bigger than the pre-emption
timeout.

Signed-off-by: John Harrison 
---
  drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c | 16 
  1 file changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c 
b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index a3698f611f45..72a82a6085e0 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -22,9 +22,25 @@
  
  static bool next_heartbeat(struct intel_engine_cs *engine)

  {
+   struct i915_request *rq;
long delay;
  
  	delay = READ_ONCE(engine->props.heartbeat_interval_ms);

+
+   rq = engine->heartbeat.systole;
+   if (rq && rq->sched.attr.priority >= I915_PRIORITY_BARRIER) {
+   long longer;
+
+   /*
+* The final try is at the highest priority possible. Up until 
now
+* a pre-emption might not even have been attempted. So make 
sure
+* this last attempt allows enough time for a pre-emption to 
occur.
+*/
+   longer = READ_ONCE(engine->props.preempt_timeout_ms) * 2;
+   if (longer > delay)
+   delay = longer;
+   }
+
if (!delay)
return false;
  


Re: [Intel-gfx] [PATCH v5 4/4] drm/i915/guc: Verify hwconfig blob matches supported format

2022-02-22 Thread Tvrtko Ursulin



On 22/02/2022 10:36, Jordan Justen wrote:

i915_drm.h now defines the format of the returned
DRM_I915_QUERY_HWCONFIG_BLOB query item. Since i915 receives this from
the black box GuC software, it should verify that the data matches
that format before sending it to user-space.

The verification makes a single simple pass through the blob contents,
so this verification step should not add a significant amount of init
time to i915.

v3:
  * Add various changes suggested by Tvrtko

v4:
  * Rewrite verify_hwconfig_blob() to hopefully be clearer without
relying on comments so much, and add various suggestions from
Michal.

Signed-off-by: Jordan Justen 
Acked-by: Jon Bloomfield 
---
  .../gpu/drm/i915/gt/uc/intel_guc_hwconfig.c   | 44 ++-
  1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
index ad289603460c..a844b880cbdb 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
@@ -73,9 +73,46 @@ static int guc_hwconfig_discover_size(struct 
intel_guc_hwconfig *hwconfig)
return 0;
  }
  
+static int verify_hwconfig_blob(struct intel_guc_hwconfig *hwconfig)

+{
+   struct intel_guc *guc = hwconfig_to_guc(hwconfig);
+   struct drm_device *drm = &guc_to_gt(guc)->i915->drm;
+   struct drm_i915_query_hwconfig_blob_item *item = hwconfig->ptr;
+   u64 offset = 0;
+   u64 remaining = hwconfig->size;
+   /* Everything before the data field is required */
+   u64 min_item_size = offsetof(struct drm_i915_query_hwconfig_blob_item, 
data);
+   u64 item_size;
+
+   if (!IS_ALIGNED(hwconfig->size, sizeof(u32))) {
+   drm_err(drm, "hwconfig blob size (%d) is not u32 aligned\n", 
hwconfig->size);
+   return -EINVAL;
+   }
+
+   while (offset < hwconfig->size) {
+   if (remaining < min_item_size) {
+   drm_err(drm, "hwconfig blob invalid (no room for item 
required fields at offset %lld)\n",
+   offset);
+   return -EINVAL;
+   }
+   item_size = min_item_size + sizeof(u32) * item->length;
+   if (item_size > remaining) {
+   drm_err(drm, "hwconfig blob invalid (no room for data array 
of item at offset %lld)\n",
+   offset);
+   return -EINVAL;
+   }
+   offset += item_size;
+   remaining -= item_size;
+   item = (void *)&item->data[item->length];
+   }
+
+   return 0;
+}
+
  static int guc_hwconfig_fill_buffer(struct intel_guc_hwconfig *hwconfig)
  {
struct intel_guc *guc = hwconfig_to_guc(hwconfig);
+   struct drm_device *drm = &guc_to_gt(guc)->i915->drm;
struct i915_vma *vma;
u32 ggtt_offset;
void *vaddr;
@@ -90,8 +127,13 @@ static int guc_hwconfig_fill_buffer(struct 
intel_guc_hwconfig *hwconfig)
ggtt_offset = intel_guc_ggtt_offset(guc, vma);
  
  	ret = __guc_action_get_hwconfig(guc, ggtt_offset, hwconfig->size);

-   if (ret >= 0)
+   if (ret >= 0) {
memcpy(hwconfig->ptr, vaddr, hwconfig->size);
+   if (verify_hwconfig_blob(hwconfig)) {
+   drm_err(drm, "Ignoring invalid hwconfig blob received from 
GuC!\n");
+   ret = -EINVAL;
+   }
+   }
  
  	i915_vma_unpin_and_release(&vma, I915_VMA_RELEASE_MAP);
  


Reviewed-by: Tvrtko Ursulin 

Regards,

Tvrtko


Re: [PATCH] dma-buf: check the return value of kstrdup()

2022-02-22 Thread Christian König




Am 22.02.22 um 12:30 schrieb xkernel.w...@foxmail.com:

From: Xiaoke Wang 

kstrdup() is a memory allocation function which can return NULL when
some internal memory errors happen. It is better to check the return
value of it to prevent further wrong memory access.

Signed-off-by: Xiaoke Wang 


Acked-by: Christian König 


---
  drivers/dma-buf/selftest.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/drivers/dma-buf/selftest.c b/drivers/dma-buf/selftest.c
index c60b694..2c29e2a 100644
--- a/drivers/dma-buf/selftest.c
+++ b/drivers/dma-buf/selftest.c
@@ -50,6 +50,9 @@ static bool apply_subtest_filter(const char *caller, const 
char *name)
bool result = true;
  
  	filter = kstrdup(__st_filter, GFP_KERNEL);

+   if (!filter)
+   return false;
+
for (sep = filter; (tok = strsep(&sep, ","));) {
bool allow = true;
char *sl;




Re: [PATCH 7/7] drm/i915: Expose client engine utilisation via fdinfo

2022-02-22 Thread Tvrtko Ursulin



On 19/02/2022 00:51, Umesh Nerlige Ramappa wrote:

On Thu, Jan 06, 2022 at 04:55:36PM +, Tvrtko Ursulin wrote:

From: Tvrtko Ursulin 

Similar to AMD commit
874442541133 ("drm/amdgpu: Add show_fdinfo() interface"), using the
infrastructure added in previous patches, we add basic client info
and GPU engine utilisation for i915.

Example of the output:

 pos:    0
 flags:  012
 mnt_id: 21
 drm-driver: i915
 drm-pdev:   :00:02.0
 drm-client-id:  7
 drm-engine-render:  9288864723 ns
 drm-engine-copy:    2035071108 ns
 drm-engine-video:   0 ns
 drm-engine-video-enhance:   0 ns

v2:
* Update for removal of name and pid.

v3:
* Use drm_driver.name.

Signed-off-by: Tvrtko Ursulin 
Cc: David M Nieto 
Cc: Christian König 
Cc: Daniel Vetter 
Cc: Chris Healy 
Acked-by: Christian König 
---
Documentation/gpu/drm-usage-stats.rst  |  6 +++
Documentation/gpu/i915.rst | 27 ++
drivers/gpu/drm/i915/i915_driver.c |  3 ++
drivers/gpu/drm/i915/i915_drm_client.c | 73 ++
drivers/gpu/drm/i915/i915_drm_client.h |  4 ++
5 files changed, 113 insertions(+)

diff --git a/Documentation/gpu/drm-usage-stats.rst 
b/Documentation/gpu/drm-usage-stats.rst

index c669026be244..6952f8389d07 100644
--- a/Documentation/gpu/drm-usage-stats.rst
+++ b/Documentation/gpu/drm-usage-stats.rst
@@ -95,3 +95,9 @@ object belong to this client, in the respective 
memory region.


Default unit shall be bytes with optional unit specifiers of 'KiB' or 
'MiB'

indicating kibi- or mebi-bytes.
+
+===
+Driver specific implementations
+===
+
+:ref:`i915-usage-stats`
diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst
index b7d801993bfa..29f412a0c3dc 100644
--- a/Documentation/gpu/i915.rst
+++ b/Documentation/gpu/i915.rst
@@ -708,3 +708,30 @@ The style guide for ``i915_reg.h``.

.. kernel-doc:: drivers/gpu/drm/i915/i915_reg.h
   :doc: The i915 register macro definition style guide
+
+.. _i915-usage-stats:
+
+i915 DRM client usage stats implementation
+==
+
+The drm/i915 driver implements the DRM client usage stats 
specification as

+documented in :ref:`drm-client-usage-stats`.
+
+Example of the output showing the implemented key value pairs and 
entirety of

+the currenly possible format options:


s/currenly/currently/

lgtm, for the series
Reviewed-by: Umesh Nerlige Ramappa 


Thanks Umesh!

There will be a small re-spin, mostly about adding drm-engine-capacity- 
tag which I needed for vendor agnostic gputop, and couple fixups. Some 
r-b's will need updating. I will copy you when sending it out.


Regards,

Tvrtko


Re: [PATCH] drm/i915: make a handful of read-only arrays static const

2022-02-22 Thread Jani Nikula
On Tue, 22 Feb 2022, Colin Ian King  wrote:
> Don't populate the read-only arrays on the stack but instead make
> them static const. Also makes the object code a little smaller.
> Reformat the statements to clear up checkpatch warning.
>
> Signed-off-by: Colin Ian King 
> ---
>  drivers/gpu/drm/i915/display/intel_vdsc.c | 16 
>  1 file changed, 12 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c 
> b/drivers/gpu/drm/i915/display/intel_vdsc.c
> index 3faea903b9ae..d49f66237ec3 100644
> --- a/drivers/gpu/drm/i915/display/intel_vdsc.c
> +++ b/drivers/gpu/drm/i915/display/intel_vdsc.c
> @@ -378,10 +378,18 @@ calculate_rc_params(struct rc_parameters *rc,
>  {
>   int bpc = vdsc_cfg->bits_per_component;
>   int bpp = vdsc_cfg->bits_per_pixel >> 4;
> - int ofs_und6[] = { 0, -2, -2, -4, -6, -6, -8, -8, -8, -10, -10, -12, 
> -12, -12, -12 };
> - int ofs_und8[] = { 2, 0, 0, -2, -4, -6, -8, -8, -8, -10, -10, -10, -12, 
> -12, -12 };
> - int ofs_und12[] = { 2, 0, 0, -2, -4, -6, -8, -8, -8, -10, -10, -10, 
> -12, -12, -12 };
> - int ofs_und15[] = { 10, 8, 6, 4, 2, 0, -2, -4, -6, -8, -10, -10, -12, 
> -12, -12 };
> + static const int ofs_und6[] = {
> + 0, -2, -2, -4, -6, -6, -8, -8, -8, -10, -10, -12, -12, -12, -12
> + };
> + static const int ofs_und8[] = {
> + 2, 0, 0, -2, -4, -6, -8, -8, -8, -10, -10, -10, -12, -12, -12
> + };
> + static const int ofs_und12[] = {
> + 2, 0, 0, -2, -4, -6, -8, -8, -8, -10, -10, -10, -12, -12, -12
> + };

Hmm, I wonder why the same values are duplicated in ofs_und8 and
ofs_und12. Cc: Vandita, Manasi.

Regardless, the patch is sane.

Reviewed-by: Jani Nikula 

> + static const int ofs_und15[] = {
> + 10, 8, 6, 4, 2, 0, -2, -4, -6, -8, -10, -10, -12, -12, -12
> + };
>   int qp_bpc_modifier = (bpc - 8) * 2;
>   u32 res, buf_i, bpp_i;

-- 
Jani Nikula, Intel Open Source Graphics Center


Re: [PATCH v13 5/5] drm/amdgpu: add drm buddy support to amdgpu

2022-02-22 Thread Christian König

Am 21.02.22 um 17:45 schrieb Arunpravin:

- Remove drm_mm references and replace with drm buddy functionalities
- Add res cursor support for drm buddy

v2(Matthew Auld):
   - replace spinlock with mutex as we call kmem_cache_zalloc
 (..., GFP_KERNEL) in drm_buddy_alloc() function

   - lock drm_buddy_block_trim() function as it calls
 mark_free/mark_split are all globally visible

v3(Matthew Auld):
   - remove trim method error handling as we address the failure case
 at drm_buddy_block_trim() function

v4:
   - fix warnings reported by kernel test robot 

v5:
   - fix merge conflict issue

v6:
   - fix warnings reported by kernel test robot 

v7:
   - remove DRM_BUDDY_RANGE_ALLOCATION flag usage

v8:
   - keep DRM_BUDDY_RANGE_ALLOCATION flag usage
   - resolve conflicts created by drm/amdgpu: remove VRAM accounting v2

Signed-off-by: Arunpravin 
---
  drivers/gpu/drm/Kconfig   |   1 +
  .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h|  97 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |   7 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 256 ++
  4 files changed, 229 insertions(+), 132 deletions(-)

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 763355330b17..019ec0440ced 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -279,6 +279,7 @@ config DRM_AMDGPU
select HWMON
select BACKLIGHT_CLASS_DEVICE
select INTERVAL_TREE
+   select DRM_BUDDY
help
  Choose this option if you have a recent AMD Radeon graphics card.
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h

index acfa207cf970..da12b4ff2e45 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -30,12 +30,15 @@
  #include 
  #include 
  
+#include "amdgpu_vram_mgr.h"

+
  /* state back for walking over vram_mgr and gtt_mgr allocations */
  struct amdgpu_res_cursor {
uint64_tstart;
uint64_tsize;
uint64_tremaining;
-   struct drm_mm_node  *node;
+   void*node;
+   uint32_tmem_type;
  };
  
  /**

@@ -52,27 +55,63 @@ static inline void amdgpu_res_first(struct ttm_resource 
*res,
uint64_t start, uint64_t size,
struct amdgpu_res_cursor *cur)
  {
+   struct drm_buddy_block *block;
+   struct list_head *head, *next;
struct drm_mm_node *node;
  
-	if (!res || res->mem_type == TTM_PL_SYSTEM) {

-   cur->start = start;
-   cur->size = size;
-   cur->remaining = size;
-   cur->node = NULL;
-   WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT);
-   return;
-   }
+   if (!res)
+   goto err_out;


It's not really an error to not have a resource. So I would rather name 
the label fallback or something like that.


  
  	BUG_ON(start + size > res->num_pages << PAGE_SHIFT);
  
-	node = to_ttm_range_mgr_node(res)->mm_nodes;

-   while (start >= node->size << PAGE_SHIFT)
-   start -= node++->size << PAGE_SHIFT;
+   cur->mem_type = res->mem_type;
+
+   switch (cur->mem_type) {
+   case TTM_PL_VRAM:
+   head = &to_amdgpu_vram_mgr_node(res)->blocks;
+
+   block = list_first_entry_or_null(head,
+struct drm_buddy_block,
+link);
+   if (!block)
+   goto err_out;
+
+   while (start >= amdgpu_node_size(block)) {
+   start -= amdgpu_node_size(block);
+
+   next = block->link.next;
+   if (next != head)
+   block = list_entry(next, struct 
drm_buddy_block, link);
+   }
+
+   cur->start = amdgpu_node_start(block) + start;
+   cur->size = min(amdgpu_node_size(block) - start, size);
+   cur->remaining = size;
+   cur->node = block;
+   break;
+   case TTM_PL_TT:
+   node = to_ttm_range_mgr_node(res)->mm_nodes;
+   while (start >= node->size << PAGE_SHIFT)
+   start -= node++->size << PAGE_SHIFT;
+
+   cur->start = (node->start << PAGE_SHIFT) + start;
+   cur->size = min((node->size << PAGE_SHIFT) - start, size);
+   cur->remaining = size;
+   cur->node = node;
+   break;
+   default:
+   goto err_out;
+   }
  
-	cur->start = (node->start << PAGE_SHIFT) + start;

-   cur->size = min((node->size << PAGE_SHIFT) - start, size);
+   return;
+
+err_out:
+   cur->start = start;
+   cur->size = size;
cur->remaining = size;
-

Re: [PATCH v13 4/5] drm/amdgpu: move vram inline functions into a header

2022-02-22 Thread Christian König

Am 21.02.22 um 17:45 schrieb Arunpravin:

Move shared vram inline functions and structs
into a header file

Signed-off-by: Arunpravin 


Patches #1-#3 have been pushed to drm-misc-next.

For this one it might be better to squash that into commit #5 as well 
since this is otherwise unused.


Might be a good idea to move the vram_mgr structure and function from 
amdgpu_ttm.h over to this file as well.


Regards,
Christian.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h | 51 
  1 file changed, 51 insertions(+)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
new file mode 100644
index ..59983464cce5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: MIT
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_VRAM_MGR_H__
+#define __AMDGPU_VRAM_MGR_H__
+
+#include 
+
+struct amdgpu_vram_mgr_node {
+   struct ttm_resource base;
+   struct list_head blocks;
+   unsigned long flags;
+};
+
+static inline u64 amdgpu_node_start(struct drm_buddy_block *block)
+{
+   return drm_buddy_block_offset(block);
+}
+
+static inline u64 amdgpu_node_size(struct drm_buddy_block *block)
+{
+   return PAGE_SIZE << drm_buddy_block_order(block);
+}
+
+static inline struct amdgpu_vram_mgr_node *
+to_amdgpu_vram_mgr_node(struct ttm_resource *res)
+{
+   return container_of(res, struct amdgpu_vram_mgr_node, base);
+}
+
+#endif




Re: [PATCH] drm: rcar-du: Fix CRTC timings when CMM is used

2022-02-22 Thread Kieran Bingham
Quoting Laurent Pinchart (2021-11-29 22:28:13)
> When the CMM is enabled, an offset of 25 pixels must be subtracted from
> the HDS (horizontal display start) and HDE (horizontal display end)
> registers. Fix the timings calculation, and take this into account in
> the mode validation.
> 
> This fixes a visible horizontal offset in the image with VGA monitors.
> HDMI monitors seem to be generally more tolerant to incorrect timings,
> but may be affected too.
> 
> Signed-off-by: Laurent Pinchart 
> ---
>  drivers/gpu/drm/rcar-du/rcar_du_crtc.c | 20 
>  1 file changed, 16 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c 
> b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
> index 5672830ca184..ee6ba74627a2 100644
> --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
> +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
> @@ -215,6 +215,7 @@ static void rcar_du_crtc_set_display_timing(struct 
> rcar_du_crtc *rcrtc)
> const struct drm_display_mode *mode = 
> &rcrtc->crtc.state->adjusted_mode;
> struct rcar_du_device *rcdu = rcrtc->dev;
> unsigned long mode_clock = mode->clock * 1000;
> +   unsigned int hdse_offset;
> u32 dsmr;
> u32 escr;
>  
> @@ -298,10 +299,15 @@ static void rcar_du_crtc_set_display_timing(struct 
> rcar_du_crtc *rcrtc)
>  | DSMR_DIPM_DISP | DSMR_CSPM;
> rcar_du_crtc_write(rcrtc, DSMR, dsmr);
>  

This looks like the kind of place that could do with a comment
explaining what is going on.

> +   hdse_offset = 19;
> +   if (rcrtc->group->cmms_mask & BIT(rcrtc->index % 2))
> +   hdse_offset += 25;
> +
> /* Display timings */
> -   rcar_du_crtc_write(rcrtc, HDSR, mode->htotal - mode->hsync_start - 
> 19);
> +   rcar_du_crtc_write(rcrtc, HDSR, mode->htotal - mode->hsync_start -
> +   hdse_offset);
> rcar_du_crtc_write(rcrtc, HDER, mode->htotal - mode->hsync_start +
> -   mode->hdisplay - 19);
> +   mode->hdisplay - hdse_offset);
> rcar_du_crtc_write(rcrtc, HSWR, mode->hsync_end -
> mode->hsync_start - 1);
> rcar_du_crtc_write(rcrtc, HCR,  mode->htotal - 1);
> @@ -836,6 +842,7 @@ rcar_du_crtc_mode_valid(struct drm_crtc *crtc,
> struct rcar_du_crtc *rcrtc = to_rcar_crtc(crtc);
> struct rcar_du_device *rcdu = rcrtc->dev;
> bool interlaced = mode->flags & DRM_MODE_FLAG_INTERLACE;
> +   unsigned int min_sync_porch;
> unsigned int vbp;
>  
> if (interlaced && !rcar_du_has(rcdu, RCAR_DU_FEATURE_INTERLACED))
> @@ -843,9 +850,14 @@ rcar_du_crtc_mode_valid(struct drm_crtc *crtc,
>  
> /*
>  * The hardware requires a minimum combined horizontal sync and back
> -* porch of 20 pixels and a minimum vertical back porch of 3 lines.
> +* porch of 20 pixels (when CMM isn't used) or 45 pixels (when CMM is
> +* used), and a minimum vertical back porch of 3 lines.
>  */
> -   if (mode->htotal - mode->hsync_start < 20)
> +   min_sync_porch = 20;
> +   if (rcrtc->group->cmms_mask & BIT(rcrtc->index % 2))
> +   min_sync_porch += 25;
> +
> +   if (mode->htotal - mode->hsync_start < min_sync_porch)
> return MODE_HBLANK_NARROW;

Is the '19' in the hdse offset, this min_sync_porch - 1 for position
correction? It looks something like that. And the rest seems ok.

With or without the additional optional comment suggestion above:

Reviewed-by: Kieran Bingham 



>  
> vbp = (mode->vtotal - mode->vsync_end) / (interlaced ? 2 : 1);
> 
> base-commit: c18c889bb5e014e144716044991112f16833
> prerequisite-patch-id: dc9121a1b85ea05bf3eae2b0ac2168d47101ee87
> -- 
> Regards,
> 
> Laurent Pinchart
>


Re: [PATCH v2 4/5] fbdev: Improve performance of cfb_imageblit()

2022-02-22 Thread Pekka Paalanen
On Mon, 21 Feb 2022 20:54:09 +0100
Thomas Zimmermann  wrote:

> Improve the performance of sys_imageblit() by manually unrolling

sys?

> the inner blitting loop and moving some invariants out. The compiler
> failed to do this automatically. This change keeps cfb_imageblit()
> in sync with sys_imageblit().

This is correct here.

> 
> A microbenchmark measures the average number of CPU cycles
> for sys_imageblit() after a stabilizing period of a few minutes

sys?

> (i7-4790, FullHD, simpledrm, kernel with debugging).
> 
> sys_imageblit(), new: 15724 cycles

sys?

> cfb_imageblit(): old: 30566 cycles
> 
> In the optimized case, cfb_imageblit() is now ~2x faster than before.
> 
> Signed-off-by: Thomas Zimmermann 
> ---
>  drivers/video/fbdev/core/cfbimgblt.c | 51 +++-
>  1 file changed, 42 insertions(+), 9 deletions(-)

Just noticed some confusion in the commit message.


Thanks,
pq


pgpD1xHYjzIvL.pgp
Description: OpenPGP digital signature


[PATCH 00/10] Fix multiple compilation warnings

2022-02-22 Thread Maíra Canal
This patchset intends to deal with a couple of warnings in the AMD graphics
drivers. All warnings were generated with Clang and W=1 flag.

Maíra Canal (10):
  drm/amdgpu: Change amdgpu_ras_block_late_init_default function scope
  drm/amdgpu: Remove tmp unused variable
  drm/amdgpu: Remove unused get_umc_v8_7_channel_index function
  drm/amd/display: Remove unused temp variable
  drm/amd/display: Remove unused dcn316_smu_set_voltage_via_phyclk
function
  drm/amd/display: Remove vupdate_int_entry definition
  drm/amd/display: Remove unused dmub_outbox_irq_info_funcs variable
  drm/amd/display: Remove unused variable
  drm/amd/display: Add missing prototypes to dcn201_init
  drm/amd/display: Turn global functions into static functions

 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/dce_v6_0.c  |  4 +---
 drivers/gpu/drm/amd/amdgpu/dce_v8_0.c  |  4 +---
 drivers/gpu/drm/amd/amdgpu/umc_v8_7.c  |  7 ---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c  |  4 ++--
 .../display/amdgpu_dm/amdgpu_dm_mst_types.c|  2 +-
 .../dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c   |  3 ++-
 .../display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c |  2 +-
 .../amd/display/dc/clk_mgr/dcn316/dcn316_smu.c | 18 +-
 .../gpu/drm/amd/display/dc/core/dc_resource.c  |  2 +-
 .../amd/display/dc/dcn10/dcn10_hw_sequencer.c  |  3 ---
 .../drm/amd/display/dc/dcn201/dcn201_init.c|  1 +
 .../display/dc/irq/dcn20/irq_service_dcn20.c   |  2 +-
 .../display/dc/irq/dcn201/irq_service_dcn201.c |  5 -
 .../display/dc/irq/dcn21/irq_service_dcn21.c   | 14 --
 .../display/dc/irq/dcn30/irq_service_dcn30.c   |  2 +-
 .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h|  5 +
 17 files changed, 15 insertions(+), 65 deletions(-)

-- 
2.35.1



[PATCH 01/10] drm/amdgpu: Change amdgpu_ras_block_late_init_default function scope

2022-02-22 Thread Maíra Canal
Turn previously global function into a static function to avoid the
following Clang warning:

drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c:2459:5: warning: no previous prototype
for function 'amdgpu_ras_block_late_init_default' [-Wmissing-prototypes]
int amdgpu_ras_block_late_init_default(struct amdgpu_device *adev,
^
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c:2459:1: note: declare 'static' if the
function is not intended to be used outside of this translation unit
int amdgpu_ras_block_late_init_default(struct amdgpu_device *adev,
^
static

Signed-off-by: Maíra Canal 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index e5874df3c9ca..dff5240efcc7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2456,7 +2456,7 @@ int amdgpu_ras_block_late_init(struct amdgpu_device *adev,
return r;
 }
 
-int amdgpu_ras_block_late_init_default(struct amdgpu_device *adev,
+static int amdgpu_ras_block_late_init_default(struct amdgpu_device *adev,
 struct ras_common_if *ras_block)
 {
return amdgpu_ras_block_late_init(adev, ras_block);
-- 
2.35.1



[PATCH 02/10] drm/amdgpu: Remove tmp unused variable

2022-02-22 Thread Maíra Canal
The variable tmp is not used in the dce_v6_0_hpd_fini and
dce_v8_0_hpd_fini functions. Therefore, the variable is removed in order to
avoid the following Clang warnings:

drivers/gpu/drm/amd/amdgpu/dce_v6_0.c:331:6: warning: variable 'tmp' set but
not used [-Wunused-but-set-variable]
u32 tmp;
^
drivers/gpu/drm/amd/amdgpu/dce_v8_0.c:325:6: warning: variable 'tmp' set but
not used [-Wunused-but-set-variable]
u32 tmp;
^

Signed-off-by: Maíra Canal 
---
 drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 4 +---
 drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c 
b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index b90bc2adf778..2c61f0c2e709 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -328,7 +328,6 @@ static void dce_v6_0_hpd_fini(struct amdgpu_device *adev)
struct drm_device *dev = adev_to_drm(adev);
struct drm_connector *connector;
struct drm_connector_list_iter iter;
-   u32 tmp;
 
drm_connector_list_iter_begin(dev, &iter);
drm_for_each_connector_iter(connector, &iter) {
@@ -337,8 +336,7 @@ static void dce_v6_0_hpd_fini(struct amdgpu_device *adev)
if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
continue;
 
-   tmp = RREG32(mmDC_HPD1_CONTROL + 
hpd_offsets[amdgpu_connector->hpd.hpd]);
-   tmp &= ~DC_HPD1_CONTROL__DC_HPD1_EN_MASK;
+   RREG32(mmDC_HPD1_CONTROL + 
hpd_offsets[amdgpu_connector->hpd.hpd]);
WREG32(mmDC_HPD1_CONTROL + 
hpd_offsets[amdgpu_connector->hpd.hpd], 0);
 
amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 7c1379b02f94..c5e9c5dbd165 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -322,7 +322,6 @@ static void dce_v8_0_hpd_fini(struct amdgpu_device *adev)
struct drm_device *dev = adev_to_drm(adev);
struct drm_connector *connector;
struct drm_connector_list_iter iter;
-   u32 tmp;
 
drm_connector_list_iter_begin(dev, &iter);
drm_for_each_connector_iter(connector, &iter) {
@@ -331,8 +330,7 @@ static void dce_v8_0_hpd_fini(struct amdgpu_device *adev)
if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
continue;
 
-   tmp = RREG32(mmDC_HPD1_CONTROL + 
hpd_offsets[amdgpu_connector->hpd.hpd]);
-   tmp &= ~DC_HPD1_CONTROL__DC_HPD1_EN_MASK;
+   RREG32(mmDC_HPD1_CONTROL + 
hpd_offsets[amdgpu_connector->hpd.hpd]);
WREG32(mmDC_HPD1_CONTROL + 
hpd_offsets[amdgpu_connector->hpd.hpd], 0);
 
amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
-- 
2.35.1



[PATCH 03/10] drm/amdgpu: Remove unused get_umc_v8_7_channel_index function

2022-02-22 Thread Maíra Canal
Remove get_umc_v8_7_channel_index function, which is not used
in the codebase.

This was pointed out by clang with the following warning:

drivers/gpu/drm/amd/amdgpu/umc_v8_7.c:50:24: warning: unused function
'get_umc_v8_7_channel_index' [-Wunused-function]
static inline uint32_t get_umc_v8_7_channel_index(struct amdgpu_device *adev,
   ^

Signed-off-by: Maíra Canal 
---
 drivers/gpu/drm/amd/amdgpu/umc_v8_7.c | 7 ---
 1 file changed, 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c 
b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
index de85a998ef99..f35253e0eaa6 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
@@ -47,13 +47,6 @@ static inline uint32_t get_umc_v8_7_reg_offset(struct 
amdgpu_device *adev,
return adev->umc.channel_offs*ch_inst + UMC_8_INST_DIST*umc_inst;
 }
 
-static inline uint32_t get_umc_v8_7_channel_index(struct amdgpu_device *adev,
-   uint32_t umc_inst,
-   uint32_t ch_inst)
-{
-   return adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num 
+ ch_inst];
-}
-
 static void umc_v8_7_ecc_info_query_correctable_error_count(struct 
amdgpu_device *adev,
uint32_t umc_inst, uint32_t 
ch_inst,
unsigned long *error_count)
-- 
2.35.1



[PATCH 04/10] drm/amd/display: Remove unused temp variable

2022-02-22 Thread Maíra Canal
Remove unused temp variable from the dmub_rb_flush_pending function by
using arithmetic to remove the loop.

The -Wunused-but-set-variable warning was pointed out by Clang with the
following warning:

drivers/gpu/drm/amd/amdgpu/../display/dmub/inc/dmub_cmd.h:2921:12: warning:
variable 'temp' set but not used [-Wunused-but-set-variable]
uint64_t temp;
 ^

Signed-off-by: Maíra Canal 
---
 drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h 
b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index fb01ff49e655..d3088836d4e4 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -2918,11 +2918,8 @@ static inline void dmub_rb_flush_pending(const struct 
dmub_rb *rb)
while (rptr != wptr) {
uint64_t volatile *data = (uint64_t volatile *)((uint8_t 
*)(rb->base_address) + rptr);
//uint64_t volatile *p = (uint64_t volatile *)data;
-   uint64_t temp;
-   uint8_t i;
 
-   for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++)
-   temp = *data++;
+   *data += DMUB_RB_CMD_SIZE / sizeof(uint64_t);
 
rptr += DMUB_RB_CMD_SIZE;
if (rptr >= rb->capacity)
-- 
2.35.1



[PATCH 06/10] drm/amd/display: Remove vupdate_int_entry definition

2022-02-22 Thread Maíra Canal
Remove the vupdate_int_entry definition and utilization to avoid the
following warning by Clang:

drivers/gpu/drm/amd/amdgpu/../display/dc/irq/dcn21/irq_service_dcn21.c:410:2:
warning: initializer overrides prior initialization of this subobject
[-Winitializer-overrides]
vupdate_no_lock_int_entry(0),
^~~~
drivers/gpu/drm/amd/amdgpu/../display/dc/irq/dcn21/irq_service_dcn21.c:280:39:
note: expanded from macro 'vupdate_no_lock_int_entry'
[DC_IRQ_SOURCE_VUPDATE1 + reg_num] = {\
^~
drivers/gpu/drm/amd/amdgpu/../display/dc/irq/dcn21/irq_service_dcn21.c:404:2:
note: previous initialization is here
vupdate_int_entry(0),
^~~~
drivers/gpu/drm/amd/amdgpu/../display/dc/irq/dcn21/irq_service_dcn21.c:269:39:
note: expanded from macro 'vupdate_int_entry'
[DC_IRQ_SOURCE_VUPDATE1 + reg_num] = {\
^~
drivers/gpu/drm/amd/amdgpu/../display/dc/irq/dcn21/irq_service_dcn21.c:411:2:
warning: initializer overrides prior initialization of this subobject
[-Winitializer-overrides]
vupdate_no_lock_int_entry(1),
^~~~
drivers/gpu/drm/amd/amdgpu/../display/dc/irq/dcn21/irq_service_dcn21.c:280:39:
note: expanded from macro 'vupdate_no_lock_int_entry'
[DC_IRQ_SOURCE_VUPDATE1 + reg_num] = {\
^~
drivers/gpu/drm/amd/amdgpu/../display/dc/irq/dcn21/irq_service_dcn21.c:405:2:
note: previous initialization is here
vupdate_int_entry(1),
^~~~
drivers/gpu/drm/amd/amdgpu/../display/dc/irq/dcn21/irq_service_dcn21.c:269:39:
note: expanded from macro 'vupdate_int_entry'
[DC_IRQ_SOURCE_VUPDATE1 + reg_num] = {\
^~
drivers/gpu/drm/amd/amdgpu/../display/dc/irq/dcn21/irq_service_dcn21.c:412:2:
warning: initializer overrides prior initialization of this subobject
[-Winitializer-overrides]
vupdate_no_lock_int_entry(2),
^~~~
drivers/gpu/drm/amd/amdgpu/../display/dc/irq/dcn21/irq_service_dcn21.c:280:39:
note: expanded from macro 'vupdate_no_lock_int_entry'
[DC_IRQ_SOURCE_VUPDATE1 + reg_num] = {\
^~
drivers/gpu/drm/amd/amdgpu/../display/dc/irq/dcn21/irq_service_dcn21.c:406:2:
note: previous initialization is here
vupdate_int_entry(2),
^~~~
drivers/gpu/drm/amd/amdgpu/../display/dc/irq/dcn21/irq_service_dcn21.c:269:39:
note: expanded from macro 'vupdate_int_entry'
[DC_IRQ_SOURCE_VUPDATE1 + reg_num] = {\
^~
drivers/gpu/drm/amd/amdgpu/../display/dc/irq/dcn21/irq_service_dcn21.c:413:2:
warning: initializer overrides prior initialization of this subobject
[-Winitializer-overrides]
vupdate_no_lock_int_entry(3),
^~~~
drivers/gpu/drm/amd/amdgpu/../display/dc/irq/dcn21/irq_service_dcn21.c:280:39:
note: expanded from macro 'vupdate_no_lock_int_entry'
[DC_IRQ_SOURCE_VUPDATE1 + reg_num] = {\
^~
drivers/gpu/drm/amd/amdgpu/../display/dc/irq/dcn21/irq_service_dcn21.c:407:2:
note: previous initialization is here
vupdate_int_entry(3),
^~~~
drivers/gpu/drm/amd/amdgpu/../display/dc/irq/dcn21/irq_service_dcn21.c:269:39:
note: expanded from macro 'vupdate_int_entry'
[DC_IRQ_SOURCE_VUPDATE1 + reg_num] = {\
^~
drivers/gpu/drm/amd/amdgpu/../display/dc/irq/dcn21/irq_service_dcn21.c:414:2:
warning: initializer overrides prior initialization of this subobject
[-Winitializer-overrides]
vupdate_no_lock_int_entry(4),
^~~~
drivers/gpu/drm/amd/amdgpu/../display/dc/irq/dcn21/irq_service_dcn21.c:280:39:
note: expanded from macro 'vupdate_no_lock_int_entry'
[DC_IRQ_SOURCE_VUPDATE1 + reg_num] = {\
^~
drivers/gpu/drm/amd/amdgpu/../display/dc/irq/dcn21/irq_service_dcn21.c:408:2:
note: previous initialization is here
vupdate_int_entry(4),
^~~~
drivers/gpu/drm/amd/amdgpu/../display/dc/irq/dcn21/irq_service_dcn21.c:269:39:
note: expanded from macro 'vupdate_int_entry'
[DC_IRQ_SOURCE_VUPDATE1 + reg_num] = {\
^~
drivers/gpu/drm/amd/amdgpu/../display/dc/irq/dcn21/irq_service_dcn21.c:415:2:
warning: initializer overrides prior initialization of this subobject
[-Winitializer-overrides]
vupdate_no_lock_int_entry(5),
^~~~
drivers/gpu/drm/amd/amdgpu/../display/dc/irq/dcn21/irq_service_dcn21.c:280:39:
note: expanded from macro 'vupdate_no_lock_int_entry'
[DC_IRQ_SOURCE_VUPDATE1 + reg_num] = {\
^~
drivers/gpu/drm/amd/amdgpu/../display/dc/irq/dcn21/irq_service_dcn21.c:409:2:
note: previous initialization is here
vupdate_int_entry(5),
^~~~
drivers/gpu/drm/amd/amdgpu/../display/dc/irq/dcn21/irq_service_dcn21.c:269:39:
note: expanded from macro 'vupdate_int_entry'
[DC_IRQ_SOURCE_VUPDATE1 + reg_num] = {\
^~
6 warnings generated.

Fixes: 688f97ed ("drm/amd/display: Add vupdate_no_lock interrupts for
DCN2.1")

Signed-off-by: Maíra Canal 
---
 .../amd/display/dc/irq/dcn21/irq_service_dcn21.c   | 14 --
 1 file changed, 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/

[PATCH 07/10] drm/amd/display: Remove unused dmub_outbox_irq_info_funcs variable

2022-02-22 Thread Maíra Canal
Remove the unused struct irq_source_info_funcs
dmub_outbox_irq_info_funcs from the file, which was declared but never
hooked up.

This was pointed out by clang with the following warning:

drivers/gpu/drm/amd/amdgpu/../display/dc/irq/dcn201/irq_service_dcn201.c:141:43:
warning: unused variable 'dmub_outbox_irq_info_funcs'
[-Wunused-const-variable]
static const struct irq_source_info_funcs dmub_outbox_irq_info_funcs = {
  ^

Signed-off-by: Maíra Canal 
---
 .../gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c   | 5 -
 1 file changed, 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c 
b/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c
index aa708b61142f..45f99351a0ab 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c
@@ -138,11 +138,6 @@ static const struct irq_source_info_funcs 
vupdate_no_lock_irq_info_funcs = {
.ack = NULL
 };
 
-static const struct irq_source_info_funcs dmub_outbox_irq_info_funcs = {
-   .set = NULL,
-   .ack = NULL
-};
-
 #undef BASE_INNER
 #define BASE_INNER(seg) DMU_BASE__INST0_SEG ## seg
 
-- 
2.35.1



[PATCH 08/10] drm/amd/display: Remove unused variable

2022-02-22 Thread Maíra Canal
Remove the variable clamshell_closed from the function
dcn10_align_pixel_clocks.

This was pointed out by clang with the following warning:

drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_hw_sequencer.c:2063:7:
warning: variable 'clamshell_closed' set but not used
[-Wunused-but-set-variable]
bool clamshell_closed = false;
 ^

Signed-off-by: Maíra Canal 
---
 drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index 8dc1afc03961..559aa45f27e7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -2060,14 +2060,11 @@ static int dcn10_align_pixel_clocks(struct dc *dc, int 
group_size,
uint32_t embedded_pix_clk_100hz;
uint16_t embedded_h_total;
uint16_t embedded_v_total;
-   bool clamshell_closed = false;
uint32_t dp_ref_clk_100hz =

dc->res_pool->dp_clock_source->ctx->dc->clk_mgr->dprefclk_khz*10;
 
if (dc->config.vblank_alignment_dto_params &&
dc->res_pool->dp_clock_source->funcs->override_dp_pix_clk) {
-   clamshell_closed =
-   (dc->config.vblank_alignment_dto_params >> 63);
embedded_h_total =
(dc->config.vblank_alignment_dto_params >> 32) & 0x7FFF;
embedded_v_total =
-- 
2.35.1



[PATCH 09/10] drm/amd/display: Add missing prototypes to dcn201_init

2022-02-22 Thread Maíra Canal
Include the header with the prototype to silence the following clang
warning:

drivers/gpu/drm/amd/amdgpu/../display/dc/dcn201/dcn201_init.c:127:6:
warning: no previous prototype for function 'dcn201_hw_sequencer_construct'
[-Wmissing-prototypes]
void dcn201_hw_sequencer_construct(struct dc *dc)
 ^

Signed-off-by: Maíra Canal 
---
 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.c 
b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.c
index f1f89f93603f..1826dd7f3da1 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.c
@@ -27,6 +27,7 @@
 #include "dcn10/dcn10_hw_sequencer.h"
 #include "dcn20/dcn20_hwseq.h"
 #include "dcn201_hwseq.h"
+#include "dcn201_init.h"
 
 static const struct hw_sequencer_funcs dcn201_funcs = {
.program_gamut_remap = dcn10_program_gamut_remap,
-- 
2.35.1



[PATCH 10/10] drm/amd/display: Turn global functions into static functions

2022-02-22 Thread Maíra Canal
Turn previously global functions into static functions to avoid
-Wmissing-prototypes warnings, such as:

drivers/gpu/drm/amd/amdgpu/../display/dc/irq/dcn30/irq_service_dcn30.c:50:20:
warning: no previous prototype for function 'to_dal_irq_source_dcn30'
[-Wmissing-prototypes]
enum dc_irq_source to_dal_irq_source_dcn30(
   ^
drivers/gpu/drm/amd/amdgpu/../display/dc/irq/dcn30/irq_service_dcn30.c:50:1:
note: declare 'static' if the function is not intended to be used outside
of this translation unit
enum dc_irq_source to_dal_irq_source_dcn30(
^
static
1 warning generated.

drivers/gpu/drm/amd/amdgpu/../display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c:488:6:
warning: no previous prototype for function
'dcn316_clk_mgr_helper_populate_bw_params' [-Wmissing-prototypes]
void dcn316_clk_mgr_helper_populate_bw_params(
 ^
drivers/gpu/drm/amd/amdgpu/../display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c:488:1:
note: declare 'static' if the function is not intended to be used outside
of this translation unit
void dcn316_clk_mgr_helper_populate_bw_params(
^
static
1 warning generated.

Signed-off-by: Maíra Canal 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++--
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c   | 2 +-
 .../drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c  | 3 ++-
 .../gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c| 2 +-
 drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_smu.c| 2 +-
 drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 2 +-
 drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c  | 2 +-
 drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c  | 2 +-
 8 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index c9ca328d34e3..a99b92526b55 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -6362,7 +6362,7 @@ static bool is_freesync_video_mode(const struct 
drm_display_mode *mode,
return true;
 }
 
-struct dc_stream_state *
+static struct dc_stream_state *
 create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
   const struct drm_display_mode *drm_mode,
   const struct dm_connector_state *dm_state,
@@ -10189,7 +10189,7 @@ static void set_freesync_fixed_config(struct 
dm_crtc_state *dm_new_crtc_state) {
dm_new_crtc_state->freesync_config.fixed_refresh_in_uhz = res;
 }
 
-int dm_update_crtc_state(struct amdgpu_display_manager *dm,
+static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
 struct drm_atomic_state *state,
 struct drm_crtc *crtc,
 struct drm_crtc_state *old_crtc_state,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 389b0cb37995..05573f073b21 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -213,7 +213,7 @@ static bool validate_dsc_caps_on_connector(struct 
amdgpu_dm_connector *aconnecto
return true;
 }
 
-bool retrieve_downstream_port_device(struct amdgpu_dm_connector *aconnector)
+static bool retrieve_downstream_port_device(struct amdgpu_dm_connector 
*aconnector)
 {
union dp_downstream_port_present ds_port_present;
 
diff --git 
a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c
index 06bab24d8e27..450eaead4f20 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c
@@ -101,7 +101,8 @@ static uint32_t rv1_smu_wait_for_response(struct 
clk_mgr_internal *clk_mgr, unsi
return res_val;
 }
 
-int rv1_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, 
unsigned int msg_id, unsigned int param)
+static int rv1_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr,
+   unsigned int msg_id, unsigned int param)
 {
uint32_t result;
 
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
index ffd3d5cb9871..02a59adff90d 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
@@ -485,7 +485,7 @@ static unsigned int find_clk_for_voltage(
return clock;
 }
 
-void dcn316_clk_mgr_helper_populate_bw_params(
+static void dcn316_clk_mgr_helper_populate_bw_params(
struct clk_mgr_internal *clk_mgr,
struct integrated_info *bios_info,
const DpmClocks_316_t *clock_table)
diff --git a/drivers/gpu/drm/amd/display/dc/

[PATCH v5 00/11] clk: Improve clock range handling

2022-02-22 Thread Maxime Ripard
Hi,

This is a follow-up of the discussion here:
https://lore.kernel.org/linux-clk/20210319150355.xzw7ikwdaga2dwhv@gilmour/

and here:
https://lore.kernel.org/all/20210914093515.260031-1-max...@cerno.tech/

While the initial proposal implemented a new API to temporarily raise and lower
clock rates based on consumer workloads, Stephen suggested an
alternative approach implemented here.

The main issue that needed to be addressed in our case was that in a
situation where we would have multiple calls to clk_set_rate_range, we
would end up with a clock at the maximum of the minimums being set. This
would be expected, but the issue was that if one of the users was to
relax or drop its requirements, the rate would be left unchanged, even
though the ideal rate would have changed.

So something like

clk_set_rate(user1_clk, 1000);
clk_set_min_rate(user1_clk, 2000);
clk_set_min_rate(user2_clk, 3000);
clk_set_min_rate(user2_clk, 1000);

Would leave the clock running at 3000Hz, while the minimum would now be
2000Hz.

This was mostly due to the fact that the core only triggers a rate
change in clk_set_rate_range() if the current rate is outside of the
boundaries, but not if it's within the new boundaries.

That series changes that and will trigger a rate change on every call,
with the former rate being tried again. This way, providers have a
chance to follow whatever policy they see fit for a given clock each
time the boundaries change.

This series also implements some kunit tests, first to test a few rate
related functions in the CCF, and then extends it to make sure that
behaviour has some test coverage.

Let me know what you think
Maxime

Changes from v4:
  - Rename the test file
  - Move all the tests to the first patch, and fix them up as fixes are done
  - Improved the test conditions
  - Added more tests
  - Improved commit messages
  - Fixed a regression where two disjoint clock ranges would now be accepted

Changes from v3:
  - Renamed the test file and Kconfig option
  - Add option to .kunitconfig
  - Switch to kunit_kzalloc
  - Use KUNIT_EXPECT_* instead of KUNIT_ASSERT_* where relevant
  - Test directly relevant calls instead of going through a temporary variable
  - Switch to more precise KUNIT_ASSERT_* macros where relevant

Changes from v2:
  - Rebased on current next
  - Rewrote the whole thing according to Stephen's reviews
  - Implemented some kunit tests

Changes from v1:
  - Return NULL in clk_request_start if clk pointer is NULL
  - Test for clk_req pointer in clk_request_done
  - Add another user in vc4
  - Rebased on top of v5.15-rc1

Maxime Ripard (11):
  clk: Introduce Kunit Tests for the framework
  clk: Enforce that disjoints limits are invalid
  clk: Always clamp the rounded rate
  clk: Use clamp instead of open-coding our own
  clk: Always set the rate on clk_set_range_rate
  clk: Add clk_drop_range
  clk: bcm: rpi: Add variant structure
  clk: bcm: rpi: Set a default minimum rate
  clk: bcm: rpi: Run some clocks at the minimum rate allowed
  drm/vc4: Add logging and comments
  drm/vc4: hdmi: Remove clock rate initialization

 drivers/clk/.kunitconfig  |   1 +
 drivers/clk/Kconfig   |   7 +
 drivers/clk/Makefile  |   1 +
 drivers/clk/bcm/clk-raspberrypi.c | 125 -
 drivers/clk/clk.c |  73 ++-
 drivers/clk/clk_test.c| 790 ++
 drivers/gpu/drm/vc4/vc4_hdmi.c|  13 -
 drivers/gpu/drm/vc4/vc4_kms.c |  11 +
 include/linux/clk.h   |  11 +
 9 files changed, 978 insertions(+), 54 deletions(-)
 create mode 100644 drivers/clk/clk_test.c

-- 
2.35.1



[PATCH v5 01/11] clk: Introduce Kunit Tests for the framework

2022-02-22 Thread Maxime Ripard
Let's test various parts of the rate-related clock API with the kunit
testing framework.

Cc: kunit-...@googlegroups.com
Suggested-by: Stephen Boyd 
Signed-off-by: Maxime Ripard 
---
 drivers/clk/.kunitconfig |   1 +
 drivers/clk/Kconfig  |   7 +
 drivers/clk/Makefile |   1 +
 drivers/clk/clk_test.c   | 786 +++
 4 files changed, 795 insertions(+)
 create mode 100644 drivers/clk/clk_test.c

diff --git a/drivers/clk/.kunitconfig b/drivers/clk/.kunitconfig
index 3754fdb9485a..cdbc7d7deba9 100644
--- a/drivers/clk/.kunitconfig
+++ b/drivers/clk/.kunitconfig
@@ -1,3 +1,4 @@
 CONFIG_KUNIT=y
 CONFIG_COMMON_CLK=y
+CONFIG_CLK_KUNIT_TEST=y
 CONFIG_CLK_GATE_KUNIT_TEST=y
diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index 3cdf33470a75..2ef6eca297ff 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -429,6 +429,13 @@ source "drivers/clk/xilinx/Kconfig"
 source "drivers/clk/zynqmp/Kconfig"
 
 # Kunit test cases
+config CLK_KUNIT_TEST
+   tristate "Basic Clock Framework Kunit Tests" if !KUNIT_ALL_TESTS
+   depends on KUNIT
+   default KUNIT_ALL_TESTS
+   help
+ Kunit tests for the common clock framework.
+
 config CLK_GATE_KUNIT_TEST
tristate "Basic gate type Kunit test" if !KUNIT_ALL_TESTS
depends on KUNIT
diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
index 6a98291350b6..8f9b1daba411 100644
--- a/drivers/clk/Makefile
+++ b/drivers/clk/Makefile
@@ -2,6 +2,7 @@
 # common clock types
 obj-$(CONFIG_HAVE_CLK) += clk-devres.o clk-bulk.o clkdev.o
 obj-$(CONFIG_COMMON_CLK)   += clk.o
+obj-$(CONFIG_CLK_KUNIT_TEST)   += clk_test.o
 obj-$(CONFIG_COMMON_CLK)   += clk-divider.o
 obj-$(CONFIG_COMMON_CLK)   += clk-fixed-factor.o
 obj-$(CONFIG_COMMON_CLK)   += clk-fixed-rate.o
diff --git a/drivers/clk/clk_test.c b/drivers/clk/clk_test.c
new file mode 100644
index ..8ddfd3b195e1
--- /dev/null
+++ b/drivers/clk/clk_test.c
@@ -0,0 +1,786 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Kunit test for clk rate management
+ */
+#include 
+#include 
+#include 
+
+/* Needed for clk_hw_create_clk() */
+#include "clk.h"
+
+#include 
+
+#define DUMMY_CLOCK_INIT_RATE  (42 * 1000 * 1000)
+#define DUMMY_CLOCK_RATE_1 (142 * 1000 * 1000)
+#define DUMMY_CLOCK_RATE_2 (242 * 1000 * 1000)
+
+struct clk_dummy_context {
+   struct clk_hw hw;
+   unsigned long rate;
+};
+
+static unsigned long clk_dummy_recalc_rate(struct clk_hw *hw,
+  unsigned long parent_rate)
+{
+   struct clk_dummy_context *ctx =
+   container_of(hw, struct clk_dummy_context, hw);
+
+   return ctx->rate;
+}
+
+static int clk_dummy_determine_rate(struct clk_hw *hw,
+struct clk_rate_request *req)
+{
+   /* Just return the same rate without modifying it */
+   return 0;
+}
+
+static int clk_dummy_maximize_rate(struct clk_hw *hw,
+  struct clk_rate_request *req)
+{
+   /*
+* If there's a maximum set, always run the clock at the maximum
+* allowed.
+*/
+   if (req->max_rate < ULONG_MAX)
+   req->rate = req->max_rate;
+
+   return 0;
+}
+
+static int clk_dummy_minimize_rate(struct clk_hw *hw,
+  struct clk_rate_request *req)
+{
+   /*
+* If there's a minimum set, always run the clock at the minimum
+* allowed.
+*/
+   if (req->min_rate > 0)
+   req->rate = req->min_rate;
+
+   return 0;
+}
+
+static int clk_dummy_set_rate(struct clk_hw *hw,
+ unsigned long rate,
+ unsigned long parent_rate)
+{
+   struct clk_dummy_context *ctx =
+   container_of(hw, struct clk_dummy_context, hw);
+
+   ctx->rate = rate;
+   return 0;
+}
+
+static const struct clk_ops clk_dummy_rate_ops = {
+   .recalc_rate = clk_dummy_recalc_rate,
+   .determine_rate = clk_dummy_determine_rate,
+   .set_rate = clk_dummy_set_rate,
+};
+
+static const struct clk_ops clk_dummy_maximize_rate_ops = {
+   .recalc_rate = clk_dummy_recalc_rate,
+   .determine_rate = clk_dummy_maximize_rate,
+   .set_rate = clk_dummy_set_rate,
+};
+
+static const struct clk_ops clk_dummy_minimize_rate_ops = {
+   .recalc_rate = clk_dummy_recalc_rate,
+   .determine_rate = clk_dummy_minimize_rate,
+   .set_rate = clk_dummy_set_rate,
+};
+
+static int clk_test_init_with_ops(struct kunit *test, const struct clk_ops 
*ops)
+{
+   struct clk_dummy_context *ctx;
+   struct clk_init_data init = { };
+   int ret;
+
+   ctx = kunit_kzalloc(test, sizeof(*ctx), GFP_KERNEL);
+   if (!ctx)
+   return -ENOMEM;
+   ctx->rate = DUMMY_CLOCK_INIT_RATE;
+   test->priv = ctx;
+
+   init.name = "test_dummy_rate";
+   init.ops = ops;
+   ctx->hw.init = &init;
+
+   ret = clk_hw

[PATCH v5 03/11] clk: Always clamp the rounded rate

2022-02-22 Thread Maxime Ripard
The current core, while setting the min and max rate properly in the
clk_rate_request structure, will not make sure that the requested rate
is within these boundaries, leaving it to each and every driver to make
sure it is.

It's not clear if this was on purpose or not, but this introduces some
inconsistencies within the API.

For example, a user setting a range and then calling clk_round_rate()
with a value outside of that range will get the same value back
(ignoring any driver adjustments), effectively ignoring the range that
was just set.

Another one, arguably worse, is that it also makes clk_round_rate() and
clk_set_rate() behave differently if there's a range and the rate being
used for both is outside that range. As we have seen, the rate will be
returned unchanged by clk_round_rate(), but clk_set_rate() will error
out returning -EINVAL.

Let's make sure the framework will always clamp the rate to the current
range found on the clock, which will fix both these inconsistencies.

Signed-off-by: Maxime Ripard 
---
 drivers/clk/clk.c  |  2 ++
 drivers/clk/clk_test.c | 46 +-
 2 files changed, 30 insertions(+), 18 deletions(-)

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 276188a6bc2a..9725bdc996b3 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -1348,6 +1348,8 @@ static int clk_core_determine_round_nolock(struct 
clk_core *core,
if (!core)
return 0;
 
+   req->rate = clamp(req->rate, req->min_rate, req->max_rate);
+
/*
 * At this point, core protection will be disabled
 * - if the provider is not protected at all
diff --git a/drivers/clk/clk_test.c b/drivers/clk/clk_test.c
index 8ddfd3b195e1..2b906c594328 100644
--- a/drivers/clk/clk_test.c
+++ b/drivers/clk/clk_test.c
@@ -309,8 +309,7 @@ static void clk_range_test_multiple_disjoints_range(struct 
kunit *test)
 
 /*
  * Test that if our clock has some boundaries and we try to round a rate
- * lower than the minimum, the returned rate won't be affected by the
- * boundaries.
+ * lower than the minimum, the returned rate will be within range.
  */
 static void clk_range_test_set_range_round_rate_lower(struct kunit *test)
 {
@@ -327,18 +326,19 @@ static void 
clk_range_test_set_range_round_rate_lower(struct kunit *test)
 
rate = clk_round_rate(clk, DUMMY_CLOCK_RATE_1 - 1000);
KUNIT_ASSERT_GT(test, rate, 0);
-   KUNIT_EXPECT_EQ(test, rate, DUMMY_CLOCK_RATE_1 - 1000);
+   KUNIT_EXPECT_TRUE(test, rate >= DUMMY_CLOCK_RATE_1 && rate <= 
DUMMY_CLOCK_RATE_2);
 }
 
 /*
  * Test that if our clock has some boundaries and we try to set a rate
- * lower than the minimum, we'll get an error.
+ * lower than the minimum, the new rate will be within range.
  */
 static void clk_range_test_set_range_set_rate_lower(struct kunit *test)
 {
struct clk_dummy_context *ctx = test->priv;
struct clk_hw *hw = &ctx->hw;
struct clk *clk = hw->clk;
+   unsigned long rate;
 
KUNIT_ASSERT_EQ(test,
clk_set_rate_range(clk,
@@ -346,15 +346,20 @@ static void 
clk_range_test_set_range_set_rate_lower(struct kunit *test)
   DUMMY_CLOCK_RATE_2),
0);
 
-   KUNIT_ASSERT_LT(test,
+   KUNIT_ASSERT_EQ(test,
clk_set_rate(clk, DUMMY_CLOCK_RATE_1 - 1000),
0);
+
+   rate = clk_get_rate(clk);
+   KUNIT_ASSERT_GT(test, rate, 0);
+   KUNIT_EXPECT_TRUE(test, rate >= DUMMY_CLOCK_RATE_1 && rate <= 
DUMMY_CLOCK_RATE_2);
 }
 
 /*
  * Test that if our clock has some boundaries and we try to round and
- * set a rate lower than the minimum, the values won't be consistent
- * between clk_round_rate() and clk_set_rate().
+ * set a rate lower than the minimum, the rate returned by
+ * clk_round_rate() will be consistent with the new rate set by
+ * clk_set_rate().
  */
 static void clk_range_test_set_range_set_round_rate_consistent_lower(struct 
kunit *test)
 {
@@ -372,17 +377,16 @@ static void 
clk_range_test_set_range_set_round_rate_consistent_lower(struct kuni
rounded = clk_round_rate(clk, DUMMY_CLOCK_RATE_1 - 1000);
KUNIT_ASSERT_GT(test, rounded, 0);
 
-   KUNIT_EXPECT_LT(test,
+   KUNIT_ASSERT_EQ(test,
clk_set_rate(clk, DUMMY_CLOCK_RATE_1 - 1000),
0);
 
-   KUNIT_EXPECT_NE(test, rounded, clk_get_rate(clk));
+   KUNIT_EXPECT_EQ(test, rounded, clk_get_rate(clk));
 }
 
 /*
  * Test that if our clock has some boundaries and we try to round a rate
- * higher than the maximum, the returned rate won't be affected by the
- * boundaries.
+ * higher than the maximum, the returned rate will be within range.
  */
 static void clk_range_test_set_range_round_rate_higher(struct kunit *test)
 {
@@ -399,18 +403,19 @@ static void 
clk_range_test_set_range_round_rate_higher(struct kunit *test)
 
rate = clk_round_rate(clk, D

[PATCH v5 04/11] clk: Use clamp instead of open-coding our own

2022-02-22 Thread Maxime Ripard
The code in clk_set_rate_range() will, if the current rate is outside of
the new range, force it to the minimum or maximum.

Since it's running under the condition that the rate is either lower
than the minimum, or higher than the maximum, this is equivalent to
using clamp, while being less readable. Let's switch to using clamp
instead.

Signed-off-by: Maxime Ripard 
---
 drivers/clk/clk.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 9725bdc996b3..fd3daa11bfa4 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -2388,11 +2388,7 @@ int clk_set_rate_range(struct clk *clk, unsigned long 
min, unsigned long max)
 *   this corner case when determining the rate
 */
 
-   if (rate < min)
-   rate = min;
-   else
-   rate = max;
-
+   rate = clamp(clk->core->req_rate, min, max);
ret = clk_core_set_rate_nolock(clk->core, rate);
if (ret) {
/* rollback the changes */
-- 
2.35.1



[PATCH v5 05/11] clk: Always set the rate on clk_set_range_rate

2022-02-22 Thread Maxime Ripard
When we change a clock minimum or maximum using clk_set_rate_range(),
clk_set_min_rate() or clk_set_max_rate(), the current code will only
trigger a new rate change if the rate is outside of the new boundaries.

However, a clock driver might want to always keep the clock rate to
one of its boundaries, for example the minimum to keep the power
consumption as low as possible.

Since they don't always get called though, clock providers don't have the
opportunity to implement this behaviour.

Let's trigger a clk_set_rate() on the previous requested rate every time
clk_set_rate_range() is called. That way, providers that care about the
new boundaries have a chance to adjust the rate, while providers that
don't care about those new boundaries will return the same rate as
before, which will be ignored by clk_set_rate() and won't result in a
new rate change.

Suggested-by: Stephen Boyd 
Signed-off-by: Maxime Ripard 
---
 drivers/clk/clk.c  | 45 
 drivers/clk/clk_test.c | 58 +++---
 2 files changed, 49 insertions(+), 54 deletions(-)

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index fd3daa11bfa4..9ee9ef0601c5 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -2373,28 +2373,29 @@ int clk_set_rate_range(struct clk *clk, unsigned long 
min, unsigned long max)
goto out;
}
 
-   rate = clk_core_get_rate_nolock(clk->core);
-   if (rate < min || rate > max) {
-   /*
-* FIXME:
-* We are in bit of trouble here, current rate is outside the
-* the requested range. We are going try to request appropriate
-* range boundary but there is a catch. It may fail for the
-* usual reason (clock broken, clock protected, etc) but also
-* because:
-* - round_rate() was not favorable and fell on the wrong
-*   side of the boundary
-* - the determine_rate() callback does not really check for
-*   this corner case when determining the rate
-*/
-
-   rate = clamp(clk->core->req_rate, min, max);
-   ret = clk_core_set_rate_nolock(clk->core, rate);
-   if (ret) {
-   /* rollback the changes */
-   clk->min_rate = old_min;
-   clk->max_rate = old_max;
-   }
+   /*
+* Since the boundaries have been changed, let's give the
+* opportunity to the provider to adjust the clock rate based on
+* the new boundaries.
+*
+* We also need to handle the case where the clock is currently
+* outside of the boundaries. Clamping the last requested rate
+* to the current minimum and maximum will also handle this.
+*
+* FIXME:
+* There is a catch. It may fail for the usual reason (clock
+* broken, clock protected, etc) but also because:
+* - round_rate() was not favorable and fell on the wrong
+*   side of the boundary
+* - the determine_rate() callback does not really check for
+*   this corner case when determining the rate
+*/
+   rate = clamp(clk->core->req_rate, min, max);
+   ret = clk_core_set_rate_nolock(clk->core, rate);
+   if (ret) {
+   /* rollback the changes */
+   clk->min_rate = old_min;
+   clk->max_rate = old_max;
}
 
 out:
diff --git a/drivers/clk/clk_test.c b/drivers/clk/clk_test.c
index 2b906c594328..b23859d1b460 100644
--- a/drivers/clk/clk_test.c
+++ b/drivers/clk/clk_test.c
@@ -544,13 +544,12 @@ static struct kunit_suite clk_range_test_suite = {
 };
 
 /*
- * Test that if:
- * - we have several subsequent calls to clk_set_rate_range();
- * - and we have a round_rate ops that always return the maximum
- *   frequency allowed;
+ * Test that if we have several subsequent calls to
+ * clk_set_rate_range(), the core will reevaluate whether a new rate is
+ * needed each and every time.
  *
- * The clock will run at the minimum of all maximum boundaries
- * requested, even if those boundaries aren't there anymore.
+ * With clk_dummy_maximize_rate_ops, this means that the rate will
+ * trail along the maximum as it evolves.
  */
 static void clk_range_test_set_range_rate_maximized(struct kunit *test)
 {
@@ -591,18 +590,16 @@ static void 
clk_range_test_set_range_rate_maximized(struct kunit *test)
 
rate = clk_get_rate(clk);
KUNIT_ASSERT_GT(test, rate, 0);
-   KUNIT_EXPECT_EQ(test, rate, DUMMY_CLOCK_RATE_2 - 1000);
+   KUNIT_EXPECT_EQ(test, rate, DUMMY_CLOCK_RATE_2);
 }
 
 /*
- * Test that if:
- * - we have several subsequent calls to clk_set_rate_range(), across
- *   multiple users;
- * - and we have a round_rate ops that always return the maximum
- *   frequency allowed;
+ * Test that if we have several subsequent calls to
+ * c

[PATCH v5 06/11] clk: Add clk_drop_range

2022-02-22 Thread Maxime Ripard
In order to reset the range on a clock, we need to call
clk_set_rate_range with a minimum of 0 and a maximum of ULONG_MAX. Since
it's fairly inconvenient, let's introduce a clk_drop_range() function
that will do just this.

Suggested-by: Stephen Boyd 
Signed-off-by: Maxime Ripard 
---
 drivers/clk/clk_test.c |  4 ++--
 include/linux/clk.h| 11 +++
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/clk_test.c b/drivers/clk/clk_test.c
index b23859d1b460..f108e2dcc351 100644
--- a/drivers/clk/clk_test.c
+++ b/drivers/clk/clk_test.c
@@ -640,7 +640,7 @@ static void 
clk_range_test_multiple_set_range_rate_maximized(struct kunit *test)
KUNIT_EXPECT_EQ(test, rate, DUMMY_CLOCK_RATE_1);
 
KUNIT_ASSERT_EQ(test,
-   clk_set_rate_range(user2, 0, ULONG_MAX),
+   clk_drop_range(user2),
0);
 
rate = clk_get_rate(clk);
@@ -757,7 +757,7 @@ static void 
clk_range_test_multiple_set_range_rate_minimized(struct kunit *test)
KUNIT_EXPECT_EQ(test, rate, DUMMY_CLOCK_RATE_2);
 
KUNIT_ASSERT_EQ(test,
-   clk_set_rate_range(user2, 0, ULONG_MAX),
+   clk_drop_range(user2),
0);
 
rate = clk_get_rate(clk);
diff --git a/include/linux/clk.h b/include/linux/clk.h
index 266e8de3cb51..39faa54efe88 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -986,6 +986,17 @@ static inline void clk_bulk_disable_unprepare(int num_clks,
clk_bulk_unprepare(num_clks, clks);
 }
 
+/**
+ * clk_drop_range - Reset any range set on that clock
+ * @clk: clock source
+ *
+ * Returns success (0) or negative errno.
+ */
+static inline int clk_drop_range(struct clk *clk)
+{
+   return clk_set_rate_range(clk, 0, ULONG_MAX);
+}
+
 /**
  * clk_get_optional - lookup and obtain a reference to an optional clock
  *   producer.
-- 
2.35.1



[PATCH v5 02/11] clk: Enforce that disjoints limits are invalid

2022-02-22 Thread Maxime Ripard
If we were to have two users of the same clock, doing something like:

clk_set_rate_range(user1, 1000, 2000);
clk_set_rate_range(user2, 3000, 4000);

The second call would fail with -EINVAL, preventing us from getting into
a situation where we end up with impossible limits.

However, this is never explicitly checked against and enforced, and
works by relying on an undocumented behaviour of clk_set_rate().

Indeed, the first clk_set_rate_range() call will make sure the current clock
rate is within the new range, so it will be between 1000 and 2000Hz. On
the second clk_set_rate_range(), it will consider (rightfully), that our
current clock is outside of the 3000-4000Hz range, and will call
clk_core_set_rate_nolock() to set it to 3000Hz.

clk_core_set_rate_nolock() will then call clk_calc_new_rates() that will
eventually check that our 3000Hz rate is outside the min 3000Hz, max
2000Hz range, and will bail out; the error will propagate and we'll
eventually return -EINVAL.

This solely relies on the fact that clk_calc_new_rates(), and in
particular clk_core_determine_round_nolock(), won't modify the new rate
allowing the error to be reported. That assumption won't be true for all
drivers, and most importantly we'll break that assumption in a later
patch.

It can also be argued that we shouldn't even reach the point where we're
calling clk_core_set_rate_nolock().

Let's make an explicit check for disjoint ranges before we're doing
anything.

Signed-off-by: Maxime Ripard 
---
 drivers/clk/clk.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 8de6a22498e7..276188a6bc2a 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -632,6 +632,24 @@ static void clk_core_get_boundaries(struct clk_core *core,
*max_rate = min(*max_rate, clk_user->max_rate);
 }
 
+static bool clk_core_check_boundaries(struct clk_core *core,
+ unsigned long min_rate,
+ unsigned long max_rate)
+{
+   struct clk *user;
+
+   lockdep_assert_held(&prepare_lock);
+
+   if (min_rate > core->max_rate || max_rate < core->min_rate)
+   return false;
+
+   hlist_for_each_entry(user, &core->clks, clks_node)
+   if (min_rate > user->max_rate || max_rate < user->min_rate)
+   return false;
+
+   return true;
+}
+
 void clk_hw_set_rate_range(struct clk_hw *hw, unsigned long min_rate,
   unsigned long max_rate)
 {
@@ -2348,6 +2366,11 @@ int clk_set_rate_range(struct clk *clk, unsigned long 
min, unsigned long max)
clk->min_rate = min;
clk->max_rate = max;
 
+   if (!clk_core_check_boundaries(clk->core, min, max)) {
+   ret = -EINVAL;
+   goto out;
+   }
+
rate = clk_core_get_rate_nolock(clk->core);
if (rate < min || rate > max) {
/*
@@ -2376,6 +2399,7 @@ int clk_set_rate_range(struct clk *clk, unsigned long 
min, unsigned long max)
}
}
 
+out:
if (clk->exclusive_count)
clk_core_rate_protect(clk->core);
 
-- 
2.35.1



[PATCH v5 08/11] clk: bcm: rpi: Set a default minimum rate

2022-02-22 Thread Maxime Ripard
The M2MC clock provides the state machine clock for both HDMI
controllers.

However, if no HDMI monitor is plugged in at boot, its clock rate will
be left at 0 by the firmware and will make any register access end up in
a CPU stall, even though the clock was enabled.

We had some code in the HDMI controller to deal with this before, but it
makes more sense to have it in the clock driver. Move it there.

Signed-off-by: Maxime Ripard 
---
 drivers/clk/bcm/clk-raspberrypi.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/drivers/clk/bcm/clk-raspberrypi.c 
b/drivers/clk/bcm/clk-raspberrypi.c
index f7185d421085..c879f2e9a4a7 100644
--- a/drivers/clk/bcm/clk-raspberrypi.c
+++ b/drivers/clk/bcm/clk-raspberrypi.c
@@ -76,6 +76,7 @@ struct raspberrypi_clk_data {
 struct raspberrypi_clk_variant {
boolexport;
char*clkdev;
+   unsigned long   min_rate;
 };
 
 static struct raspberrypi_clk_variant
@@ -89,6 +90,18 @@ raspberrypi_clk_variants[RPI_FIRMWARE_NUM_CLK_ID] = {
},
[RPI_FIRMWARE_M2MC_CLK_ID] = {
.export = true,
+
+   /*
+* If we boot without any cable connected to any of the
+* HDMI connector, the firmware will skip the HSM
+* initialization and leave it with a rate of 0,
+* resulting in a bus lockup when we're accessing the
+* registers even if it's enabled.
+*
+* Let's put a sensible default so that we don't end up
+* in this situation.
+*/
+   .min_rate = 12000,
},
[RPI_FIRMWARE_V3D_CLK_ID] = {
.export = true,
@@ -267,6 +280,19 @@ static struct clk_hw *raspberrypi_clk_register(struct 
raspberrypi_clk *rpi,
}
}
 
+   if (variant->min_rate) {
+   unsigned long rate;
+
+   clk_hw_set_rate_range(&data->hw, variant->min_rate, max_rate);
+
+   rate = raspberrypi_fw_get_rate(&data->hw, 0);
+   if (rate < variant->min_rate) {
+   ret = raspberrypi_fw_set_rate(&data->hw, 
variant->min_rate, 0);
+   if (ret)
+   return ERR_PTR(ret);
+   }
+   }
+
return &data->hw;
 }
 
-- 
2.35.1



[PATCH v5 07/11] clk: bcm: rpi: Add variant structure

2022-02-22 Thread Maxime Ripard
We only export a bunch of firmware clocks, and some of them require
special treatment.

This has been done so far using some tests on the clock id in various
places, but this is fairly hard to extend and doesn't scale very well.

Since we'll need some more cases in the next patches, let's switch to a
variant structure that defines the behaviour we need to have for a given
clock.

Signed-off-by: Maxime Ripard 
---
 drivers/clk/bcm/clk-raspberrypi.c | 62 +++
 1 file changed, 46 insertions(+), 16 deletions(-)

diff --git a/drivers/clk/bcm/clk-raspberrypi.c 
b/drivers/clk/bcm/clk-raspberrypi.c
index dd3b71eafabf..f7185d421085 100644
--- a/drivers/clk/bcm/clk-raspberrypi.c
+++ b/drivers/clk/bcm/clk-raspberrypi.c
@@ -56,6 +56,8 @@ static char *rpi_firmware_clk_names[] = {
 #define RPI_FIRMWARE_STATE_ENABLE_BIT  BIT(0)
 #define RPI_FIRMWARE_STATE_WAIT_BITBIT(1)
 
+struct raspberrypi_clk_variant;
+
 struct raspberrypi_clk {
struct device *dev;
struct rpi_firmware *firmware;
@@ -66,10 +68,36 @@ struct raspberrypi_clk_data {
struct clk_hw hw;
 
unsigned int id;
+   struct raspberrypi_clk_variant *variant;
 
struct raspberrypi_clk *rpi;
 };
 
+struct raspberrypi_clk_variant {
+   boolexport;
+   char*clkdev;
+};
+
+static struct raspberrypi_clk_variant
+raspberrypi_clk_variants[RPI_FIRMWARE_NUM_CLK_ID] = {
+   [RPI_FIRMWARE_ARM_CLK_ID] = {
+   .export = true,
+   .clkdev = "cpu0",
+   },
+   [RPI_FIRMWARE_CORE_CLK_ID] = {
+   .export = true,
+   },
+   [RPI_FIRMWARE_M2MC_CLK_ID] = {
+   .export = true,
+   },
+   [RPI_FIRMWARE_V3D_CLK_ID] = {
+   .export = true,
+   },
+   [RPI_FIRMWARE_PIXEL_BVB_CLK_ID] = {
+   .export = true,
+   },
+};
+
 /*
  * Structure of the message passed to Raspberry Pi's firmware in order to
  * change clock rates. The 'disable_turbo' option is only available to the ARM
@@ -183,7 +211,8 @@ static const struct clk_ops raspberrypi_firmware_clk_ops = {
 
 static struct clk_hw *raspberrypi_clk_register(struct raspberrypi_clk *rpi,
   unsigned int parent,
-  unsigned int id)
+  unsigned int id,
+  struct raspberrypi_clk_variant 
*variant)
 {
struct raspberrypi_clk_data *data;
struct clk_init_data init = {};
@@ -195,6 +224,7 @@ static struct clk_hw *raspberrypi_clk_register(struct 
raspberrypi_clk *rpi,
return ERR_PTR(-ENOMEM);
data->rpi = rpi;
data->id = id;
+   data->variant = variant;
 
init.name = devm_kasprintf(rpi->dev, GFP_KERNEL,
   "fw-clk-%s",
@@ -228,9 +258,9 @@ static struct clk_hw *raspberrypi_clk_register(struct 
raspberrypi_clk *rpi,
 
clk_hw_set_rate_range(&data->hw, min_rate, max_rate);
 
-   if (id == RPI_FIRMWARE_ARM_CLK_ID) {
+   if (variant->clkdev) {
ret = devm_clk_hw_register_clkdev(rpi->dev, &data->hw,
- NULL, "cpu0");
+ NULL, variant->clkdev);
if (ret) {
dev_err(rpi->dev, "Failed to initialize clkdev\n");
return ERR_PTR(ret);
@@ -264,27 +294,27 @@ static int raspberrypi_discover_clocks(struct 
raspberrypi_clk *rpi,
return ret;
 
while (clks->id) {
-   struct clk_hw *hw;
+   struct raspberrypi_clk_variant *variant;
+
+   if (clks->id > RPI_FIRMWARE_NUM_CLK_ID) {
+   dev_err(rpi->dev, "Unknown clock id: %u", clks->id);
+   return -EINVAL;
+   }
+
+   variant = &raspberrypi_clk_variants[clks->id];
+   if (variant->export) {
+   struct clk_hw *hw;
 
-   switch (clks->id) {
-   case RPI_FIRMWARE_ARM_CLK_ID:
-   case RPI_FIRMWARE_CORE_CLK_ID:
-   case RPI_FIRMWARE_M2MC_CLK_ID:
-   case RPI_FIRMWARE_V3D_CLK_ID:
-   case RPI_FIRMWARE_PIXEL_BVB_CLK_ID:
hw = raspberrypi_clk_register(rpi, clks->parent,
- clks->id);
+ clks->id, variant);
if (IS_ERR(hw))
return PTR_ERR(hw);
 
data->hws[clks->id] = hw;
data->num = clks->id + 1;
-   fallthrough;
-
-   default:
-   clks++;
-   break;
}
+
+   clks++;
}
 
return 0;
-- 
2.35.1



[PATCH v5 09/11] clk: bcm: rpi: Run some clocks at the minimum rate allowed

2022-02-22 Thread Maxime Ripard
The core clock and M2MC clocks are shared between some devices (Unicam
controllers and the HVS, and the HDMI controllers, respectively) that
will have various, varying, requirements depending on their current work
load.

Since those loads can require a fairly high clock rate in extreme
conditions (up to ~600MHz), we can end up running those clocks at their
maximum frequency even though we no longer require such a high rate.

Fortunately, those devices don't require an exact rate but a minimum
rate, and all the drivers are using clk_set_min_rate. Thus, we can just
rely on the fact that the clk_request minimum (which is the aggregated
minimum of all the clock users) is what we want at all times.

Signed-off-by: Maxime Ripard 
---
 drivers/clk/bcm/clk-raspberrypi.c | 37 +++
 1 file changed, 37 insertions(+)

diff --git a/drivers/clk/bcm/clk-raspberrypi.c 
b/drivers/clk/bcm/clk-raspberrypi.c
index c879f2e9a4a7..9d09621549b9 100644
--- a/drivers/clk/bcm/clk-raspberrypi.c
+++ b/drivers/clk/bcm/clk-raspberrypi.c
@@ -77,6 +77,7 @@ struct raspberrypi_clk_variant {
boolexport;
char*clkdev;
unsigned long   min_rate;
+   boolminimize;
 };
 
 static struct raspberrypi_clk_variant
@@ -87,6 +88,18 @@ raspberrypi_clk_variants[RPI_FIRMWARE_NUM_CLK_ID] = {
},
[RPI_FIRMWARE_CORE_CLK_ID] = {
.export = true,
+
+   /*
+* The clock is shared between the HVS and the CSI
+* controllers, on the BCM2711 and will change depending
+* on the pixels composited on the HVS and the capture
+* resolution on Unicam.
+*
+* Since the rate can get quite large, and we need to
+* coordinate between both driver instances, let's
+* always use the minimum the drivers will let us.
+*/
+   .minimize = true,
},
[RPI_FIRMWARE_M2MC_CLK_ID] = {
.export = true,
@@ -102,6 +115,16 @@ raspberrypi_clk_variants[RPI_FIRMWARE_NUM_CLK_ID] = {
 * in this situation.
 */
.min_rate = 12000,
+
+   /*
+* The clock is shared between the two HDMI controllers
+* on the BCM2711 and will change depending on the
+* resolution output on each. Since the rate can get
+* quite large, and we need to coordinate between both
+* driver instances, let's always use the minimum the
+* drivers will let us.
+*/
+   .minimize = true,
},
[RPI_FIRMWARE_V3D_CLK_ID] = {
.export = true,
@@ -206,12 +229,26 @@ static int raspberrypi_fw_set_rate(struct clk_hw *hw, 
unsigned long rate,
 static int raspberrypi_fw_dumb_determine_rate(struct clk_hw *hw,
  struct clk_rate_request *req)
 {
+   struct raspberrypi_clk_data *data =
+   container_of(hw, struct raspberrypi_clk_data, hw);
+   struct raspberrypi_clk_variant *variant = data->variant;
+
/*
 * The firmware will do the rounding but that isn't part of
 * the interface with the firmware, so we just do our best
 * here.
 */
+
req->rate = clamp(req->rate, req->min_rate, req->max_rate);
+
+   /*
+* We want to aggressively reduce the clock rate here, so let's
+* just ignore the requested rate and return the bare minimum
+* rate we can get away with.
+*/
+   if (variant->minimize && req->min_rate > 0)
+   req->rate = req->min_rate;
+
return 0;
 }
 
-- 
2.35.1



[PATCH v5 10/11] drm/vc4: Add logging and comments

2022-02-22 Thread Maxime Ripard
The HVS core clock isn't really obvious, so let's add a bunch more
comments and some logging for easier debugging.

Signed-off-by: Maxime Ripard 
---
 drivers/gpu/drm/vc4/vc4_kms.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c
index 24de29bc1cda..6fe03fc17d73 100644
--- a/drivers/gpu/drm/vc4/vc4_kms.c
+++ b/drivers/gpu/drm/vc4/vc4_kms.c
@@ -389,8 +389,15 @@ static void vc4_atomic_commit_tail(struct drm_atomic_state 
*state)
5,
new_hvs_state->core_clock_rate);
 
+   drm_dbg(dev, "Raising the core clock at %lu Hz\n", core_rate);
+
+   /*
+* Do a temporary request on the core clock during the
+* modeset.
+*/
clk_set_min_rate(hvs->core_clk, core_rate);
}
+
drm_atomic_helper_commit_modeset_disables(dev, state);
 
vc4_ctm_commit(vc4, state);
@@ -416,6 +423,10 @@ static void vc4_atomic_commit_tail(struct drm_atomic_state 
*state)
drm_dbg(dev, "Running the core clock at %lu Hz\n",
new_hvs_state->core_clock_rate);
 
+   /*
+* Request a clock rate based on the current HVS
+* requirements.
+*/
clk_set_min_rate(hvs->core_clk, new_hvs_state->core_clock_rate);
}
 }
-- 
2.35.1



[PATCH v5 11/11] drm/vc4: hdmi: Remove clock rate initialization

2022-02-22 Thread Maxime Ripard
Now that the clock driver makes sure we never end up with a rate of 0,
the HDMI driver doesn't need to care anymore.

Signed-off-by: Maxime Ripard 
---
 drivers/gpu/drm/vc4/vc4_hdmi.c | 13 -
 1 file changed, 13 deletions(-)

diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index 92b1530aa17b..21aff3ad96cf 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -2576,19 +2576,6 @@ static int vc4_hdmi_bind(struct device *dev, struct 
device *master, void *data)
vc4_hdmi->disable_4kp60 = true;
}
 
-   /*
-* If we boot without any cable connected to the HDMI connector,
-* the firmware will skip the HSM initialization and leave it
-* with a rate of 0, resulting in a bus lockup when we're
-* accessing the registers even if it's enabled.
-*
-* Let's put a sensible default at runtime_resume so that we
-* don't end up in this situation.
-*/
-   ret = clk_set_min_rate(vc4_hdmi->hsm_clock, HSM_MIN_CLOCK_FREQ);
-   if (ret)
-   goto err_put_ddc;
-
/*
 * We need to have the device powered up at this point to call
 * our reset hook and for the CEC init.
-- 
2.35.1



[PATCH v2] drm/i915: Clarify vma lifetime

2022-02-22 Thread Thomas Hellström
It's unclear what reference the initial vma kref reference refers to.
A vma can have multiple weak references, the object vma list,
the vm's bound list and the GT's closed_list, and the initial vma
reference can be put from lookups of all these lists.

With the current implementation this means
that any holder of yet another vma refcount (currently only
i915_gem_object_unbind()) needs to be holding two of either
*) An object refcount,
*) A vm open count
*) A vma open count

in order for us to not risk leaking a reference by having the
initial vma reference being put twice.

Address this by re-introducing i915_vma_destroy() which removes all
weak references of the vma and *then* puts the initial vma refcount.
This makes a strong vma reference hold on to the vma unconditionally.

Perhaps a better name would be i915_vma_revoke() or i915_vma_zombify(),
since other callers may still hold a refcount, but with the prospect of
being able to replace the vma refcount with the object lock in the near
future, let's stick with i915_vma_destroy().

Finally this commit fixes a race in that previously i915_vma_release() and
now i915_vma_destroy() could destroy a vma without taking the vm->mutex
after an advisory check that the vma mm_node was not allocated.
This would race with the ungrab_vma() function creating a trace similar
to the below one. This was fixed in one of the __i915_vma_put() callsites
in
commit bc1922e5d349 ("drm/i915: Fix a race between vma / object destruction and 
unbinding")
but although not seemingly triggered by CI, that
is not sufficient. This patch is needed to fix that properly.

[823.012188] Console: switching to colour dummy device 80x25
[823.012422] [IGT] gem_ppgtt: executing
[823.016667] [IGT] gem_ppgtt: starting subtest blt-vs-render-ctx0
[852.436465] stack segment:  [#1] PREEMPT SMP NOPTI
[852.436480] CPU: 0 PID: 3200 Comm: gem_ppgtt Not tainted 
5.16.0-CI-CI_DRM_5+ #1
[852.436489] Hardware name: Intel Corporation Alder Lake Client 
Platform/AlderLake-P DDR5 RVP, BIOS ADLPFWI1.R00.2422.A00.2110131104 10/13/2021
[852.436499] RIP: 0010:ungrab_vma+0x9/0x80 [i915]
[852.436711] Code: ef e8 4b 85 cf e0 e8 36 a3 d6 e0 8b 83 f8 9c 00 00 85 c0 75 
e1 5b 5d 41 5c 41 5d c3 e9 d6 fd 14 00 55 53 48 8b af c0 00 00 00 <8b> 45 00 85 
c0 75 03 5b 5d c3 48 8b 85 a0 02 00 00 48 89 fb 48 8b
[852.436727] RSP: 0018:c90006db7880 EFLAGS: 00010246
[852.436734] RAX:  RBX: c90006db7598 RCX: 
[852.436742] RDX: 88815349e898 RSI: 88815349e858 RDI: 88810a284140
[852.436748] RBP: 6b6b6b6b6b6b6b6b R08: 88815349e898 R09: 88815349e8e8
[852.436754] R10: 0001 R11: 51ef1141 R12: 88810a284140
[852.436762] R13:  R14: 88815349e868 R15: 88810a284458
[852.436770] FS:  7f5c04b04e40() GS:88849f00() 
knlGS:
[852.436781] CS:  0010 DS:  ES:  CR0: 80050033
[852.436788] CR2: 7f5c04b38fe0 CR3: 00010a6e8001 CR4: 00770ef0
[852.436797] PKRU: 5554
[852.436801] Call Trace:
[852.436806]  
[852.436811]  i915_gem_evict_for_node+0x33c/0x3c0 [i915]
[852.437014]  i915_gem_gtt_reserve+0x106/0x130 [i915]
[852.437211]  i915_vma_pin_ww+0x8f4/0xb60 [i915]
[852.437412]  eb_validate_vmas+0x688/0x860 [i915]
[852.437596]  i915_gem_do_execbuffer+0xc0e/0x25b0 [i915]
[852.437770]  ? deactivate_slab+0x5f2/0x7d0
[852.437778]  ? _raw_spin_unlock_irqrestore+0x50/0x60
[852.437789]  ? i915_gem_execbuffer2_ioctl+0xc6/0x2c0 [i915]
[852.437944]  ? init_object+0x49/0x80
[852.437950]  ? __lock_acquire+0x5e6/0x2580
[852.437963]  i915_gem_execbuffer2_ioctl+0x116/0x2c0 [i915]
[852.438129]  ? i915_gem_do_execbuffer+0x25b0/0x25b0 [i915]
[852.438300]  drm_ioctl_kernel+0xac/0x140
[852.438310]  drm_ioctl+0x201/0x3d0
[852.438316]  ? i915_gem_do_execbuffer+0x25b0/0x25b0 [i915]
[852.438490]  __x64_sys_ioctl+0x6a/0xa0
[852.438498]  do_syscall_64+0x37/0xb0
[852.438507]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[852.438515] RIP: 0033:0x7f5c0415b317
[852.438523] Code: b3 66 90 48 8b 05 71 4b 2d 00 64 c7 00 26 00 00 00 48 c7 c0 
ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 
ff ff 73 01 c3 48 8b 0d 41 4b 2d 00 f7 d8 64 89 01 48
[852.438542] RSP: 002b:7ffd765039a8 EFLAGS: 0246 ORIG_RAX: 
0010
[852.438553] RAX: ffda RBX: 55e4d7829dd0 RCX: 7f5c0415b317
[852.438562] RDX: 7ffd76503a00 RSI: c0406469 RDI: 0017
[852.438571] RBP: 7ffd76503a00 R08:  R09: 0081
[852.438579] R10: ff7f R11: 0246 R12: c0406469
[852.438587] R13: 0017 R14: 7ffd76503a00 R15: 
[852.438598]  
[852.438602] Modules linked in: snd_hda_codec_hdmi i915 mei_hdcp 
x86_pkg_temp_thermal snd_hda_intel snd_intel_dspcfg drm_buddy coretemp 
crct10dif_pclmul crc32_pclmul snd_hda_codec ttm ghash_clmulni_intel snd_hwdep 
snd_hda_core e1000e drm_dp_helper ptp snd_pcm m

Re: [PATCH] dma-buf: check the return value of kstrdup()

2022-02-22 Thread Sumit Semwal
Hi Xiaoke,

On Tue, 22 Feb 2022 at 17:00,  wrote:
>
> From: Xiaoke Wang 
>
> kstrdup() is a memory allocation function which can return NULL when
> some internal memory errors happen. It is better to check the return
> value of it to prevent further wrong memory access.
Thanks for the patch; looks sane.
>
> Signed-off-by: Xiaoke Wang 
Acked-by: Sumit Semwal 

Will queue it up.
> ---
>  drivers/dma-buf/selftest.c | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/drivers/dma-buf/selftest.c b/drivers/dma-buf/selftest.c
> index c60b694..2c29e2a 100644
> --- a/drivers/dma-buf/selftest.c
> +++ b/drivers/dma-buf/selftest.c
> @@ -50,6 +50,9 @@ static bool apply_subtest_filter(const char *caller, const 
> char *name)
> bool result = true;
>
> filter = kstrdup(__st_filter, GFP_KERNEL);
> +   if (!filter)
> +   return false;
> +
> for (sep = filter; (tok = strsep(&sep, ","));) {
> bool allow = true;
> char *sl;
> --


Best,
Sumit.


[v2 1/3] drm/edid: parse multiple CEA extension block

2022-02-22 Thread Lee Shawn C
Try to find and parse more CEA ext blocks if edid->extensions
is greater than one.

v2: add ext_index and look for available CEA block from
ext_index in drm_find_cea_extension().

Cc: Jani Nikula 
Cc: Ville Syrjala 
Cc: Ankit Nautiyal 
Signed-off-by: Lee Shawn C 
---
 drivers/gpu/drm/drm_edid.c | 110 -
 1 file changed, 60 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 12893e7be89b..c83ef23757bd 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -3353,16 +3353,14 @@ const u8 *drm_find_edid_extension(const struct edid 
*edid,
return edid_ext;
 }
 
-static const u8 *drm_find_cea_extension(const struct edid *edid)
+static const u8 *drm_find_cea_extension(const struct edid *edid, int 
*ext_index)
 {
const struct displayid_block *block;
struct displayid_iter iter;
const u8 *cea;
-   int ext_index = 0;
 
-   /* Look for a top level CEA extension block */
-   /* FIXME: make callers iterate through multiple CEA ext blocks? */
-   cea = drm_find_edid_extension(edid, CEA_EXT, &ext_index);
+   /* Look for a CEA extension block from ext_index */
+   cea = drm_find_edid_extension(edid, CEA_EXT, ext_index);
if (cea)
return cea;
 
@@ -3643,10 +3641,10 @@ add_alternate_cea_modes(struct drm_connector 
*connector, struct edid *edid)
struct drm_device *dev = connector->dev;
struct drm_display_mode *mode, *tmp;
LIST_HEAD(list);
-   int modes = 0;
+   int modes = 0, ext_index = 0;
 
/* Don't add CEA modes if the CEA extension block is missing */
-   if (!drm_find_cea_extension(edid))
+   if (!drm_find_cea_extension(edid, &ext_index))
return 0;
 
/*
@@ -4310,46 +4308,58 @@ static void drm_parse_y420cmdb_bitmap(struct 
drm_connector *connector,
 static int
 add_cea_modes(struct drm_connector *connector, struct edid *edid)
 {
-   const u8 *cea = drm_find_cea_extension(edid);
+   const u8 *cea = NULL;
const u8 *db, *hdmi = NULL, *video = NULL;
u8 dbl, hdmi_len, video_len = 0;
-   int modes = 0;
+   int modes = 0, j = 0;
 
-   if (cea && cea_revision(cea) >= 3) {
-   int i, start, end;
+   for (;;) {
+   cea = drm_find_cea_extension(edid, &j);
 
-   if (cea_db_offsets(cea, &start, &end))
-   return 0;
+   if (!cea)
+   break;
 
-   for_each_cea_db(cea, i, start, end) {
-   db = &cea[i];
-   dbl = cea_db_payload_len(db);
+   if (cea && cea_revision(cea) >= 3) {
+   int i, start, end;
+
+   if (cea_db_offsets(cea, &start, &end))
+   continue;
 
-   if (cea_db_tag(db) == VIDEO_BLOCK) {
-   video = db + 1;
-   video_len = dbl;
-   modes += do_cea_modes(connector, video, dbl);
-   } else if (cea_db_is_hdmi_vsdb(db)) {
-   hdmi = db;
-   hdmi_len = dbl;
-   } else if (cea_db_is_y420vdb(db)) {
-   const u8 *vdb420 = &db[2];
-
-   /* Add 4:2:0(only) modes present in EDID */
-   modes += do_y420vdb_modes(connector,
- vdb420,
- dbl - 1);
+   for_each_cea_db(cea, i, start, end) {
+   db = &cea[i];
+   dbl = cea_db_payload_len(db);
+
+   if (cea_db_tag(db) == VIDEO_BLOCK) {
+   video = db + 1;
+   video_len = dbl;
+   modes += do_cea_modes(connector, video, 
dbl);
+   } else if (cea_db_is_hdmi_vsdb(db)) {
+   hdmi = db;
+   hdmi_len = dbl;
+   } else if (cea_db_is_y420vdb(db)) {
+   const u8 *vdb420 = &db[2];
+
+   /* Add 4:2:0(only) modes present in 
EDID */
+   modes += do_y420vdb_modes(connector,
+ vdb420,
+ dbl - 1);
+   }
}
}
-   }
 
-   /*
-* We parse the HDMI VSDB after having added the cea modes as we will
-* be patching their flags when the sink supports stereo 3D.

[v2 3/3] drm/edid: parse HF-EEODB CEA extension block

2022-02-22 Thread Lee Shawn C
While adding CEA modes, check whether an HF-EEODB block is available
and, if so, read its extension block count. Then use that count to parse
the extension blocks, retrieve CEA information and add more CEA modes.

Signed-off-by: Lee Shawn C 
---
 drivers/gpu/drm/drm_displayid.c |  5 -
 drivers/gpu/drm/drm_edid.c  | 34 ++---
 include/drm/drm_edid.h  |  2 +-
 3 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/drm_displayid.c b/drivers/gpu/drm/drm_displayid.c
index 32da557b960f..dc649a9efaa2 100644
--- a/drivers/gpu/drm/drm_displayid.c
+++ b/drivers/gpu/drm/drm_displayid.c
@@ -37,7 +37,10 @@ static const u8 *drm_find_displayid_extension(const struct 
edid *edid,
  int *length, int *idx,
  int *ext_index)
 {
-   const u8 *displayid = drm_find_edid_extension(edid, DISPLAYID_EXT, 
ext_index);
+   const u8 *displayid = drm_find_edid_extension(edid,
+ DISPLAYID_EXT,
+ ext_index,
+ edid->extensions);
const struct displayid_header *base;
int ret;
 
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index f8514783d089..bb050d171bb8 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -3364,23 +3364,23 @@ add_detailed_modes(struct drm_connector *connector, 
struct edid *edid,
  * Search EDID for CEA extension block.
  */
 const u8 *drm_find_edid_extension(const struct edid *edid,
- int ext_id, int *ext_index)
+ int ext_id, int *ext_index, int num_ext_blk)
 {
const u8 *edid_ext = NULL;
int i;
 
/* No EDID or EDID extensions */
-   if (edid == NULL || edid->extensions == 0)
+   if (edid == NULL || edid->extensions == 0 || *ext_index >= num_ext_blk)
return NULL;
 
/* Find CEA extension */
-   for (i = *ext_index; i < edid->extensions; i++) {
+   for (i = *ext_index; i < num_ext_blk; i++) {
edid_ext = (const u8 *)edid + EDID_LENGTH * (i + 1);
if (edid_ext[0] == ext_id)
break;
}
 
-   if (i >= edid->extensions)
+   if (i >= num_ext_blk)
return NULL;
 
*ext_index = i + 1;
@@ -3388,14 +3388,15 @@ const u8 *drm_find_edid_extension(const struct edid 
*edid,
return edid_ext;
 }
 
-static const u8 *drm_find_cea_extension(const struct edid *edid, int 
*ext_index)
+static const u8 *drm_find_cea_extension(const struct edid *edid,
+   int *ext_index, int num_ext_blk)
 {
const struct displayid_block *block;
struct displayid_iter iter;
const u8 *cea;
 
/* Look for a CEA extension block from ext_index */
-   cea = drm_find_edid_extension(edid, CEA_EXT, ext_index);
+   cea = drm_find_edid_extension(edid, CEA_EXT, ext_index, num_ext_blk);
if (cea)
return cea;
 
@@ -3679,7 +3680,7 @@ add_alternate_cea_modes(struct drm_connector *connector, 
struct edid *edid)
int modes = 0, ext_index = 0;
 
/* Don't add CEA modes if the CEA extension block is missing */
-   if (!drm_find_cea_extension(edid, &ext_index))
+   if (!drm_find_cea_extension(edid, &ext_index, edid->extensions))
return 0;
 
/*
@@ -4376,11 +4377,14 @@ add_cea_modes(struct drm_connector *connector, struct 
edid *edid)
 {
const u8 *cea = NULL;
const u8 *db, *hdmi = NULL, *video = NULL;
-   u8 dbl, hdmi_len, video_len = 0;
+   u8 dbl, hdmi_len, video_len = 0, num_ext_blk = edid->extensions;
int modes = 0, j = 0;
 
+   if (num_ext_blk && drm_edid_is_hf_eeodb_blk_available(edid))
+   num_ext_blk = drm_edid_read_hf_eeodb_blk_size(edid);
+
for (;;) {
-   cea = drm_find_cea_extension(edid, &j);
+   cea = drm_find_cea_extension(edid, &j, num_ext_blk);
 
if (!cea)
break;
@@ -4636,7 +4640,7 @@ static void drm_edid_to_eld(struct drm_connector 
*connector, struct edid *edid)
if (!edid)
return;
 
-   cea = drm_find_cea_extension(edid, &ext_index);
+   cea = drm_find_cea_extension(edid, &ext_index, edid->extensions);
if (!cea) {
DRM_DEBUG_KMS("ELD: no CEA Extension found\n");
return;
@@ -4724,7 +4728,7 @@ int drm_edid_to_sad(struct edid *edid, struct cea_sad 
**sads)
int i, start, end, dbl;
const u8 *cea;
 
-   cea = drm_find_cea_extension(edid, &ext_index);
+   cea = drm_find_cea_extension(edid, &ext_index, edid->extensions);
if (!cea) {
DRM_DEBUG_KMS("SAD: no CEA Extension found\n");
return 0;
@@ -4786,7 +4790,7 @@ int drm_edid_to_speaker_allo

[v2 2/3] drm/edid: read HF-EEODB ext block

2022-02-22 Thread Lee Shawn C
Support to read HF_EEODB block that request by HDMI 2.1 specification.

Signed-off-by: Lee Shawn C 
---
 drivers/gpu/drm/drm_connector.c |  5 ++-
 drivers/gpu/drm/drm_edid.c  | 76 ++---
 include/drm/drm_edid.h  |  2 +
 3 files changed, 77 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index a50c82bc2b2f..0f9e3ef00be7 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -2137,8 +2137,11 @@ int drm_connector_update_edid_property(struct 
drm_connector *connector,
if (connector->override_edid)
return 0;
 
-   if (edid)
+   if (edid) {
size = EDID_LENGTH * (1 + edid->extensions);
+   if (drm_edid_is_hf_eeodb_blk_available(edid))
+   size = EDID_LENGTH * (1 + 
drm_edid_read_hf_eeodb_blk_size(edid));
+   }
 
/* Set the display info, using edid if available, otherwise
 * resetting the values to defaults. This duplicates the work
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index c83ef23757bd..f8514783d089 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -1991,7 +1991,7 @@ struct edid *drm_do_get_edid(struct drm_connector 
*connector,
void *data)
 {
int i, j = 0, valid_extensions = 0;
-   u8 *edid, *new;
+   u8 *edid, *new, ext_eeodb_blk_size;
struct edid *override;
 
override = drm_get_override_edid(connector);
@@ -2051,7 +2051,40 @@ struct edid *drm_do_get_edid(struct drm_connector 
*connector,
}
 
kfree(edid);
+   return (struct edid *)new;
+   }
+
+   if (drm_edid_is_hf_eeodb_blk_available((struct edid *)edid)) {
+   ext_eeodb_blk_size = drm_edid_read_hf_eeodb_blk_size((struct 
edid *)edid);
+
+   // no more ext blk wait for read
+   if (ext_eeodb_blk_size <= 1)
+   return (struct edid *)edid;
+
+   new = krealloc(edid, (ext_eeodb_blk_size + 1) * EDID_LENGTH, 
GFP_KERNEL);
+   if (!new)
+   goto out;
edid = new;
+
+   valid_extensions = ext_eeodb_blk_size - 1;
+   for (j = 2; j <= ext_eeodb_blk_size; j++) {
+   u8 *block = edid + j * EDID_LENGTH;
+
+   for (i = 0; i < 4; i++) {
+   if (get_edid_block(data, block, j, EDID_LENGTH))
+   goto out;
+   if (drm_edid_block_valid(block, j, false, NULL))
+   break;
+   }
+
+   if (i == 4)
+   valid_extensions--;
+   }
+
+   if (valid_extensions != ext_eeodb_blk_size - 1) {
+   DRM_ERROR("Not able to retrieve proper EDID contain 
HF-EEODB data.\n");
+   goto out;
+   }
}
 
return (struct edid *)edid;
@@ -3315,15 +3348,17 @@ add_detailed_modes(struct drm_connector *connector, 
struct edid *edid,
 #define VIDEO_BLOCK 0x02
 #define VENDOR_BLOCK0x03
 #define SPEAKER_BLOCK  0x04
-#define HDR_STATIC_METADATA_BLOCK  0x6
-#define USE_EXTENDED_TAG 0x07
-#define EXT_VIDEO_CAPABILITY_BLOCK 0x00
+#define EXT_VIDEO_CAPABILITY_BLOCK 0x00
+#define HDR_STATIC_METADATA_BLOCK  0x06
+#define USE_EXTENDED_TAG   0x07
 #define EXT_VIDEO_DATA_BLOCK_420   0x0E
-#define EXT_VIDEO_CAP_BLOCK_Y420CMDB 0x0F
+#define EXT_VIDEO_CAP_BLOCK_Y420CMDB   0x0F
+#define EXT_VIDEO_HF_EEODB_DATA_BLOCK  0x78
 #define EDID_BASIC_AUDIO   (1 << 6)
 #define EDID_CEA_YCRCB444  (1 << 5)
 #define EDID_CEA_YCRCB422  (1 << 4)
 #define EDID_CEA_VCDB_QS   (1 << 6)
+#define HF_EEODB_LENGTH2
 
 /*
  * Search EDID for CEA extension block.
@@ -4220,6 +4255,20 @@ static bool cea_db_is_hdmi_forum_vsdb(const u8 *db)
return oui(db[3], db[2], db[1]) == HDMI_FORUM_IEEE_OUI;
 }
 
+static bool cea_db_is_hdmi_forum_eeodb(const u8 *db)
+{
+   if (cea_db_tag(db) != USE_EXTENDED_TAG)
+   return false;
+
+   if (cea_db_payload_len(db) != HF_EEODB_LENGTH)
+   return false;
+
+   if (cea_db_extended_tag(db) != EXT_VIDEO_HF_EEODB_DATA_BLOCK)
+   return false;
+
+   return true;
+}
+
 static bool cea_db_is_vcdb(const u8 *db)
 {
if (cea_db_tag(db) != USE_EXTENDED_TAG)
@@ -4262,6 +4311,23 @@ static bool cea_db_is_y420vdb(const u8 *db)
return true;
 }
 
+bool drm_edid_is_hf_eeodb_blk_available(const struct edid *edid)
+{
+   const u8 *eeodb_header = (u8 *)edid + EDID_LENGTH + 4;
+
+   if (!edid->extensions)
+   return false;
+
+   return cea_db_is_hdmi_forum_eeodb(eeodb_header);
+}
+EXPORT_SYMBOL_GPL(drm_edid_is_hf_eeodb_blk_available);
+
+u8 drm_edid

Re: [PATCH v10 3/4] drm/lsdc: add drm driver for loongson display controller

2022-02-22 Thread Sui Jingfeng



On 2022/2/22 16:27, Maxime Ripard wrote:

Hi,

On Sun, Feb 20, 2022 at 10:55:53PM +0800, Sui Jingfeng wrote:

+/* lsdc_get_display_timings_from_dtb - Get display timings from the device tree
+ *
+ * @np: point to the device node contain the display timings
+ * @pptim: point to where the pointer of struct display_timings is store to
+ */
+static void lsdc_get_display_timings_from_dtb(struct device_node *np,
+ struct display_timings **pptim)
+{
+   struct display_timings *timings;
+
+   if (!np)
+   return;
+
+   timings = of_get_display_timings(np);
+   if (timings)
+   *pptim = timings;
+}

This is not documented in your binding.


+static int lsdc_get_connector_type(struct drm_device *ddev,
+  struct device_node *output,
+  unsigned int index)
+{
+   const char *name;
+   int ret;
+
+   ret = of_property_read_string(output, "connector", &name);
+   if (ret < 0)
+   return DRM_MODE_CONNECTOR_Unknown;
+
+   if (strncmp(name, "vga-connector", 13) == 0) {
+   ret = DRM_MODE_CONNECTOR_VGA;
+   drm_info(ddev, "connector%d is VGA\n", index);
+   } else if (strncmp(name, "dvi-connector", 13) == 0) {
+   bool analog, digital;
+
+   analog = of_property_read_bool(output, "analog");
+   digital = of_property_read_bool(output, "digital");
+
+   if (analog && !digital)
+   ret = DRM_MODE_CONNECTOR_DVIA;
+   else if (analog && digital)
+   ret = DRM_MODE_CONNECTOR_DVII;
+   else
+   ret = DRM_MODE_CONNECTOR_DVID;
+
+   drm_info(ddev, "connector%d is DVI\n", index);
+   } else if (strncmp(name, "virtual-connector", 17) == 0) {
+   ret = DRM_MODE_CONNECTOR_VIRTUAL;
+   drm_info(ddev, "connector%d is virtual\n", index);
+   } else if (strncmp(name, "dpi-connector", 13) == 0) {
+   ret = DRM_MODE_CONNECTOR_DPI;
+   drm_info(ddev, "connector%d is DPI\n", index);
+   } else if (strncmp(name, "hdmi-connector", 14) == 0) {
+   int res;
+   const char *hdmi_type;
+
+   ret = DRM_MODE_CONNECTOR_HDMIA;
+
+   res = of_property_read_string(output, "type", &hdmi_type);
+   if (res == 0 && !strcmp(hdmi_type, "b"))
+   ret = DRM_MODE_CONNECTOR_HDMIB;
+
+   drm_info(ddev, "connector%d is HDMI, type is %s\n", index, 
hdmi_type);
+   } else {
+   ret = DRM_MODE_CONNECTOR_Unknown;
+   drm_info(ddev, "The type of connector%d is unknown\n", index);
+   }
+
+   return ret;
+}

Your ports, and the fact that you're using the connector bindings, aren't documented either.


+struct lsdc_connector *lsdc_connector_init(struct lsdc_device *ldev, unsigned 
int index)
+{
+   struct drm_device *ddev = &ldev->drm;
+   struct device_node *np = ddev->dev->of_node;
+   struct device_node *output = NULL;
+   unsigned int connector_type = DRM_MODE_CONNECTOR_Unknown;
+   struct device_node *disp_tims_np;
+   struct lsdc_connector *lconn;
+   struct drm_connector *connector;
+   int ret;
+
+   lconn = devm_kzalloc(ddev->dev, sizeof(*lconn), GFP_KERNEL);
+   if (!lconn)
+   return ERR_PTR(-ENOMEM);
+
+   lconn->index = index;
+   lconn->has_disp_tim = false;
+   lconn->ddc = NULL;
+
+   output = of_parse_phandle(np, "output-ports", index);
+   if (!output) {
+   drm_warn(ddev, "no output-ports property, please update dtb\n");
+   /*
+* Providing a blindly support even though no output-ports
+* property is provided in the dtb.
+*/
+   goto DT_SKIPED;
+   }

output-ports is not documented either.
Thanks for taking the time to review my patch; I will try to document it in the
next version.


[PATCH i-g-t 00/11] Per client GPU utilisation

2022-02-22 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

This series contains four main components:

 1. Per client support for intel_gpu_top.
 2. IGT test for per client data exposed via fdinfo from i915.
 3. Extracting intel_gpu_top code into shared IGT libraries - which makes it
possible to write:
 4. Vendor agnostic rudimentary gputop tool.

Since I had the intel_gpu_top code for years, this flow of the series is what
I ended up with. But it also makes sense since we can have a cut point after
intel_gpu_top, in case the common drm fdinfo specification does not end up
getting enough traction.

Example of the intel_gpu_top output:

intel-gpu-top: Intel Tigerlake (Gen12) @ /dev/dri/card0 -  220/ 221 MHz
70% RC6;  0.62/ 7.08 W;  760 irqs/s

 ENGINES BUSY MI_SEMA MI_WAIT
   Render/3D   23.06% |██▊  |  0%  0%
 Blitter0.00% | |  0%  0%
   Video5.40% |█▋   |  0%  0%
VideoEnhance   20.67% |██   |  0%  0%

   PID  NAME  Render/3DBlitter  VideoVideoEnhance
  3082   mpv |  ||  ||▌ ||██|
  3117 neverball |█▉||  ||  ||  |
 1   systemd |▍ ||  ||  ||  |
  2338   gnome-shell |  ||  ||  ||  |


Example of the gputop output:

DRM minor 0
PID   NAMErender   copy   video
3816  kwin_x11 |███▎  ||  ||  ||  |
3523  Xorg |▊ ||  ||  ||  |
 1120449   mpv |  ||  ||▋ ||  |
 1120529  glxgears |▋ ||  ||  ||  |
 1120449   mpv |▍ ||  ||  ||  |
3860   plasmashell |▏ ||  ||  ||  |
4764   krunner |  ||  ||  ||  |
  575206chrome |  ||  ||  ||  |
  833481   firefox |  ||  ||  ||  |
  892924   thunderbird |  ||  ||  ||  |


Tvrtko Ursulin (11):
  lib: Helper library for parsing i915 fdinfo output
  tests/i915/drm_fdinfo: Basic and functional tests for GPU busyness
exported via fdinfo
  intel-gpu-top: Add support for per client stats
  lib: Extract igt_drm_clients from intel_gpu_top
  libdrmfdinfo: Allow specifying custom engine map
  libdrmclients: Record client drm minor
  libdrmclient: Support multiple DRM cards
  libdrmfdinfo: Track largest engine index
  libdrmclient/intel_gpu_top: Decouple hardcoded engine assumptions
  libdrmclient: Enforce client status sort order in the library
  gputop: Basic vendor agnostic GPU top tool

 lib/igt_drm_clients.c   | 443 +++
 lib/igt_drm_clients.h   | 104 
 lib/igt_drm_fdinfo.c| 200 ++
 lib/igt_drm_fdinfo.h|  52 
 lib/meson.build |  15 ++
 man/intel_gpu_top.rst   |   4 +
 tests/i915/drm_fdinfo.c | 565 
 tests/meson.build   |   8 +
 tools/gputop.c  | 276 
 tools/intel_gpu_top.c   | 497 ++-
 tools/meson.build   |   7 +-
 11 files changed, 2159 insertions(+), 12 deletions(-)
 create mode 100644 lib/igt_drm_clients.c
 create mode 100644 lib/igt_drm_clients.h
 create mode 100644 lib/igt_drm_fdinfo.c
 create mode 100644 lib/igt_drm_fdinfo.h
 create mode 100644 tests/i915/drm_fdinfo.c
 create mode 100644 tools/gputop.c

-- 
2.32.0



[PATCH i-g-t 01/11] lib: Helper library for parsing i915 fdinfo output

2022-02-22 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Tests and intel_gpu_top will share common code for parsing this file.

Signed-off-by: Tvrtko Ursulin 
---
 lib/igt_drm_fdinfo.c | 183 +++
 lib/igt_drm_fdinfo.h |  48 
 lib/meson.build  |   7 ++
 3 files changed, 238 insertions(+)
 create mode 100644 lib/igt_drm_fdinfo.c
 create mode 100644 lib/igt_drm_fdinfo.h

diff --git a/lib/igt_drm_fdinfo.c b/lib/igt_drm_fdinfo.c
new file mode 100644
index ..28c1bdbda08e
--- /dev/null
+++ b/lib/igt_drm_fdinfo.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright © 2022 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "drmtest.h"
+
+#include "igt_drm_fdinfo.h"
+
+static size_t read_fdinfo(char *buf, const size_t sz, int at, const char *name)
+{
+   size_t count;
+   int fd;
+
+   fd = openat(at, name, O_RDONLY);
+   if (fd < 0)
+   return 0;
+
+   buf[sz - 1] = 0;
+   count = read(fd, buf, sz);
+   close(fd);
+
+   return count;
+}
+
+static int parse_engine(char *line, struct drm_client_fdinfo *info,
+   size_t prefix_len, uint64_t *val)
+{
+   static const char *e2class[] = {
+   "render",
+   "copy",
+   "video",
+   "video-enhance",
+   };
+   ssize_t name_len;
+   char *name, *p;
+   int found = -1;
+   unsigned int i;
+
+   p = index(line, ':');
+   if (!p || p == line)
+   return -1;
+
+   name_len = p - line - prefix_len;
+   if (name_len < 1)
+   return -1;
+
+   name = line + prefix_len;
+
+   for (i = 0; i < ARRAY_SIZE(e2class); i++) {
+   if (!strncmp(name, e2class[i], name_len)) {
+   found = i;
+   break;
+   }
+   }
+
+   if (found >= 0) {
+   while (*++p && isspace(*p));
+   *val = strtoull(p, NULL, 10);
+   }
+
+   return found;
+}
+
+static const char *find_kv(const char *buf, const char *key, size_t keylen)
+{
+   const char *p = buf;
+
+   p = index(buf, ':');
+   if (!p || p == buf)
+   return NULL;
+
+   if ((p - buf) != keylen)
+   return NULL;
+
+   while (*++p && isspace(*p));
+   if (*p && !strncmp(buf, key, keylen))
+   return p;
+
+   return NULL;
+}
+
+bool
+__igt_parse_drm_fdinfo(int dir, const char *fd, struct drm_client_fdinfo *info)
+{
+   char buf[4096], *_buf = buf;
+   char *l, *ctx = NULL;
+   unsigned int good = 0;
+   size_t count;
+
+   count = read_fdinfo(buf, sizeof(buf), dir, fd);
+   if (!count)
+   return false;
+
+   while ((l = strtok_r(_buf, "\n", &ctx))) {
+   uint64_t val = 0;
+   const char *v;
+   int idx;
+
+   _buf = NULL;
+
+   if ((v = find_kv(l, "drm-driver", strlen("drm-driver" {
+   strncpy(info->driver, v, sizeof(info->driver) - 1);
+   good++;
+   } else if ((v = find_kv(l, "drm-pdev", strlen("drm-pdev" {
+   strncpy(info->pdev, v, sizeof(info->pdev) - 1);
+   }  else if ((v = find_kv(l, "drm-client-id",
+strlen("drm-client-id" {
+   info->id = atol(v);
+   good++;
+   } else if (!strncmp(l, "drm-engine-", 11) &&
+  strncmp(l, "drm-engine-capacity-", 20)) {
+   idx = parse_engine(l, info, strlen("drm-engine-"),
+  &val);
+   if (idx >= 0) {
+   if (!info->capacity[idx])
+   

[PATCH i-g-t 03/11] intel-gpu-top: Add support for per client stats

2022-02-22 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Use the i915 exported data in /proc/<pid>/fdinfo to show GPU utilization
per DRM client.

Example of the output:

intel-gpu-top: Intel Tigerlake (Gen12) @ /dev/dri/card0 -  220/ 221 MHz
70% RC6;  0.62/ 7.08 W;  760 irqs/s

 ENGINES BUSY MI_SEMA MI_WAIT
   Render/3D   23.06% |██▊  |  0%  0%
 Blitter0.00% | |  0%  0%
   Video5.40% |█▋   |  0%  0%
VideoEnhance   20.67% |██   |  0%  0%

   PID  NAME  Render/3DBlitter  VideoVideoEnhance
  3082   mpv |  ||  ||▌ ||██|
  3117 neverball |█▉||  ||  ||  |
 1   systemd |▍ ||  ||  ||  |
  2338   gnome-shell |  ||  ||  ||  |

Signed-off-by: Tvrtko Ursulin 
---
 man/intel_gpu_top.rst |   4 +
 tools/intel_gpu_top.c | 801 +-
 tools/meson.build |   2 +-
 3 files changed, 804 insertions(+), 3 deletions(-)

diff --git a/man/intel_gpu_top.rst b/man/intel_gpu_top.rst
index b3b765b05feb..f4dbfc5b44d9 100644
--- a/man/intel_gpu_top.rst
+++ b/man/intel_gpu_top.rst
@@ -56,6 +56,10 @@ Supported keys:
 'q'Exit from the tool.
 'h'Show interactive help.
 '1'Toggle between aggregated engine class and physical engine mode.
+'n'Toggle display of numeric client busyness overlay.
+'s'Toggle between sort modes (runtime, total runtime, pid, client id).
+'i'Toggle display of clients which used no GPU time.
+'H'Toggle between per PID aggregation and individual clients.
 
 DEVICE SELECTION
 
diff --git a/tools/intel_gpu_top.c b/tools/intel_gpu_top.c
index bc11fce2bb1e..73815cdea8aa 100644
--- a/tools/intel_gpu_top.c
+++ b/tools/intel_gpu_top.c
@@ -43,8 +43,10 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "igt_perf.h"
+#include "igt_drm_fdinfo.h"
 
 #define ARRAY_SIZE(arr) (sizeof(arr)/sizeof(arr[0]))
 
@@ -311,7 +313,8 @@ static int engine_cmp(const void *__a, const void *__b)
return a->instance - b->instance;
 }
 
-#define is_igpu_pci(x) (strcmp(x, ":00:02.0") == 0)
+#define IGPU_PCI ":00:02.0"
+#define is_igpu_pci(x) (strcmp(x, IGPU_PCI) == 0)
 #define is_igpu(x) (strcmp(x, "i915") == 0)
 
 static struct engines *discover_engines(char *device)
@@ -635,6 +638,547 @@ static void pmu_sample(struct engines *engines)
}
 }
 
+enum client_status {
+   FREE = 0, /* mbz */
+   ALIVE,
+   PROBE
+};
+
+struct clients;
+
+struct client {
+   struct clients *clients;
+
+   enum client_status status;
+   unsigned int id;
+   unsigned int pid;
+   char name[24];
+   char print_name[24];
+   unsigned int samples;
+   unsigned long total_runtime;
+   unsigned long last_runtime;
+   unsigned long *val;
+   uint64_t *last;
+};
+
+struct clients {
+   unsigned int num_clients;
+   unsigned int active_clients;
+
+   unsigned int num_classes;
+   struct engine_class *class;
+
+   char pci_slot[64];
+
+   struct client *client;
+};
+
+#define for_each_client(clients, c, tmp) \
+   for ((tmp) = (clients)->num_clients, c = (clients)->client; \
+(tmp > 0); (tmp)--, (c)++)
+
+static struct clients *init_clients(const char *pci_slot)
+{
+   struct clients *clients;
+
+   clients = malloc(sizeof(*clients));
+   if (!clients)
+   return NULL;
+
+   memset(clients, 0, sizeof(*clients));
+
+   strncpy(clients->pci_slot, pci_slot, sizeof(clients->pci_slot));
+
+   return clients;
+}
+
+static struct client *
+find_client(struct clients *clients, enum client_status status, unsigned int 
id)
+{
+   unsigned int start, num;
+   struct client *c;
+
+   start = status == FREE ? clients->active_clients : 0; /* Free block at 
the end. */
+   num = clients->num_clients - start;
+
+   for (c = &clients->client[start]; num; c++, num--) {
+   if (status != c->status)
+   continue;
+
+   if (status == FREE || c->id == id)
+   return c;
+   }
+
+   return NULL;
+}
+
+static void
+update_client(struct client *c, unsigned int pid, char *name, uint64_t val[16])
+{
+   unsigned int i;
+
+   if (c->pid != pid)
+   c->pid = pid;
+
+   if (strcmp(c->name, name)) {
+   char *p;
+
+   strncpy(c->name, name, sizeof(c->name) - 1);
+   strncpy(c->print_name, name, sizeof(c->print_name) - 1);
+
+   p = c->print_name;
+   while (*p) {
+   if (!isprint(*p))
+   *p = '*';
+   p++;
+   }
+   }
+
+  

[PATCH i-g-t 05/11] libdrmfdinfo: Allow specifying custom engine map

2022-02-22 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Instead of hard coding the engine names, allow a map of names to indices
to either be passed in or be auto-detected (less efficient) while
parsing.

Signed-off-by: Tvrtko Ursulin 
---
 lib/igt_drm_clients.c   |  7 +++---
 lib/igt_drm_clients.h   |  3 ++-
 lib/igt_drm_fdinfo.c| 47 +++--
 lib/igt_drm_fdinfo.h|  7 --
 tests/i915/drm_fdinfo.c | 16 +++---
 tools/intel_gpu_top.c   | 13 ++--
 6 files changed, 66 insertions(+), 27 deletions(-)

diff --git a/lib/igt_drm_clients.c b/lib/igt_drm_clients.c
index 58d82648d821..591602f4c9f5 100644
--- a/lib/igt_drm_clients.c
+++ b/lib/igt_drm_clients.c
@@ -269,7 +269,8 @@ static bool is_drm_fd(int fd_dir, const char *name)
 struct igt_drm_clients *
 igt_drm_clients_scan(struct igt_drm_clients *clients,
 bool (*filter_client)(const struct igt_drm_clients *,
-  const struct drm_client_fdinfo *))
+  const struct drm_client_fdinfo *),
+const char **name_map, unsigned int map_entries)
 {
struct dirent *proc_dent;
struct igt_drm_client *c;
@@ -343,8 +344,8 @@ igt_drm_clients_scan(struct igt_drm_clients *clients,
memset(&info, 0, sizeof(info));
 
if (!__igt_parse_drm_fdinfo(dirfd(fdinfo_dir),
-   fdinfo_dent->d_name,
-   &info))
+   fdinfo_dent->d_name, &info,
+   name_map, map_entries))
continue;
 
if (filter_client && !filter_client(clients, &info))
diff --git a/lib/igt_drm_clients.h b/lib/igt_drm_clients.h
index f52080847291..91e9da4c0733 100644
--- a/lib/igt_drm_clients.h
+++ b/lib/igt_drm_clients.h
@@ -80,7 +80,8 @@ void igt_drm_clients_free(struct igt_drm_clients *clients);
 struct igt_drm_clients *
 igt_drm_clients_scan(struct igt_drm_clients *clients,
 bool (*filter_client)(const struct igt_drm_clients *,
-  const struct drm_client_fdinfo *));
+  const struct drm_client_fdinfo *),
+const char **name_map, unsigned int map_entries);
 
 struct igt_drm_client *
 igt_drm_clients_find(struct igt_drm_clients *clients,
diff --git a/lib/igt_drm_fdinfo.c b/lib/igt_drm_fdinfo.c
index 28c1bdbda08e..96a8b768a4b1 100644
--- a/lib/igt_drm_fdinfo.c
+++ b/lib/igt_drm_fdinfo.c
@@ -22,6 +22,7 @@
  *
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -52,14 +53,10 @@ static size_t read_fdinfo(char *buf, const size_t sz, int 
at, const char *name)
 }
 
 static int parse_engine(char *line, struct drm_client_fdinfo *info,
-   size_t prefix_len, uint64_t *val)
+   size_t prefix_len,
+   const char **name_map, unsigned int map_entries,
+   uint64_t *val)
 {
-   static const char *e2class[] = {
-   "render",
-   "copy",
-   "video",
-   "video-enhance",
-   };
ssize_t name_len;
char *name, *p;
int found = -1;
@@ -75,10 +72,26 @@ static int parse_engine(char *line, struct 
drm_client_fdinfo *info,
 
name = line + prefix_len;
 
-   for (i = 0; i < ARRAY_SIZE(e2class); i++) {
-   if (!strncmp(name, e2class[i], name_len)) {
-   found = i;
-   break;
+   if (name_map) {
+   for (i = 0; i < map_entries; i++) {
+   if (!strncmp(name, name_map[i], name_len)) {
+   found = i;
+   break;
+   }
+   }
+   } else {
+   for (i = 0; i < info->num_engines; i++) {
+   if (!strncmp(name, info->names[i], name_len)) {
+   found = i;
+   break;
+   }
+   }
+
+   if (found < 0) {
+   assert((info->num_engines + 1) < 
ARRAY_SIZE(info->names));
+   assert((strlen(name) + 1) < sizeof(info->names[0]));
+   strncpy(info->names[info->num_engines], name, name_len);
+   found = info->num_engines;
}
}
 
@@ -109,7 +122,8 @@ static const char *find_kv(const char *buf, const char 
*key, size_t keylen)
 }
 
 bool
-__igt_parse_drm_fdinfo(int dir, const char *fd, struct drm_client_fdinfo *info)
+__igt_parse_drm_fdinfo(int dir, const char *fd, struct drm_client_fdinfo *info,
+  const char **name_map, unsigned int map_entries)
 {
char buf[4096], *_buf = buf;
char *l, *ctx = NULL;
@@ -13

[PATCH i-g-t 08/11] libdrmfdinfo: Track largest engine index

2022-02-22 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Prep code for incoming work.

Signed-off-by: Tvrtko Ursulin 
---
 lib/igt_drm_fdinfo.c | 2 ++
 lib/igt_drm_fdinfo.h | 1 +
 2 files changed, 3 insertions(+)

diff --git a/lib/igt_drm_fdinfo.c b/lib/igt_drm_fdinfo.c
index 96a8b768a4b1..964c6b708960 100644
--- a/lib/igt_drm_fdinfo.c
+++ b/lib/igt_drm_fdinfo.c
@@ -159,6 +159,8 @@ __igt_parse_drm_fdinfo(int dir, const char *fd, struct 
drm_client_fdinfo *info,
info->capacity[idx] = 1;
info->busy[idx] = val;
info->num_engines++;
+   if (idx > info->last_engine_index)
+   info->last_engine_index = idx;
}
} else if (!strncmp(l, "drm-engine-capacity-", 20)) {
idx = parse_engine(l, info,
diff --git a/lib/igt_drm_fdinfo.h b/lib/igt_drm_fdinfo.h
index bea4a6304734..804e8e1aa333 100644
--- a/lib/igt_drm_fdinfo.h
+++ b/lib/igt_drm_fdinfo.h
@@ -36,6 +36,7 @@ struct drm_client_fdinfo {
unsigned long id;
 
unsigned int num_engines;
+   unsigned int last_engine_index;
unsigned int capacity[16];
char names[16][256];
uint64_t busy[16];
-- 
2.32.0



[PATCH i-g-t 06/11] libdrmclients: Record client drm minor

2022-02-22 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Prepare for supporting clients belonging to multiple DRM cards by storing
the DRM minor in the client record.

Signed-off-by: Tvrtko Ursulin 
---
 lib/igt_drm_clients.c | 33 -
 lib/igt_drm_clients.h |  6 --
 2 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/lib/igt_drm_clients.c b/lib/igt_drm_clients.c
index 591602f4c9f5..116479a9f363 100644
--- a/lib/igt_drm_clients.c
+++ b/lib/igt_drm_clients.c
@@ -117,12 +117,13 @@ igt_drm_client_update(struct igt_drm_client *c, unsigned 
int pid, char *name,
 }
 
 void
-igt_drm_client_add(struct igt_drm_clients *clients, unsigned int id,
-  unsigned int pid, char *name, uint64_t busy[16])
+igt_drm_client_add(struct igt_drm_clients *clients,
+  struct drm_client_fdinfo *info,
+  unsigned int pid, char *name, unsigned int drm_minor)
 {
struct igt_drm_client *c;
 
-   assert(!igt_drm_clients_find(clients, IGT_DRM_CLIENT_ALIVE, id));
+   assert(!igt_drm_clients_find(clients, IGT_DRM_CLIENT_ALIVE, info->id));
 
c = igt_drm_clients_find(clients, IGT_DRM_CLIENT_FREE, 0);
if (!c) {
@@ -137,13 +138,14 @@ igt_drm_client_add(struct igt_drm_clients *clients, 
unsigned int id,
memset(c, 0, (clients->num_clients - idx) * sizeof(*c));
}
 
-   c->id = id;
+   c->id = info->id;
+   c->drm_minor = drm_minor;
c->clients = clients;
c->val = calloc(clients->num_classes, sizeof(c->val));
c->last = calloc(clients->num_classes, sizeof(c->last));
assert(c->val && c->last);
 
-   igt_drm_client_update(c, pid, name, busy);
+   igt_drm_client_update(c, pid, name, info->busy);
 }
 
 void igt_drm_client_free(struct igt_drm_client *c)
@@ -254,16 +256,21 @@ static bool get_task_name(const char *buffer, char *out, 
unsigned long sz)
return true;
 }
 
-static bool is_drm_fd(int fd_dir, const char *name)
+static bool is_drm_fd(int fd_dir, const char *name, unsigned int *minor)
 {
struct stat stat;
int ret;
 
ret = fstatat(fd_dir, name, &stat, 0);
 
-   return ret == 0 &&
-  (stat.st_mode & S_IFMT) == S_IFCHR &&
-  major(stat.st_rdev) == 226;
+   if (ret == 0 &&
+   (stat.st_mode & S_IFMT) == S_IFCHR &&
+   major(stat.st_rdev) == 226) {
+   *minor = minor(stat.st_rdev);
+   return true;
+   }
+
+   return false;
 }
 
 struct igt_drm_clients *
@@ -293,10 +300,10 @@ igt_drm_clients_scan(struct igt_drm_clients *clients,
return clients;
 
while ((proc_dent = readdir(proc_dir)) != NULL) {
+   unsigned int client_pid, minor = 0;
int pid_dir = -1, fd_dir = -1;
struct dirent *fdinfo_dent;
char client_name[64] = { };
-   unsigned int client_pid;
DIR *fdinfo_dir = NULL;
char buf[4096];
size_t count;
@@ -338,7 +345,7 @@ igt_drm_clients_scan(struct igt_drm_clients *clients,
if (!isdigit(fdinfo_dent->d_name[0]))
continue;
 
-   if (!is_drm_fd(fd_dir, fdinfo_dent->d_name))
+   if (!is_drm_fd(fd_dir, fdinfo_dent->d_name, &minor))
continue;
 
memset(&info, 0, sizeof(info));
@@ -358,8 +365,8 @@ igt_drm_clients_scan(struct igt_drm_clients *clients,
c = igt_drm_clients_find(clients, IGT_DRM_CLIENT_PROBE,
info.id);
if (!c)
-   igt_drm_client_add(clients, info.id, client_pid,
-  client_name, info.busy);
+   igt_drm_client_add(clients, &info, client_pid,
+  client_name, minor);
else
igt_drm_client_update(c, client_pid,
  client_name, info.busy);
diff --git a/lib/igt_drm_clients.h b/lib/igt_drm_clients.h
index 91e9da4c0733..7a6318c6af5f 100644
--- a/lib/igt_drm_clients.h
+++ b/lib/igt_drm_clients.h
@@ -48,6 +48,7 @@ struct igt_drm_client {
 
enum igt_drm_client_status status;
unsigned int id;
+   unsigned int drm_minor;
unsigned int pid;
char name[24];
char print_name[24];
@@ -95,8 +96,9 @@ igt_drm_clients_sort(struct igt_drm_clients *clients,
 void igt_drm_client_update(struct igt_drm_client *c,
   unsigned int pid, char *name, uint64_t val[16]);
 
-void igt_drm_client_add(struct igt_drm_clients *clients, unsigned int id,
-   unsigned int pid, char *name, uint64_t busy[16]);
+void igt_drm_client_add(struct igt_drm_clients *clients,
+   struct dr

[PATCH i-g-t 04/11] lib: Extract igt_drm_clients from intel_gpu_top

2022-02-22 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Code movement with some improvements to prepare for further work in
making a vendor agnostic gputop tool possible.

Signed-off-by: Tvrtko Ursulin 
---
 lib/igt_drm_clients.c | 386 +++
 lib/igt_drm_clients.h | 102 +
 lib/meson.build   |   8 +
 tools/intel_gpu_top.c | 516 ++
 tools/meson.build |   2 +-
 5 files changed, 570 insertions(+), 444 deletions(-)
 create mode 100644 lib/igt_drm_clients.c
 create mode 100644 lib/igt_drm_clients.h

diff --git a/lib/igt_drm_clients.c b/lib/igt_drm_clients.c
new file mode 100644
index ..58d82648d821
--- /dev/null
+++ b/lib/igt_drm_clients.c
@@ -0,0 +1,386 @@
+/*
+ * Copyright © 2022 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "igt_drm_clients.h"
+#include "igt_drm_fdinfo.h"
+
+struct igt_drm_clients *igt_drm_clients_init(void *private_data)
+{
+   struct igt_drm_clients *clients;
+
+   clients = malloc(sizeof(*clients));
+   if (!clients)
+   return NULL;
+
+   memset(clients, 0, sizeof(*clients));
+
+   clients->private_data = private_data;
+
+   return clients;
+}
+
+struct igt_drm_client *
+igt_drm_clients_find(struct igt_drm_clients *clients,
+enum igt_drm_client_status status,
+unsigned int id)
+{
+   unsigned int start, num;
+   struct igt_drm_client *c;
+
+   start = status == IGT_DRM_CLIENT_FREE ? clients->active_clients : 0; /* 
Free block at the end. */
+   num = clients->num_clients - start;
+
+   for (c = &clients->client[start]; num; c++, num--) {
+   if (status != c->status)
+   continue;
+
+   if (status == IGT_DRM_CLIENT_FREE || c->id == id)
+   return c;
+   }
+
+   return NULL;
+}
+
+void
+igt_drm_client_update(struct igt_drm_client *c, unsigned int pid, char *name,
+ uint64_t val[16])
+{
+   unsigned int i;
+
+   if (c->pid != pid)
+   c->pid = pid;
+
+   if (strcmp(c->name, name)) {
+   char *p;
+
+   strncpy(c->name, name, sizeof(c->name) - 1);
+   strncpy(c->print_name, name, sizeof(c->print_name) - 1);
+
+   p = c->print_name;
+   while (*p) {
+   if (!isprint(*p))
+   *p = '*';
+   p++;
+   }
+   }
+
+   c->last_runtime = 0;
+   c->total_runtime = 0;
+
+   for (i = 0; i < c->clients->num_classes; i++) {
+   if (val[i] < c->last[i])
+   continue; /* It will catch up soon. */
+
+   c->total_runtime += val[i];
+   c->val[i] = val[i] - c->last[i];
+   c->last_runtime += c->val[i];
+   c->last[i] = val[i];
+   }
+
+   c->samples++;
+   c->status = IGT_DRM_CLIENT_ALIVE;
+}
+
+void
+igt_drm_client_add(struct igt_drm_clients *clients, unsigned int id,
+  unsigned int pid, char *name, uint64_t busy[16])
+{
+   struct igt_drm_client *c;
+
+   assert(!igt_drm_clients_find(clients, IGT_DRM_CLIENT_ALIVE, id));
+
+   c = igt_drm_clients_find(clients, IGT_DRM_CLIENT_FREE, 0);
+   if (!c) {
+   unsigned int idx = clients->num_clients;
+
+   clients->num_clients += (clients->num_clients + 2) / 2;
+   clients->client = realloc(clients->client,
+ clients->num_clients * sizeof(*c));
+   assert(clients->client);
+
+   c = &clients->client[idx];
+   memset(c, 0, (clients->num_clients - idx) * sizeof(*c));
+   }

[PATCH i-g-t 02/11] tests/i915/drm_fdinfo: Basic and functional tests for GPU busyness exported via fdinfo

2022-02-22 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Mostly inherited from the perf_pmu, some basic tests, and some tests to
verify exported GPU busyness is as expected.

Signed-off-by: Tvrtko Ursulin 
---
 tests/i915/drm_fdinfo.c | 555 
 tests/meson.build   |   8 +
 2 files changed, 563 insertions(+)
 create mode 100644 tests/i915/drm_fdinfo.c

diff --git a/tests/i915/drm_fdinfo.c b/tests/i915/drm_fdinfo.c
new file mode 100644
index ..e3b1ebb0f454
--- /dev/null
+++ b/tests/i915/drm_fdinfo.c
@@ -0,0 +1,555 @@
+/*
+ * Copyright © 2022 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "igt.h"
+#include "igt_core.h"
+#include "igt_device.h"
+#include "igt_drm_fdinfo.h"
+#include "i915/gem.h"
+#include "intel_ctx.h"
+
+IGT_TEST_DESCRIPTION("Test the i915 drm fdinfo data");
+
+const double tolerance = 0.05f;
+const unsigned long batch_duration_ns = 500e6;
+
+#define __assert_within_epsilon(x, ref, tol_up, tol_down) \
+   igt_assert_f((double)(x) <= (1.0 + (tol_up)) * (double)(ref) && \
+(double)(x) >= (1.0 - (tol_down)) * (double)(ref), \
+"'%s' != '%s' (%f not within +%.1f%%/-%.1f%% tolerance of 
%f)\n",\
+#x, #ref, (double)(x), \
+(tol_up) * 100.0, (tol_down) * 100.0, \
+(double)(ref))
+
+#define assert_within_epsilon(x, ref, tolerance) \
+   __assert_within_epsilon(x, ref, tolerance, tolerance)
+
+static void basics(int i915, unsigned int num_classes)
+{
+   struct drm_client_fdinfo info = { };
+   bool ret;
+
+   ret = igt_parse_drm_fdinfo(i915, &info);
+   igt_assert(ret);
+
+   igt_assert(!strcmp(info.driver, "i915"));
+
+   igt_assert_eq(info.num_engines, num_classes);
+}
+
+/*
+ * Helper for cases where we assert on time spent sleeping (directly or
+ * indirectly), so make it more robust by ensuring the system sleep time
+ * is within test tolerance to start with.
+ */
+static unsigned int measured_usleep(unsigned int usec)
+{
+   struct timespec ts = { };
+   unsigned int slept;
+
+   slept = igt_nsec_elapsed(&ts);
+   igt_assert(slept == 0);
+   do {
+   usleep(usec - slept);
+   slept = igt_nsec_elapsed(&ts) / 1000;
+   } while (slept < usec);
+
+   return igt_nsec_elapsed(&ts);
+}
+
+#define TEST_BUSY (1)
+#define FLAG_SYNC (2)
+#define TEST_TRAILING_IDLE (4)
+#define FLAG_HANG (8)
+#define TEST_ISOLATION (16)
+
+static igt_spin_t *__spin_poll(int fd, uint64_t ahnd, const intel_ctx_t *ctx,
+  const struct intel_execution_engine2 *e)
+{
+   struct igt_spin_factory opts = {
+   .ahnd = ahnd,
+   .ctx = ctx,
+   .engine = e->flags,
+   };
+
+   if (gem_class_can_store_dword(fd, e->class))
+   opts.flags |= IGT_SPIN_POLL_RUN;
+
+   return __igt_spin_factory(fd, &opts);
+}
+
+static unsigned long __spin_wait(int fd, igt_spin_t *spin)
+{
+   struct timespec start = { };
+
+   igt_nsec_elapsed(&start);
+
+   if (igt_spin_has_poll(spin)) {
+   unsigned long timeout = 0;
+
+   while (!igt_spin_has_started(spin)) {
+   unsigned long t = igt_nsec_elapsed(&start);
+
+   igt_assert(gem_bo_busy(fd, spin->handle));
+   if ((t - timeout) > 250e6) {
+   timeout = t;
+   igt_warn("Spinner not running after %.2fms\n",
+(double)t / 1e6);
+   igt_assert(t < 2e9);
+   }
+   }
+   } else {
+   igt_debug("__spin_wait - usleep mode\n");
+   usleep(500e3); /* Better than nothing! */
+   }
+
+   igt_assert(gem_bo_busy(fd, spin->handle));
+   retur

[PATCH i-g-t 07/11] libdrmclient: Support multiple DRM cards

2022-02-22 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Require DRM minor match during client lookup.

Signed-off-by: Tvrtko Ursulin 
---
 lib/igt_drm_clients.c | 14 --
 lib/igt_drm_clients.h |  2 +-
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/lib/igt_drm_clients.c b/lib/igt_drm_clients.c
index 116479a9f363..cf044597d537 100644
--- a/lib/igt_drm_clients.c
+++ b/lib/igt_drm_clients.c
@@ -57,7 +57,7 @@ struct igt_drm_clients *igt_drm_clients_init(void 
*private_data)
 struct igt_drm_client *
 igt_drm_clients_find(struct igt_drm_clients *clients,
 enum igt_drm_client_status status,
-unsigned int id)
+unsigned int drm_minor, unsigned int id)
 {
unsigned int start, num;
struct igt_drm_client *c;
@@ -69,7 +69,8 @@ igt_drm_clients_find(struct igt_drm_clients *clients,
if (status != c->status)
continue;
 
-   if (status == IGT_DRM_CLIENT_FREE || c->id == id)
+   if (status == IGT_DRM_CLIENT_FREE ||
+   (drm_minor == c->drm_minor && c->id == id))
return c;
}
 
@@ -123,9 +124,10 @@ igt_drm_client_add(struct igt_drm_clients *clients,
 {
struct igt_drm_client *c;
 
-   assert(!igt_drm_clients_find(clients, IGT_DRM_CLIENT_ALIVE, info->id));
+   assert(!igt_drm_clients_find(clients, IGT_DRM_CLIENT_ALIVE,
+drm_minor, info->id));
 
-   c = igt_drm_clients_find(clients, IGT_DRM_CLIENT_FREE, 0);
+   c = igt_drm_clients_find(clients, IGT_DRM_CLIENT_FREE, 0, 0);
if (!c) {
unsigned int idx = clients->num_clients;
 
@@ -359,11 +361,11 @@ igt_drm_clients_scan(struct igt_drm_clients *clients,
continue;
 
if (igt_drm_clients_find(clients, IGT_DRM_CLIENT_ALIVE,
-   info.id))
+minor, info.id))
continue; /* Skip duplicate fds. */
 
c = igt_drm_clients_find(clients, IGT_DRM_CLIENT_PROBE,
-   info.id);
+minor, info.id);
if (!c)
igt_drm_client_add(clients, &info, client_pid,
   client_name, minor);
diff --git a/lib/igt_drm_clients.h b/lib/igt_drm_clients.h
index 7a6318c6af5f..6da445eab34a 100644
--- a/lib/igt_drm_clients.h
+++ b/lib/igt_drm_clients.h
@@ -87,7 +87,7 @@ igt_drm_clients_scan(struct igt_drm_clients *clients,
 struct igt_drm_client *
 igt_drm_clients_find(struct igt_drm_clients *clients,
 enum igt_drm_client_status status,
-unsigned int id);
+unsigned int drm_minor, unsigned int id);
 
 struct igt_drm_clients *
 igt_drm_clients_sort(struct igt_drm_clients *clients,
-- 
2.32.0



[PATCH i-g-t 10/11] libdrmclient: Enforce client status sort order in the library

2022-02-22 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Some libdrmclient operations require that inactive clients are last in the
list. Rather than relying on callers of the library sort routine to
implement their comparison callbacks correctly, enforce this order
directly in the library and let callers' comparison callbacks concern
themselves only with the ordering they are interested in.

Signed-off-by: Tvrtko Ursulin 
---
 lib/igt_drm_clients.c | 25 +++--
 lib/igt_drm_clients.h |  2 +-
 tools/intel_gpu_top.c | 81 +++
 3 files changed, 58 insertions(+), 50 deletions(-)

diff --git a/lib/igt_drm_clients.c b/lib/igt_drm_clients.c
index 6299f3632b74..e8e86969f984 100644
--- a/lib/igt_drm_clients.c
+++ b/lib/igt_drm_clients.c
@@ -185,10 +185,29 @@ void igt_drm_client_free(struct igt_drm_client *c)
memset(c, 0, sizeof(*c));
 }
 
+struct sort_context
+{
+   int (*user_cmp)(const void *, const void *, void *);
+};
+
+static int sort_cmp(const void *_a, const void *_b, void *_ctx)
+{
+   const struct sort_context *ctx = _ctx;
+   const struct igt_drm_client *a = _a;
+   const struct igt_drm_client *b = _b;
+   int cmp = b->status - a->status;
+
+   if (cmp == 0)
+   return ctx->user_cmp(_a, _b, _ctx);
+   else
+   return cmp;
+}
+
 struct igt_drm_clients *
 igt_drm_clients_sort(struct igt_drm_clients *clients,
-int (*cmp)(const void *, const void *))
+int (*cmp)(const void *, const void *, void *))
 {
+   struct sort_context ctx = { .user_cmp = cmp };
unsigned int active, free;
struct igt_drm_client *c;
int tmp;
@@ -196,8 +215,8 @@ igt_drm_clients_sort(struct igt_drm_clients *clients,
if (!clients)
return clients;
 
-   qsort(clients->client, clients->num_clients, sizeof(*clients->client),
- cmp);
+   qsort_r(clients->client, clients->num_clients, sizeof(*clients->client),
+ sort_cmp, &ctx);
 
/* Trim excessive array space. */
active = 0;
diff --git a/lib/igt_drm_clients.h b/lib/igt_drm_clients.h
index 4668df2d7fd6..47547aa8b75f 100644
--- a/lib/igt_drm_clients.h
+++ b/lib/igt_drm_clients.h
@@ -90,7 +90,7 @@ igt_drm_clients_find(struct igt_drm_clients *clients,
 
 struct igt_drm_clients *
 igt_drm_clients_sort(struct igt_drm_clients *clients,
-int (*cmp)(const void *, const void *));
+int (*cmp)(const void *, const void *, void *));
 
 void igt_drm_client_update(struct igt_drm_client *c,
   unsigned int pid, char *name, uint64_t val[16]);
diff --git a/tools/intel_gpu_top.c b/tools/intel_gpu_top.c
index fb7ec9ae6d74..fcc4e5501d3d 100644
--- a/tools/intel_gpu_top.c
+++ b/tools/intel_gpu_top.c
@@ -644,85 +644,74 @@ static void pmu_sample(struct engines *engines)
}
 }
 
-static int client_last_cmp(const void *_a, const void *_b)
+static int
+__client_id_cmp(const struct igt_drm_client *a,
+   const struct igt_drm_client *b)
+{
+   if (a->id > b->id)
+   return 1;
+   else if (a->id < b->id)
+   return -1;
+   else
+   return 0;
+}
+
+static int client_last_cmp(const void *_a, const void *_b, void *unused)
 {
const struct igt_drm_client *a = _a;
const struct igt_drm_client *b = _b;
-   long tot_a, tot_b;
+   long val_a = a->last_runtime, val_b = b->last_runtime;
 
/*
 * Sort clients in descending order of runtime in the previous sampling
-* period for active ones, followed by inactive. Tie-breaker is client
-* id.
+* period. Tie-breaker is client id.
 */
 
-   tot_a = a->status == IGT_DRM_CLIENT_ALIVE ? a->last_runtime : -1;
-   tot_b = b->status == IGT_DRM_CLIENT_ALIVE ? b->last_runtime : -1;
-
-   tot_b -= tot_a;
-   if (tot_b > 0)
+   if (val_a == val_b)
+   return __client_id_cmp(a, b);
+   else if (val_b > val_a)
return 1;
-   if (tot_b < 0)
+   else
return -1;
-
-   return (int)b->id - a->id;
 }
 
-static int client_total_cmp(const void *_a, const void *_b)
+static int client_total_cmp(const void *_a, const void *_b, void *unused)
 {
const struct igt_drm_client *a = _a;
const struct igt_drm_client *b = _b;
-   long tot_a, tot_b;
+   long val_a = a->total_runtime, val_b = b->total_runtime;
 
-   tot_a = a->status == IGT_DRM_CLIENT_ALIVE ? a->total_runtime : -1;
-   tot_b = b->status == IGT_DRM_CLIENT_ALIVE ? b->total_runtime : -1;
-
-   tot_b -= tot_a;
-   if (tot_b > 0)
+   if (val_a == val_b)
+   return __client_id_cmp(a, b);
+   else if (val_b > val_a)
return 1;
-   if (tot_b < 0)
+   else
return -1;
-
-   return (int)b->id - a->id;
 }
 
-static int client_id_cmp(const void *_a, const void *_b)
+static int client_id_cmp(c

[PATCH i-g-t 09/11] libdrmclient/intel_gpu_top: Decouple hardcoded engine assumptions

2022-02-22 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Intel_gpu_top gets its main engine configuration data via PMU probe and
uses that for the per-client view as well. Furthermore, the code so far
assumed that only clients belonging to a single DRM card would be tracked
in a single clients list.

Break this inter-dependency by moving the engine data to be per client and
also have libdrmclient probe the engine configuration independently using
the previously added libdrmfdinfo facilities.

Signed-off-by: Tvrtko Ursulin 
---
 lib/igt_drm_clients.c |  34 +++--
 lib/igt_drm_clients.h |  11 ++---
 tools/intel_gpu_top.c | 112 ++
 3 files changed, 117 insertions(+), 40 deletions(-)

diff --git a/lib/igt_drm_clients.c b/lib/igt_drm_clients.c
index cf044597d537..6299f3632b74 100644
--- a/lib/igt_drm_clients.c
+++ b/lib/igt_drm_clients.c
@@ -103,7 +103,7 @@ igt_drm_client_update(struct igt_drm_client *c, unsigned 
int pid, char *name,
c->last_runtime = 0;
c->total_runtime = 0;
 
-   for (i = 0; i < c->clients->num_classes; i++) {
+   for (i = 0; i <= c->engines->max_engine_id; i++) {
if (val[i] < c->last[i])
continue; /* It will catch up soon. */
 
@@ -123,6 +123,7 @@ igt_drm_client_add(struct igt_drm_clients *clients,
   unsigned int pid, char *name, unsigned int drm_minor)
 {
struct igt_drm_client *c;
+   unsigned int i;
 
assert(!igt_drm_clients_find(clients, IGT_DRM_CLIENT_ALIVE,
 drm_minor, info->id));
@@ -143,8 +144,28 @@ igt_drm_client_add(struct igt_drm_clients *clients,
c->id = info->id;
c->drm_minor = drm_minor;
c->clients = clients;
-   c->val = calloc(clients->num_classes, sizeof(c->val));
-   c->last = calloc(clients->num_classes, sizeof(c->last));
+   c->engines = malloc(sizeof(*c->engines));
+   assert(c->engines);
+   memset(c->engines, 0, sizeof(*c->engines));
+   c->engines->capacity = calloc(info->last_engine_index + 1,
+ sizeof(*c->engines->capacity));
+   assert(c->engines->capacity);
+   c->engines->names = calloc(info->last_engine_index + 1,
+  sizeof(*c->engines->names));
+   assert(c->engines->names);
+
+   for (i = 0; i <= info->last_engine_index; i++) {
+   if (!info->capacity[i])
+   continue;
+
+   c->engines->capacity[i] = info->capacity[i];
+   c->engines->names[i] = strdup(info->names[i]);
+   assert(c->engines->names[i]);
+   c->engines->num_engines++;
+   c->engines->max_engine_id = i;
+   }
+   c->val = calloc(c->engines->max_engine_id + 1, sizeof(c->val));
+   c->last = calloc(c->engines->max_engine_id + 1, sizeof(c->last));
assert(c->val && c->last);
 
igt_drm_client_update(c, pid, name, info->busy);
@@ -152,6 +173,13 @@ igt_drm_client_add(struct igt_drm_clients *clients,
 
 void igt_drm_client_free(struct igt_drm_client *c)
 {
+   unsigned int i;
+
+   for (i = 0; i <= c->engines->max_engine_id; i++)
+   free(c->engines->names[i]);
+   free(c->engines->capacity);
+   free(c->engines->names);
+   free(c->engines);
free(c->val);
free(c->last);
memset(c, 0, sizeof(*c));
diff --git a/lib/igt_drm_clients.h b/lib/igt_drm_clients.h
index 6da445eab34a..4668df2d7fd6 100644
--- a/lib/igt_drm_clients.h
+++ b/lib/igt_drm_clients.h
@@ -35,10 +35,11 @@ enum igt_drm_client_status {
IGT_DRM_CLIENT_PROBE
 };
 
-struct igt_drm_client_engine_class {
-   unsigned int engine_class;
-   const char *name;
+struct igt_drm_client_engines {
unsigned int num_engines;
+   unsigned int max_engine_id;
+   unsigned int *capacity;
+   char **names;
 };
 
 struct igt_drm_clients;
@@ -47,6 +48,7 @@ struct igt_drm_client {
struct igt_drm_clients *clients;
 
enum igt_drm_client_status status;
+   struct igt_drm_client_engines *engines;
unsigned int id;
unsigned int drm_minor;
unsigned int pid;
@@ -63,9 +65,6 @@ struct igt_drm_clients {
unsigned int num_clients;
unsigned int active_clients;
 
-   unsigned int num_classes;
-   struct igt_drm_client_engine_class *engine_class;
-
void *private_data;
 
struct igt_drm_client *client; /* Must be last. */
diff --git a/tools/intel_gpu_top.c b/tools/intel_gpu_top.c
index 99e8e1d8ffd4..fb7ec9ae6d74 100644
--- a/tools/intel_gpu_top.c
+++ b/tools/intel_gpu_top.c
@@ -66,6 +66,12 @@ struct pmu_counter {
bool present;
 };
 
+struct engine_class {
+   unsigned int engine_class;
+   const char *name;
+   unsigned int num_engines;
+};
+
 struct engine {
const char *name;
char *display_name;
@@ -84,7 +90,7 @@ struct engine {
 struct engines {
unsigned int num_engines;
unsig

[PATCH i-g-t 11/11] gputop: Basic vendor agnostic GPU top tool

2022-02-22 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Rudimentary vendor agnostic example of how lib_igt_drm_clients can be used
to display a sorted by card and usage list of processes using GPUs.

Signed-off-by: Tvrtko Ursulin 
Cc: Rob Clark 
---
 tools/gputop.c| 276 ++
 tools/meson.build |   5 +
 2 files changed, 281 insertions(+)
 create mode 100644 tools/gputop.c

diff --git a/tools/gputop.c b/tools/gputop.c
new file mode 100644
index ..dbc5b7b4ce42
--- /dev/null
+++ b/tools/gputop.c
@@ -0,0 +1,276 @@
+/*
+ * Copyright © 2022 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "igt_drm_clients.h"
+#include "igt_drm_fdinfo.h"
+
+#define ARRAY_SIZE(arr) (sizeof(arr)/sizeof(arr[0]))
+
+static const char *bars[] = { " ", "▏", "▎", "▍", "▌", "▋", "▊", "▉", "█" };
+
+static void n_spaces(const unsigned int n)
+{
+   unsigned int i;
+
+   for (i = 0; i < n; i++)
+   putchar(' ');
+}
+
+static void print_percentage_bar(double percent, int max_len)
+{
+   int bar_len, i, len = max_len - 2;
+   const int w = 8;
+
+   assert(max_len > 0);
+
+   bar_len = ceil(w * percent * len / 100.0);
+   if (bar_len > w * len)
+   bar_len = w * len;
+
+   putchar('|');
+
+   for (i = bar_len; i >= w; i -= w)
+   printf("%s", bars[w]);
+   if (i)
+   printf("%s", bars[i]);
+
+   len -= (bar_len + (w - 1)) / w;
+   n_spaces(len);
+
+   putchar('|');
+}
+
+static int
+print_client_header(struct igt_drm_client *c, int lines, int con_w, int con_h,
+   int *engine_w)
+{
+   const char *pidname = "   PID  NAME ";
+   int ret, len = strlen(pidname);
+
+   if (lines++ >= con_h || len >= con_w)
+   return lines;
+   printf("\033[7m");
+   ret = printf("DRM minor %u", c->drm_minor);
+   n_spaces(con_w - ret);
+
+   if (lines++ >= con_h)
+   return lines;
+   printf("\n%s", pidname);
+
+   if (c->engines->num_engines) {
+   unsigned int i;
+   int width;
+
+   *engine_w = width = (con_w - len) / c->engines->num_engines;
+
+   for (i = 0; i <= c->engines->max_engine_id; i++) {
+   const char *name = c->engines->names[i];
+   int name_len = strlen(name);
+   int pad = (width - name_len) / 2;
+   int spaces = width - pad - name_len;
+
+   if (!name)
+   continue;
+
+   if (pad < 0 || spaces < 0)
+   continue;
+
+   n_spaces(pad);
+   printf("%s", name);
+   n_spaces(spaces);
+   len += pad + name_len + spaces;
+   }
+   }
+
+   n_spaces(con_w - len);
+   printf("\033[0m\n");
+
+   return lines;
+}
+
+
+static bool
+newheader(const struct igt_drm_client *c, const struct igt_drm_client *pc)
+{
+   return !pc || c->drm_minor != pc->drm_minor;
+}
+
+static bool filter_idle = true;
+
+static int
+print_client(struct igt_drm_client *c, struct igt_drm_client **prevc,
+double t, int lines, int con_w, int con_h,
+unsigned int period_us, int *engine_w)
+{
+   unsigned int i;
+
+   if (filter_idle && (!c->total_runtime || c->samples < 2))
+   return lines;
+
+   if (newheader(c, *prevc)) {
+   lines = print_client_header(c, lines, con_w, con_h, engine_w);
+   if (lines >= con_h)
+   return 

Re: [PATCH v2 13/22] drm/nouveau/kms: Remove redundant zpos initialisation

2022-02-22 Thread Maxime Ripard
Hi,

On Mon, Feb 21, 2022 at 05:42:36PM +0100, Karol Herbst wrote:
> On Mon, Feb 21, 2022 at 11:00 AM Maxime Ripard  wrote:
> >
> > The nouveau KMS driver will call drm_plane_create_zpos_property() with
> > an init value depending on the plane purpose.
> >
> > Since the initial value wasn't carried over in the state, the driver had
> > to set it again in nv50_wndw_reset(). However, the helpers have been
> > adjusted to set it properly at reset, so this is not needed anymore.
> >
> > Cc: nouv...@lists.freedesktop.org
> > Cc: Ben Skeggs 
> > Cc: Karol Herbst 
> > Cc: Lyude Paul 
> > Signed-off-by: Maxime Ripard 
> > ---
> >  drivers/gpu/drm/nouveau/dispnv50/wndw.c | 2 --
> >  1 file changed, 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c 
> > b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
> > index 133c8736426a..0c1a2ea0ed04 100644
> > --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c
> > +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
> > @@ -635,8 +635,6 @@ nv50_wndw_reset(struct drm_plane *plane)
> > plane->funcs->atomic_destroy_state(plane, plane->state);
> >
> > __drm_atomic_helper_plane_reset(plane, &asyw->state);
> > -   plane->state->zpos = nv50_wndw_zpos_default(plane);
> > -   plane->state->normalized_zpos = nv50_wndw_zpos_default(plane);
> 
> so reading the surrounding code a little it feels like those
> assignments actually do something. If my understanding is correct
> plane->state points to &asyw->state, but asyw was just kzalloced in
> this function. __drm_atomic_helper_plane_reset doesn't set the zpos or
> normalized_zpos fields as long as zpos_property is 0, so those fields
> won't be set with that change anymore.
> 
> I just don't know if it's fine like that or if this function should
> set zpos_property instead or something. Anyway, the commit description
> makes it sound like that an unneeded assignment would be removed here,
> which doesn't seem to be the case. But I don't really know much about
> all the drm API interactions, so it might just be fine, mostly asking
> to get a better idea on how all those pieces fit together.

If you're looking at the code without that patch series, you're right.

These patches change that however:
https://lore.kernel.org/dri-devel/20220221095918.18763-7-max...@cerno.tech/
https://lore.kernel.org/dri-devel/20220221095918.18763-8-max...@cerno.tech/

So, once they have been applied those assignments are made in
__drm_atomic_helper_plane_reset and are no longer relevant here.

Maxime


signature.asc
Description: PGP signature


[PATCH] drm/i915: Check input parameter for NULL

2022-02-22 Thread Yongzhi Liu
[why]
i915_gem_object_put_pages_phys() frees pages, and the standard
pattern is to let the caller not care whether the argument is
NULL or not. This reduces the burden on the callers to
perform this check.

[how]
Fix it by adding a NULL check.

Signed-off-by: Yongzhi Liu 
---
 drivers/gpu/drm/i915/gem/i915_gem_phys.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c 
b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
index ca6faff..09c3dcb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
@@ -95,8 +95,13 @@ void
 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
   struct sg_table *pages)
 {
-   dma_addr_t dma = sg_dma_address(pages->sgl);
-   void *vaddr = sg_page(pages->sgl);
+   dma_addr_t dma;
+   void *vaddr;
+
+   if (!pages)
+   return;
+   dma = sg_dma_address(pages->sgl);
+   vaddr = sg_page(pages->sgl);
 
__i915_gem_object_release_shmem(obj, pages, false);
 
-- 
2.7.4



Re: [PATCH v10 3/4] drm/lsdc: add drm driver for loongson display controller

2022-02-22 Thread Sui Jingfeng



On 2022/2/22 16:27, Maxime Ripard wrote:

+   if (!of_device_is_available(output)) {
+   of_node_put(output);
+   drm_info(ddev, "connector%d is not available\n", index);
+   return NULL;
+   }
+
+   disp_tims_np = of_get_child_by_name(output, "display-timings");
+   if (disp_tims_np) {
+   lsdc_get_display_timings_from_dtb(output, &lconn->disp_tim);
+   lconn->has_disp_tim = true;
+   of_node_put(disp_tims_np);
+   drm_info(ddev, "Found display timings provided by 
connector%d\n", index);
+   }
+
+   connector_type = lsdc_get_connector_type(ddev, output, index);
+
+   if (output) {
+   of_node_put(output);
+   output = NULL;
+   }
+
+DT_SKIPED:
+
+   /* Only create the i2c channel if display timing is not provided */
+   if (!lconn->has_disp_tim) {
+   const struct lsdc_chip_desc * const desc = ldev->desc;
+
+   if (desc->have_builtin_i2c)
+   lconn->ddc = lsdc_create_i2c_chan(ddev, index);
+   else
+   lconn->ddc = lsdc_get_i2c_adapter(ddev, index);

This looks weird: the connector bindings have a property to store the
i2c controller connected to the DDC lines, so you should use that
instead.


This is not weird; ast, mgag200 and hibmc do the same thing.


[PATCH 0/7] drm/i915: Use the memcpy_from_wc function from drm

2022-02-22 Thread Balasubramani Vivekanandan
drm_memcpy_from_wc() performs fast copy from WC memory type using
non-temporal instructions. Now there are two similar implementations of
this function. One exists in drm_cache.c as drm_memcpy_from_wc() and
another implementation in i915/i915_memcpy.c as i915_memcpy_from_wc().
drm_memcpy_from_wc() was the recent addition through the series
https://patchwork.freedesktop.org/patch/436276/?series=90681&rev=6

The goal of this patch series is to change all users of
i915_memcpy_from_wc() to drm_memcpy_from_wc() and have a common
implementation in drm and eventually remove the copy from i915.

Another benefit of using memcpy functions from drm is that
drm_memcpy_from_wc() is available for non-x86 architectures.
i915_memcpy_from_wc() is implemented only for x86 and prevents building
i915 for ARM64.
drm_memcpy_from_wc() does fast copy using non-temporal instructions for
x86 and for other architectures makes use of memcpy() family of
functions as fallback.

Another major difference is that, unlike i915_memcpy_from_wc(),
drm_memcpy_from_wc() will not fail if the passed address argument is not
suitably aligned to be used with non-temporal load instructions or if the
platform lacks support for those instructions (non-temporal load
instructions are provided through SSE4.1 instruction set extension).
Instead drm_memcpy_from_wc() continues with fallback functions to
complete the copy.
This relieves the caller from checking the return value of
i915_memcpy_from_wc() and explicitly using a fallback.

Follow up series will be created to remove the memcpy_from_wc functions
from i915 once the dependency is completely removed.

Cc: Jani Nikula 
Cc: Lucas De Marchi  
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: Chris Wilson  
Cc: Thomas Hellström
Cc: Joonas Lahtinen 
Cc: Rodrigo Vivi 
Cc: Tvrtko Ursulin 

Balasubramani Vivekanandan (7):
  drm: Relax alignment constraint for destination address
  drm: Add drm_memcpy_from_wc() variant which accepts destination
address
  drm/i915: use the memcpy_from_wc call from the drm
  drm/i915/guc: use the memcpy_from_wc call from the drm
  drm/i915/selftests: use the memcpy_from_wc call from the drm
  drm/i915/gt: Avoid direct dereferencing of io memory
  drm/i915: Avoid dereferencing io mapped memory

 drivers/gpu/drm/drm_cache.c   | 98 +--
 drivers/gpu/drm/i915/gem/i915_gem_object.c|  8 +-
 drivers/gpu/drm/i915/gt/selftest_reset.c  | 21 ++--
 drivers/gpu/drm/i915/gt/uc/intel_guc_log.c| 11 ++-
 drivers/gpu/drm/i915/i915_gpu_error.c | 45 +
 .../drm/i915/selftests/intel_memory_region.c  |  8 +-
 include/drm/drm_cache.h   |  3 +
 7 files changed, 148 insertions(+), 46 deletions(-)

-- 
2.25.1



[PATCH 1/7] drm: Relax alignment constraint for destination address

2022-02-22 Thread Balasubramani Vivekanandan
There is no need for the destination address to be aligned to 16 byte
boundary to be able to use the non-temporal instructions while copying.
Non-temporal instructions are used only for loading from the source
address which has alignment constraints.
We only need to take care of using the right instructions, based on
whether destination address is aligned or not, while storing the data to
the destination address.

__memcpy_ntdqu is copied from i915/i915_memcpy.c

Cc: Maarten Lankhorst 
Cc: Maxime Ripard 
Cc: Thomas Zimmermann 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: Chris Wilson 

Signed-off-by: Balasubramani Vivekanandan 
---
 drivers/gpu/drm/drm_cache.c | 44 -
 1 file changed, 38 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c
index c3e6e615bf09..a21c1350eb09 100644
--- a/drivers/gpu/drm/drm_cache.c
+++ b/drivers/gpu/drm/drm_cache.c
@@ -278,18 +278,50 @@ static void __memcpy_ntdqa(void *dst, const void *src, 
unsigned long len)
kernel_fpu_end();
 }
 
+static void __memcpy_ntdqu(void *dst, const void *src, unsigned long len)
+{
+   kernel_fpu_begin();
+
+   while (len >= 4) {
+   asm("movntdqa   (%0), %%xmm0\n"
+   "movntdqa 16(%0), %%xmm1\n"
+   "movntdqa 32(%0), %%xmm2\n"
+   "movntdqa 48(%0), %%xmm3\n"
+   "movups %%xmm0,   (%1)\n"
+   "movups %%xmm1, 16(%1)\n"
+   "movups %%xmm2, 32(%1)\n"
+   "movups %%xmm3, 48(%1)\n"
+   :: "r" (src), "r" (dst) : "memory");
+   src += 64;
+   dst += 64;
+   len -= 4;
+   }
+   while (len--) {
+   asm("movntdqa (%0), %%xmm0\n"
+   "movups %%xmm0, (%1)\n"
+   :: "r" (src), "r" (dst) : "memory");
+   src += 16;
+   dst += 16;
+   }
+
+   kernel_fpu_end();
+}
+
 /*
  * __drm_memcpy_from_wc copies @len bytes from @src to @dst using
- * non-temporal instructions where available. Note that all arguments
- * (@src, @dst) must be aligned to 16 bytes and @len must be a multiple
- * of 16.
+ * non-temporal instructions where available. Note that @src must be aligned to
+ * 16 bytes and @len must be a multiple of 16.
  */
 static void __drm_memcpy_from_wc(void *dst, const void *src, unsigned long len)
 {
-   if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15))
+   if (unlikely(((unsigned long)src | len) & 15)) {
memcpy(dst, src, len);
-   else if (likely(len))
-   __memcpy_ntdqa(dst, src, len >> 4);
+   } else if (likely(len)) {
+   if (IS_ALIGNED((unsigned long)dst, 16))
+   __memcpy_ntdqa(dst, src, len >> 4);
+   else
+   __memcpy_ntdqu(dst, src, len >> 4);
+   }
 }
 
 /**
-- 
2.25.1



[PATCH 2/7] drm: Add drm_memcpy_from_wc() variant which accepts destination address

2022-02-22 Thread Balasubramani Vivekanandan
Fast copy using non-temporal instructions for x86 currently exists at two
locations. One is implemented in i915 driver at i915/i915_memcpy.c and
another copy at drm_cache.c. The plan is to remove the duplicate
implementation in i915 driver and use the functions from drm_cache.c.

A variant of drm_memcpy_from_wc() is added in drm_cache.c which accepts
address as argument instead of iosys_map for destination. It is a very
common scenario in i915 to copy from a WC memory type, which may be an
io memory or a system memory to a destination address pointing to system
memory. To avoid the overhead of creating iosys_map type for the
destination, new variant is created to accept the address directly.

Also a new function is exported in drm_cache.c to find if the fast copy
is supported by the platform or not. It is required for i915.

Cc: Maarten Lankhorst 
Cc: Maxime Ripard 
Cc: Thomas Zimmermann 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: Thomas Hellström

Signed-off-by: Balasubramani Vivekanandan 
---
 drivers/gpu/drm/drm_cache.c | 54 +
 include/drm/drm_cache.h |  3 +++
 2 files changed, 57 insertions(+)

diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c
index a21c1350eb09..eb0bcd33665e 100644
--- a/drivers/gpu/drm/drm_cache.c
+++ b/drivers/gpu/drm/drm_cache.c
@@ -358,6 +358,54 @@ void drm_memcpy_from_wc(struct iosys_map *dst,
 }
 EXPORT_SYMBOL(drm_memcpy_from_wc);
 
+/**
+ * drm_memcpy_from_wc_vaddr - Perform the fastest available memcpy from a 
source
+ * that may be WC.
+ * @dst: The destination pointer
+ * @src: The source pointer
+ * @len: The size of the area to transfer in bytes
+ *
+ * Same as drm_memcpy_from_wc except destination is accepted as system memory
+ * address. Useful in situations where passing destination address as iosys_map
+ * is simply an overhead and can be avoided.
+ */
+void drm_memcpy_from_wc_vaddr(void *dst, const struct iosys_map *src,
+ unsigned long len)
+{
+   if (WARN_ON(in_interrupt())) {
+   iosys_map_memcpy_from(dst, src, 0, len);
+   return;
+   }
+
+   if (static_branch_likely(&has_movntdqa)) {
+   __drm_memcpy_from_wc(dst,
+src->is_iomem ?
+(void const __force *)src->vaddr_iomem :
+src->vaddr,
+len);
+   return;
+   }
+
+   iosys_map_memcpy_from(dst, src, 0, len);
+}
+EXPORT_SYMBOL(drm_memcpy_from_wc_vaddr);
+
+/*
+ * drm_memcpy_fastcopy_supported - Returns if fast copy using non-temporal
+ * instructions is supported
+ *
+ * Returns true if platform has support for fast copying from wc memory type
+ * using non-temporal instructions. Else false.
+ */
+bool drm_memcpy_fastcopy_supported(void)
+{
+   if (static_branch_likely(&has_movntdqa))
+   return true;
+
+   return false;
+}
+EXPORT_SYMBOL(drm_memcpy_fastcopy_supported);
+
 /*
  * drm_memcpy_init_early - One time initialization of the WC memcpy code
  */
@@ -382,6 +430,12 @@ void drm_memcpy_from_wc(struct iosys_map *dst,
 }
 EXPORT_SYMBOL(drm_memcpy_from_wc);
 
+bool drm_memcpy_fastcopy_supported(void)
+{
+   return false;
+}
+EXPORT_SYMBOL(drm_memcpy_fastcopy_supported);
+
 void drm_memcpy_init_early(void)
 {
 }
diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h
index 22deb216b59c..8f48e4dcd7dc 100644
--- a/include/drm/drm_cache.h
+++ b/include/drm/drm_cache.h
@@ -77,4 +77,7 @@ void drm_memcpy_init_early(void);
 void drm_memcpy_from_wc(struct iosys_map *dst,
const struct iosys_map *src,
unsigned long len);
+bool drm_memcpy_fastcopy_supported(void);
+void drm_memcpy_from_wc_vaddr(void *dst, const struct iosys_map *src,
+ unsigned long len);
 #endif
-- 
2.25.1



[PATCH 3/7] drm/i915: use the memcpy_from_wc call from the drm

2022-02-22 Thread Balasubramani Vivekanandan
memcpy_from_wc functions in i915_memcpy.c will be removed and replaced
by the implementation in drm_cache.c.
Updated to use the functions provided by drm_cache.c.

Signed-off-by: Balasubramani Vivekanandan 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 2d593d573ef1..49ff8e3e71d9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -449,16 +449,16 @@ static void
 i915_gem_object_read_from_page_iomap(struct drm_i915_gem_object *obj, u64 
offset, void *dst, int size)
 {
void __iomem *src_map;
-   void __iomem *src_ptr;
+   struct iosys_map src_ptr;
+
dma_addr_t dma = i915_gem_object_get_dma_address(obj, offset >> 
PAGE_SHIFT);
 
src_map = io_mapping_map_wc(&obj->mm.region->iomap,
dma - obj->mm.region->region.start,
PAGE_SIZE);
 
-   src_ptr = src_map + offset_in_page(offset);
-   if (!i915_memcpy_from_wc(dst, (void __force *)src_ptr, size))
-   memcpy_fromio(dst, src_ptr, size);
+   iosys_map_set_vaddr_iomem(&src_ptr, (src_map + offset_in_page(offset)));
+   drm_memcpy_from_wc_vaddr(dst, &src_ptr, size);
 
io_mapping_unmap(src_map);
 }
-- 
2.25.1



  1   2   3   >