Re: [PATCH 3/7] drm/i915/hwmon: Power PL1 limit and TDP setting

2022-09-28 Thread Gupta, Anshuman




On 9/27/2022 11:20 AM, Badal Nilawar wrote:

From: Dale B Stimson 

Use i915 HWMON to display/modify dGfx power PL1 limit and TDP setting.

v2:
   - Fix review comments (Ashutosh)
   - Do not restore power1_max upon module unload/load sequence
 because on production systems modules are always loaded
 and not unloaded/reloaded (Ashutosh)
   - Fix review comments (Jani)
   - Remove endianness conversion (Ashutosh)
v3: Add power1_rated_max (Ashutosh)
v4:
   - Use macro HWMON_CHANNEL_INFO to define power channel (Guenter)
   - Update the date and kernel version in Documentation (Badal)
v5: Use hwm_ prefix for static functions (Ashutosh)
v6: Fix review comments (Ashutosh)
v7:
   - Define PCU_PACKAGE_POWER_SKU for DG1,DG2 and move
 PKG_PKG_TDP to intel_mchbar_regs.h (Anshuman)
   - KernelVersion: 6.2, Date: February 2023 in doc (Tvrtko)

Cc: Guenter Roeck 
Signed-off-by: Dale B Stimson 
Signed-off-by: Ashutosh Dixit 
Signed-off-by: Riana Tauro 
Signed-off-by: Badal Nilawar 
Acked-by: Guenter Roeck 
Reviewed-by: Ashutosh Dixit 

LGTM,
Reviewed-by: Anshuman Gupta 

---
  .../ABI/testing/sysfs-driver-intel-i915-hwmon |  20 +++
  drivers/gpu/drm/i915/i915_hwmon.c | 158 +-
  drivers/gpu/drm/i915/intel_mchbar_regs.h  |  12 ++
  3 files changed, 188 insertions(+), 2 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon 
b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
index cd9554c1a4f8..16e697b1db3d 100644
--- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
+++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
@@ -5,3 +5,23 @@ Contact:   dri-devel@lists.freedesktop.org
  Description:  RO. Current Voltage in millivolt.
  
  		Only supported for particular Intel i915 graphics platforms.

+
+What:  /sys/devices/.../hwmon/hwmon/power1_max
+Date:  February 2023
+KernelVersion: 6.2
+Contact:   dri-devel@lists.freedesktop.org
+Description:   RW. Card reactive sustained  (PL1/Tau) power limit in 
microwatts.
+
+   The power controller will throttle the operating frequency
+   if the power averaged over a window (typically seconds)
+   exceeds this limit.
+
+   Only supported for particular Intel i915 graphics platforms.
+
+What:  /sys/devices/.../hwmon/hwmon/power1_rated_max
+Date:  February 2023
+KernelVersion: 6.2
+Contact:   dri-devel@lists.freedesktop.org
+Description:   RO. Card default power limit (default TDP setting).
+
+   Only supported for particular Intel i915 graphics platforms.
diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index 9fcff6a884ee..53d34a7a86f7 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -16,11 +16,16 @@
  /*
   * SF_* - scale factors for particular quantities according to hwmon spec.
   * - voltage  - millivolts
+ * - power  - microwatts
   */
  #define SF_VOLTAGE1000
+#define SF_POWER   100
  
  struct hwm_reg {

i915_reg_t gt_perf_status;
+   i915_reg_t pkg_power_sku_unit;
+   i915_reg_t pkg_power_sku;
+   i915_reg_t pkg_rapl_limit;
  };
  
  struct hwm_drvdata {

@@ -34,10 +39,68 @@ struct i915_hwmon {
struct hwm_drvdata ddat;
struct mutex hwmon_lock;/* counter overflow logic and 
rmw */
struct hwm_reg rg;
+   int scl_shift_power;
  };
  
+static void

+hwm_locked_with_pm_intel_uncore_rmw(struct hwm_drvdata *ddat,
+   i915_reg_t reg, u32 clear, u32 set)
+{
+   struct i915_hwmon *hwmon = ddat->hwmon;
+   struct intel_uncore *uncore = ddat->uncore;
+   intel_wakeref_t wakeref;
+
+   mutex_lock(&hwmon->hwmon_lock);
+
+   with_intel_runtime_pm(uncore->rpm, wakeref)
+   intel_uncore_rmw(uncore, reg, clear, set);
+
+   mutex_unlock(&hwmon->hwmon_lock);
+}
+
+/*
+ * This function's return type of u64 allows for the case where the scaling
+ * of the field taken from the 32-bit register value might cause a result to
+ * exceed 32 bits.
+ */
+static u64
+hwm_field_read_and_scale(struct hwm_drvdata *ddat, i915_reg_t rgadr,
+u32 field_msk, int nshift, u32 scale_factor)
+{
+   struct intel_uncore *uncore = ddat->uncore;
+   intel_wakeref_t wakeref;
+   u32 reg_value;
+
+   with_intel_runtime_pm(uncore->rpm, wakeref)
+   reg_value = intel_uncore_read(uncore, rgadr);
+
+   reg_value = REG_FIELD_GET(field_msk, reg_value);
+
+   return mul_u64_u32_shr(reg_value, scale_factor, nshift);
+}
+
+static void
+hwm_field_scale_and_write(struct hwm_drvdata *ddat, i915_reg_t rgadr,
+ u32 field_msk, int nshift,
+ unsigned int scale_factor, long lval)
+{
+   u32 nval;
+   u32 bits_to_clear;
+   u32 bits_to_set;
+
+   /* Computation in 64-bits to avoid overflow. Round to neares

Re: [PATCH 6/7] drm/i915/hwmon: Expose power1_max_interval

2022-09-28 Thread Gupta, Anshuman




On 9/27/2022 11:20 AM, Badal Nilawar wrote:

From: Ashutosh Dixit 

Expose power1_max_interval, that is the tau corresponding to PL1, as a
custom hwmon attribute. Some bit manipulation is needed because of the
format of PKG_PWR_LIM_1_TIME in
GT0_PACKAGE_RAPL_LIMIT register (1.x * power(2,y)).

v2: Update date and kernel version in Documentation (Badal)
v3: Cleaned up hwm_power1_max_interval_store() (Badal)
v4:
   - Fixed review comments (Anshuman)
   - In hwm_power1_max_interval_store() get PKG_MAX_WIN from
 pkg_power_sku when it is valid (Ashutosh)
   - KernelVersion: 6.2, Date: February 2023 in doc (Tvrtko)
v5: On some of the DGFX setups it is seen that although pkg_power_sku
 is valid the field PKG_WIN_MAX is not populated. So it is
 decided to stick to default value of PKG_WIN_MAX (Ashutosh)

Signed-off-by: Ashutosh Dixit 
Signed-off-by: Badal Nilawar 
Acked-by: Guenter Roeck 

LGTM,
Reviewed-by: Anshuman Gupta 

---
  .../ABI/testing/sysfs-driver-intel-i915-hwmon |   9 ++
  drivers/gpu/drm/i915/i915_hwmon.c | 115 +-
  drivers/gpu/drm/i915/intel_mchbar_regs.h  |   7 ++
  3 files changed, 130 insertions(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon 
b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
index f9d6d3b08bba..19b9fe3ef237 100644
--- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
+++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
@@ -26,6 +26,15 @@ Description: RO. Card default power limit (default TDP 
setting).
  
  		Only supported for particular Intel i915 graphics platforms.
  
+What:		/sys/devices/.../hwmon/hwmon/power1_max_interval

+Date:  February 2023
+KernelVersion: 6.2
+Contact:   dri-devel@lists.freedesktop.org
+Description:   RW. Sustained power limit interval (Tau in PL1/Tau) in
+   milliseconds over which sustained power is averaged.
+
+   Only supported for particular Intel i915 graphics platforms.
+
  What: /sys/devices/.../hwmon/hwmon/power1_crit
  Date: February 2023
  KernelVersion:6.2
diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index 2394fa789793..641143956c45 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -20,11 +20,13 @@
   * - power  - microwatts
   * - curr   - milliamperes
   * - energy - microjoules
+ * - time   - milliseconds
   */
  #define SF_VOLTAGE1000
  #define SF_POWER  100
  #define SF_CURR   1000
  #define SF_ENERGY 100
+#define SF_TIME1000
  
  struct hwm_reg {

i915_reg_t gt_perf_status;
@@ -53,6 +55,7 @@ struct i915_hwmon {
struct hwm_reg rg;
int scl_shift_power;
int scl_shift_energy;
+   int scl_shift_time;
  };
  
  static void

@@ -161,6 +164,115 @@ hwm_energy(struct hwm_drvdata *ddat, long *energy)
return 0;
  }
  
+static ssize_t

+hwm_power1_max_interval_show(struct device *dev, struct device_attribute *attr,
+char *buf)
+{
+   struct hwm_drvdata *ddat = dev_get_drvdata(dev);
+   struct i915_hwmon *hwmon = ddat->hwmon;
+   intel_wakeref_t wakeref;
+   u32 r, x, y, x_w = 2; /* 2 bits */
+   u64 tau4, out;
+
+   with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
+   r = intel_uncore_read(ddat->uncore, hwmon->rg.pkg_rapl_limit);
+
+   x = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_X, r);
+   y = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_Y, r);
+   /*
+* tau = 1.x * power(2,y), x = bits(23:22), y = bits(21:17)
+* = (4 | x) << (y - 2)
+* where (y - 2) ensures a 1.x fixed point representation of 1.x
+* However because y can be < 2, we compute
+* tau4 = (4 | x) << y
+* but add 2 when doing the final right shift to account for units
+*/
+   tau4 = ((1 << x_w) | x) << y;
+   /* val in hwmon interface units (millisec) */
+   out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
+
+   return sysfs_emit(buf, "%llu\n", out);
+}
+
+static ssize_t
+hwm_power1_max_interval_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+   struct hwm_drvdata *ddat = dev_get_drvdata(dev);
+   struct i915_hwmon *hwmon = ddat->hwmon;
+   long val, max_win, ret;
+   u32 x, y, rxy, x_w = 2; /* 2 bits */
+   u64 tau4, r;
+
+#define PKG_MAX_WIN_DEFAULT 0x12ull
+
+   ret = kstrtoul(buf, 0, &val);
+   if (ret)
+   return ret;
+
+   /*
+* val must be < max in hwmon interface units. The steps below are
+* explained in i915_power1_max_interval_show()
+*/
+   r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT);
+
+   x = REG_FIELD_GET(PKG_MAX_WIN_X, r);
+   y = REG_FIELD_GET(PKG_MAX_WIN_Y, r);
+   tau4 = ((1 << x_w) | x

[PATCH 1/2] drm/msm/a6xx: Replace kcalloc() with kvzalloc()

2022-09-28 Thread Akhil P Oommen
In order to reduce chance of allocation failure while capturing a6xx
gpu state, use kvzalloc() instead of kcalloc() in state_kcalloc().

Indirectly, this patch helps to fix leaking memory allocated for
gmu_debug object.

Fixes: b859f9b009b (drm/msm/gpu: Snapshot GMU debug buffer)
Signed-off-by: Akhil P Oommen 
---

 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 12 +++-
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
index 55f4433..3c112a6 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
@@ -91,7 +91,7 @@ struct a6xx_state_memobj {
 static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t 
objsize)
 {
struct a6xx_state_memobj *obj =
-   kzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);
+   kvzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);
 
if (!obj)
return NULL;
@@ -819,7 +819,7 @@ static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
 
snapshot->iova = bo->iova;
snapshot->size = bo->size;
-   snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL);
+   snapshot->data = state_kcalloc(a6xx_state, 1, snapshot->size);
if (!snapshot->data)
return NULL;
 
@@ -1034,14 +1034,8 @@ static void a6xx_gpu_state_destroy(struct kref *kref)
struct a6xx_gpu_state *a6xx_state = container_of(state,
struct a6xx_gpu_state, base);
 
-   if (a6xx_state->gmu_log)
-   kvfree(a6xx_state->gmu_log->data);
-
-   if (a6xx_state->gmu_hfi)
-   kvfree(a6xx_state->gmu_hfi->data);
-
list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node)
-   kfree(obj);
+   kvfree(obj);
 
adreno_gpu_state_destroy(state);
kfree(a6xx_state);
-- 
2.7.4



[PATCH 2/2] drm/msm/gpu: Fix crash during system suspend after unbind

2022-09-28 Thread Akhil P Oommen
In adreno_unbind, we should clean up gpu device's drvdata to avoid
accessing a stale pointer during system suspend. Also, check for NULL
ptr in both system suspend/resume callbacks.

Signed-off-by: Akhil P Oommen 
---
Rebased on msm-next + some external fixes to boot sc7280 device.

 drivers/gpu/drm/msm/adreno/adreno_device.c | 10 +-
 drivers/gpu/drm/msm/msm_gpu.c  |  2 ++
 drivers/gpu/drm/msm/msm_gpu.h  |  4 
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c 
b/drivers/gpu/drm/msm/adreno/adreno_device.c
index 24b489b..6288064 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -679,6 +679,9 @@ static int adreno_system_suspend(struct device *dev)
struct msm_gpu *gpu = dev_to_gpu(dev);
int remaining, ret;
 
+   if (!gpu)
+   return 0;
+
suspend_scheduler(gpu);
 
remaining = wait_event_timeout(gpu->retire_event,
@@ -700,7 +703,12 @@ static int adreno_system_suspend(struct device *dev)
 
 static int adreno_system_resume(struct device *dev)
 {
-   resume_scheduler(dev_to_gpu(dev));
+   struct msm_gpu *gpu = dev_to_gpu(dev);
+
+   if (!gpu)
+   return 0;
+
+   resume_scheduler(gpu);
return pm_runtime_force_resume(dev);
 }
 
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 0098ee8..021f4e2 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -997,4 +997,6 @@ void msm_gpu_cleanup(struct msm_gpu *gpu)
}
 
msm_devfreq_cleanup(gpu);
+
+   platform_set_drvdata(gpu->pdev, NULL);
 }
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index ff911e73..58a72e6 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -280,6 +280,10 @@ struct msm_gpu {
 static inline struct msm_gpu *dev_to_gpu(struct device *dev)
 {
struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(dev);
+
+   if (!adreno_smmu)
+   return NULL;
+
return container_of(adreno_smmu, struct msm_gpu, adreno_smmu);
 }
 
-- 
2.7.4



Re: [PATCH v2 01/16] slab: Remove __malloc attribute from realloc functions

2022-09-28 Thread Geert Uytterhoeven
Hi Kees,

On Fri, Sep 23, 2022 at 10:35 PM Kees Cook  wrote:
> The __malloc attribute should not be applied to "realloc" functions, as
> the returned pointer may alias the storage of the prior pointer. Instead
> of splitting __malloc from __alloc_size, which would be a huge amount of
> churn, just create __realloc_size for the few cases where it is needed.
>
> Additionally removes the conditional test for __alloc_size__, which is
> always defined now.
>
> Cc: Christoph Lameter 
> Cc: Pekka Enberg 
> Cc: David Rientjes 
> Cc: Joonsoo Kim 
> Cc: Andrew Morton 
> Cc: Vlastimil Babka 
> Cc: Roman Gushchin 
> Cc: Hyeonggon Yoo <42.hye...@gmail.com>
> Cc: Marco Elver 
> Cc: linux...@kvack.org
> Signed-off-by: Kees Cook 

Thanks for your patch, which is now commit 63caa04ec60583b1 ("slab:
Remove __malloc attribute from realloc functions") in next-20220927.

nore...@ellerman.id.au reported all gcc8-based builds to fail
(e.g. [1], more at [2]):

In file included from :
./include/linux/percpu.h: In function ‘__alloc_reserved_percpu’:
././include/linux/compiler_types.h:279:30: error: expected
declaration specifiers before ‘__alloc_size__’
 #define __alloc_size(x, ...) __alloc_size__(x, ## __VA_ARGS__) __malloc
  ^~
./include/linux/percpu.h:120:74: note: in expansion of macro ‘__alloc_size’
[...]

It's building fine with e.g. gcc-9 (which is my usual m68k cross-compiler).
Reverting this commit on next-20220927 fixes the issue.

[1] http://kisskb.ellerman.id.au/kisskb/buildresult/14803908/
[2] 
http://kisskb.ellerman.id.au/kisskb/head/1bd8b75fe6adeaa89d02968bdd811ffe708cf839/



> ---
>  include/linux/compiler_types.h | 13 +
>  include/linux/slab.h   | 12 ++--
>  mm/slab_common.c   |  4 ++--
>  3 files changed, 13 insertions(+), 16 deletions(-)
>
> diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
> index 4f2a819fd60a..f141a6f6b9f6 100644
> --- a/include/linux/compiler_types.h
> +++ b/include/linux/compiler_types.h
> @@ -271,15 +271,12 @@ struct ftrace_likely_data {
>
>  /*
>   * Any place that could be marked with the "alloc_size" attribute is also
> - * a place to be marked with the "malloc" attribute. Do this as part of the
> - * __alloc_size macro to avoid redundant attributes and to avoid missing a
> - * __malloc marking.
> + * a place to be marked with the "malloc" attribute, except those that may
> + * be performing a _reallocation_, as that may alias the existing pointer.
> + * For these, use __realloc_size().
>   */
> -#ifdef __alloc_size__
> -# define __alloc_size(x, ...)  __alloc_size__(x, ## __VA_ARGS__) __malloc
> -#else
> -# define __alloc_size(x, ...)  __malloc
> -#endif
> +#define __alloc_size(x, ...)   __alloc_size__(x, ## __VA_ARGS__) __malloc
> +#define __realloc_size(x, ...) __alloc_size__(x, ## __VA_ARGS__)
>
>  #ifndef asm_volatile_goto
>  #define asm_volatile_goto(x...) asm goto(x)
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index 0fefdf528e0d..41bd036e7551 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -184,7 +184,7 @@ int kmem_cache_shrink(struct kmem_cache *s);
>  /*
>   * Common kmalloc functions provided by all allocators
>   */
> -void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags) 
> __alloc_size(2);
> +void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags) 
> __realloc_size(2);
>  void kfree(const void *objp);
>  void kfree_sensitive(const void *objp);
>  size_t __ksize(const void *objp);
> @@ -647,10 +647,10 @@ static inline __alloc_size(1, 2) void 
> *kmalloc_array(size_t n, size_t size, gfp_
>   * @new_size: new size of a single member of the array
>   * @flags: the type of memory to allocate (see kmalloc)
>   */
> -static inline __alloc_size(2, 3) void * __must_check krealloc_array(void *p,
> -   size_t 
> new_n,
> -   size_t 
> new_size,
> -   gfp_t 
> flags)
> +static inline __realloc_size(2, 3) void * __must_check krealloc_array(void 
> *p,
> + size_t 
> new_n,
> + size_t 
> new_size,
> + gfp_t 
> flags)
>  {
> size_t bytes;
>
> @@ -774,7 +774,7 @@ static inline __alloc_size(1, 2) void *kvcalloc(size_t n, 
> size_t size, gfp_t fla
>  }
>
>  extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t 
> flags)
> - __alloc_size(3);
> + __realloc_size(3);
>  extern void kvfree(const void *addr);
>  extern void kvfree_sensitive(const void *addr, size_t len);
>
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> inde

Re: [PATCH v3 3/5] clk: qcom: gdsc: Add a reset op to poll gdsc collapse

2022-09-28 Thread Akhil P Oommen

On 9/27/2022 10:56 PM, Bjorn Andersson wrote:

On Fri, Aug 19, 2022 at 01:48:37AM +0530, Akhil P Oommen wrote:

Add a reset op compatible function to poll for gdsc collapse.

Signed-off-by: Akhil P Oommen 
---

(no changes since v2)

Changes in v2:
- Minor update to function prototype

  drivers/clk/qcom/gdsc.c | 23 +++
  drivers/clk/qcom/gdsc.h |  7 +++
  2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/drivers/clk/qcom/gdsc.c b/drivers/clk/qcom/gdsc.c
index 44520ef..2d0f1d1 100644
--- a/drivers/clk/qcom/gdsc.c
+++ b/drivers/clk/qcom/gdsc.c
@@ -17,6 +17,7 @@
  #include 
  #include 
  #include "gdsc.h"
+#include "reset.h"
  
  #define PWR_ON_MASK		BIT(31)

  #define EN_REST_WAIT_MASK GENMASK_ULL(23, 20)
@@ -116,7 +117,8 @@ static int gdsc_hwctrl(struct gdsc *sc, bool en)
return regmap_update_bits(sc->regmap, sc->gdscr, HW_CONTROL_MASK, val);
  }
  
-static int gdsc_poll_status(struct gdsc *sc, enum gdsc_status status)

+static int gdsc_poll_status(struct gdsc *sc, enum gdsc_status status,
+   s64 timeout_us, unsigned int interval_ms)
  {
ktime_t start;
  
@@ -124,7 +126,9 @@ static int gdsc_poll_status(struct gdsc *sc, enum gdsc_status status)

do {
if (gdsc_check_status(sc, status))
return 0;
-   } while (ktime_us_delta(ktime_get(), start) < TIMEOUT_US);
+   if (interval_ms)
+   msleep(interval_ms);

You effectively msleep(5) here, for which you shouldn't use msleep() -
or more likely, this only happens in exceptional circumstances, so a
longer interval_ms seems reasonable.
By reducing the overall polling time here, we can reduce any user-visible
impact, such as missed frames/janks due to gpu hang/recovery. I kept
5ms here because in my local testing on an sc7280 device I didn't see any
benefit from decreasing it below 5ms. msleep() here also helps to quickly
schedule other threads which hold a pm_runtime refcount on cx_gdsc, which
indirectly helped to reduce the overall polling time here significantly in my
testing.





+   } while (ktime_us_delta(ktime_get(), start) < timeout_us);
  
  	if (gdsc_check_status(sc, status))

return 0;
@@ -172,7 +176,7 @@ static int gdsc_toggle_logic(struct gdsc *sc, enum 
gdsc_status status)
udelay(1);
}
  
-	ret = gdsc_poll_status(sc, status);

+   ret = gdsc_poll_status(sc, status, TIMEOUT_US, 0);
WARN(ret, "%s status stuck at 'o%s'", sc->pd.name, status ? "ff" : "n");
  
  	if (!ret && status == GDSC_OFF && sc->rsupply) {

@@ -343,7 +347,7 @@ static int _gdsc_disable(struct gdsc *sc)
 */
udelay(1);
  
-		ret = gdsc_poll_status(sc, GDSC_ON);

+   ret = gdsc_poll_status(sc, GDSC_ON, TIMEOUT_US, 0);
if (ret)
return ret;
}
@@ -565,3 +569,14 @@ int gdsc_gx_do_nothing_enable(struct generic_pm_domain 
*domain)
return 0;
  }
  EXPORT_SYMBOL_GPL(gdsc_gx_do_nothing_enable);
+
+int gdsc_wait_for_collapse(void *priv)
+{
+   struct gdsc *sc = priv;
+   int ret;
+
+   ret = gdsc_poll_status(sc, GDSC_OFF, 50, 5);

So I presume the GPU driver will put() the GDSC and then issue a reset,
which will wait up to 5 seconds for the GDSC to be turned off.
Not exactly. GPU driver will put() its GDSC vote and will wait for 500ms 
to allow other clients to drop their vote and the cx_gdsc to finally 
collapse at hw. There is no hw interface to 'reset' entire GPU 
subsystem. We have to pull the plug on gdsc to reset it.


So essentially, this logic is needed because we don't wait for VOTABLE
GDSCs to be turned off? And we have no way to do the put-with-wait for
this specific case.

I would like the commit message to capture this reasoning.
Agree. Will post a new patchset once we have consensus on the rest of 
the things here.


-Akhil.


Thanks,
Bjorn


+   WARN(ret, "%s status stuck at 'on'", sc->pd.name);
+   return ret;
+}
+EXPORT_SYMBOL_GPL(gdsc_wait_for_collapse);
diff --git a/drivers/clk/qcom/gdsc.h b/drivers/clk/qcom/gdsc.h
index ad313d7..d484bdb 100644
--- a/drivers/clk/qcom/gdsc.h
+++ b/drivers/clk/qcom/gdsc.h
@@ -12,6 +12,7 @@
  struct regmap;
  struct regulator;
  struct reset_controller_dev;
+struct qcom_reset_map;
  
  /**

   * struct gdsc - Globally Distributed Switch Controller
@@ -79,6 +80,7 @@ int gdsc_register(struct gdsc_desc *desc, struct 
reset_controller_dev *,
  struct regmap *);
  void gdsc_unregister(struct gdsc_desc *desc);
  int gdsc_gx_do_nothing_enable(struct generic_pm_domain *domain);
+int gdsc_wait_for_collapse(void *priv);
  #else
  static inline int gdsc_register(struct gdsc_desc *desc,
struct reset_controller_dev *rcdev,
@@ -88,5 +90,10 @@ static inline int gdsc_register(struct gdsc_desc *desc,
  }
  
  static inline void gdsc_unregister(struct gdsc_desc *desc) {};

+
+static int gdsc_wait_for

Re: [Intel-gfx] [PATCH 04/16] drm/i915/vm_bind: Add support to create persistent vma

2022-09-28 Thread Tvrtko Ursulin



On 28/09/2022 07:19, Niranjana Vishwanathapura wrote:

Add i915_vma_instance_persistent() to create persistent vmas.
Persistent vmas will use i915_gtt_view to support partial binding.

vma_lookup is tied to a segment of the object instead of a section
of VA space. Hence, it does not support aliasing, i.e., multiple
mappings (at different VAs) pointing to the same gtt_view of the object.
Skip vma_lookup for persistent vmas to support aliasing.

Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Andi Shyti 
---
  drivers/gpu/drm/i915/i915_vma.c   | 39 ---
  drivers/gpu/drm/i915/i915_vma.h   | 16 +--
  drivers/gpu/drm/i915/i915_vma_types.h |  7 +
  3 files changed, 57 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index f17c09ead7d7..5839e1f55f00 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -109,7 +109,8 @@ static void __i915_vma_retire(struct i915_active *ref)
  static struct i915_vma *
  vma_create(struct drm_i915_gem_object *obj,
   struct i915_address_space *vm,
-  const struct i915_gtt_view *view)
+  const struct i915_gtt_view *view,
+  bool skip_lookup_cache)
  {
struct i915_vma *pos = ERR_PTR(-E2BIG);
struct i915_vma *vma;
@@ -196,6 +197,9 @@ vma_create(struct drm_i915_gem_object *obj,
__set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(vma));
}
  
+	if (skip_lookup_cache)

+   goto skip_rb_insert;
+
rb = NULL;
p = &obj->vma.tree.rb_node;
while (*p) {
@@ -220,6 +224,7 @@ vma_create(struct drm_i915_gem_object *obj,
rb_link_node(&vma->obj_node, rb, p);
rb_insert_color(&vma->obj_node, &obj->vma.tree);
  
+skip_rb_insert:

if (i915_vma_is_ggtt(vma))
/*
 * We put the GGTT vma at the start of the vma-list, followed
@@ -299,7 +304,34 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
  
  	/* vma_create() will resolve the race if another creates the vma */

if (unlikely(!vma))
-   vma = vma_create(obj, vm, view);
+   vma = vma_create(obj, vm, view, false);
+
+   GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view));
+   return vma;
+}
+
+/**
+ * i915_vma_create_persistent - create a persistent VMA
+ * @obj: parent &struct drm_i915_gem_object to be mapped
+ * @vm: address space in which the mapping is located
+ * @view: additional mapping requirements
+ *
+ * Creates a persistent vma.
+ *
+ * Returns the vma, or an error pointer.
+ */
+struct i915_vma *
+i915_vma_create_persistent(struct drm_i915_gem_object *obj,
+  struct i915_address_space *vm,
+  const struct i915_gtt_view *view)
+{
+   struct i915_vma *vma;
+
+   GEM_BUG_ON(!kref_read(&vm->ref));
+
+   vma = vma_create(obj, vm, view, true);
+   if (!IS_ERR(vma))
+   i915_vma_set_persistent(vma);
  
  	GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view));

return vma;
@@ -1666,7 +1698,8 @@ static void release_references(struct i915_vma *vma, 
struct intel_gt *gt,
  
  	spin_lock(&obj->vma.lock);

list_del(&vma->obj_link);
-   if (!RB_EMPTY_NODE(&vma->obj_node))
+   if (!i915_vma_is_persistent(vma) &&


Thinking out loud - maybe you don't need the extra condition? But it is 
good for self-documenting purposes in any case.



+   !RB_EMPTY_NODE(&vma->obj_node))
rb_erase(&vma->obj_node, &obj->vma.tree);
  
  	spin_unlock(&obj->vma.lock);

diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index aecd9c64486b..51e712de380a 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -44,6 +44,10 @@ struct i915_vma *
  i915_vma_instance(struct drm_i915_gem_object *obj,
  struct i915_address_space *vm,
  const struct i915_gtt_view *view);
+struct i915_vma *
+i915_vma_create_persistent(struct drm_i915_gem_object *obj,
+  struct i915_address_space *vm,
+  const struct i915_gtt_view *view);
  
  void i915_vma_unpin_and_release(struct i915_vma **p_vma, unsigned int flags);

  #define I915_VMA_RELEASE_MAP BIT(0)
@@ -138,6 +142,16 @@ static inline u32 i915_ggtt_pin_bias(struct i915_vma *vma)
return i915_vm_to_ggtt(vma->vm)->pin_bias;
  }
  
+static inline bool i915_vma_is_persistent(const struct i915_vma *vma)

+{
+   return test_bit(I915_VMA_PERSISTENT_BIT, __i915_vma_flags(vma));
+}
+
+static inline void i915_vma_set_persistent(struct i915_vma *vma)
+{
+   set_bit(I915_VMA_PERSISTENT_BIT, __i915_vma_flags(vma));
+}
+
  static inline struct i915_vma *i915_vma_get(struct i915_vma *vma)
  {
i915_gem_object_get(vma->obj);
@@ -164,8 +178,6 @@ i915_vma_compare(struct i915_vma *vma,
  {
ptrdiff_t cmp;
  
-	GEM_BUG_ON(view && !i915_is_ggtt_or_

Re: [PATCH v5 1/6] dt-bindings: arm: mediatek: mmsys: change compatible for MT8195

2022-09-28 Thread Krzysztof Kozlowski
On 27/09/2022 17:26, Jason-JH.Lin wrote:
> For previous MediaTek SoCs, such as MT8173, there are 2 display HW
> pipelines binding to 1 mmsys with the same power domain, the same
> clock driver and the same mediatek-drm driver.
> 
> For MT8195, VDOSYS0 and VDOSYS1 are 2 display HW pipelines binding to
> 2 different power domains, different clock drivers and different
> mediatek-drm drivers.
> 
> Moreover, the hardware pipeline of VDOSYS0 has these components: COLOR,
> CCORR, AAL, GAMMA, DITHER. They are related to PQ (Picture Quality)
> and they make VDOSYS0 support the PQ function, while they are not
> included in VDOSYS1.
> 
> The hardware pipeline of VDOSYS1 has the component ETHDR (an HDR-related
> component). It makes VDOSYS1 support the HDR function, while it is not
> included in VDOSYS0.


Reviewed-by: Krzysztof Kozlowski 

Best regards,
Krzysztof



[PATCH v13 0/9] Fixes integer overflow or integer truncation issues in page lookups, ttm place configuration and scatterlist creation

2022-09-28 Thread Gwan-gyeong Mun
This patch series fixes integer overflow or integer truncation issues in
page lookups, ttm place configuration and scatterlist creation, etc.
We need to check that we avoid integer overflows when looking up a page,
and so fix all the instances where we have mistakenly used a plain integer
instead of a more suitable long.
And there is an impedance mismatch between the scatterlist API using
unsigned int and our memory/page accounting in unsigned long. That is we
may try to create a scatterlist for a large object that overflows returning
a small table into which we try to fit very many pages. As the object size
is under the control of userspace, we have to be prudent and catch the
conversion errors. To catch the implicit truncation as we switch from
unsigned long into the scatterlist's unsigned int, we use improved
overflows_type check and report E2BIG prior to the operation. This is
already used in our create ioctls to indicate if the uABI request is simply
too large for the backing store. 
The ttm place configuration has the same problem as scatterlist creation,
and we fix its integer truncation in the same way as for scatterlist
creation.
The series also corrects the error code to return -E2BIG when creating gem
objects using ttm or shmem, if the size is too large in each case.
In order to provide common macros, it adds a few utility macros
to the overflow header.
It introduces check_assign() and check_assign_user_ptr(). The check_assign()
macro assigns a source value through a destination pointer along with an
overflow check, and the check_assign_user_ptr() macro assigns a source
value to a destination pointer-type variable along with an overflow check.
If an explicit overflow check is required while assigning to a user-space
pointer, check_assign_user_ptr() can be used instead of u64_to_user_ptr()
to assign integers into __user pointers along with an overflow check.
check_assign() and overflows_type() are implemented on top of the updated
check_add_overflow() macro [1], and the series also uses the updated
overflows_type() and castable_to_type() macros [2].
Therefore this series includes the patches that came from Kees [1][2]
(both patches are under review in other patch mail threads).

[1] https://lore.kernel.org/all/202208311040.C6CA8253@keescook/
[2] https://lore.kernel.org/lkml/20220926191109.1803094-1-keesc...@chromium.org/

Chris Wilson (3):
  drm/i915/gem: Typecheck page lookups
  drm/i915: Check for integer truncation on scatterlist creation
  drm/i915: Remove truncation warning for large objects

Gwan-gyeong Mun (4):
  overflow: Introduce check_assign() and check_assign_user_ptr()
  drm/i915: Check for integer truncation on the configuration of ttm
place
  drm/i915: Check if the size is too big while creating shmem file
  drm/i915: Use error code as -E2BIG when the size of gem ttm object is
too large

Kees Cook (2):
  overflow: Allow mixed type arguments
  overflow: Introduce overflows_type() and castable_to_type()

 drivers/gpu/drm/i915/gem/i915_gem_internal.c  |   6 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c|   7 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.h| 303 +--
 drivers/gpu/drm/i915/gem/i915_gem_pages.c |  27 +-
 drivers/gpu/drm/i915/gem/i915_gem_phys.c  |   4 +
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c |  19 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c   |  23 +-
 drivers/gpu/drm/i915/gem/i915_gem_userptr.c   |   5 +-
 .../drm/i915/gem/selftests/i915_gem_context.c |  12 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|   8 +-
 .../drm/i915/gem/selftests/i915_gem_object.c  |   8 +-
 drivers/gpu/drm/i915/gvt/dmabuf.c |   9 +-
 drivers/gpu/drm/i915/i915_gem.c   |  18 +-
 drivers/gpu/drm/i915/i915_scatterlist.h   |  11 +
 drivers/gpu/drm/i915/i915_user_extensions.c   |   6 +-
 drivers/gpu/drm/i915/i915_utils.h |   4 -
 drivers/gpu/drm/i915/i915_vma.c   |   8 +-
 drivers/gpu/drm/i915/intel_region_ttm.c   |  17 +-
 include/linux/compiler.h  |   1 +
 include/linux/overflow.h  | 166 --
 lib/overflow_kunit.c  | 489 --
 21 files changed, 993 insertions(+), 158 deletions(-)

-- 
2.37.1



[PATCH v13 3/9] overflow: Introduce overflows_type() and castable_to_type()

2022-09-28 Thread Gwan-gyeong Mun
From: Kees Cook 

Implement a robust overflows_type() macro to test if a variable or
constant value would overflow another variable or type. This can be
used as a constant expression for static_assert() (which requires a
constant expression[1][2]) when used on constant values. This must be
constructed manually, since __builtin_add_overflow() does not produce
a constant expression[3].

Additionally adds castable_to_type(), similar to __same_type(), but for
checking if a constant value would overflow if cast to a given type.

Add unit tests for overflows_type(), __same_type(), and castable_to_type()
to the existing KUnit "overflow" test.

[1] https://en.cppreference.com/w/c/language/_Static_assert
[2] C11 standard (ISO/IEC 9899:2011): 6.7.10 Static assertions
[3] https://gcc.gnu.org/onlinedocs/gcc/Integer-Overflow-Builtins.html
6.56 Built-in Functions to Perform Arithmetic with Overflow Checking
Built-in Function: bool __builtin_add_overflow (type1 a, type2 b,

Cc: Luc Van Oostenryck 
Cc: Nathan Chancellor 
Cc: Nick Desaulniers 
Cc: Tom Rix 
Cc: Daniel Latypov 
Cc: Vitor Massaru Iha 
Cc: "Gustavo A. R. Silva" 
Cc: linux-harden...@vger.kernel.org
Cc: l...@lists.linux.dev
Co-developed-by: Gwan-gyeong Mun 
Signed-off-by: Gwan-gyeong Mun 
Signed-off-by: Kees Cook 
---
 drivers/gpu/drm/i915/i915_utils.h |   4 -
 include/linux/compiler.h  |   1 +
 include/linux/overflow.h  |  48 
 lib/overflow_kunit.c  | 388 +-
 4 files changed, 436 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_utils.h 
b/drivers/gpu/drm/i915/i915_utils.h
index 6c14d13364bf..67a66d4d5c70 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -111,10 +111,6 @@ bool i915_error_injected(void);
 #define range_overflows_end_t(type, start, size, max) \
range_overflows_end((type)(start), (type)(size), (type)(max))
 
-/* Note we don't consider signbits :| */
-#define overflows_type(x, T) \
-   (sizeof(x) > sizeof(T) && (x) >> BITS_PER_TYPE(T))
-
 #define ptr_mask_bits(ptr, n) ({   \
unsigned long __v = (unsigned long)(ptr);   \
(typeof(ptr))(__v & -BIT(n));   \
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 7713d7bcdaea..c631107e93b1 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -244,6 +244,7 @@ static inline void *offset_to_ptr(const int *off)
  * bool and also pointer types.
  */
 #define is_signed_type(type) (((type)(-1)) < (__force type)1)
+#define is_unsigned_type(type) (!is_signed_type(type))
 
 /*
  * This is needed in functions which generate the stack canary, see
diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index 8ccbfa46f0ed..f63cefeabcba 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -171,6 +171,54 @@ static inline bool __must_check __must_check_overflow(bool 
overflow)
check_assign(value, &kptr) ? 1 : (({ ptr = (void __user *)kptr; }), 0); 
\
 }))
 
+#define __overflows_type_constexpr(x, T) ( \
+   is_unsigned_type(typeof(x)) ?   \
+   (x) > type_max(typeof(T)) ? 1 : 0   \
+   : is_unsigned_type(typeof(T)) ? \
+   (x) < 0 || (x) > type_max(typeof(T)) ? 1 : 0\
+   : (x) < type_min(typeof(T)) ||  \
+ (x) > type_max(typeof(T)) ? 1 : 0)
+
+#define __overflows_type(x, T) ({  \
+   typeof(T) v = 0;\
+   check_add_overflow((x), v, &v); \
+})
+
+/**
+ * overflows_type - helper for checking the overflows between value, variables,
+ * or data type
+ *
+ * @n: source constant value or variable to be checked
+ * @T: destination variable or data type proposed to store @n
+ *
+ * Compares the @n expression for whether or not it can safely fit in
+ * the storage of the type in @T. @n and @T can have different types.
+ * If @n is a constant expression, this will also resolve to a constant
+ * expression.
+ *
+ * Returns: true if overflow can occur, false otherwise.
+ */
+#define overflows_type(n, T)   \
+   __builtin_choose_expr(__is_constexpr(n),\
+ __overflows_type_constexpr(n, T), \
+ __overflows_type(n, T))
+
+/**
+ * castable_to_type - like __same_type(), but also allows for casted literals
+ *
+ * @n: variable or constant value
+ * @T: variable or data type
+ *
+ * Unlike the __same_type() macro, this allows a constant value as the
+ * first argument. If this value would not overflow into an assignment
+ * of the second argument's type, it returns true. Otherwise, this falls
+ * back to __same_type().
+ */
+#define castable_to_type(n, T) \
+

[PATCH v13 5/9] drm/i915: Check for integer truncation on scatterlist creation

2022-09-28 Thread Gwan-gyeong Mun
From: Chris Wilson 

There is an impedance mismatch between the scatterlist API using unsigned
int and our memory/page accounting in unsigned long. That is we may try
to create a scatterlist for a large object that overflows returning a
small table into which we try to fit very many pages. As the object size
is under control of userspace, we have to be prudent and catch the
conversion errors.

To catch the implicit truncation as we switch from unsigned long into the
scatterlist's unsigned int, we use overflows_type check and report
E2BIG prior to the operation. This is already used in our create ioctls to
indicate if the uABI request is simply too large for the backing store.
Failing that type check, we have a second check at sg_alloc_table time
to make sure the values we are passing into the scatterlist API are not
truncated.

It uses pgoff_t for locals that deal with page indices; in this
case, the page count is the limit of the page index.
It also uses the check_assign() macro (safe_conversion() before v8), which
assigns an integer value to a new variable while checking that the
destination is large enough to hold the source value.

v2: Move added i915_utils's macro into drm_util header (Jani N)
v5: Fix macros to be enclosed in parentheses for complex values
Fix too long line warning
v8: Replace safe_conversion() with check_assign() (Kees)

Signed-off-by: Chris Wilson 
Signed-off-by: Gwan-gyeong Mun 
Cc: Tvrtko Ursulin 
Cc: Brian Welty 
Cc: Matthew Auld 
Cc: Thomas Hellström 
Reviewed-by: Nirmoy Das 
Reviewed-by: Mauro Carvalho Chehab 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_internal.c |  6 --
 drivers/gpu/drm/i915/gem/i915_gem_object.h   |  3 ---
 drivers/gpu/drm/i915/gem/i915_gem_phys.c |  4 
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c|  5 -
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c  |  4 
 drivers/gpu/drm/i915/gem/i915_gem_userptr.c  |  5 -
 drivers/gpu/drm/i915/gvt/dmabuf.c|  9 +
 drivers/gpu/drm/i915/i915_scatterlist.h  | 11 +++
 8 files changed, 36 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c 
b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index c698f95af15f..53fa27e1c950 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -37,10 +37,13 @@ static int i915_gem_object_get_pages_internal(struct 
drm_i915_gem_object *obj)
struct sg_table *st;
struct scatterlist *sg;
unsigned int sg_page_sizes;
-   unsigned int npages;
+   pgoff_t npages; /* restricted by sg_alloc_table */
int max_order;
gfp_t gfp;
 
+   if (check_assign(obj->base.size >> PAGE_SHIFT, &npages))
+   return -E2BIG;
+
max_order = MAX_ORDER;
 #ifdef CONFIG_SWIOTLB
if (is_swiotlb_active(obj->base.dev->dev)) {
@@ -67,7 +70,6 @@ static int i915_gem_object_get_pages_internal(struct 
drm_i915_gem_object *obj)
if (!st)
return -ENOMEM;
 
-   npages = obj->base.size / PAGE_SIZE;
if (sg_alloc_table(st, npages, GFP_KERNEL)) {
kfree(st);
return -ENOMEM;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 26e7f86dbed9..9f8e29112c31 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -26,9 +26,6 @@ enum intel_region_id;
  * this and catch if we ever need to fix it. In the meantime, if you do
  * spot such a local variable, please consider fixing!
  *
- * Aside from our own locals (for which we have no excuse!):
- * - sg_table embeds unsigned int for nents
- *
  * We can check for invalidly typed locals with typecheck(), see for example
  * i915_gem_object_get_sg().
  */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c 
b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
index 0d0e46dae559..88ba7266a3a5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
@@ -28,6 +28,10 @@ static int i915_gem_object_get_pages_phys(struct 
drm_i915_gem_object *obj)
void *dst;
int i;
 
+   /* Contiguous chunk, with a single scatterlist element */
+   if (overflows_type(obj->base.size, sg->length))
+   return -E2BIG;
+
if (GEM_WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
return -EINVAL;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index f42ca1179f37..339b0a9cf2d0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -193,13 +193,16 @@ static int shmem_get_pages(struct drm_i915_gem_object 
*obj)
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct intel_memory_region *mem = obj->mm.region;
struct address_space *mapping = obj->base.filp->f_mapping;
-   const unsigned long p

[PATCH v13 1/9] overflow: Allow mixed type arguments

2022-09-28 Thread Gwan-gyeong Mun
From: Kees Cook 

When the check_[op]_overflow() helpers were introduced, all arguments were
required to be the same type to make the fallback macros simpler. However,
now that the fallback macros have been removed[1], it is fine to allow
mixed types, which makes using the helpers much more useful, as they
can be used to test for type-based overflows (e.g. adding two large ints
but storing into a u8), as would be handy in the drm core[2].

Remove the restriction, and add additional self-tests that exercise some
of the mixed-type overflow cases, and double-check for accidental macro
side-effects.

[1] https://git.kernel.org/linus/4eb6bd55cfb22ffc20652732340c4962f3ac9a91
[2] 
https://lore.kernel.org/lkml/20220824084514.2261614-2-gwan-gyeong@intel.com

Cc: Rasmus Villemoes 
Cc: Andrzej Hajda 
Cc: "Gustavo A. R. Silva" 
Cc: Nick Desaulniers 
Cc: linux-harden...@vger.kernel.org
Signed-off-by: Kees Cook 
Signed-off-by: Gwan-gyeong Mun 
Reviewed-by: Andrzej Hajda 
Reviewed-by: Gwan-gyeong Mun 
Tested-by:  Gwan-gyeong Mun 
---
 include/linux/overflow.h |  72 
 lib/overflow_kunit.c | 101 ---
 2 files changed, 113 insertions(+), 60 deletions(-)

diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index 0eb3b192f07a..19dfdd74835e 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -51,40 +51,50 @@ static inline bool __must_check __must_check_overflow(bool 
overflow)
return unlikely(overflow);
 }
 
-/*
- * For simplicity and code hygiene, the fallback code below insists on
- * a, b and *d having the same type (similar to the min() and max()
- * macros), whereas gcc's type-generic overflow checkers accept
- * different types. Hence we don't just make check_add_overflow an
- * alias for __builtin_add_overflow, but add type checks similar to
- * below.
+/** check_add_overflow() - Calculate addition with overflow checking
+ *
+ * @a: first addend
+ * @b: second addend
+ * @d: pointer to store sum
+ *
+ * Returns 0 on success.
+ *
+ * *@d holds the results of the attempted addition, but is not considered
+ * "safe for use" on a non-zero return value, which indicates that the
+ * sum has overflowed or been truncated.
  */
-#define check_add_overflow(a, b, d) __must_check_overflow(({   \
-   typeof(a) __a = (a);\
-   typeof(b) __b = (b);\
-   typeof(d) __d = (d);\
-   (void) (&__a == &__b);  \
-   (void) (&__a == __d);   \
-   __builtin_add_overflow(__a, __b, __d);  \
-}))
+#define check_add_overflow(a, b, d)\
+   __must_check_overflow(__builtin_add_overflow(a, b, d))
 
-#define check_sub_overflow(a, b, d) __must_check_overflow(({   \
-   typeof(a) __a = (a);\
-   typeof(b) __b = (b);\
-   typeof(d) __d = (d);\
-   (void) (&__a == &__b);  \
-   (void) (&__a == __d);   \
-   __builtin_sub_overflow(__a, __b, __d);  \
-}))
+/** check_sub_overflow() - Calculate subtraction with overflow checking
+ *
+ * @a: minuend; value to subtract from
+ * @b: subtrahend; value to subtract from @a
+ * @d: pointer to store difference
+ *
+ * Returns 0 on success.
+ *
+ * *@d holds the results of the attempted subtraction, but is not considered
+ * "safe for use" on a non-zero return value, which indicates that the
+ * difference has underflowed or been truncated.
+ */
+#define check_sub_overflow(a, b, d)\
+   __must_check_overflow(__builtin_sub_overflow(a, b, d))
 
-#define check_mul_overflow(a, b, d) __must_check_overflow(({   \
-   typeof(a) __a = (a);\
-   typeof(b) __b = (b);\
-   typeof(d) __d = (d);\
-   (void) (&__a == &__b);  \
-   (void) (&__a == __d);   \
-   __builtin_mul_overflow(__a, __b, __d);  \
-}))
+/** check_mul_overflow() - Calculate multiplication with overflow checking
+ *
+ * @a: first factor
+ * @b: second factor
+ * @d: pointer to store product
+ *
+ * Returns 0 on success.
+ *
+ * *@d holds the results of the attempted multiplication, but is not
+ * considered "safe for use" on a non-zero return value, which indicates
+ * that the product has overflowed or been truncated.
+ */
+#define check_mul_overflow(a, b, d)\
+   __must_check_overflow(__builtin_mul_overflow(a, b, d))
 
 /** check_shl_overflow() - Calculate a left-shifted value and check overflow
  *
diff --git a/lib/overflow_kunit.c b/lib/overflow_kunit.c
index 7e3e43679b73..0d98c9bc75da 100644
--- a/lib/overflow_kunit.c
+++ b/lib/overflow_kunit.c
@@ -16,12 +16,15 @@
 #include 
 #include 
 
-#define DEFINE_TEST_ARRAY(t)   \
-   static const struct test_ ## t {\
-   t a, b; \
-   t sum, diff, prod;  \
-

[PATCH v13 2/9] overflow: Introduce check_assign() and check_assign_user_ptr()

2022-09-28 Thread Gwan-gyeong Mun
Add the check_assign() macro, which assigns a source value through a
destination pointer along with an overflow check, and the
check_assign_user_ptr() macro, which assigns a source value to a
destination pointer-type variable along with an overflow check. If an
explicit overflow check is required while assigning to a user-space pointer,
check_assign_user_ptr() can be used instead of u64_to_user_ptr() to assign
integers into __user pointers along with an overflow check.

v3: Add is_type_unsigned() macro (Mauro)
Modify overflows_type() macro to consider signed data types (Mauro)
Fix the problem that safe_conversion() macro always returns true
v4: Fix kernel-doc markups
v6: Move macro addition location so that it can be used by other than drm
subsystem (Jani, Mauro, Andi)
Change is_type_unsigned to is_unsigned_type to have the same name form
as is_signed_type macro
v8: Add check_assign() and remove safe_conversion() (Kees)
Fix overflows_type() to use gcc's built-in overflow function (Andrzej)
Add overflows_ptr() to allow overflow checking when assigning a value
into a pointer variable (G.G.)
v9: Fix overflows_type() to use __builtin_add_overflow() instead of
__builtin_add_overflow_p() (Andrzej)
Fix overflows_ptr() to use overflows_type() with the unsigned long type
(Andrzej)
v10: Remove a redundant type checking for a pointer. (Andrzej)
 Use updated check_add_overflow macro instead of __builtin_add_overflow
 (G.G)
 Add check_assign_user_ptr() macro and drop overflows_ptr() macro(Kees)
v11: Fix incorrect type assignment between different address spaces caused
 by the wrong use of __user macro. (kernel test robot)
 Update macro description (G.G)
v12: Remove overflows_type() macro here. updated overflows_type() macro
 will be added in a subsequent patch (G.G)

Signed-off-by: Gwan-gyeong Mun 
Cc: Thomas Hellström 
Cc: Matthew Auld 
Cc: Nirmoy Das 
Cc: Jani Nikula 
Cc: Andi Shyti 
Cc: Andrzej Hajda 
Cc: Mauro Carvalho Chehab 
Cc: Kees Cook 
Reviewed-by: Mauro Carvalho Chehab  (v5)
Reviewed-by: Andrzej Hajda  (v9)
Acked-by: Kees Cook 
Reported-by: kernel test robot 
---
 drivers/gpu/drm/i915/i915_user_extensions.c |  6 +--
 include/linux/overflow.h| 44 +
 2 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_user_extensions.c 
b/drivers/gpu/drm/i915/i915_user_extensions.c
index c822d0aafd2d..80ec8390b0d8 100644
--- a/drivers/gpu/drm/i915/i915_user_extensions.c
+++ b/drivers/gpu/drm/i915/i915_user_extensions.c
@@ -50,11 +50,11 @@ int i915_user_extensions(struct i915_user_extension __user 
*ext,
if (err)
return err;
 
-   if (get_user(next, &ext->next_extension) ||
-   overflows_type(next, ext))
+   if (get_user(next, &ext->next_extension))
return -EFAULT;
 
-   ext = u64_to_user_ptr(next);
+   if (check_assign_user_ptr(next, ext))
+   return -EFAULT;
}
 
return 0;
diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index 19dfdd74835e..8ccbfa46f0ed 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -5,6 +5,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * We need to compute the minimum and maximum values representable in a given
@@ -127,6 +128,49 @@ static inline bool __must_check __must_check_overflow(bool 
overflow)
(*_d >> _to_shift) != _a);  \
 }))
 
+/**
+ * check_assign - perform an assigning source value into destination pointer
+ *along with an overflow check.
+ *
+ * @value: source value
+ * @ptr: Destination pointer address
+ *
+ * Returns:
+ * true if the value would overflow the destination, false otherwise.
+ * When no overflow occurs, the value is assigned to the destination.
+ * This follows the same return policy as the other check_*_overflow()
+ * functions, which return non-zero on failure.
+ */
+#define check_assign(value, ptr) __must_check_overflow(({  \
+   check_add_overflow(0, value, ptr);  \
+}))
+
+/**
+ * check_assign_user_ptr - perform an assigning source value into destination
+ * pointer type variable along with an overflow check
+ *
+ * @value: source value; a source value is expected to have a value of a size
+ * that can be stored in a pointer-type variable.
+ * @ptr: destination pointer type variable
+ *
+ * u64_to_user_ptr can be used in the kernel to avoid warnings about integers
+ * and pointers of different sizes. But u64_to_user_ptr is not performing the
+ * checking of overflow. If you need an explicit overflow check while
+ * assigning, check_assign_user_ptr() can be used to assign integers into
+ * pointers along with an overflow check. If ptr is not a pointer type,
+ * a warning message outputs while 

[PATCH v13 4/9] drm/i915/gem: Typecheck page lookups

2022-09-28 Thread Gwan-gyeong Mun
From: Chris Wilson 

We need to check that we avoid integer overflows when looking up a page,
and so fix all the instances where we have mistakenly used a plain
integer instead of a more suitable long. Be pedantic and add integer
typechecking to the lookup so that we can be sure that we are safe.
And it also uses pgoff_t as our page lookups must remain compatible with
the page cache, pgoff_t is currently exactly unsigned long.

v2: Move added i915_utils's macro into drm_util header (Jani N)
v3: Make not use the same macro name on a function. (Mauro)
For kernel-doc, macros and functions are handled in the same namespace,
the same macro name on a function prevents ever adding documentation
for it.
v4: Add kernel-doc markups to the kAPI functions and macros (Mauro)
v5: Fix an alignment to match open parenthesis
v6: Rebase
v10: Use assert_typable instead of exactly_pgoff_t() macro. (Kees)
v11: Change the use of assert_typable to assert_same_typable (G.G)
v12: Change to use static_assert(__castable_to_type(n ,T)) style since
 the assert_same_typable() macro has been dropped. (G.G)
v13: Change the use of __castable_to_type() to castable_to_type()
 Remove an unnecessary header include line. (G.G)

Signed-off-by: Chris Wilson 
Signed-off-by: Gwan-gyeong Mun 
Cc: Tvrtko Ursulin 
Cc: Matthew Auld 
Cc: Thomas Hellström 
Cc: Kees Cook 
Reviewed-by: Nirmoy Das  (v2)
Reviewed-by: Mauro Carvalho Chehab  (v3)
Reviewed-by: Andrzej Hajda  (v5)
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c|   7 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.h| 293 --
 drivers/gpu/drm/i915/gem/i915_gem_pages.c |  27 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c   |   2 +-
 .../drm/i915/gem/selftests/i915_gem_context.c |  12 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|   8 +-
 .../drm/i915/gem/selftests/i915_gem_object.c  |   8 +-
 drivers/gpu/drm/i915/i915_gem.c   |  18 +-
 drivers/gpu/drm/i915/i915_vma.c   |   8 +-
 9 files changed, 322 insertions(+), 61 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 7ff9c7877bec..29ed0ec05d12 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -413,10 +413,11 @@ void __i915_gem_object_invalidate_frontbuffer(struct 
drm_i915_gem_object *obj,
 static void
 i915_gem_object_read_from_page_kmap(struct drm_i915_gem_object *obj, u64 
offset, void *dst, int size)
 {
+   pgoff_t idx = offset >> PAGE_SHIFT;
void *src_map;
void *src_ptr;
 
-   src_map = kmap_atomic(i915_gem_object_get_page(obj, offset >> 
PAGE_SHIFT));
+   src_map = kmap_atomic(i915_gem_object_get_page(obj, idx));
 
src_ptr = src_map + offset_in_page(offset);
if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
@@ -429,9 +430,10 @@ i915_gem_object_read_from_page_kmap(struct 
drm_i915_gem_object *obj, u64 offset,
 static void
 i915_gem_object_read_from_page_iomap(struct drm_i915_gem_object *obj, u64 
offset, void *dst, int size)
 {
+   pgoff_t idx = offset >> PAGE_SHIFT;
+   dma_addr_t dma = i915_gem_object_get_dma_address(obj, idx);
void __iomem *src_map;
void __iomem *src_ptr;
-   dma_addr_t dma = i915_gem_object_get_dma_address(obj, offset >> 
PAGE_SHIFT);
 
src_map = io_mapping_map_wc(&obj->mm.region->iomap,
dma - obj->mm.region->region.start,
@@ -460,6 +462,7 @@ i915_gem_object_read_from_page_iomap(struct 
drm_i915_gem_object *obj, u64 offset
  */
 int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 
offset, void *dst, int size)
 {
+   GEM_BUG_ON(overflows_type(offset >> PAGE_SHIFT, pgoff_t));
GEM_BUG_ON(offset >= obj->base.size);
GEM_BUG_ON(offset_in_page(offset) > PAGE_SIZE - size);
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index a3b7551a57fc..26e7f86dbed9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -27,8 +27,10 @@ enum intel_region_id;
  * spot such a local variable, please consider fixing!
  *
  * Aside from our own locals (for which we have no excuse!):
- * - sg_table embeds unsigned int for num_pages
- * - get_user_pages*() mixed ints with longs
+ * - sg_table embeds unsigned int for nents
+ *
+ * We can check for invalidly typed locals with typecheck(), see for example
+ * i915_gem_object_get_sg().
  */
 #define GEM_CHECK_SIZE_OVERFLOW(sz) \
GEM_WARN_ON((sz) >> PAGE_SHIFT > INT_MAX)
@@ -363,44 +365,289 @@ i915_gem_object_get_tile_row_size(const struct 
drm_i915_gem_object *obj)
 int i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
   unsigned int tiling, unsigned int stride);
 
+/**
+ * __i915_gem_object_page_iter_get_sg - helper to find the t

[PATCH v13 6/9] drm/i915: Check for integer truncation on the configuration of ttm place

2022-09-28 Thread Gwan-gyeong Mun
There is an impedance mismatch between the first/last valid page
frame number of ttm place in unsigned and our memory/page accounting in
unsigned long.
As the object size is under the control of userspace, we have to be prudent
and catch the conversion errors.
To catch the implicit truncation as we switch from unsigned long to
unsigned, we use overflows_type check and report E2BIG or overflow_type
prior to the operation.

v3: Not to change execution inside a macro. (Mauro)
Add safe_conversion_gem_bug_on() macro and remove temporal
SAFE_CONVERSION() macro.
v4: Fix unhandled GEM_BUG_ON() macro call from safe_conversion_gem_bug_on()
v6: Fix to follow general use case for GEM_BUG_ON(). (Jani)
v7: Fix to use WARN_ON() macro where GEM_BUG_ON() macro was used. (Jani)
v8: Replace safe_conversion() with check_assign() (Kees)

Signed-off-by: Gwan-gyeong Mun 
Cc: Chris Wilson 
Cc: Matthew Auld 
Cc: Thomas Hellström 
Cc: Jani Nikula 
Reviewed-by: Nirmoy Das  (v2)
Reviewed-by: Mauro Carvalho Chehab  (v3)
Reported-by: kernel test robot 
Reviewed-by: Andrzej Hajda  (v5)
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c |  6 +++---
 drivers/gpu/drm/i915/intel_region_ttm.c | 17 ++---
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 8d7c392d335c..d33f06b95c48 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -140,14 +140,14 @@ i915_ttm_place_from_region(const struct 
intel_memory_region *mr,
if (flags & I915_BO_ALLOC_CONTIGUOUS)
place->flags |= TTM_PL_FLAG_CONTIGUOUS;
if (offset != I915_BO_INVALID_OFFSET) {
-   place->fpfn = offset >> PAGE_SHIFT;
-   place->lpfn = place->fpfn + (size >> PAGE_SHIFT);
+   WARN_ON(check_assign(offset >> PAGE_SHIFT, &place->fpfn));
+   WARN_ON(check_assign(place->fpfn + (size >> PAGE_SHIFT), 
&place->lpfn));
} else if (mr->io_size && mr->io_size < mr->total) {
if (flags & I915_BO_ALLOC_GPU_ONLY) {
place->flags |= TTM_PL_FLAG_TOPDOWN;
} else {
place->fpfn = 0;
-   place->lpfn = mr->io_size >> PAGE_SHIFT;
+   WARN_ON(check_assign(mr->io_size >> PAGE_SHIFT, 
&place->lpfn));
}
}
 }
diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c 
b/drivers/gpu/drm/i915/intel_region_ttm.c
index 575d67bc6ffe..37a964b20b36 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.c
+++ b/drivers/gpu/drm/i915/intel_region_ttm.c
@@ -209,14 +209,23 @@ intel_region_ttm_resource_alloc(struct 
intel_memory_region *mem,
if (flags & I915_BO_ALLOC_CONTIGUOUS)
place.flags |= TTM_PL_FLAG_CONTIGUOUS;
if (offset != I915_BO_INVALID_OFFSET) {
-   place.fpfn = offset >> PAGE_SHIFT;
-   place.lpfn = place.fpfn + (size >> PAGE_SHIFT);
+   if (WARN_ON(check_assign(offset >> PAGE_SHIFT, &place.fpfn))) {
+   ret = -E2BIG;
+   goto out;
+   }
+   if (WARN_ON(check_assign(place.fpfn + (size >> PAGE_SHIFT), 
&place.lpfn))) {
+   ret = -E2BIG;
+   goto out;
+   }
} else if (mem->io_size && mem->io_size < mem->total) {
if (flags & I915_BO_ALLOC_GPU_ONLY) {
place.flags |= TTM_PL_FLAG_TOPDOWN;
} else {
place.fpfn = 0;
-   place.lpfn = mem->io_size >> PAGE_SHIFT;
+   if (WARN_ON(check_assign(mem->io_size >> PAGE_SHIFT, 
&place.lpfn))) {
+   ret = -E2BIG;
+   goto out;
+   }
}
}
 
@@ -224,6 +233,8 @@ intel_region_ttm_resource_alloc(struct intel_memory_region 
*mem,
mock_bo.bdev = &mem->i915->bdev;
 
ret = man->func->alloc(man, &mock_bo, &place, &res);
+
+out:
if (ret == -ENOSPC)
ret = -ENXIO;
if (!ret)
-- 
2.37.1



[PATCH v13 7/9] drm/i915: Check if the size is too big while creating shmem file

2022-09-28 Thread Gwan-gyeong Mun
The __shmem_file_setup() function returns -EINVAL if size is greater than
MAX_LFS_FILESIZE. To report this error consistently with other code that
returns -E2BIG when the size is too large, this patch adds a check that
returns -E2BIG when the size is larger than can be handled.

v4: If BITS_PER_LONG is 32, size > MAX_LFS_FILESIZE is always false, so it
checks only when BITS_PER_LONG is 64.

Signed-off-by: Gwan-gyeong Mun 
Cc: Chris Wilson 
Cc: Matthew Auld 
Cc: Thomas Hellström 
Reviewed-by: Nirmoy Das 
Reviewed-by: Mauro Carvalho Chehab 
Reported-by: kernel test robot 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 339b0a9cf2d0..ca30060e34ab 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -541,6 +541,20 @@ static int __create_shmem(struct drm_i915_private *i915,
 
drm_gem_private_object_init(&i915->drm, obj, size);
 
+   /* XXX: The __shmem_file_setup() function returns -EINVAL if size is
+* greater than MAX_LFS_FILESIZE.
+* To handle the same error as other code that returns -E2BIG when
+* the size is too large, we add a code that returns -E2BIG when the
+* size is larger than the size that can be handled.
+* If BITS_PER_LONG is 32, size > MAX_LFS_FILESIZE is always false,
+* so we only need to check when BITS_PER_LONG is 64.
+* If BITS_PER_LONG is 32, E2BIG checks are processed when
+* i915_gem_object_size_2big() is called before init_object() callback
+* is called.
+*/
+   if (BITS_PER_LONG == 64 && size > MAX_LFS_FILESIZE)
+   return -E2BIG;
+
if (i915->mm.gemfs)
filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
 flags);
-- 
2.37.1



Re: [PATCH v5 5/6] drm/mediatek: add mediatek-drm of vdosys0 support for mt8195

2022-09-28 Thread AngeloGioacchino Del Regno

Il 27/09/22 17:27, Jason-JH.Lin ha scritto:

Add driver data of mt8195 vdosys0 to mediatek-drm and the sub driver.

Signed-off-by: Jason-JH.Lin 


Reviewed-by: AngeloGioacchino Del Regno 





[PATCH v13 8/9] drm/i915: Use error code as -E2BIG when the size of gem ttm object is too large

2022-09-28 Thread Gwan-gyeong Mun
The ttm_bo_init_reserved() function returns -ENOSPC if the size is too big
to add vma. The direct function that returns -ENOSPC is 
drm_mm_insert_node_in_range().
To handle the same error as other code returning -E2BIG when the size is
too large, it converts return value to -E2BIG.

Signed-off-by: Gwan-gyeong Mun 
Cc: Chris Wilson 
Cc: Matthew Auld 
Cc: Thomas Hellström 
Reviewed-by: Nirmoy Das 
Reviewed-by: Mauro Carvalho Chehab 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index d33f06b95c48..a2557f1ecbce 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -1243,6 +1243,17 @@ int __i915_gem_ttm_object_init(struct 
intel_memory_region *mem,
ret = ttm_bo_init_reserved(&i915->bdev, i915_gem_to_ttm(obj), bo_type,
   &i915_sys_placement, page_size >> PAGE_SHIFT,
   &ctx, NULL, NULL, i915_ttm_bo_destroy);
+
+   /*
+* XXX: The ttm_bo_init_reserved() functions returns -ENOSPC if the size
+* is too big to add vma. The direct function that returns -ENOSPC is
+* drm_mm_insert_node_in_range(). To handle the same error as other code
+* that returns -E2BIG when the size is too large, it converts -ENOSPC 
to
+* -E2BIG.
+*/
+   if (size >> PAGE_SHIFT > INT_MAX && ret == -ENOSPC)
+   ret = -E2BIG;
+
if (ret)
return i915_ttm_err_to_gem(ret);
 
-- 
2.37.1



[PATCH v13 9/9] drm/i915: Remove truncation warning for large objects

2022-09-28 Thread Gwan-gyeong Mun
From: Chris Wilson 

Having addressed the issues surrounding incorrect types for local
variables and potential integer truncation in using the scatterlist API,
we have closed all the loopholes we had previously identified with
dangerously large object creation. As such, we can eliminate the warning
put in place to remind us to complete the review.

Signed-off-by: Chris Wilson 
Signed-off-by: Gwan-gyeong Mun 
Cc: Tvrtko Ursulin 
Cc: Brian Welty 
Cc: Matthew Auld 
Cc: Thomas Hellström 
Testcase: igt@gem_create@create-massive
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/4991
Reviewed-by: Nirmoy Das 
Reviewed-by: Mauro Carvalho Chehab 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.h | 15 ---
 1 file changed, 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 9f8e29112c31..59a64262647b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -20,25 +20,10 @@
 
 enum intel_region_id;
 
-/*
- * XXX: There is a prevalence of the assumption that we fit the
- * object's page count inside a 32bit _signed_ variable. Let's document
- * this and catch if we ever need to fix it. In the meantime, if you do
- * spot such a local variable, please consider fixing!
- *
- * We can check for invalidly typed locals with typecheck(), see for example
- * i915_gem_object_get_sg().
- */
-#define GEM_CHECK_SIZE_OVERFLOW(sz) \
-   GEM_WARN_ON((sz) >> PAGE_SHIFT > INT_MAX)
-
 static inline bool i915_gem_object_size_2big(u64 size)
 {
struct drm_i915_gem_object *obj;
 
-   if (GEM_CHECK_SIZE_OVERFLOW(size))
-   return true;
-
if (overflows_type(size, obj->base.size))
return true;
 
-- 
2.37.1



Re: [PATCH v5 3/6] soc: mediatek: add mtk-mmsys support for mt8195 vdosys0

2022-09-28 Thread AngeloGioacchino Del Regno

Il 27/09/22 17:27, Jason-JH.Lin ha scritto:

1. Add mt8195 driver data with compatible "mediatek-mt8195-vdosys0".
2. Add mt8195 routing table settings of vdosys0.

Signed-off-by: Jason-JH.Lin 


Reviewed-by: AngeloGioacchino Del Regno 





Re: [PATCH v5 1/6] dt-bindings: arm: mediatek: mmsys: change compatible for MT8195

2022-09-28 Thread AngeloGioacchino Del Regno

Il 27/09/22 17:26, Jason-JH.Lin ha scritto:

For previous MediaTek SoCs, such as MT8173, there are 2 display HW
pipelines binding to 1 mmsys with the same power domain, the same
clock driver and the same mediatek-drm driver.

For MT8195, VDOSYS0 and VDOSYS1 are 2 display HW pipelines binding to
2 different power domains, different clock drivers and different
mediatek-drm drivers.

Moreover, the hardware pipeline of VDOSYS0 has these components: COLOR,
CCORR, AAL, GAMMA, DITHER. They are related to PQ (Picture Quality)
and they make VDOSYS0 support the PQ function, while they are not
included in VDOSYS1.

The hardware pipeline of VDOSYS1 has the component ETHDR (HDR related
component). It makes VDOSYS1 support the HDR function, while it is not
included in VDOSYS0.

To summarize:
Only VDOSYS0 can support PQ adjustment.
Only VDOSYS1 can support HDR adjustment.

Therefore, we need to separate these two different mmsys hardwares to
2 different compatibles for MT8195.

Fixes: 81c5a41d10b9 ("dt-bindings: arm: mediatek: mmsys: add mt8195 SoC 
binding")
Signed-off-by: Jason-JH.Lin 
Signed-off-by: Bo-Chen Chen 


Reviewed-by: AngeloGioacchino Del Regno 





Re: [PATCH v2] overflow: Introduce overflows_type() and castable_to_type()

2022-09-28 Thread Gwan-gyeong Mun

Hi Kees,

To check the intel-gfx ci results and test results from other mailing 
lists, I have rebased this patch and included it in this series [1].


[1] https://patchwork.freedesktop.org/series/109169/

G.G

On 9/26/22 10:11 PM, Kees Cook wrote:

Implement a robust overflows_type() macro to test if a variable or
constant value would overflow another variable or type. This can be
used as a constant expression for static_assert() (which requires a
constant expression[1][2]) when used on constant values. This must be
constructed manually, since __builtin_add_overflow() does not produce
a constant expression[3].

Additionally adds castable_to_type(), similar to __same_type(), but for
checking if a constant value would overflow if cast to a given type.

Add unit tests for overflows_type(), __same_type(), and castable_to_type()
to the existing KUnit "overflow" test.

[1] https://en.cppreference.com/w/c/language/_Static_assert
[2] C11 standard (ISO/IEC 9899:2011): 6.7.10 Static assertions
[3] https://gcc.gnu.org/onlinedocs/gcc/Integer-Overflow-Builtins.html
 6.56 Built-in Functions to Perform Arithmetic with Overflow Checking
 Built-in Function: bool __builtin_add_overflow (type1 a, type2 b,

Cc: Luc Van Oostenryck 
Cc: Nathan Chancellor 
Cc: Nick Desaulniers 
Cc: Tom Rix 
Cc: Daniel Latypov 
Cc: Vitor Massaru Iha 
Cc: "Gustavo A. R. Silva" 
Cc: linux-harden...@vger.kernel.org
Cc: l...@lists.linux.dev
Co-developed-by: Gwan-gyeong Mun 
Signed-off-by: Gwan-gyeong Mun 
Signed-off-by: Kees Cook 
---
v2:
  - fix comment typo
  - wrap clang pragma to avoid GCC warnings
  - style nit cleanups
  - rename __castable_to_type() to castable_to_type()
  - remove prior overflows_type() definition
v1: https://lore.kernel.org/lkml/20220926003743.409911-1-keesc...@chromium.org
---
  drivers/gpu/drm/i915/i915_utils.h |   4 -
  include/linux/compiler.h  |   1 +
  include/linux/overflow.h  |  48 
  lib/overflow_kunit.c  | 388 +-
  4 files changed, 436 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_utils.h 
b/drivers/gpu/drm/i915/i915_utils.h
index c10d68cdc3ca..d14b7faee054 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -111,10 +111,6 @@ bool i915_error_injected(void);
  #define range_overflows_end_t(type, start, size, max) \
range_overflows_end((type)(start), (type)(size), (type)(max))
  
-/* Note we don't consider signbits :| */

-#define overflows_type(x, T) \
-   (sizeof(x) > sizeof(T) && (x) >> BITS_PER_TYPE(T))
-
  #define ptr_mask_bits(ptr, n) ({  \
unsigned long __v = (unsigned long)(ptr);   \
(typeof(ptr))(__v & -BIT(n));   \
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 7713d7bcdaea..c631107e93b1 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -244,6 +244,7 @@ static inline void *offset_to_ptr(const int *off)
   * bool and also pointer types.
   */
  #define is_signed_type(type) (((type)(-1)) < (__force type)1)
+#define is_unsigned_type(type) (!is_signed_type(type))
  
  /*

   * This is needed in functions which generate the stack canary, see
diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index 19dfdd74835e..58eb34aa2af9 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -127,6 +127,54 @@ static inline bool __must_check __must_check_overflow(bool 
overflow)
(*_d >> _to_shift) != _a);\
  }))
  
+#define __overflows_type_constexpr(x, T) (			\

+   is_unsigned_type(typeof(x)) ?   \
+   (x) > type_max(typeof(T)) ? 1 : 0\
+   : is_unsigned_type(typeof(T)) ? \
+   (x) < 0 || (x) > type_max(typeof(T)) ? 1 : 0  \
+   : (x) < type_min(typeof(T)) ||   \
+ (x) > type_max(typeof(T)) ? 1 : 0)
+
+#define __overflows_type(x, T) ({  \
+   typeof(T) v = 0;\
+   check_add_overflow((x), v, &v); \
+})
+
+/**
+ * overflows_type - helper for checking the overflows between value, variables,
+ * or data type
+ *
+ * @n: source constant value or variable to be checked
+ * @T: destination variable or data type proposed to store @n
+ *
+ * Compares the @n expression for whether or not it can safely fit in
+ * the storage of the type in @T. @n and @T can have different types.
+ * If @n is a constant expression, this will also resolve to a constant
+ * expression.
+ *
+ * Returns: true if overflow can occur, false otherwise.
+ */
+#define overflows_type(n, T)   \
+   __builtin_choose_expr(__is_constexpr(n),\
+ __overflows_type_constexpr(n, T), \
+   

Re: [PATCH 01/12] drm/i915/gen8: Create separate reg definitions for new MCR registers

2022-09-28 Thread Balasubramani Vivekanandan
On 19.09.2022 15:32, Matt Roper wrote:
> Gen8 was the first time our hardware had multicast registers (or at
> least the first time the multicast nature was exposed and MMIO accesses
> could be steered).  There are some registers that transitioned from
> singleton behavior to multicast during the gen7 -> gen8 transition;
> let's duplicate the register definitions for those registers in
> preparation for upcoming patches that will handle MCR registers in a
> special manner.
> 
> The registers adjusted are:
>  * MISCCPCTL
>  * SAMPLER_INSTDONE
>  * ROW_INSTDONE
>  * ROW_CHICKEN2
>  * HALF_SLICE_CHICKEN1
>  * HALF_SLICE_CHICKEN3
> 
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c |  4 ++--
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h   | 11 +-
>  drivers/gpu/drm/i915/gt/intel_workarounds.c   | 22 +--
>  .../gpu/drm/i915/gt/uc/intel_guc_capture.c|  4 ++--
>  drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c |  2 +-
>  drivers/gpu/drm/i915/gvt/handlers.c   |  2 +-
>  drivers/gpu/drm/i915/gvt/mmio_context.c   |  2 +-
>  drivers/gpu/drm/i915/intel_gvt_mmio_table.c   |  2 +-
>  drivers/gpu/drm/i915/intel_pm.c   | 10 -
>  9 files changed, 34 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
> b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 2ddcad497fa3..c408bac3c533 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -1559,11 +1559,11 @@ void intel_engine_get_instdone(const struct 
> intel_engine_cs *engine,
>   for_each_ss_steering(iter, engine->gt, slice, subslice) {
>   instdone->sampler[slice][subslice] =
>   intel_gt_mcr_read(engine->gt,
> -   GEN7_SAMPLER_INSTDONE,
> +   GEN8_SAMPLER_INSTDONE,
> slice, subslice);
>   instdone->row[slice][subslice] =
>   intel_gt_mcr_read(engine->gt,
> -   GEN7_ROW_INSTDONE,
> +   GEN8_ROW_INSTDONE,
> slice, subslice);
>   }
>  
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
> b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> index 1cbb7226400b..e5a1ea255640 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> @@ -647,6 +647,9 @@
>  
>  #define GEN7_MISCCPCTL   _MMIO(0x9424)
>  #define   GEN7_DOP_CLOCK_GATE_ENABLE (1 << 0)
> +
> +#define GEN8_MISCCPCTL   _MMIO(0x9424)
> +#define   GEN8_DOP_CLOCK_GATE_ENABLE REG_BIT(0)

When I went through the driver to check whether there is any instance where
platforms above Gen7 are still using Gen7 registers, I found the following
two functions still using GEN7_MISCCPCTL. Can you check?

  * dg2_gt_workarounds_init
  * pvc_gt_workarounds_init

Regards,
Bala

>  #define   GEN12_DOP_CLOCK_GATE_RENDER_ENABLE REG_BIT(1)
>  #define   GEN8_DOP_CLOCK_GATE_CFCLK_ENABLE   (1 << 2)
>  #define   GEN8_DOP_CLOCK_GATE_GUC_ENABLE (1 << 4)
> @@ -1068,18 +1071,22 @@
>  #define GEN12_GAM_DONE   _MMIO(0xcf68)
>  
>  #define GEN7_HALF_SLICE_CHICKEN1 _MMIO(0xe100) /* IVB GT1 + VLV 
> */
> +#define GEN8_HALF_SLICE_CHICKEN1 _MMIO(0xe100)
>  #define   GEN7_MAX_PS_THREAD_DEP (8 << 12)
>  #define   GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE(1 << 10)
>  #define   GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE(1 << 4)
>  #define   GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE   (1 << 3)
>  
>  #define GEN7_SAMPLER_INSTDONE_MMIO(0xe160)
> +#define GEN8_SAMPLER_INSTDONE_MMIO(0xe160)
>  #define GEN7_ROW_INSTDONE_MMIO(0xe164)
> +#define GEN8_ROW_INSTDONE_MMIO(0xe164)
>  
>  #define HALF_SLICE_CHICKEN2  _MMIO(0xe180)
>  #define   GEN8_ST_PO_DISABLE (1 << 13)
>  
> -#define HALF_SLICE_CHICKEN3  _MMIO(0xe184)
> +#define HSW_HALF_SLICE_CHICKEN3  _MMIO(0xe184)
> +#define GEN8_HALF_SLICE_CHICKEN3 _MMIO(0xe184)
>  #define   HSW_SAMPLE_C_PERFORMANCE   (1 << 9)
>  #define   GEN8_CENTROID_PIXEL_OPT_DIS(1 << 8)
>  #define   GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC(1 << 5)
> @@ -1132,6 +1139,8 @@
>  #define   DISABLE_EARLY_EOT  REG_BIT(1)
>  
>  #define GEN7_ROW_CHICKEN2_MMIO(0xe4f4)
> +
> +#define GEN8_ROW_CHICKEN2_MMIO(0xe4f4)
>  #define   GEN12_DISABLE_READ_SUPPRESSION REG_BIT(15)
>  #define   GEN12_DISABLE_EARLY_READ   REG_BIT(14)
>  #define   GEN12_ENABLE_LA

Re: [PATCH linux-next v2] backlight: use sysfs_emit() to instead of scnprintf()

2022-09-28 Thread Daniel Thompson
On Wed, Sep 28, 2022 at 01:41:15AM +, yexingchen...@gmail.com wrote:
> From: ye xingchen 
>
> Replace the open-code with sysfs_emit() to simplify the code.
>
> Signed-off-by: ye xingchen 
> ---
> v1 -> v2
> Add the rest of this fixes for this pattern in the 'drivers/video/backlight' 
> directory.
>  drivers/video/backlight/lm3533_bl.c | 10 +-
>  drivers/video/backlight/lp855x_bl.c |  4 ++--

What happened to the lp8788 fixes?


Daniel.


>  2 files changed, 7 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/video/backlight/lm3533_bl.c 
> b/drivers/video/backlight/lm3533_bl.c
> index 1df1b6643c0b..5e2ce9285245 100644
> --- a/drivers/video/backlight/lm3533_bl.c
> +++ b/drivers/video/backlight/lm3533_bl.c
> @@ -66,7 +66,7 @@ static ssize_t show_id(struct device *dev,
>  {
>   struct lm3533_bl *bl = dev_get_drvdata(dev);
>
> - return scnprintf(buf, PAGE_SIZE, "%d\n", bl->id);
> + return sysfs_emit(buf, "%d\n", bl->id);
>  }
>
>  static ssize_t show_als_channel(struct device *dev,
> @@ -75,7 +75,7 @@ static ssize_t show_als_channel(struct device *dev,
>   struct lm3533_bl *bl = dev_get_drvdata(dev);
>   unsigned channel = lm3533_bl_get_ctrlbank_id(bl);
>
> - return scnprintf(buf, PAGE_SIZE, "%u\n", channel);
> + return sysfs_emit(buf, "%u\n", channel);
>  }
>
>  static ssize_t show_als_en(struct device *dev,
> @@ -95,7 +95,7 @@ static ssize_t show_als_en(struct device *dev,
>   mask = 1 << (2 * ctrlbank);
>   enable = val & mask;
>
> - return scnprintf(buf, PAGE_SIZE, "%d\n", enable);
> + return sysfs_emit(buf, "%d\n", enable);
>  }
>
>  static ssize_t store_als_en(struct device *dev,
> @@ -147,7 +147,7 @@ static ssize_t show_linear(struct device *dev,
>   else
>   linear = 0;
>
> - return scnprintf(buf, PAGE_SIZE, "%x\n", linear);
> + return sysfs_emit(buf, "%x\n", linear);
>  }
>
>  static ssize_t store_linear(struct device *dev,
> @@ -190,7 +190,7 @@ static ssize_t show_pwm(struct device *dev,
>   if (ret)
>   return ret;
>
> - return scnprintf(buf, PAGE_SIZE, "%u\n", val);
> + return sysfs_emit(buf, "%u\n", val);
>  }
>
>  static ssize_t store_pwm(struct device *dev,
> diff --git a/drivers/video/backlight/lp855x_bl.c 
> b/drivers/video/backlight/lp855x_bl.c
> index bd0bdeae23a4..fafc1a9e76ef 100644
> --- a/drivers/video/backlight/lp855x_bl.c
> +++ b/drivers/video/backlight/lp855x_bl.c
> @@ -293,7 +293,7 @@ static ssize_t lp855x_get_chip_id(struct device *dev,
>  {
>   struct lp855x *lp = dev_get_drvdata(dev);
>
> - return scnprintf(buf, PAGE_SIZE, "%s\n", lp->chipname);
> + return sysfs_emit(buf, "%s\n", lp->chipname);
>  }
>
>  static ssize_t lp855x_get_bl_ctl_mode(struct device *dev,
> @@ -307,7 +307,7 @@ static ssize_t lp855x_get_bl_ctl_mode(struct device *dev,
>   else if (lp->mode == REGISTER_BASED)
>   strmode = "register based";
>
> - return scnprintf(buf, PAGE_SIZE, "%s\n", strmode);
> + return sysfs_emit(buf, "%s\n", strmode);
>  }
>
>  static DEVICE_ATTR(chip_id, S_IRUGO, lp855x_get_chip_id, NULL);
> --
> 2.25.1
>
>


Re: [PATCH v2 0/2] drm/rockchip: dw_hdmi: Add 4k@30 support

2022-09-28 Thread Sascha Hauer
On Tue, Sep 27, 2022 at 07:53:54PM +0200, Dan Johansen wrote:
> 
> Den 26.09.2022 kl. 12.30 skrev Michael Riesch:
> > Hi Sascha,
> > 
> > On 9/26/22 10:04, Sascha Hauer wrote:
> > > This series adds support for 4k@30 to the rockchip HDMI controller. This
> > > has been tested on a rk3568 rock3a board. It should be possible to add
> > > 4k@60 support the same way, but it doesn't work for me, so let's add
> > > 4k@30 as a first step.
> > >   
> > >  Sascha
> > > 
> > > Changes since v1:
> > > - Allow non standard clock rates only on Synopsys phy as suggested by
> > >Robin Murphy
> > > 
> > > Sascha Hauer (2):
> > >drm/rockchip: dw_hdmi: relax mode_valid hook
> > >drm/rockchip: dw_hdmi: Add support for 4k@30 resolution
> > > 
> > >   drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c | 34 -
> > >   1 file changed, 27 insertions(+), 7 deletions(-)
> > Thanks for the v2! On a RK3568 EVB1 with a HP 27f 4k monitor
> > 
> > Tested-by: Michael Riesch 
> 
> Sadly this still doesn't give my display out on my 2k monitor. Not even just
> 1080p picture like the old current implementation does.

By "like the old current implementation" you mean that this patchset
introduces a regression for you?

Sascha

-- 
Pengutronix e.K.   | |
Steuerwalder Str. 21   | http://www.pengutronix.de/  |
31137 Hildesheim, Germany  | Phone: +49-5121-206917-0|
Amtsgericht Hildesheim, HRA 2686   | Fax:   +49-5121-206917- |


Re: [PATCH v2 0/2] drm/rockchip: dw_hdmi: Add 4k@30 support

2022-09-28 Thread Dan Johansen



Den 28.09.2022 kl. 10.37 skrev Sascha Hauer:

On Tue, Sep 27, 2022 at 07:53:54PM +0200, Dan Johansen wrote:

Den 26.09.2022 kl. 12.30 skrev Michael Riesch:

Hi Sascha,

On 9/26/22 10:04, Sascha Hauer wrote:

This series adds support for 4k@30 to the rockchip HDMI controller. This
has been tested on a rk3568 rock3a board. It should be possible to add
4k@60 support the same way, but it doesn't work for me, so let's add
4k@30 as a first step.

 Sascha

Changes since v1:
- Allow non standard clock rates only on Synopsys phy as suggested by
Robin Murphy

Sascha Hauer (2):
drm/rockchip: dw_hdmi: relax mode_valid hook
drm/rockchip: dw_hdmi: Add support for 4k@30 resolution

   drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c | 34 -
   1 file changed, 27 insertions(+), 7 deletions(-)

Thanks for the v2! On a RK3568 EVB1 with a HP 27f 4k monitor

Tested-by: Michael Riesch 

Sadly this still doesn't give my display out on my 2k monitor. Not even just
1080p picture like the old current implementation does.

By "like the old current implementation" you mean that this patchset
introduces a regression for you?
Yes. What is currently in the kernel at least shows as 1080p on my 2K 
monitor, while this patchset turns off the screen.


Sascha


--
Kind regards
*Dan Johansen*
Project lead of the *Manjaro ARM* project
Manjaro-ARM 


Re: [PATCH v13 5/9] drm/i915: Check for integer truncation on scatterlist creation

2022-09-28 Thread Jani Nikula
On Wed, 28 Sep 2022, Gwan-gyeong Mun  wrote:
> diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h 
> b/drivers/gpu/drm/i915/i915_scatterlist.h
> index 9ddb3e743a3e..1d1802beb42b 100644
> --- a/drivers/gpu/drm/i915/i915_scatterlist.h
> +++ b/drivers/gpu/drm/i915/i915_scatterlist.h
> @@ -220,4 +220,15 @@ struct i915_refct_sgt 
> *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
>u64 region_start,
>u32 page_alignment);
>  
> +/* Wrap scatterlist.h to sanity check for integer truncation */
> +typedef unsigned int __sg_size_t; /* see linux/scatterlist.h */
> +#define sg_alloc_table(sgt, nents, gfp) \
> + overflows_type(nents, __sg_size_t) ? -E2BIG \
> + : ((sg_alloc_table)(sgt, (__sg_size_t)(nents), gfp))
> +
> +#define sg_alloc_table_from_pages_segment(sgt, pages, npages, offset, size, 
> max_segment, gfp) \
> + overflows_type(npages, __sg_size_t) ? -E2BIG \
> + : ((sg_alloc_table_from_pages_segment)(sgt, pages, 
> (__sg_size_t)(npages), offset, \
> +size, max_segment, gfp))
> +
>  #endif

No. I don't think we should shadow sg_alloc_table() and
sg_alloc_table_from_pages_segment().

Either get this in scatterlist.h (preferred) or prefix with i915_ or
whatever to indicate it's our local thing.

i915_scatterlist.h already has too much scatterlist "namespace" abuse
that I'd rather see gone than violated more.


BR,
Jani.



-- 
Jani Nikula, Intel Open Source Graphics Center


Re: [PATCH -next v3] backlight: gpio_backlight: Switch to use dev_err_probe() helper

2022-09-28 Thread Lee Jones
On Tue, 27 Sep 2022, Daniel Thompson wrote:

> On Tue, Sep 27, 2022 at 11:31:38AM +0800, Yang Yingliang wrote:
> > In the probe path, dev_err() can be replaced with dev_err_probe()
> > which will check if error code is -EPROBE_DEFER and prints the
> > error name. It also sets the defer probe reason which can be
> > checked later through debugfs. It's more simple in error path.
> >
> > Signed-off-by: Yang Yingliang 
> 
> Reviewed-by: Daniel Thompson 


The original patch is not in my inbox.

Was I missed from the original mail?

-- 
Lee Jones [李琼斯]


Re: [PATCH -next v3] backlight: gpio_backlight: Switch to use dev_err_probe() helper

2022-09-28 Thread Yang Yingliang

Hi,

On 2022/9/28 17:11, Lee Jones wrote:

On Tue, 27 Sep 2022, Daniel Thompson wrote:


On Tue, Sep 27, 2022 at 11:31:38AM +0800, Yang Yingliang wrote:

In the probe path, dev_err() can be replaced with dev_err_probe()
which will check if error code is -EPROBE_DEFER and prints the
error name. It also sets the defer probe reason which can be
checked later through debugfs. It's more simple in error path.

Signed-off-by: Yang Yingliang 

Reviewed-by: Daniel Thompson 


The original patch is not in my inbox.

Was I missed from the original mail?

I had already added your email address to my Cc list when sending the patch.

Here are the earlier mails:
V1:
https://lore.kernel.org/linux-fbdev/f98b2e35-0f86-ffd0-db11-ca91930e1...@huawei.com/T/#t

V2:
https://lore.kernel.org/linux-fbdev/7d8cfce2-ba08-afc0-de2d-972ee15b5...@huawei.com/T/#t

Thanks,
Yang


[PATCH] drm/tve200: Use drm_* variants for logging

2022-09-28 Thread Khalid Masum
We have routines like drm_info/warn/err for logging. Use them instead
of dev_* variants to get drm-formatted log messages.

Signed-off-by: Khalid Masum 
---
 drivers/gpu/drm/tve200/tve200_display.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/tve200/tve200_display.c 
b/drivers/gpu/drm/tve200/tve200_display.c
index 771bad881714..d453581b6b18 100644
--- a/drivers/gpu/drm/tve200/tve200_display.c
+++ b/drivers/gpu/drm/tve200/tve200_display.c
@@ -60,7 +60,7 @@ irqreturn_t tve200_irq(int irq, void *data)
}
writel(val, priv->regs + TVE200_CTRL);
} else
-   dev_err(priv->drm->dev, "stray IRQ %08x\n", stat);
+   drm_err(priv->drm, "stray IRQ %08x\n", stat);
 
/* Clear the interrupt once done */
writel(stat, priv->regs + TVE200_INT_CLR);
@@ -146,7 +146,7 @@ static void tve200_display_enable(struct 
drm_simple_display_pipe *pipe,
}
if (retries == 5 &&
readl(priv->regs + TVE200_CTRL_4) & TVE200_CTRL_4_RESET) {
-   dev_err(drm->dev, "can't get hardware out of reset\n");
+   drm_err(drm, "can't get hardware out of reset\n");
return;
}
 
@@ -171,14 +171,14 @@ static void tve200_display_enable(struct 
drm_simple_display_pipe *pipe,
if ((mode->hdisplay == 352 && mode->vdisplay == 240) || /* SIF(525) */
(mode->hdisplay == 352 && mode->vdisplay == 288)) { /* CIF(625) */
ctrl1 |= TVE200_CTRL_IPRESOL_CIF;
-   dev_info(drm->dev, "CIF mode\n");
+   drm_info(drm, "CIF mode\n");
} else if (mode->hdisplay == 640 && mode->vdisplay == 480) {
ctrl1 |= TVE200_CTRL_IPRESOL_VGA;
-   dev_info(drm->dev, "VGA mode\n");
+   drm_info(drm, "VGA mode\n");
} else if ((mode->hdisplay == 720 && mode->vdisplay == 480) ||
   (mode->hdisplay == 720 && mode->vdisplay == 576)) {
ctrl1 |= TVE200_CTRL_IPRESOL_D1;
-   dev_info(drm->dev, "D1 mode\n");
+   drm_info(drm, "D1 mode\n");
}
 
if (format & DRM_FORMAT_BIG_ENDIAN) {
@@ -226,7 +226,7 @@ static void tve200_display_enable(struct 
drm_simple_display_pipe *pipe,
ctrl1 |= TVE200_IPDMOD_YUV420;
break;
default:
-   dev_err(drm->dev, "Unknown FB format 0x%08x\n",
+   drm_err(drm, "Unknown FB format 0x%08x\n",
fb->format->format);
break;
}
-- 
2.37.3



Re: [PATCH 1/4] drm: lcdif: Fix indentation in lcdif_regs.h

2022-09-28 Thread Kieran Bingham
Quoting Laurent Pinchart (2022-09-28 00:38:18)
> A couple of the register macro values are incorrectly indented. Fix
> them.
> 

Reviewed-by: Kieran Bingham 

> Signed-off-by: Laurent Pinchart 
> ---
>  drivers/gpu/drm/mxsfb/lcdif_regs.h | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/mxsfb/lcdif_regs.h 
> b/drivers/gpu/drm/mxsfb/lcdif_regs.h
> index 8e8bef175bf2..013f2cace2a0 100644
> --- a/drivers/gpu/drm/mxsfb/lcdif_regs.h
> +++ b/drivers/gpu/drm/mxsfb/lcdif_regs.h
> @@ -130,7 +130,7 @@
>  #define CTRL_FETCH_START_OPTION_BPVBIT(9)
>  #define CTRL_FETCH_START_OPTION_RESV   GENMASK(9, 8)
>  #define CTRL_FETCH_START_OPTION_MASK   GENMASK(9, 8)
> -#define CTRL_NEG   BIT(4)
> +#define CTRL_NEG   BIT(4)
>  #define CTRL_INV_PXCK  BIT(3)
>  #define CTRL_INV_DEBIT(2)
>  #define CTRL_INV_VSBIT(1)
> @@ -186,7 +186,7 @@
>  #define INT_ENABLE_D1_PLANE_PANIC_EN   BIT(0)
>  
>  #define CTRLDESCL0_1_HEIGHT(n) (((n) & 0x) << 16)
> -#define CTRLDESCL0_1_HEIGHT_MASK   GENMASK(31, 16)
> +#define CTRLDESCL0_1_HEIGHT_MASK   GENMASK(31, 16)
>  #define CTRLDESCL0_1_WIDTH(n)  ((n) & 0x)
>  #define CTRLDESCL0_1_WIDTH_MASKGENMASK(15, 0)
>  
> -- 
> Regards,
> 
> Laurent Pinchart
>


Re: [PATCH v2 1/4] drm: lcdif: Fix indentation in lcdif_regs.h

2022-09-28 Thread Kieran Bingham
Quoting Laurent Pinchart (2022-09-28 01:58:09)
> A couple of the register macro values are incorrectly indented. Fix
> them.
> 

Argh, there was already a v2 posted. Sometimes (more often than I like)
I really hate email...


Reviewed-by: Kieran Bingham 

> Signed-off-by: Laurent Pinchart 
> Reviewed-by: Marek Vasut 
> ---
>  drivers/gpu/drm/mxsfb/lcdif_regs.h | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/mxsfb/lcdif_regs.h 
> b/drivers/gpu/drm/mxsfb/lcdif_regs.h
> index 8e8bef175bf2..013f2cace2a0 100644
> --- a/drivers/gpu/drm/mxsfb/lcdif_regs.h
> +++ b/drivers/gpu/drm/mxsfb/lcdif_regs.h
> @@ -130,7 +130,7 @@
>  #define CTRL_FETCH_START_OPTION_BPVBIT(9)
>  #define CTRL_FETCH_START_OPTION_RESV   GENMASK(9, 8)
>  #define CTRL_FETCH_START_OPTION_MASK   GENMASK(9, 8)
> -#define CTRL_NEG   BIT(4)
> +#define CTRL_NEG   BIT(4)
>  #define CTRL_INV_PXCK  BIT(3)
>  #define CTRL_INV_DEBIT(2)
>  #define CTRL_INV_VSBIT(1)
> @@ -186,7 +186,7 @@
>  #define INT_ENABLE_D1_PLANE_PANIC_EN   BIT(0)
>  
>  #define CTRLDESCL0_1_HEIGHT(n) (((n) & 0x) << 16)
> -#define CTRLDESCL0_1_HEIGHT_MASK   GENMASK(31, 16)
> +#define CTRLDESCL0_1_HEIGHT_MASK   GENMASK(31, 16)
>  #define CTRLDESCL0_1_WIDTH(n)  ((n) & 0x)
>  #define CTRLDESCL0_1_WIDTH_MASKGENMASK(15, 0)
>  
> -- 
> Regards,
> 
> Laurent Pinchart
>


Re: [PATCH v2 2/4] drm: lcdif: Don't use BIT() for multi-bit register fields

2022-09-28 Thread Kieran Bingham
Quoting Laurent Pinchart (2022-09-28 01:58:10)
> The BIT() macro is meant to represent a single bit. Don't use it for
> values of register fields that span multiple bits.
> 
> Signed-off-by: Laurent Pinchart 
> ---
> Changes since v1:
> 
> - Use hex for field values
> ---
>  drivers/gpu/drm/mxsfb/lcdif_regs.h | 28 ++--
>  1 file changed, 14 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/mxsfb/lcdif_regs.h 
> b/drivers/gpu/drm/mxsfb/lcdif_regs.h
> index 013f2cace2a0..0d5d9bedd94a 100644
> --- a/drivers/gpu/drm/mxsfb/lcdif_regs.h
> +++ b/drivers/gpu/drm/mxsfb/lcdif_regs.h
> @@ -138,9 +138,9 @@
>  
>  #define DISP_PARA_DISP_ON  BIT(31)
>  #define DISP_PARA_SWAP_EN  BIT(30)
> -#define DISP_PARA_LINE_PATTERN_UYVY_H  (GENMASK(29, 28) | BIT(26))

I mean, I like BIT (and sometimes GENMASK) but ... What was going on
there!

Definitely better this way.

Reviewed-by: Kieran Bingham 

> -#define DISP_PARA_LINE_PATTERN_RGB565  GENMASK(28, 26)
> -#define DISP_PARA_LINE_PATTERN_RGB888  0
> +#define DISP_PARA_LINE_PATTERN_UYVY_H  (0xd << 26)
> +#define DISP_PARA_LINE_PATTERN_RGB565  (0x7 << 26)
> +#define DISP_PARA_LINE_PATTERN_RGB888  (0x0 << 26)
>  #define DISP_PARA_LINE_PATTERN_MASKGENMASK(29, 26)
>  #define DISP_PARA_DISP_MODE_MASK   GENMASK(25, 24)
>  #define DISP_PARA_BGND_R_MASK  GENMASK(23, 16)
> @@ -202,18 +202,18 @@
>  
>  #define CTRLDESCL0_5_ENBIT(31)
>  #define CTRLDESCL0_5_SHADOW_LOAD_ENBIT(30)
> -#define CTRLDESCL0_5_BPP_16_RGB565 BIT(26)
> -#define CTRLDESCL0_5_BPP_16_ARGB1555   (BIT(26) | BIT(24))
> -#define CTRLDESCL0_5_BPP_16_ARGB   (BIT(26) | BIT(25))
> -#define CTRLDESCL0_5_BPP_YCbCr422  (BIT(26) | BIT(25) | BIT(24))
> -#define CTRLDESCL0_5_BPP_24_RGB888 BIT(27)
> -#define CTRLDESCL0_5_BPP_32_ARGB   (BIT(27) | BIT(24))
> -#define CTRLDESCL0_5_BPP_32_ABGR   (BIT(27) | BIT(25))
> +#define CTRLDESCL0_5_BPP_16_RGB565 (0x4 << 24)
> +#define CTRLDESCL0_5_BPP_16_ARGB1555   (0x5 << 24)
> +#define CTRLDESCL0_5_BPP_16_ARGB   (0x6 << 24)
> +#define CTRLDESCL0_5_BPP_YCbCr422  (0x7 << 24)
> +#define CTRLDESCL0_5_BPP_24_RGB888 (0x8 << 24)
> +#define CTRLDESCL0_5_BPP_32_ARGB   (0x9 << 24)
> +#define CTRLDESCL0_5_BPP_32_ABGR   (0xa << 24)
>  #define CTRLDESCL0_5_BPP_MASK  GENMASK(27, 24)
> -#define CTRLDESCL0_5_YUV_FORMAT_Y2VY1U 0
> -#define CTRLDESCL0_5_YUV_FORMAT_Y2UY1V BIT(14)
> -#define CTRLDESCL0_5_YUV_FORMAT_VY2UY1 BIT(15)
> -#define CTRLDESCL0_5_YUV_FORMAT_UY2VY1 (BIT(15) | BIT(14))
> +#define CTRLDESCL0_5_YUV_FORMAT_Y2VY1U (0x0 << 14)
> +#define CTRLDESCL0_5_YUV_FORMAT_Y2UY1V (0x1 << 14)
> +#define CTRLDESCL0_5_YUV_FORMAT_VY2UY1 (0x2 << 14)
> +#define CTRLDESCL0_5_YUV_FORMAT_UY2VY1 (0x3 << 14)
>  #define CTRLDESCL0_5_YUV_FORMAT_MASK   GENMASK(15, 14)
>  
>  #define CSC0_CTRL_CSC_MODE_RGB2YCbCr   GENMASK(2, 1)
> -- 
> Regards,
> 
> Laurent Pinchart
>


[PATCH v3] drm: document uAPI page-flip flags

2022-09-28 Thread Simon Ser
Document flags accepted by the page-flip and atomic IOCTLs.

v2 (Pekka):
- Mention DRM_EVENT_FLIP_COMPLETE in DRM_MODE_PAGE_FLIP_EVENT docs.
- Expand DRM_MODE_ATOMIC_NONBLOCK and DRM_MODE_ATOMIC_ALLOW_MODESET
  description.
v3:
- Fix struct field ref syntax (Daniel)
- Clarify when artifacts are no longer displayed (Daniel)
- Add note about sinks deciding to show artifacts on their own (Pekka, Daniel)

Signed-off-by: Simon Ser 
Cc: Daniel Vetter 
Cc: Pekka Paalanen 
Cc: Ville Syrjala 
---
 include/uapi/drm/drm_mode.h | 63 -
 1 file changed, 62 insertions(+), 1 deletion(-)

diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index fa953309d9ce..9b10327b9d21 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -935,12 +935,31 @@ struct hdr_output_metadata {
};
 };
 
+/**
+ * DRM_MODE_PAGE_FLIP_EVENT
+ *
+ * Request that the kernel sends back a vblank event (see
+ * struct drm_event_vblank) with the &DRM_EVENT_FLIP_COMPLETE type when the
+ * page-flip is done.
+ */
 #define DRM_MODE_PAGE_FLIP_EVENT 0x01
+/**
+ * DRM_MODE_PAGE_FLIP_ASYNC
+ *
+ * Request that the page-flip is performed as soon as possible, ie. with no
+ * delay due to waiting for vblank. This may cause tearing to be visible on
+ * the screen.
+ */
 #define DRM_MODE_PAGE_FLIP_ASYNC 0x02
 #define DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE 0x4
 #define DRM_MODE_PAGE_FLIP_TARGET_RELATIVE 0x8
 #define DRM_MODE_PAGE_FLIP_TARGET (DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE | \
   DRM_MODE_PAGE_FLIP_TARGET_RELATIVE)
+/**
+ * DRM_MODE_PAGE_FLIP_FLAGS
+ *
+ * Bitmask of flags suitable for &drm_mode_crtc_page_flip_target.flags.
+ */
 #define DRM_MODE_PAGE_FLIP_FLAGS (DRM_MODE_PAGE_FLIP_EVENT | \
  DRM_MODE_PAGE_FLIP_ASYNC | \
  DRM_MODE_PAGE_FLIP_TARGET)
@@ -1034,11 +1053,53 @@ struct drm_mode_destroy_dumb {
__u32 handle;
 };
 
-/* page-flip flags are valid, plus: */
+/**
+ * DRM_MODE_ATOMIC_TEST_ONLY
+ *
+ * Do not apply the atomic commit, instead check whether the hardware supports
+ * this configuration.
+ *
+ * See &drm_mode_config_funcs.atomic_check for more details on test-only
+ * commits.
+ */
 #define DRM_MODE_ATOMIC_TEST_ONLY 0x0100
+/**
+ * DRM_MODE_ATOMIC_NONBLOCK
+ *
+ * Do not block while applying the atomic commit. The &DRM_IOCTL_MODE_ATOMIC
+ * IOCTL returns immediately instead of waiting for the changes to be applied
+ * in hardware. Note, the driver will still check that the update can be
+ * applied before returning.
+ */
 #define DRM_MODE_ATOMIC_NONBLOCK  0x0200
+/**
+ * DRM_MODE_ATOMIC_ALLOW_MODESET
+ *
+ * Allow the update to result in temporary or transient visible artifacts while
+ * the update is being applied. Applying the update may also take significantly
+ * more time than a page flip. All visual artifacts will disappear by the time
+ * the update is completed, as signalled through the vblank event's timestamp
+ * (see struct drm_event_vblank).
+ *
+ * This flag must be set when the KMS update might cause visible artifacts.
+ * Without this flag such KMS update will return an EINVAL error. What kind of
+ * update may cause visible artifacts depends on the driver and the hardware.
+ * User-space that needs to know beforehand if an update might cause visible
+ * artifacts can use &DRM_MODE_ATOMIC_TEST_ONLY without
+ * &DRM_MODE_ATOMIC_ALLOW_MODESET to see if it fails.
+ *
+ * To the best of the driver's knowledge, visual artifacts are guaranteed to
+ * not appear when this flag is not set. Some sinks might display visual
+ * artifacts outside of the driver's control.
+ */
 #define DRM_MODE_ATOMIC_ALLOW_MODESET 0x0400
 
+/**
+ * DRM_MODE_ATOMIC_FLAGS
+ *
+ * Bitfield of flags accepted by the &DRM_IOCTL_MODE_ATOMIC IOCTL in
+ * &drm_mode_atomic.flags.
+ */
 #define DRM_MODE_ATOMIC_FLAGS (\
DRM_MODE_PAGE_FLIP_EVENT |\
DRM_MODE_PAGE_FLIP_ASYNC |\
-- 
2.37.3




Re: [PATCH v2 3/4] drm: lcdif: Switch to limited range for RGB to YUV conversion

2022-09-28 Thread Kieran Bingham
Quoting Laurent Pinchart (2022-09-28 01:58:11)
> Up to and including v1.3, HDMI supported limited quantization range only
> for YCbCr. HDMI v1.4 introduced selectable quantization ranges, but this
> feature isn't supported in the dw-hdmi driver that is used in
> conjunction with the LCDIF in the i.MX8MP. The HDMI YCbCr output is thus
> always advertised in the AVI infoframe as limited range.
> 
> The LCDIF driver, on the other hand, configures the CSC to produce full
> range YCbCr. This mismatch results in loss of details and incorrect
> colours. Fix it by switching to limited range YCbCr.
> 
> The coefficients are copied from drivers/media/platforms/nxp/imx-pxp.c
> for coherency, as the hardware is most likely identical.

Perhaps we need one or two of these somewhere:

https://colorconfidence.com/products/calibrite-colorchecker-display

Or does anyone have one that could test this patch?

Anyway:

Reviewed-by: Kieran Bingham 


> Fixes: 9db35bb349a0 ("drm: lcdif: Add support for i.MX8MP LCDIF variant")
> Signed-off-by: Laurent Pinchart 
> ---
> Changes since v1:
> 
> - Use coefficients from imx-pxp.c
> ---
>  drivers/gpu/drm/mxsfb/lcdif_kms.c | 12 ++--
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/mxsfb/lcdif_kms.c 
> b/drivers/gpu/drm/mxsfb/lcdif_kms.c
> index 1f22ea5896d5..c3622be0c587 100644
> --- a/drivers/gpu/drm/mxsfb/lcdif_kms.c
> +++ b/drivers/gpu/drm/mxsfb/lcdif_kms.c
> @@ -53,16 +53,16 @@ static void lcdif_set_formats(struct lcdif_drm_private 
> *lcdif,
> writel(DISP_PARA_LINE_PATTERN_UYVY_H,
>lcdif->base + LCDC_V8_DISP_PARA);
>  
> -   /* CSC: BT.601 Full Range RGB to YCbCr coefficients. */
> -   writel(CSC0_COEF0_A2(0x096) | CSC0_COEF0_A1(0x04c),
> +   /* CSC: BT.601 Limited Range RGB to YCbCr coefficients. */
> +   writel(CSC0_COEF0_A2(0x081) | CSC0_COEF0_A1(0x041),
>lcdif->base + LCDC_V8_CSC0_COEF0);
> -   writel(CSC0_COEF1_B1(0x7d5) | CSC0_COEF1_A3(0x01d),
> +   writel(CSC0_COEF1_B1(0x7db) | CSC0_COEF1_A3(0x019),
>lcdif->base + LCDC_V8_CSC0_COEF1);
> -   writel(CSC0_COEF2_B3(0x080) | CSC0_COEF2_B2(0x7ac),
> +   writel(CSC0_COEF2_B3(0x070) | CSC0_COEF2_B2(0x7b6),
>lcdif->base + LCDC_V8_CSC0_COEF2);
> -   writel(CSC0_COEF3_C2(0x795) | CSC0_COEF3_C1(0x080),
> +   writel(CSC0_COEF3_C2(0x7a2) | CSC0_COEF3_C1(0x070),
>lcdif->base + LCDC_V8_CSC0_COEF3);
> -   writel(CSC0_COEF4_D1(0x000) | CSC0_COEF4_C3(0x7ec),
> +   writel(CSC0_COEF4_D1(0x010) | CSC0_COEF4_C3(0x7ee),
>lcdif->base + LCDC_V8_CSC0_COEF4);
> writel(CSC0_COEF5_D3(0x080) | CSC0_COEF5_D2(0x080),
>lcdif->base + LCDC_V8_CSC0_COEF5);
> -- 
> Regards,
> 
> Laurent Pinchart
>


Re: [PATCH 02/12] drm/i915/xehp: Create separate reg definitions for new MCR registers

2022-09-28 Thread Balasubramani Vivekanandan
On 19.09.2022 15:32, Matt Roper wrote:
> Starting in Xe_HP, several registers our driver works with have been
> converted from singleton registers into replicated registers with
> multicast behavior.  Although the registers are still located at the
> same MMIO offsets as on previous platforms, let's duplicate the register
> definitions in preparation for upcoming patches that will handle
> multicast registers in a special manner.
> 
> The registers that are now replicated on Xe_HP are:
>  * PAT_INDEX (mslice replication)
>  * FF_MODE2 (gslice replication)
>  * COMMON_SLICE_CHICKEN3 (gslice replication)
>  * SLICE_COMMON_ECO_CHICKEN1 (gslice replication)
>  * SLICE_UNIT_LEVEL_CLKGATE (gslice replication)
>  * LNCFCMOCS (lncf replication)
> 
> Bspec: 66534
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h | 18 -
>  drivers/gpu/drm/i915/gt/intel_gtt.c | 29 ++---
>  drivers/gpu/drm/i915/gt/intel_mocs.c|  5 +++-
>  drivers/gpu/drm/i915/gt/intel_workarounds.c | 24 -
>  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c  |  7 +++--
>  5 files changed, 52 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
> b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> index e5a1ea255640..559e3473f14c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> @@ -329,6 +329,7 @@
>  #define GEN7_TLB_RD_ADDR _MMIO(0x4700)
>  
>  #define GEN12_PAT_INDEX(index)   _MMIO(0x4800 + (index) 
> * 4)
> +#define XEHP_PAT_INDEX(index)_MMIO(0x4800 + (index) 
> * 4)
>  
>  #define XEHP_TILE0_ADDR_RANGE_MMIO(0x4900)
>  #define   XEHP_TILE_LMEM_RANGE_SHIFT 8
> @@ -387,7 +388,8 @@
>  #define   DIS_OVER_FETCH_CACHE   REG_BIT(1)
>  #define   DIS_MULT_MISS_RD_SQUASHREG_BIT(0)
>  
> -#define FF_MODE2 _MMIO(0x6604)
> +#define GEN12_FF_MODE2   _MMIO(0x6604)
> +#define XEHP_FF_MODE2_MMIO(0x6604)
>  #define   FF_MODE2_GS_TIMER_MASK REG_GENMASK(31, 24)
>  #define   FF_MODE2_GS_TIMER_224  
> REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224)
>  #define   FF_MODE2_TDS_TIMER_MASKREG_GENMASK(23, 16)
> @@ -442,6 +444,7 @@
>  #define GEN8_HDC_CHICKEN1_MMIO(0x7304)
>  
>  #define GEN11_COMMON_SLICE_CHICKEN3  _MMIO(0x7304)
> +#define XEHP_COMMON_SLICE_CHICKEN3   _MMIO(0x7304)
>  #define   DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN   REG_BIT(12)
>  #define   XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE  REG_BIT(12)
>  #define   GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC REG_BIT(11)
> @@ -455,10 +458,9 @@
>  #define   DISABLE_PIXEL_MASK_CAMMING (1 << 14)
>  
>  #define GEN9_SLICE_COMMON_ECO_CHICKEN1   _MMIO(0x731c)
> -#define   GEN11_STATE_CACHE_REDIRECT_TO_CS   (1 << 11)
> -
> -#define SLICE_COMMON_ECO_CHICKEN1_MMIO(0x731c)
> +#define XEHP_SLICE_COMMON_ECO_CHICKEN1   _MMIO(0x731c)
>  #define   MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14)
> +#define   GEN11_STATE_CACHE_REDIRECT_TO_CS   (1 << 11)
>  
>  #define GEN9_SLICE_PGCTL_ACK(slice)  _MMIO(0x804c + (slice) * 0x4)
>  #define GEN10_SLICE_PGCTL_ACK(slice) _MMIO(0x804c + ((slice) / 3) * 
> 0x34 + \
> @@ -703,7 +705,8 @@
>  #define   GAMTLBVEBOX0_CLKGATE_DIS   REG_BIT(16)
>  #define   LTCDD_CLKGATE_DIS  REG_BIT(10)
>  
> -#define SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4)
> +#define GEN11_SLICE_UNIT_LEVEL_CLKGATE   _MMIO(0x94d4)
> +#define XEHP_SLICE_UNIT_LEVEL_CLKGATE_MMIO(0x94d4)
>  #define   SARBUNIT_CLKGATE_DIS   (1 << 5)
>  #define   RCCUNIT_CLKGATE_DIS(1 << 7)
>  #define   MSCUNIT_CLKGATE_DIS(1 << 10)
> @@ -718,7 +721,7 @@
>  #define   VSUNIT_CLKGATE_DIS_TGL REG_BIT(19)
>  #define   PSDUNIT_CLKGATE_DISREG_BIT(5)
>  
> -#define SUBSLICE_UNIT_LEVEL_CLKGATE  _MMIO(0x9524)
> +#define GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE_MMIO(0x9524)
>  #define   DSS_ROUTER_CLKGATE_DIS REG_BIT(28)
>  #define   GWUNIT_CLKGATE_DIS REG_BIT(16)
>  
> @@ -943,7 +946,8 @@
>  
>  /* MOCS (Memory Object Control State) registers */
>  #define GEN9_LNCFCMOCS(i)_MMIO(0xb020 + (i) * 4) /* L3 
> Cache Control */

GEN9_LNCFCMOCS is used in a few functions in the file selftest_mocs.c. This
patch has left those instances untouched. Is that intentional, to be handled
as part of a separate series?  If the plan is to handle it sometime later,
can we create a ticket to keep track of it?

Regards,
Bala

> -#define GEN9_LNCFCMOCS_REG_COUNT 32
> +#define XEHP_LNCFCMOCS(i)_MMIO(0xb020 + (i) * 4) /* L3 
> Cache Control */
> +#define LNCFCM

Re: [PATCH v2 4/4] drm: lcdif: Add support for YUV planes

2022-09-28 Thread Kieran Bingham
Quoting Laurent Pinchart (2022-09-28 01:58:12)
> From: Kieran Bingham 
> 

It looks like this has progressed a bit since it left my computer ;-)


> The LCDIF includes a color space converter that supports YUV input. Use
> it to support YUV planes, either through the converter if the output
> format is RGB, or in conversion bypass mode otherwise.
> 
> Signed-off-by: Kieran Bingham 
> Signed-off-by: Laurent Pinchart 
> ---
> Changes since v1:
> 
> - Support all YCbCr encodings and quantization ranges
> - Drop incorrect comment
> ---
>  drivers/gpu/drm/mxsfb/lcdif_kms.c  | 183 +
>  drivers/gpu/drm/mxsfb/lcdif_regs.h |   5 +-
>  2 files changed, 164 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/gpu/drm/mxsfb/lcdif_kms.c 
> b/drivers/gpu/drm/mxsfb/lcdif_kms.c
> index c3622be0c587..b469a90fd50f 100644
> --- a/drivers/gpu/drm/mxsfb/lcdif_kms.c
> +++ b/drivers/gpu/drm/mxsfb/lcdif_kms.c
> @@ -15,6 +15,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -32,13 +33,77 @@
>  /* 
> -
>   * CRTC
>   */
> +
> +/*
> + * Despite the reference manual stating the opposite, the D1, D2 and D3 
> offset
> + * values are added to Y, U and V, not subtracted. They must thus be 
> programmed
> + * with negative values.
> + */
> +static const u32 lcdif_yuv2rgb_coeffs[3][2][6] = {

Ick ... I sort of dislike this. It's fine here at the moment, and I like
the table ... but here we're defining the size of the table based on
external enum values. (Are those ABI stable, perhaps they are already?)

If someone were to put 

 enum drm_color_encoding {
+DRM_COLOR_LEGACY, 
 DRM_COLOR_YCBCR_BT601,
 DRM_COLOR_YCBCR_BT709,
 DRM_COLOR_YCBCR_BT2020,
 DRM_COLOR_ENCODING_MAX,
 };

 enum drm_color_range {
 DRM_COLOR_YCBCR_LIMITED_RANGE,
+DRM_COLOR_YCBCR_MID_RANGE,
 DRM_COLOR_YCBCR_FULL_RANGE,
 DRM_COLOR_RANGE_MAX,
 };

Then this table allocation would be wrong.

Perhaps swapping for

> +static const u32 
> lcdif_yuv2rgb_coeffs[DRM_COLOR_ENCODING_MAX][DRM_COLOR_RANGE_MAX][6] = {

Would be safer ... but longer :-( ? 


Anyway, I think the rest of it looks fine, and perhaps these enums are
in the UAPI which would make them stable anyway:


Reviewed-by: Kieran Bingham 

> +   [DRM_COLOR_YCBCR_BT601] = {
> +   [DRM_COLOR_YCBCR_LIMITED_RANGE] = {
> +   CSC0_COEF0_A1(0x012a) | CSC0_COEF0_A2(0x),
> +   CSC0_COEF1_A3(0x01a2) | CSC0_COEF1_B1(0x0123),
> +   CSC0_COEF2_B2(0x079c) | CSC0_COEF2_B3(0x0730),
> +   CSC0_COEF3_C1(0x0124) | CSC0_COEF3_C2(0x0204),
> +   CSC0_COEF4_C3(0x) | CSC0_COEF4_D1(0x01f0),
> +   CSC0_COEF5_D2(0x0180) | CSC0_COEF5_D3(0x0180),
> +   },
> +   [DRM_COLOR_YCBCR_FULL_RANGE] = {
> +   CSC0_COEF0_A1(0x0100) | CSC0_COEF0_A2(0x),
> +   CSC0_COEF1_A3(0x0167) | CSC0_COEF1_B1(0x0100),
> +   CSC0_COEF2_B2(0x07a8) | CSC0_COEF2_B3(0x0749),
> +   CSC0_COEF3_C1(0x0100) | CSC0_COEF3_C2(0x01c6),
> +   CSC0_COEF4_C3(0x) | CSC0_COEF4_D1(0x),
> +   CSC0_COEF5_D2(0x0180) | CSC0_COEF5_D3(0x0180),
> +   },
> +   },
> +   [DRM_COLOR_YCBCR_BT709] = {
> +   [DRM_COLOR_YCBCR_LIMITED_RANGE] = {
> +   CSC0_COEF0_A1(0x012a) | CSC0_COEF0_A2(0x),
> +   CSC0_COEF1_A3(0x01d6) | CSC0_COEF1_B1(0x0123),
> +   CSC0_COEF2_B2(0x07c9) | CSC0_COEF2_B3(0x0778),
> +   CSC0_COEF3_C1(0x0123) | CSC0_COEF3_C2(0x021d),
> +   CSC0_COEF4_C3(0x) | CSC0_COEF4_D1(0x01f0),
> +   CSC0_COEF5_D2(0x0180) | CSC0_COEF5_D3(0x0180),
> +   },
> +   [DRM_COLOR_YCBCR_FULL_RANGE] = {
> +   CSC0_COEF0_A1(0x0100) | CSC0_COEF0_A2(0x),
> +   CSC0_COEF1_A3(0x0193) | CSC0_COEF1_B1(0x0100),
> +   CSC0_COEF2_B2(0x07d0) | CSC0_COEF2_B3(0x0788),
> +   CSC0_COEF3_C1(0x0100) | CSC0_COEF3_C2(0x01db),
> +   CSC0_COEF4_C3(0x) | CSC0_COEF4_D1(0x),
> +   CSC0_COEF5_D2(0x0180) | CSC0_COEF5_D3(0x0180),
> +   },
> +   },
> +   [DRM_COLOR_YCBCR_BT2020] = {
> +   [DRM_COLOR_YCBCR_LIMITED_RANGE] = {
> +   CSC0_COEF0_A1(0x012a) | CSC0_COEF0_A2(0x),
> +   CSC0_COEF1_A3(0x01b8) | CSC0_COEF1_B1(0x0123),
> +   CSC0_COEF2_B2(0x07d0) | CSC0_COEF2_B3(0x075a),
> +   CSC0_COEF3_C1(0x0124) | CSC0_COEF3_C2(0x0224),
> +   CSC0_COEF4_C3(0x) | CSC0_COEF4_D1(0x01

Re: [RFC v2] drm/kms: control display brightness through drm_connector properties

2022-09-28 Thread Jani Nikula
On Fri, 09 Sep 2022, Hans de Goede  wrote:
> Hi all,
>
> Here is v2 of my "drm/kms: control display brightness through drm_connector 
> properties" RFC:
>
> Changes from version 1:
> - Drop bl_brightness_0_is_min_brightness from list of new connector
>   properties.
> - Clearly define that 0 is always min-brightness when setting the brightness
>   through the connector properties.
> - Drop bl_brightness_control_method from list of new connector
>   properties.
> - Phase 1 of the plan has been completed
>
> As discussed already several times in the past:
>  https://www.x.org/wiki/Events/XDC2014/XDC2014GoedeBacklight/
>  
> https://lore.kernel.org/all/4b17ba08-39f3-57dd-5aad-d37d844b0...@linux.intel.com/
>
> The current userspace API for brightness control offered by
> /sys/class/backlight devices has various issues:
>
> 1. There is no way to map the backlight device to a specific
>display-output / panel (1)
> 2. Controlling the brightness requires root-rights requiring
>desktop-environments to use suid-root helpers for this.
> 3. The meaning of 0 is not clearly defined, it can be either off,
>or minimum brightness at which the display is still readable
>(in a low light environment)
> 4. It's not possible to change both the gamma and the brightness in the
>same KMS atomic commit. You'd want to be able to reduce brightness to
>conserve power, and counter the effects of that by changing gamma to
>reach a visually similar image. And you'd want to have the changes take
>effect at the same time instead of reducing brightness at some frame and
>change gamma at some other frame. This is pretty much impossible to do
>via the sysfs interface.
>
> As already discussed on various conference's hallway tracks
> and as has been proposed on the dri-devel list once before (2),
> it seems that there is consensus that the best way to solve these
> 2 issues is to add support for controlling a video-output's brightness
> through properties on the drm_connector.
>
> This RFC outlines my plan to try and actually implement this,
> which has 3 phases:
>
>
> Phase 1: Stop registering multiple /sys/class/backlight devs for a single 
> display
> =
>
> On x86 there can be multiple firmware + direct-hw-access methods
> for controlling the backlight and in some cases the kernel registers
> multiple backlight-devices for a single internal laptop LCD panel.
>
> A plan to fix this was posted here:
> https://lore.kernel.org/dri-devel/98519ba0-7f18-201a-ea34-652f50343...@redhat.com/
> And a pull-req actually implementing this plan has been send out this week:
> https://lore.kernel.org/dri-devel/261afe3d-7790-e945-adf6-a2c96c9b1...@redhat.com/
>
>
> Phase 2: Add drm_connector properties mirroring the matching backlight device
> =
>
> The plan is to add a drm_connector helper function, which optionally takes
> a pointer to the backlight device for the GPU's native backlight device,
> which will then mirror the backlight settings from the backlight device
> in a set of read/write brightness* properties on the connector.
>
> This function can then be called by GPU drivers for the drm_connector for
> the internal panel and it will then take care of everything. When there
> is no native GPU backlight device, or when it should not be used then
> (on x86) the helper will use the acpi_video_get_backlight_type() to
> determine which backlight-device should be used instead and it will find
> + mirror that one.
>
>
> Phase 3: Deprecate /sys/class/backlight uAPI
> 
>
> Once most userspace has moved over to using the new drm_connector
> brightness props, a Kconfig option can be added to stop exporting
> the backlight-devices under /sys/class/backlight. The plan is to
> just disable the sysfs interface and keep the existing backlight-device
> internal kernel abstraction as is, since some abstraction for (non GPU
> native) backlight devices will be necessary regardless.
>
> It is unsure if we will ever be able to do this. For example people using
> non fully integrated desktop environments like e.g. sway often use custom
> scripts bound to hotkeys to get functionality like the brightness
> up/down keyboard hotkeys changing the brightness. This typically involves
> e.g. the xbacklight utility.
>
> Even if the xbacklight utility is ported to use kms with the new connector
> object brightness properties then this still will not work because
> changing the properties will require drm-master rights and e.g. sway will
> already hold those.
>
>
> The drm_connector brightness properties
> ===
>
> The new uAPI for this consists of 2 properties:
>
> 1. "display brightness": rw 0-int32_max property controlling the brightness 
> setting
> of the connected display. The actual maximum of this will

Re: [PATCH v2 4/4] drm: lcdif: Add support for YUV planes

2022-09-28 Thread Laurent Pinchart
Hi Kieran,

On Wed, Sep 28, 2022 at 10:59:36AM +0100, Kieran Bingham wrote:
> Quoting Laurent Pinchart (2022-09-28 01:58:12)
> > From: Kieran Bingham 
> 
> It looks like this has progressed a bit since it left my computer ;-)

I wish the same would be universally true for all patches :-)

> > The LCDIF includes a color space converter that supports YUV input. Use
> > it to support YUV planes, either through the converter if the output
> > format is RGB, or in conversion bypass mode otherwise.
> > 
> > Signed-off-by: Kieran Bingham 
> > Signed-off-by: Laurent Pinchart 
> > ---
> > Changes since v1:
> > 
> > - Support all YCbCr encodings and quantization ranges
> > - Drop incorrect comment
> > ---
> >  drivers/gpu/drm/mxsfb/lcdif_kms.c  | 183 +
> >  drivers/gpu/drm/mxsfb/lcdif_regs.h |   5 +-
> >  2 files changed, 164 insertions(+), 24 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/mxsfb/lcdif_kms.c 
> > b/drivers/gpu/drm/mxsfb/lcdif_kms.c
> > index c3622be0c587..b469a90fd50f 100644
> > --- a/drivers/gpu/drm/mxsfb/lcdif_kms.c
> > +++ b/drivers/gpu/drm/mxsfb/lcdif_kms.c
> > @@ -15,6 +15,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >  #include 
> >  #include 
> > @@ -32,13 +33,77 @@
> >  /* 
> > -
> >   * CRTC
> >   */
> > +
> > +/*
> > + * Despite the reference manual stating the opposite, the D1, D2 and D3 
> > offset
> > + * values are added to Y, U and V, not subtracted. They must thus be 
> > programmed
> > + * with negative values.
> > + */
> > +static const u32 lcdif_yuv2rgb_coeffs[3][2][6] = {
> 
> Ick ... I sort of dislike this. It's fine here at the moment, and I like
> the table ... but here we're defining the size of the table based on
> external enum values. (Are those ABI stable, perhaps they are already?)
> 
> If someone were to put 
> 
>  enum drm_color_encoding {
> +DRM_COLOR_LEGACY, 
>  DRM_COLOR_YCBCR_BT601,
>  DRM_COLOR_YCBCR_BT709,
>  DRM_COLOR_YCBCR_BT2020,
>  DRM_COLOR_ENCODING_MAX,
>  };
> 
>  enum drm_color_range {
>  DRM_COLOR_YCBCR_LIMITED_RANGE,
> +  DRM_COLOR_YCBCR_MID_RANGE,
>  DRM_COLOR_YCBCR_FULL_RANGE,
>  DRM_COLOR_RANGE_MAX,
>  };
> 
> Then this table allocation would be wrong.
> 
> Perhaps swapping for
> 
> > +static const u32 
> > lcdif_yuv2rgb_coeffs[DRM_COLOR_ENCODING_MAX][DRM_COLOR_RANGE_MAX][6] = {
> 
> Would be safer ... but longer :-( ? 
> 
> Anyway, I think the rest of it looks fine, and perhaps these enums are
> in the UAPI which would make them stable anyway:

The enums themselves are not exposed in UAPI headers, but userspace
depends on the values, which thus have to remain stable.

> Reviewed-by: Kieran Bingham 
> 
> > +   [DRM_COLOR_YCBCR_BT601] = {
> > +   [DRM_COLOR_YCBCR_LIMITED_RANGE] = {
> > +   CSC0_COEF0_A1(0x012a) | CSC0_COEF0_A2(0x),
> > +   CSC0_COEF1_A3(0x01a2) | CSC0_COEF1_B1(0x0123),
> > +   CSC0_COEF2_B2(0x079c) | CSC0_COEF2_B3(0x0730),
> > +   CSC0_COEF3_C1(0x0124) | CSC0_COEF3_C2(0x0204),
> > +   CSC0_COEF4_C3(0x) | CSC0_COEF4_D1(0x01f0),
> > +   CSC0_COEF5_D2(0x0180) | CSC0_COEF5_D3(0x0180),
> > +   },
> > +   [DRM_COLOR_YCBCR_FULL_RANGE] = {
> > +   CSC0_COEF0_A1(0x0100) | CSC0_COEF0_A2(0x),
> > +   CSC0_COEF1_A3(0x0167) | CSC0_COEF1_B1(0x0100),
> > +   CSC0_COEF2_B2(0x07a8) | CSC0_COEF2_B3(0x0749),
> > +   CSC0_COEF3_C1(0x0100) | CSC0_COEF3_C2(0x01c6),
> > +   CSC0_COEF4_C3(0x) | CSC0_COEF4_D1(0x),
> > +   CSC0_COEF5_D2(0x0180) | CSC0_COEF5_D3(0x0180),
> > +   },
> > +   },
> > +   [DRM_COLOR_YCBCR_BT709] = {
> > +   [DRM_COLOR_YCBCR_LIMITED_RANGE] = {
> > +   CSC0_COEF0_A1(0x012a) | CSC0_COEF0_A2(0x),
> > +   CSC0_COEF1_A3(0x01d6) | CSC0_COEF1_B1(0x0123),
> > +   CSC0_COEF2_B2(0x07c9) | CSC0_COEF2_B3(0x0778),
> > +   CSC0_COEF3_C1(0x0123) | CSC0_COEF3_C2(0x021d),
> > +   CSC0_COEF4_C3(0x) | CSC0_COEF4_D1(0x01f0),
> > +   CSC0_COEF5_D2(0x0180) | CSC0_COEF5_D3(0x0180),
> > +   },
> > +   [DRM_COLOR_YCBCR_FULL_RANGE] = {
> > +   CSC0_COEF0_A1(0x0100) | CSC0_COEF0_A2(0x),
> > +   CSC0_COEF1_A3(0x0193) | CSC0_COEF1_B1(0x0100),
> > +   CSC0_COEF2_B2(0x07d0) | CSC0_COEF2_B3(0x0788),
> > +   CSC0_COEF3_C1(0x0100) | CSC0_COEF3_C2(0x01db),
> > +   CSC0_COEF4_C3(0x) | CSC0_COEF4_D1(0x),
> > +   CSC0_COEF5_D2(0x0180) | CSC0_COEF5_D3(0x0180)

Re: [PATCH v3] drm: document uAPI page-flip flags

2022-09-28 Thread Pekka Paalanen
On Wed, 28 Sep 2022 09:41:57 +
Simon Ser  wrote:

> Document flags accepted by the page-flip and atomic IOCTLs.
> 
> v2 (Pekka):
> - Mention DRM_EVENT_FLIP_COMPLETE in DRM_MODE_PAGE_FLIP_EVENT docs.
> - Expand DRM_MODE_ATOMIC_NONBLOCK and DRM_MODE_ATOMIC_ALLOW_MODESET
>   description.
> v3:
> - Fix struct field ref syntax (Daniel)
> - Clarify when artifacts are no longer displayed (Daniel)
> - Add note about sinks deciding to show artifacts on their own (Pekka, Daniel)
> 

Reviewed-by: Pekka Paalanen 

Only nitpicks below, you can ignore them.

> Signed-off-by: Simon Ser 
> Cc: Daniel Vetter 
> Cc: Pekka Paalanen 
> Cc: Ville Syrjala 
> ---
>  include/uapi/drm/drm_mode.h | 63 -
>  1 file changed, 62 insertions(+), 1 deletion(-)
> 
> diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
> index fa953309d9ce..9b10327b9d21 100644
> --- a/include/uapi/drm/drm_mode.h
> +++ b/include/uapi/drm/drm_mode.h
> @@ -935,12 +935,31 @@ struct hdr_output_metadata {
>   };
>  };
>  
> +/**
> + * DRM_MODE_PAGE_FLIP_EVENT
> + *
> + * Request that the kernel sends back a vblank event (see
> + * struct drm_event_vblank) with the &DRM_EVENT_FLIP_COMPLETE type when the
> + * page-flip is done.
> + */
>  #define DRM_MODE_PAGE_FLIP_EVENT 0x01
> +/**
> + * DRM_MODE_PAGE_FLIP_ASYNC
> + *
> + * Request that the page-flip is performed as soon as possible, ie. with no
> + * delay due to waiting for vblank. This may cause tearing to be visible on
> + * the screen.
> + */
>  #define DRM_MODE_PAGE_FLIP_ASYNC 0x02
>  #define DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE 0x4
>  #define DRM_MODE_PAGE_FLIP_TARGET_RELATIVE 0x8
>  #define DRM_MODE_PAGE_FLIP_TARGET (DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE | \
>  DRM_MODE_PAGE_FLIP_TARGET_RELATIVE)
> +/**
> + * DRM_MODE_PAGE_FLIP_FLAGS
> + *
> + * Bitmask of flags suitable for &drm_mode_crtc_page_flip_target.flags.

Should this mention also drm_mode_crtc_page_flip.flags?

UAPI header defines both structs.

> + */
>  #define DRM_MODE_PAGE_FLIP_FLAGS (DRM_MODE_PAGE_FLIP_EVENT | \
> DRM_MODE_PAGE_FLIP_ASYNC | \
> DRM_MODE_PAGE_FLIP_TARGET)
> @@ -1034,11 +1053,53 @@ struct drm_mode_destroy_dumb {
>   __u32 handle;
>  };
>  
> -/* page-flip flags are valid, plus: */
> +/**
> + * DRM_MODE_ATOMIC_TEST_ONLY
> + *
> + * Do not apply the atomic commit, instead check whether the hardware 
> supports
> + * this configuration.
> + *
> + * See &drm_mode_config_funcs.atomic_check for more details on test-only
> + * commits.
> + */
>  #define DRM_MODE_ATOMIC_TEST_ONLY 0x0100
> +/**
> + * DRM_MODE_ATOMIC_NONBLOCK
> + *
> + * Do not block while applying the atomic commit. The &DRM_IOCTL_MODE_ATOMIC
> + * IOCTL returns immediately instead of waiting for the changes to be applied
> + * in hardware. Note, the driver will still check that the update can be
> + * applied before retuning.
> + */
>  #define DRM_MODE_ATOMIC_NONBLOCK  0x0200
> +/**
> + * DRM_MODE_ATOMIC_ALLOW_MODESET
> + *
> + * Allow the update to result in temporary or transient visible artifacts 
> while
> + * the update is being applied. Applying the update may also take 
> significantly
> + * more time than a page flip. All visual artifacts will disappear by the 
> time
> + * the update is completed, as signalled throught the vblank event's 
> timestamp

typo: throught

> + * (see struct drm_event_vblank).
> + *
> + * This flag must be set when the KMS update might cause visible artifacts.
> + * Without this flag such KMS update will return a EINVAL error. What kind of
> + * update may cause visible artifacts depends on the driver and the hardware.
> + * User-space that needs to know beforehand if an update might cause visible
> + * artifacts can use &DRM_MODE_ATOMIC_TEST_ONLY without
> + * &DRM_MODE_ATOMIC_ALLOW_MODESET to see if it fails.
> + *
> + * To the best of the driver's knowledge, visual artifacts are guaranteed to
> + * not appear when this flag is not set. Some sinks might display visual
> + * artifacts outside of the driver's control.

Ok, so we kept the "visual artifacts" semantics and allow monitors to
do otherwise.

I'm still not sure what this means for things like infoframe data where
changing a certain field (e.g. HDR_OUTPUT_METADATA structure's EOTF
field) has a high risk of causing a visual glitch. I cannot imagine why
a monitor manufacturer would not be able to avoid the glitch if they
wanted to. The glitch might or might not happen, and we cannot know in
advance or afterwards whether it did happen, but it is probable that
many monitors will glitch.

I think "To the best of driver's knowledge" means that if someone
reports a monitor to glitch, the driver/kernel would need to add that
field to the "needs modeset" set. But doing so can regress other
monitors that didn't glitch, so it needs to be a monitor quirk.

This is not something for this patch, but would it be possible to agree
on th

[PATCH v4 0/5] drm: Add driver for PowerPC OF displays

2022-09-28 Thread Thomas Zimmermann
PowerPC's Open Firmware offers a simple display buffer for graphics
output. Add ofdrm, a DRM driver for the device. As with the existing
simpledrm driver, the graphics hardware is pre-initialized by the
firmware. The driver only provides blitting, no actual DRM modesetting
is possible.

Patch 1 adds ofdrm, which has again been significantly reworked.
The FWFB library has been removed in favor of various functions in
existing DRM helper libraries. Ofdrm now supports damage iterators
and synchronization for imported GEM BOs.

Patches 2 to 4 add support for color management. The code has been
taken from fbdev's offb. I have no hardware available for testing the
functionality. Qemu's stdvga apparently does not support gamma tables
in RGB modes. I verified that the color management code is executed
by running Gnome's night-mode settings, but the display's color tone
does not change.

Patch 5, which is new in version 4 of this patchset, adds support for
big-endian scanout buffers. It works at least with qemu's ppc64
emulation. Fbdev emulation and pixman rendering works. GL rendering
produces incorrect colors.

Tested by running fbdev emulation, Wayland Gnome, and Weston on qemu's
ppc64le and ppc64 emulation. 

Thomas Zimmermann (5):
  drm/ofdrm: Add ofdrm for Open Firmware framebuffers
  drm/ofdrm: Add CRTC state
  drm/ofdrm: Add per-model device function
  drm/ofdrm: Support color management
  drm/ofdrm: Support big-endian scanout buffers

 MAINTAINERS |1 +
 drivers/gpu/drm/drm_format_helper.c |   10 +
 drivers/gpu/drm/tiny/Kconfig|   13 +
 drivers/gpu/drm/tiny/Makefile   |1 +
 drivers/gpu/drm/tiny/ofdrm.c| 1421 +++
 drivers/video/fbdev/Kconfig |1 +
 6 files changed, 1447 insertions(+)
 create mode 100644 drivers/gpu/drm/tiny/ofdrm.c


base-commit: eee1f4330f388247943e97b93008ef11ababfda0
-- 
2.37.3



[PATCH v4 3/5] drm/ofdrm: Add per-model device function

2022-09-28 Thread Thomas Zimmermann
Add a per-model device-function structure in preparation of adding
color-management support. Detection of the individual models has been
taken from fbdev's offb.

v3:
* define constants for PCI ids (Javier)

Signed-off-by: Thomas Zimmermann 
Reviewed-by: Javier Martinez Canillas 
---
 drivers/gpu/drm/tiny/ofdrm.c | 125 +++
 1 file changed, 125 insertions(+)

diff --git a/drivers/gpu/drm/tiny/ofdrm.c b/drivers/gpu/drm/tiny/ofdrm.c
index d53ca70934b5..5abed3ec0a35 100644
--- a/drivers/gpu/drm/tiny/ofdrm.c
+++ b/drivers/gpu/drm/tiny/ofdrm.c
@@ -28,6 +28,21 @@
 #define DRIVER_MAJOR   1
 #define DRIVER_MINOR   0
 
+#define PCI_VENDOR_ID_ATI_R520 0x7100
+#define PCI_VENDOR_ID_ATI_R600 0x9400
+
+enum ofdrm_model {
+   OFDRM_MODEL_UNKNOWN,
+   OFDRM_MODEL_MACH64, /* ATI Mach64 */
+   OFDRM_MODEL_RAGE128, /* ATI Rage128 */
+   OFDRM_MODEL_RAGE_M3A, /* ATI Rage Mobility M3 Head A */
+   OFDRM_MODEL_RAGE_M3B, /* ATI Rage Mobility M3 Head B */
+   OFDRM_MODEL_RADEON, /* ATI Radeon */
+   OFDRM_MODEL_GXT2000, /* IBM GXT2000 */
+   OFDRM_MODEL_AVIVO, /* ATI R5xx */
+   OFDRM_MODEL_QEMU, /* QEMU VGA */
+};
+
 /*
  * Helpers for display nodes
  */
@@ -148,14 +163,63 @@ static u64 display_get_address_of(struct drm_device *dev, 
struct device_node *of
return address;
 }
 
+/*
+ * Returns true if the PCI vendor/device pair belongs to ATI and the
+ * device ID lies in [PCI_VENDOR_ID_ATI_R520, 0x7800) or is at or above
+ * PCI_VENDOR_ID_ATI_R600.
+ *
+ * NOTE(review): the arguments are typed __be32 but are compared against
+ * CPU-endian constants; this presumably needs a be32_to_cpu() conversion
+ * on little-endian hosts - confirm against the caller.
+ */
+static bool is_avivo(__be32 vendor, __be32 device)
+{
+   /* This will match most R5xx */
+   return (vendor == PCI_VENDOR_ID_ATI) &&
+  ((device >= PCI_VENDOR_ID_ATI_R520 && device < 0x7800) ||
+   (device >= PCI_VENDOR_ID_ATI_R600));
+}
+
+static enum ofdrm_model display_get_model_of(struct drm_device *dev, struct 
device_node *of_node)
+{
+   enum ofdrm_model model = OFDRM_MODEL_UNKNOWN;
+
+   if (of_node_name_prefix(of_node, "ATY,Rage128")) {
+   model = OFDRM_MODEL_RAGE128;
+   } else if (of_node_name_prefix(of_node, "ATY,RageM3pA") ||
+  of_node_name_prefix(of_node, "ATY,RageM3p12A")) {
+   model = OFDRM_MODEL_RAGE_M3A;
+   } else if (of_node_name_prefix(of_node, "ATY,RageM3pB")) {
+   model = OFDRM_MODEL_RAGE_M3B;
+   } else if (of_node_name_prefix(of_node, "ATY,Rage6")) {
+   model = OFDRM_MODEL_RADEON;
+   } else if (of_node_name_prefix(of_node, "ATY,")) {
+   return OFDRM_MODEL_MACH64;
+   } else if (of_device_is_compatible(of_node, "pci1014,b7") ||
+  of_device_is_compatible(of_node, "pci1014,21c")) {
+   model = OFDRM_MODEL_GXT2000;
+   } else if (of_node_name_prefix(of_node, "vga,Display-")) {
+   struct device_node *of_parent;
+   const __be32 *vendor_p, *device_p;
+
+   /* Look for AVIVO initialized by SLOF */
+   of_parent = of_get_parent(of_node);
+   vendor_p = of_get_property(of_parent, "vendor-id", NULL);
+   device_p = of_get_property(of_parent, "device-id", NULL);
+   if (vendor_p && device_p && is_avivo(*vendor_p, *device_p))
+   model = OFDRM_MODEL_AVIVO;
+   of_node_put(of_parent);
+   } else if (of_device_is_compatible(of_node, "qemu,std-vga")) {
+   model = OFDRM_MODEL_QEMU;
+   }
+
+   return model;
+}
+
 /*
  * Open Firmware display device
  */
 
+struct ofdrm_device_funcs {
+};
+
 struct ofdrm_device {
struct drm_device dev;
struct platform_device *pdev;
 
+   const struct ofdrm_device_funcs *funcs;
+
/* firmware-buffer settings */
struct iosys_map screen_base;
struct drm_display_mode mode;
@@ -519,6 +583,33 @@ static const struct drm_mode_config_funcs 
ofdrm_mode_config_funcs = {
  * Init / Cleanup
  */
 
+static const struct ofdrm_device_funcs ofdrm_unknown_device_funcs = {
+};
+
+static const struct ofdrm_device_funcs ofdrm_mach64_device_funcs = {
+};
+
+static const struct ofdrm_device_funcs ofdrm_rage128_device_funcs = {
+};
+
+static const struct ofdrm_device_funcs ofdrm_rage_m3a_device_funcs = {
+};
+
+static const struct ofdrm_device_funcs ofdrm_rage_m3b_device_funcs = {
+};
+
+static const struct ofdrm_device_funcs ofdrm_radeon_device_funcs = {
+};
+
+static const struct ofdrm_device_funcs ofdrm_gxt2000_device_funcs = {
+};
+
+static const struct ofdrm_device_funcs ofdrm_avivo_device_funcs = {
+};
+
+static const struct ofdrm_device_funcs ofdrm_qemu_device_funcs = {
+};
+
 static struct drm_display_mode ofdrm_mode(unsigned int width, unsigned int 
height)
 {
/*
@@ -540,6 +631,7 @@ static struct ofdrm_device *ofdrm_device_create(struct 
drm_driver *drv,
struct device_node *of_node = pdev->dev.of_node;
struct ofdrm_device *odev;
struct drm_device *dev;
+   enum ofdrm_model model;
int width, height, depth, linebytes;
const struct drm_format_info *format;
u64 address;
@@ -568,6 +660,39 @@ static struct ofdrm_device *ofdrm_device_creat

[PATCH v4 1/5] drm/ofdrm: Add ofdrm for Open Firmware framebuffers

2022-09-28 Thread Thomas Zimmermann
Open Firmware provides basic display output via the 'display' node.
DT platform code already provides a device that represents the node's
framebuffer. Add a DRM driver for the device. The display mode and
color format are pre-initialized by the system's firmware. Runtime
modesetting via DRM is not possible. The display is useful during
early boot stages or as error fallback.

Similar functionality is already provided by fbdev's offb driver,
which is insufficient for modern userspace. The old driver includes
support for BootX device tree, which can be found on old 32-bit
PowerPC Macintosh systems. If these are still in use, the
functionality can be added to ofdrm or implemented in a new
driver. As with simpledrm, the fbdev driver cannot be selected if
ofdrm is already enabled.

Two notable points about the driver:

 * Reading the framebuffer aperture from the device tree is not
reliable on all systems. Ofdrm takes the heuristics and a comment
from offb to pick the correct range.

 * No resource management may be tied to the underlying PCI device.
Otherwise the handover to the native driver will fail with a resource
conflict. PCI management is therefore done as part of the platform
device's cleanup.

The driver has been tested on qemu's ppc64le emulation. The device
hand-over has been tested with bochs.

v4:
* set preferred depth to the correct value
* set bpp value for console emulation
* output scanout-buffer parameters with drm_dbg()
v3:
* reintegrate FWFB helpers into ofdrm
* use damage iterator
* sync GEM BOs with drm_gem_fb_{begin,end}_cpu_access()
* fix various atomic_check helpers
* remove CRTC atomic_{enable,disable} (Javier)
* compute stride with drm_format_info_min_pitch() (Daniel)
v2:
* removed simple-pipe helpers
* built driver on top of FWFB helpers
* merged all init code into single function
* make PCI support optional (Michal)
* support COMPILE_TEST (Javier)

Signed-off-by: Thomas Zimmermann 
Reviewed-by: Javier Martinez Canillas 
---
 MAINTAINERS   |   1 +
 drivers/gpu/drm/tiny/Kconfig  |  13 +
 drivers/gpu/drm/tiny/Makefile |   1 +
 drivers/gpu/drm/tiny/ofdrm.c  | 760 ++
 drivers/video/fbdev/Kconfig   |   1 +
 5 files changed, 776 insertions(+)
 create mode 100644 drivers/gpu/drm/tiny/ofdrm.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 7c7db809473e..dcb443f2496b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6656,6 +6656,7 @@ L:dri-devel@lists.freedesktop.org
 S: Maintained
 T: git git://anongit.freedesktop.org/drm/drm-misc
 F: drivers/gpu/drm/drm_aperture.c
+F: drivers/gpu/drm/tiny/ofdrm.c
 F: drivers/gpu/drm/tiny/simpledrm.c
 F: drivers/video/aperture.c
 F: include/drm/drm_aperture.h
diff --git a/drivers/gpu/drm/tiny/Kconfig b/drivers/gpu/drm/tiny/Kconfig
index 565957264875..a300b03a3c7a 100644
--- a/drivers/gpu/drm/tiny/Kconfig
+++ b/drivers/gpu/drm/tiny/Kconfig
@@ -51,6 +51,19 @@ config DRM_GM12U320
 This is a KMS driver for projectors which use the GM12U320 chipset
 for video transfer over USB2/3, such as the Acer C120 mini projector.
 
+config DRM_OFDRM
+   tristate "Open Firmware display driver"
+   depends on DRM && OF && (PPC || COMPILE_TEST)
+   select APERTURE_HELPERS
+   select DRM_GEM_SHMEM_HELPER
+   select DRM_KMS_HELPER
+   help
+ DRM driver for Open Firmware framebuffers.
+
+ This driver assumes that the display hardware has been initialized
+ by the Open Firmware before the kernel boots. Scanout buffer, size,
+ and display format must be provided via device tree.
+
 config DRM_PANEL_MIPI_DBI
tristate "DRM support for MIPI DBI compatible panels"
depends on DRM && SPI
diff --git a/drivers/gpu/drm/tiny/Makefile b/drivers/gpu/drm/tiny/Makefile
index 1d9d6227e7ab..76dde89a044b 100644
--- a/drivers/gpu/drm/tiny/Makefile
+++ b/drivers/gpu/drm/tiny/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_DRM_ARCPGU)+= arcpgu.o
 obj-$(CONFIG_DRM_BOCHS)+= bochs.o
 obj-$(CONFIG_DRM_CIRRUS_QEMU)  += cirrus.o
 obj-$(CONFIG_DRM_GM12U320) += gm12u320.o
+obj-$(CONFIG_DRM_OFDRM)+= ofdrm.o
 obj-$(CONFIG_DRM_PANEL_MIPI_DBI)   += panel-mipi-dbi.o
 obj-$(CONFIG_DRM_SIMPLEDRM)+= simpledrm.o
 obj-$(CONFIG_TINYDRM_HX8357D)  += hx8357d.o
diff --git a/drivers/gpu/drm/tiny/ofdrm.c b/drivers/gpu/drm/tiny/ofdrm.c
new file mode 100644
index ..98bd99ab7e46
--- /dev/null
+++ b/drivers/gpu/drm/tiny/ofdrm.c
@@ -0,0 +1,760 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define DRIVER_NAME"ofdrm"
+#define

[PATCH v4 5/5] drm/ofdrm: Support big-endian scanout buffers

2022-09-28 Thread Thomas Zimmermann
All DRM formats assume little-endian byte order. On big-endian systems,
it is likely that the scanout buffer is in big endian as well. Update
the format accordingly and add endianness conversion to the format-helper
library. Also opt-in to allocated buffers in host format by default.

Suggested-by: Geert Uytterhoeven 
Signed-off-by: Thomas Zimmermann 
---
 drivers/gpu/drm/drm_format_helper.c | 10 ++
 drivers/gpu/drm/tiny/ofdrm.c| 55 +++--
 2 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_format_helper.c 
b/drivers/gpu/drm/drm_format_helper.c
index 4afc4ac27342..fca7936db083 100644
--- a/drivers/gpu/drm/drm_format_helper.c
+++ b/drivers/gpu/drm/drm_format_helper.c
@@ -659,6 +659,11 @@ int drm_fb_blit(struct iosys_map *dst, const unsigned int 
*dst_pitch, uint32_t d
drm_fb_xrgb_to_rgb565(dst, dst_pitch, src, fb, 
clip, false);
return 0;
}
+   } else if (dst_format == (DRM_FORMAT_RGB565 | DRM_FORMAT_BIG_ENDIAN)) {
+   if (fb_format == DRM_FORMAT_RGB565) {
+   drm_fb_swab(dst, dst_pitch, src, fb, clip, false);
+   return 0;
+   }
} else if (dst_format == DRM_FORMAT_RGB888) {
if (fb_format == DRM_FORMAT_XRGB) {
drm_fb_xrgb_to_rgb888(dst, dst_pitch, src, fb, 
clip);
@@ -677,6 +682,11 @@ int drm_fb_blit(struct iosys_map *dst, const unsigned int 
*dst_pitch, uint32_t d
drm_fb_xrgb_to_xrgb2101010(dst, dst_pitch, src, fb, 
clip);
return 0;
}
+   } else if (dst_format == DRM_FORMAT_BGRX) {
+   if (fb_format == DRM_FORMAT_XRGB) {
+   drm_fb_swab(dst, dst_pitch, src, fb, clip, false);
+   return 0;
+   }
}
 
drm_warn_once(fb->dev, "No conversion helper from %p4cc to %p4cc 
found.\n",
diff --git a/drivers/gpu/drm/tiny/ofdrm.c b/drivers/gpu/drm/tiny/ofdrm.c
index 0bf5eebf6678..6e100a7f5db7 100644
--- a/drivers/gpu/drm/tiny/ofdrm.c
+++ b/drivers/gpu/drm/tiny/ofdrm.c
@@ -94,7 +94,7 @@ static int display_get_validated_int0(struct drm_device *dev, 
const char *name,
 }
 
 static const struct drm_format_info *display_get_validated_format(struct 
drm_device *dev,
- u32 depth)
+ u32 depth, 
bool big_endian)
 {
const struct drm_format_info *info;
u32 format;
@@ -115,6 +115,29 @@ static const struct drm_format_info 
*display_get_validated_format(struct drm_dev
return ERR_PTR(-EINVAL);
}
 
+   /*
+* DRM formats assume little-endian byte order. Update the format
+* if the scanout buffer uses big-endian ordering.
+*/
+   if (big_endian) {
+   switch (format) {
+   case DRM_FORMAT_XRGB:
+   format = DRM_FORMAT_BGRX;
+   break;
+   case DRM_FORMAT_ARGB:
+   format = DRM_FORMAT_BGRA;
+   break;
+   case DRM_FORMAT_RGB565:
+   format = DRM_FORMAT_RGB565 | DRM_FORMAT_BIG_ENDIAN;
+   break;
+   case DRM_FORMAT_XRGB1555:
+   format = DRM_FORMAT_XRGB1555 | DRM_FORMAT_BIG_ENDIAN;
+   break;
+   default:
+   break;
+   }
+   }
+
info = drm_format_info(format);
if (!info) {
drm_err(dev, "cannot find framebuffer format for depth %u\n", 
depth);
@@ -134,6 +157,23 @@ static int display_read_u32_of(struct drm_device *dev, 
struct device_node *of_no
return ret;
 }
 
+static bool display_get_big_endian_of(struct drm_device *dev, struct 
device_node *of_node)
+{
+   bool big_endian;
+
+#ifdef __BIG_ENDIAN
+   big_endian = true;
+   if (of_get_property(of_node, "little-endian", NULL))
+   big_endian = false;
+#else
+   big_endian = false;
+   if (of_get_property(of_node, "big-endian", NULL))
+   big_endian = true;
+#endif
+
+   return big_endian;
+}
+
 static int display_get_width_of(struct drm_device *dev, struct device_node 
*of_node)
 {
u32 width;
@@ -613,6 +653,7 @@ static void ofdrm_device_set_gamma_linear(struct 
ofdrm_device *odev,
 
switch (format->format) {
case DRM_FORMAT_RGB565:
+   case DRM_FORMAT_RGB565 | DRM_FORMAT_BIG_ENDIAN:
/* Use better interpolation, to take 32 values from 0 to 255 */
for (i = 0; i < OFDRM_GAMMA_LUT_SIZE / 8; i++) {
unsigned char r = i * 8 + i / 4;
@@ -631,6 +672,7 @@ static void ofdrm_device_set_gamma_linear(struct 
ofdrm_device *odev,
}

[PATCH v4 2/5] drm/ofdrm: Add CRTC state

2022-09-28 Thread Thomas Zimmermann
Add a dedicated CRTC state to ofdrm to later store information for
palette updates.

v3:
* rework CRTC state helpers (Javier)

Signed-off-by: Thomas Zimmermann 
Reviewed-by: Javier Martinez Canillas 
---
 drivers/gpu/drm/tiny/ofdrm.c | 59 ++--
 1 file changed, 56 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/tiny/ofdrm.c b/drivers/gpu/drm/tiny/ofdrm.c
index 98bd99ab7e46..d53ca70934b5 100644
--- a/drivers/gpu/drm/tiny/ofdrm.c
+++ b/drivers/gpu/drm/tiny/ofdrm.c
@@ -278,6 +278,21 @@ static struct resource *ofdrm_find_fb_resource(struct 
ofdrm_device *odev,
  * Modesetting
  */
 
+struct ofdrm_crtc_state {
+   struct drm_crtc_state base;
+};
+
+static struct ofdrm_crtc_state *to_ofdrm_crtc_state(struct drm_crtc_state 
*base)
+{
+   return container_of(base, struct ofdrm_crtc_state, base);
+}
+
+static void ofdrm_crtc_state_destroy(struct ofdrm_crtc_state *ofdrm_crtc_state)
+{
+   __drm_atomic_helper_crtc_destroy_state(&ofdrm_crtc_state->base);
+   kfree(ofdrm_crtc_state);
+}
+
 /*
  * Support all formats of OF display and maybe more; in order
  * of preference. The display's update function will do any
@@ -428,13 +443,51 @@ static const struct drm_crtc_helper_funcs 
ofdrm_crtc_helper_funcs = {
.atomic_check = ofdrm_crtc_helper_atomic_check,
 };
 
+static void ofdrm_crtc_reset(struct drm_crtc *crtc)
+{
+   struct ofdrm_crtc_state *ofdrm_crtc_state =
+   kzalloc(sizeof(*ofdrm_crtc_state), GFP_KERNEL);
+
+   if (crtc->state)
+   ofdrm_crtc_state_destroy(to_ofdrm_crtc_state(crtc->state));
+
+   if (ofdrm_crtc_state)
+   __drm_atomic_helper_crtc_reset(crtc, &ofdrm_crtc_state->base);
+   else
+   __drm_atomic_helper_crtc_reset(crtc, NULL);
+}
+
+static struct drm_crtc_state *ofdrm_crtc_atomic_duplicate_state(struct 
drm_crtc *crtc)
+{
+   struct drm_device *dev = crtc->dev;
+   struct drm_crtc_state *crtc_state = crtc->state;
+   struct ofdrm_crtc_state *new_ofdrm_crtc_state;
+
+   if (drm_WARN_ON(dev, !crtc_state))
+   return NULL;
+
+   new_ofdrm_crtc_state = kzalloc(sizeof(*new_ofdrm_crtc_state), 
GFP_KERNEL);
+   if (!new_ofdrm_crtc_state)
+   return NULL;
+
+   __drm_atomic_helper_crtc_duplicate_state(crtc, 
&new_ofdrm_crtc_state->base);
+
+   return &new_ofdrm_crtc_state->base;
+}
+
+static void ofdrm_crtc_atomic_destroy_state(struct drm_crtc *crtc,
+   struct drm_crtc_state *crtc_state)
+{
+   ofdrm_crtc_state_destroy(to_ofdrm_crtc_state(crtc_state));
+}
+
 static const struct drm_crtc_funcs ofdrm_crtc_funcs = {
-   .reset = drm_atomic_helper_crtc_reset,
+   .reset = ofdrm_crtc_reset,
.destroy = drm_crtc_cleanup,
.set_config = drm_atomic_helper_set_config,
.page_flip = drm_atomic_helper_page_flip,
-   .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state,
-   .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state,
+   .atomic_duplicate_state = ofdrm_crtc_atomic_duplicate_state,
+   .atomic_destroy_state = ofdrm_crtc_atomic_destroy_state,
 };
 
 static int ofdrm_connector_helper_get_modes(struct drm_connector *connector)
-- 
2.37.3



[PATCH v4 4/5] drm/ofdrm: Support color management

2022-09-28 Thread Thomas Zimmermann
Support the CRTC's color-management property and implement each model's
palette support.

The OF hardware has different methods of setting the palette. The
respective code has been taken from fbdev's offb and refactored into
per-model device functions. The device functions integrate this
functionality into the overall modesetting.

As palette handling is a CRTC property that depends on the primary
plane's color format, the plane's atomic_check helper now updates the
format field in ofdrm's custom CRTC state. The CRTC's atomic_flush
helper updates the palette for the format as needed.

v4:
* use cpu_to_be32() (Geert)
v3:
* lookup CRTC state with drm_atomic_get_new_crtc_state()
* access HW palette with writeb(), writel(), and readl() (Ben)
* declare register values as u32

Signed-off-by: Thomas Zimmermann 
Reviewed-by: Javier Martinez Canillas 
---
 drivers/gpu/drm/tiny/ofdrm.c | 442 ++-
 1 file changed, 437 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/tiny/ofdrm.c b/drivers/gpu/drm/tiny/ofdrm.c
index 5abed3ec0a35..0bf5eebf6678 100644
--- a/drivers/gpu/drm/tiny/ofdrm.c
+++ b/drivers/gpu/drm/tiny/ofdrm.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -31,6 +32,33 @@
 #define PCI_VENDOR_ID_ATI_R520 0x7100
 #define PCI_VENDOR_ID_ATI_R600 0x9400
 
+#define OFDRM_GAMMA_LUT_SIZE   256
+
+/* Definitions used by the Avivo palette  */
+#define AVIVO_DC_LUT_RW_SELECT  0x6480
+#define AVIVO_DC_LUT_RW_MODE0x6484
+#define AVIVO_DC_LUT_RW_INDEX   0x6488
+#define AVIVO_DC_LUT_SEQ_COLOR  0x648c
+#define AVIVO_DC_LUT_PWL_DATA   0x6490
+#define AVIVO_DC_LUT_30_COLOR   0x6494
+#define AVIVO_DC_LUT_READ_PIPE_SELECT   0x6498
+#define AVIVO_DC_LUT_WRITE_EN_MASK  0x649c
+#define AVIVO_DC_LUT_AUTOFILL   0x64a0
+#define AVIVO_DC_LUTA_CONTROL   0x64c0
+#define AVIVO_DC_LUTA_BLACK_OFFSET_BLUE 0x64c4
+#define AVIVO_DC_LUTA_BLACK_OFFSET_GREEN0x64c8
+#define AVIVO_DC_LUTA_BLACK_OFFSET_RED  0x64cc
+#define AVIVO_DC_LUTA_WHITE_OFFSET_BLUE 0x64d0
+#define AVIVO_DC_LUTA_WHITE_OFFSET_GREEN0x64d4
+#define AVIVO_DC_LUTA_WHITE_OFFSET_RED  0x64d8
+#define AVIVO_DC_LUTB_CONTROL   0x6cc0
+#define AVIVO_DC_LUTB_BLACK_OFFSET_BLUE 0x6cc4
+#define AVIVO_DC_LUTB_BLACK_OFFSET_GREEN0x6cc8
+#define AVIVO_DC_LUTB_BLACK_OFFSET_RED  0x6ccc
+#define AVIVO_DC_LUTB_WHITE_OFFSET_BLUE 0x6cd0
+#define AVIVO_DC_LUTB_WHITE_OFFSET_GREEN0x6cd4
+#define AVIVO_DC_LUTB_WHITE_OFFSET_RED  0x6cd8
+
 enum ofdrm_model {
OFDRM_MODEL_UNKNOWN,
OFDRM_MODEL_MACH64, /* ATI Mach64 */
@@ -211,7 +239,14 @@ static enum ofdrm_model display_get_model_of(struct 
drm_device *dev, struct devi
  * Open Firmware display device
  */
 
+struct ofdrm_device;
+
 struct ofdrm_device_funcs {
+   void __iomem *(*cmap_ioremap)(struct ofdrm_device *odev,
+ struct device_node *of_node,
+ u64 fb_bas);
+   void (*cmap_write)(struct ofdrm_device *odev, unsigned char index,
+  unsigned char r, unsigned char g, unsigned char b);
 };
 
 struct ofdrm_device {
@@ -226,6 +261,9 @@ struct ofdrm_device {
const struct drm_format_info *format;
unsigned int pitch;
 
+   /* colormap */
+   void __iomem *cmap_base;
+
/* modesetting */
uint32_t formats[8];
struct drm_plane primary_plane;
@@ -338,12 +376,322 @@ static struct resource *ofdrm_find_fb_resource(struct 
ofdrm_device *odev,
return max_res;
 }
 
+/*
+ * Colormap / Palette
+ */
+
+static void __iomem *get_cmap_address_of(struct ofdrm_device *odev, struct 
device_node *of_node,
+int bar_no, unsigned long offset, 
unsigned long size)
+{
+   struct drm_device *dev = &odev->dev;
+   const __be32 *addr_p;
+   u64 max_size, address;
+   unsigned int flags;
+   void __iomem *mem;
+
+   addr_p = of_get_pci_address(of_node, bar_no, &max_size, &flags);
+   if (!addr_p)
+   addr_p = of_get_address(of_node, bar_no, &max_size, &flags);
+   if (!addr_p)
+   return ERR_PTR(-ENODEV);
+
+   if ((flags & (IORESOURCE_IO | IORESOURCE_MEM)) == 0)
+   return ERR_PTR(-ENODEV);
+
+   if ((offset + size) >= max_size)
+   return ERR_PTR(-ENODEV);
+
+   address = of_translate_address(of_node, addr_p);
+   if (address == OF_BAD_ADDR)
+   return ERR_PTR(-ENODEV);
+
+   mem = devm_ioremap(dev->dev, address + offset, size);
+   if (!mem)
+   return ERR_PTR(-ENOMEM);
+
+   return mem;
+}
+
+static void __iomem *ofdrm_mach64_cmap_ioremap(struct ofdrm_device *odev,
+ 

Re: [RFC v2] drm/kms: control display brightness through drm_connector properties

2022-09-28 Thread Ville Syrjälä
On Wed, Sep 28, 2022 at 01:04:01PM +0300, Jani Nikula wrote:
> On Fri, 09 Sep 2022, Hans de Goede  wrote:
> > Hi all,
> >
> > Here is v2 of my "drm/kms: control display brightness through drm_connector 
> > properties" RFC:
> >
> > Changes from version 1:
> > - Drop bl_brightness_0_is_min_brightness from list of new connector
> >   properties.
> > - Clearly define that 0 is always min-brightness when setting the brightness
> >   through the connector properties.
> > - Drop bl_brightness_control_method from list of new connector
> >   properties.
> > - Phase 1 of the plan has been completed
> >
> > As discussed already several times in the past:
> >  https://www.x.org/wiki/Events/XDC2014/XDC2014GoedeBacklight/
> >  
> > https://lore.kernel.org/all/4b17ba08-39f3-57dd-5aad-d37d844b0...@linux.intel.com/
> >
> > The current userspace API for brightness control offered by
> > /sys/class/backlight devices has various issues:
> >
> > 1. There is no way to map the backlight device to a specific
> >display-output / panel (1)
> > 2. Controlling the brightness requires root-rights requiring
> >desktop-environments to use suid-root helpers for this.
> > 3. The meaning of 0 is not clearly defined, it can be either off,
> >or minimum brightness at which the display is still readable
> >(in a low light environment)
> > 4. It's not possible to change both the gamma and the brightness in the
> >same KMS atomic commit. You'd want to be able to reduce brightness to
> >conserve power, and counter the effects of that by changing gamma to
> >reach a visually similar image. And you'd want to have the changes take
> >effect at the same time instead of reducing brightness at some frame and
> >change gamma at some other frame. This is pretty much impossible to do
> >via the sysfs interface.
> >
> > As already discussed on various conference's hallway tracks
> > and as has been proposed on the dri-devel list once before (2),
> > it seems that there is consensus that the best way to solve these
> > 2 issues is to add support for controlling a video-output's brightness
> > through properties on the drm_connector.
> >
> > This RFC outlines my plan to try and actually implement this,
> > which has 3 phases:
> >
> >
> > Phase 1: Stop registering multiple /sys/class/backlight devs for a single 
> > display
> > =
> >
> > On x86 there can be multiple firmware + direct-hw-access methods
> > for controlling the backlight and in some cases the kernel registers
> > multiple backlight-devices for a single internal laptop LCD panel.
> >
> > A plan to fix this was posted here:
> > https://lore.kernel.org/dri-devel/98519ba0-7f18-201a-ea34-652f50343...@redhat.com/
> > And a pull-req actually implementing this plan has been sent out this week:
> > https://lore.kernel.org/dri-devel/261afe3d-7790-e945-adf6-a2c96c9b1...@redhat.com/
> >
> >
> > Phase 2: Add drm_connector properties mirroring the matching backlight 
> > device
> > =
> >
> > The plan is to add a drm_connector helper function, which optionally takes
> > a pointer to the backlight device for the GPU's native backlight device,
> > which will then mirror the backlight settings from the backlight device
> > in a set of read/write brightness* properties on the connector.
> >
> > This function can then be called by GPU drivers for the drm_connector for
> > the internal panel and it will then take care of everything. When there
> > is no native GPU backlight device, or when it should not be used then
> > (on x86) the helper will use the acpi_video_get_backlight_type() to
> > determine which backlight-device should be used instead and it will find
> > + mirror that one.
> >
> >
> > Phase 3: Deprecate /sys/class/backlight uAPI
> > 
> >
> > Once most userspace has moved over to using the new drm_connector
> > brightness props, a Kconfig option can be added to stop exporting
> > the backlight-devices under /sys/class/backlight. The plan is to
> > just disable the sysfs interface and keep the existing backlight-device
> > internal kernel abstraction as is, since some abstraction for (non GPU
> > native) backlight devices will be necessary regardless.
> >
> > It is unsure if we will ever be able to do this. For example people using
> > non fully integrated desktop environments like e.g. sway often use custom
> > scripts bound to hotkeys to get functionality like the brightness
> > up/down keyboard hotkeys changing the brightness. This typically involves
> > e.g. the xbacklight utility.
> >
> > Even if the xbacklight utility is ported to use kms with the new connector
> > object brightness properties then this still will not work because
> > changing the properties will require drm-master rights and e.g. sway will
> > already hold those.
> >
> >
> > The drm_connecto

[PATCH v4] drm/i915/mtl: enable local stolen memory

2022-09-28 Thread Aravind Iddamsetty
As an integrated GPU, MTL does not have local memory and
HAS_LMEM() returns false.  However the platform's stolen memory
is presented via BAR2 (i.e., the BAR we traditionally consider
to be the LMEM BAR) and should be managed by the driver the same
way that local memory is on dgpu platforms (which includes
setting the "lmem" bit on page table entries).  We use the term
"local stolen memory" to refer to this model.

BSPEC: 53098, 63830

v2:
1. dropped is_dsm_invalid, updated valid_stolen_size check from Lucas
(Jani, Lucas)
2. drop lmembar_is_igpu_stolen
3. revert to referring GFXMEM_BAR as GEN12_LMEM_BAR (Lucas)

v3:(Jani)
1. rename get_mtl_gms_size to mtl_get_gms_size
2. define register for MMIO address

v4:(Matt)
1. Use REG_FIELD_GET to read GMS value
2. replace the calculations with SZ_256M/SZ_8M

Cc: Matt Roper 
Cc: Lucas De Marchi 
Cc: Jani Nikula 

Signed-off-by: CQ Tang 
Signed-off-by: Aravind Iddamsetty 
Original-author: CQ Tang
---
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 83 ++
 drivers/gpu/drm/i915/gt/intel_ggtt.c   |  2 +-
 drivers/gpu/drm/i915/i915_drv.h|  3 +
 drivers/gpu/drm/i915/i915_reg.h|  4 ++
 4 files changed, 76 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c 
b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index c5a4035c99cd..2f8530a0ff62 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -77,9 +77,9 @@ void i915_gem_stolen_remove_node(struct drm_i915_private 
*i915,
mutex_unlock(&i915->mm.stolen_lock);
 }
 
-static bool valid_stolen_size(struct resource *dsm)
+static bool valid_stolen_size(struct drm_i915_private *i915, struct resource 
*dsm)
 {
-   return dsm->start != 0 && dsm->end > dsm->start;
+   return (dsm->start != 0 || HAS_BAR2_SMEM_STOLEN(i915)) && dsm->end > 
dsm->start;
 }
 
 static int adjust_stolen(struct drm_i915_private *i915,
@@ -88,7 +88,7 @@ static int adjust_stolen(struct drm_i915_private *i915,
struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
struct intel_uncore *uncore = ggtt->vm.gt->uncore;
 
-   if (!valid_stolen_size(dsm))
+   if (!valid_stolen_size(i915, dsm))
return -EINVAL;
 
/*
@@ -135,7 +135,7 @@ static int adjust_stolen(struct drm_i915_private *i915,
}
}
 
-   if (!valid_stolen_size(dsm))
+   if (!valid_stolen_size(i915, dsm))
return -EINVAL;
 
return 0;
@@ -149,8 +149,11 @@ static int request_smem_stolen(struct drm_i915_private 
*i915,
/*
 * With stolen lmem, we don't need to request system memory for the
 * address range since it's local to the gpu.
+*
+* Starting MTL, in IGFX devices the stolen memory is exposed via
+* BAR2 and shall be considered similar to stolen lmem.
 */
-   if (HAS_LMEM(i915))
+   if (HAS_LMEM(i915) || HAS_BAR2_SMEM_STOLEN(i915))
return 0;
 
/*
@@ -385,8 +388,6 @@ static void icl_get_stolen_reserved(struct drm_i915_private 
*i915,
 
drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val);
 
-   *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
-
switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) {
case GEN8_STOLEN_RESERVED_1M:
*size = 1024 * 1024;
@@ -404,6 +405,12 @@ static void icl_get_stolen_reserved(struct 
drm_i915_private *i915,
*size = 8 * 1024 * 1024;
MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK);
}
+
+   if (HAS_BAR2_SMEM_STOLEN(i915))
+   /* the base is initialized to stolen top so subtract size to 
get base */
+   *base -= *size;
+   else
+   *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
 }
 
 /*
@@ -833,6 +840,29 @@ static const struct intel_memory_region_ops 
i915_region_stolen_lmem_ops = {
.init_object = _i915_gem_object_stolen_init,
 };
 
+static int mtl_get_gms_size(struct intel_uncore *uncore)
+{
+   u16 ggc, gms;
+
+   ggc = intel_uncore_read16(uncore, GGC);
+
+   /* check GGMS, should be fixed 0x3 (8MB) */
+   if ((ggc & GGMS_MASK) != GGMS_MASK)
+   return -EIO;
+
+   /* return valid GMS value, -EIO if invalid */
+   gms = REG_FIELD_GET(GMS_MASK, ggc);
+   switch (gms) {
+   case 0x0 ... 0x04:
+   return gms * 32;
+   case 0xf0 ... 0xfe:
+   return (gms - 0xf0 + 1) * 4;
+   default:
+   MISSING_CASE(gms);
+   return -EIO;
+   }
+}
+
 struct intel_memory_region *
 i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
   u16 instance)
@@ -843,6 +873,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, 
u16 type,
struct intel_memory_region *mem;
resource_size_t io_start, io_size;
resource_size_t min_page_size;
+   int ret;
 
if (

Re: [PATCH v2 1/2] drm/i915: enable PS64 support for DG2

2022-09-28 Thread Das, Nirmoy



On 9/27/2022 5:39 PM, Matthew Auld wrote:

It turns out that on production DG2/ATS HW we should have support for
PS64. This feature allows to provide a 64K TLB hint at the PTE level,
which is a lot more flexible than the current method of enabling 64K GTT
pages for the entire page-table, since that leads to all kinds of
annoying restrictions, as documented in:

commit caa574ffc4aaf4f29b890223878c63e2e7772f62
Author: Matthew Auld 
Date:   Sat Feb 19 00:17:49 2022 +0530

 drm/i915/uapi: document behaviour for DG2 64K support

 On discrete platforms like DG2, we need to support a minimum page size
 of 64K when dealing with device local-memory. This is quite tricky for
 various reasons, so try to document the new implicit uapi for this.

With PS64, we can now drop the 2M GTT alignment restriction, and instead
only require 64K or larger when dealing with lmem. We still use the
compact-pt layout when possible, but only when we are certain that this
doesn't interfere with userspace.

Note that this is a change in uAPI behaviour, but hopefully shouldn't be
a concern (IGT is at least able to autodetect the alignment), since we
are only making the GTT alignment constraint less restrictive.

Based on a patch from CQ Tang.

v2: update the comment wrt scratch page

Reported-by: Michal Mrozek 
Signed-off-by: Matthew Auld 
Cc: Lionel Landwerlin 
Cc: Thomas Hellström 
Cc: Stuart Summers 
Cc: Jordan Justen 
Cc: Yang A Shi 
Cc: Nirmoy Das 
---
  .../gpu/drm/i915/gem/selftests/huge_pages.c   | 159 +-
  drivers/gpu/drm/i915/gt/gen8_ppgtt.c  |  81 +
  drivers/gpu/drm/i915/gt/intel_gtt.c   |  21 +--
  drivers/gpu/drm/i915/gt/intel_gtt.h   |   1 +
  drivers/gpu/drm/i915/i915_drv.h   |   7 -
  drivers/gpu/drm/i915/i915_pci.c   |   2 -
  drivers/gpu/drm/i915/i915_vma.c   |   9 +-
  drivers/gpu/drm/i915/intel_device_info.h  |   1 -
  drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |   9 +-
  include/uapi/drm/i915_drm.h   |  36 ++--
  10 files changed, 220 insertions(+), 106 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c 
b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index c570cf780079..cc26c1293208 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -1161,7 +1161,8 @@ static int igt_write_huge(struct drm_i915_private *i915,
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
  
  	size = obj->base.size;

-   if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
+   if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
+   !HAS_64K_PAGES(i915))
size = round_up(size, I915_GTT_PAGE_SIZE_2M);
  
  	n = 0;

@@ -1214,6 +1215,10 @@ static int igt_write_huge(struct drm_i915_private *i915,
 * size and ensure the vma offset is at the start of the pt
 * boundary, however to improve coverage we opt for testing both
 * aligned and unaligned offsets.
+*
+* With PS64 this is no longer the case, but to ensure we
+* sometimes get the compact layout for smaller objects, apply
+* the round_up anyway.
 */
if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
offset_low = round_down(offset_low,
@@ -1411,6 +1416,7 @@ static int igt_ppgtt_sanity_check(void *arg)
{ SZ_2M + SZ_4K,SZ_64K | SZ_4K  },
{ SZ_2M + SZ_4K,SZ_2M  | SZ_4K  },
{ SZ_2M + SZ_64K,   SZ_2M  | SZ_64K },
+   { SZ_2M + SZ_64K,   SZ_64K  },
};
int i, j;
int err;
@@ -1540,6 +1546,156 @@ static int igt_ppgtt_compact(void *arg)
return err;
  }
  
+static int igt_ppgtt_mixed(void *arg)

+{
+   struct drm_i915_private *i915 = arg;
+   const unsigned long flags = PIN_OFFSET_FIXED | PIN_USER;
+   struct drm_i915_gem_object *obj, *on;
+   struct i915_gem_engines *engines;
+   struct i915_gem_engines_iter it;
+   struct i915_address_space *vm;
+   struct i915_gem_context *ctx;
+   struct intel_context *ce;
+   struct file *file;
+   I915_RND_STATE(prng);
+   LIST_HEAD(objects);
+   struct intel_memory_region *mr;
+   struct i915_vma *vma;
+   unsigned int count;
+   u32 i, rem, addr;
+   int *order;
+   int n, err;
+
+   /*
+* Sanity check mixing 4K and 64K pages within the same page-table via
+* the new PS64 TLB hint.
+*/
+
+   if (!HAS_64K_PAGES(i915)) {
+   pr_info("device lacks PS64, skipping\n");
+   return 0;
+   }
+
+   file = mock_file(i915);
+   if (IS_ERR(file))
+   return PTR_ERR(file);
+
+   ctx = hugepage_ctx(i915, file);
+   if (IS_ERR(ctx)) {
+   err = PTR_ERR(ctx);
+   go

Re: [RFC/PATCH] backlight: hx8357: prepare to conversion to gpiod API

2022-09-28 Thread Daniel Thompson
On Tue, Sep 27, 2022 at 03:32:35PM -0700, Dmitry Torokhov wrote:
> Properties describing GPIOs should be named as "-gpios" or
> "-gpio", and that is what gpiod API expects, however the
> driver uses non-standard "gpios-reset" name. Let's adjust this, and also
> note that the reset line is active low as that is also important to
> gpiod API.

No objections to the goal but...


> Signed-off-by: Dmitry Torokhov 
> ---
>
> Another option is to add another quirk into gpiolib-of.c, but we
> may end up with a ton of them once we convert everything away from
> of_get_named_gpio() to gpiod API, so I'd prefer not doing that.

... it is unusual to permit backwards incompatible changes to the DT
bindings[1]: creating "flag days" where hardware stops functioning if
you boot a new kernel with an old DT is a known annoyance to users.

I usually favour quirks tables or similar[2] rather than break legacy
DTs. Very occasionally I accept (believable) arguments that no legacy
DTs actually exist but that can be very difficult to verify.

Overall I'd like to solicit views from both GPIO and DT maintainers
before rejecting quirks tables as a way to help smooth these sort of
changes (or links to ML archives if this has already been discussed).

[1] For this particular driver the situation is muddied slightly
because it looks like the bindings for himax,hx8357 and
himax,hx8369 are undocumented (and badly named).

[2] When the property is not parsed by library code mostly we handle
legacy by consuming both new and old names in the parser code.


> diff --git a/drivers/video/backlight/hx8357.c 
> b/drivers/video/backlight/hx8357.c
> index 9b50bc96e00f..41332f48b2df 100644
> --- a/drivers/video/backlight/hx8357.c
> +++ b/drivers/video/backlight/hx8357.c
> @@ -601,7 +601,7 @@ static int hx8357_probe(struct spi_device *spi)
>   if (!match || !match->data)
>   return -EINVAL;
>
> - lcd->reset = of_get_named_gpio(spi->dev.of_node, "gpios-reset", 0);
> + lcd->reset = of_get_named_gpio(spi->dev.of_node, "reset-gpios", 0);
>   if (!gpio_is_valid(lcd->reset)) {
>   dev_err(&spi->dev, "Missing dt property: gpios-reset\n");
>   return -EINVAL;

Daniel.


Re: [PATCH v2 4/4] drm: lcdif: Add support for YUV planes

2022-09-28 Thread Kieran Bingham
Quoting Laurent Pinchart (2022-09-28 11:05:33)
> Hi Kieran,
> 
> On Wed, Sep 28, 2022 at 10:59:36AM +0100, Kieran Bingham wrote:
> > Quoting Laurent Pinchart (2022-09-28 01:58:12)
> > > From: Kieran Bingham 
> > 
> > It looks like this has progressed a bit since it left my computer ;-)
> 
> I wish the same would be universally true for all patches :-)
> 
> > > The LCDIF includes a color space converter that supports YUV input. Use
> > > it to support YUV planes, either through the converter if the output
> > > format is RGB, or in conversion bypass mode otherwise.
> > > 
> > > Signed-off-by: Kieran Bingham 
> > > Signed-off-by: Laurent Pinchart 
> > > ---
> > > Changes since v1:
> > > 
> > > - Support all YCbCr encodings and quantization ranges
> > > - Drop incorrect comment
> > > ---
> > >  drivers/gpu/drm/mxsfb/lcdif_kms.c  | 183 +
> > >  drivers/gpu/drm/mxsfb/lcdif_regs.h |   5 +-
> > >  2 files changed, 164 insertions(+), 24 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/mxsfb/lcdif_kms.c 
> > > b/drivers/gpu/drm/mxsfb/lcdif_kms.c
> > > index c3622be0c587..b469a90fd50f 100644
> > > --- a/drivers/gpu/drm/mxsfb/lcdif_kms.c
> > > +++ b/drivers/gpu/drm/mxsfb/lcdif_kms.c
> > > @@ -15,6 +15,7 @@
> > >  #include 
> > >  #include 
> > >  #include 
> > > +#include 
> > >  #include 
> > >  #include 
> > >  #include 
> > > @@ -32,13 +33,77 @@
> > >  /* 
> > > -
> > >   * CRTC
> > >   */
> > > +
> > > +/*
> > > + * Despite the reference manual stating the opposite, the D1, D2 and D3 
> > > offset
> > > + * values are added to Y, U and V, not subtracted. They must thus be 
> > > programmed
> > > + * with negative values.
> > > + */
> > > +static const u32 lcdif_yuv2rgb_coeffs[3][2][6] = {
> > 
> > Ick ... I sort of dislike this. It's fine here at the moment, and I like
> > > the table ... but here we're defining the size of the table based on
> > external enum values. (Are those ABI stable, perhaps they are already?)
> > 
> > If someone were to put 
> > 
> >  enum drm_color_encoding {
> > +DRM_COLOR_LEGACY, 
> >  DRM_COLOR_YCBCR_BT601,
> >  DRM_COLOR_YCBCR_BT709,
> >  DRM_COLOR_YCBCR_BT2020,
> >  DRM_COLOR_ENCODING_MAX,
> >  };
> > 
> >  enum drm_color_range {
> >  DRM_COLOR_YCBCR_LIMITED_RANGE,
> > +  DRM_COLOR_YCBCR_MID_RANGE,
> >  DRM_COLOR_YCBCR_FULL_RANGE,
> >  DRM_COLOR_RANGE_MAX,
> >  };
> > 
> > Then this table allocation would be wrong.
> > 
> > Perhaps swapping for
> > 
> > > +static const u32 
> > > lcdif_yuv2rgb_coeffs[DRM_COLOR_ENCODING_MAX][DRM_COLOR_RANGE_MAX][6] = {
> > 
> > Would be safer ... but longer :-( ? 
> > 
> > Anyway, I think the rest of it looks fine, and perhaps these enums are
> > in the UAPI which would make them stable anyway:
> 
> The enums themselves are not exposed in UAPI headers, but userspace
> depends on the values, which thus have to remain stable.

And I saw you had to redefine them to use them in libcamera. Perhaps
they should be in a UAPI header then...
--
Kieran


> 
> > Reviewed-by: Kieran Bingham 
> > 
> > > +   [DRM_COLOR_YCBCR_BT601] = {
> > > +   [DRM_COLOR_YCBCR_LIMITED_RANGE] = {
> > > +   CSC0_COEF0_A1(0x012a) | CSC0_COEF0_A2(0x),
> > > +   CSC0_COEF1_A3(0x01a2) | CSC0_COEF1_B1(0x0123),
> > > +   CSC0_COEF2_B2(0x079c) | CSC0_COEF2_B3(0x0730),
> > > +   CSC0_COEF3_C1(0x0124) | CSC0_COEF3_C2(0x0204),
> > > +   CSC0_COEF4_C3(0x) | CSC0_COEF4_D1(0x01f0),
> > > +   CSC0_COEF5_D2(0x0180) | CSC0_COEF5_D3(0x0180),
> > > +   },
> > > +   [DRM_COLOR_YCBCR_FULL_RANGE] = {
> > > +   CSC0_COEF0_A1(0x0100) | CSC0_COEF0_A2(0x),
> > > +   CSC0_COEF1_A3(0x0167) | CSC0_COEF1_B1(0x0100),
> > > +   CSC0_COEF2_B2(0x07a8) | CSC0_COEF2_B3(0x0749),
> > > +   CSC0_COEF3_C1(0x0100) | CSC0_COEF3_C2(0x01c6),
> > > +   CSC0_COEF4_C3(0x) | CSC0_COEF4_D1(0x),
> > > +   CSC0_COEF5_D2(0x0180) | CSC0_COEF5_D3(0x0180),
> > > +   },
> > > +   },
> > > +   [DRM_COLOR_YCBCR_BT709] = {
> > > +   [DRM_COLOR_YCBCR_LIMITED_RANGE] = {
> > > +   CSC0_COEF0_A1(0x012a) | CSC0_COEF0_A2(0x),
> > > +   CSC0_COEF1_A3(0x01d6) | CSC0_COEF1_B1(0x0123),
> > > +   CSC0_COEF2_B2(0x07c9) | CSC0_COEF2_B3(0x0778),
> > > +   CSC0_COEF3_C1(0x0123) | CSC0_COEF3_C2(0x021d),
> > > +   CSC0_COEF4_C3(0x) | CSC0_COEF4_D1(0x01f0),
> > > +   CSC0_COEF5_D2(0x0180) | CSC0_COEF5_D3(0x0180),
> > > +   },
> > > +   [DRM_COLOR_YCBCR_FULL_RANGE] = {
> > > +   CSC0_COEF

RE: [Intel-gfx] [PATCH v4] drm/i915/mtl: enable local stolen memory

2022-09-28 Thread Gupta, Anshuman



> -Original Message-
> From: Intel-gfx  On Behalf Of Aravind
> Iddamsetty
> Sent: Wednesday, September 28, 2022 4:36 PM
> To: intel-...@lists.freedesktop.org
> Cc: De Marchi, Lucas ; dri-
> de...@lists.freedesktop.org
> Subject: [Intel-gfx] [PATCH v4] drm/i915/mtl: enable local stolen memory
> 
> As an integrated GPU, MTL does not have local memory and
> HAS_LMEM() returns false.  However the platform's stolen memory is presented
> via BAR2 (i.e., the BAR we traditionally consider to be the LMEM BAR) and
AFAIU BAR2 has represented stolen memory on prior generations as well.
Like on ADL.
Region 0: Memory at 618e00 (64-bit, non-prefetchable) [size=16M]
Region 2: Memory at 40 (64-bit, prefetchable) [size=256M]
Region 4: I/O ports at 4000 [size=64]
Maybe a bit more explanation is required here for BAR2: how is it different?
Br,
Anshuman Gupta.
> should be managed by the driver the same way that local memory is on dgpu
> platforms (which includes setting the "lmem" bit on page table entries).  We 
> use
> the term "local stolen memory" to refer to this model.
> 
> BSPEC: 53098, 63830
> 
> v2:
> 1. dropped is_dsm_invalid, updated valid_stolen_size check from Lucas (Jani,
> Lucas) 2. drop lmembar_is_igpu_stolen 3. revert to referring GFXMEM_BAR as
> GEN12_LMEM_BAR (Lucas)
> 
> v3:(Jani)
> 1. rename get_mtl_gms_size to mtl_get_gms_size 2. define register for MMIO
> address
> 
> v4:(Matt)
> 1. Use REG_FIELD_GET to read GMS value
> 2. replace the calculations with SZ_256M/SZ_8M
> 
> Cc: Matt Roper 
> Cc: Lucas De Marchi 
> Cc: Jani Nikula 
> 
> Signed-off-by: CQ Tang 
> Signed-off-by: Aravind Iddamsetty 
> Original-author: CQ Tang
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 83 ++
>  drivers/gpu/drm/i915/gt/intel_ggtt.c   |  2 +-
>  drivers/gpu/drm/i915/i915_drv.h|  3 +
>  drivers/gpu/drm/i915/i915_reg.h|  4 ++
>  4 files changed, 76 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> index c5a4035c99cd..2f8530a0ff62 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> @@ -77,9 +77,9 @@ void i915_gem_stolen_remove_node(struct
> drm_i915_private *i915,
>   mutex_unlock(&i915->mm.stolen_lock);
>  }
> 
> -static bool valid_stolen_size(struct resource *dsm)
> +static bool valid_stolen_size(struct drm_i915_private *i915, struct
> +resource *dsm)
>  {
> - return dsm->start != 0 && dsm->end > dsm->start;
> + return (dsm->start != 0 || HAS_BAR2_SMEM_STOLEN(i915)) && dsm-
> >end >
> +dsm->start;
>  }
> 
>  static int adjust_stolen(struct drm_i915_private *i915, @@ -88,7 +88,7 @@
> static int adjust_stolen(struct drm_i915_private *i915,
>   struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
>   struct intel_uncore *uncore = ggtt->vm.gt->uncore;
> 
> - if (!valid_stolen_size(dsm))
> + if (!valid_stolen_size(i915, dsm))
>   return -EINVAL;
> 
>   /*
> @@ -135,7 +135,7 @@ static int adjust_stolen(struct drm_i915_private *i915,
>   }
>   }
> 
> - if (!valid_stolen_size(dsm))
> + if (!valid_stolen_size(i915, dsm))
>   return -EINVAL;
> 
>   return 0;
> @@ -149,8 +149,11 @@ static int request_smem_stolen(struct
> drm_i915_private *i915,
>   /*
>* With stolen lmem, we don't need to request system memory for the
>* address range since it's local to the gpu.
> +  *
> +  * Starting MTL, in IGFX devices the stolen memory is exposed via
> +  * BAR2 and shall be considered similar to stolen lmem.
>*/
> - if (HAS_LMEM(i915))
> + if (HAS_LMEM(i915) || HAS_BAR2_SMEM_STOLEN(i915))
>   return 0;
> 
>   /*
> @@ -385,8 +388,6 @@ static void icl_get_stolen_reserved(struct
> drm_i915_private *i915,
> 
>   drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n",
> reg_val);
> 
> - *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
> -
>   switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) {
>   case GEN8_STOLEN_RESERVED_1M:
>   *size = 1024 * 1024;
> @@ -404,6 +405,12 @@ static void icl_get_stolen_reserved(struct
> drm_i915_private *i915,
>   *size = 8 * 1024 * 1024;
>   MISSING_CASE(reg_val &
> GEN8_STOLEN_RESERVED_SIZE_MASK);
>   }
> +
> + if (HAS_BAR2_SMEM_STOLEN(i915))
> + /* the base is initialized to stolen top so subtract size to get
> base */
> + *base -= *size;
> + else
> + *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
>  }
> 
>  /*
> @@ -833,6 +840,29 @@ static const struct intel_memory_region_ops
> i915_region_stolen_lmem_ops = {
>   .init_object = _i915_gem_object_stolen_init,  };
> 
> +static int mtl_get_gms_size(struct intel_uncore *uncore) {
> + u16 ggc, gms;
> +
> + ggc = intel_uncore_read16(uncore, GGC);
> +
> + /* check GGMS

Re: [PATCH v4 5/5] drm/ofdrm: Support big-endian scanout buffers

2022-09-28 Thread Michal Suchánek
Hello,

On Wed, Sep 28, 2022 at 12:50:10PM +0200, Thomas Zimmermann wrote:
> All DRM formats assume little-endian byte order. On big-endian systems,
> it is likely that the scanout buffer is in big endian as well. Update
> the format accordingly and add endianness conversion to the format-helper
> library. Also opt-in to allocated buffers in host format by default.

This sounds backwards to me.

Skimming through the code it sounds like the buffer is in fact in the
same format all the time but when the CPU is switched to BE it sees the
data loaded from it differently.

Or am I missing something?

Thanks

Michal

> 
> Suggested-by: Geert Uytterhoeven 
> Signed-off-by: Thomas Zimmermann 
> ---
>  drivers/gpu/drm/drm_format_helper.c | 10 ++
>  drivers/gpu/drm/tiny/ofdrm.c| 55 +++--
>  2 files changed, 63 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_format_helper.c 
> b/drivers/gpu/drm/drm_format_helper.c
> index 4afc4ac27342..fca7936db083 100644
> --- a/drivers/gpu/drm/drm_format_helper.c
> +++ b/drivers/gpu/drm/drm_format_helper.c
> @@ -659,6 +659,11 @@ int drm_fb_blit(struct iosys_map *dst, const unsigned 
> int *dst_pitch, uint32_t d
>   drm_fb_xrgb_to_rgb565(dst, dst_pitch, src, fb, 
> clip, false);
>   return 0;
>   }
> + } else if (dst_format == (DRM_FORMAT_RGB565 | DRM_FORMAT_BIG_ENDIAN)) {
> + if (fb_format == DRM_FORMAT_RGB565) {
> + drm_fb_swab(dst, dst_pitch, src, fb, clip, false);
> + return 0;
> + }
>   } else if (dst_format == DRM_FORMAT_RGB888) {
>   if (fb_format == DRM_FORMAT_XRGB) {
>   drm_fb_xrgb_to_rgb888(dst, dst_pitch, src, fb, 
> clip);
> @@ -677,6 +682,11 @@ int drm_fb_blit(struct iosys_map *dst, const unsigned 
> int *dst_pitch, uint32_t d
>   drm_fb_xrgb_to_xrgb2101010(dst, dst_pitch, src, fb, 
> clip);
>   return 0;
>   }
> + } else if (dst_format == DRM_FORMAT_BGRX) {
> + if (fb_format == DRM_FORMAT_XRGB) {
> + drm_fb_swab(dst, dst_pitch, src, fb, clip, false);
> + return 0;
> + }
>   }
>  
>   drm_warn_once(fb->dev, "No conversion helper from %p4cc to %p4cc 
> found.\n",
> diff --git a/drivers/gpu/drm/tiny/ofdrm.c b/drivers/gpu/drm/tiny/ofdrm.c
> index 0bf5eebf6678..6e100a7f5db7 100644
> --- a/drivers/gpu/drm/tiny/ofdrm.c
> +++ b/drivers/gpu/drm/tiny/ofdrm.c
> @@ -94,7 +94,7 @@ static int display_get_validated_int0(struct drm_device 
> *dev, const char *name,
>  }
>  
>  static const struct drm_format_info *display_get_validated_format(struct 
> drm_device *dev,
> -   u32 depth)
> +   u32 depth, 
> bool big_endian)
>  {
>   const struct drm_format_info *info;
>   u32 format;
> @@ -115,6 +115,29 @@ static const struct drm_format_info 
> *display_get_validated_format(struct drm_dev
>   return ERR_PTR(-EINVAL);
>   }
>  
> + /*
> +  * DRM formats assume little-endian byte order. Update the format
> +  * if the scanout buffer uses big-endian ordering.
> +  */
> + if (big_endian) {
> + switch (format) {
> + case DRM_FORMAT_XRGB:
> + format = DRM_FORMAT_BGRX;
> + break;
> + case DRM_FORMAT_ARGB:
> + format = DRM_FORMAT_BGRA;
> + break;
> + case DRM_FORMAT_RGB565:
> + format = DRM_FORMAT_RGB565 | DRM_FORMAT_BIG_ENDIAN;
> + break;
> + case DRM_FORMAT_XRGB1555:
> + format = DRM_FORMAT_XRGB1555 | DRM_FORMAT_BIG_ENDIAN;
> + break;
> + default:
> + break;
> + }
> + }
> +
>   info = drm_format_info(format);
>   if (!info) {
>   drm_err(dev, "cannot find framebuffer format for depth %u\n", 
> depth);
> @@ -134,6 +157,23 @@ static int display_read_u32_of(struct drm_device *dev, 
> struct device_node *of_no
>   return ret;
>  }
>  
> +static bool display_get_big_endian_of(struct drm_device *dev, struct 
> device_node *of_node)
> +{
> + bool big_endian;
> +
> +#ifdef __BIG_ENDIAN
> + big_endian = true;
> + if (of_get_property(of_node, "little-endian", NULL))
> + big_endian = false;
> +#else
> + big_endian = false;
> + if (of_get_property(of_node, "big-endian", NULL))
> + big_endian = true;
> +#endif
> +
> + return big_endian;
> +}
> +
>  static int display_get_width_of(struct drm_device *dev, struct device_node 
> *of_node)
>  {
>   u32 width;
> @@ -613,6 +653,7 @@ static void ofdrm_device_set_gamma_linear(struct 
> ofdrm_de

Re: [PATCH v2 4/4] drm: lcdif: Add support for YUV planes

2022-09-28 Thread Laurent Pinchart
Hi Kieran,

On Wed, Sep 28, 2022 at 12:05:03PM +0100, Kieran Bingham wrote:
> Quoting Laurent Pinchart (2022-09-28 11:05:33)
> > On Wed, Sep 28, 2022 at 10:59:36AM +0100, Kieran Bingham wrote:
> > > Quoting Laurent Pinchart (2022-09-28 01:58:12)
> > > > From: Kieran Bingham 
> > > 
> > > It looks like this has progressed a bit since it left my computer ;-)
> > 
> > I wish the same would be universally true for all patches :-)
> > 
> > > > The LCDIF includes a color space converter that supports YUV input. Use
> > > > it to support YUV planes, either through the converter if the output
> > > > format is RGB, or in conversion bypass mode otherwise.
> > > > 
> > > > Signed-off-by: Kieran Bingham 
> > > > Signed-off-by: Laurent Pinchart 
> > > > ---
> > > > Changes since v1:
> > > > 
> > > > - Support all YCbCr encodings and quantization ranges
> > > > - Drop incorrect comment
> > > > ---
> > > >  drivers/gpu/drm/mxsfb/lcdif_kms.c  | 183 +
> > > >  drivers/gpu/drm/mxsfb/lcdif_regs.h |   5 +-
> > > >  2 files changed, 164 insertions(+), 24 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/mxsfb/lcdif_kms.c 
> > > > b/drivers/gpu/drm/mxsfb/lcdif_kms.c
> > > > index c3622be0c587..b469a90fd50f 100644
> > > > --- a/drivers/gpu/drm/mxsfb/lcdif_kms.c
> > > > +++ b/drivers/gpu/drm/mxsfb/lcdif_kms.c
> > > > @@ -15,6 +15,7 @@
> > > >  #include 
> > > >  #include 
> > > >  #include 
> > > > +#include 
> > > >  #include 
> > > >  #include 
> > > >  #include 
> > > > @@ -32,13 +33,77 @@
> > > >  /* 
> > > > -
> > > >   * CRTC
> > > >   */
> > > > +
> > > > +/*
> > > > + * Despite the reference manual stating the opposite, the D1, D2 and 
> > > > D3 offset
> > > > + * values are added to Y, U and V, not subtracted. They must thus be 
> > > > programmed
> > > > + * with negative values.
> > > > + */
> > > > +static const u32 lcdif_yuv2rgb_coeffs[3][2][6] = {
> > > 
> > > Ick ... I sort of dislike this. It's fine here at the moment, and I like
> > > > the table ... but here we're defining the size of the table based on
> > > external enum values. (Are those ABI stable, perhaps they are already?)
> > > 
> > > If someone were to put 
> > > 
> > >  enum drm_color_encoding {
> > > +DRM_COLOR_LEGACY, 
> > >  DRM_COLOR_YCBCR_BT601,
> > >  DRM_COLOR_YCBCR_BT709,
> > >  DRM_COLOR_YCBCR_BT2020,
> > >  DRM_COLOR_ENCODING_MAX,
> > >  };
> > > 
> > >  enum drm_color_range {
> > >  DRM_COLOR_YCBCR_LIMITED_RANGE,
> > > +  DRM_COLOR_YCBCR_MID_RANGE,
> > >  DRM_COLOR_YCBCR_FULL_RANGE,
> > >  DRM_COLOR_RANGE_MAX,
> > >  };
> > > 
> > > Then this table allocation would be wrong.
> > > 
> > > Perhaps swapping for
> > > 
> > > > +static const u32 
> > > > lcdif_yuv2rgb_coeffs[DRM_COLOR_ENCODING_MAX][DRM_COLOR_RANGE_MAX][6] = {
> > > 
> > > Would be safer ... but longer :-( ? 
> > > 
> > > Anyway, I think the rest of it looks fine, and perhaps these enums are
> > > in the UAPI which would make them stable anyway:
> > 
> > The enums themselves are not exposed in UAPI headers, but userspace
> > depends on the values, which thus have to remain stable.
> 
> And I saw you had to redefine them to use them in libcamera. Perhaps
> they should be in a UAPI header then...

I think that would make sense. Patches are welcome :-)

> > > Reviewed-by: Kieran Bingham 
> > > 
> > > > +   [DRM_COLOR_YCBCR_BT601] = {
> > > > +   [DRM_COLOR_YCBCR_LIMITED_RANGE] = {
> > > > +   CSC0_COEF0_A1(0x012a) | CSC0_COEF0_A2(0x),
> > > > +   CSC0_COEF1_A3(0x01a2) | CSC0_COEF1_B1(0x0123),
> > > > +   CSC0_COEF2_B2(0x079c) | CSC0_COEF2_B3(0x0730),
> > > > +   CSC0_COEF3_C1(0x0124) | CSC0_COEF3_C2(0x0204),
> > > > +   CSC0_COEF4_C3(0x) | CSC0_COEF4_D1(0x01f0),
> > > > +   CSC0_COEF5_D2(0x0180) | CSC0_COEF5_D3(0x0180),
> > > > +   },
> > > > +   [DRM_COLOR_YCBCR_FULL_RANGE] = {
> > > > +   CSC0_COEF0_A1(0x0100) | CSC0_COEF0_A2(0x),
> > > > +   CSC0_COEF1_A3(0x0167) | CSC0_COEF1_B1(0x0100),
> > > > +   CSC0_COEF2_B2(0x07a8) | CSC0_COEF2_B3(0x0749),
> > > > +   CSC0_COEF3_C1(0x0100) | CSC0_COEF3_C2(0x01c6),
> > > > +   CSC0_COEF4_C3(0x) | CSC0_COEF4_D1(0x),
> > > > +   CSC0_COEF5_D2(0x0180) | CSC0_COEF5_D3(0x0180),
> > > > +   },
> > > > +   },
> > > > +   [DRM_COLOR_YCBCR_BT709] = {
> > > > +   [DRM_COLOR_YCBCR_LIMITED_RANGE] = {
> > > > +   CSC0_COEF0_A1(0x012a) | CSC0_COEF0_A2(0x),
> > > > +   CSC0_COEF1_A3(0x01d6) | CSC0_COEF1_B1(0x0123),
> > > > +   CSC0_COEF2_B2(0x07c9) | CSC0_COEF2_B3(0x0778),
> > > > +   

Re: [RFC v2] drm/kms: control display brightness through drm_connector properties

2022-09-28 Thread Ville Syrjälä
On Wed, Sep 28, 2022 at 01:57:18PM +0300, Ville Syrjälä wrote:
> On Wed, Sep 28, 2022 at 01:04:01PM +0300, Jani Nikula wrote:
> > On Fri, 09 Sep 2022, Hans de Goede  wrote:
> > > Hi all,
> > >
> > > Here is v2 of my "drm/kms: control display brightness through 
> > > drm_connector properties" RFC:
> > >
> > > Changes from version 1:
> > > - Drop bl_brightness_0_is_min_brightness from list of new connector
> > >   properties.
> > > - Clearly define that 0 is always min-brightness when setting the 
> > > brightness
> > >   through the connector properties.
> > > - Drop bl_brightness_control_method from list of new connector
> > >   properties.
> > > - Phase 1 of the plan has been completed
> > >
> > > As discussed already several times in the past:
> > >  https://www.x.org/wiki/Events/XDC2014/XDC2014GoedeBacklight/
> > >  
> > > https://lore.kernel.org/all/4b17ba08-39f3-57dd-5aad-d37d844b0...@linux.intel.com/
> > >
> > > The current userspace API for brightness control offered by
> > > /sys/class/backlight devices has various issues:
> > >
> > > 1. There is no way to map the backlight device to a specific
> > >display-output / panel (1)
> > > 2. Controlling the brightness requires root-rights requiring
> > >desktop-environments to use suid-root helpers for this.
> > > 3. The meaning of 0 is not clearly defined, it can be either off,
> > >or minimum brightness at which the display is still readable
> > >(in a low light environment)
> > > 4. It's not possible to change both the gamma and the brightness in the
> > >same KMS atomic commit. You'd want to be able to reduce brightness to
> > >conserve power, and counter the effects of that by changing gamma to
> > >reach a visually similar image. And you'd want to have the changes take
> > >effect at the same time instead of reducing brightness at some frame 
> > > and
> > >change gamma at some other frame. This is pretty much impossible to do
> > >via the sysfs interface.
> > >
> > > As already discussed on various conference's hallway tracks
> > > and as has been proposed on the dri-devel list once before (2),
> > > it seems that there is consensus that the best way to solve these
> > > 2 issues is to add support for controlling a video-output's brightness
> > > through properties on the drm_connector.
> > >
> > > This RFC outlines my plan to try and actually implement this,
> > > which has 3 phases:
> > >
> > >
> > > Phase 1: Stop registering multiple /sys/class/backlight devs for a single 
> > > display
> > > =
> > >
> > > On x86 there can be multiple firmware + direct-hw-access methods
> > > for controlling the backlight and in some cases the kernel registers
> > > multiple backlight-devices for a single internal laptop LCD panel.
> > >
> > > A plan to fix this was posted here:
> > > https://lore.kernel.org/dri-devel/98519ba0-7f18-201a-ea34-652f50343...@redhat.com/
> > > And a pull-req actually implementing this plan has been sent out this 
> > > week:
> > > https://lore.kernel.org/dri-devel/261afe3d-7790-e945-adf6-a2c96c9b1...@redhat.com/
> > >
> > >
> > > Phase 2: Add drm_connector properties mirroring the matching backlight 
> > > device
> > > =
> > >
> > > The plan is to add a drm_connector helper function, which optionally takes
> > > a pointer to the backlight device for the GPU's native backlight device,
> > > which will then mirror the backlight settings from the backlight device
> > > in a set of read/write brightness* properties on the connector.
> > >
> > > This function can then be called by GPU drivers for the drm_connector for
> > > the internal panel and it will then take care of everything. When there
> > > is no native GPU backlight device, or when it should not be used then
> > > (on x86) the helper will use the acpi_video_get_backlight_type() to
> > > determine which backlight-device should be used instead and it will find
> > > + mirror that one.
> > >
> > >
> > > Phase 3: Deprecate /sys/class/backlight uAPI
> > > 
> > >
> > > Once most userspace has moved over to using the new drm_connector
> > > brightness props, a Kconfig option can be added to stop exporting
> > > the backlight-devices under /sys/class/backlight. The plan is to
> > > just disable the sysfs interface and keep the existing backlight-device
> > > internal kernel abstraction as is, since some abstraction for (non GPU
> > > native) backlight devices will be necessary regardless.
> > >
> > > It is unsure if we will ever be able to do this. For example people using
> > > non fully integrated desktop environments like e.g. sway often use custom
> > > scripts bound to hotkeys to get functionality like the brightness
> > > up/down keyboard hotkeys changing the brightness. This typically involves
> > > e.g. the xbacklight utility.
> > >
> > 

Re: [PATCH] drm/display: Don't rewrite link config when setting phy test pattern

2022-09-28 Thread Jani Nikula
On Thu, 15 Sep 2022, Khaled Almahallawy  wrote:
> The sequence for Source DP PHY CTS automation is [2][1]:
> 1- Emulate successful Link Training(LT)
> 2- Short HPD and change link rates and number of lanes by LT.
> (This is same flow for Link Layer CTS)
> 3- Short HPD and change PHY test pattern and swing/pre-emphasis
> levels (This step should not trigger LT)
>
> The problem is with DP PHY compliance setup as follows:
>
>  [DPTX + on board LTTPR]--Main Link--->[Scope]
>   ^ |
>   | |
>   | |
>   --Aux Ch-->[Aux Emulator]
>
> At step 3, before writing TRAINING_LANEx_SET/LINK_QUAL_PATTERN_SET
> to declare the pattern/swing requested by scope, we write link
> config in LINK_BW_SET/LANE_COUNT_SET on a port that has LTTPR.
> As LTTPR snoops aux transaction, LINK_BW_SET/LANE_COUNT_SET writes
> indicate a LT will start [Check DP 2.0 E11 -Sec 3.6.8.2 & 3.6.8.6.3],
> and LTTPR will reset the link and stop sending DP signals to
> DPTX/Scope causing the measurements to fail. Note that step 3 will
> not trigger LT and DP link will never be recovered by the
> Aux Emulator/Scope.
>
> The reset of link can be tested with a monitor connected to LTTPR
> port simply by writing to LINK_BW_SET or LANE_COUNT_SET as follows:
>
>   igt/tools/dpcd_reg write --offset=0x100 --value 0x14 --device=2
>
> OR
>
>   printf '\x14' | sudo dd of=/dev/drm_dp_aux2 bs=1 count=1 conv=notrunc
>   seek=$((0x100))
>
> This single aux write causes the screen to blank, sending short HPD to
> DPTX, setting LINK_STATUS_UPDATE = 1 in DPCD 0x204, and triggering LT.
>
> As stated in [1]:
> "Before any TX electrical testing can be performed, the link between a
> DPTX and DPRX (in this case, a piece of test equipment), including all
> LTTPRs within the path, shall be trained as defined in this Standard."
>
> In addition, changing Phy pattern/Swing/Pre-emphasis (Step 3) uses the
> same link rate and lane count applied on step 2, so no need to redo LT.
>
> The fix is to not rewrite link config in step 3, and just write
> TRAINING_LANEx_SET and LINK_QUAL_PATTERN_SET
>
> [1]: DP 2.0 E11 - 3.6.11.1 LTTPR DPTX_PHY Electrical Compliance
>
> [2]: Configuring UnigrafDPTC Controller - Automation Test Sequence
> https://www.keysight.com/us/en/assets/9922-01244/help-files/
> D9040DPPC-DisplayPort-Test-Software-Online-Help-latest.chm
>
> Cc: Imre Deak 
> Cc: Jani Nikula 
> Cc: Or Cochvi 
> Signed-off-by: Khaled Almahallawy 

Pushed to drm-misc-next, thanks for the patch.

I didn't seek further confirmation because i915 is still the only user
of this function it seems.

BR,
Jani.


> ---
>  drivers/gpu/drm/display/drm_dp_helper.c | 9 -
>  1 file changed, 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/display/drm_dp_helper.c 
> b/drivers/gpu/drm/display/drm_dp_helper.c
> index 92990a3d577a..9f055d9710ea 100644
> --- a/drivers/gpu/drm/display/drm_dp_helper.c
> +++ b/drivers/gpu/drm/display/drm_dp_helper.c
> @@ -2670,17 +2670,8 @@ int drm_dp_set_phy_test_pattern(struct drm_dp_aux *aux,
>   struct drm_dp_phy_test_params *data, u8 dp_rev)
>  {
>   int err, i;
> - u8 link_config[2];
>   u8 test_pattern;
>  
> - link_config[0] = drm_dp_link_rate_to_bw_code(data->link_rate);
> - link_config[1] = data->num_lanes;
> - if (data->enhanced_frame_cap)
> - link_config[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
> - err = drm_dp_dpcd_write(aux, DP_LINK_BW_SET, link_config, 2);
> - if (err < 0)
> - return err;
> -
>   test_pattern = data->phy_pattern;
>   if (dp_rev < 0x12) {
>   test_pattern = (test_pattern << 2) &

-- 
Jani Nikula, Intel Open Source Graphics Center


Re: [PATCH v4 5/5] drm/ofdrm: Support big-endian scanout buffers

2022-09-28 Thread Thomas Zimmermann

Hi

Am 28.09.22 um 13:12 schrieb Michal Suchánek:

Hello,

On Wed, Sep 28, 2022 at 12:50:10PM +0200, Thomas Zimmermann wrote:

All DRM formats assume little-endian byte order. On big-endian systems,
it is likely that the scanout buffer is in big endian as well. Update
the format accordingly and add endianness conversion to the format-helper
library. Also opt-in to allocated buffers in host format by default.


This sounds backwards to me.


In which way?



Skimming through the code it sounds like the buffer is in fact in the
same format all the time but when the CPU is switched to BE it sees the
data loaded from it differently.

Or am I missing something?


Which buffer do you mean? The scanout buffer coming from the firmware, 
or the GEM BOs that are allocated by renderers?


The scanout buffer is either in BE or LE format. According to the code 
in offb, it's signaled by a node in the device tree. I took that code 
into ofdrm and set the scanout format accordingly.


The GEM BO can be in any format. If necessary, ofdrm converts internally 
while copying it to the scanout buffer. The quirk we opt in, makes DRM 
prefer whatever default byteorder the host prefers (BE or LE). According 
to the docs, it's the right thing to do. But that only affects the GEM 
code, not the scanout buffer.


Best regards
Thomas



Thanks

Michal



Suggested-by: Geert Uytterhoeven 
Signed-off-by: Thomas Zimmermann 
---
  drivers/gpu/drm/drm_format_helper.c | 10 ++
  drivers/gpu/drm/tiny/ofdrm.c| 55 +++--
  2 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_format_helper.c 
b/drivers/gpu/drm/drm_format_helper.c
index 4afc4ac27342..fca7936db083 100644
--- a/drivers/gpu/drm/drm_format_helper.c
+++ b/drivers/gpu/drm/drm_format_helper.c
@@ -659,6 +659,11 @@ int drm_fb_blit(struct iosys_map *dst, const unsigned int 
*dst_pitch, uint32_t d
drm_fb_xrgb8888_to_rgb565(dst, dst_pitch, src, fb, 
clip, false);
return 0;
}
+   } else if (dst_format == (DRM_FORMAT_RGB565 | DRM_FORMAT_BIG_ENDIAN)) {
+   if (fb_format == DRM_FORMAT_RGB565) {
+   drm_fb_swab(dst, dst_pitch, src, fb, clip, false);
+   return 0;
+   }
} else if (dst_format == DRM_FORMAT_RGB888) {
if (fb_format == DRM_FORMAT_XRGB8888) {
drm_fb_xrgb8888_to_rgb888(dst, dst_pitch, src, fb, 
clip);
@@ -677,6 +682,11 @@ int drm_fb_blit(struct iosys_map *dst, const unsigned int 
*dst_pitch, uint32_t d
drm_fb_xrgb8888_to_xrgb2101010(dst, dst_pitch, src, fb, 
clip);
return 0;
}
+   } else if (dst_format == DRM_FORMAT_BGRX8888) {
+   if (fb_format == DRM_FORMAT_XRGB8888) {
+   drm_fb_swab(dst, dst_pitch, src, fb, clip, false);
+   return 0;
+   }
}
  
  	drm_warn_once(fb->dev, "No conversion helper from %p4cc to %p4cc found.\n",

diff --git a/drivers/gpu/drm/tiny/ofdrm.c b/drivers/gpu/drm/tiny/ofdrm.c
index 0bf5eebf6678..6e100a7f5db7 100644
--- a/drivers/gpu/drm/tiny/ofdrm.c
+++ b/drivers/gpu/drm/tiny/ofdrm.c
@@ -94,7 +94,7 @@ static int display_get_validated_int0(struct drm_device *dev, 
const char *name,
  }
  
  static const struct drm_format_info *display_get_validated_format(struct drm_device *dev,

- u32 depth)
+ u32 depth, 
bool big_endian)
  {
const struct drm_format_info *info;
u32 format;
@@ -115,6 +115,29 @@ static const struct drm_format_info 
*display_get_validated_format(struct drm_dev
return ERR_PTR(-EINVAL);
}
  
+	/*

+* DRM formats assume little-endian byte order. Update the format
+* if the scanout buffer uses big-endian ordering.
+*/
+   if (big_endian) {
+   switch (format) {
+   case DRM_FORMAT_XRGB8888:
+   format = DRM_FORMAT_BGRX8888;
+   break;
+   case DRM_FORMAT_ARGB8888:
+   format = DRM_FORMAT_BGRA8888;
+   break;
+   case DRM_FORMAT_RGB565:
+   format = DRM_FORMAT_RGB565 | DRM_FORMAT_BIG_ENDIAN;
+   break;
+   case DRM_FORMAT_XRGB1555:
+   format = DRM_FORMAT_XRGB1555 | DRM_FORMAT_BIG_ENDIAN;
+   break;
+   default:
+   break;
+   }
+   }
+
info = drm_format_info(format);
if (!info) {
drm_err(dev, "cannot find framebuffer format for depth %u\n", 
depth);
@@ -134,6 +157,23 @@ static int display_read_u32_of(struct drm_device *dev, 
struct device_node *of_no
return ret;
  }
  
+static b

Re: [PATCH 5/7] nouveau/dmem: Refactor nouveau_dmem_fault_copy_one()

2022-09-28 Thread Alistair Popple


Lyude Paul  writes:

> On Mon, 2022-09-26 at 16:03 +1000, Alistair Popple wrote:
>> nouveau_dmem_fault_copy_one() is used during handling of CPU faults via
>> the migrate_to_ram() callback and is used to copy data from GPU to CPU
>> memory. It is currently specific to fault handling, however a future
>> patch implementing eviction of data during teardown needs similar
>> functionality.
>>
>> Refactor out the core functionality so that it is not specific to fault
>> handling.
>>
>> Signed-off-by: Alistair Popple 
>> ---
>>  drivers/gpu/drm/nouveau/nouveau_dmem.c | 59 +--
>>  1 file changed, 29 insertions(+), 30 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c 
>> b/drivers/gpu/drm/nouveau/nouveau_dmem.c
>> index f9234ed..66ebbd4 100644
>> --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
>> +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
>> @@ -139,44 +139,25 @@ static void nouveau_dmem_fence_done(struct 
>> nouveau_fence **fence)
>>  }
>>  }
>>
>> -static vm_fault_t nouveau_dmem_fault_copy_one(struct nouveau_drm *drm,
>> -struct vm_fault *vmf, struct migrate_vma *args,
>> -dma_addr_t *dma_addr)
>> +static int nouveau_dmem_copy_one(struct nouveau_drm *drm, struct page 
>> *spage,
>> +struct page *dpage, dma_addr_t *dma_addr)
>>  {
>>  struct device *dev = drm->dev->dev;
>> -struct page *dpage, *spage;
>> -struct nouveau_svmm *svmm;
>> -
>> -spage = migrate_pfn_to_page(args->src[0]);
>> -if (!spage || !(args->src[0] & MIGRATE_PFN_MIGRATE))
>> -return 0;
>>
>> -dpage = alloc_page_vma(GFP_HIGHUSER, vmf->vma, vmf->address);
>> -if (!dpage)
>> -return VM_FAULT_SIGBUS;
>>  lock_page(dpage);
>>
>>  *dma_addr = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
>>  if (dma_mapping_error(dev, *dma_addr))
>> -goto error_free_page;
>> +return -EIO;
>>
>> -svmm = spage->zone_device_data;
>> -mutex_lock(&svmm->mutex);
>> -nouveau_svmm_invalidate(svmm, args->start, args->end);
>>  if (drm->dmem->migrate.copy_func(drm, 1, NOUVEAU_APER_HOST, *dma_addr,
>> -NOUVEAU_APER_VRAM, nouveau_dmem_page_addr(spage)))
>> -goto error_dma_unmap;
>> -mutex_unlock(&svmm->mutex);
>> + NOUVEAU_APER_VRAM,
>> + nouveau_dmem_page_addr(spage))) {
>> +dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
>> +return -EIO;
>> +}
>
> Feel free to just align this with the starting (, as long as it doesn't go
> above 100 characters it doesn't really matter imho and would look nicer that
> way.
>
> Otherwise:
>
> Reviewed-by: Lyude Paul 

Thanks! I'm not sure I precisely understood your alignment comment above
but feel free to let me know if I got it wrong in v2.

> Will look at the other patch in a moment
>
>>
>> -args->dst[0] = migrate_pfn(page_to_pfn(dpage));
>>  return 0;
>> -
>> -error_dma_unmap:
>> -mutex_unlock(&svmm->mutex);
>> -dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
>> -error_free_page:
>> -__free_page(dpage);
>> -return VM_FAULT_SIGBUS;
>>  }
>>
>>  static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
>> @@ -184,9 +165,11 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct 
>> vm_fault *vmf)
>>  struct nouveau_drm *drm = page_to_drm(vmf->page);
>>  struct nouveau_dmem *dmem = drm->dmem;
>>  struct nouveau_fence *fence;
>> +struct nouveau_svmm *svmm;
>> +struct page *spage, *dpage;
>>  unsigned long src = 0, dst = 0;
>>  dma_addr_t dma_addr = 0;
>> -vm_fault_t ret;
>> +vm_fault_t ret = 0;
>>  struct migrate_vma args = {
>>  .vma= vmf->vma,
>>  .start  = vmf->address,
>> @@ -207,9 +190,25 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct 
>> vm_fault *vmf)
>>  if (!args.cpages)
>>  return 0;
>>
>> -ret = nouveau_dmem_fault_copy_one(drm, vmf, &args, &dma_addr);
>> -if (ret || dst == 0)
>> +spage = migrate_pfn_to_page(src);
>> +if (!spage || !(src & MIGRATE_PFN_MIGRATE))
>> +goto done;
>> +
>> +dpage = alloc_page_vma(GFP_HIGHUSER, vmf->vma, vmf->address);
>> +if (!dpage)
>> +goto done;
>> +
>> +dst = migrate_pfn(page_to_pfn(dpage));
>> +
>> +svmm = spage->zone_device_data;
>> +mutex_lock(&svmm->mutex);
>> +nouveau_svmm_invalidate(svmm, args.start, args.end);
>> +ret = nouveau_dmem_copy_one(drm, spage, dpage, &dma_addr);
>> +mutex_unlock(&svmm->mutex);
>> +if (ret) {
>> +ret = VM_FAULT_SIGBUS;
>>  goto done;
>> +}
>>
>>  nouveau_fence_new(dmem->migrate.chan, false, &fence);
>>  migrate_vma_pages(&args);


Re: [Intel-gfx] [PATCH] drm/i915: Perf_limit_reasons are only available for Gen11+

2022-09-28 Thread Jani Nikula
On Mon, 19 Sep 2022, Ashutosh Dixit  wrote:
> Register GT0_PERF_LIMIT_REASONS (0x1381a8) is available only for
> Gen11+. Therefore ensure perf_limit_reasons sysfs/debugfs files are created
> only for Gen11+. Otherwise on Gen < 5 accessing these files results in the
> following oops:
>
> <1> [88.829420] BUG: unable to handle page fault for address: c9bb81a8
> <1> [88.829438] #PF: supervisor read access in kernel mode
> <1> [88.829447] #PF: error_code(0x) - not-present page
>
> Bspec: 20008
> Bug: https://gitlab.freedesktop.org/drm/intel/-/issues/6863
> Fixes: fe5979665f64 ("drm/i915/debugfs: Add perf_limit_reasons in debugfs")
> Fixes: fa68bff7cf27 ("drm/i915/gt: Add sysfs throttle frequency interfaces")
> Signed-off-by: Ashutosh Dixit 

Ashutosh, can you provide a backport of this i.e. commit 0d2d201095e9
("drm/i915: Perf_limit_reasons are only available for Gen11+") that
applies cleanly on drm-intel-fixes, please?

BR,
Jani.


> ---
>  drivers/gpu/drm/i915/gt/intel_gt.c|  4 
>  drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 10 +-
>  drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c   | 15 +++
>  3 files changed, 24 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
> b/drivers/gpu/drm/i915/gt/intel_gt.c
> index 5ddae95d4886..b367cfff48d5 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> @@ -233,6 +233,10 @@ static void gen6_clear_engine_error_register(struct 
> intel_engine_cs *engine)
>  
>  i915_reg_t intel_gt_perf_limit_reasons_reg(struct intel_gt *gt)
>  {
> + /* GT0_PERF_LIMIT_REASONS is available only for Gen11+ */
> + if (GRAPHICS_VER(gt->i915) < 11)
> + return INVALID_MMIO_REG;
> +
>   return gt->type == GT_MEDIA ?
>   MTL_MEDIA_PERF_LIMIT_REASONS : GT0_PERF_LIMIT_REASONS;
>  }
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c 
> b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
> index 68310881a793..10f680dbd7b6 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
> @@ -682,6 +682,14 @@ static int perf_limit_reasons_clear(void *data, u64 val)
>  
>   return 0;
>  }
> +
> +static bool perf_limit_reasons_eval(void *data)
> +{
> + struct intel_gt *gt = data;
> +
> + return i915_mmio_reg_valid(intel_gt_perf_limit_reasons_reg(gt));
> +}
> +
>  DEFINE_SIMPLE_ATTRIBUTE(perf_limit_reasons_fops, perf_limit_reasons_get,
>   perf_limit_reasons_clear, "%llu\n");
>  
> @@ -694,7 +702,7 @@ void intel_gt_pm_debugfs_register(struct intel_gt *gt, 
> struct dentry *root)
>   { "forcewake_user", &forcewake_user_fops, NULL},
>   { "llc", &llc_fops, llc_eval },
>   { "rps_boost", &rps_boost_fops, rps_eval },
> - { "perf_limit_reasons", &perf_limit_reasons_fops, NULL },
> + { "perf_limit_reasons", &perf_limit_reasons_fops, 
> perf_limit_reasons_eval },
>   };
>  
>   intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt);
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c 
> b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
> index 54deae45d81f..904160952369 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
> @@ -545,8 +545,7 @@ static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_ratl, 
> RATL_MASK);
>  static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_vr_thermalert, 
> VR_THERMALERT_MASK);
>  static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_vr_tdc, VR_TDC_MASK);
>  
> -static const struct attribute *freq_attrs[] = {
> - &dev_attr_punit_req_freq_mhz.attr,
> +static const struct attribute *throttle_reason_attrs[] = {
>   &attr_throttle_reason_status.attr,
>   &attr_throttle_reason_pl1.attr,
>   &attr_throttle_reason_pl2.attr,
> @@ -791,12 +790,20 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct 
> kobject *kobj)
>   if (!is_object_gt(kobj))
>   return;
>  
> - ret = sysfs_create_files(kobj, freq_attrs);
> + ret = sysfs_create_file(kobj, &dev_attr_punit_req_freq_mhz.attr);
>   if (ret)
>   drm_warn(&gt->i915->drm,
> -  "failed to create gt%u throttle sysfs files (%pe)",
> +  "failed to create gt%u punit_req_freq_mhz sysfs (%pe)",
>gt->info.id, ERR_PTR(ret));
>  
> + if (i915_mmio_reg_valid(intel_gt_perf_limit_reasons_reg(gt))) {
> + ret = sysfs_create_files(kobj, throttle_reason_attrs);
> + if (ret)
>   drm_warn(&gt->i915->drm,
> +  "failed to create gt%u throttle sysfs files 
> (%pe)",
> +  gt->info.id, ERR_PTR(ret));
> + }
> +
>   if (HAS_MEDIA_RATIO_MODE(gt->i915) && intel_uc_uses_guc_slpc(&gt->uc)) {
>   ret = sysfs_create_files(kobj, media_perf_power_attrs);
>   if (

[PATCH v2 0/8] Fix several device private page reference counting issues

2022-09-28 Thread Alistair Popple
This series aims to fix a number of page reference counting issues in
drivers dealing with device private ZONE_DEVICE pages. These result in
use-after-free type bugs, either from accessing a struct page which no
longer exists because it has been removed or accessing fields within the
struct page which are no longer valid because the page has been freed.

During normal usage it is unlikely these will cause any problems. However
without these fixes it is possible to crash the kernel from userspace.
These crashes can be triggered either by unloading the kernel module or
unbinding the device from the driver prior to a userspace task exiting. In
modules such as Nouveau it is also possible to trigger some of these issues
by explicitly closing the device file-descriptor prior to the task exiting
and then accessing device private memory.

This involves some minor changes to both PowerPC and AMD GPU code.
Unfortunately I lack hardware to test either of those so any help there
would be appreciated. The changes mimic what is done for both Nouveau
and hmm-tests though so I doubt they will cause problems.

To: Andrew Morton 
To: linux...@kvack.org
Cc: linux-ker...@vger.kernel.org
Cc: amd-...@lists.freedesktop.org
Cc: nouv...@lists.freedesktop.org
Cc: dri-devel@lists.freedesktop.org

Alistair Popple (8):
  mm/memory.c: Fix race when faulting a device private page
  mm: Free device private pages have zero refcount
  mm/memremap.c: Take a pgmap reference on page allocation
  mm/migrate_device.c: Refactor migrate_vma and migrate_device_coherent_page()
  mm/migrate_device.c: Add migrate_device_range()
  nouveau/dmem: Refactor nouveau_dmem_fault_copy_one()
  nouveau/dmem: Evict device private memory during release
  hmm-tests: Add test for migrate_device_range()

 arch/powerpc/kvm/book3s_hv_uvmem.c   |  17 +-
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |  19 +-
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c |  11 +-
 drivers/gpu/drm/nouveau/nouveau_dmem.c   | 108 +++
 include/linux/memremap.h |   1 +-
 include/linux/migrate.h  |  15 ++-
 lib/test_hmm.c   | 129 ++---
 lib/test_hmm_uapi.h  |   1 +-
 mm/memory.c  |  16 +-
 mm/memremap.c|  30 ++-
 mm/migrate.c |  34 +--
 mm/migrate_device.c  | 239 +---
 mm/page_alloc.c  |   8 +-
 tools/testing/selftests/vm/hmm-tests.c   |  49 +-
 15 files changed, 516 insertions(+), 163 deletions(-)

base-commit: 088b8aa537c2c767765f1c19b555f21ffe555786
-- 
git-series 0.9.1


[PATCH v2 3/8] mm/memremap.c: Take a pgmap reference on page allocation

2022-09-28 Thread Alistair Popple
ZONE_DEVICE pages have a struct dev_pagemap which is allocated by a
driver. When the struct page is first allocated by the kernel in
memremap_pages() a reference is taken on the associated pagemap to
ensure it is not freed prior to the pages being freed.

Prior to 27674ef6c73f ("mm: remove the extra ZONE_DEVICE struct page
refcount") pages were considered free and returned to the driver when
the reference count dropped to one. However the pagemap reference was
not dropped until the page reference count hit zero. This would occur as
part of the final put_page() in memunmap_pages() which would wait for
all pages to be freed prior to returning.

When the extra refcount was removed the pagemap reference was no longer
being dropped in put_page(). Instead memunmap_pages() was changed to
explicitly drop the pagemap references. This means that memunmap_pages()
can complete even though pages are still mapped by the kernel which can
lead to kernel crashes, particularly if a driver frees the pagemap.

To fix this drivers should take a pagemap reference when allocating the
page. This reference can then be returned when the page is freed.

Signed-off-by: Alistair Popple 
Fixes: 27674ef6c73f ("mm: remove the extra ZONE_DEVICE struct page refcount")
Cc: Jason Gunthorpe 
Cc: Felix Kuehling 
Cc: Alex Deucher 
Cc: Christian König 
Cc: Ben Skeggs 
Cc: Lyude Paul 
Cc: Ralph Campbell 
Cc: Alex Sierra 
Cc: John Hubbard 
Cc: Dan Williams 

---

Again I expect this will conflict with Dan's series. This implements the
first suggestion from Jason at
https://lore.kernel.org/linux-mm/yzly5jjof0jdl...@nvidia.com/ so
whatever we end up doing for DAX we should do the same here.
---
 mm/memremap.c | 25 +++--
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/mm/memremap.c b/mm/memremap.c
index 1c2c038..421bec3 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -138,8 +138,11 @@ void memunmap_pages(struct dev_pagemap *pgmap)
int i;
 
percpu_ref_kill(&pgmap->ref);
-   for (i = 0; i < pgmap->nr_range; i++)
-   percpu_ref_put_many(&pgmap->ref, pfn_len(pgmap, i));
+   if (pgmap->type != MEMORY_DEVICE_PRIVATE &&
+   pgmap->type != MEMORY_DEVICE_COHERENT)
+   for (i = 0; i < pgmap->nr_range; i++)
+   percpu_ref_put_many(&pgmap->ref, pfn_len(pgmap, i));
+
wait_for_completion(&pgmap->done);
 
for (i = 0; i < pgmap->nr_range; i++)
@@ -264,7 +267,9 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct 
mhp_params *params,
memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
PHYS_PFN(range->start),
PHYS_PFN(range_len(range)), pgmap);
-   percpu_ref_get_many(&pgmap->ref, pfn_len(pgmap, range_id));
+   if (pgmap->type != MEMORY_DEVICE_PRIVATE &&
+   pgmap->type != MEMORY_DEVICE_COHERENT)
+   percpu_ref_get_many(&pgmap->ref, pfn_len(pgmap, range_id));
return 0;
 
 err_add_memory:
@@ -502,16 +507,24 @@ void free_zone_device_page(struct page *page)
page->mapping = NULL;
page->pgmap->ops->page_free(page);
 
-   /*
-* Reset the page count to 1 to prepare for handing out the page again.
-*/
if (page->pgmap->type != MEMORY_DEVICE_PRIVATE &&
page->pgmap->type != MEMORY_DEVICE_COHERENT)
+   /*
+* Reset the page count to 1 to prepare for handing out the page
+* again.
+*/
set_page_count(page, 1);
+   else
+   put_dev_pagemap(page->pgmap);
 }
 
 void zone_device_page_init(struct page *page)
 {
+   /*
+* Drivers shouldn't be allocating pages after calling
+* memunmap_pages().
+*/
+   WARN_ON_ONCE(!percpu_ref_tryget_live(&page->pgmap->ref));
set_page_count(page, 1);
lock_page(page);
 }
-- 
git-series 0.9.1


[PATCH v2 1/8] mm/memory.c: Fix race when faulting a device private page

2022-09-28 Thread Alistair Popple
When the CPU tries to access a device private page the migrate_to_ram()
callback associated with the pgmap for the page is called. However no
reference is taken on the faulting page. Therefore a concurrent
migration of the device private page can free the page and possibly the
underlying pgmap. This results in a race which can crash the kernel due
to the migrate_to_ram() function pointer becoming invalid. It also means
drivers can't reliably read the zone_device_data field because the page
may have been freed with memunmap_pages().

Close the race by getting a reference on the page while holding the ptl
to ensure it has not been freed. Unfortunately the elevated reference
count will cause the migration required to handle the fault to fail. To
avoid this failure pass the faulting page into the migrate_vma functions
so that if an elevated reference count is found it can be checked to see
if it's expected or not.

Signed-off-by: Alistair Popple 
Cc: Jason Gunthorpe 
Cc: John Hubbard 
Cc: Ralph Campbell 
Cc: Michael Ellerman 
Cc: Felix Kuehling 
Cc: Lyude Paul 
---
 arch/powerpc/kvm/book3s_hv_uvmem.c   | 15 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 17 +++--
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 11 +---
 include/linux/migrate.h  |  8 ++-
 lib/test_hmm.c   |  7 ++---
 mm/memory.c  | 16 +++-
 mm/migrate.c | 34 ++---
 mm/migrate_device.c  | 18 +
 9 files changed, 87 insertions(+), 41 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c 
b/arch/powerpc/kvm/book3s_hv_uvmem.c
index 5980063..d4eacf4 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -508,10 +508,10 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
 static int __kvmppc_svm_page_out(struct vm_area_struct *vma,
unsigned long start,
unsigned long end, unsigned long page_shift,
-   struct kvm *kvm, unsigned long gpa)
+   struct kvm *kvm, unsigned long gpa, struct page *fault_page)
 {
unsigned long src_pfn, dst_pfn = 0;
-   struct migrate_vma mig;
+   struct migrate_vma mig = { 0 };
struct page *dpage, *spage;
struct kvmppc_uvmem_page_pvt *pvt;
unsigned long pfn;
@@ -525,6 +525,7 @@ static int __kvmppc_svm_page_out(struct vm_area_struct *vma,
mig.dst = &dst_pfn;
mig.pgmap_owner = &kvmppc_uvmem_pgmap;
mig.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
+   mig.fault_page = fault_page;
 
/* The requested page is already paged-out, nothing to do */
if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL))
@@ -580,12 +581,14 @@ static int __kvmppc_svm_page_out(struct vm_area_struct 
*vma,
 static inline int kvmppc_svm_page_out(struct vm_area_struct *vma,
  unsigned long start, unsigned long end,
  unsigned long page_shift,
- struct kvm *kvm, unsigned long gpa)
+ struct kvm *kvm, unsigned long gpa,
+ struct page *fault_page)
 {
int ret;
 
mutex_lock(&kvm->arch.uvmem_lock);
-   ret = __kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa);
+   ret = __kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa,
+   fault_page);
mutex_unlock(&kvm->arch.uvmem_lock);
 
return ret;
@@ -736,7 +739,7 @@ static int kvmppc_svm_page_in(struct vm_area_struct *vma,
bool pagein)
 {
unsigned long src_pfn, dst_pfn = 0;
-   struct migrate_vma mig;
+   struct migrate_vma mig = { 0 };
struct page *spage;
unsigned long pfn;
struct page *dpage;
@@ -994,7 +997,7 @@ static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct 
vm_fault *vmf)
 
if (kvmppc_svm_page_out(vmf->vma, vmf->address,
vmf->address + PAGE_SIZE, PAGE_SHIFT,
-   pvt->kvm, pvt->gpa))
+   pvt->kvm, pvt->gpa, vmf->page))
return VM_FAULT_SIGBUS;
else
return 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index b059a77..776448b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -409,7 +409,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
uint64_t npages = (end - start) >> PAGE_SHIFT;
struct kfd_process_device *pdd;
struct dma_fence *mfence = NULL;
-   struct migrate_vma migrate;
+   struct migrate_vma migrate = { 0 };
unsigned long cpages = 0;
dma_addr_t *scratch;
vo

[PATCH v2 2/8] mm: Free device private pages have zero refcount

2022-09-28 Thread Alistair Popple
Since 27674ef6c73f ("mm: remove the extra ZONE_DEVICE struct page
refcount") device private pages have no longer had an extra reference
count when the page is in use. However before handing them back to the
owning device driver we add an extra reference count such that free
pages have a reference count of one.

This makes it difficult to tell if a page is free or not because both
free and in use pages will have a non-zero refcount. Instead we should
return pages to the driver's page allocator with a zero reference count.
Kernel code can then safely use kernel functions such as
get_page_unless_zero().

Signed-off-by: Alistair Popple 
Cc: Jason Gunthorpe 
Cc: Michael Ellerman 
Cc: Felix Kuehling 
Cc: Alex Deucher 
Cc: Christian König 
Cc: Ben Skeggs 
Cc: Lyude Paul 
Cc: Ralph Campbell 
Cc: Alex Sierra 
Cc: John Hubbard 
Cc: Dan Williams 

---

This will conflict with Dan's series to fix reference counts for DAX[1].
At the moment this only makes changes for device private and coherent
pages, however if DAX is fixed to remove the extra refcount then we
should just be able to drop the checks for private/coherent pages and
treat them the same.

[1] - 
https://lore.kernel.org/linux-mm/166329930818.2786261.6086109734008025807.st...@dwillia2-xfh.jf.intel.com/
---
 arch/powerpc/kvm/book3s_hv_uvmem.c   |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |  2 +-
 drivers/gpu/drm/nouveau/nouveau_dmem.c   |  2 +-
 include/linux/memremap.h |  1 +
 lib/test_hmm.c   |  2 +-
 mm/memremap.c|  9 +
 mm/page_alloc.c  |  8 
 7 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c 
b/arch/powerpc/kvm/book3s_hv_uvmem.c
index d4eacf4..9d8de68 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -718,7 +718,7 @@ static struct page *kvmppc_uvmem_get_page(unsigned long 
gpa, struct kvm *kvm)
 
dpage = pfn_to_page(uvmem_pfn);
dpage->zone_device_data = pvt;
-   lock_page(dpage);
+   zone_device_page_init(dpage);
return dpage;
 out_clear:
spin_lock(&kvmppc_uvmem_bitmap_lock);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 776448b..97a6845 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -223,7 +223,7 @@ svm_migrate_get_vram_page(struct svm_range *prange, 
unsigned long pfn)
page = pfn_to_page(pfn);
svm_range_bo_ref(prange->svm_bo);
page->zone_device_data = prange->svm_bo;
-   lock_page(page);
+   zone_device_page_init(page);
 }
 
 static void
diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c 
b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index 1635661..b092988 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -326,7 +326,7 @@ nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm)
return NULL;
}
 
-   lock_page(page);
+   zone_device_page_init(page);
return page;
 }
 
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 1901049..f68bf6d 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -182,6 +182,7 @@ static inline bool folio_is_device_coherent(const struct 
folio *folio)
 }
 
 #ifdef CONFIG_ZONE_DEVICE
+void zone_device_page_init(struct page *page);
 void *memremap_pages(struct dev_pagemap *pgmap, int nid);
 void memunmap_pages(struct dev_pagemap *pgmap);
 void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index 89463ff..688c15d 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -627,8 +627,8 @@ static struct page *dmirror_devmem_alloc_page(struct 
dmirror_device *mdevice)
goto error;
}
 
+   zone_device_page_init(dpage);
dpage->zone_device_data = rpage;
-   lock_page(dpage);
return dpage;
 
 error:
diff --git a/mm/memremap.c b/mm/memremap.c
index 25029a4..1c2c038 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -505,8 +505,17 @@ void free_zone_device_page(struct page *page)
/*
 * Reset the page count to 1 to prepare for handing out the page again.
 */
+   if (page->pgmap->type != MEMORY_DEVICE_PRIVATE &&
+   page->pgmap->type != MEMORY_DEVICE_COHERENT)
+   set_page_count(page, 1);
+}
+
+void zone_device_page_init(struct page *page)
+{
set_page_count(page, 1);
+   lock_page(page);
 }
+EXPORT_SYMBOL_GPL(zone_device_page_init);
 
 #ifdef CONFIG_FS_DAX
 bool __put_devmap_managed_page_refs(struct page *page, int refs)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9d49803..4df1e43 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6744,6 +6744,14 @@ static void __ref __init_zone_device_page(struct page 
*page, unsigned long pfn,
se

[PATCH v2 4/8] mm/migrate_device.c: Refactor migrate_vma and migrate_device_coherent_page()

2022-09-28 Thread Alistair Popple
migrate_device_coherent_page() reuses the existing migrate_vma family of
functions to migrate a specific page without providing a valid mapping
or vma. This looks a bit odd because it means we are calling
migrate_vma_*() without setting a valid vma, however it was considered
acceptable at the time because the details were internal to
migrate_device.c and there was only a single user.

One of the reasons the details could be kept internal was that this was
strictly for migrating device coherent memory. Such memory can be copied
directly by the CPU without intervention from a driver. However this
isn't true for device private memory, and a future change requires
similar functionality for device private memory. So refactor the code
into something more sensible for migrating device memory without a vma.

Signed-off-by: Alistair Popple 
Cc: "Huang, Ying" 
Cc: Zi Yan 
Cc: Matthew Wilcox 
Cc: Yang Shi 
Cc: David Hildenbrand 
Cc: Ralph Campbell 
Cc: John Hubbard 
---
 mm/migrate_device.c | 150 +
 1 file changed, 85 insertions(+), 65 deletions(-)

diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index f756c00..ba479b5 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -345,26 +345,20 @@ static bool migrate_vma_check_page(struct page *page, 
struct page *fault_page)
 }
 
 /*
- * migrate_vma_unmap() - replace page mapping with special migration pte entry
- * @migrate: migrate struct containing all migration information
- *
- * Isolate pages from the LRU and replace mappings (CPU page table pte) with a
- * special migration pte entry and check if it has been pinned. Pinned pages 
are
- * restored because we cannot migrate them.
- *
- * This is the last step before we call the device driver callback to allocate
- * destination memory and copy contents of original page over to new page.
+ * Unmaps pages for migration. Returns number of unmapped pages.
  */
-static void migrate_vma_unmap(struct migrate_vma *migrate)
+static unsigned long migrate_device_unmap(unsigned long *src_pfns,
+ unsigned long npages,
+ struct page *fault_page)
 {
-   const unsigned long npages = migrate->npages;
unsigned long i, restore = 0;
bool allow_drain = true;
+   unsigned long unmapped = 0;
 
lru_add_drain();
 
for (i = 0; i < npages; i++) {
-   struct page *page = migrate_pfn_to_page(migrate->src[i]);
+   struct page *page = migrate_pfn_to_page(src_pfns[i]);
struct folio *folio;
 
if (!page)
@@ -379,8 +373,7 @@ static void migrate_vma_unmap(struct migrate_vma *migrate)
}
 
if (isolate_lru_page(page)) {
-   migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
-   migrate->cpages--;
+   src_pfns[i] &= ~MIGRATE_PFN_MIGRATE;
restore++;
continue;
}
@@ -394,34 +387,54 @@ static void migrate_vma_unmap(struct migrate_vma *migrate)
try_to_migrate(folio, 0);
 
if (page_mapped(page) ||
-   !migrate_vma_check_page(page, migrate->fault_page)) {
+   !migrate_vma_check_page(page, fault_page)) {
if (!is_zone_device_page(page)) {
get_page(page);
putback_lru_page(page);
}
 
-   migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
-   migrate->cpages--;
+   src_pfns[i] &= ~MIGRATE_PFN_MIGRATE;
restore++;
continue;
}
+
+   unmapped++;
}
 
for (i = 0; i < npages && restore; i++) {
-   struct page *page = migrate_pfn_to_page(migrate->src[i]);
+   struct page *page = migrate_pfn_to_page(src_pfns[i]);
struct folio *folio;
 
-   if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE))
+   if (!page || (src_pfns[i] & MIGRATE_PFN_MIGRATE))
continue;
 
folio = page_folio(page);
remove_migration_ptes(folio, folio, false);
 
-   migrate->src[i] = 0;
+   src_pfns[i] = 0;
folio_unlock(folio);
folio_put(folio);
restore--;
}
+
+   return unmapped;
+}
+
+/*
+ * migrate_vma_unmap() - replace page mapping with special migration pte entry
+ * @migrate: migrate struct containing all migration information
+ *
+ * Isolate pages from the LRU and replace mappings (CPU page table pte) with a
+ * special migration pte entry and check if it has been pinned. Pinned pages 
are
+ * restored because we cannot migrate them.
+ *
+ * This is

[PATCH v2 5/8] mm/migrate_device.c: Add migrate_device_range()

2022-09-28 Thread Alistair Popple
Device drivers can use the migrate_vma family of functions to migrate
existing private anonymous mappings to device private pages. These pages
are backed by memory on the device with drivers being responsible for
copying data to and from device memory.

Device private pages are freed via the pgmap->page_free() callback when
they are unmapped and their refcount drops to zero. Alternatively they
may be freed indirectly via migration back to CPU memory in response to
a pgmap->migrate_to_ram() callback called whenever the CPU accesses
an address mapped to a device private page.

In other words drivers cannot control the lifetime of data allocated on
the devices and must wait until these pages are freed from userspace.
This causes issues when memory needs to be reclaimed on the device, either
because the device is going away due to a ->release() callback or
because another user needs to use the memory.

Drivers could use the existing migrate_vma functions to migrate data off
the device. However this would require them to track the mappings of
each page which is both complicated and not always possible. Instead
drivers need to be able to migrate device pages directly so they can
free up device memory.

To allow that this patch introduces the migrate_device family of
functions which are functionally similar to migrate_vma but which skip
the initial lookup based on mapping.

Signed-off-by: Alistair Popple 
Cc: "Huang, Ying" 
Cc: Zi Yan 
Cc: Matthew Wilcox 
Cc: Yang Shi 
Cc: David Hildenbrand 
Cc: Ralph Campbell 
Cc: John Hubbard 
---
 include/linux/migrate.h |  7 +++-
 mm/migrate_device.c | 89 ++
 2 files changed, 89 insertions(+), 7 deletions(-)

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 82ffa47..582cdc7 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -225,6 +225,13 @@ struct migrate_vma {
 int migrate_vma_setup(struct migrate_vma *args);
 void migrate_vma_pages(struct migrate_vma *migrate);
 void migrate_vma_finalize(struct migrate_vma *migrate);
+int migrate_device_range(unsigned long *src_pfns, unsigned long start,
+   unsigned long npages);
+void migrate_device_pages(unsigned long *src_pfns, unsigned long *dst_pfns,
+   unsigned long npages);
+void migrate_device_finalize(unsigned long *src_pfns,
+   unsigned long *dst_pfns, unsigned long npages);
+
 #endif /* CONFIG_MIGRATION */
 
 #endif /* _LINUX_MIGRATE_H */
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index ba479b5..824860a 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -681,7 +681,7 @@ static void migrate_vma_insert_page(struct migrate_vma 
*migrate,
*src &= ~MIGRATE_PFN_MIGRATE;
 }
 
-static void migrate_device_pages(unsigned long *src_pfns,
+static void __migrate_device_pages(unsigned long *src_pfns,
unsigned long *dst_pfns, unsigned long npages,
struct migrate_vma *migrate)
 {
@@ -703,6 +703,9 @@ static void migrate_device_pages(unsigned long *src_pfns,
if (!page) {
unsigned long addr;
 
+   if (!(src_pfns[i] & MIGRATE_PFN_MIGRATE))
+   continue;
+
/*
 * The only time there is no vma is when called from
 * migrate_device_coherent_page(). However this isn't
@@ -710,8 +713,6 @@ static void migrate_device_pages(unsigned long *src_pfns,
 */
VM_BUG_ON(!migrate);
addr = migrate->start + i*PAGE_SIZE;
-   if (!(src_pfns[i] & MIGRATE_PFN_MIGRATE))
-   continue;
if (!notified) {
notified = true;
 
@@ -767,6 +768,22 @@ static void migrate_device_pages(unsigned long *src_pfns,
 }
 
 /**
+ * migrate_device_pages() - migrate meta-data from src page to dst page
+ * @src_pfns: src_pfns returned from migrate_device_range()
+ * @dst_pfns: array of pfns allocated by the driver to migrate memory to
+ * @npages: number of pages in the range
+ *
+ * Equivalent to migrate_vma_pages(). This is called to migrate struct page
+ * meta-data from source struct page to destination.
+ */
+void migrate_device_pages(unsigned long *src_pfns, unsigned long *dst_pfns,
+   unsigned long npages)
+{
+   __migrate_device_pages(src_pfns, dst_pfns, npages, NULL);
+}
+EXPORT_SYMBOL(migrate_device_pages);
+
+/**
  * migrate_vma_pages() - migrate meta-data from src page to dst page
  * @migrate: migrate struct containing all migration information
  *
@@ -776,12 +793,22 @@ static void migrate_device_pages(unsigned long *src_pfns,
  */
 void migrate_vma_pages(struct migrate_vma *migrate)
 {
-   migrate_device_pages(migrate->src, migrate->dst, migrate->npages, 
migrate);
+   __migrate_device_p

[PATCH v2 6/8] nouveau/dmem: Refactor nouveau_dmem_fault_copy_one()

2022-09-28 Thread Alistair Popple
nouveau_dmem_fault_copy_one() is used during handling of CPU faults via
the migrate_to_ram() callback and is used to copy data from GPU to CPU
memory. It is currently specific to fault handling, however a future
patch implementing eviction of data during teardown needs similar
functionality.

Refactor out the core functionality so that it is not specific to fault
handling.

Signed-off-by: Alistair Popple 
Reviewed-by: Lyude Paul 
Cc: Ben Skeggs 
Cc: Ralph Campbell 
Cc: John Hubbard 
---
 drivers/gpu/drm/nouveau/nouveau_dmem.c | 58 +--
 1 file changed, 28 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c 
b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index b092988..65f51fb 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -139,44 +139,24 @@ static void nouveau_dmem_fence_done(struct nouveau_fence 
**fence)
}
 }
 
-static vm_fault_t nouveau_dmem_fault_copy_one(struct nouveau_drm *drm,
-   struct vm_fault *vmf, struct migrate_vma *args,
-   dma_addr_t *dma_addr)
+static int nouveau_dmem_copy_one(struct nouveau_drm *drm, struct page *spage,
+   struct page *dpage, dma_addr_t *dma_addr)
 {
struct device *dev = drm->dev->dev;
-   struct page *dpage, *spage;
-   struct nouveau_svmm *svmm;
-
-   spage = migrate_pfn_to_page(args->src[0]);
-   if (!spage || !(args->src[0] & MIGRATE_PFN_MIGRATE))
-   return 0;
 
-   dpage = alloc_page_vma(GFP_HIGHUSER, vmf->vma, vmf->address);
-   if (!dpage)
-   return VM_FAULT_SIGBUS;
lock_page(dpage);
 
*dma_addr = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
if (dma_mapping_error(dev, *dma_addr))
-   goto error_free_page;
+   return -EIO;
 
-   svmm = spage->zone_device_data;
-   mutex_lock(&svmm->mutex);
-   nouveau_svmm_invalidate(svmm, args->start, args->end);
if (drm->dmem->migrate.copy_func(drm, 1, NOUVEAU_APER_HOST, *dma_addr,
-   NOUVEAU_APER_VRAM, nouveau_dmem_page_addr(spage)))
-   goto error_dma_unmap;
-   mutex_unlock(&svmm->mutex);
+NOUVEAU_APER_VRAM, 
nouveau_dmem_page_addr(spage))) {
+   dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+   return -EIO;
+   }
 
-   args->dst[0] = migrate_pfn(page_to_pfn(dpage));
return 0;
-
-error_dma_unmap:
-   mutex_unlock(&svmm->mutex);
-   dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
-error_free_page:
-   __free_page(dpage);
-   return VM_FAULT_SIGBUS;
 }
 
 static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
@@ -184,9 +164,11 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct 
vm_fault *vmf)
struct nouveau_drm *drm = page_to_drm(vmf->page);
struct nouveau_dmem *dmem = drm->dmem;
struct nouveau_fence *fence;
+   struct nouveau_svmm *svmm;
+   struct page *spage, *dpage;
unsigned long src = 0, dst = 0;
dma_addr_t dma_addr = 0;
-   vm_fault_t ret;
+   vm_fault_t ret = 0;
struct migrate_vma args = {
.vma= vmf->vma,
.start  = vmf->address,
@@ -207,9 +189,25 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct 
vm_fault *vmf)
if (!args.cpages)
return 0;
 
-   ret = nouveau_dmem_fault_copy_one(drm, vmf, &args, &dma_addr);
-   if (ret || dst == 0)
+   spage = migrate_pfn_to_page(src);
+   if (!spage || !(src & MIGRATE_PFN_MIGRATE))
+   goto done;
+
+   dpage = alloc_page_vma(GFP_HIGHUSER, vmf->vma, vmf->address);
+   if (!dpage)
+   goto done;
+
+   dst = migrate_pfn(page_to_pfn(dpage));
+
+   svmm = spage->zone_device_data;
+   mutex_lock(&svmm->mutex);
+   nouveau_svmm_invalidate(svmm, args.start, args.end);
+   ret = nouveau_dmem_copy_one(drm, spage, dpage, &dma_addr);
+   mutex_unlock(&svmm->mutex);
+   if (ret) {
+   ret = VM_FAULT_SIGBUS;
goto done;
+   }
 
nouveau_fence_new(dmem->migrate.chan, false, &fence);
migrate_vma_pages(&args);
-- 
git-series 0.9.1


[PATCH v2 7/8] nouveau/dmem: Evict device private memory during release

2022-09-28 Thread Alistair Popple
When the module is unloaded or a GPU is unbound from the module it is
possible for device private pages to still be mapped in currently
running processes. This can lead to hangs and RCU stall warnings when
unbinding the device as memunmap_pages() will wait in an uninterruptible
state until all device pages have been freed which may never happen.

Fix this by migrating device mappings back to normal CPU memory prior to
freeing the GPU memory chunks and associated device private pages.

Signed-off-by: Alistair Popple 
Cc: Lyude Paul 
Cc: Ben Skeggs 
Cc: Ralph Campbell 
Cc: John Hubbard 
---
 drivers/gpu/drm/nouveau/nouveau_dmem.c | 48 +++-
 1 file changed, 48 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c 
b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index 65f51fb..5fe2091 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -367,6 +367,52 @@ nouveau_dmem_suspend(struct nouveau_drm *drm)
mutex_unlock(&drm->dmem->mutex);
 }
 
+/*
+ * Evict all pages mapping a chunk.
+ */
+static void
+nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk)
+{
+   unsigned long i, npages = range_len(&chunk->pagemap.range) >> 
PAGE_SHIFT;
+   unsigned long *src_pfns, *dst_pfns;
+   dma_addr_t *dma_addrs;
+   struct nouveau_fence *fence;
+
+   src_pfns = kcalloc(npages, sizeof(*src_pfns), GFP_KERNEL);
+   dst_pfns = kcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL);
+   dma_addrs = kcalloc(npages, sizeof(*dma_addrs), GFP_KERNEL);
+
+   migrate_device_range(src_pfns, chunk->pagemap.range.start >> PAGE_SHIFT,
+   npages);
+
+   for (i = 0; i < npages; i++) {
+   if (src_pfns[i] & MIGRATE_PFN_MIGRATE) {
+   struct page *dpage;
+
+   /*
+* __GFP_NOFAIL because the GPU is going away and there
+* is nothing sensible we can do if we can't copy the
+* data back.
+*/
+   dpage = alloc_page(GFP_HIGHUSER | __GFP_NOFAIL);
+   dst_pfns[i] = migrate_pfn(page_to_pfn(dpage));
+   nouveau_dmem_copy_one(chunk->drm,
+   migrate_pfn_to_page(src_pfns[i]), dpage,
+   &dma_addrs[i]);
+   }
+   }
+
+   nouveau_fence_new(chunk->drm->dmem->migrate.chan, false, &fence);
+   migrate_device_pages(src_pfns, dst_pfns, npages);
+   nouveau_dmem_fence_done(&fence);
+   migrate_device_finalize(src_pfns, dst_pfns, npages);
+   kfree(src_pfns);
+   kfree(dst_pfns);
+   for (i = 0; i < npages; i++)
+   dma_unmap_page(chunk->drm->dev->dev, dma_addrs[i], PAGE_SIZE, 
DMA_BIDIRECTIONAL);
+   kfree(dma_addrs);
+}
+
 void
 nouveau_dmem_fini(struct nouveau_drm *drm)
 {
@@ -378,8 +424,10 @@ nouveau_dmem_fini(struct nouveau_drm *drm)
mutex_lock(&drm->dmem->mutex);
 
list_for_each_entry_safe(chunk, tmp, &drm->dmem->chunks, list) {
+   nouveau_dmem_evict_chunk(chunk);
nouveau_bo_unpin(chunk->bo);
nouveau_bo_ref(NULL, &chunk->bo);
+   WARN_ON(chunk->callocated);
list_del(&chunk->list);
memunmap_pages(&chunk->pagemap);
release_mem_region(chunk->pagemap.range.start,
-- 
git-series 0.9.1


[PATCH v2 8/8] hmm-tests: Add test for migrate_device_range()

2022-09-28 Thread Alistair Popple
Signed-off-by: Alistair Popple 
Cc: Jason Gunthorpe 
Cc: Ralph Campbell 
Cc: John Hubbard 
Cc: Alex Sierra 
Cc: Felix Kuehling 
---
 lib/test_hmm.c | 120 +-
 lib/test_hmm_uapi.h|   1 +-
 tools/testing/selftests/vm/hmm-tests.c |  49 +++-
 3 files changed, 149 insertions(+), 21 deletions(-)

diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index 688c15d..6c2fc85 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -100,6 +100,7 @@ struct dmirror {
 struct dmirror_chunk {
struct dev_pagemap  pagemap;
struct dmirror_device   *mdevice;
+   bool remove;
 };
 
 /*
@@ -192,11 +193,15 @@ static int dmirror_fops_release(struct inode *inode, 
struct file *filp)
return 0;
 }
 
+static struct dmirror_chunk *dmirror_page_to_chunk(struct page *page)
+{
+   return container_of(page->pgmap, struct dmirror_chunk, pagemap);
+}
+
 static struct dmirror_device *dmirror_page_to_device(struct page *page)
 
 {
-   return container_of(page->pgmap, struct dmirror_chunk,
-   pagemap)->mdevice;
+   return dmirror_page_to_chunk(page)->mdevice;
 }
 
 static int dmirror_do_fault(struct dmirror *dmirror, struct hmm_range *range)
@@ -1218,6 +1223,85 @@ static int dmirror_snapshot(struct dmirror *dmirror,
return ret;
 }
 
+static void dmirror_device_evict_chunk(struct dmirror_chunk *chunk)
+{
+   unsigned long start_pfn = chunk->pagemap.range.start >> PAGE_SHIFT;
+   unsigned long end_pfn = chunk->pagemap.range.end >> PAGE_SHIFT;
+   unsigned long npages = end_pfn - start_pfn + 1;
+   unsigned long i;
+   unsigned long *src_pfns;
+   unsigned long *dst_pfns;
+
+   src_pfns = kcalloc(npages, sizeof(*src_pfns), GFP_KERNEL);
+   dst_pfns = kcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL);
+
+   migrate_device_range(src_pfns, start_pfn, npages);
+   for (i = 0; i < npages; i++) {
+   struct page *dpage, *spage;
+
+   spage = migrate_pfn_to_page(src_pfns[i]);
+   if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE))
+   continue;
+
+   if (WARN_ON(!is_device_private_page(spage) &&
+   !is_device_coherent_page(spage)))
+   continue;
+   spage = BACKING_PAGE(spage);
+   dpage = alloc_page(GFP_HIGHUSER_MOVABLE | __GFP_NOFAIL);
+   lock_page(dpage);
+   copy_highpage(dpage, spage);
+   dst_pfns[i] = migrate_pfn(page_to_pfn(dpage));
+   if (src_pfns[i] & MIGRATE_PFN_WRITE)
+   dst_pfns[i] |= MIGRATE_PFN_WRITE;
+   }
+   migrate_device_pages(src_pfns, dst_pfns, npages);
+   migrate_device_finalize(src_pfns, dst_pfns, npages);
+   kfree(src_pfns);
+   kfree(dst_pfns);
+}
+
+/* Removes free pages from the free list so they can't be re-allocated */
+static void dmirror_remove_free_pages(struct dmirror_chunk *devmem)
+{
+   struct dmirror_device *mdevice = devmem->mdevice;
+   struct page *page;
+
+   for (page = mdevice->free_pages; page; page = page->zone_device_data)
+   if (dmirror_page_to_chunk(page) == devmem)
+   mdevice->free_pages = page->zone_device_data;
+}
+
+static void dmirror_device_remove_chunks(struct dmirror_device *mdevice)
+{
+   unsigned int i;
+
+   mutex_lock(&mdevice->devmem_lock);
+   if (mdevice->devmem_chunks) {
+   for (i = 0; i < mdevice->devmem_count; i++) {
+   struct dmirror_chunk *devmem =
+   mdevice->devmem_chunks[i];
+
+   spin_lock(&mdevice->lock);
+   devmem->remove = true;
+   dmirror_remove_free_pages(devmem);
+   spin_unlock(&mdevice->lock);
+
+   dmirror_device_evict_chunk(devmem);
+   memunmap_pages(&devmem->pagemap);
+   if (devmem->pagemap.type == MEMORY_DEVICE_PRIVATE)
+   release_mem_region(devmem->pagemap.range.start,
+  
range_len(&devmem->pagemap.range));
+   kfree(devmem);
+   }
+   mdevice->devmem_count = 0;
+   mdevice->devmem_capacity = 0;
+   mdevice->free_pages = NULL;
+   kfree(mdevice->devmem_chunks);
+   mdevice->devmem_chunks = NULL;
+   }
+   mutex_unlock(&mdevice->devmem_lock);
+}
+
 static long dmirror_fops_unlocked_ioctl(struct file *filp,
unsigned int command,
unsigned long arg)
@@ -1272,6 +1356,11 @@ static long dmirror_fops_unlocked_ioctl(struct file 
*filp,
ret = dmirror_snapshot(dmirror, &cmd);
break;
 
+   case HMM_DMIRROR_RELEASE:
+   d

Re: [PATCH v1 14/17] phy: mediatek: add support for phy-mtk-hdmi-mt8195

2022-09-28 Thread Guillaume Ranquet
On Wed, 28 Sep 2022 04:40, Chunfeng Yun  wrote:
>On Tue, 2022-09-27 at 06:23 -0700, Guillaume Ranquet wrote:
>> On Tue, 20 Sep 2022 09:46, Chunfeng Yun 
>> wrote:
>> > On Mon, 2022-09-19 at 18:56 +0200, Guillaume Ranquet wrote:
>> > > Add basic support for the mediatek hdmi phy on MT8195 SoC
>> > >
>> > > Signed-off-by: Guillaume Ranquet 
>> > >
>> > > diff --git a/drivers/gpu/drm/mediatek/mtk_mt8195_hdmi.c
>> > > b/drivers/gpu/drm/mediatek/mtk_mt8195_hdmi.c
>> > > index bb7593ea4c86..0157acdce56c 100644
>> > > --- a/drivers/gpu/drm/mediatek/mtk_mt8195_hdmi.c
>> > > +++ b/drivers/gpu/drm/mediatek/mtk_mt8195_hdmi.c
>> > > @@ -1344,6 +1344,8 @@ static void mtk_hdmi_bridge_disable(struct
>> > > drm_bridge *bridge,
>> > >  mtk_hdmi_disable_hdcp_encrypt(hdmi);
>> > >  usleep_range(5, 50050);
>> > >
>> > > +phy_power_off(hdmi->phy);
>> > > +
>> > >  hdmi->enabled = false;
>> > >  }
>> > >
>> > > diff --git a/drivers/phy/mediatek/Makefile
>> > > b/drivers/phy/mediatek/Makefile
>> > > index fb1f8edaffa7..c9a50395533e 100644
>> > > --- a/drivers/phy/mediatek/Makefile
>> > > +++ b/drivers/phy/mediatek/Makefile
>> > > @@ -12,6 +12,7 @@ obj-$(CONFIG_PHY_MTK_XSPHY)+= phy-
>> > > mtk-
>> > > xsphy.o
>> > >  phy-mtk-hdmi-drv-y  := phy-mtk-hdmi.o
>> > >  phy-mtk-hdmi-drv-y  += phy-mtk-hdmi-
>> > > mt2701.o
>> > >  phy-mtk-hdmi-drv-y  += phy-mtk-hdmi-
>> > > mt8173.o
>> > > +phy-mtk-hdmi-drv-y  += phy-mtk-hdmi-
>> > > mt8195.o
>> > >  obj-$(CONFIG_PHY_MTK_HDMI)  += phy-mtk-hdmi-drv.o
>> > >
>> > >  phy-mtk-mipi-dsi-drv-y  := phy-mtk-mipi-dsi.o
>> > > diff --git a/drivers/phy/mediatek/phy-mtk-hdmi-mt8195.c
>> > > b/drivers/phy/mediatek/phy-mtk-hdmi-mt8195.c
>> > > new file mode 100644
>> > > index ..149015b64c02
>> > > --- /dev/null
>> > > +++ b/drivers/phy/mediatek/phy-mtk-hdmi-mt8195.c
>> > > @@ -0,0 +1,673 @@
>> > > +// SPDX-License-Identifier: GPL-2.0
>> > > +/*
>> > > + * Copyright (c) 2021 MediaTek Inc.
>> > > + * Copyright (c) 2021 BayLibre, SAS
>> > > + */
>> > > +#include 
>> > > +#include 
>> > > +#include 
>> > > +#include 
>> > > +#include 
>> > > +#include 
>> > > +#include 
>> > > +#include 
>> > > +
>> > > +#include "phy-mtk-hdmi.h"
>> > > +#include "phy-mtk-hdmi-mt8195.h"
>> > > +
>> > > +static void mtk_hdmi_ana_fifo_en(struct mtk_hdmi_phy *hdmi_phy)
>> > > +{
>> > > +/* make data fifo writable for hdmi2.0 */
>> > > +mtk_hdmi_phy_mask(hdmi_phy, HDMI_ANA_CTL,
>> > > REG_ANA_HDMI20_FIFO_EN,
>> > > +  REG_ANA_HDMI20_FIFO_EN);
>> > > +}
>> > > +
>> > > +static void
>> > > +mtk_mt8195_phy_tmds_high_bit_clk_ratio(struct mtk_hdmi_phy
>> > > *hdmi_phy,
>> > > +   bool enable)
>> > > +{
>> > > +mtk_hdmi_ana_fifo_en(hdmi_phy);
>> > > +
>> > > +/* HDMI 2.0 specification, 3.4Gbps <= TMDS Bit Rate <= 6G,
>> > > + * clock bit ratio 1:40, under 3.4Gbps, clock bit ratio 1:10
>> > > + */
>> > > +if (enable)
>> > > +mtk_hdmi_phy_mask(hdmi_phy, HDMI20_CLK_CFG,
>> > > +  0x2 << REG_TXC_DIV_SHIFT,
>> >
>> > Use FIELD_PREP() macro, then no need define REG_TXC_DIV_SHIFT
>> > anymore.
>> >
>>
>> Didn't know about FIELD_* macros, will use them for V2.
>>
>> Thx for the suggestion.
>Please use helpers defined in phy-mtk-io.h, the register access helpers
>of mtk_hdmi_phy_* are already removed in phy next branch.
>
>Thanks a lot
>
>
Hi,

Thank you for the heads-up, I've seen the change landed in
next-20220927 on which I'm currently rebasing.


Thx,
Guillaume.


Re: [PATCH v2 09/10] drm/msm/dp: drop modeset sanity checks

2022-09-28 Thread Johan Hovold
On Tue, Sep 27, 2022 at 11:42:53AM -0700, Abhinav Kumar wrote:
> On 9/27/2022 12:14 AM, Johan Hovold wrote:
> > On Mon, Sep 26, 2022 at 11:17:20AM -0700, Abhinav Kumar wrote:
> >> On 9/13/2022 1:53 AM, Johan Hovold wrote:
> >>> Drop the overly defensive modeset sanity checks of function parameters
> >>> which have already been checked or used by the callers.
> >>>
> >>> Reviewed-by: Dmitry Baryshkov 
> >>> Signed-off-by: Johan Hovold 
> >>
> >> The change LGTM, hence
> >>
> >> Reviewed-by: Abhinav Kumar 
> >>
> >> I think we can use below fixes tag so that we can pick up this entire
> >> series for the fixes cycle.
> >>
> >> Fixes: c943b4948b58 ("drm/msm/dp: add displayPort driver support")
> > 
> > Perhaps that's a requirement for drm, but I wouldn't add a Fixes tag for
> > this otherwise as it's not a bug.
> > 
> > You also have to watch out for Sasha and his autosel scripts which will
> > probably try to backport this to stable if it finds a Fixes tag.

> Discussed with Rob on IRC, we will apply everything except the last two 
> patches of this series in the -fixes and take these two for the next 
> kernel rev push.

So the fixes go in 6.0 and the two follow-on cleanups in 6.1? Or did you
mean 6.1 and 6.2?

Johan


Re: [PATCH 1/3] pwm: Change prototype of .get_state() callback to return an error

2022-09-28 Thread Thierry Reding
On Fri, Sep 16, 2022 at 05:15:04PM +0200, Uwe Kleine-König wrote:
[...]
> diff --git a/drivers/pwm/pwm-crc.c b/drivers/pwm/pwm-crc.c
> index 7b357d1cf642..811e6f424927 100644
> --- a/drivers/pwm/pwm-crc.c
> +++ b/drivers/pwm/pwm-crc.c
> @@ -121,8 +121,8 @@ static int crc_pwm_apply(struct pwm_chip *chip, struct 
> pwm_device *pwm,
>   return 0;
>  }
>  
> -static void crc_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
> -   struct pwm_state *state)
> +static int crc_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
> +  struct pwm_state *state)
>  {
>   struct crystalcove_pwm *crc_pwm = to_crc_pwm(chip);
>   struct device *dev = crc_pwm->chip.dev;
> @@ -132,13 +132,13 @@ static void crc_pwm_get_state(struct pwm_chip *chip, 
> struct pwm_device *pwm,
>   error = regmap_read(crc_pwm->regmap, PWM0_CLK_DIV, &clk_div_reg);
>   if (error) {
>   dev_err(dev, "Error reading PWM0_CLK_DIV %d\n", error);
> - return;
> + return -EIO;
>   }
>  
>   error = regmap_read(crc_pwm->regmap, PWM0_DUTY_CYCLE, &duty_cycle_reg);
>   if (error) {
>   dev_err(dev, "Error reading PWM0_DUTY_CYCLE %d\n", error);
> - return;
> + return -EIO;
>   }

In other drivers you propagate errors from regmap_read(), why not here?

> diff --git a/drivers/pwm/pwm-sprd.c b/drivers/pwm/pwm-sprd.c
> index 7004f55bbf11..aa06b3ce81a6 100644
> --- a/drivers/pwm/pwm-sprd.c
> +++ b/drivers/pwm/pwm-sprd.c
> @@ -65,8 +65,8 @@ static void sprd_pwm_write(struct sprd_pwm_chip *spc, u32 
> hwid,
>   writel_relaxed(val, spc->base + offset);
>  }
>  
> -static void sprd_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
> -struct pwm_state *state)
> +static int sprd_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
> +   struct pwm_state *state)
>  {
>   struct sprd_pwm_chip *spc =
>   container_of(chip, struct sprd_pwm_chip, chip);
> @@ -80,11 +80,8 @@ static void sprd_pwm_get_state(struct pwm_chip *chip, 
> struct pwm_device *pwm,
>* reading to the registers.
>*/
>   ret = clk_bulk_prepare_enable(SPRD_PWM_CHN_CLKS_NUM, chn->clks);
> - if (ret) {
> - dev_err(spc->dev, "failed to enable pwm%u clocks\n",
> - pwm->hwpwm);

This might be useful information, so perhaps leave it in?

[...]
> diff --git a/drivers/pwm/pwm-sun4i.c b/drivers/pwm/pwm-sun4i.c
> index c8445b0a3339..ead909400e64 100644
> --- a/drivers/pwm/pwm-sun4i.c
> +++ b/drivers/pwm/pwm-sun4i.c
> @@ -108,9 +108,9 @@ static inline void sun4i_pwm_writel(struct sun4i_pwm_chip 
> *chip,
>   writel(val, chip->base + offset);
>  }
>  
> -static void sun4i_pwm_get_state(struct pwm_chip *chip,
> - struct pwm_device *pwm,
> - struct pwm_state *state)
> +static int sun4i_pwm_get_state(struct pwm_chip *chip,
> +struct pwm_device *pwm,
> +struct pwm_state *state)
>  {
>   struct sun4i_pwm_chip *sun4i_pwm = to_sun4i_pwm_chip(chip);
>   u64 clk_rate, tmp;
> @@ -132,7 +132,7 @@ static void sun4i_pwm_get_state(struct pwm_chip *chip,
>   state->duty_cycle = DIV_ROUND_UP_ULL(state->period, 2);
>   state->polarity = PWM_POLARITY_NORMAL;
>   state->enabled = true;
> - return;
> + return 0;
>   }
>  
>   if ((PWM_REG_PRESCAL(val, pwm->hwpwm) == PWM_PRESCAL_MASK) &&
> @@ -142,7 +142,8 @@ static void sun4i_pwm_get_state(struct pwm_chip *chip,
>   prescaler = prescaler_table[PWM_REG_PRESCAL(val, pwm->hwpwm)];
>  
>   if (prescaler == 0)
> - return;
> + /* huh? is this an error? */
> + return 0;

Yeah, I think this would count as an error. The prescaler value returned
from that table is 0 in what seems to be "invalid" configurations. If
you look at how this is used in sun4i_pwm_calculate(), these entries are
skipped for the computation of the duty cycle. So I would expect this to
happen in either an invalidly configured or completely unconfigured PWM.

That raises the question about what to do in these cases. If we return
an error, that could potentially throw off consumers. So perhaps the
closest would be to return a disabled PWM? Or perhaps it'd be up to the
consumer to provide some fallback configuration for invalidly configured
or unconfigured PWMs.

Thierry


signature.asc
Description: PGP signature


Re: [PATCH 2/3] pwm/tracing: Also record trace events for failed apply calls

2022-09-28 Thread Thierry Reding
On Fri, Sep 16, 2022 at 05:15:05PM +0200, Uwe Kleine-König wrote:
> Record and report an error code for the events. This allows to report
> about failed calls without ambiguity and so gives a more complete
> picture.
> 
> Signed-off-by: Uwe Kleine-König 
> ---
>  drivers/pwm/core.c | 18 --
>  include/trace/events/pwm.h | 20 ++--
>  2 files changed, 18 insertions(+), 20 deletions(-)

Yeah, I like this one. Should make the traces much more useful in the
failure cases.

Thierry


signature.asc
Description: PGP signature


Re: [PATCH 3/3] pwm: Handle .get_state() failures

2022-09-28 Thread Thierry Reding
On Fri, Sep 16, 2022 at 05:15:06PM +0200, Uwe Kleine-König wrote:
> This suppresses diagnosis for PWM_DEBUG routines and makes sure that
> pwm->state isn't modified in pwm_device_request() if .get_state() fails.
> 
> Signed-off-by: Uwe Kleine-König 
> ---
>  drivers/pwm/core.c | 12 +++-
>  1 file changed, 11 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
> index 381db04cfa00..421573590613 100644
> --- a/drivers/pwm/core.c
> +++ b/drivers/pwm/core.c
> @@ -108,9 +108,14 @@ static int pwm_device_request(struct pwm_device *pwm, 
> const char *label)
>   }
>  
>   if (pwm->chip->ops->get_state) {
> - err = pwm->chip->ops->get_state(pwm->chip, pwm, &pwm->state);
> + struct pwm_state state;
> +
> + err = pwm->chip->ops->get_state(pwm->chip, pwm, &state);
>   trace_pwm_get(pwm, &pwm->state, err);
>  
> + if (!err)
> + pwm->state = state;

So basically this means that callers of pwm_get_state() will get the
zeroed out pwm->state. This can cause issues with the likes of
pwm_set_relative_duty_cycle() which many drivers would use. Do we
perhaps want to set an internal error in this case so that it can be
propagated to callers in pwm_get_state()? That would allow them to fall
back to some default configuration rather than potentially breaking
altogether.

Thierry


signature.asc
Description: PGP signature


Re: [PATCH v1 16/17] drm/mediatek: dpi: Add mt8195 hdmi to DPI driver

2022-09-28 Thread AngeloGioacchino Del Regno

Il 27/09/22 15:34, Guillaume Ranquet ha scritto:

On Tue, 20 Sep 2022 14:22, AngeloGioacchino Del Regno
 wrote:

Il 19/09/22 18:56, Guillaume Ranquet ha scritto:

Add the DPI1 hdmi path support in mtk dpi driver

Signed-off-by: Guillaume Ranquet 

diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c 
b/drivers/gpu/drm/mediatek/mtk_dpi.c
index 630a4e301ef6..91212b7610e8 100644
--- a/drivers/gpu/drm/mediatek/mtk_dpi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dpi.c
@@ -15,7 +15,10 @@
   #include 
   #include 
   #include 
+#include 
   #include 
+#include 
+#include 

   #include 

@@ -66,10 +69,14 @@ struct mtk_dpi {
struct drm_bridge *next_bridge;
struct drm_connector *connector;
void __iomem *regs;
+   struct reset_control *reset_ctl;
struct device *dev;
struct clk *engine_clk;
+   struct clk *dpi_ck_cg;
struct clk *pixel_clk;
+   struct clk *dpi_sel_clk;
struct clk *tvd_clk;
+   struct clk *hdmi_cg;



You're adding new clocks and then you're making *all clocks*, including the
already existing ones... optional.

That looks seriously odd; can you please give a devicetree example for
MT8195 in the next version, perhaps in the cover letter?

Would also make it easier to test this entire big series.

Regards,
Angelo



The clock names are different for MT8195 HDMI than for the legacy DP.
Making everything optional might not have been a smart move.
I'll try to think of something else to make it look less odd.

The device tree I'm using to test things is rather "hackish" and has a bunch of
changes from what is found on linux-next.
I think Jason and Nancy are due to upstream those patches.

I'll try to include something minimal for you to test.
Otherwise would a public branch containing everything work for you?



Any reference would work for me, "something minimal" or a public branch, it
doesn't really matter.

Thanks!
Angelo




Re: [PATCH 0/4] drm/bridge: lt8912b: Fix corrupt display output due to wrong bridge config

2022-09-28 Thread Robert Foss
On Thu, 22 Sept 2022 at 14:43, Philippe Schenker  wrote:
>
> From: Philippe Schenker 
>
> This patch-set fixes the lt8912b driver that currently does not take
> care whether or not the attached display has positive or negative syncs
> and/or reports on EDID if it needs HDMI mode or DVI.
>
> This series addresses also an issue where the LVDS startup sequence was
> written to the wrong I2C address (the lt8912 has three). This caused
> writing into reserved registers and causing an unstable HDMI picture
> that manifests itself only sometimes and depending on the monitor with a
> flickering and a repeating of going black and coming up again. While at
> it move also some sensible comments to the sequence.
>
>
> Francesco Dolcini (2):
>   drm/bridge: lt8912b: fix corrupted image output
>   drm/bridge: lt8912b: clarify lvds output status
>
> Philippe Schenker (2):
>   drm/bridge: lt8912b: add vsync hsync
>   drm/bridge: lt8912b: set hdmi or dvi mode
>
>  drivers/gpu/drm/bridge/lontium-lt8912b.c | 39 +---
>  1 file changed, 28 insertions(+), 11 deletions(-)
>
> --
> 2.37.3
>

Thanks for the series & the reviews.

Applied to drm-misc-next.


Re: [PATCH 04/16] drm/i915/vm_bind: Add support to create persistent vma

2022-09-28 Thread Andi Shyti
Hi Niranjana,

On Tue, Sep 27, 2022 at 11:19:06PM -0700, Niranjana Vishwanathapura wrote:
> Add i915_vma_instance_persistent() to create persistent vmas.
> Persistent vmas will use i915_gtt_view to support partial binding.
> 
> vma_lookup is tied to segment of the object instead of section
> of VA space. Hence, it does not support aliasing, i.e., multiple
> mappings (at different VA) point to the same gtt_view of object.
> Skip vma_lookup for persistent vmas to support aliasing.
> 
> Signed-off-by: Niranjana Vishwanathapura 
> Signed-off-by: Andi Shyti 
> ---
>  drivers/gpu/drm/i915/i915_vma.c   | 39 ---
>  drivers/gpu/drm/i915/i915_vma.h   | 16 +--
>  drivers/gpu/drm/i915/i915_vma_types.h |  7 +
>  3 files changed, 57 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index f17c09ead7d7..5839e1f55f00 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -109,7 +109,8 @@ static void __i915_vma_retire(struct i915_active *ref)
>  static struct i915_vma *
>  vma_create(struct drm_i915_gem_object *obj,
>  struct i915_address_space *vm,
> -const struct i915_gtt_view *view)
> +const struct i915_gtt_view *view,
> +bool skip_lookup_cache)
>  {
>   struct i915_vma *pos = ERR_PTR(-E2BIG);
>   struct i915_vma *vma;
> @@ -196,6 +197,9 @@ vma_create(struct drm_i915_gem_object *obj,
>   __set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(vma));
>   }
>  
> + if (skip_lookup_cache)
> + goto skip_rb_insert;
> +
>   rb = NULL;
>   p = &obj->vma.tree.rb_node;
>   while (*p) {
> @@ -220,6 +224,7 @@ vma_create(struct drm_i915_gem_object *obj,
>   rb_link_node(&vma->obj_node, rb, p);
>   rb_insert_color(&vma->obj_node, &obj->vma.tree);
>  
> +skip_rb_insert:
>   if (i915_vma_is_ggtt(vma))
>   /*
>* We put the GGTT vma at the start of the vma-list, followed
> @@ -299,7 +304,34 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
>  
>   /* vma_create() will resolve the race if another creates the vma */
>   if (unlikely(!vma))
> - vma = vma_create(obj, vm, view);
> + vma = vma_create(obj, vm, view, false);
> +
> + GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view));
> + return vma;
> +}
> +
> +/**
> + * i915_vma_create_persistent - create a persistent VMA
> + * @obj: parent &struct drm_i915_gem_object to be mapped
> + * @vm: address space in which the mapping is located
> + * @view: additional mapping requirements
> + *
> + * Creates a persistent vma.
> + *
> + * Returns the vma, or an error pointer.
> + */
> +struct i915_vma *
> +i915_vma_create_persistent(struct drm_i915_gem_object *obj,
> +struct i915_address_space *vm,
> +const struct i915_gtt_view *view)
> +{
> + struct i915_vma *vma;
> +
> + GEM_BUG_ON(!kref_read(&vm->ref));
> +
> + vma = vma_create(obj, vm, view, true);
> + if (!IS_ERR(vma))
> + i915_vma_set_persistent(vma);
>  
>   GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view));
>   return vma;
> @@ -1666,7 +1698,8 @@ static void release_references(struct i915_vma *vma, 
> struct intel_gt *gt,
>  
>   spin_lock(&obj->vma.lock);
>   list_del(&vma->obj_link);
> - if (!RB_EMPTY_NODE(&vma->obj_node))
> + if (!i915_vma_is_persistent(vma) &&
> + !RB_EMPTY_NODE(&vma->obj_node))
>   rb_erase(&vma->obj_node, &obj->vma.tree);
>  
>   spin_unlock(&obj->vma.lock);
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index aecd9c64486b..51e712de380a 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -44,6 +44,10 @@ struct i915_vma *
>  i915_vma_instance(struct drm_i915_gem_object *obj,
> struct i915_address_space *vm,
> const struct i915_gtt_view *view);
> +struct i915_vma *
> +i915_vma_create_persistent(struct drm_i915_gem_object *obj,
> +struct i915_address_space *vm,
> +const struct i915_gtt_view *view);
>  
>  void i915_vma_unpin_and_release(struct i915_vma **p_vma, unsigned int flags);
>  #define I915_VMA_RELEASE_MAP BIT(0)
> @@ -138,6 +142,16 @@ static inline u32 i915_ggtt_pin_bias(struct i915_vma 
> *vma)
>   return i915_vm_to_ggtt(vma->vm)->pin_bias;
>  }
>  
> +static inline bool i915_vma_is_persistent(const struct i915_vma *vma)
> +{
> + return test_bit(I915_VMA_PERSISTENT_BIT, __i915_vma_flags(vma));
> +}
> +
> +static inline void i915_vma_set_persistent(struct i915_vma *vma)
> +{
> + set_bit(I915_VMA_PERSISTENT_BIT, __i915_vma_flags(vma));
> +}
> +
>  static inline struct i915_vma *i915_vma_get(struct i915_vma *vma)
>  {
>   i915_gem_object_get(vma->obj);
> @@ -164,8 +178,6 @@ i915_vma_compare(struct i915_vma *vma,
> 

Re: [PATCH v2 8/8] hmm-tests: Add test for migrate_device_range()

2022-09-28 Thread Andrew Morton
On Wed, 28 Sep 2022 22:01:22 +1000 Alistair Popple  wrote:

> @@ -1401,22 +1494,7 @@ static int dmirror_device_init(struct dmirror_device 
> *mdevice, int id)
>  
>  static void dmirror_device_remove(struct dmirror_device *mdevice)
>  {
> - unsigned int i;
> -
> - if (mdevice->devmem_chunks) {
> - for (i = 0; i < mdevice->devmem_count; i++) {
> - struct dmirror_chunk *devmem =
> - mdevice->devmem_chunks[i];
> -
> - memunmap_pages(&devmem->pagemap);
> - if (devmem->pagemap.type == MEMORY_DEVICE_PRIVATE)
> - release_mem_region(devmem->pagemap.range.start,
> -
> range_len(&devmem->pagemap.range));
> - kfree(devmem);
> - }
> - kfree(mdevice->devmem_chunks);
> - }
> -
> + dmirror_device_remove_chunks(mdevice);
>   cdev_del(&mdevice->cdevice);
>  }

Needed a bit of rework due to
https://lkml.kernel.org/r/20220826050631.25771-1-mpent...@redhat.com. 
Please check my resolution.


--- a/lib/test_hmm.c~hmm-tests-add-test-for-migrate_device_range
+++ a/lib/test_hmm.c
@@ -100,6 +100,7 @@ struct dmirror {
 struct dmirror_chunk {
struct dev_pagemap  pagemap;
struct dmirror_device   *mdevice;
+   bool remove;
 };
 
 /*
@@ -192,11 +193,15 @@ static int dmirror_fops_release(struct i
return 0;
 }
 
+static struct dmirror_chunk *dmirror_page_to_chunk(struct page *page)
+{
+   return container_of(page->pgmap, struct dmirror_chunk, pagemap);
+}
+
 static struct dmirror_device *dmirror_page_to_device(struct page *page)
 
 {
-   return container_of(page->pgmap, struct dmirror_chunk,
-   pagemap)->mdevice;
+   return dmirror_page_to_chunk(page)->mdevice;
 }
 
 static int dmirror_do_fault(struct dmirror *dmirror, struct hmm_range *range)
@@ -1218,6 +1223,85 @@ static int dmirror_snapshot(struct dmirr
return ret;
 }
 
+static void dmirror_device_evict_chunk(struct dmirror_chunk *chunk)
+{
+   unsigned long start_pfn = chunk->pagemap.range.start >> PAGE_SHIFT;
+   unsigned long end_pfn = chunk->pagemap.range.end >> PAGE_SHIFT;
+   unsigned long npages = end_pfn - start_pfn + 1;
+   unsigned long i;
+   unsigned long *src_pfns;
+   unsigned long *dst_pfns;
+
+   src_pfns = kcalloc(npages, sizeof(*src_pfns), GFP_KERNEL);
+   dst_pfns = kcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL);
+
+   migrate_device_range(src_pfns, start_pfn, npages);
+   for (i = 0; i < npages; i++) {
+   struct page *dpage, *spage;
+
+   spage = migrate_pfn_to_page(src_pfns[i]);
+   if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE))
+   continue;
+
+   if (WARN_ON(!is_device_private_page(spage) &&
+   !is_device_coherent_page(spage)))
+   continue;
+   spage = BACKING_PAGE(spage);
+   dpage = alloc_page(GFP_HIGHUSER_MOVABLE | __GFP_NOFAIL);
+   lock_page(dpage);
+   copy_highpage(dpage, spage);
+   dst_pfns[i] = migrate_pfn(page_to_pfn(dpage));
+   if (src_pfns[i] & MIGRATE_PFN_WRITE)
+   dst_pfns[i] |= MIGRATE_PFN_WRITE;
+   }
+   migrate_device_pages(src_pfns, dst_pfns, npages);
+   migrate_device_finalize(src_pfns, dst_pfns, npages);
+   kfree(src_pfns);
+   kfree(dst_pfns);
+}
+
+/* Removes free pages from the free list so they can't be re-allocated */
+static void dmirror_remove_free_pages(struct dmirror_chunk *devmem)
+{
+   struct dmirror_device *mdevice = devmem->mdevice;
+   struct page *page;
+
+   for (page = mdevice->free_pages; page; page = page->zone_device_data)
+   if (dmirror_page_to_chunk(page) == devmem)
+   mdevice->free_pages = page->zone_device_data;
+}
+
+static void dmirror_device_remove_chunks(struct dmirror_device *mdevice)
+{
+   unsigned int i;
+
+   mutex_lock(&mdevice->devmem_lock);
+   if (mdevice->devmem_chunks) {
+   for (i = 0; i < mdevice->devmem_count; i++) {
+   struct dmirror_chunk *devmem =
+   mdevice->devmem_chunks[i];
+
+   spin_lock(&mdevice->lock);
+   devmem->remove = true;
+   dmirror_remove_free_pages(devmem);
+   spin_unlock(&mdevice->lock);
+
+   dmirror_device_evict_chunk(devmem);
+   memunmap_pages(&devmem->pagemap);
+   if (devmem->pagemap.type == MEMORY_DEVICE_PRIVATE)
+   release_mem_region(devmem->pagemap.range.start,
+  
range_len(&devmem->pagemap.range));
+   kfree(devmem);
+   }

[PATCH -next] video: fbdev: add missing MODULE_DEVICE_TABLE

2022-09-28 Thread Zeng Heng
This patch adds missing MODULE_DEVICE_TABLE definition
which generates correct modalias for automatic loading
of this driver when it is built as an external module.

Signed-off-by: Zeng Heng 
---
 drivers/video/fbdev/vga16fb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/video/fbdev/vga16fb.c b/drivers/video/fbdev/vga16fb.c
index 35cf51ae3292..af47f8217095 100644
--- a/drivers/video/fbdev/vga16fb.c
+++ b/drivers/video/fbdev/vga16fb.c
@@ -1421,6 +1421,7 @@ static const struct platform_device_id 
vga16fb_driver_id_table[] = {
{"vga-framebuffer", 0},
{ }
 };
+MODULE_DEVICE_TABLE(platform, vga16fb_driver_id_table);
 
 static struct platform_driver vga16fb_driver = {
.probe = vga16fb_probe,
-- 
2.25.1



Re: [PATCH v2 09/10] drm/msm/dp: drop modeset sanity checks

2022-09-28 Thread Abhinav Kumar




On 9/28/2022 5:24 AM, Johan Hovold wrote:

On Tue, Sep 27, 2022 at 11:42:53AM -0700, Abhinav Kumar wrote:

On 9/27/2022 12:14 AM, Johan Hovold wrote:

On Mon, Sep 26, 2022 at 11:17:20AM -0700, Abhinav Kumar wrote:

On 9/13/2022 1:53 AM, Johan Hovold wrote:

Drop the overly defensive modeset sanity checks of function parameters
which have already been checked or used by the callers.

Reviewed-by: Dmitry Baryshkov 
Signed-off-by: Johan Hovold 


The change LGTM, hence

Reviewed-by: Abhinav Kumar 

I think we can use below fixes tag so that we can pick up this entire
series for the fixes cycle.

Fixes: c943b4948b58 ("drm/msm/dp: add displayPort driver support")


Perhaps that's a requirement for drm, but I wouldn't add a Fixes tag for
this otherwise as it's not a bug.

You also have to watch out for Sasha and his autosel scripts which will
probably try to backport this to stable if it finds a Fixes tag.



Discussed with Rob on IRC, we will apply everything except the last two
patches of this series in the -fixes and take these two for the next
kernel rev push.


So the fixes go in 6.0 and the two follow-on cleanups in 6.1? Or did you
mean 6.1 and 6.2?

Johan


The fixes will go in 6.1 first.

The two follow-on cleanups in 6.2.

Thanks

Abhinav


Re: [PATCH -next v2] backlight: gpio_backlight: Switch to use dev_err_probe() helper

2022-09-28 Thread Daniel Thompson
On Tue, Sep 27, 2022 at 10:17:32AM +0800, Yang Yingliang wrote:
> Hi
>
> On 2022/9/26 23:32, Daniel Thompson wrote:
> > On Mon, Sep 26, 2022 at 10:24:47PM +0800, Yang Yingliang wrote:
> > > In the probe path, dev_err() can be replaced with dev_err_probe()
> > > which will check if error code is -EPROBE_DEFER and prints the
> > > error name. It also sets the defer probe reason which can be
> > > checked later through debugfs. It's more simple in error path.
> > >
> > > Signed-off-by: Yang Yingliang 
> > > ---
> > > v2:
> > >Remove "Error: " in error meassage
> > > ---
> > >   drivers/video/backlight/gpio_backlight.c | 10 +++---
> > >   1 file changed, 3 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/drivers/video/backlight/gpio_backlight.c 
> > > b/drivers/video/backlight/gpio_backlight.c
> > > index 6f78d928f054..4ff3939e5f7e 100644
> > > --- a/drivers/video/backlight/gpio_backlight.c
> > > +++ b/drivers/video/backlight/gpio_backlight.c
> > > @@ -64,13 +64,9 @@ static int gpio_backlight_probe(struct platform_device 
> > > *pdev)
> > >   def_value = device_property_read_bool(dev, "default-on");
> > >
> > >   gbl->gpiod = devm_gpiod_get(dev, NULL, GPIOD_ASIS);
> > > - if (IS_ERR(gbl->gpiod)) {
> > > - ret = PTR_ERR(gbl->gpiod);
> > > - if (ret != -EPROBE_DEFER)
> > > - dev_err(dev,
> > > - "Error: The gpios parameter is missing or 
> > > invalid.\n");
> > > - return ret;
> > > - }
> > > + if (IS_ERR(gbl->gpiod))
> > > + return dev_err_probe(dev, PTR_ERR(gbl->gpiod),
> > > +  "The gpios parameter is missing or 
> > > invalid.\n");
> > Why keep the leading "The " ?
> OK, I will remove it in v3.
>
> But you said "the resulting line will read better with a "The " at
> beginning" in your last mail,
> I am confused about this.

That's because my e-mail was confusing! Thanks for fixing it.


Daniel.


[PATCH v4.1] drm/i915/mtl: Define engine context layouts

2022-09-28 Thread Radhakrishna Sripada
From: Matt Roper 

The parts of the media and blitter engine contexts that we care about for
setting up an initial state on MTL are nearly identical to DG2 (and PVC).
The only difference is that PRT_BB_STATE is replaced with NOP.

For render/compute engines, the part of the context images are nearly
the same, although the layout had a very slight change --- one POSH
register was removed and the placement of some LRI/noops adjusted
slightly to compensate.

v2:
 - Dg2, mtl xcs offsets slightly vary. Use a separate offsets array(Bala)
 - Add missing nop in xcs offsets(Bala)
v3:
 - Fix the spacing for nop in xcs offset(MattR)
v4:
 - Fix rcs register offset(MattR)
v4.1:
 - Fix commit message(Lucas)

Bspec: 46261, 46260, 45585
Cc: Balasubramani Vivekanandan 
Cc: Lucas De Marchi 
Signed-off-by: Matt Roper 
Signed-off-by: Radhakrishna Sripada 
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 84 -
 1 file changed, 82 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 82d899f170fb..e84ef3859934 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -264,6 +264,39 @@ static const u8 dg2_xcs_offsets[] = {
END
 };
 
+static const u8 mtl_xcs_offsets[] = {
+   NOP(1),
+   LRI(13, POSTED),
+   REG16(0x244),
+   REG(0x034),
+   REG(0x030),
+   REG(0x038),
+   REG(0x03c),
+   REG(0x168),
+   REG(0x140),
+   REG(0x110),
+   REG(0x1c0),
+   REG(0x1c4),
+   REG(0x1c8),
+   REG(0x180),
+   REG16(0x2b4),
+   NOP(4),
+
+   NOP(1),
+   LRI(9, POSTED),
+   REG16(0x3a8),
+   REG16(0x28c),
+   REG16(0x288),
+   REG16(0x284),
+   REG16(0x280),
+   REG16(0x27c),
+   REG16(0x278),
+   REG16(0x274),
+   REG16(0x270),
+
+   END
+};
+
 static const u8 gen8_rcs_offsets[] = {
NOP(1),
LRI(14, POSTED),
@@ -606,6 +639,49 @@ static const u8 dg2_rcs_offsets[] = {
END
 };
 
+static const u8 mtl_rcs_offsets[] = {
+   NOP(1),
+   LRI(15, POSTED),
+   REG16(0x244),
+   REG(0x034),
+   REG(0x030),
+   REG(0x038),
+   REG(0x03c),
+   REG(0x168),
+   REG(0x140),
+   REG(0x110),
+   REG(0x1c0),
+   REG(0x1c4),
+   REG(0x1c8),
+   REG(0x180),
+   REG16(0x2b4),
+   REG(0x120),
+   REG(0x124),
+
+   NOP(1),
+   LRI(9, POSTED),
+   REG16(0x3a8),
+   REG16(0x28c),
+   REG16(0x288),
+   REG16(0x284),
+   REG16(0x280),
+   REG16(0x27c),
+   REG16(0x278),
+   REG16(0x274),
+   REG16(0x270),
+
+   NOP(2),
+   LRI(2, POSTED),
+   REG16(0x5a8),
+   REG16(0x5ac),
+
+   NOP(6),
+   LRI(1, 0),
+   REG(0x0c8),
+
+   END
+};
+
 #undef END
 #undef REG16
 #undef REG
@@ -624,7 +700,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs 
*engine)
   !intel_engine_has_relative_mmio(engine));
 
if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) {
-   if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
+   if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70))
+   return mtl_rcs_offsets;
+   else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
return dg2_rcs_offsets;
else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
return xehp_rcs_offsets;
@@ -637,7 +715,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs 
*engine)
else
return gen8_rcs_offsets;
} else {
-   if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
+   if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70))
+   return mtl_xcs_offsets;
+   else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
return dg2_xcs_offsets;
else if (GRAPHICS_VER(engine->i915) >= 12)
return gen12_xcs_offsets;
-- 
2.34.1



Re: [PATCH v2 09/10] drm/msm/dp: drop modeset sanity checks

2022-09-28 Thread Johan Hovold
On Wed, Sep 28, 2022 at 08:33:52AM -0700, Abhinav Kumar wrote:
> On 9/28/2022 5:24 AM, Johan Hovold wrote:
> > On Tue, Sep 27, 2022 at 11:42:53AM -0700, Abhinav Kumar wrote:

> >> Discussed with Rob on IRC, we will apply everything except the last two
> >> patches of this series in the -fixes and take these two for the next
> >> kernel rev push.
> > 
> > So the fixes go in 6.0 and the two follow-on cleanups in 6.1? Or did you
> > mean 6.1 and 6.2?

> The fixes will go in 6.1 first.
> 
> The two follow-on cleanups in 6.2.

Ok, sounds good. Thanks.

Johan


Re: [PATCH v2 01/16] slab: Remove __malloc attribute from realloc functions

2022-09-28 Thread Vlastimil Babka

On 9/28/22 09:26, Geert Uytterhoeven wrote:

Hi Kees,

On Fri, Sep 23, 2022 at 10:35 PM Kees Cook  wrote:

The __malloc attribute should not be applied to "realloc" functions, as
the returned pointer may alias the storage of the prior pointer. Instead
of splitting __malloc from __alloc_size, which would be a huge amount of
churn, just create __realloc_size for the few cases where it is needed.

Additionally removes the conditional test for __alloc_size__, which is
always defined now.

Cc: Christoph Lameter 
Cc: Pekka Enberg 
Cc: David Rientjes 
Cc: Joonsoo Kim 
Cc: Andrew Morton 
Cc: Vlastimil Babka 
Cc: Roman Gushchin 
Cc: Hyeonggon Yoo <42.hye...@gmail.com>
Cc: Marco Elver 
Cc: linux...@kvack.org
Signed-off-by: Kees Cook 


Thanks for your patch, which is now commit 63caa04ec60583b1 ("slab:
Remove __malloc attribute from realloc functions") in next-20220927.

nore...@ellerman.id.au reported all gcc8-based builds to fail
(e.g. [1], more at [2]):

 In file included from :
 ./include/linux/percpu.h: In function ‘__alloc_reserved_percpu’:
 ././include/linux/compiler_types.h:279:30: error: expected
declaration specifiers before ‘__alloc_size__’
  #define __alloc_size(x, ...) __alloc_size__(x, ## __VA_ARGS__) __malloc
   ^~
 ./include/linux/percpu.h:120:74: note: in expansion of macro ‘__alloc_size’
 [...]

It's building fine with e.g. gcc-9 (which is my usual m68k cross-compiler).
Reverting this commit on next-20220927 fixes the issue.


So IIUC it was wrong to remove the #ifdefs?


[1] http://kisskb.ellerman.id.au/kisskb/buildresult/14803908/
[2] 
http://kisskb.ellerman.id.au/kisskb/head/1bd8b75fe6adeaa89d02968bdd811ffe708cf839/




---
  include/linux/compiler_types.h | 13 +
  include/linux/slab.h   | 12 ++--
  mm/slab_common.c   |  4 ++--
  3 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 4f2a819fd60a..f141a6f6b9f6 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -271,15 +271,12 @@ struct ftrace_likely_data {

  /*
   * Any place that could be marked with the "alloc_size" attribute is also
- * a place to be marked with the "malloc" attribute. Do this as part of the
- * __alloc_size macro to avoid redundant attributes and to avoid missing a
- * __malloc marking.
+ * a place to be marked with the "malloc" attribute, except those that may
+ * be performing a _reallocation_, as that may alias the existing pointer.
+ * For these, use __realloc_size().
   */
-#ifdef __alloc_size__
-# define __alloc_size(x, ...)  __alloc_size__(x, ## __VA_ARGS__) __malloc
-#else
-# define __alloc_size(x, ...)  __malloc
-#endif
+#define __alloc_size(x, ...)   __alloc_size__(x, ## __VA_ARGS__) __malloc
+#define __realloc_size(x, ...) __alloc_size__(x, ## __VA_ARGS__)

  #ifndef asm_volatile_goto
  #define asm_volatile_goto(x...) asm goto(x)
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 0fefdf528e0d..41bd036e7551 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -184,7 +184,7 @@ int kmem_cache_shrink(struct kmem_cache *s);
  /*
   * Common kmalloc functions provided by all allocators
   */
-void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags) 
__alloc_size(2);
+void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags) 
__realloc_size(2);
  void kfree(const void *objp);
  void kfree_sensitive(const void *objp);
  size_t __ksize(const void *objp);
@@ -647,10 +647,10 @@ static inline __alloc_size(1, 2) void 
*kmalloc_array(size_t n, size_t size, gfp_
   * @new_size: new size of a single member of the array
   * @flags: the type of memory to allocate (see kmalloc)
   */
-static inline __alloc_size(2, 3) void * __must_check krealloc_array(void *p,
-   size_t 
new_n,
-   size_t 
new_size,
-   gfp_t flags)
+static inline __realloc_size(2, 3) void * __must_check krealloc_array(void *p,
+ size_t 
new_n,
+ size_t 
new_size,
+ gfp_t 
flags)
  {
 size_t bytes;

@@ -774,7 +774,7 @@ static inline __alloc_size(1, 2) void *kvcalloc(size_t n, 
size_t size, gfp_t fla
  }

  extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t 
flags)
- __alloc_size(3);
+ __realloc_size(3);
  extern void kvfree(const void *addr);
  extern void kvfree_sensitive(const void *addr, size_t len);

diff --git a/mm/slab_common.c b/mm/slab_common.c
index 17996649cfe3..457671ace7eb 100644
--- a/mm/slab_common.c
+++ b/mm/s

Re: [Intel-gfx] [PATCH 04/16] drm/i915/vm_bind: Add support to create persistent vma

2022-09-28 Thread Niranjana Vishwanathapura

On Wed, Sep 28, 2022 at 08:38:39AM +0100, Tvrtko Ursulin wrote:


On 28/09/2022 07:19, Niranjana Vishwanathapura wrote:

Add i915_vma_create_persistent() to create persistent vmas.
Persistent vmas will use i915_gtt_view to support partial binding.

vma_lookup is tied to a segment of the object instead of a section
of VA space. Hence, it does not support aliasing, i.e., multiple
mappings (at different VAs) pointing to the same gtt_view of an object.
Skip vma_lookup for persistent vmas to support aliasing.

Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Andi Shyti 
---
 drivers/gpu/drm/i915/i915_vma.c   | 39 ---
 drivers/gpu/drm/i915/i915_vma.h   | 16 +--
 drivers/gpu/drm/i915/i915_vma_types.h |  7 +
 3 files changed, 57 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index f17c09ead7d7..5839e1f55f00 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -109,7 +109,8 @@ static void __i915_vma_retire(struct i915_active *ref)
 static struct i915_vma *
 vma_create(struct drm_i915_gem_object *obj,
   struct i915_address_space *vm,
-  const struct i915_gtt_view *view)
+  const struct i915_gtt_view *view,
+  bool skip_lookup_cache)
 {
struct i915_vma *pos = ERR_PTR(-E2BIG);
struct i915_vma *vma;
@@ -196,6 +197,9 @@ vma_create(struct drm_i915_gem_object *obj,
__set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(vma));
}
+   if (skip_lookup_cache)
+   goto skip_rb_insert;
+
rb = NULL;
p = &obj->vma.tree.rb_node;
while (*p) {
@@ -220,6 +224,7 @@ vma_create(struct drm_i915_gem_object *obj,
rb_link_node(&vma->obj_node, rb, p);
rb_insert_color(&vma->obj_node, &obj->vma.tree);
+skip_rb_insert:
if (i915_vma_is_ggtt(vma))
/*
 * We put the GGTT vma at the start of the vma-list, followed
@@ -299,7 +304,34 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
/* vma_create() will resolve the race if another creates the vma */
if (unlikely(!vma))
-   vma = vma_create(obj, vm, view);
+   vma = vma_create(obj, vm, view, false);
+
+   GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view));
+   return vma;
+}
+
+/**
+ * i915_vma_create_persistent - create a persistent VMA
+ * @obj: parent &struct drm_i915_gem_object to be mapped
+ * @vm: address space in which the mapping is located
+ * @view: additional mapping requirements
+ *
+ * Creates a persistent vma.
+ *
+ * Returns the vma, or an error pointer.
+ */
+struct i915_vma *
+i915_vma_create_persistent(struct drm_i915_gem_object *obj,
+  struct i915_address_space *vm,
+  const struct i915_gtt_view *view)
+{
+   struct i915_vma *vma;
+
+   GEM_BUG_ON(!kref_read(&vm->ref));
+
+   vma = vma_create(obj, vm, view, true);
+   if (!IS_ERR(vma))
+   i915_vma_set_persistent(vma);
GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view));
return vma;
@@ -1666,7 +1698,8 @@ static void release_references(struct i915_vma *vma, 
struct intel_gt *gt,
spin_lock(&obj->vma.lock);
list_del(&vma->obj_link);
-   if (!RB_EMPTY_NODE(&vma->obj_node))
+   if (!i915_vma_is_persistent(vma) &&


Thinking out loud - maybe you don't need the extra condition? But it 
is good for self-documenting purposes in any case.


Thanks, yah, it is not needed, will remove this update.




+   !RB_EMPTY_NODE(&vma->obj_node))
rb_erase(&vma->obj_node, &obj->vma.tree);
spin_unlock(&obj->vma.lock);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index aecd9c64486b..51e712de380a 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -44,6 +44,10 @@ struct i915_vma *
 i915_vma_instance(struct drm_i915_gem_object *obj,
  struct i915_address_space *vm,
  const struct i915_gtt_view *view);
+struct i915_vma *
+i915_vma_create_persistent(struct drm_i915_gem_object *obj,
+  struct i915_address_space *vm,
+  const struct i915_gtt_view *view);
 void i915_vma_unpin_and_release(struct i915_vma **p_vma, unsigned int flags);
 #define I915_VMA_RELEASE_MAP BIT(0)
@@ -138,6 +142,16 @@ static inline u32 i915_ggtt_pin_bias(struct i915_vma *vma)
return i915_vm_to_ggtt(vma->vm)->pin_bias;
 }
+static inline bool i915_vma_is_persistent(const struct i915_vma *vma)
+{
+   return test_bit(I915_VMA_PERSISTENT_BIT, __i915_vma_flags(vma));
+}
+
+static inline void i915_vma_set_persistent(struct i915_vma *vma)
+{
+   set_bit(I915_VMA_PERSISTENT_BIT, __i915_vma_flags(vma));
+}
+
 static inline struct i915_vma *i915_vma_get(struct i915_vma *vma)
 {
i915_gem_object_get(vma->obj);
@@ -164,8 +178,6 @@ i91

Re: [PATCH 04/16] drm/i915/vm_bind: Add support to create persistent vma

2022-09-28 Thread Niranjana Vishwanathapura

On Wed, Sep 28, 2022 at 04:44:08PM +0200, Andi Shyti wrote:

Hi Niranjana,

On Tue, Sep 27, 2022 at 11:19:06PM -0700, Niranjana Vishwanathapura wrote:

Add i915_vma_create_persistent() to create persistent vmas.
Persistent vmas will use i915_gtt_view to support partial binding.

vma_lookup is tied to a segment of the object instead of a section
of VA space. Hence, it does not support aliasing, i.e., multiple
mappings (at different VAs) pointing to the same gtt_view of an object.
Skip vma_lookup for persistent vmas to support aliasing.

Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Andi Shyti 
---
 drivers/gpu/drm/i915/i915_vma.c   | 39 ---
 drivers/gpu/drm/i915/i915_vma.h   | 16 +--
 drivers/gpu/drm/i915/i915_vma_types.h |  7 +
 3 files changed, 57 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index f17c09ead7d7..5839e1f55f00 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -109,7 +109,8 @@ static void __i915_vma_retire(struct i915_active *ref)
 static struct i915_vma *
 vma_create(struct drm_i915_gem_object *obj,
   struct i915_address_space *vm,
-  const struct i915_gtt_view *view)
+  const struct i915_gtt_view *view,
+  bool skip_lookup_cache)
 {
struct i915_vma *pos = ERR_PTR(-E2BIG);
struct i915_vma *vma;
@@ -196,6 +197,9 @@ vma_create(struct drm_i915_gem_object *obj,
__set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(vma));
}

+   if (skip_lookup_cache)
+   goto skip_rb_insert;
+
rb = NULL;
p = &obj->vma.tree.rb_node;
while (*p) {
@@ -220,6 +224,7 @@ vma_create(struct drm_i915_gem_object *obj,
rb_link_node(&vma->obj_node, rb, p);
rb_insert_color(&vma->obj_node, &obj->vma.tree);

+skip_rb_insert:
if (i915_vma_is_ggtt(vma))
/*
 * We put the GGTT vma at the start of the vma-list, followed
@@ -299,7 +304,34 @@ i915_vma_instance(struct drm_i915_gem_object *obj,

/* vma_create() will resolve the race if another creates the vma */
if (unlikely(!vma))
-   vma = vma_create(obj, vm, view);
+   vma = vma_create(obj, vm, view, false);
+
+   GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view));
+   return vma;
+}
+
+/**
+ * i915_vma_create_persistent - create a persistent VMA
+ * @obj: parent &struct drm_i915_gem_object to be mapped
+ * @vm: address space in which the mapping is located
+ * @view: additional mapping requirements
+ *
+ * Creates a persistent vma.
+ *
+ * Returns the vma, or an error pointer.
+ */
+struct i915_vma *
+i915_vma_create_persistent(struct drm_i915_gem_object *obj,
+  struct i915_address_space *vm,
+  const struct i915_gtt_view *view)
+{
+   struct i915_vma *vma;
+
+   GEM_BUG_ON(!kref_read(&vm->ref));
+
+   vma = vma_create(obj, vm, view, true);
+   if (!IS_ERR(vma))
+   i915_vma_set_persistent(vma);

GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view));
return vma;
@@ -1666,7 +1698,8 @@ static void release_references(struct i915_vma *vma, 
struct intel_gt *gt,

spin_lock(&obj->vma.lock);
list_del(&vma->obj_link);
-   if (!RB_EMPTY_NODE(&vma->obj_node))
+   if (!i915_vma_is_persistent(vma) &&
+   !RB_EMPTY_NODE(&vma->obj_node))
rb_erase(&vma->obj_node, &obj->vma.tree);

spin_unlock(&obj->vma.lock);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index aecd9c64486b..51e712de380a 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -44,6 +44,10 @@ struct i915_vma *
 i915_vma_instance(struct drm_i915_gem_object *obj,
  struct i915_address_space *vm,
  const struct i915_gtt_view *view);
+struct i915_vma *
+i915_vma_create_persistent(struct drm_i915_gem_object *obj,
+  struct i915_address_space *vm,
+  const struct i915_gtt_view *view);

 void i915_vma_unpin_and_release(struct i915_vma **p_vma, unsigned int flags);
 #define I915_VMA_RELEASE_MAP BIT(0)
@@ -138,6 +142,16 @@ static inline u32 i915_ggtt_pin_bias(struct i915_vma *vma)
return i915_vm_to_ggtt(vma->vm)->pin_bias;
 }

+static inline bool i915_vma_is_persistent(const struct i915_vma *vma)
+{
+   return test_bit(I915_VMA_PERSISTENT_BIT, __i915_vma_flags(vma));
+}
+
+static inline void i915_vma_set_persistent(struct i915_vma *vma)
+{
+   set_bit(I915_VMA_PERSISTENT_BIT, __i915_vma_flags(vma));
+}
+
 static inline struct i915_vma *i915_vma_get(struct i915_vma *vma)
 {
i915_gem_object_get(vma->obj);
@@ -164,8 +178,6 @@ i915_vma_compare(struct i915_vma *vma,
 {
ptrdiff_t cmp;

-   GEM_BUG_ON(view && !i915_is_ggtt_or_dpt(vm));
-
cmp = ptrdiff(vma->

Re: [PATCH v13 5/9] drm/i915: Check for integer truncation on scatterlist creation

2022-09-28 Thread Linus Torvalds
On Wed, Sep 28, 2022 at 1:15 AM Gwan-gyeong Mun
 wrote:
>
> +   if (check_assign(obj->base.size >> PAGE_SHIFT, &npages))
> +   return -E2BIG;

I have to say, I find that new "check_assign()" macro use to be disgusting.

It's one thing to check for overflows.

It's another thing entirely to just assign something to a local variable.

This disgusting "let's check and assign" needs to die. It makes the
code a completely unreadable mess. The "user" version is even worse.

If you worry about overflow, then use a mix of

 (a) use a sufficiently large type to begin with

 (b) check for value range separately

and in this particular case, I also suspect that the whole range check
should have been somewhere else entirely - at the original creation of
that "obj" structure, not at one random end-point where it is used.

In other words, THIS WHOLE PATCH is just end-points checking the size
requirements of that "base.size" thing much too late, when it should
have been checked originally for some "maximum acceptable base size"
instead.

And that "maximum acceptable base size" should *not* be about "this is
the size of the variables we use". It should be a sanity check of
"this value is sane and fits in sane use cases".

Because "let's plug security checks" is most definitely not about
picking random assignments and saying "let's check this one". It's
about trying to catch things earlier than that.

Kees, you need to rein in the craziness in overflow.h.

 Linus


Re: [PATCH v2 01/16] slab: Remove __malloc attribute from realloc functions

2022-09-28 Thread Kees Cook
On Wed, Sep 28, 2022 at 09:26:15AM +0200, Geert Uytterhoeven wrote:
> Hi Kees,
> 
> On Fri, Sep 23, 2022 at 10:35 PM Kees Cook  wrote:
> > The __malloc attribute should not be applied to "realloc" functions, as
> > the returned pointer may alias the storage of the prior pointer. Instead
> > of splitting __malloc from __alloc_size, which would be a huge amount of
> > churn, just create __realloc_size for the few cases where it is needed.
> >
> > Additionally removes the conditional test for __alloc_size__, which is
> > always defined now.
> >
> > Cc: Christoph Lameter 
> > Cc: Pekka Enberg 
> > Cc: David Rientjes 
> > Cc: Joonsoo Kim 
> > Cc: Andrew Morton 
> > Cc: Vlastimil Babka 
> > Cc: Roman Gushchin 
> > Cc: Hyeonggon Yoo <42.hye...@gmail.com>
> > Cc: Marco Elver 
> > Cc: linux...@kvack.org
> > Signed-off-by: Kees Cook 
> 
> Thanks for your patch, which is now commit 63caa04ec60583b1 ("slab:
> Remove __malloc attribute from realloc functions") in next-20220927.
> 
> nore...@ellerman.id.au reported all gcc8-based builds to fail
> (e.g. [1], more at [2]):
> 
> In file included from :
> ./include/linux/percpu.h: In function ‘__alloc_reserved_percpu’:
> ././include/linux/compiler_types.h:279:30: error: expected
> declaration specifiers before ‘__alloc_size__’
>  #define __alloc_size(x, ...) __alloc_size__(x, ## __VA_ARGS__) __malloc
>   ^~
> ./include/linux/percpu.h:120:74: note: in expansion of macro 
> ‘__alloc_size’
> [...]
> 
> It's building fine with e.g. gcc-9 (which is my usual m68k cross-compiler).
> Reverting this commit on next-20220927 fixes the issue.
> 
> [1] http://kisskb.ellerman.id.au/kisskb/buildresult/14803908/
> [2] 
> http://kisskb.ellerman.id.au/kisskb/head/1bd8b75fe6adeaa89d02968bdd811ffe708cf839/

Eek! Thanks for letting me know. I'm confused about this --
__alloc_size__ wasn't optional in compiler_attributes.h -- but obviously
I broke something! I'll go figure this out.

-Kees

-- 
Kees Cook


Re: [PATCH] drm/panel-edp: Add BOE NT116WHM-N4C (HW: V8.1)

2022-09-28 Thread Doug Anderson
Hi,

On Tue, Sep 27, 2022 at 11:51 PM Sean Hong
 wrote:
>
> On Tue, Sep 27, 2022 at 11:27 PM Doug Anderson  wrote:
> >
> > Hi,
> >
> > On Mon, Sep 26, 2022 at 11:35 PM Sean Hong
> >  wrote:
> > >
> > > Add support for the BOE - NT116WHM-N4C (HW: V8.1) panel.
> > >
> > > Signed-off-by: Sean Hong 
> > > ---
> > >  drivers/gpu/drm/panel/panel-edp.c | 1 +
> > >  1 file changed, 1 insertion(+)
> >
> > Wow, another panel?!?
> >
> > Reviewed-by: Douglas Anderson 
> >
> > Pushed to drm-misc:
> >
> > 2f24fe8c54cc drm/panel-edp: Add BOE NT116WHM-N4C (HW: V8.1)
>
> Hi Anderson,
>
> I found some mistakes on this commit. I typed the wrong model name on
> title and content.
> The correct model name is NV116WHM-N4C and the code is correct.
>
> How can I fix it? Do I need to revert this commit and then submit a
> new patch upstream?

There's not much to be done at this point in time. Reverting /
readding the same code with a slightly different commit message
wouldn't be worth it. The code is correct and that's the important
thing. The commit has a link to the mailing list post so anyone who's
confused will hopefully click the link and can find this discussion.

-Doug


Re: [PATCH 01/16] drm/i915/vm_bind: Expose vm lookup function

2022-09-28 Thread Matthew Auld

On 28/09/2022 07:19, Niranjana Vishwanathapura wrote:

Make i915_gem_vm_lookup() function non-static as it will be
used by the vm_bind feature.

Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Andi Shyti 


Acked-by: Matthew Auld 


---
  drivers/gpu/drm/i915/gem/i915_gem_context.c | 11 ++-
  drivers/gpu/drm/i915/gem/i915_gem_context.h |  3 +++
  2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 0bcde53c50c6..f4e648ec01ed 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -346,7 +346,16 @@ static int proto_context_register(struct 
drm_i915_file_private *fpriv,
return ret;
  }
  
-static struct i915_address_space *

+/**
+ * i915_gem_vm_lookup() - looks up for the VM reference given the vm id
+ * @file_priv: the private data associated with the user's file
+ * @id: the VM id
+ *
+ * Finds the VM reference associated to a specific id.
+ *
+ * Returns the VM pointer on success, NULL in case of failure.
+ */
+struct i915_address_space *
  i915_gem_vm_lookup(struct drm_i915_file_private *file_priv, u32 id)
  {
struct i915_address_space *vm;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h 
b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index e5b0f66ea1fe..899fa8f1e0fe 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -139,6 +139,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, 
void *data,
  int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
   struct drm_file *file);
  
+struct i915_address_space *

+i915_gem_vm_lookup(struct drm_i915_file_private *file_priv, u32 id);
+
  struct i915_gem_context *
  i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id);
  


Re: [PATCH 02/16] drm/i915/vm_bind: Add __i915_sw_fence_await_reservation()

2022-09-28 Thread Matthew Auld

On 28/09/2022 07:19, Niranjana Vishwanathapura wrote:

Add function __i915_sw_fence_await_reservation() for
asynchronous wait on a dma-resv object with specified
dma_resv_usage. This is required for async vma unbind
with vm_bind.

Signed-off-by: Niranjana Vishwanathapura 
---
  drivers/gpu/drm/i915/i915_sw_fence.c | 28 +---
  drivers/gpu/drm/i915/i915_sw_fence.h | 23 +--
  2 files changed, 38 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c 
b/drivers/gpu/drm/i915/i915_sw_fence.c
index cc2a8821d22a..b7a10c374a08 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -7,7 +7,6 @@
  #include 
  #include 
  #include 
-#include 
  
  #include "i915_sw_fence.h"

  #include "i915_selftest.h"
@@ -569,11 +568,26 @@ int __i915_sw_fence_await_dma_fence(struct i915_sw_fence 
*fence,
return ret;
  }
  
-int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,

-   struct dma_resv *resv,
-   bool write,
-   unsigned long timeout,
-   gfp_t gfp)
+/**
+ * __i915_sw_fence_await_reservation() - Setup a fence to wait on a dma-resv
+ * object with specified usage.
+ * @fence: the fence that needs to wait
+ * @resv: dma-resv object
+ * @usage: dma_resv_usage (See enum dma_resv_usage)
+ * @timeout: how long to wait in jiffies
+ * @gfp: allocation mode
+ *
+ * Setup the @fence to asynchronously wait on dma-resv object @resv for usage
+ * @usage to complete before signaling.


s/usage @usage/@usage/ ?


+ *
+ * Returns 0 if there is nothing to wait on, -ve upon error and >0 upon


What does "-ve" mean btw?

Acked-by: Matthew Auld 


+ * successfully setting up the wait.
+ */
+int __i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
+ struct dma_resv *resv,
+ enum dma_resv_usage usage,
+ unsigned long timeout,
+ gfp_t gfp)
  {
struct dma_resv_iter cursor;
struct dma_fence *f;
@@ -582,7 +596,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence 
*fence,
debug_fence_assert(fence);
might_sleep_if(gfpflags_allow_blocking(gfp));
  
-	dma_resv_iter_begin(&cursor, resv, dma_resv_usage_rw(write));

+   dma_resv_iter_begin(&cursor, resv, usage);
dma_resv_for_each_fence_unlocked(&cursor, f) {
pending = i915_sw_fence_await_dma_fence(fence, f, timeout,
gfp);
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h 
b/drivers/gpu/drm/i915/i915_sw_fence.h
index f752bfc7c6e1..9c4859dc4c0d 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -10,13 +10,13 @@
  #define _I915_SW_FENCE_H_
  
  #include 

+#include 
  #include 
  #include 
  #include  /* for NOTIFY_DONE */
  #include 
  
  struct completion;

-struct dma_resv;
  struct i915_sw_fence;
  
  enum i915_sw_fence_notify {

@@ -89,11 +89,22 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence 
*fence,
  unsigned long timeout,
  gfp_t gfp);
  
-int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,

-   struct dma_resv *resv,
-   bool write,
-   unsigned long timeout,
-   gfp_t gfp);
+int __i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
+ struct dma_resv *resv,
+ enum dma_resv_usage usage,
+ unsigned long timeout,
+ gfp_t gfp);
+
+static inline int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
+ struct dma_resv *resv,
+ bool write,
+ unsigned long timeout,
+ gfp_t gfp)
+{
+   return __i915_sw_fence_await_reservation(fence, resv,
+dma_resv_usage_rw(write),
+timeout, gfp);
+}
  
  bool i915_sw_fence_await(struct i915_sw_fence *fence);

  void i915_sw_fence_complete(struct i915_sw_fence *fence);


Re: [PATCH 03/16] drm/i915/vm_bind: Expose i915_gem_object_max_page_size()

2022-09-28 Thread Matthew Auld

On 28/09/2022 07:19, Niranjana Vishwanathapura wrote:

Make the i915_gem_object_max_page_size() function non-static, as it
will be used by the vm_bind feature.

Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Andi Shyti 
---
  drivers/gpu/drm/i915/gem/i915_gem_create.c | 19 ++-
  drivers/gpu/drm/i915/gem/i915_gem_object.h |  2 ++
  2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c 
b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index 33673fe7ee0a..4aa7b5582b8e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -15,10 +15,19 @@
  #include "i915_trace.h"
  #include "i915_user_extensions.h"
  
-static u32 object_max_page_size(struct intel_memory_region **placements,

-   unsigned int n_placements)
+/**
+ * i915_gem_object_max_page_size() - max of min_page_size of the regions
+ * @placements:  list of regions
+ * @n_placements: number of the placements
+ *
+ * Calculates the max of the min_page_size of a list of placements passed in.
+ *
+ * Return: max of the min_page_size


"max of the min_page_size, or I915_GTT_PAGE_SIZE_4K if zero placements."

Acked-by: Matthew Auld 


+ */
+u32 i915_gem_object_max_page_size(struct intel_memory_region **placements,
+ unsigned int n_placements)
  {
-   u32 max_page_size = 0;
+   u32 max_page_size = I915_GTT_PAGE_SIZE_4K;
int i;
  
  	for (i = 0; i < n_placements; i++) {

@@ -28,7 +37,6 @@ static u32 object_max_page_size(struct intel_memory_region 
**placements,
max_page_size = max_t(u32, max_page_size, mr->min_page_size);
}
  
-	GEM_BUG_ON(!max_page_size);

return max_page_size;
  }
  
@@ -99,7 +107,8 @@ __i915_gem_object_create_user_ext(struct drm_i915_private *i915, u64 size,
  
  	i915_gem_flush_free_objects(i915);
  
-	size = round_up(size, object_max_page_size(placements, n_placements));

+   size = round_up(size, i915_gem_object_max_page_size(placements,
+   n_placements));
if (size == 0)
return ERR_PTR(-EINVAL);
  
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h

index a3b7551a57fc..d53d01b1860a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -47,6 +47,8 @@ static inline bool i915_gem_object_size_2big(u64 size)
  }
  
  void i915_gem_init__objects(struct drm_i915_private *i915);

+u32 i915_gem_object_max_page_size(struct intel_memory_region **placements,
+ unsigned int n_placements);
  
  void i915_objects_module_exit(void);

  int i915_objects_module_init(void);


Re: [PATCH 05/16] drm/i915/vm_bind: Implement bind and unbind of object

2022-09-28 Thread Matthew Auld

On 28/09/2022 07:19, Niranjana Vishwanathapura wrote:

Add uapi and implement support for bind and unbind of an
object at the specified GPU virtual addresses.

The vm_bind mode is not supported in legacy execbuf2 ioctl.
It will be supported only in the newer execbuf3 ioctl.

Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Prathap Kumar Valsan 
Signed-off-by: Andi Shyti 
---
  drivers/gpu/drm/i915/Makefile |   1 +
  .../gpu/drm/i915/gem/i915_gem_execbuffer.c|   5 +
  drivers/gpu/drm/i915/gem/i915_gem_vm_bind.h   |  26 ++
  .../drm/i915/gem/i915_gem_vm_bind_object.c| 306 ++
  drivers/gpu/drm/i915/gt/intel_gtt.c   |  10 +
  drivers/gpu/drm/i915/gt/intel_gtt.h   |  17 +
  drivers/gpu/drm/i915/i915_driver.c|   3 +
  drivers/gpu/drm/i915/i915_vma.c   |   1 +
  drivers/gpu/drm/i915/i915_vma_types.h |  14 +
  include/uapi/drm/i915_drm.h   | 112 +++
  10 files changed, 495 insertions(+)
  create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_vm_bind.h
  create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index a26edcdadc21..9bf939ef18ea 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -166,6 +166,7 @@ gem-y += \
gem/i915_gem_ttm_move.o \
gem/i915_gem_ttm_pm.o \
gem/i915_gem_userptr.o \
+   gem/i915_gem_vm_bind_object.o \
gem/i915_gem_wait.o \
gem/i915_gemfs.o
  i915-y += \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index cd75b0ca2555..f85f10cf9c34 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -781,6 +781,11 @@ static int eb_select_context(struct i915_execbuffer *eb)
if (unlikely(IS_ERR(ctx)))
return PTR_ERR(ctx);
  
+	if (ctx->vm->vm_bind_mode) {

+   i915_gem_context_put(ctx);
+   return -EOPNOTSUPP;
+   }
+
eb->gem_context = ctx;
if (i915_gem_context_has_full_ppgtt(ctx))
eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_vm_bind.h 
b/drivers/gpu/drm/i915/gem/i915_gem_vm_bind.h
new file mode 100644
index ..36262a6357b5
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_vm_bind.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __I915_GEM_VM_BIND_H
+#define __I915_GEM_VM_BIND_H
+
+#include 
+
+struct drm_device;
+struct drm_file;
+struct i915_address_space;
+struct i915_vma;
+
+struct i915_vma *
+i915_gem_vm_bind_lookup_vma(struct i915_address_space *vm, u64 va);
+
+int i915_gem_vm_bind_ioctl(struct drm_device *dev, void *data,
+  struct drm_file *file);
+int i915_gem_vm_unbind_ioctl(struct drm_device *dev, void *data,
+struct drm_file *file);
+
+void i915_gem_vm_unbind_all(struct i915_address_space *vm);
+
+#endif /* __I915_GEM_VM_BIND_H */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c
new file mode 100644
index ..e529162abd2c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c
@@ -0,0 +1,306 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include 
+
+#include 
+
+#include "gem/i915_gem_context.h"
+#include "gem/i915_gem_vm_bind.h"
+
+#include "gt/intel_gpu_commands.h"
+
+#define START(node) ((node)->start)
+#define LAST(node) ((node)->last)
+
+INTERVAL_TREE_DEFINE(struct i915_vma, rb, u64, __subtree_last,
+START, LAST, static inline, i915_vm_bind_it)
+
+#undef START
+#undef LAST
+
+/**
+ * DOC: VM_BIND/UNBIND ioctls
+ *
+ * DRM_I915_GEM_VM_BIND/UNBIND ioctls allow UMD to bind/unbind GEM buffer
+ * objects (BOs) or sections of a BO at specified GPU virtual addresses on a
+ * specified address space (VM). Multiple mappings can map to the same physical
+ * pages of an object (aliasing). These mappings (also referred to as 
persistent
+ * mappings) will be persistent across multiple GPU submissions (execbuf calls)
+ * issued by the UMD, without user having to provide a list of all required
+ * mappings during each submission (as required by older execbuf mode).
+ *
+ * The VM_BIND/UNBIND calls allow UMDs to request a timeline out fence for
+ * signaling the completion of bind/unbind operation.
+ *
+ * VM_BIND feature is advertised to user via I915_PARAM_VM_BIND_VERSION.
+ * User has to opt-in for VM_BIND mode of binding for an address space (VM)
+ * during VM creation time via I915_VM_CREATE_FLAGS_USE_VM_BIND extension.
+ *
+ * VM_BIND/UNBIND ioctl calls executed on different CPU threads concurrently
+ * are not ordered. Furthermore, parts of the VM_BIND/UNBIND operations can be
+ * done a

Re: [PATCH 06/16] drm/i915/vm_bind: Support for VM private BOs

2022-09-28 Thread Matthew Auld

On 28/09/2022 07:19, Niranjana Vishwanathapura wrote:

Each VM creates a root_obj and shares it with all of its private objects
to use as their dma_resv object. This has a performance advantage in the
execbuf path, as it requires updating a single dma_resv object for all
private BOs, versus updating a list of dma_resv objects for shared BOs.

VM private BOs can only be mapped on the specified VM and cannot be dmabuf
exported. Also, they are supported only in vm_bind mode.

Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Andi Shyti 
---
  drivers/gpu/drm/i915/gem/i915_gem_create.c| 41 ++-
  drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c|  6 +++
  .../gpu/drm/i915/gem/i915_gem_execbuffer.c|  4 ++
  drivers/gpu/drm/i915/gem/i915_gem_object.c|  3 ++
  .../gpu/drm/i915/gem/i915_gem_object_types.h  |  3 ++
  drivers/gpu/drm/i915/gem/i915_gem_ttm.c   |  3 ++
  .../drm/i915/gem/i915_gem_vm_bind_object.c|  9 
  drivers/gpu/drm/i915/gt/intel_gtt.c   |  4 ++
  drivers/gpu/drm/i915/gt/intel_gtt.h   |  2 +
  drivers/gpu/drm/i915/i915_vma.c   |  1 +
  drivers/gpu/drm/i915/i915_vma_types.h |  2 +
  include/uapi/drm/i915_drm.h   | 30 ++
  12 files changed, 106 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c 
b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index 4aa7b5582b8e..692d95ef5d3e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -11,6 +11,7 @@
  #include "pxp/intel_pxp.h"
  
  #include "i915_drv.h"

+#include "i915_gem_context.h"
  #include "i915_gem_create.h"
  #include "i915_trace.h"
  #include "i915_user_extensions.h"
@@ -252,6 +253,7 @@ struct create_ext {
unsigned int n_placements;
unsigned int placement_mask;
unsigned long flags;
+   u32 vm_id;
  };
  
  static void repr_placements(char *buf, size_t size,

@@ -401,9 +403,24 @@ static int ext_set_protected(struct i915_user_extension 
__user *base, void *data
return 0;
  }
  
+static int ext_set_vm_private(struct i915_user_extension __user *base,

+ void *data)
+{
+   struct drm_i915_gem_create_ext_vm_private ext;
+   struct create_ext *ext_data = data;
+
+   if (copy_from_user(&ext, base, sizeof(ext)))
+   return -EFAULT;
+
+   ext_data->vm_id = ext.vm_id;
+
+   return 0;
+}
+
  static const i915_user_extension_fn create_extensions[] = {
[I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements,
[I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected,
+   [I915_GEM_CREATE_EXT_VM_PRIVATE] = ext_set_vm_private,
  };
  
  /**

@@ -419,6 +436,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
struct drm_i915_private *i915 = to_i915(dev);
struct drm_i915_gem_create_ext *args = data;
struct create_ext ext_data = { .i915 = i915 };
+   struct i915_address_space *vm = NULL;
struct drm_i915_gem_object *obj;
int ret;
  
@@ -432,6 +450,12 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,

if (ret)
return ret;
  
+	if (ext_data.vm_id) {

+   vm = i915_gem_vm_lookup(file->driver_priv, ext_data.vm_id);
+   if (unlikely(!vm))
+   return -ENOENT;
+   }
+
if (!ext_data.n_placements) {
ext_data.placements[0] =
intel_memory_region_by_type(i915, INTEL_MEMORY_SYSTEM);
@@ -458,8 +482,21 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
ext_data.placements,
ext_data.n_placements,
ext_data.flags);
-   if (IS_ERR(obj))
-   return PTR_ERR(obj);
+   if (IS_ERR(obj)) {
+   ret = PTR_ERR(obj);
+   goto vm_put;
+   }
+
+   if (vm) {
+   obj->base.resv = vm->root_obj->base.resv;
+   obj->priv_root = i915_gem_object_get(vm->root_obj);
+   i915_vm_put(vm);
+   }
  
  	return i915_gem_publish(obj, file, &args->size, &args->handle);

+vm_put:
+   if (vm)
+   i915_vm_put(vm);
+
+   return ret;
  }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c 
b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index f5062d0c6333..6433173c3e84 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -218,6 +218,12 @@ struct dma_buf *i915_gem_prime_export(struct 
drm_gem_object *gem_obj, int flags)
struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
  
+	if (obj->priv_root) {

+   drm_dbg(obj->base.dev,
+   "Exporting VM private objects is not allowed\n");
+   return ERR_PTR(-EINVAL);
+   }
+

[RFC PATCH v5 1/6] dt-bindings: display: ti, am65x-dss: Add am625 dss compatible

2022-09-28 Thread Aradhya Bhatia
Add ti,am625-dss compatible string.
The DSS IP on TI's AM625 SoC is an update from the DSS on TI's AM65X
SoC. The former has an additional OLDI TX to enable a 2K resolution on
OLDI displays or enable 2 duplicated displays with a smaller resolution.

Signed-off-by: Aradhya Bhatia 
Reviewed-by: Rahul T R 
Acked-by: Krzysztof Kozlowski 
---
 .../devicetree/bindings/display/ti/ti,am65x-dss.yaml  | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml 
b/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml
index 5c7d2cbc4aac..6bbce921479d 100644
--- a/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml
+++ b/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml
@@ -19,7 +19,9 @@ description: |
 
 properties:
   compatible:
-const: ti,am65x-dss
+enum:
+  - ti,am625-dss
+  - ti,am65x-dss
 
   reg:
 description:
-- 
2.37.0



[RFC PATCH v5 4/6] drm/tidss: Add support to configure OLDI mode for am625-dss.

2022-09-28 Thread Aradhya Bhatia
The newer version of DSS (AM625-DSS) has 2 OLDI TXes at its disposal.
These can be configured to support the following modes:

1. OLDI_SINGLE_LINK_SINGLE_MODE
Single Output over OLDI 0.
+------+        +---------+      +-------+
|      |        |         |      |       |
| CRTC +------->+ ENCODER +----->| PANEL |
|      |        |         |      |       |
+------+        +---------+      +-------+

2. OLDI_SINGLE_LINK_CLONE_MODE
Duplicate Output over OLDI 0 and 1.
+------+        +---------+      +-------+
|      |        |         |      |       |
| CRTC +---+--->| ENCODER +----->| PANEL |
|      |   |    |         |      |       |
+------+   |    +---------+      +-------+
           |
           |    +---------+      +-------+
           |    |         |      |       |
           +--->| ENCODER +----->| PANEL |
                |         |      |       |
                +---------+      +-------+

3. OLDI_DUAL_LINK_MODE
Combined Output over OLDI 0 and 1.
+------+        +---------+      +-------+
|      |        |         +----->|       |
| CRTC +------->+ ENCODER |      | PANEL |
|      |        |         +----->|       |
+------+        +---------+      +-------+

Following the above pathways for different modes, 2 encoder/panel-bridge
pipes get created for clone mode, and 1 pipe in cases of single link and
dual link mode.

Add support to configure the OLDI modes using the OF and LVDS DRM helper
functions.

Signed-off-by: Aradhya Bhatia 
---
 drivers/gpu/drm/tidss/tidss_dispc.c |  11 +++
 drivers/gpu/drm/tidss/tidss_dispc.h |   8 ++
 drivers/gpu/drm/tidss/tidss_drv.h   |   3 +
 drivers/gpu/drm/tidss/tidss_kms.c   | 146 +++-
 4 files changed, 145 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/tidss/tidss_dispc.c 
b/drivers/gpu/drm/tidss/tidss_dispc.c
index 34f0da4bb3e3..88008ad39b55 100644
--- a/drivers/gpu/drm/tidss/tidss_dispc.c
+++ b/drivers/gpu/drm/tidss/tidss_dispc.c
@@ -354,6 +354,8 @@ struct dispc_device {
 
bool is_enabled;
 
+   enum dispc_oldi_modes oldi_mode;
+
struct dss_vp_data vp_data[TIDSS_MAX_PORTS];
 
u32 *fourccs;
@@ -1958,6 +1960,15 @@ const u32 *dispc_plane_formats(struct dispc_device 
*dispc, unsigned int *len)
return dispc->fourccs;
 }
 
+int dispc_configure_oldi_mode(struct dispc_device *dispc,
+ enum dispc_oldi_modes oldi_mode)
+{
+   WARN_ON(!dispc);
+
+   dispc->oldi_mode = oldi_mode;
+   return 0;
+}
+
 static s32 pixinc(int pixels, u8 ps)
 {
if (pixels == 1)
diff --git a/drivers/gpu/drm/tidss/tidss_dispc.h 
b/drivers/gpu/drm/tidss/tidss_dispc.h
index b66418e583ee..45cce1054832 100644
--- a/drivers/gpu/drm/tidss/tidss_dispc.h
+++ b/drivers/gpu/drm/tidss/tidss_dispc.h
@@ -64,6 +64,13 @@ enum dispc_dss_subrevision {
DISPC_AM625,
 };
 
+enum dispc_oldi_modes {
+   OLDI_MODE_OFF,  /* OLDI turned off / tied off 
in IP. */
+   OLDI_SINGLE_LINK_SINGLE_MODE,   /* Single Output over OLDI 0. */
+   OLDI_SINGLE_LINK_CLONE_MODE,/* Duplicate Output over OLDI 0 
and 1. */
+   OLDI_DUAL_LINK_MODE,/* Combined Output over OLDI 0 
and 1. */
+};
+
 struct dispc_features {
int min_pclk_khz;
int max_pclk_khz[DISPC_VP_MAX_BUS_TYPE];
@@ -131,6 +138,7 @@ int dispc_plane_setup(struct dispc_device *dispc, u32 
hw_plane,
  u32 hw_videoport);
 int dispc_plane_enable(struct dispc_device *dispc, u32 hw_plane, bool enable);
 const u32 *dispc_plane_formats(struct dispc_device *dispc, unsigned int *len);
+int dispc_configure_oldi_mode(struct dispc_device *dispc, enum 
dispc_oldi_modes oldi_mode);
 
 int dispc_init(struct tidss_device *tidss);
 void dispc_remove(struct tidss_device *tidss);
diff --git a/drivers/gpu/drm/tidss/tidss_drv.h 
b/drivers/gpu/drm/tidss/tidss_drv.h
index d7f27b0b0315..2252ba0222ca 100644
--- a/drivers/gpu/drm/tidss/tidss_drv.h
+++ b/drivers/gpu/drm/tidss/tidss_drv.h
@@ -12,6 +12,9 @@
 #define TIDSS_MAX_PORTS 4
 #define TIDSS_MAX_PLANES 4
 
+/* For AM625-DSS with 2 OLDI TXes */
+#define TIDSS_MAX_BRIDGE_PER_PIPE  2
+
 typedef u32 dispc_irq_t;
 
 struct tidss_device {
diff --git a/drivers/gpu/drm/tidss/tidss_kms.c 
b/drivers/gpu/drm/tidss/tidss_kms.c
index 666e527a0acf..73afe390f36d 100644
--- a/drivers/gpu/drm/tidss/tidss_kms.c
+++ b/drivers/gpu/drm/tidss/tidss_kms.c
@@ -107,32 +107,84 @@ static const struct drm_mode_config_funcs 
mode_config_funcs = {
.atomic_commit = drm_atomic_helper_commit,
 };
 
+static int tidss_get_oldi_mode(struct tidss_device *tidss)
+{
+   int pixel_order;
+   struct device_node *dss_ports, *oldi0_port, *oldi1_port;
+
+   dss_ports = of_get_next_child(tidss->dev->of_node, NULL);
+   oldi0_port = of_graph_get_port_by_id(dss_ports, 0);
+   oldi1_port = of_graph_get_port_by_id(dss_ports, 2);
+
+   if (!(oldi0_port && oldi1_port))
+   return OLDI_SINGLE_LINK_SINGLE_MODE;
+
+ 

[RFC PATCH v5 2/6] dt-bindings: display: ti: am65x-dss: Add new port for am625-dss

2022-09-28 Thread Aradhya Bhatia
Add 3rd "port" property for am625-dss.
This port represents the output from the 2nd OLDI TX (OLDI TX 1) latched
onto the first video port (VP0) from the DSS controller on AM625 SOC.

Signed-off-by: Aradhya Bhatia 
---
 .../bindings/display/ti/ti,am65x-dss.yaml  | 18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml 
b/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml
index 6bbce921479d..99576c6ec108 100644
--- a/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml
+++ b/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml
@@ -82,13 +82,18 @@ properties:
   port@0:
 $ref: /schemas/graph.yaml#/properties/port
 description:
-  The DSS OLDI output port node form video port 1
+  The DSS OLDI output port node from video port 1 (OLDI TX 0).
 
   port@1:
 $ref: /schemas/graph.yaml#/properties/port
 description:
   The DSS DPI output port node from video port 2
 
+  port@2:
+$ref: /schemas/graph.yaml#/properties/port
+description:
+  The DSS OLDI output port node from video port 1 (OLDI TX 1).
+
   ti,am65x-oldi-io-ctrl:
 $ref: "/schemas/types.yaml#/definitions/phandle"
 description:
@@ -104,6 +109,17 @@ properties:
   Input memory (from main memory to dispc) bandwidth limit in
   bytes per second
 
+if:
+  properties:
+compatible:
+  contains:
+const: ti,am65x-dss
+then:
+  properties:
+ports:
+  properties:
+port@2: false
+
 required:
   - compatible
   - reg
-- 
2.37.0



[RFC PATCH v5 6/6] drm/tidss: Enable Dual and Duplicate Modes for OLDI

2022-09-28 Thread Aradhya Bhatia
The AM625 DSS IP contains 2 OLDI TXes which can work to enable 2
duplicated displays of smaller resolutions or enable a single Dual Link
display with a higher resolution (1920x1200).

Configure the necessary registers to enable and disable the OLDI TXes
with the necessary mode configurations.

Signed-off-by: Aradhya Bhatia 
---
 drivers/gpu/drm/tidss/tidss_dispc.c | 28 ++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/tidss/tidss_dispc.c 
b/drivers/gpu/drm/tidss/tidss_dispc.c
index 68444e0cd8d7..fd7f49535f0c 100644
--- a/drivers/gpu/drm/tidss/tidss_dispc.c
+++ b/drivers/gpu/drm/tidss/tidss_dispc.c
@@ -1003,8 +1003,8 @@ static void dispc_enable_oldi(struct dispc_device *dispc, 
u32 hw_videoport,
int count = 0;
 
/*
-* For the moment DUALMODESYNC, MASTERSLAVE, MODE, and SRC
-* bits of DISPC_VP_DSS_OLDI_CFG are set statically to 0.
+* For the moment MASTERSLAVE, and SRC bits of DISPC_VP_DSS_OLDI_CFG are
+* set statically to 0.
 */
 
if (fmt->data_width == 24)
@@ -1021,6 +1021,30 @@ static void dispc_enable_oldi(struct dispc_device 
*dispc, u32 hw_videoport,
 
oldi_cfg |= BIT(0); /* ENABLE */
 
+   switch (dispc->oldi_mode) {
+   case OLDI_MODE_OFF:
+   oldi_cfg &= ~BIT(0); /* DISABLE */
+   break;
+
+   case OLDI_SINGLE_LINK_SINGLE_MODE:
+   /* All configuration is done for this mode.  */
+   break;
+
+   case OLDI_SINGLE_LINK_CLONE_MODE:
+   oldi_cfg |= BIT(5); /* CLONE MODE */
+   break;
+
+   case OLDI_DUAL_LINK_MODE:
+   oldi_cfg |= BIT(11); /* DUALMODESYNC */
+   oldi_cfg |= BIT(3); /* data-mapping field also indicates 
dual-link mode */
+   break;
+
+   default:
+   dev_warn(dispc->dev, "%s: Incorrect oldi mode. Returning.\n",
+__func__);
+   return;
+   }
+
dispc_vp_write(dispc, hw_videoport, DISPC_VP_DSS_OLDI_CFG, oldi_cfg);
 
while (!(oldi_reset_bit & dispc_read(dispc, DSS_SYSSTATUS)) &&
-- 
2.37.0



Re: [PATCH v13 5/9] drm/i915: Check for integer truncation on scatterlist creation

2022-09-28 Thread Kees Cook
On Wed, Sep 28, 2022 at 10:09:04AM -0700, Linus Torvalds wrote:
> Kees, you need to rein in the craziness in overflow.h.

Understood. I've been trying to help the drm folks walk a line between
having a bunch of custom macros hidden away in the drm includes and
building up generalized versions that are actually helpful beyond drm.
But I can see that it doesn't help to have a "do two things at the same
time" macro for the assignment checking.

-- 
Kees Cook


Re: [PATCH 7/7] drm/i915/guc: handle interrupts from media GuC

2022-09-28 Thread Matt Roper
On Tue, Sep 27, 2022 at 05:22:41PM -0700, Ceraolo Spurio, Daniele wrote:
> 
> 
> On 9/27/2022 5:10 PM, Matt Roper wrote:
> > On Thu, Sep 22, 2022 at 03:11:17PM -0700, Daniele Ceraolo Spurio wrote:
> > > The render and media GuCs share the same interrupt enable register, so
> > > we can no longer disable interrupts when we disable communication for
> > > one of the GuCs as this would impact the other GuC. Instead, we keep the
> > > interrupts always enabled in HW and use a variable in the GuC structure
> > > to determine if we want to service the received interrupts or not.
> > Even if they have a unified enable bit, can't we still just update the
> > per-GuC mask bit to get the same behavior (i.e., no interrupts
> > delivered to the host for that specific GuC)?
> 
> We could yes, but we've avoided dynamically using masks for gen11+ because
> it can mess with rc6 (e.g., see
> https://patchwork.freedesktop.org/patch/207829/).

+Cc Mika & Tvrtko in case they remember more historic details.

Is that expected/documented behavior?  Or is it an unlabelled workaround
that might not be an issue anymore on newer platforms?  Also, it looks
like that patch only applies to RING_IMR and doesn't necessarily impact
other interrupt masking such as the GuC mask.

The code today (which seems to be in use without problem on both gen12
and xehp) is setting all mask bits in GEN11_GUC_SG_INTR_MASK and only
clearing the single G2H bit at the point G2H interrupts are enabled.
GEN11_GUC_SG_INTR_MASK has now become GEN12_GUC_MGUC_INTR_MASK, but it
seems like keeping the masking logic the same as we've been using on
gen12 and xehp would be fine if we just never clear the enable bit?

> 
> > 
> > > Signed-off-by: Daniele Ceraolo Spurio 
> > > Cc: Matt Roper 
> > > Cc: John Harrison 
> > > Cc: Alan Previn 
> > > ---
> > >   drivers/gpu/drm/i915/gt/intel_gt_irq.c  | 21 ++
> > >   drivers/gpu/drm/i915/gt/intel_gt_regs.h |  2 ++
> > >   drivers/gpu/drm/i915/gt/uc/intel_guc.c  | 29 ++---
> > >   drivers/gpu/drm/i915/gt/uc/intel_guc.h  |  5 -
> > >   drivers/gpu/drm/i915/gt/uc/intel_uc.c   |  8 +--
> > >   5 files changed, 45 insertions(+), 20 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c 
> > > b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> > > index f26882fdc24c..e33ed9ae1439 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> > > +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> > > @@ -17,6 +17,9 @@
> > >   static void guc_irq_handler(struct intel_guc *guc, u16 iir)
> > >   {
> > > + if (unlikely(!guc->interrupts.enabled))
> > > + return;
> > > +
> > >   if (iir & GUC_INTR_GUC2HOST)
> > >   intel_guc_to_host_event_handler(guc);
> > >   }
> > > @@ -249,6 +252,7 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
> > >   {
> > >   struct intel_uncore *uncore = gt->uncore;
> > >   u32 irqs = GT_RENDER_USER_INTERRUPT;
> > > + u32 guc_mask = intel_uc_wants_guc(>->uc) ? GUC_INTR_GUC2HOST : 0;
> > >   const u32 gsc_mask = GSC_IRQ_INTF(0) | GSC_IRQ_INTF(1);
> > >   u32 dmask;
> > >   u32 smask;
> > > @@ -299,6 +303,19 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
> > >   if (HAS_HECI_GSC(gt->i915))
> > >   intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_MASK, 
> > > ~gsc_mask);
> > > + if (guc_mask) {
> > > + /* the enable bit is common for both GTs but the masks are 
> > > separate */
> > > + u32 mask = gt->type == GT_MEDIA ?
> > > + REG_FIELD_PREP(ENGINE0_MASK, guc_mask) :
> > > + REG_FIELD_PREP(ENGINE1_MASK, guc_mask);
> > > +
> > > + intel_uncore_write(uncore, GEN11_GUC_SG_INTR_ENABLE,
> > > +REG_FIELD_PREP(ENGINE1_MASK, guc_mask));
> > > +
> > > + /* we might not be the first GT to write this reg */
> > > + intel_uncore_rmw(uncore, GEN12_GUC_MGUC_INTR_MASK, mask, 0);
> > > + }
> > > +
> > >   /*
> > >* RPS interrupts will get enabled/disabled on demand when RPS 
> > > itself
> > >* is enabled/disabled.
> > > @@ -307,10 +324,6 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
> > >   gt->pm_imr = ~gt->pm_ier;
> > >   intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0);
> > >   intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK,  ~0);
> > > -
> > > - /* Same thing for GuC interrupts */
> > > - intel_uncore_write(uncore, GEN11_GUC_SG_INTR_ENABLE, 0);
> > > - intel_uncore_write(uncore, GEN11_GUC_SG_INTR_MASK,  ~0);
> > >   }
> > >   void gen5_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
> > > b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> > > index 1cbb7226400b..792809e49680 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> > > +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> > > @@ -1519,6 +1519,7 @@
> > >   #define   G

[PATCH drm-intel-fixes] drm/i915/gt: Perf_limit_reasons are only available for Gen11+

2022-09-28 Thread Ashutosh Dixit
Register GT0_PERF_LIMIT_REASONS (0x1381a8) is available only for
Gen11+. Therefore ensure perf_limit_reasons sysfs files are created only
for Gen11+. Otherwise on Gen < 5 accessing these files results in the
following oops:

<1> [88.829420] BUG: unable to handle page fault for address: c9bb81a8
<1> [88.829438] #PF: supervisor read access in kernel mode
<1> [88.829447] #PF: error_code(0x) - not-present page

Bspec: 20008
Bug: https://gitlab.freedesktop.org/drm/intel/-/issues/6863
Fixes: fa68bff7cf27 ("drm/i915/gt: Add sysfs throttle frequency interfaces")
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c 
b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
index 73a8b46e0234..d09a0e845d09 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
@@ -545,8 +545,7 @@ static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_ratl, 
RATL_MASK);
 static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_vr_thermalert, 
VR_THERMALERT_MASK);
 static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_vr_tdc, VR_TDC_MASK);
 
-static const struct attribute *freq_attrs[] = {
-   &dev_attr_punit_req_freq_mhz.attr,
+static const struct attribute *throttle_reason_attrs[] = {
&attr_throttle_reason_status.attr,
&attr_throttle_reason_pl1.attr,
&attr_throttle_reason_pl2.attr,
@@ -763,12 +762,20 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct 
kobject *kobj)
if (!is_object_gt(kobj))
return;
 
-   ret = sysfs_create_files(kobj, freq_attrs);
+   ret = sysfs_create_file(kobj, &dev_attr_punit_req_freq_mhz.attr);
if (ret)
drm_warn(>->i915->drm,
-"failed to create gt%u throttle sysfs files (%pe)",
+"failed to create gt%u punit_req_freq_mhz sysfs (%pe)",
 gt->info.id, ERR_PTR(ret));
 
+   if (GRAPHICS_VER(gt->i915) >= 11) {
+   ret = sysfs_create_files(kobj, throttle_reason_attrs);
+   if (ret)
+   drm_warn(>->i915->drm,
+"failed to create gt%u throttle sysfs files 
(%pe)",
+gt->info.id, ERR_PTR(ret));
+   }
+
if (HAS_MEDIA_RATIO_MODE(gt->i915) && intel_uc_uses_guc_slpc(>->uc)) {
ret = sysfs_create_files(kobj, media_perf_power_attrs);
if (ret)
-- 
2.34.1



  1   2   3   >