[PATCH 1/1] drm/amdgpu: fix ftrace event amdgpu_bo_move always move on same heap

2023-12-10 Thread Wang, Beyond
[AMD Official Use Only - General]

Subject: [PATCH 1/1] drm/amdgpu: fix ftrace event amdgpu_bo_move always move on 
same heap

Issue: when an evict or validate happens on an amdgpu_bo, the 'from' and
'to' are always the same in the amdgpu_bo_move ftrace event.

Where 'trace_amdgpu_bo_move' is called, the comment says move_notify
is called before the move happens, but it is actually called after the
move happens, so at that point new_mem is the same as bo->resource.

Fix: pass old_mem instead when calling amdgpu_bo_move_notify

Signed-off-by: Wang, Beyond wang.bey...@amd.com
---
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c|  2 +-
3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 7416799..0288495 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1282,7 +1282,7 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void 
*buffer,
  * amdgpu_bo_move_notify - notification about a memory move
  * @bo: pointer to a buffer object
  * @evict: if this move is evicting the buffer from the graphics address space
- * @new_mem: new information of the bufer object
+ * @old_mem: old information of the buffer object
  *
  * Marks the corresponding &amdgpu_bo buffer object as invalid, also performs
  * bookkeeping.
@@ -1290,11 +1290,11 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void 
*buffer,
  */
void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
   bool evict,
-  struct ttm_resource *new_mem)
+  struct ttm_resource *old_mem)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct amdgpu_bo *abo;
-   struct ttm_resource *old_mem = bo->resource;
+   struct ttm_resource *new_mem = bo->resource;

if (!amdgpu_bo_is_amdgpu_bo(bo))
return;
@@ -1315,10 +1315,10 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
atomic64_inc(&adev->num_evictions);

/* update statistics */
-   if (!new_mem)
+   if (!old_mem || !new_mem)
return;

-   /* move_notify is called before move happens */
+   /* move_notify is called after move happens in amdgpu_bo_move */
trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
}

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 876acde..88be4b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -362,7 +362,7 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void 
*buffer,
   uint64_t *flags);
void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
   bool evict,
-  struct ttm_resource *new_mem);
+  struct ttm_resource *old_mem);
void amdgpu_bo_release_notify(struct ttm_buffer_object *bo);
vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 41ed6a3..8cc85d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -579,7 +579,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, 
bool evict,
out:
/* update statistics */
atomic64_add(bo->base.size, &adev->num_bytes_moved);
-   amdgpu_bo_move_notify(bo, evict, new_mem);
+   amdgpu_bo_move_notify(bo, evict, old_mem);
return 0;
}

--
2.34.1


[PATCH] drm/amd/display: Disable PSR-SU on Parade 0803 TCON again

2023-12-10 Thread Mario Limonciello
When screen brightness is rapidly changed and PSR-SU is enabled the
display hangs on panels with this TCON even on the latest DCN 3.1.4
microcode (0x8002a81 at this time).

This was disabled previously as commit 072030b17830 ("drm/amd: Disable
PSR-SU on Parade 0803 TCON") but reverted as commit 1e66a17ce546 ("Revert
"drm/amd: Disable PSR-SU on Parade 0803 TCON"") in favor of testing for
a new enough microcode (commit cd2e31a9ab93 ("drm/amd/display: Set minimum
requirement for using PSR-SU on Phoenix")).

As hangs are still happening specifically with this TCON, disable PSR-SU
again for it until it can be root caused.

Cc: sta...@vger.kernel.org
Cc: aaron...@canonical.com
Cc: bi...@gnome.org
Cc: Marc Rossi 
Cc: Hamza Mahfooz 
Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/display/modules/power/power_helpers.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c 
b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
index a522a7c02911..1675314a3ff2 100644
--- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
+++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
@@ -839,6 +839,8 @@ bool is_psr_su_specific_panel(struct dc_link *link)
((dpcd_caps->sink_dev_id_str[1] == 0x08 && 
dpcd_caps->sink_dev_id_str[0] == 0x08) ||
(dpcd_caps->sink_dev_id_str[1] == 0x08 && 
dpcd_caps->sink_dev_id_str[0] == 0x07)))
isPSRSUSupported = false;
+   else if (dpcd_caps->sink_dev_id_str[1] == 0x08 && 
dpcd_caps->sink_dev_id_str[0] == 0x03)
+   isPSRSUSupported = false;
else if (dpcd_caps->psr_info.force_psrsu_cap == 0x1)
isPSRSUSupported = true;
}
-- 
2.34.1



Re: regression/bisected/6.7rc1: Instead of desktop I see a horizontal flashing bar with a picture of the desktop background on white screen

2023-12-10 Thread Thorsten Leemhuis
On 05.12.23 14:23, Linux regression tracking (Thorsten Leemhuis) wrote:
> On 27.11.23 19:11, Alex Deucher wrote:
>> On Wed, Nov 15, 2023 at 1:52 PM Lee, Alvin  wrote:
>>>
>>> This change has a DMCUB dependency - are you able to update your DMCUB 
>>> version as well?
>>>
>>> This version mismatch issue is something I'll need to fix in driver for 
>>> Linux.
>>
>> @Mahfooz, Hamza @Alvin Lee any update on a fix for this?
> 
> Still no news afaics. Or was there any progress I missed?

Alex, Christian, Xinhui, what's up here?

In an earlier message from Christian it sounded like you considered this
problem something that should be fixed. But it looks like nothing much
happened since then. Or am I missing something?

Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat)
--
Everything you wanna know about Linux kernel regression tracking:
https://linux-regtracking.leemhuis.info/about/#tldr
If I did something stupid, please tell me, as explained on that page.

#regzbot poke

>>> -Original Message-
>>> From: Mikhail Gavrilov 
>>> Sent: Wednesday, November 15, 2023 1:22 PM
>>> To: Mahfooz, Hamza 
>>> Cc: Lee, Alvin ; Wu, Hersen ; 
>>> Wheeler, Daniel ; Deucher, Alexander 
>>> ; Linux List Kernel Mailing 
>>> ; amd-gfx list 
>>> Subject: Re: regression/bisected/6.7rc1: Instead of desktop I see a 
>>> horizontal flashing bar with a picture of the desktop background on white 
>>> screen
>>>
>>> On Wed, Nov 15, 2023 at 11:14 PM Hamza Mahfooz  
>>> wrote:

 What version of DMUB firmware are you on?
 The easiest way to find out would be using the following:

 # dmesg | grep DMUB

>>>
>>> Sapphire AMD Radeon RX 7900 XTX PULSE OC:
>>> ❯ dmesg | grep DMUB
>>> [   14.341362] [drm] Loading DMUB firmware via PSP: version=0x07002100
>>> [   14.725547] [drm] DMUB hardware initialized: version=0x07002100
>>>
>>> Reference GIGABYTE Radeon RX 7900 XTX 24G:
>>> ❯ dmesg | grep DMUB
>>> [   11.405115] [drm] Loading DMUB firmware via PSP: version=0x07002100
>>> [   11.773395] [drm] DMUB hardware initialized: version=0x07002100
>>>
>>>
>>> --
>>> Best Regards,
>>> Mike Gavrilov.


[PATCH v2 1/2] drm/buddy: Implement tracking clear page feature

2023-12-10 Thread Arunpravin Paneer Selvam
- Add tracking clear page feature.

- The driver should enable the DRM_BUDDY_CLEARED flag if it
  successfully clears the blocks in the free path. On the other hand,
  DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If the driver requests cleared memory we prefer cleared memory
  but fall back to uncleared if we can't find the cleared blocks.
  When the driver requests uncleared memory we try to use uncleared but
  fall back to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as cleared,
  when there are buddies which are cleared as well we can merge them.
  Otherwise, we prefer to keep the blocks as separated.

v1: (Christian)
  - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
cleared. Else, reset the clear flag for each block in the list.

  - For merging the 2 cleared blocks compare as below,
drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
 drivers/gpu/drm/drm_buddy.c   | 169 +++---
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
 drivers/gpu/drm/tests/drm_buddy_test.c|  10 +-
 include/drm/drm_buddy.h   |  18 +-
 5 files changed, 168 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 08916538a615..d0e199cc8f17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -556,7 +556,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
return 0;
 
 error_free_blocks:
-   drm_buddy_free_list(mm, &vres->blocks);
+   drm_buddy_free_list(mm, &vres->blocks, 0);
mutex_unlock(&mgr->lock);
 error_fini:
ttm_resource_fini(man, &vres->base);
@@ -589,7 +589,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager 
*man,
 
amdgpu_vram_mgr_do_reserve(man);
 
-   drm_buddy_free_list(mm, &vres->blocks);
+   drm_buddy_free_list(mm, &vres->blocks, 0);
mutex_unlock(&mgr->lock);
 
atomic64_sub(vis_usage, &mgr->vis_usage);
@@ -897,7 +897,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
kfree(rsv);
 
list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) {
-   drm_buddy_free_list(&mgr->mm, &rsv->allocated);
+   drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
kfree(rsv);
}
if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..d44172f23f05 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
__list_add(&block->link, node->link.prev, &node->link);
 }
 
+static void clear_reset(struct drm_buddy_block *block)
+{
+   block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_cleared(struct drm_buddy_block *block)
+{
+   block->header |= DRM_BUDDY_HEADER_CLEAR;
+}
+
 static void mark_allocated(struct drm_buddy_block *block)
 {
block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -223,6 +233,12 @@ static int split_block(struct drm_buddy *mm,
mark_free(mm, block->left);
mark_free(mm, block->right);
 
+   if (drm_buddy_block_is_clear(block)) {
+   mark_cleared(block->left);
+   mark_cleared(block->right);
+   clear_reset(block);
+   }
+
mark_split(block);
 
return 0;
@@ -273,6 +289,13 @@ static void __drm_buddy_free(struct drm_buddy *mm,
if (!drm_buddy_block_is_free(buddy))
break;
 
+   if (drm_buddy_block_is_clear(block) !=
+   drm_buddy_block_is_clear(buddy))
+   break;
+
+   if (drm_buddy_block_is_clear(block))
+   mark_cleared(parent);
+
list_del(&buddy->link);
 
drm_block_free(mm, block);
@@ -295,6 +318,9 @@ void drm_buddy_free_block(struct drm_buddy *mm,
 {
BUG_ON(!drm_buddy_block_is_allocated(block));
mm->avail += drm_buddy_block_size(mm, block);
+   if (drm_buddy_block_is_clear(block))
+   mm->clear_avail += drm_buddy_block_size(mm, block);
+
__drm_buddy_free(mm, block);
 }
 EXPORT_SYMBOL(drm_buddy_free_block);
@@ -305,10 +331,20 @@ EXPORT_SYMBOL(drm_buddy_free_block);
  * @mm: DRM buddy manager
  * @objects: input list head to free blocks
  */
-void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects)
+void drm_buddy_free_list(struct drm_buddy *mm,
+struct list_head *objects,
+unsigned long flags)
 {
struct drm_buddy_block *block, *on;
 
+   if (flags & DRM_BUDDY_CLEARED) {
+   lis

[PATCH v2 2/2] drm/amdgpu: Enable clear page functionality

2023-12-10 Thread Arunpravin Paneer Selvam
Add clear page support in vram memory region.

v1:(Christian)
  - Don't handle clear page as a TTM flag since when moving the BO back
in from GTT again we don't need that.
  - Make a specialized version of amdgpu_fill_buffer() which only
clears the VRAM areas which are not already cleared
  - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
amdgpu_object.c

v2:
  - Modify the function name amdgpu_ttm_* (Alex)
  - Drop the delayed parameter (Christian)
  - handle amdgpu_res_cleared(&cursor) just above the size
calculation (Christian)
  - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers
in the free path to properly wait for fences etc.. (Christian)

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c| 16 +++---
 .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h| 25 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 53 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |  3 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 15 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
 6 files changed, 109 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index cef920a93924..5a01b6266772 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -39,6 +39,7 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
 
 /**
  * DOC: amdgpu_object
@@ -598,8 +599,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
if (!amdgpu_bo_support_uswc(bo->flags))
bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
 
-   if (adev->ras_enabled)
-   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
 
bo->tbo.bdev = &adev->mman.bdev;
if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
@@ -629,15 +629,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
 
if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
bo->tbo.resource->mem_type == TTM_PL_VRAM) {
-   struct dma_fence *fence;
+   struct dma_fence *fence = NULL;
 
-   r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
+   r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
if (unlikely(r))
goto fail_unreserve;
 
-   dma_resv_add_fence(bo->tbo.base.resv, fence,
-  DMA_RESV_USAGE_KERNEL);
-   dma_fence_put(fence);
+   if (fence) {
+   dma_resv_add_fence(bo->tbo.base.resv, fence,
+  DMA_RESV_USAGE_KERNEL);
+   dma_fence_put(fence);
+   }
}
if (!bp->resv)
amdgpu_bo_unreserve(bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index 381101d2bf05..50fcd86e1033 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct 
amdgpu_res_cursor *cur, uint64_t size)
}
 }
 
+/**
+ * amdgpu_res_cleared - check if blocks are cleared
+ *
+ * @cur: the cursor to extract the block
+ *
+ * Check if the @cur block is cleared
+ */
+static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
+{
+   struct drm_buddy_block *block;
+
+   switch (cur->mem_type) {
+   case TTM_PL_VRAM:
+   block = cur->node;
+
+   if (!amdgpu_vram_mgr_is_cleared(block))
+   return false;
+   break;
+   default:
+   return false;
+   }
+
+   return true;
+}
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 05991c5c8ddb..252b384194c5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -,6 +,59 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, 
uint32_t src_data,
return 0;
 }
 
+int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
+   struct dma_resv *resv,
+   struct dma_fence **fence)
+{
+   struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+   struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+   struct amdgpu_res_cursor cursor;
+   struct dma_fence *f = NULL;
+   u64 addr;
+   int r;
+
+   if (!adev->mman.buffer_funcs_enabled)
+   return -EINVAL;
+
+   amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
+
+   mutex_lock(&adev->mman.gtt_window_lock);
+   while (cursor.remaining) {
+   struct dma_fence *next = NULL;
+