[WHY]
SR-IOV hits a higher-bit violation when flushing the TLB.

[HOW]
Normalize the register offsets to keep only the lower 16 bits (dword aligned)
to avoid the higher-bit violation.
RLCG will mask the XCD out and always assume it is accessing its own XCD.

Signed-off-by: Jane Jian <jane.j...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 29 +++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 8d8763ebe027..87a6a610e467 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -55,6 +55,14 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin");
 #define mmSMNAID_XCD1_MCA_SMU 0x38430400       /* SMN AID XCD1 */
 #define mmSMNXCD_XCD0_MCA_SMU 0x40430400       /* SMN XCD XCD0 */
 
+#define XCC_REG_RANGE_0_LOW  0x2000     /* XCC gfxdec0 lower bound (inclusive) */
+#define XCC_REG_RANGE_0_HIGH 0x3400     /* XCC gfxdec0 upper bound (exclusive) */
+#define XCC_REG_RANGE_1_LOW  0xA000     /* XCC gfxdec1 lower bound (inclusive) */
+#define XCC_REG_RANGE_1_HIGH 0x10000    /* XCC gfxdec1 upper bound (exclusive) */
+
+#define NORMALIZE_XCC_REG_OFFSET(offset) \
+       ((offset) & 0xFFFF)      /* keep only the low 16 bits of the offset */
+
 struct amdgpu_gfx_ras gfx_v9_4_3_ras;
 
 static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev);
@@ -217,9 +225,24 @@ static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev)
        }
 }
 
+/* Return @reg masked to its low 16 bits when the masked offset lies in an
+ * XCC gfxdec0/gfxdec1 range; any other offset is returned unchanged.
+ */
+static uint32_t gfx_v9_4_3_normalize_xcc_reg_offset(uint32_t reg)
+{
+       uint32_t normalized_reg = NORMALIZE_XCC_REG_OFFSET(reg);
+
+       if ((normalized_reg >= XCC_REG_RANGE_0_LOW && normalized_reg < XCC_REG_RANGE_0_HIGH) ||
+           (normalized_reg >= XCC_REG_RANGE_1_LOW && normalized_reg < XCC_REG_RANGE_1_HIGH))
+               return normalized_reg;
+
+       return reg;
+}
+
 static void gfx_v9_4_3_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
                                       bool wc, uint32_t reg, uint32_t val)
 {
+       reg = gfx_v9_4_3_normalize_xcc_reg_offset(reg);
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
                                WRITE_DATA_DST_SEL(0) |
@@ -234,6 +257,8 @@ static void gfx_v9_4_3_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
                                  uint32_t addr1, uint32_t ref, uint32_t mask,
                                  uint32_t inv)
 {
+       addr0 = gfx_v9_4_3_normalize_xcc_reg_offset(addr0);
+       addr1 = gfx_v9_4_3_normalize_xcc_reg_offset(addr1);
        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring,
                                 /* memory (1) or register (0) */
@@ -2725,6 +2750,8 @@ static void gfx_v9_4_3_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
 {
        struct amdgpu_device *adev = ring->adev;
 
+       reg = gfx_v9_4_3_normalize_xcc_reg_offset(reg);
+
        amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
        amdgpu_ring_write(ring, 0 |     /* src: register*/
                                (5 << 8) |      /* dst: memory */
@@ -2742,6 +2769,8 @@ static void gfx_v9_4_3_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
 {
        uint32_t cmd = 0;
 
+       reg = gfx_v9_4_3_normalize_xcc_reg_offset(reg);
+
        switch (ring->funcs->type) {
        case AMDGPU_RING_TYPE_GFX:
                cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
-- 
2.34.1

Reply via email to