This is needed so that SEL reg values are restored on exit from IFPC.
Signed-off-by: Rob Clark <[email protected]>
---
drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 82 +++++++++++++++++++++++++--
drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 11 +++-
drivers/gpu/drm/msm/adreno/a8xx_gpu.c | 1 +
3 files changed, 87 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 61c6b0e781ce..e047ed550347 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -946,6 +946,7 @@ static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu)
A7XX_CP_APERTURE_CNTL_HOST_PIPE(PIPE_NONE));
}
lock->dynamic_list_len = dyn_pwrup_reglist_count;
+ a6xx_gpu->dynamic_sel_reglist_offset = dyn_pwrup_reglist_count;
}
static int a7xx_preempt_start(struct msm_gpu *gpu)
@@ -2535,11 +2536,60 @@ static bool a6xx_progress(struct msm_gpu *gpu, struct
msm_ringbuffer *ring)
return progress;
}
+/*
+ * Program a run of nr perfcntr SEL registers starting at regidx.
+ *
+ * The countables are always emitted into the ringbuffer via OUT_PKT4()
+ * followed by nr dwords (presumably a single write of nr consecutive
+ * registers starting at regidx -- the list entries below use regidx + i).
+ *
+ * If *reglist is non-NULL, each register is additionally recorded at the
+ * *reglist cursor as a three-dword (pipe aperture value, register offset,
+ * countable) entry, and the cursor is advanced past the entries written so
+ * that successive calls append. A NULL *reglist skips recording entirely.
+ *
+ * NOTE(review): no bound on the list is checked here; presumably the
+ * caller guarantees room for 3 * nr more dwords -- confirm.
+ */
+static void
+perfcntr_select(struct msm_ringbuffer *ring, enum adreno_pipe pipe,
+ uint32_t regidx, uint32_t *countables, uint32_t nr,
+ uint32_t **reglist)
+{
+ OUT_PKT4(ring, regidx, nr);
+ for (unsigned i = 0; i < nr; i++)
+ OUT_RING(ring, countables[i]);
+
+ /* No list to append to: the ring programming above is all we do. */
+ if (!*reglist)
+ return;
+
+ for (unsigned i = 0; i < nr; i++) {
+ /*
+ * Bitfield is in same position on a7xx, but only 2 bits..
+ * which is sufficient for NONE/BR/BV:
+ */
+ *(*reglist)++ = A8XX_CP_APERTURE_CNTL_HOST_PIPEID(pipe);
+ *(*reglist)++ = regidx + i;
+ *(*reglist)++ = countables[i];
+ }
+}
+
static void
a6xx_perfcntr_configure(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
const struct msm_perfcntr_stream *stream)
{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
enum adreno_pipe pipe = PIPE_NONE;
+ uint32_t *reglist = NULL;
+ uint32_t *reglist_sel_start;
+
+ if (to_adreno_gpu(gpu)->info->family >= ADRENO_7XX_GEN1) {
+ WARN_ON(!a6xx_gpu->pwrup_reglist_emitted);
+
+ struct cpu_gpu_lock *lock = a6xx_gpu->pwrup_reglist_ptr;
+ int off = (2 * lock->ifpc_list_len) +
+ (2 * lock->preemption_list_len) +
+ (3 * a6xx_gpu->dynamic_sel_reglist_offset);
+
+ reglist = (uint32_t *)&lock->regs[0];
+ reglist += off;
+ reglist_sel_start = reglist;
+
+ /* Clear any previously configured SEL reg entries: */
+ lock->dynamic_list_len = a6xx_gpu->dynamic_sel_reglist_offset;
+
+ /*
+ * Ensure CP sees the dynamic_list_len update before we
+ * start modifying the SEL entries:
+ */
+ wmb();
+ }
for (unsigned i = 0; i < stream->nr_groups; i++) {
unsigned group_idx = msm_perfcntr_group_idx(stream, i);
@@ -2567,17 +2617,15 @@ a6xx_perfcntr_configure(struct msm_gpu *gpu, struct
msm_ringbuffer *ring,
const struct msm_perfcntr_counter *counter =
&group->counters[base];
unsigned nr = group_state->allocated_counters;
- OUT_PKT4(ring, counter->select_reg, nr);
- for (unsigned c = 0; c < nr; c++)
- OUT_RING(ring, group_state->countables[c]);
+ perfcntr_select(ring, pipe, counter->select_reg,
+ group_state->countables, nr, &reglist);
for (unsigned s = 0; s <
ARRAY_SIZE(counter->slice_select_regs); s++) {
if (!counter->slice_select_regs[s])
break;
- OUT_PKT4(ring, counter->slice_select_regs[s], nr);
- for (unsigned c = 0; c < nr; c++)
- OUT_RING(ring, group_state->countables[c]);
+ perfcntr_select(ring, pipe,
counter->slice_select_regs[s],
+ group_state->countables, nr, &reglist);
}
}
@@ -2591,6 +2639,28 @@ a6xx_perfcntr_configure(struct msm_gpu *gpu, struct
msm_ringbuffer *ring,
OUT_RING(ring, upper_32_bits(rbmemptr(ring, perfcntr_fence)));
OUT_RING(ring, stream->sel_fence);
+ /*
+ * Update the pwrup reglist size before flushing. Kgsl does a shared-
+ * memory spinlock dance with SQE to avoid racing with IFPC exit. But
+ * we can skip that since the ringbuffer programming will be executed
+ * by SQE after dynamic reglist size is updated. So even if we lose
+ * the race, the register programming in the rb will overwrite/correct
+ * the SEL regs restored by SQE on IFPC exit, before sampling begins.
+ */
+ if (reglist) {
+ struct cpu_gpu_lock *lock = a6xx_gpu->pwrup_reglist_ptr;
+ /*
+ * reglist is the write cursor, advanced from reglist_sel_start
+ * by three dwords per appended SEL reg. Subtract the start from
+ * the cursor; the reverse order gives a negative difference,
+ * which would shrink/wrap dynamic_list_len instead of growing it:
+ */
+ unsigned nr_regs = (reglist - reglist_sel_start) / 3;
+
+ /*
+ * Ensure CP sees updates to the pwrup_reglist before it
+ * sees the new (increased) length:
+ */
+ wmb();
+
+ /* Update dynamic reglist len to include new SEL reg programming: */
+ lock->dynamic_list_len = a6xx_gpu->dynamic_sel_reglist_offset + nr_regs;
+ }
+
a6xx_flush_yield(gpu, ring);
/* Check to see if we need to start preemption */
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
index 3491a24a9320..f3cc9478b079 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
@@ -21,17 +21,19 @@ struct cpu_gpu_lock {
uint32_t cpu_req;
uint32_t turn;
union {
+ /* a6xx: */
struct {
uint16_t list_length;
uint16_t list_offset;
};
+ /* a7xx+: */
struct {
uint8_t ifpc_list_len;
uint8_t preemption_list_len;
uint16_t dynamic_list_len;
};
};
- uint64_t regs[62];
+ uint64_t regs[];
};
/**
@@ -100,6 +102,13 @@ struct a6xx_gpu {
uint64_t pwrup_reglist_iova;
bool pwrup_reglist_emitted;
+ /*
+ * Offset of start of SEL regs appended to pwrup_reglist. This
+ * is equal to lock->dynamic_list_len if no SEL regs are appended
+ * to the end of the dynamic reglist.
+ */
+ uint16_t dynamic_sel_reglist_offset;
+
bool has_whereami;
void __iomem *llc_mmio;
diff --git a/drivers/gpu/drm/msm/adreno/a8xx_gpu.c
b/drivers/gpu/drm/msm/adreno/a8xx_gpu.c
index 6c040f718176..2ce7c6ac4521 100644
--- a/drivers/gpu/drm/msm/adreno/a8xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a8xx_gpu.c
@@ -468,6 +468,7 @@ static void a8xx_patch_pwrup_reglist(struct msm_gpu *gpu)
}
lock->dynamic_list_len = dyn_pwrup_reglist_count;
+ a6xx_gpu->dynamic_sel_reglist_offset = dyn_pwrup_reglist_count;
done:
a8xx_aperture_clear(gpu);
--
2.54.0