On Tue, Jul 13, 2021 at 08:15:01PM -0700, Matt Roper wrote:
> From: Stuart Summers <stuart.summ...@intel.com>
> 
> Xe_HP changes the format of the context ID from past platforms.
> 
> Signed-off-by: Stuart Summers <stuart.summ...@intel.com>
> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.rama...@intel.com>
> Signed-off-by: Matt Roper <matthew.d.ro...@intel.com>
Reviewed-by: Matt Atwood <matthew.s.atw...@intel.com>
> ---
>  .../drm/i915/gt/intel_execlists_submission.c  | 74 ++++++++++++++++---
>  drivers/gpu/drm/i915/gt/intel_lrc.c           |  8 ++
>  drivers/gpu/drm/i915/gt/intel_lrc_reg.h       |  2 +
>  drivers/gpu/drm/i915/i915_perf.c              | 29 +++++---
>  drivers/gpu/drm/i915/i915_reg.h               |  5 ++
>  5 files changed, 97 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index cf1ac0010056..be99a74e6e09 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -153,6 +153,12 @@
>  #define GEN12_CSB_CTX_VALID(csb_dw) \
>       (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
>  
> +#define XEHP_CTX_STATUS_SWITCHED_TO_NEW_QUEUE        BIT(1) /* upper csb dword */
> +#define XEHP_CSB_SW_CTX_ID_MASK                      GENMASK(31, 10)
> +#define XEHP_IDLE_CTX_ID                     0xFFFF
> +#define XEHP_CSB_CTX_VALID(csb_dw) \
> +     (FIELD_GET(XEHP_CSB_SW_CTX_ID_MASK, csb_dw) != XEHP_IDLE_CTX_ID)
> +
>  /* Typical size of the average request (2 pipecontrols and a MI_BB) */
>  #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
>  
> @@ -478,6 +484,16 @@ __execlists_schedule_in(struct i915_request *rq)
>               /* Use a fixed tag for OA and friends */
>               GEM_BUG_ON(ce->tag <= BITS_PER_LONG);
>               ce->lrc.ccid = ce->tag;
> +     } else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) {
> +             /* We don't need a strict matching tag, just different values */
> +             unsigned int tag = ffs(READ_ONCE(engine->context_tag));
> +
> +             GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG);
> +             clear_bit(tag - 1, &engine->context_tag);
> +             ce->lrc.ccid = tag << (XEHP_SW_CTX_ID_SHIFT - 32);
> +
> +             BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID);
> +
>       } else {
>               /* We don't need a strict matching tag, just different values */
>               unsigned int tag = __ffs(engine->context_tag);
> @@ -588,8 +604,14 @@ static void __execlists_schedule_out(struct i915_request * const rq,
>               intel_engine_add_retire(engine, ce->timeline);
>  
>       ccid = ce->lrc.ccid;
> -     ccid >>= GEN11_SW_CTX_ID_SHIFT - 32;
> -     ccid &= GEN12_MAX_CONTEXT_HW_ID;
> +     if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) {
> +             ccid >>= XEHP_SW_CTX_ID_SHIFT - 32;
> +             ccid &= XEHP_MAX_CONTEXT_HW_ID;
> +     } else {
> +             ccid >>= GEN11_SW_CTX_ID_SHIFT - 32;
> +             ccid &= GEN12_MAX_CONTEXT_HW_ID;
> +     }
> +
>       if (ccid < BITS_PER_LONG) {
>               GEM_BUG_ON(ccid == 0);
>               GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag));
> @@ -1648,13 +1670,24 @@ static void invalidate_csb_entries(const u64 *first, const u64 *last)
>   *     bits 44-46: reserved
>   *     bits 47-57: sw context id of the lrc the GT switched away from
>   *     bits 58-63: sw counter of the lrc the GT switched away from
> + *
> + * Xe_HP csb shuffles things around compared to TGL:
> + *
> + *     bits 0-3:   context switch detail (same possible values as TGL)
> + *     bits 4-9:   engine instance
> + *     bits 10-25: sw context id of the lrc the GT switched to
> + *     bits 26-31: sw counter of the lrc the GT switched to
> + *     bit  32:    semaphore wait mode (poll or signal), Only valid when
> + *                 switch detail is set to "wait on semaphore"
> + *     bit  33:    switched to new queue
> + *     bits 34-41: wait detail (for switch detail 1 to 4)
> + *     bits 42-57: sw context id of the lrc the GT switched away from
> + *     bits 58-63: sw counter of the lrc the GT switched away from
>   */
> -static bool gen12_csb_parse(const u64 csb)
> +static inline bool
> +__gen12_csb_parse(bool ctx_to_valid, bool ctx_away_valid, bool new_queue,
> +               u8 switch_detail)
>  {
> -     bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(csb));
> -     bool new_queue =
> -             lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
> -
>       /*
>        * The context switch detail is not guaranteed to be 5 when a preemption
>        * occurs, so we can't just check for that. The check below works for
> @@ -1663,7 +1696,7 @@ static bool gen12_csb_parse(const u64 csb)
>        * would require some extra handling, but we don't support that.
>        */
>       if (!ctx_away_valid || new_queue) {
> -             GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(csb)));
> +             GEM_BUG_ON(!ctx_to_valid);
>               return true;
>       }
>  
> @@ -1672,10 +1705,26 @@ static bool gen12_csb_parse(const u64 csb)
>        * context switch on an unsuccessful wait instruction since we always
>        * use polling mode.
>        */
> -     GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb)));
> +     GEM_BUG_ON(switch_detail);
>       return false;
>  }
>  
> +static bool xehp_csb_parse(const u64 csb)
> +{
> +     return __gen12_csb_parse(XEHP_CSB_CTX_VALID(lower_32_bits(csb)), /* cxt to */
> +                              XEHP_CSB_CTX_VALID(upper_32_bits(csb)), /* cxt away */
> +                              upper_32_bits(csb) & XEHP_CTX_STATUS_SWITCHED_TO_NEW_QUEUE,
> +                              GEN12_CTX_SWITCH_DETAIL(lower_32_bits(csb)));
> +}
> +
> +static bool gen12_csb_parse(const u64 csb)
> +{
> +     return __gen12_csb_parse(GEN12_CSB_CTX_VALID(lower_32_bits(csb)), /* cxt to */
> +                              GEN12_CSB_CTX_VALID(upper_32_bits(csb)), /* cxt away */
> +                              lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE,
> +                              GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb)));
> +}
> +
>  static bool gen8_csb_parse(const u64 csb)
>  {
>       return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
> @@ -1840,7 +1889,9 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive)
>               ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
>                            head, upper_32_bits(csb), lower_32_bits(csb));
>  
> -             if (GRAPHICS_VER(engine->i915) >= 12)
> +             if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
> +                     promote = xehp_csb_parse(csb);
> +             else if (GRAPHICS_VER(engine->i915) >= 12)
>                       promote = gen12_csb_parse(csb);
>               else
>                       promote = gen8_csb_parse(csb);
> @@ -3327,7 +3378,8 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
>               execlists->csb_size = GEN11_CSB_ENTRIES;
>  
>       engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0);
> -     if (GRAPHICS_VER(engine->i915) >= 11) {
> +     if (GRAPHICS_VER(engine->i915) >= 11 &&
> +         GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 50)) {
>               execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32);
>               execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32);
>       }
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 8ada1afe3d22..7f8fe6726504 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1101,6 +1101,14 @@ setup_indirect_ctx_bb(const struct intel_context *ce,
>   *      bits 55-60:    SW counter
>   *      bits 61-63:    engine class
>   *
> + * On Xe_HP, the upper dword of the descriptor has a new format:
> + *
> + *      bits 32-37:    virtual function number
> + *      bit 38:        mbz, reserved for use by hardware
> + *      bits 39-54:    SW context ID
> + *      bits 55-57:    reserved
> + *      bits 58-63:    SW counter
> + *
>   * engine info, SW context ID and SW counter need to form a unique number
>   * (Context ID) per lrc.
>   */
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> index 41e5350a7a05..9548f4ade068 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> @@ -91,5 +91,7 @@
>  #define GEN11_MAX_CONTEXT_HW_ID      (1 << 11) /* exclusive */
>  /* in Gen12 ID 0x7FF is reserved to indicate idle */
>  #define GEN12_MAX_CONTEXT_HW_ID      (GEN11_MAX_CONTEXT_HW_ID - 1)
> +/* in Xe_HP ID 0xFFFF is reserved to indicate "invalid context" */
> +#define XEHP_MAX_CONTEXT_HW_ID       0xFFFF
>  
>  #endif /* _INTEL_LRC_REG_H_ */
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index b4ec114a4698..324ec3f35672 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1284,17 +1284,26 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
>               break;
>  
>       case 11:
> -     case 12: {
> -             stream->specific_ctx_id_mask =
> -                     ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
> -             /*
> -              * Pick an unused context id
> -              * 0 - BITS_PER_LONG are used by other contexts
> -              * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context
> -              */
> -             stream->specific_ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
> +     case 12:
> +             if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 50)) {
> +                     stream->specific_ctx_id_mask =
> +                             ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
> +                             (XEHP_SW_CTX_ID_SHIFT - 32);
> +                     stream->specific_ctx_id =
> +                             (XEHP_MAX_CONTEXT_HW_ID - 1) <<
> +                             (XEHP_SW_CTX_ID_SHIFT - 32);
> +             } else {
> +                     stream->specific_ctx_id_mask =
> +                             ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
> +                     /*
> +                      * Pick an unused context id
> +                      * 0 - BITS_PER_LONG are used by other contexts
> +                      * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context
> +                      */
> +                     stream->specific_ctx_id =
> +                             (GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
> +             }
>               break;
> -     }
>  
>       default:
>               MISSING_CASE(GRAPHICS_VER(ce->engine->i915));
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 1eca88ac7057..60d780d17d5d 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -4172,6 +4172,11 @@ enum {
>  #define GEN11_ENGINE_INSTANCE_SHIFT 48
>  #define GEN11_ENGINE_INSTANCE_WIDTH 6
>  
> +#define XEHP_SW_CTX_ID_SHIFT 39
> +#define XEHP_SW_CTX_ID_WIDTH 16
> +#define XEHP_SW_COUNTER_SHIFT 58
> +#define XEHP_SW_COUNTER_WIDTH 6
> +
>  #define CHV_CLK_CTL1                 _MMIO(0x101100)
>  #define VLV_CLK_CTL2                 _MMIO(0x101104)
>  #define   CLK_CTL2_CZCOUNT_30NS_SHIFT        28
> -- 
> 2.25.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to