[AMD Official Use Only - AMD Internal Distribution Only] You are right, the defines seem different. Originally I thought we only had one gchub for the device and two mmhubs, from 1 to 2. So it seems each XCC will have one gchub, which makes sense to me. But for mmhub0(0-3) and mmhub1(0-1), I'm confused about how they are mapped on the hw side. MES only deals with one mmhub for normal gfx12, and two mmhubs for gfx12_1.
Regards Shaoyun.liu -----Original Message----- From: Alex Deucher <alexdeuc...@gmail.com> Sent: Wednesday, August 20, 2025 3:57 PM To: Liu, Shaoyun <shaoyun....@amd.com> Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH 2/2] drm/amd/amdgpu : Use the MES INV_TLBS API for tlb invalidation on gfx12 On Wed, Aug 20, 2025 at 2:47 PM Shaoyun Liu <shaoyun....@amd.com> wrote: > > From MES version 0x81, it provide the new API INV_TLBS that support > invalidate tlbs with PASID. > > Signed-off-by: Shaoyun Liu <shaoyun....@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 16 ++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 22 ++++++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 25 +++++++++++++++++++++++++ > 3 files changed, 63 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > index c0d2c195fe2e..1b6e7b4fde36 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > @@ -280,6 +280,19 @@ struct mes_reset_queue_input { > bool is_kq; > }; > > +enum amdgpu_mes_hub_id { > + AMDGPU_MES_GC_HUB = 0, > + AMDGPU_MES_MM_HUB0 = 1, > + AMDGPU_MES_MM_HUB1 = 2, > +}; > + > +struct mes_inv_tlbs_pasid_input { > + uint32_t xcc_id; > + uint16_t pasid; > + uint8_t hub_id; > + uint8_t flush_type; > +}; > + > enum mes_misc_opcode { > MES_MISC_OP_WRITE_REG, > MES_MISC_OP_READ_REG, > @@ -367,6 +380,9 @@ struct amdgpu_mes_funcs { > > int (*reset_hw_queue)(struct amdgpu_mes *mes, > struct mes_reset_queue_input *input); > + > + int (*invalidate_tlbs_pasid)(struct amdgpu_mes *mes, > + struct mes_inv_tlbs_pasid_input *input); > }; > > #define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev)) > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c > b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c > index feb92e107af8..ef58a849d67d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c > @@ -339,6 +339,28 @@ 
static void gmc_v12_0_flush_gpu_tlb_pasid(struct > amdgpu_device *adev, > uint16_t queried; > int vmid, i; > > + if (adev->enable_uni_mes && > adev->mes.ring[AMDGPU_MES_SCHED_PIPE].sched.ready && > + (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= > + 0x81) { > + > + struct mes_inv_tlbs_pasid_input input = {0}; > + input.pasid = pasid; > + input.flush_type = flush_type; > + if (all_hub) { > + for_each_set_bit(i, adev->vmhubs_mask, > + AMDGPU_MAX_VMHUBS) { > + /* > + * For gfx12, the index i from vmhubs_mask > matchs AMDGPU_MES_HUB_ID, > + * Need to convert them if they are not match > in future asic > + */ Are you sure about this? From above: > + AMDGPU_MES_GC_HUB = 0, > + AMDGPU_MES_MM_HUB0 = 1, > + AMDGPU_MES_MM_HUB1 = 2, And then in amdgpu_vm.h: /* * max number of VMHUB * layout: max 8 GFXHUB + 4 MMHUB0 + 1 MMHUB1 */ #define AMDGPU_MAX_VMHUBS 13 #define AMDGPU_GFXHUB_START 0 #define AMDGPU_MMHUB0_START 8 #define AMDGPU_MMHUB1_START 12 #define AMDGPU_GFXHUB(x) (AMDGPU_GFXHUB_START + (x)) #define AMDGPU_MMHUB0(x) (AMDGPU_MMHUB0_START + (x)) #define AMDGPU_MMHUB1(x) (AMDGPU_MMHUB1_START + (x)) Alex > + input.hub_id = i; > + > adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input); > + } > + } else { > + adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, > &input); > + } > + return; > + } > + > for (vmid = 1; vmid < 16; vmid++) { > bool valid; > > diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c > b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c > index 6b222630f3fa..bcaaccf28765 100644 > --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c > @@ -108,6 +108,7 @@ static const char *mes_v12_0_opcodes[] = { > "SET_SE_MODE", > "SET_GANG_SUBMIT", > "SET_HW_RSRC_1", > + "INVALIDATE_TLBS", > }; > > static const char *mes_v12_0_misc_opcodes[] = { @@ -879,6 +880,29 @@ > static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes, > offsetof(union MESAPI__RESET, api_status)); } > > +static int mes_v12_0_inv_tlbs_pasid(struct 
amdgpu_mes *mes, > + struct mes_inv_tlbs_pasid_input > +*input) { > + union MESAPI__INV_TLBS mes_inv_tlbs; > + > + memset(&mes_inv_tlbs, 0, sizeof(mes_inv_tlbs)); > + > + mes_inv_tlbs.header.type = MES_API_TYPE_SCHEDULER; > + mes_inv_tlbs.header.opcode = MES_SCH_API_INV_TLBS; > + mes_inv_tlbs.header.dwsize = API_FRAME_SIZE_IN_DWORDS; > + > + mes_inv_tlbs.invalidate_tlbs.inv_sel = 0; > + mes_inv_tlbs.invalidate_tlbs.flush_type = input->flush_type; > + mes_inv_tlbs.invalidate_tlbs.inv_sel_id = input->pasid; > + /*The AMDGPU_MES_HUB_ID from input matchs mes expection on gfx12*/ > + mes_inv_tlbs.invalidate_tlbs.hub_id = (uint32_t)input->hub_id; > + > + return mes_v12_0_submit_pkt_and_poll_completion(mes, > AMDGPU_MES_KIQ_PIPE, > + &mes_inv_tlbs, sizeof(mes_inv_tlbs), > + offsetof(union MESAPI__INV_TLBS, api_status)); > + > +} > + > static const struct amdgpu_mes_funcs mes_v12_0_funcs = { > .add_hw_queue = mes_v12_0_add_hw_queue, > .remove_hw_queue = mes_v12_0_remove_hw_queue, @@ -888,6 +912,7 > @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = { > .resume_gang = mes_v12_0_resume_gang, > .misc_op = mes_v12_0_misc_op, > .reset_hw_queue = mes_v12_0_reset_hw_queue, > + .invalidate_tlbs_pasid = mes_v12_0_inv_tlbs_pasid, > }; > > static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device > *adev, > -- > 2.34.1 >