[PATCH 1/2] drm/amdgpu: reverse PDBs order
The hiberachy of page table is as below, which aligns hw names. PDB2->PDB1->PDB0->PTB, accordingly: level3 --- PDB2 level2 --- PDB1 level1 --- PDB0 level0 --- PTB Change-Id: I2d748e5e96cffe18294c104c4b192d910b2f8e6b Signed-off-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 37 ++ 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 3ecdbdfb04dd..8904ccf78fc9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -148,8 +148,8 @@ struct amdgpu_prt_cb { static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev, unsigned level) { - if (level != adev->vm_manager.num_level) - return 9 * (adev->vm_manager.num_level - level - 1) + + if (level != 0) + return 9 * (level - 1) + adev->vm_manager.block_size; else /* For the page tables on the leaves */ @@ -162,20 +162,27 @@ static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev, * @adev: amdgpu_device pointer * * Calculate the number of entries in a page directory or page table. + * The hiberachy of page table is as below, which aligns hw names. + * PDB2->PDB1->PDB0->PTB, accordingly: + * level3 --- PDB2 + * level2 --- PDB1 + * level1 --- PDB0 + * level0 --- PTB */ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev, unsigned level) { - unsigned shift = amdgpu_vm_level_shift(adev, 0); + unsigned shift = amdgpu_vm_level_shift(adev, + adev->vm_manager.num_level); - if (level == 0) + if (level == adev->vm_manager.num_level) /* For the root directory */ return round_up(adev->vm_manager.max_pfn, 1 << shift) >> shift; - else if (level != adev->vm_manager.num_level) + else if (level != 0) /* Everything in between */ return 512; else - /* For the page tables on the leaves */ + /* For the page tables on the leaves(PTB) */ return AMDGPU_VM_PTE_COUNT(adev); } @@ -312,6 +319,8 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, u64 flags; uint64_t init_value = 0; + BUG_ON(level > adev->vm_manager.num_level); + if (!parent->entries) { unsigned num_entries = amdgpu_vm_num_entries(adev, level); @@ -332,7 +341,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, if (to > parent->last_entry_used) parent->last_entry_used = to; - ++level; + level--; saddr = saddr & ((1 << shift) - 1); eaddr = eaddr & ((1 << shift) - 1); @@ -346,7 +355,8 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, if (vm->pte_support_ats) { init_value = AMDGPU_PTE_DEFAULT_ATC; - if (level != adev->vm_manager.num_level - 1) + /* != PDB0 */ + if (level != 1) init_value |= AMDGPU_PDE_PTE; } @@ -389,7 +399,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, entry->addr = 0; } - if (level < adev->vm_manager.num_level) { + if (level > 0) { uint64_t sub_saddr = (pt_idx == from) ? saddr : 0; uint64_t sub_eaddr = (pt_idx == to) ? eaddr : ((1 << shift) - 1); @@ -435,7 +445,8 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, saddr /= AMDGPU_GPU_PAGE_SIZE; eaddr /= AMDGPU_GPU_PAGE_SIZE; - return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, 0); + return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, + adev->vm_manager.num_level); } /** @@ -1319,19 +1330,19 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr, struct amdgpu_vm_pt **entry, struct amdgpu_vm_pt **parent) { - unsigned level = 0; + unsigned level = p->adev->vm_manager.num_level; *parent = NULL; *entry = &p->vm->root; while ((*entry)->entries) { - unsigned idx = addr >> amdgpu_vm_level_shift(p->adev, level++); + unsigned idx = addr >> amdgpu_vm_level_shift(p->adev, level--); idx %= amdgpu_bo_size((*entry)->base.bo) / 8; *parent = *entry; *entry = &(*entry)->entries[idx]; } - if (level != p->adev->vm_manager.num_level) + if (level != 0) *entry = NULL; } -- 2.14.1 ___ amd-gfx mailing list amd-gfx
[PATCH 2/2] drm/amdgpu: fix pte index calculation
Change-Id: I40ecf31ad4b51022a2c0c076ae45188b6e9d63de Signed-off-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 8904ccf78fc9..affe64e42cef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1335,11 +1335,13 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr, *parent = NULL; *entry = &p->vm->root; while ((*entry)->entries) { - unsigned idx = addr >> amdgpu_vm_level_shift(p->adev, level--); + unsigned idx = addr >> amdgpu_vm_level_shift(p->adev, level); - idx %= amdgpu_bo_size((*entry)->base.bo) / 8; + idx %= amdgpu_vm_num_entries(p->adev, level); *parent = *entry; *entry = &(*entry)->entries[idx]; + if (level) + level--; } if (level != 0) -- 2.14.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH umr] fix specifying device by ASIC name
Signed-off-by: Tom St Denis --- src/lib/discover.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/discover.c b/src/lib/discover.c index e1ccea1d20e4..94d1c676718a 100644 --- a/src/lib/discover.c +++ b/src/lib/discover.c @@ -152,7 +152,7 @@ struct umr_asic *umr_discover_asic(struct umr_options *options) snprintf(name, sizeof(name)-1, "/sys/kernel/debug/dri/%d/name", options->instance); f = fopen(name, "r"); - if (!f && !options->no_kernel && !options->use_pci) { + if (!f && options->instance >= 0 && !options->no_kernel && !options->use_pci) { int found = 0; if (!options->quiet) { f = popen("lsmod | grep ^amdgpu", "r"); -- 2.12.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 2/2] drm/amdgpu: fix pte index calculation
What is wrong with the old approach? I would rather say that the address should be limited by the level shift instead. This way we avoid the modulo altogether. Christian. Am 08.12.2017 um 11:56 schrieb Chunming Zhou: Change-Id: I40ecf31ad4b51022a2c0c076ae45188b6e9d63de Signed-off-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 8904ccf78fc9..affe64e42cef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1335,11 +1335,13 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr, *parent = NULL; *entry = &p->vm->root; while ((*entry)->entries) { - unsigned idx = addr >> amdgpu_vm_level_shift(p->adev, level--); + unsigned idx = addr >> amdgpu_vm_level_shift(p->adev, level); - idx %= amdgpu_bo_size((*entry)->base.bo) / 8; + idx %= amdgpu_vm_num_entries(p->adev, level); *parent = *entry; *entry = &(*entry)->entries[idx]; + if (level) + level--; } if (level != 0) ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/2] drm/amdgpu: reverse PDBs order
Am 08.12.2017 um 11:56 schrieb Chunming Zhou: The hiberachy of page table is as below, which aligns hw names. PDB2->PDB1->PDB0->PTB, accordingly: level3 --- PDB2 level2 --- PDB1 level1 --- PDB0 level0 --- PTB Change-Id: I2d748e5e96cffe18294c104c4b192d910b2f8e6b Signed-off-by: Chunming Zhou Yeah, thought about that as well. But I'm not sure if that is a good idea or not. --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 37 ++ 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 3ecdbdfb04dd..8904ccf78fc9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -148,8 +148,8 @@ struct amdgpu_prt_cb { static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev, unsigned level) { - if (level != adev->vm_manager.num_level) - return 9 * (adev->vm_manager.num_level - level - 1) + + if (level != 0) + return 9 * (level - 1) + adev->vm_manager.block_size; else /* For the page tables on the leaves */ @@ -162,20 +162,27 @@ static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev, * @adev: amdgpu_device pointer * * Calculate the number of entries in a page directory or page table. + * The hiberachy of page table is as below, which aligns hw names. + * PDB2->PDB1->PDB0->PTB, accordingly: + * level3 --- PDB2 + * level2 --- PDB1 + * level1 --- PDB0 + * level0 --- PTB */ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev, unsigned level) { - unsigned shift = amdgpu_vm_level_shift(adev, 0); + unsigned shift = amdgpu_vm_level_shift(adev, + adev->vm_manager.num_level); - if (level == 0) + if (level == adev->vm_manager.num_level) /* For the root directory */ return round_up(adev->vm_manager.max_pfn, 1 << shift) >> shift; - else if (level != adev->vm_manager.num_level) + else if (level != 0) /* Everything in between */ return 512; else - /* For the page tables on the leaves */ + /* For the page tables on the leaves(PTB) */ return AMDGPU_VM_PTE_COUNT(adev); } @@ -312,6 +319,8 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, u64 flags; uint64_t init_value = 0; + BUG_ON(level > adev->vm_manager.num_level); + if (!parent->entries) { unsigned num_entries = amdgpu_vm_num_entries(adev, level); @@ -332,7 +341,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, if (to > parent->last_entry_used) parent->last_entry_used = to; - ++level; + level--; Post decrement/increment please. saddr = saddr & ((1 << shift) - 1); eaddr = eaddr & ((1 << shift) - 1); @@ -346,7 +355,8 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, if (vm->pte_support_ats) { init_value = AMDGPU_PTE_DEFAULT_ATC; - if (level != adev->vm_manager.num_level - 1) BTW: If I'm not completely mistaken that code is incorrect and should rather be "level != adev->vm_manager.num_level". + /* != PDB0 */ + if (level != 1) Which would that make it level != 0. Regards, Christian. init_value |= AMDGPU_PDE_PTE; } @@ -389,7 +399,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, entry->addr = 0; } - if (level < adev->vm_manager.num_level) { + if (level > 0) { uint64_t sub_saddr = (pt_idx == from) ? saddr : 0; uint64_t sub_eaddr = (pt_idx == to) ? eaddr : ((1 << shift) - 1); @@ -435,7 +445,8 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, saddr /= AMDGPU_GPU_PAGE_SIZE; eaddr /= AMDGPU_GPU_PAGE_SIZE; - return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, 0); + return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, + adev->vm_manager.num_level); } /** @@ -1319,19 +1330,19 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr, struct amdgpu_vm_pt **entry, struct amdgpu_vm_pt **parent) { - unsigned level = 0; + unsigned level = p->adev->vm_manager.num_level; *parent = NULL; *entry = &p->vm->root; while ((*entry)->entries) { - unsigned idx = addr >> amdgpu_vm_level_shift(p->adev, level++); + unsigned idx = addr >> amdgpu_vm_level_shift(p->adev, level--); idx %= amdg
Re: [PATCH 1/2] drm/amdgpu: reverse PDBs order
On Fri, Dec 8, 2017 at 5:56 AM, Chunming Zhou wrote: > The hiberachy of page table is as below, which aligns hw names. > PDB2->PDB1->PDB0->PTB, accordingly: > level3 --- PDB2 > level2 --- PDB1 > level1 --- PDB0 > level0 --- PTB What's the advantage of this change? It's not clear from the commit message. Alex > > Change-Id: I2d748e5e96cffe18294c104c4b192d910b2f8e6b > Signed-off-by: Chunming Zhou > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 37 > ++ > 1 file changed, 24 insertions(+), 13 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > index 3ecdbdfb04dd..8904ccf78fc9 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > @@ -148,8 +148,8 @@ struct amdgpu_prt_cb { > static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev, > unsigned level) > { > - if (level != adev->vm_manager.num_level) > - return 9 * (adev->vm_manager.num_level - level - 1) + > + if (level != 0) > + return 9 * (level - 1) + > adev->vm_manager.block_size; > else > /* For the page tables on the leaves */ > @@ -162,20 +162,27 @@ static unsigned amdgpu_vm_level_shift(struct > amdgpu_device *adev, > * @adev: amdgpu_device pointer > * > * Calculate the number of entries in a page directory or page table. > + * The hiberachy of page table is as below, which aligns hw names. > + * PDB2->PDB1->PDB0->PTB, accordingly: > + * level3 --- PDB2 > + * level2 --- PDB1 > + * level1 --- PDB0 > + * level0 --- PTB > */ > static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev, > unsigned level) > { > - unsigned shift = amdgpu_vm_level_shift(adev, 0); > + unsigned shift = amdgpu_vm_level_shift(adev, > + adev->vm_manager.num_level); > > - if (level == 0) > + if (level == adev->vm_manager.num_level) > /* For the root directory */ > return round_up(adev->vm_manager.max_pfn, 1 << shift) >> > shift; > - else if (level != adev->vm_manager.num_level) > + else if (level != 0) > /* Everything in between */ > return 512; > else > - /* For the page tables on the leaves */ > + /* For the page tables on the leaves(PTB) */ > return AMDGPU_VM_PTE_COUNT(adev); > } > > @@ -312,6 +319,8 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device > *adev, > u64 flags; > uint64_t init_value = 0; > > + BUG_ON(level > adev->vm_manager.num_level); > + > if (!parent->entries) { > unsigned num_entries = amdgpu_vm_num_entries(adev, level); > > @@ -332,7 +341,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device > *adev, > if (to > parent->last_entry_used) > parent->last_entry_used = to; > > - ++level; > + level--; > saddr = saddr & ((1 << shift) - 1); > eaddr = eaddr & ((1 << shift) - 1); > > @@ -346,7 +355,8 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device > *adev, > > if (vm->pte_support_ats) { > init_value = AMDGPU_PTE_DEFAULT_ATC; > - if (level != adev->vm_manager.num_level - 1) > + /* != PDB0 */ > + if (level != 1) > init_value |= AMDGPU_PDE_PTE; > > } > @@ -389,7 +399,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device > *adev, > entry->addr = 0; > } > > - if (level < adev->vm_manager.num_level) { > + if (level > 0) { > uint64_t sub_saddr = (pt_idx == from) ? saddr : 0; > uint64_t sub_eaddr = (pt_idx == to) ? eaddr : > ((1 << shift) - 1); > @@ -435,7 +445,8 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, > saddr /= AMDGPU_GPU_PAGE_SIZE; > eaddr /= AMDGPU_GPU_PAGE_SIZE; > > - return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, 0); > + return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, > + adev->vm_manager.num_level); > } > > /** > @@ -1319,19 +1330,19 @@ void amdgpu_vm_get_entry(struct > amdgpu_pte_update_params *p, uint64_t addr, > struct amdgpu_vm_pt **entry, > struct amdgpu_vm_pt **parent) > { > - unsigned level = 0; > + unsigned level = p->adev->vm_manager.num_level; > > *parent = NULL; > *entry = &p->vm->root; > while ((*entry)->entries) { > - unsigned idx = addr >> amdgpu_vm_level_shift(p->adev, > level++); > + unsigned idx = addr >> amdgpu_vm_level_shift(p->a
Re: [PATCH 1/2] drm/amdgpu: reverse PDBs order
Am 08.12.2017 um 15:58 schrieb Alex Deucher: On Fri, Dec 8, 2017 at 5:56 AM, Chunming Zhou wrote: The hiberachy of page table is as below, which aligns hw names. PDB2->PDB1->PDB0->PTB, accordingly: level3 --- PDB2 level2 --- PDB1 level1 --- PDB0 level0 --- PTB What's the advantage of this change? It's not clear from the commit message. Well you align a bit better with how our hardware guys tend to describe the levels. But level0 becomes PTB and level1 becomes PDB0 etc..., so I don't think we win much here. Christian. Alex Change-Id: I2d748e5e96cffe18294c104c4b192d910b2f8e6b Signed-off-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 37 ++ 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 3ecdbdfb04dd..8904ccf78fc9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -148,8 +148,8 @@ struct amdgpu_prt_cb { static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev, unsigned level) { - if (level != adev->vm_manager.num_level) - return 9 * (adev->vm_manager.num_level - level - 1) + + if (level != 0) + return 9 * (level - 1) + adev->vm_manager.block_size; else /* For the page tables on the leaves */ @@ -162,20 +162,27 @@ static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev, * @adev: amdgpu_device pointer * * Calculate the number of entries in a page directory or page table. + * The hiberachy of page table is as below, which aligns hw names. + * PDB2->PDB1->PDB0->PTB, accordingly: + * level3 --- PDB2 + * level2 --- PDB1 + * level1 --- PDB0 + * level0 --- PTB */ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev, unsigned level) { - unsigned shift = amdgpu_vm_level_shift(adev, 0); + unsigned shift = amdgpu_vm_level_shift(adev, + adev->vm_manager.num_level); - if (level == 0) + if (level == adev->vm_manager.num_level) /* For the root directory */ return round_up(adev->vm_manager.max_pfn, 1 << shift) >> shift; - else if (level != adev->vm_manager.num_level) + else if (level != 0) /* Everything in between */ return 512; else - /* For the page tables on the leaves */ + /* For the page tables on the leaves(PTB) */ return AMDGPU_VM_PTE_COUNT(adev); } @@ -312,6 +319,8 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, u64 flags; uint64_t init_value = 0; + BUG_ON(level > adev->vm_manager.num_level); + if (!parent->entries) { unsigned num_entries = amdgpu_vm_num_entries(adev, level); @@ -332,7 +341,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, if (to > parent->last_entry_used) parent->last_entry_used = to; - ++level; + level--; saddr = saddr & ((1 << shift) - 1); eaddr = eaddr & ((1 << shift) - 1); @@ -346,7 +355,8 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, if (vm->pte_support_ats) { init_value = AMDGPU_PTE_DEFAULT_ATC; - if (level != adev->vm_manager.num_level - 1) + /* != PDB0 */ + if (level != 1) init_value |= AMDGPU_PDE_PTE; } @@ -389,7 +399,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, entry->addr = 0; } - if (level < adev->vm_manager.num_level) { + if (level > 0) { uint64_t sub_saddr = (pt_idx == from) ? saddr : 0; uint64_t sub_eaddr = (pt_idx == to) ? eaddr : ((1 << shift) - 1); @@ -435,7 +445,8 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, saddr /= AMDGPU_GPU_PAGE_SIZE; eaddr /= AMDGPU_GPU_PAGE_SIZE; - return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, 0); + return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, + adev->vm_manager.num_level); } /** @@ -1319,19 +1330,19 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr, struct amdgpu_vm_pt **entry, struct amdgpu_vm_pt **parent) { - unsigned level = 0; + unsigned level = p->adev->vm_manager.num_level; *parent = NULL; *entry = &p->vm->root; while ((*entry)->entries) { - unsigned idx = addr >> amdgpu_vm_level_shift(p->adev, level++); + unsi
[PATCH 1/8] drm/amdgpu: stop joining PDEs
That doesn't hit any more most of the time anyway. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 41 ++ 1 file changed, 7 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 3ecdbdfb04dd..d15b6edf7cce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1076,8 +1076,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, struct amdgpu_bo *shadow; struct amdgpu_ring *ring = NULL; uint64_t pd_addr, shadow_addr = 0; - uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0; - unsigned count = 0, pt_idx, ndw = 0; + unsigned pt_idx, ndw = 0; struct amdgpu_job *job; struct amdgpu_pte_update_params params; struct dma_fence *fence = NULL; @@ -1149,41 +1148,15 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID; - pde = pd_addr + pt_idx * 8; incr = amdgpu_bo_size(bo); - if (((last_pde + 8 * count) != pde) || - ((last_pt + incr * count) != pt) || - (count == AMDGPU_VM_MAX_UPDATE_SIZE)) { - - if (count) { - if (shadow) - params.func(¶ms, - last_shadow, - last_pt, count, - incr, - AMDGPU_PTE_VALID); - - params.func(¶ms, last_pde, - last_pt, count, incr, - AMDGPU_PTE_VALID); - } - - count = 1; - last_pde = pde; - last_shadow = shadow_addr + pt_idx * 8; - last_pt = pt; - } else { - ++count; + if (shadow) { + pde = shadow_addr + pt_idx * 8; + params.func(¶ms, pde, pt, 1, incr, + AMDGPU_PTE_VALID); } - } - if (count) { - if (vm->root.base.bo->shadow) - params.func(¶ms, last_shadow, last_pt, - count, incr, AMDGPU_PTE_VALID); - - params.func(¶ms, last_pde, last_pt, - count, incr, AMDGPU_PTE_VALID); + pde = pd_addr + pt_idx * 8; + params.func(¶ms, pde, pt, 1, incr, AMDGPU_PTE_VALID); } if (!vm->use_cpu_for_update) { -- 2.11.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 3/8] drm/amdgpu: avoid the modulo in amdgpu_vm_get_entry
We can do this with a simple mask as well. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 796375484f6f..400a00fababd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1288,11 +1288,11 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr, *parent = NULL; *entry = &p->vm->root; while ((*entry)->entries) { - unsigned idx = addr >> amdgpu_vm_level_shift(p->adev, level++); + unsigned shift = amdgpu_vm_level_shift(p->adev, level++); - idx %= amdgpu_bo_size((*entry)->base.bo) / 8; *parent = *entry; - *entry = &(*entry)->entries[idx]; + *entry = &(*entry)->entries[addr >> shift]; + addr &= (1ULL << shift) - 1; } if (level != p->adev->vm_manager.num_level) -- 2.11.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 4/8] drm/amdgpu: remove last_entry_used from the VM code
Not needed any more. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 52 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 1 - 2 files changed, 29 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 400a00fababd..ae5451bf5873 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -329,9 +329,6 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, to >= amdgpu_vm_num_entries(adev, level)) return -EINVAL; - if (to > parent->last_entry_used) - parent->last_entry_used = to; - ++level; saddr = saddr & ((1 << shift) - 1); eaddr = eaddr & ((1 << shift) - 1); @@ -1187,16 +1184,19 @@ static int amdgpu_vm_update_pde(struct amdgpu_device *adev, * * Mark all PD level as invalid after an error. */ -static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm, - struct amdgpu_vm_pt *parent) +static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_vm_pt *parent, + unsigned level) { - unsigned pt_idx; + unsigned pt_idx, num_entries; /* * Recurse into the subdirectories. This recursion is harmless because * we only have a maximum of 5 layers. */ - for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { + num_entries = amdgpu_vm_num_entries(adev, level); + for (pt_idx = 0; pt_idx < num_entries; ++pt_idx) { struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; if (!entry->base.bo) @@ -1207,7 +1207,7 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm, if (list_empty(&entry->base.vm_status)) list_add(&entry->base.vm_status, &vm->relocated); spin_unlock(&vm->status_lock); - amdgpu_vm_invalidate_level(vm, entry); + amdgpu_vm_invalidate_level(adev, vm, entry, level + 1); } } @@ -1249,7 +1249,8 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev, r = amdgpu_vm_update_pde(adev, vm, pt, entry); if (r) { - amdgpu_vm_invalidate_level(vm, &vm->root); + amdgpu_vm_invalidate_level(adev, vm, + &vm->root, 0); return r; } spin_lock(&vm->status_lock); @@ -1652,7 +1653,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, error_free: amdgpu_job_free(job); - amdgpu_vm_invalidate_level(vm, &vm->root); + amdgpu_vm_invalidate_level(adev, vm, &vm->root, 0); return r; } @@ -2716,26 +2717,31 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, /** * amdgpu_vm_free_levels - free PD/PT levels * - * @level: PD/PT starting level to free + * @adev: amdgpu device structure + * @parent: PD/PT starting level to free + * @level: level of parent structure * * Free the page directory or page table level and all sub levels. */ -static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level) +static void amdgpu_vm_free_levels(struct amdgpu_device *adev, + struct amdgpu_vm_pt *parent, + unsigned level) { - unsigned i; + unsigned i, num_entries = amdgpu_vm_num_entries(adev, level); - if (level->base.bo) { - list_del(&level->base.bo_list); - list_del(&level->base.vm_status); - amdgpu_bo_unref(&level->base.bo->shadow); - amdgpu_bo_unref(&level->base.bo); + if (parent->base.bo) { + list_del(&parent->base.bo_list); + list_del(&parent->base.vm_status); + amdgpu_bo_unref(&parent->base.bo->shadow); + amdgpu_bo_unref(&parent->base.bo); } - if (level->entries) - for (i = 0; i <= level->last_entry_used; i++) - amdgpu_vm_free_levels(&level->entries[i]); + if (parent->entries) + for (i = 0; i < num_entries; i++) + amdgpu_vm_free_levels(adev, &parent->entries[i], + level + 1); - kvfree(level->entries); + kvfree(parent->entries); } /** @@ -2793,7 +2799,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) if (r) { dev_err(adev->dev, "Leaking page tables because BO reservation failed\n"); } else { - amdgpu_vm_free_levels(&vm->root); +
[PATCH 2/8] drm/amdgpu: update one PDE at a time
Horrible inefficient, but avoids problems when the root PD size becomes to big. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 81 +++--- 1 file changed, 36 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index d15b6edf7cce..796375484f6f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1069,17 +1069,20 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, * Makes sure all entries in @parent are up to date. * Returns 0 for success, error for failure. */ -static int amdgpu_vm_update_level(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_vm_pt *parent) +static int amdgpu_vm_update_pde(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_vm_pt *parent, + struct amdgpu_vm_pt *entry) { + struct amdgpu_pte_update_params params; + struct amdgpu_bo *bo = entry->base.bo; struct amdgpu_bo *shadow; struct amdgpu_ring *ring = NULL; uint64_t pd_addr, shadow_addr = 0; - unsigned pt_idx, ndw = 0; struct amdgpu_job *job; - struct amdgpu_pte_update_params params; struct dma_fence *fence = NULL; + unsigned ndw = 0; + uint64_t pde, pt; uint32_t incr; int r; @@ -1102,20 +1105,14 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); - /* padding, etc. */ + /* should be sufficient for two commands plus padding, etc. */ ndw = 64; - /* assume the worst case */ - ndw += parent->last_entry_used * 6; - pd_addr = amdgpu_bo_gpu_offset(parent->base.bo); - - if (shadow) { + if (shadow) shadow_addr = amdgpu_bo_gpu_offset(shadow); - ndw *= 2; - } else { + else shadow_addr = 0; - } r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); if (r) @@ -1125,40 +1122,32 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, params.func = amdgpu_vm_do_set_ptes; } + spin_lock(&vm->status_lock); + list_del_init(&entry->base.vm_status); + spin_unlock(&vm->status_lock); - /* walk over the address space and update the directory */ - for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { - struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; - struct amdgpu_bo *bo = entry->base.bo; - uint64_t pde, pt; - - if (bo == NULL) - continue; - - spin_lock(&vm->status_lock); - list_del_init(&entry->base.vm_status); - spin_unlock(&vm->status_lock); - - pt = amdgpu_bo_gpu_offset(bo); - pt = amdgpu_gart_get_vm_pde(adev, pt); - /* Don't update huge pages here */ - if ((parent->entries[pt_idx].addr & AMDGPU_PDE_PTE) || - parent->entries[pt_idx].addr == (pt | AMDGPU_PTE_VALID)) - continue; - - parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID; + pt = amdgpu_bo_gpu_offset(bo); + pt = amdgpu_gart_get_vm_pde(adev, pt); + /* Don't update huge pages here */ + if (entry->addr & AMDGPU_PDE_PTE || + entry->addr == (pt | AMDGPU_PTE_VALID)) { + if (!vm->use_cpu_for_update) + amdgpu_job_free(job); + return 0; + } - incr = amdgpu_bo_size(bo); - if (shadow) { - pde = shadow_addr + pt_idx * 8; - params.func(¶ms, pde, pt, 1, incr, - AMDGPU_PTE_VALID); - } + entry->addr = pt | AMDGPU_PTE_VALID; - pde = pd_addr + pt_idx * 8; - params.func(¶ms, pde, pt, 1, incr, AMDGPU_PTE_VALID); + incr = amdgpu_bo_size(bo); + if (shadow) { + pde = shadow_addr + (entry - parent->entries) * 8; + params.func(¶ms, pde, pt, 1, incr, + AMDGPU_PTE_VALID); } + pde = pd_addr + (entry - parent->entries) * 8; + params.func(¶ms, pde, pt, 1, incr, AMDGPU_PTE_VALID); + if (!vm->use_cpu_for_update) { if (params.ib->length_dw == 0) { amdgpu_job_free(job); @@ -1249,14 +1238,16 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
[PATCH 7/8] drm/amdgpu: allow get_vm_pde to change flags as well
And also provide the level for which we need a PDE. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu.h| 5 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 20 +--- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 10 ++ drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 12 +++- drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 12 +++- 11 files changed, 54 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 7db9a7fa15a8..e5e0fbd43273 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -346,7 +346,8 @@ struct amdgpu_gart_funcs { uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev, uint32_t flags); /* get the pde for a given mc addr */ - u64 (*get_vm_pde)(struct amdgpu_device *adev, u64 addr); + void (*get_vm_pde)(struct amdgpu_device *adev, int level, + u64 *dst, u64 *flags); uint32_t (*get_invalidate_req)(unsigned int vm_id); }; @@ -1783,7 +1784,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev)) #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid)) #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags)) -#define amdgpu_gart_get_vm_pde(adev, addr) (adev)->gart.gart_funcs->get_vm_pde((adev), (addr)) +#define amdgpu_gart_get_vm_pde(adev, level, dst, flags) (adev)->gart.gart_funcs->get_vm_pde((adev), (level), (dst), (flags)) #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count))) #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr))) #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags))) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 6a35c58100f7..61e4359a22ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1070,9 +1070,10 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params, struct amdgpu_vm_pt *parent, struct amdgpu_vm_pt *entry) { - struct amdgpu_bo *bo = entry->base.bo, *shadow = NULL; + struct amdgpu_bo *bo = entry->base.bo, *shadow = NULL, *pbo; uint64_t pd_addr, shadow_addr = 0; - uint64_t pde, pt; + uint64_t pde, pt, flags; + unsigned level; /* Don't update huge pages here */ if (entry->huge) @@ -1087,15 +1088,19 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params, shadow_addr = amdgpu_bo_gpu_offset(shadow); } + for (level = 0, pbo = parent->base.bo->parent; pbo; ++level) + pbo = pbo->parent; + pt = amdgpu_bo_gpu_offset(bo); - pt = amdgpu_gart_get_vm_pde(params->adev, pt); + flags = AMDGPU_PTE_VALID; + amdgpu_gart_get_vm_pde(params->adev, level, &pt, &flags); if (shadow) { pde = shadow_addr + (entry - parent->entries) * 8; - params->func(params, pde, pt, 1, 0, AMDGPU_PTE_VALID); + params->func(params, pde, pt, 1, 0, flags); } pde = pd_addr + (entry - parent->entries) * 8; - params->func(params, pde, pt, 1, 0, AMDGPU_PTE_VALID); + params->func(params, pde, pt, 1, 0, flags); } /* @@ -1305,7 +1310,6 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, !(flags & AMDGPU_PTE_VALID)) { dst = amdgpu_bo_gpu_offset(entry->base.bo); - dst = amdgpu_gart_get_vm_pde(p->adev, dst); flags = AMDGPU_PTE_VALID; } else { /* Set the huge page flag to stop scanning at this PDE */ @@ -1314,9 +1318,11 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, if (!entry->huge && !(flags & AMDGPU_PDE_PTE)) return; - entry->huge = !!(flags & AMDGPU_PDE_PTE); + amdgpu_gart_get_vm_pde(p->adev, p->adev->vm_manager.num_level - 1, + &dst, &flags); + if (use_cpu_update) { /* In case a huge page is replaced with a system
[PATCH 5/8] drm/amdgpu: remove keeping the addr of the VM PDs
No more double house keeping. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 13 - drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index ae5451bf5873..abb3d4fb49f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -383,7 +383,6 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, spin_lock(&vm->status_lock); list_add(&entry->base.vm_status, &vm->relocated); spin_unlock(&vm->status_lock); - entry->addr = 0; } if (level < adev->vm_manager.num_level) { @@ -1126,15 +1125,12 @@ static int amdgpu_vm_update_pde(struct amdgpu_device *adev, pt = amdgpu_bo_gpu_offset(bo); pt = amdgpu_gart_get_vm_pde(adev, pt); /* Don't update huge pages here */ - if (entry->addr & AMDGPU_PDE_PTE || - entry->addr == (pt | AMDGPU_PTE_VALID)) { + if (entry->huge) { if (!vm->use_cpu_for_update) amdgpu_job_free(job); return 0; } - entry->addr = pt | AMDGPU_PTE_VALID; - incr = amdgpu_bo_size(bo); if (shadow) { pde = shadow_addr + (entry - parent->entries) * 8; @@ -1202,7 +1198,6 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev, if (!entry->base.bo) continue; - entry->addr = ~0ULL; spin_lock(&vm->status_lock); if (list_empty(&entry->base.vm_status)) list_add(&entry->base.vm_status, &vm->relocated); @@ -1335,10 +1330,10 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, flags |= AMDGPU_PDE_PTE; } - if (entry->addr == (dst | flags)) + if (!entry->huge && !(flags & AMDGPU_PDE_PTE)) return; - entry->addr = (dst | flags); + entry->huge = !!(flags & AMDGPU_PDE_PTE); if (use_cpu_update) { /* In case a huge page is replaced with a system @@ -1412,7 +1407,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, amdgpu_vm_handle_huge_pages(params, entry, parent, nptes, dst, flags); /* We don't need to update PTEs for huge pages */ - if (entry->addr & AMDGPU_PDE_PTE) + if (entry->huge) continue; pt = entry->base.bo; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 7a308a1ea048..228f63e9ac5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -137,7 +137,7 @@ struct amdgpu_vm_bo_base { struct amdgpu_vm_pt { struct amdgpu_vm_bo_basebase; - uint64_taddr; + boolhuge; /* array of page tables, one for each directory entry */ struct amdgpu_vm_pt *entries; -- 2.11.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 8/8] drm/amdgpu: implement 2+1 PD support for Raven
Instead of falling back to 2 level and very limited address space use 2+1 PD support and 128TB + 512GB of virtual address space. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 ++ drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 42 ++- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 26 ++--- drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 49 5 files changed, 86 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index e5e0fbd43273..9517c0f76d27 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -541,6 +541,7 @@ struct amdgpu_mc { u64 private_aperture_end; /* protects concurrent invalidation */ spinlock_t invalidate_lock; + booltranslate_further; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 228f63e9ac5e..79134f0c26d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -69,6 +69,9 @@ struct amdgpu_bo_list_entry; /* PDE is handled as PTE for VEGA10 */ #define AMDGPU_PDE_PTE (1ULL << 54) +/* PTE is handled as PDE for VEGA10 */ +#define AMDGPU_PTE_TRANSLATE_FURTHER (1ULL << 56) + /* VEGA10 only */ #define AMDGPU_PTE_MTYPE(a)((uint64_t)a << 57) #define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL) diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index f1effadfbaa6..a56f77259130 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -144,8 +144,15 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp); tmp = mmVM_L2_CNTL3_DEFAULT; - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + if (adev->mc.translate_further) { + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } else { + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp); tmp = mmVM_L2_CNTL4_DEFAULT; @@ -183,31 +190,40 @@ static void gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) { - int i; + unsigned num_level, block_size; uint32_t tmp; + int i; + + num_level = adev->vm_manager.num_level; + block_size = adev->vm_manager.block_size; + if (adev->mc.translate_further) + num_level -= 1; + else + block_size -= 9; for (i = 0; i <= 14; i++) { tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, - adev->vm_manager.num_level); + num_level); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, + 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - EXECUTE_PROTECTION_FAULT_ENA
[PATCH 6/8] drm/amdgpu: batch PDE updates again
Now instead of one submission for each PDE batch them together over all PDs who need an update. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 206 +++-- 1 file changed, 94 insertions(+), 112 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index abb3d4fb49f4..6a35c58100f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1056,121 +1056,46 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, } /* - * amdgpu_vm_update_level - update a single level in the hierarchy + * amdgpu_vm_update_pde - update a single level in the hierarchy * - * @adev: amdgpu_device pointer + * @param: parameters for the update * @vm: requested vm * @parent: parent directory + * @entry: entry to update * - * Makes sure all entries in @parent are up to date. - * Returns 0 for success, error for failure. + * Makes sure the requested entry in parent is up to date. */ -static int amdgpu_vm_update_pde(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_vm_pt *parent, - struct amdgpu_vm_pt *entry) +static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params, +struct amdgpu_vm *vm, +struct amdgpu_vm_pt *parent, +struct amdgpu_vm_pt *entry) { - struct amdgpu_pte_update_params params; - struct amdgpu_bo *bo = entry->base.bo; - struct amdgpu_bo *shadow; - struct amdgpu_ring *ring = NULL; + struct amdgpu_bo *bo = entry->base.bo, *shadow = NULL; uint64_t pd_addr, shadow_addr = 0; - struct amdgpu_job *job; - struct dma_fence *fence = NULL; - unsigned ndw = 0; uint64_t pde, pt; - uint32_t incr; - int r; - - if (!parent->entries) - return 0; - - memset(¶ms, 0, sizeof(params)); - params.adev = adev; - shadow = parent->base.bo->shadow; + /* Don't update huge pages here */ + if (entry->huge) + return; if (vm->use_cpu_for_update) { pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo); - r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM); - if (unlikely(r)) - return r; - - params.func = amdgpu_vm_cpu_set_ptes; } else { - ring = container_of(vm->entity.sched, struct amdgpu_ring, - sched); - - /* should be sufficient for two commands plus padding, etc. */ - ndw = 64; - pd_addr = amdgpu_bo_gpu_offset(parent->base.bo); + shadow = parent->base.bo->shadow; if (shadow) shadow_addr = amdgpu_bo_gpu_offset(shadow); - else - shadow_addr = 0; - - r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); - if (r) - return r; - - params.ib = &job->ibs[0]; - params.func = amdgpu_vm_do_set_ptes; } - spin_lock(&vm->status_lock); - list_del_init(&entry->base.vm_status); - spin_unlock(&vm->status_lock); - pt = amdgpu_bo_gpu_offset(bo); - pt = amdgpu_gart_get_vm_pde(adev, pt); - /* Don't update huge pages here */ - if (entry->huge) { - if (!vm->use_cpu_for_update) - amdgpu_job_free(job); - return 0; - } - - incr = amdgpu_bo_size(bo); + pt = amdgpu_gart_get_vm_pde(params->adev, pt); if (shadow) { pde = shadow_addr + (entry - parent->entries) * 8; - params.func(¶ms, pde, pt, 1, incr, - AMDGPU_PTE_VALID); + params->func(params, pde, pt, 1, 0, AMDGPU_PTE_VALID); } pde = pd_addr + (entry - parent->entries) * 8; - params.func(¶ms, pde, pt, 1, incr, AMDGPU_PTE_VALID); - - if (!vm->use_cpu_for_update) { - if (params.ib->length_dw == 0) { - amdgpu_job_free(job); - } else { - amdgpu_ring_pad_ib(ring, params.ib); - amdgpu_sync_resv(adev, &job->sync, -parent->base.bo->tbo.resv, -AMDGPU_FENCE_OWNER_VM, false); - if (shadow) - amdgpu_sync_resv(adev, &job->sync, -shadow->tbo.resv, -AMDGPU_FENCE_OWNER_VM, false); - - WARN_ON(params.ib->length_dw > ndw); - r = amdgpu_job_submit(job, ring, &vm-
[PATCH 3/4] drm/amdgpu: drop the bios scratch reg callbacks from nbio
They are not used any longer. We get the scratch register locations from the vbios directly now. Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h| 3 --- drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c | 14 -- drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c | 14 -- 3 files changed, 31 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 98d8a5f2dcfb..d27cc6cbcd04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1455,9 +1455,6 @@ struct amdgpu_nbio_funcs { u32 (*get_pcie_index_offset)(struct amdgpu_device *adev); u32 (*get_pcie_data_offset)(struct amdgpu_device *adev); u32 (*get_rev_id)(struct amdgpu_device *adev); - u32 (*get_atombios_scratch_regs)(struct amdgpu_device *adev, uint32_t idx); - void (*set_atombios_scratch_regs)(struct amdgpu_device *adev, - uint32_t idx, uint32_t val); void (*mc_access_enable)(struct amdgpu_device *adev, bool enable); void (*hdp_flush)(struct amdgpu_device *adev); u32 (*get_memsize)(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index 0d3514808092..d4da663d5eb0 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -43,18 +43,6 @@ static u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev) return tmp; } -static u32 nbio_v6_1_get_atombios_scratch_regs(struct amdgpu_device *adev, - uint32_t idx) -{ - return RREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx); -} - -static void nbio_v6_1_set_atombios_scratch_regs(struct amdgpu_device *adev, - uint32_t idx, uint32_t val) -{ - WREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx, val); -} - static void nbio_v6_1_mc_access_enable(struct amdgpu_device *adev, bool enable) { if (enable) @@ -284,8 +272,6 @@ const struct amdgpu_nbio_funcs nbio_v6_1_funcs = { .get_pcie_index_offset = nbio_v6_1_get_pcie_index_offset, .get_pcie_data_offset = nbio_v6_1_get_pcie_data_offset, .get_rev_id = nbio_v6_1_get_rev_id, - .get_atombios_scratch_regs = nbio_v6_1_get_atombios_scratch_regs, - .set_atombios_scratch_regs = nbio_v6_1_set_atombios_scratch_regs, .mc_access_enable = nbio_v6_1_mc_access_enable, .hdp_flush = nbio_v6_1_hdp_flush, .get_memsize = nbio_v6_1_get_memsize, diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index 29d7b4fd7a88..17a9131a4598 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -44,18 +44,6 @@ static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev) return tmp; } -static u32 nbio_v7_0_get_atombios_scratch_regs(struct amdgpu_device *adev, - uint32_t idx) -{ - return RREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx); -} - -static void nbio_v7_0_set_atombios_scratch_regs(struct amdgpu_device *adev, - uint32_t idx, uint32_t val) -{ - WREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx, val); -} - static void nbio_v7_0_mc_access_enable(struct amdgpu_device *adev, bool enable) { if (enable) @@ -279,8 +267,6 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = { .get_pcie_index_offset = nbio_v7_0_get_pcie_index_offset, .get_pcie_data_offset = nbio_v7_0_get_pcie_data_offset, .get_rev_id = nbio_v7_0_get_rev_id, - .get_atombios_scratch_regs = nbio_v7_0_get_atombios_scratch_regs, - .set_atombios_scratch_regs = nbio_v7_0_set_atombios_scratch_regs, .mc_access_enable = nbio_v7_0_mc_access_enable, .hdp_flush = nbio_v7_0_hdp_flush, .get_memsize = nbio_v7_0_get_memsize, -- 2.13.6 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 4/4] drm/amdgpu: drop soc15_init_golden_registers
The golden register arrays were empty so the function was effectively useless. Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 31 --- 1 file changed, 31 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index f505276639eb..a16e8d9a8fa2 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -231,35 +231,6 @@ static u32 soc15_get_config_memsize(struct amdgpu_device *adev) return adev->nbio_funcs->get_memsize(adev); } -static const u32 vega10_golden_init[] = -{ -}; - -static const u32 raven_golden_init[] = -{ -}; - -static void soc15_init_golden_registers(struct amdgpu_device *adev) -{ - /* Some of the registers might be dependent on GRBM_GFX_INDEX */ - mutex_lock(&adev->grbm_idx_mutex); - - switch (adev->asic_type) { - case CHIP_VEGA10: - amdgpu_program_register_sequence(adev, -vega10_golden_init, - ARRAY_SIZE(vega10_golden_init)); - break; - case CHIP_RAVEN: - amdgpu_program_register_sequence(adev, -raven_golden_init, -ARRAY_SIZE(raven_golden_init)); - break; - default: - break; - } - mutex_unlock(&adev->grbm_idx_mutex); -} static u32 soc15_get_xclk(struct amdgpu_device *adev) { return adev->clock.spll.reference_freq; @@ -745,8 +716,6 @@ static int soc15_common_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - /* move the golden regs per IP block */ - soc15_init_golden_registers(adev); /* enable pcie gen2/3 link */ soc15_pcie_gen3_enable(adev); /* enable aspm */ -- 2.13.6 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/4] drm/amdgpu: convert nbio to use callbacks
Cleans up and consolidates all of the per-asic logic. Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 50 -- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 7 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 15 + drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c| 81 ++- drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h| 25 --- drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c| 105 +- drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h| 19 -- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c| 13 +--- drivers/gpu/drm/amd/amdgpu/soc15.c| 40 drivers/gpu/drm/amd/amdgpu/soc15_common.h | 16 - drivers/gpu/drm/amd/amdgpu/vega10_ih.c| 11 +--- 11 files changed, 204 insertions(+), 178 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index e329faa6166f..98d8a5f2dcfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1432,16 +1432,52 @@ typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, u /* * amdgpu nbio functions * - * Fix me : - * Put more NBIO specifc func wraper here , for now just try to minimize the - * change to avoid use SOC15_REG_OFFSET in the constant array */ +struct nbio_hdp_flush_reg { + u32 ref_and_mask_cp0; + u32 ref_and_mask_cp1; + u32 ref_and_mask_cp2; + u32 ref_and_mask_cp3; + u32 ref_and_mask_cp4; + u32 ref_and_mask_cp5; + u32 ref_and_mask_cp6; + u32 ref_and_mask_cp7; + u32 ref_and_mask_cp8; + u32 ref_and_mask_cp9; + u32 ref_and_mask_sdma0; + u32 ref_and_mask_sdma1; +}; struct amdgpu_nbio_funcs { - u32 (*get_hdp_flush_req_offset)(struct amdgpu_device*); - u32 (*get_hdp_flush_done_offset)(struct amdgpu_device*); - u32 (*get_pcie_index_offset)(struct amdgpu_device*); - u32 (*get_pcie_data_offset)(struct amdgpu_device*); + const struct nbio_hdp_flush_reg *hdp_flush_reg; + u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev); + u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev); + u32 (*get_pcie_index_offset)(struct amdgpu_device *adev); + u32 (*get_pcie_data_offset)(struct amdgpu_device *adev); + u32 (*get_rev_id)(struct amdgpu_device *adev); + u32 (*get_atombios_scratch_regs)(struct amdgpu_device *adev, uint32_t idx); + void (*set_atombios_scratch_regs)(struct amdgpu_device *adev, + uint32_t idx, uint32_t val); + void (*mc_access_enable)(struct amdgpu_device *adev, bool enable); + void (*hdp_flush)(struct amdgpu_device *adev); + u32 (*get_memsize)(struct amdgpu_device *adev); + void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance, + bool use_doorbell, int doorbell_index); + void (*enable_doorbell_aperture)(struct amdgpu_device *adev, +bool enable); + void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev, + bool enable); + void (*ih_doorbell_range)(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index); + void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev, +bool enable); + void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev, + bool enable); + void (*get_clockgating_state)(struct amdgpu_device *adev, + u32 *flags); + void (*ih_control)(struct amdgpu_device *adev); + void (*init_registers)(struct amdgpu_device *adev); + void (*detect_hw_virt)(struct amdgpu_device *adev); }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 50a1edb7c32e..3fd13b77e71e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3552,12 +3552,7 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 ref_and_mask, reg_mem_engine; - const struct nbio_hdp_flush_reg *nbio_hf_reg; - - if (ring->adev->flags & AMD_IS_APU) - nbio_hf_reg = &nbio_v7_0_hdp_flush_reg; - else - nbio_hf_reg = &nbio_v6_1_hdp_flush_reg; + const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { switch (ring->me) { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index aef5f70547ac..695e0eada1cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -38,8 +38,6 @@ #include "s
[PATCH 1/4] drm/amdgpu: make function names consistent in nbio files
All functions should have nbio_v* prefix. Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c | 16 drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c | 16 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index 9a3546f8b42b..947d6e4a01f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -212,22 +212,22 @@ void nbio_v6_1_get_clockgating_state(struct amdgpu_device *adev, u32 *flags) *flags |= AMD_CG_SUPPORT_BIF_LS; } -static u32 get_hdp_flush_req_offset(struct amdgpu_device *adev) +static u32 nbio_v6_1_get_hdp_flush_req_offset(struct amdgpu_device *adev) { return SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_GPU_HDP_FLUSH_REQ); } -static u32 get_hdp_flush_done_offset(struct amdgpu_device *adev) +static u32 nbio_v6_1_get_hdp_flush_done_offset(struct amdgpu_device *adev) { return SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_GPU_HDP_FLUSH_DONE); } -static u32 get_pcie_index_offset(struct amdgpu_device *adev) +static u32 nbio_v6_1_get_pcie_index_offset(struct amdgpu_device *adev) { return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX); } -static u32 get_pcie_data_offset(struct amdgpu_device *adev) +static u32 nbio_v6_1_get_pcie_data_offset(struct amdgpu_device *adev) { return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA); } @@ -248,10 +248,10 @@ const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = { }; const struct amdgpu_nbio_funcs nbio_v6_1_funcs = { - .get_hdp_flush_req_offset = get_hdp_flush_req_offset, - .get_hdp_flush_done_offset = get_hdp_flush_done_offset, - .get_pcie_index_offset = get_pcie_index_offset, - .get_pcie_data_offset = get_pcie_data_offset, + .get_hdp_flush_req_offset = nbio_v6_1_get_hdp_flush_req_offset, + .get_hdp_flush_done_offset = nbio_v6_1_get_hdp_flush_done_offset, + .get_pcie_index_offset = nbio_v6_1_get_pcie_index_offset, + .get_pcie_data_offset = nbio_v6_1_get_pcie_data_offset, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index ce869f37a382..851f58e0b9d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -182,22 +182,22 @@ void nbio_v7_0_ih_control(struct amdgpu_device *adev) WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL, interrupt_cntl); } -static u32 get_hdp_flush_req_offset(struct amdgpu_device *adev) +static u32 nbio_v7_0_get_hdp_flush_req_offset(struct amdgpu_device *adev) { return SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_REQ); } -static u32 get_hdp_flush_done_offset(struct amdgpu_device *adev) +static u32 nbio_v7_0_get_hdp_flush_done_offset(struct amdgpu_device *adev) { return SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_DONE); } -static u32 get_pcie_index_offset(struct amdgpu_device *adev) +static u32 nbio_v7_0_get_pcie_index_offset(struct amdgpu_device *adev) { return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2); } -static u32 get_pcie_data_offset(struct amdgpu_device *adev) +static u32 nbio_v7_0_get_pcie_data_offset(struct amdgpu_device *adev) { return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2); } @@ -218,9 +218,9 @@ const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg = { }; const struct amdgpu_nbio_funcs nbio_v7_0_funcs = { - .get_hdp_flush_req_offset = get_hdp_flush_req_offset, - .get_hdp_flush_done_offset = get_hdp_flush_done_offset, - .get_pcie_index_offset = get_pcie_index_offset, - .get_pcie_data_offset = get_pcie_data_offset, + .get_hdp_flush_req_offset = nbio_v7_0_get_hdp_flush_req_offset, + .get_hdp_flush_done_offset = nbio_v7_0_get_hdp_flush_done_offset, + .get_pcie_index_offset = nbio_v7_0_get_pcie_index_offset, + .get_pcie_data_offset = nbio_v7_0_get_pcie_data_offset, }; -- 2.13.6 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu: setup the shared and private apertures on gfx9
Same as previous asics. This was not yet set for gfx9. Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 17 - drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 3fd13b77e71e..7564f87b084e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1529,11 +1529,18 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev) for (i = 0; i < 16; i++) { soc15_grbm_select(adev, 0, 0, 0, i); /* CP and shaders */ - tmp = 0; - tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, - SH_MEM_ALIGNMENT_MODE_UNALIGNED); - WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp); - WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0); + if (i == 0) { + tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, + SH_MEM_ALIGNMENT_MODE_UNALIGNED); + WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp); + WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0); + } else { + tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, + SH_MEM_ALIGNMENT_MODE_UNALIGNED); + WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp); + tmp = adev->mc.shared_aperture_start >> 48; + WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp); + } } soc15_grbm_select(adev, 0, 0, 0, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 695e0eada1cd..dbfb746a390c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -497,6 +497,14 @@ static int gmc_v9_0_early_init(void *handle) gmc_v9_0_set_gart_funcs(adev); gmc_v9_0_set_irq_funcs(adev); + adev->mc.shared_aperture_start = 0x2000ULL; + adev->mc.shared_aperture_end = + adev->mc.shared_aperture_start + (4ULL << 30) - 1; + adev->mc.private_aperture_start = + adev->mc.shared_aperture_end + 1; + adev->mc.private_aperture_end = + adev->mc.private_aperture_start + (4ULL << 30) - 1; + return 0; } -- 2.13.6 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH v2 2/2] drm/amdgpu: Move to gtt before cpu accesses dma buf.
To improve cpu read performance. This is implemented for APUs currently. v2: Adapt to change https://lists.freedesktop.org/archives/amd-gfx/2017-October/015174.html Change-Id: I7a583e23a9ee706e0edd2a46f4e4186a609368e3 --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 58 +++ 3 files changed, 61 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index f8657c3..193db70 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -417,6 +417,8 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev, struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, struct drm_gem_object *gobj, int flags); +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf); int amdgpu_gem_prime_pin(struct drm_gem_object *obj); void amdgpu_gem_prime_unpin(struct drm_gem_object *obj); struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 31383e0..df30b08 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -868,7 +868,7 @@ static struct drm_driver kms_driver = { .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_export = amdgpu_gem_prime_export, - .gem_prime_import = drm_gem_prime_import, + .gem_prime_import = amdgpu_gem_prime_import, .gem_prime_pin = amdgpu_gem_prime_pin, .gem_prime_unpin = amdgpu_gem_prime_unpin, .gem_prime_res_obj = amdgpu_gem_prime_res_obj, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index ae9c106..de6f599 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -26,6 +26,7 @@ #include #include "amdgpu.h" +#include "amdgpu_display.h" #include #include @@ -164,6 +165,33 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) return bo->tbo.resv; } +static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction) +{ + struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv); + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + struct ttm_operation_ctx ctx = { true, false }; + u32 domain = amdgpu_framebuffer_domains(adev); + long ret = 0; + bool reads = (direction == DMA_BIDIRECTIONAL || direction == DMA_FROM_DEVICE); + + if (!reads || !(domain | AMDGPU_GEM_DOMAIN_GTT) || bo->pin_count) + return 0; + + /* move to gtt */ + ret = amdgpu_bo_reserve(bo, false); + if (unlikely(ret != 0)) + return ret; + + amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + + amdgpu_bo_unreserve(bo); + return ret; +} + +static struct dma_buf_ops amdgpu_dmabuf_ops; +static atomic_t aops_lock; + struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, struct drm_gem_object *gobj, int flags) @@ -178,5 +206,35 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, buf = drm_gem_prime_export(dev, gobj, flags); if (!IS_ERR(buf)) buf->file->f_mapping = dev->anon_inode->i_mapping; + + while (amdgpu_dmabuf_ops.begin_cpu_access != amdgpu_gem_begin_cpu_access) + { + if (!atomic_cmpxchg(&aops_lock, 0, 1)) { + amdgpu_dmabuf_ops = *(buf->ops); + amdgpu_dmabuf_ops.begin_cpu_access = amdgpu_gem_begin_cpu_access; + } + } + buf->ops = &amdgpu_dmabuf_ops; + return buf; } + +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf) +{ + struct drm_gem_object *obj; + + if (dma_buf->ops == &amdgpu_dmabuf_ops) { + obj = dma_buf->priv; + if (obj->dev == dev) { + /* +* Importing dmabuf exported from out own gem increases +* refcount on gem itself instead of f_count of dmabuf. +*/ + drm_gem_object_get(obj); + return obj; + } + } + + return drm_gem_prime_import(dev, dma_buf); +} -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.o
[PATCH v2 1/2] drm/amdgpu: allow framebuffer in GART memory as well
From: Christian König On CZ and newer APUs we can pin the fb into GART as well as VRAM. v2: Don't enable gpu_vm_support for Raven yet since it leads to a black screen. Need to debug this further before enabling. Change-Id: Id0f8af3110e54a3aabf7a258871867bc121cc1a2 Signed-off-by: Christian König Reviewed-by: Andrey Grodzovsky Acked-by: Alex Deucher Signed-off-by: Samuel Li --- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 14 +- drivers/gpu/drm/amd/amdgpu/amdgpu_display.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c| 10 ++ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 11 +-- 4 files changed, 29 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index d704a45..d9fdc19 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -29,6 +29,7 @@ #include "amdgpu_i2c.h" #include "atom.h" #include "amdgpu_connectors.h" +#include "amdgpu_display.h" #include #include @@ -188,7 +189,7 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc, goto cleanup; } - r = amdgpu_bo_pin(new_abo, AMDGPU_GEM_DOMAIN_VRAM, &base); + r = amdgpu_bo_pin(new_abo, amdgpu_framebuffer_domains(adev), &base); if (unlikely(r != 0)) { DRM_ERROR("failed to pin new abo buffer before flip\n"); goto unreserve; @@ -501,6 +502,17 @@ static const struct drm_framebuffer_funcs amdgpu_fb_funcs = { .create_handle = amdgpu_user_framebuffer_create_handle, }; +uint32_t amdgpu_framebuffer_domains(struct amdgpu_device *adev) +{ + uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM; + + if (adev->asic_type >= CHIP_CARRIZO && adev->asic_type < CHIP_RAVEN && + adev->flags & AMD_IS_APU) + domain |= AMDGPU_GEM_DOMAIN_GTT; + + return domain; +} + int amdgpu_framebuffer_init(struct drm_device *dev, struct amdgpu_framebuffer *rfb, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h index 11ae4ab..f241949 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h @@ -23,6 +23,7 @@ #ifndef __AMDGPU_DISPLAY_H__ #define __AMDGPU_DISPLAY_H__ +uint32_t amdgpu_framebuffer_domains(struct amdgpu_device *adev); struct drm_framebuffer * amdgpu_user_framebuffer_create(struct drm_device *dev, struct drm_file *file_priv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 90fa8e8..9be3228 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -38,6 +38,8 @@ #include +#include "amdgpu_display.h" + /* object hierarchy - this contains a helper + a amdgpu fb the helper contains a pointer to amdgpu framebuffer baseclass. @@ -124,7 +126,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, struct drm_gem_object *gobj = NULL; struct amdgpu_bo *abo = NULL; bool fb_tiled = false; /* useful for testing */ - u32 tiling_flags = 0; + u32 tiling_flags = 0, domain; int ret; int aligned_size, size; int height = mode_cmd->height; @@ -135,12 +137,12 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, /* need to align pitch with crtc limits */ mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp, fb_tiled); + domain = amdgpu_framebuffer_domains(adev); height = ALIGN(mode_cmd->height, 8); size = mode_cmd->pitches[0] * height; aligned_size = ALIGN(size, PAGE_SIZE); - ret = amdgpu_gem_object_create(adev, aligned_size, 0, - AMDGPU_GEM_DOMAIN_VRAM, + ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | AMDGPU_GEM_CREATE_VRAM_CLEARED, @@ -166,7 +168,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, } - ret = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM, NULL); + ret = amdgpu_bo_pin(abo, domain, NULL); if (ret) { amdgpu_bo_unreserve(abo); goto out_unref; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0d5047e..5e58dba 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2991,11 +2991,13 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane, { struct amdgpu_framebuffer *afb; struct drm_gem_o
[PATCH 01/37] drm/amd: add new interface to query cu info
From: Flora Cui Signed-off-by: Flora Cui Signed-off-by: Harish Kasiviswanathan Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 19 +++ 1 file changed, 19 insertions(+) diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index fe3079a..3a93ffe 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -46,6 +46,20 @@ enum kfd_preempt_type { KFD_PREEMPT_TYPE_WAVEFRONT_RESET, }; +struct kfd_cu_info { + uint32_t num_shader_engines; + uint32_t num_shader_arrays_per_engine; + uint32_t num_cu_per_sh; + uint32_t cu_active_number; + uint32_t cu_ao_mask; + uint32_t simd_per_cu; + uint32_t max_waves_per_simd; + uint32_t wave_front_size; + uint32_t max_scratch_slots_per_cu; + uint32_t lds_size; + uint32_t cu_bitmap[4][4]; +}; + enum kgd_memory_pool { KGD_POOL_SYSTEM_CACHEABLE = 1, KGD_POOL_SYSTEM_WRITECOMBINE = 2, @@ -153,6 +167,8 @@ struct tile_config { * * @get_tile_config: Returns GPU-specific tiling mode information * + * @get_cu_info: Retrieves activated cu info + * * This structure contains function pointers to services that the kgd driver * provides to amdkfd driver. * @@ -239,6 +255,9 @@ struct kfd2kgd_calls { void (*set_scratch_backing_va)(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); int (*get_tile_config)(struct kgd_dev *kgd, struct tile_config *config); + + void (*get_cu_info)(struct kgd_dev *kgd, + struct kfd_cu_info *cu_info); }; /** -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 04/37] drm/amdgpu: Implement get_local_mem_info
From: Harish Kasiviswanathan Implement new kgd-kfd interface function get_local_mem_info. Signed-off-by: Harish Kasiviswanathan Signed-off-by: Ben Goz Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c| 30 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h| 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 1 + 4 files changed, 34 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index cfb7827..56f6c12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -252,6 +252,36 @@ uint64_t get_vmem_size(struct kgd_dev *kgd) return adev->mc.real_vram_size; } +void get_local_mem_info(struct kgd_dev *kgd, + struct kfd_local_mem_info *mem_info) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask : +~((1ULL << 32) - 1); + resource_size_t aper_limit = adev->mc.aper_base + adev->mc.aper_size; + + memset(mem_info, 0, sizeof(*mem_info)); + if (!(adev->mc.aper_base & address_mask || aper_limit & address_mask)) { + mem_info->local_mem_size_public = adev->mc.visible_vram_size; + mem_info->local_mem_size_private = adev->mc.real_vram_size - + adev->mc.visible_vram_size; + } else { + mem_info->local_mem_size_public = 0; + mem_info->local_mem_size_private = adev->mc.real_vram_size; + } + mem_info->vram_width = adev->mc.vram_width; + + pr_debug("Address base: 0x%llx limit 0x%llx public 0x%llx private 0x%llx\n", + adev->mc.aper_base, aper_limit, + mem_info->local_mem_size_public, + mem_info->local_mem_size_private); + + if (amdgpu_sriov_vf(adev)) + mem_info->mem_clk_max = adev->clock.default_mclk / 100; + else + mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100; +} + uint64_t get_gpu_clock_counter(struct kgd_dev *kgd) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index a8fa225..bc5385a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -57,6 +57,8 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **cpu_ptr); void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); uint64_t get_vmem_size(struct kgd_dev *kgd); +void get_local_mem_info(struct kgd_dev *kgd, + struct kfd_local_mem_info *mem_info); uint64_t get_gpu_clock_counter(struct kgd_dev *kgd); uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index c9b98d0..b705608 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -174,6 +174,7 @@ static const struct kfd2kgd_calls kfd2kgd = { .init_gtt_mem_allocation = alloc_gtt_mem, .free_gtt_mem = free_gtt_mem, .get_vmem_size = get_vmem_size, + .get_local_mem_info = get_local_mem_info, .get_gpu_clock_counter = get_gpu_clock_counter, .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, .alloc_pasid = amdgpu_vm_alloc_pasid, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index c538e30..b0e581a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -133,6 +133,7 @@ static const struct kfd2kgd_calls kfd2kgd = { .init_gtt_mem_allocation = alloc_gtt_mem, .free_gtt_mem = free_gtt_mem, .get_vmem_size = get_vmem_size, + .get_local_mem_info = get_local_mem_info, .get_gpu_clock_counter = get_gpu_clock_counter, .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, .alloc_pasid = amdgpu_vm_alloc_pasid, -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 05/37] drm/amdkfd: Stop using get_vmem_size KGD-KFD interface
From: Harish Kasiviswanathan get_vmem_size() is deprecated. Instead use get_local_mem_info(). Signed-off-by: Harish Kasiviswanathan Signed-off-by: Ben Goz Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 9d03a56..cb0303a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1073,11 +1073,15 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) uint32_t buf[7]; uint64_t local_mem_size; int i; + struct kfd_local_mem_info local_mem_info; if (!gpu) return 0; - local_mem_size = gpu->kfd2kgd->get_vmem_size(gpu->kgd); + gpu->kfd2kgd->get_local_mem_info(gpu->kgd, &local_mem_info); + + local_mem_size = local_mem_info.local_mem_size_private + + local_mem_info.local_mem_size_public; buf[0] = gpu->pdev->devfn; buf[1] = gpu->pdev->subsystem_vendor; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 12/37] drm/amdkfd: Coding style cleanup
From: Kent Russell Minor cleanup that was missed previously because code moved around. Signed-off-by: Kent Russell Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 12 ++-- drivers/gpu/drm/amd/amdkfd/kfd_crat.h | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 1e331be..f2dda60 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -91,11 +91,11 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem) int i = 0; pr_info("Found memory entry in CRAT table with proximity_domain=%d\n", - mem->promixity_domain); + mem->proximity_domain); list_for_each_entry(dev, &topology_device_list, list) { - if (mem->promixity_domain == i) { + if (mem->proximity_domain == i) { props = kfd_alloc_struct(props); - if (props == NULL) + if (!props) return -ENOMEM; if (dev->node_props.cpu_cores_count == 0) @@ -141,7 +141,7 @@ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache) if (id == dev->node_props.cpu_core_id_base || id == dev->node_props.simd_id_base) { props = kfd_alloc_struct(props); - if (props == NULL) + if (!props) return -ENOMEM; props->processor_id_low = id; @@ -190,7 +190,7 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink) list_for_each_entry(dev, &topology_device_list, list) { if (id_from == i) { props = kfd_alloc_struct(props); - if (props == NULL) + if (!props) return -ENOMEM; props->node_from = id_from; @@ -260,7 +260,7 @@ static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr) ret = kfd_parse_subtype_iolink(iolink); break; default: - pr_warn("Unknown subtype (%d) in CRAT\n", + pr_warn("Unknown subtype %d in CRAT\n", sub_type_hdr->type); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h index 15371cb..920697b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h @@ -127,7 +127,7 @@ struct crat_subtype_memory { uint8_t length; uint16_treserved; uint32_tflags; - uint32_tpromixity_domain; + uint32_tproximity_domain; uint32_tbase_addr_low; uint32_tbase_addr_high; uint32_tlength_low; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 2b3fe95..b6cf785 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -895,7 +895,7 @@ int kfd_topology_remove_device(struct kfd_dev *gpu) up_write(&topology_lock); - if (res == 0) + if (!res) kfd_notify_gpu_change(gpu_id, 0); return res; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 10/37] drm/amdkfd: Fix memory leaks in kfd topology
From: Yong Zhao Kobject created using kobject_create_and_add() can be freed using kobject_put() when there is no referenece any more. However, kobject memory allocated with kzalloc() has to set up a release callback in order to free it when the counter decreases to 0. Otherwise it causes memory leak. Signed-off-by: Yong Zhao Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 10 ++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index b614746..9b9824f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -501,11 +501,17 @@ static ssize_t sysprops_show(struct kobject *kobj, struct attribute *attr, return ret; } +static void kfd_topology_kobj_release(struct kobject *kobj) +{ + kfree(kobj); +} + static const struct sysfs_ops sysprops_ops = { .show = sysprops_show, }; static struct kobj_type sysprops_type = { + .release = kfd_topology_kobj_release, .sysfs_ops = &sysprops_ops, }; @@ -541,6 +547,7 @@ static const struct sysfs_ops iolink_ops = { }; static struct kobj_type iolink_type = { + .release = kfd_topology_kobj_release, .sysfs_ops = &iolink_ops, }; @@ -568,6 +575,7 @@ static const struct sysfs_ops mem_ops = { }; static struct kobj_type mem_type = { + .release = kfd_topology_kobj_release, .sysfs_ops = &mem_ops, }; @@ -607,6 +615,7 @@ static const struct sysfs_ops cache_ops = { }; static struct kobj_type cache_type = { + .release = kfd_topology_kobj_release, .sysfs_ops = &cache_ops, }; @@ -729,6 +738,7 @@ static const struct sysfs_ops node_ops = { }; static struct kobj_type node_type = { + .release = kfd_topology_kobj_release, .sysfs_ops = &node_ops, }; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 17/37] drm/amdkfd: Turn verbose topology messages into pr_debug
Signed-off-by: Harish Kasiviswanathan Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 17 + drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index cd31dfb..8a0a9a0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -33,7 +33,7 @@ static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT) dev->node_props.capability |= HSA_CAP_ATS_PRESENT; - pr_info("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores, + pr_debug("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores, cu->processor_id_low); } @@ -52,7 +52,7 @@ static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev, dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu; if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE) dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE; - pr_info("CU GPU: id_base=%d\n", cu->processor_id_low); + pr_debug("CU GPU: id_base=%d\n", cu->processor_id_low); } /* kfd_parse_subtype_cu - parse compute unit subtypes and attach it to correct @@ -63,7 +63,7 @@ static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu, { struct kfd_topology_device *dev; - pr_info("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n", + pr_debug("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n", cu->proximity_domain, cu->hsa_capability); list_for_each_entry(dev, device_list, list) { if (cu->proximity_domain == dev->proximity_domain) { @@ -88,7 +88,7 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem, struct kfd_mem_properties *props; struct kfd_topology_device *dev; - pr_info("Found memory entry in CRAT table with proximity_domain=%d\n", + pr_debug("Found memory entry in CRAT table with proximity_domain=%d\n", mem->proximity_domain); list_for_each_entry(dev, device_list, list) { if (mem->proximity_domain == dev->proximity_domain) { @@ -133,7 +133,7 @@ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache, id = cache->processor_id_low; - pr_info("Found cache entry in CRAT table with processor_id=%d\n", id); + pr_debug("Found cache entry in CRAT table with processor_id=%d\n", id); list_for_each_entry(dev, device_list, list) if (id == dev->node_props.cpu_core_id_base || id == dev->node_props.simd_id_base) { @@ -182,7 +182,8 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, id_from = iolink->proximity_domain_from; id_to = iolink->proximity_domain_to; - pr_info("Found IO link entry in CRAT table with id_from=%d\n", id_from); + pr_debug("Found IO link entry in CRAT table with id_from=%d\n", + id_from); list_for_each_entry(dev, device_list, list) { if (id_from == dev->proximity_domain) { props = kfd_alloc_struct(props); @@ -248,13 +249,13 @@ static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr, /* * For now, nothing to do here */ - pr_info("Found TLB entry in CRAT table (not processing)\n"); + pr_debug("Found TLB entry in CRAT table (not processing)\n"); break; case CRAT_SUBTYPE_CCOMPUTE_AFFINITY: /* * For now, nothing to do here */ - pr_info("Found CCOMPUTE entry in CRAT table (not processing)\n"); + pr_debug("Found CCOMPUTE entry in CRAT table (not processing)\n"); break; case CRAT_SUBTYPE_IOLINK_AFFINITY: iolink = (struct crat_subtype_iolink *)sub_type_hdr; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index b2d2b7e..001e473 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -890,7 +890,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu) if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) { dev->node_props.capability |= HSA_CAP_DOORBELL_PACKET_TYPE; - pr_info("Adding doorbell packet type capability\n"); + pr_debug("Adding doorbell packet type capability\n"); } if (!res) -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 14/37] drm/amdkfd: Decouple CRAT parsing from device list update
From: Harish Kasiviswanathan Currently, CRAT parsing is intertwined with topology_device_list and hence repeated calls to kfd_parse_crat_table() will fail. Decouple kfd_parse_crat_table() and topology_device_list. kfd_parse_crat_table() will parse CRAT and add topology devices to a temporary list temp_topology_device_list and then kfd_topology_update_device_list will move contents from temporary list to master list. Signed-off-by: Harish Kasiviswanathan Signed-off-by: Kent Russell Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 118 +- drivers/gpu/drm/amd/amdkfd/kfd_crat.h | 3 +- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 84 ++--- drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 6 +- 4 files changed, 132 insertions(+), 79 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index e264f5d..cd31dfb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -23,8 +23,6 @@ #include "kfd_crat.h" #include "kfd_topology.h" -static int topology_crat_parsed; -extern struct list_head topology_device_list; extern struct kfd_system_properties sys_props; static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, @@ -57,16 +55,18 @@ static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev, pr_info("CU GPU: id_base=%d\n", cu->processor_id_low); } -/* kfd_parse_subtype_cu is called when the topology mutex is already acquired */ -static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu) +/* kfd_parse_subtype_cu - parse compute unit subtypes and attach it to correct + * topology device present in the device_list + */ +static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu, + struct list_head *device_list) { struct kfd_topology_device *dev; - int i = 0; pr_info("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n", cu->proximity_domain, cu->hsa_capability); - list_for_each_entry(dev, &topology_device_list, list) { - if (cu->proximity_domain == i) { + list_for_each_entry(dev, device_list, list) { + if (cu->proximity_domain == dev->proximity_domain) { if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT) kfd_populated_cu_info_cpu(dev, cu); @@ -74,26 +74,24 @@ static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu) kfd_populated_cu_info_gpu(dev, cu); break; } - i++; } return 0; } -/* - * kfd_parse_subtype_mem is called when the topology mutex is - * already acquired +/* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct + * topology device present in the device_list */ -static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem) +static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem, + struct list_head *device_list) { struct kfd_mem_properties *props; struct kfd_topology_device *dev; - int i = 0; pr_info("Found memory entry in CRAT table with proximity_domain=%d\n", mem->proximity_domain); - list_for_each_entry(dev, &topology_device_list, list) { - if (mem->proximity_domain == i) { + list_for_each_entry(dev, device_list, list) { + if (mem->proximity_domain == dev->proximity_domain) { props = kfd_alloc_struct(props); if (!props) return -ENOMEM; @@ -118,17 +116,16 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem) break; } - i++; } return 0; } -/* - * kfd_parse_subtype_cache is called when the topology mutex - * is already acquired +/* kfd_parse_subtype_cache - parse cache subtypes and attach it to correct + * topology device present in the device_list */ -static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache) +static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache, + struct list_head *device_list) { struct kfd_cache_properties *props; struct kfd_topology_device *dev; @@ -137,7 +134,7 @@ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache) id = cache->processor_id_low; pr_info("Found cache entry in CRAT table with processor_id=%d\n", id); - list_for_each_entry(dev, &topology_device_list, list) + list_for_each_entry(dev, device_list, list) if (id == dev->node_props.cpu_core_id_base || id == dev->node_props.simd_id_base) { props = kfd_alloc_struct(props); @@ -171,15 +168,14 @@ static
[PATCH 11/37] drm/amdkfd: Group up CRAT related functions
Take CRAT related functions out of kfd_topology.c and place them in kfd_crat.c. This is the initial step of supporting more CRAT features, i.e. creating virtual CRAT table for KFD devices without CRAT. Signed-off-by: Amber Lin Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/Makefile | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 350 ++ drivers/gpu/drm/amd/amdkfd/kfd_crat.h | 3 + drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 332 +--- drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 3 +- 5 files changed, 360 insertions(+), 330 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_crat.c diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index 5263e4d..153fb31 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -14,7 +14,7 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ kfd_process_queue_manager.o kfd_device_queue_manager.o \ kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \ kfd_interrupt.o kfd_events.o cik_event_interrupt.o \ - kfd_dbgdev.o kfd_dbgmgr.o + kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o amdkfd-$(CONFIG_DEBUG_FS) += kfd_debugfs.o diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c new file mode 100644 index 000..1e331be --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -0,0 +1,350 @@ +/* + * Copyright 2015-2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include +#include "kfd_crat.h" +#include "kfd_topology.h" + +static int topology_crat_parsed; +extern struct list_head topology_device_list; +extern struct kfd_system_properties sys_props; + +static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, + struct crat_subtype_computeunit *cu) +{ + dev->node_props.cpu_cores_count = cu->num_cpu_cores; + dev->node_props.cpu_core_id_base = cu->processor_id_low; + if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT) + dev->node_props.capability |= HSA_CAP_ATS_PRESENT; + + pr_info("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores, + cu->processor_id_low); +} + +static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev, + struct crat_subtype_computeunit *cu) +{ + dev->node_props.simd_id_base = cu->processor_id_low; + dev->node_props.simd_count = cu->num_simd_cores; + dev->node_props.lds_size_in_kb = cu->lds_size_in_kb; + dev->node_props.max_waves_per_simd = cu->max_waves_simd; + dev->node_props.wave_front_size = cu->wave_front_size; + dev->node_props.mem_banks_count = cu->num_banks; + dev->node_props.array_count = cu->num_arrays; + dev->node_props.cu_per_simd_array = cu->num_cu_per_array; + dev->node_props.simd_per_cu = cu->num_simd_per_cu; + dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu; + if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE) + dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE; + pr_info("CU GPU: id_base=%d\n", cu->processor_id_low); +} + +/* kfd_parse_subtype_cu is called when the topology mutex is already acquired */ +static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu) +{ + struct kfd_topology_device *dev; + int i = 0; + + pr_info("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n", + cu->proximity_domain, cu->hsa_capability); + list_for_each_entry(dev, &topology_device_list, list) { + if (cu->proximity_domain == i) { + if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT) + kfd_populated_cu_info_cpu(dev, cu); + +
[PATCH 09/37] drm/amdkfd: Topology: Fix location_id
From: Harish Kasiviswanathan Fix location_id format to match Thunk specification. Signed-off-by: Harish Kasiviswanathan Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index ca2e51a..b614746 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1166,8 +1166,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu) cu_info.cu_active_number; dev->node_props.vendor_id = gpu->pdev->vendor; dev->node_props.device_id = gpu->pdev->device; - dev->node_props.location_id = (gpu->pdev->bus->number << 24) + - (gpu->pdev->devfn & 0xff); + dev->node_props.location_id = PCI_DEVID(gpu->pdev->bus->number, + gpu->pdev->devfn); /* * TODO: Retrieve max engine clock values from KGD */ -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 13/37] drm/amdkfd: Reorganize CRAT fetching from ACPI
From: Harish Kasiviswanathan Reorganize and rename kfd_topology_get_crat_acpi function. In this way acpi_get_table(..) needs to be called only once. This will also aid in dGPU topology implementation. Signed-off-by: Harish Kasiviswanathan Signed-off-by: Kent Russell Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 41 +-- drivers/gpu/drm/amd/amdkfd/kfd_crat.h | 3 ++- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 40 ++ 3 files changed, 54 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index f2dda60..e264f5d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -319,17 +319,29 @@ int kfd_parse_crat_table(void *crat_image) return 0; } -int kfd_topology_get_crat_acpi(void *crat_image, size_t *size) +/* + * kfd_create_crat_image_acpi - Allocates memory for CRAT image and + * copies CRAT from ACPI (if available). + * NOTE: Call kfd_destroy_crat_image to free CRAT image memory + * + * @crat_image: CRAT read from ACPI. If no CRAT in ACPI then + * crat_image will be NULL + * @size: [OUT] size of crat_image + * + * Return 0 if successful else return -ve value + */ +int kfd_create_crat_image_acpi(void **crat_image, size_t *size) { struct acpi_table_header *crat_table; acpi_status status; + void *pcrat_image; - if (!size) + if (!crat_image) return -EINVAL; - /* -* Fetch the CRAT table from ACPI -*/ + *crat_image = NULL; + + /* Fetch the CRAT table from ACPI */ status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table); if (status == AE_NOT_FOUND) { pr_warn("CRAT table not found\n"); @@ -341,10 +353,25 @@ int kfd_topology_get_crat_acpi(void *crat_image, size_t *size) return -EINVAL; } - if (*size >= crat_table->length && crat_image != NULL) - memcpy(crat_image, crat_table, crat_table->length); + pcrat_image = kmalloc(crat_table->length, GFP_KERNEL); + if (!pcrat_image) + return -ENOMEM; + + memcpy(pcrat_image, crat_table, crat_table->length); + *crat_image = pcrat_image; *size = crat_table->length; return 0; } + +/* + * kfd_destroy_crat_image + * + * @crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..) + * + */ +void kfd_destroy_crat_image(void *crat_image) +{ + kfree(crat_image); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h index 920697b..da83105 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h @@ -291,7 +291,8 @@ struct cdit_header { #pragma pack() -int kfd_topology_get_crat_acpi(void *crat_image, size_t *size); +int kfd_create_crat_image_acpi(void **crat_image, size_t *size); +void kfd_destroy_crat_image(void *crat_image); int kfd_parse_crat_table(void *crat_image); #endif /* KFD_CRAT_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index b6cf785..35da4af 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -699,35 +699,31 @@ int kfd_topology_init(void) /* * Get the CRAT image from the ACPI */ - ret = kfd_topology_get_crat_acpi(crat_image, &image_size); - if (ret == 0 && image_size > 0) { - pr_info("Found CRAT image with size=%zd\n", image_size); - crat_image = kmalloc(image_size, GFP_KERNEL); - if (!crat_image) { - ret = -ENOMEM; - pr_err("No memory for allocating CRAT image\n"); + ret = kfd_create_crat_image_acpi(&crat_image, &image_size); + if (!ret) { + ret = kfd_parse_crat_table(crat_image); + if (ret) goto err; - } - ret = kfd_topology_get_crat_acpi(crat_image, &image_size); - - if (ret == 0) { - down_write(&topology_lock); - ret = kfd_parse_crat_table(crat_image); - if (ret == 0) - ret = kfd_topology_update_sysfs(); - up_write(&topology_lock); - } else { - pr_err("Couldn't get CRAT table size from ACPI\n"); - } - kfree(crat_image); } else if (ret == -ENODATA) { + /* TODO: Create fake CRAT table */ ret = 0; + goto err; } else { pr_err("Couldn't get CRAT table size from ACPI\n"); + goto err; } + down_write(&topology_lock); + ret = kfd_topology_update_sysfs(); + up_write(&topolo
[PATCH 08/37] drm/amdkfd: Update number of compute unit from KGD
From: Flora Cui Overwrite the active simd_count from KGD at driver loading time. This is based on assumption that register GC_USER_SHADER_ARRAY_CONFIG won’t get changed. V2: remove the incorrect simd_count reported at loading module. Signed-off-by: Flora Cui Reviewed by: Yair Shachar< yair.shac...@amd.com> Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index cb0303a..ca2e51a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -133,8 +133,7 @@ static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev, dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu; if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE) dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE; - pr_info("CU GPU: simds=%d id_base=%d\n", cu->num_simd_cores, - cu->processor_id_low); + pr_info("CU GPU: id_base=%d\n", cu->processor_id_low); } /* kfd_parse_subtype_cu is called when the topology mutex is already acquired */ @@ -1124,6 +1123,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu) { uint32_t gpu_id; struct kfd_topology_device *dev; + struct kfd_cu_info cu_info; int res; gpu_id = kfd_generate_gpu_id(gpu); @@ -1161,6 +1161,9 @@ int kfd_topology_add_device(struct kfd_dev *gpu) dev->gpu_id = gpu_id; gpu->id = gpu_id; + dev->gpu->kfd2kgd->get_cu_info(dev->gpu->kgd, &cu_info); + dev->node_props.simd_count = dev->node_props.simd_per_cu * + cu_info.cu_active_number; dev->node_props.vendor_id = gpu->pdev->vendor; dev->node_props.device_id = gpu->pdev->device; dev->node_props.location_id = (gpu->pdev->bus->number << 24) + -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 02/37] drm/amdgpu: add amdgpu interface to query cu info
From: Flora Cui Signed-off-by: Flora Cui Signed-off-by: Harish Kasiviswanathan Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c| 23 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h| 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 7 +++ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 5 + 7 files changed, 39 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index cbcb6a1..a6552c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -954,6 +954,7 @@ struct amdgpu_gfx_config { }; struct amdgpu_cu_info { + uint32_t simd_per_cu; uint32_t max_waves_per_simd; uint32_t wave_front_size; uint32_t max_scratch_slots_per_cu; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index f7fa767..cfb7827 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -271,3 +271,26 @@ uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) return amdgpu_dpm_get_sclk(adev, false) / 100; } + +void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + struct amdgpu_cu_info acu_info = adev->gfx.cu_info; + + memset(cu_info, 0, sizeof(*cu_info)); + if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap)) + return; + + cu_info->cu_active_number = acu_info.number; + cu_info->cu_ao_mask = acu_info.ao_cu_mask; + memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0], + sizeof(acu_info.bitmap)); + cu_info->num_shader_engines = adev->gfx.config.max_shader_engines; + cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se; + cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh; + cu_info->simd_per_cu = acu_info.simd_per_cu; + cu_info->max_waves_per_simd = acu_info.max_waves_per_simd; + cu_info->wave_front_size = acu_info.wave_front_size; + cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu; + cu_info->lds_size = acu_info.lds_size; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 8d689ab..a8fa225 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -60,6 +60,7 @@ uint64_t get_vmem_size(struct kgd_dev *kgd); uint64_t get_gpu_clock_counter(struct kgd_dev *kgd); uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd); +void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info); #define read_user_wptr(mmptr, wptr, dst) \ ({ \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 12feba8..c9b98d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -200,6 +200,7 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_fw_version = get_fw_version, .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, + .get_cu_info = get_cu_info }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index b380495..c538e30 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -161,6 +161,7 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_fw_version = get_fw_version, .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, + .get_cu_info = get_cu_info }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 5c8a7a4..43f9e10 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -48,6 +48,8 @@ #include "oss/oss_2_0_d.h" #include "oss/oss_2_0_sh_mask.h" +#define NUM_SIMD_PER_CU 0x4 /* missing from the gfx_7 IP headers */ + #define GFX7_NUM_GFX_RINGS 1 #define GFX7_MEC_HPD_SIZE 2048 @@ -5282,6 +5284,11 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) cu_info->number = active_cu_number; cu_info->ao_cu_mask = ao_cu_mask; + cu_info->simd_per_cu = NUM_SIMD_PER_CU; + cu_info->max_waves_per_simd = 10; + cu_info->max_scratch_slots_per_cu = 32; + cu_info->w
[PATCH 06/37] drm/amdkfd: Remove deprecated get_vmem_size
From: Harish Kasiviswanathan Signed-off-by: Harish Kasiviswanathan Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c| 10 -- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h| 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 1 - 4 files changed, 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 56f6c12..972ecf0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -242,16 +242,6 @@ void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) kfree(mem); } -uint64_t get_vmem_size(struct kgd_dev *kgd) -{ - struct amdgpu_device *adev = - (struct amdgpu_device *)kgd; - - BUG_ON(kgd == NULL); - - return adev->mc.real_vram_size; -} - void get_local_mem_info(struct kgd_dev *kgd, struct kfd_local_mem_info *mem_info) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index bc5385a..eed7dea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -56,7 +56,6 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr); void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); -uint64_t get_vmem_size(struct kgd_dev *kgd); void get_local_mem_info(struct kgd_dev *kgd, struct kfd_local_mem_info *mem_info); uint64_t get_gpu_clock_counter(struct kgd_dev *kgd); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index b705608..c9e2fbe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -173,7 +173,6 @@ static int get_tile_config(struct kgd_dev *kgd, static const struct kfd2kgd_calls kfd2kgd = { .init_gtt_mem_allocation = alloc_gtt_mem, .free_gtt_mem = free_gtt_mem, - .get_vmem_size = get_vmem_size, .get_local_mem_info = get_local_mem_info, .get_gpu_clock_counter = get_gpu_clock_counter, .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index b0e581a..72ff646 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -132,7 +132,6 @@ static int get_tile_config(struct kgd_dev *kgd, static const struct kfd2kgd_calls kfd2kgd = { .init_gtt_mem_allocation = alloc_gtt_mem, .free_gtt_mem = free_gtt_mem, - .get_vmem_size = get_vmem_size, .get_local_mem_info = get_local_mem_info, .get_gpu_clock_counter = get_gpu_clock_counter, .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 03/37] drm/amd: Add get_local_mem_info to KGD-KFD interface
From: Harish Kasiviswanathan Add get_local_mem_info which provides more information about local memory than get_vmem_size: * public and private framebuffer size * memory clock Signed-off-by: Harish Kasiviswanathan Signed-off-by: Ben Goz Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 12 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 3a93ffe..c58389c 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -60,6 +60,14 @@ struct kfd_cu_info { uint32_t cu_bitmap[4][4]; }; +/* For getting GPU local memory information from KGD */ +struct kfd_local_mem_info { + uint64_t local_mem_size_private; + uint64_t local_mem_size_public; + uint32_t vram_width; + uint32_t mem_clk_max; +}; + enum kgd_memory_pool { KGD_POOL_SYSTEM_CACHEABLE = 1, KGD_POOL_SYSTEM_WRITECOMBINE = 2, @@ -122,6 +130,8 @@ struct tile_config { * * @get_vmem_size: Retrieves (physical) size of VRAM * + * @get_local_mem_info: Retrieves information about GPU local memory + * * @get_gpu_clock_counter: Retrieves GPU clock counter * * @get_max_engine_clock_in_mhz: Retrieves maximum GPU clock in MHz @@ -181,6 +191,8 @@ struct kfd2kgd_calls { void (*free_gtt_mem)(struct kgd_dev *kgd, void *mem_obj); uint64_t (*get_vmem_size)(struct kgd_dev *kgd); + void (*get_local_mem_info)(struct kgd_dev *kgd, + struct kfd_local_mem_info *mem_info); uint64_t (*get_gpu_clock_counter)(struct kgd_dev *kgd); uint32_t (*get_max_engine_clock_in_mhz)(struct kgd_dev *kgd); -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 21/37] drm/amdkfd: Add topology support for dGPUs
From: Harish Kasiviswanathan Generate and parse VCRAT tables for dGPUs in kfd_topology_add_device. Some information that isn't available in the CRAT table is patched into the topology after parsing. HSA_CAP_DOORBELL_TYPE_1_0 is dependent on the ASIC feature CP_HQD_PQ_CONTROL.SLOT_BASED_WPTR, which was not introduced in VI until Carrizo. Report HSA_CAP_DOORBELL_TYPE_PRE_1_0 on Tonga ASICs. Signed-off-by: Harish Kasiviswanathan Signed-off-by: Ben Goz Signed-off-by: Amber Lin Signed-off-by: Jay Cornwall Signed-off-by: Kent Russell Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 594 +- drivers/gpu/drm/amd/amdkfd/kfd_crat.h | 5 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 + drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 188 -- drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 8 +- 5 files changed, 746 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 00732ec..ba7577b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -20,10 +20,117 @@ * OTHER DEALINGS IN THE SOFTWARE. */ #include +#include #include "kfd_crat.h" #include "kfd_priv.h" #include "kfd_topology.h" +/* GPU Processor ID base for dGPUs for which VCRAT needs to be created. + * GPU processor ID are expressed with Bit[31]=1. + * The base is set to 0x8000_ + 0x1000 to avoid collision with GPU IDs + * used in the CRAT. + */ +static uint32_t gpu_processor_id_low = 0x80001000; + +/* Return the next available gpu_processor_id and increment it for next GPU + * @total_cu_count - Total CUs present in the GPU including ones + * masked off + */ +static inline unsigned int get_and_inc_gpu_processor_id( + unsigned int total_cu_count) +{ + int current_id = gpu_processor_id_low; + + gpu_processor_id_low += total_cu_count; + return current_id; +} + +/* Static table to describe GPU Cache information */ +struct kfd_gpu_cache_info { + uint32_tcache_size; + uint32_tcache_level; + uint32_tflags; + /* Indicates how many Compute Units share this cache +* Value = 1 indicates the cache is not shared +*/ + uint32_tnum_cu_shared; +}; + +static struct kfd_gpu_cache_info kaveri_cache_info[] = { + { + /* TCP L1 Cache per CU */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 1, + + }, + { + /* Scalar L1 Instruction Cache (in SQC module) per bank */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_INST_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* Scalar L1 Data Cache (in SQC module) per bank */ + .cache_size = 8, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + + /* TODO: Add L2 Cache information */ +}; + + +static struct kfd_gpu_cache_info carrizo_cache_info[] = { + { + /* TCP L1 Cache per CU */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 1, + }, + { + /* Scalar L1 Instruction Cache (in SQC module) per bank */ + .cache_size = 8, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_INST_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 4, + }, + { + /* Scalar L1 Data Cache (in SQC module) per bank. */ + .cache_size = 4, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 4, + }, + + /* TODO: Add L2 Cache information */ +}; + +/* NOTE: In future if more information is added to struct kfd_gpu_cache_info + * the following ASICs may need a separate table. + */ +#define hawaii_cache_info kaveri_cache_info +#define tonga_cache_info carrizo_c
[PATCH 15/37] drm/amdkfd: Support enumerating non-GPU devices
From: Harish Kasiviswanathan Modify kfd_topology_enum_kfd_devices(..) function to support non-GPU nodes. The function returned NULL when it encountered non-GPU (say CPU) nodes. This caused kfd_ioctl_create_event and kfd_init_apertures to fail for Intel + Tonga. kfd_topology_enum_kfd_devices will now parse all the nodes and return valid kfd_dev for nodes with GPU. Signed-off-by: Harish Kasiviswanathan Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 7 ++- drivers/gpu/drm/amd/amdkfd/kfd_pasid.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h| 2 +- drivers/gpu/drm/amd/amdkfd/kfd_topology.c| 18 +++--- 4 files changed, 19 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index c59384b..7377513 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -300,9 +300,14 @@ int kfd_init_apertures(struct kfd_process *process) struct kfd_process_device *pdd; /*Iterating over all devices*/ - while ((dev = kfd_topology_enum_kfd_devices(id)) != NULL && + while (kfd_topology_enum_kfd_devices(id, &dev) == 0 && id < NUM_OF_SUPPORTED_GPUS) { + if (!dev) { + id++; /* Skip non GPU devices */ + continue; + } + pdd = kfd_create_process_device_data(dev, process); if (!pdd) { pr_err("Failed to create process device data\n"); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c index d6a7961..15fff44 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c @@ -59,7 +59,7 @@ unsigned int kfd_pasid_alloc(void) struct kfd_dev *dev = NULL; unsigned int i = 0; - while ((dev = kfd_topology_enum_kfd_devices(i)) != NULL) { + while ((kfd_topology_enum_kfd_devices(i, &dev)) == 0) { if (dev && dev->kfd2kgd) { kfd2kgd = dev->kfd2kgd; break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 0c96a6b..69a6206 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -670,7 +670,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu); int kfd_topology_remove_device(struct kfd_dev *gpu); struct kfd_dev *kfd_device_by_id(uint32_t gpu_id); struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev); -struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx); +int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev); /* Interrupts */ int kfd_interrupt_init(struct kfd_dev *dev); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index f64350b..b2d2b7e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -927,22 +927,26 @@ int kfd_topology_remove_device(struct kfd_dev *gpu) return res; } -/* - * When idx is out of bounds, the function will return NULL +/* kfd_topology_enum_kfd_devices - Enumerate through all devices in KFD + * topology. If GPU device is found @idx, then valid kfd_dev pointer is + * returned through @kdev + * Return -0: On success (@kdev will be NULL for non GPU nodes) + * -1: If end of list */ -struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx) +int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev) { struct kfd_topology_device *top_dev; - struct kfd_dev *device = NULL; uint8_t device_idx = 0; + *kdev = NULL; down_read(&topology_lock); list_for_each_entry(top_dev, &topology_device_list, list) { if (device_idx == idx) { - device = top_dev->gpu; - break; + *kdev = top_dev->gpu; + up_read(&topology_lock); + return 0; } device_idx++; @@ -950,7 +954,7 @@ struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx) up_read(&topology_lock); - return device; + return -1; } -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 28/37] drm/amdkfd: Add support for displaying VRAM usage
From: Kent Russell Add a sysfs file in topology (node/x/memory_banks/X/used_memory) that reports the current VRAM usage for that node. Only works for GPU nodes at this time. Signed-off-by: Kent Russell Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 49 +++ drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 4 ++- 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 7f0d41e..7f04038 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -186,6 +186,8 @@ struct kfd_topology_device *kfd_create_topology_device( sysfs_show_gen_prop(buffer, "%s %llu\n", name, value) #define sysfs_show_32bit_val(buffer, value) \ sysfs_show_gen_prop(buffer, "%u\n", value) +#define sysfs_show_64bit_val(buffer, value) \ + sysfs_show_gen_prop(buffer, "%llu\n", value) #define sysfs_show_str_val(buffer, value) \ sysfs_show_gen_prop(buffer, "%s\n", value) @@ -268,11 +270,23 @@ static ssize_t mem_show(struct kobject *kobj, struct attribute *attr, { ssize_t ret; struct kfd_mem_properties *mem; + uint64_t used_mem; /* Making sure that the buffer is an empty string */ buffer[0] = 0; - mem = container_of(attr, struct kfd_mem_properties, attr); + if (strcmp(attr->name, "used_memory") == 0) { + mem = container_of(attr, struct kfd_mem_properties, attr_used); + if (mem->gpu) { + used_mem = mem->gpu->kfd2kgd->get_vram_usage( + mem->gpu->kgd); + return sysfs_show_64bit_val(buffer, used_mem); + } + /* TODO: Report APU/CPU-allocated memory; For now return 0 */ + return 0; + } + + mem = container_of(attr, struct kfd_mem_properties, attr_props); sysfs_show_32bit_prop(buffer, "heap_type", mem->heap_type); sysfs_show_64bit_prop(buffer, "size_in_bytes", mem->size_in_bytes); sysfs_show_32bit_prop(buffer, "flags", mem->flags); @@ -527,7 +541,12 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) if (dev->kobj_mem) { list_for_each_entry(mem, &dev->mem_props, list) if (mem->kobj) { - kfd_remove_sysfs_file(mem->kobj, &mem->attr); + /* TODO: Remove when CPU/APU supported */ + if (dev->node_props.cpu_cores_count == 0) + sysfs_remove_file(mem->kobj, + &mem->attr_used); + kfd_remove_sysfs_file(mem->kobj, + &mem->attr_props); mem->kobj = NULL; } kobject_del(dev->kobj_mem); @@ -629,12 +648,23 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, if (ret < 0) return ret; - mem->attr.name = "properties"; - mem->attr.mode = KFD_SYSFS_FILE_MODE; - sysfs_attr_init(&mem->attr); - ret = sysfs_create_file(mem->kobj, &mem->attr); + mem->attr_props.name = "properties"; + mem->attr_props.mode = KFD_SYSFS_FILE_MODE; + sysfs_attr_init(&mem->attr_props); + ret = sysfs_create_file(mem->kobj, &mem->attr_props); if (ret < 0) return ret; + + /* TODO: Support APU/CPU memory usage */ + if (dev->node_props.cpu_cores_count == 0) { + mem->attr_used.name = "used_memory"; + mem->attr_used.mode = KFD_SYSFS_FILE_MODE; + sysfs_attr_init(&mem->attr_used); + ret = sysfs_create_file(mem->kobj, &mem->attr_used); + if (ret < 0) + return ret; + } + i++; } @@ -1075,15 +1105,22 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) { struct kfd_topology_device *dev; struct kfd_topology_device *out_dev = NULL; + struct kfd_mem_properties *mem; down_write(&topology_lock); list_for_each_entry(dev, &topology_device_list, list) if (!dev->gpu && (dev->node_props.simd_count > 0)) { dev->gpu = gpu; out_dev = dev; + + /* Assign mem->gpu */ + list_for_each_entry(mem, &dev->mem_props, list) + mem->gpu = dev->gpu; + break; }
[PATCH 29/37] PCI: Add pci_enable_atomic_ops_to_root
From: Jay Cornwall The PCIe 3.0 AtomicOp (6.15) feature allows atomic transctions to be requested by, routed through and completed by PCIe components. Routing and completion do not require software support. Component support for each is detectable via the DEVCAP2 register. AtomicOp requests are permitted only if a component's DEVCTL2.ATOMICOP_REQUESTER_ENABLE field is set. This capability cannot be detected but is a no-op if set on a component with no support. These requests can only be serviced if the upstream components support AtomicOp completion and/or routing to a component which does. A concrete example is the AMD Fiji-class GPU, which is specified to support AtomicOp requests, routed through a PLX 8747 switch (advertising AtomicOp routing) to a Haswell host bridge (advertising AtomicOp completion support). When AtomicOp requests are disabled the GPU logs attempts to initiate requests to an MMIO register for debugging. Add pci_enable_atomic_ops_to_root for per-device control over AtomicOp requests. Upstream bridges are checked for AtomicOp routing capability and the call fails if any lack this capability. The root port is checked for AtomicOp completion capabilities and the call fails if it does not support any. Routes to other PCIe components are not checked for AtomicOp routing and completion capabilities. v2: Check for AtomicOp route to root port with AtomicOp completion v3: Style fixes v4: Endpoint to root port only, check upstream egress blocking v5: Rebase, use existing PCI_EXP_DEVCTL2_ATOMIC_EGRESS_BLOCK define CC: linux-...@vger.kernel.org Signed-off-by: Jay Cornwall Signed-off-by: Felix Kuehling --- drivers/pci/pci.c | 81 +++ include/linux/pci.h | 1 + include/uapi/linux/pci_regs.h | 2 ++ 3 files changed, 84 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 6078dfc..89a8bb0 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2966,6 +2966,87 @@ bool pci_acs_path_enabled(struct pci_dev *start, } /** + * pci_enable_atomic_ops_to_root - enable AtomicOp requests to root port + * @dev: the PCI device + * + * Return 0 if the device is capable of generating AtomicOp requests, + * all upstream bridges support AtomicOp routing, egress blocking is disabled + * on all upstream ports, and the root port supports 32-bit, 64-bit and/or + * 128-bit AtomicOp completion, or negative otherwise. + */ +int pci_enable_atomic_ops_to_root(struct pci_dev *dev) +{ + struct pci_bus *bus = dev->bus; + + if (!pci_is_pcie(dev)) + return -EINVAL; + + switch (pci_pcie_type(dev)) { + /* +* PCIe 3.0, 6.15 specifies that endpoints and root ports are permitted +* to implement AtomicOp requester capabilities. +*/ + case PCI_EXP_TYPE_ENDPOINT: + case PCI_EXP_TYPE_LEG_END: + case PCI_EXP_TYPE_RC_END: + break; + default: + return -EINVAL; + } + + while (bus->parent) { + struct pci_dev *bridge = bus->self; + u32 cap; + + pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &cap); + + switch (pci_pcie_type(bridge)) { + /* +* Upstream, downstream and root ports may implement AtomicOp +* routing capabilities. AtomicOp routing via a root port is +* not considered. +*/ + case PCI_EXP_TYPE_UPSTREAM: + case PCI_EXP_TYPE_DOWNSTREAM: + if (!(cap & PCI_EXP_DEVCAP2_ATOMIC_ROUTE)) + return -EINVAL; + break; + + /* +* Root ports are permitted to implement AtomicOp completion +* capabilities. +*/ + case PCI_EXP_TYPE_ROOT_PORT: + if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | +PCI_EXP_DEVCAP2_ATOMIC_COMP64 | +PCI_EXP_DEVCAP2_ATOMIC_COMP128))) + return -EINVAL; + break; + } + + /* +* Upstream ports may block AtomicOps on egress. +*/ + if (pci_pcie_type(bridge) == PCI_EXP_TYPE_UPSTREAM) { + u32 ctl2; + + pcie_capability_read_dword(bridge, PCI_EXP_DEVCTL2, + &ctl2); + if (ctl2 & PCI_EXP_DEVCTL2_ATOMIC_EGRESS_BLOCK) + return -EINVAL; + } + + bus = bus->parent; + } + + pcie_capability_set_word(dev, PCI_EXP_DEVCTL2, +PCI_EXP_DEVCTL2_ATOMIC_REQ); + + return 0; +} +EXPORT_SYMBOL(pci_enable_atomic_ops_to_root); + +/** * pci_swizzle_interrupt_pin - swizzle INTx for device behind bridge
[PATCH 07/37] drm/amd: Remove get_vmem_size from KGD-KFD interface
From: Harish Kasiviswanathan Signed-off-by: Harish Kasiviswanathan Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index c58389c..0899cee 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -128,8 +128,6 @@ struct tile_config { * * @free_gtt_mem: Frees a buffer that was allocated on the gart aperture * - * @get_vmem_size: Retrieves (physical) size of VRAM - * * @get_local_mem_info: Retrieves information about GPU local memory * * @get_gpu_clock_counter: Retrieves GPU clock counter @@ -190,7 +188,6 @@ struct kfd2kgd_calls { void (*free_gtt_mem)(struct kgd_dev *kgd, void *mem_obj); - uint64_t (*get_vmem_size)(struct kgd_dev *kgd); void (*get_local_mem_info)(struct kgd_dev *kgd, struct kfd_local_mem_info *mem_info); uint64_t (*get_gpu_clock_counter)(struct kgd_dev *kgd); -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 00/37] KFD dGPU topology and initialization
This patch series adds support for dGPU topology to KFD and implements everything needed to initialize KFD on dGPUs. This is still missing dGPU memory management APIs, so it's not going to be able to run any user mode tests yet. But device information about CPUs and supported dGPUs should be reported correctly in /sys/class/kfd/kfd/topology/nodes/*. Patches 1-10 are small fixes and additions to the topology code. Patches 11-19 reorganize the topology code to prepare for dGPU support. Patches 20 and 21 add topology support for CPUs and dGPUs respectively. Patches 22-28 add more topology features and fixes/workarounds. Patch 29 adds a helper to enable PCIe atomics to the PCI driver. Patches 30-36 enable KFD initialization on dGPUs Patch 37 enables KFD initialization on supported dGPUs in AMDGPU. This is my last patch series this year. I worked hard to finish this before my year-end vacation. Amber Lin (1): drm/amdkfd: Add perf counters to topology Ben Goz (1): drm/amdkfd: Add AQL Queue Memory flag on topology Felix Kuehling (13): drm/amdkfd: Group up CRAT related functions drm/amdkfd: Turn verbose topology messages into pr_debug drm/amdkfd: Simplify counting of memory banks drm/amdkfd: Add topology support for CPUs drm/amdkfd: Module option to disable CRAT table drm/amdkfd: Conditionally enable PCIe atomics drm/amdkfd: Make IOMMUv2 code conditional drm/amdkfd: Make sched_policy a per-device setting drm/amdkfd: Add dGPU support to the device queue manager drm/amdkfd: Add dGPU support to the MQD manager drm/amdkfd: Add dGPU support to kernel_queue_init drm/amdkfd: Add dGPU device IDs and device info drm/amdgpu: Enable KFD initialization on dGPUs Flora Cui (3): drm/amd: add new interface to query cu info drm/amdgpu: add amdgpu interface to query cu info drm/amdkfd: Update number of compute unit from KGD Harish Kasiviswanathan (13): drm/amd: Add get_local_mem_info to KGD-KFD interface drm/amdgpu: Implement get_local_mem_info drm/amdkfd: Stop using get_vmem_size KGD-KFD interface drm/amdkfd: Remove deprecated get_vmem_size drm/amd: Remove get_vmem_size from KGD-KFD interface drm/amdkfd: Topology: Fix location_id drm/amdkfd: Reorganize CRAT fetching from ACPI drm/amdkfd: Decouple CRAT parsing from device list update drm/amdkfd: Support enumerating non-GPU devices drm/amdkfd: sync IOLINK defines to thunk spec drm/amdkfd: Fix sibling_map[] size drm/amdkfd: Add topology support for dGPUs drm/amdkfd: Ignore ACPI CRAT for non-APU systems Jay Cornwall (1): PCI: Add pci_enable_atomic_ops_to_root Kent Russell (3): drm/amdkfd: Coding style cleanup drm/amdgpu: Add support for reporting VRAM usage drm/amdkfd: Add support for displaying VRAM usage Philip Cox (1): drm/amdkfd: Fixup incorrect info in the CZ CRAT table Yong Zhao (1): drm/amdkfd: Fix memory leaks in kfd topology drivers/gpu/drm/amd/amdgpu/amdgpu.h|1 + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 65 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |5 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c |4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c |4 +- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c |7 + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c |5 + drivers/gpu/drm/amd/amdkfd/Kconfig |2 +- drivers/gpu/drm/amd/amdkfd/Makefile|2 +- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |3 +- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 1271 drivers/gpu/drm/amd/amdkfd/kfd_crat.h | 42 +- drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c|3 +- drivers/gpu/drm/amd/amdkfd/kfd_device.c| 230 +++- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 33 +- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h |5 + .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c | 56 + .../drm/amd/amdkfd/kfd_device_queue_manager_vi.c | 93 ++ drivers/gpu/drm/amd/amdkfd/kfd_events.c|2 + drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c |7 +- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c |5 + drivers/gpu/drm/amd/amdkfd/kfd_module.c|5 + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c |7 + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 35 +- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c| 21 + drivers/gpu/drm/amd/amdkfd/kfd_pasid.c |2 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 21 +- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 17 +- .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c |3 +- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 1054 +--- drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 39 +- drivers/gpu/drm/amd/include/kgd_kfd_interface.h| 35 +- drivers/pci/pci.c | 81 ++ include/linux/pci.h
[PATCH 27/37] drm/amdgpu: Add support for reporting VRAM usage
From: Kent Russell Add functions to report the vram_usage from the amdgpu_device Signed-off-by: Kent Russell Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c| 7 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h| 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 3 ++- drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 3 +++ 5 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 972ecf0..51284bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -314,3 +314,10 @@ void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info) cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu; cu_info->lds_size = acu_info.lds_size; } + +uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index eed7dea..2a519f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -62,6 +62,7 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd); uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd); void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info); +uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); #define read_user_wptr(mmptr, wptr, dst) \ ({ \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index c9e2fbe..3d60e1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -200,7 +200,8 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_fw_version = get_fw_version, .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, - .get_cu_info = get_cu_info + .get_cu_info = get_cu_info, + .get_vram_usage = amdgpu_amdkfd_get_vram_usage }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 72ff646..66b513e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -161,7 +161,8 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_fw_version = get_fw_version, .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, - .get_cu_info = get_cu_info + .get_cu_info = get_cu_info, + .get_vram_usage = amdgpu_amdkfd_get_vram_usage }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 0899cee..a6752bd 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -177,6 +177,8 @@ struct tile_config { * * @get_cu_info: Retrieves activated cu info * + * @get_vram_usage: Returns current VRAM usage + * * This structure contains function pointers to services that the kgd driver * provides to amdkfd driver. * @@ -267,6 +269,7 @@ struct kfd2kgd_calls { void (*get_cu_info)(struct kgd_dev *kgd, struct kfd_cu_info *cu_info); + uint64_t (*get_vram_usage)(struct kgd_dev *kgd); }; /** -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 24/37] drm/amdkfd: Add AQL Queue Memory flag on topology
From: Ben Goz This is needed for enabling a user-mode workaround for an AQL queue wrapping HW bug on Tonga. Signed-off-by: Ben Goz Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 4 drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 80bc71d..e7daf2c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -455,6 +455,10 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, HSA_CAP_WATCH_POINTS_TOTALBITS_MASK); } + if (dev->gpu->device_info->asic_family == CHIP_TONGA) + dev->node_props.capability |= + HSA_CAP_AQL_QUEUE_DOUBLE_MAP; + sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute", dev->node_props.max_engine_clk_fcompute); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index b9f3142..53fca1f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -45,6 +45,7 @@ #define HSA_CAP_DOORBELL_TYPE_PRE_1_0 0x0 #define HSA_CAP_DOORBELL_TYPE_1_0 0x1 +#define HSA_CAP_AQL_QUEUE_DOUBLE_MAP 0x4000 struct kfd_node_properties { uint32_t cpu_cores_count; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 20/37] drm/amdkfd: Add topology support for CPUs
Currently, the KFD topology information is generated by parsing the CRAT (ACPI) table. However, at present CRAT table is available only for AMD APUs. To support CPUs on systems without a CRAT table, the KFD driver will create a Virtual CRAT (VCRAT) table and then the existing code will parse this table to generate topology. Signed-off-by: Harish Kasiviswanathan Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 321 +- drivers/gpu/drm/amd/amdkfd/kfd_crat.h | 9 + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 190 +++--- drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 3 + 5 files changed, 489 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index ea1e0af..00732ec 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -21,10 +21,9 @@ */ #include #include "kfd_crat.h" +#include "kfd_priv.h" #include "kfd_topology.h" -extern struct kfd_system_properties sys_props; - static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, struct crat_subtype_computeunit *cu) { @@ -281,7 +280,7 @@ static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr, int kfd_parse_crat_table(void *crat_image, struct list_head *device_list, uint32_t proximity_domain) { - struct kfd_topology_device *top_dev; + struct kfd_topology_device *top_dev = NULL; struct crat_subtype_generic *sub_type_hdr; uint16_t node_id; int ret = 0; @@ -314,10 +313,10 @@ int kfd_parse_crat_table(void *crat_image, struct list_head *device_list, goto err; } - sys_props.platform_id = - (*((uint64_t *)crat_table->oem_id)) & CRAT_OEMID_64BIT_MASK; - sys_props.platform_oem = *((uint64_t *)crat_table->oem_table_id); - sys_props.platform_rev = crat_table->revision; + memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH); + memcpy(top_dev->oem_table_id, crat_table->oem_table_id, + CRAT_OEMTABLEID_LENGTH); + top_dev->oem_revision = crat_table->oem_revision; sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1); while ((char *)sub_type_hdr + sizeof(struct crat_subtype_generic) < @@ -385,8 +384,312 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size) return 0; } -/* - * kfd_destroy_crat_image +/* Memory required to create Virtual CRAT. + * Since there is no easy way to predict the amount of memory required, the + * following amount are allocated for CPU and GPU Virtual CRAT. This is + * expected to cover all known conditions. But to be safe additional check + * is put in the code to ensure we don't overwrite. + */ +#define VCRAT_SIZE_FOR_CPU (2 * PAGE_SIZE) +#define VCRAT_SIZE_FOR_GPU (3 * PAGE_SIZE) + +/* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node + * + * @numa_node_id: CPU NUMA node id + * @avail_size: Available size in the memory + * @sub_type_hdr: Memory into which compute info will be filled in + * + * Return 0 if successful else return -ve value + */ +static int kfd_fill_cu_for_cpu(int numa_node_id, int *avail_size, + int proximity_domain, + struct crat_subtype_computeunit *sub_type_hdr) +{ + const struct cpumask *cpumask; + + *avail_size -= sizeof(struct crat_subtype_computeunit); + if (*avail_size < 0) + return -ENOMEM; + + memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit)); + + /* Fill in subtype header data */ + sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY; + sub_type_hdr->length = sizeof(struct crat_subtype_computeunit); + sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; + + cpumask = cpumask_of_node(numa_node_id); + + /* Fill in CU data */ + sub_type_hdr->flags |= CRAT_CU_FLAGS_CPU_PRESENT; + sub_type_hdr->proximity_domain = proximity_domain; + sub_type_hdr->processor_id_low = kfd_numa_node_to_apic_id(numa_node_id); + if (sub_type_hdr->processor_id_low == -1) + return -EINVAL; + + sub_type_hdr->num_cpu_cores = cpumask_weight(cpumask); + + return 0; +} + +/* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node + * + * @numa_node_id: CPU NUMA node id + * @avail_size: Available size in the memory + * @sub_type_hdr: Memory into which compute info will be filled in + * + * Return 0 if successful else return -ve value + */ +static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size, + int proximity_domain, + struct crat_subtype_memory *sub_type_hdr) +{ + uint64_t mem_in_bytes = 0; + pg_data_t *pgdat;
[PATCH 16/37] drm/amdkfd: sync IOLINK defines to thunk spec
From: Harish Kasiviswanathan Current thunk spec v1.07 dated Feb 1, 2016 Signed-off-by: Harish Kasiviswanathan Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_crat.h | 21 - 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h index 4e683ae..3ac55a6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h @@ -222,9 +222,12 @@ struct crat_subtype_ccompute { /* * HSA IO Link Affinity structure and definitions */ -#define CRAT_IOLINK_FLAGS_ENABLED 0x0001 -#define CRAT_IOLINK_FLAGS_COHERENCY0x0002 -#define CRAT_IOLINK_FLAGS_RESERVED 0xfffc +#define CRAT_IOLINK_FLAGS_ENABLED (1 << 0) +#define CRAT_IOLINK_FLAGS_NON_COHERENT (1 << 1) +#define CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT (1 << 2) +#define CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT (1 << 3) +#define CRAT_IOLINK_FLAGS_NO_PEER_TO_PEER_DMA (1 << 4) +#define CRAT_IOLINK_FLAGS_RESERVED_MASK 0xffe0 /* * IO interface types @@ -232,8 +235,16 @@ struct crat_subtype_ccompute { #define CRAT_IOLINK_TYPE_UNDEFINED 0 #define CRAT_IOLINK_TYPE_HYPERTRANSPORT1 #define CRAT_IOLINK_TYPE_PCIEXPRESS2 -#define CRAT_IOLINK_TYPE_OTHER 3 -#define CRAT_IOLINK_TYPE_MAX 255 +#define CRAT_IOLINK_TYPE_AMBA 3 +#define CRAT_IOLINK_TYPE_MIPI 4 +#define CRAT_IOLINK_TYPE_QPI_1_1 5 +#define CRAT_IOLINK_TYPE_RESERVED1 6 +#define CRAT_IOLINK_TYPE_RESERVED2 7 +#define CRAT_IOLINK_TYPE_RAPID_IO 8 +#define CRAT_IOLINK_TYPE_INFINIBAND 9 +#define CRAT_IOLINK_TYPE_RESERVED3 10 +#define CRAT_IOLINK_TYPE_OTHER 11 +#define CRAT_IOLINK_TYPE_MAX 255 #define CRAT_IOLINK_RESERVED_LENGTH 24 -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 18/37] drm/amdkfd: Simplify counting of memory banks
Only count memory banks in one place. Ignore redundant num_banks entry in crat_subtype_computeunit. Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 3 +-- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 14 ++ drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 1 - 3 files changed, 3 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 8a0a9a0..ea1e0af 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -45,7 +45,6 @@ static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev, dev->node_props.lds_size_in_kb = cu->lds_size_in_kb; dev->node_props.max_waves_per_simd = cu->max_waves_simd; dev->node_props.wave_front_size = cu->wave_front_size; - dev->node_props.mem_banks_count = cu->num_banks; dev->node_props.array_count = cu->num_arrays; dev->node_props.cu_per_simd_array = cu->num_cu_per_array; dev->node_props.simd_per_cu = cu->num_simd_per_cu; @@ -111,7 +110,7 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem, mem->length_low; props->width = mem->width; - dev->mem_bank_count++; + dev->node_props.mem_banks_count++; list_add_tail(&props->list, &dev->mem_props); break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 001e473..17e8daf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -335,18 +335,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->node_props.cpu_cores_count); sysfs_show_32bit_prop(buffer, "simd_count", dev->node_props.simd_count); - - if (dev->mem_bank_count < dev->node_props.mem_banks_count) { - pr_info_once("mem_banks_count truncated from %d to %d\n", - dev->node_props.mem_banks_count, - dev->mem_bank_count); - sysfs_show_32bit_prop(buffer, "mem_banks_count", - dev->mem_bank_count); - } else { - sysfs_show_32bit_prop(buffer, "mem_banks_count", - dev->node_props.mem_banks_count); - } - + sysfs_show_32bit_prop(buffer, "mem_banks_count", + dev->node_props.mem_banks_count); sysfs_show_32bit_prop(buffer, "caches_count", dev->node_props.caches_count); sysfs_show_32bit_prop(buffer, "io_links_count", diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 0d98b61..17b2d43 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -137,7 +137,6 @@ struct kfd_topology_device { uint32_tgpu_id; uint32_tproximity_domain; struct kfd_node_properties node_props; - uint32_tmem_bank_count; struct list_headmem_props; uint32_tcache_count; struct list_headcache_props; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 25/37] drm/amdkfd: Module option to disable CRAT table
Some systems have broken CRAT tables. Add a module option to ignore a CRAT table. Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 5 + drivers/gpu/drm/amd/amdkfd/kfd_module.c | 5 + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 6 ++ 3 files changed, 16 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index ba7577b..a028623 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -694,6 +694,11 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size) return -EINVAL; } + if (ignore_crat) { + pr_info("CRAT table disabled by module option\n"); + return -ENODATA; + } + pcrat_image = kmalloc(crat_table->length, GFP_KERNEL); if (!pcrat_image) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index f50e494..3ac72be 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -69,6 +69,11 @@ module_param(send_sigterm, int, 0444); MODULE_PARM_DESC(send_sigterm, "Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)"); +int ignore_crat; +module_param(ignore_crat, int, 0444); +MODULE_PARM_DESC(ignore_crat, + "Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)"); + static int amdkfd_init_completed; int kgd2kfd_init(unsigned int interface_version, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index f0327c2..6a48d29 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -104,6 +104,12 @@ extern int cwsr_enable; */ extern int send_sigterm; +/* + * Ignore CRAT table during KFD initialization, can be used to work around + * broken CRAT tables on some AMD systems + */ +extern int ignore_crat; + /** * enum kfd_sched_policy * -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 22/37] drm/amdkfd: Add perf counters to topology
From: Amber Lin For hardware blocks whose performance counters are accessed via MMIO registers, KFD provides the support for those privileged blocks. IOMMU is one of those privileged blocks. Most performance counter properties required by Thunk are available at /sys/bus/event_source/devices/amd_iommu. This patch adds properties to topology in KFD sysfs for information not available in /sys/bus/event_source/devices/amd_iommu. They are shown at /sys/devices/virtual/kfd/kfd/topology/nodes/0/perf/iommu/ formatted as /sys/devices/virtual/kfd/kfd/topology/nodes/0/perf//, i.e. /sys/devices/virtual/kfd/kfd/topology/nodes/0/perf/iommu/max_concurrent. For dGPUs, who don't have IOMMU, nothing appears under /sys/devices/virtual/kfd/kfd/topology/nodes/0/perf. Signed-off-by: Amber Lin Signed-off-by: Kent Russell Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 116 +- drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 13 2 files changed, 127 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 7fe7ee0..52d20f5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -104,6 +104,7 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev) struct kfd_mem_properties *mem; struct kfd_cache_properties *cache; struct kfd_iolink_properties *iolink; + struct kfd_perf_properties *perf; list_del(&dev->list); @@ -128,6 +129,13 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev) kfree(iolink); } + while (dev->perf_props.next != &dev->perf_props) { + perf = container_of(dev->perf_props.next, + struct kfd_perf_properties, list); + list_del(&perf->list); + kfree(perf); + } + kfree(dev); } @@ -162,6 +170,7 @@ struct kfd_topology_device *kfd_create_topology_device( INIT_LIST_HEAD(&dev->mem_props); INIT_LIST_HEAD(&dev->cache_props); INIT_LIST_HEAD(&dev->io_link_props); + INIT_LIST_HEAD(&dev->perf_props); list_add_tail(&dev->list, device_list); @@ -328,6 +337,39 @@ static struct kobj_type cache_type = { .sysfs_ops = &cache_ops, }; +/** Sysfs of Performance Counters **/ + +struct kfd_perf_attr { + struct kobj_attribute attr; + uint32_t data; +}; + +static ssize_t perf_show(struct kobject *kobj, struct kobj_attribute *attrs, + char *buf) +{ + struct kfd_perf_attr *attr; + + buf[0] = 0; + attr = container_of(attrs, struct kfd_perf_attr, attr); + if (!attr->data) /* invalid data for PMC */ + return 0; + else + return sysfs_show_32bit_val(buf, attr->data); +} + +#define KFD_PERF_DESC(_name, _data)\ +{ \ + .attr = __ATTR(_name, 0444, perf_show, NULL), \ + .data = _data, \ +} + +static struct kfd_perf_attr perf_attr_iommu[] = { + KFD_PERF_DESC(max_concurrent, 0), + KFD_PERF_DESC(num_counters, 0), + KFD_PERF_DESC(counter_ids, 0), +}; +// + static ssize_t node_show(struct kobject *kobj, struct attribute *attr, char *buffer) { @@ -452,6 +494,7 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) struct kfd_iolink_properties *iolink; struct kfd_cache_properties *cache; struct kfd_mem_properties *mem; + struct kfd_perf_properties *perf; if (dev->kobj_iolink) { list_for_each_entry(iolink, &dev->io_link_props, list) @@ -488,6 +531,16 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) dev->kobj_mem = NULL; } + if (dev->kobj_perf) { + list_for_each_entry(perf, &dev->perf_props, list) { + kfree(perf->attr_group); + perf->attr_group = NULL; + } + kobject_del(dev->kobj_perf); + kobject_put(dev->kobj_perf); + dev->kobj_perf = NULL; + } + if (dev->kobj_node) { sysfs_remove_file(dev->kobj_node, &dev->attr_gpuid); sysfs_remove_file(dev->kobj_node, &dev->attr_name); @@ -504,8 +557,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, struct kfd_iolink_properties *iolink; struct kfd_cache_properties *cache; struct kfd_mem_properties *mem; + struct kfd_perf_properties *perf; int ret; - uint32_t i; + uint32_t i, num_attrs; + struct attribute **attrs; if (WARN_ON(dev->kobj_node)) return -EEXIST; @@ -534,6 +589,10 @@ static int kfd_
[PATCH 19/37] drm/amdkfd: Fix sibling_map[] size
From: Harish Kasiviswanathan Change kfd_cache_properties.sibling_map[256] to kfd_cache_properties.sibling_map[32]. Since, CRAT uses bitmap for sibling_map, it is more efficient to use bitmap in the kfd structure also. Signed-off-by: Harish Kasiviswanathan Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 20 +--- drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 4 +--- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 17e8daf..622feda 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -263,7 +263,7 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr, char *buffer) { ssize_t ret; - uint32_t i; + uint32_t i, j; struct kfd_cache_properties *cache; /* Making sure that the buffer is an empty string */ @@ -281,12 +281,18 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr, sysfs_show_32bit_prop(buffer, "latency", cache->cache_latency); sysfs_show_32bit_prop(buffer, "type", cache->cache_type); snprintf(buffer, PAGE_SIZE, "%ssibling_map ", buffer); - for (i = 0; i < KFD_TOPOLOGY_CPU_SIBLINGS; i++) - ret = snprintf(buffer, PAGE_SIZE, "%s%d%s", - buffer, cache->sibling_map[i], - (i == KFD_TOPOLOGY_CPU_SIBLINGS-1) ? - "\n" : ","); - + for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++) + for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) { + /* Check each bit */ + if (cache->sibling_map[i] & (1 << j)) + ret = snprintf(buffer, PAGE_SIZE, +"%s%d%s", buffer, 1, ","); + else + ret = snprintf(buffer, PAGE_SIZE, +"%s%d%s", buffer, 0, ","); + } + /* Replace the last "," with end of line */ + *(buffer + strlen(buffer) - 1) = 0xA; return ret; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 17b2d43..50a741b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -91,8 +91,6 @@ struct kfd_mem_properties { struct attributeattr; }; -#define KFD_TOPOLOGY_CPU_SIBLINGS 256 - #define HSA_CACHE_TYPE_DATA0x0001 #define HSA_CACHE_TYPE_INSTRUCTION 0x0002 #define HSA_CACHE_TYPE_CPU 0x0004 @@ -109,7 +107,7 @@ struct kfd_cache_properties { uint32_tcache_assoc; uint32_tcache_latency; uint32_tcache_type; - uint8_t sibling_map[KFD_TOPOLOGY_CPU_SIBLINGS]; + uint8_t sibling_map[CRAT_SIBLINGMAP_SIZE]; struct kobject *kobj; struct attributeattr; }; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 26/37] drm/amdkfd: Ignore ACPI CRAT for non-APU systems
From: Harish Kasiviswanathan Some AMD motherboards without an APU have a broken CRAT table which causes KFD initialization failures or incorrect information about NUMA nodes, CPU cores or system memory. Ignore CRAT tables without GPUs and rely on KFD's code to create a CRAT table for the CPU. Signed-off-by: Harish Kasiviswanathan Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 25 ++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index e7daf2c..7f0d41e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -904,6 +904,25 @@ static void kfd_add_non_crat_information(struct kfd_topology_device *kdev) /* TODO: For GPU node, rearrange code from kfd_topology_add_device */ } +/* kfd_is_acpi_crat_invalid - CRAT from ACPI is valid only for AMD APU devices. + * Ignore CRAT for all other devices. AMD APU is identified if both CPU + * and GPU cores are present. + * @device_list - topology device list created by parsing ACPI CRAT table. + * @return - TRUE if invalid, FALSE is valid. + */ +static bool kfd_is_acpi_crat_invalid(struct list_head *device_list) +{ + struct kfd_topology_device *dev; + + list_for_each_entry(dev, device_list, list) { + if (dev->node_props.cpu_cores_count && + dev->node_props.simd_count) + return false; + } + pr_info("Ignoring ACPI CRAT on non-APU system\n"); + return true; +} + int kfd_topology_init(void) { void *crat_image = NULL; @@ -936,7 +955,7 @@ int kfd_topology_init(void) /* * Get the CRAT image from the ACPI. If ACPI doesn't have one -* create a virtual CRAT. +* or if ACPI CRAT is invalid create a virtual CRAT. * NOTE: The current implementation expects all AMD APUs to have * CRAT. If no CRAT is available, it is assumed to be a CPU */ @@ -945,7 +964,8 @@ int kfd_topology_init(void) ret = kfd_parse_crat_table(crat_image, &temp_topology_device_list, proximity_domain); - if (ret) { + if (ret || + kfd_is_acpi_crat_invalid(&temp_topology_device_list)) { kfd_release_topology_device_list( &temp_topology_device_list); kfd_destroy_crat_image(crat_image); @@ -971,7 +991,6 @@ int kfd_topology_init(void) goto err; } } - kdev = list_first_entry(&temp_topology_device_list, struct kfd_topology_device, list); kfd_add_perf_to_topology(kdev); -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 23/37] drm/amdkfd: Fixup incorrect info in the CZ CRAT table
From: Philip Cox * Wrong value for max_waves_per_simd * Missing ATC capability bit Signed-off-by: Philip Cox Signed-off-by: Kent Russell Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 52d20f5..80bc71d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1229,12 +1229,15 @@ int kfd_topology_add_device(struct kfd_dev *gpu) /* Fix errors in CZ CRAT. * simd_count: Carrizo CRAT reports wrong simd_count, probably * because it doesn't consider masked out CUs -* capability flag: Carrizo CRAT doesn't report IOMMU -* flags. TODO: Fix this. +* max_waves_per_simd: Carrizo reports wrong max_waves_per_simd +* capability flag: Carrizo CRAT doesn't report IOMMU flags */ - if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) + if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) { dev->node_props.simd_count = cu_info.simd_per_cu * cu_info.cu_active_number; + dev->node_props.max_waves_per_simd = 10; + dev->node_props.capability |= HSA_CAP_ATS_PRESENT; + } kfd_debug_print_topology(); -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 30/37] drm/amdkfd: Conditionally enable PCIe atomics
This will be needed for most dGPUs. Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 13 + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + 2 files changed, 14 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index a8fa33a..e3f6453 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -41,6 +41,7 @@ static const struct kfd_device_info kaveri_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = false, + .needs_pci_atomics = false, }; static const struct kfd_device_info carrizo_device_info = { @@ -53,6 +54,7 @@ static const struct kfd_device_info carrizo_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = true, + .needs_pci_atomics = false, }; struct kfd_deviceid { @@ -127,6 +129,17 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, return NULL; } + if (device_info->needs_pci_atomics) { + /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps. +*/ + if (pci_enable_atomic_ops_to_root(pdev) < 0) { + dev_info(kfd_device, + "skipped device %x:%x, PCI rejects atomics", +pdev->vendor, pdev->device); + return NULL; + } + } + kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); if (!kfd) return NULL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 6a48d29..eebfb1e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -158,6 +158,7 @@ struct kfd_device_info { uint8_t num_of_watch_points; uint16_t mqd_size_aligned; bool supports_cwsr; + bool needs_pci_atomics; }; struct kfd_mem_obj { -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 32/37] drm/amdkfd: Make sched_policy a per-device setting
Some dGPUs don't support HWS. Allow them to use a per-device sched_policy that may be different from the global default. Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 3 ++- drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c| 3 ++- drivers/gpu/drm/amd/amdkfd/kfd_device.c| 2 +- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 22 +++--- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 1 + .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 3 ++- 6 files changed, 27 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 62c3d9c..6fe2496 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -901,7 +901,8 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep, mutex_unlock(&p->mutex); - if (sched_policy == KFD_SCHED_POLICY_NO_HWS && pdd->qpd.vmid != 0) + if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS && + pdd->qpd.vmid != 0) dev->kfd2kgd->set_scratch_backing_va( dev->kgd, args->va_addr, pdd->qpd.vmid); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c index 3da25f7..9d4af96 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c @@ -33,6 +33,7 @@ #include "kfd_pm4_headers_diq.h" #include "kfd_dbgmgr.h" #include "kfd_dbgdev.h" +#include "kfd_device_queue_manager.h" static DEFINE_MUTEX(kfd_dbgmgr_mutex); @@ -83,7 +84,7 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) } /* get actual type of DBGDevice cpsch or not */ - if (sched_policy == KFD_SCHED_POLICY_NO_HWS) + if (pdev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) type = DBGDEV_TYPE_NODIQ; kfd_dbgdev_init(new_buff->dbgdev, pdev, type); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index eb5d67f..b133fba 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -348,7 +348,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd->pdev->device); pr_debug("Starting kfd with the following scheduling policy %d\n", - sched_policy); + kfd->dqm->sched_policy); goto out; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index d0693fd..3e2f53b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -385,7 +385,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) prev_active = q->properties.is_active; /* Make sure the queue is unmapped before updating the MQD */ - if (sched_policy != KFD_SCHED_POLICY_NO_HWS) { + if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); if (retval) { @@ -417,7 +417,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) else if (!q->properties.is_active && prev_active) dqm->queue_count--; - if (sched_policy != KFD_SCHED_POLICY_NO_HWS) + if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) retval = map_queues_cpsch(dqm); else if (q->properties.is_active && (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || @@ -1097,7 +1097,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, alternate_aperture_base, alternate_aperture_size); - if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) + if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) program_sh_mem_settings(dqm, qpd); pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n", @@ -1242,6 +1242,22 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) if (!dqm) return NULL; + switch (dev->device_info->asic_family) { + /* HWS is not available on Hawaii. */ + case CHIP_HAWAII: + /* HWS depends on CWSR for timely dequeue. CWSR is not +* available on Tonga. +* +* FIXME: This argument also applies to Kaveri. +*/ + case CHIP_TONGA: + dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS; + break; + default: + dqm->sched_policy = sched_policy; + break; + } + dqm->dev = dev; switch (sched_policy) { case KFD_SCHED_POLICY_HWS: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_
[PATCH 31/37] drm/amdkfd: Make IOMMUv2 code conditional
dGPUs work without IOMMUv2. Make IOMMUv2 initialization dependent on ASIC information. Also allow building KFD without IOMMUv2 support. This is still useful for dGPUs and prepares for enabling KFD on architectures that don't support AMD IOMMUv2. Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/Kconfig| 2 +- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 8 +++- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 62 +-- drivers/gpu/drm/amd/amdkfd/kfd_events.c | 2 + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 +++ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 17 ++--- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 + drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 2 + 8 files changed, 74 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig index bc5a294..5bbeb95 100644 --- a/drivers/gpu/drm/amd/amdkfd/Kconfig +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig @@ -4,6 +4,6 @@ config HSA_AMD tristate "HSA kernel driver for AMD GPU devices" - depends on DRM_AMDGPU && AMD_IOMMU_V2 && X86_64 + depends on DRM_AMDGPU && X86_64 help Enable this if you want to use HSA features on AMD GPU devices. diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index a028623..2fc8c08 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -20,7 +20,9 @@ * OTHER DEALINGS IN THE SOFTWARE. */ #include +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) #include +#endif #include "kfd_crat.h" #include "kfd_priv.h" #include "kfd_topology.h" @@ -1035,15 +1037,17 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, struct crat_subtype_generic *sub_type_hdr; struct crat_subtype_computeunit *cu; struct kfd_cu_info cu_info; - struct amd_iommu_device_info iommu_info; int avail_size = *size; uint32_t total_num_of_cu; int num_of_cache_entries = 0; int cache_mem_filled = 0; int ret = 0; +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) + struct amd_iommu_device_info iommu_info; const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP | AMD_IOMMU_DEVICE_FLAG_PRI_SUP | AMD_IOMMU_DEVICE_FLAG_PASID_SUP; +#endif struct kfd_local_mem_info local_mem_info; if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU) @@ -1104,12 +1108,14 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, /* Check if this node supports IOMMU. During parsing this flag will * translate to HSA_CAP_ATS_PRESENT */ +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) iommu_info.flags = 0; if (amd_iommu_device_info(kdev->pdev, &iommu_info) == 0) { if ((iommu_info.flags & required_iommu_flags) == required_iommu_flags) cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT; } +#endif crat_table->length += sub_type_hdr->length; crat_table->total_entries++; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index e3f6453..eb5d67f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -20,7 +20,9 @@ * OTHER DEALINGS IN THE SOFTWARE. */ +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) #include +#endif #include #include #include @@ -31,6 +33,7 @@ #define MQD_SIZE_ALIGNED 768 +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) static const struct kfd_device_info kaveri_device_info = { .asic_family = CHIP_KAVERI, .max_pasid_bits = 16, @@ -41,6 +44,7 @@ static const struct kfd_device_info kaveri_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = false, + .needs_iommu_device = true, .needs_pci_atomics = false, }; @@ -54,8 +58,10 @@ static const struct kfd_device_info carrizo_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = true, + .needs_iommu_device = true, .needs_pci_atomics = false, }; +#endif struct kfd_deviceid { unsigned short did; @@ -64,6 +70,7 @@ struct kfd_deviceid { /* Please keep this sorted by increasing device id. */ static const struct kfd_deviceid supported_devices[] = { +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) { 0x1304, &kaveri_device_info },/* Kaveri */ { 0x1305, &kaveri_device_info },/* Kaveri */ { 0x1306, &kaveri_device_info },/* Kaveri */ @@ -91,6 +98,7 @@ static const struct kfd_deviceid supported_devices[] =
[PATCH 33/37] drm/amdkfd: Add dGPU support to the device queue manager
GFXv7 and v8 dGPUs use a different addressing mode for KFD compared to APUs (GPUVM64 vs HSA64). And dGPUs don't support MTYPE_CC. They use MTYPE_UC instead for memory that requires coherency. Signed-off-by: Felix Kuehling --- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 11 +++ .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 4 + .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c | 56 + .../drm/amd/amdkfd/kfd_device_queue_manager_vi.c | 93 ++ 4 files changed, 164 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 3e2f53b..092653f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1308,6 +1308,17 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) case CHIP_KAVERI: device_queue_manager_init_cik(&dqm->asic_ops); break; + + case CHIP_HAWAII: + device_queue_manager_init_cik_hawaii(&dqm->asic_ops); + break; + + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + device_queue_manager_init_vi_tonga(&dqm->asic_ops); + break; default: WARN(1, "Unexpected ASIC family %u", dev->device_info->asic_family); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 9fdc9c2..68be0aa 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -185,8 +185,12 @@ struct device_queue_manager { void device_queue_manager_init_cik( struct device_queue_manager_asic_ops *asic_ops); +void device_queue_manager_init_cik_hawaii( + struct device_queue_manager_asic_ops *asic_ops); void device_queue_manager_init_vi( struct device_queue_manager_asic_ops *asic_ops); +void device_queue_manager_init_vi_tonga( + struct device_queue_manager_asic_ops *asic_ops); void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd); unsigned int get_queues_num(struct device_queue_manager *dqm); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c index 28e48c9..aed4c21 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c @@ -34,8 +34,13 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, uint64_t alternate_aperture_size); static int update_qpd_cik(struct device_queue_manager *dqm, struct qcm_process_device *qpd); +static int update_qpd_cik_hawaii(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd); +static void init_sdma_vm_hawaii(struct device_queue_manager *dqm, + struct queue *q, + struct qcm_process_device *qpd); void device_queue_manager_init_cik( struct device_queue_manager_asic_ops *asic_ops) @@ -45,6 +50,14 @@ void device_queue_manager_init_cik( asic_ops->init_sdma_vm = init_sdma_vm; } +void device_queue_manager_init_cik_hawaii( + struct device_queue_manager_asic_ops *asic_ops) +{ + asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik; + asic_ops->update_qpd = update_qpd_cik_hawaii; + asic_ops->init_sdma_vm = init_sdma_vm_hawaii; +} + static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) { /* In 64-bit mode, we can only control the top 3 bits of the LDS, @@ -132,6 +145,36 @@ static int update_qpd_cik(struct device_queue_manager *dqm, return 0; } +static int update_qpd_cik_hawaii(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct kfd_process_device *pdd; + unsigned int temp; + + pdd = qpd_to_pdd(qpd); + + /* check if sh_mem_config register already configured */ + if (qpd->sh_mem_config == 0) { + qpd->sh_mem_config = + ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) | + DEFAULT_MTYPE(MTYPE_NONCACHED) | + APE1_MTYPE(MTYPE_NONCACHED); + qpd->sh_mem_ape1_limit = 0; + qpd->sh_mem_ape1_base = 0; + } + + /* On dGPU we're always in GPUVM64 addressing mode with 64-bit +* aperture addresses. +*/ + te
[PATCH 35/37] drm/amdkfd: Add dGPU support to kernel_queue_init
Recognize dGPU ASIC families. Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 5 + 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 5dc6567..69f4964 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -297,10 +297,15 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, switch (dev->device_info->asic_family) { case CHIP_CARRIZO: + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: kernel_queue_init_vi(&kq->ops_asic_specific); break; case CHIP_KAVERI: + case CHIP_HAWAII: kernel_queue_init_cik(&kq->ops_asic_specific); break; default: -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 37/37] drm/amdgpu: Enable KFD initialization on dGPUs
Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 5 + 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 51284bf..27878cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -78,10 +78,15 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) switch (adev->asic_type) { #ifdef CONFIG_DRM_AMDGPU_CIK case CHIP_KAVERI: + case CHIP_HAWAII: kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions(); break; #endif case CHIP_CARRIZO: + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); break; default: -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 36/37] drm/amdkfd: Add dGPU device IDs and device info
Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 153 +++- 1 file changed, 151 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index b133fba..df9972b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -63,12 +63,118 @@ static const struct kfd_device_info carrizo_device_info = { }; #endif +static const struct kfd_device_info hawaii_device_info = { + .asic_family = CHIP_HAWAII, + .max_pasid_bits = 16, + /* max num of queues for KV.TODO should be a dynamic value */ + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = false, + .needs_iommu_device = false, + .needs_pci_atomics = false, +}; + +static const struct kfd_device_info tonga_device_info = { + .asic_family = CHIP_TONGA, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = false, + .needs_iommu_device = false, + .needs_pci_atomics = true, +}; + +static const struct kfd_device_info tonga_vf_device_info = { + .asic_family = CHIP_TONGA, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = false, + .needs_iommu_device = false, + .needs_pci_atomics = false, +}; + +static const struct kfd_device_info fiji_device_info = { + .asic_family = CHIP_FIJI, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = true, +}; + +static const struct kfd_device_info fiji_vf_device_info = { + .asic_family = CHIP_FIJI, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = false, +}; + + +static const struct kfd_device_info polaris10_device_info = { + .asic_family = CHIP_POLARIS10, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = true, +}; + +static const struct kfd_device_info polaris10_vf_device_info = { + .asic_family = CHIP_POLARIS10, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = false, +}; + +static const struct kfd_device_info polaris11_device_info = { + .asic_family = CHIP_POLARIS11, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = true, +}; + + struct kfd_deviceid { unsigned short did; const struct kfd_device_info *device_info; }; -/* Please keep this sorted by increasing device id. */ static const struct kfd_deviceid supported_devices[] = { #if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) { 0x1304, &kaveri_device_info },/* Kaveri */ @@ -97,8 +203,51 @@ static const struct kfd_deviceid supported_devices[] = { { 0x9874, &carrizo_device_info }, /* Carrizo */ { 0x9875, &carrizo_device_info }, /* Carrizo */ { 0x9876, &carrizo_device_info }, /* Carrizo */ - { 0x9877, &carrizo_device_info }/* Carrizo */ + { 0x9877, &carrizo_device_info }, /* Carrizo */ #endif +
[PATCH 34/37] drm/amdkfd: Add dGPU support to the MQD manager
On dGPUs don't set ATC addressing bits and use MTYPE_UC for coherent memory. Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 7 + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 35 ++-- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 21 ++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h| 4 +++ 4 files changed, 64 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index dfd260e..ee7061e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -29,8 +29,15 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, switch (dev->device_info->asic_family) { case CHIP_KAVERI: return mqd_manager_init_cik(type, dev); + case CHIP_HAWAII: + return mqd_manager_init_cik_hawaii(type, dev); case CHIP_CARRIZO: return mqd_manager_init_vi(type, dev); + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + return mqd_manager_init_vi_tonga(type, dev); default: WARN(1, "Unexpected ASIC family %u", dev->device_info->asic_family); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index f8ef4a0..fbe3f83 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -170,14 +170,19 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, mms); } -static int update_mqd(struct mqd_manager *mm, void *mqd, - struct queue_properties *q) +static int __update_mqd(struct mqd_manager *mm, void *mqd, + struct queue_properties *q, unsigned int atc_bit) { struct cik_mqd *m; m = get_mqd(mqd); m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | - DEFAULT_MIN_AVAIL_SIZE | PQ_ATC_EN; + DEFAULT_MIN_AVAIL_SIZE; + m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE; + if (atc_bit) { + m->cp_hqd_pq_control |= PQ_ATC_EN; + m->cp_hqd_ib_control |= IB_ATC_EN; + } /* * Calculating queue size which is log base 2 of actual queue size -1 @@ -202,6 +207,18 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, return 0; } +static int update_mqd(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + return __update_mqd(mm, mqd, q, 1); +} + +static int update_mqd_hawaii(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + return __update_mqd(mm, mqd, q, 0); +} + static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, struct queue_properties *q) { @@ -441,3 +458,15 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, return mqd; } +struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type, + struct kfd_dev *dev) +{ + struct mqd_manager *mqd; + + mqd = mqd_manager_init_cik(type, dev); + if (!mqd) + return NULL; + if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE)) + mqd->update_mqd = update_mqd_hawaii; + return mqd; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 971aec0..58221c1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -151,6 +151,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); + m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr); + m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr); m->cp_hqd_pq_doorbell_control = q->doorbell_off << @@ -208,6 +210,12 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, return __update_mqd(mm, mqd, q, MTYPE_CC, 1); } +static int update_mqd_tonga(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + return __update_mqd(mm, mqd, q, MTYPE_UC, 0); +} + static int destroy_mqd(struct mqd_manager *mm, void *mqd, enum kfd_preempt_type type, unsigned int timeout, uint32_t pipe_id, @@ -432,3 +440,16 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, return mqd; } + +struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type, +