On 8/7/2015 11:56 AM, MichaÅ Winiarski wrote: > On Fri, Aug 07, 2015 at 10:45:21AM +0100, Michel Thierry wrote: >> Gen8+ supports 48-bit virtual addresses, but some objects must always be >> allocated inside the 32-bit address range. >> >> In specific, any resource used with flat/heapless (0x00000000-0xfffff000) >> General State Heap (GSH) or Instruction State Heap (ISH) must be in a >> 32-bit range, because the General State Offset and Instruction State Offset >> are limited to 32-bits. >> >> The i915 driver has been modified to provide a flag to set when the 4GB >> limit is not necessary in a given bo (EXEC_OBJECT_SUPPORTS_48B_ADDRESS). >> 48-bit range will only be used when explicitly requested. >> >> Calls to the new drm_intel_bo_emit_reloc_48bit function will have this flag >> set automatically, while calls to drm_intel_bo_emit_reloc will clear it. >> >> v2: Make set/clear functions nops on pre-gen8 platforms, and use them >> internally in emit_reloc functions (Ben) >> s/48BADDRESS/48B_ADDRESS/ (Dave) >> v3: Keep set/clear functions internal, no-one needs to use them directly. >> >> References: >> http://lists.freedesktop.org/archives/intel-gfx/2015-July/072612.html >> Cc: Ben Widawsky <ben at bwidawsk.net> >> Cc: Dave Gordon <david.s.gordon at intel.com> >> Signed-off-by: Michel Thierry <michel.thierry at intel.com> >> --- >> include/drm/i915_drm.h | 3 ++- >> intel/intel_bufmgr.c | 16 ++++++++++++++ >> intel/intel_bufmgr.h | 6 +++++- >> intel/intel_bufmgr_gem.c | 54 >> +++++++++++++++++++++++++++++++++++++++++++---- >> intel/intel_bufmgr_priv.h | 11 ++++++++++ >> 5 files changed, 84 insertions(+), 6 deletions(-) >> >> diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h >> index ded43b1..426b25c 100644 >> --- a/include/drm/i915_drm.h >> +++ b/include/drm/i915_drm.h >> @@ -680,7 +680,8 @@ struct drm_i915_gem_exec_object2 { >> #define EXEC_OBJECT_NEEDS_FENCE (1<<0) >> #define EXEC_OBJECT_NEEDS_GTT (1<<1) >> #define EXEC_OBJECT_WRITE (1<<2) >> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1) >> +#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) >> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1) >> __u64 flags; >> >> __u64 rsvd1; >> diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c >> index 14ea9f9..0bd5191 100644 >> --- a/intel/intel_bufmgr.c >> +++ b/intel/intel_bufmgr.c >> @@ -202,6 +202,22 @@ drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t >> offset, >> drm_intel_bo *target_bo, uint32_t target_offset, >> uint32_t read_domains, uint32_t write_domain) >> { >> + if (bo->bufmgr->bo_clear_supports_48b_address) >> + bo->bufmgr->bo_clear_supports_48b_address(target_bo); > > This is always true - you assign func to this func pointer unconditionally. > Also - why not return some meaningful value if the user does not have > enable_ppgtt=3 set? You can get that from I915_PARAM_HAS_ALIASING_PPGTT right > now. Check for gen >= 8 seems rather inadequate, and even then you're not > returning anything useful to the caller. > >> + >> + return bo->bufmgr->bo_emit_reloc(bo, offset, >> + target_bo, target_offset, >> + read_domains, write_domain); >> +} >> + > > Using emit_reloc to set a BO flag seems confusing and can be error prone, > emit_reloc can be called many times and caller needs to be careful and call > the > right function for each reloc. > >> +int >> +drm_intel_bo_emit_reloc_48bit(drm_intel_bo *bo, uint32_t offset, >> + drm_intel_bo *target_bo, uint32_t target_offset, >> + uint32_t read_domains, uint32_t write_domain) >> +{ >> + if (bo->bufmgr->bo_set_supports_48b_address) >> + bo->bufmgr->bo_set_supports_48b_address(target_bo); > > Same situation as with clear_supports_48b_address. > >> + >> return bo->bufmgr->bo_emit_reloc(bo, offset, >> target_bo, target_offset, >> read_domains, write_domain); >> diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h >> index 285919e..8f91ffe 100644 >> --- a/intel/intel_bufmgr.h >> +++ b/intel/intel_bufmgr.h >> @@ -87,7 +87,8 @@ struct _drm_intel_bo { >> /** >> * Last seen card virtual address (offset from the beginning of the >> * aperture) for the object. This should be used to fill relocation >> - * entries when calling drm_intel_bo_emit_reloc() >> + * entries when calling drm_intel_bo_emit_reloc() or >> + * drm_intel_bo_emit_reloc_48bit() >> */ >> uint64_t offset64; >> }; >> @@ -147,6 +148,9 @@ int drm_intel_bufmgr_check_aperture_space(drm_intel_bo >> ** bo_array, int count); >> int drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, >> drm_intel_bo *target_bo, uint32_t target_offset, >> uint32_t read_domains, uint32_t write_domain); >> +int drm_intel_bo_emit_reloc_48bit(drm_intel_bo *bo, uint32_t offset, >> + drm_intel_bo *target_bo, uint32_t >> target_offset, >> + uint32_t read_domains, uint32_t write_domain); >> int drm_intel_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset, >> drm_intel_bo *target_bo, >> uint32_t target_offset, >> diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c >> index 41de396..713ed4e 100644 >> --- a/intel/intel_bufmgr_gem.c >> +++ b/intel/intel_bufmgr_gem.c >> @@ -140,6 +140,7 @@ typedef struct _drm_intel_bufmgr_gem { >> } drm_intel_bufmgr_gem; >> >> #define DRM_INTEL_RELOC_FENCE (1<<0) >> +#define DRM_INTEL_RELOC_SUPPORTS_48B_ADDRESS (2<<0) > > Why are you duplicating information about support for 48B in both emit reloc > function argument and struct bo_gem? > >> typedef struct _drm_intel_reloc_target_info { >> drm_intel_bo *bo; >> @@ -237,6 +238,14 @@ struct _drm_intel_bo_gem { >> bool is_userptr; >> >> /** >> + * Boolean of whether this buffer can be in the whole 48-bit address. >> + * >> + * By default, buffers will be keep in a 32-bit range, unless this >> + * flag is explicitly set. >> + */ >> + bool supports_48b_address; >> + >> + /** >> * Size in bytes of this buffer and its relocation descendents. >> * >> * Used to avoid costly tree walking in >> @@ -463,13 +472,18 @@ drm_intel_add_validate_buffer(drm_intel_bo *bo) >> } >> >> static void >> -drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) >> +drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence, >> + int supports_48b_address) >> { >> drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; >> drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; >> int index; >> >> if (bo_gem->validate_index != -1) { >> + if (supports_48b_address) { >> + bufmgr_gem->exec2_objects[bo_gem->validate_index].flags >> |= >> + EXEC_OBJECT_SUPPORTS_48B_ADDRESS; >> + } >> if (need_fence) >> bufmgr_gem->exec2_objects[bo_gem->validate_index].flags >> |= >> EXEC_OBJECT_NEEDS_FENCE; >> @@ -508,6 +522,10 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int >> need_fence) >> bufmgr_gem->exec2_objects[index].flags |= >> EXEC_OBJECT_NEEDS_FENCE; >> } >> + if (supports_48b_address) { >> + bufmgr_gem->exec2_objects[index].flags |= >> + EXEC_OBJECT_SUPPORTS_48B_ADDRESS; >> + } >> bufmgr_gem->exec_count++; >> } >> >> @@ -1925,11 +1943,33 @@ do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, >> else >> bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0; >> >> + if (target_bo_gem->supports_48b_address) >> + bo_gem->reloc_target_info[bo_gem->reloc_count].flags |= >> + DRM_INTEL_RELOC_SUPPORTS_48B_ADDRESS; >> + >> bo_gem->reloc_count++; >> >> return 0; >> } >> >> +static void drm_intel_gem_bo_set_supports_48b_address(drm_intel_bo *bo) >> +{ >> + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; >> + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; >> + >> + if (bufmgr_gem->gen >= 8) >> + bo_gem->supports_48b_address = 1; >> +} >> + >> +static void drm_intel_gem_bo_clear_supports_48b_address(drm_intel_bo *bo) >> +{ >> + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; >> + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; >> + >> + if (bufmgr_gem->gen >= 8) >> + bo_gem->supports_48b_address = 0; >> +} >> + >> static int >> drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, >> drm_intel_bo *target_bo, uint32_t target_offset, >> @@ -2043,7 +2083,7 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo) >> >> for (i = 0; i < bo_gem->reloc_count; i++) { >> drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; >> - int need_fence; >> + int need_fence, supports_48b_addr; >> >> if (target_bo == bo) >> continue; >> @@ -2056,8 +2096,12 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo) >> need_fence = (bo_gem->reloc_target_info[i].flags & >> DRM_INTEL_RELOC_FENCE); >> >> + supports_48b_addr = (bo_gem->reloc_target_info[i].flags & >> + DRM_INTEL_RELOC_SUPPORTS_48B_ADDRESS); >> + >> /* Add the target to the validate list */ >> - drm_intel_add_validate_buffer2(target_bo, need_fence); >> + drm_intel_add_validate_buffer2(target_bo, need_fence, >> + supports_48b_addr); >> } >> } >> >> @@ -2217,7 +2261,7 @@ do_exec2(drm_intel_bo *bo, int used, drm_intel_context >> *ctx, >> /* Add the batch buffer to the validation list. There are no >> relocations >> * pointing to it. >> */ >> - drm_intel_add_validate_buffer2(bo, 0); >> + drm_intel_add_validate_buffer2(bo, 0, 0); >> >> memclear(execbuf); >> execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects; >> @@ -3299,6 +3343,8 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) >> bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering; >> bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc; >> bufmgr_gem->bufmgr.bo_emit_reloc_fence = >> drm_intel_gem_bo_emit_reloc_fence; >> + bufmgr_gem->bufmgr.bo_set_supports_48b_address = >> drm_intel_gem_bo_set_supports_48b_address; >> + bufmgr_gem->bufmgr.bo_clear_supports_48b_address = >> drm_intel_gem_bo_clear_supports_48b_address; >> bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin; >> bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin; >> bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling; >> diff --git a/intel/intel_bufmgr_priv.h b/intel/intel_bufmgr_priv.h >> index 59ebd18..774fa02 100644 >> --- a/intel/intel_bufmgr_priv.h >> +++ b/intel/intel_bufmgr_priv.h >> @@ -152,6 +152,17 @@ struct _drm_intel_bufmgr { >> void (*destroy) (drm_intel_bufmgr *bufmgr); >> >> /** >> + * Set/Clear 48-bit address support flag in a given bo. >> + * >> + * Any resource used with flat/heapless (0x00000000-0xfffff000) >> + * General State Heap (GSH) or Intructions State Heap (ISH) must >> + * be in a 32-bit range. 48-bit range will only be used when explicitly >> + * requested. >> + */ >> + void (*bo_set_supports_48b_address) (drm_intel_bo *bo); >> + void (*bo_clear_supports_48b_address) (drm_intel_bo *bo); >> + >> + /** >> * Add relocation entry in reloc_buf, which will be updated with the >> * target buffer's real offset on on command submission. >> * >> -- >> 2.5.0 > > You also haven't updated the dump_validation_list with proper format specifier > (addresses are truncated otherwise). > > I really don't like hiding set_supports_48b_address/clear_supports_48b_address > in emit reloc - making this explicit would be a better approach. > Of course this is just my opinion, but let me post a bit different version of > this API - just as an RFC so you can see how it would look with explicit set > on > bo and without adding another emit_reloc variant. >
Hi MichaÅ, Ben suggested having the set/clear in emit reloc as this is the only place mesa cares about these wa. But I see your point, this will be used not only by mesa, so we should have something that is good for all the other projects. -Michel