On Sat, May 4, 2019 at 9:39 AM Nicolai Hähnle <nhaeh...@gmail.com> wrote:
> From: Nicolai Hähnle <nicolai.haeh...@amd.com> > > Upcoming changes to LLVM will emit LDS objects as symbols in the ELF > symbol table, with relocations that will be resolved with this change. > > Callers will also be able to define LDS symbols that are shared between > shader parts. This will be used by radeonsi for the ESGS ring in gfx9+ > merged shaders. > --- > src/amd/common/ac_rtld.c | 210 ++++++++++++++++-- > src/amd/common/ac_rtld.h | 39 +++- > src/gallium/drivers/radeonsi/si_compute.c | 9 +- > src/gallium/drivers/radeonsi/si_debug.c | 22 +- > src/gallium/drivers/radeonsi/si_shader.c | 61 +++-- > src/gallium/drivers/radeonsi/si_shader.h | 5 +- > .../drivers/radeonsi/si_state_shaders.c | 2 +- > 7 files changed, 296 insertions(+), 52 deletions(-) > > diff --git a/src/amd/common/ac_rtld.c b/src/amd/common/ac_rtld.c > index 4e0468d2062..3df7b3ba51f 100644 > --- a/src/amd/common/ac_rtld.c > +++ b/src/amd/common/ac_rtld.c > @@ -24,25 +24,31 @@ > #include "ac_rtld.h" > > #include <gelf.h> > #include <libelf.h> > #include <stdarg.h> > #include <stdio.h> > #include <stdlib.h> > #include <string.h> > > #include "ac_binary.h" > +#include "ac_gpu_info.h" > +#include "util/u_dynarray.h" > #include "util/u_math.h" > > // Old distributions may not have this enum constant > #define MY_EM_AMDGPU 224 > > +#ifndef STT_AMDGPU_LDS > +#define STT_AMDGPU_LDS 13 > +#endif > + > #ifndef R_AMDGPU_NONE > #define R_AMDGPU_NONE 0 > #define R_AMDGPU_ABS32_LO 1 > #define R_AMDGPU_ABS32_HI 2 > #define R_AMDGPU_ABS64 3 > #define R_AMDGPU_REL32 4 > #define R_AMDGPU_REL64 5 > #define R_AMDGPU_ABS32 6 > #define R_AMDGPU_GOTPCREL 7 > #define R_AMDGPU_GOTPCREL32_LO 8 > @@ -97,41 +103,155 @@ static void report_elf_errorf(const char *fmt, ...) > PRINTFLIKE(1, 2); > static void report_elf_errorf(const char *fmt, ...) 
> { > va_list va; > va_start(va, fmt); > report_erroraf(fmt, va); > va_end(va); > > fprintf(stderr, "ELF error: %s\n", elf_errmsg(elf_errno())); > } > > +/** > + * Find a symbol in a dynarray of struct ac_rtld_symbol by \p name and > shader > + * \p part_idx. > + */ > +static const struct ac_rtld_symbol *find_symbol(const struct > util_dynarray *symbols, > + const char *name, unsigned > part_idx) > +{ > + util_dynarray_foreach(symbols, struct ac_rtld_symbol, symbol) { > + if ((symbol->part_idx == ~0u || symbol->part_idx == > part_idx) && > + !strcmp(name, symbol->name)) > + return symbol; > + } > + return 0; > +} > + > +static int compare_symbol_by_align(const void *lhsp, const void *rhsp) > +{ > + const struct ac_rtld_symbol *lhs = lhsp; > + const struct ac_rtld_symbol *rhs = rhsp; > + if (rhs->align > lhs->align) > + return -1; > + if (rhs->align < lhs->align) > + return 1; > + return 0; > +} > + > +/** > + * Sort the given symbol list by decreasing alignment and assign offsets. > + */ > +static bool layout_symbols(struct ac_rtld_symbol *symbols, unsigned > num_symbols, > + uint64_t *ptotal_size) > +{ > + qsort(symbols, num_symbols, sizeof(*symbols), > compare_symbol_by_align); > + > + uint64_t total_size = *ptotal_size; > + > + for (unsigned i = 0; i < num_symbols; ++i) { > + struct ac_rtld_symbol *s = &symbols[i]; > + assert(util_is_power_of_two_nonzero(s->align)); > + > + total_size = align64(total_size, s->align); > + s->offset = total_size; > + > + if (total_size + s->size < total_size) { > + report_errorf("%s: size overflow", __FUNCTION__); > + return false; > + } > + > + total_size += s->size; > + } > + > + *ptotal_size = total_size; > + return true; > +} > + > +/** > + * Read LDS symbols from the given \p section of the ELF of \p part and > append > + * them to the LDS symbols list. > + * > + * Shared LDS symbols are filtered out. 
> + */ > +static bool read_private_lds_symbols(struct ac_rtld_binary *binary, > + unsigned part_idx, > + Elf_Scn *section, > + uint32_t *lds_end_align) > +{ > +#define report_elf_if(cond) \ > + do { \ > + if ((cond)) { \ > + report_errorf(#cond); \ > + return false; \ > + } \ > + } while (false) > + > + struct ac_rtld_part *part = &binary->parts[part_idx]; > + Elf64_Shdr *shdr = elf64_getshdr(section); > + uint32_t strtabidx = shdr->sh_link; > + Elf_Data *symbols_data = elf_getdata(section, NULL); > + report_elf_if(!symbols_data); > + > + const Elf64_Sym *symbol = symbols_data->d_buf; > + size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym); > + > + for (size_t j = 0; j < num_symbols; ++j, ++symbol) { > + if (ELF64_ST_TYPE(symbol->st_info) != STT_AMDGPU_LDS) > + continue; > + > + report_elf_if(symbol->st_size > 1u << 29); > + > + struct ac_rtld_symbol s = {}; > + s.name = elf_strptr(part->elf, strtabidx, > symbol->st_name); > + s.size = symbol->st_size; > + s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16); > + s.part_idx = part_idx; > + > + if (!strcmp(s.name, "__lds_end")) { > + report_elf_if(s.size != 0); > + *lds_end_align = MAX2(*lds_end_align, s.align); > + continue; > + } > + > + const struct ac_rtld_symbol *shared = > + find_symbol(&binary->lds_symbols, s.name, > part_idx); > + if (shared) { > + report_elf_if(s.align > shared->align); > + report_elf_if(s.size > shared->size); > + continue; > + } > + > + util_dynarray_append(&binary->lds_symbols, struct > ac_rtld_symbol, s); > + } > + > + return true; > + > +#undef report_elf_if > +} > + > /** > * Open a binary consisting of one or more shader parts. 
> * > * \param binary the uninitialized struct > - * \param num_parts number of shader parts > - * \param elf_ptrs pointers to the in-memory ELF objects for each shader > part > - * \param elf_sizes sizes (in bytes) of the in-memory ELF objects > + * \param i binary opening parameters > */ > -bool ac_rtld_open(struct ac_rtld_binary *binary, unsigned num_parts, > - const char * const *elf_ptrs, > - const uint64_t *elf_sizes) > +bool ac_rtld_open(struct ac_rtld_binary *binary, > + struct ac_rtld_open_info i) > { > /* One of the libelf implementations > * (http://www.mr511.de/software/english.htm) requires calling > * elf_version() before elf_memory(). > */ > elf_version(EV_CURRENT); > > memset(binary, 0, sizeof(*binary)); > - binary->num_parts = num_parts; > - binary->parts = calloc(sizeof(*binary->parts), num_parts); > + binary->num_parts = i.num_parts; > + binary->parts = calloc(sizeof(*binary->parts), i.num_parts); > if (!binary->parts) > return false; > > uint64_t pasted_text_size = 0; > uint64_t rx_align = 1; > uint64_t rx_size = 0; > > #define report_if(cond) \ > do { \ > if ((cond)) { \ > @@ -140,25 +260,44 @@ bool ac_rtld_open(struct ac_rtld_binary *binary, > unsigned num_parts, > } \ > } while (false) > #define report_elf_if(cond) \ > do { \ > if ((cond)) { \ > report_elf_errorf(#cond); \ > goto fail; \ > } \ > } while (false) > > - /* First pass over all parts: open ELFs and determine the > placement of > - * sections in the memory image. */ > - for (unsigned i = 0; i < num_parts; ++i) { > - struct ac_rtld_part *part = &binary->parts[i]; > - part->elf = elf_memory((char *)elf_ptrs[i], elf_sizes[i]); > + /* Copy and layout shared LDS symbols. 
*/ > + util_dynarray_resize(&binary->lds_symbols, struct ac_rtld_symbol, > i.num_shared_lds_symbols); > + memcpy(binary->lds_symbols.data, i.shared_lds_symbols, > binary->lds_symbols.size); > + > + util_dynarray_foreach(&binary->lds_symbols, struct ac_rtld_symbol, > symbol) > + symbol->part_idx = ~0u; > + > + unsigned max_lds_size = i.info->chip_class >= CIK ? 64 * 1024 : 32 > * 1024; >

For ESGS and LSHS, the max LDS size is 32K, because the other half is reserved for PS. A GPU hang can occur if more LDS is used for those stages.

Marek
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev