Now that we're softpinning the addresses of our BOs in anv & i965, the selected addresses start at the top of the address space. This is a problem for the current implementation of aubinator, which only uses a 40-bit mmapped address space.
This change keeps track of all the memory writes from the aub file and fetches them on request by the batch decoder. As a result we can get rid of the 1<<40 mmapped address space and only rely on the mmapped aub file \o/ Signed-off-by: Lionel Landwerlin <lionel.g.landwer...@intel.com> --- src/intel/tools/aubinator.c | 116 +++++++++++++++++++++--------------- 1 file changed, 68 insertions(+), 48 deletions(-) diff --git a/src/intel/tools/aubinator.c b/src/intel/tools/aubinator.c index 3120e82b22e..4a60a606f63 100644 --- a/src/intel/tools/aubinator.c +++ b/src/intel/tools/aubinator.c @@ -68,8 +68,14 @@ char *input_file = NULL, *xml_path = NULL; struct gen_device_info devinfo; struct gen_batch_decode_ctx batch_ctx; -uint64_t gtt_size, gtt_end; -void *gtt; +static struct memory_chunk { + void *map; + uint64_t address; + uint32_t size; +} *mem_chunks; +static uint32_t n_mem_chunks = 0; +static uint32_t n_allocated_mem_chunks = 0; + uint64_t general_state_base; uint64_t surface_state_base; uint64_t dynamic_state_base; @@ -78,6 +84,32 @@ uint64_t instruction_bound; FILE *outfile; +static void +add_mem_chunk(void *data, uint64_t address, uint32_t size) +{ + if (n_mem_chunks >= n_allocated_mem_chunks) { + n_allocated_mem_chunks = n_allocated_mem_chunks == 0 ? 
4 : (2 * n_allocated_mem_chunks); + mem_chunks = realloc(mem_chunks, sizeof(*mem_chunks) * n_allocated_mem_chunks); + } + + for (int c = 0; c < n_mem_chunks; c++) { + struct memory_chunk *chunk = &mem_chunks[c]; + if (address <= chunk->address && size >= chunk->size) { + memmove(chunk, &mem_chunks[c + 1], + (n_mem_chunks - c - 1) * sizeof(struct memory_chunk)); + mem_chunks[n_mem_chunks - 1].map = data; + mem_chunks[n_mem_chunks - 1].address = address & ((1ULL << 48) - 1); + mem_chunks[n_mem_chunks - 1].size = size; + return; + } + } + + mem_chunks[n_mem_chunks].map = data; + mem_chunks[n_mem_chunks].address = address & ((1ULL << 48) - 1); + mem_chunks[n_mem_chunks].size = size; + n_mem_chunks++; +} + static inline uint32_t field(uint32_t value, int start, int end) { @@ -90,12 +122,6 @@ field(uint32_t value, int start, int end) struct brw_instruction; -static inline int -valid_offset(uint32_t offset) -{ - return offset < gtt_end; -} - #define GEN_ENGINE_RENDER 1 #define GEN_ENGINE_BLITTER 2 @@ -118,13 +144,7 @@ handle_trace_block(uint32_t *p) case AUB_TRACE_OP_DATA_WRITE: if (address_space != AUB_TRACE_MEMTYPE_GTT) break; - if (gtt_size < offset + size) { - fprintf(stderr, "overflow gtt space: %s\n", strerror(errno)); - exit(EXIT_FAILURE); - } - memcpy((char *) gtt + offset, data, size); - if (gtt_end < offset + size) - gtt_end = offset + size; + add_mem_chunk(data, offset, size); break; case AUB_TRACE_OP_COMMAND_WRITE: switch (type) { @@ -142,7 +162,6 @@ handle_trace_block(uint32_t *p) (void)engine; /* TODO */ gen_print_batch(&batch_ctx, data, size, 0); - gtt_end = 0; break; } } @@ -150,15 +169,19 @@ handle_trace_block(uint32_t *p) static struct gen_batch_decode_bo get_gen_batch_bo(void *user_data, uint64_t address) { - if (address > gtt_end) - return (struct gen_batch_decode_bo) { .map = NULL }; - - /* We really only have one giant address range */ - return (struct gen_batch_decode_bo) { - .addr = 0, - .map = gtt, - .size = gtt_size - }; + for (int c = 0; c < 
n_mem_chunks; c++) { + struct memory_chunk *chunk = &mem_chunks[n_mem_chunks - 1 - c]; + if (chunk->address <= address && + (chunk->address + chunk->size) > address) { + return (struct gen_batch_decode_bo) { + .addr = chunk->address, + .map = chunk->map, + .size = chunk->size, + }; + } + } + + return (struct gen_batch_decode_bo) { .map = NULL }; } static void @@ -252,7 +275,7 @@ handle_memtrace_reg_write(uint32_t *p) static int blitter_elsp_writes = 0; static int render_elsq0 = 0; static int blitter_elsq0 = 0; - uint8_t *pphwsp; + uint32_t pphwsp_offset; if (offset == 0x2230) { render_elsp_writes++; @@ -270,25 +293,38 @@ handle_memtrace_reg_write(uint32_t *p) return; } + + if (render_elsp_writes > 3 || blitter_elsp_writes > 3) { render_elsp_writes = blitter_elsp_writes = 0; - pphwsp = (uint8_t*)gtt + (value & 0xfffff000); + pphwsp_offset = value & 0xfffff000; } else if (offset == 0x2550) { engine = GEN_ENGINE_RENDER; - pphwsp = (uint8_t*)gtt + (render_elsq0 & 0xfffff000); + pphwsp_offset = render_elsq0 & 0xfffff000; } else if (offset == 0x22550) { engine = GEN_ENGINE_BLITTER; - pphwsp = (uint8_t*)gtt + (blitter_elsq0 & 0xfffff000); + pphwsp_offset = blitter_elsq0 & 0xfffff000; } else { return; } + struct gen_batch_decode_bo pphwsp_bo = get_gen_batch_bo(NULL, pphwsp_offset); + if (!pphwsp_bo.addr) + fprintf(stderr, "Unable to find HW context image\n"); + + const void *pphwsp = pphwsp_bo.map + (pphwsp_offset - pphwsp_bo.addr); const uint32_t pphwsp_size = 4096; - uint32_t *context = (uint32_t*)(pphwsp + pphwsp_size); + const uint32_t *context = (const uint32_t*)(pphwsp + pphwsp_size); uint32_t ring_buffer_head = context[5]; uint32_t ring_buffer_tail = context[7]; uint32_t ring_buffer_start = context[9]; - uint32_t *commands = (uint32_t*)((uint8_t*)gtt + ring_buffer_start + ring_buffer_head); + + struct gen_batch_decode_bo ring_bo = get_gen_batch_bo(NULL, ring_buffer_start); + if (!ring_bo.addr) + fprintf(stderr, "Unable to find context ringbuffer\n"); + + const void 
*ring_ptr = ring_bo.map + (ring_buffer_start - ring_bo.addr); + const uint32_t *commands = (const uint32_t*)(ring_ptr + ring_buffer_head); (void)engine; /* TODO */ gen_print_batch(&batch_ctx, commands, ring_buffer_tail - ring_buffer_head, 0); } @@ -304,14 +340,7 @@ handle_memtrace_mem_write(uint32_t *p) if (address_space != 1) return; - if (gtt_size < address + size) { - fprintf(stderr, "overflow gtt space: %s\n", strerror(errno)); - exit(EXIT_FAILURE); - } - - memcpy((char *) gtt + address, data, size); - if (gtt_end < address + size) - gtt_end = address + size; + add_mem_chunk(data, address, size); } struct aub_file { @@ -637,15 +666,6 @@ int main(int argc, char *argv[]) else file = aub_file_open(input_file); - /* mmap a terabyte for our gtt space. */ - gtt_size = 1ull << 40; - gtt = mmap(NULL, gtt_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0); - if (gtt == MAP_FAILED) { - fprintf(stderr, "failed to alloc gtt space: %s\n", strerror(errno)); - exit(EXIT_FAILURE); - } - while (aub_file_more_stuff(file)) { switch (aub_file_decode_batch(file)) { case AUB_ITEM_DECODE_OK: -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev