Signed-off-by: Gert Wollny <gw.foss...@gmail.com> --- .../state_tracker/st_glsl_to_tgsi_array_merge.cpp | 389 ++++++++++++++++++++- .../state_tracker/st_glsl_to_tgsi_array_merge.h | 26 +- 2 files changed, 413 insertions(+), 2 deletions(-)
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi_array_merge.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi_array_merge.cpp index 89b9bf66d4..1e8aca99a2 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi_array_merge.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi_array_merge.cpp @@ -21,6 +21,109 @@ * DEALINGS IN THE SOFTWARE. */ +/* A short overview on how the array merging works: + * + * Inputs: + * - per array information: live range, access mask, size + * - the program + * + * Output: + * - the program with updated array addressing + * + * Pseudo algorithm: + * + * repeat + * for all pairs of arrays: + * if they have non-overlapping live ranges and equal access masks: + * - pick shorter array + * - merge its live range into the longer array + * - set its merge target array to the longer array + * - mark the shorter array as processed + * + * for all pairs of arrays: + * if they have overlapping live ranges use in sum at most four components: + * - pick shorter array + * - evaluate reswizzle map to move its components into the components + * that are not used by the longer array + * - set its merge target array to the longer array + * - mark the shorter array as processed + * - bail out loop + * until no more successfull merges were found + * + * for all pairs of arrays: + * if they have non-overlapping live ranges: + * - pick shorter array + * - merge its live range into the longer array + * - set its merge target array to the longer array + * - mark the shorter array as processed + * + * Finalize remapping map so that target arrays are always final, i.e. have + * themselfes no merge target set. + * + * Example: + * ID | Length | Live range | access mask | target id | reswizzle + * ================================================================ + * 1 3 3-10 x___ 0 ____ + * 2 4 13-20 x___ 0 ____ + * 3 8 3-20 x___ 0 ____ + * 4 6 21-40 xy__ 0 ____ + * 5 7 12-30 xy__ 0 ____ + * + * 1. merge live ranges 1 and 2 + * + * ID | Length | Live range | access mask | target id | reswizzle + * ================================================================ + * 1 - - x___ 2 ____ + * 2 4 3-20 x___ 0 ____ + * 3 8 3-20 x___ 0 ____ + * 4 6 21-40 xy__ 0 ____ + * 5 7 12-30 xy__ 0 ____ + * + * + * 3. interleave 2 and 3 + * + * ID | Length | Live range | access mask | target id | reswizzle + * ================================================================ + * 1 - - x___ 2 ____ + * 2 - - x___ 3 _x__ + * 3 8 3-20 xy__ 0 ____ + * 4 6 21-40 xy__ 0 ____ + * 5 7 12-30 xy__ 0 ____ + * + * 3. merge live ranges 3 and 4 + * + * ID | Length | Live range | access mask | target id | reswizzle + * ================================================================ + * 1 - - x___ 2 ____ + * 2 - - x___ 3 _x__ + * 3 8 3-40 xy__ 0 ____ + * 4 - - xy__ 3 ____ + * 5 7 3-21 xy__ 0 ____ + * + * 4. interleave 3 and 5 + * + * ID | Length | Live range | access mask | target id | reswizzle + * ================================================================ + * 1 - - x___ 2 ____ + * 2 - - x___ 3 _x__ + * 3 8 3-40 xy__ 0 ____ + * 4 - - xy__ 3 ____ + * 5 - - xy__ 3 __xy + * + * 5. finalize remapping + * (Array 1 has been merged with 2 that was later interleaved, so + * the reswizzeling must be propagated. + * + * ID | Length | Live range | new access mask | target id | reswizzle + * ================================================================ + * 1 - - _y__ 3 _x__ + * 2 - - _y__ 3 _x__ + * 3 8 3-40 xy__ 0 ____ + * 4 - - xy__ 3 ____ + * 5 - - __zw 3 __xy + * +*/ + #include "program/prog_instruction.h" #include "util/u_math.h" #include <ostream> @@ -37,6 +140,14 @@ using std::unique_ptr; using std::make_unique; #endif +#define ARRAY_MERGE_DEBUG 0 + +#if ARRAY_MERGE_DEBUG > 0 +#define ARRAY_MERGE_DUMP(x) do std::cerr << x; while (0) +#else +#define ARRAY_MERGE_DUMP(x) +#endif + array_live_range::array_live_range(): id(0), @@ -348,5 +459,281 @@ bool operator == (const array_remapping& lhs, const array_remapping& rhs) return true; } -/* end namespace tgsi_array_merge */ +static +bool sort_by_begin(const array_live_range& lhs, const array_live_range& rhs) { + return lhs.begin() < rhs.begin(); } + +/* Helper class to evaluate merging and interleaving of arrays */ +class array_merge_evaluator { +public: + typedef int (*array_merger)(array_live_range& range_1, + array_live_range& range_2, + array_remapping *_remapping); + + array_merge_evaluator(int _narrays, array_live_range *_ranges, + array_remapping *_remapping); + + /** Run the merge strategy on all arrays + * @returns number of successfull merges + */ + int run(array_merger merger, bool always_restart); + +private: + int narrays; + array_live_range *ranges; + array_remapping *remapping; + + +}; + +/** Execute the live range merge */ +static +int merge_live_range(array_live_range& range_1, array_live_range& range_2, + array_remapping *remapping) +{ + if (range_2.time_doesnt_overlap(range_1)) { + if (range_1.array_length() < range_2.array_length()) + std::swap(range_2, range_1); + + ARRAY_MERGE_DUMP("merge " << range_2 << " into " << range_1 << "\n"); + + remapping[range_2.array_id()] = array_remapping(range_1.array_id(), + range_1.access_mask()); + range_1.merge_live_range(range_2); + return 1; + } + return 0; +} + +/** Merge arrays that have non-overlapping live ranges + * and equal access masks. + */ +static +int merge_live_range_equal_swizzle(array_live_range& range_1, + array_live_range& range_2, + array_remapping *remapping) +{ + if (range_1.access_mask() == range_2.access_mask()) + return merge_live_range(range_1, range_2, remapping); + return 0; +} + +static +int array_interleave(array_live_range& range_1, array_live_range& range_2, + array_remapping *remapping) +{ + if ((range_2.used_components() + range_1.used_components() > 4) || + range_1.time_doesnt_overlap(range_2)) + return 0; + + if (range_1.array_length() < range_2.array_length()) + std::swap(range_2, range_1); + + ARRAY_MERGE_DUMP("Interleave " << range_2 << " into " << range_1 << "\n"); + remapping[range_2.array_id()] = array_remapping(range_1.array_id(), + range_1.access_mask(), + range_2.access_mask()); + range_1.merge_live_range(range_2); + range_1.set_access_mask(remapping[range_2.array_id()].combined_access_mask()); + ARRAY_MERGE_DUMP(" Interleaved is " << range_1 << "\n"); + return 1; +} + + + +/* Implementation of the helper classes follows */ +array_merge_evaluator::array_merge_evaluator(int _narrays, + array_live_range *_ranges, + array_remapping *_remapping): + narrays(_narrays), + ranges(_ranges), + remapping(_remapping) +{ +} + +int array_merge_evaluator::run(array_merger merger, bool always_restart) +{ + int remaps = 0; + + for (int i = 0; i < narrays; ++i) { + + if (remapping[ranges[i].array_id()].is_valid()) + continue; + + for (int j = i + 1; j < narrays; ++j) { + + if (!remapping[ranges[j].array_id()].is_valid()) { + int n = merger(ranges[i], ranges[j], remapping); + if (always_restart && n) + return n; + remaps += n; + } + + } + } + return remaps; +} + +/* Estimate the array merging: First in a loop, arrays with equal access mask + * are merged then interleave arrays that together use at most four components, + * and finally arrays are merged regardless of access mask. + * @param[in] narrays number of arrays + * @param[in,out] alt array life times, the merge target life time will be + * updated with the new life time. + * @param[in,out] remapping track the arraay index remapping and reswizzeling. + * @returns number of merged arrays + */ +bool get_array_remapping(int narrays, array_live_range *ranges, + array_remapping *remapping) +{ + int total_remapped = 0; + int n_remapped; + + /* Sort by "begin of live range" so that we don't have to restart searching + * after every merge. + */ + std::sort(ranges, ranges + narrays, sort_by_begin); + array_merge_evaluator merge_evaluator(narrays, ranges, remapping); + + do { + + n_remapped = merge_evaluator.run(merge_live_range_equal_swizzle, false); + + /* try only one array interleave, if successfull, another + * live_range merge is tried. The test MergeAndInterleave5 + * (mesa/st/tests/test_glsl_to_tgsi_array_merge.cpp) + * shows how this can result in more arrays being merged. + */ + n_remapped += merge_evaluator.run(array_interleave, true); + total_remapped += n_remapped; + + ARRAY_MERGE_DUMP("Remapped " << n_remapped << " arrays\n"); + } while (n_remapped > 0); + + total_remapped += merge_evaluator.run(merge_live_range, false); + ARRAY_MERGE_DUMP("Remapped a total of " << total_remapped << " arrays\n"); + + for (int i = 1; i <= narrays; ++i) { + if (remapping[i].is_valid()) { + remapping[i].finalize_mappings(remapping); + } + } + return total_remapped > 0; +} + +/* Remap the arrays in a TGSI program according to the given mapping. + * @param narrays number of arrays + * @param array_sizes array of arrays sizes + * @param map the array remapping information + * @param instructions TGSI program + * @returns number of arrays after remapping + */ +int remap_arrays(int narrays, unsigned *array_sizes, + exec_list *instructions, + array_remapping *map) +{ + /* re-calculate arrays */ +#if __cplusplus < 201402L + int *idx_map = new int[narrays + 1]; + unsigned *old_sizes = new unsigned[narrays + 1]; +#else + unique_ptr<int[]> idx_map = make_unique<int[]>(narrays + 1); + unique_ptr<unsigned[]> old_sizes = make_unique<unsigned[]>(narrays + 1); +#endif + + memcpy(&old_sizes[0], &array_sizes[0], sizeof(unsigned) * narrays); + + /* Evaluate mapping for the array indices and update array sizes */ + int new_narrays = 0; + for (int i = 1; i <= narrays; ++i) { + if (!map[i].is_valid()) { + ++new_narrays; + idx_map[i] = new_narrays; + array_sizes[new_narrays] = old_sizes[i]; + } + } + + /* Map the array ids of merge arrays. */ + for (int i = 1; i <= narrays; ++i) { + if (map[i].is_valid()) { + map[i].set_target_id(idx_map[map[i].target_array_id()]); + } + } + + /* Map the array ids of merge targets that got only renumbered. */ + for (int i = 1; i <= narrays; ++i) { + if (!map[i].is_valid()) { + map[i].set_target_id(idx_map[i]); + } + } + + /* Update the array ids and swizzles in the registers */ + foreach_in_list(glsl_to_tgsi_instruction, inst, instructions) { + for (unsigned j = 0; j < num_inst_src_regs(inst); j++) { + st_src_reg& src = inst->src[j]; + if (src.file == PROGRAM_ARRAY && src.array_id > 0) { + array_remapping& m = map[src.array_id]; + if (m.is_valid()) { + src.array_id = m.target_array_id(); + src.swizzle = m.map_swizzles(src.swizzle); + } + } + } + for (unsigned j = 0; j < inst->tex_offset_num_offset; j++) { + st_src_reg& src = inst->tex_offsets[j]; + if (src.file == PROGRAM_ARRAY && src.array_id > 0) { + array_remapping& m = map[src.array_id]; + if (m.is_valid()) { + src.array_id = m.target_array_id(); + src.swizzle = m.map_swizzles(src.swizzle); + } + } + } + for (unsigned j = 0; j < num_inst_dst_regs(inst); j++) { + st_dst_reg& dst = inst->dst[j]; + if (dst.file == PROGRAM_ARRAY && dst.array_id > 0) { + array_remapping& m = map[dst.array_id]; + if (m.is_valid()) { + assert(j == 0 && + "remapping can only be done for single dest ops"); + dst.array_id = m.target_array_id(); + dst.writemask = m.map_writemask(dst.writemask); + + /* If the target component is moved, then the source swizzles + * must be moved accordingly. + */ + for (unsigned j = 0; j < num_inst_src_regs(inst); j++) { + st_src_reg& src = inst->src[j]; + src.swizzle = m.move_read_swizzles(src.swizzle); + } + } + } + } + } + +#if __cplusplus < 201402L + delete[] old_sizes; + delete[] idx_map; +#endif + + return new_narrays; +} + +} + +using namespace tgsi_array_merge; + +int merge_arrays(int narrays, + unsigned *array_sizes, + exec_list *instructions, + struct array_live_range *arr_live_ranges) +{ + array_remapping *map= new array_remapping[narrays + 1]; + + if (get_array_remapping(narrays, arr_live_ranges, map)) + narrays = remap_arrays(narrays, array_sizes, instructions, map); + + delete[] map; + return narrays; +} \ No newline at end of file diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi_array_merge.h b/src/mesa/state_tracker/st_glsl_to_tgsi_array_merge.h index b9fb498e69..44a4027b34 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi_array_merge.h +++ b/src/mesa/state_tracker/st_glsl_to_tgsi_array_merge.h @@ -158,5 +158,29 @@ std::ostream& operator << (std::ostream& os, const array_remapping& am) return os; } +/* Apply the array remapping (internal use, exposed here for testing) */ + bool get_array_remapping(int narrays, array_live_range *array_live_ranges, + array_remapping *remapping); + +/* Apply the array remapping (internal use, exposed here for testing) */ +int remap_arrays(int narrays, unsigned *array_sizes, + exec_list *instructions, + array_remapping *map); + } -#endif + +/** Remap the array access to finalize the array merging and interleaving. + * @param[in] narrays number of input arrays, + * @param[in,out] array_sizes length array of input arrays, on output the + * array sizes will be updated according to the remapping, + * @param[in,out] instructions TGSI program, on output the arrays access is + * remapped to the new array layout, + * @param[in] array_live_ranges live ranges and access information of the + * arrays. + * @returns number of remaining arrays + */ +int merge_arrays(int narrays, + unsigned *array_sizes, + exec_list *instructions, + struct array_live_range *arr_live_ranges); +#endif \ No newline at end of file -- 2.16.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev