Split the lower_ffma compiler option into lower_ffma16, lower_ffma32 and lower_ffma64 so that FMA lowering can be selected per bit size. RADV doesn't have to lower 16-bit FMA. Original patch from Rhys Perry. Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> --- src/amd/vulkan/radv_shader.c | 4 +++- src/broadcom/compiler/nir_to_vir.c | 4 +++- src/compiler/nir/nir.h | 4 +++- src/compiler/nir/nir_opt_algebraic.py | 4 +++- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 4 +++- src/gallium/drivers/panfrost/midgard/midgard_compile.h | 3 ++- src/gallium/drivers/radeonsi/si_get.c | 4 +++- src/gallium/drivers/vc4/vc4_program.c | 4 +++- 8 files changed, 23 insertions(+), 8 deletions(-)
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 7c5447c5b56..63d4147460c 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -70,7 +70,9 @@ static const struct nir_shader_compiler_options nir_options = { .lower_unpack_unorm_4x8 = true, .lower_extract_byte = true, .lower_extract_word = true, - .lower_ffma = true, + .lower_ffma16 = true, + .lower_ffma32 = true, + .lower_ffma64 = true, .lower_fpow = true, .lower_mul_2x32_64 = true, .max_unroll_iterations = 32 diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 2c411b86ed1..c516bd4eb74 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -2330,7 +2330,9 @@ const nir_shader_compiler_options v3d_nir_options = { .lower_unpack_half_2x16 = true, .lower_fdiv = true, .lower_find_lsb = true, - .lower_ffma = true, + .lower_ffma16 = true, + .lower_ffma32 = true, + .lower_ffma64 = true, .lower_flrp32 = true, .lower_fpow = true, .lower_fsat = true, diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index ca9d5e6ffc8..84f7f0ec23b 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2161,7 +2161,9 @@ typedef enum { typedef struct nir_shader_compiler_options { bool lower_fdiv; - bool lower_ffma; + bool lower_ffma16; + bool lower_ffma32; + bool lower_ffma64; bool fuse_ffma; bool lower_flrp16; bool lower_flrp32; diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 2e5e43d801e..b1559133281 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -141,7 +141,9 @@ optimizations = [ (('~fadd', a, ('fmul', ('b2f', 'c@1'), ('fadd', b, ('fneg', a)))), ('bcsel', c, b, a), 'options->lower_flrp32'), (('~fadd@32', a, ('fmul', c , ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp32'), (('~fadd@64', a, ('fmul', c , ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), 
'!options->lower_flrp64'), - (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'), + (('ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma16'), + (('ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma32'), + (('ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma64'), (('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma'), (('fdot4', ('vec4', a, b, c, 1.0), d), ('fdph', ('vec3', a, b, c), d)), diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index afad48b5920..957ed5904cd 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -896,7 +896,9 @@ nvc0_screen_bind_cb_3d(struct nvc0_screen *screen, bool *can_serialize, static const nir_shader_compiler_options nir_options = { .lower_fdiv = false, - .lower_ffma = false, + .lower_ffma16 = false, + .lower_ffma32 = false, + .lower_ffma64 = false, .fuse_ffma = false, /* nir doesn't track mad vs fma */ .lower_flrp32 = true, .lower_flrp64 = true, diff --git a/src/gallium/drivers/panfrost/midgard/midgard_compile.h b/src/gallium/drivers/panfrost/midgard/midgard_compile.h index b3308c866f6..d609cbf0ae7 100644 --- a/src/gallium/drivers/panfrost/midgard/midgard_compile.h +++ b/src/gallium/drivers/panfrost/midgard/midgard_compile.h @@ -62,8 +62,9 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl * solution. 
*/ static const nir_shader_compiler_options midgard_nir_options = { - .lower_ffma = true, .lower_ffma16 = true, + .lower_ffma32 = true, + .lower_ffma64 = true, .lower_sub = true, .lower_scmp = true, .lower_flrp32 = true, diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index 6fa67087c7d..de0213995cb 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -490,7 +490,9 @@ static const struct nir_shader_compiler_options nir_options = { .lower_fsat = true, .lower_fdiv = true, .lower_sub = true, - .lower_ffma = true, + .lower_ffma16 = true, + .lower_ffma32 = true, + .lower_ffma64 = true, .lower_pack_snorm_2x16 = true, .lower_pack_snorm_4x8 = true, .lower_pack_unorm_2x16 = true, diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index eb8b3a2c377..e1f166abe82 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -2233,7 +2233,9 @@ static const nir_shader_compiler_options nir_options = { .lower_extract_byte = true, .lower_extract_word = true, .lower_fdiv = true, - .lower_ffma = true, + .lower_ffma16 = true, + .lower_ffma32 = true, + .lower_ffma64 = true, .lower_flrp32 = true, .lower_fpow = true, .lower_fsat = true, -- 2.21.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev