[Mesa-dev] [PATCH] nir: delete magic number
Give the magic number "26" a name that explains what it is for. PS: I don't have commit rights. Can you push this patch for me? Signed-off-by: Elie Tournier --- src/compiler/nir/nir_opt_loop_unroll.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/compiler/nir/nir_opt_loop_unroll.c b/src/compiler/nir/nir_opt_loop_unroll.c index 37cbced43d..2bb1a57791 100644 --- a/src/compiler/nir/nir_opt_loop_unroll.c +++ b/src/compiler/nir/nir_opt_loop_unroll.c @@ -26,6 +26,8 @@ #include "nir_control_flow.h" #include "nir_loop_analyze.h" +#define GLSL_IR_UNROLL_LIMIT 26 + /* Prepare this loop for unrolling by first converting to lcssa and then * converting the phis from the loops first block and the block that follows * the loop into regs. Partially converting out of SSA allows us to unroll @@ -460,7 +462,7 @@ is_loop_small_enough_to_unroll(nir_shader *shader, nir_loop_info *li) return true; bool loop_not_too_large = - li->num_instructions * li->trip_count <= max_iter * 26; + li->num_instructions * li->trip_count <= max_iter * GLSL_IR_UNROLL_LIMIT; return loop_not_too_large; } -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2] nir: delete magic number
Signed-off-by: Elie Tournier --- src/compiler/nir/nir_opt_loop_unroll.c | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/compiler/nir/nir_opt_loop_unroll.c b/src/compiler/nir/nir_opt_loop_unroll.c index 37cbced43d..035a030239 100644 --- a/src/compiler/nir/nir_opt_loop_unroll.c +++ b/src/compiler/nir/nir_opt_loop_unroll.c @@ -26,6 +26,14 @@ #include "nir_control_flow.h" #include "nir_loop_analyze.h" + +/* This limit is chosen fairly arbitrarily. The GLSL IR limit is 25. + * However, due to slight differences in the way the two IRs count + * instructions, some loops that would unroll with GLSL IR fail to unroll + * if we set this to 25 so we set it to 26. + */ +#define LOOP_UNROLL_LIMIT 26 + /* Prepare this loop for unrolling by first converting to lcssa and then * converting the phis from the loops first block and the block that follows * the loop into regs. Partially converting out of SSA allows us to unroll @@ -460,7 +468,7 @@ is_loop_small_enough_to_unroll(nir_shader *shader, nir_loop_info *li) return true; bool loop_not_too_large = - li->num_instructions * li->trip_count <= max_iter * 26; + li->num_instructions * li->trip_count <= max_iter * LOOP_UNROLL_LIMIT; return loop_not_too_large; } -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] glsl/tests: Add UINT64 and INT64 types
Seems good to me. Reviewed-by: Elie Tournier On Sat, Feb 11, 2017 at 05:31:09PM -0500, Rhys Kidd wrote: > glsl/tests/uniform_initializer_utils.cpp:83:14: warning: enumeration value > ‘GLSL_TYPE_UINT64’ not handled in switch [-Wswitch] >switch (type->base_type) { > ^ > glsl/tests/uniform_initializer_utils.cpp:83:14: warning: enumeration value > ‘GLSL_TYPE_INT64’ not handled in switch [-Wswitch] > > Fixes: 8ce53d4a2f3 ("glsl: Add basic ARB_gpu_shader_int64 types") > Signed-off-by: Rhys Kidd > --- > src/compiler/glsl/tests/uniform_initializer_utils.cpp | 18 ++ > 1 file changed, 18 insertions(+) > > diff --git a/src/compiler/glsl/tests/uniform_initializer_utils.cpp > b/src/compiler/glsl/tests/uniform_initializer_utils.cpp > index ec64be1..2a1a168 100644 > --- a/src/compiler/glsl/tests/uniform_initializer_utils.cpp > +++ b/src/compiler/glsl/tests/uniform_initializer_utils.cpp > @@ -96,6 +96,12 @@ generate_data_element(void *mem_ctx, const glsl_type *type, >case GLSL_TYPE_DOUBLE: >data.d[i] = double(values[idx]); >break; > + case GLSL_TYPE_UINT64: > + data.u64[i] = (uint64_t) values[idx]; > + break; > + case GLSL_TYPE_INT64: > + data.i64[i] = (int64_t) values[idx]; > + break; >case GLSL_TYPE_ATOMIC_UINT: >case GLSL_TYPE_STRUCT: >case GLSL_TYPE_ARRAY: > @@ -130,6 +136,12 @@ generate_data_element(void *mem_ctx, const glsl_type > *type, >case GLSL_TYPE_DOUBLE: >ASSERT_EQ(data.d[i], val->value.d[i]); >break; > + case GLSL_TYPE_UINT64: > + ASSERT_EQ(data.u64[i], val->value.u64[i]); > + break; > + case GLSL_TYPE_INT64: > + ASSERT_EQ(data.i64[i], val->value.i64[i]); > + break; >case GLSL_TYPE_ATOMIC_UINT: >case GLSL_TYPE_STRUCT: >case GLSL_TYPE_ARRAY: > @@ -236,6 +248,12 @@ verify_data(gl_constant_value *storage, unsigned > storage_array_size, >case GLSL_TYPE_DOUBLE: > EXPECT_EQ(val->value.d[i], *(double *)&storage[i*2].i); > break; > + case GLSL_TYPE_UINT64: > + EXPECT_EQ(val->value.u64[i], *(uint64_t *)&storage[i*2].i); > + break; > + case GLSL_TYPE_INT64: > + 
EXPECT_EQ(val->value.i64[i], *(int64_t *)&storage[i*2].i); > + break; > case GLSL_TYPE_ATOMIC_UINT: >case GLSL_TYPE_STRUCT: >case GLSL_TYPE_ARRAY: > -- > 2.9.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] nir: Delete unused arg in get_iteration
The nir_alu_instr argument is not needed in get_iteration. Signed-off-by: Elie Tournier --- I don't have git access. Please push it for me. BR, Elie --- src/compiler/nir/nir_loop_analyze.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c index a5f464a45d..6afad9e603 100644 --- a/src/compiler/nir/nir_loop_analyze.c +++ b/src/compiler/nir/nir_loop_analyze.c @@ -359,7 +359,7 @@ find_loop_terminators(loop_info_state *state) static int32_t get_iteration(nir_op cond_op, nir_const_value *initial, nir_const_value *step, - nir_const_value *limit, nir_alu_instr *alu) + nir_const_value *limit) { int32_t iter; @@ -490,7 +490,7 @@ calculate_iterations(nir_const_value *initial, nir_const_value *step, trip_offset = 1; } - int iter_int = get_iteration(cond_alu->op, initial, step, limit, alu); + int iter_int = get_iteration(cond_alu->op, initial, step, limit); /* If iter_int is negative the loop is ill-formed or is the conditional is * unsigned with a huge iteration count so don't bother going any further. -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC] nir: Improve induction variables detection
The current code detects only basic induction variables (i = i +/- c). I'm working on improving the detection in order to recognize derived induction variables (j = c1 * i +/- c2). I obtained the code below. I'm not sure about nir_derived_induction_var. Should I replace "nir_loop_variable *def_outside_loop" by "nir_basic_induction_var"? Comments are welcome. The goal is to implement a strength reduction algorithm. Signed-off-by: Elie Tournier --- src/compiler/nir/nir_loop_analyze.c | 46 - 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c index 6afad9e603..587726914a 100644 --- a/src/compiler/nir/nir_loop_analyze.c +++ b/src/compiler/nir/nir_loop_analyze.c @@ -29,10 +29,12 @@ typedef enum { undefined, invariant, not_invariant, - basic_induction + basic_induction, + derived_induction } nir_loop_variable_type; struct nir_basic_induction_var; +struct nir_derived_induction_var; typedef struct { /* A link for the work list */ @@ -49,6 +51,9 @@ typedef struct { /* If this is of type basic_induction */ struct nir_basic_induction_var *ind; + /* If this is of type derived_induction */ + struct nir_derived_induction_var *derived_ind; + /* True if variable is in an if branch or a nested loop */ bool in_control_flow; @@ -61,6 +66,14 @@ typedef struct nir_basic_induction_var { nir_loop_variable *def_outside_loop; /* The phi-src outside the loop */ } nir_basic_induction_var; +typedef struct nir_derived_induction_var { + nir_op alu_op; /* The type of alu-operation*/ + nir_loop_variable *alu_def; /* The def of the alu-operation */ + nir_loop_variable *invariant_0; /* The invariant alu-operand*/ + nir_loop_variable *invariant_1; /* The invariant alu-operand*/ + nir_loop_variable *def_outside_loop; /* The phi-src outside the loop */ +} nir_derived_induction_var; + typedef struct { /* The loop we store information for */ nir_loop *loop; @@ -227,6 +240,7 @@ compute_induction_information(loop_info_state *state)
nir_phi_instr *phi = nir_instr_as_phi(var->def->parent_instr); nir_basic_induction_var *biv = rzalloc(state, nir_basic_induction_var); + nir_derived_induction_var *div = rzalloc(state, nir_derived_induction_var); nir_foreach_phi_src(src, phi) { nir_loop_variable *src_var = get_loop_var(src->src.ssa, state); @@ -239,9 +253,11 @@ compute_induction_information(loop_info_state *state) if (!src_var->in_loop) { biv->def_outside_loop = src_var; +div->def_outside_loop = src_var; } else if (is_var_alu(src_var)) { nir_alu_instr *alu = nir_instr_as_alu(src_var->def->parent_instr); +/* basic induction variable (i = i +/- c) */ if (nir_op_infos[alu->op].num_inputs == 2) { biv->alu_def = src_var; biv->alu_op = alu->op; @@ -253,6 +269,23 @@ compute_induction_information(loop_info_state *state) biv->invariant = get_loop_var(alu->src[i].src.ssa, state); } } +/* derived induction variable (j = c1 * i +/- c2 ) */ +if (nir_op_infos[alu->op].num_inputs == 3) { + div->alu_def = src_var; + div->alu_op = alu->op; + + for (unsigned i = 0; i < 3; i++) { + /* Is one of the operands const, an other the induction var, + * and the last an other const + */ + if (alu->src[i].src.ssa->parent_instr->type == nir_instr_type_load_const && + alu->src[1-i].src.ssa == &phi->dest.ssa && + alu->src[2-i].src.ssa->parent_instr->type == nir_instr_type_load_const) { + div->invariant_0 = get_loop_var(alu->src[i].src.ssa, state); + div->invariant_1 = get_loop_var(alu->src[2-i].src.ssa, state); + } + } +} } } @@ -265,8 +298,19 @@ compute_induction_information(loop_info_state *state) var->ind = biv; found_induction_var = true; + } else if (div->alu_def && div->def_outside_loop && div->invariant_0 && + div->invariant_1 && is_var_constant(div->def_outside_loop)) { + assert(is_var_constant(div->invariant_0)); + assert(is_var_constant(div->invariant_1)); + div->alu_def->type = derived_induction; + div->alu_def->derived_ind = div; + var->type = derived_induction; + var->derived_ind = div; + + found_induction_
Re: [Mesa-dev] [PATCH] nir: Delete unused arg in get_iteration
Could someone review this patch? On 21/02/17 16:15, Elie Tournier wrote: nir_const_value is not needed in get_iteration Oops, typo: nir_alu_instr is not needed in get_iteration Signed-off-by: Elie Tournier --- I don't have the git access. Please push it for me. BR, Elie --- src/compiler/nir/nir_loop_analyze.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c index a5f464a45d..6afad9e603 100644 --- a/src/compiler/nir/nir_loop_analyze.c +++ b/src/compiler/nir/nir_loop_analyze.c @@ -359,7 +359,7 @@ find_loop_terminators(loop_info_state *state) static int32_t get_iteration(nir_op cond_op, nir_const_value *initial, nir_const_value *step, - nir_const_value *limit, nir_alu_instr *alu) + nir_const_value *limit) { int32_t iter; @@ -490,7 +490,7 @@ calculate_iterations(nir_const_value *initial, nir_const_value *step, trip_offset = 1; } - int iter_int = get_iteration(cond_alu->op, initial, step, limit, alu); + int iter_int = get_iteration(cond_alu->op, initial, step, limit); /* If iter_int is negative the loop is ill-formed or is the conditional is * unsigned with a huge iteration count so don't bother going any further. -- Elie Tournier Collabora Ltd. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Thoughts on fp64 for GLES?
On Fri, Jan 25, 2019 at 02:15:25PM -0600, Jason Ekstrand wrote: > On Fri, Jan 25, 2019 at 1:53 PM Stéphane Marchesin < > stephane.marche...@gmail.com> wrote: > > > On Fri, Jan 25, 2019 at 2:25 AM Gert Wollny wrote: > > > > > > Am Donnerstag, den 24.01.2019, 22:25 -0800 schrieb Stéphane Marchesin: > > > > > > > > Yes, it's for running virgl on top of GLES. To emulate fp64 in GL on > > > > the guest side, we need fp64 on the host... > > > > > > BTW: we could also get it emulated from the guest side. When Elie (in > > > CC) initially proposed the fp64 emulation series it was for r600 and > > > TGSI was emitted. The created shaders are horribly long and it is > > > certainly not performant, but if it's just for getting OpenGL 4.0 > > > exposed it should be good enough. > > > > Yes, Ilia suggested this on IRC yesterday. My impression is that not > > many applications/games need high performance fp64 (it's likely mostly > > compute stuff, which is not our target). I could be wrong though. If > > anyone knows differently, please tell us :) > > > > In our experience, we have yet to see an app actually use the extension. > If we do have such apps, it'd be better to do it in hardware when > available. However, if it's just so that you can claim support, maybe a > GLES extension isn't worth the bother? I don't have a particularly strong > opinion at the moment beyond "fp64 sounds like the most non-ES thing ever". Out of curiousity, what did you want to put in that extension? > > > > > > > > I'm not sure though how much work it would be to add this to the soft > > > fp64 as it has now landed for NIR, though. > > > > Yes, with virgl not using NIR, I am not sure how much work soft fp64 > > will require. > > > > The core of the soft fp64 stuff is a library of GLSL functions which > actually implement it. We just compile them to NIR and then lower fp64 > math to function calls and inline. 
You could write a lowering pass in > GLSL, TGSI, or a back-end compiler based on those easily enough. I think the best option is to write a lowering pass in GLSL to be able to use some optimizations later on. I will see if I can come up with a proof-of-concept. > > --Jason > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Thoughts on fp64 for GLES?
On Fri, Jan 25, 2019 at 11:52:56AM -0800, Stéphane Marchesin wrote: > On Fri, Jan 25, 2019 at 2:25 AM Gert Wollny wrote: > > > > Am Donnerstag, den 24.01.2019, 22:25 -0800 schrieb Stéphane Marchesin: > > > > > > Yes, it's for running virgl on top of GLES. To emulate fp64 in GL on > > > the guest side, we need fp64 on the host... > > > > BTW: we could also get it emulated from the guest side. When Elie (in > > CC) initially proposed the fp64 emulation series it was for r600 and > > TGSI was emitted. The created shaders are horribly long and it is > > certainly not performant, but if it's just for getting OpenGL 4.0 > > exposed it should be good enough. > > Yes, Ilia suggested this on IRC yesterday. My impression is that not > many applications/games need high performance fp64 (it's likely mostly > compute stuff, which is not our target). I could be wrong though. If > anyone knows differently, please tell us :) > > > > > I'm not sure though how much work it would be to add this to the soft > > fp64 as it has now landed for NIR, though. > > Yes, with virgl not using NIR, I am not sure how much work soft fp64 > will require. I spent a bit of time on the project recently. My thinking so far: * FP64 is bad. But everyone knows that. :) * Using the current soft fp64 requires emulating int64. * Soft fp64 and int64 involve function calls, which are, iiuc, not really supported in TGSI. * Soft fp64 is tied to NIR. Some passes/hacks need to be ported to GLSL IR. So the project will require a lot of work. Elie > > Stéphane > > > > > > Best, > > Gert > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Thoughts on fp64 for GLES?
On Wednesday, 13 February 2019, Ilia Mirkin wrote: > On Wed, Feb 13, 2019 at 12:47 PM Elie Tournier > wrote: > > > > On Fri, Jan 25, 2019 at 11:52:56AM -0800, Stéphane Marchesin wrote: > > > On Fri, Jan 25, 2019 at 2:25 AM Gert Wollny > wrote: > > > > > > > > Am Donnerstag, den 24.01.2019, 22:25 -0800 schrieb Stéphane > Marchesin: > > > > > > > > > > Yes, it's for running virgl on top of GLES. To emulate fp64 in GL > on > > > > > the guest side, we need fp64 on the host... > > > > > > > > BTW: we could also get it emulated from the guest side. When Elie (in > > > > CC) initially proposed the fp64 emulation series it was for r600 and > > > > TGSI was emitted. The created shaders are horribly long and it is > > > > certainly not performant, but if it's just for getting OpenGL 4.0 > > > > exposed it should be good enough. > > > > > > Yes, Ilia suggested this on IRC yesterday. My impression is that not > > > many applications/games need high performance fp64 (it's likely mostly > > > compute stuff, which is not our target). I could be wrong though. If > > > anyone knows differently, please tell us :) > > > > > > > > > > > I'm not sure though how much work it would be to add this to the soft > > > > fp64 as it has now landed for NIR, though. > > > > > > Yes, with virgl not using NIR, I am not sure how much work soft fp64 > > > will require. > > > > I spent a bit of time on the project recently. > > My thinking so far: > > * FP64 is bad . But everyone knows that. :) > > * Using the current soft fp64 require to emulate int64. > > * Soft fp64 and int64 involve function call which is, iiuc, not really > > supported in TGSI. > > * Soft fp64 is tied to NIR. Some pass/hack need to be port to GLSLIR. > > > > So the project will require a lot of work. > > But what's the alternative? Let's say you make a spec to expose > "proper" fp64 in GLES. No one outside mesa will implement this (why > bother). Certainly not the Adreno/Mali proprietary stacks of the > world. 
I'm not saying that we should get an extension. My point was, it's a lot of work. > > And if you are on a stack that implements this in GLES, you might as > well be using desktop GL anyways... > > So going back to the original -- what use-case are you trying to cover > that's not already covered some other way? iiuc, Stéphane wants to run desktop GL on top of GLES. In order to expose a higher version of GL, he needs fp64 support. > > -ilia > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Thoughts on fp64 for GLES?
On Wednesday, 13 February 2019, Ilia Mirkin wrote: > On Wed, Feb 13, 2019 at 1:29 PM Elie Tournier > wrote: > > > > > > > > On Wednesday, 13 February 2019, Ilia Mirkin > wrote: > >> > >> On Wed, Feb 13, 2019 at 12:47 PM Elie Tournier > wrote: > >> > > >> > On Fri, Jan 25, 2019 at 11:52:56AM -0800, Stéphane Marchesin wrote: > >> > > On Fri, Jan 25, 2019 at 2:25 AM Gert Wollny > wrote: > >> > > > > >> > > > Am Donnerstag, den 24.01.2019, 22:25 -0800 schrieb Stéphane > Marchesin: > >> > > > > > >> > > > > Yes, it's for running virgl on top of GLES. To emulate fp64 in > GL on > >> > > > > the guest side, we need fp64 on the host... > >> > > > > >> > > > BTW: we could also get it emulated from the guest side. When Elie > (in > >> > > > CC) initially proposed the fp64 emulation series it was for r600 > and > >> > > > TGSI was emitted. The created shaders are horribly long and it is > >> > > > certainly not performant, but if it's just for getting OpenGL 4.0 > >> > > > exposed it should be good enough. > >> > > > >> > > Yes, Ilia suggested this on IRC yesterday. My impression is that not > >> > > many applications/games need high performance fp64 (it's likely > mostly > >> > > compute stuff, which is not our target). I could be wrong though. If > >> > > anyone knows differently, please tell us :) > >> > > > >> > > > > >> > > > I'm not sure though how much work it would be to add this to the > soft > >> > > > fp64 as it has now landed for NIR, though. > >> > > > >> > > Yes, with virgl not using NIR, I am not sure how much work soft fp64 > >> > > will require. > >> > > >> > I spent a bit of time on the project recently. > >> > My thinking so far: > >> > * FP64 is bad . But everyone knows that. :) > >> > * Using the current soft fp64 require to emulate int64. > >> > * Soft fp64 and int64 involve function call which is, iiuc, not really > >> > supported in TGSI. > >> > * Soft fp64 is tied to NIR. Some pass/hack need to be port to GLSLIR. 
> >> > > >> > So the project will require a lot of work. > >> > >> But what's the alternative? Let's say you make a spec to expose > >> "proper" fp64 in GLES. No one outside mesa will implement this (why > >> bother). Certainly not the Adreno/Mali proprietary stacks of the > >> world. > > > > > > I'm not saying that we should get an extension. > > My point was, it's a lot of work. > >> > >> > >> And if you are on a stack that implements this in GLES, you might as > >> well be using desktop GL anyways... > >> > >> So going back to the original -- what use-case are you trying to cover > >> that's not already covered some other way? > > > > > > iiuc, Stephane want to run GL desktop on top of GLES. > > In order to expose a bigger version of GL, he need fp64 support. > > Right, I get that high-level desire. But it seems like if the > extension route is taken, this will only happen for cases where a > desktop GL driver is readily available as well already, so why the > requirement to run on GLES? No, the host will only support GLES. To be fair, I would prefer to avoid the extension route. > > -ilia > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Thoughts on fp64 for GLES?
On Wednesday, 13 February 2019, Stéphane Marchesin < stephane.marche...@gmail.com> wrote: > On Wed, Feb 13, 2019 at 10:29 AM Elie Tournier > wrote: > > > > > > > > On Wednesday, 13 February 2019, Ilia Mirkin > wrote: > >> > >> On Wed, Feb 13, 2019 at 12:47 PM Elie Tournier > wrote: > >> > > >> > On Fri, Jan 25, 2019 at 11:52:56AM -0800, Stéphane Marchesin wrote: > >> > > On Fri, Jan 25, 2019 at 2:25 AM Gert Wollny > wrote: > >> > > > > >> > > > Am Donnerstag, den 24.01.2019, 22:25 -0800 schrieb Stéphane > Marchesin: > >> > > > > > >> > > > > Yes, it's for running virgl on top of GLES. To emulate fp64 in > GL on > >> > > > > the guest side, we need fp64 on the host... > >> > > > > >> > > > BTW: we could also get it emulated from the guest side. When Elie > (in > >> > > > CC) initially proposed the fp64 emulation series it was for r600 > and > >> > > > TGSI was emitted. The created shaders are horribly long and it is > >> > > > certainly not performant, but if it's just for getting OpenGL 4.0 > >> > > > exposed it should be good enough. > >> > > > >> > > Yes, Ilia suggested this on IRC yesterday. My impression is that not > >> > > many applications/games need high performance fp64 (it's likely > mostly > >> > > compute stuff, which is not our target). I could be wrong though. If > >> > > anyone knows differently, please tell us :) > >> > > > >> > > > > >> > > > I'm not sure though how much work it would be to add this to the > soft > >> > > > fp64 as it has now landed for NIR, though. > >> > > > >> > > Yes, with virgl not using NIR, I am not sure how much work soft fp64 > >> > > will require. > >> > > >> > I spent a bit of time on the project recently. > >> > My thinking so far: > >> > * FP64 is bad . But everyone knows that. :) > >> > * Using the current soft fp64 require to emulate int64. > >> > * Soft fp64 and int64 involve function call which is, iiuc, not really > >> > supported in TGSI. > >> > * Soft fp64 is tied to NIR. 
Some pass/hack need to be port to GLSLIR. > >> > > >> > So the project will require a lot of work. > >> > >> But what's the alternative? Let's say you make a spec to expose > >> "proper" fp64 in GLES. No one outside mesa will implement this (why > >> bother). Certainly not the Adreno/Mali proprietary stacks of the > >> world. > > > > > > I'm not saying that we should get an extension. > > My point was, it's a lot of work. > >> > >> > >> And if you are on a stack that implements this in GLES, you might as > >> well be using desktop GL anyways... > >> > >> So going back to the original -- what use-case are you trying to cover > >> that's not already covered some other way? > > > > > > iiuc, Stephane want to run GL desktop on top of GLES. > > In order to expose a bigger version of GL, he need fp64 support. > > > Yes, at a high level, softfp64 seems to solve the problem we have. If > a TGSI lowering pass is too complex, could we do it as a GLSL lowering > pass? > > Hi Stéphane, Currently, we lower everything in GLSL and then convert to TGSI. The issue is that the lowering pass generates something like 'call _umul builtin'. Then we try to convert it to TGSI, and this is where the problem appears. A solution would be to inline the function in GLSL, but I'm afraid that the resulting shader will be huge. > Stéphane > > >> > >> > >> -ilia > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] nir/algebraic: Replace a-fract(a) with floor(a)
That make me think that I have some algebraic patches too. Reviewed-by: Elie Tournier On Saturday, 23 February 2019, Ian Romanick wrote: > From: Ian Romanick > > I noticed this while looking at a shader that was affected by Tim's > "more loop unrolling" series. > > All Gen6+ platforms had similar results. (Skylake shown) > total instructions in shared programs: 15437001 -> 15435259 (-0.01%) > instructions in affected programs: 213651 -> 211909 (-0.82%) > helped: 988 > HURT: 0 > helped stats (abs) min: 1 max: 27 x̄: 1.76 x̃: 1 > helped stats (rel) min: 0.15% max: 11.54% x̄: 1.14% x̃: 0.59% > 95% mean confidence interval for instructions value: -1.89 -1.63 > 95% mean confidence interval for instructions %-change: -1.23% -1.05% > Instructions are helped. > > total cycles in shared programs: 383007378 -> 382997063 (<.01%) > cycles in affected programs: 1650825 -> 1640510 (-0.62%) > helped: 679 > HURT: 302 > helped stats (abs) min: 1 max: 348 x̄: 23.39 x̃: 14 > helped stats (rel) min: 0.04% max: 28.77% x̄: 1.61% x̃: 0.98% > HURT stats (abs) min: 1 max: 250 x̄: 18.43 x̃: 7 > HURT stats (rel) min: 0.04% max: 25.86% x̄: 1.41% x̃: 0.53% > 95% mean confidence interval for cycles value: -13.05 -7.98 > 95% mean confidence interval for cycles %-change: -0.86% -0.50% > Cycles are helped. > > Iron Lake and GM45 had similar results. (GM45 shown) > total instructions in shared programs: 5043616 -> 5043010 (-0.01%) > instructions in affected programs: 119691 -> 119085 (-0.51%) > helped: 432 > HURT: 0 > helped stats (abs) min: 1 max: 27 x̄: 1.40 x̃: 1 > helped stats (rel) min: 0.10% max: 8.11% x̄: 0.66% x̃: 0.39% > 95% mean confidence interval for instructions value: -1.58 -1.23 > 95% mean confidence interval for instructions %-change: -0.72% -0.59% > Instructions are helped. 
> > total cycles in shared programs: 128139812 -> 128135762 (<.01%) > cycles in affected programs: 3829724 -> 3825674 (-0.11%) > helped: 602 > HURT: 0 > helped stats (abs) min: 2 max: 486 x̄: 6.73 x̃: 6 > helped stats (rel) min: 0.02% max: 4.85% x̄: 0.19% x̃: 0.10% > 95% mean confidence interval for cycles value: -8.40 -5.05 > 95% mean confidence interval for cycles %-change: -0.22% -0.16% > Cycles are helped. > --- > src/compiler/nir/nir_opt_algebraic.py | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/src/compiler/nir/nir_opt_algebraic.py > b/src/compiler/nir/nir_opt_algebraic.py > index ba27d702b5d..c8fc938cc8f 100644 > --- a/src/compiler/nir/nir_opt_algebraic.py > +++ b/src/compiler/nir/nir_opt_algebraic.py > @@ -127,6 +127,7 @@ optimizations = [ > (('flrp@32', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), > 'options->lower_flrp32'), > (('flrp@64', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), > 'options->lower_flrp64'), > (('ffloor', a), ('fsub', a, ('ffract', a)), 'options->lower_ffloor'), > + (('fadd', a, ('fneg', ('ffract', a))), ('ffloor', a), > '!options->lower_ffloor'), > (('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'), > (('fceil', a), ('fneg', ('ffloor', ('fneg', a))), > 'options->lower_fceil'), > (('~fadd', ('fmul', a, ('fadd', 1.0, ('fneg', ('b2f', 'c@1', > ('fmul', b, ('b2f', c))), ('bcsel', c, b, a), 'options->lower_flrp32'), > -- > 2.14.4 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] doxygen: update glsl link
--- doxygen/glsl.doxy | 7 +++ 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/doxygen/glsl.doxy b/doxygen/glsl.doxy index 9915ba2..0f4c822 100644 --- a/doxygen/glsl.doxy +++ b/doxygen/glsl.doxy @@ -9,11 +9,10 @@ PROJECT_NAME = "Mesa GLSL module" #--- # configuration options related to the input files #--- -INPUT = ../src/glsl/ +INPUT = ../src/compiler/glsl/ +FILE_PATTERNS = *.c *.cpp *.h RECURSIVE = NO -EXCLUDE= ../src/glsl/glsl_lexer.cpp \ - ../src/glsl/glsl_parser.cpp \ - ../src/glsl/glsl_parser.h +EXCLUDE= EXCLUDE_PATTERNS = #--- # configuration options related to the HTML output -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2] doxygen: update glsl link
Signed-off-by: Elie TOURNIER --- doxygen/glsl.doxy | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/doxygen/glsl.doxy b/doxygen/glsl.doxy index 9915ba2..ef71a4a 100644 --- a/doxygen/glsl.doxy +++ b/doxygen/glsl.doxy @@ -9,11 +9,12 @@ PROJECT_NAME = "Mesa GLSL module" #--- # configuration options related to the input files #--- -INPUT = ../src/glsl/ +INPUT = ../src/compiler/glsl/ +FILE_PATTERNS = *.c *.cpp *.h RECURSIVE = NO -EXCLUDE= ../src/glsl/glsl_lexer.cpp \ - ../src/glsl/glsl_parser.cpp \ - ../src/glsl/glsl_parser.h +EXCLUDE= ../src/compiler/glsl/glsl_lexer.cpp \ + ../src/compiler/glsl/glsl_parser.cpp \ + ../src/compiler/glsl/glsl_parser.h EXCLUDE_PATTERNS = #--- # configuration options related to the HTML output -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] doxygen: Generate Doxygen for NIR
Now, one can do the following to generate and read the nir Doxygen: cd $MESA_TOP/doxygen make firefox nir/index.html Signed-off-by: Elie TOURNIER --- doxygen/.gitignore | 1 + doxygen/Makefile| 3 ++- doxygen/header.html | 1 + doxygen/nir.doxy| 50 ++ 4 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 doxygen/nir.doxy diff --git a/doxygen/.gitignore b/doxygen/.gitignore index a5f3921..ed94bed 100644 --- a/doxygen/.gitignore +++ b/doxygen/.gitignore @@ -14,6 +14,7 @@ main math math_subset miniglx +nir radeondrm radeonfb radeon_subset diff --git a/doxygen/Makefile b/doxygen/Makefile index 01c2691..cbbb40e 100644 --- a/doxygen/Makefile +++ b/doxygen/Makefile @@ -18,7 +18,8 @@ FULL = \ tnl.doxy \ tnl_dd.doxy \ gbm.doxy \ - i965.doxy + i965.doxy \ + nir.doxy full: $(FULL:.doxy=.tag) $(foreach FILE,$(FULL),doxygen $(FILE);) diff --git a/doxygen/header.html b/doxygen/header.html index abd736f..8e656c1 100644 --- a/doxygen/header.html +++ b/doxygen/header.html @@ -8,6 +8,7 @@ core | glapi | glsl | +nir | vbo | math | shader | diff --git a/doxygen/nir.doxy b/doxygen/nir.doxy new file mode 100644 index 000..7e431ac --- /dev/null +++ b/doxygen/nir.doxy @@ -0,0 +1,50 @@ +# Doxyfile 0.1 + +@INCLUDE = common.doxy + +#--- +# General configuration options +#--- +PROJECT_NAME = "Mesa NIR module" +#--- +# Configuration options related to the input files +#--- +INPUT = ../src/compiler/nir +FILE_PATTERNS = *.c *.cpp *.h +RECURSIVE = NO +EXCLUDE= +EXCLUDE_PATTERNS = +EXAMPLE_PATH = +EXAMPLE_PATTERNS = +EXAMPLE_RECURSIVE = NO +IMAGE_PATH = +INPUT_FILTER = +FILTER_SOURCE_FILES= NO +#--- +# Configuration options related to the HTML output +#--- +HTML_OUTPUT= nir +#--- +# Configuration options related to the preprocessor +#--- +ENABLE_PREPROCESSING = YES +MACRO_EXPANSION= NO +EXPAND_ONLY_PREDEF = NO +SEARCH_INCLUDES= YES +INCLUDE_PATH = ../include/ +INCLUDE_FILE_PATTERNS = +PREDEFINED = +EXPAND_AS_DEFINED = +SKIP_FUNCTION_MACROS = YES +#--- +# Configuration::additions related 
to external references +#--- +TAGFILES = glsl.tag=../glsl \ + main.tag=../main \ + math.tag=../math \ + swrast.tag=../swrast \ + swrast_setup.tag=../swrast_setup \ + tnl.tag=../tnl \ + tnl_dd.tag=../tnl_dd \ + vbo.tag=vbo +GENERATE_TAGFILE = nir.tag -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2] doxygen: Generate Doxygen for NIR
Now, one can do the following to generate and read the nir Doxygen: cd $MESA_TOP/doxygen make firefox nir/index.html Update v2: Correct TAGFILES in nir.doxy Signed-off-by: Elie TOURNIER --- doxygen/.gitignore | 1 + doxygen/Makefile| 3 ++- doxygen/header.html | 1 + doxygen/nir.doxy| 50 ++ 4 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 doxygen/nir.doxy diff --git a/doxygen/.gitignore b/doxygen/.gitignore index a5f3921..ed94bed 100644 --- a/doxygen/.gitignore +++ b/doxygen/.gitignore @@ -14,6 +14,7 @@ main math math_subset miniglx +nir radeondrm radeonfb radeon_subset diff --git a/doxygen/Makefile b/doxygen/Makefile index 01c2691..cbbb40e 100644 --- a/doxygen/Makefile +++ b/doxygen/Makefile @@ -18,7 +18,8 @@ FULL = \ tnl.doxy \ tnl_dd.doxy \ gbm.doxy \ - i965.doxy + i965.doxy \ + nir.doxy full: $(FULL:.doxy=.tag) $(foreach FILE,$(FULL),doxygen $(FILE);) diff --git a/doxygen/header.html b/doxygen/header.html index abd736f..8e656c1 100644 --- a/doxygen/header.html +++ b/doxygen/header.html @@ -8,6 +8,7 @@ core | glapi | glsl | +nir | vbo | math | shader | diff --git a/doxygen/nir.doxy b/doxygen/nir.doxy new file mode 100644 index 000..cad7380 --- /dev/null +++ b/doxygen/nir.doxy @@ -0,0 +1,50 @@ +# Doxyfile 0.1 + +@INCLUDE = common.doxy + +#--- +# General configuration options +#--- +PROJECT_NAME = "Mesa NIR module" +#--- +# Configuration options related to the input files +#--- +INPUT = ../src/compiler/nir +FILE_PATTERNS = *.c *.cpp *.h +RECURSIVE = NO +EXCLUDE= +EXCLUDE_PATTERNS = +EXAMPLE_PATH = +EXAMPLE_PATTERNS = +EXAMPLE_RECURSIVE = NO +IMAGE_PATH = +INPUT_FILTER = +FILTER_SOURCE_FILES= NO +#--- +# Configuration options related to the HTML output +#--- +HTML_OUTPUT= nir +#--- +# Configuration options related to the preprocessor +#--- +ENABLE_PREPROCESSING = YES +MACRO_EXPANSION= NO +EXPAND_ONLY_PREDEF = NO +SEARCH_INCLUDES= YES +INCLUDE_PATH = ../include/ +INCLUDE_FILE_PATTERNS = +PREDEFINED = +EXPAND_AS_DEFINED = +SKIP_FUNCTION_MACROS = YES 
+#--- +# Configuration::additions related to external references +#--- +TAGFILES = glsl.tag=../glsl \ + main.tag=../main \ + math.tag=../math \ + swrast.tag=../swrast \ + swrast_setup.tag=../swrast_setup \ + tnl.tag=../tnl \ + tnl_dd.tag=../tnl_dd \ + vbo.tag=../vbo +GENERATE_TAGFILE = nir.tag -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] Update Doxygen for Windows users
Now Windows users have the same doxygen files as *nix users. Not tested (I don't have a Windows machine). Signed-off-by: Elie TOURNIER --- doxygen/doxy.bat | 7 +++ 1 file changed, 7 insertions(+) diff --git a/doxygen/doxy.bat b/doxygen/doxy.bat index e566ca3..408964e 100644 --- a/doxygen/doxy.bat +++ b/doxygen/doxy.bat @@ -6,6 +6,9 @@ doxygen swrast_setup.doxy doxygen tnl.doxy doxygen core.doxy doxygen glapi.doxy +doxygen glsl.doxy +doxygen nir.doxy +doxygen i965.doxy echo Building again, to resolve tags doxygen tnl_dd.doxy @@ -14,4 +17,8 @@ doxygen math.doxy doxygen swrast.doxy doxygen swrast_setup.doxy doxygen tnl.doxy +doxygen core.doxy doxygen glapi.doxy +doxygen glsl.doxy +doxygen nir.doxy +doxygen i965.doxy -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] egl: add gitignore
Since commit ce562f9e3fa, two new files are generated. We don't want to track them. Signed-off-by: Elie Tournier --- src/egl/.gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 src/egl/.gitignore diff --git a/src/egl/.gitignore b/src/egl/.gitignore new file mode 100644 index 00..32331e9f3f --- /dev/null +++ b/src/egl/.gitignore @@ -0,0 +1,2 @@ +g_egldispatchstubs.c +g_egldispatchstubs.h -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/8] nir: Undo possible damage caused by rearranging or-compounded float compares
On Tue, Jan 16, 2018 at 04:44:42PM -0800, Ian Romanick wrote: > From: Ian Romanick > > shader-db results: > > Skylake and Broadwell had similar results (Skylake shown) > total instructions in shared programs: 14526033 -> 14526021 (<.01%) > instructions in affected programs: 1450 -> 1438 (-0.83%) > helped: 12 > HURT: 0 > helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 > helped stats (rel) min: 0.68% max: 0.86% x̄: 0.83% x̃: 0.86% > > total cycles in shared programs: 533121762 -> 533118710 (<.01%) > cycles in affected programs: 75423 -> 72371 (-4.05%) > helped: 11 > HURT: 1 > helped stats (abs) min: 60 max: 300 x̄: 278.18 x̃: 300 > helped stats (rel) min: 1.15% max: 4.42% x̄: 4.09% x̃: 4.42% > HURT stats (abs) min: 8 max: 8 x̄: 8.00 x̃: 8 > HURT stats (rel) min: 0.46% max: 0.46% x̄: 0.46% x̃: 0.46% > > Haswell, Ivy Bridge and Sandy Bridge had similar results (Haswell shown). > total cycles in shared programs: 409473831 -> 409472382 (<.01%) > cycles in affected programs: 46203 -> 44754 (-3.14%) > helped: 12 > HURT: 2 > helped stats (abs) min: 1 max: 140 x̄: 121.75 x̃: 140 > helped stats (rel) min: 0.07% max: 4.01% x̄: 3.41% x̃: 4.01% > HURT stats (abs) min: 4 max: 8 x̄: 6.00 x̃: 6 > HURT stats (rel) min: 0.17% max: 0.45% x̄: 0.31% x̃: 0.31% > abs t: 3.56, p: 0.39% > rel t: 3.19, p: 0.78% > Positive result. > > No changes on GM45 or Iron Lake. 
> > Signed-off-by: Ian Romanick > --- > src/compiler/nir/nir_opt_algebraic.py | 7 +++ > 1 file changed, 7 insertions(+) > > diff --git a/src/compiler/nir/nir_opt_algebraic.py > b/src/compiler/nir/nir_opt_algebraic.py > index 09538ba..cb690d3 100644 > --- a/src/compiler/nir/nir_opt_algebraic.py > +++ b/src/compiler/nir/nir_opt_algebraic.py > @@ -205,6 +205,13 @@ optimizations = [ > (('~ior', ('fge', a, '#b'), ('fge', a, '#c')), ('fge', a, ('fmin', b, > c))), > (('~ior', ('fge', '#a', c), ('fge', '#b', c)), ('fge', ('fmax', a, b), > c)), > > + # These patterns can result when (a < b || a < c) => (a < min(b, c)) > + # transformations occur before constant propagation and loop-unrolling. > + (('~flt', a, ('fmax', b, a)), ('flt', a, b)), > + (('~flt', ('fmin', a, b), a), ('flt', b, a)), > + (('~fge', a, ('fmin', b, a)), True), > + (('~fge', ('fmax', a, b), a), True), > + Hello Ian, Inspired by the lines above, I try to add: + (('~flt', a, ('fmin', b, a)), False), + (('~flt', ('fmax', a, b), a), False), Using my shader-db I've got the following results (KabyLake processor): total instructions in shared programs: 12973560 -> 12973510 (-0.00%) instructions in affected programs: 514 -> 464 (-9.73%) helped: 2 HURT: 0 total cycles in shared programs: 513052691 -> 513051607 (-0.00%) cycles in affected programs: 20638 -> 19554 (-5.25%) helped: 2 HURT: 0 You can probably add these 2 lines. ;) > (('fabs', ('slt', a, b)), ('slt', a, b)), > (('fabs', ('sge', a, b)), ('sge', a, b)), > (('fabs', ('seq', a, b)), ('seq', a, b)), > -- > 2.9.5 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/8] Algebraic optimizations
On Tue, Jan 16, 2018 at 04:44:39PM -0800, Ian Romanick wrote: > This is the first series to resurrect some work that I started as long > as 2.5 years ago. A lot of that work produced mixed bag results, but > that was before nir_opt_algebraic.py had the "is_used_once" modifier. > Without this, the last patch was more like 50 helped / 500 hurt on most > platforms. > > All of the data is presented using modifications that I made to > shader-db's report.py script. See the patch series > https://patchwork.freedesktop.org/series/36425/. > I added a comment on patch 3. Otherwise, the series is Reviewed-by: Elie Tournier > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] meson: fix typo in isl
Signed-off-by: Elie Tournier --- src/intel/isl/meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/intel/isl/meson.build b/src/intel/isl/meson.build index 789175e256..54024b4d11 100644 --- a/src/intel/isl/meson.build +++ b/src/intel/isl/meson.build @@ -101,5 +101,5 @@ if with_tests build_by_default : false, ) - test('isl_surf_get_imaage_offset', isl_surf_get_image_offset_test) + test('isl_surf_get_image_offset', isl_surf_get_image_offset_test) endif -- 2.14.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] docs: Fix GLSL compiler link
The doc hasn't been updated since we moved the GLSL compiler to src/compiler/glsl. I also updated the description of the standalone compiler. Signed-off-by: Elie Tournier --- docs/shading.html | 9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/shading.html b/docs/shading.html index cf989ce902..f50f316f2d 100644 --- a/docs/shading.html +++ b/docs/shading.html @@ -180,7 +180,7 @@ Here's an example of using the compiler to compile a vertex shader and emit GL_ARB_vertex_program-style instructions: -src/glsl/glsl_compiler --dump-ast myshader.vert +src/compiler/glsl/glsl_compiler --dump-ast myshader.vert Options include @@ -188,7 +188,10 @@ Options include --dump-ast - dump GPU code --dump-hir - dump high-level IR code --dump-lir - dump low-level IR code ---link - ??? +--dump-builder - dump GLSL IR code +--link - link shaders +--just-log - display shader / linker info if exist +--version - define the GLSL version to use @@ -196,7 +199,7 @@ Options include The source code for Mesa's shading language compiler is in the -src/glsl/ directory. +src/compiler/glsl/ directory. -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] shader-db: Update the README
Use the binary to run shader-db instead of run.py Signed-off-by: Elie Tournier --- README | 18 -- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/README b/README index 5e9bb2d..6f6a7e2 100644 --- a/README +++ b/README @@ -1,6 +1,6 @@ === What === -A giant pile of shaders from various apps, for whatever purpose. In +A giant pile of shaders from various apps, for whatever purpose. In particular, we use it to capture assembly output of the shader compiler for analysis of regressions in compiler behavior. @@ -16,10 +16,16 @@ MESA_SHADER_CAPTURE_PATH=dirpath executable # "fdupes" can be used to remove duplicates +=== Compiling === + +Some libraries are required when building. See section "Dependencies" below. +To build the binary, do: +make === i965 Usage === === Running shaders === + ./run shaders 2> err | tee new-run # To run just a subset: @@ -34,8 +40,8 @@ To compile shaders for an i965 PCI ID different from your system, pass to run. === Analysis === -./report.py old-run new-run +./report.py old-run new-run === radeonsi Usage === @@ -46,6 +52,7 @@ to run. Note that a debug mesa build required (ie. --enable-debug) === Analysis === + ./si-report.py old-run new-run === freedreno Usage === @@ -59,15 +66,22 @@ Note that a debug mesa build required (ie. --enable-debug) -1 option for disabling multi-threading is required to avoid garbled shader dumps. === Analysis === + ./fd-report.py old-run new-run === Dependencies === + run requires some GNU C extensions, render nodes (/dev/dri/renderD128), libepoxy, OpenMP, and Mesa configured with --with-egl-platforms=x11,drm === jemalloc === + Since run compiles shaders in different threads, malloc/free locking overhead from inside Mesa can be expensive. Preloading jemalloc can cut significant amounts of time: LD_PRELOAD=/usr/lib64/libjemalloc.so.1 ./run shaders 2> err | tee new-run + +=== Depreciated === + +run.py is obsolete. Use the 'run' binary instead. 
-- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] nir: add min/max optimisation
Add the following optimisations: min(x, -x) = -abs(x) min(x, -abs(x)) = -abs(x) min(x, abs(x)) = x max(x, -abs(x)) = x max(x, abs(x)) = abs(x) max(x, -x) = abs(x) shader-db: total instructions in shared programs: 13067779 -> 13067775 (-0.00%) instructions in affected programs: 249 -> 245 (-1.61%) helped: 4 HURT: 0 total cycles in shared programs: 252054838 -> 252054806 (-0.00%) cycles in affected programs: 504 -> 472 (-6.35%) helped: 2 HURT: 0 Signed-off-by: Elie Tournier --- src/compiler/nir/nir_opt_algebraic.py | 12 1 file changed, 12 insertions(+) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index a557f7bf37..0dfa53fbf4 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -171,6 +171,18 @@ optimizations = [ (('imax', a, a), a), (('umin', a, a), a), (('umax', a, a), a), + (('fmin', a, ('fneg', a)), ('fneg', ('fabs', a))), + (('imin', a, ('ineg', a)), ('ineg', ('iabs', a))), + (('fmin', a, ('fneg', ('fabs', a))), ('fneg', ('fabs', a))), + (('imin', a, ('ineg', ('iabs', a))), ('ineg', ('iabs', a))), + (('fmin', a, ('fabs', a)), a), + (('imin', a, ('iabs', a)), a), + (('fmax', a, ('fneg', ('fabs', a))), a), + (('imax', a, ('ineg', ('iabs', a))), a), + (('fmax', a, ('fabs', a)), ('fabs', a)), + (('imax', a, ('iabs', a)), ('iabs', a)), + (('fmax', a, ('fneg', a)), ('fabs', a)), + (('imax', a, ('ineg', a)), ('iabs', a)), (('~fmin', ('fmax', a, 0.0), 1.0), ('fsat', a), '!options->lower_fsat'), (('~fmax', ('fmin', a, 1.0), 0.0), ('fsat', a), '!options->lower_fsat'), (('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'), -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2] docs: Fix GLSL compiler link
The doc hasn't been updated since we moved the GLSL compiler to src/compiler/glsl. I also updated the description of the standalone compiler. v2: Mention that the just-log argument removes headers/separators. Mention that the version argument is mandatory. Since the version argument is mandatory, add --version to the command-line example. Signed-off-by: Elie Tournier --- docs/shading.html | 12 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/shading.html b/docs/shading.html index cf989ce902..b0ed249e1d 100644 --- a/docs/shading.html +++ b/docs/shading.html @@ -172,7 +172,7 @@ This tool is useful for: -After building Mesa, the compiler can be found at src/glsl/glsl_compiler +After building Mesa, the compiler can be found at src/compiler/glsl/glsl_compiler @@ -180,7 +180,7 @@ Here's an example of using the compiler to compile a vertex shader and emit GL_ARB_vertex_program-style instructions: -src/glsl/glsl_compiler --dump-ast myshader.vert +src/compiler/glsl/glsl_compiler --version XXX --dump-ast myshader.vert Options include @@ -188,7 +188,11 @@ Options include --dump-ast - dump GPU code --dump-hir - dump high-level IR code --dump-lir - dump low-level IR code ---link - ??? +--dump-builder - dump GLSL IR code +--link - link shaders +--just-log - display only shader / linker info if exist, +without any header or separator +--version - [Mandatory] define the GLSL version to use @@ -196,7 +200,7 @@ Options include The source code for Mesa's shading language compiler is in the -src/glsl/ directory. +src/compiler/glsl/ directory. -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC 00/10] WIP: NIR soft fp64 for ARB_gpu_shader_fp64 on gen6
I've had this series sitting on my laptop for too long, so I'm sending it even though it's still in progress. The goal of this work is to enable ARB_gpu_shader_fp64 on gen6. Most of the algorithms come from "Berkeley SoftFloat" [1]. You can find a branch on my github [2]. So far we have: Patches 1-5 seem to do the job correctly. Patches 6-9 fail the Piglit tests, but zero, inf and NaN are handled correctly. Some tests pass if I increase the tolerance. All comments and suggestions are very welcome. [1] http://www.jhauser.us/arithmetic/SoftFloat.html [2] https://github.com/Hopetech/mesa/tree/nir_arb_gpu_shader_fp64 Elie Tournier (10): nir/lower_double_ops: lower abs() nir/lower_double_ops: lower neg() nir/lower_double_ops: lower sign() nir/lower_double_ops: lower eq() nir/lower_double_ops: lower lt() nir/lower_double_ops: lower mul() nir/lower_double_ops: lower div() nir/lower_double_ops: lower add() nir/lower_double_ops: lower sub() mesa: enable ARB_gpu_shader_fp64 on Gen6 src/compiler/nir/nir.h | 11 +- src/compiler/nir/nir_lower_double_ops.c | 1713 ++ src/intel/compiler/brw_nir.c | 11 +- src/mesa/drivers/dri/i965/intel_extensions.c |1 + 4 files changed, 1734 insertions(+), 2 deletions(-) -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC 05/10] nir/lower_double_ops: lower lt()
Signed-off-by: Elie Tournier --- src/compiler/nir/nir.h | 3 +- src/compiler/nir/nir_lower_double_ops.c | 68 + src/intel/compiler/brw_nir.c| 3 +- 3 files changed, 72 insertions(+), 2 deletions(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 7b1a4655ca..89d5dd8e1b 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2572,7 +2572,8 @@ typedef enum { nir_lower_dabs = (1 << 9), nir_lower_dneg = (1 << 10), nir_lower_dsign = (1 << 11), - nir_lower_deq = (1 << 12) + nir_lower_deq = (1 << 12), + nir_lower_dlt = (1 << 13) } nir_lower_doubles_options; bool nir_lower_doubles(nir_shader *shader, nir_lower_doubles_options options); diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c index d3e05bf519..38743206a8 100644 --- a/src/compiler/nir/nir_lower_double_ops.c +++ b/src/compiler/nir/nir_lower_double_ops.c @@ -36,6 +36,16 @@ * - 32-bit integer and floating point arithmetic */ +static nir_ssa_def * +get_sign(nir_builder *b, nir_ssa_def *src) +{ + /* get bits 32-63 */ + nir_ssa_def *hi = nir_unpack_64_2x32_split_y(b, src); + + /* extract bit 32 of the high word */ + return nir_ubitfield_extract(b, hi, nir_imm_int(b, 31), nir_imm_int(b, 1)); +} + /* Creates a double with the exponent bits set to a given integer value */ static nir_ssa_def * set_exponent(nir_builder *b, nir_ssa_def *src, nir_ssa_def *exp) @@ -126,6 +136,18 @@ fix_inv_result(nir_builder *b, nir_ssa_def *res, nir_ssa_def *src, } static nir_ssa_def * +lt64(nir_builder *b, nir_ssa_def *x_hi, nir_ssa_def *x_lo, + nir_ssa_def *y_hi, nir_ssa_def *y_lo) +{ + nir_ssa_def *lt_hi = nir_flt(b, x_hi, y_hi); + nir_ssa_def *eq_hi = nir_ieq(b, x_hi, y_hi); + nir_ssa_def *lt_lo = nir_flt(b, x_lo, y_lo); + + /* return (x_hi < y_hi) || ((x_hi == y_hi) && (x_lo < y_lo)); */ + return nir_ior(b, lt_hi, nir_iand(b, eq_hi, lt_lo)); +} + +static nir_ssa_def * lower_rcp(nir_builder *b, nir_ssa_def *src) { /* normalize the input to avoid range issues */ @@ 
-557,6 +579,40 @@ lower_feq64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) nir_iand(b, eq_x_lo, eq_xy_hi; } +static nir_ssa_def * +lower_flt64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) +{ + nir_ssa_def *x_si = get_sign(b, x); + nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x); + nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); + nir_ssa_def *y_si = get_sign(b, y); + nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y); + nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y); + + nir_ssa_def *xy_lo = nir_ior(b, x_lo, y_lo); + nir_ssa_def *xy_hi = nir_ior(b, x_hi, y_hi); + nir_ssa_def *shl_xy_hi = nir_ishl(b, xy_hi, nir_imm_int(b, 1)); + nir_ssa_def *xy_hi_wo_si = nir_ior(b, shl_xy_hi, xy_lo); + nir_ssa_def *ne_xy = nir_ine(b, xy_hi_wo_si, nir_imm_int(b, 0)); + + /* if x or y is a nan +*return false; +* if (x_si != y_si) +*return x_si && (x_hi | y_hi)<<1)) | x_lo | y_lo) != 0); +* return +* x_si ? lt64(y_hi, y_lo, x_hi, x_lo) : lt64(x_hi, x_lo, y_hi, y_lo); +*/ + return nir_bcsel(b, +nir_ior(b, is_nan(b, x), is_nan(b, y)), +nir_imm_int(b, NIR_FALSE), +nir_bcsel(b, + nir_ine(b, x_si, y_si), + nir_iand(b, x_si, ne_xy), + nir_bcsel(b, x_si, + lt64(b, y_hi, y_lo, x_hi, x_lo), + lt64(b, x_hi, x_lo, y_hi, y_lo; +} + static bool lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options) { @@ -630,6 +686,11 @@ lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options) return false; break; + case nir_op_flt: + if (!(options & nir_lower_dlt)) + return false; + break; + default: return false; } @@ -695,6 +756,13 @@ lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options) } break; + case nir_op_flt: { + nir_ssa_def *src1 = nir_fmov_alu(&bld, instr->src[1], + instr->dest.dest.ssa.num_components); + result = lower_flt64(&bld, src, src1); + } + break; + default: unreachable("unhandled opcode"); } diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index 7b8b34b4ba..374230a89b 100644 --- 
a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -513,7 +513,8 @@ nir_optimize(nir_shader *nir, const struct brw_compiler *compiler, nir_lower_dabs | nir_lower_dneg |
[Mesa-dev] [RFC 01/10] nir/lower_double_ops: lower abs()
Signed-off-by: Elie Tournier --- src/compiler/nir/nir.h | 3 ++- src/compiler/nir/nir_lower_double_ops.c | 20 src/intel/compiler/brw_nir.c| 3 ++- 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index ce5b434d56..0ec09f54e2 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2568,7 +2568,8 @@ typedef enum { nir_lower_dceil = (1 << 5), nir_lower_dfract = (1 << 6), nir_lower_dround_even = (1 << 7), - nir_lower_dmod = (1 << 8) + nir_lower_dmod = (1 << 8), + nir_lower_dabs = (1 << 9) } nir_lower_doubles_options; bool nir_lower_doubles(nir_shader *shader, nir_lower_doubles_options options); diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c index b3543bc696..4ea8d339cc 100644 --- a/src/compiler/nir/nir_lower_double_ops.c +++ b/src/compiler/nir/nir_lower_double_ops.c @@ -456,6 +456,16 @@ lower_mod(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1) nir_imm_double(b, 0.0)); } +static nir_ssa_def * +lower_fabs64(nir_builder *b, nir_ssa_def *src) +{ + nir_ssa_def *src_lo = nir_unpack_64_2x32_split_x(b, src); + nir_ssa_def *src_hi = nir_unpack_64_2x32_split_y(b, src); + /* Clear the sign bit */ + nir_ssa_def *new_src_hi = nir_iand(b, src_hi, nir_imm_int(b, 0x7FFF)); + return nir_pack_64_2x32_split(b, src_lo, new_src_hi); +} + static bool lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options) { @@ -509,6 +519,11 @@ lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options) return false; break; + case nir_op_fabs: + if (!(options & nir_lower_dabs)) + return false; + break; + default: return false; } @@ -554,6 +569,11 @@ lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options) result = lower_mod(&bld, src, src1); } break; + + case nir_op_fabs: + result = lower_fabs64(&bld, src); + break; + default: unreachable("unhandled opcode"); } diff --git a/src/intel/compiler/brw_nir.c 
b/src/intel/compiler/brw_nir.c index 36ccdf3cb1..8e41a6eaea 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -509,7 +509,8 @@ nir_optimize(nir_shader *nir, const struct brw_compiler *compiler, nir_lower_dceil | nir_lower_dfract | nir_lower_dround_even | - nir_lower_dmod); + nir_lower_dmod | + nir_lower_dabs); OPT(nir_lower_64bit_pack); } while (progress); -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC 06/10] nir/lower_double_ops: lower mul()
Signed-off-by: Elie Tournier --- src/compiler/nir/nir.h | 3 +- src/compiler/nir/nir_lower_double_ops.c | 749 src/intel/compiler/brw_nir.c| 3 +- 3 files changed, 753 insertions(+), 2 deletions(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 89d5dd8e1b..58045e3d42 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2573,7 +2573,8 @@ typedef enum { nir_lower_dneg = (1 << 10), nir_lower_dsign = (1 << 11), nir_lower_deq = (1 << 12), - nir_lower_dlt = (1 << 13) + nir_lower_dlt = (1 << 13), + nir_lower_dmul = (1 << 14) } nir_lower_doubles_options; bool nir_lower_doubles(nir_shader *shader, nir_lower_doubles_options options); diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c index 38743206a8..807fa18fc1 100644 --- a/src/compiler/nir/nir_lower_double_ops.c +++ b/src/compiler/nir/nir_lower_double_ops.c @@ -36,6 +36,20 @@ * - 32-bit integer and floating point arithmetic */ +/* Creates a double with the sign bits set to a given integer value */ +static nir_ssa_def * +set_sign(nir_builder *b, nir_ssa_def *src, nir_ssa_def *sign) +{ + /* Split into bits 0-31 and 32-63 */ + nir_ssa_def *lo = nir_unpack_64_2x32_split_x(b, src); + nir_ssa_def *hi = nir_unpack_64_2x32_split_y(b, src); + + /* The exponent is bits 63, or 31 of the high word */ + nir_ssa_def *new_hi = nir_bfi(b, nir_imm_int(b, 0x8000), sign, hi); + /* recombine */ + return nir_pack_64_2x32_split(b, lo, new_hi); +} + static nir_ssa_def * get_sign(nir_builder *b, nir_ssa_def *src) { @@ -73,6 +87,57 @@ get_exponent(nir_builder *b, nir_ssa_def *src) } static nir_ssa_def * +set_frac_hi(nir_builder *b, nir_ssa_def *src, nir_ssa_def *frac_hi) +{ + /* Split into bits 0-31 and 32-63 */ + nir_ssa_def *lo = nir_unpack_64_2x32_split_x(b, src); + nir_ssa_def *hi = nir_unpack_64_2x32_split_y(b, src); + + /* The frac_lo is bits 32-51, or 0-19 of the high word */ + nir_ssa_def *new_hi = nir_bfi(b, nir_imm_int(b, 0x000F), frac_hi, hi); + /* recombine */ 
+ return nir_pack_64_2x32_split(b, lo, new_hi); +} + +static nir_ssa_def * +get_frac_hi(nir_builder *b, nir_ssa_def *src) +{ + /* get bits 32-63 */ + nir_ssa_def *hi = nir_unpack_64_2x32_split_y(b, src); + + /* extract bits 0-19 of the high word */ + return nir_ubitfield_extract(b, hi, nir_imm_int(b, 0), nir_imm_int(b, 20)); +} + +static nir_ssa_def * +set_frac_lo(nir_builder *b, nir_ssa_def *src, nir_ssa_def *frac_lo) +{ + nir_ssa_def *hi = nir_unpack_64_2x32_split_y(b, src); + /* recombine */ + return nir_pack_64_2x32_split(b, frac_lo, hi); +} + +static nir_ssa_def * +get_frac_lo(nir_builder *b, nir_ssa_def *src) +{ + /* get bits 0-31 */ + return nir_unpack_64_2x32_split_x(b, src); +} + +static nir_ssa_def * +pack_fp64(nir_builder *b, nir_ssa_def *z_si, + nir_ssa_def *z_exp, + nir_ssa_def *z_frac_hi, nir_ssa_def *z_frac_lo) +{ + nir_ssa_def *z = nir_imm_double(b, 0.0); + z = set_sign(b, z, z_si); + z = set_exponent(b, z, z_exp); + z = set_frac_hi(b, z, z_frac_hi); + z = set_frac_lo(b, z, z_frac_lo); + return z; +} + +static nir_ssa_def * is_nan(nir_builder *b, nir_ssa_def *src) { nir_ssa_def *src_lo = nir_unpack_64_2x32_split_x(b, src); @@ -90,6 +155,247 @@ is_nan(nir_builder *b, nir_ssa_def *src) nir_imm_int(b, 0x000F; } +static nir_ssa_def * +is_signaling_nan(nir_builder *b, nir_ssa_def *src) +{ + nir_ssa_def *src_lo = nir_unpack_64_2x32_split_x(b, src); + nir_ssa_def *src_hi = nir_unpack_64_2x32_split_y(b, src); + + /* return (((src_hi>>19) & 0xFFF) == 0xFFE ) && +*(src_lo || (src_hi & 0x0007)); +*/ + return nir_iand(b, + nir_ieq(b, + nir_iand(b, +nir_ishr(b, src_hi, nir_imm_int(b, 19)), +nir_imm_int(b, 0xFFF)), + nir_imm_int(b, 0xFFE)), + nir_ior(b, src_lo, nir_iand(b, + src_hi, + nir_imm_int(b, 0x0007; +} + +static nir_ssa_def * +propagate_fp64_nan(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) +{ + nir_ssa_def *x_is_nan = is_nan(b, x); + nir_ssa_def *x_is_signaling_nan = is_signaling_nan(b, x); + nir_ssa_def *y_is_nan = is_nan(b, y); + + nir_ssa_def *x_lo 
= nir_unpack_64_2x32_split_x(b, x); + nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); + nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y); + nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y); + + x_hi = nir_ior(b, x_hi, nir_imm_int(b, 0x0008)); + y_hi = nir_ior(b, y_hi, nir_imm_int(b, 0x0008)); + x = nir_pack_64_2x32_split(b,
[Mesa-dev] [RFC 02/10] nir/lower_double_ops: lower neg()
Signed-off-by: Elie Tournier --- src/compiler/nir/nir.h | 3 ++- src/compiler/nir/nir_lower_double_ops.c | 45 + src/intel/compiler/brw_nir.c| 3 ++- 3 files changed, 49 insertions(+), 2 deletions(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 0ec09f54e2..e891d21499 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2569,7 +2569,8 @@ typedef enum { nir_lower_dfract = (1 << 6), nir_lower_dround_even = (1 << 7), nir_lower_dmod = (1 << 8), - nir_lower_dabs = (1 << 9) + nir_lower_dabs = (1 << 9), + nir_lower_dneg = (1 << 10) } nir_lower_doubles_options; bool nir_lower_doubles(nir_shader *shader, nir_lower_doubles_options options); diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c index 4ea8d339cc..eb16e513ae 100644 --- a/src/compiler/nir/nir_lower_double_ops.c +++ b/src/compiler/nir/nir_lower_double_ops.c @@ -62,6 +62,24 @@ get_exponent(nir_builder *b, nir_ssa_def *src) return nir_ubitfield_extract(b, hi, nir_imm_int(b, 20), nir_imm_int(b, 11)); } +static nir_ssa_def * +is_nan(nir_builder *b, nir_ssa_def *src) +{ + nir_ssa_def *src_lo = nir_unpack_64_2x32_split_x(b, src); + nir_ssa_def *src_hi = nir_unpack_64_2x32_split_y(b, src); + + /* return (0xFFE0 <= (src_hi<<1)) && +*(src_lo || (src_hi & 0x000F)); +*/ + return nir_iand(b, + nir_fge(b, + nir_ishl(b, src_hi, nir_imm_int(b, 1)), + nir_imm_int(b, 0xFFE0)), + nir_ior(b, src_lo, + nir_iand(b,src_hi, + nir_imm_int(b, 0x000F; +} + /* Return infinity with the sign of the given source which is +/-0 */ static nir_ssa_def * @@ -466,6 +484,24 @@ lower_fabs64(nir_builder *b, nir_ssa_def *src) return nir_pack_64_2x32_split(b, src_lo, new_src_hi); } +static nir_ssa_def * +lower_fneg64(nir_builder *b, nir_ssa_def *src) +{ + nir_ssa_def *src_lo = nir_unpack_64_2x32_split_x(b, src); + nir_ssa_def *src_hi = nir_unpack_64_2x32_split_y(b, src); + src_hi = nir_ixor(b, src_hi, +nir_ishl(b, + nir_imm_int(b, 1), + nir_imm_int(b, 31))); + + /* Return the 
negate value of the src. +* If the src is not a number (NaN), return the src. +*/ + return nir_bcsel(b, is_nan(b, src), + src, + nir_pack_64_2x32_split(b, src_lo, src_hi)); +} + static bool lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options) { @@ -524,6 +560,11 @@ lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options) return false; break; + case nir_op_fneg: + if (!(options & nir_lower_dneg)) + return false; + break; + default: return false; } @@ -574,6 +615,10 @@ lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options) result = lower_fabs64(&bld, src); break; + case nir_op_fneg: + result = lower_fneg64(&bld, src); + break; + default: unreachable("unhandled opcode"); } diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index 8e41a6eaea..bf6935b1c2 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -510,7 +510,8 @@ nir_optimize(nir_shader *nir, const struct brw_compiler *compiler, nir_lower_dfract | nir_lower_dround_even | nir_lower_dmod | - nir_lower_dabs); + nir_lower_dabs | + nir_lower_dneg); OPT(nir_lower_64bit_pack); } while (progress); -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC 04/10] nir/lower_double_ops: lower eq()
Signed-off-by: Elie Tournier --- src/compiler/nir/nir.h | 3 ++- src/compiler/nir/nir_lower_double_ops.c | 43 + src/intel/compiler/brw_nir.c| 3 ++- 3 files changed, 47 insertions(+), 2 deletions(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 10dd6b1056..7b1a4655ca 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2571,7 +2571,8 @@ typedef enum { nir_lower_dmod = (1 << 8), nir_lower_dabs = (1 << 9), nir_lower_dneg = (1 << 10), - nir_lower_dsign = (1 << 11) + nir_lower_dsign = (1 << 11), + nir_lower_deq = (1 << 12) } nir_lower_doubles_options; bool nir_lower_doubles(nir_shader *shader, nir_lower_doubles_options options); diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c index 5c0d62b554..d3e05bf519 100644 --- a/src/compiler/nir/nir_lower_double_ops.c +++ b/src/compiler/nir/nir_lower_double_ops.c @@ -526,6 +526,37 @@ lower_fsign64(nir_builder *b, nir_ssa_def *src) nir_ior(b, sign, one))); } +static nir_ssa_def * +lower_feq64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) +{ + nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x); + nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); + nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y); + nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y); + + nir_ssa_def *eq_lo = nir_ieq(b, x_lo, y_lo); + nir_ssa_def *eq_hi = nir_ieq(b, x_hi, y_hi); + nir_ssa_def *eq_x_lo = nir_ieq(b, x_lo, nir_imm_int(b, 0)); + nir_ssa_def *eq_xy_hi = nir_ieq(b, + nir_ishl(b, +nir_ior(b, x_hi, y_hi), +nir_imm_int(b, 1)), + nir_imm_int(b, 0)); + /* if x or y is a nan +*return false; +* else +*return (x_lo == y_lo) && +* ((x_hi == y_hi) || +* ((x_lo == 0) && (((x_hi | y_hi)<<1) == 0))); +*/ + return nir_bcsel(b, nir_ior(b, is_nan(b, x), is_nan(b, y)), + nir_imm_int(b, NIR_FALSE), + nir_iand(b, eq_lo, + nir_ior(b, + eq_hi, + nir_iand(b, eq_x_lo, eq_xy_hi; +} + static bool lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options) { @@ -594,6 
+625,11 @@ lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options) return false; break; + case nir_op_feq: + if (!(options & nir_lower_deq)) + return false; + break; + default: return false; } @@ -652,6 +688,13 @@ lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options) result = lower_fsign64(&bld, src); break; + case nir_op_feq: { + nir_ssa_def *src1 = nir_fmov_alu(&bld, instr->src[1], + instr->dest.dest.ssa.num_components); + result = lower_feq64(&bld, src, src1); + } + break; + default: unreachable("unhandled opcode"); } diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index 405d756e6c..7b8b34b4ba 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -512,7 +512,8 @@ nir_optimize(nir_shader *nir, const struct brw_compiler *compiler, nir_lower_dmod | nir_lower_dabs | nir_lower_dneg | - nir_lower_dsign); + nir_lower_dsign | + nir_lower_deq); OPT(nir_lower_64bit_pack); } while (progress); -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC 03/10] nir/lower_double_ops: lower sign()
Signed-off-by: Elie Tournier --- src/compiler/nir/nir.h | 3 ++- src/compiler/nir/nir_lower_double_ops.c | 33 + src/intel/compiler/brw_nir.c| 3 ++- 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index e891d21499..10dd6b1056 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2570,7 +2570,8 @@ typedef enum { nir_lower_dround_even = (1 << 7), nir_lower_dmod = (1 << 8), nir_lower_dabs = (1 << 9), - nir_lower_dneg = (1 << 10) + nir_lower_dneg = (1 << 10), + nir_lower_dsign = (1 << 11) } nir_lower_doubles_options; bool nir_lower_doubles(nir_shader *shader, nir_lower_doubles_options options); diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c index eb16e513ae..5c0d62b554 100644 --- a/src/compiler/nir/nir_lower_double_ops.c +++ b/src/compiler/nir/nir_lower_double_ops.c @@ -502,6 +502,30 @@ lower_fneg64(nir_builder *b, nir_ssa_def *src) nir_pack_64_2x32_split(b, src_lo, src_hi)); } +static nir_ssa_def * +lower_fsign64(nir_builder *b, nir_ssa_def *src) +{ + nir_ssa_def *src_lo = nir_unpack_64_2x32_split_x(b, src); + nir_ssa_def *src_hi = nir_unpack_64_2x32_split_y(b, src); + + nir_ssa_def *is_zero = nir_ieq(b, + nir_ior(b, + nir_ishl(b, src_hi, + nir_imm_int(b, 1)), + src_lo), + nir_imm_int(b, 0)); + + nir_ssa_def *sign = nir_iand(b, src_hi, nir_imm_int(b, 0x8000)); + nir_ssa_def *one = nir_imm_int(b, 0x3FF0); + + return nir_bcsel(b, +is_zero, +nir_imm_double(b, 0), +nir_pack_64_2x32_split(b, + nir_imm_int(b, 0), + nir_ior(b, sign, one))); +} + static bool lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options) { @@ -565,6 +589,11 @@ lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options) return false; break; + case nir_op_fsign: + if (!(options & nir_lower_dsign)) + return false; + break; + default: return false; } @@ -619,6 +648,10 @@ lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options 
options) result = lower_fneg64(&bld, src); break; + case nir_op_fsign: + result = lower_fsign64(&bld, src); + break; + default: unreachable("unhandled opcode"); } diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index bf6935b1c2..405d756e6c 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -511,7 +511,8 @@ nir_optimize(nir_shader *nir, const struct brw_compiler *compiler, nir_lower_dround_even | nir_lower_dmod | nir_lower_dabs | - nir_lower_dneg); + nir_lower_dneg | + nir_lower_dsign); OPT(nir_lower_64bit_pack); } while (progress); -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC 07/10] nir/lower_double_ops: lower div()
Signed-off-by: Elie Tournier --- src/compiler/nir/nir.h | 3 +- src/compiler/nir/nir_lower_double_ops.c | 138 src/intel/compiler/brw_nir.c| 3 +- 3 files changed, 142 insertions(+), 2 deletions(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 58045e3d42..d9925c25c7 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2574,7 +2574,8 @@ typedef enum { nir_lower_dsign = (1 << 11), nir_lower_deq = (1 << 12), nir_lower_dlt = (1 << 13), - nir_lower_dmul = (1 << 14) + nir_lower_dmul = (1 << 14), + nir_lower_ddiv = (1 << 15) } nir_lower_doubles_options; bool nir_lower_doubles(nir_shader *shader, nir_lower_doubles_options options); diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c index 807fa18fc1..5d6944e15f 100644 --- a/src/compiler/nir/nir_lower_double_ops.c +++ b/src/compiler/nir/nir_lower_double_ops.c @@ -1350,6 +1350,132 @@ lower_fmul64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) } +static nir_ssa_def * +div64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) +{ + return lower_fmul64(b, x, lower_rcp(b, y)); +} + +static nir_ssa_def * +lower_fdiv64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) +{ + nir_ssa_def *x_si = get_sign(b, x); + nir_ssa_def *x_exp = get_exponent(b, x); + nir_ssa_def *x_frac_lo = get_frac_hi(b, x); + nir_ssa_def *x_frac_hi = get_frac_lo(b, x); + nir_ssa_def *y_si = get_sign(b, y); + nir_ssa_def *y_exp = get_exponent(b, y); + nir_ssa_def *y_frac_lo = get_frac_lo(b, y); + nir_ssa_def *y_frac_hi = get_frac_hi(b, y); + + nir_ssa_def *z_si = nir_ixor(b, x_si, y_si); + nir_ssa_def *x_frac = nir_ior(b, x_frac_hi, x_frac_lo); + nir_ssa_def *x_exp_frac = nir_ior(b, x_exp, x_frac); + nir_ssa_def *y_frac = nir_ior(b, y_frac_hi, y_frac_lo); + + nir_ssa_def *zero = nir_imm_int(b, 0); + + /* Result of NaN, Inf and subnormal division */ + nir_ssa_def *propagate_nan = propagate_fp64_nan(b, x, y); + + nir_ssa_def *pack_inf_fp64 = pack_fp64(b, + z_si, + nir_imm_int(b, 0x7FF), + 
zero, + zero); + + nir_ssa_def *pack_zero_fp64 = pack_fp64(b, + z_si, + zero, + zero, + zero); + + nir_ssa_def *default_nan = + nir_pack_64_2x32_split(b, + nir_imm_int(b, 0x), + nir_imm_int(b, 0x)); + + nir_ssa_def *x_exp_sub = x_exp; + nir_ssa_def *x_frac_hi_sub = x_frac_hi; + nir_ssa_def *x_frac_lo_sub = x_frac_lo; + normalize_fp64_subnormal(b, +x_frac_hi, x_frac_lo, +&x_exp_sub, +&x_frac_hi_sub, &x_frac_lo_sub); + nir_ssa_def *x_sub = pack_fp64(b, x_si, + x_exp_sub, + x_frac_hi_sub, x_frac_lo_sub); + nir_ssa_def *normalize_x = div64(b, x_sub, y); + + nir_ssa_def *y_exp_sub = y_exp; + nir_ssa_def *y_frac_hi_sub = y_frac_hi; + nir_ssa_def *y_frac_lo_sub = y_frac_lo; + normalize_fp64_subnormal(b, +y_frac_hi, y_frac_lo, +&y_exp_sub, +&y_frac_hi_sub, &y_frac_lo_sub); + nir_ssa_def *y_sub = pack_fp64(b, y_si, + y_exp_sub, + y_frac_hi_sub, y_frac_lo_sub); + nir_ssa_def *normalize_y = div64(b, x, y_sub); + + /* +* Handle the different exeption before compute the division. +* +* If x / Inf, return 0. +* If Inf / Inf, return Inf. +* If Inf / 0, we return a default NaN (0x) +* +* If x / NaN or NaN / y, we propagate the NaN. +* If NaN / NaN, we select the correct NaN to propagate. +* +* If x and y are equal to 0, we return a default NaN. +* If x is equal to 0, we return 0. +* If y is equal to 0, we return Inf. +* +* If x or y is a subnormal (exponent == 0 and significant != 0), +* we normalize this entry and realize the division. +*/ + return + nir_bcsel(b, +nir_ieq(b, x_exp, nir_imm_int(b, 0x7FF)), +nir_bcsel(b, + x_frac, + propagate_nan, + nir_bcsel(b, +nir_ieq(b, y_exp, nir_imm_int(b, 0x7FF)), +nir_bcsel(b, + y_frac, + propagate_nan
[Mesa-dev] [RFC 10/10] mesa: enable ARB_gpu_shader_fp64 on Gen6
Signed-off-by: Elie Tournier --- src/mesa/drivers/dri/i965/intel_extensions.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index 30f2c37695..fce377eed0 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -170,6 +170,7 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_enhanced_layouts = true; ctx->Extensions.ARB_ES3_compatibility = true; ctx->Extensions.ARB_fragment_layer_viewport = true; + ctx->Extensions.ARB_gpu_shader_fp64 = true; ctx->Extensions.ARB_sample_shading = true; ctx->Extensions.ARB_shading_language_420pack = true; ctx->Extensions.ARB_texture_buffer_object = true; -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC 08/10] nir/lower_double_ops: lower add()
Signed-off-by: Elie Tournier --- src/compiler/nir/nir.h | 3 +- src/compiler/nir/nir_lower_double_ops.c | 593 src/intel/compiler/brw_nir.c| 3 +- 3 files changed, 597 insertions(+), 2 deletions(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index d9925c25c7..d161380b1b 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2575,7 +2575,8 @@ typedef enum { nir_lower_deq = (1 << 12), nir_lower_dlt = (1 << 13), nir_lower_dmul = (1 << 14), - nir_lower_ddiv = (1 << 15) + nir_lower_ddiv = (1 << 15), + nir_lower_dadd = (1 << 16) } nir_lower_doubles_options; bool nir_lower_doubles(nir_shader *shader, nir_lower_doubles_options options); diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c index 5d6944e15f..db1a3c0b72 100644 --- a/src/compiler/nir/nir_lower_double_ops.c +++ b/src/compiler/nir/nir_lower_double_ops.c @@ -229,6 +229,81 @@ short_shl64(nir_builder *b, nir_ssa_def *src_hi, nir_ssa_def *src_lo, nir_imm_int(b, 31); } +/* Shifts the 64-bit value formed by concatenating `src_0' and `src_1' right by + * the number of bits given in `count'. If any nonzero bits are shifted off, + * they are "jammed" into the least significant bit of the result by setting the + * least significant bit to 1. The value of `count' can be arbitrarily large; + * in particular, if `count' is greater than 64, the result will be either 0 + * or 1, depending on whether the concatenation of `src_0' and `src_1' is zero + * or nonzero. The result is broken into two 32-bit pieces which are stored at + * the locations pointed to by `z0Ptr' and `z1Ptr'. 
+ */ +static void +shift64_right_jamming(nir_builder *b, + nir_ssa_def *src_0, + nir_ssa_def *src_1, + nir_ssa_def *count, + nir_ssa_def **z0Ptr, + nir_ssa_def **z1Ptr) +{ + nir_ssa_def *neg_count = nir_iand(b, + nir_ineg(b, count), + nir_imm_int(b, 31)); + + nir_ssa_def *zero = nir_imm_int(b, 0); + + nir_ssa_def *is_count_0 = nir_ieq(b, count, zero); + nir_ssa_def *is_count_lt32 = nir_ilt(b, count, nir_imm_int(b, 32)); + nir_ssa_def *is_count_32 = nir_ieq(b, count, nir_imm_int(b, 32)); + nir_ssa_def *is_count_lt64 = nir_ilt(b, count, nir_imm_int(b, 64)); + + *z0Ptr = nir_bcsel(b, + is_count_0, + src_0, + nir_bcsel(b, +is_count_lt32, +nir_ishr(b, src_0, count), +zero)); + + nir_ssa_def *z1_1 = nir_ior(b, + nir_ishl(b, src_0, neg_count), + nir_ior(b, + nir_ishr(b, src_1, count), + nir_ine(b, + nir_ishl(b, src_1, neg_count), + zero))); + + nir_ssa_def *z1_2 = nir_ior(b, + nir_ishr(b, src_0, + nir_iand(b, +count, +nir_imm_int(b, 31))), + nir_ine(b, + nir_ior(b, + nir_ishl(b, src_0, neg_count), + src_1), + zero)); + + *z1Ptr = + nir_bcsel(b, +is_count_0, +src_1, +nir_bcsel(b, + is_count_lt32, + z1_1, + nir_bcsel(b, +is_count_32, +nir_ior(b, src_0, + nir_ine(b, src_1, zero)), +nir_bcsel(b, + is_count_lt64, + z1_2, + nir_ine(b, + nir_ior(b, src_0, + src_1), + zero); +} + /* Shifts the 96-bit value formed by concatenating `src_0', `src_1', and `src_2' * right by 32 _plus_ the number of bits given in `count'. The shifted result * is at most 64 nonzero bits; these are broken into two 32-bit pieces which are @@ -469,6 +544,22 @@ add64(nir_bu
[Mesa-dev] [RFC 09/10] nir/lower_double_ops: lower sub()
Signed-off-by: Elie Tournier --- src/compiler/nir/nir.h | 3 ++- src/compiler/nir/nir_lower_double_ops.c | 24 src/intel/compiler/brw_nir.c| 3 ++- 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index d161380b1b..c5c955c92e 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2576,7 +2576,8 @@ typedef enum { nir_lower_dlt = (1 << 13), nir_lower_dmul = (1 << 14), nir_lower_ddiv = (1 << 15), - nir_lower_dadd = (1 << 16) + nir_lower_dadd = (1 << 16), + nir_lower_dsub = (1 << 17) } nir_lower_doubles_options; bool nir_lower_doubles(nir_shader *shader, nir_lower_doubles_options options); diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c index db1a3c0b72..f7fb7dccd4 100644 --- a/src/compiler/nir/nir_lower_double_ops.c +++ b/src/compiler/nir/nir_lower_double_ops.c @@ -2057,6 +2057,18 @@ lower_fadd64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) sub_frac_fp64(b, x_si, x, y)); } +static nir_ssa_def * +lower_fsub64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) +{ + nir_ssa_def *x_si = get_sign(b, x); + nir_ssa_def *y_si = get_sign(b, y); + + return nir_bcsel(b, +nir_ieq(b, x_si, y_si), +sub_frac_fp64(b, x_si, x, y), +add_frac_fp64(b, x_si, x, y)); +} + static bool lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options) { @@ -2150,6 +2162,11 @@ lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options) return false; break; + case nir_op_fsub: + if (!(options & nir_lower_dsub)) + return false; + break; + default: return false; } @@ -2243,6 +2260,13 @@ lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options) } break; + case nir_op_fsub: { + nir_ssa_def *src1 = nir_fmov_alu(&bld, instr->src[1], + instr->dest.dest.ssa.num_components); + result = lower_fsub64(&bld, src, src1); + } + break; + default: unreachable("unhandled opcode"); } diff --git a/src/intel/compiler/brw_nir.c 
b/src/intel/compiler/brw_nir.c index 67e8cea2b6..8ef56813b6 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -517,7 +517,8 @@ nir_optimize(nir_shader *nir, const struct brw_compiler *compiler, nir_lower_dlt | nir_lower_dmul | nir_lower_ddiv | - nir_lower_dadd); + nir_lower_dadd | + nir_lower_dsub); OPT(nir_lower_64bit_pack); } while (progress); -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] virgl: add ARB_tessellation_shader support.
On Fri, Jun 08, 2018 at 03:15:36PM +1000, Dave Airlie wrote: > From: Dave Airlie > > This should add all the pieces to enable tess shaders on virgl. Hi I think that we also need to add the following code: --- a/src/gallium/drivers/virgl/virgl_winsys.h +++ b/src/gallium/drivers/virgl/virgl_winsys.h @@ -127,7 +127,7 @@ static inline void virgl_ws_fill_new_caps_defaults(struct virgl_drm_caps *caps) caps->caps.v2.max_geom_total_output_components = 1024; caps->caps.v2.max_vertex_outputs = 32; caps->caps.v2.max_vertex_attribs = 16; - caps->caps.v2.max_shader_patch_varyings = 0; + caps->caps.v2.max_shader_patch_varyings = 30; caps->caps.v2.min_texel_offset = -8; caps->caps.v2.max_texel_offset = 7; caps->caps.v2.min_texture_gather_offset = -8; Elie > > This + fp64 enabled should give GL4.1 > --- > docs/features.txt | 2 +- > src/gallium/drivers/virgl/virgl_context.c | 82 > -- > src/gallium/drivers/virgl/virgl_encode.c | 21 +++- > src/gallium/drivers/virgl/virgl_encode.h | 4 ++ > src/gallium/drivers/virgl/virgl_protocol.h | 5 ++ > src/gallium/drivers/virgl/virgl_screen.c | 10 +++- > 6 files changed, 116 insertions(+), 8 deletions(-) > > diff --git a/docs/features.txt b/docs/features.txt > index 6e5cbc8b11e..bc3fdbc8890 100644 > --- a/docs/features.txt > +++ b/docs/features.txt > @@ -130,7 +130,7 @@ GL 4.0, GLSL 4.00 --- all DONE: i965/gen7+, nvc0, r600, > radeonsi >GL_ARB_gpu_shader_fp64DONE (i965/gen7+, > llvmpipe, softpipe) >GL_ARB_sample_shading DONE (i965/gen6+, > nv50, virgl) >GL_ARB_shader_subroutine DONE (freedreno, > i965/gen6+, nv50, llvmpipe, softpipe, swr, virgl) > - GL_ARB_tessellation_shaderDONE (i965/gen7+) > + GL_ARB_tessellation_shaderDONE (i965/gen7+, > virgl) >GL_ARB_texture_buffer_object_rgb32DONE (freedreno, > i965/gen6+, llvmpipe, softpipe, swr, virgl) >GL_ARB_texture_cube_map_array DONE (i965/gen6+, > nv50, llvmpipe, softpipe, virgl) >GL_ARB_texture_gather DONE (freedreno, > i965/gen6+, nv50, llvmpipe, softpipe, swr, virgl) > diff --git 
a/src/gallium/drivers/virgl/virgl_context.c > b/src/gallium/drivers/virgl/virgl_context.c > index 8d701bb8f40..cee58faa12e 100644 > --- a/src/gallium/drivers/virgl/virgl_context.c > +++ b/src/gallium/drivers/virgl/virgl_context.c > @@ -469,9 +469,13 @@ static void *virgl_shader_encoder(struct pipe_context > *ctx, > struct tgsi_token *new_tokens; > int ret; > > - new_tokens = virgl_tgsi_transform(vctx, shader->tokens); > - if (!new_tokens) > - return NULL; > + if (type != PIPE_SHADER_TESS_CTRL && > + type != PIPE_SHADER_TESS_EVAL) { > + new_tokens = virgl_tgsi_transform(vctx, shader->tokens); > + if (!new_tokens) > + return NULL; > + } else > + new_tokens = shader->tokens; > > handle = virgl_object_assign_handle(); > /* encode VS state */ > @@ -482,7 +486,8 @@ static void *virgl_shader_encoder(struct pipe_context > *ctx, >return NULL; > } > > - FREE(new_tokens); > + if (new_tokens != shader->tokens) > + FREE(new_tokens); > return (void *)(unsigned long)handle; > > } > @@ -492,6 +497,18 @@ static void *virgl_create_vs_state(struct pipe_context > *ctx, > return virgl_shader_encoder(ctx, shader, PIPE_SHADER_VERTEX); > } > > +static void *virgl_create_tcs_state(struct pipe_context *ctx, > + const struct pipe_shader_state *shader) > +{ > + return virgl_shader_encoder(ctx, shader, PIPE_SHADER_TESS_CTRL); > +} > + > +static void *virgl_create_tes_state(struct pipe_context *ctx, > + const struct pipe_shader_state *shader) > +{ > + return virgl_shader_encoder(ctx, shader, PIPE_SHADER_TESS_EVAL); > +} > + > static void *virgl_create_gs_state(struct pipe_context *ctx, > const struct pipe_shader_state *shader) > { > @@ -534,6 +551,26 @@ virgl_delete_vs_state(struct pipe_context *ctx, > virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_SHADER); > } > > +static void > +virgl_delete_tcs_state(struct pipe_context *ctx, > + void *tcs) > +{ > + uint32_t handle = (unsigned long)tcs; > + struct virgl_context *vctx = virgl_context(ctx); > + > + virgl_encode_delete_object(vctx, 
handle, VIRGL_OBJECT_SHADER); > +} > + > +static void > +virgl_delete_tes_state(struct pipe_context *ctx, > + void *tes) > +{ > + uint32_t handle = (unsigned long)tes; > + struct virgl_context *vctx = virgl_context(ctx); > + > + virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_SHADER); > +} > + > static voi
Re: [Mesa-dev] [PATCH] virgl: add ARB_tessellation_shader support. (v2)
On Wed, Jun 13, 2018 at 11:03:55AM +1000, Dave Airlie wrote: > From: Dave Airlie > > This should add all the pieces to enable tess shaders on virgl. > > v2: fixup transform to handle tess and strip out precise. > set default for max patch varyings to work around issue when > tess gets enabled from v1 caps but v2 caps aren't in place. (Elie) Reviewed-by: Elie Tournier > --- > src/gallium/auxiliary/tgsi/tgsi_transform.c | 4 -- > src/gallium/drivers/virgl/virgl_context.c | 69 > + > src/gallium/drivers/virgl/virgl_encode.c| 21 - > src/gallium/drivers/virgl/virgl_encode.h| 4 ++ > src/gallium/drivers/virgl/virgl_protocol.h | 5 +++ > src/gallium/drivers/virgl/virgl_screen.c| 10 - > src/gallium/drivers/virgl/virgl_winsys.h| 2 +- > 7 files changed, 107 insertions(+), 8 deletions(-) > > diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.c > b/src/gallium/auxiliary/tgsi/tgsi_transform.c > index cd076c9e79e..4b2b10f50ad 100644 > --- a/src/gallium/auxiliary/tgsi/tgsi_transform.c > +++ b/src/gallium/auxiliary/tgsi/tgsi_transform.c > @@ -140,10 +140,6 @@ tgsi_transform_shader(const struct tgsi_token *tokens_in, >return -1; > } > procType = parse.FullHeader.Processor.Processor; > - assert(procType == PIPE_SHADER_FRAGMENT || > - procType == PIPE_SHADER_VERTEX || > - procType == PIPE_SHADER_GEOMETRY); > - > > /** > ** Setup output shader > diff --git a/src/gallium/drivers/virgl/virgl_context.c > b/src/gallium/drivers/virgl/virgl_context.c > index 8d701bb8f40..e6f8dc85256 100644 > --- a/src/gallium/drivers/virgl/virgl_context.c > +++ b/src/gallium/drivers/virgl/virgl_context.c > @@ -492,6 +492,18 @@ static void *virgl_create_vs_state(struct pipe_context > *ctx, > return virgl_shader_encoder(ctx, shader, PIPE_SHADER_VERTEX); > } > > +static void *virgl_create_tcs_state(struct pipe_context *ctx, > + const struct pipe_shader_state *shader) > +{ > + return virgl_shader_encoder(ctx, shader, PIPE_SHADER_TESS_CTRL); > +} > + > +static void *virgl_create_tes_state(struct pipe_context 
*ctx, > + const struct pipe_shader_state *shader) > +{ > + return virgl_shader_encoder(ctx, shader, PIPE_SHADER_TESS_EVAL); > +} > + > static void *virgl_create_gs_state(struct pipe_context *ctx, > const struct pipe_shader_state *shader) > { > @@ -534,6 +546,26 @@ virgl_delete_vs_state(struct pipe_context *ctx, > virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_SHADER); > } > > +static void > +virgl_delete_tcs_state(struct pipe_context *ctx, > + void *tcs) > +{ > + uint32_t handle = (unsigned long)tcs; > + struct virgl_context *vctx = virgl_context(ctx); > + > + virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_SHADER); > +} > + > +static void > +virgl_delete_tes_state(struct pipe_context *ctx, > + void *tes) > +{ > + uint32_t handle = (unsigned long)tes; > + struct virgl_context *vctx = virgl_context(ctx); > + > + virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_SHADER); > +} > + > static void virgl_bind_vs_state(struct pipe_context *ctx, > void *vss) > { > @@ -543,6 +575,24 @@ static void virgl_bind_vs_state(struct pipe_context *ctx, > virgl_encode_bind_shader(vctx, handle, PIPE_SHADER_VERTEX); > } > > +static void virgl_bind_tcs_state(struct pipe_context *ctx, > + void *vss) > +{ > + uint32_t handle = (unsigned long)vss; > + struct virgl_context *vctx = virgl_context(ctx); > + > + virgl_encode_bind_shader(vctx, handle, PIPE_SHADER_TESS_CTRL); > +} > + > +static void virgl_bind_tes_state(struct pipe_context *ctx, > + void *vss) > +{ > + uint32_t handle = (unsigned long)vss; > + struct virgl_context *vctx = virgl_context(ctx); > + > + virgl_encode_bind_shader(vctx, handle, PIPE_SHADER_TESS_EVAL); > +} > + > static void virgl_bind_gs_state(struct pipe_context *ctx, > void *vss) > { > @@ -801,6 +851,18 @@ static void virgl_set_clip_state(struct pipe_context > *ctx, > virgl_encoder_set_clip_state(vctx, clip); > } > > +static void virgl_set_tess_state(struct pipe_context *ctx, > + const float default_outer_level[4], > + const float 
default_inner_level[2]) > +{ > + struct virgl_context *vctx = virgl_context(ctx);
Re: [Mesa-dev] [PATCH] Plumb invariant output attrib thru TGSI
On Mon, Jun 18, 2018 at 12:06:42PM +0100, Jakob Bornecrantz wrote: > TGSI already has an invariant field on declarations, we are running > into a bug with virgl because st_glsl_to_tgsi completely drops the > invariant flag on the floor when it comes to declarations (tho precise > is added to the ops). But virgl can't express precise ops (only > invariant and precise declarations) only declarations. > > Going to do some testing on this patch soon. > > Cheers, Jakob. Supposing that it doesn't break any gallium driver. Maybe Marek can confirm. I only tested on qemu with virgl. Reviewed-by: Elie Tournier > > On Tue, Apr 10, 2018 at 7:02 PM Marek Olšák wrote: > > > > This doesn't change TGSI. It only changes utilities around it. > > > > Marek > > > > On Mon, Apr 9, 2018 at 6:02 PM, Joe M. Kniss wrote: > >> > >> Add support for glsl 'invariant' modifier for output data declarations. > >> Gallium drivers that use TGSI serialization currently lose invariant > >> modifiers in glsl shaders. > >> > >> Tested: chromiumos on qemu with virglrenderer. > >> Signed-off-by: Joe M. 
Kniss > >> --- > >> src/gallium/auxiliary/tgsi/tgsi_strings.c | 2 ++ > >> src/gallium/auxiliary/tgsi/tgsi_strings.h | 2 ++ > >> src/gallium/auxiliary/tgsi/tgsi_text.c | 18 +++ > >> src/gallium/auxiliary/tgsi/tgsi_ureg.c | 27 ++ > >> src/gallium/auxiliary/tgsi/tgsi_ureg.h | 4 +++- > >> src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 8 +-- > >> 6 files changed, 45 insertions(+), 16 deletions(-) > >> > >> diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c > >> b/src/gallium/auxiliary/tgsi/tgsi_strings.c > >> index 4f28b49ce8..434871273f 100644 > >> --- a/src/gallium/auxiliary/tgsi/tgsi_strings.c > >> +++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c > >> @@ -185,6 +185,8 @@ const char > >> *tgsi_interpolate_locations[TGSI_INTERPOLATE_LOC_COUNT] = > >> "SAMPLE", > >> }; > >> > >> +const char *tgsi_invariant_name = "INVARIANT"; > >> + > >> const char *tgsi_primitive_names[PIPE_PRIM_MAX] = > >> { > >> "POINTS", > >> diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.h > >> b/src/gallium/auxiliary/tgsi/tgsi_strings.h > >> index bb2d3458dd..20e3f7127f 100644 > >> --- a/src/gallium/auxiliary/tgsi/tgsi_strings.h > >> +++ b/src/gallium/auxiliary/tgsi/tgsi_strings.h > >> @@ -52,6 +52,8 @@ extern const char > >> *tgsi_interpolate_names[TGSI_INTERPOLATE_COUNT]; > >> > >> extern const char *tgsi_interpolate_locations[TGSI_INTERPOLATE_LOC_COUNT]; > >> > >> +extern const char *tgsi_invariant_name; > >> + > >> extern const char *tgsi_primitive_names[PIPE_PRIM_MAX]; > >> > >> extern const char *tgsi_fs_coord_origin_names[2]; > >> diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c > >> b/src/gallium/auxiliary/tgsi/tgsi_text.c > >> index 02241a66bf..815b1ee65d 100644 > >> --- a/src/gallium/auxiliary/tgsi/tgsi_text.c > >> +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c > >> @@ -1586,10 +1586,6 @@ static boolean parse_declaration( struct > >> translate_ctx *ctx ) > >> break; > >> } > >>} > >> - if (i == TGSI_INTERPOLATE_COUNT) { > >> - report_error( ctx, "Expected semantic or interpolate 
attribute" > >> ); > >> - return FALSE; > >> - } > >> } > >> > >> cur = ctx->cur; > >> @@ -1609,6 +1605,20 @@ static boolean parse_declaration( struct > >> translate_ctx *ctx ) > >>} > >> } > >> > >> + cur = ctx->cur; > >> + eat_opt_white( &cur ); > >> + if (*cur == ',' && !is_vs_input) { > >> + cur++; > >> + eat_opt_white( &cur ); > >> + if (str_match_nocase_whole( &cur, tgsi_invariant_name )) { > >> + decl.Declaration.Invariant = 1; > >> + ctx->cur = cur; > >> + } else { > >> + report_error( ctx, "Expected semantic, interpolate attribute, or > >> invariant "); > >> + return FALSE; > >> + } > >> + } > >> + > >> advance = tgsi_build_full_declaration( > >>&decl, > >>ctx->tokens_cur
Re: [Mesa-dev] [virglrenderer-devel] [PATCH] virgl: set texture buffer offset alignment to disable ARB_texture_buffer_range.
Wrong ML. I add On Fri, May 18, 2018 at 10:46:29AM +1000, Dave Airlie wrote: > From: Dave Airlie > > The host side hasn't got support for this feature yet, so don't enable it > unless we get the caps from the host. > > This makes the texture buffer range piglit tests skip now. > > Fixes: fe0647df5a7 (virgl: add offset alignment values to to v2 caps struct) Acked-by: Elie Tournier > --- > src/gallium/drivers/virgl/virgl_winsys.h | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/gallium/drivers/virgl/virgl_winsys.h > b/src/gallium/drivers/virgl/virgl_winsys.h > index 99e98ad9c9c..690e610e199 100644 > --- a/src/gallium/drivers/virgl/virgl_winsys.h > +++ b/src/gallium/drivers/virgl/virgl_winsys.h > @@ -132,7 +132,7 @@ static inline void virgl_ws_fill_new_caps_defaults(struct > virgl_drm_caps *caps) > caps->caps.v2.max_texel_offset = 7; > caps->caps.v2.min_texture_gather_offset = -8; > caps->caps.v2.max_texture_gather_offset = 7; > - caps->caps.v2.texture_buffer_offset_alignment = 32; > + caps->caps.v2.texture_buffer_offset_alignment = 0; > caps->caps.v2.uniform_buffer_offset_alignment = 256; > } > #endif > -- > 2.14.3 > > ___ > virglrenderer-devel mailing list > virglrenderer-de...@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/virglrenderer-devel ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 07/14] st/mesa: set ctx->Const.SubPixelBits
On Tue, Aug 28, 2018 at 12:40:25PM +0100, Jakob Bornecrantz wrote: > On Thu, Aug 9, 2018 at 12:57 AM Marek Olšák wrote: > > > > From: Marek Olšák > > This patch causes regressions in dEQP[1] on virgl running on a > radeonSI device. Not a lot of drivers set > PIPE_CAP_VIEWPORT_SUBPIXEL_BITS but SubPixelBits is by default set to > 4, but this overwrites it without checking if the returned value is > zero or not. Looking around it seems that a lot of other drivers just > return zero for PIPE_CAP_VIEWPORT_SUBPIXEL_BITS not just virgl, so > this probably causes regressions on more drivers than virgl. I can also see the regression on the intel driver. > > Cheers, Jakob. > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] gallium: Correctly handle no config context creation
This patch fixes the following Piglit test: spec@egl_mesa_configless_context@basic It also fixes a few tests in a virgl guest. Suggested-by: Emil Velikov Signed-off-by: Elie Tournier --- I cc'ed some Gallium driver people. Can you check if this patch doesn't introduce any bug on your HW? Sorry for the spam. Cheers. src/gallium/include/state_tracker/st_api.h | 2 ++ src/gallium/state_trackers/dri/dri_screen.c | 4 +++- src/mesa/state_tracker/st_manager.c | 9 - 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/gallium/include/state_tracker/st_api.h b/src/gallium/include/state_tracker/st_api.h index 61152e3546..2b63b8a3d2 100644 --- a/src/gallium/include/state_tracker/st_api.h +++ b/src/gallium/include/state_tracker/st_api.h @@ -190,6 +190,8 @@ struct st_egl_image */ struct st_visual { + bool no_config; + /** * Available buffers. Bitfield of ST_ATTACHMENT_*_MASK bits. */ diff --git a/src/gallium/state_trackers/dri/dri_screen.c b/src/gallium/state_trackers/dri/dri_screen.c index 027e85024f..308e23685e 100644 --- a/src/gallium/state_trackers/dri/dri_screen.c +++ b/src/gallium/state_trackers/dri/dri_screen.c @@ -308,8 +308,10 @@ dri_fill_st_visual(struct st_visual *stvis, { memset(stvis, 0, sizeof(*stvis)); - if (!mode) + if (!mode) { + stvis->no_config = true; return; + } /* Deduce the color format. 
*/ switch (mode->redMask) { diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c index 69286b5791..c1e647b30e 100644 --- a/src/mesa/state_tracker/st_manager.c +++ b/src/mesa/state_tracker/st_manager.c @@ -834,6 +834,7 @@ st_api_create_context(struct st_api *stapi, struct st_manager *smapi, struct st_context *shared_ctx = (struct st_context *) shared_stctxi; struct st_context *st; struct pipe_context *pipe; + struct gl_config* mode_ptr; struct gl_config mode; gl_api api; bool no_error = false; @@ -893,7 +894,13 @@ st_api_create_context(struct st_api *stapi, struct st_manager *smapi, } st_visual_to_context_mode(&attribs->visual, &mode); - st = st_create_context(api, pipe, &mode, shared_ctx, + + if (&attribs->visual.no_config) + mode_ptr = NULL; + else + mode_ptr = &mode; + + st = st_create_context(api, pipe, mode_ptr, shared_ctx, &attribs->options, no_error); if (!st) { *error = ST_CONTEXT_ERROR_NO_MEMORY; -- 2.18.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2] gallium: Correctly handle no config context creation
This patch fixes the following Piglit test: spec@egl_mesa_configless_context@basic It also fixes a few tests in a virgl guest. v2: Evaluate the value of no_config (Ilia) Suggested-by: Emil Velikov Signed-off-by: Elie Tournier --- src/gallium/include/state_tracker/st_api.h | 2 ++ src/gallium/state_trackers/dri/dri_screen.c | 4 +++- src/mesa/state_tracker/st_manager.c | 9 - 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/gallium/include/state_tracker/st_api.h b/src/gallium/include/state_tracker/st_api.h index 61152e3546..2b63b8a3d2 100644 --- a/src/gallium/include/state_tracker/st_api.h +++ b/src/gallium/include/state_tracker/st_api.h @@ -190,6 +190,8 @@ struct st_egl_image */ struct st_visual { + bool no_config; + /** * Available buffers. Bitfield of ST_ATTACHMENT_*_MASK bits. */ diff --git a/src/gallium/state_trackers/dri/dri_screen.c b/src/gallium/state_trackers/dri/dri_screen.c index 027e85024f..308e23685e 100644 --- a/src/gallium/state_trackers/dri/dri_screen.c +++ b/src/gallium/state_trackers/dri/dri_screen.c @@ -308,8 +308,10 @@ dri_fill_st_visual(struct st_visual *stvis, { memset(stvis, 0, sizeof(*stvis)); - if (!mode) + if (!mode) { + stvis->no_config = true; return; + } /* Deduce the color format. 
*/ switch (mode->redMask) { diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c index 69286b5791..9ed316b0f7 100644 --- a/src/mesa/state_tracker/st_manager.c +++ b/src/mesa/state_tracker/st_manager.c @@ -834,6 +834,7 @@ st_api_create_context(struct st_api *stapi, struct st_manager *smapi, struct st_context *shared_ctx = (struct st_context *) shared_stctxi; struct st_context *st; struct pipe_context *pipe; + struct gl_config* mode_ptr; struct gl_config mode; gl_api api; bool no_error = false; @@ -893,7 +894,13 @@ st_api_create_context(struct st_api *stapi, struct st_manager *smapi, } st_visual_to_context_mode(&attribs->visual, &mode); - st = st_create_context(api, pipe, &mode, shared_ctx, + + if (attribs->visual.no_config) + mode_ptr = NULL; + else + mode_ptr = &mode; + + st = st_create_context(api, pipe, mode_ptr, shared_ctx, &attribs->options, no_error); if (!st) { *error = ST_CONTEXT_ERROR_NO_MEMORY; -- 2.18.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2] gallium: Correctly handle no config context creation
If you don't mind, can you please push this patch for me? I don't have git access. Thanks a lot, Elie On Fri, 7 Sep 2018 at 22:49, Marek Olšák wrote: > > Reviewed-by: Marek Olšák > > Marek > > On Thu, Sep 6, 2018 at 5:46 AM, Elie Tournier wrote: > > This patch fixes the following Piglit test: > > spec@egl_mesa_configless_context@basic > > It also fixes few test in a virgl guest. > > > > v2: Evaluate the value of no_config (Ilia) > > > > Suggested-by: Emil Velikov > > Signed-off-by: Elie Tournier > > --- > > src/gallium/include/state_tracker/st_api.h | 2 ++ > > src/gallium/state_trackers/dri/dri_screen.c | 4 +++- > > src/mesa/state_tracker/st_manager.c | 9 - > > 3 files changed, 13 insertions(+), 2 deletions(-) > > > > diff --git a/src/gallium/include/state_tracker/st_api.h > > b/src/gallium/include/state_tracker/st_api.h > > index 61152e3546..2b63b8a3d2 100644 > > --- a/src/gallium/include/state_tracker/st_api.h > > +++ b/src/gallium/include/state_tracker/st_api.h > > @@ -190,6 +190,8 @@ struct st_egl_image > > */ > > struct st_visual > > { > > + bool no_config; > > + > > /** > > * Available buffers. Bitfield of ST_ATTACHMENT_*_MASK bits. > > */ > > diff --git a/src/gallium/state_trackers/dri/dri_screen.c > > b/src/gallium/state_trackers/dri/dri_screen.c > > index 027e85024f..308e23685e 100644 > > --- a/src/gallium/state_trackers/dri/dri_screen.c > > +++ b/src/gallium/state_trackers/dri/dri_screen.c > > @@ -308,8 +308,10 @@ dri_fill_st_visual(struct st_visual *stvis, > > { > > memset(stvis, 0, sizeof(*stvis)); > > > > - if (!mode) > > + if (!mode) { > > + stvis->no_config = true; > >return; > > + } > > > > /* Deduce the color format. 
*/ > > switch (mode->redMask) { > > diff --git a/src/mesa/state_tracker/st_manager.c > > b/src/mesa/state_tracker/st_manager.c > > index 69286b5791..9ed316b0f7 100644 > > --- a/src/mesa/state_tracker/st_manager.c > > +++ b/src/mesa/state_tracker/st_manager.c > > @@ -834,6 +834,7 @@ st_api_create_context(struct st_api *stapi, struct > > st_manager *smapi, > > struct st_context *shared_ctx = (struct st_context *) shared_stctxi; > > struct st_context *st; > > struct pipe_context *pipe; > > + struct gl_config* mode_ptr; > > struct gl_config mode; > > gl_api api; > > bool no_error = false; > > @@ -893,7 +894,13 @@ st_api_create_context(struct st_api *stapi, struct > > st_manager *smapi, > > } > > > > st_visual_to_context_mode(&attribs->visual, &mode); > > - st = st_create_context(api, pipe, &mode, shared_ctx, > > + > > + if (attribs->visual.no_config) > > + mode_ptr = NULL; > > + else > > + mode_ptr = &mode; > > + > > + st = st_create_context(api, pipe, mode_ptr, shared_ctx, > >&attribs->options, no_error); > > if (!st) { > >*error = ST_CONTEXT_ERROR_NO_MEMORY; > > -- > > 2.18.0 > > > > ___ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gallium: Fix uninitialized variable warning in compute test.
On Mon, Nov 26, 2018 at 01:13:16PM -0800, Eric Anholt wrote: > The compiler doesn't know that ny != 0, so x might be uninitialized for > the printf at the end. Reviewed-by: Elie Tournier > --- > src/gallium/tests/trivial/compute.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/gallium/tests/trivial/compute.c > b/src/gallium/tests/trivial/compute.c > index afe5d3e9f2be..20e5a4f140c9 100644 > --- a/src/gallium/tests/trivial/compute.c > +++ b/src/gallium/tests/trivial/compute.c > @@ -240,7 +240,7 @@ static void check_tex(struct context *ctx, int slot, >util_format_get_nblocksy(tex->format, tex->height0)); > struct pipe_transfer *xfer; > char *map; > -int x, y, i; > +int x = 0, y, i; > int err = 0; > > if (!check) > -- > 2.20.0.rc1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 04/28] nir: add support for flushing to zero denorm constants
On Wed, Dec 05, 2018 at 04:55:19PM +0100, Samuel Iglesias Gonsálvez wrote: > Signed-off-by: Samuel Iglesias Gonsálvez > --- > src/compiler/nir/nir_opt_constant_folding.c | 74 +++-- > 1 file changed, 68 insertions(+), 6 deletions(-) > > diff --git a/src/compiler/nir/nir_opt_constant_folding.c > b/src/compiler/nir/nir_opt_constant_folding.c > index 1fca530af24..a6df8284e17 100644 > --- a/src/compiler/nir/nir_opt_constant_folding.c > +++ b/src/compiler/nir/nir_opt_constant_folding.c > @@ -39,7 +39,7 @@ struct constant_fold_state { > }; > > static bool > -constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx) > +constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx, unsigned > execution_mode) > { > nir_const_value src[NIR_MAX_VEC_COMPONENTS]; > > @@ -77,12 +77,39 @@ constant_fold_alu_instr(nir_alu_instr *instr, void > *mem_ctx) > switch(load_const->def.bit_size) { > case 64: > src[i].u64[j] = load_const->value.u64[instr->src[i].swizzle[j]]; > +if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP64 && > +(nir_op_infos[instr->op].input_types[i] == nir_type_float || > + nir_op_infos[instr->op].input_types[i] == > nir_type_float64)) { It may be a stupid question, but should we really test that the input type is a float? Isn't it included in the float64 check? 
> + if (src[i].u64[j] < 0x0010) > + src[i].u64[j] = 0; > + if (src[i].u64[j] & 0x8000 && > + !(src[i].u64[j] & 0x7ff0)) > + src[i].u64[j] = 0x8000; We can maybe do something like: if ((src[i].u64[j] & 0x000f) && !(src[i].u64[j] & 0x7ff0)) src[i].u64[j] &= 0x8000; > +} > break; > case 32: > src[i].u32[j] = load_const->value.u32[instr->src[i].swizzle[j]]; > +if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP32 && > +(nir_op_infos[instr->op].input_types[i] == nir_type_float || > + nir_op_infos[instr->op].input_types[i] == > nir_type_float32)) { > + if (src[i].u32[j] < 0x0080) > + src[i].u32[j] = 0; > + if (src[i].u32[j] & 0x8000 && > + !(src[i].u32[j] & 0x7f80)) > + src[i].u32[j] = 0x8000; > +} > break; > case 16: > src[i].u16[j] = load_const->value.u16[instr->src[i].swizzle[j]]; > +if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP16 && > +(nir_op_infos[instr->op].input_types[i] == nir_type_float || > + nir_op_infos[instr->op].input_types[i] == > nir_type_float16)) { > + if (src[i].u16[j] < 0x0400) > + src[i].u16[j] = 0; > + if (src[i].u16[j] & 0x8000 && > + !(src[i].u16[j] & 0x7c00)) > + src[i].u16[j] = 0x8000; > +} > break; > case 8: > src[i].u8[j] = load_const->value.u8[instr->src[i].swizzle[j]]; > @@ -106,6 +133,40 @@ constant_fold_alu_instr(nir_alu_instr *instr, void > *mem_ctx) >nir_eval_const_opcode(instr->op, instr->dest.dest.ssa.num_components, > bit_size, src); > > + for (unsigned j = 0; j < instr->dest.dest.ssa.num_components; j++) { > + if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP64 && > + bit_size == 64 && > + (nir_op_infos[instr->op].output_type == nir_type_float || > + nir_op_infos[instr->op].output_type == nir_type_float64)) { Kind of the same question as before. Does looking at the output type give us the information that we have a 64-bit float? If so, we can drop the bit_size and float checks. Once again, I don't have much knowledge of NIR, more of fp64. So it's maybe stupid. 
;) > + if (dest.u64[j] < 0x0010) > +dest.u64[j] = 0; > + if (dest.u64[j] & 0x8000 && > + !(dest.u64[j] & 0x7ff0)) > +dest.u64[j] = 0x8000; > + } > + if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP32 && > + bit_size == 32 && > + (nir_op_infos[instr->op].output_type == nir_type_float || > + nir_op_infos[instr->op].output_type == nir_type_float32)) { > + if (dest.u32[j] < 0x0080) > +dest.u32[j] = 0; > + if (dest.u32[j] & 0x8000 && > + !(dest.u32[j] & 0x7f80)) > +dest.u32[j] = 0x8000; > + } > + > + if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP16 && > + bit_size == 16 && > + (nir_op_infos[instr->op].output_type == nir_type_float || > + nir_op_infos[instr->op].output_type == nir_type_float16)) { > +
Re: [Mesa-dev] [PATCH 10/12] virgl: modify how we handle GL_MAP_FLUSH_EXPLICIT_BIT
On Thu, Dec 06, 2018 at 05:20:42PM -0800, Gurchetan Singh wrote: > Previously, we ignored the the glUnmap(..) operation and > flushed before we flush the cbuf. Now, let's just flush > the data when we unmap. > > Neither method is optimal, for example: > > glMapBufferRange(.., 0, 100, GL_MAP_FLUSH_EXPLICIT_BIT) > glFlushMappedBufferRange(.., 25, 30) > glFlushMappedBufferRange(.., 65, 70) > > We'll end up flushing 25 --> 70. Maybe we can fix this later. I don't know how to feel about that. We clearly go against the spec. We currently know the behavior of this piece of code, but in a few months someone will face the issue and lose lots of time. I would prefer that we fix it now. If we decide to still upstream that code, we should at least add a big warning. > --- > src/gallium/drivers/virgl/virgl_buffer.c | 37 +- > src/gallium/drivers/virgl/virgl_context.c | 34 +--- > src/gallium/drivers/virgl/virgl_context.h | 1 - > src/gallium/drivers/virgl/virgl_resource.h | 13 > 4 files changed, 16 insertions(+), 69 deletions(-) > > diff --git a/src/gallium/drivers/virgl/virgl_buffer.c > b/src/gallium/drivers/virgl/virgl_buffer.c > index d5d728735e..ae828446ec 100644 > --- a/src/gallium/drivers/virgl/virgl_buffer.c > +++ b/src/gallium/drivers/virgl/virgl_buffer.c > @@ -33,7 +33,6 @@ static void virgl_buffer_destroy(struct pipe_screen *screen, > struct virgl_screen *vs = virgl_screen(screen); > struct virgl_buffer *vbuf = virgl_buffer(buf); > > - util_range_destroy(&vbuf->valid_buffer_range); > vs->vws->resource_unref(vs->vws, vbuf->base.hw_res); > FREE(vbuf); > } > @@ -53,7 +52,7 @@ static void *virgl_buffer_transfer_map(struct pipe_context > *ctx, > bool readback; > bool doflushwait = false; > > - if ((usage & PIPE_TRANSFER_READ) && (vbuf->on_list == TRUE)) > + if (usage & PIPE_TRANSFER_READ) >doflushwait = true; > else >doflushwait = virgl_res_needs_flush_wait(vctx, &vbuf->base, usage); > @@ -92,13 +91,19 @@ static void virgl_buffer_transfer_unmap(struct > pipe_context *ctx, > 
struct virgl_buffer *vbuf = virgl_buffer(transfer->resource); > > if (trans->base.usage & PIPE_TRANSFER_WRITE) { > - if (!(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) { > - struct virgl_screen *vs = virgl_screen(ctx->screen); > - vctx->num_transfers++; > - vs->vws->transfer_put(vs->vws, vbuf->base.hw_res, > - &transfer->box, trans->base.stride, > trans->base.layer_stride, trans->offset, transfer->level); > - > + struct virgl_screen *vs = virgl_screen(ctx->screen); > + if (transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT) { > + transfer->box.x += trans->range.start; > + transfer->box.width = trans->range.end - trans->range.start; > + trans->offset = transfer->box.x; >} > + > + vctx->num_transfers++; > + vs->vws->transfer_put(vs->vws, vbuf->base.hw_res, > +&transfer->box, trans->base.stride, > +trans->base.layer_stride, trans->offset, > +transfer->level); > + > } > > virgl_resource_destroy_transfer(vctx, trans); > @@ -108,20 +113,10 @@ static void virgl_buffer_transfer_flush_region(struct > pipe_context *ctx, > struct pipe_transfer > *transfer, > const struct pipe_box *box) > { > - struct virgl_context *vctx = virgl_context(ctx); > struct virgl_buffer *vbuf = virgl_buffer(transfer->resource); > + struct virgl_transfer *trans = virgl_transfer(transfer); > > - if (!vbuf->on_list) { > - struct pipe_resource *res = NULL; > - > - list_addtail(&vbuf->flush_list, &vctx->to_flush_bufs); > - vbuf->on_list = TRUE; > - pipe_resource_reference(&res, &vbuf->base.u.b); > - } > - > - util_range_add(&vbuf->valid_buffer_range, transfer->box.x + box->x, > - transfer->box.x + box->x + box->width); > - > + util_range_add(&trans->range, box->x, box->x + box->width); > vbuf->base.clean = FALSE; > } > > @@ -145,7 +140,6 @@ struct pipe_resource *virgl_buffer_create(struct > virgl_screen *vs, > buf->base.u.b.screen = &vs->base; > buf->base.u.vtbl = &virgl_buffer_vtbl; > pipe_reference_init(&buf->base.u.b.reference, 1); > - util_range_init(&buf->valid_buffer_range); > 
virgl_resource_layout(&buf->base.u.b, &buf->metadata); > > vbind = pipe_to_virgl_bind(template->bind); > @@ -155,6 +149,5 @@ struct pipe_resource *virgl_buffer_create(struct > virgl_screen *vs, > template->width0, 1, 1, 1, 0, > 0, > buf->metadata.total_size); > > - util_range_set_empty(&buf->valid_buffer_range); > return &buf->base.u.b; > } > diff --git a/src/gallium/drivers/virgl/virgl_
Re: [Mesa-dev] [PATCH 10/12] virgl: modify how we handle GL_MAP_FLUSH_EXPLICIT_BIT
On Mon, Dec 10, 2018 at 10:20:36AM -0800, Gurchetan Singh wrote: > Previously, we ignored the the glUnmap(..) operation and > flushed before we flush the cbuf. Now, let's just flush > the data when we unmap. > > Neither method is optimal, for example: > > glMapBufferRange(.., 0, 100, GL_MAP_FLUSH_EXPLICIT_BIT) > glFlushMappedBufferRange(.., 25, 30) > glFlushMappedBufferRange(.., 65, 70) > > We'll end up flushing 25 --> 70. Maybe we can fix this later. > > v2: Add fixme comment in the code (Elie) Thanks. I still have to run some regressions tests. They are a bit slow on my system. So for now, the series is: Acked-by: Elie Tournier > --- > src/gallium/drivers/virgl/virgl_buffer.c | 46 +++--- > src/gallium/drivers/virgl/virgl_context.c | 34 +--- > src/gallium/drivers/virgl/virgl_context.h | 1 - > src/gallium/drivers/virgl/virgl_resource.h | 13 -- > 4 files changed, 25 insertions(+), 69 deletions(-) > > diff --git a/src/gallium/drivers/virgl/virgl_buffer.c > b/src/gallium/drivers/virgl/virgl_buffer.c > index d5d728735e..a20deab549 100644 > --- a/src/gallium/drivers/virgl/virgl_buffer.c > +++ b/src/gallium/drivers/virgl/virgl_buffer.c > @@ -33,7 +33,6 @@ static void virgl_buffer_destroy(struct pipe_screen *screen, > struct virgl_screen *vs = virgl_screen(screen); > struct virgl_buffer *vbuf = virgl_buffer(buf); > > - util_range_destroy(&vbuf->valid_buffer_range); > vs->vws->resource_unref(vs->vws, vbuf->base.hw_res); > FREE(vbuf); > } > @@ -53,7 +52,7 @@ static void *virgl_buffer_transfer_map(struct pipe_context > *ctx, > bool readback; > bool doflushwait = false; > > - if ((usage & PIPE_TRANSFER_READ) && (vbuf->on_list == TRUE)) > + if (usage & PIPE_TRANSFER_READ) >doflushwait = true; > else >doflushwait = virgl_res_needs_flush_wait(vctx, &vbuf->base, usage); > @@ -92,13 +91,19 @@ static void virgl_buffer_transfer_unmap(struct > pipe_context *ctx, > struct virgl_buffer *vbuf = virgl_buffer(transfer->resource); > > if (trans->base.usage & PIPE_TRANSFER_WRITE) { > - if 
(!(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) { > - struct virgl_screen *vs = virgl_screen(ctx->screen); > - vctx->num_transfers++; > - vs->vws->transfer_put(vs->vws, vbuf->base.hw_res, > - &transfer->box, trans->base.stride, > trans->base.layer_stride, trans->offset, transfer->level); > - > + struct virgl_screen *vs = virgl_screen(ctx->screen); > + if (transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT) { > + transfer->box.x += trans->range.start; > + transfer->box.width = trans->range.end - trans->range.start; > + trans->offset = transfer->box.x; >} > + > + vctx->num_transfers++; > + vs->vws->transfer_put(vs->vws, vbuf->base.hw_res, > +&transfer->box, trans->base.stride, > +trans->base.layer_stride, trans->offset, > +transfer->level); > + > } > > virgl_resource_destroy_transfer(vctx, trans); > @@ -108,20 +113,19 @@ static void virgl_buffer_transfer_flush_region(struct > pipe_context *ctx, > struct pipe_transfer > *transfer, > const struct pipe_box *box) > { > - struct virgl_context *vctx = virgl_context(ctx); > struct virgl_buffer *vbuf = virgl_buffer(transfer->resource); > + struct virgl_transfer *trans = virgl_transfer(transfer); > > - if (!vbuf->on_list) { > - struct pipe_resource *res = NULL; > - > - list_addtail(&vbuf->flush_list, &vctx->to_flush_bufs); > - vbuf->on_list = TRUE; > - pipe_resource_reference(&res, &vbuf->base.u.b); > - } > - > - util_range_add(&vbuf->valid_buffer_range, transfer->box.x + box->x, > - transfer->box.x + box->x + box->width); > - > + /* > +* FIXME: This is not optimal. For example, > +* > +* glMapBufferRange(.., 0, 100, GL_MAP_FLUSH_EXPLICIT_BIT) > +* glFlushMappedBufferRange(.., 25, 30) > +* glFlushMappedBufferRange(.., 65, 70) > +* > +* We'll end up flushing 25 --> 70. > +*/ > + util_range_add(&trans->range, box->x, box->x + box->width); > vbuf->base.clean = FALSE; > } > > @@ -145,7 +149,6 @@ struct pipe_resource *virgl_
Re: [Mesa-dev] PSA: Please send MRs to the mailing list
On Tuesday, 18 December 2018, Erik Faye-Lund wrote: > On Mon, 2018-12-17 at 16:13 -0600, Jason Ekstrand wrote: > > On Mon, Dec 17, 2018 at 2:13 PM Jason Ekstrand > > wrote: > > > On Mon, Dec 17, 2018 at 1:53 PM Eric Anholt > > > wrote: > > > > Jason Ekstrand writes: > > > > > > > > > I don't know if it was actually in the doc that Jordan wrote up > > > > but it's > > > > > courteous of you to send a quick e-mail to the mailing list > > > > when you create > > > > > a new MR so that people who aren't regularly trolling the list > > > > of MRs are > > > > > at least aware that it exists. Of the 20 MRs that have been > > > > posted so far, > > > > > I think I'm the only one doing this. I'm a big fan of MRs but > > > > I also don't > > > > > want us MR fans to anger the list. :-) > > > > > > > > The conclusion of the MR discussion was that notifying the list > > > > was > > > > optional, I thought. > > > > > > > > + > > > > + If the MR may have interest for most of the Mesa community, > > > > you can > > > > + send an email to the mesa-dev email list including a link to > > > > the MR. > > > > + Don't send the patch to mesa-dev, just the MR link. > > > > + > > > > > > Yeah, I think the problem is that "interest for most of the Mesa > > > community" is very vague. What it really should mean is "unless > > > you're very sure that everyone who cares is skimming through MRs". > > > I've seen multiple MRs which touch on st/mesa stuff in reasonably > > > generic ways and I'm pretty sure the radeon devs and other gallium > > > types were some of the less excited about MRs. > > > > > > > The MR process is heavier-weight than I hoped for, given that > > > > we're > > > > currently requiring rebasing tags into commit messages. I don't > > > > want to > > > > add more overhead to it if we don't have to. > > > > > > I agree. I think the real solution here is that we just need a > > > bot. > > > > > > > I took a swing at this. 
This python script will do the trick if we > > can find somewhere to host it and harden it a bit against being a > > spam-magnet: > > > > https://paste.fedoraproject.org/paste/EGGpnxNNWEmadt0fWJEfGg > > > > It's really not all that hard, it's just a bit annoying that GitLab > > requires you to run server-side scripting in a web server in order to > > implement a simple hook. > > > > Unfortunately, Ilia's request for a diffstat is easier said than > > done. In order to do that, the script would have to actually pull > > the repo and run git commands. It could be done but would be a *lot* > > more work. I'm not going to attempt that today nor do I really want > > to get stuck maintaining a python git hook library. :-P > > Can't we get this from the gitlab API? > > https://docs.gitlab.com/ee/api/merge_requests.html#get-single-mr-changes > > There is an experimental tool to go through the MR in the terminal: https://zaquestion.github.io/lab It's not exactly what you want but I still share it. Elie > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] r600: implement callstack workaround for evergreen.
On Fri, Mar 09, 2018 at 04:09:43PM +1000, Dave Airlie wrote: > From: Dave Airlie > > This is ported from the sb backend, there are some issues with > evergreen stacks on the boundary between entries and ALU_PUSH_BEFORE > instructions. > > Whenever we are going to use a push before, we check the stack > usage and if we have to use the workaround, then we switch to > a separate push. > > I noticed this problem dealing with some of the soft fp64 shaders, > in nosb mode, they are quite stack happy. More than happy actually! On my system, I get: [1375/1375] skip: 4, pass: 1368, fail: 3 So thanks a lot. > > This fixes all the glitches and inconsistencies I've seen with them > > Signed-off-by: Dave Airlie Tested-by: Elie Tournier > --- > src/gallium/drivers/r600/r600_shader.c | 39 > +++--- > 1 file changed, 31 insertions(+), 8 deletions(-) > > diff --git a/src/gallium/drivers/r600/r600_shader.c > b/src/gallium/drivers/r600/r600_shader.c > index 48750fb..3ca7890 100644 > --- a/src/gallium/drivers/r600/r600_shader.c > +++ b/src/gallium/drivers/r600/r600_shader.c > @@ -377,7 +377,7 @@ struct r600_shader_tgsi_instruction { > static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct > pipe_stream_output_info *so, int stream, bool ind); > static const struct r600_shader_tgsi_instruction > r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], > cm_shader_tgsi_instruction[]; > static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); > -static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned > reason); > +static inline int callstack_push(struct r600_shader_ctx *ctx, unsigned > reason); > static void fc_pushlevel(struct r600_shader_ctx *ctx, int type); > static int tgsi_else(struct r600_shader_ctx *ctx); > static int tgsi_endif(struct r600_shader_ctx *ctx); > @@ -393,6 +393,15 @@ static void r600_bytecode_src(struct > r600_bytecode_alu_src *bc_src, > static int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned > temp_reg, > 
unsigned dst_reg, unsigned mask); > > +static bool ctx_needs_stack_workaround_8xx(struct r600_shader_ctx *ctx) > +{ > + if (ctx->bc->family == CHIP_HEMLOCK || > + ctx->bc->family == CHIP_CYPRESS || > + ctx->bc->family == CHIP_JUNIPER) > + return false; > + return true; > +} > + > static bool ctx_has_doubles(struct r600_shader_ctx *ctx) > { > if (ctx->bc->family == CHIP_ARUBA || > @@ -10182,7 +10191,7 @@ static int pops(struct r600_shader_ctx *ctx, int pops) > return 0; > } > > -static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx, > +static inline int callstack_update_max_depth(struct r600_shader_ctx *ctx, >unsigned reason) > { > struct r600_stack_info *stack = &ctx->bc->stack; > @@ -10200,7 +10209,7 @@ static inline void callstack_update_max_depth(struct > r600_shader_ctx *ctx, > /* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 > elements on >* the stack must be reserved to hold the current > active/continue >* masks */ > - if (reason == FC_PUSH_VPM) { > + if (reason == FC_PUSH_VPM || stack->push > 0) { > elements += 2; > } > break; > @@ -10226,7 +10235,7 @@ static inline void callstack_update_max_depth(struct > r600_shader_ctx *ctx, >*NOTE: it seems we also need to reserve additional element > in some >*other cases, e.g. 
when we have 4 levels of PUSH_VPM in > the shader, >*then STACK_SIZE should be 2 instead of 1 */ > - if (reason == FC_PUSH_VPM) { > + if (reason == FC_PUSH_VPM || stack->push > 0) { > elements += 1; > } > break; > @@ -10245,6 +10254,7 @@ static inline void callstack_update_max_depth(struct > r600_shader_ctx *ctx, > > if (entries > stack->max_entries) > stack->max_entries = entries; > + return elements; > } > > static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned > reason) > @@ -10268,7 +10278,7 @@ static inline void callstack_pop(struct > r600_shader_ctx *ctx, unsigned reason) > } > } > > -static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned > reason) > +static inline int callstack_push(struct r600_shader_ctx *ctx, unsigned > reason) > { > switch (reason) { >
Re: [Mesa-dev] soft fp64 support - main body (glsl/gallium)
On Tue, Mar 13, 2018 at 02:28:04PM +1000, Dave Airlie wrote: > On 13 March 2018 at 14:24, Dave Airlie wrote: > > This is the main code for the soft fp64 work. It's mostly Elie's > > code with a bunch of changes by me. > > > > All the patches are in my tree here, along with some other bits: > https://cgit.freedesktop.org/~airlied/mesa/log/?h=glsl_arb_gpu_shader_fp64_v4 > First, thanks for your help/work on this project. Much much appreciated! Patch 7 didn't land on the ML, probably because of its size, so please check out the tree. For information, the failures are due to ssbo and arrays of arrays. > Dave. > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] soft fp64 support - main body (glsl/gallium)
On Tue, Mar 13, 2018 at 04:54:27PM -0700, Matt Turner wrote: > On Mon, Mar 12, 2018 at 9:24 PM, Dave Airlie wrote: > > This is the main code for the soft fp64 work. It's mostly Elie's > > code with a bunch of changes by me. > > > > This patchset has all the glsl lowering code. (using float64.glsl, > > yes I know checked in files are bad, but not bad enough for anyone > > to have solved int64.glsl yet, so we have a precedent). > Hi Matt > Have you thought about making a NIR backend for R600? > > Elie sent patches for lowering fp64 operations in NIR, and it's what > I'm going to start from when I do the analogous project for some > future Intel hardware. It's sad to duplicate all of this code, much > less all of this effort. Let me know when you start this project; I will be happy to help. A (possibly bad) idea: can we wire up the GLSL IR version for your hardware? Once the NIR version is finished, we just remove it. > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] mesa: Correctly print glTexImage dimensions
texture_format_error_check_gles() displays error like "glTexImage%dD". This patch just replace the %d by the correct dimension. Signed-off-by: Elie Tournier --- src/mesa/main/teximage.c | 13 ++--- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index e5f8bb0718..cc329e6410 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -1787,7 +1787,6 @@ texture_formats_agree(GLenum internalFormat, * \param format pixel data format given by the user. * \param type pixel data type given by the user. * \param internalFormat internal format given by the user. - * \param dimensions texture image dimensions (must be 1, 2 or 3). * \param callerName name of the caller function to print in the error message * * \return true if a error is found, false otherwise @@ -1796,8 +1795,7 @@ texture_formats_agree(GLenum internalFormat, */ static bool texture_format_error_check_gles(struct gl_context *ctx, GLenum format, -GLenum type, GLenum internalFormat, -GLuint dimensions, const char *callerName) +GLenum type, GLenum internalFormat, const char *callerName) { GLenum err = _mesa_es3_error_check_format_and_type(ctx, format, type, internalFormat); @@ -1911,9 +1909,11 @@ texture_error_check( struct gl_context *ctx, * Formats and types that require additional extensions (e.g., GL_FLOAT * requires GL_OES_texture_float) are filtered elsewhere. 
*/ + char bufCallerName[20]; + snprintf(bufCallerName, 20, "glTexImage%dD", dimensions); if (_mesa_is_gles(ctx) && - texture_format_error_check_gles(ctx, format, type, internalFormat, - dimensions, "glTexImage%dD")) { + texture_format_error_check_gles(ctx, format, type, + internalFormat, bufCallerName)) { return GL_TRUE; } @@ -2234,8 +2234,7 @@ texsubimage_error_check(struct gl_context *ctx, GLuint dimensions, */ if (_mesa_is_gles(ctx) && texture_format_error_check_gles(ctx, format, type, - internalFormat, - dimensions, callerName)) { + internalFormat, callerName)) { return GL_TRUE; } -- 2.16.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: Correctly print glTexImage dimensions
On Fri, Jan 26, 2018 at 02:34:03PM +0200, Tapani Pälli wrote: > Hi; > > On 01/25/2018 05:18 PM, Elie Tournier wrote: > > texture_format_error_check_gles() displays error like "glTexImage%dD". > > This patch just replace the %d by the correct dimension. > > > > Signed-off-by: Elie Tournier > > --- > > src/mesa/main/teximage.c | 13 ++--- > > 1 file changed, 6 insertions(+), 7 deletions(-) > > > > diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c > > index e5f8bb0718..cc329e6410 100644 > > --- a/src/mesa/main/teximage.c > > +++ b/src/mesa/main/teximage.c > > @@ -1787,7 +1787,6 @@ texture_formats_agree(GLenum internalFormat, > >* \param format pixel data format given by the user. > >* \param type pixel data type given by the user. > >* \param internalFormat internal format given by the user. > > - * \param dimensions texture image dimensions (must be 1, 2 or 3). > >* \param callerName name of the caller function to print in the error > > message > >* > >* \return true if a error is found, false otherwise > > @@ -1796,8 +1795,7 @@ texture_formats_agree(GLenum internalFormat, > >*/ > > static bool > > texture_format_error_check_gles(struct gl_context *ctx, GLenum format, > > -GLenum type, GLenum internalFormat, > > -GLuint dimensions, const char *callerName) > > +GLenum type, GLenum internalFormat, const > > char *callerName) > > { > > GLenum err = _mesa_es3_error_check_format_and_type(ctx, format, type, > > internalFormat); > > @@ -1911,9 +1909,11 @@ texture_error_check( struct gl_context *ctx, > > * Formats and types that require additional extensions (e.g., GL_FLOAT > > * requires GL_OES_texture_float) are filtered elsewhere. > > */ > > + char bufCallerName[20]; > > + snprintf(bufCallerName, 20, "glTexImage%dD", dimensions); > > I don't think this is going to work, reason is that this error check is used > both from glTexImage and glTexSubImage. 
For example _mesa_TexSubImage2D > passes "glTexSubImage2D" as callerName like this: > > _mesa_TexSubImage2D > texsubimage_err > texsubimage_error_check > texture_format_error_check_gles > Hello Tapani, I'm not sure I understand your comment. This patch just calls texture_format_error_check_gles() with the callerName parameter set correctly. Previously, callerName was "glTexImage%dD" but we didn't display the dimension. So I use snprintf in order to print the image dimension. I should probably split this patch in 2: * Remove the unused "dimensions" parameter in texture_format_error_check_gles. * Set callerName with the correct dimension. > > > if (_mesa_is_gles(ctx) && > > - texture_format_error_check_gles(ctx, format, type, internalFormat, > > - dimensions, "glTexImage%dD")) { > > + texture_format_error_check_gles(ctx, format, type, > > + internalFormat, bufCallerName)) { > > return GL_TRUE; > > } > > @@ -2234,8 +2234,7 @@ texsubimage_error_check(struct gl_context *ctx, > > GLuint dimensions, > > */ > > if (_mesa_is_gles(ctx) && > > texture_format_error_check_gles(ctx, format, type, > > - internalFormat, > > - dimensions, callerName)) { > > + internalFormat, callerName)) { > > return GL_TRUE; > > } > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: Correctly print glTexImage dimensions
On Mon, Jan 29, 2018 at 01:24:42PM +0200, Tapani Pälli wrote: > > > On 01/29/2018 12:05 PM, Elie Tournier wrote: > > On Fri, Jan 26, 2018 at 02:34:03PM +0200, Tapani Pälli wrote: > > > Hi; > > > > > > On 01/25/2018 05:18 PM, Elie Tournier wrote: > > > > texture_format_error_check_gles() displays error like "glTexImage%dD". > > > > This patch just replace the %d by the correct dimension. > > > > > > > > Signed-off-by: Elie Tournier > > > > --- > > > >src/mesa/main/teximage.c | 13 ++--- > > > >1 file changed, 6 insertions(+), 7 deletions(-) > > > > > > > > diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c > > > > index e5f8bb0718..cc329e6410 100644 > > > > --- a/src/mesa/main/teximage.c > > > > +++ b/src/mesa/main/teximage.c > > > > @@ -1787,7 +1787,6 @@ texture_formats_agree(GLenum internalFormat, > > > > * \param format pixel data format given by the user. > > > > * \param type pixel data type given by the user. > > > > * \param internalFormat internal format given by the user. > > > > - * \param dimensions texture image dimensions (must be 1, 2 or 3). > > > > * \param callerName name of the caller function to print in the > > > > error message > > > > * > > > > * \return true if a error is found, false otherwise > > > > @@ -1796,8 +1795,7 @@ texture_formats_agree(GLenum internalFormat, > > > > */ > > > >static bool > > > >texture_format_error_check_gles(struct gl_context *ctx, GLenum > > > > format, > > > > -GLenum type, GLenum internalFormat, > > > > -GLuint dimensions, const char > > > > *callerName) > > > > +GLenum type, GLenum internalFormat, > > > > const char *callerName) > > > >{ > > > > GLenum err = _mesa_es3_error_check_format_and_type(ctx, format, > > > > type, > > > > > > > > internalFormat); > > > > @@ -1911,9 +1909,11 @@ texture_error_check( struct gl_context *ctx, > > > >* Formats and types that require additional extensions (e.g., > > > > GL_FLOAT > > > >* requires GL_OES_texture_float) are filtered elsewhere. 
> > > >*/ > > > > + char bufCallerName[20]; > > > > + snprintf(bufCallerName, 20, "glTexImage%dD", dimensions); > > > > > > I don't think this is going to work, reason is that this error check is > > > used > > > both from glTexImage and glTexSubImage. For example _mesa_TexSubImage2D > > > passes "glTexSubImage2D" as callerName like this: > > > > > > _mesa_TexSubImage2D > > > texsubimage_err > > > texsubimage_error_check > > > texture_format_error_check_gles > > > > > Hello Tapani, > > > > I'm not sure to understand your comment. > > This patch just call texture_format_error_check_gles() with callerName > > parameter set correctly. > > Previously, callerName was "glTexImage%dD" but we didn't display the > > dimension. > > So I use snprintf in order to print the image dimension. > > Sorry, now I noticed this works just fine. I was blind before and thought > you did the snprintf in texture_format_error_check_gles. Sorry for not being clear the first time. > > > I should probably split this patch in 2: > > * Remove unused "dimensions" parameter in texture_format_error_check_gles. > > * Set callerName with the correct dimension. > > I'm fine having these on same patch; > Reviewed-by: Tapani Pälli Thanks. Do you mind push this patch? I don't have commit right. 
> > > > > > > > if (_mesa_is_gles(ctx) && > > > > - texture_format_error_check_gles(ctx, format, type, > > > > internalFormat, > > > > - dimensions, "glTexImage%dD")) { > > > > + texture_format_error_check_gles(ctx, format, type, > > > > + internalFormat, bufCallerName)) > > > > { > > > > return GL_TRUE; > > > > } > > > > @@ -2234,8 +2234,7 @@ texsubimage_error_check(struct gl_context *ctx, > > > > GLuint dimensions, > > > >*/ > > > > if (_mesa_is_gles(ctx) && > > > > texture_format_error_check_gles(ctx, format, type, > > > > - internalFormat, > > > > - dimensions, callerName)) { > > > > + internalFormat, callerName)) { > > > > return GL_TRUE; > > > > } > > > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/9] glsl: add u64->fp64 and i64->fp64 builtins.
On Thu, Feb 01, 2018 at 10:00:12AM -0800, Dylan Baker wrote: > There are changes from Makefile.sources that need be ported to corresponding > meson.build files as well. Oh yes, nice spot. I will do it on my local fp64 branch too. > > It looks like builtin_float64.h is generated from float64.glsl, is that right? Just as builtin_int64.h is generated from int64.glsl, builtin_float64.h is generated from float64.glsl. > I'm not very happy about checking in a generated file for all of the reasons > that we don't check generated files in. I recognize that trying to generate > code > from an in tree binary at compile time makes the cross compiling situation > less > than ideal since we would need to bootstrap the glsl compiler, but it's still > annoying. This is why we stopped using glsl compilation for mesa, and added > tools like nir_builder and ir_builder. I guess that's a pretty big complaint > and > I don't expect you to fix it, but it's frustrating that we're going to have to > check generated code in and deal with all the problems that come along with > that. This project is quite big. Using ir_builder would make this project even bigger. At least for me. ;) > > Dylan > > Quoting Dave Airlie (2018-01-31 19:33:51) > > From: Elie Tournier > > > > This adds the first two fp64 builtins. > > They are for int/uint to fp64 conversions. > > > > cayman hw can't do it, and this avoids me having to lower it in > > the backend, and also starts to provide some of the soft fp64 > > infrastructure. 
> > > > [airlied: extracted from Elie's tree] > > Signed-off-by: Dave Airlie > > --- > > src/compiler/Makefile.sources | 1 + > > src/compiler/glsl/builtin_float64.h | 872 > > > > src/compiler/glsl/builtin_functions.cpp | 8 + > > src/compiler/glsl/builtin_functions.h | 6 + > > src/compiler/glsl/float64.glsl | 126 + > > src/compiler/glsl/generate_ir.cpp | 2 +- > > 6 files changed, 1014 insertions(+), 1 deletion(-) > > create mode 100644 src/compiler/glsl/builtin_float64.h > > create mode 100644 src/compiler/glsl/float64.glsl > > > > diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources > > index bb93ba68a2..9623b3ea62 100644 > > --- a/src/compiler/Makefile.sources > > +++ b/src/compiler/Makefile.sources > > @@ -22,6 +22,7 @@ LIBGLSL_FILES = \ > > glsl/builtin_functions.cpp \ > > glsl/builtin_functions.h \ > > glsl/builtin_int64.h \ > > + glsl/builtin_float64.h \ > > glsl/builtin_types.cpp \ > > glsl/builtin_variables.cpp \ > > glsl/generate_ir.cpp \ > > diff --git a/src/compiler/glsl/builtin_float64.h > > b/src/compiler/glsl/builtin_float64.h > > new file mode 100644 > > index 00..5747d22418 > > --- /dev/null > > +++ b/src/compiler/glsl/builtin_float64.h > > @@ -0,0 +1,872 @@ > > +ir_function_signature * > > +countLeadingZeros32(void *mem_ctx, builtin_available_predicate avail) > > +{ > > + ir_function_signature *const sig = > > + new(mem_ctx) ir_function_signature(glsl_type::int_type, avail); > > + ir_factory body(&sig->body, mem_ctx); > > + sig->is_defined = true; > > + > > + exec_list sig_parameters; > > + > > + ir_variable *const r000B = new(mem_ctx) > > ir_variable(glsl_type::uint_type, "a", ir_var_function_in); > > + sig_parameters.push_tail(r000B); > > + ir_variable *const r000C = body.make_temp(glsl_type::int_type, > > "return_value"); > > + ir_variable *const r000D = new(mem_ctx) > > ir_variable(glsl_type::int_type, "shiftCount", ir_var_auto); > > + body.emit(r000D); > > + /* IF CONDITION */ > > + ir_expression *const r000F = 
equal(r000B, body.constant(0u)); > > + ir_if *f000E = new(mem_ctx) ir_if(operand(r000F).val); > > + exec_list *const f000E_parent_instructions = body.instructions; > > + > > + /* THEN INSTRUCTIONS */ > > + body.instructions = &f000E->then_instructions; > > + > > + body.emit(assign(r000C, body.constant(int(32)), 0x01)); > > + > > + > > + /* ELSE INSTRUCTIONS */ > > + body.instructions = &f000E->else_instructions; > > + > > + body.emit(assign(r000D, body.constant(int(0)), 0x01)); > > + > > + /* IF CONDITION */ > > + ir_expression *const r0011 = bit_and(r000B, > > body.constant(4294901760u)); > > + ir_expression *const r0012 = e
Re: [Mesa-dev] [PATCH] i965: remove unused brw_nir_lower_cs_shared()
On Mon, Feb 05, 2018 at 09:22:15AM +1100, Timothy Arceri wrote: > This has been unused since 8761a04d0d93. Reviewed-by: Elie Tournier > --- > src/intel/compiler/brw_nir.c | 8 > src/intel/compiler/brw_nir.h | 1 - > 2 files changed, 9 deletions(-) > > diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c > index dbddef0d04..405985f8b6 100644 > --- a/src/intel/compiler/brw_nir.c > +++ b/src/intel/compiler/brw_nir.c > @@ -503,14 +503,6 @@ brw_nir_lower_fs_outputs(nir_shader *nir) > nir_lower_io(nir, nir_var_shader_out, type_size_dvec4, 0); > } > > -void > -brw_nir_lower_cs_shared(nir_shader *nir) > -{ > - nir_assign_var_locations(&nir->shared, &nir->num_shared, > -type_size_scalar_bytes); > - nir_lower_io(nir, nir_var_shared, type_size_scalar_bytes, 0); > -} > - > #define OPT(pass, ...) ({ \ > bool this_progress = false; \ > NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ > diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h > index 3bef99417e..03f52da08e 100644 > --- a/src/intel/compiler/brw_nir.h > +++ b/src/intel/compiler/brw_nir.h > @@ -113,7 +113,6 @@ void brw_nir_lower_vue_outputs(nir_shader *nir, bool > is_scalar); > void brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map > *vue, > GLenum tes_primitive_mode); > void brw_nir_lower_fs_outputs(nir_shader *nir); > -void brw_nir_lower_cs_shared(nir_shader *nir); > > nir_shader *brw_postprocess_nir(nir_shader *nir, > const struct brw_compiler *compiler, > -- > 2.14.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH mesa] r200: remove left over dead code
On Fri, Feb 09, 2018 at 11:42:50AM +, Eric Engestrom wrote: > 0aaa27f29187ffb739c7 removed the references to this array without > removing the array itself > > Cc: Ian Romanick > Fixes: 0aaa27f29187ffb739c7 "mesa: Pass the translated color logic op > dd_function_table::LogicOpcode" > Signed-off-by: Eric Engestrom Reviewed-by: Elie Tournier > --- > src/mesa/drivers/dri/r200/r200_state.c | 20 > 1 file changed, 20 deletions(-) > > diff --git a/src/mesa/drivers/dri/r200/r200_state.c > b/src/mesa/drivers/dri/r200/r200_state.c > index 33b696347a3d2218a92e..d53225d63abe9409e0ef 100644 > --- a/src/mesa/drivers/dri/r200/r200_state.c > +++ b/src/mesa/drivers/dri/r200/r200_state.c > @@ -1626,26 +1626,6 @@ static void r200RenderMode( struct gl_context *ctx, > GLenum mode ) > FALLBACK( rmesa, R200_FALLBACK_RENDER_MODE, (mode != GL_RENDER) ); > } > > - > -static GLuint r200_rop_tab[] = { > - R200_ROP_CLEAR, > - R200_ROP_AND, > - R200_ROP_AND_REVERSE, > - R200_ROP_COPY, > - R200_ROP_AND_INVERTED, > - R200_ROP_NOOP, > - R200_ROP_XOR, > - R200_ROP_OR, > - R200_ROP_NOR, > - R200_ROP_EQUIV, > - R200_ROP_INVERT, > - R200_ROP_OR_REVERSE, > - R200_ROP_COPY_INVERTED, > - R200_ROP_OR_INVERTED, > - R200_ROP_NAND, > - R200_ROP_SET, > -}; > - > static void r200LogicOpCode(struct gl_context *ctx, enum gl_logicop_mode > opcode) > { > r200ContextPtr rmesa = R200_CONTEXT(ctx); > -- > Cheers, > Eric > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 01/17] r200: Remove dead rop table
Eric Engestrom was faster. ;) https://patchwork.freedesktop.org/patch/203691/ On 10 February 2018 at 16:32, Eric Anholt wrote: > Fixes: 0aaa27f29187 ("mesa: Pass the translated color logic op > dd_function_table::LogicOpcode") > Cc: Ian Romanick > --- > src/mesa/drivers/dri/r200/r200_state.c | 20 > 1 file changed, 20 deletions(-) > > diff --git a/src/mesa/drivers/dri/r200/r200_state.c > b/src/mesa/drivers/dri/r200/r200_state.c > index 33b696347a3d..d53225d63abe 100644 > --- a/src/mesa/drivers/dri/r200/r200_state.c > +++ b/src/mesa/drivers/dri/r200/r200_state.c > @@ -1626,26 +1626,6 @@ static void r200RenderMode( struct gl_context *ctx, > GLenum mode ) > FALLBACK( rmesa, R200_FALLBACK_RENDER_MODE, (mode != GL_RENDER) ); > } > > - > -static GLuint r200_rop_tab[] = { > - R200_ROP_CLEAR, > - R200_ROP_AND, > - R200_ROP_AND_REVERSE, > - R200_ROP_COPY, > - R200_ROP_AND_INVERTED, > - R200_ROP_NOOP, > - R200_ROP_XOR, > - R200_ROP_OR, > - R200_ROP_NOR, > - R200_ROP_EQUIV, > - R200_ROP_INVERT, > - R200_ROP_OR_REVERSE, > - R200_ROP_COPY_INVERTED, > - R200_ROP_OR_INVERTED, > - R200_ROP_NAND, > - R200_ROP_SET, > -}; > - > static void r200LogicOpCode(struct gl_context *ctx, enum gl_logicop_mode > opcode) > { > r200ContextPtr rmesa = R200_CONTEXT(ctx); > -- > 2.15.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC 01/11] glsl: Add "built-in" function to do abs(fp64)
Signed-off-by: Elie Tournier --- src/compiler/Makefile.sources | 1 + src/compiler/glsl/builtin_float64.h | 19 +++ src/compiler/glsl/builtin_functions.cpp | 4 src/compiler/glsl/builtin_functions.h | 3 +++ src/compiler/glsl/float64.glsl | 28 src/compiler/glsl/generate_ir.cpp | 1 + 6 files changed, 56 insertions(+) create mode 100644 src/compiler/glsl/builtin_float64.h create mode 100644 src/compiler/glsl/float64.glsl diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index 643a0181d8..b67834246f 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -22,6 +22,7 @@ LIBGLSL_FILES = \ glsl/builtin_functions.cpp \ glsl/builtin_functions.h \ glsl/builtin_int64.h \ + glsl/builtin_float64.h \ glsl/builtin_types.cpp \ glsl/builtin_variables.cpp \ glsl/generate_ir.cpp \ diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h new file mode 100644 index 00..c1ec89d210 --- /dev/null +++ b/src/compiler/glsl/builtin_float64.h @@ -0,0 +1,19 @@ +ir_function_signature * +fabs64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r000B = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r000B); + body.emit(assign(r000B, bit_and(swizzle_x(r000B), body.constant(2147483647u)), 0x01)); + + body.emit(ret(r000B)); + + sig->replace_parameters(&sig_parameters); + return sig; +} diff --git a/src/compiler/glsl/builtin_functions.cpp b/src/compiler/glsl/builtin_functions.cpp index e03a50c843..b0b1781725 100644 --- a/src/compiler/glsl/builtin_functions.cpp +++ b/src/compiler/glsl/builtin_functions.cpp @@ -3129,6 +3129,10 @@ builtin_builder::create_builtins() generate_ir::umul64(mem_ctx, integer_functions_supported), NULL); + 
add_function("__builtin_fabs64", +generate_ir::fabs64(mem_ctx, integer_functions_supported), +NULL); + #undef F #undef FI #undef FIUD_VEC diff --git a/src/compiler/glsl/builtin_functions.h b/src/compiler/glsl/builtin_functions.h index 7ae211b48a..abe02d97b6 100644 --- a/src/compiler/glsl/builtin_functions.h +++ b/src/compiler/glsl/builtin_functions.h @@ -63,6 +63,9 @@ umul64(void *mem_ctx, builtin_available_predicate avail); ir_function_signature * sign64(void *mem_ctx, builtin_available_predicate avail); +ir_function_signature * +fabs64(void *mem_ctx, builtin_available_predicate avail); + } #endif /* BULITIN_FUNCTIONS_H */ diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl new file mode 100644 index 00..b8f0c2e444 --- /dev/null +++ b/src/compiler/glsl/float64.glsl @@ -0,0 +1,28 @@ +/* Compile with: + * + * glsl_compiler --version 130 --dump-builder float64.glsl > builtin_float64.h + * + */ + +#version 130 + +/* Software IEEE floating-point rounding mode. + * GLSL spec section "4.7.1 Range and Precision": + * The rounding mode cannot be set and is undefined. + * But here, we are able to define the rounding mode at the compilation time. + */ +#define FLOAT_ROUND_NEAREST_EVEN 0 +#define FLOAT_ROUND_TO_ZERO 1 +#define FLOAT_ROUND_DOWN 2 +#define FLOAT_ROUND_UP 3 +#define FLOAT_ROUNDING_MODE FLOAT_ROUND_NEAREST_EVEN + +/* Absolute value of a Float64 : + * Clear the sign bit + */ +uvec2 +fabs64( uvec2 a ) +{ +a.x &= 0x7FFFFFFFu; +return a; +} diff --git a/src/compiler/glsl/generate_ir.cpp b/src/compiler/glsl/generate_ir.cpp index 255b0484f2..e6ece4860f 100644 --- a/src/compiler/glsl/generate_ir.cpp +++ b/src/compiler/glsl/generate_ir.cpp @@ -29,5 +29,6 @@ using namespace ir_builder; namespace generate_ir { #include "builtin_int64.h" +#include "builtin_float64.h" } -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC 05/11] glsl: Add "built-in" functions to do lt(fp64, fp64)
Signed-off-by: Elie Tournier --- src/compiler/glsl/builtin_float64.h | 161 src/compiler/glsl/builtin_functions.cpp | 4 + src/compiler/glsl/builtin_functions.h | 3 + src/compiler/glsl/float64.glsl | 45 + 4 files changed, 213 insertions(+) diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h index f8ceacdabf..e825536466 100644 --- a/src/compiler/glsl/builtin_float64.h +++ b/src/compiler/glsl/builtin_float64.h @@ -334,3 +334,164 @@ fle64(void *mem_ctx, builtin_available_predicate avail) sig->replace_parameters(&sig_parameters); return sig; } +ir_function_signature * +lt64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::bool_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r0060 = new(mem_ctx) ir_variable(glsl_type::uint_type, "a0", ir_var_function_in); + sig_parameters.push_tail(r0060); + ir_variable *const r0061 = new(mem_ctx) ir_variable(glsl_type::uint_type, "a1", ir_var_function_in); + sig_parameters.push_tail(r0061); + ir_variable *const r0062 = new(mem_ctx) ir_variable(glsl_type::uint_type, "b0", ir_var_function_in); + sig_parameters.push_tail(r0062); + ir_variable *const r0063 = new(mem_ctx) ir_variable(glsl_type::uint_type, "b1", ir_var_function_in); + sig_parameters.push_tail(r0063); + ir_expression *const r0064 = less(r0060, r0062); + ir_expression *const r0065 = equal(r0060, r0062); + ir_expression *const r0066 = less(r0061, r0063); + ir_expression *const r0067 = logic_and(r0065, r0066); + ir_expression *const r0068 = logic_or(r0064, r0067); + body.emit(ret(r0068)); + + sig->replace_parameters(&sig_parameters); + return sig; +} +ir_function_signature * +flt64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::bool_type, avail); + ir_factory body(&sig->body, mem_ctx); 
+ sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r0069 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r0069); + ir_variable *const r006A = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "b", ir_var_function_in); + sig_parameters.push_tail(r006A); + ir_variable *const r006B = body.make_temp(glsl_type::bool_type, "return_value"); + ir_variable *const r006C = new(mem_ctx) ir_variable(glsl_type::bool_type, "isbNaN", ir_var_auto); + body.emit(r006C); + ir_variable *const r006D = new(mem_ctx) ir_variable(glsl_type::bool_type, "isaNaN", ir_var_auto); + body.emit(r006D); + ir_variable *const r006E = body.make_temp(glsl_type::uvec2_type, "vec_ctor"); + body.emit(assign(r006E, bit_and(swizzle_x(r0069), body.constant(1048575u)), 0x01)); + + body.emit(assign(r006E, swizzle_y(r0069), 0x02)); + + ir_variable *const r006F = body.make_temp(glsl_type::uvec2_type, "vec_ctor"); + body.emit(assign(r006F, bit_and(swizzle_x(r006A), body.constant(1048575u)), 0x01)); + + body.emit(assign(r006F, swizzle_y(r006A), 0x02)); + + ir_expression *const r0070 = rshift(swizzle_x(r0069), body.constant(int(20))); + ir_expression *const r0071 = bit_and(r0070, body.constant(2047u)); + ir_expression *const r0072 = equal(r0071, body.constant(2047u)); + ir_expression *const r0073 = bit_or(swizzle_x(r006E), swizzle_y(r0069)); + ir_expression *const r0074 = nequal(r0073, body.constant(0u)); + body.emit(assign(r006D, logic_and(r0072, r0074), 0x01)); + + ir_expression *const r0075 = rshift(swizzle_x(r006A), body.constant(int(20))); + ir_expression *const r0076 = bit_and(r0075, body.constant(2047u)); + ir_expression *const r0077 = equal(r0076, body.constant(2047u)); + ir_expression *const r0078 = bit_or(swizzle_x(r006F), swizzle_y(r006A)); + ir_expression *const r0079 = nequal(r0078, body.constant(0u)); + body.emit(assign(r006C, logic_and(r0077, r0079), 0x01)); + + /* IF CONDITION */ + ir_expression *const r007B = 
logic_or(r006D, r006C); + ir_if *f007A = new(mem_ctx) ir_if(operand(r007B).val); + exec_list *const f007A_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f007A->then_instructions; + + body.emit(assign(r006B, body.constant(false), 0x01)); + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f007A->else_instructions; + + ir_variable *const r007C = body.make_temp(glsl_type::uint_type, "extractFloat64Sign_retval"); + body.emit(assign(r007C, rshift(swizzle_x(r0069), body.constant(int(31))), 0x01)); + + ir_variable *const r007D = body
[Mesa-dev] [RFC 00/11] GL_ARB_gpu_shader_fp64
From: Elie Tournier This series is based on Ian's work on GL_ARB_gpu_shader_int64 [1]. The goal is to expose GL_ARB_gpu_shader_fp64 to OpenGL 3.0 GPUs. Each function can be independently tested using shader_runner from piglit. The piglit files are stored on GitHub [2]. [1] https://lists.freedesktop.org/archives/mesa-dev/2016-November/136718.html [2] https://github.com/Hopetech/libSoftFloat Elie Tournier (11): glsl: Add "built-in" function to do abs(fp64) glsl: Add "built-in" function to do neg(fp64) glsl: Add "built-in" functions to do eq(fp64,fp64) glsl: Add "built-in" functions to do le(fp64,fp64) glsl: Add "built-in" functions to do lt(fp64,fp64) glsl: Add "built-in" functions to do add(fp64,fp64) glsl: Add "built-in" functions to do mul(fp64,fp64) glsl: Add "built-in" functions to do div(fp64,fp64) glsl: Add "built-in" functions to do fp32_to_fp64(fp32) glsl: Add "built-in" functions to do fp64_to_fp32(fp64) glsl: Add fp64 functions to the parser. src/compiler/Makefile.sources | 1 + src/compiler/glsl/builtin_float64.h | 24243 ++ src/compiler/glsl/builtin_functions.cpp |40 + src/compiler/glsl/builtin_functions.h |30 + src/compiler/glsl/float64.glsl | 1378 ++ src/compiler/glsl/generate_ir.cpp | 1 + src/compiler/glsl/glcpp/glcpp-parse.y |10 + 7 files changed, 25703 insertions(+) create mode 100644 src/compiler/glsl/builtin_float64.h create mode 100644 src/compiler/glsl/float64.glsl -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC 09/11] glsl: Add "built-in" functions to do fp32_to_fp64(fp32)
Signed-off-by: Elie Tournier --- src/compiler/glsl/builtin_float64.h | 490 src/compiler/glsl/builtin_functions.cpp | 4 + src/compiler/glsl/builtin_functions.h | 3 + src/compiler/glsl/float64.glsl | 77 + 4 files changed, 574 insertions(+) diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h index a795d404c1..b50ebc2dc2 100644 --- a/src/compiler/glsl/builtin_float64.h +++ b/src/compiler/glsl/builtin_float64.h @@ -23162,3 +23162,493 @@ r1189_data.u[1] = 4294967295; sig->replace_parameters(&sig_parameters); return sig; } +ir_function_signature * +normalizeFloat32Subnormal(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::void_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r1354 = new(mem_ctx) ir_variable(glsl_type::uint_type, "aFrac", ir_var_function_in); + sig_parameters.push_tail(r1354); + ir_variable *const r1355 = new(mem_ctx) ir_variable(glsl_type::uint_type, "zExpPtr", ir_var_function_inout); + sig_parameters.push_tail(r1355); + ir_variable *const r1356 = new(mem_ctx) ir_variable(glsl_type::uint_type, "zFracPtr", ir_var_function_inout); + sig_parameters.push_tail(r1356); + ir_variable *const r1357 = new(mem_ctx) ir_variable(glsl_type::uint_type, "shiftCount", ir_var_auto); + body.emit(r1357); + ir_variable *const r1358 = body.make_temp(glsl_type::uint_type, "a"); + body.emit(assign(r1358, r1354, 0x01)); + + ir_variable *const r1359 = body.make_temp(glsl_type::uint_type, "return_value"); + ir_variable *const r135A = new(mem_ctx) ir_variable(glsl_type::uint_type, "shiftCount", ir_var_auto); + body.emit(r135A); + /* IF CONDITION */ + ir_expression *const r135C = equal(r1354, body.constant(0u)); + ir_if *f135B = new(mem_ctx) ir_if(operand(r135C).val); + exec_list *const f135B_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions 
= &f135B->then_instructions; + + body.emit(assign(r1359, body.constant(32u), 0x01)); + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f135B->else_instructions; + + body.emit(assign(r135A, body.constant(0u), 0x01)); + + /* IF CONDITION */ + ir_expression *const r135E = bit_and(r1354, body.constant(4294901760u)); + ir_expression *const r135F = equal(r135E, body.constant(0u)); + ir_if *f135D = new(mem_ctx) ir_if(operand(r135F).val); + exec_list *const f135D_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f135D->then_instructions; + + body.emit(assign(r135A, body.constant(16u), 0x01)); + + body.emit(assign(r1358, lshift(r1354, body.constant(int(16))), 0x01)); + + + body.instructions = f135D_parent_instructions; + body.emit(f135D); + + /* END IF */ + + /* IF CONDITION */ + ir_expression *const r1361 = bit_and(r1358, body.constant(4278190080u)); + ir_expression *const r1362 = equal(r1361, body.constant(0u)); + ir_if *f1360 = new(mem_ctx) ir_if(operand(r1362).val); + exec_list *const f1360_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f1360->then_instructions; + + body.emit(assign(r135A, add(r135A, body.constant(8u)), 0x01)); + + body.emit(assign(r1358, lshift(r1358, body.constant(int(8))), 0x01)); + + + body.instructions = f1360_parent_instructions; + body.emit(f1360); + + /* END IF */ + + /* IF CONDITION */ + ir_expression *const r1364 = bit_and(r1358, body.constant(4026531840u)); + ir_expression *const r1365 = equal(r1364, body.constant(0u)); + ir_if *f1363 = new(mem_ctx) ir_if(operand(r1365).val); + exec_list *const f1363_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f1363->then_instructions; + + body.emit(assign(r135A, add(r135A, body.constant(4u)), 0x01)); + + body.emit(assign(r1358, lshift(r1358, body.constant(int(4))), 0x01)); + + + body.instructions = f1363_parent_instructions; + body.emit(f1363); + + /* END IF */ + + /* IF 
CONDITION */ + ir_expression *const r1367 = bit_and(r1358, body.constant(3221225472u)); + ir_expression *const r1368 = equal(r1367, body.constant(0u)); + ir_if *f1366 = new(mem_ctx) ir_if(operand(r1368).val); + exec_list *const f1366_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f1366->then_instructions; + + body.emit(assign(r135A, add(r135A, body.constant(2u)), 0x01)); + + body.emit(assig
[Mesa-dev] [RFC 02/11] glsl: Add "built-in" function to do neg(fp64)
Signed-off-by: Elie Tournier --- src/compiler/glsl/builtin_float64.h | 19 +++ src/compiler/glsl/builtin_functions.cpp | 4 src/compiler/glsl/builtin_functions.h | 3 +++ src/compiler/glsl/float64.glsl | 10 ++ 4 files changed, 36 insertions(+) diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h index c1ec89d210..6df91e10f5 100644 --- a/src/compiler/glsl/builtin_float64.h +++ b/src/compiler/glsl/builtin_float64.h @@ -17,3 +17,22 @@ fabs64(void *mem_ctx, builtin_available_predicate avail) sig->replace_parameters(&sig_parameters); return sig; } +ir_function_signature * +fneg64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r000C = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r000C); + body.emit(assign(r000C, bit_xor(swizzle_x(r000C), body.constant(2147483648u)), 0x01)); + + body.emit(ret(r000C)); + + sig->replace_parameters(&sig_parameters); + return sig; +} diff --git a/src/compiler/glsl/builtin_functions.cpp b/src/compiler/glsl/builtin_functions.cpp index b0b1781725..a189b84190 100644 --- a/src/compiler/glsl/builtin_functions.cpp +++ b/src/compiler/glsl/builtin_functions.cpp @@ -3133,6 +3133,10 @@ builtin_builder::create_builtins() generate_ir::fabs64(mem_ctx, integer_functions_supported), NULL); + add_function("__builtin_fneg64", +generate_ir::fneg64(mem_ctx, integer_functions_supported), +NULL); + #undef F #undef FI #undef FIUD_VEC diff --git a/src/compiler/glsl/builtin_functions.h b/src/compiler/glsl/builtin_functions.h index abe02d97b6..37c6cc33c2 100644 --- a/src/compiler/glsl/builtin_functions.h +++ b/src/compiler/glsl/builtin_functions.h @@ -66,6 +66,9 @@ sign64(void *mem_ctx, builtin_available_predicate avail); ir_function_signature * fabs64(void 
*mem_ctx, builtin_available_predicate avail); +ir_function_signature * +fneg64(void *mem_ctx, builtin_available_predicate avail); + } #endif /* BULITIN_FUNCTIONS_H */ diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl index b8f0c2e444..82875e9407 100644 --- a/src/compiler/glsl/float64.glsl +++ b/src/compiler/glsl/float64.glsl @@ -26,3 +26,13 @@ fabs64( uvec2 a ) a.x &= 0x7FFFFFFFu; return a; } + +/* Negate value of a Float64 : + * Toggle the sign bit + */ +uvec2 +fneg64( uvec2 a ) +{ +a.x ^= (1u<<31); +return a; +} -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC 04/11] glsl: Add "built-in" functions to do le(fp64, fp64)
Signed-off-by: Elie Tournier --- src/compiler/glsl/builtin_float64.h | 179 src/compiler/glsl/builtin_functions.cpp | 4 + src/compiler/glsl/builtin_functions.h | 3 + src/compiler/glsl/float64.glsl | 52 ++ 4 files changed, 238 insertions(+) diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h index e614374d75..f8ceacdabf 100644 --- a/src/compiler/glsl/builtin_float64.h +++ b/src/compiler/glsl/builtin_float64.h @@ -155,3 +155,182 @@ feq64(void *mem_ctx, builtin_available_predicate avail) sig->replace_parameters(&sig_parameters); return sig; } +ir_function_signature * +extractFloat64Sign(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::uint_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r002D = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r002D); + ir_expression *const r002E = rshift(swizzle_x(r002D), body.constant(int(31))); + body.emit(ret(r002E)); + + sig->replace_parameters(&sig_parameters); + return sig; +} +ir_function_signature * +le64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::bool_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r002F = new(mem_ctx) ir_variable(glsl_type::uint_type, "a0", ir_var_function_in); + sig_parameters.push_tail(r002F); + ir_variable *const r0030 = new(mem_ctx) ir_variable(glsl_type::uint_type, "a1", ir_var_function_in); + sig_parameters.push_tail(r0030); + ir_variable *const r0031 = new(mem_ctx) ir_variable(glsl_type::uint_type, "b0", ir_var_function_in); + sig_parameters.push_tail(r0031); + ir_variable *const r0032 = new(mem_ctx) ir_variable(glsl_type::uint_type, "b1", ir_var_function_in); + 
sig_parameters.push_tail(r0032); + ir_expression *const r0033 = less(r002F, r0031); + ir_expression *const r0034 = equal(r002F, r0031); + ir_expression *const r0035 = lequal(r0030, r0032); + ir_expression *const r0036 = logic_and(r0034, r0035); + ir_expression *const r0037 = logic_or(r0033, r0036); + body.emit(ret(r0037)); + + sig->replace_parameters(&sig_parameters); + return sig; +} +ir_function_signature * +fle64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::bool_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r0038 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r0038); + ir_variable *const r0039 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "b", ir_var_function_in); + sig_parameters.push_tail(r0039); + ir_variable *const r003A = body.make_temp(glsl_type::bool_type, "return_value"); + ir_variable *const r003B = new(mem_ctx) ir_variable(glsl_type::bool_type, "isbNaN", ir_var_auto); + body.emit(r003B); + ir_variable *const r003C = new(mem_ctx) ir_variable(glsl_type::bool_type, "isaNaN", ir_var_auto); + body.emit(r003C); + ir_variable *const r003D = body.make_temp(glsl_type::uvec2_type, "vec_ctor"); + body.emit(assign(r003D, bit_and(swizzle_x(r0038), body.constant(1048575u)), 0x01)); + + body.emit(assign(r003D, swizzle_y(r0038), 0x02)); + + ir_variable *const r003E = body.make_temp(glsl_type::uvec2_type, "vec_ctor"); + body.emit(assign(r003E, bit_and(swizzle_x(r0039), body.constant(1048575u)), 0x01)); + + body.emit(assign(r003E, swizzle_y(r0039), 0x02)); + + ir_expression *const r003F = rshift(swizzle_x(r0038), body.constant(int(20))); + ir_expression *const r0040 = bit_and(r003F, body.constant(2047u)); + ir_expression *const r0041 = equal(r0040, body.constant(2047u)); + ir_expression *const r0042 = bit_or(swizzle_x(r003D), 
swizzle_y(r0038)); + ir_expression *const r0043 = nequal(r0042, body.constant(0u)); + body.emit(assign(r003C, logic_and(r0041, r0043), 0x01)); + + ir_expression *const r0044 = rshift(swizzle_x(r0039), body.constant(int(20))); + ir_expression *const r0045 = bit_and(r0044, body.constant(2047u)); + ir_expression *const r0046 = equal(r0045, body.constant(2047u)); + ir_expression *const r0047 = bit_or(swizzle_x(r003E), swizzle_y(r0039)); + ir_expression *const r0048 = nequal(r0047, body.constant(0u)); + body.emit(assign(r003B, logic_and(r0046, r0048), 0x01)); + + /* IF CONDITION */ + ir_expr
[Mesa-dev] [RFC 03/11] glsl: Add "built-in" functions to do eq(fp64, fp64)
Signed-off-by: Elie Tournier --- src/compiler/glsl/builtin_float64.h | 119 src/compiler/glsl/builtin_functions.cpp | 4 ++ src/compiler/glsl/builtin_functions.h | 3 + src/compiler/glsl/float64.glsl | 41 +++ 4 files changed, 167 insertions(+) diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h index 6df91e10f5..e614374d75 100644 --- a/src/compiler/glsl/builtin_float64.h +++ b/src/compiler/glsl/builtin_float64.h @@ -36,3 +36,122 @@ fneg64(void *mem_ctx, builtin_available_predicate avail) sig->replace_parameters(&sig_parameters); return sig; } +ir_function_signature * +extractFloat64Frac(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r000D = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r000D); + ir_variable *const r000E = body.make_temp(glsl_type::uvec2_type, "vec_ctor"); + body.emit(assign(r000E, bit_and(swizzle_x(r000D), body.constant(1048575u)), 0x01)); + + body.emit(assign(r000E, swizzle_y(r000D), 0x02)); + + body.emit(ret(r000E)); + + sig->replace_parameters(&sig_parameters); + return sig; +} +ir_function_signature * +extractFloat64Exp(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::uint_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r000F = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r000F); + ir_expression *const r0010 = rshift(swizzle_x(r000F), body.constant(int(20))); + ir_expression *const r0011 = bit_and(r0010, body.constant(2047u)); + body.emit(ret(r0011)); + + sig->replace_parameters(&sig_parameters); + return 
sig; +} +ir_function_signature * +feq64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::bool_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r0012 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r0012); + ir_variable *const r0013 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "b", ir_var_function_in); + sig_parameters.push_tail(r0013); + ir_variable *const r0014 = body.make_temp(glsl_type::bool_type, "return_value"); + ir_variable *const r0015 = new(mem_ctx) ir_variable(glsl_type::bool_type, "isbNaN", ir_var_auto); + body.emit(r0015); + ir_variable *const r0016 = new(mem_ctx) ir_variable(glsl_type::bool_type, "isaNaN", ir_var_auto); + body.emit(r0016); + ir_variable *const r0017 = body.make_temp(glsl_type::uvec2_type, "vec_ctor"); + body.emit(assign(r0017, bit_and(swizzle_x(r0012), body.constant(1048575u)), 0x01)); + + body.emit(assign(r0017, swizzle_y(r0012), 0x02)); + + ir_variable *const r0018 = body.make_temp(glsl_type::uvec2_type, "vec_ctor"); + body.emit(assign(r0018, bit_and(swizzle_x(r0013), body.constant(1048575u)), 0x01)); + + body.emit(assign(r0018, swizzle_y(r0013), 0x02)); + + ir_expression *const r0019 = rshift(swizzle_x(r0012), body.constant(int(20))); + ir_expression *const r001A = bit_and(r0019, body.constant(2047u)); + ir_expression *const r001B = equal(r001A, body.constant(2047u)); + ir_expression *const r001C = bit_or(swizzle_x(r0017), swizzle_y(r0012)); + ir_expression *const r001D = nequal(r001C, body.constant(0u)); + body.emit(assign(r0016, logic_and(r001B, r001D), 0x01)); + + ir_expression *const r001E = rshift(swizzle_x(r0013), body.constant(int(20))); + ir_expression *const r001F = bit_and(r001E, body.constant(2047u)); + ir_expression *const r0020 = equal(r001F, body.constant(2047u)); + ir_expression *const 
r0021 = bit_or(swizzle_x(r0018), swizzle_y(r0013)); + ir_expression *const r0022 = nequal(r0021, body.constant(0u)); + body.emit(assign(r0015, logic_and(r0020, r0022), 0x01)); + + /* IF CONDITION */ + ir_expression *const r0024 = logic_or(r0016, r0015); + ir_if *f0023 = new(mem_ctx) ir_if(operand(r0024).val); + exec_list *const f0023_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f0023->then_instructions; + + body.emit(assign(r0014, body.constant(false), 0x01)); + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f0023->else_instr
[Mesa-dev] [RFC 10/11] glsl: Add "built-in" functions to do fp64_to_fp32(fp64)
Signed-off-by: Elie Tournier --- src/compiler/glsl/builtin_float64.h | 589 src/compiler/glsl/builtin_functions.cpp | 4 + src/compiler/glsl/builtin_functions.h | 3 + src/compiler/glsl/float64.glsl | 133 4 files changed, 729 insertions(+) diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h index b50ebc2dc2..dad5811289 100644 --- a/src/compiler/glsl/builtin_float64.h +++ b/src/compiler/glsl/builtin_float64.h @@ -23652,3 +23652,592 @@ fp32_to_fp64(void *mem_ctx, builtin_available_predicate avail) sig->replace_parameters(&sig_parameters); return sig; } +ir_function_signature * +packFloat32(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::uint_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r13A7 = new(mem_ctx) ir_variable(glsl_type::uint_type, "zSign", ir_var_function_in); + sig_parameters.push_tail(r13A7); + ir_variable *const r13A8 = new(mem_ctx) ir_variable(glsl_type::uint_type, "zExp", ir_var_function_in); + sig_parameters.push_tail(r13A8); + ir_variable *const r13A9 = new(mem_ctx) ir_variable(glsl_type::uint_type, "zFrac", ir_var_function_in); + sig_parameters.push_tail(r13A9); + ir_expression *const r13AA = lshift(r13A7, body.constant(int(31))); + ir_expression *const r13AB = lshift(r13A8, body.constant(int(23))); + ir_expression *const r13AC = add(r13AA, r13AB); + ir_expression *const r13AD = add(r13AC, r13A9); + body.emit(ret(r13AD)); + + sig->replace_parameters(&sig_parameters); + return sig; +} +ir_function_signature * +shift32RightJamming(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::void_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r13AE = new(mem_ctx) 
ir_variable(glsl_type::uint_type, "a", ir_var_function_in); + sig_parameters.push_tail(r13AE); + ir_variable *const r13AF = new(mem_ctx) ir_variable(glsl_type::int_type, "count", ir_var_function_in); + sig_parameters.push_tail(r13AF); + ir_variable *const r13B0 = new(mem_ctx) ir_variable(glsl_type::uint_type, "zPtr", ir_var_function_inout); + sig_parameters.push_tail(r13B0); + ir_variable *const r13B1 = new(mem_ctx) ir_variable(glsl_type::uint_type, "z", ir_var_auto); + body.emit(r13B1); + /* IF CONDITION */ + ir_expression *const r13B3 = equal(r13AF, body.constant(int(0))); + ir_if *f13B2 = new(mem_ctx) ir_if(operand(r13B3).val); + exec_list *const f13B2_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f13B2->then_instructions; + + body.emit(assign(r13B1, r13AE, 0x01)); + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f13B2->else_instructions; + + /* IF CONDITION */ + ir_expression *const r13B5 = less(r13AF, body.constant(int(32))); + ir_if *f13B4 = new(mem_ctx) ir_if(operand(r13B5).val); + exec_list *const f13B4_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f13B4->then_instructions; + + ir_expression *const r13B6 = rshift(r13AE, r13AF); + ir_expression *const r13B7 = neg(r13AF); + ir_expression *const r13B8 = bit_and(r13B7, body.constant(int(31))); + ir_expression *const r13B9 = lshift(r13AE, r13B8); + ir_expression *const r13BA = nequal(r13B9, body.constant(0u)); + ir_expression *const r13BB = expr(ir_unop_b2i, r13BA); + ir_expression *const r13BC = expr(ir_unop_i2u, r13BB); + body.emit(assign(r13B1, bit_or(r13B6, r13BC), 0x01)); + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f13B4->else_instructions; + + ir_expression *const r13BD = nequal(r13AE, body.constant(0u)); + ir_expression *const r13BE = expr(ir_unop_b2i, r13BD); + body.emit(assign(r13B1, expr(ir_unop_i2u, r13BE), 0x01)); + + + body.instructions = f13B4_parent_instructions; + body.emit(f13B4); + + /* END 
IF */ + + + body.instructions = f13B2_parent_instructions; + body.emit(f13B2); + + /* END IF */ + + body.emit(assign(r13B0, r13B1, 0x01)); + + sig->replace_parameters(&sig_parameters); + return sig; +} +ir_function_signature * +roundAndPackFloat32(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::uint_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_paramete
[Mesa-dev] [RFC 11/11] glsl: Add fp64 functions to the parser.
Since we use two integers to store each fp64 value, the functions are made available with MESA_shader_integer_functions. Signed-off-by: Elie Tournier --- src/compiler/glsl/glcpp/glcpp-parse.y | 10 ++ 1 file changed, 10 insertions(+) diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y b/src/compiler/glsl/glcpp/glcpp-parse.y index e113253061..84dfc81c80 100644 --- a/src/compiler/glsl/glcpp/glcpp-parse.y +++ b/src/compiler/glsl/glcpp/glcpp-parse.y @@ -2349,6 +2349,16 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio add_builtin_define(parser, "__have_builtin_builtin_umod64", 1); add_builtin_define(parser, "__have_builtin_builtin_idiv64", 1); add_builtin_define(parser, "__have_builtin_builtin_imod64", 1); + add_builtin_define(parser, "__have_builtin_builtin_fabs64", 1); + add_builtin_define(parser, "__have_builtin_builtin_fneg64", 1); + add_builtin_define(parser, "__have_builtin_builtin_feq64", 1); + add_builtin_define(parser, "__have_builtin_builtin_fle64", 1); + add_builtin_define(parser, "__have_builtin_builtin_flt64", 1); + add_builtin_define(parser, "__have_builtin_builtin_fadd64", 1); + add_builtin_define(parser, "__have_builtin_builtin_fmul64", 1); + add_builtin_define(parser, "__have_builtin_builtin_fdiv64", 1); + add_builtin_define(parser, "__have_builtin_builtin_fp32_to_fp64", 1); + add_builtin_define(parser, "__have_builtin_builtin_fp64_to_fp32", 1); } } -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] glsl: Fix indent in dump code
From: Elie Tournier Signed-off-by: Elie Tournier --- On another thread [1], we discovered that the GLSL compiler has some trouble correctly indenting the dumped IR. I don't have git access, so please push it for me. [1] https://lists.freedesktop.org/archives/mesa-dev/2017-March/146518.html --- src/compiler/glsl/ir_builder_print_visitor.cpp | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/compiler/glsl/ir_builder_print_visitor.cpp b/src/compiler/glsl/ir_builder_print_visitor.cpp index 825dbe148e..02f15e74ee 100644 --- a/src/compiler/glsl/ir_builder_print_visitor.cpp +++ b/src/compiler/glsl/ir_builder_print_visitor.cpp @@ -372,17 +372,17 @@ ir_builder_print_visitor::visit(ir_constant *ir) switch (ir->type->base_type) { case GLSL_TYPE_UINT: if (ir->value.u[i] != 0) - print_without_indent("r%04X_data.u[%u] = %u;\n", + print_with_indent("r%04X_data.u[%u] = %u;\n", my_index, i, ir->value.u[i]); break; case GLSL_TYPE_INT: if (ir->value.i[i] != 0) - print_without_indent("r%04X_data.i[%u] = %i;\n", + print_with_indent("r%04X_data.i[%u] = %i;\n", my_index, i, ir->value.i[i]); break; case GLSL_TYPE_FLOAT: if (ir->value.u[i] != 0) - print_without_indent("r%04X_data.u[%u] = 0x%08x; /* %f */\n", + print_with_indent("r%04X_data.u[%u] = 0x%08x; /* %f */\n", my_index, i, ir->value.u[i], @@ -395,27 +395,27 @@ ir_builder_print_visitor::visit(ir_constant *ir) memcpy(&v, &ir->value.d[i], sizeof(v)); if (v != 0) - print_without_indent("r%04X_data.u64[%u] = 0x%016" PRIx64 "; /* %g */\n", + print_with_indent("r%04X_data.u64[%u] = 0x%016" PRIx64 "; /* %g */\n", my_index, i, v, ir->value.d[i]); break; } case GLSL_TYPE_UINT64: if (ir->value.u64[i] != 0) - print_without_indent("r%04X_data.u64[%u] = %" PRIu64 ";\n", + print_with_indent("r%04X_data.u64[%u] = %" PRIu64 ";\n", my_index, i, ir->value.u64[i]); break; case GLSL_TYPE_INT64: if (ir->value.i64[i] != 0) - print_without_indent("r%04X_data.i64[%u] = %" PRId64 ";\n", + print_with_indent("r%04X_data.i64[%u] = %" 
PRId64 ";\n", my_index, i, ir->value.i64[i]); break; case GLSL_TYPE_BOOL: if (ir->value.u[i] != 0) - print_without_indent("r%04X_data.u[%u] = 1;\n", my_index, i); + print_with_indent("r%04X_data.u[%u] = 1;\n", my_index, i); break; default: unreachable("Invalid constant type"); -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] glsl: remove unused file
udivmod64 appears in both src/compiler/glsl/builtin_int64.h and src/compiler/glsl/udivmod64.h. The second file seems unused. Fixes commit 6b03b345eb64e15e577bc8b2cf04b314a4c70537 This change doesn't affect shader-db. Signed-off-by: Elie Tournier --- src/compiler/glsl/udivmod64.h | 206 -- 1 file changed, 206 deletions(-) delete mode 100644 src/compiler/glsl/udivmod64.h diff --git a/src/compiler/glsl/udivmod64.h b/src/compiler/glsl/udivmod64.h deleted file mode 100644 index b434e456a1..0000000000 --- a/src/compiler/glsl/udivmod64.h +++ /dev/null @@ -1,206 +0,0 @@ -ir_function_signature * -udivmod64(void *mem_ctx, ir_factory &body) -{ - ir_function_signature *const sig = - new(mem_ctx) ir_function_signature(glsl_type::uvec4_type); - exec_list sig_parameters; - - ir_variable *const r0001 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "numer", ir_var_function_in); - sig_parameters.push_tail(r0001); - ir_variable *const r0002 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "denom", ir_var_function_in); - sig_parameters.push_tail(r0002); - ir_variable *const r0003 = new(mem_ctx) ir_variable(glsl_type::int_type, "i", ir_var_auto); - body.emit(r0003); - ir_variable *const r0004 = new(mem_ctx) ir_variable(glsl_type::uint64_t_type, "n64", ir_var_auto); - body.emit(r0004); - ir_variable *const r0005 = new(mem_ctx) ir_variable(glsl_type::int_type, "log2_denom", ir_var_auto); - body.emit(r0005); - ir_variable *const r0006 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "quot", ir_var_auto); - body.emit(r0006); - body.emit(assign(r0006, ir_constant::zero(mem_ctx, glsl_type::uvec2_type), 0x03)); - - ir_expression *const r0007 = expr(ir_unop_find_msb, swizzle_y(r0002)); - body.emit(assign(r0005, add(r0007, body.constant(int(32))), 0x01)); - - /* IF CONDITION */ - ir_expression *const r0009 = equal(swizzle_y(r0002), body.constant(0u)); - ir_expression *const r000A = nequal(swizzle_y(r0001), body.constant(0u)); - ir_expression *const r000B = logic_and(r0009, r000A); - ir_if *f0008 = 
new(mem_ctx) ir_if(operand(r000B).val); - exec_list *const f0008_parent_instructions = body.instructions; - - /* THEN INSTRUCTIONS */ - body.instructions = &f0008->then_instructions; - - ir_variable *const r000C = new(mem_ctx) ir_variable(glsl_type::int_type, "i", ir_var_auto); - body.emit(r000C); - ir_variable *const r000D = body.make_temp(glsl_type::int_type, "findMSB_retval"); - body.emit(assign(r000D, expr(ir_unop_find_msb, swizzle_x(r0002)), 0x01)); - - body.emit(assign(r0005, r000D, 0x01)); - - body.emit(assign(r000C, sub(body.constant(int(31)), r000D), 0x01)); - - /* LOOP BEGIN */ - ir_loop *f000E = new(mem_ctx) ir_loop(); - exec_list *const f000E_parent_instructions = body.instructions; - - body.instructions = &f000E->body_instructions; - - /* IF CONDITION */ - ir_expression *const r0010 = less(r000C, body.constant(int(1))); - ir_if *f000F = new(mem_ctx) ir_if(operand(r0010).val); - exec_list *const f000F_parent_instructions = body.instructions; - -/* THEN INSTRUCTIONS */ -body.instructions = &f000F->then_instructions; - -body.emit(new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_break)); - - - body.instructions = f000F_parent_instructions; - body.emit(f000F); - - /* END IF */ - - /* IF CONDITION */ - ir_expression *const r0012 = lshift(swizzle_x(r0002), r000C); - ir_expression *const r0013 = lequal(r0012, swizzle_y(r0001)); - ir_if *f0011 = new(mem_ctx) ir_if(operand(r0013).val); - exec_list *const f0011_parent_instructions = body.instructions; - -/* THEN INSTRUCTIONS */ -body.instructions = &f0011->then_instructions; - -ir_expression *const r0014 = lshift(swizzle_x(r0002), r000C); -body.emit(assign(r0001, sub(swizzle_y(r0001), r0014), 0x02)); - -ir_expression *const r0015 = lshift(body.constant(1u), r000C); -body.emit(assign(r0006, bit_or(swizzle_y(r0006), r0015), 0x02)); - - - body.instructions = f0011_parent_instructions; - body.emit(f0011); - - /* END IF */ - - body.emit(assign(r000C, add(r000C, body.constant(int(-1))), 0x01)); - - /* LOOP END */ - - 
body.instructions = f000E_parent_instructions; - body.emit(f000E); - - /* IF CONDITION */ - ir_expression *const r0017 = lequal(swizzle_x(r0002), swizzle_y(r0001)); - ir_if *f0016 = new(mem_ctx) ir_if(operand(r0017).val); - exec_list *const f0016_parent_instructions = body.instructions; - - /* THEN INSTRUCTIONS */ - body.instructions = &f0016->then_instructions; - - body.emit(assign(r0001, sub(swizzle_
[Mesa-dev] [PATCH 04/47] glsl: Add "built-in" functions to do eq(fp64, fp64)
Signed-off-by: Elie Tournier --- src/compiler/glsl/builtin_float64.h | 128 src/compiler/glsl/builtin_functions.cpp | 4 + src/compiler/glsl/builtin_functions.h | 3 + src/compiler/glsl/float64.glsl | 46 src/compiler/glsl/glcpp/glcpp-parse.y | 1 + 5 files changed, 182 insertions(+) diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h index d41d114b84..9fc716871d 100644 --- a/src/compiler/glsl/builtin_float64.h +++ b/src/compiler/glsl/builtin_float64.h @@ -138,3 +138,131 @@ fsign64(void *mem_ctx, builtin_available_predicate avail) sig->replace_parameters(&sig_parameters); return sig; } +ir_function_signature * +extractFloat64FracLo(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::uint_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r0026 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r0026); + ir_swizzle *const r0027 = swizzle_x(r0026); + body.emit(ret(r0027)); + + sig->replace_parameters(&sig_parameters); + return sig; +} +ir_function_signature * +extractFloat64FracHi(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::uint_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r0028 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r0028); + ir_expression *const r0029 = bit_and(swizzle_y(r0028), body.constant(1048575u)); + body.emit(ret(r0029)); + + sig->replace_parameters(&sig_parameters); + return sig; +} +ir_function_signature * +extractFloat64Exp(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::int_type, 
avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r002A = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r002A); + ir_expression *const r002B = rshift(swizzle_y(r002A), body.constant(int(20))); + ir_expression *const r002C = bit_and(r002B, body.constant(2047u)); + ir_expression *const r002D = expr(ir_unop_u2i, r002C); + body.emit(ret(r002D)); + + sig->replace_parameters(&sig_parameters); + return sig; +} +ir_function_signature * +feq64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::bool_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r002E = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r002E); + ir_variable *const r002F = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "b", ir_var_function_in); + sig_parameters.push_tail(r002F); + ir_variable *const r0030 = body.make_temp(glsl_type::bool_type, "return_value"); + ir_variable *const r0031 = new(mem_ctx) ir_variable(glsl_type::bool_type, "isbNaN", ir_var_auto); + body.emit(r0031); + ir_variable *const r0032 = new(mem_ctx) ir_variable(glsl_type::bool_type, "isaNaN", ir_var_auto); + body.emit(r0032); + ir_expression *const r0033 = rshift(swizzle_y(r002E), body.constant(int(20))); + ir_expression *const r0034 = bit_and(r0033, body.constant(2047u)); + ir_expression *const r0035 = expr(ir_unop_u2i, r0034); + ir_expression *const r0036 = equal(r0035, body.constant(int(2047))); + ir_expression *const r0037 = bit_and(swizzle_y(r002E), body.constant(1048575u)); + ir_expression *const r0038 = bit_or(r0037, swizzle_x(r002E)); + ir_expression *const r0039 = nequal(r0038, body.constant(0u)); + body.emit(assign(r0032, logic_and(r0036, r0039), 0x01)); + + ir_expression *const 
r003A = rshift(swizzle_y(r002F), body.constant(int(20))); + ir_expression *const r003B = bit_and(r003A, body.constant(2047u)); + ir_expression *const r003C = expr(ir_unop_u2i, r003B); + ir_expression *const r003D = equal(r003C, body.constant(int(2047))); + ir_expression *const r003E = bit_and(swizzle_y(r002F), body.constant(1048575u)); + ir_expression *const r003F = bit_or(r003E, swizzle_x(r002F)); + ir_expression *const r0040 = nequal(r003F, body.constant(0u)); + body.emit(assign(r0031, logic_and(r003D, r0040), 0x01)); + + /* IF CONDITION */ + ir_expression *const r0042 = logic_or(r0032, r00
[Mesa-dev] [PATCH 02/47] glsl: Add "built-in" functions to do neg(fp64)
Signed-off-by: Elie Tournier --- src/compiler/glsl/builtin_float64.h | 72 + src/compiler/glsl/builtin_functions.cpp | 4 ++ src/compiler/glsl/builtin_functions.h | 3 ++ src/compiler/glsl/float64.glsl | 23 +++ src/compiler/glsl/glcpp/glcpp-parse.y | 1 + 5 files changed, 103 insertions(+) diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h index 7b572314fa..d8c25da825 100644 --- a/src/compiler/glsl/builtin_float64.h +++ b/src/compiler/glsl/builtin_float64.h @@ -17,3 +17,75 @@ fabs64(void *mem_ctx, builtin_available_predicate avail) sig->replace_parameters(&sig_parameters); return sig; } +ir_function_signature * +is_nan(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::bool_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r000C = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r000C); + ir_expression *const r000D = lshift(swizzle_y(r000C), body.constant(int(1))); + ir_expression *const r000E = lequal(body.constant(4292870144u), r000D); + ir_expression *const r000F = nequal(swizzle_x(r000C), body.constant(0u)); + ir_expression *const r0010 = bit_and(swizzle_y(r000C), body.constant(1048575u)); + ir_expression *const r0011 = nequal(r0010, body.constant(0u)); + ir_expression *const r0012 = logic_or(r000F, r0011); + ir_expression *const r0013 = logic_and(r000E, r0012); + body.emit(ret(r0013)); + + sig->replace_parameters(&sig_parameters); + return sig; +} +ir_function_signature * +fneg64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r0014 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", 
ir_var_function_in); + sig_parameters.push_tail(r0014); + ir_variable *const r0015 = body.make_temp(glsl_type::uvec2_type, "return_value"); + /* IF CONDITION */ + ir_expression *const r0017 = lshift(swizzle_y(r0014), body.constant(int(1))); + ir_expression *const r0018 = lequal(body.constant(4292870144u), r0017); + ir_expression *const r0019 = nequal(swizzle_x(r0014), body.constant(0u)); + ir_expression *const r001A = bit_and(swizzle_y(r0014), body.constant(1048575u)); + ir_expression *const r001B = nequal(r001A, body.constant(0u)); + ir_expression *const r001C = logic_or(r0019, r001B); + ir_expression *const r001D = logic_and(r0018, r001C); + ir_if *f0016 = new(mem_ctx) ir_if(operand(r001D).val); + exec_list *const f0016_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f0016->then_instructions; + + body.emit(assign(r0015, r0014, 0x03)); + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f0016->else_instructions; + + body.emit(assign(r0014, bit_xor(swizzle_y(r0014), body.constant(2147483648u)), 0x02)); + + body.emit(assign(r0015, r0014, 0x03)); + + + body.instructions = f0016_parent_instructions; + body.emit(f0016); + + /* END IF */ + + body.emit(ret(r0015)); + + sig->replace_parameters(&sig_parameters); + return sig; +} diff --git a/src/compiler/glsl/builtin_functions.cpp b/src/compiler/glsl/builtin_functions.cpp index 37cc164a73..0445f733ab 100644 --- a/src/compiler/glsl/builtin_functions.cpp +++ b/src/compiler/glsl/builtin_functions.cpp @@ -3328,6 +3328,10 @@ builtin_builder::create_builtins() generate_ir::fabs64(mem_ctx, integer_functions_supported), NULL); + add_function("__builtin_fneg64", +generate_ir::fneg64(mem_ctx, integer_functions_supported), +NULL); + #undef F #undef FI #undef FIUD_VEC diff --git a/src/compiler/glsl/builtin_functions.h b/src/compiler/glsl/builtin_functions.h index deaf640447..adec424292 100644 --- a/src/compiler/glsl/builtin_functions.h +++ b/src/compiler/glsl/builtin_functions.h @@ -70,6 
+70,9 @@ udivmod64(void *mem_ctx, builtin_available_predicate avail); ir_function_signature * fabs64(void *mem_ctx, builtin_available_predicate avail); +ir_function_signature * +fneg64(void *mem_ctx, builtin_available_predicate avail); + } #endif /* BULITIN_FUNCTIONS_H */ diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl index d798d7e7ac..cef939323b 100644 --- a/src/compiler/glsl/float64.glsl +++ b/src/compiler/glsl/float64.glsl @@ -27,3 +27,26 @@ fabs64(uvec2 a) a.y &= 0x7FFFu; return a; } + +/* Returns 1 if the double-precision floating-point value `a
[Mesa-dev] [PATCH 03/47] glsl: Add "built-in" function to do sign(fp64)
Signed-off-by: Elie Tournier --- src/compiler/glsl/builtin_float64.h | 49 + src/compiler/glsl/builtin_functions.cpp | 4 +++ src/compiler/glsl/builtin_functions.h | 3 ++ src/compiler/glsl/float64.glsl | 12 src/compiler/glsl/glcpp/glcpp-parse.y | 1 + 5 files changed, 69 insertions(+) diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h index d8c25da825..d41d114b84 100644 --- a/src/compiler/glsl/builtin_float64.h +++ b/src/compiler/glsl/builtin_float64.h @@ -89,3 +89,52 @@ fneg64(void *mem_ctx, builtin_available_predicate avail) sig->replace_parameters(&sig_parameters); return sig; } +ir_function_signature * +fsign64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r001E = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r001E); + ir_variable *const r001F = body.make_temp(glsl_type::uvec2_type, "return_value"); + ir_variable *const r0020 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "retval", ir_var_auto); + body.emit(r0020); + /* IF CONDITION */ + ir_expression *const r0022 = lshift(swizzle_y(r001E), body.constant(int(1))); + ir_expression *const r0023 = bit_or(r0022, swizzle_x(r001E)); + ir_expression *const r0024 = equal(r0023, body.constant(0u)); + ir_if *f0021 = new(mem_ctx) ir_if(operand(r0024).val); + exec_list *const f0021_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f0021->then_instructions; + + body.emit(assign(r001F, ir_constant::zero(mem_ctx, glsl_type::uvec2_type), 0x03)); + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f0021->else_instructions; + + body.emit(assign(r0020, body.constant(0u), 0x01)); + + ir_expression *const r0025 = bit_and(swizzle_y(r001E), body.constant(2147483648u)); + 
body.emit(assign(r0020, bit_or(r0025, body.constant(1072693248u)), 0x02)); + + body.emit(assign(r001F, r0020, 0x03)); + + + body.instructions = f0021_parent_instructions; + body.emit(f0021); + + /* END IF */ + + body.emit(ret(r001F)); + + sig->replace_parameters(&sig_parameters); + return sig; +} diff --git a/src/compiler/glsl/builtin_functions.cpp b/src/compiler/glsl/builtin_functions.cpp index 0445f733ab..1e3101bac3 100644 --- a/src/compiler/glsl/builtin_functions.cpp +++ b/src/compiler/glsl/builtin_functions.cpp @@ -3332,6 +3332,10 @@ builtin_builder::create_builtins() generate_ir::fneg64(mem_ctx, integer_functions_supported), NULL); + add_function("__builtin_fsign64", +generate_ir::fsign64(mem_ctx, integer_functions_supported), +NULL); + #undef F #undef FI #undef FIUD_VEC diff --git a/src/compiler/glsl/builtin_functions.h b/src/compiler/glsl/builtin_functions.h index adec424292..7954373154 100644 --- a/src/compiler/glsl/builtin_functions.h +++ b/src/compiler/glsl/builtin_functions.h @@ -73,6 +73,9 @@ fabs64(void *mem_ctx, builtin_available_predicate avail); ir_function_signature * fneg64(void *mem_ctx, builtin_available_predicate avail); +ir_function_signature * +fsign64(void *mem_ctx, builtin_available_predicate avail); + } #endif /* BULITIN_FUNCTIONS_H */ diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl index cef939323b..6561c0e7e6 100644 --- a/src/compiler/glsl/float64.glsl +++ b/src/compiler/glsl/float64.glsl @@ -50,3 +50,15 @@ fneg64(uvec2 a) a.y ^= (1u<<31); return a; } + +uvec2 +fsign64(uvec2 a) +{ + if ((a.y << 1 | a.x) == 0u) + return uvec2(0u, 0u); + + uvec2 retval; + retval.x = 0u; + retval.y = (a.y & 0x80000000u) | 0x3FF00000u; + return retval; +} diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y b/src/compiler/glsl/glcpp/glcpp-parse.y index 4486326d86..72011dec68 100644 --- a/src/compiler/glsl/glcpp/glcpp-parse.y +++ b/src/compiler/glsl/glcpp/glcpp-parse.y @@ -2451,6 +2451,7 @@ 
_glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio add_builtin_define(parser, "__have_builtin_builtin_imod64", 1); add_builtin_define(parser, "__have_builtin_builtin_fabs64", 1); add_builtin_define(parser, "__have_builtin_builtin_fneg64", 1); + add_builtin_define(parser, "__have_builtin_builtin_fsign64", 1); } } -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 00/47] WIP: fp64 support for r600g
From: Elie Tournier TL;DR This series is a "status update" on my work adding fp64 support to r600g. One of the biggest issues is a lack of accuracy in the rcp implementation; divide relies on rcp. A branch is available at https://github.com/Hopetech/mesa/tree/glsl_arb_gpu_shader_fp64_v3 Comments and reviews are welcome. Patches 1-18: These few patches implement the basic fp64 operations. Patches 19-47: Lower operations using the builtin functions previously implemented. Known issues: - operations on matrices crash the system. - sqrt and d2f are not accurate enough, so the piglit tests are failing. But sqrt and d2f are working correctly using softpipe. However, implementing sqrt64 as f2d(sqrt32(d2f())) seems to be good enough for Piglit. - rcp is defined as pow(pow(x, -0.5), 2). NIR and NV convert the input to fp32, perform an rcp, convert back to fp64 and perform some Newton-Raphson steps. This is not possible with GLSL IR because using fma will generate a massive builtin_float64.h file. - dot is failing. - piglit reports crashes on mod. This is a regression; I'm currently working on it. 
Piglit result: Pass: 917 Fail: 211 Crash: 241 Dave Airlie (2): glsl/lower_64bit: handle any/all operations glsl/lower_64bit: lower d2b using comparison Elie Tournier (45): glsl: Add "built-in" function to do abs(fp64) glsl: Add "built-in" functions to do neg(fp64) glsl: Add "built-in" function to do sign(fp64) glsl: Add "built-in" functions to do eq(fp64, fp64) glsl: Add "built-in" functions to do le(fp64, fp64) glsl: Add "built-in" functions to do lt(fp64, fp64) glsl: Add "built-in" functions to do add(fp64, fp64) glsl: Add "built-in" functions to do mul(fp64, fp64) glsl: Add "built-in" functions to do fp64_to_uint(fp64) glsl: Add "built-in" functions to do uint_to_fp64(uint) glsl: Add "built-in" functions to do fp64_to_int(fp64) glsl: Add "built-in" functions to do int_to_fp64(int) glsl: Add "built-in" functions to do fp64_to_fp32(fp64) glsl: Add "built-in" functions to do fp32_to_fp64(fp32) glsl: Add "built-in" functions to do sqrt(fp64) glsl: Add "built-in" functions to do trunc(fp64) glsl: Add "built-in" functions to do round(fp64) glsl: Add "built-in" functions to do rcp(fp64) glsl: Add a lowering pass for 64-bit float abs() glsl: Add a lowering pass for 64-bit float neg() glsl: Add a lowering pass for 64-bit float sign() glsl: Add a lowering pass for 64-bit float equal() glsl: Add a lowering pass for 64-bit float lequal() glsl: Add a lowering pass for 64-bit float less() glsl: Add a lowering pass for 64-bit float add() glsl: Add a lowering pass for 64-bit float mul() glsl: Add a lowering pass for 64-bit float d2u() glsl: Add a lowering pass for 64-bit float u2d() glsl: Add a lowering pass for 64-bit float d2i() glsl: Add a lowering pass for 64-bit float i2d() glsl: Add a lowering pass for 64-bit float d2f() glsl: Add a lowering pass for 64-bit float f2d() glsl: Add a lowering pass for 64-bit float sqrt() glsl: Add a lowering pass for 64-bit float trunc() glsl: Add a lowering pass for 64-bit float round() glsl: Add a lowering pass for 64-bit float rcp() glsl: 
Add a lowering pass for 64-bit float gequal() glsl: Add a lowering pass for 64-bit float greater() glsl: Add a lowering pass for 64-bit float nequal() glsl: Add a lowering pass for 64-bit float min() glsl: Add a lowering pass for 64-bit float max() glsl: Add a lowering pass for 64-bit float floor() glsl: Add a lowering pass for 64-bit float ceil() glsl: Add a lowering pass for 64-bit float frac() glsl: Add a lowering pass for 64-bit float div() src/compiler/Makefile.sources | 3 +- src/compiler/glsl/builtin_float64.h| 20310 +++ src/compiler/glsl/builtin_functions.cpp|72 + src/compiler/glsl/builtin_functions.h |54 + src/compiler/glsl/float64.glsl | 1494 ++ src/compiler/glsl/generate_ir.cpp | 1 + src/compiler/glsl/glcpp/glcpp-parse.y |18 + src/compiler/glsl/glsl_parser_extras.cpp | 1 + src/compiler/glsl/ir_optimization.h|25 +- .../glsl/{lower_int64.cpp => lower_64bit.cpp} | 325 +- src/compiler/glsl/lower_instructions.cpp | 139 +- src/mesa/state_tracker/st_extensions.c | 3 +- src/mesa/state_tracker/st_glsl_to_tgsi.cpp |27 + 13 files changed, 22442 insertions(+), 30 deletions(-) create mode 100644 src/compiler/glsl/builtin_float64.h create mode 100644 src/compiler/glsl/float64.glsl rename src/compiler/glsl/{lower_int64.cpp => lower_64bit.cpp} (51%) -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 06/47] glsl: Add "built-in" functions to do lt(fp64, fp64)
Signed-off-by: Elie Tournier --- src/compiler/glsl/builtin_float64.h | 155 src/compiler/glsl/builtin_functions.cpp | 4 + src/compiler/glsl/builtin_functions.h | 3 + src/compiler/glsl/float64.glsl | 43 + src/compiler/glsl/glcpp/glcpp-parse.y | 1 + 5 files changed, 206 insertions(+) diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h index aa769d9672..c52e50a6f9 100644 --- a/src/compiler/glsl/builtin_float64.h +++ b/src/compiler/glsl/builtin_float64.h @@ -439,3 +439,158 @@ fle64(void *mem_ctx, builtin_available_predicate avail) sig->replace_parameters(&sig_parameters); return sig; } +ir_function_signature * +lt64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::bool_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r0080 = new(mem_ctx) ir_variable(glsl_type::uint_type, "a0", ir_var_function_in); + sig_parameters.push_tail(r0080); + ir_variable *const r0081 = new(mem_ctx) ir_variable(glsl_type::uint_type, "a1", ir_var_function_in); + sig_parameters.push_tail(r0081); + ir_variable *const r0082 = new(mem_ctx) ir_variable(glsl_type::uint_type, "b0", ir_var_function_in); + sig_parameters.push_tail(r0082); + ir_variable *const r0083 = new(mem_ctx) ir_variable(glsl_type::uint_type, "b1", ir_var_function_in); + sig_parameters.push_tail(r0083); + ir_expression *const r0084 = less(r0080, r0082); + ir_expression *const r0085 = equal(r0080, r0082); + ir_expression *const r0086 = less(r0081, r0083); + ir_expression *const r0087 = logic_and(r0085, r0086); + ir_expression *const r0088 = logic_or(r0084, r0087); + body.emit(ret(r0088)); + + sig->replace_parameters(&sig_parameters); + return sig; +} +ir_function_signature * +flt64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::bool_type, 
avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r0089 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r0089); + ir_variable *const r008A = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "b", ir_var_function_in); + sig_parameters.push_tail(r008A); + ir_variable *const r008B = body.make_temp(glsl_type::bool_type, "return_value"); + ir_variable *const r008C = new(mem_ctx) ir_variable(glsl_type::bool_type, "isbNaN", ir_var_auto); + body.emit(r008C); + ir_variable *const r008D = new(mem_ctx) ir_variable(glsl_type::bool_type, "isaNaN", ir_var_auto); + body.emit(r008D); + ir_expression *const r008E = rshift(swizzle_y(r0089), body.constant(int(20))); + ir_expression *const r008F = bit_and(r008E, body.constant(2047u)); + ir_expression *const r0090 = expr(ir_unop_u2i, r008F); + ir_expression *const r0091 = equal(r0090, body.constant(int(2047))); + ir_expression *const r0092 = bit_and(swizzle_y(r0089), body.constant(1048575u)); + ir_expression *const r0093 = bit_or(r0092, swizzle_x(r0089)); + ir_expression *const r0094 = nequal(r0093, body.constant(0u)); + body.emit(assign(r008D, logic_and(r0091, r0094), 0x01)); + + ir_expression *const r0095 = rshift(swizzle_y(r008A), body.constant(int(20))); + ir_expression *const r0096 = bit_and(r0095, body.constant(2047u)); + ir_expression *const r0097 = expr(ir_unop_u2i, r0096); + ir_expression *const r0098 = equal(r0097, body.constant(int(2047))); + ir_expression *const r0099 = bit_and(swizzle_y(r008A), body.constant(1048575u)); + ir_expression *const r009A = bit_or(r0099, swizzle_x(r008A)); + ir_expression *const r009B = nequal(r009A, body.constant(0u)); + body.emit(assign(r008C, logic_and(r0098, r009B), 0x01)); + + /* IF CONDITION */ + ir_expression *const r009D = logic_or(r008D, r008C); + ir_if *f009C = new(mem_ctx) ir_if(operand(r009D).val); + exec_list *const f009C_parent_instructions = 
body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f009C->then_instructions; + + body.emit(assign(r008B, body.constant(false), 0x01)); + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f009C->else_instructions; + + ir_variable *const r009E = body.make_temp(glsl_type::uint_type, "extractFloat64Sign_retval"); + body.emit(assign(r009E, rshift(swizzle_y(r0089), body.constant(int(31))), 0x01)); + + ir_variable *const r009F = body.make_temp(glsl_type::uint_type, "extractFloat64Sign_retval"); + body.emit(assign(r009F, rshift(swizzle_y(r008A), body.constant(int(31
[Mesa-dev] [PATCH 01/47] glsl: Add "built-in" function to do abs(fp64)
Signed-off-by: Elie Tournier --- src/compiler/Makefile.sources | 1 + src/compiler/glsl/builtin_float64.h | 19 +++ src/compiler/glsl/builtin_functions.cpp | 4 src/compiler/glsl/builtin_functions.h | 3 +++ src/compiler/glsl/float64.glsl | 29 + src/compiler/glsl/generate_ir.cpp | 1 + src/compiler/glsl/glcpp/glcpp-parse.y | 1 + 7 files changed, 58 insertions(+) create mode 100644 src/compiler/glsl/builtin_float64.h create mode 100644 src/compiler/glsl/float64.glsl diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index a56a710351..24fa7716de 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -22,6 +22,7 @@ LIBGLSL_FILES = \ glsl/builtin_functions.cpp \ glsl/builtin_functions.h \ glsl/builtin_int64.h \ + glsl/builtin_float64.h \ glsl/builtin_types.cpp \ glsl/builtin_variables.cpp \ glsl/generate_ir.cpp \ diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h new file mode 100644 index 00..7b572314fa --- /dev/null +++ b/src/compiler/glsl/builtin_float64.h @@ -0,0 +1,19 @@ +ir_function_signature * +fabs64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r000B = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r000B); + body.emit(assign(r000B, bit_and(swizzle_y(r000B), body.constant(2147483647u)), 0x02)); + + body.emit(ret(r000B)); + + sig->replace_parameters(&sig_parameters); + return sig; +} diff --git a/src/compiler/glsl/builtin_functions.cpp b/src/compiler/glsl/builtin_functions.cpp index 9df9671f13..37cc164a73 100644 --- a/src/compiler/glsl/builtin_functions.cpp +++ b/src/compiler/glsl/builtin_functions.cpp @@ -3324,6 +3324,10 @@ builtin_builder::create_builtins() generate_ir::umul64(mem_ctx, 
integer_functions_supported), NULL); + add_function("__builtin_fabs64", +generate_ir::fabs64(mem_ctx, integer_functions_supported), +NULL); + #undef F #undef FI #undef FIUD_VEC diff --git a/src/compiler/glsl/builtin_functions.h b/src/compiler/glsl/builtin_functions.h index 89ec9b7d5d..deaf640447 100644 --- a/src/compiler/glsl/builtin_functions.h +++ b/src/compiler/glsl/builtin_functions.h @@ -67,6 +67,9 @@ sign64(void *mem_ctx, builtin_available_predicate avail); ir_function_signature * udivmod64(void *mem_ctx, builtin_available_predicate avail); +ir_function_signature * +fabs64(void *mem_ctx, builtin_available_predicate avail); + } #endif /* BULITIN_FUNCTIONS_H */ diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl new file mode 100644 index 00..d798d7e7ac --- /dev/null +++ b/src/compiler/glsl/float64.glsl @@ -0,0 +1,29 @@ +/* Compile with: + * + * glsl_compiler --version 130 --dump-builder float64.glsl > builtin_float64.h + * + */ + +#version 130 +#extension GL_ARB_shader_bit_encoding : enable + +/* Software IEEE floating-point rounding mode. + * GLSL spec section "4.7.1 Range and Precision": + * The rounding mode cannot be set and is undefined. + * But here, we are able to define the rounding mode at the compilation time. 
+ */ +#define FLOAT_ROUND_NEAREST_EVEN0 +#define FLOAT_ROUND_TO_ZERO 1 +#define FLOAT_ROUND_DOWN2 +#define FLOAT_ROUND_UP 3 +#define FLOAT_ROUNDING_MODE FLOAT_ROUND_NEAREST_EVEN + +/* Absolute value of a Float64 : + * Clear the sign bit + */ +uvec2 +fabs64(uvec2 a) +{ + a.y &= 0x7FFFu; + return a; +} diff --git a/src/compiler/glsl/generate_ir.cpp b/src/compiler/glsl/generate_ir.cpp index 255b0484f2..e6ece4860f 100644 --- a/src/compiler/glsl/generate_ir.cpp +++ b/src/compiler/glsl/generate_ir.cpp @@ -29,5 +29,6 @@ using namespace ir_builder; namespace generate_ir { #include "builtin_int64.h" +#include "builtin_float64.h" } diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y b/src/compiler/glsl/glcpp/glcpp-parse.y index 898a26044f..5b68887c8e 100644 --- a/src/compiler/glsl/glcpp/glcpp-parse.y +++ b/src/compiler/glsl/glcpp/glcpp-parse.y @@ -2449,6 +2449,7 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio add_builtin_define(parser, "__have_builtin_builtin_umod64", 1); add_builtin_define(parser, "__have_builtin_builtin_idiv64", 1); add_builtin_define(parser, "__have_builtin_builtin_imod64", 1); + add_builtin_define(parser, "__have_builtin_builtin_fabs64", 1); } } --
[Mesa-dev] [PATCH 05/47] glsl: Add "built-in" functions to do le(fp64, fp64)
Signed-off-by: Elie Tournier --- src/compiler/glsl/builtin_float64.h | 173 src/compiler/glsl/builtin_functions.cpp | 4 + src/compiler/glsl/builtin_functions.h | 3 + src/compiler/glsl/float64.glsl | 50 + src/compiler/glsl/glcpp/glcpp-parse.y | 1 + 5 files changed, 231 insertions(+) diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h index 9fc716871d..aa769d9672 100644 --- a/src/compiler/glsl/builtin_float64.h +++ b/src/compiler/glsl/builtin_float64.h @@ -266,3 +266,176 @@ feq64(void *mem_ctx, builtin_available_predicate avail) sig->replace_parameters(&sig_parameters); return sig; } +ir_function_signature * +extractFloat64Sign(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::uint_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r004B = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r004B); + ir_expression *const r004C = rshift(swizzle_y(r004B), body.constant(int(31))); + body.emit(ret(r004C)); + + sig->replace_parameters(&sig_parameters); + return sig; +} +ir_function_signature * +le64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::bool_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r004D = new(mem_ctx) ir_variable(glsl_type::uint_type, "a0", ir_var_function_in); + sig_parameters.push_tail(r004D); + ir_variable *const r004E = new(mem_ctx) ir_variable(glsl_type::uint_type, "a1", ir_var_function_in); + sig_parameters.push_tail(r004E); + ir_variable *const r004F = new(mem_ctx) ir_variable(glsl_type::uint_type, "b0", ir_var_function_in); + sig_parameters.push_tail(r004F); + ir_variable *const r0050 = new(mem_ctx) 
ir_variable(glsl_type::uint_type, "b1", ir_var_function_in); + sig_parameters.push_tail(r0050); + ir_expression *const r0051 = less(r004D, r004F); + ir_expression *const r0052 = equal(r004D, r004F); + ir_expression *const r0053 = lequal(r004E, r0050); + ir_expression *const r0054 = logic_and(r0052, r0053); + ir_expression *const r0055 = logic_or(r0051, r0054); + body.emit(ret(r0055)); + + sig->replace_parameters(&sig_parameters); + return sig; +} +ir_function_signature * +fle64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::bool_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r0056 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r0056); + ir_variable *const r0057 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "b", ir_var_function_in); + sig_parameters.push_tail(r0057); + ir_variable *const r0058 = body.make_temp(glsl_type::bool_type, "return_value"); + ir_variable *const r0059 = new(mem_ctx) ir_variable(glsl_type::bool_type, "isbNaN", ir_var_auto); + body.emit(r0059); + ir_variable *const r005A = new(mem_ctx) ir_variable(glsl_type::bool_type, "isaNaN", ir_var_auto); + body.emit(r005A); + ir_expression *const r005B = rshift(swizzle_y(r0056), body.constant(int(20))); + ir_expression *const r005C = bit_and(r005B, body.constant(2047u)); + ir_expression *const r005D = expr(ir_unop_u2i, r005C); + ir_expression *const r005E = equal(r005D, body.constant(int(2047))); + ir_expression *const r005F = bit_and(swizzle_y(r0056), body.constant(1048575u)); + ir_expression *const r0060 = bit_or(r005F, swizzle_x(r0056)); + ir_expression *const r0061 = nequal(r0060, body.constant(0u)); + body.emit(assign(r005A, logic_and(r005E, r0061), 0x01)); + + ir_expression *const r0062 = rshift(swizzle_y(r0057), body.constant(int(20))); + ir_expression *const r0063 = 
bit_and(r0062, body.constant(2047u)); + ir_expression *const r0064 = expr(ir_unop_u2i, r0063); + ir_expression *const r0065 = equal(r0064, body.constant(int(2047))); + ir_expression *const r0066 = bit_and(swizzle_y(r0057), body.constant(1048575u)); + ir_expression *const r0067 = bit_or(r0066, swizzle_x(r0057)); + ir_expression *const r0068 = nequal(r0067, body.constant(0u)); + body.emit(assign(r0059, logic_and(r0065, r0068), 0x01)); + + /* IF CONDITION */ + ir_expression *const r006A = logic_or(r005A, r0059); + ir_if *f0069 = new(mem_ctx) ir_if(operand(r006A).val); + exec_list *const f0069_parent_instructions = body.instructi
[Mesa-dev] [PATCH 14/47] glsl: Add "built-in" functions to do fp32_to_fp64(fp32)
Signed-off-by: Elie Tournier --- src/compiler/glsl/builtin_float64.h | 311 src/compiler/glsl/builtin_functions.cpp | 4 + src/compiler/glsl/builtin_functions.h | 3 + src/compiler/glsl/float64.glsl | 38 src/compiler/glsl/glcpp/glcpp-parse.y | 1 + 5 files changed, 357 insertions(+) diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h index c372de0f4d..62f246ebc1 100644 --- a/src/compiler/glsl/builtin_float64.h +++ b/src/compiler/glsl/builtin_float64.h @@ -13665,3 +13665,314 @@ fp64_to_fp32(void *mem_ctx, builtin_available_predicate avail) sig->replace_parameters(&sig_parameters); return sig; } +ir_function_signature * +fp32_to_fp64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r0B83 = new(mem_ctx) ir_variable(glsl_type::float_type, "f", ir_var_function_in); + sig_parameters.push_tail(r0B83); + ir_variable *const r0B84 = body.make_temp(glsl_type::bool_type, "execute_flag"); + body.emit(assign(r0B84, body.constant(true), 0x01)); + + ir_variable *const r0B85 = body.make_temp(glsl_type::uvec2_type, "return_value"); + ir_variable *const r0B86 = new(mem_ctx) ir_variable(glsl_type::uint_type, "aSign", ir_var_auto); + body.emit(r0B86); + ir_variable *const r0B87 = new(mem_ctx) ir_variable(glsl_type::int_type, "aExp", ir_var_auto); + body.emit(r0B87); + ir_variable *const r0B88 = new(mem_ctx) ir_variable(glsl_type::uint_type, "aFrac", ir_var_auto); + body.emit(r0B88); + ir_variable *const r0B89 = body.make_temp(glsl_type::uint_type, "floatBitsToUint_retval"); + body.emit(assign(r0B89, expr(ir_unop_bitcast_f2u, r0B83), 0x01)); + + ir_variable *const r0B8A = body.make_temp(glsl_type::uint_type, "assignment_tmp"); + body.emit(assign(r0B8A, bit_and(r0B89, body.constant(8388607u)), 0x01)); + + body.emit(assign(r0B88, 
r0B8A, 0x01)); + + ir_variable *const r0B8B = body.make_temp(glsl_type::int_type, "assignment_tmp"); + ir_expression *const r0B8C = rshift(r0B89, body.constant(int(23))); + ir_expression *const r0B8D = bit_and(r0B8C, body.constant(255u)); + body.emit(assign(r0B8B, expr(ir_unop_u2i, r0B8D), 0x01)); + + body.emit(assign(r0B87, r0B8B, 0x01)); + + body.emit(assign(r0B86, rshift(r0B89, body.constant(int(31))), 0x01)); + + /* IF CONDITION */ + ir_expression *const r0B8F = equal(r0B8B, body.constant(int(255))); + ir_if *f0B8E = new(mem_ctx) ir_if(operand(r0B8F).val); + exec_list *const f0B8E_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f0B8E->then_instructions; + + /* IF CONDITION */ + ir_expression *const r0B91 = nequal(r0B8A, body.constant(0u)); + ir_if *f0B90 = new(mem_ctx) ir_if(operand(r0B91).val); + exec_list *const f0B90_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f0B90->then_instructions; + + ir_variable *const r0B92 = body.make_temp(glsl_type::uint_type, "assignment_tmp"); + body.emit(assign(r0B92, lshift(r0B89, body.constant(int(9))), 0x01)); + + ir_variable *const r0B93 = body.make_temp(glsl_type::uvec2_type, "vec_ctor"); + ir_expression *const r0B94 = lshift(r0B92, body.constant(int(20))); + body.emit(assign(r0B93, bit_or(r0B94, body.constant(0u)), 0x01)); + + ir_expression *const r0B95 = rshift(r0B92, body.constant(int(12))); + ir_expression *const r0B96 = lshift(r0B86, body.constant(int(31))); + ir_expression *const r0B97 = bit_or(r0B96, body.constant(2146959360u)); + body.emit(assign(r0B93, bit_or(r0B95, r0B97), 0x02)); + + body.emit(assign(r0B85, r0B93, 0x03)); + + body.emit(assign(r0B84, body.constant(false), 0x01)); + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f0B90->else_instructions; + + ir_variable *const r0B98 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "z", ir_var_auto); + body.emit(r0B98); + ir_expression *const r0B99 = lshift(r0B86, 
body.constant(int(31))); + body.emit(assign(r0B98, add(r0B99, body.constant(2146435072u)), 0x02)); + + body.emit(assign(r0B98, body.constant(0u), 0x01)); + + body.emit(assign(r0B85, r0B98, 0x03)); + + body.emit(assign(r0B84, body.constant(false), 0x01)); + + + body.instructions = f0B90_parent_instructions; + body.emit(f0B90); + + /* END IF */ + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f0B8E->else_instructions; + + /* IF CONDIT
[Mesa-dev] [PATCH 09/47] glsl: Add "built-in" functions to do fp64_to_uint(fp64)
Signed-off-by: Elie Tournier --- src/compiler/glsl/builtin_float64.h | 446 src/compiler/glsl/builtin_functions.cpp | 4 + src/compiler/glsl/builtin_functions.h | 3 + src/compiler/glsl/float64.glsl | 68 + src/compiler/glsl/glcpp/glcpp-parse.y | 1 + 5 files changed, 522 insertions(+) diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h index 5213751223..bfe387b0f5 100644 --- a/src/compiler/glsl/builtin_float64.h +++ b/src/compiler/glsl/builtin_float64.h @@ -11848,3 +11848,449 @@ fmul64(void *mem_ctx, builtin_available_predicate avail) sig->replace_parameters(&sig_parameters); return sig; } +ir_function_signature * +shift64Right(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::void_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r0A04 = new(mem_ctx) ir_variable(glsl_type::uint_type, "a0", ir_var_function_in); + sig_parameters.push_tail(r0A04); + ir_variable *const r0A05 = new(mem_ctx) ir_variable(glsl_type::uint_type, "a1", ir_var_function_in); + sig_parameters.push_tail(r0A05); + ir_variable *const r0A06 = new(mem_ctx) ir_variable(glsl_type::int_type, "count", ir_var_function_in); + sig_parameters.push_tail(r0A06); + ir_variable *const r0A07 = new(mem_ctx) ir_variable(glsl_type::uint_type, "z0Ptr", ir_var_function_inout); + sig_parameters.push_tail(r0A07); + ir_variable *const r0A08 = new(mem_ctx) ir_variable(glsl_type::uint_type, "z1Ptr", ir_var_function_inout); + sig_parameters.push_tail(r0A08); + ir_variable *const r0A09 = new(mem_ctx) ir_variable(glsl_type::uint_type, "z1", ir_var_auto); + body.emit(r0A09); + ir_variable *const r0A0A = new(mem_ctx) ir_variable(glsl_type::uint_type, "z0", ir_var_auto); + body.emit(r0A0A); + ir_variable *const r0A0B = body.make_temp(glsl_type::int_type, "assignment_tmp"); + ir_expression *const r0A0C = neg(r0A06); + 
body.emit(assign(r0A0B, bit_and(r0A0C, body.constant(int(31))), 0x01)); + + /* IF CONDITION */ + ir_expression *const r0A0E = equal(r0A06, body.constant(int(0))); + ir_if *f0A0D = new(mem_ctx) ir_if(operand(r0A0E).val); + exec_list *const f0A0D_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f0A0D->then_instructions; + + body.emit(assign(r0A09, r0A05, 0x01)); + + body.emit(assign(r0A0A, r0A04, 0x01)); + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f0A0D->else_instructions; + + /* IF CONDITION */ + ir_expression *const r0A10 = less(r0A06, body.constant(int(32))); + ir_if *f0A0F = new(mem_ctx) ir_if(operand(r0A10).val); + exec_list *const f0A0F_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f0A0F->then_instructions; + + ir_expression *const r0A11 = lshift(r0A04, r0A0B); + ir_expression *const r0A12 = rshift(r0A05, r0A06); + body.emit(assign(r0A09, bit_or(r0A11, r0A12), 0x01)); + + body.emit(assign(r0A0A, rshift(r0A04, r0A06), 0x01)); + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f0A0F->else_instructions; + + ir_variable *const r0A13 = body.make_temp(glsl_type::uint_type, "conditional_tmp"); + /* IF CONDITION */ + ir_expression *const r0A15 = less(r0A06, body.constant(int(64))); + ir_if *f0A14 = new(mem_ctx) ir_if(operand(r0A15).val); + exec_list *const f0A14_parent_instructions = body.instructions; + +/* THEN INSTRUCTIONS */ +body.instructions = &f0A14->then_instructions; + +ir_expression *const r0A16 = bit_and(r0A06, body.constant(int(31))); +body.emit(assign(r0A13, rshift(r0A04, r0A16), 0x01)); + + +/* ELSE INSTRUCTIONS */ +body.instructions = &f0A14->else_instructions; + +body.emit(assign(r0A13, body.constant(0u), 0x01)); + + + body.instructions = f0A14_parent_instructions; + body.emit(f0A14); + + /* END IF */ + + body.emit(assign(r0A09, r0A13, 0x01)); + + body.emit(assign(r0A0A, body.constant(0u), 0x01)); + + + body.instructions = f0A0F_parent_instructions; + 
body.emit(f0A0F); + + /* END IF */ + + + body.instructions = f0A0D_parent_instructions; + body.emit(f0A0D); + + /* END IF */ + + body.emit(assign(r0A08, r0A09, 0x01)); + + body.emit(assign(r0A07, r0A0A, 0x01)); + + sig->replace_parameters(&sig_parameters); + return sig; +} +ir_function_signature * +fp64_to_uint(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = +
[Mesa-dev] [PATCH 18/47] glsl: Add "built-in" functions to do rcp(fp64)
This algorithm is not accurate. Signed-off-by: Elie Tournier --- src/compiler/glsl/builtin_float64.h | 1829 +++ src/compiler/glsl/builtin_functions.cpp |4 + src/compiler/glsl/builtin_functions.h |3 + src/compiler/glsl/float64.glsl | 10 + src/compiler/glsl/glcpp/glcpp-parse.y |1 + 5 files changed, 1847 insertions(+) diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h index 97d6adfd9f..e6b654cb0d 100644 --- a/src/compiler/glsl/builtin_float64.h +++ b/src/compiler/glsl/builtin_float64.h @@ -18479,3 +18479,1832 @@ fround64(void *mem_ctx, builtin_available_predicate avail) sig->replace_parameters(&sig_parameters); return sig; } +ir_function_signature * +frcp64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r0F45 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r0F45); + ir_variable *const r0F46 = body.make_temp(glsl_type::uint_type, "z1Ptr"); + body.emit(assign(r0F46, sub(body.constant(2406117202u), swizzle_x(r0F45)), 0x01)); + + ir_expression *const r0F47 = sub(body.constant(3217938081u), swizzle_y(r0F45)); + ir_expression *const r0F48 = less(body.constant(2406117202u), swizzle_x(r0F45)); + ir_expression *const r0F49 = expr(ir_unop_b2i, r0F48); + ir_expression *const r0F4A = expr(ir_unop_i2u, r0F49); + body.emit(assign(r0F45, sub(r0F47, r0F4A), 0x02)); + + body.emit(assign(r0F45, r0F46, 0x01)); + + ir_variable *const r0F4B = new(mem_ctx) ir_variable(glsl_type::uint_type, "z1", ir_var_auto); + body.emit(r0F4B); + ir_variable *const r0F4C = new(mem_ctx) ir_variable(glsl_type::uint_type, "z0", ir_var_auto); + body.emit(r0F4C); + ir_expression *const r0F4D = lshift(swizzle_y(r0F45), body.constant(int(31))); + ir_expression *const r0F4E = rshift(r0F46, 
body.constant(int(1))); + body.emit(assign(r0F4B, bit_or(r0F4D, r0F4E), 0x01)); + + body.emit(assign(r0F4C, rshift(swizzle_y(r0F45), body.constant(int(1))), 0x01)); + + body.emit(assign(r0F45, r0F4C, 0x02)); + + body.emit(assign(r0F45, r0F4B, 0x01)); + + ir_variable *const r0F4F = body.make_temp(glsl_type::bool_type, "execute_flag"); + body.emit(assign(r0F4F, body.constant(true), 0x01)); + + ir_variable *const r0F50 = body.make_temp(glsl_type::uvec2_type, "return_value"); + ir_variable *const r0F51 = new(mem_ctx) ir_variable(glsl_type::uint_type, "zSign", ir_var_auto); + body.emit(r0F51); + ir_variable *const r0F52 = new(mem_ctx) ir_variable(glsl_type::int_type, "bExp", ir_var_auto); + body.emit(r0F52); + ir_variable *const r0F53 = new(mem_ctx) ir_variable(glsl_type::int_type, "aExp", ir_var_auto); + body.emit(r0F53); + ir_variable *const r0F54 = new(mem_ctx) ir_variable(glsl_type::uint_type, "bFracHi", ir_var_auto); + body.emit(r0F54); + ir_variable *const r0F55 = new(mem_ctx) ir_variable(glsl_type::uint_type, "bFracLo", ir_var_auto); + body.emit(r0F55); + ir_variable *const r0F56 = new(mem_ctx) ir_variable(glsl_type::uint_type, "aFracHi", ir_var_auto); + body.emit(r0F56); + ir_variable *const r0F57 = new(mem_ctx) ir_variable(glsl_type::uint_type, "aFracLo", ir_var_auto); + body.emit(r0F57); + ir_variable *const r0F58 = new(mem_ctx) ir_variable(glsl_type::int_type, "zExp", ir_var_auto); + body.emit(r0F58); + ir_variable *const r0F59 = new(mem_ctx) ir_variable(glsl_type::uint_type, "zFrac2", ir_var_auto); + body.emit(r0F59); + ir_variable *const r0F5A = new(mem_ctx) ir_variable(glsl_type::uint_type, "zFrac1", ir_var_auto); + body.emit(r0F5A); + ir_variable *const r0F5B = new(mem_ctx) ir_variable(glsl_type::uint_type, "zFrac0", ir_var_auto); + body.emit(r0F5B); + body.emit(assign(r0F5B, body.constant(0u), 0x01)); + + body.emit(assign(r0F5A, body.constant(0u), 0x01)); + + body.emit(assign(r0F59, body.constant(0u), 0x01)); + + ir_variable *const r0F5C = 
body.make_temp(glsl_type::uint_type, "extractFloat64FracLo_retval"); + body.emit(assign(r0F5C, swizzle_x(r0F45), 0x01)); + + body.emit(assign(r0F57, r0F5C, 0x01)); + + ir_variable *const r0F5D = body.make_temp(glsl_type::uint_type, "extractFloat64FracHi_retval"); + body.emit(assign(r0F5D, bit_and(r0F4C, body.constant(1048575u)), 0x01)); + + body.emit(assign(r0F56, r0F5D, 0x01)); + + ir_variable *const r0F5E = body.make_temp(glsl_type::uint_type, "extractFloat64FracLo_retval"); + body.emit(assign(r0F5E, swizzle_x(r0F45), 0x01)); + + body.emit(assign(r0F55, r0F5E, 0x01))
[Mesa-dev] [PATCH 10/47] glsl: Add "built-in" functions to do uint_to_fp64(uint)
Signed-off-by: Elie Tournier --- src/compiler/glsl/builtin_float64.h | 259 src/compiler/glsl/builtin_functions.cpp | 4 + src/compiler/glsl/builtin_functions.h | 3 + src/compiler/glsl/float64.glsl | 26 src/compiler/glsl/glcpp/glcpp-parse.y | 1 + 5 files changed, 293 insertions(+) diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h index bfe387b0f5..2c11e463ca 100644 --- a/src/compiler/glsl/builtin_float64.h +++ b/src/compiler/glsl/builtin_float64.h @@ -12294,3 +12294,262 @@ fp64_to_uint(void *mem_ctx, builtin_available_predicate avail) sig->replace_parameters(&sig_parameters); return sig; } +ir_function_signature * +uint_to_fp64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r0A5E = new(mem_ctx) ir_variable(glsl_type::uint_type, "a", ir_var_function_in); + sig_parameters.push_tail(r0A5E); + ir_variable *const r0A5F = body.make_temp(glsl_type::uvec2_type, "return_value"); + ir_variable *const r0A60 = new(mem_ctx) ir_variable(glsl_type::uint_type, "aLow", ir_var_auto); + body.emit(r0A60); + ir_variable *const r0A61 = new(mem_ctx) ir_variable(glsl_type::uint_type, "aHigh", ir_var_auto); + body.emit(r0A61); + /* IF CONDITION */ + ir_expression *const r0A63 = equal(r0A5E, body.constant(0u)); + ir_if *f0A62 = new(mem_ctx) ir_if(operand(r0A63).val); + exec_list *const f0A62_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f0A62->then_instructions; + + body.emit(assign(r0A5F, ir_constant::zero(mem_ctx, glsl_type::uvec2_type), 0x03)); + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f0A62->else_instructions; + + ir_variable *const r0A64 = body.make_temp(glsl_type::uint_type, "a"); + body.emit(assign(r0A64, r0A5E, 0x01)); + + ir_variable *const r0A65 = 
body.make_temp(glsl_type::int_type, "return_value"); + ir_variable *const r0A66 = new(mem_ctx) ir_variable(glsl_type::int_type, "shiftCount", ir_var_auto); + body.emit(r0A66); + /* IF CONDITION */ + ir_expression *const r0A68 = equal(r0A5E, body.constant(0u)); + ir_if *f0A67 = new(mem_ctx) ir_if(operand(r0A68).val); + exec_list *const f0A67_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f0A67->then_instructions; + + body.emit(assign(r0A65, body.constant(int(32)), 0x01)); + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f0A67->else_instructions; + + body.emit(assign(r0A66, body.constant(int(0)), 0x01)); + + /* IF CONDITION */ + ir_expression *const r0A6A = bit_and(r0A5E, body.constant(4294901760u)); + ir_expression *const r0A6B = equal(r0A6A, body.constant(0u)); + ir_if *f0A69 = new(mem_ctx) ir_if(operand(r0A6B).val); + exec_list *const f0A69_parent_instructions = body.instructions; + +/* THEN INSTRUCTIONS */ +body.instructions = &f0A69->then_instructions; + +body.emit(assign(r0A66, body.constant(int(16)), 0x01)); + +body.emit(assign(r0A64, lshift(r0A5E, body.constant(int(16))), 0x01)); + + + body.instructions = f0A69_parent_instructions; + body.emit(f0A69); + + /* END IF */ + + /* IF CONDITION */ + ir_expression *const r0A6D = bit_and(r0A64, body.constant(4278190080u)); + ir_expression *const r0A6E = equal(r0A6D, body.constant(0u)); + ir_if *f0A6C = new(mem_ctx) ir_if(operand(r0A6E).val); + exec_list *const f0A6C_parent_instructions = body.instructions; + +/* THEN INSTRUCTIONS */ +body.instructions = &f0A6C->then_instructions; + +body.emit(assign(r0A66, add(r0A66, body.constant(int(8))), 0x01)); + +body.emit(assign(r0A64, lshift(r0A64, body.constant(int(8))), 0x01)); + + + body.instructions = f0A6C_parent_instructions; + body.emit(f0A6C); + + /* END IF */ + + /* IF CONDITION */ + ir_expression *const r0A70 = bit_and(r0A64, body.constant(4026531840u)); + ir_expression *const r0A71 = equal(r0A70, body.constant(0u)); + 
ir_if *f0A6F = new(mem_ctx) ir_if(operand(r0A71).val); + exec_list *const f0A6F_parent_instructions = body.instructions; + +/* THEN INSTRUCTIONS */ +body.instructions = &f0A6F->then_instructions; + +body.emit(assign(r0A66, add(r0A66, body.constant(int(4))), 0x01)); + +body.emit(assign(r0A64, lshift(r0A64, body.co
[Mesa-dev] [PATCH 12/47] glsl: Add "built-in" functions to do int_to_fp64(int)
Signed-off-by: Elie Tournier --- src/compiler/glsl/builtin_float64.h | 334 src/compiler/glsl/builtin_functions.cpp | 4 + src/compiler/glsl/builtin_functions.h | 3 + src/compiler/glsl/float64.glsl | 23 +++ src/compiler/glsl/glcpp/glcpp-parse.y | 1 + 5 files changed, 365 insertions(+) diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h index c5640c459a..644407a185 100644 --- a/src/compiler/glsl/builtin_float64.h +++ b/src/compiler/glsl/builtin_float64.h @@ -12837,3 +12837,337 @@ fp64_to_int(void *mem_ctx, builtin_available_predicate avail) sig->replace_parameters(&sig_parameters); return sig; } +ir_function_signature * +int_to_fp64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r0ABE = new(mem_ctx) ir_variable(glsl_type::int_type, "a", ir_var_function_in); + sig_parameters.push_tail(r0ABE); + ir_variable *const r0ABF = body.make_temp(glsl_type::uvec2_type, "return_value"); + ir_variable *const r0AC0 = new(mem_ctx) ir_variable(glsl_type::uint_type, "zSign", ir_var_auto); + body.emit(r0AC0); + ir_variable *const r0AC1 = new(mem_ctx) ir_variable(glsl_type::uint_type, "zFrac1", ir_var_auto); + body.emit(r0AC1); + ir_variable *const r0AC2 = new(mem_ctx) ir_variable(glsl_type::uint_type, "zFrac0", ir_var_auto); + body.emit(r0AC2); + body.emit(assign(r0AC2, body.constant(0u), 0x01)); + + body.emit(assign(r0AC1, body.constant(0u), 0x01)); + + /* IF CONDITION */ + ir_expression *const r0AC4 = equal(r0ABE, body.constant(int(0))); + ir_if *f0AC3 = new(mem_ctx) ir_if(operand(r0AC4).val); + exec_list *const f0AC3_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f0AC3->then_instructions; + + ir_variable *const r0AC5 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "z", 
ir_var_auto); + body.emit(r0AC5); + body.emit(assign(r0AC5, body.constant(0u), 0x02)); + + body.emit(assign(r0AC5, body.constant(0u), 0x01)); + + body.emit(assign(r0ABF, r0AC5, 0x03)); + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f0AC3->else_instructions; + + ir_expression *const r0AC6 = less(r0ABE, body.constant(int(0))); + ir_expression *const r0AC7 = expr(ir_unop_b2i, r0AC6); + body.emit(assign(r0AC0, expr(ir_unop_i2u, r0AC7), 0x01)); + + ir_variable *const r0AC8 = body.make_temp(glsl_type::uint_type, "conditional_tmp"); + /* IF CONDITION */ + ir_expression *const r0ACA = less(r0ABE, body.constant(int(0))); + ir_if *f0AC9 = new(mem_ctx) ir_if(operand(r0ACA).val); + exec_list *const f0AC9_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f0AC9->then_instructions; + + ir_expression *const r0ACB = neg(r0ABE); + body.emit(assign(r0AC8, expr(ir_unop_i2u, r0ACB), 0x01)); + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f0AC9->else_instructions; + + body.emit(assign(r0AC8, expr(ir_unop_i2u, r0ABE), 0x01)); + + + body.instructions = f0AC9_parent_instructions; + body.emit(f0AC9); + + /* END IF */ + + ir_variable *const r0ACC = body.make_temp(glsl_type::uint_type, "a"); + body.emit(assign(r0ACC, r0AC8, 0x01)); + + ir_variable *const r0ACD = body.make_temp(glsl_type::int_type, "return_value"); + ir_variable *const r0ACE = new(mem_ctx) ir_variable(glsl_type::int_type, "shiftCount", ir_var_auto); + body.emit(r0ACE); + /* IF CONDITION */ + ir_expression *const r0AD0 = equal(r0AC8, body.constant(0u)); + ir_if *f0ACF = new(mem_ctx) ir_if(operand(r0AD0).val); + exec_list *const f0ACF_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f0ACF->then_instructions; + + body.emit(assign(r0ACD, body.constant(int(32)), 0x01)); + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f0ACF->else_instructions; + + body.emit(assign(r0ACE, body.constant(int(0)), 0x01)); + + /* IF CONDITION */ + 
ir_expression *const r0AD2 = bit_and(r0AC8, body.constant(4294901760u)); + ir_expression *const r0AD3 = equal(r0AD2, body.constant(0u)); + ir_if *f0AD1 = new(mem_ctx) ir_if(operand(r0AD3).val); + exec_list *const f0AD1_parent_instructions = body.instructions; + +/* THEN INSTRUCTIONS */ +body.instructions = &f0AD1->then_instructions; + +body.emit(assign(r0ACE, body.constant(int(16)), 0x01)); +
[Mesa-dev] [PATCH 16/47] glsl: Add "built-in" functions to do trunc(fp64)
Signed-off-by: Elie Tournier --- src/compiler/glsl/builtin_float64.h | 121 src/compiler/glsl/builtin_functions.cpp | 4 ++ src/compiler/glsl/builtin_functions.h | 3 + src/compiler/glsl/float64.glsl | 23 ++ src/compiler/glsl/glcpp/glcpp-parse.y | 1 + 5 files changed, 152 insertions(+) diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h index b16bc5def5..2bce013d0e 100644 --- a/src/compiler/glsl/builtin_float64.h +++ b/src/compiler/glsl/builtin_float64.h @@ -18133,3 +18133,124 @@ fsqrt64(void *mem_ctx, builtin_available_predicate avail) sig->replace_parameters(&sig_parameters); return sig; } +ir_function_signature * +ftrunc64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r0F09 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r0F09); + ir_variable *const r0F0A = body.make_temp(glsl_type::uvec2_type, "return_value"); + ir_variable *const r0F0B = body.make_temp(glsl_type::int_type, "assignment_tmp"); + ir_expression *const r0F0C = rshift(swizzle_y(r0F09), body.constant(int(20))); + ir_expression *const r0F0D = bit_and(r0F0C, body.constant(2047u)); + ir_expression *const r0F0E = expr(ir_unop_u2i, r0F0D); + body.emit(assign(r0F0B, add(r0F0E, body.constant(int(-1023))), 0x01)); + + /* IF CONDITION */ + ir_expression *const r0F10 = less(r0F0B, body.constant(int(0))); + ir_if *f0F0F = new(mem_ctx) ir_if(operand(r0F10).val); + exec_list *const f0F0F_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f0F0F->then_instructions; + + body.emit(assign(r0F0A, ir_constant::zero(mem_ctx, glsl_type::uvec2_type), 0x03)); + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f0F0F->else_instructions; + + /* IF CONDITION */ + 
ir_expression *const r0F12 = greater(r0F0B, body.constant(int(52))); + ir_if *f0F11 = new(mem_ctx) ir_if(operand(r0F12).val); + exec_list *const f0F11_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f0F11->then_instructions; + + body.emit(assign(r0F0A, r0F09, 0x03)); + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f0F11->else_instructions; + + ir_variable *const r0F13 = body.make_temp(glsl_type::int_type, "assignment_tmp"); + body.emit(assign(r0F13, sub(body.constant(int(52)), r0F0B), 0x01)); + + ir_variable *const r0F14 = body.make_temp(glsl_type::uint_type, "conditional_tmp"); + /* IF CONDITION */ + ir_expression *const r0F16 = gequal(r0F13, body.constant(int(32))); + ir_if *f0F15 = new(mem_ctx) ir_if(operand(r0F16).val); + exec_list *const f0F15_parent_instructions = body.instructions; + +/* THEN INSTRUCTIONS */ +body.instructions = &f0F15->then_instructions; + +body.emit(assign(r0F14, body.constant(0u), 0x01)); + + +/* ELSE INSTRUCTIONS */ +body.instructions = &f0F15->else_instructions; + +body.emit(assign(r0F14, lshift(body.constant(4294967295u), r0F13), 0x01)); + + + body.instructions = f0F15_parent_instructions; + body.emit(f0F15); + + /* END IF */ + + ir_variable *const r0F17 = body.make_temp(glsl_type::uint_type, "conditional_tmp"); + /* IF CONDITION */ + ir_expression *const r0F19 = less(r0F13, body.constant(int(33))); + ir_if *f0F18 = new(mem_ctx) ir_if(operand(r0F19).val); + exec_list *const f0F18_parent_instructions = body.instructions; + +/* THEN INSTRUCTIONS */ +body.instructions = &f0F18->then_instructions; + +body.emit(assign(r0F17, body.constant(4294967295u), 0x01)); + + +/* ELSE INSTRUCTIONS */ +body.instructions = &f0F18->else_instructions; + +ir_expression *const r0F1A = add(r0F13, body.constant(int(-32))); +body.emit(assign(r0F17, lshift(body.constant(4294967295u), r0F1A), 0x01)); + + + body.instructions = f0F18_parent_instructions; + body.emit(f0F18); + + /* END IF */ + + ir_variable *const 
r0F1B = body.make_temp(glsl_type::uvec2_type, "vec_ctor"); + body.emit(assign(r0F1B, bit_and(r0F14, swizzle_x(r0F09)), 0x01)); + + body.emit(assign(r0F1B, bit_and(r0F17, swizzle_y(r0F09)), 0x02)); + + body.emit(assign(r0F0A, r0F1B, 0x03)); + + + body.instructions = f0F11_parent_instructions; + body.emit(f0F11);
[Mesa-dev] [PATCH 19/47] glsl: Add a lowering pass for 64-bit float abs()
Squashed with: glsl/lower_64bit: fix return type conversion (airlied) Only do conversion for the 64-bit types, add a path to do result merging without conversion. Signed-off-by: Elie Tournier --- src/compiler/Makefile.sources | 2 +- src/compiler/glsl/ir_optimization.h| 8 +- .../glsl/{lower_int64.cpp => lower_64bit.cpp} | 87 ++ src/mesa/state_tracker/st_extensions.c | 3 +- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 9 +++ 5 files changed, 92 insertions(+), 17 deletions(-) rename src/compiler/glsl/{lower_int64.cpp => lower_64bit.cpp} (81%) diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index 24fa7716de..699133234c 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -92,7 +92,7 @@ LIBGLSL_FILES = \ glsl/lower_distance.cpp \ glsl/lower_if_to_cond_assign.cpp \ glsl/lower_instructions.cpp \ - glsl/lower_int64.cpp \ + glsl/lower_64bit.cpp \ glsl/lower_jumps.cpp \ glsl/lower_mat_op_to_vec.cpp \ glsl/lower_noise.cpp \ diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h index 573ddb4a8d..6cc0909a80 100644 --- a/src/compiler/glsl/ir_optimization.h +++ b/src/compiler/glsl/ir_optimization.h @@ -55,11 +55,14 @@ #define DIV_TO_MUL_RCP(FDIV_TO_MUL_RCP | DDIV_TO_MUL_RCP) #define SQRT_TO_ABS_SQRT 0x20 -/* Opertaions for lower_64bit_integer_instructions() */ +/* Operations for lower_64bit_integer_instructions() + * and lower_64bit_double_instructions() + */ #define MUL64 (1U << 0) #define SIGN64(1U << 1) #define DIV64 (1U << 2) #define MOD64 (1U << 3) +#define ABS64 (1U << 4) /** * \see class lower_packing_builtins_visitor @@ -177,4 +180,7 @@ compare_index_block(exec_list *instructions, ir_variable *index, bool lower_64bit_integer_instructions(exec_list *instructions, unsigned what_to_lower); +bool lower_64bit_double_instructions(exec_list *instructions, + unsigned what_to_lower); + #endif /* GLSL_IR_OPTIMIZATION_H */ diff --git a/src/compiler/glsl/lower_int64.cpp 
b/src/compiler/glsl/lower_64bit.cpp similarity index 81% rename from src/compiler/glsl/lower_int64.cpp rename to src/compiler/glsl/lower_64bit.cpp index 2d4fdbb1a5..4887e5538c 100644 --- a/src/compiler/glsl/lower_int64.cpp +++ b/src/compiler/glsl/lower_64bit.cpp @@ -22,7 +22,7 @@ */ /** - * \file lower_int64.cpp + * \file lower_64bit.cpp * * Lower 64-bit operations to 32-bit operations. Each 64-bit value is lowered * to a uvec2. For each operation that can be lowered, there is a function @@ -56,6 +56,9 @@ void expand_source(ir_factory &, ir_rvalue *val, ir_variable **expanded_src); ir_dereference_variable *compact_destination(ir_factory &, const glsl_type *type, ir_variable *result[4]); +ir_dereference_variable *merge_destination(ir_factory &, + const glsl_type *type, + ir_variable *result[4]); ir_rvalue *lower_op_to_function_call(ir_instruction *base_ir, ir_expression *ir, @@ -132,7 +135,7 @@ private: #define lowering(x) (this->lower & x) bool -lower_64bit_integer_instructions(exec_list *instructions, +lower_64bit_instructions(exec_list *instructions, unsigned what_to_lower) { if (instructions->is_empty()) @@ -163,6 +166,19 @@ lower_64bit_integer_instructions(exec_list *instructions, return v.progress; } +bool +lower_64bit_integer_instructions(exec_list *instructions, + unsigned what_to_lower) +{ + return lower_64bit_instructions(instructions, what_to_lower); +} + +bool +lower_64bit_double_instructions(exec_list *instructions, + unsigned what_to_lower) +{ + return lower_64bit_instructions(instructions, what_to_lower); +} /** * Expand individual 64-bit values to uvec2 values @@ -200,18 +216,21 @@ lower_64bit::expand_source(ir_factory &body, ir_rvalue *val, ir_variable **expanded_src) { - assert(val->type->is_integer_64()); + assert(val->type->is_integer_64() || val->type->is_double()); ir_variable *const temp = body.make_temp(val->type, "tmp"); body.emit(assign(temp, val)); const ir_expression_operation unpack_opcode = - val->type->base_type == GLSL_TYPE_UINT64 - ? 
ir_unop_unpack_uint_2x32 : ir_unop_unpack_int_2x32; + val->type->base_type =
[Mesa-dev] [PATCH 11/47] glsl: Add "built-in" functions to do fp64_to_int(fp64)
Signed-off-by: Elie Tournier --- src/compiler/glsl/builtin_float64.h | 284 src/compiler/glsl/builtin_functions.cpp | 4 + src/compiler/glsl/builtin_functions.h | 3 + src/compiler/glsl/float64.glsl | 45 + src/compiler/glsl/glcpp/glcpp-parse.y | 1 + 5 files changed, 337 insertions(+) diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h index 2c11e463ca..c5640c459a 100644 --- a/src/compiler/glsl/builtin_float64.h +++ b/src/compiler/glsl/builtin_float64.h @@ -12553,3 +12553,287 @@ uint_to_fp64(void *mem_ctx, builtin_available_predicate avail) sig->replace_parameters(&sig_parameters); return sig; } +ir_function_signature * +fp64_to_int(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::int_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r0A87 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r0A87); + ir_variable *const r0A88 = body.make_temp(glsl_type::bool_type, "execute_flag"); + body.emit(assign(r0A88, body.constant(true), 0x01)); + + ir_variable *const r0A89 = body.make_temp(glsl_type::int_type, "return_value"); + ir_variable *const r0A8A = new(mem_ctx) ir_variable(glsl_type::uint_type, "absZ", ir_var_auto); + body.emit(r0A8A); + ir_variable *const r0A8B = new(mem_ctx) ir_variable(glsl_type::uint_type, "aSign", ir_var_auto); + body.emit(r0A8B); + ir_variable *const r0A8C = new(mem_ctx) ir_variable(glsl_type::uint_type, "aFracHi", ir_var_auto); + body.emit(r0A8C); + ir_variable *const r0A8D = body.make_temp(glsl_type::uint_type, "extractFloat64FracHi_retval"); + body.emit(assign(r0A8D, bit_and(swizzle_y(r0A87), body.constant(1048575u)), 0x01)); + + body.emit(assign(r0A8C, r0A8D, 0x01)); + + ir_variable *const r0A8E = body.make_temp(glsl_type::int_type, "extractFloat64Exp_retval"); + ir_expression *const r0A8F = 
rshift(swizzle_y(r0A87), body.constant(int(20))); + ir_expression *const r0A90 = bit_and(r0A8F, body.constant(2047u)); + body.emit(assign(r0A8E, expr(ir_unop_u2i, r0A90), 0x01)); + + body.emit(assign(r0A8B, rshift(swizzle_y(r0A87), body.constant(int(31))), 0x01)); + + body.emit(assign(r0A8A, body.constant(0u), 0x01)); + + ir_variable *const r0A91 = body.make_temp(glsl_type::int_type, "assignment_tmp"); + body.emit(assign(r0A91, add(r0A8E, body.constant(int(-1043))), 0x01)); + + /* IF CONDITION */ + ir_expression *const r0A93 = lequal(body.constant(int(0)), r0A91); + ir_if *f0A92 = new(mem_ctx) ir_if(operand(r0A93).val); + exec_list *const f0A92_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f0A92->then_instructions; + + /* IF CONDITION */ + ir_expression *const r0A95 = less(body.constant(int(1054)), r0A8E); + ir_if *f0A94 = new(mem_ctx) ir_if(operand(r0A95).val); + exec_list *const f0A94_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f0A94->then_instructions; + + /* IF CONDITION */ + ir_expression *const r0A97 = equal(r0A8E, body.constant(int(2047))); + ir_expression *const r0A98 = bit_or(r0A8D, swizzle_x(r0A87)); + ir_expression *const r0A99 = expr(ir_unop_u2i, r0A98); + ir_expression *const r0A9A = expr(ir_unop_i2b, r0A99); + ir_expression *const r0A9B = logic_and(r0A97, r0A9A); + ir_if *f0A96 = new(mem_ctx) ir_if(operand(r0A9B).val); + exec_list *const f0A96_parent_instructions = body.instructions; + +/* THEN INSTRUCTIONS */ +body.instructions = &f0A96->then_instructions; + +body.emit(assign(r0A8B, body.constant(0u), 0x01)); + + + body.instructions = f0A96_parent_instructions; + body.emit(f0A96); + + /* END IF */ + + ir_variable *const r0A9C = body.make_temp(glsl_type::int_type, "conditional_tmp"); + /* IF CONDITION */ + ir_expression *const r0A9E = expr(ir_unop_u2i, r0A8B); + ir_expression *const r0A9F = expr(ir_unop_i2b, r0A9E); + ir_if *f0A9D = new(mem_ctx) 
ir_if(operand(r0A9F).val); + exec_list *const f0A9D_parent_instructions = body.instructions; + +/* THEN INSTRUCTIONS */ +body.instructions = &f0A9D->then_instructions; + +body.emit(assign(r0A9C, body.constant(int(-2147483648)), 0x01)); + + +/* ELSE INSTRUCTIONS */ +body.instructions = &f0A9D->else_instructions; + +body.emit(assign(r0A9C, body.constant(int(2147483647)), 0x01)); + + + body.instructions =
[Mesa-dev] [PATCH 17/47] glsl: Add "built-in" functions to do round(fp64)
Signed-off-by: Elie Tournier --- src/compiler/glsl/builtin_float64.h | 225 src/compiler/glsl/builtin_functions.cpp | 4 + src/compiler/glsl/builtin_functions.h | 3 + src/compiler/glsl/float64.glsl | 41 ++ src/compiler/glsl/glcpp/glcpp-parse.y | 1 + 5 files changed, 274 insertions(+) diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h index 2bce013d0e..97d6adfd9f 100644 --- a/src/compiler/glsl/builtin_float64.h +++ b/src/compiler/glsl/builtin_float64.h @@ -18254,3 +18254,228 @@ ftrunc64(void *mem_ctx, builtin_available_predicate avail) sig->replace_parameters(&sig_parameters); return sig; } +ir_function_signature * +fround64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r0F1C = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r0F1C); + ir_variable *const r0F1D = body.make_temp(glsl_type::bool_type, "execute_flag"); + body.emit(assign(r0F1D, body.constant(true), 0x01)); + + ir_variable *const r0F1E = body.make_temp(glsl_type::uvec2_type, "return_value"); + ir_variable *const r0F1F = new(mem_ctx) ir_variable(glsl_type::uint_type, "aLo", ir_var_auto); + body.emit(r0F1F); + ir_variable *const r0F20 = new(mem_ctx) ir_variable(glsl_type::uint_type, "aHi", ir_var_auto); + body.emit(r0F20); + ir_variable *const r0F21 = body.make_temp(glsl_type::int_type, "assignment_tmp"); + ir_expression *const r0F22 = rshift(swizzle_y(r0F1C), body.constant(int(20))); + ir_expression *const r0F23 = bit_and(r0F22, body.constant(2047u)); + ir_expression *const r0F24 = expr(ir_unop_u2i, r0F23); + body.emit(assign(r0F21, add(r0F24, body.constant(int(-1023))), 0x01)); + + body.emit(assign(r0F20, swizzle_y(r0F1C), 0x01)); + + body.emit(assign(r0F1F, swizzle_x(r0F1C), 0x01)); + + /* IF 
CONDITION */ + ir_expression *const r0F26 = less(r0F21, body.constant(int(20))); + ir_if *f0F25 = new(mem_ctx) ir_if(operand(r0F26).val); + exec_list *const f0F25_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f0F25->then_instructions; + + /* IF CONDITION */ + ir_expression *const r0F28 = less(r0F21, body.constant(int(0))); + ir_if *f0F27 = new(mem_ctx) ir_if(operand(r0F28).val); + exec_list *const f0F27_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f0F27->then_instructions; + + body.emit(assign(r0F20, bit_and(swizzle_y(r0F1C), body.constant(2147483648u)), 0x01)); + + /* IF CONDITION */ + ir_expression *const r0F2A = equal(r0F21, body.constant(int(-1))); + ir_expression *const r0F2B = nequal(swizzle_x(r0F1C), body.constant(0u)); + ir_expression *const r0F2C = logic_and(r0F2A, r0F2B); + ir_if *f0F29 = new(mem_ctx) ir_if(operand(r0F2C).val); + exec_list *const f0F29_parent_instructions = body.instructions; + +/* THEN INSTRUCTIONS */ +body.instructions = &f0F29->then_instructions; + +body.emit(assign(r0F20, bit_or(r0F20, body.constant(1072693248u)), 0x01)); + + + body.instructions = f0F29_parent_instructions; + body.emit(f0F29); + + /* END IF */ + + body.emit(assign(r0F1F, body.constant(0u), 0x01)); + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f0F27->else_instructions; + + ir_variable *const r0F2D = body.make_temp(glsl_type::uint_type, "assignment_tmp"); + body.emit(assign(r0F2D, rshift(body.constant(1048575u), r0F21), 0x01)); + + /* IF CONDITION */ + ir_expression *const r0F2F = bit_and(r0F20, r0F2D); + ir_expression *const r0F30 = equal(r0F2F, body.constant(0u)); + ir_expression *const r0F31 = equal(r0F1F, body.constant(0u)); + ir_expression *const r0F32 = logic_and(r0F30, r0F31); + ir_if *f0F2E = new(mem_ctx) ir_if(operand(r0F32).val); + exec_list *const f0F2E_parent_instructions = body.instructions; + +/* THEN INSTRUCTIONS */ +body.instructions = 
&f0F2E->then_instructions; + +body.emit(assign(r0F1E, r0F1C, 0x03)); + +body.emit(assign(r0F1D, body.constant(false), 0x01)); + + +/* ELSE INSTRUCTIONS */ +body.instructions = &f0F2E->else_instructions; + +ir_expression *const r0F33 = rshift(body.constant(524288u), r0F21); +body.emit(assign(r0F20, add(r0F20, r0F33), 0x01)); + +ir_expression *const r0F34 = expr(ir_unop_bit_not,
[Mesa-dev] [PATCH 13/47] glsl: Add "built-in" functions to do fp64_to_fp32(fp64)
This algorithm seems to be ok on softpipe but fails the piglit test on r600g when we convert the fp64 representing the smallest fp32 into a fp32. However, the IR is the same in both cases. Signed-off-by: Elie Tournier --- src/compiler/glsl/builtin_float64.h | 494 src/compiler/glsl/builtin_functions.cpp | 4 + src/compiler/glsl/builtin_functions.h | 3 + src/compiler/glsl/float64.glsl | 108 +++ src/compiler/glsl/glcpp/glcpp-parse.y | 1 + 5 files changed, 610 insertions(+) diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h index 644407a185..c372de0f4d 100644 --- a/src/compiler/glsl/builtin_float64.h +++ b/src/compiler/glsl/builtin_float64.h @@ -13171,3 +13171,497 @@ int_to_fp64(void *mem_ctx, builtin_available_predicate avail) sig->replace_parameters(&sig_parameters); return sig; } +ir_function_signature * +packFloat32(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::float_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r0AF8 = new(mem_ctx) ir_variable(glsl_type::uint_type, "zSign", ir_var_function_in); + sig_parameters.push_tail(r0AF8); + ir_variable *const r0AF9 = new(mem_ctx) ir_variable(glsl_type::int_type, "zExp", ir_var_function_in); + sig_parameters.push_tail(r0AF9); + ir_variable *const r0AFA = new(mem_ctx) ir_variable(glsl_type::uint_type, "zFrac", ir_var_function_in); + sig_parameters.push_tail(r0AFA); + ir_variable *const r0AFB = body.make_temp(glsl_type::float_type, "uintBitsToFloat_retval"); + ir_expression *const r0AFC = lshift(r0AF8, body.constant(int(31))); + ir_expression *const r0AFD = expr(ir_unop_i2u, r0AF9); + ir_expression *const r0AFE = lshift(r0AFD, body.constant(int(23))); + ir_expression *const r0AFF = add(r0AFC, r0AFE); + ir_expression *const r0B00 = add(r0AFF, r0AFA); + body.emit(assign(r0AFB, expr(ir_unop_bitcast_u2f, r0B00), 0x01)); + 
+ body.emit(ret(r0AFB)); + + sig->replace_parameters(&sig_parameters); + return sig; +} +ir_function_signature * +roundAndPackFloat32(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::float_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r0B01 = new(mem_ctx) ir_variable(glsl_type::uint_type, "zSign", ir_var_function_in); + sig_parameters.push_tail(r0B01); + ir_variable *const r0B02 = new(mem_ctx) ir_variable(glsl_type::int_type, "zExp", ir_var_function_in); + sig_parameters.push_tail(r0B02); + ir_variable *const r0B03 = new(mem_ctx) ir_variable(glsl_type::uint_type, "zFrac", ir_var_function_in); + sig_parameters.push_tail(r0B03); + ir_variable *const r0B04 = body.make_temp(glsl_type::bool_type, "execute_flag"); + body.emit(assign(r0B04, body.constant(true), 0x01)); + + ir_variable *const r0B05 = body.make_temp(glsl_type::float_type, "return_value"); + ir_variable *const r0B06 = new(mem_ctx) ir_variable(glsl_type::int_type, "roundBits", ir_var_auto); + body.emit(r0B06); + ir_expression *const r0B07 = bit_and(r0B03, body.constant(127u)); + body.emit(assign(r0B06, expr(ir_unop_u2i, r0B07), 0x01)); + + /* IF CONDITION */ + ir_expression *const r0B09 = expr(ir_unop_i2u, r0B02); + ir_expression *const r0B0A = lequal(body.constant(253u), r0B09); + ir_if *f0B08 = new(mem_ctx) ir_if(operand(r0B0A).val); + exec_list *const f0B08_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f0B08->then_instructions; + + /* IF CONDITION */ + ir_expression *const r0B0C = less(body.constant(int(253)), r0B02); + ir_expression *const r0B0D = equal(r0B02, body.constant(int(253))); + ir_expression *const r0B0E = expr(ir_unop_u2i, r0B03); + ir_expression *const r0B0F = less(r0B0E, body.constant(int(-64))); + ir_expression *const r0B10 = logic_and(r0B0D, r0B0F); + ir_expression *const 
r0B11 = logic_or(r0B0C, r0B10); + ir_if *f0B0B = new(mem_ctx) ir_if(operand(r0B11).val); + exec_list *const f0B0B_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f0B0B->then_instructions; + + ir_expression *const r0B12 = lshift(r0B01, body.constant(int(31))); + ir_expression *const r0B13 = add(r0B12, body.constant(2139095040u)); + body.emit(assign(r0B05, expr(ir_unop_bitcast_u2f, r0B13), 0x01)); + + body.emit(assign(r0B04, body.constant(false), 0x01)); + + + /* ELSE INSTRUCTIONS */ +
[Mesa-dev] [PATCH 21/47] glsl: Add a lowering pass for 64-bit float sign()
Signed-off-by: Elie Tournier --- src/compiler/glsl/lower_64bit.cpp | 5 - src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/compiler/glsl/lower_64bit.cpp b/src/compiler/glsl/lower_64bit.cpp index 91b972fe82..86ee13cb89 100644 --- a/src/compiler/glsl/lower_64bit.cpp +++ b/src/compiler/glsl/lower_64bit.cpp @@ -421,7 +421,10 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue) case ir_unop_sign: if (lowering(SIGN64)) { - *rvalue = handle_op(ir, "__builtin_sign64", generate_ir::sign64); + if (ir->type->base_type == GLSL_TYPE_DOUBLE) +*rvalue = handle_op(ir, "__builtin_fsign64", generate_ir::fsign64); + else +*rvalue = handle_op(ir, "__builtin_sign64", generate_ir::sign64); } break; diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 3b0b19be53..1d5e74ab31 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -7059,7 +7059,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) if (!pscreen->get_param(pscreen, PIPE_CAP_DOUBLES) && ctx->Const.GLSLVersion >= 130) { unsigned lower_inst = ABS64 | - NEG64; + NEG64 | + SIGN64; lower_64bit_double_instructions(ir, lower_inst); } -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 30/47] glsl: Add a lowering pass for 64-bit float i2d()
Signed-off-by: Elie Tournier --- src/compiler/glsl/ir_optimization.h| 1 + src/compiler/glsl/lower_64bit.cpp | 7 +++ src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 ++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h index 157c7e1a6f..c59d7f6ee2 100644 --- a/src/compiler/glsl/ir_optimization.h +++ b/src/compiler/glsl/ir_optimization.h @@ -71,6 +71,7 @@ #define D2U (1U << 10) #define U2D (1U << 11) #define D2I (1U << 12) +#define I2D (1U << 13) /** * \see class lower_packing_builtins_visitor diff --git a/src/compiler/glsl/lower_64bit.cpp b/src/compiler/glsl/lower_64bit.cpp index 41255094ec..570f997779 100644 --- a/src/compiler/glsl/lower_64bit.cpp +++ b/src/compiler/glsl/lower_64bit.cpp @@ -450,6 +450,13 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue) } break; + case ir_unop_i2d: + if (lowering(I2D)) { + if (ir->type->base_type == GLSL_TYPE_DOUBLE) +*rvalue = handle_op(ir, "__builtin_int_to_fp64", generate_ir::int_to_fp64, true); + } + break; + case ir_unop_neg: if (lowering(NEG64)) { if (ir->type->base_type == GLSL_TYPE_DOUBLE) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 821461e439..1be223a3cb 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -7068,7 +7068,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) MUL64 | D2U | U2D | - D2I; + D2I | + I2D; lower_64bit_double_instructions(ir, lower_inst); } -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 20/47] glsl: Add a lowering pass for 64-bit float neg()
Signed-off-by: Elie Tournier --- src/compiler/glsl/ir_optimization.h| 1 + src/compiler/glsl/lower_64bit.cpp | 7 +++ src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 ++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h index 6cc0909a80..ad76a7032e 100644 --- a/src/compiler/glsl/ir_optimization.h +++ b/src/compiler/glsl/ir_optimization.h @@ -63,6 +63,7 @@ #define DIV64 (1U << 2) #define MOD64 (1U << 3) #define ABS64 (1U << 4) +#define NEG64 (1U << 5) /** * \see class lower_packing_builtins_visitor diff --git a/src/compiler/glsl/lower_64bit.cpp b/src/compiler/glsl/lower_64bit.cpp index 4887e5538c..91b972fe82 100644 --- a/src/compiler/glsl/lower_64bit.cpp +++ b/src/compiler/glsl/lower_64bit.cpp @@ -412,6 +412,13 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue) } break; + case ir_unop_neg: + if (lowering(NEG64)) { + if (ir->type->base_type == GLSL_TYPE_DOUBLE) +*rvalue = handle_op(ir, "__builtin_fneg64", generate_ir::fneg64); + } + break; + case ir_unop_sign: if (lowering(SIGN64)) { *rvalue = handle_op(ir, "__builtin_sign64", generate_ir::sign64); diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 991eae4917..3b0b19be53 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -7058,7 +7058,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) */ if (!pscreen->get_param(pscreen, PIPE_CAP_DOUBLES) && ctx->Const.GLSLVersion >= 130) { - unsigned lower_inst = ABS64; + unsigned lower_inst = ABS64 | + NEG64; lower_64bit_double_instructions(ir, lower_inst); } -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 23/47] glsl: Add a lowering pass for 64-bit float lequal()
Signed-off-by: Elie Tournier --- src/compiler/glsl/ir_optimization.h| 1 + src/compiler/glsl/lower_64bit.cpp | 7 +++ src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 ++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h index 86cfdf7619..271dad77e4 100644 --- a/src/compiler/glsl/ir_optimization.h +++ b/src/compiler/glsl/ir_optimization.h @@ -65,6 +65,7 @@ #define ABS64 (1U << 4) #define NEG64 (1U << 5) #define EQ64 (1U << 6) +#define LE64 (1U << 7) /** * \see class lower_packing_builtins_visitor diff --git a/src/compiler/glsl/lower_64bit.cpp b/src/compiler/glsl/lower_64bit.cpp index 95d7c4c3d4..68ffa8f706 100644 --- a/src/compiler/glsl/lower_64bit.cpp +++ b/src/compiler/glsl/lower_64bit.cpp @@ -445,6 +445,13 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue) } break; + case ir_binop_lequal: + if (lowering(LE64)) { + if (ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE) +*rvalue = handle_op(ir, "__builtin_fle64", generate_ir::fle64); + } + break; + case ir_binop_mod: if (lowering(MOD64)) { if (ir->type->base_type == GLSL_TYPE_UINT64) { diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index e7b848872b..78fa1e3a76 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -7061,7 +7061,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) unsigned lower_inst = ABS64 | NEG64 | SIGN64 | - EQ64; + EQ64 | + LE64; lower_64bit_double_instructions(ir, lower_inst); } -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 25/47] glsl: Add a lowering pass for 64-bit float add()
Signed-off-by: Elie Tournier --- src/compiler/glsl/ir_optimization.h| 1 + src/compiler/glsl/lower_64bit.cpp | 7 +++ src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 ++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h index ee8cd92269..81ffe67077 100644 --- a/src/compiler/glsl/ir_optimization.h +++ b/src/compiler/glsl/ir_optimization.h @@ -67,6 +67,7 @@ #define EQ64 (1U << 6) #define LE64 (1U << 7) #define LT64 (1U << 8) +#define ADD64 (1U << 9) /** * \see class lower_packing_builtins_visitor diff --git a/src/compiler/glsl/lower_64bit.cpp b/src/compiler/glsl/lower_64bit.cpp index 2b6580d293..414e8414f9 100644 --- a/src/compiler/glsl/lower_64bit.cpp +++ b/src/compiler/glsl/lower_64bit.cpp @@ -428,6 +428,13 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue) } break; + case ir_binop_add: + if (lowering(ADD64)) { + if (ir->type->base_type == GLSL_TYPE_DOUBLE) +*rvalue = handle_op(ir, "__builtin_fadd64", generate_ir::fadd64); + } + break; + case ir_binop_div: if (lowering(DIV64)) { if (ir->type->base_type == GLSL_TYPE_UINT64) { diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 59f896a785..fab331edea 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -7063,7 +7063,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) SIGN64 | EQ64 | LE64 | - LT64; + LT64 | + ADD64; lower_64bit_double_instructions(ir, lower_inst); } -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 22/47] glsl: Add a lowering pass for 64-bit float equal()
Signed-off-by: Elie Tournier --- src/compiler/glsl/ir_optimization.h| 1 + src/compiler/glsl/lower_64bit.cpp | 7 +++ src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 ++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h index ad76a7032e..86cfdf7619 100644 --- a/src/compiler/glsl/ir_optimization.h +++ b/src/compiler/glsl/ir_optimization.h @@ -64,6 +64,7 @@ #define MOD64 (1U << 3) #define ABS64 (1U << 4) #define NEG64 (1U << 5) +#define EQ64 (1U << 6) /** * \see class lower_packing_builtins_visitor diff --git a/src/compiler/glsl/lower_64bit.cpp b/src/compiler/glsl/lower_64bit.cpp index 86ee13cb89..95d7c4c3d4 100644 --- a/src/compiler/glsl/lower_64bit.cpp +++ b/src/compiler/glsl/lower_64bit.cpp @@ -438,6 +438,13 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue) } break; + case ir_binop_equal: + if (lowering(EQ64)) { + if (ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE) +*rvalue = handle_op(ir, "__builtin_feq64", generate_ir::feq64); + } + break; + case ir_binop_mod: if (lowering(MOD64)) { if (ir->type->base_type == GLSL_TYPE_UINT64) { diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 1d5e74ab31..e7b848872b 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -7060,7 +7060,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) ctx->Const.GLSLVersion >= 130) { unsigned lower_inst = ABS64 | NEG64 | - SIGN64; + SIGN64 | + EQ64; lower_64bit_double_instructions(ir, lower_inst); } -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 29/47] glsl: Add a lowering pass for 64-bit float d2i()
Signed-off-by: Elie Tournier --- src/compiler/glsl/ir_optimization.h| 1 + src/compiler/glsl/lower_64bit.cpp | 7 +++ src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 ++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h index 5d1118d0cf..157c7e1a6f 100644 --- a/src/compiler/glsl/ir_optimization.h +++ b/src/compiler/glsl/ir_optimization.h @@ -70,6 +70,7 @@ #define ADD64 (1U << 9) #define D2U (1U << 10) #define U2D (1U << 11) +#define D2I (1U << 12) /** * \see class lower_packing_builtins_visitor diff --git a/src/compiler/glsl/lower_64bit.cpp b/src/compiler/glsl/lower_64bit.cpp index 38b0420baa..41255094ec 100644 --- a/src/compiler/glsl/lower_64bit.cpp +++ b/src/compiler/glsl/lower_64bit.cpp @@ -436,6 +436,13 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue) } break; + case ir_unop_d2i: + if (lowering(D2I)) { + if (ir->type->base_type == GLSL_TYPE_INT) +*rvalue = handle_op(ir, "__builtin_fp64_to_int", generate_ir::fp64_to_int); + } + break; + case ir_unop_d2u: if (lowering(D2U)) { if (ir->type->base_type == GLSL_TYPE_UINT) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index df815a86d1..821461e439 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -7067,7 +7067,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) ADD64 | MUL64 | D2U | - U2D; + U2D | + D2I; lower_64bit_double_instructions(ir, lower_inst); } -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 28/47] glsl: Add a lowering pass for 64-bit float u2d()
Handle non 64bit sources (airlied) Signed-off-by: Elie Tournier --- src/compiler/glsl/ir_optimization.h| 1 + src/compiler/glsl/lower_64bit.cpp | 49 -- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 +- 3 files changed, 43 insertions(+), 10 deletions(-) diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h index aa71dfdb39..5d1118d0cf 100644 --- a/src/compiler/glsl/ir_optimization.h +++ b/src/compiler/glsl/ir_optimization.h @@ -69,6 +69,7 @@ #define LT64 (1U << 8) #define ADD64 (1U << 9) #define D2U (1U << 10) +#define U2D (1U << 11) /** * \see class lower_packing_builtins_visitor diff --git a/src/compiler/glsl/lower_64bit.cpp b/src/compiler/glsl/lower_64bit.cpp index 0b9d466b9d..38b0420baa 100644 --- a/src/compiler/glsl/lower_64bit.cpp +++ b/src/compiler/glsl/lower_64bit.cpp @@ -52,7 +52,7 @@ using namespace ir_builder; namespace lower_64bit { void expand_source(ir_factory &, ir_rvalue *val, ir_variable **expanded_src); - +void extract_source(ir_factory &, ir_rvalue *val, ir_variable **extracted_src); ir_dereference_variable *compact_destination(ir_factory &, const glsl_type *type, ir_variable *result[4]); @@ -124,7 +124,7 @@ private: ir_factory added_functions; ir_rvalue *handle_op(ir_expression *ir, const char *function_name, -function_generator generator); +function_generator generator, bool conv_to_double = false); }; } /* anonymous namespace */ @@ -245,6 +245,25 @@ lower_64bit::expand_source(ir_factory &body, expanded_src[i] = expanded_src[0]; } +void +lower_64bit::extract_source(ir_factory &body, +ir_rvalue *val, +ir_variable **extracted_src) +{ + ir_variable *const temp = body.make_temp(val->type, "tmp"); + + body.emit(assign(temp, val)); + unsigned i; + for (i = 0; i < val->type->vector_elements; i++) { + extracted_src[i] = body.make_temp(val->type->get_scalar_type(), "extracted_source"); + + body.emit(assign(extracted_src[i], swizzle(temp, i, 1))); + } + + for (/* empty */; i < 4; i++) + extracted_src[i] = extracted_src[0]; 
+} + /** * Convert a series of uvec2 results into a single 64-bit integer vector */ @@ -315,7 +334,10 @@ lower_64bit::lower_op_to_function_call(ir_instruction *base_ir, ir_factory body(&instructions, mem_ctx); for (unsigned i = 0; i < num_operands; i++) { - expand_source(body, ir->operands[i], src[i]); + if (ir->operands[i]->type->is_64bit()) + expand_source(body, ir->operands[i], src[i]); + else + extract_source(body, ir->operands[i], src[i]); if (ir->operands[i]->type->vector_elements > source_components) source_components = ir->operands[i]->type->vector_elements; @@ -365,13 +387,15 @@ lower_64bit::lower_op_to_function_call(ir_instruction *base_ir, ir_rvalue * lower_64bit_visitor::handle_op(ir_expression *ir, const char *function_name, - function_generator generator) + function_generator generator, + bool conv_to_double) { - for (unsigned i = 0; i < ir->get_num_operands(); i++) - if (!ir->operands[i]->type->is_integer_64() && - !ir->operands[i]->type->is_double()) - return ir; - + if (conv_to_double == false) { + for (unsigned i = 0; i < ir->num_operands; i++) + if (!ir->operands[i]->type->is_integer_64() && + !ir->operands[i]->type->is_double()) +return ir; + } /* Get a handle to the correct ir_function_signature for the core * operation. */ @@ -435,6 +459,13 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue) } break; + case ir_unop_u2d: + if (lowering(U2D)) { + if (ir->type->base_type == GLSL_TYPE_DOUBLE) +*rvalue = handle_op(ir, "__builtin_uint_to_fp64", generate_ir::uint_to_fp64, true); + } + break; + case ir_binop_add: if (lowering(ADD64)) { if (ir->type->base_type == GLSL_TYPE_DOUBLE) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index de5a499f8d..df815a86d1 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -7066,7 +7066,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) LT64 |
[Mesa-dev] [PATCH 33/47] glsl: Add a lowering pass for 64-bit float sqrt()
Signed-off-by: Elie Tournier --- src/compiler/glsl/ir_optimization.h| 1 + src/compiler/glsl/lower_64bit.cpp | 7 +++ src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 ++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h index dce0bf4a9f..7a5a30541f 100644 --- a/src/compiler/glsl/ir_optimization.h +++ b/src/compiler/glsl/ir_optimization.h @@ -74,6 +74,7 @@ #define I2D (1U << 13) #define D2F (1U << 14) #define F2D (1U << 15) +#define SQRT64 (1U << 16) /** * \see class lower_packing_builtins_visitor diff --git a/src/compiler/glsl/lower_64bit.cpp b/src/compiler/glsl/lower_64bit.cpp index 054cdcb50a..3681c4f0c5 100644 --- a/src/compiler/glsl/lower_64bit.cpp +++ b/src/compiler/glsl/lower_64bit.cpp @@ -487,6 +487,13 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue) } break; + case ir_unop_sqrt: + if (lowering(SQRT64)) { + if (ir->type->base_type == GLSL_TYPE_DOUBLE) +*rvalue = handle_op(ir, "__builtin_fsqrt64", generate_ir::fsqrt64); + } + break; + case ir_unop_u2d: if (lowering(U2D)) { if (ir->type->base_type == GLSL_TYPE_DOUBLE) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index f26368812f..474e9bcdea 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -7071,7 +7071,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) D2I | I2D | D2F | - F2D; + F2D | + SQRT64; lower_64bit_double_instructions(ir, lower_inst); } -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 24/47] glsl: Add a lowering pass for 64-bit float less()
Signed-off-by: Elie Tournier --- src/compiler/glsl/ir_optimization.h| 1 + src/compiler/glsl/lower_64bit.cpp | 7 +++ src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 ++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h index 271dad77e4..ee8cd92269 100644 --- a/src/compiler/glsl/ir_optimization.h +++ b/src/compiler/glsl/ir_optimization.h @@ -66,6 +66,7 @@ #define NEG64 (1U << 5) #define EQ64 (1U << 6) #define LE64 (1U << 7) +#define LT64 (1U << 8) /** * \see class lower_packing_builtins_visitor diff --git a/src/compiler/glsl/lower_64bit.cpp b/src/compiler/glsl/lower_64bit.cpp index 68ffa8f706..2b6580d293 100644 --- a/src/compiler/glsl/lower_64bit.cpp +++ b/src/compiler/glsl/lower_64bit.cpp @@ -452,6 +452,13 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue) } break; + case ir_binop_less: + if (lowering(LT64)) { + if (ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE) +*rvalue = handle_op(ir, "__builtin_flt64", generate_ir::flt64); + } + break; + case ir_binop_mod: if (lowering(MOD64)) { if (ir->type->base_type == GLSL_TYPE_UINT64) { diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 78fa1e3a76..59f896a785 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -7062,7 +7062,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) NEG64 | SIGN64 | EQ64 | - LE64; + LE64 | + LT64; lower_64bit_double_instructions(ir, lower_inst); } -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 37/47] glsl/lower_64bit: handle any/all operations
From: Dave Airlie This just splits them out and combines the results. Signed-off-by: Dave Airlie --- src/compiler/glsl/lower_64bit.cpp | 61 ++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/src/compiler/glsl/lower_64bit.cpp b/src/compiler/glsl/lower_64bit.cpp index f8fcf9fd0a..79148bb169 100644 --- a/src/compiler/glsl/lower_64bit.cpp +++ b/src/compiler/glsl/lower_64bit.cpp @@ -59,6 +59,12 @@ ir_dereference_variable *compact_destination(ir_factory &, ir_dereference_variable *merge_destination(ir_factory &, const glsl_type *type, ir_variable *result[4]); +ir_dereference_variable *all_equal_destination(ir_factory &, + const glsl_type *type, + ir_variable *result[4]); +ir_dereference_variable *any_nequal_destination(ir_factory &, + const glsl_type *type, + ir_variable *result[4]); ir_rvalue *lower_op_to_function_call(ir_instruction *base_ir, ir_expression *ir, @@ -309,6 +315,47 @@ lower_64bit::merge_destination(ir_factory &body, return new(mem_ctx) ir_dereference_variable(merged_result); } +/* + * and the results from each comparison. 
+ */ +ir_dereference_variable * +lower_64bit::all_equal_destination(ir_factory &body, +const glsl_type *type, +ir_variable *result[4]) +{ + ir_variable *const merged_result = + body.make_temp(glsl_type::bool_type, "all_result"); + + body.emit(assign(merged_result, result[0])); + for (unsigned i = 1; i < type->vector_elements; i++) { + body.emit(assign(merged_result, logic_and(merged_result, result[i]))); + } + + void *const mem_ctx = ralloc_parent(merged_result); + return new(mem_ctx) ir_dereference_variable(merged_result); +} + +/* + * and the results from each comparison, then not the result + */ +ir_dereference_variable * +lower_64bit::any_nequal_destination(ir_factory &body, +const glsl_type *type, +ir_variable *result[4]) +{ + ir_variable *const merged_result = + body.make_temp(glsl_type::bool_type, "any_result"); + + body.emit(assign(merged_result, result[0])); + for (unsigned i = 1; i < type->vector_elements; i++) { + body.emit(assign(merged_result, logic_and(merged_result, result[i]))); + } + + body.emit(assign(merged_result, logic_not(merged_result))); + void *const mem_ctx = ralloc_parent(merged_result); + return new(mem_ctx) ir_dereference_variable(merged_result); +} + ir_rvalue * lower_64bit::lower_op_to_function_call(ir_instruction *base_ir, ir_expression *ir, @@ -362,7 +409,11 @@ lower_64bit::lower_op_to_function_call(ir_instruction *base_ir, } ir_rvalue *rv; - if (ir->type->is_64bit()) + if (ir->operation == ir_binop_all_equal) + rv = all_equal_destination(body, ir->type, dst); + else if (ir->operation == ir_binop_any_nequal) + rv = any_nequal_destination(body, ir->type, dst); + else if (ir->type->is_64bit()) rv = compact_destination(body, ir->type, dst); else rv = merge_destination(body, ir->type, dst); @@ -579,6 +630,14 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue) } break; + case ir_binop_all_equal: + case ir_binop_any_nequal: + if (lowering(EQ64)) { +if (ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE) { +*rvalue = handle_op(ir, 
"__builtin_feq64", generate_ir::feq64); +} + } + break; default: break; } -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 39/47] glsl: Add a lowering pass for 64-bit float gequal()
Signed-off-by: Elie Tournier --- src/compiler/glsl/lower_64bit.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/compiler/glsl/lower_64bit.cpp b/src/compiler/glsl/lower_64bit.cpp index 4978759eb9..07d39de79d 100644 --- a/src/compiler/glsl/lower_64bit.cpp +++ b/src/compiler/glsl/lower_64bit.cpp @@ -417,7 +417,8 @@ lower_64bit::lower_op_to_function_call(ir_instruction *base_ir, body.emit(c); - if (ir->operation == ir_unop_d2b) + if (ir->operation == ir_unop_d2b || + ir->operation == ir_binop_gequal) body.emit(assign(dst[i], logic_not(dst[i]))); } @@ -624,6 +625,7 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue) } break; + case ir_binop_gequal: case ir_binop_less: if (lowering(LT64)) { if (ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE) -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 38/47] glsl/lower_64bit: lower d2b using comparison
From: Dave Airlie This just does a compare to 0 and inverts the result to lower d2b. Not 100% sure this is always correct, but it passes piglit --- src/compiler/glsl/lower_64bit.cpp | 22 +- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/compiler/glsl/lower_64bit.cpp b/src/compiler/glsl/lower_64bit.cpp index 79148bb169..4978759eb9 100644 --- a/src/compiler/glsl/lower_64bit.cpp +++ b/src/compiler/glsl/lower_64bit.cpp @@ -361,7 +361,7 @@ lower_64bit::lower_op_to_function_call(ir_instruction *base_ir, ir_expression *ir, ir_function_signature *callee) { - const unsigned num_operands = ir->num_operands; + unsigned num_operands = ir->num_operands; ir_variable *src[4][4]; ir_variable *dst[4]; void *const mem_ctx = ralloc_parent(ir); @@ -390,6 +390,16 @@ lower_64bit::lower_op_to_function_call(ir_instruction *base_ir, source_components = ir->operands[i]->type->vector_elements; } + if (ir->operation == ir_unop_d2b) { + for (unsigned i = 0; i < source_components; i++) { + src[1][i] = body.make_temp(glsl_type::uvec2_type, "zero"); + + body.emit(assign(src[1][i], body.constant(0u), 1)); + body.emit(assign(src[1][i], body.constant(0u), 2)); + } + num_operands++; + } + for (unsigned i = 0; i < source_components; i++) { dst[i] = body.make_temp(result_type, "expanded_64bit_result"); @@ -406,6 +416,9 @@ lower_64bit::lower_op_to_function_call(ir_instruction *base_ir, &parameters); body.emit(c); + + if (ir->operation == ir_unop_d2b) + body.emit(assign(dst[i], logic_not(dst[i]))); } ir_rvalue *rv; @@ -487,6 +500,13 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue) } break; + case ir_unop_d2b: + if (lowering(EQ64)) { + if (ir->type->base_type == GLSL_TYPE_BOOL) +*rvalue = handle_op(ir, "__builtin_feq64", generate_ir::feq64); + } + break; + case ir_unop_d2f: if (lowering(D2F)) { if (ir->type->base_type == GLSL_TYPE_FLOAT) -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev