Reviewed-by: Marek Olšák <marek.ol...@amd.com> Marek
On Sun, Jul 12, 2015 at 4:55 AM, Dave Airlie <airl...@gmail.com> wrote: > From: Dave Airlie <airl...@redhat.com> > > This adds the translation from TGSI to AMDGPU llvm backend, for the > 64-bit opcodes. The backend pretty much handles everything for us > fine. There is one patch required for SI DFRAC support, that I know > off. > > Signed-off-by: Dave Airlie <airl...@redhat.com> > --- > docs/GL3.txt | 4 +- > src/gallium/drivers/radeon/radeon_llvm.h | 7 +- > .../drivers/radeon/radeon_setup_tgsi_llvm.c | 143 > ++++++++++++++++++++- > src/gallium/drivers/radeonsi/si_pipe.c | 1 + > src/gallium/drivers/radeonsi/si_shader.c | 31 ++++- > 5 files changed, 173 insertions(+), 13 deletions(-) > > diff --git a/docs/GL3.txt b/docs/GL3.txt > index 94bbcd1..d1a42be 100644 > --- a/docs/GL3.txt > +++ b/docs/GL3.txt > @@ -109,7 +109,7 @@ GL 4.0, GLSL 4.00: > - Enhanced per-sample shading DONE (r600, radeonsi) > - Interpolation functions DONE (r600) > - New overload resolution rules DONE > - GL_ARB_gpu_shader_fp64 DONE (nvc0, llvmpipe, > softpipe) > + GL_ARB_gpu_shader_fp64 DONE (nvc0, radeonsi, > llvmpipe, softpipe) > GL_ARB_sample_shading DONE (i965, nv50, > nvc0, r600, radeonsi) > GL_ARB_shader_subroutine started (Dave) > GL_ARB_tessellation_shader started (Chris, Ilia) > @@ -127,7 +127,7 @@ GL 4.1, GLSL 4.10: > GL_ARB_get_program_binary DONE (0 binary > formats) > GL_ARB_separate_shader_objects DONE (all drivers) > GL_ARB_shader_precision started (Micah) > - GL_ARB_vertex_attrib_64bit DONE (nvc0, llvmpipe, > softpipe) > + GL_ARB_vertex_attrib_64bit DONE (nvc0, radeonsi > llvmpipe, softpipe) > GL_ARB_viewport_array DONE (i965, nv50, > nvc0, r600, radeonsi, llvmpipe) > > > diff --git a/src/gallium/drivers/radeon/radeon_llvm.h > b/src/gallium/drivers/radeon/radeon_llvm.h > index 6a9557b..591e698 100644 > --- a/src/gallium/drivers/radeon/radeon_llvm.h > +++ b/src/gallium/drivers/radeon/radeon_llvm.h > @@ -146,6 +146,8 @@ static inline LLVMTypeRef tgsi2llvmtype( > case TGSI_TYPE_UNSIGNED: > case TGSI_TYPE_SIGNED: > return LLVMInt32TypeInContext(ctx); > + case TGSI_TYPE_DOUBLE: > + return LLVMDoubleTypeInContext(ctx); > case TGSI_TYPE_UNTYPED: > case TGSI_TYPE_FLOAT: > return LLVMFloatTypeInContext(ctx); > @@ -205,6 +207,9 @@ build_tgsi_intrinsic_nomem( > struct lp_build_tgsi_context * bld_base, > struct lp_build_emit_data * emit_data); > > - > +LLVMValueRef > +radeon_llvm_emit_fetch_double(struct lp_build_tgsi_context *bld_base, > + LLVMValueRef ptr, > + LLVMValueRef ptr2); > > #endif /* RADEON_LLVM_H */ > diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c > b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c > index c8c980d..444a41c 100644 > --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c > +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c > @@ -116,6 +116,28 @@ emit_fetch( > enum tgsi_opcode_type type, > unsigned swizzle); > > +LLVMValueRef > +radeon_llvm_emit_fetch_double( > + struct lp_build_tgsi_context *bld_base, > + LLVMValueRef ptr, > + LLVMValueRef ptr2) > +{ > + LLVMBuilderRef builder = bld_base->base.gallivm->builder; > + LLVMValueRef result; > + > + result = > LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, > 32), bld_base->base.type.length * 2)); > + > + result = LLVMBuildInsertElement(builder, > + result, > + bitcast(bld_base, TGSI_TYPE_UNSIGNED, > ptr), > + bld_base->int_bld.zero, ""); > + result = LLVMBuildInsertElement(builder, > + result, > + bitcast(bld_base, TGSI_TYPE_UNSIGNED, > ptr2), > + bld_base->int_bld.one, ""); > + return bitcast(bld_base, TGSI_TYPE_DOUBLE, result); > +} > + > static LLVMValueRef > emit_array_fetch( > struct lp_build_tgsi_context *bld_base, > @@ -160,7 +182,7 @@ emit_fetch( > struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); > struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); > LLVMBuilderRef builder = bld_base->base.gallivm->builder; > - LLVMValueRef result = NULL, ptr; > + LLVMValueRef result = NULL, ptr, ptr2; > > if (swizzle == ~0) { > LLVMValueRef values[TGSI_NUM_CHANNELS]; > @@ -184,11 +206,27 @@ emit_fetch( > switch(reg->Register.File) { > case TGSI_FILE_IMMEDIATE: { > LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type); > - return > LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype); > + if (type == TGSI_TYPE_DOUBLE) { > + result = > LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, > 32), bld_base->base.type.length * 2)); > + result = LLVMConstInsertElement(result, > + > bld->immediates[reg->Register.Index][swizzle], > + > bld_base->int_bld.zero); > + result = LLVMConstInsertElement(result, > + > bld->immediates[reg->Register.Index][swizzle + 1], > + > bld_base->int_bld.one); > + return LLVMConstBitCast(result, ctype); > + } else { > + return > LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype); > + } > } > > case TGSI_FILE_INPUT: > result = > ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)]; > + if (type == TGSI_TYPE_DOUBLE) { > + ptr = result; > + ptr2 = > ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle + 1)]; > + return radeon_llvm_emit_fetch_double(bld_base, ptr, > ptr2); > + } > break; > > case TGSI_FILE_TEMPORARY: > @@ -199,11 +237,23 @@ emit_fetch( > break; > } > ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + > swizzle]; > + if (type == TGSI_TYPE_DOUBLE) { > + ptr2 = ctx->temps[reg->Register.Index * > TGSI_NUM_CHANNELS + swizzle + 1]; > + return radeon_llvm_emit_fetch_double(bld_base, > + LLVMBuildLoad(builder, ptr, > ""), > + LLVMBuildLoad(builder, ptr2, > "")); > + } > result = LLVMBuildLoad(builder, ptr, ""); > break; > > case TGSI_FILE_OUTPUT: > ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle); > + if (type == TGSI_TYPE_DOUBLE) { > + ptr2 = lp_get_output_ptr(bld, reg->Register.Index, > swizzle + 1); > + return radeon_llvm_emit_fetch_double(bld_base, > + LLVMBuildLoad(builder, ptr, > ""), > + LLVMBuildLoad(builder, ptr2, > "")); > + } > result = LLVMBuildLoad(builder, ptr, ""); > break; > > @@ -348,9 +398,10 @@ emit_store( > struct gallivm_state *gallivm = bld->bld_base.base.gallivm; > const struct tgsi_full_dst_register *reg = &inst->Dst[0]; > LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; > - LLVMValueRef temp_ptr; > + LLVMValueRef temp_ptr, temp_ptr2 = NULL; > unsigned chan, chan_index; > boolean is_vec_store = FALSE; > + enum tgsi_opcode_type dtype = > tgsi_opcode_infer_dst_type(inst->Instruction.Opcode); > > if (dst[0]) { > LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0])); > @@ -371,6 +422,8 @@ emit_store( > TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { > LLVMValueRef value = dst[chan_index]; > > + if (dtype == TGSI_TYPE_DOUBLE && (chan_index == 1 || > chan_index == 3)) > + continue; > if (inst->Instruction.Saturate) > value = radeon_llvm_saturate(bld_base, value); > > @@ -379,8 +432,9 @@ emit_store( > LLVMBuildStore(builder, value, temp_ptr); > continue; > } > - > - value = bitcast(bld_base, TGSI_TYPE_FLOAT, value); > + > + if (dtype != TGSI_TYPE_DOUBLE) > + value = bitcast(bld_base, TGSI_TYPE_FLOAT, value); > > if (reg->Register.Indirect) { > struct tgsi_declaration_range range = > get_array_range(bld_base, > @@ -418,6 +472,8 @@ emit_store( > switch(reg->Register.File) { > case TGSI_FILE_OUTPUT: > temp_ptr = > bld->outputs[reg->Register.Index][chan_index]; > + if (dtype == TGSI_TYPE_DOUBLE) > + temp_ptr2 = > bld->outputs[reg->Register.Index][chan_index + 1]; > break; > > case TGSI_FILE_TEMPORARY: > @@ -428,12 +484,28 @@ emit_store( > break; > } > temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * > reg->Register.Index + chan_index]; > + if (dtype == TGSI_TYPE_DOUBLE) > + temp_ptr2 = ctx->temps[ > TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1]; > + > break; > > default: > return; > } > - LLVMBuildStore(builder, value, temp_ptr); > + if (dtype != TGSI_TYPE_DOUBLE) > + LLVMBuildStore(builder, value, temp_ptr); > + else { > + LLVMValueRef ptr = LLVMBuildBitCast(builder, > value, > + > LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), 2), > ""); > + LLVMValueRef val2; > + value = LLVMBuildExtractElement(builder, ptr, > + > bld_base->uint_bld.zero, ""); > + val2 = LLVMBuildExtractElement(builder, ptr, > + > bld_base->uint_bld.one, ""); > + > + LLVMBuildStore(builder, bitcast(bld_base, > TGSI_TYPE_FLOAT, value), temp_ptr); > + LLVMBuildStore(builder, bitcast(bld_base, > TGSI_TYPE_FLOAT, val2), temp_ptr2); > + } > } > } > } > @@ -996,6 +1068,35 @@ static void emit_fcmp( > emit_data->output[emit_data->chan] = v; > } > > +static void emit_dcmp( > + const struct lp_build_tgsi_action *action, > + struct lp_build_tgsi_context * bld_base, > + struct lp_build_emit_data * emit_data) > +{ > + LLVMBuilderRef builder = bld_base->base.gallivm->builder; > + LLVMContextRef context = bld_base->base.gallivm->context; > + LLVMRealPredicate pred; > + > + /* Use ordered for everything but NE (which is usual for > + * float comparisons) > + */ > + switch (emit_data->inst->Instruction.Opcode) { > + case TGSI_OPCODE_DSEQ: pred = LLVMRealOEQ; break; > + case TGSI_OPCODE_DSGE: pred = LLVMRealOGE; break; > + case TGSI_OPCODE_DSLT: pred = LLVMRealOLT; break; > + case TGSI_OPCODE_DSNE: pred = LLVMRealUNE; break; > + default: assert(!"unknown instruction"); pred = 0; break; > + } > + > + LLVMValueRef v = LLVMBuildFCmp(builder, pred, > + emit_data->args[0], emit_data->args[1],""); > + > + v = LLVMBuildSExtOrBitCast(builder, v, > + LLVMInt32TypeInContext(context), ""); > + > + emit_data->output[emit_data->chan] = v; > +} > + > static void emit_not( > const struct lp_build_tgsi_action * action, > struct lp_build_tgsi_context * bld_base, > @@ -1161,6 +1262,16 @@ static void emit_ineg( > emit_data->args[0], ""); > } > > +static void emit_dneg( > + const struct lp_build_tgsi_action * action, > + struct lp_build_tgsi_context * bld_base, > + struct lp_build_emit_data * emit_data) > +{ > + LLVMBuilderRef builder = bld_base->base.gallivm->builder; > + emit_data->output[emit_data->chan] = LLVMBuildFNeg(builder, > + emit_data->args[0], ""); > +} > + > static void emit_f2i( > const struct lp_build_tgsi_action * action, > struct lp_build_tgsi_context * bld_base, > @@ -1423,6 +1534,12 @@ void radeon_llvm_context_init(struct > radeon_llvm_context * ctx) > lp_build_context_init(&bld_base->base, &ctx->gallivm, type); > lp_build_context_init(&ctx->soa.bld_base.uint_bld, &ctx->gallivm, > lp_uint_type(type)); > lp_build_context_init(&ctx->soa.bld_base.int_bld, &ctx->gallivm, > lp_int_type(type)); > + { > + struct lp_type dbl_type; > + dbl_type = type; > + dbl_type.width *= 2; > + lp_build_context_init(&ctx->soa.bld_base.dbl_bld, > &ctx->gallivm, dbl_type); > + } > > bld_base->soa = 1; > bld_base->emit_store = emit_store; > @@ -1461,10 +1578,24 @@ void radeon_llvm_context_init(struct > radeon_llvm_context * ctx) > bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit; > bld_base->op_actions[TGSI_OPCODE_COS].emit = > build_tgsi_intrinsic_nomem; > bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32"; > + bld_base->op_actions[TGSI_OPCODE_DABS].emit = > build_tgsi_intrinsic_nomem; > + bld_base->op_actions[TGSI_OPCODE_DABS].intr_name = "fabs"; > + bld_base->op_actions[TGSI_OPCODE_DFMA].emit = > build_tgsi_intrinsic_nomem; > + bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64"; > + bld_base->op_actions[TGSI_OPCODE_DFRAC].intr_name = > "llvm.AMDIL.fraction."; > + bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg; > + bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp; > + bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp; > + bld_base->op_actions[TGSI_OPCODE_DSLT].emit = emit_dcmp; > + bld_base->op_actions[TGSI_OPCODE_DSNE].emit = emit_dcmp; > bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx"; > bld_base->op_actions[TGSI_OPCODE_DDX].fetch_args = tex_fetch_args; > bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDGPU.ddy"; > bld_base->op_actions[TGSI_OPCODE_DDY].fetch_args = tex_fetch_args; > + bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = > build_tgsi_intrinsic_nomem; > + bld_base->op_actions[TGSI_OPCODE_DRSQ].intr_name = > "llvm.AMDGPU.rsq.f64"; > + bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = > build_tgsi_intrinsic_nomem; > + bld_base->op_actions[TGSI_OPCODE_DSQRT].intr_name = "llvm.sqrt.f64"; > bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit; > bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit; > bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit; > diff --git a/src/gallium/drivers/radeonsi/si_pipe.c > b/src/gallium/drivers/radeonsi/si_pipe.c > index 13b67d2..a9dce2c 100644 > --- a/src/gallium/drivers/radeonsi/si_pipe.c > +++ b/src/gallium/drivers/radeonsi/si_pipe.c > @@ -451,6 +451,7 @@ static int si_get_shader_param(struct pipe_screen* > pscreen, unsigned shader, enu > case PIPE_SHADER_CAP_PREFERRED_IR: > return PIPE_SHADER_IR_TGSI; > case PIPE_SHADER_CAP_DOUBLES: > + return HAVE_LLVM >= 0x0307; > case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: > case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: > return 0; > diff --git a/src/gallium/drivers/radeonsi/si_shader.c > b/src/gallium/drivers/radeonsi/si_shader.c > index 753b238..75a29ae 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.c > +++ b/src/gallium/drivers/radeonsi/si_shader.c > @@ -703,8 +703,15 @@ static LLVMValueRef fetch_constant( > buf = reg->Register.Dimension ? reg->Dimension.Index : 0; > idx = reg->Register.Index * 4 + swizzle; > > - if (!reg->Register.Indirect) > - return bitcast(bld_base, type, > si_shader_ctx->constants[buf][idx]); > + if (!reg->Register.Indirect) { > + if (type != TGSI_TYPE_DOUBLE) > + return bitcast(bld_base, type, > si_shader_ctx->constants[buf][idx]); > + else { > + return radeon_llvm_emit_fetch_double(bld_base, > + > si_shader_ctx->constants[buf][idx], > + > si_shader_ctx->constants[buf][idx + 1]); > + } > + } > > addr = si_shader_ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle]; > addr = LLVMBuildLoad(base->gallivm->builder, addr, "load addr reg"); > @@ -713,9 +720,25 @@ static LLVMValueRef fetch_constant( > lp_build_const_int32(base->gallivm, idx * 4)); > > result = buffer_load_const(base->gallivm->builder, > si_shader_ctx->const_resource[buf], > - addr, base->elem_type); > + addr, bld_base->base.elem_type); > + > + if (type != TGSI_TYPE_DOUBLE) > + result = bitcast(bld_base, type, result); > + else { > + LLVMValueRef addr2, result2; > + addr2 = > si_shader_ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle + 1]; > + addr2 = LLVMBuildLoad(base->gallivm->builder, addr2, "load > addr reg2"); > + addr2 = lp_build_mul_imm(&bld_base->uint_bld, addr2, 16); > + addr2 = lp_build_add(&bld_base->uint_bld, addr2, > + lp_build_const_int32(base->gallivm, idx > * 4)); > > - return bitcast(bld_base, type, result); > + result2 = buffer_load_const(base->gallivm->builder, > si_shader_ctx->const_resource[buf], > + addr2, bld_base->base.elem_type); > + > + result = radeon_llvm_emit_fetch_double(bld_base, > + result, result2); > + } > + return result; > } > > /* Initialize arguments for the shader export intrinsic */ > -- > 2.4.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev