Series looks good to me.

Jose
----- Original Message ----- > From: Roland Scheidegger <srol...@vmware.com> > > The conversion code for srgb was tuned for n x 4x8bit AoS -> 4 x nxfloat SoA > (and vice versa), fix this to handle also 16bit 565-style srgb formats. > Still not really all that generic, things like r10g10b10a2_srgb or > r4g4b4a4_srgb wouldn't work (the latter trivial to fix, the former would not > require more work to not crash but near certainly need some higher precision > calculation) but not needed right now. > The code is not fully optimized for this (could use more direct calculation > instead of expanding to 8-bit range first) but should be good enough. > --- > src/gallium/auxiliary/gallivm/lp_bld_format.h | 1 + > src/gallium/auxiliary/gallivm/lp_bld_format_soa.c | 3 +- > src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c | 26 +++++++++++-- > src/gallium/drivers/llvmpipe/lp_screen.c | 1 + > src/gallium/drivers/llvmpipe/lp_state_fs.c | 39 > ++++++++++++++++++-- > 5 files changed, 61 insertions(+), 9 deletions(-) > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h > b/src/gallium/auxiliary/gallivm/lp_bld_format.h > index a7a4ba0..1177fb2 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_format.h > +++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h > @@ -167,6 +167,7 @@ lp_build_float_to_srgb_packed(struct gallivm_state > *gallivm, > LLVMValueRef > lp_build_srgb_to_linear(struct gallivm_state *gallivm, > struct lp_type src_type, > + unsigned chan_bits, > LLVMValueRef src); > > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c > b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c > index 81cd2b0..ff2887e 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c > @@ -165,13 +165,12 @@ lp_build_unpack_rgba_soa(struct gallivm_state *gallivm, > > if (type.floating) { > if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { > - assert(width == 8); > if (format_desc->swizzle[3] == chan) 
{ > input = lp_build_unsigned_norm_to_float(gallivm, width, > type, input); > } > else { > struct lp_type conv_type = lp_uint_type(type); > - input = lp_build_srgb_to_linear(gallivm, conv_type, > input); > + input = lp_build_srgb_to_linear(gallivm, conv_type, width, > input); > } > } > else { > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c > b/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c > index 6645151..e4849fe 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c > @@ -88,11 +88,12 @@ > * (3rd order polynomial is required for crappy but just sufficient > accuracy) > * > * @param src integer (vector) value(s) to convert > - * (8 bit values unpacked to 32 bit already). > + * (chan_bits bit values unpacked to 32 bit already). > */ > LLVMValueRef > lp_build_srgb_to_linear(struct gallivm_state *gallivm, > struct lp_type src_type, > + unsigned chan_bits, > LLVMValueRef src) > { > struct lp_type f32_type = lp_type_float_vec(32, src_type.length * 32); > @@ -105,6 +106,8 @@ lp_build_srgb_to_linear(struct gallivm_state *gallivm, > }; > > assert(src_type.width == 32); > + /* Technically this would work with more bits too but would be > inaccurate. 
*/ > + assert(chan_bits <= 8); > > lp_build_context_init(&f32_bld, gallivm, f32_type); > > @@ -124,6 +127,12 @@ lp_build_srgb_to_linear(struct gallivm_state *gallivm, > */ > /* doing the 1/255 mul as part of the approximation */ > srcf = lp_build_int_to_float(&f32_bld, src); > + if (chan_bits != 8) { > + /* could adjust all the constants instead */ > + LLVMValueRef rescale_const = lp_build_const_vec(gallivm, f32_type, > + 255.0f / ((1 << > chan_bits) - 1)); > + srcf = lp_build_mul(&f32_bld, srcf, rescale_const); > + } > lin_const = lp_build_const_vec(gallivm, f32_type, 1.0f / (12.6f * > 255.0f)); > part_lin = lp_build_mul(&f32_bld, srcf, lin_const); > > @@ -150,6 +159,7 @@ lp_build_srgb_to_linear(struct gallivm_state *gallivm, > static LLVMValueRef > lp_build_linear_to_srgb(struct gallivm_state *gallivm, > struct lp_type src_type, > + unsigned chan_bits, > LLVMValueRef src) > { > LLVMBuilderRef builder = gallivm->builder; > @@ -292,6 +302,13 @@ lp_build_linear_to_srgb(struct gallivm_state *gallivm, > is_linear = lp_build_compare(gallivm, src_type, PIPE_FUNC_LEQUAL, src, > lin_thresh); > tmp = lp_build_select(&f32_bld, is_linear, lin, pow_final); > > + if (chan_bits != 8) { > + /* could adjust all the constants instead */ > + LLVMValueRef rescale_const = lp_build_const_vec(gallivm, src_type, > + ((1 << chan_bits) - 1) > / 255.0f); > + tmp = lp_build_mul(&f32_bld, tmp, rescale_const); > + } > + > f32_bld.type.sign = 0; > return lp_build_iround(&f32_bld, tmp); > } > @@ -300,7 +317,9 @@ lp_build_linear_to_srgb(struct gallivm_state *gallivm, > /** > * Convert linear float soa values to packed srgb AoS values. > * This only handles packed formats which are 4x8bit in size > - * (rgba and rgbx plus swizzles). > + * (rgba and rgbx plus swizzles), and 16bit 565-style formats > + * with no alpha. (In the latter case the return values won't be > + * fully packed, it will look like r5g6b5x16r5g6b5x16...) > * > * @param src float SoA (vector) values to convert. 
> */ > @@ -320,7 +339,8 @@ lp_build_float_to_srgb_packed(struct gallivm_state > *gallivm, > > /* rgb is subject to linear->srgb conversion, alpha is not */ > for (chan = 0; chan < 3; chan++) { > - tmpsrgb[chan] = lp_build_linear_to_srgb(gallivm, src_type, src[chan]); > + unsigned chan_bits = dst_fmt->channel[dst_fmt->swizzle[chan]].size; > + tmpsrgb[chan] = lp_build_linear_to_srgb(gallivm, src_type, chan_bits, > src[chan]); > } > /* > * can't use lp_build_conv since we want to keep values as 32bit > diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c > b/src/gallium/drivers/llvmpipe/lp_screen.c > index c8e95fe..fe06e34 100644 > --- a/src/gallium/drivers/llvmpipe/lp_screen.c > +++ b/src/gallium/drivers/llvmpipe/lp_screen.c > @@ -342,6 +342,7 @@ llvmpipe_is_format_supported( struct pipe_screen > *_screen, > > if (bind & PIPE_BIND_RENDER_TARGET) { > if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { > + /* this is a lie actually other formats COULD exist where we would > fail */ > if (format_desc->nr_channels < 3) > return FALSE; > } > diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c > b/src/gallium/drivers/llvmpipe/lp_state_fs.c > index 2f9f907..5e28f0e 100644 > --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c > +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c > @@ -868,12 +868,12 @@ lp_mem_type_from_format_desc(const struct > util_format_description *format_desc, > unsigned chan; > > if (format_expands_to_float_soa(format_desc)) { > - /* just make this a 32bit uint */ > + /* just make this a uint with width of block */ > type->floating = false; > type->fixed = false; > type->sign = false; > type->norm = false; > - type->width = 32; > + type->width = format_desc->block.bits; > type->length = 1; > return; > } > @@ -1137,12 +1137,24 @@ convert_to_blend_type(struct gallivm_state *gallivm, > * This is pretty suboptimal for this case blending in SoA would be > much > * better, since conversion gets us SoA values so need to convert > back. 
> */ > - assert(src_type.width == 32); > + assert(src_type.width == 32 || src_type.width == 16); > assert(dst_type.floating); > assert(dst_type.width == 32); > assert(dst_type.length % 4 == 0); > assert(num_srcs % 4 == 0); > > + if (src_type.width == 16) { > + /* expand 4x16bit values to 4x32bit */ > + struct lp_type type32x4 = src_type; > + LLVMTypeRef ltype32x4; > + unsigned num_fetch = dst_type.length == 8 ? num_srcs / 2 : num_srcs > / 4; > + type32x4.width = 32; > + ltype32x4 = lp_build_vec_type(gallivm, type32x4); > + for (i = 0; i < num_fetch; i++) { > + src[i] = LLVMBuildZExt(builder, src[i], ltype32x4, ""); > + } > + src_type.width = 32; > + } > for (i = 0; i < 4; i++) { > tmpsrc[i] = src[i]; > } > @@ -1298,7 +1310,7 @@ convert_from_blend_type(struct gallivm_state *gallivm, > assert(src_type.floating); > assert(src_type.width == 32); > assert(src_type.length % 4 == 0); > - assert(dst_type.width == 32); > + assert(dst_type.width == 32 || dst_type.width == 16); > > for (i = 0; i < num_srcs / 4; i++) { > LLVMValueRef tmpsoa[4], tmpdst; > @@ -1333,6 +1345,25 @@ convert_from_blend_type(struct gallivm_state *gallivm, > src[i] = tmpdst; > } > } > + if (dst_type.width == 16) { > + struct lp_type type16x8 = dst_type; > + struct lp_type type32x4 = dst_type; > + LLVMTypeRef ltype16x4, ltypei64, ltypei128; > + unsigned num_fetch = src_type.length == 8 ? 
num_srcs / 2 : num_srcs > / 4; > + type16x8.length = 8; > + type32x4.width = 32; > + ltypei128 = LLVMIntTypeInContext(gallivm->context, 128); > + ltypei64 = LLVMIntTypeInContext(gallivm->context, 64); > + ltype16x4 = lp_build_vec_type(gallivm, dst_type); > + /* We could do vector truncation but it doesn't generate very good > code */ > + for (i = 0; i < num_fetch; i++) { > + src[i] = lp_build_pack2(gallivm, type32x4, type16x8, > + src[i], lp_build_zero(gallivm, > type32x4)); > + src[i] = LLVMBuildBitCast(builder, src[i], ltypei128, ""); > + src[i] = LLVMBuildTrunc(builder, src[i], ltypei64, ""); > + src[i] = LLVMBuildBitCast(builder, src[i], ltype16x4, ""); > + } > + } > return; > } > > -- > 1.7.9.5 > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev