Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> > On Jan 5, 2017, at 11:18 AM, Tim Rowley <timothy.o.row...@intel.com> wrote: > > --- > .../drivers/swr/rasterizer/common/formats.cpp | 104 ++++++++++++++------- > .../drivers/swr/rasterizer/common/formats.h | 4 + > .../drivers/swr/rasterizer/core/format_traits.h | 88 +++++++++++++++++ > src/gallium/drivers/swr/rasterizer/core/utils.h | 64 +++++++++++++ > .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 49 ++++++++++ > .../drivers/swr/rasterizer/jitter/builder_misc.h | 2 + > .../drivers/swr/rasterizer/jitter/fetch_jit.cpp | 56 ++++++++++- > .../jitter/scripts/gen_llvm_ir_macros.py | 2 + > src/gallium/drivers/swr/swr_screen.cpp | 5 + > 9 files changed, 341 insertions(+), 33 deletions(-) > > diff --git a/src/gallium/drivers/swr/rasterizer/common/formats.cpp > b/src/gallium/drivers/swr/rasterizer/common/formats.cpp > index aba4c3f..72020ee 100644 > --- a/src/gallium/drivers/swr/rasterizer/common/formats.cpp > +++ b/src/gallium/drivers/swr/rasterizer/common/formats.cpp > @@ -149,16 +149,26 @@ const SWR_FORMAT_INFO gFormatInfo[] = { > { 0.0f, 0.0f, 0.0f, 0.0f }, > 1, 1 > }, > - // padding (0x5) > + // R64G64_FLOAT (0x5) > { > - nullptr, > - { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, > SWR_TYPE_UNKNOWN }, > - { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 }, > - 0, 0, 0, false, false, false, false, > - { false, false, false, false }, > - { 0.0f, 0.0f, 0.0f, 0.0f }, > - 1, 1 > + "R64G64_FLOAT", > + { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN > }, > + { 0, 0, 0, 0x3f800000 }, // Defaults for missing components > + { 0, 1, 0, 0 }, // Swizzle > + { 64, 64, 0, 0 }, // Bits per component > + 128, // Bits per element > + 16, // Bytes per element > + 2, // Num components > + false, // isSRGB > + false, // isBC > + false, // isSubsampled > + false, // isLuminance > + { false, false, false, false }, // Is normalized? > + { 1.0f, 1.0f, 0, 0 }, // To float scale factor > + 1, // bcWidth > + 1, // bcHeight > }, > + > // R32G32B32X32_FLOAT (0x6) > { > "R32G32B32X32_FLOAT", > @@ -1719,16 +1729,26 @@ const SWR_FORMAT_INFO gFormatInfo[] = { > { 0.0f, 0.0f, 0.0f, 0.0f }, > 1, 1 > }, > - // padding (0x8D) > + // R64_FLOAT (0x8D) > { > - nullptr, > - { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, > SWR_TYPE_UNKNOWN }, > - { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 }, > - 0, 0, 0, false, false, false, false, > - { false, false, false, false }, > - { 0.0f, 0.0f, 0.0f, 0.0f }, > - 1, 1 > + "R64_FLOAT", > + { SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, > SWR_TYPE_UNKNOWN }, > + { 0, 0, 0, 0x3f800000 }, // Defaults for missing components > + { 0, 0, 0, 0 }, // Swizzle > + { 64, 0, 0, 0 }, // Bits per component > + 64, // Bits per element > + 8, // Bytes per element > + 1, // Num components > + false, // isSRGB > + false, // isBC > + false, // isSubsampled > + false, // isLuminance > + { false, false, false, false }, // Is normalized? > + { 1.0f, 0, 0, 0 }, // To float scale factor > + 1, // bcWidth > + 1, // bcHeight > }, > + > // R16G16B16X16_UNORM (0x8E) > { > "R16G16B16X16_UNORM", > @@ -5529,26 +5549,46 @@ const SWR_FORMAT_INFO gFormatInfo[] = { > 1, // bcHeight > }, > > - // padding (0x197) > + // R64G64B64A64_FLOAT (0x197) > { > - nullptr, > - { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, > SWR_TYPE_UNKNOWN }, > - { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 }, > - 0, 0, 0, false, false, false, false, > - { false, false, false, false }, > - { 0.0f, 0.0f, 0.0f, 0.0f }, > - 1, 1 > + "R64G64B64A64_FLOAT", > + { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT }, > + { 0, 0, 0, 0x3f800000 }, // Defaults for missing components > + { 0, 1, 2, 3 }, // Swizzle > + { 64, 64, 64, 64 }, // Bits per component > + 256, // Bits per element > + 32, // Bytes per element > + 4, // Num components > + false, // isSRGB > + false, // isBC > + false, // isSubsampled > + false, // isLuminance > + { false, false, false, false }, // Is normalized? > + { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor > + 1, // bcWidth > + 1, // bcHeight > }, > - // padding (0x198) > + > + // R64G64B64_FLOAT (0x198) > { > - nullptr, > - { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, > SWR_TYPE_UNKNOWN }, > - { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 }, > - 0, 0, 0, false, false, false, false, > - { false, false, false, false }, > - { 0.0f, 0.0f, 0.0f, 0.0f }, > - 1, 1 > + "R64G64B64_FLOAT", > + { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN }, > + { 0, 0, 0, 0x3f800000 }, // Defaults for missing components > + { 0, 1, 2, 0 }, // Swizzle > + { 64, 64, 64, 0 }, // Bits per component > + 192, // Bits per element > + 24, // Bytes per element > + 3, // Num components > + false, // isSRGB > + false, // isBC > + false, // isSubsampled > + false, // isLuminance > + { false, false, false, false }, // Is normalized? > + { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor > + 1, // bcWidth > + 1, // bcHeight > }, > + > // BC4_SNORM (0x199) > { > "BC4_SNORM", > diff --git a/src/gallium/drivers/swr/rasterizer/common/formats.h > b/src/gallium/drivers/swr/rasterizer/common/formats.h > index 3d686d3..0056a56 100644 > --- a/src/gallium/drivers/swr/rasterizer/common/formats.h > +++ b/src/gallium/drivers/swr/rasterizer/common/formats.h > @@ -57,6 +57,7 @@ enum SWR_FORMAT > R32G32B32A32_FLOAT = 0x0, > R32G32B32A32_SINT = 0x1, > R32G32B32A32_UINT = 0x2, > + R64G64_FLOAT = 0x5, > R32G32B32X32_FLOAT = 0x6, > R32G32B32A32_SSCALED = 0x7, > R32G32B32A32_USCALED = 0x8, > @@ -78,6 +79,7 @@ enum SWR_FORMAT > R32_FLOAT_X8X24_TYPELESS = 0x88, > X32_TYPELESS_G8X24_UINT = 0x89, > L32A32_FLOAT = 0x8A, > + R64_FLOAT = 0x8D, > R16G16B16X16_UNORM = 0x8E, > R16G16B16X16_FLOAT = 0x8F, > L32X32_FLOAT = 0x91, > @@ -193,6 +195,8 @@ enum SWR_FORMAT > R8G8B8_SNORM = 0x194, > R8G8B8_SSCALED = 0x195, > R8G8B8_USCALED = 0x196, > + R64G64B64A64_FLOAT = 0x197, > + R64G64B64_FLOAT = 0x198, > BC4_SNORM = 0x199, > BC5_SNORM = 0x19A, > R16G16B16_FLOAT = 0x19B, > diff --git a/src/gallium/drivers/swr/rasterizer/core/format_traits.h > b/src/gallium/drivers/swr/rasterizer/core/format_traits.h > index 59d4e7d..6c42804 100644 > --- a/src/gallium/drivers/swr/rasterizer/core/format_traits.h > +++ b/src/gallium/drivers/swr/rasterizer/core/format_traits.h > @@ -134,6 +134,28 @@ template<> struct FormatTraits<R32G32B32A32_UINT> : > }; > > ////////////////////////////////////////////////////////////////////////// > +/// FormatTraits<R64G64_FLOAT> - Format traits specialization for > R64G64_FLOAT > +////////////////////////////////////////////////////////////////////////// > +template<> struct FormatTraits<R64G64_FLOAT> : > + ComponentTraits<SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64>, > + FormatSwizzle<0, 1>, > + Defaults<0, 0, 0, 0x3f800000> > +{ > + static const uint32_t bpp{ 128 }; > + static const uint32_t numComps{ 2 }; > + static const bool hasAlpha{ false }; > + static const uint32_t alphaComp{ 0 }; > + static const bool isSRGB{ false }; > + static const bool isBC{ false }; > + static const bool isSubsampled{ false }; > + static const uint32_t bcWidth{ 1 }; > + static const uint32_t bcHeight{ 1 }; > + > + typedef Transpose64_64 TransposeT; > + typedef Format2<64, 64> FormatT; > +}; > + > +////////////////////////////////////////////////////////////////////////// > /// FormatTraits<R32G32B32X32_FLOAT> - Format traits specialization for > R32G32B32X32_FLOAT > ////////////////////////////////////////////////////////////////////////// > template<> struct FormatTraits<R32G32B32X32_FLOAT> : > @@ -596,6 +618,28 @@ template<> struct FormatTraits<L32A32_FLOAT> : > }; > > ////////////////////////////////////////////////////////////////////////// > +/// FormatTraits<R64_FLOAT> - Format traits specialization for R64_FLOAT > +////////////////////////////////////////////////////////////////////////// > +template<> struct FormatTraits<R64_FLOAT> : > + ComponentTraits<SWR_TYPE_FLOAT, 64>, > + FormatSwizzle<0>, > + Defaults<0, 0, 0, 0x3f800000> > +{ > + static const uint32_t bpp{ 64 }; > + static const uint32_t numComps{ 1 }; > + static const bool hasAlpha{ false }; > + static const uint32_t alphaComp{ 0 }; > + static const bool isSRGB{ false }; > + static const bool isBC{ false }; > + static const bool isSubsampled{ false }; > + static const uint32_t bcWidth{ 1 }; > + static const uint32_t bcHeight{ 1 }; > + > + typedef TransposeSingleComponent<64> TransposeT; > + typedef Format1<64> FormatT; > +}; > + > +////////////////////////////////////////////////////////////////////////// > /// FormatTraits<R16G16B16X16_UNORM> - Format traits specialization for > R16G16B16X16_UNORM > ////////////////////////////////////////////////////////////////////////// > template<> struct FormatTraits<R16G16B16X16_UNORM> : > @@ -3126,6 +3170,50 @@ template<> struct FormatTraits<R8G8B8_USCALED> : > }; > > ////////////////////////////////////////////////////////////////////////// > +/// FormatTraits<R64G64B64A64_FLOAT> - Format traits specialization for > R64G64B64A64_FLOAT > +////////////////////////////////////////////////////////////////////////// > +template<> struct FormatTraits<R64G64B64A64_FLOAT> : > + ComponentTraits<SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, > 64, SWR_TYPE_FLOAT, 64>, > + FormatSwizzle<0, 1, 2, 3>, > + Defaults<0, 0, 0, 0x3f800000> > +{ > + static const uint32_t bpp{ 256 }; > + static const uint32_t numComps{ 4 }; > + static const bool hasAlpha{ true }; > + static const uint32_t alphaComp{ 3 }; > + static const bool isSRGB{ false }; > + static const bool isBC{ false }; > + static const bool isSubsampled{ false }; > + static const uint32_t bcWidth{ 1 }; > + static const uint32_t bcHeight{ 1 }; > + > + typedef Transpose64_64_64_64 TransposeT; > + typedef Format4<64, 64, 64, 64> FormatT; > +}; > + > +////////////////////////////////////////////////////////////////////////// > +/// FormatTraits<R64G64B64_FLOAT> - Format traits specialization for > R64G64B64_FLOAT > +////////////////////////////////////////////////////////////////////////// > +template<> struct FormatTraits<R64G64B64_FLOAT> : > + ComponentTraits<SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, > 64>, > + FormatSwizzle<0, 1, 2>, > + Defaults<0, 0, 0, 0x3f800000> > +{ > + static const uint32_t bpp{ 192 }; > + static const uint32_t numComps{ 3 }; > + static const bool hasAlpha{ false }; > + static const uint32_t alphaComp{ 0 }; > + static const bool isSRGB{ false }; > + static const bool isBC{ false }; > + static const bool isSubsampled{ false }; > + static const uint32_t bcWidth{ 1 }; > + static const uint32_t bcHeight{ 1 }; > + > + typedef Transpose64_64_64 TransposeT; > + typedef Format3<64, 64, 64> FormatT; > +}; > + > +////////////////////////////////////////////////////////////////////////// > /// FormatTraits<BC4_SNORM> - Format traits specialization for BC4_SNORM > ////////////////////////////////////////////////////////////////////////// > template<> struct FormatTraits<BC4_SNORM> : > diff --git a/src/gallium/drivers/swr/rasterizer/core/utils.h > b/src/gallium/drivers/swr/rasterizer/core/utils.h > index a236575..0e2cb47 100644 > --- a/src/gallium/drivers/swr/rasterizer/core/utils.h > +++ b/src/gallium/drivers/swr/rasterizer/core/utils.h > @@ -856,6 +856,70 @@ struct Transpose11_11_10 > #endif > }; > > +////////////////////////////////////////////////////////////////////////// > +/// Transpose64 > +////////////////////////////////////////////////////////////////////////// > +struct Transpose64 > +{ > + > ////////////////////////////////////////////////////////////////////////// > + /// @brief Performs an SOA to AOS conversion > + /// @param pSrc - source data in SOA form > + /// @param pDst - output data in AOS form > + static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete; > +#if ENABLE_AVX512_SIMD16 > + > + static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst) = delete; > +#endif > +}; > + > +////////////////////////////////////////////////////////////////////////// > +/// Transpose64_64 > +////////////////////////////////////////////////////////////////////////// > +struct Transpose64_64 > +{ > + > ////////////////////////////////////////////////////////////////////////// > + /// @brief Performs an SOA to AOS conversion > + /// @param pSrc - source data in SOA form > + /// @param pDst - output data in AOS form > + static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete; > +#if ENABLE_AVX512_SIMD16 > + > + static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst) = delete; > +#endif > +}; > + > +////////////////////////////////////////////////////////////////////////// > +/// Transpose64_64_64 > +////////////////////////////////////////////////////////////////////////// > +struct Transpose64_64_64 > +{ > + > ////////////////////////////////////////////////////////////////////////// > + /// @brief Performs an SOA to AOS conversion > + /// @param pSrc - source data in SOA form > + /// @param pDst - output data in AOS form > + static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete; > +#if ENABLE_AVX512_SIMD16 > + > + static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst) = delete; > +#endif > +}; > + > +////////////////////////////////////////////////////////////////////////// > +/// Transpose64_64_64_64 > +////////////////////////////////////////////////////////////////////////// > +struct Transpose64_64_64_64 > +{ > + > ////////////////////////////////////////////////////////////////////////// > + /// @brief Performs an SOA to AOS conversion > + /// @param pSrc - source data in SOA form > + /// @param pDst - output data in AOS form > + static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete; > +#if ENABLE_AVX512_SIMD16 > + > + static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst) = delete; > +#endif > +}; > + > // helper function to unroll loops > template<int Begin, int End, int Step = 1> > struct UnrollerL { > diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp > b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp > index 8120a2f..8744eb6 100644 > --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp > +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp > @@ -632,6 +632,55 @@ namespace SwrJit > } > > ////////////////////////////////////////////////////////////////////////// > + /// @brief Generate a masked gather operation in LLVM IR. If not > + /// supported on the underlying platform, emulate it with loads > + /// @param vSrc - SIMD wide value that will be loaded if mask is invalid > + /// @param pBase - Int8* base VB address pointer value > + /// @param vIndices - SIMD wide value of VB byte offsets > + /// @param vMask - SIMD wide mask that controls whether to access memory > or the src values > + /// @param scale - value to scale indices by > + Value *Builder::GATHERPD(Value* vSrc, Value* pBase, Value* vIndices, > Value* vMask, Value* scale) > + { > + Value* vGather; > + > + // use avx2 gather instruction if available > + if(JM()->mArch.AVX2()) > + { > + vGather = VGATHERPD(vSrc, pBase, vIndices, vMask, scale); > + } > + else > + { > + Value* pStack = STACKSAVE(); > + > + // store vSrc on the stack. this way we can select between a > valid load address and the vSrc address > + Value* vSrcPtr = ALLOCA(vSrc->getType()); > + STORE(vSrc, vSrcPtr); > + > + vGather = UndefValue::get(VectorType::get(mDoubleTy, 4)); > + Value *vScaleVec = VECTOR_SPLAT(4, Z_EXT(scale,mInt32Ty)); > + Value *vOffsets = MUL(vIndices,vScaleVec); > + Value *mask = MASK(vMask); > + for(uint32_t i = 0; i < mVWidth/2; ++i) > + { > + // single component byte index > + Value *offset = VEXTRACT(vOffsets,C(i)); > + // byte pointer to component > + Value *loadAddress = GEP(pBase,offset); > + loadAddress = > BITCAST(loadAddress,PointerType::get(mDoubleTy,0)); > + // pointer to the value to load if we're masking off a > component > + Value *maskLoadAddress = GEP(vSrcPtr,{C(0), C(i)}); > + Value *selMask = VEXTRACT(mask,C(i)); > + // switch in a safe address to load if we're trying to > access a vertex > + Value *validAddress = SELECT(selMask, loadAddress, > maskLoadAddress); > + Value *val = LOAD(validAddress); > + vGather = VINSERT(vGather,val,C(i)); > + } > + STACKRESTORE(pStack); > + } > + return vGather; > + } > + > + > ////////////////////////////////////////////////////////////////////////// > /// @brief convert x86 <N x float> mask to llvm <N x i1> mask > Value* Builder::MASK(Value* vmask) > { > diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h > b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h > index 86f7bf2..67f938e 100644 > --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h > +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h > @@ -113,6 +113,8 @@ Value *GATHERDD(Value* src, Value* pBase, Value* indices, > Value* mask, Value* sc > void GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* > byteOffsets, > Value* mask, Value* vGatherComponents[], bool bPackedOutput); > > +Value *GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, > Value* scale); > + > void SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask); > > void Shuffle8bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput, > Value* vGatherOutput[], bool bPackedOutput); > diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp > b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp > index c5936e5..984aab6 100644 > --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp > +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp > @@ -519,7 +519,7 @@ void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE > &fetchState, Value* str > bool FetchJit::IsOddFormat(SWR_FORMAT format) > { > const SWR_FORMAT_INFO& info = GetFormatInfo(format); > - if (info.bpc[0] != 8 && info.bpc[0] != 16 && info.bpc[0] != 32) > + if (info.bpc[0] != 8 && info.bpc[0] != 16 && info.bpc[0] != 32 && > info.bpc[0] != 64) > { > return true; > } > @@ -914,6 +914,58 @@ void FetchJit::JitGatherVertices(const > FETCH_COMPILE_STATE &fetchState, > } > } > break; > + case 64: > + { > + for (uint32_t i = 0; i < 4; i++) > + { > + if (isComponentEnabled(compMask, i)) > + { > + // if we need to gather the component > + if (compCtrl[i] == StoreSrc) > + { > + Value *vMaskLo = VSHUFFLE(pMask, > VUNDEF(mInt1Ty, 8), C({0, 1, 2, 3})); > + Value *vMaskHi = VSHUFFLE(pMask, > VUNDEF(mInt1Ty, 8), C({4, 5, 6, 7})); > + vMaskLo = S_EXT(vMaskLo, > VectorType::get(mInt64Ty, 4)); > + vMaskHi = S_EXT(vMaskHi, > VectorType::get(mInt64Ty, 4)); > + vMaskLo = BITCAST(vMaskLo, > VectorType::get(mDoubleTy, 4)); > + vMaskHi = BITCAST(vMaskHi, > VectorType::get(mDoubleTy, 4)); > + > + Value *vOffsetsLo = VEXTRACTI128(vOffsets, > C(0)); > + Value *vOffsetsHi = VEXTRACTI128(vOffsets, > C(1)); > + > + Value *vZeroDouble = VECTOR_SPLAT(4, > ConstantFP::get(IRB()->getDoubleTy(), 0.0f)); > + > + Value* pGatherLo = GATHERPD(vZeroDouble, > + pStreamBase, > vOffsetsLo, vMaskLo, C((char)1)); > + Value* pGatherHi = GATHERPD(vZeroDouble, > + pStreamBase, > vOffsetsHi, vMaskHi, C((char)1)); > + > + pGatherLo = VCVTPD2PS(pGatherLo); > + pGatherHi = VCVTPD2PS(pGatherHi); > + > + Value *pGather = VSHUFFLE(pGatherLo, > pGatherHi, C({0, 1, 2, 3, 4, 5, 6, 7})); > + > + vVertexElements[currentVertexElement++] = > pGather; > + } > + else > + { > + vVertexElements[currentVertexElement++] = > GenerateCompCtrlVector(compCtrl[i]); > + } > + > + if (currentVertexElement > 3) > + { > + StoreVertexElements(pVtxOut, outputElt++, 4, > vVertexElements); > + // reset to the next vVertexElement to output > + currentVertexElement = 0; > + } > + > + } > + > + // offset base to the next component in the vertex > to gather > + pStreamBase = GEP(pStreamBase, C((char)8)); > + } > + } > + break; > default: > SWR_ASSERT(0, "Tried to fetch invalid FP format"); > break; > @@ -1730,6 +1782,8 @@ PFN_FETCH_FUNC JitFetchFunc(HANDLE hJitMgr, const > HANDLE hFunc) > fclose(fd); > #endif > > + pJitMgr->DumpAsm(const_cast<llvm::Function*>(func), "final"); > + > return pfnFetch; > } > > diff --git > a/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py > b/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py > index c6d0941..70d3576 100644 > --- a/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py > +++ b/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py > @@ -84,6 +84,7 @@ inst_aliases = { > } > > intrinsics = [ > + ["VGATHERPD", "x86_avx2_gather_d_pd_256", ["src", "pBase", > "indices", "mask", "scale"]], > ["VGATHERPS", "x86_avx2_gather_d_ps_256", ["src", "pBase", "indices", > "mask", "scale"]], > ["VGATHERDD", "x86_avx2_gather_d_d_256", ["src", "pBase", "indices", > "mask", "scale"]], > ["VSQRTPS", "x86_avx_sqrt_ps_256", ["a"]], > @@ -101,6 +102,7 @@ intrinsics = [ > ["VPSHUFB", "x86_avx2_pshuf_b", ["a", "b"]], > ["VPERMD", "x86_avx2_permd", ["a", "idx"]], > ["VPERMPS", "x86_avx2_permps", ["idx", "a"]], > + ["VCVTPD2PS", "x86_avx_cvt_pd2_ps_256", ["a"]], > ["VCVTPH2PS", "x86_vcvtph2ps_256", ["a"]], > ["VCVTPS2PH", "x86_vcvtps2ph_256", ["a", "round"]], > ["VHSUBPS", "x86_avx_hsub_ps_256", ["a", "b"]], > diff --git a/src/gallium/drivers/swr/swr_screen.cpp > b/src/gallium/drivers/swr/swr_screen.cpp > index 6ff21cd..cc8030e 100644 > --- a/src/gallium/drivers/swr/swr_screen.cpp > +++ b/src/gallium/drivers/swr/swr_screen.cpp > @@ -537,6 +537,11 @@ mesa_to_swr_format(enum pipe_format format) > {PIPE_FORMAT_R32G32B32_FIXED, R32G32B32_SFIXED}, > {PIPE_FORMAT_R32G32B32A32_FIXED, R32G32B32A32_SFIXED}, > > + {PIPE_FORMAT_R64_FLOAT, R64_FLOAT}, > + {PIPE_FORMAT_R64G64_FLOAT, R64G64_FLOAT}, > + {PIPE_FORMAT_R64G64B64_FLOAT, R64G64B64_FLOAT}, > + {PIPE_FORMAT_R64G64B64A64_FLOAT, R64G64B64A64_FLOAT}, > + > /* These formats have entries in SWR but don't have Load/StoreTile > * implementations. That means these aren't renderable, and thus having > * a mapping entry here is detrimental. > -- > 2.7.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev