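Honor the colorHottileEnable mask when accessing colorBuffer pointers: the 8x2 OutputMerger now takes the mask as a parameter and reads the destination color only for render targets whose bit is set in it. Also fix the indentation of several AR_EVENT lines and add a one-line comment above each OutputMerger overload.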
---
 src/gallium/drivers/swr/rasterizer/core/backend.cpp | 20 ++++++++++----------
 src/gallium/drivers/swr/rasterizer/core/backend.h   | 18 ++++++++++++------
 2 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
index 71655f1..a53b51f 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
@@ -541,7 +541,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t 
workerId, uint32_t x, uint3
                     AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
                     depthPassMask = DepthStencilTest(&state, 
work.triFlags.frontFacing, work.triFlags.viewportIndex,
                                                      psContext.vZ, 
pDepthBuffer, vCoverageMask, pStencilBuffer, &stencilPassMask);
-                                       
AR_EVENT(EarlyDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), 
_simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
+                    
AR_EVENT(EarlyDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), 
_simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
                     AR_END(BEEarlyDepthTest, 0);
 
                     // early-exit if no pixels passed depth or earlyZ is 
forced on
@@ -574,7 +574,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t 
workerId, uint32_t x, uint3
                     AR_BEGIN(BELateDepthTest, pDC->drawId);
                     depthPassMask = DepthStencilTest(&state, 
work.triFlags.frontFacing, work.triFlags.viewportIndex,
                                                         psContext.vZ, 
pDepthBuffer, vCoverageMask, pStencilBuffer, &stencilPassMask);
-                                       
AR_EVENT(LateDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), 
_simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
+                    
AR_EVENT(LateDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), 
_simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
                     AR_END(BELateDepthTest, 0);
 
                     if (!_simd_movemask_ps(depthPassMask))
@@ -593,7 +593,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t 
workerId, uint32_t x, uint3
                 // output merger
                 AR_BEGIN(BEOutputMerger, pDC->drawId);
 #if USE_8x2_TILE_BACKEND
-                OutputMerger(psContext, pColorBuffer, 0, &state.blendState, 
state.pfnBlendFunc, vCoverageMask, depthPassMask, 
state.psState.numRenderTargets, useAlternateOffset);
+                OutputMerger(psContext, pColorBuffer, 0, &state.blendState, 
state.pfnBlendFunc, vCoverageMask, depthPassMask, 
state.psState.numRenderTargets, state.colorHottileEnable, useAlternateOffset);
 #else
                 OutputMerger(psContext, pColorBuffer, 0, &state.blendState, 
state.pfnBlendFunc, vCoverageMask, depthPassMask, 
state.psState.numRenderTargets);
 #endif
@@ -752,7 +752,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t 
workerId, uint32_t x, uint32_
                         AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
                         depthPassMask = DepthStencilTest(&state, 
work.triFlags.frontFacing, work.triFlags.viewportIndex,
                                               psContext.vZ, pDepthSample, 
vCoverageMask, pStencilSample, &stencilPassMask);
-                                               
AR_EVENT(EarlyDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), 
_simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
+                        
AR_EVENT(EarlyDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), 
_simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
                         AR_END(BEEarlyDepthTest, 0);
 
                         // early-exit if no samples passed depth or earlyZ is 
forced on.
@@ -786,7 +786,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t 
workerId, uint32_t x, uint32_
                         AR_BEGIN(BELateDepthTest, pDC->drawId);
                         depthPassMask = DepthStencilTest(&state, 
work.triFlags.frontFacing, work.triFlags.viewportIndex,
                                               psContext.vZ, pDepthSample, 
vCoverageMask, pStencilSample, &stencilPassMask);
-                                               
AR_EVENT(LateDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), 
_simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
+                        
AR_EVENT(LateDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), 
_simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
                         AR_END(BELateDepthTest, 0);
 
                         if (!_simd_movemask_ps(depthPassMask))
@@ -807,7 +807,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t 
workerId, uint32_t x, uint32_
                     // output merger
                     AR_BEGIN(BEOutputMerger, pDC->drawId);
 #if USE_8x2_TILE_BACKEND
-                    OutputMerger(psContext, pColorBuffer, sample, 
&state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, 
state.psState.numRenderTargets, useAlternateOffset);
+                    OutputMerger(psContext, pColorBuffer, sample, 
&state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, 
state.psState.numRenderTargets, state.colorHottileEnable, useAlternateOffset);
 #else
                     OutputMerger(psContext, pColorBuffer, sample, 
&state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, 
state.psState.numRenderTargets);
 #endif
@@ -935,7 +935,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, 
uint32_t x, uint32_t
             {
                 uint32_t depthPassCount = PixelRateZTest(activeLanes, 
psContext, BEEarlyDepthTest);
                 UPDATE_STAT_BE(DepthPassCount, depthPassCount);
-                               
AR_EVENT(EarlyDepthInfoPixelRate(depthPassCount, 
_simd_movemask_ps(activeLanes)));
+                AR_EVENT(EarlyDepthInfoPixelRate(depthPassCount, 
_simd_movemask_ps(activeLanes)));
             }
 
             // if we have no covered samples that passed depth at this point, 
go to next tile
@@ -969,7 +969,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, 
uint32_t x, uint32_t
             {
                 uint32_t depthPassCount = PixelRateZTest(activeLanes, 
psContext, BELateDepthTest);
                 UPDATE_STAT_BE(DepthPassCount, depthPassCount);
-                               AR_EVENT(LateDepthInfoPixelRate(depthPassCount, 
_simd_movemask_ps(activeLanes)));
+                AR_EVENT(LateDepthInfoPixelRate(depthPassCount, 
_simd_movemask_ps(activeLanes)));
             }
 
             // if we have no covered samples that passed depth at this point, 
skip OM and go to next tile
@@ -1001,7 +1001,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t 
workerId, uint32_t x, uint32_t
                 
                 // broadcast the results of the PS to all passing pixels
 #if USE_8x2_TILE_BACKEND
-                OutputMerger(psContext, pColorBuffer, sample, 
&state.blendState, state.pfnBlendFunc, coverageMask, depthMask, 
state.psState.numRenderTargets, useAlternateOffset);
+                OutputMerger(psContext, pColorBuffer, sample, 
&state.blendState, state.pfnBlendFunc, coverageMask, depthMask, 
state.psState.numRenderTargets, state.colorHottileEnable, useAlternateOffset);
 #else
                 OutputMerger(psContext, pColorBuffer, sample, 
&state.blendState, state.pfnBlendFunc, coverageMask, depthMask, 
state.psState.numRenderTargets);
 #endif
@@ -1148,7 +1148,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, 
uint32_t x, uint32_t y,
                     AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
                     simdscalar depthPassMask = DepthStencilTest(&state, 
work.triFlags.frontFacing, work.triFlags.viewportIndex,
                         psContext.vZ, pDepthSample, vCoverageMask, 
pStencilSample, &stencilPassMask);
-                                       
AR_EVENT(EarlyDepthStencilInfoNullPS(_simd_movemask_ps(depthPassMask), 
_simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
+                    
AR_EVENT(EarlyDepthStencilInfoNullPS(_simd_movemask_ps(depthPassMask), 
_simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
                     DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], 
&state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
                         pDepthSample, depthPassMask, vCoverageMask, 
pStencilSample, stencilPassMask);
                     AR_END(BEEarlyDepthTest, 0);
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.h b/src/gallium/drivers/swr/rasterizer/core/backend.h
index 48cfce2..de37b1a 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.h
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.h
@@ -691,6 +691,7 @@ INLINE void CalcSampleBarycentrics(const BarycentricCoeffs& 
coeffs, SWR_PS_CONTE
     psContext.vOneOverW.sample = vplaneps(coeffs.vAOneOverW, 
coeffs.vBOneOverW, coeffs.vCOneOverW, psContext.vI.sample, psContext.vJ.sample);
 }
 
+// Merge Output to 4x2 SIMD Tile Format
 INLINE void OutputMerger(SWR_PS_CONTEXT &psContext, uint8_t* 
(&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE 
*pBlendState,
                          const PFN_BLEND_JIT_FUNC 
(&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar 
depthPassMask, const uint32_t NumRT)
 {
@@ -751,8 +752,9 @@ INLINE void OutputMerger(SWR_PS_CONTEXT &psContext, 
uint8_t* (&pColorBase)[SWR_N
 }
 
 #if USE_8x2_TILE_BACKEND
+// Merge Output to 8x2 SIMD16 Tile Format
 INLINE void OutputMerger(SWR_PS_CONTEXT &psContext, uint8_t* 
(&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE 
*pBlendState,
-    const PFN_BLEND_JIT_FUNC(&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar 
&coverageMask, simdscalar depthPassMask, const uint32_t NumRT, bool 
useAlternateOffset)
+    const PFN_BLEND_JIT_FUNC(&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar 
&coverageMask, simdscalar depthPassMask, const uint32_t NumRT, const uint32_t 
colorBufferEnableMask, bool useAlternateOffset)
 {
     // type safety guaranteed from template instantiation in 
BEChooser<>::GetFunc
     uint32_t rasterTileColorOffset = RasterTileColorOffset(sample);
@@ -765,7 +767,8 @@ INLINE void OutputMerger(SWR_PS_CONTEXT &psContext, 
uint8_t* (&pColorBase)[SWR_N
     simdvector blendSrc;
     simdvector blendOut;
 
-    for (uint32_t rt = 0; rt < NumRT; ++rt)
+    uint32_t colorBufferBit = 1;
+    for (uint32_t rt = 0; rt < NumRT; rt += 1, colorBufferBit <<= 1)
     {
         simdscalar *pColorSample = reinterpret_cast<simdscalar 
*>(pColorBase[rt] + rasterTileColorOffset);
 
@@ -774,10 +777,13 @@ INLINE void OutputMerger(SWR_PS_CONTEXT &psContext, 
uint8_t* (&pColorBase)[SWR_N
         /// TODO: move this into the blend JIT.
         blendOut = psContext.shaded[rt];
 
-        blendSrc[0] = pColorSample[0];
-        blendSrc[1] = pColorSample[2];
-        blendSrc[2] = pColorSample[4];
-        blendSrc[3] = pColorSample[6];
+        if (colorBufferBit & colorBufferEnableMask)
+        {
+            blendSrc[0] = pColorSample[0];
+            blendSrc[1] = pColorSample[2];
+            blendSrc[2] = pColorSample[4];
+            blendSrc[3] = pColorSample[6];
+        }
 
         // Blend outputs and update coverage mask for alpha test
         if (pfnBlendFunc[rt] != nullptr)
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to