From: Nicolai Hähnle <nicolai.haeh...@amd.com> 1. Add a new surface flag, needEquation, that the client driver can set to force the surface tile setting to be equation compatible. Override a 2D/3D macro tile mode with a PRT_* tile mode if this flag is TRUE and the number of slices is > 1. 2. Add numEquations and pEquationTable to the ADDR_CREATE_OUTPUT structure to return the number of equations and the equation table to the client driver. 3. Add equationIndex to the ADDR_COMPUTE_SURFACE_INFO_OUTPUT structure to return the equation index to the client driver.
Please note the use of address equation has following restrictions: 1) The surface can't be splitable 2) The surface can't have non zero tile swizzle value 3) Surface with > 1 slices must have PRT tile mode, which disable slice rotation --- src/amd/addrlib/addrinterface.h | 134 ++++++-- src/amd/addrlib/core/addrcommon.h | 21 ++ src/amd/addrlib/core/addrlib.cpp | 7 + src/amd/addrlib/core/addrlib.h | 8 + src/amd/addrlib/core/addrlib1.cpp | 227 +++++++++++- src/amd/addrlib/core/addrlib1.h | 18 +- src/amd/addrlib/r800/ciaddrlib.cpp | 87 +++-- src/amd/addrlib/r800/ciaddrlib.h | 7 +- src/amd/addrlib/r800/egbaddrlib.cpp | 226 ++++++++++-- src/amd/addrlib/r800/egbaddrlib.h | 14 +- src/amd/addrlib/r800/siaddrlib.cpp | 663 ++++++++++++++++++++++++++++++++++-- src/amd/addrlib/r800/siaddrlib.h | 50 ++- 12 files changed, 1344 insertions(+), 118 deletions(-) diff --git a/src/amd/addrlib/addrinterface.h b/src/amd/addrlib/addrinterface.h index cc1024b..95b187f 100644 --- a/src/amd/addrlib/addrinterface.h +++ b/src/amd/addrlib/addrinterface.h @@ -112,20 +112,79 @@ typedef VOID* ADDR_CLIENT_HANDLE; * AddrUseCombinedSwizzle() * **/ /////////////////////////////////////////////////////////////////////////////////////////////////// // Callback functions /////////////////////////////////////////////////////////////////////////////////////////////////// /** *************************************************************************************************** +* @brief channel setting structure +*************************************************************************************************** +*/ +typedef union _ADDR_CHANNEL_SETTING +{ + struct + { + UINT_8 valid : 1; ///< Indicate whehter this channel setting is valid + UINT_8 channel : 2; ///< 0 for x channel, 1 for y channel, 2 for z channel + UINT_8 index : 5; ///< Channel index + }; + UINT_8 value; ///< Value +} ADDR_CHANNEL_SETTING; + +/** 
+*************************************************************************************************** +* @brief address equation key structure +*************************************************************************************************** +*/ +typedef union _ADDR_EQUATION_KEY +{ + struct + { + UINT_32 log2ElementBytes : 3; ///< Log2 of Bytes per pixel + UINT_32 tileMode : 5; ///< Tile mode + UINT_32 microTileType : 3; ///< Micro tile type + UINT_32 pipeConfig : 5; ///< pipe config + UINT_32 numBanks : 5; ///< Number of banks + UINT_32 bankWidth : 4; ///< Bank width + UINT_32 bankHeight : 4; ///< Bank height + UINT_32 macroAspectRatio : 3; ///< Macro tile aspect ratio + } fields; + UINT_32 value; +} ADDR_EQUATION_KEY; + +/** +*************************************************************************************************** +* @brief address equation structure +*************************************************************************************************** +*/ +#define ADDR_MAX_EQUATION_BIT 20u + +// Invalid equation index +#define ADDR_INVALID_EQUATION_INDEX 0xFFFFFFFF + +typedef struct _ADDR_EQUATION +{ + ADDR_CHANNEL_SETTING addr[ADDR_MAX_EQUATION_BIT]; ///< addr setting + ///< each bit is result of addr ^ xor ^ xor2 + ADDR_CHANNEL_SETTING xor1[ADDR_MAX_EQUATION_BIT]; ///< xor setting + ADDR_CHANNEL_SETTING xor2[ADDR_MAX_EQUATION_BIT]; ///< xor2 setting + UINT_32 numBits; ///< The number of bits in equation + BOOL_32 stackedDepthSlices; ///< TRUE if depth slices are treated as being + ///< stacked vertically prior to swizzling +} ADDR_EQUATION; + + +/** +*************************************************************************************************** * @brief Alloc system memory flags. * @note These flags are reserved for future use and if flags are added will minimize the impact * of the client. 
*************************************************************************************************** */ typedef union _ADDR_ALLOCSYSMEM_FLAGS { struct { UINT_32 reserved : 32; ///< Reserved for future use. @@ -315,23 +374,26 @@ typedef struct _ADDR_CREATE_INPUT *************************************************************************************************** * ADDR_CREATEINFO_OUTPUT * * @brief * Return AddrLib handle to client driver * *************************************************************************************************** */ typedef struct _ADDR_CREATE_OUTPUT { - UINT_32 size; ///< Size of this structure in bytes + UINT_32 size; ///< Size of this structure in bytes - ADDR_HANDLE hLib; ///< Address lib handle + ADDR_HANDLE hLib; ///< Address lib handle + + UINT_32 numEquations; ///< Number of equations in the table + const ADDR_EQUATION* pEquationTable; ///< Pointer to the equation table } ADDR_CREATE_OUTPUT; /** *************************************************************************************************** * AddrCreate * * @brief * Create AddrLib object, must be called before any interface calls * * @return @@ -413,47 +475,52 @@ typedef struct _ADDR_QBSTEREOINFO * ADDR_SURFACE_FLAGS * * @brief * Surface flags *************************************************************************************************** */ typedef union _ADDR_SURFACE_FLAGS { struct { - UINT_32 color : 1; ///< Flag indicates this is a color buffer - UINT_32 depth : 1; ///< Flag indicates this is a depth/stencil buffer - UINT_32 stencil : 1; ///< Flag indicates this is a stencil buffer - UINT_32 texture : 1; ///< Flag indicates this is a texture - UINT_32 cube : 1; ///< Flag indicates this is a cubemap - UINT_32 volume : 1; ///< Flag indicates this is a volume texture - UINT_32 fmask : 1; ///< Flag indicates this is an fmask - UINT_32 cubeAsArray : 1; ///< Flag indicates if treat cubemap as arrays - UINT_32 compressZ : 1; ///< Flag indicates z buffer is compressed - UINT_32 
overlay : 1; ///< Flag indicates this is an overlay surface - UINT_32 noStencil : 1; ///< Flag indicates this depth has no separate stencil - UINT_32 display : 1; ///< Flag indicates this should match display controller req. - UINT_32 opt4Space : 1; ///< Flag indicates this surface should be optimized for space - /// i.e. save some memory but may lose performance - UINT_32 prt : 1; ///< Flag for partially resident texture - UINT_32 qbStereo : 1; ///< Quad buffer stereo surface - UINT_32 pow2Pad : 1; ///< SI: Pad to pow2, must set for mipmap (include level0) - UINT_32 interleaved : 1; ///< Special flag for interleaved YUV surface padding - UINT_32 tcCompatible : 1; ///< Flag indicates surface needs to be shader readable - UINT_32 dispTileType : 1; ///< NI: force display Tiling for 128 bit shared resoruce - UINT_32 dccCompatible : 1; ///< VI: whether to support dcc fast clear - UINT_32 czDispCompatible: 1; ///< SI+: CZ family has a HW bug needs special alignment. - /// This flag indicates we need to follow the alignment with - /// CZ families or other ASICs under PX configuration + CZ. 
- UINT_32 nonSplit : 1; ///< CI: depth texture should not be split - UINT_32 disableLinearOpt: 1; ///< Disable tile mode optimization to linear - UINT_32 reserved : 9; ///< Reserved bits + UINT_32 color : 1; ///< Flag indicates this is a color buffer + UINT_32 depth : 1; ///< Flag indicates this is a depth/stencil buffer + UINT_32 stencil : 1; ///< Flag indicates this is a stencil buffer + UINT_32 texture : 1; ///< Flag indicates this is a texture + UINT_32 cube : 1; ///< Flag indicates this is a cubemap + UINT_32 volume : 1; ///< Flag indicates this is a volume texture + UINT_32 fmask : 1; ///< Flag indicates this is an fmask + UINT_32 cubeAsArray : 1; ///< Flag indicates if treat cubemap as arrays + UINT_32 compressZ : 1; ///< Flag indicates z buffer is compressed + UINT_32 overlay : 1; ///< Flag indicates this is an overlay surface + UINT_32 noStencil : 1; ///< Flag indicates this depth has no separate stencil + UINT_32 display : 1; ///< Flag indicates this should match display controller req. + UINT_32 opt4Space : 1; ///< Flag indicates this surface should be optimized for space + /// i.e. save some memory but may lose performance + UINT_32 prt : 1; ///< Flag for partially resident texture + UINT_32 qbStereo : 1; ///< Quad buffer stereo surface + UINT_32 pow2Pad : 1; ///< SI: Pad to pow2, must set for mipmap (include level0) + UINT_32 interleaved : 1; ///< Special flag for interleaved YUV surface padding + UINT_32 tcCompatible : 1; ///< Flag indicates surface needs to be shader readable + UINT_32 dispTileType : 1; ///< NI: force display Tiling for 128 bit shared resoruce + UINT_32 dccCompatible : 1; ///< VI: whether to support dcc fast clear + UINT_32 czDispCompatible : 1; ///< SI+: CZ family has a HW bug needs special alignment. + /// This flag indicates we need to follow the + /// alignment with CZ families or other ASICs under + /// PX configuration + CZ. 
+ UINT_32 nonSplit : 1; ///< CI: depth texture should not be split + UINT_32 disableLinearOpt : 1; ///< Disable tile mode optimization to linear + UINT_32 needEquation : 1; ///< Make the surface tile setting equation compatible. + /// This flag indicates we need to override tile + /// mode to PRT_* tile mode to disable slice rotation, + /// which is needed by swizzle pattern equation. + UINT_32 reserved : 8; ///< Reserved bits }; UINT_32 value; } ADDR_SURFACE_FLAGS; /** *************************************************************************************************** * ADDR_COMPUTE_SURFACE_INFO_INPUT * * @brief @@ -467,20 +534,21 @@ typedef struct _ADDR_COMPUTE_SURFACE_INFO_INPUT AddrTileMode tileMode; ///< Tile mode AddrFormat format; ///< If format is set to valid one, bpp/width/height /// might be overwritten UINT_32 bpp; ///< Bits per pixel UINT_32 numSamples; ///< Number of samples UINT_32 width; ///< Width, in pixels UINT_32 height; ///< Height, in pixels UINT_32 numSlices; ///< Number of surface slices or depth UINT_32 slice; ///< Slice index UINT_32 mipLevel; ///< Current mipmap level + UINT_32 numMipLevels; ///< Number of mips in mip chain ADDR_SURFACE_FLAGS flags; ///< Surface type flags UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as /// number of samples for normal AA; Set it to the /// number of fragments for EQAA /// r800 and later HWL parameters // Needed by 2D tiling, for linear and 1D tiling, just keep them 0's ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. 
Set to 0 to default/calculate AddrTileType tileType; ///< Micro tiling type, not needed when tileIndex != -1 INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it /// while the global useTileIndex is set to 1 @@ -532,23 +600,29 @@ typedef struct _ADDR_COMPUTE_SURFACE_INFO_OUTPUT INT_32 tileIndex; ///< Tile index, MAY be "downgraded" INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) /// Output flags struct { /// Special information to work around SI mipmap swizzle bug UBTS #317508 UINT_32 last2DLevel : 1; ///< TRUE if this is the last 2D(3D) tiled ///< Only meaningful when create flag checkLast2DLevel is set UINT_32 tcCompatible : 1; ///< If the surface can be shader compatible - UINT_32 reserved :30; ///< Reserved bits + UINT_32 reserved :30; ///< Reserved bits }; + UINT_32 equationIndex; ///< Equation index in the equation table; + + UINT_32 blockWidth; ///< Width in element inside one block(1D->Micro, 2D->Macro) + UINT_32 blockHeight; ///< Height in element inside one block(1D->Micro, 2D->Macro) + UINT_32 blockSlices; ///< Slice number inside one block(1D->Micro, 2D->Macro) + /// Stereo info ADDR_QBSTEREOINFO* pStereoInfo;///< Stereo information, needed when .qbStereo flag is TRUE } ADDR_COMPUTE_SURFACE_INFO_OUTPUT; /** *************************************************************************************************** * AddrComputeSurfaceInfo * * @brief * Compute surface width/height/depth/alignments and suitable tiling mode diff --git a/src/amd/addrlib/core/addrcommon.h b/src/amd/addrlib/core/addrcommon.h index 35320e6..9902eb1 100644 --- a/src/amd/addrlib/core/addrcommon.h +++ b/src/amd/addrlib/core/addrcommon.h @@ -569,12 +569,33 @@ static inline VOID SafeAssign( static inline VOID SafeAssign( AddrTileMode* pLVal, ///< [in] Pointer to left val AddrTileMode rVal) ///< [in] Right value { if (pLVal) { *pLVal = rVal; } } +/** 
+*************************************************************************************************** +* InitChannel +* +* @brief +* Get channel initialization value +*************************************************************************************************** +*/ +static inline ADDR_CHANNEL_SETTING InitChannel( + UINT_32 valid, ///< [in] valid setting + UINT_32 channel, ///< [in] channel setting + UINT_32 index) ///< [in] index setting +{ + ADDR_CHANNEL_SETTING t; + t.valid = valid; + t.channel = channel; + t.index = index; + + return t; +} + #endif // __ADDR_COMMON_H__ diff --git a/src/amd/addrlib/core/addrlib.cpp b/src/amd/addrlib/core/addrlib.cpp index ec62f03..88ff572 100644 --- a/src/amd/addrlib/core/addrlib.cpp +++ b/src/amd/addrlib/core/addrlib.cpp @@ -259,20 +259,27 @@ ADDR_E_RETURNCODE AddrLib::Create( ADDR_ASSERT_ALWAYS(); } else { pLib->m_pElemLib->SetConfigFlags(pLib->m_configFlags); } } pCreateOut->hLib = pLib; + if ((pLib != NULL) && + (returnCode == ADDR_OK)) + { + pCreateOut->numEquations = + pLib->HwlGetEquationTableInfo(&pCreateOut->pEquationTable); + } + if ((pLib == NULL) && (returnCode == ADDR_OK)) { // Unknown failures, we return the general error code returnCode = ADDR_ERROR; } return returnCode; } diff --git a/src/amd/addrlib/core/addrlib.h b/src/amd/addrlib/core/addrlib.h index d1c5dd7..4ba7c2d 100644 --- a/src/amd/addrlib/core/addrlib.h +++ b/src/amd/addrlib/core/addrlib.h @@ -189,20 +189,28 @@ protected: // // Initialization // /// Pure Virtual function for Hwl computing internal global parameters from h/w registers virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn) = 0; /// Pure Virtual function for Hwl converting chip family virtual AddrChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision) = 0; + /// Get equation table pointer and number of equations + virtual UINT_32 HwlGetEquationTableInfo(const ADDR_EQUATION** ppEquationTable) const + { + *ppEquationTable = NULL; + + return 0; + } + // 
// Misc helper // static UINT_32 Bits2Number(UINT_32 bitNum,...); static UINT_32 GetNumFragments(UINT_32 numSamples, UINT_32 numFrags) { return (numFrags != 0) ? numFrags : Max(1u, numSamples); } diff --git a/src/amd/addrlib/core/addrlib1.cpp b/src/amd/addrlib/core/addrlib1.cpp index 922455b..81455f0 100644 --- a/src/amd/addrlib/core/addrlib1.cpp +++ b/src/amd/addrlib/core/addrlib1.cpp @@ -317,29 +317,25 @@ ADDR_E_RETURNCODE AddrLib1::ComputeSurfaceInfo( { ADDR_ASSERT(!IsMacroTiled(localIn.tileMode)); } pOut->macroModeIndex = macroModeIndex; } } if (returnCode == ADDR_OK) { + // HWL layer may override tile mode if necessary + HwlOverrideTileMode(&localIn); + AddrTileMode tileMode = localIn.tileMode; - AddrTileType tileType = localIn.tileType; - // HWL layer may override tile mode if necessary - if (HwlOverrideTileMode(&localIn, &tileMode, &tileType)) - { - localIn.tileMode = tileMode; - localIn.tileType = tileType; - } // Optimize tile mode if possible if (OptimizeTileMode(&localIn, &tileMode)) { localIn.tileMode = tileMode; } } // Call main function to compute surface info if (returnCode == ADDR_OK) { @@ -1199,24 +1195,24 @@ ADDR_E_RETURNCODE AddrLib1::GetTileIndex( } return returnCode; } /** *************************************************************************************************** * AddrLib1::Thickness * * @brief -* Compute surface thickness +* Get tile mode thickness * * @return -* Surface thickness +* Tile mode thickness *************************************************************************************************** */ UINT_32 AddrLib1::Thickness( AddrTileMode tileMode) ///< [in] tile mode { return m_modeFlags[tileMode].thickness; } @@ -2729,20 +2725,233 @@ UINT_32 AddrLib1::ComputePipeFromAddr( // To get the pipe number, shift off the pipe interleave bits and mask the pipe bits. 
// pipe = static_cast<UINT_32>(addr >> Log2(groupBytes)) & (numPipes - 1); return pipe; } /** *************************************************************************************************** +* AddrLib1::ComputeMicroTileEquation +* +* @brief +* Compute micro tile equation +* +* @return +* If equation can be computed +* +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib1::ComputeMicroTileEquation( + UINT_32 log2BytesPP, ///< [in] log2 of bytes per pixel + AddrTileMode tileMode, ///< [in] tile mode + AddrTileType microTileType, ///< [in] pixel order in display/non-display mode + ADDR_EQUATION* pEquation ///< [out] equation + ) const +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + for (UINT_32 i = 0; i < log2BytesPP; i++) + { + pEquation->addr[i].valid = 1; + pEquation->addr[i].channel = 0; + pEquation->addr[i].index = i; + } + + ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[log2BytesPP]; + + ADDR_CHANNEL_SETTING x0 = InitChannel(1, 0, log2BytesPP + 0); + ADDR_CHANNEL_SETTING x1 = InitChannel(1, 0, log2BytesPP + 1); + ADDR_CHANNEL_SETTING x2 = InitChannel(1, 0, log2BytesPP + 2); + ADDR_CHANNEL_SETTING y0 = InitChannel(1, 1, 0); + ADDR_CHANNEL_SETTING y1 = InitChannel(1, 1, 1); + ADDR_CHANNEL_SETTING y2 = InitChannel(1, 1, 2); + ADDR_CHANNEL_SETTING z0 = InitChannel(1, 2, 0); + ADDR_CHANNEL_SETTING z1 = InitChannel(1, 2, 1); + ADDR_CHANNEL_SETTING z2 = InitChannel(1, 2, 2); + + UINT_32 thickness = Thickness(tileMode); + UINT_32 bpp = 1 << (log2BytesPP + 3); + + if (microTileType != ADDR_THICK) + { + if (microTileType == ADDR_DISPLAYABLE) + { + switch (bpp) + { + case 8: + pixelBit[0] = x0; + pixelBit[1] = x1; + pixelBit[2] = x2; + pixelBit[3] = y1; + pixelBit[4] = y0; + pixelBit[5] = y2; + break; + case 16: + pixelBit[0] = x0; + pixelBit[1] = x1; + pixelBit[2] = x2; + pixelBit[3] = y0; + pixelBit[4] = y1; + pixelBit[5] = y2; + break; + case 32: + pixelBit[0] = x0; + pixelBit[1] = x1; 
+ pixelBit[2] = y0; + pixelBit[3] = x2; + pixelBit[4] = y1; + pixelBit[5] = y2; + break; + case 64: + pixelBit[0] = x0; + pixelBit[1] = y0; + pixelBit[2] = x1; + pixelBit[3] = x2; + pixelBit[4] = y1; + pixelBit[5] = y2; + break; + case 128: + pixelBit[0] = y0; + pixelBit[1] = x0; + pixelBit[2] = x1; + pixelBit[3] = x2; + pixelBit[4] = y1; + pixelBit[5] = y2; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + } + else if (microTileType == ADDR_NON_DISPLAYABLE || microTileType == ADDR_DEPTH_SAMPLE_ORDER) + { + pixelBit[0] = x0; + pixelBit[1] = y0; + pixelBit[2] = x1; + pixelBit[3] = y1; + pixelBit[4] = x2; + pixelBit[5] = y2; + } + else if (microTileType == ADDR_ROTATED) + { + ADDR_ASSERT(thickness == 1); + + switch (bpp) + { + case 8: + pixelBit[0] = y0; + pixelBit[1] = y1; + pixelBit[2] = y2; + pixelBit[3] = x1; + pixelBit[4] = x0; + pixelBit[5] = x2; + break; + case 16: + pixelBit[0] = y0; + pixelBit[1] = y1; + pixelBit[2] = y2; + pixelBit[3] = x0; + pixelBit[4] = x1; + pixelBit[5] = x2; + break; + case 32: + pixelBit[0] = y0; + pixelBit[1] = y1; + pixelBit[2] = x0; + pixelBit[3] = y2; + pixelBit[4] = x1; + pixelBit[5] = x2; + break; + case 64: + pixelBit[0] = y0; + pixelBit[1] = x0; + pixelBit[2] = y1; + pixelBit[3] = x1; + pixelBit[4] = x2; + pixelBit[5] = y2; + break; + default: + retCode = ADDR_NOTSUPPORTED; + break; + } + } + + if (thickness > 1) + { + pixelBit[6] = z0; + pixelBit[7] = z1; + pEquation->numBits = 8 + log2BytesPP; + } + else + { + pEquation->numBits = 6 + log2BytesPP; + } + } + else // ADDR_THICK + { + ADDR_ASSERT(thickness > 1); + + switch (bpp) + { + case 8: + case 16: + pixelBit[0] = x0; + pixelBit[1] = y0; + pixelBit[2] = x1; + pixelBit[3] = y1; + pixelBit[4] = z0; + pixelBit[5] = z1; + break; + case 32: + pixelBit[0] = x0; + pixelBit[1] = y0; + pixelBit[2] = x1; + pixelBit[3] = z0; + pixelBit[4] = y1; + pixelBit[5] = z1; + break; + case 64: + case 128: + pixelBit[0] = y0; + pixelBit[1] = x0; + pixelBit[2] = z0; + pixelBit[3] = x1; 
+ pixelBit[4] = y1; + pixelBit[5] = z1; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + pixelBit[6] = x2; + pixelBit[7] = y2; + pEquation->numBits = 8 + log2BytesPP; + } + + if (thickness == 8) + { + pixelBit[8] = z2; + pEquation->numBits = 9 + log2BytesPP; + } + + // stackedDepthSlices is used for addressing mode that a tile block contains multiple slices, + // which is not supported by our address lib + pEquation->stackedDepthSlices = FALSE; + + return retCode; +} + +/** +*************************************************************************************************** * AddrLib1::ComputePixelIndexWithinMicroTile * * @brief * Compute the pixel index inside a micro tile of surface * * @return * Pixel index * *************************************************************************************************** */ diff --git a/src/amd/addrlib/core/addrlib1.h b/src/amd/addrlib/core/addrlib1.h index 13d915a..a852ac2 100644 --- a/src/amd/addrlib/core/addrlib1.h +++ b/src/amd/addrlib/core/addrlib1.h @@ -339,27 +339,23 @@ protected: ADDR_TILEINFO* pTileInfo, UINT_32* pX, UINT_32* pY, UINT_32* pSlice) const; // Surface mipmap VOID ComputeMipLevel( ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const; /// Pure Virtual function for Hwl checking degrade for base level virtual BOOL_32 HwlDegradeBaseLevel( const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const = 0; - virtual BOOL_32 HwlOverrideTileMode( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - AddrTileMode* pTileMode, - AddrTileType* pTileType) const + virtual VOID HwlOverrideTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const { - // not supported in hwl layer, FALSE for not-overrided - return FALSE; + // not supported in hwl layer } AddrTileMode DegradeLargeThickTile(AddrTileMode tileMode, UINT_32 bpp) const; VOID PadDimensions( AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 padDims, UINT_32 mipLevel, UINT_32* pPitch, UINT_32 pitchAlign, UINT_32* 
pHeight, UINT_32 heightAlign, UINT_32* pSlices, UINT_32 sliceAlign) const; @@ -384,40 +380,50 @@ protected: UINT_32 pitch, UINT_32 height, UINT_32 numSlices, UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample) const; VOID ComputeSurfaceCoordFromAddrMicroTiled( UINT_64 addr, UINT_32 bitPosition, UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits, UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const; + ADDR_E_RETURNCODE ComputeMicroTileEquation( + UINT_32 bpp, AddrTileMode tileMode, + AddrTileType microTileType, ADDR_EQUATION* pEquation) const; + UINT_32 ComputePixelIndexWithinMicroTile( UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 bpp, AddrTileMode tileMode, AddrTileType microTileType) const; /// Pure Virtual function for Hwl computing coord from offset inside micro tile virtual VOID HwlComputePixelCoordFromOffset( UINT_32 offset, UINT_32 bpp, UINT_32 numSamples, AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits, UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const = 0; // // Addressing shared by all // virtual UINT_32 HwlGetPipes( const ADDR_TILEINFO* pTileInfo) const; UINT_32 ComputePipeFromAddr( UINT_64 addr, UINT_32 numPipes) const; + virtual ADDR_E_RETURNCODE ComputePipeEquation( + UINT_32 log2BytesPP, UINT_32 threshX, UINT_32 threshY, ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const + { + return ADDR_NOTSUPPORTED; + } + /// Pure Virtual function for Hwl computing pipe from coord virtual UINT_32 ComputePipeFromCoord( UINT_32 x, UINT_32 y, UINT_32 slice, AddrTileMode tileMode, UINT_32 pipeSwizzle, BOOL_32 flags, ADDR_TILEINFO* pTileInfo) const = 0; /// Pure Virtual function for Hwl computing coord Y for 8 pipe cmask/htile virtual UINT_32 HwlComputeXmaskCoordYFrom8Pipe( UINT_32 pipe, UINT_32 x) const = 0; // diff --git 
a/src/amd/addrlib/r800/ciaddrlib.cpp b/src/amd/addrlib/r800/ciaddrlib.cpp index 7585e25..3322d95 100644 --- a/src/amd/addrlib/r800/ciaddrlib.cpp +++ b/src/amd/addrlib/r800/ciaddrlib.cpp @@ -475,20 +475,25 @@ BOOL_32 CiAddrLib::HwlInitGlobalParams( if (valid) { valid = InitTileSettingTable(pRegValue->pTileConfig, pRegValue->noOfEntries); } if (valid) { valid = InitMacroTileCfgTable(pRegValue->pMacroTileConfig, pRegValue->noOfMacroEntries); } + if (valid) + { + InitEquationTable(); + } + return valid; } /** *************************************************************************************************** * CiAddrLib::HwlPostCheckTileIndex * * @brief * Map a tile setting to index if curIndex is invalid, otherwise check if curIndex matches * tile mode/type/info and change the index if needed @@ -608,21 +613,21 @@ ADDR_E_RETURNCODE CiAddrLib::HwlSetupTileCfg( pInfo->macroAspectRatio = 1; pInfo->tileSplitBytes = 64; pInfo->pipeConfig = ADDR_PIPECFG_P2; } else if (static_cast<UINT_32>(index) >= m_noOfEntries) { returnCode = ADDR_INVALIDPARAMS; } else { - const ADDR_TILECONFIG* pCfgTable = GetTileSetting(index); + const AddrTileConfig* pCfgTable = GetTileSetting(index); if (pInfo != NULL) { if (IsMacroTiled(pCfgTable->mode)) { ADDR_ASSERT((macroModeIndex != TileIndexInvalid) && (macroModeIndex != TileIndexNoMacroIndex)); UINT_32 tileSplit; @@ -857,32 +862,30 @@ AddrTileMode CiAddrLib::HwlDegradeThickTileMode( } /** *************************************************************************************************** * CiAddrLib::HwlOverrideTileMode * * @brief * Override THICK to THIN, for specific formats on CI * * @return -* Suitable tile mode +* N/A * *************************************************************************************************** */ -BOOL_32 CiAddrLib::HwlOverrideTileMode( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure - AddrTileMode* pTileMode, ///< [in/out] pointer to the tile mode - AddrTileType* pTileType ///< [in/out] 
pointer to the tile type +VOID CiAddrLib::HwlOverrideTileMode( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in/out] input output structure ) const { - BOOL_32 bOverrided = FALSE; - AddrTileMode tileMode = *pTileMode; + AddrTileMode tileMode = pInOut->tileMode; + AddrTileType tileType = pInOut->tileType; // currently, all CI/VI family do not // support ADDR_TM_PRT_2D_TILED_THICK,ADDR_TM_PRT_3D_TILED_THICK and // ADDR_TM_PRT_2D_TILED_THIN1, ADDR_TM_PRT_3D_TILED_THIN1 switch (tileMode) { case ADDR_TM_PRT_2D_TILED_THICK: case ADDR_TM_PRT_3D_TILED_THICK: tileMode = ADDR_TM_PRT_TILED_THICK; break; @@ -895,21 +898,21 @@ BOOL_32 CiAddrLib::HwlOverrideTileMode( } // UBTS#404321, we do not need such overriding, as THICK+THICK entries removed from the tile-mode table if (!m_settings.isBonaire) { UINT_32 thickness = Thickness(tileMode); // tile_thickness = (array_mode == XTHICK) ? 8 : ((array_mode == THICK) ? 4 : 1) if (thickness > 1) { - switch (pIn->format) + switch (pInOut->format) { // see //gfxip/gcB/devel/cds/src/verif/tc/models/csim/tcp.cpp // tcpError("Thick micro tiling is not supported for format... 
case ADDR_FMT_X24_8_32_FLOAT: case ADDR_FMT_32_AS_8: case ADDR_FMT_32_AS_8_8: case ADDR_FMT_32_AS_32_32_32_32: // packed formats case ADDR_FMT_GB_GR: @@ -950,40 +953,80 @@ BOOL_32 CiAddrLib::HwlOverrideTileMode( case ADDR_TM_PRT_3D_TILED_THICK: tileMode = ADDR_TM_PRT_3D_TILED_THIN1; break; default: break; } // Switch tile type from thick to thin - if (tileMode != *pTileMode) + if (tileMode != pInOut->tileMode) { // see tileIndex: 13-18 - *pTileType = ADDR_NON_DISPLAYABLE; + tileType = ADDR_NON_DISPLAYABLE; } break; default: break; } } } - if (tileMode != *pTileMode) + // Override 2D/3D macro tile mode to PRT_* tile mode if + // client driver requests this surface is equation compatible + if ((pInOut->flags.needEquation == TRUE) && + (pInOut->numSamples <= 1) && + (IsMacroTiled(tileMode) == TRUE) && + (IsPrtTileMode(tileMode) == FALSE)) { - *pTileMode = tileMode; - bOverrided = TRUE; + UINT_32 thickness = Thickness(tileMode); + + if (thickness == 1) + { + tileMode = ADDR_TM_PRT_TILED_THIN1; + } + else + { + static const UINT_32 PrtTileBytes = 0x10000; + // First prt thick tile index in the tile mode table + static const UINT_32 PrtThickTileIndex = 22; + ADDR_TILEINFO tileInfo = {0}; + + HwlComputeMacroModeIndex(PrtThickTileIndex, + pInOut->flags, + pInOut->bpp, + pInOut->numSamples, + &tileInfo); + + UINT_32 macroTileBytes = ((pInOut->bpp) >> 3) * 64 * pInOut->numSamples * + thickness * HwlGetPipes(&tileInfo) * + tileInfo.banks * tileInfo.bankWidth * + tileInfo.bankHeight; + + if (macroTileBytes <= PrtTileBytes) + { + tileMode = ADDR_TM_PRT_TILED_THICK; + } + else + { + tileMode = ADDR_TM_PRT_TILED_THIN1; + } + } } - return bOverrided; + if (tileMode != pInOut->tileMode) + { + pInOut->tileMode = tileMode; + pInOut->tileType = tileType; + } } /** *************************************************************************************************** * CiAddrLib::HwlSetupTileInfo * * @brief * Setup default value of tile info for SI 
*************************************************************************************************** */ @@ -1009,21 +1052,24 @@ VOID CiAddrLib::HwlSetupTileInfo( if (!IsLinear(tileMode)) { // Thick tile modes must use thick micro tile mode but Bonaire does not support due to // old derived netlists (UBTS 404321) if (thickness > 1) { if (m_settings.isBonaire) { inTileType = ADDR_NON_DISPLAYABLE; } - else if ((m_allowNonDispThickModes == FALSE) || (inTileType != ADDR_NON_DISPLAYABLE)) + else if ((m_allowNonDispThickModes == FALSE) || + (inTileType != ADDR_NON_DISPLAYABLE) || + // There is no PRT_THICK + THIN entry in tile mode table except Bonaire + (IsPrtTileMode(tileMode) == TRUE)) { inTileType = ADDR_THICK; } } // 128 bpp tiling must be non-displayable. // Fmask reuse color buffer's entry but bank-height field can be from another entry // To simplify the logic, fmask entry should be picked from non-displayable ones else if (bpp == 128 || flags.fmask) { inTileType = ADDR_NON_DISPLAYABLE; @@ -1048,21 +1094,21 @@ VOID CiAddrLib::HwlSetupTileInfo( // tileSize = thickness * bpp * numSamples * 8 * 8 / 8 UINT_32 tileSize = thickness * bpp * numSamples * 8; // Turn off tc compatible if row_size is smaller than tile size (tile split occurs). if (m_rowSize < tileSize) { flags.tcCompatible = FALSE; pOut->tcCompatible = FALSE; } - if (flags.depth && (flags.nonSplit || flags.tcCompatible)) + if (flags.depth && (flags.nonSplit || flags.tcCompatible || flags.needEquation)) { // Texure readable depth surface should not be split switch (tileSize) { case 128: index = 1; break; case 256: index = 2; break; @@ -1270,30 +1316,30 @@ VOID CiAddrLib::HwlSetupTileInfo( pOut->tileIndex = 8; *pTileInfo = m_tileTable[8].info; } // Turn off tcCompatible for color surface if tileSplit happens. Depth/stencil is // handled at tileIndex selecting time. 
if (pOut->tcCompatible && (inTileType != ADDR_DEPTH_SAMPLE_ORDER)) { if (IsMacroTiled(tileMode)) { - UINT_32 tileIndex = static_cast<UINT_32>(pOut->tileIndex); + INT_32 tileIndex = pOut->tileIndex; if ((tileIndex == TileIndexInvalid) && (IsTileInfoAllZero(pTileInfo) == FALSE)) { tileIndex = HwlPostCheckTileIndex(pTileInfo, tileMode, inTileType, tileIndex); } if (tileIndex != TileIndexInvalid) { - ADDR_ASSERT(tileIndex < TileTableSize); + ADDR_ASSERT(static_cast<UINT_32>(tileIndex) < TileTableSize); // Non-depth entries store a split factor UINT_32 sampleSplit = m_tileTable[tileIndex].info.tileSplitBytes; UINT_32 tileBytes1x = BITS_TO_BYTES(bpp * MicroTilePixels * thickness); UINT_32 colorTileSplit = Max(256u, sampleSplit * tileBytes1x); if (m_rowSize < colorTileSplit) { pOut->tcCompatible = FALSE; } } @@ -1311,21 +1357,21 @@ VOID CiAddrLib::HwlSetupTileInfo( * CiAddrLib::ReadGbTileMode * * @brief * Convert GB_TILE_MODE HW value to ADDR_TILE_CONFIG. * @return * NA. *************************************************************************************************** */ VOID CiAddrLib::ReadGbTileMode( UINT_32 regValue, ///< [in] GB_TILE_MODE register - ADDR_TILECONFIG* pCfg ///< [out] output structure + AddrTileConfig* pCfg ///< [out] output structure ) const { GB_TILE_MODE gbTileMode; gbTileMode.val = regValue; pCfg->type = static_cast<AddrTileType>(gbTileMode.f.micro_tile_mode_new); pCfg->info.pipeConfig = static_cast<AddrPipeCfg>(gbTileMode.f.pipe_config + 1); if (pCfg->type == ADDR_DEPTH_SAMPLE_ORDER) { @@ -1908,10 +1954,11 @@ ADDR_E_RETURNCODE CiAddrLib::HwlGetMaxAlignments( } if (pOut != NULL) { pOut->baseAlign = maxBaseAlign; } return ADDR_OK; } + diff --git a/src/amd/addrlib/r800/ciaddrlib.h b/src/amd/addrlib/r800/ciaddrlib.h index 750b2b3..e959df3 100644 --- a/src/amd/addrlib/r800/ciaddrlib.h +++ b/src/amd/addrlib/r800/ciaddrlib.h @@ -134,24 +134,21 @@ protected: ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut) const; 
virtual VOID HwlFmaskPostThunkSurfInfo( const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut, ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut) const; virtual AddrTileMode HwlDegradeThickTileMode( AddrTileMode baseTileMode, UINT_32 numSlices, UINT_32* pBytesPerTile) const; - virtual BOOL_32 HwlOverrideTileMode( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - AddrTileMode* pTileMode, - AddrTileType* pTileType) const; + virtual VOID HwlOverrideTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; virtual ADDR_E_RETURNCODE HwlComputeDccInfo( const ADDR_COMPUTE_DCCINFO_INPUT* pIn, ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const; virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord( const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const; virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord( @@ -161,21 +158,21 @@ protected: virtual ADDR_E_RETURNCODE HwlGetMaxAlignments(ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) const; virtual VOID HwlPadDimensions( AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 padDims, UINT_32 mipLevel, UINT_32* pPitch, UINT_32 pitchAlign, UINT_32* pHeight, UINT_32 heightAlign, UINT_32* pSlices, UINT_32 sliceAlign) const; private: VOID ReadGbTileMode( - UINT_32 regValue, ADDR_TILECONFIG* pCfg) const; + UINT_32 regValue, AddrTileConfig* pCfg) const; VOID ReadGbMacroTileCfg( UINT_32 regValue, ADDR_TILEINFO* pCfg) const; BOOL_32 InitTileSettingTable( const UINT_32 *pSetting, UINT_32 noOfEntries); BOOL_32 InitMacroTileCfgTable( const UINT_32 *pSetting, UINT_32 noOfEntries); diff --git a/src/amd/addrlib/r800/egbaddrlib.cpp b/src/amd/addrlib/r800/egbaddrlib.cpp index 52cf59b..854d572 100644 --- a/src/amd/addrlib/r800/egbaddrlib.cpp +++ b/src/amd/addrlib/r800/egbaddrlib.cpp @@ -441,21 +441,23 @@ BOOL_32 EgBasedAddrLib::ComputeSurfaceInfoMacroTiled( // SanityCheckMacroTiled is called in ComputeSurfaceAlignmentsMacroTiled // valid = 
ComputeSurfaceAlignmentsMacroTiled(expTileMode, pIn->bpp, pIn->flags, pIn->mipLevel, numSamples, pOut->pTileInfo, &pOut->baseAlign, &pOut->pitchAlign, - &pOut->heightAlign); + &pOut->heightAlign, + &pOut->blockWidth, + &pOut->blockHeight); if (valid) { // // Compute the micro tile thickness. // microTileThickness = Thickness(expTileMode); // // Find the correct tiling mode for mip levels @@ -464,57 +466,56 @@ BOOL_32 EgBasedAddrLib::ComputeSurfaceInfoMacroTiled( { // // Try valid tile mode // expTileMode = ComputeSurfaceMipLevelTileMode(expTileMode, pIn->bpp, expPitch, expHeight, expNumSlices, numSamples, - pOut->pitchAlign, - pOut->heightAlign, + pOut->blockWidth, + pOut->blockHeight, pOut->pTileInfo); if (!IsMacroTiled(expTileMode)) // Downgraded to micro-tiled { return ComputeSurfaceInfoMicroTiled(pIn, pOut, padDims, expTileMode); } - else + else if (microTileThickness != Thickness(expTileMode)) { - if (microTileThickness != Thickness(expTileMode)) - { - // - // Re-compute if thickness changed since bank-height may be changed! - // - return ComputeSurfaceInfoMacroTiled(pIn, pOut, padDims, expTileMode); - } + // + // Re-compute if thickness changed since bank-height may be changed! 
+ // + return ComputeSurfaceInfoMacroTiled(pIn, pOut, padDims, expTileMode); } } paddedPitch = expPitch; paddedHeight = expHeight; // // Re-cal alignment // if (expTileMode != origTileMode) // Tile mode is changed but still macro-tiled { valid = ComputeSurfaceAlignmentsMacroTiled(expTileMode, pIn->bpp, pIn->flags, pIn->mipLevel, numSamples, pOut->pTileInfo, &pOut->baseAlign, &pOut->pitchAlign, - &pOut->heightAlign); + &pOut->heightAlign, + &pOut->blockWidth, + &pOut->blockHeight); } // // Do padding // PadDimensions(expTileMode, pIn->bpp, pIn->flags, numSamples, pOut->pTileInfo, @@ -528,40 +529,86 @@ BOOL_32 EgBasedAddrLib::ComputeSurfaceInfoMacroTiled( (pOut->pStereoInfo != NULL)) { UINT_32 stereoHeightAlign = HwlStereoCheckRightOffsetPadding(pOut->pTileInfo); if (stereoHeightAlign != 0) { paddedHeight = PowTwoAlign(paddedHeight, stereoHeightAlign); } } - // - // Compute the size of a slice. - // - bytesPerSlice = BITS_TO_BYTES(static_cast<UINT_64>(paddedPitch) * - paddedHeight * NextPow2(pIn->bpp) * numSamples); + if ((pIn->flags.needEquation == TRUE) && + (m_chipFamily == ADDR_CHIP_FAMILY_SI) && + (pIn->numMipLevels > 1) && + (pIn->mipLevel == 0)) + { + BOOL_32 convertTo1D = FALSE; + + ADDR_ASSERT(Thickness(expTileMode) == 1); + + for (UINT_32 i = 1; i < pIn->numMipLevels; i++) + { + UINT_32 mipPitch = Max(1u, paddedPitch >> i); + UINT_32 mipHeight = Max(1u, pIn->height >> i); + UINT_32 mipSlices = pIn->flags.volume ? 
+ Max(1u, pIn->numSlices >> i) : pIn->numSlices; + expTileMode = ComputeSurfaceMipLevelTileMode(expTileMode, + pIn->bpp, + mipPitch, + mipHeight, + mipSlices, + numSamples, + pOut->blockWidth, + pOut->blockHeight, + pOut->pTileInfo); + + if (IsMacroTiled(expTileMode)) + { + if (PowTwoAlign(mipPitch, pOut->blockWidth) != + PowTwoAlign(mipPitch, pOut->pitchAlign)) + { + convertTo1D = TRUE; + break; + } + } + else + { + break; + } + } + + if (convertTo1D) + { + return ComputeSurfaceInfoMicroTiled(pIn, pOut, padDims, ADDR_TM_1D_TILED_THIN1); + } + } pOut->pitch = paddedPitch; // Put this check right here to workaround special mipmap cases which the original height // is needed. // The original height is pre-stored in pOut->height in PostComputeMipLevel and // pOut->pitch is needed in HwlCheckLastMacroTiledLvl, too. if (m_configFlags.checkLast2DLevel && (numSamples == 1)) // Don't check MSAA { // Set a TRUE in pOut if next Level is the first 1D sub level HwlCheckLastMacroTiledLvl(pIn, pOut); } pOut->height = paddedHeight; pOut->depth = expNumSlices; + // + // Compute the size of a slice. + // + bytesPerSlice = BITS_TO_BYTES(static_cast<UINT_64>(paddedPitch) * + paddedHeight * NextPow2(pIn->bpp) * numSamples); + pOut->surfSize = bytesPerSlice * expNumSlices; pOut->tileMode = expTileMode; pOut->depthAlign = microTileThickness; } // if (valid) return valid; } @@ -790,21 +837,23 @@ BOOL_32 EgBasedAddrLib::HwlReduceBankWidthHeight( */ BOOL_32 EgBasedAddrLib::ComputeSurfaceAlignmentsMacroTiled( AddrTileMode tileMode, ///< [in] tile mode UINT_32 bpp, ///< [in] bits per pixel ADDR_SURFACE_FLAGS flags, ///< [in] surface flags UINT_32 mipLevel, ///< [in] mip level UINT_32 numSamples, ///< [in] number of samples ADDR_TILEINFO* pTileInfo, ///< [in/out] bank structure. 
UINT_32* pBaseAlign, ///< [out] base address alignment in bytes UINT_32* pPitchAlign, ///< [out] pitch alignment in pixels - UINT_32* pHeightAlign ///< [out] height alignment in pixels + UINT_32* pHeightAlign, ///< [out] height alignment in pixels + UINT_32* pMacroTileWidth, ///< [out] macro tile width in pixels + UINT_32* pMacroTileHeight ///< [out] macro tile height in pixels ) const { BOOL_32 valid = SanityCheckMacroTiled(pTileInfo); if (valid) { UINT_32 macroTileWidth; UINT_32 macroTileHeight; UINT_32 tileSize; @@ -851,30 +900,32 @@ BOOL_32 EgBasedAddrLib::ComputeSurfaceAlignmentsMacroTiled( pipes, pTileInfo); // // The required granularity for pitch is the macro tile width. // macroTileWidth = MicroTileWidth * pTileInfo->bankWidth * pipes * pTileInfo->macroAspectRatio; *pPitchAlign = macroTileWidth; + *pMacroTileWidth = macroTileWidth; AdjustPitchAlignment(flags, pPitchAlign); // // The required granularity for height is the macro tile height. // macroTileHeight = MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks / pTileInfo->macroAspectRatio; *pHeightAlign = macroTileHeight; + *pMacroTileHeight = macroTileHeight; // // Compute base alignment // *pBaseAlign = pipes * pTileInfo->bankWidth * pTileInfo->banks * pTileInfo->bankHeight * tileSize; if ((mipLevel == 0) && (flags.prt) && (m_chipFamily == ADDR_CHIP_FAMILY_SI)) { static const UINT_32 PrtTileSize = 0x10000; @@ -1106,20 +1157,22 @@ BOOL_32 EgBasedAddrLib::HwlDegradeBaseLevel( const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const { BOOL_32 degrade = FALSE; BOOL_32 valid = TRUE; ADDR_ASSERT(IsMacroTiled(pIn->tileMode)); UINT_32 baseAlign; UINT_32 pitchAlign; UINT_32 heightAlign; + UINT_32 macroTileWidth; + UINT_32 macroTileHeight; ADDR_ASSERT(pIn->pTileInfo); ADDR_TILEINFO tileInfo = *pIn->pTileInfo; ADDR_COMPUTE_SURFACE_INFO_OUTPUT out = {0}; if (UseTileIndex(pIn->tileIndex)) { out.tileIndex = pIn->tileIndex; out.macroModeIndex = TileIndexInvalid; } @@ -1136,25 +1189,27 @@ BOOL_32 
EgBasedAddrLib::HwlDegradeBaseLevel( &out); valid = ComputeSurfaceAlignmentsMacroTiled(pIn->tileMode, pIn->bpp, pIn->flags, pIn->mipLevel, pIn->numSamples, &tileInfo, &baseAlign, &pitchAlign, - &heightAlign); + &heightAlign, + &macroTileWidth, + &macroTileHeight); if (valid) { - degrade = (pIn->width < pitchAlign || pIn->height < heightAlign); + degrade = ((pIn->width < macroTileWidth) || (pIn->height < macroTileHeight)); // Check whether 2D tiling still has too much footprint if (degrade == FALSE) { // Only check width and height as slices are aligned to thickness UINT_64 unalignedSize = pIn->width * pIn->height; UINT_32 alignedPitch = PowTwoAlign(pIn->width, pitchAlign); UINT_32 alignedHeight = PowTwoAlign(pIn->height, heightAlign); UINT_64 alignedSize = alignedPitch * alignedHeight; @@ -1406,20 +1461,151 @@ UINT_64 EgBasedAddrLib::DispatchComputeSurfaceAddrFromCoord( addr = addr | static_cast<UINT_64>(addr5Bit << 5); } } #endif return addr; } /** *************************************************************************************************** +* EgBasedAddrLib::ComputeMacroTileEquation +* +* @brief +* Computes the address equation in macro tile +* @return +* If equation can be computed +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE EgBasedAddrLib::ComputeMacroTileEquation( + UINT_32 log2BytesPP, ///< [in] log2 of bytes per pixel + AddrTileMode tileMode, ///< [in] tile mode + AddrTileType microTileType, ///< [in] micro tiling type + ADDR_TILEINFO* pTileInfo, ///< [in] bank structure + ADDR_EQUATION* pEquation ///< [out] Equation for addressing in macro tile + ) const +{ + ADDR_E_RETURNCODE retCode; + + // Element equation within a tile + retCode = ComputeMicroTileEquation(log2BytesPP, tileMode, microTileType, pEquation); + + if (retCode == ADDR_OK) + { + // Tile equation with single pipe bank + UINT_32 numPipes = HwlGetPipes(pTileInfo); + UINT_32 numPipeBits = Log2(numPipes); + + for (UINT_32 
i = 0; i < Log2(pTileInfo->bankWidth); i++) + { + pEquation->addr[pEquation->numBits].valid = 1; + pEquation->addr[pEquation->numBits].channel = 0; + pEquation->addr[pEquation->numBits].index = i + log2BytesPP + 3 + numPipeBits; + pEquation->numBits++; + } + + for (UINT_32 i = 0; i < Log2(pTileInfo->bankHeight); i++) + { + pEquation->addr[pEquation->numBits].valid = 1; + pEquation->addr[pEquation->numBits].channel = 1; + pEquation->addr[pEquation->numBits].index = i + 3; + pEquation->numBits++; + } + + ADDR_EQUATION equation; + memset(&equation, 0, sizeof(ADDR_EQUATION)); + + UINT_32 thresholdX = 32; + UINT_32 thresholdY = 32; + + if (IsPrtNoRotationTileMode(tileMode)) + { + UINT_32 macroTilePitch = + (MicroTileWidth * pTileInfo->bankWidth * numPipes) * pTileInfo->macroAspectRatio; + UINT_32 macroTileHeight = + (MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks) / + pTileInfo->macroAspectRatio; + thresholdX = Log2(macroTilePitch); + thresholdY = Log2(macroTileHeight); + } + + // Pipe equation + retCode = ComputePipeEquation(log2BytesPP, thresholdX, thresholdY, pTileInfo, &equation); + + if (retCode == ADDR_OK) + { + UINT_32 pipeBitStart = Log2(m_pipeInterleaveBytes); + + if (pEquation->numBits > pipeBitStart) + { + UINT_32 numLeftShift = pEquation->numBits - pipeBitStart; + + for (UINT_32 i = 0; i < numLeftShift; i++) + { + pEquation->addr[pEquation->numBits + equation.numBits - i - 1] = + pEquation->addr[pEquation->numBits - i - 1]; + pEquation->xor1[pEquation->numBits + equation.numBits - i - 1] = + pEquation->xor1[pEquation->numBits - i - 1]; + pEquation->xor2[pEquation->numBits + equation.numBits - i - 1] = + pEquation->xor2[pEquation->numBits - i - 1]; + } + } + + for (UINT_32 i = 0; i < equation.numBits; i++) + { + pEquation->addr[pipeBitStart + i] = equation.addr[i]; + pEquation->xor1[pipeBitStart + i] = equation.xor1[i]; + pEquation->xor2[pipeBitStart + i] = equation.xor2[i]; + pEquation->numBits++; + } + + // Bank equation + memset(&equation, 0, 
sizeof(ADDR_EQUATION)); + + retCode = ComputeBankEquation(log2BytesPP, thresholdX, thresholdY, + pTileInfo, &equation); + + if (retCode == ADDR_OK) + { + UINT_32 bankBitStart = pipeBitStart + numPipeBits + Log2(m_bankInterleave); + + if (pEquation->numBits > bankBitStart) + { + UINT_32 numLeftShift = pEquation->numBits - bankBitStart; + + for (UINT_32 i = 0; i < numLeftShift; i++) + { + pEquation->addr[pEquation->numBits + equation.numBits - i - 1] = + pEquation->addr[pEquation->numBits - i - 1]; + pEquation->xor1[pEquation->numBits + equation.numBits - i - 1] = + pEquation->xor1[pEquation->numBits - i - 1]; + pEquation->xor2[pEquation->numBits + equation.numBits - i - 1] = + pEquation->xor2[pEquation->numBits - i - 1]; + } + } + + for (UINT_32 i = 0; i < equation.numBits; i++) + { + pEquation->addr[bankBitStart + i] = equation.addr[i]; + pEquation->xor1[bankBitStart + i] = equation.xor1[i]; + pEquation->xor2[bankBitStart + i] = equation.xor2[i]; + pEquation->numBits++; + } + } + } + } + + return retCode; +} + +/** +*************************************************************************************************** * EgBasedAddrLib::ComputeSurfaceAddrFromCoordMicroTiled * * @brief * Computes the surface address and bit position from a * coordinate for 2D tilied (macro tiled) * @return * The byte address *************************************************************************************************** */ UINT_64 EgBasedAddrLib::ComputeSurfaceAddrFromCoordMacroTiled( diff --git a/src/amd/addrlib/r800/egbaddrlib.h b/src/amd/addrlib/r800/egbaddrlib.h index d43eca8..a424082 100644 --- a/src/amd/addrlib/r800/egbaddrlib.h +++ b/src/amd/addrlib/r800/egbaddrlib.h @@ -247,20 +247,27 @@ protected: UINT_32 GetBankPipeSwizzle( UINT_32 bankSwizzle, UINT_32 pipeSwizzle, UINT_64 baseAddr, ADDR_TILEINFO* pTileInfo) const; UINT_32 ComputeSliceTileSwizzle( AddrTileMode tileMode, UINT_32 baseSwizzle, UINT_32 slice, UINT_64 baseAddr, ADDR_TILEINFO* pTileInfo) const; /// Addressing 
functions + virtual ADDR_E_RETURNCODE ComputeBankEquation( + UINT_32 log2BytesPP, UINT_32 threshX, UINT_32 threshY, + ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const + { + return ADDR_NOTSUPPORTED; + } + UINT_32 ComputeBankFromCoord( UINT_32 x, UINT_32 y, UINT_32 slice, AddrTileMode tileMode, UINT_32 bankSwizzle, UINT_32 tileSpitSlice, ADDR_TILEINFO* pTileInfo) const; UINT_32 ComputeBankFromAddr( UINT_64 addr, UINT_32 numBanks, UINT_32 numPipes) const; UINT_32 ComputePipeRotation( AddrTileMode tileMode, UINT_32 numPipes) const; @@ -274,20 +281,24 @@ protected: UINT_32 bank, UINT_32 pipe, UINT_32 bankSwizzle, UINT_32 pipeSwizzle, UINT_32 tileSlices, ADDR_TILEINFO* pTileInfo, CoordFromBankPipe *pOutput) const; /// Htile/Cmask functions UINT_64 ComputeHtileBytes( UINT_32 pitch, UINT_32 height, UINT_32 bpp, BOOL_32 isLinear, UINT_32 numSlices, UINT_64* sliceBytes, UINT_32 baseAlign) const; + ADDR_E_RETURNCODE ComputeMacroTileEquation( + UINT_32 log2BytesPP, AddrTileMode tileMode, AddrTileType microTileType, + ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const; + // Static functions static BOOL_32 IsTileInfoAllZero(ADDR_TILEINFO* pTileInfo); static UINT_32 ComputeFmaskNumPlanesFromNumSamples(UINT_32 numSamples); static UINT_32 ComputeFmaskResolvedBppFromNumSamples(UINT_32 numSamples); private: BOOL_32 ComputeSurfaceInfoLinear( const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut, @@ -311,21 +322,22 @@ private: BOOL_32 ComputeSurfaceAlignmentsMicroTiled( AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 mipLevel, UINT_32 numSamples, UINT_32* pBaseAlign, UINT_32* pPitchAlign, UINT_32* pHeightAlign) const; BOOL_32 ComputeSurfaceAlignmentsMacroTiled( AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 mipLevel, UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, - UINT_32* pBaseAlign, UINT_32* pPitchAlign, UINT_32* pHeightAlign) const; + UINT_32* pBaseAlign, UINT_32* pPitchAlign, UINT_32* 
pHeightAlign, + UINT_32* pMacroTileWidth, UINT_32* pMacroTileHeight) const; /// Surface addressing functions UINT_64 DispatchComputeSurfaceAddrFromCoord( const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; VOID DispatchComputeSurfaceCoordFromAddr( const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const; diff --git a/src/amd/addrlib/r800/siaddrlib.cpp b/src/amd/addrlib/r800/siaddrlib.cpp index 694c0f3..686bb7f 100644 --- a/src/amd/addrlib/r800/siaddrlib.cpp +++ b/src/amd/addrlib/r800/siaddrlib.cpp @@ -66,21 +66,22 @@ AddrLib* AddrSIHwlInit(const AddrClient* pClient) *************************************************************************************************** * SiAddrLib::SiAddrLib * * @brief * Constructor * *************************************************************************************************** */ SiAddrLib::SiAddrLib(const AddrClient* pClient) : EgBasedAddrLib(pClient), - m_noOfEntries(0) + m_noOfEntries(0), + m_numEquations(0) { m_class = SI_ADDRLIB; memset(&m_settings, 0, sizeof(m_settings)); } /** *************************************************************************************************** * SiAddrLib::~SiAddrLib * * @brief @@ -161,20 +162,352 @@ UINT_32 SiAddrLib::GetPipePerSurf( break; default: ADDR_ASSERT(!"Invalid pipe config"); numPipes = m_pipes; } return numPipes; } /** *************************************************************************************************** +* SiAddrLib::ComputeBankEquation +* +* @brief +* Compute bank equation +* +* @return +* If equation can be computed +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE SiAddrLib::ComputeBankEquation( + UINT_32 log2BytesPP, ///< [in] log2 of bytes per pixel + UINT_32 threshX, ///< [in] threshold for x channel + UINT_32 threshY, ///< [in] threshold for y channel + ADDR_TILEINFO* pTileInfo, 
///< [in] tile info + ADDR_EQUATION* pEquation ///< [out] bank equation + ) const +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + UINT_32 pipes = HwlGetPipes(pTileInfo); + UINT_32 bankXStart = 3 + Log2(pipes) + Log2(pTileInfo->bankWidth); + UINT_32 bankYStart = 3 + Log2(pTileInfo->bankHeight); + + ADDR_CHANNEL_SETTING x3 = InitChannel(1, 0, log2BytesPP + bankXStart); + ADDR_CHANNEL_SETTING x4 = InitChannel(1, 0, log2BytesPP + bankXStart + 1); + ADDR_CHANNEL_SETTING x5 = InitChannel(1, 0, log2BytesPP + bankXStart + 2); + ADDR_CHANNEL_SETTING x6 = InitChannel(1, 0, log2BytesPP + bankXStart + 3); + ADDR_CHANNEL_SETTING y3 = InitChannel(1, 1, bankYStart); + ADDR_CHANNEL_SETTING y4 = InitChannel(1, 1, bankYStart + 1); + ADDR_CHANNEL_SETTING y5 = InitChannel(1, 1, bankYStart + 2); + ADDR_CHANNEL_SETTING y6 = InitChannel(1, 1, bankYStart + 3); + + x3.value = (threshX > bankXStart) ? x3.value : 0; + x4.value = (threshX > bankXStart + 1) ? x4.value : 0; + x5.value = (threshX > bankXStart + 2) ? x5.value : 0; + x6.value = (threshX > bankXStart + 3) ? x6.value : 0; + y3.value = (threshY > bankYStart) ? y3.value : 0; + y4.value = (threshY > bankYStart + 1) ? y4.value : 0; + y5.value = (threshY > bankYStart + 2) ? y5.value : 0; + y6.value = (threshY > bankYStart + 3) ? 
y6.value : 0; + + switch (pTileInfo->banks) + { + case 16: + pEquation->addr[0] = y6; + pEquation->xor1[0] = x3; + pEquation->addr[1] = y5; + pEquation->xor1[1] = y6; + pEquation->xor2[1] = x4; + pEquation->addr[2] = y4; + pEquation->xor1[2] = x5; + pEquation->addr[3] = y3; + pEquation->xor1[3] = x6; + pEquation->numBits = 4; + break; + case 8: + pEquation->addr[0] = y5; + pEquation->xor1[0] = x3; + pEquation->addr[1] = y4; + pEquation->xor1[1] = y5; + pEquation->xor2[1] = x4; + pEquation->addr[2] = y3; + pEquation->xor1[2] = x5; + pEquation->numBits = 3; + break; + case 4: + pEquation->addr[0] = y4; + pEquation->xor1[0] = x3; + pEquation->addr[1] = y3; + pEquation->xor1[1] = x4; + pEquation->numBits = 2; + break; + case 2: + pEquation->addr[0] = y3; + pEquation->xor1[0] = x3; + pEquation->numBits = 1; + break; + default: + pEquation->numBits = 0; + retCode = ADDR_NOTSUPPORTED; + ADDR_ASSERT_ALWAYS(); + break; + } + + for (UINT_32 i = 0; i < pEquation->numBits; i++) + { + if (pEquation->addr[i].value == 0) + { + if (pEquation->xor1[i].value == 0) + { + // 00X -> X00 + pEquation->addr[i].value = pEquation->xor2[i].value; + pEquation->xor2[i].value = 0; + } + else + { + pEquation->addr[i].value = pEquation->xor1[i].value; + + if (pEquation->xor2[i].value != 0) + { + // 0XY -> XY0 + pEquation->xor1[i].value = pEquation->xor2[i].value; + pEquation->xor2[i].value = 0; + } + else + { + // 0X0 -> X00 + pEquation->xor1[i].value = 0; + } + } + } + else if (pEquation->xor1[i].value == 0) + { + if (pEquation->xor2[i].value != 0) + { + // X0Y -> XY0 + pEquation->xor1[i].value = pEquation->xor2[i].value; + pEquation->xor2[i].value = 0; + } + } + } + + if ((pTileInfo->bankWidth == 1) && + ((pTileInfo->pipeConfig == ADDR_PIPECFG_P4_32x32) || + (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32))) + { + retCode = ADDR_NOTSUPPORTED; + } + + return retCode; +} + +/** +*************************************************************************************************** +* 
SiAddrLib::ComputePipeEquation +* +* @brief +* Compute pipe equation +* +* @return +* If equation can be computed +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE SiAddrLib::ComputePipeEquation( + UINT_32 log2BytesPP, ///< [in] Log2 of bytes per pixel + UINT_32 threshX, ///< [in] Threshold for X channel + UINT_32 threshY, ///< [in] Threshold for Y channel + ADDR_TILEINFO* pTileInfo, ///< [in] Tile info + ADDR_EQUATION* pEquation ///< [out] Pipe configure + ) const +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + ADDR_CHANNEL_SETTING* pAddr = pEquation->addr; + ADDR_CHANNEL_SETTING* pXor1 = pEquation->xor1; + ADDR_CHANNEL_SETTING* pXor2 = pEquation->xor2; + + ADDR_CHANNEL_SETTING x3 = InitChannel(1, 0, 3 + log2BytesPP); + ADDR_CHANNEL_SETTING x4 = InitChannel(1, 0, 4 + log2BytesPP); + ADDR_CHANNEL_SETTING x5 = InitChannel(1, 0, 5 + log2BytesPP); + ADDR_CHANNEL_SETTING x6 = InitChannel(1, 0, 6 + log2BytesPP); + ADDR_CHANNEL_SETTING y3 = InitChannel(1, 1, 3); + ADDR_CHANNEL_SETTING y4 = InitChannel(1, 1, 4); + ADDR_CHANNEL_SETTING y5 = InitChannel(1, 1, 5); + ADDR_CHANNEL_SETTING y6 = InitChannel(1, 1, 6); + + x3.value = (threshX > 3) ? x3.value : 0; + x4.value = (threshX > 4) ? x4.value : 0; + x5.value = (threshX > 5) ? x5.value : 0; + x6.value = (threshX > 6) ? x6.value : 0; + y3.value = (threshY > 3) ? y3.value : 0; + y4.value = (threshY > 4) ? y4.value : 0; + y5.value = (threshY > 5) ? y5.value : 0; + y6.value = (threshY > 6) ? 
y6.value : 0; + + switch (pTileInfo->pipeConfig) + { + case ADDR_PIPECFG_P2: + pAddr[0] = x3; + pXor1[0] = y3; + pEquation->numBits = 1; + break; + case ADDR_PIPECFG_P4_8x16: + pAddr[0] = x4; + pXor1[0] = y3; + pAddr[1] = x3; + pXor1[1] = y4; + pEquation->numBits = 2; + break; + case ADDR_PIPECFG_P4_16x16: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x4; + pAddr[1] = x4; + pXor1[1] = y4; + pEquation->numBits = 2; + break; + case ADDR_PIPECFG_P4_16x32: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x4; + pAddr[1] = x4; + pXor1[1] = y5; + pEquation->numBits = 2; + break; + case ADDR_PIPECFG_P4_32x32: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x5; + pAddr[1] = x5; + pXor1[1] = y5; + pEquation->numBits = 2; + break; + case ADDR_PIPECFG_P8_16x16_8x16: + pAddr[0] = x4; + pXor1[0] = y3; + pXor2[0] = x5; + pAddr[1] = x3; + pXor1[1] = y5; + pEquation->numBits = 3; + break; + case ADDR_PIPECFG_P8_16x32_8x16: + pAddr[0] = x4; + pXor1[0] = y3; + pXor2[0] = x5; + pAddr[1] = x3; + pXor1[1] = y4; + pAddr[2] = x4; + pXor1[2] = y5; + pEquation->numBits = 3; + break; + case ADDR_PIPECFG_P8_16x32_16x16: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x4; + pAddr[1] = x5; + pXor1[1] = y4; + pAddr[2] = x4; + pXor1[2] = y5; + pEquation->numBits = 3; + break; + case ADDR_PIPECFG_P8_32x32_8x16: + pAddr[0] = x4; + pXor1[0] = y3; + pXor2[0] = x5; + pAddr[1] = x3; + pXor1[1] = y4; + pAddr[2] = x5; + pXor1[2] = y5; + pEquation->numBits = 3; + break; + case ADDR_PIPECFG_P8_32x32_16x16: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x4; + pAddr[1] = x4; + pXor1[1] = y4; + pAddr[2] = x5; + pXor1[2] = y5; + pEquation->numBits = 3; + break; + case ADDR_PIPECFG_P8_32x32_16x32: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x4; + pAddr[1] = x4; + pXor1[1] = y6; + pAddr[2] = x5; + pXor1[2] = y5; + pEquation->numBits = 3; + break; + case ADDR_PIPECFG_P8_32x64_32x32: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x5; + pAddr[1] = x6; + pXor1[1] = y5; + pAddr[2] = x5; + pXor1[2] = y6; + 
pEquation->numBits = 3; + break; + case ADDR_PIPECFG_P16_32x32_8x16: + pAddr[0] = x4; + pXor1[0] = y3; + pAddr[1] = x3; + pXor1[1] = y4; + pAddr[2] = x5; + pXor1[2] = y6; + pAddr[3] = x6; + pXor1[3] = y5; + pEquation->numBits = 4; + break; + case ADDR_PIPECFG_P16_32x32_16x16: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x4; + pAddr[1] = x4; + pXor1[1] = y4; + pAddr[2] = x5; + pXor1[2] = y6; + pAddr[3] = x6; + pXor1[3] = y5; + pEquation->numBits = 4; + break; + default: + ADDR_UNHANDLED_CASE(); + pEquation->numBits = 0; + retCode = ADDR_NOTSUPPORTED; + break; + } + + for (UINT_32 i = 0; i < pEquation->numBits; i++) + { + if (pAddr[i].value == 0) + { + if (pXor1[i].value == 0) + { + pAddr[i].value = pXor2[i].value; + } + else + { + pAddr[i].value = pXor1[i].value; + pXor1[i].value = 0; + } + } + } + + return retCode; +} + +/** +*************************************************************************************************** * SiAddrLib::ComputePipeFromCoord * * @brief * Compute pipe number from coordinates * @return * Pipe number *************************************************************************************************** */ UINT_32 SiAddrLib::ComputePipeFromCoord( UINT_32 x, ///< [in] x coordinate @@ -1882,20 +2215,25 @@ BOOL_32 SiAddrLib::HwlInitGlobalParams( m_pipes = 4; } else { // Hainan is 2-pipe (m_settings.isHainan == 1) m_pipes = 2; } valid = InitTileSettingTable(pRegValue->pTileConfig, pRegValue->noOfEntries); + if (valid) + { + InitEquationTable(); + } + m_maxSamples = 16; } return valid; } /** *************************************************************************************************** * SiAddrLib::HwlConvertTileInfoToHW * @brief @@ -2167,21 +2505,45 @@ UINT_32 SiAddrLib::HwlPreAdjustBank( * ADDR_E_RETURNCODE *************************************************************************************************** */ ADDR_E_RETURNCODE SiAddrLib::HwlComputeSurfaceInfo( const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure 
ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure ) const { pOut->tileIndex = pIn->tileIndex; - return EgBasedAddrLib::HwlComputeSurfaceInfo(pIn,pOut); + ADDR_E_RETURNCODE retCode = EgBasedAddrLib::HwlComputeSurfaceInfo(pIn, pOut); + + UINT_32 tileIndex = static_cast<UINT_32>(pOut->tileIndex); + + if ((pIn->flags.needEquation == TRUE) && + (pIn->numSamples <= 1) && + (tileIndex < TileTableSize)) + { + pOut->equationIndex = m_equationLookupTable[Log2(pIn->bpp >> 3)][tileIndex]; + + if (pOut->equationIndex != ADDR_INVALID_EQUATION_INDEX) + { + pOut->blockWidth = m_blockWidth[pOut->equationIndex]; + + pOut->blockHeight = m_blockHeight[pOut->equationIndex]; + + pOut->blockSlices = m_blockSlices[pOut->equationIndex]; + } + } + else + { + pOut->equationIndex = ADDR_INVALID_EQUATION_INDEX; + } + + return retCode; } /** *************************************************************************************************** * SiAddrLib::HwlComputeMipLevel * @brief * Compute MipLevel info (including level 0) * @return * TRUE if HWL's handled *************************************************************************************************** @@ -2275,22 +2637,22 @@ VOID SiAddrLib::HwlCheckLastMacroTiledLvl( { nextSlices = pIn->numSlices; } nextTileMode = ComputeSurfaceMipLevelTileMode(pIn->tileMode, pIn->bpp, nextPitch, nextHeight, nextSlices, pIn->numSamples, - pOut->pitchAlign, - pOut->heightAlign, + pOut->blockWidth, + pOut->blockHeight, pOut->pTileInfo); pOut->last2DLevel = IsMicroTiled(nextTileMode); } } /** *************************************************************************************************** * SiAddrLib::HwlDegradeThickTileMode * @@ -2338,21 +2700,21 @@ BOOL_32 SiAddrLib::HwlTileInfoEqual( /** *************************************************************************************************** * SiAddrLib::GetTileSettings * * @brief * Get tile setting infos by index. * @return * Tile setting info. 
*************************************************************************************************** */ -const ADDR_TILECONFIG* SiAddrLib::GetTileSetting( +const AddrTileConfig* SiAddrLib::GetTileSetting( UINT_32 index ///< [in] Tile index ) const { ADDR_ASSERT(index < m_noOfEntries); return &m_tileTable[index]; } /** *************************************************************************************************** * SiAddrLib::HwlPostCheckTileIndex @@ -2477,21 +2839,21 @@ ADDR_E_RETURNCODE SiAddrLib::HwlSetupTileCfg( pInfo->tileSplitBytes = 64; pInfo->pipeConfig = ADDR_PIPECFG_P2; } } else if (static_cast<UINT_32>(index) >= m_noOfEntries) { returnCode = ADDR_INVALIDPARAMS; } else { - const ADDR_TILECONFIG* pCfgTable = GetTileSetting(index); + const AddrTileConfig* pCfgTable = GetTileSetting(index); if (pInfo) { *pInfo = pCfgTable->info; } else { if (IsMacroTiled(pCfgTable->mode)) { returnCode = ADDR_INVALIDPARAMS; @@ -2518,21 +2880,21 @@ ADDR_E_RETURNCODE SiAddrLib::HwlSetupTileCfg( * SiAddrLib::ReadGbTileMode * * @brief * Convert GB_TILE_MODE HW value to ADDR_TILE_CONFIG. * @return * NA. 
*************************************************************************************************** */ VOID SiAddrLib::ReadGbTileMode( UINT_32 regValue, ///< [in] GB_TILE_MODE register - ADDR_TILECONFIG* pCfg ///< [out] output structure + AddrTileConfig* pCfg ///< [out] output structure ) const { GB_TILE_MODE gbTileMode; gbTileMode.val = regValue; pCfg->type = static_cast<AddrTileType>(gbTileMode.f.micro_tile_mode); pCfg->info.bankHeight = 1 << gbTileMode.f.bank_height; pCfg->info.bankWidth = 1 << gbTileMode.f.bank_width; pCfg->info.banks = 1 << (gbTileMode.f.num_banks + 1); pCfg->info.macroAspectRatio = 1 << gbTileMode.f.macro_tile_aspect; @@ -2766,32 +3128,29 @@ UINT_32 SiAddrLib::HwlComputeFmaskBits( } /** *************************************************************************************************** * SiAddrLib::HwlOverrideTileMode * * @brief * Override tile modes (for PRT only, avoid client passes in an invalid PRT mode for SI. * * @return -* Suitable tile mode +* N/A * *************************************************************************************************** */ -BOOL_32 SiAddrLib::HwlOverrideTileMode( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure - AddrTileMode* pTileMode, ///< [in/out] pointer to the tile mode - AddrTileType* pTileType ///< [in/out] pointer to the tile type +void SiAddrLib::HwlOverrideTileMode( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in/out] input output structure ) const { - BOOL_32 bOverrided = FALSE; - AddrTileMode tileMode = *pTileMode; + AddrTileMode tileMode = pInOut->tileMode; switch (tileMode) { case ADDR_TM_PRT_TILED_THIN1: tileMode = ADDR_TM_2D_TILED_THIN1; break; case ADDR_TM_PRT_TILED_THICK: tileMode = ADDR_TM_2D_TILED_THICK; break; @@ -2801,28 +3160,48 @@ BOOL_32 SiAddrLib::HwlOverrideTileMode( break; case ADDR_TM_PRT_3D_TILED_THICK: tileMode = ADDR_TM_3D_TILED_THICK; break; default: break; } - if (tileMode != *pTileMode) + if ((pInOut->flags.needEquation == TRUE) && + 
(IsMacroTiled(tileMode) == TRUE) && + (pInOut->numSamples <= 1)) { - *pTileMode = tileMode; - bOverrided = TRUE; - ADDR_ASSERT(pIn->flags.prt == TRUE); + UINT_32 thickness = Thickness(tileMode); + + pInOut->flags.prt = TRUE; + + if (thickness > 1) + { + tileMode = ADDR_TM_1D_TILED_THICK; + } + else if (pInOut->numSlices > 1) + { + tileMode = ADDR_TM_1D_TILED_THIN1; + } + else + { + tileMode = ADDR_TM_2D_TILED_THIN1; + } } - return bOverrided; + if (tileMode != pInOut->tileMode) + { + pInOut->tileMode = tileMode; + + ADDR_ASSERT(pInOut->flags.prt == TRUE); + } } /** *************************************************************************************************** * SiAddrLib::HwlGetMaxAlignments * * @brief * Gets maximum alignments * @return * ADDR_E_RETURNCODE @@ -2857,10 +3236,256 @@ ADDR_E_RETURNCODE SiAddrLib::HwlGetMaxAlignments( } if (pOut != NULL) { pOut->baseAlign = maxBaseAlign; } return ADDR_OK; } +/** +*************************************************************************************************** +* SiAddrLib::InitEquationTable +* +* @brief +* Initialize Equation table. 
+* +* @return +* N/A +*************************************************************************************************** +*/ +VOID SiAddrLib::InitEquationTable() +{ + ADDR_EQUATION_KEY equationKeyTable[EquationTableSize]; + memset(equationKeyTable, 0, sizeof(equationKeyTable)); + + memset(m_equationTable, 0, sizeof(m_equationTable)); + + memset(m_blockWidth, 0, sizeof(m_blockWidth)); + + memset(m_blockHeight, 0, sizeof(m_blockHeight)); + + memset(m_blockSlices, 0, sizeof(m_blockSlices)); + + // Loop all possible bpp + for (UINT_32 log2ElementBytes = 0; log2ElementBytes < MaxNumElementBytes; log2ElementBytes++) + { + // Get bits per pixel + UINT_32 bpp = 1 << (log2ElementBytes + 3); + + // Loop all possible tile index + for (INT_32 tileIndex = 0; tileIndex < m_noOfEntries; tileIndex++) + { + UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX; + + AddrTileConfig tileConfig = m_tileTable[tileIndex]; + + ADDR_SURFACE_FLAGS flags = {{0}}; + + // Compute tile info, hardcode numSamples to 1 because MSAA is not supported + // in swizzle pattern equation + HwlComputeMacroModeIndex(tileIndex, flags, bpp, 1, &tileConfig.info, NULL, NULL); + + // Check if the input is supported + if (IsEquationSupported(bpp, tileConfig, tileIndex) == TRUE) + { + ADDR_EQUATION_KEY key = {{0}}; + + // Generate swizzle equation key from bpp and tile config + key.fields.log2ElementBytes = log2ElementBytes; + key.fields.tileMode = tileConfig.mode; + // Treat depth micro tile type and non-display micro tile type as the same key + // because they have the same equation actually + key.fields.microTileType = (tileConfig.type == ADDR_DEPTH_SAMPLE_ORDER) ? 
+ ADDR_NON_DISPLAYABLE : tileConfig.type; + key.fields.pipeConfig = tileConfig.info.pipeConfig; + key.fields.numBanks = tileConfig.info.banks; + key.fields.bankWidth = tileConfig.info.bankWidth; + key.fields.bankHeight = tileConfig.info.bankHeight; + key.fields.macroAspectRatio = tileConfig.info.macroAspectRatio; + + // Find in the table if the equation has been built based on the key + for (UINT_32 i = 0; i < m_numEquations; i++) + { + if (key.value == equationKeyTable[i].value) + { + equationIndex = i; + break; + } + } + + // If found, just fill the index into the lookup table and no need + // to generate the equation again. Otherwise, generate the equation. + if (equationIndex == ADDR_INVALID_EQUATION_INDEX) + { + ADDR_EQUATION equation; + ADDR_E_RETURNCODE retCode; + + memset(&equation, 0, sizeof(ADDR_EQUATION)); + + // Generate the equation + if (IsMicroTiled(tileConfig.mode)) + { + retCode = ComputeMicroTileEquation(log2ElementBytes, + tileConfig.mode, + tileConfig.type, + &equation); + } + else + { + retCode = ComputeMacroTileEquation(log2ElementBytes, + tileConfig.mode, + tileConfig.type, + &tileConfig.info, + &equation); + } + // Only fill the equation into the table if the return code is ADDR_OK, + // otherwise if the return code is not ADDR_OK, it indicates this is not + // a valid input, we do nothing but just fill invalid equation index + // into the lookup table. 
+ if (retCode == ADDR_OK) + { + equationIndex = m_numEquations; + ADDR_ASSERT(equationIndex < EquationTableSize); + + m_blockSlices[equationIndex] = Thickness(tileConfig.mode); + + if (IsMicroTiled(tileConfig.mode)) + { + m_blockWidth[equationIndex] = MicroTileWidth; + m_blockHeight[equationIndex] = MicroTileHeight; + } + else + { + const ADDR_TILEINFO* pTileInfo = &tileConfig.info; + + m_blockWidth[equationIndex] = + HwlGetPipes(pTileInfo) * MicroTileWidth * pTileInfo->bankWidth * + pTileInfo->macroAspectRatio; + m_blockHeight[equationIndex] = + MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks / + pTileInfo->macroAspectRatio; + + if (m_chipFamily == ADDR_CHIP_FAMILY_SI) + { + static const UINT_32 PrtTileSize = 0x10000; + + UINT_32 macroTileSize = + m_blockWidth[equationIndex] * m_blockHeight[equationIndex] * + bpp / 8; + + if (macroTileSize < PrtTileSize) + { + UINT_32 numMacroTiles = PrtTileSize / macroTileSize; + + ADDR_ASSERT(macroTileSize == (1u << equation.numBits)); + ADDR_ASSERT((PrtTileSize % macroTileSize) == 0); + + UINT_32 numBits = Log2(numMacroTiles); + + UINT_32 xStart = Log2(m_blockWidth[equationIndex]) + + log2ElementBytes; + + m_blockWidth[equationIndex] *= numMacroTiles; + + for (UINT_32 i = 0; i < numBits; i++) + { + equation.addr[equation.numBits + i].valid = 1; + equation.addr[equation.numBits + i].index = xStart + i; + } + + equation.numBits += numBits; + } + } + } + + equationKeyTable[equationIndex] = key; + m_equationTable[equationIndex] = equation; + + m_numEquations++; + } + } + } + + // Fill the index into the lookup table, if the combination is not supported + // fill the invalid equation index + m_equationLookupTable[log2ElementBytes][tileIndex] = equationIndex; + } + } +} + +/** +*************************************************************************************************** +* SiAddrLib::IsEquationSupported +* +* @brief +* Check if it is supported for given bpp and tile config to generate an equation.
+* +* @return +* TRUE if supported +*************************************************************************************************** +*/ +BOOL_32 SiAddrLib::IsEquationSupported( + UINT_32 bpp, ///< Bits per pixel + AddrTileConfig tileConfig, ///< Tile config + INT_32 tileIndex ///< Tile index + ) const +{ + BOOL_32 supported = TRUE; + + // Linear tile mode is not supported in swizzle pattern equation + if (IsLinear(tileConfig.mode)) + { + supported = FALSE; + } + // These tile modes are used by Tex2DArray and Tex3D, which have depth (num_slice > 1), + // which is not supported in swizzle pattern equation due to slice rotation + else if ((tileConfig.mode == ADDR_TM_2D_TILED_THICK) || + (tileConfig.mode == ADDR_TM_2D_TILED_XTHICK) || + (tileConfig.mode == ADDR_TM_3D_TILED_THIN1) || + (tileConfig.mode == ADDR_TM_3D_TILED_THICK) || + (tileConfig.mode == ADDR_TM_3D_TILED_XTHICK)) + { + supported = FALSE; + } + // Only 8bpp(stencil), 16bpp and 32bpp are supported for depth + else if ((tileConfig.type == ADDR_DEPTH_SAMPLE_ORDER) && (bpp > 32)) + { + supported = FALSE; + } + // Tile split is not supported in swizzle pattern equation + else if (IsMacroTiled(tileConfig.mode)) + { + UINT_32 thickness = Thickness(tileConfig.mode); + if (((bpp >> 3) * MicroTilePixels * thickness) > tileConfig.info.tileSplitBytes) + { + supported = FALSE; + } + + if ((supported == TRUE) && (m_chipFamily == ADDR_CHIP_FAMILY_SI)) + { + // Please refer to SiAddrLib::HwlSetupTileInfo for PRT tile index selection + // Tile index 3, 6, 21-25 are for PRT single sample + if (tileIndex == 3) + { + supported = (bpp == 16); + } + else if (tileIndex == 6) + { + supported = (bpp == 32); + } + else if ((tileIndex >= 21) && (tileIndex <= 25)) + { + supported = (bpp == 8u * (1u << (static_cast<UINT_32>(tileIndex) - 21u))); + } + else + { + supported = FALSE; + } + } + } + + return supported; +} + + diff --git a/src/amd/addrlib/r800/siaddrlib.h b/src/amd/addrlib/r800/siaddrlib.h index 9201fb2..814cd00 100644 ---
a/src/amd/addrlib/r800/siaddrlib.h +++ b/src/amd/addrlib/r800/siaddrlib.h @@ -35,21 +35,21 @@ #define __SI_ADDR_LIB_H__ #include "addrlib1.h" #include "egbaddrlib.h" /** *************************************************************************************************** * @brief Describes the information in tile mode table *************************************************************************************************** */ -struct ADDR_TILECONFIG +struct AddrTileConfig { AddrTileMode mode; AddrTileType type; ADDR_TILEINFO info; }; /** *************************************************************************************************** * @brief SI specific settings structure. *************************************************************************************************** @@ -124,20 +124,28 @@ protected: ADDR_TILEINFO* pInfo, AddrTileMode* pMode = 0, AddrTileType* pType = 0) const; virtual VOID HwlComputeTileDataWidthAndHeightLinear( UINT_32* pMacroWidth, UINT_32* pMacroHeight, UINT_32 bpp, ADDR_TILEINFO* pTileInfo) const; virtual UINT_64 HwlComputeHtileBytes( UINT_32 pitch, UINT_32 height, UINT_32 bpp, BOOL_32 isLinear, UINT_32 numSlices, UINT_64* pSliceBytes, UINT_32 baseAlign) const; + virtual ADDR_E_RETURNCODE ComputeBankEquation( + UINT_32 log2BytesPP, UINT_32 threshX, UINT_32 threshY, + ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const; + + virtual ADDR_E_RETURNCODE ComputePipeEquation( + UINT_32 log2BytesPP, UINT_32 threshX, UINT_32 threshY, + ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const; + virtual UINT_32 ComputePipeFromCoord( UINT_32 x, UINT_32 y, UINT_32 slice, AddrTileMode tileMode, UINT_32 pipeSwizzle, BOOL_32 ignoreSE, ADDR_TILEINFO* pTileInfo) const; virtual UINT_32 HwlGetPipes(const ADDR_TILEINFO* pTileInfo) const; /// Pre-handler of 3x pitch (96 bit) adjustment virtual UINT_32 HwlPreHandleBaseLvl3xPitch( const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const; @@ -166,24 +174,21 @@ protected: virtual VOID 
HwlCheckLastMacroTiledLvl( const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; virtual BOOL_32 HwlTileInfoEqual( const ADDR_TILEINFO* pLeft, const ADDR_TILEINFO* pRight) const; virtual AddrTileMode HwlDegradeThickTileMode( AddrTileMode baseTileMode, UINT_32 numSlices, UINT_32* pBytesPerTile) const; - virtual BOOL_32 HwlOverrideTileMode( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - AddrTileMode* pTileMode, - AddrTileType* pTileType) const; + virtual VOID HwlOverrideTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; virtual BOOL_32 HwlSanityCheckMacroTiled( ADDR_TILEINFO* pTileInfo) const { return TRUE; } virtual UINT_32 HwlGetPitchAlignmentLinear(UINT_32 bpp, ADDR_SURFACE_FLAGS flags) const; virtual UINT_64 HwlGetSizeAdjustmentLinear( @@ -222,43 +227,72 @@ protected: virtual BOOL_32 HwlReduceBankWidthHeight( UINT_32 tileSize, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples, UINT_32 bankHeightAlign, UINT_32 pipes, ADDR_TILEINFO* pTileInfo) const { return TRUE; } virtual ADDR_E_RETURNCODE HwlGetMaxAlignments(ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) const; + // Get equation table pointer and number of equations + virtual UINT_32 HwlGetEquationTableInfo(const ADDR_EQUATION** ppEquationTable) const + { + *ppEquationTable = m_equationTable; + + return m_numEquations; + } + + // Check if it is supported for given bpp and tile config to generate an equation + BOOL_32 IsEquationSupported( + UINT_32 bpp, AddrTileConfig tileConfig, INT_32 tileIndex) const; + // Protected non-virtual functions VOID ComputeTileCoordFromPipeAndElemIdx( UINT_32 elemIdx, UINT_32 pipe, AddrPipeCfg pipeCfg, UINT_32 pitchInMacroTile, UINT_32 x, UINT_32 y, UINT_32* pX, UINT_32* pY) const; UINT_32 TileCoordToMaskElementIndex( UINT_32 tx, UINT_32 ty, AddrPipeCfg pipeConfig, UINT_32 *macroShift, UINT_32 *elemIdxBits) const; BOOL_32 DecodeGbRegs( const ADDR_REGISTER_VALUE* pRegValue); - const ADDR_TILECONFIG* GetTileSetting( + const AddrTileConfig* 
GetTileSetting( UINT_32 index) const; + // Initialize equation table + VOID InitEquationTable(); + static const UINT_32 TileTableSize = 32; - ADDR_TILECONFIG m_tileTable[TileTableSize]; + AddrTileConfig m_tileTable[TileTableSize]; UINT_32 m_noOfEntries; + // Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp) + static const UINT_32 MaxNumElementBytes = 5; + // More than half slots in tile mode table can't support equation + static const UINT_32 EquationTableSize = (MaxNumElementBytes * TileTableSize) / 2; + // Equation table + ADDR_EQUATION m_equationTable[EquationTableSize]; + UINT_32 m_blockWidth[EquationTableSize]; + UINT_32 m_blockHeight[EquationTableSize]; + UINT_32 m_blockSlices[EquationTableSize]; + // Number of equation entries in the table + UINT_32 m_numEquations; + // Equation lookup table according to bpp and tile index + UINT_32 m_equationLookupTable[MaxNumElementBytes][TileTableSize]; + private: UINT_32 GetPipePerSurf(AddrPipeCfg pipeConfig) const; VOID ReadGbTileMode( - UINT_32 regValue, ADDR_TILECONFIG* pCfg) const; + UINT_32 regValue, AddrTileConfig* pCfg) const; BOOL_32 InitTileSettingTable( const UINT_32 *pSetting, UINT_32 noOfEntries); SIChipSettings m_settings; }; #endif -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev