From 1c1b257d552e984e2953b31d4b0b2d203c664a8a Mon Sep 17 00:00:00 2001 From: Vadim Seregin <vseregin@qti.qualcomm.com> Date: Thu, 21 Oct 2021 08:49:46 -0700 Subject: [PATCH] Lossless code cleanup, mainly TMP related --- source/Lib/CommonLib/InterPrediction.cpp | 4 + source/Lib/CommonLib/IntraPrediction.cpp | 546 +++++++++++++++++- source/Lib/CommonLib/IntraPrediction.h | 87 ++- source/Lib/CommonLib/TrQuant.cpp | 543 ----------------- source/Lib/CommonLib/TrQuant.h | 83 +-- source/Lib/CommonLib/TypeDef.h | 16 +- source/Lib/CommonLib/x86/InitX86.cpp | 25 +- source/Lib/CommonLib/x86/IntraX86.h | 321 ++++++++++ source/Lib/CommonLib/x86/TrQuantX86.h | 270 +-------- source/Lib/CommonLib/x86/avx/Intra_avx.cpp | 1 + source/Lib/CommonLib/x86/avx2/Intra_avx2.cpp | 1 + .../Lib/CommonLib/x86/sse41/Intra_sse41.cpp | 1 + .../Lib/CommonLib/x86/sse42/Intra_sse42.cpp | 1 + source/Lib/DecoderLib/DecCu.cpp | 25 +- source/Lib/EncoderLib/IntraSearch.cpp | 42 +- 15 files changed, 1030 insertions(+), 936 deletions(-) create mode 100644 source/Lib/CommonLib/x86/IntraX86.h create mode 100644 source/Lib/CommonLib/x86/avx/Intra_avx.cpp create mode 100644 source/Lib/CommonLib/x86/avx2/Intra_avx2.cpp create mode 100644 source/Lib/CommonLib/x86/sse41/Intra_sse41.cpp create mode 100644 source/Lib/CommonLib/x86/sse42/Intra_sse42.cpp diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp index 7fe099653..e4873b7a5 100644 --- a/source/Lib/CommonLib/InterPrediction.cpp +++ b/source/Lib/CommonLib/InterPrediction.cpp @@ -48,6 +48,10 @@ #include "Reshape.h" #endif +#if ENABLE_SIMD_TMP +#include "CommonDefX86.h" +#endif + //! \ingroup CommonLib //! 
\{ diff --git a/source/Lib/CommonLib/IntraPrediction.cpp b/source/Lib/CommonLib/IntraPrediction.cpp index 5e5a66392..f16ddaa80 100644 --- a/source/Lib/CommonLib/IntraPrediction.cpp +++ b/source/Lib/CommonLib/IntraPrediction.cpp @@ -107,6 +107,9 @@ IntraPrediction::IntraPrediction() #if MMLM m_encPreRDRun = false; #endif +#if JVET_V0130_INTRA_TMP + m_pppTarPatch = NULL; +#endif } IntraPrediction::~IntraPrediction() @@ -140,6 +143,31 @@ void IntraPrediction::destroy() buffer.destroy(); } m_tempBuffer.clear(); + +#if JVET_V0130_INTRA_TMP + if( m_pppTarPatch != NULL ) + { + for( unsigned int uiDepth = 0; uiDepth < USE_MORE_BLOCKSIZE_DEPTH_MAX; uiDepth++ ) + { + unsigned int blkSize = g_uiDepth2Width[uiDepth]; + + unsigned int patchSize = blkSize + TMP_TEMPLATE_SIZE; + for( unsigned int uiRow = 0; uiRow < patchSize; uiRow++ ) + { + if( m_pppTarPatch[uiDepth][uiRow] != NULL ) + { + delete[]m_pppTarPatch[uiDepth][uiRow]; m_pppTarPatch[uiDepth][uiRow] = NULL; + } + } + if( m_pppTarPatch[uiDepth] != NULL ) + { + delete[]m_pppTarPatch[uiDepth]; m_pppTarPatch[uiDepth] = NULL; + } + } + delete[] m_pppTarPatch; + m_pppTarPatch = NULL; + } +#endif } void IntraPrediction::init(ChromaFormat chromaFormatIDC, const unsigned bitDepthY) @@ -210,6 +238,34 @@ void IntraPrediction::init(ChromaFormat chromaFormatIDC, const unsigned bitDepth { buffer.create( chromaFormatIDC, Area( 0, 0, MAX_CU_SIZE, MAX_CU_SIZE ) ); } + +#if JVET_V0130_INTRA_TMP + unsigned int blkSize; + + if( m_pppTarPatch == NULL ) + { + m_pppTarPatch = new Pel * *[USE_MORE_BLOCKSIZE_DEPTH_MAX]; + for( unsigned int uiDepth = 0; uiDepth < USE_MORE_BLOCKSIZE_DEPTH_MAX; uiDepth++ ) + { + blkSize = g_uiDepth2Width[uiDepth]; + + unsigned int patchSize = blkSize + TMP_TEMPLATE_SIZE; + m_pppTarPatch[uiDepth] = new Pel *[patchSize]; + for( unsigned int uiRow = 0; uiRow < patchSize; uiRow++ ) + { + m_pppTarPatch[uiDepth][uiRow] = new Pel[patchSize]; + } + } + } + + m_calcTemplateDiff = calcTemplateDiff; +#endif + +#if 
ENABLE_SIMD_TMP +#ifdef TARGET_SIMD_X86 + initIntraX86(); +#endif +#endif } #if JVET_W0123_TIMD_FUSION @@ -1984,7 +2040,7 @@ void IntraPrediction::initIntraPatternChTypeISP(const CodingUnit& cu, const Comp #if JVET_V0130_INTRA_TMP #if JVET_W0069_TMP_BOUNDARY -RefTemplateType IntraPrediction::GetRefTemplateType(CodingUnit& cu, CompArea& area) +RefTemplateType IntraPrediction::getRefTemplateType(CodingUnit& cu, CompArea& area) #else bool IntraPrediction::isRefTemplateAvailable(CodingUnit& cu, CompArea& area) #endif @@ -5556,4 +5612,492 @@ int &a, int &b, int &iShift) } #endif +#if JVET_V0130_INTRA_TMP +void insertNode( int diff, int& iXOffset, int& iYOffset, int& pDiff, int& pX, int& pY, short& pId, unsigned int& setId ) +{ + pDiff = diff; + pX = iXOffset; + pY = iYOffset; + pId = setId; +} + +void clipMvIntraConstraint( CodingUnit* pcCU, int regionId, int& iHorMin, int& iHorMax, int& iVerMin, int& iVerMax, unsigned int uiTemplateSize, unsigned int uiBlkWidth, unsigned int uiBlkHeight, int iCurrY, int iCurrX, int offsetLCUY, int offsetLCUX ) +{ + int searchRangeWidth = TMP_SEARCH_RANGE_MULT_FACTOR * uiBlkWidth; + int searchRangeHeight = TMP_SEARCH_RANGE_MULT_FACTOR * uiBlkHeight; + int iMvShift = 0; + int iTemplateSize = uiTemplateSize; + int iBlkWidth = uiBlkWidth; + int iBlkHeight = uiBlkHeight; + if( regionId == 0 ) //above outside LCU + { + iHorMax = std::min( (iCurrX + searchRangeWidth) << iMvShift, ( int ) ((pcCU->cs->pps->getPicWidthInLumaSamples() - iBlkWidth) << iMvShift) ); + iHorMin = std::max( (iTemplateSize) << iMvShift, (iCurrX - searchRangeWidth) << iMvShift ); + + iVerMax = (iCurrY - iBlkHeight - offsetLCUY) << iMvShift; + iVerMin = std::max( ((iTemplateSize) << iMvShift), ((iCurrY - searchRangeHeight) << iMvShift) ); + + iHorMin = iHorMin - iCurrX; + iHorMax = iHorMax - iCurrX; + iVerMax = iVerMax - iCurrY; + iVerMin = iVerMin - iCurrY; + } + else if( regionId == 1 ) //left outside LCU + { + iHorMax = (iCurrX - offsetLCUX - iBlkWidth) << iMvShift; + 
iHorMin = std::max( (iTemplateSize) << iMvShift, (iCurrX - searchRangeWidth) << iMvShift ); + + iVerMin = std::max( (iTemplateSize) << iMvShift, (iCurrY - iBlkHeight - offsetLCUY) << iMvShift ); + iVerMax = (iCurrY) << iMvShift; + + iHorMin = iHorMin - iCurrX; + iHorMax = iHorMax - iCurrX; + iVerMax = iVerMax - iCurrY; + iVerMin = iVerMin - iCurrY; + } + else if( regionId == 2 ) //left outside LCU (can reach the bottom row of LCU) + { + iHorMin = std::max( (iTemplateSize) << iMvShift, (iCurrX - searchRangeWidth) << iMvShift ); + iHorMax = (iCurrX - offsetLCUX - iBlkWidth) << iMvShift; + iVerMin = (iCurrY + 1) << iMvShift; + iVerMax = std::min( pcCU->cs->pps->getPicHeightInLumaSamples() - iBlkHeight, (iCurrY - offsetLCUY + pcCU->cs->sps->getCTUSize() - iBlkHeight) << iMvShift ); + + iHorMin = iHorMin - iCurrX; + iHorMax = iHorMax - iCurrX; + iVerMax = iVerMax - iCurrY; + iVerMin = iVerMin - iCurrY; + } +} + +TempLibFast::TempLibFast() +{ +} + +TempLibFast::~TempLibFast() +{ +} + +void TempLibFast::initTemplateDiff( unsigned int uiPatchWidth, unsigned int uiPatchHeight, unsigned int uiBlkWidth, unsigned int uiBlkHeight, int bitDepth ) +{ + int maxValue = ((1 << bitDepth) >> (INIT_THRESHOULD_SHIFTBITS)) * (uiPatchHeight * uiPatchWidth - uiBlkHeight * uiBlkWidth); + m_diffMax = maxValue; + { + m_pDiff = maxValue; + } +} + +#if JVET_W0069_TMP_BOUNDARY +void IntraPrediction::getTargetTemplate( CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight, RefTemplateType tempType ) +#else +void IntraPrediction::getTargetTemplate( CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight ) +#endif +{ + const ComponentID compID = COMPONENT_Y; + unsigned int uiPatchWidth = uiBlkWidth + TMP_TEMPLATE_SIZE; + unsigned int uiPatchHeight = uiBlkHeight + TMP_TEMPLATE_SIZE; + unsigned int uiTarDepth = floorLog2( std::max( uiBlkHeight, uiBlkWidth ) ) - 2; + Pel** tarPatch = m_pppTarPatch[uiTarDepth]; + CompArea area = pcCU->blocks[compID]; + Pel* pCurrStart = 
pcCU->cs->picture->getRecoBuf( area ).buf; + unsigned int uiPicStride = pcCU->cs->picture->getRecoBuf( compID ).stride; + unsigned int uiY, uiX; + + //fill template + //up-left & up + Pel* tarTemp; +#if JVET_W0069_TMP_BOUNDARY + if( tempType == L_SHAPE_TEMPLATE ) + { +#endif + Pel* pCurrTemp = pCurrStart - TMP_TEMPLATE_SIZE * uiPicStride - TMP_TEMPLATE_SIZE; + for( uiY = 0; uiY < TMP_TEMPLATE_SIZE; uiY++ ) + { + tarTemp = tarPatch[uiY]; + for( uiX = 0; uiX < uiPatchWidth; uiX++ ) + { + tarTemp[uiX] = pCurrTemp[uiX]; + } + pCurrTemp += uiPicStride; + } + //left + for( uiY = TMP_TEMPLATE_SIZE; uiY < uiPatchHeight; uiY++ ) + { + tarTemp = tarPatch[uiY]; + for( uiX = 0; uiX < TMP_TEMPLATE_SIZE; uiX++ ) + { + tarTemp[uiX] = pCurrTemp[uiX]; + } + pCurrTemp += uiPicStride; + } +#if JVET_W0069_TMP_BOUNDARY + } + else if( tempType == ABOVE_TEMPLATE ) + { + Pel* pCurrTemp = pCurrStart - TMP_TEMPLATE_SIZE * uiPicStride; + for( uiY = 0; uiY < TMP_TEMPLATE_SIZE; uiY++ ) + { + tarTemp = tarPatch[uiY]; + for( uiX = 0; uiX < uiBlkWidth; uiX++ ) + { + tarTemp[uiX] = pCurrTemp[uiX]; + } + pCurrTemp += uiPicStride; + } + } + else if( tempType == LEFT_TEMPLATE ) + { + Pel* pCurrTemp = pCurrStart - TMP_TEMPLATE_SIZE; + for( uiY = TMP_TEMPLATE_SIZE; uiY < uiPatchHeight; uiY++ ) + { + tarTemp = tarPatch[uiY]; + for( uiX = 0; uiX < TMP_TEMPLATE_SIZE; uiX++ ) + { + tarTemp[uiX] = pCurrTemp[uiX]; + } + pCurrTemp += uiPicStride; + } + } +#endif +} + +#if JVET_W0069_TMP_BOUNDARY +void IntraPrediction::candidateSearchIntra( CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight, RefTemplateType tempType ) +#else +void IntraPrediction::candidateSearchIntra( CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight ) +#endif +{ + const ComponentID compID = COMPONENT_Y; + const int channelBitDepth = pcCU->cs->sps->getBitDepth( toChannelType( compID ) ); + unsigned int uiPatchWidth = uiBlkWidth + TMP_TEMPLATE_SIZE; + unsigned int uiPatchHeight = uiBlkHeight + 
TMP_TEMPLATE_SIZE; + unsigned int uiTarDepth = floorLog2( std::max( uiBlkWidth, uiBlkHeight ) ) - 2; + Pel** tarPatch = getTargetPatch( uiTarDepth ); + //Initialize the library for saving the best candidates + m_tempLibFast.initTemplateDiff( uiPatchWidth, uiPatchHeight, uiBlkWidth, uiBlkHeight, channelBitDepth ); + short setId = 0; //record the reference picture. +#if JVET_W0069_TMP_BOUNDARY + searchCandidateFromOnePicIntra( pcCU, tarPatch, uiPatchWidth, uiPatchHeight, setId, tempType ); +#else + searchCandidateFromOnePicIntra( pcCU, tarPatch, uiPatchWidth, uiPatchHeight, setId ); +#endif + //count collected candidate number + int pDiff = m_tempLibFast.getDiff(); + int maxDiff = m_tempLibFast.getDiffMax(); + + + if( pDiff < maxDiff ) + { + m_uiVaildCandiNum = 1; + } + else + { + m_uiVaildCandiNum = 0; + } +} + +#if JVET_W0069_TMP_BOUNDARY +void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, unsigned int setId, RefTemplateType tempType ) +#else +void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, unsigned int setId ) +#endif +{ + const ComponentID compID = COMPONENT_Y; + unsigned int uiBlkWidth = uiPatchWidth - TMP_TEMPLATE_SIZE; + unsigned int uiBlkHeight = uiPatchHeight - TMP_TEMPLATE_SIZE; + + int pX = m_tempLibFast.getX(); + int pY = m_tempLibFast.getY(); + int pDiff = m_tempLibFast.getDiff(); + short pId = m_tempLibFast.getId(); + CompArea area = pcCU->blocks[compID]; + int refStride = pcCU->cs->picture->getRecoBuf( compID ).stride; + + Pel* ref = pcCU->cs->picture->getRecoBuf( area ).buf; + + setRefPicUsed( ref ); //facilitate the access of each candidate point + setStride( refStride ); + + Mv cTmpMvPred; + cTmpMvPred.setZero(); + + unsigned int uiCUPelY = area.pos().y; + unsigned int uiCUPelX = area.pos().x; + int blkX = 0; + int blkY = 0; + int iCurrY = uiCUPelY + blkY; + int 
iCurrX = uiCUPelX + blkX; + + Position ctuRsAddr = CU::getCtuXYAddr( *pcCU ); + int offsetLCUY = iCurrY - ctuRsAddr.y; + int offsetLCUX = iCurrX - ctuRsAddr.x; + + int iYOffset, iXOffset; + int diff; + Pel* refCurr; + + const int regionNum = 3; + int mvYMins[regionNum]; + int mvYMaxs[regionNum]; + int mvXMins[regionNum]; + int mvXMaxs[regionNum]; + int regionId = 0; + + //1. check the near pixels within LCU + //above pixels in LCU + int iTemplateSize = TMP_TEMPLATE_SIZE; + int iBlkWidth = uiBlkWidth; + int iBlkHeight = uiBlkHeight; + regionId = 0; + int iMvShift = 0; + + int iVerMin = std::max( ((iTemplateSize) << iMvShift), (iCurrY - offsetLCUY - iBlkHeight + 1) << iMvShift ); + int iVerMax = (iCurrY - iBlkHeight) << iMvShift; + int iHorMin = std::max( (iTemplateSize) << iMvShift, (iCurrX - offsetLCUX - iBlkWidth + 1) << iMvShift ); + int iHorMax = (iCurrX - iBlkWidth); + + mvXMins[regionId] = iHorMin - iCurrX; + mvXMaxs[regionId] = iHorMax - iCurrX; + mvYMins[regionId] = iVerMin - iCurrY; + mvYMaxs[regionId] = iVerMax - iCurrY; + + //check within CTU pixels + for( regionId = 0; regionId < 1; regionId++ ) + { + int mvYMin = mvYMins[regionId]; + int mvYMax = mvYMaxs[regionId]; + int mvXMin = mvXMins[regionId]; + int mvXMax = mvXMaxs[regionId]; + if( mvYMax < mvYMin || mvXMax < mvXMin ) + { + continue; + } + + for( iYOffset = mvYMax; iYOffset >= mvYMin; iYOffset-- ) + { + for( iXOffset = mvXMax; iXOffset >= mvXMin; iXOffset-- ) + { + refCurr = ref + iYOffset * refStride + iXOffset; +#if JVET_W0069_TMP_BOUNDARY + diff = m_calcTemplateDiff( refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, pDiff, tempType ); +#else + diff = m_calcTemplateDiff( refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, pDiff ); +#endif + if( diff < (pDiff) ) + { + insertNode( diff, iXOffset, iYOffset, pDiff, pX, pY, pId, setId ); + } + if( pDiff == 0 ) + { + regionId++; + } + } + } + } + + //2. 
check the pixels outside CTU + for( regionId = 0; regionId < regionNum; regionId++ ) + { + // this function fills in the range the template matching for pixels outside the current CTU + clipMvIntraConstraint( pcCU, regionId, mvXMins[regionId], mvXMaxs[regionId], mvYMins[regionId], mvYMaxs[regionId], TMP_TEMPLATE_SIZE, uiBlkWidth, uiBlkHeight, iCurrY, iCurrX, offsetLCUY, offsetLCUX ); + } + + for( regionId = 0; regionId < regionNum; regionId++ ) + { + int mvYMin = mvYMins[regionId]; + int mvYMax = mvYMaxs[regionId]; + int mvXMin = mvXMins[regionId]; + int mvXMax = mvXMaxs[regionId]; + if( mvYMax < mvYMin || mvXMax < mvXMin ) + { + continue; + } + for( iYOffset = mvYMax; iYOffset >= mvYMin; iYOffset-- ) + { + for( iXOffset = mvXMax; iXOffset >= mvXMin; iXOffset-- ) + { + refCurr = ref + iYOffset * refStride + iXOffset; +#if JVET_W0069_TMP_BOUNDARY + diff = m_calcTemplateDiff( refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, pDiff, tempType ); +#else + diff = m_calcTemplateDiff( refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, pDiff ); +#endif + + if( diff < (pDiff) ) + { + insertNode( diff, iXOffset, iYOffset, pDiff, pX, pY, pId, setId ); + } + + if( pDiff == 0 ) + { + regionId = regionNum; + } + } + } + } + + m_tempLibFast.m_pX = pX; + m_tempLibFast.m_pY = pY; + m_tempLibFast.m_pDiff = pDiff; + m_tempLibFast.m_pId = pId; +} +bool IntraPrediction::generateTMPrediction( Pel* piPred, unsigned int uiStride, unsigned int uiBlkWidth, unsigned int uiBlkHeight, int& foundCandiNum ) +{ + bool bSucceedFlag = true; + unsigned int uiPatchWidth = uiBlkWidth + TMP_TEMPLATE_SIZE; + unsigned int uiPatchHeight = uiBlkHeight + TMP_TEMPLATE_SIZE; + + foundCandiNum = m_uiVaildCandiNum; + if( foundCandiNum < 1 ) + { + return false; + } + + int pX = m_tempLibFast.getX(); + int pY = m_tempLibFast.getY(); + Pel* ref; + int picStride = getStride(); + int iOffsetY, iOffsetX; + Pel* refTarget; + unsigned int uiHeight = uiPatchHeight - TMP_TEMPLATE_SIZE; + unsigned int 
uiWidth = uiPatchWidth - TMP_TEMPLATE_SIZE; + + //the data center: we use the prediction block as the center now. + //collect the candidates + ref = getRefPicUsed(); + { + iOffsetY = pY; + iOffsetX = pX; + refTarget = ref + iOffsetY * picStride + iOffsetX; + for( unsigned int uiY = 0; uiY < uiHeight; uiY++ ) + { + for( unsigned int uiX = 0; uiX < uiWidth; uiX++ ) + { + piPred[uiX] = refTarget[uiX]; + } + refTarget += picStride; + piPred += uiStride; + } + } + return bSucceedFlag; +} + +#if JVET_W0069_TMP_BOUNDARY +bool IntraPrediction::generateTmDcPrediction( Pel* piPred, unsigned int uiStride, unsigned int uiBlkWidth, unsigned int uiBlkHeight, int DC_Val ) +{ + bool bSucceedFlag = true; + { + for( unsigned int uiY = 0; uiY < uiBlkHeight; uiY++ ) + { + for( unsigned int uiX = 0; uiX < uiBlkWidth; uiX++ ) + { + piPred[uiX] = DC_Val; + } + piPred += uiStride; + } + } + return bSucceedFlag; +} +#endif + +#if JVET_W0069_TMP_BOUNDARY +int IntraPrediction::calcTemplateDiff( Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int iMax, RefTemplateType tempType ) +#else +int IntraPrediction::calcTemplateDiff( Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int iMax ) +#endif +{ + int diffSum = 0; +#if JVET_W0069_TMP_BOUNDARY + Pel* refPatchRow; + if( tempType == L_SHAPE_TEMPLATE ) + { + refPatchRow = ref - TMP_TEMPLATE_SIZE * uiStride - TMP_TEMPLATE_SIZE; + } + else if( tempType == LEFT_TEMPLATE ) + { + refPatchRow = ref - TMP_TEMPLATE_SIZE; + } + else if( tempType == ABOVE_TEMPLATE ) + { + refPatchRow = ref - TMP_TEMPLATE_SIZE * uiStride; + } +#else + Pel* refPatchRow = ref - TMP_TEMPLATE_SIZE * uiStride - TMP_TEMPLATE_SIZE; +#endif + Pel* tarPatchRow; + +#if JVET_W0069_TMP_BOUNDARY + if( tempType == L_SHAPE_TEMPLATE ) + { +#endif + // horizontal difference + for( int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++ ) + { + tarPatchRow = tarPatch[iY]; + for( int iX = 0; iX < 
uiPatchWidth; iX++ ) + { + diffSum += abs( refPatchRow[iX] - tarPatchRow[iX] ); + } + if( diffSum > iMax ) //for speeding up + { + return diffSum; + } + refPatchRow += uiStride; + } + + // vertical difference + for( int iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++ ) + { + tarPatchRow = tarPatch[iY]; + for( int iX = 0; iX < TMP_TEMPLATE_SIZE; iX++ ) + { + diffSum += abs( refPatchRow[iX] - tarPatchRow[iX] ); + } + if( diffSum > iMax ) //for speeding up + { + return diffSum; + } + refPatchRow += uiStride; + } +#if JVET_W0069_TMP_BOUNDARY + } + else if( tempType == ABOVE_TEMPLATE ) + { + // top template difference + for( int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++ ) + { + tarPatchRow = tarPatch[iY]; + for( int iX = 0; iX < uiPatchWidth - TMP_TEMPLATE_SIZE; iX++ ) + { + diffSum += abs( refPatchRow[iX] - tarPatchRow[iX] ); + } + if( diffSum > iMax ) //for speeding up + { + return diffSum; + } + refPatchRow += uiStride; + } + } + else if( tempType == LEFT_TEMPLATE ) + { + // left template difference + for( int iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++ ) + { + tarPatchRow = tarPatch[iY]; + for( int iX = 0; iX < TMP_TEMPLATE_SIZE; iX++ ) + { + diffSum += abs( refPatchRow[iX] - tarPatchRow[iX] ); + } + if( diffSum > iMax ) //for speeding up + { + return diffSum; + } + refPatchRow += uiStride; + } + } +#endif + + return diffSum; +} +#endif + //! 
\} diff --git a/source/Lib/CommonLib/IntraPrediction.h b/source/Lib/CommonLib/IntraPrediction.h index d89f5d2e7..b97ad8975 100644 --- a/source/Lib/CommonLib/IntraPrediction.h +++ b/source/Lib/CommonLib/IntraPrediction.h @@ -66,6 +66,43 @@ enum PredBuf static const uint32_t MAX_INTRA_FILTER_DEPTHS=8; +#if JVET_V0130_INTRA_TMP +extern unsigned int g_uiDepth2Width[5]; +extern unsigned int g_uiDepth2MaxCandiNum[5]; + +class TempLibFast +{ +public: + int m_pX; //offset X + int m_pY; //offset Y + int m_pXInteger; //offset X for integer pixel search + int m_pYInteger; //offset Y for integer pixel search + int m_pDiffInteger; + int getXInteger() { return m_pXInteger; } + int getYInteger() { return m_pYInteger; } + int getDiffInteger() { return m_pDiffInteger; } + short m_pIdInteger; //frame id + short getIdInteger() { return m_pIdInteger; } + int m_pDiff; //mse + short m_pId; //frame id + + TempLibFast(); + ~TempLibFast(); + //void init(); + int getX() { return m_pX; } + int getY() { return m_pY; } + int getDiff() { return m_pDiff; } + short getId() { return m_pId; } + /*void initDiff(unsigned int uiPatchSize, int bitDepth); + void initDiff(unsigned int uiPatchSize, int bitDepth, int iCandiNumber);*/ + void initTemplateDiff( unsigned int uiPatchWidth, unsigned int uiPatchHeight, unsigned int uiBlkWidth, unsigned int uiBlkHeight, int bitDepth ); + int m_diffMax; + int getDiffMax() { return m_diffMax; } +}; + +typedef short TrainDataType; +#endif + class IntraPrediction { #if MMLM @@ -140,6 +177,16 @@ protected: bool m_bestScanRotationMode; std::vector<PelStorage> m_tempBuffer; +#if JVET_V0130_INTRA_TMP + int m_uiPartLibSize; + TempLibFast m_tempLibFast; + Pel* m_refPicUsed; + Picture* m_refPicBuf; + unsigned int m_uiPicStride; + unsigned int m_uiVaildCandiNum; + Pel*** m_pppTarPatch; +#endif + // prediction void xPredIntraPlanar ( const CPelBuf &pSrc, PelBuf &pDst ); void xPredIntraDc ( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const bool 
enableBoundaryFilter = true ); @@ -204,7 +251,7 @@ public: virtual ~IntraPrediction(); #if JVET_W0069_TMP_BOUNDARY - RefTemplateType GetRefTemplateType(CodingUnit& cu, CompArea& area); + RefTemplateType getRefTemplateType(CodingUnit& cu, CompArea& area); #endif void init (ChromaFormat chromaFormatIDC, const unsigned bitDepthY); @@ -259,6 +306,44 @@ public: Pel* getPredictorPtr2 (const ComponentID compID, uint32_t idx) { return m_yuvExt2[compID][idx]; } void switchBuffer (const PredictionUnit &pu, ComponentID compID, PelBuf srcBuff, Pel *dst); #endif + +#if JVET_V0130_INTRA_TMP +#if JVET_W0069_TMP_BOUNDARY + int( *m_calcTemplateDiff )(Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int iMax, RefTemplateType TempType); + static int calcTemplateDiff( Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int iMax, RefTemplateType TempType ); +#else + int( *m_calcTemplateDiff )(Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int iMax); + static int calcTemplateDiff( Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int iMax ); +#endif + Pel** getTargetPatch( unsigned int uiDepth ) { return m_pppTarPatch[uiDepth]; } + Pel* getRefPicUsed() { return m_refPicUsed; } + void setRefPicUsed( Pel* ref ) { m_refPicUsed = ref; } + unsigned int getStride() { return m_uiPicStride; } + void setStride( unsigned int uiPicStride ) { m_uiPicStride = uiPicStride; } + +#if JVET_W0069_TMP_BOUNDARY + void searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, unsigned int setId, RefTemplateType tempType ); + void candidateSearchIntra( CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight, RefTemplateType tempType ); +#else + void searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** tarPatch, 
unsigned int uiPatchWidth, unsigned int uiPatchHeight, unsigned int setId ); + void candidateSearchIntra( CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight ); +#endif + bool generateTMPrediction( Pel* piPred, unsigned int uiStride, unsigned int uiBlkWidth, unsigned int uiBlkHeight, int& foundCandiNum ); +#if JVET_W0069_TMP_BOUNDARY + bool generateTmDcPrediction( Pel* piPred, unsigned int uiStride, unsigned int uiBlkWidth, unsigned int uiBlkHeight, int DC_Val ); + void getTargetTemplate( CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight, RefTemplateType tempType ); +#else + void getTargetTemplate( CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight ); +#endif +#endif + +#if ENABLE_SIMD_TMP +#ifdef TARGET_SIMD_X86 + void initIntraX86(); + template <X86_VEXT vext> + void _initIntraX86(); +#endif +#endif }; #if ENABLE_DIMD int buildHistogram(const Pel *pReco, int iStride, uint32_t uiHeight, uint32_t uiWidth, int* piHistogram, int direction, int bw, int bh); diff --git a/source/Lib/CommonLib/TrQuant.cpp b/source/Lib/CommonLib/TrQuant.cpp index ce6634820..349675100 100644 --- a/source/Lib/CommonLib/TrQuant.cpp +++ b/source/Lib/CommonLib/TrQuant.cpp @@ -55,10 +55,6 @@ #include "CommonLib/CodingStatistics.h" #endif -#if ENABLE_SIMD_TMP -#include "CommonDefX86.h" -#endif - #if JVET_V0130_INTRA_TMP unsigned int g_uiDepth2Width[5] = { 4, 8, 16, 32, 64 }; #endif @@ -195,9 +191,6 @@ TrQuant::TrQuant() : m_quant( nullptr ) m_fwdICT[-2] = fwdTransformCbCr<-2>; m_fwdICT[ 3] = fwdTransformCbCr< 3>; m_fwdICT[-3] = fwdTransformCbCr<-3>; -#if JVET_V0130_INTRA_TMP - m_pppTarPatch = NULL; -#endif } } @@ -208,31 +201,6 @@ TrQuant::~TrQuant() delete m_quant; m_quant = nullptr; } - -#if JVET_V0130_INTRA_TMP - if (m_pppTarPatch != NULL) - { - for (unsigned int uiDepth = 0; uiDepth < USE_MORE_BLOCKSIZE_DEPTH_MAX; uiDepth++) - { - unsigned int blkSize = g_uiDepth2Width[uiDepth]; - - unsigned int patchSize = blkSize + TMP_TEMPLATE_SIZE; - for 
(unsigned int uiRow = 0; uiRow < patchSize; uiRow++) - { - if (m_pppTarPatch[uiDepth][uiRow] != NULL) - { - delete[]m_pppTarPatch[uiDepth][uiRow]; m_pppTarPatch[uiDepth][uiRow] = NULL; - } - } - if (m_pppTarPatch[uiDepth] != NULL) - { - delete[]m_pppTarPatch[uiDepth]; m_pppTarPatch[uiDepth] = NULL; - } - } - delete[] m_pppTarPatch; - m_pppTarPatch = NULL; - } -#endif } #if ENABLE_SPLIT_PARALLELISM @@ -270,27 +238,6 @@ void TrQuant::init( const Quant* otherQuant, m_quant->init( uiMaxTrSize, bUseRDOQ, bUseRDOQTS, useSelectiveRDOQ ); } - -#if JVET_V0130_INTRA_TMP - unsigned int blkSize; - - if (m_pppTarPatch == NULL) - { - m_pppTarPatch = new Pel * *[USE_MORE_BLOCKSIZE_DEPTH_MAX]; - for (unsigned int uiDepth = 0; uiDepth < USE_MORE_BLOCKSIZE_DEPTH_MAX; uiDepth++) - { - blkSize = g_uiDepth2Width[uiDepth]; - - unsigned int patchSize = blkSize + TMP_TEMPLATE_SIZE; - m_pppTarPatch[uiDepth] = new Pel * [patchSize]; - for (unsigned int uiRow = 0; uiRow < patchSize; uiRow++) - { - m_pppTarPatch[uiDepth][uiRow] = new Pel[patchSize]; - } - } -} -#endif - #if TU_256 fastFwdTrans = { { @@ -348,9 +295,6 @@ void TrQuant::init( const Quant* otherQuant, #if ENABLE_SIMD_SIGN_PREDICTION m_computeSAD = xComputeSAD; #endif -#if JVET_V0130_INTRA_TMP - m_calcTemplateDiff = calcTemplateDiff; -#endif #if ENABLE_SIMD_SIGN_PREDICTION || TRANSFORM_SIMD_OPT #ifdef TARGET_SIMD_X86 @@ -468,493 +412,6 @@ void TrQuant::invLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32 } } -#if JVET_V0130_INTRA_TMP -void insertNode(int diff, int& iXOffset, int& iYOffset, int& pDiff, int& pX, int& pY, short& pId, unsigned int& setId) -{ - pDiff = diff; - pX = iXOffset; - pY = iYOffset; - pId = setId; -} - -void clipMvIntraConstraint(CodingUnit* pcCU, int regionId, int& iHorMin, int& iHorMax, int& iVerMin, int& iVerMax, unsigned int uiTemplateSize, unsigned int uiBlkWidth, unsigned int uiBlkHeight, int iCurrY, int iCurrX, int offsetLCUY, int offsetLCUX) -{ - int searchRangeWidth = 
TMP_SEARCH_RANGE_MULT_FACTOR * uiBlkWidth; - int searchRangeHeight = TMP_SEARCH_RANGE_MULT_FACTOR * uiBlkHeight; - int iMvShift = 0; - int iTemplateSize = uiTemplateSize; - int iBlkWidth = uiBlkWidth; - int iBlkHeight = uiBlkHeight; - if (regionId == 0) //above outside LCU - { - iHorMax = std::min((iCurrX + searchRangeWidth) << iMvShift, (int)((pcCU->cs->pps->getPicWidthInLumaSamples() - iBlkWidth) << iMvShift) ); - iHorMin = std::max((iTemplateSize) << iMvShift, (iCurrX - searchRangeWidth) << iMvShift); - - iVerMax = (iCurrY - iBlkHeight - offsetLCUY) << iMvShift; - iVerMin = std::max(((iTemplateSize) << iMvShift), ((iCurrY - searchRangeHeight) << iMvShift)); - - iHorMin = iHorMin - iCurrX; - iHorMax = iHorMax - iCurrX; - iVerMax = iVerMax - iCurrY; - iVerMin = iVerMin - iCurrY; - } - else if (regionId == 1) //left outside LCU - { - iHorMax = (iCurrX - offsetLCUX - iBlkWidth) << iMvShift; - iHorMin = std::max((iTemplateSize) << iMvShift, (iCurrX - searchRangeWidth) << iMvShift); - - iVerMin = std::max((iTemplateSize) << iMvShift, (iCurrY - iBlkHeight - offsetLCUY) << iMvShift); - iVerMax = (iCurrY) << iMvShift; - - iHorMin = iHorMin - iCurrX; - iHorMax = iHorMax - iCurrX; - iVerMax = iVerMax - iCurrY; - iVerMin = iVerMin - iCurrY; - } - else if (regionId == 2) //left outside LCU (can reach the bottom row of LCU) - { - iHorMin = std::max((iTemplateSize) << iMvShift, (iCurrX - searchRangeWidth) << iMvShift); - iHorMax = (iCurrX - offsetLCUX - iBlkWidth) << iMvShift; - iVerMin = (iCurrY + 1) << iMvShift; - iVerMax = std::min(pcCU->cs->pps->getPicHeightInLumaSamples() - iBlkHeight, (iCurrY - offsetLCUY + pcCU->cs->sps->getCTUSize() - iBlkHeight) << iMvShift); - - iHorMin = iHorMin - iCurrX; - iHorMax = iHorMax - iCurrX; - iVerMax = iVerMax - iCurrY; - iVerMin = iVerMin - iCurrY; - } -} - -TempLibFast::TempLibFast() -{ -} - -TempLibFast::~TempLibFast() -{ -} - -void TempLibFast::initTemplateDiff(unsigned int uiPatchWidth, unsigned int uiPatchHeight, unsigned int 
uiBlkWidth, unsigned int uiBlkHeight, int bitDepth) -{ - int maxValue = ((1 << bitDepth) >> (INIT_THRESHOULD_SHIFTBITS)) * (uiPatchHeight * uiPatchWidth - uiBlkHeight * uiBlkWidth); - m_diffMax = maxValue; - { - m_pDiff = maxValue; - } -} - -#if JVET_W0069_TMP_BOUNDARY -void TrQuant::getTargetTemplate(CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight, RefTemplateType TempType) -#else -void TrQuant::getTargetTemplate(CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight) -#endif -{ - const ComponentID compID = COMPONENT_Y; - unsigned int uiPatchWidth = uiBlkWidth + TMP_TEMPLATE_SIZE; - unsigned int uiPatchHeight = uiBlkHeight + TMP_TEMPLATE_SIZE; - unsigned int uiTarDepth = floorLog2(std::max(uiBlkHeight, uiBlkWidth)) - 2; - Pel** tarPatch = m_pppTarPatch[uiTarDepth]; - CompArea area = pcCU->blocks[compID]; - Pel* pCurrStart = pcCU->cs->picture->getRecoBuf(area).buf; - unsigned int uiPicStride = pcCU->cs->picture->getRecoBuf(compID).stride; - unsigned int uiY, uiX; - - //fill template - //up-left & up - Pel* tarTemp; -#if JVET_W0069_TMP_BOUNDARY - if (TempType == L_SHAPE_TEMPLATE) - { -#endif - Pel* pCurrTemp = pCurrStart - TMP_TEMPLATE_SIZE * uiPicStride - TMP_TEMPLATE_SIZE; - for (uiY = 0; uiY < TMP_TEMPLATE_SIZE; uiY++) - { - tarTemp = tarPatch[uiY]; - for (uiX = 0; uiX < uiPatchWidth; uiX++) - { - tarTemp[uiX] = pCurrTemp[uiX]; - } - pCurrTemp += uiPicStride; - } - //left - for (uiY = TMP_TEMPLATE_SIZE; uiY < uiPatchHeight; uiY++) - { - tarTemp = tarPatch[uiY]; - for (uiX = 0; uiX < TMP_TEMPLATE_SIZE; uiX++) - { - tarTemp[uiX] = pCurrTemp[uiX]; - } - pCurrTemp += uiPicStride; - } -#if JVET_W0069_TMP_BOUNDARY - } - else if (TempType == ABOVE_TEMPLATE) - { - Pel* pCurrTemp = pCurrStart - TMP_TEMPLATE_SIZE * uiPicStride; - for (uiY = 0; uiY < TMP_TEMPLATE_SIZE; uiY++) - { - tarTemp = tarPatch[uiY]; - for (uiX = 0; uiX < uiBlkWidth; uiX++) - { - tarTemp[uiX] = pCurrTemp[uiX]; - } - pCurrTemp += uiPicStride; - } - } - else if (TempType 
== LEFT_TEMPLATE) - { - Pel* pCurrTemp = pCurrStart - TMP_TEMPLATE_SIZE; - for (uiY = TMP_TEMPLATE_SIZE; uiY < uiPatchHeight; uiY++) - { - tarTemp = tarPatch[uiY]; - for (uiX = 0; uiX < TMP_TEMPLATE_SIZE; uiX++) - { - tarTemp[uiX] = pCurrTemp[uiX]; - } - pCurrTemp += uiPicStride; - } - } -#endif -} - -#if JVET_W0069_TMP_BOUNDARY -void TrQuant::candidateSearchIntra(CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight, RefTemplateType TempType) -#else -void TrQuant::candidateSearchIntra(CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight) -#endif -{ - const ComponentID compID = COMPONENT_Y; - const int channelBitDepth = pcCU->cs->sps->getBitDepth(toChannelType(compID)); - unsigned int uiPatchWidth = uiBlkWidth + TMP_TEMPLATE_SIZE; - unsigned int uiPatchHeight = uiBlkHeight + TMP_TEMPLATE_SIZE; - unsigned int uiTarDepth = floorLog2(std::max(uiBlkWidth, uiBlkHeight)) - 2; - Pel** tarPatch = getTargetPatch(uiTarDepth); - //Initialize the library for saving the best candidates - m_tempLibFast.initTemplateDiff(uiPatchWidth, uiPatchHeight, uiBlkWidth, uiBlkHeight, channelBitDepth); - short setId = 0; //record the reference picture. 
-#if JVET_W0069_TMP_BOUNDARY - searchCandidateFromOnePicIntra(pcCU, tarPatch, uiPatchWidth, uiPatchHeight, setId, TempType); -#else - searchCandidateFromOnePicIntra(pcCU, tarPatch, uiPatchWidth, uiPatchHeight, setId); -#endif - //count collected candidate number - int pDiff = m_tempLibFast.getDiff(); - int maxDiff = m_tempLibFast.getDiffMax(); - - - if( pDiff < maxDiff ) - { - m_uiVaildCandiNum = 1; - } - else - { - m_uiVaildCandiNum = 0; - } -} - -#if JVET_W0069_TMP_BOUNDARY -void TrQuant::searchCandidateFromOnePicIntra(CodingUnit* pcCU, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, unsigned int setId, RefTemplateType TempType) -#else -void TrQuant::searchCandidateFromOnePicIntra(CodingUnit* pcCU, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, unsigned int setId) -#endif -{ - const ComponentID compID = COMPONENT_Y; - unsigned int uiBlkWidth = uiPatchWidth - TMP_TEMPLATE_SIZE; - unsigned int uiBlkHeight = uiPatchHeight - TMP_TEMPLATE_SIZE; - - int pX = m_tempLibFast.getX(); - int pY = m_tempLibFast.getY(); - int pDiff = m_tempLibFast.getDiff(); - short pId = m_tempLibFast.getId(); - CompArea area = pcCU->blocks[compID]; - int refStride = pcCU->cs->picture->getRecoBuf(compID).stride; - - Pel* ref = pcCU->cs->picture->getRecoBuf(area).buf; - - setRefPicUsed(ref); //facilitate the access of each candidate point - setStride(refStride); - - Mv cTmpMvPred; - cTmpMvPred.setZero(); - - unsigned int uiCUPelY = area.pos().y; - unsigned int uiCUPelX = area.pos().x; - int blkX = 0; - int blkY = 0; - int iCurrY = uiCUPelY + blkY; - int iCurrX = uiCUPelX + blkX; - - Position ctuRsAddr = CU::getCtuXYAddr(*pcCU); - int offsetLCUY = iCurrY - ctuRsAddr.y; - int offsetLCUX = iCurrX - ctuRsAddr.x; - - int iYOffset, iXOffset; - int diff; - Pel* refCurr; - - const int regionNum = 3; - int mvYMins[regionNum]; - int mvYMaxs[regionNum]; - int mvXMins[regionNum]; - int mvXMaxs[regionNum]; - int regionId = 0; - - //1. 
check the near pixels within LCU - //above pixels in LCU - int iTemplateSize = TMP_TEMPLATE_SIZE; - int iBlkWidth = uiBlkWidth; - int iBlkHeight = uiBlkHeight; - regionId = 0; - int iMvShift = 0; - - int iVerMin = std::max(((iTemplateSize) << iMvShift), (iCurrY - offsetLCUY - iBlkHeight + 1) << iMvShift); - int iVerMax = (iCurrY - iBlkHeight) << iMvShift; - int iHorMin = std::max((iTemplateSize) << iMvShift, (iCurrX - offsetLCUX - iBlkWidth + 1) << iMvShift); - int iHorMax = (iCurrX - iBlkWidth); - - mvXMins[regionId] = iHorMin - iCurrX; - mvXMaxs[regionId] = iHorMax - iCurrX; - mvYMins[regionId] = iVerMin - iCurrY; - mvYMaxs[regionId] = iVerMax - iCurrY; - - //check within CTU pixels - for (regionId = 0; regionId < 1; regionId++) - { - int mvYMin = mvYMins[regionId]; - int mvYMax = mvYMaxs[regionId]; - int mvXMin = mvXMins[regionId]; - int mvXMax = mvXMaxs[regionId]; - if (mvYMax < mvYMin || mvXMax < mvXMin) - { - continue; - } - - for (iYOffset = mvYMax; iYOffset >= mvYMin; iYOffset--) - { - for (iXOffset = mvXMax; iXOffset >= mvXMin; iXOffset--) - { - refCurr = ref + iYOffset * refStride + iXOffset; -#if JVET_W0069_TMP_BOUNDARY - diff = m_calcTemplateDiff(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, pDiff, TempType); -#else - diff = m_calcTemplateDiff(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, pDiff); -#endif - if (diff < (pDiff)) - { - insertNode(diff, iXOffset, iYOffset, pDiff, pX, pY, pId, setId); - } - if (pDiff == 0) - { - regionId++; - } - } - } - } - - //2. 
check the pixels outside CTU - for (regionId = 0; regionId < regionNum; regionId++) - {// this function fills in the range the template matching for pixels outside the current CTU - clipMvIntraConstraint(pcCU, regionId, mvXMins[regionId], mvXMaxs[regionId], mvYMins[regionId], mvYMaxs[regionId], TMP_TEMPLATE_SIZE, uiBlkWidth, uiBlkHeight, iCurrY, iCurrX, offsetLCUY, offsetLCUX); - } - - for (regionId = 0; regionId < regionNum; regionId++) - { - int mvYMin = mvYMins[regionId]; - int mvYMax = mvYMaxs[regionId]; - int mvXMin = mvXMins[regionId]; - int mvXMax = mvXMaxs[regionId]; - if ( mvYMax < mvYMin || mvXMax < mvXMin ) - { - continue; - } - for (iYOffset = mvYMax; iYOffset >= mvYMin; iYOffset--) - { - for (iXOffset = mvXMax; iXOffset >= mvXMin; iXOffset--) - { - refCurr = ref + iYOffset * refStride + iXOffset; -#if JVET_W0069_TMP_BOUNDARY - diff = m_calcTemplateDiff(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, pDiff, TempType); -#else - diff = m_calcTemplateDiff(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, pDiff); -#endif - - if (diff < (pDiff)) - { - insertNode(diff, iXOffset, iYOffset, pDiff, pX, pY, pId, setId); - } - - if (pDiff == 0) - { - regionId = regionNum; - } - } - } - } - - m_tempLibFast.m_pX = pX; - m_tempLibFast.m_pY = pY; - m_tempLibFast.m_pDiff = pDiff; - m_tempLibFast.m_pId = pId; -} -bool TrQuant::generateTMPrediction(Pel* piPred, unsigned int uiStride, unsigned int uiBlkWidth, unsigned int uiBlkHeight, int& foundCandiNum) -{ - bool bSucceedFlag = true; - unsigned int uiPatchWidth = uiBlkWidth + TMP_TEMPLATE_SIZE; - unsigned int uiPatchHeight = uiBlkHeight + TMP_TEMPLATE_SIZE; - - foundCandiNum = m_uiVaildCandiNum; - if (foundCandiNum < 1) - { - return false; - } - - int pX = m_tempLibFast.getX(); - int pY = m_tempLibFast.getY(); - Pel* ref; - int picStride = getStride(); - int iOffsetY, iOffsetX; - Pel* refTarget; - unsigned int uiHeight = uiPatchHeight - TMP_TEMPLATE_SIZE; - unsigned int uiWidth = uiPatchWidth - 
TMP_TEMPLATE_SIZE; - - //the data center: we use the prediction block as the center now. - //collect the candidates - ref = getRefPicUsed(); - { - iOffsetY = pY; - iOffsetX = pX; - refTarget = ref + iOffsetY * picStride + iOffsetX; - for (unsigned int uiY = 0; uiY < uiHeight; uiY++) - { - for (unsigned int uiX = 0; uiX < uiWidth; uiX++) - { - piPred[uiX] = refTarget[uiX]; - } - refTarget += picStride; - piPred += uiStride; - } - } - return bSucceedFlag; -} - -#if JVET_W0069_TMP_BOUNDARY -bool TrQuant::generateTM_DC_Prediction(Pel* piPred, unsigned int uiStride, unsigned int uiBlkWidth, unsigned int uiBlkHeight, int DC_Val) -{ - bool bSucceedFlag = true; - { - for (unsigned int uiY = 0; uiY < uiBlkHeight; uiY++) - { - for (unsigned int uiX = 0; uiX < uiBlkWidth; uiX++) - { - piPred[uiX] = DC_Val; - } - piPred += uiStride; - } - } - return bSucceedFlag; -} -#endif - -#if JVET_W0069_TMP_BOUNDARY -int TrQuant::calcTemplateDiff(Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int iMax, RefTemplateType TempType) -#else -int TrQuant::calcTemplateDiff(Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int iMax) -#endif -{ - int iDiffSum = 0; -#if JVET_W0069_TMP_BOUNDARY - Pel* refPatchRow; - if( TempType == L_SHAPE_TEMPLATE ) - { - refPatchRow = ref - TMP_TEMPLATE_SIZE * uiStride - TMP_TEMPLATE_SIZE; - } - else if( TempType == LEFT_TEMPLATE ) - { - refPatchRow = ref - TMP_TEMPLATE_SIZE; - } - else if( TempType == ABOVE_TEMPLATE ) - { - refPatchRow = ref - TMP_TEMPLATE_SIZE * uiStride; - } -#else - Pel* refPatchRow = ref - TMP_TEMPLATE_SIZE * uiStride - TMP_TEMPLATE_SIZE; -#endif - Pel* tarPatchRow; - -#if JVET_W0069_TMP_BOUNDARY - if( TempType == L_SHAPE_TEMPLATE ) - { -#endif - // horizontal difference - for( int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++ ) - { - tarPatchRow = tarPatch[iY]; - for( int iX = 0; iX < uiPatchWidth; iX++ ) - { - iDiffSum += abs( 
refPatchRow[iX] - tarPatchRow[iX] ); - } - if( iDiffSum > iMax ) //for speeding up - { - return iDiffSum; - } - refPatchRow += uiStride; - } - - // vertical difference - for( int iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++ ) - { - tarPatchRow = tarPatch[iY]; - for( int iX = 0; iX < TMP_TEMPLATE_SIZE; iX++ ) - { - iDiffSum += abs( refPatchRow[iX] - tarPatchRow[iX] ); - } - if( iDiffSum > iMax ) //for speeding up - { - return iDiffSum; - } - refPatchRow += uiStride; - } -#if JVET_W0069_TMP_BOUNDARY - } - else if (TempType == ABOVE_TEMPLATE) - { - // top template difference - for (int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++) - { - tarPatchRow = tarPatch[iY]; - for (int iX = 0; iX < uiPatchWidth - TMP_TEMPLATE_SIZE; iX++) - { - iDiffSum += abs(refPatchRow[iX] - tarPatchRow[iX]); - } - if (iDiffSum > iMax) //for speeding up - { - return iDiffSum; - } - refPatchRow += uiStride; - } - } - else if (TempType == LEFT_TEMPLATE) - { - // left template difference - for (int iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++) - { - tarPatchRow = tarPatch[iY]; - for (int iX = 0; iX < TMP_TEMPLATE_SIZE; iX++) - { - iDiffSum += abs(refPatchRow[iX] - tarPatchRow[iX]); - } - if (iDiffSum > iMax) //for speeding up - { - return iDiffSum; - } - refPatchRow += uiStride; - } - } -#endif - - return iDiffSum; -} -#endif - uint32_t TrQuant::getLFNSTIntraMode( int wideAngPredMode ) { uint32_t intraMode; diff --git a/source/Lib/CommonLib/TrQuant.h b/source/Lib/CommonLib/TrQuant.h index ec9b42c35..3bc60d777 100644 --- a/source/Lib/CommonLib/TrQuant.h +++ b/source/Lib/CommonLib/TrQuant.h @@ -56,47 +56,6 @@ typedef void FwdTrans(const TCoeff*, TCoeff*, int, int, int, int); typedef void InvTrans(const TCoeff*, TCoeff*, int, int, int, int, const TCoeff, const TCoeff); - -#if JVET_V0130_INTRA_TMP -extern unsigned int g_uiDepth2Width[5]; -extern unsigned int g_uiDepth2MaxCandiNum[5]; - -class TempLibFast -{ -public: - int m_pX; //offset X - int m_pY; //offset Y - int m_pXInteger; //offset X for 
integer pixel search - int m_pYInteger; //offset Y for integer pixel search - int m_pDiffInteger; - int getXInteger() { return m_pXInteger; } - int getYInteger() { return m_pYInteger; } - int getDiffInteger() { return m_pDiffInteger; } - short m_pIdInteger; //frame id - short getIdInteger() { return m_pIdInteger; } - int m_pDiff; //mse - short m_pId; //frame id - - - TempLibFast(); - ~TempLibFast(); - //void init(); - int getX() { return m_pX; } - int getY() { return m_pY; } - int getDiff() { return m_pDiff; } - short getId() { return m_pId; } - /*void initDiff(unsigned int uiPatchSize, int bitDepth); - void initDiff(unsigned int uiPatchSize, int bitDepth, int iCandiNumber);*/ - void initTemplateDiff(unsigned int uiPatchWidth, unsigned int uiPatchHeight, unsigned int uiBlkWidth, unsigned int uiBlkHeight, int bitDepth); - int m_diffMax; - int getDiffMax() { return m_diffMax; } -}; - - -typedef short TrainDataType; -#endif - - // ==================================================================================================================== // Class definition // ==================================================================================================================== @@ -128,37 +87,6 @@ public: #else void fwdLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize ); void invLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize ); -#endif -#if JVET_V0130_INTRA_TMP -#if JVET_W0069_TMP_BOUNDARY - int (*m_calcTemplateDiff)(Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int iMax, RefTemplateType TempType); - static int calcTemplateDiff(Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int iMax, RefTemplateType TempType); -#else - int ( *m_calcTemplateDiff )(Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned 
int uiPatchHeight, int iMax); - static int calcTemplateDiff(Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int iMax); -#endif - Pel** getTargetPatch(unsigned int uiDepth) { return m_pppTarPatch[uiDepth]; } - Pel* getRefPicUsed() { return m_refPicUsed; } - void setRefPicUsed(Pel* ref) { m_refPicUsed = ref; } - unsigned int getStride() { return m_uiPicStride; } - void setStride(unsigned int uiPicStride) { m_uiPicStride = uiPicStride; } - -#if JVET_W0069_TMP_BOUNDARY - void searchCandidateFromOnePicIntra(CodingUnit* pcCU, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, unsigned int setId, RefTemplateType TempType); - void candidateSearchIntra(CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight, RefTemplateType TempType); -#else - void searchCandidateFromOnePicIntra(CodingUnit* pcCU, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, unsigned int setId); - void candidateSearchIntra(CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight); -#endif - bool generateTMPrediction(Pel* piPred, unsigned int uiStride, unsigned int uiBlkWidth, unsigned int uiBlkHeight, int& foundCandiNum); -#if JVET_W0069_TMP_BOUNDARY - bool generateTM_DC_Prediction(Pel* piPred, unsigned int uiStride, unsigned int uiBlkWidth, unsigned int uiBlkHeight, int DC_Val); -#endif -#if JVET_W0069_TMP_BOUNDARY - void getTargetTemplate(CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight, RefTemplateType TempType); -#else - void getTargetTemplate(CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight); -#endif #endif uint32_t getLFNSTIntraMode( int wideAngPredMode ); @@ -217,15 +145,6 @@ public: protected: TCoeff m_tempCoeff[MAX_TB_SIZEY * MAX_TB_SIZEY]; -#if JVET_V0130_INTRA_TMP - int m_uiPartLibSize; - TempLibFast m_tempLibFast; - Pel* m_refPicUsed; - Picture* m_refPicBuf; - unsigned int m_uiPicStride; - unsigned int m_uiVaildCandiNum; - Pel*** 
m_pppTarPatch; -#endif #if SIGN_PREDICTION Pel m_tempSignPredResid[SIGN_PRED_MAX_BS * SIGN_PRED_MAX_BS * 2]{0}; Pel m_signPredTemplate[SIGN_PRED_FREQ_RANGE*SIGN_PRED_FREQ_RANGE*SIGN_PRED_MAX_BS*2]; @@ -305,7 +224,7 @@ private: static void fastInverseTransform_SIMD( const TCoeff *coeff, TCoeff *block, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum ); #endif -#if ENABLE_SIMD_SIGN_PREDICTION || TRANSFORM_SIMD_OPT || ENABLE_SIMD_TMP +#if ENABLE_SIMD_SIGN_PREDICTION || TRANSFORM_SIMD_OPT #ifdef TARGET_SIMD_X86 void initTrQuantX86(); template <X86_VEXT vext> diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 351e19286..38d9110f4 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -65,6 +65,8 @@ #define REMOVE_PCM 1 // Remove PCM related code for memory reduction and speedup +#define JVET_X0049_BDMVR_SW_OPT 1 // JVET-X0049: software optimization for BDMVR (lossless) + // SIMD optimizations #define MCIF_SIMD_NEW 1 // SIMD for interpolation #define DIST_SSE_ENABLE 1 // Enable SIMD for SSE @@ -109,15 +111,19 @@ #define INTRA_6TAP 1 // 6TapCubic + 6 TapGaussian + left side 4 tap weak filtering for intra. 
#define SECONDARY_MPM 1 // Primary MPM and Secondary MPM: Add neighbouring modes into MPMs from positions AR, BL, AL, derived modes #define ENABLE_DIMD 1 // Decoder side intra mode derivation -#define JVET_V0087_DIMD_NO_ISP ENABLE_DIMD // JVET-V0087: disallow combination of DIMD and ISP +#if ENABLE_DIMD +#define JVET_V0087_DIMD_NO_ISP 1 // JVET-V0087: disallow combination of DIMD and ISP +#define JVET_X0124_TMP_SIGNAL 1 // JVET-X0124: cleanup on signalling of intra template matching +#endif #define JVET_V0130_INTRA_TMP 1 // JVET-V0130: template matching prediction #define JVET_W0069_TMP_BOUNDARY 1 // JVET-W0069: boundary handling for TMP #define JVET_W0123_TIMD_FUSION 1 // JVET-W0123: Template based intra mode derivation and fusion -#define JVET_X0148_TIMD_PDPC JVET_W0123_TIMD_FUSION // JVET-X0148: PDPC handling for TIMD +#if JVET_W0123_TIMD_FUSION +#define JVET_X0148_TIMD_PDPC 1 // JVET-X0148: PDPC handling for TIMD +#endif #if ENABLE_DIMD || JVET_W0123_TIMD_FUSION #define JVET_X0149_TIMD_DIMD_LUT 1 // JVET-X0149: LUT-based derivation of DIMD and TIMD #endif -#define JVET_X0124_TMP_SIGNAL ENABLE_DIMD // JVET-X0124: cleanup on signalling of intra template matching // Inter #define CIIP_PDPC 1 // apply pdpc to megre prediction as a new CIIP mode (CIIP_PDPC) additional to CIIP mode @@ -131,13 +137,11 @@ #define IF_12TAP 1 // 12-tap IF #define ENABLE_OBMC 1 // Enable Overlapped Block Motion Compensation -#define JVET_X0049_BDMVR_SW_OPT 1 // JVET-X0049: software optimization for BDMVR (lossless) #if JVET_X0049_BDMVR_SW_OPT #define JVET_X0049_ADAPT_DMVR 1 // JVET-X0049: Adaptive DMVR #endif #define JVET_X0056_DMVD_EARLY_TERMINATION 1 // JVET-X0056: Early termination for DMVR and TM - -#define JVET_X0083_BM_AMVP_MERGE_MODE 1 +#define JVET_X0083_BM_AMVP_MERGE_MODE 1 // JVET-X0083: AMVP-merge mode // Inter template matching tools #define ENABLE_INTER_TEMPLATE_MATCHING 1 // It controls whether template matching is enabled for inter prediction diff --git 
a/source/Lib/CommonLib/x86/InitX86.cpp b/source/Lib/CommonLib/x86/InitX86.cpp index e12809508..7766a4ce2 100644 --- a/source/Lib/CommonLib/x86/InitX86.cpp +++ b/source/Lib/CommonLib/x86/InitX86.cpp @@ -193,7 +193,7 @@ void IbcHashMap::initIbcHashMapX86() } #endif -#if ENABLE_SIMD_SIGN_PREDICTION || TRANSFORM_SIMD_OPT || ENABLE_SIMD_TMP +#if ENABLE_SIMD_SIGN_PREDICTION || TRANSFORM_SIMD_OPT void TrQuant::initTrQuantX86() { auto vext = read_x86_extension_flags(); @@ -216,6 +216,29 @@ void TrQuant::initTrQuantX86() } #endif +#if ENABLE_SIMD_TMP +void IntraPrediction::initIntraX86() +{ + auto vext = read_x86_extension_flags(); + switch( vext ) + { + case AVX512: + case AVX2: + _initIntraX86<AVX2>(); + break; + case AVX: + _initIntraX86<AVX>(); + break; + case SSE42: + case SSE41: + _initIntraX86<SSE41>(); + break; + default: + break; + } +} +#endif + #if ENABLE_SIMD_BILATERAL_FILTER || JVET_X0071_CHROMA_BILATERAL_FILTER_ENABLE_SIMD void BilateralFilter::initBilateralFilterX86() { diff --git a/source/Lib/CommonLib/x86/IntraX86.h b/source/Lib/CommonLib/x86/IntraX86.h new file mode 100644 index 000000000..e4f5617d4 --- /dev/null +++ b/source/Lib/CommonLib/x86/IntraX86.h @@ -0,0 +1,321 @@ +/* The copyright in this software is being made available under the BSD + * License, included below. This software may be subject to other third party + * and contributor rights, including patent rights, and no such rights are + * granted under this license. + * + * Copyright (c) 2010-2021, ITU/ISO/IEC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** + * \file + * \brief x86 SIMD implementation of IntraPrediction template matching (TMP) helpers + */ + +#include "CommonDefX86.h" +#include "../IntraPrediction.h" + +#ifdef TARGET_SIMD_X86 + +#include <nmmintrin.h> + +#if ENABLE_SIMD_TMP +template< X86_VEXT vext > +#if JVET_W0069_TMP_BOUNDARY +int calcTemplateDiffSIMD(Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int iMax, RefTemplateType tempType) +#else +int calcTemplateDiffSIMD( Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int iMax ) +#endif +{ + int diffSum = 0; + int iY; +#if JVET_W0069_TMP_BOUNDARY + Pel* refPatchRow; + if( tempType == L_SHAPE_TEMPLATE ) + { + refPatchRow = ref - TMP_TEMPLATE_SIZE * uiStride - TMP_TEMPLATE_SIZE; + } + else if( tempType == LEFT_TEMPLATE ) + { + refPatchRow = ref - TMP_TEMPLATE_SIZE; + } + else if( tempType == ABOVE_TEMPLATE ) + { + refPatchRow = ref - TMP_TEMPLATE_SIZE * uiStride; + } +#else + Pel* refPatchRow = ref - TMP_TEMPLATE_SIZE * uiStride - TMP_TEMPLATE_SIZE; +#endif + Pel* tarPatchRow; + uint32_t uiSum; + + // horizontal difference +#if JVET_W0069_TMP_BOUNDARY + if( tempType == L_SHAPE_TEMPLATE ) + { +#endif + for( iY = 0; iY < TMP_TEMPLATE_SIZE; iY++ ) + { + tarPatchRow = tarPatch[iY]; + const short* pSrc1 = ( const short* ) tarPatchRow; + const short* pSrc2 = ( const short* ) refPatchRow; + + // SIMD difference + //int iRows = uiPatchHeight; + int iCols = uiPatchWidth; + if( (iCols & 7) == 0 ) + { + // Do with step of 8 + __m128i vzero = _mm_setzero_si128(); + __m128i vsum32 = vzero; + //for (int iY = 0; iY < iRows; iY += iSubStep) + { + __m128i vsum16 = vzero; + for( int iX = 0; iX < iCols; iX += 8 ) + { + __m128i vsrc1 = _mm_loadu_si128( (const __m128i*)(&pSrc1[iX]) ); + __m128i vsrc2 = _mm_lddqu_si128( (const __m128i*)(&pSrc2[iX]) ); + vsum16 = _mm_add_epi16( vsum16, _mm_abs_epi16( _mm_sub_epi16( vsrc1, vsrc2 ) ) ); + } + __m128i vsumtemp = _mm_add_epi32( _mm_unpacklo_epi16( 
vsum16, vzero ), _mm_unpackhi_epi16( vsum16, vzero ) ); + vsum32 = _mm_add_epi32( vsum32, vsumtemp ); + //pSrc1 += iStrideSrc1; + //pSrc2 += iStrideSrc2; + } + vsum32 = _mm_add_epi32( vsum32, _mm_shuffle_epi32( vsum32, 0x4e ) ); // 01001110 + vsum32 = _mm_add_epi32( vsum32, _mm_shuffle_epi32( vsum32, 0xb1 ) ); // 10110001 + uiSum = _mm_cvtsi128_si32( vsum32 ); + } + else + { + // Do with step of 4 + __m128i vzero = _mm_setzero_si128(); + __m128i vsum32 = vzero; + //for (int iY = 0; iY < iRows; iY += iSubStep) + { + __m128i vsum16 = vzero; + for( int iX = 0; iX < iCols; iX += 4 ) + { + __m128i vsrc1 = _mm_loadl_epi64( (const __m128i*) & pSrc1[iX] ); + __m128i vsrc2 = _mm_loadl_epi64( (const __m128i*) & pSrc2[iX] ); + vsum16 = _mm_add_epi16( vsum16, _mm_abs_epi16( _mm_sub_epi16( vsrc1, vsrc2 ) ) ); + } + __m128i vsumtemp = _mm_add_epi32( _mm_unpacklo_epi16( vsum16, vzero ), _mm_unpackhi_epi16( vsum16, vzero ) ); + vsum32 = _mm_add_epi32( vsum32, vsumtemp ); + //pSrc1 += iStrideSrc1; + //pSrc2 += iStrideSrc2; + } + vsum32 = _mm_add_epi32( vsum32, _mm_shuffle_epi32( vsum32, 0x4e ) ); // 01001110 + vsum32 = _mm_add_epi32( vsum32, _mm_shuffle_epi32( vsum32, 0xb1 ) ); // 10110001 + uiSum = _mm_cvtsi128_si32( vsum32 ); + } + diffSum += uiSum; + + if( diffSum > iMax ) //for speeding up + { + return diffSum; + } + // update location + refPatchRow += uiStride; + } + + // vertical difference + int iCols = TMP_TEMPLATE_SIZE; + + for( iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++ ) + { + tarPatchRow = tarPatch[iY]; + const short* pSrc1 = ( const short* ) tarPatchRow; + const short* pSrc2 = ( const short* ) refPatchRow; + + // SIMD difference + + // Do with step of 4 + __m128i vzero = _mm_setzero_si128(); + __m128i vsum32 = vzero; + //for (int iY = 0; iY < iRows; iY += iSubStep) + { + __m128i vsum16 = vzero; + for( int iX = 0; iX < iCols; iX += 4 ) + { + __m128i vsrc1 = _mm_loadl_epi64( (const __m128i*) & pSrc1[iX] ); + __m128i vsrc2 = _mm_loadl_epi64( (const __m128i*) & 
pSrc2[iX] ); + vsum16 = _mm_add_epi16( vsum16, _mm_abs_epi16( _mm_sub_epi16( vsrc1, vsrc2 ) ) ); + } + __m128i vsumtemp = _mm_add_epi32( _mm_unpacklo_epi16( vsum16, vzero ), _mm_unpackhi_epi16( vsum16, vzero ) ); + vsum32 = _mm_add_epi32( vsum32, vsumtemp ); + //pSrc1 += iStrideSrc1; + //pSrc2 += iStrideSrc2; + } + vsum32 = _mm_add_epi32( vsum32, _mm_shuffle_epi32( vsum32, 0x4e ) ); // 01001110 + vsum32 = _mm_add_epi32( vsum32, _mm_shuffle_epi32( vsum32, 0xb1 ) ); // 10110001 + uiSum = _mm_cvtsi128_si32( vsum32 ); + + diffSum += uiSum; + + if( diffSum > iMax ) //for speeding up + { + return diffSum; + } + // update location + refPatchRow += uiStride; + } +#if JVET_W0069_TMP_BOUNDARY + } + else if (tempType == ABOVE_TEMPLATE) + { + // horizontal difference + for (iY = 0; iY < TMP_TEMPLATE_SIZE; iY++) + { + tarPatchRow = tarPatch[iY]; + const short* pSrc1 = (const short*)tarPatchRow; + const short* pSrc2 = (const short*)refPatchRow; + + // SIMD difference + //int iRows = uiPatchHeight; + int iCols = uiPatchWidth - TMP_TEMPLATE_SIZE; + if ((iCols & 7) == 0) + { + // Do with step of 8 + __m128i vzero = _mm_setzero_si128(); + __m128i vsum32 = vzero; + //for (int iY = 0; iY < iRows; iY += iSubStep) + { + __m128i vsum16 = vzero; + for (int iX = 0; iX < iCols; iX += 8) + { + __m128i vsrc1 = _mm_loadu_si128((const __m128i*)(&pSrc1[iX])); + __m128i vsrc2 = _mm_lddqu_si128((const __m128i*)(&pSrc2[iX])); + vsum16 = _mm_add_epi16(vsum16, _mm_abs_epi16(_mm_sub_epi16(vsrc1, vsrc2))); + } + __m128i vsumtemp = _mm_add_epi32(_mm_unpacklo_epi16(vsum16, vzero), _mm_unpackhi_epi16(vsum16, vzero)); + vsum32 = _mm_add_epi32(vsum32, vsumtemp); + //pSrc1 += iStrideSrc1; + //pSrc2 += iStrideSrc2; + } + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0x4e)); // 01001110 + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1)); // 10110001 + uiSum = _mm_cvtsi128_si32(vsum32); + } + else + { + // Do with step of 4 + __m128i vzero = _mm_setzero_si128(); + __m128i vsum32 = 
vzero; + //for (int iY = 0; iY < iRows; iY += iSubStep) + { + __m128i vsum16 = vzero; + for (int iX = 0; iX < iCols; iX += 4) + { + __m128i vsrc1 = _mm_loadl_epi64((const __m128i*) & pSrc1[iX]); + __m128i vsrc2 = _mm_loadl_epi64((const __m128i*) & pSrc2[iX]); + vsum16 = _mm_add_epi16(vsum16, _mm_abs_epi16(_mm_sub_epi16(vsrc1, vsrc2))); + } + __m128i vsumtemp = _mm_add_epi32(_mm_unpacklo_epi16(vsum16, vzero), _mm_unpackhi_epi16(vsum16, vzero)); + vsum32 = _mm_add_epi32(vsum32, vsumtemp); + //pSrc1 += iStrideSrc1; + //pSrc2 += iStrideSrc2; + } + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0x4e)); // 01001110 + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1)); // 10110001 + uiSum = _mm_cvtsi128_si32(vsum32); + } + diffSum += uiSum; + + if (diffSum > iMax) //for speeding up + { + return diffSum; + } + // update location + refPatchRow += uiStride; + } + + + } + else if (tempType == LEFT_TEMPLATE) + { + + // vertical difference + int iCols = TMP_TEMPLATE_SIZE; + + for (iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++) + { + tarPatchRow = tarPatch[iY]; + const short* pSrc1 = (const short*)tarPatchRow; + const short* pSrc2 = (const short*)refPatchRow; + + // SIMD difference + + // Do with step of 4 + __m128i vzero = _mm_setzero_si128(); + __m128i vsum32 = vzero; + //for (int iY = 0; iY < iRows; iY += iSubStep) + { + __m128i vsum16 = vzero; + for (int iX = 0; iX < iCols; iX += 4) + { + __m128i vsrc1 = _mm_loadl_epi64((const __m128i*) & pSrc1[iX]); + __m128i vsrc2 = _mm_loadl_epi64((const __m128i*) & pSrc2[iX]); + vsum16 = _mm_add_epi16(vsum16, _mm_abs_epi16(_mm_sub_epi16(vsrc1, vsrc2))); + } + __m128i vsumtemp = _mm_add_epi32(_mm_unpacklo_epi16(vsum16, vzero), _mm_unpackhi_epi16(vsum16, vzero)); + vsum32 = _mm_add_epi32(vsum32, vsumtemp); + //pSrc1 += iStrideSrc1; + //pSrc2 += iStrideSrc2; + } + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0x4e)); // 01001110 + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1)); // 
10110001 + uiSum = _mm_cvtsi128_si32(vsum32); + + diffSum += uiSum; + + if (diffSum > iMax) //for speeding up + { + return diffSum; + } + // update location + refPatchRow += uiStride; + } + } +#endif + + return diffSum; +} + +template <X86_VEXT vext> +void IntraPrediction::_initIntraX86() +{ +#if ENABLE_SIMD_TMP + m_calcTemplateDiff = calcTemplateDiffSIMD<vext>; +#endif +} + +template void IntraPrediction::_initIntraX86<SIMDX86>(); +#endif + +#endif //#ifdef TARGET_SIMD_X86 +//! \} diff --git a/source/Lib/CommonLib/x86/TrQuantX86.h b/source/Lib/CommonLib/x86/TrQuantX86.h index ac45c865e..99c806a31 100644 --- a/source/Lib/CommonLib/x86/TrQuantX86.h +++ b/source/Lib/CommonLib/x86/TrQuantX86.h @@ -410,271 +410,7 @@ uint32_t computeSAD_SIMD( const Pel* ref, const Pel* cur, const int size ) } #endif -#if ENABLE_SIMD_TMP -template< X86_VEXT vext > -#if JVET_W0069_TMP_BOUNDARY -int calcTemplateDiffSIMD(Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int iMax, RefTemplateType TempType) -#else -int calcTemplateDiffSIMD( Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int iMax ) -#endif -{ - int iDiffSum = 0; - int iY; -#if JVET_W0069_TMP_BOUNDARY - Pel* refPatchRow; - if( TempType == L_SHAPE_TEMPLATE ) - { - refPatchRow = ref - TMP_TEMPLATE_SIZE * uiStride - TMP_TEMPLATE_SIZE; - } - else if( TempType == LEFT_TEMPLATE ) - { - refPatchRow = ref - TMP_TEMPLATE_SIZE; - } - else if( TempType == ABOVE_TEMPLATE ) - { - refPatchRow = ref - TMP_TEMPLATE_SIZE * uiStride; - } -#else - Pel* refPatchRow = ref - TMP_TEMPLATE_SIZE * uiStride - TMP_TEMPLATE_SIZE; -#endif - Pel* tarPatchRow; - uint32_t uiSum; - - // horizontal difference -#if JVET_W0069_TMP_BOUNDARY - if (TempType == L_SHAPE_TEMPLATE) - { -#endif - for( iY = 0; iY < TMP_TEMPLATE_SIZE; iY++ ) - { - tarPatchRow = tarPatch[iY]; - const short* pSrc1 = ( const short* ) tarPatchRow; - const short* pSrc2 = ( const short* 
) refPatchRow; - - // SIMD difference - //int iRows = uiPatchHeight; - int iCols = uiPatchWidth; - if( (iCols & 7) == 0 ) - { - // Do with step of 8 - __m128i vzero = _mm_setzero_si128(); - __m128i vsum32 = vzero; - //for (int iY = 0; iY < iRows; iY += iSubStep) - { - __m128i vsum16 = vzero; - for( int iX = 0; iX < iCols; iX += 8 ) - { - __m128i vsrc1 = _mm_loadu_si128( (const __m128i*)(&pSrc1[iX]) ); - __m128i vsrc2 = _mm_lddqu_si128( (const __m128i*)(&pSrc2[iX]) ); - vsum16 = _mm_add_epi16( vsum16, _mm_abs_epi16( _mm_sub_epi16( vsrc1, vsrc2 ) ) ); - } - __m128i vsumtemp = _mm_add_epi32( _mm_unpacklo_epi16( vsum16, vzero ), _mm_unpackhi_epi16( vsum16, vzero ) ); - vsum32 = _mm_add_epi32( vsum32, vsumtemp ); - //pSrc1 += iStrideSrc1; - //pSrc2 += iStrideSrc2; - } - vsum32 = _mm_add_epi32( vsum32, _mm_shuffle_epi32( vsum32, 0x4e ) ); // 01001110 - vsum32 = _mm_add_epi32( vsum32, _mm_shuffle_epi32( vsum32, 0xb1 ) ); // 10110001 - uiSum = _mm_cvtsi128_si32( vsum32 ); - } - else - { - // Do with step of 4 - __m128i vzero = _mm_setzero_si128(); - __m128i vsum32 = vzero; - //for (int iY = 0; iY < iRows; iY += iSubStep) - { - __m128i vsum16 = vzero; - for( int iX = 0; iX < iCols; iX += 4 ) - { - __m128i vsrc1 = _mm_loadl_epi64( (const __m128i*) & pSrc1[iX] ); - __m128i vsrc2 = _mm_loadl_epi64( (const __m128i*) & pSrc2[iX] ); - vsum16 = _mm_add_epi16( vsum16, _mm_abs_epi16( _mm_sub_epi16( vsrc1, vsrc2 ) ) ); - } - __m128i vsumtemp = _mm_add_epi32( _mm_unpacklo_epi16( vsum16, vzero ), _mm_unpackhi_epi16( vsum16, vzero ) ); - vsum32 = _mm_add_epi32( vsum32, vsumtemp ); - //pSrc1 += iStrideSrc1; - //pSrc2 += iStrideSrc2; - } - vsum32 = _mm_add_epi32( vsum32, _mm_shuffle_epi32( vsum32, 0x4e ) ); // 01001110 - vsum32 = _mm_add_epi32( vsum32, _mm_shuffle_epi32( vsum32, 0xb1 ) ); // 10110001 - uiSum = _mm_cvtsi128_si32( vsum32 ); - } - iDiffSum += uiSum; - - if( iDiffSum > iMax ) //for speeding up - { - return iDiffSum; - } - // update location - refPatchRow += uiStride; - } - - 
// vertical difference - int iCols = TMP_TEMPLATE_SIZE; - - for( iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++ ) - { - tarPatchRow = tarPatch[iY]; - const short* pSrc1 = ( const short* ) tarPatchRow; - const short* pSrc2 = ( const short* ) refPatchRow; - - // SIMD difference - - // Do with step of 4 - __m128i vzero = _mm_setzero_si128(); - __m128i vsum32 = vzero; - //for (int iY = 0; iY < iRows; iY += iSubStep) - { - __m128i vsum16 = vzero; - for( int iX = 0; iX < iCols; iX += 4 ) - { - __m128i vsrc1 = _mm_loadl_epi64( (const __m128i*) & pSrc1[iX] ); - __m128i vsrc2 = _mm_loadl_epi64( (const __m128i*) & pSrc2[iX] ); - vsum16 = _mm_add_epi16( vsum16, _mm_abs_epi16( _mm_sub_epi16( vsrc1, vsrc2 ) ) ); - } - __m128i vsumtemp = _mm_add_epi32( _mm_unpacklo_epi16( vsum16, vzero ), _mm_unpackhi_epi16( vsum16, vzero ) ); - vsum32 = _mm_add_epi32( vsum32, vsumtemp ); - //pSrc1 += iStrideSrc1; - //pSrc2 += iStrideSrc2; - } - vsum32 = _mm_add_epi32( vsum32, _mm_shuffle_epi32( vsum32, 0x4e ) ); // 01001110 - vsum32 = _mm_add_epi32( vsum32, _mm_shuffle_epi32( vsum32, 0xb1 ) ); // 10110001 - uiSum = _mm_cvtsi128_si32( vsum32 ); - - iDiffSum += uiSum; - - if( iDiffSum > iMax ) //for speeding up - { - return iDiffSum; - } - // update location - refPatchRow += uiStride; - } -#if JVET_W0069_TMP_BOUNDARY - } - else if (TempType == ABOVE_TEMPLATE) - { - // horizontal difference - for (iY = 0; iY < TMP_TEMPLATE_SIZE; iY++) - { - tarPatchRow = tarPatch[iY]; - const short* pSrc1 = (const short*)tarPatchRow; - const short* pSrc2 = (const short*)refPatchRow; - - // SIMD difference - //int iRows = uiPatchHeight; - int iCols = uiPatchWidth - TMP_TEMPLATE_SIZE; - if ((iCols & 7) == 0) - { - // Do with step of 8 - __m128i vzero = _mm_setzero_si128(); - __m128i vsum32 = vzero; - //for (int iY = 0; iY < iRows; iY += iSubStep) - { - __m128i vsum16 = vzero; - for (int iX = 0; iX < iCols; iX += 8) - { - __m128i vsrc1 = _mm_loadu_si128((const __m128i*)(&pSrc1[iX])); - __m128i vsrc2 = 
_mm_lddqu_si128((const __m128i*)(&pSrc2[iX])); - vsum16 = _mm_add_epi16(vsum16, _mm_abs_epi16(_mm_sub_epi16(vsrc1, vsrc2))); - } - __m128i vsumtemp = _mm_add_epi32(_mm_unpacklo_epi16(vsum16, vzero), _mm_unpackhi_epi16(vsum16, vzero)); - vsum32 = _mm_add_epi32(vsum32, vsumtemp); - //pSrc1 += iStrideSrc1; - //pSrc2 += iStrideSrc2; - } - vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0x4e)); // 01001110 - vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1)); // 10110001 - uiSum = _mm_cvtsi128_si32(vsum32); - } - else - { - // Do with step of 4 - __m128i vzero = _mm_setzero_si128(); - __m128i vsum32 = vzero; - //for (int iY = 0; iY < iRows; iY += iSubStep) - { - __m128i vsum16 = vzero; - for (int iX = 0; iX < iCols; iX += 4) - { - __m128i vsrc1 = _mm_loadl_epi64((const __m128i*) & pSrc1[iX]); - __m128i vsrc2 = _mm_loadl_epi64((const __m128i*) & pSrc2[iX]); - vsum16 = _mm_add_epi16(vsum16, _mm_abs_epi16(_mm_sub_epi16(vsrc1, vsrc2))); - } - __m128i vsumtemp = _mm_add_epi32(_mm_unpacklo_epi16(vsum16, vzero), _mm_unpackhi_epi16(vsum16, vzero)); - vsum32 = _mm_add_epi32(vsum32, vsumtemp); - //pSrc1 += iStrideSrc1; - //pSrc2 += iStrideSrc2; - } - vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0x4e)); // 01001110 - vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1)); // 10110001 - uiSum = _mm_cvtsi128_si32(vsum32); - } - iDiffSum += uiSum; - - if (iDiffSum > iMax) //for speeding up - { - return iDiffSum; - } - // update location - refPatchRow += uiStride; - } - - - } - else if (TempType == LEFT_TEMPLATE) - { - - // vertical difference - int iCols = TMP_TEMPLATE_SIZE; - - for (iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++) - { - tarPatchRow = tarPatch[iY]; - const short* pSrc1 = (const short*)tarPatchRow; - const short* pSrc2 = (const short*)refPatchRow; - - // SIMD difference - - // Do with step of 4 - __m128i vzero = _mm_setzero_si128(); - __m128i vsum32 = vzero; - //for (int iY = 0; iY < iRows; iY += iSubStep) - { - __m128i 
vsum16 = vzero; - for (int iX = 0; iX < iCols; iX += 4) - { - __m128i vsrc1 = _mm_loadl_epi64((const __m128i*) & pSrc1[iX]); - __m128i vsrc2 = _mm_loadl_epi64((const __m128i*) & pSrc2[iX]); - vsum16 = _mm_add_epi16(vsum16, _mm_abs_epi16(_mm_sub_epi16(vsrc1, vsrc2))); - } - __m128i vsumtemp = _mm_add_epi32(_mm_unpacklo_epi16(vsum16, vzero), _mm_unpackhi_epi16(vsum16, vzero)); - vsum32 = _mm_add_epi32(vsum32, vsumtemp); - //pSrc1 += iStrideSrc1; - //pSrc2 += iStrideSrc2; - } - vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0x4e)); // 01001110 - vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1)); // 10110001 - uiSum = _mm_cvtsi128_si32(vsum32); - - iDiffSum += uiSum; - - if (iDiffSum > iMax) //for speeding up - { - return iDiffSum; - } - // update location - refPatchRow += uiStride; - } - } -#endif - - return iDiffSum; -} -#endif - -#if ENABLE_SIMD_SIGN_PREDICTION || TRANSFORM_SIMD_OPT || ENABLE_SIMD_TMP +#if ENABLE_SIMD_SIGN_PREDICTION || TRANSFORM_SIMD_OPT template <X86_VEXT vext> void TrQuant::_initTrQuantX86() { @@ -897,10 +633,6 @@ void TrQuant::_initTrQuantX86() fastInvTrans[2][5] = fastInverseTransform_SIMD<DST7, 64>; #endif #endif - -#if ENABLE_SIMD_TMP - m_calcTemplateDiff = calcTemplateDiffSIMD<vext>; -#endif } template void TrQuant::_initTrQuantX86<SIMDX86>(); diff --git a/source/Lib/CommonLib/x86/avx/Intra_avx.cpp b/source/Lib/CommonLib/x86/avx/Intra_avx.cpp new file mode 100644 index 000000000..59a692f72 --- /dev/null +++ b/source/Lib/CommonLib/x86/avx/Intra_avx.cpp @@ -0,0 +1 @@ +#include "../IntraX86.h" diff --git a/source/Lib/CommonLib/x86/avx2/Intra_avx2.cpp b/source/Lib/CommonLib/x86/avx2/Intra_avx2.cpp new file mode 100644 index 000000000..59a692f72 --- /dev/null +++ b/source/Lib/CommonLib/x86/avx2/Intra_avx2.cpp @@ -0,0 +1 @@ +#include "../IntraX86.h" diff --git a/source/Lib/CommonLib/x86/sse41/Intra_sse41.cpp b/source/Lib/CommonLib/x86/sse41/Intra_sse41.cpp new file mode 100644 index 000000000..59a692f72 --- /dev/null +++ 
b/source/Lib/CommonLib/x86/sse41/Intra_sse41.cpp @@ -0,0 +1 @@ +#include "../IntraX86.h" diff --git a/source/Lib/CommonLib/x86/sse42/Intra_sse42.cpp b/source/Lib/CommonLib/x86/sse42/Intra_sse42.cpp new file mode 100644 index 000000000..59a692f72 --- /dev/null +++ b/source/Lib/CommonLib/x86/sse42/Intra_sse42.cpp @@ -0,0 +1 @@ +#include "../IntraX86.h" diff --git a/source/Lib/DecoderLib/DecCu.cpp b/source/Lib/DecoderLib/DecCu.cpp index 0e196a07f..6d3fb02d4 100644 --- a/source/Lib/DecoderLib/DecCu.cpp +++ b/source/Lib/DecoderLib/DecCu.cpp @@ -385,22 +385,23 @@ void DecCu::xIntraRecBlk( TransformUnit& tu, const ComponentID compID ) { int foundCandiNum; #if JVET_W0069_TMP_BOUNDARY - RefTemplateType TempType = m_pcIntraPred->GetRefTemplateType(*(tu.cu), tu.cu->blocks[COMPONENT_Y]); - if (TempType != NO_TEMPLATE) + RefTemplateType tempType = m_pcIntraPred->getRefTemplateType(*(tu.cu), tu.cu->blocks[COMPONENT_Y]); + + if( tempType != NO_TEMPLATE ) { - m_pcTrQuant->getTargetTemplate(tu.cu, pu.lwidth(), pu.lheight(), TempType); - m_pcTrQuant->candidateSearchIntra(tu.cu, pu.lwidth(), pu.lheight(), TempType); - m_pcTrQuant->generateTMPrediction(piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), foundCandiNum); + m_pcIntraPred->getTargetTemplate(tu.cu, pu.lwidth(), pu.lheight(), tempType); + m_pcIntraPred->candidateSearchIntra(tu.cu, pu.lwidth(), pu.lheight(), tempType); + m_pcIntraPred->generateTMPrediction(piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), foundCandiNum); } else { foundCandiNum = 1; - m_pcTrQuant->generateTM_DC_Prediction(piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), 1 << (tu.cu->cs->sps->getBitDepth(CHANNEL_TYPE_LUMA) - 1)); + m_pcIntraPred->generateTmDcPrediction(piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), 1 << (tu.cu->cs->sps->getBitDepth(CHANNEL_TYPE_LUMA) - 1)); } #else - m_pcTrQuant->getTargetTemplate(tu.cu, pu.lwidth(), pu.lheight()); - m_pcTrQuant->candidateSearchIntra(tu.cu, pu.lwidth(), pu.lheight()); - 
m_pcTrQuant->generateTMPrediction(piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), foundCandiNum); + m_pcIntraPred->getTargetTemplate(tu.cu, pu.lwidth(), pu.lheight()); + m_pcIntraPred->candidateSearchIntra(tu.cu, pu.lwidth(), pu.lheight()); + m_pcIntraPred->generateTMPrediction(piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), foundCandiNum); #endif assert(foundCandiNum >= 1); } @@ -605,9 +606,9 @@ void DecCu::xIntraRecACTBlk(TransformUnit& tu) { int foundCandiNum; const unsigned int uiStride = cs.picture->getRecoBuf(COMPONENT_Y).stride; - m_pcTrQuant->getTargetTemplate(tu.cu, pu.lwidth(), pu.lheight()); - m_pcTrQuant->candidateSearchIntra(tu.cu, pu.lwidth(), pu.lheight()); - m_pcTrQuant->generateTMPrediction(piPred.buf, uiStride, pu.lwidth(), pu.lheight(), foundCandiNum); + m_pcIntraPred->getTargetTemplate(tu.cu, pu.lwidth(), pu.lheight()); + m_pcIntraPred->candidateSearchIntra(tu.cu, pu.lwidth(), pu.lheight()); + m_pcIntraPred->generateTMPrediction(piPred.buf, uiStride, pu.lwidth(), pu.lheight(), foundCandiNum); } else if (PU::isMIP(pu, chType)) #else diff --git a/source/Lib/EncoderLib/IntraSearch.cpp b/source/Lib/EncoderLib/IntraSearch.cpp index e97365623..c4edea054 100644 --- a/source/Lib/EncoderLib/IntraSearch.cpp +++ b/source/Lib/EncoderLib/IntraSearch.cpp @@ -892,27 +892,27 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c CodingUnit cu_cpy = cu; #if JVET_W0069_TMP_BOUNDARY - RefTemplateType TemplateType = GetRefTemplateType( cu_cpy, cu_cpy.blocks[COMPONENT_Y] ); - if( TemplateType != NO_TEMPLATE ) + RefTemplateType templateType = getRefTemplateType( cu_cpy, cu_cpy.blocks[COMPONENT_Y] ); + if( templateType != NO_TEMPLATE ) #else if( isRefTemplateAvailable( cu_cpy, cu_cpy.blocks[COMPONENT_Y] ) ) #endif { #if JVET_W0069_TMP_BOUNDARY - m_pcTrQuant->getTargetTemplate( &cu_cpy, pu.lwidth(), pu.lheight(), TemplateType ); - m_pcTrQuant->candidateSearchIntra( &cu_cpy, pu.lwidth(), pu.lheight(), TemplateType ); - 
bsuccessfull = m_pcTrQuant->generateTMPrediction( piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), foundCandiNum ); + getTargetTemplate( &cu_cpy, pu.lwidth(), pu.lheight(), templateType ); + candidateSearchIntra( &cu_cpy, pu.lwidth(), pu.lheight(), templateType ); + bsuccessfull = generateTMPrediction( piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), foundCandiNum ); #else - m_pcTrQuant->getTargetTemplate( &cu_cpy, pu.lwidth(), pu.lheight() ); - m_pcTrQuant->candidateSearchIntra( &cu_cpy, pu.lwidth(), pu.lheight() ); - bsuccessfull = m_pcTrQuant->generateTMPrediction( piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), foundCandiNum ); + getTargetTemplate( &cu_cpy, pu.lwidth(), pu.lheight() ); + candidateSearchIntra( &cu_cpy, pu.lwidth(), pu.lheight() ); + bsuccessfull = generateTMPrediction( piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), foundCandiNum ); #endif } #if JVET_W0069_TMP_BOUNDARY else { foundCandiNum = 1; - bsuccessfull = m_pcTrQuant->generateTM_DC_Prediction( piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), 1 << (cu_cpy.cs->sps->getBitDepth( CHANNEL_TYPE_LUMA ) - 1) ); + bsuccessfull = generateTmDcPrediction( piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), 1 << (cu_cpy.cs->sps->getBitDepth( CHANNEL_TYPE_LUMA ) - 1) ); } #endif if( bsuccessfull && foundCandiNum >= 1 ) @@ -3679,22 +3679,22 @@ void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID &comp { int foundCandiNum; #if JVET_W0069_TMP_BOUNDARY - RefTemplateType TempType = GetRefTemplateType( *(tu.cu), tu.cu->blocks[COMPONENT_Y] ); - if( TempType != NO_TEMPLATE ) + RefTemplateType tempType = getRefTemplateType( *(tu.cu), tu.cu->blocks[COMPONENT_Y] ); + if( tempType != NO_TEMPLATE ) { - m_pcTrQuant->getTargetTemplate( tu.cu, pu.lwidth(), pu.lheight(), TempType ); - m_pcTrQuant->candidateSearchIntra( tu.cu, pu.lwidth(), pu.lheight(), TempType ); - m_pcTrQuant->generateTMPrediction( piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), foundCandiNum ); 
+ getTargetTemplate( tu.cu, pu.lwidth(), pu.lheight(), tempType ); + candidateSearchIntra( tu.cu, pu.lwidth(), pu.lheight(), tempType ); + generateTMPrediction( piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), foundCandiNum ); } else { foundCandiNum = 1; - m_pcTrQuant->generateTM_DC_Prediction( piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), 1 << (tu.cu->cs->sps->getBitDepth( CHANNEL_TYPE_LUMA ) - 1) ); + generateTmDcPrediction( piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), 1 << (tu.cu->cs->sps->getBitDepth( CHANNEL_TYPE_LUMA ) - 1) ); } #else - m_pcTrQuant->getTargetTemplate( tu.cu, pu.lwidth(), pu.lheight() ); - m_pcTrQuant->candidateSearchIntra( tu.cu, pu.lwidth(), pu.lheight() ); - m_pcTrQuant->generateTMPrediction( piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), foundCandiNum ); + getTargetTemplate( tu.cu, pu.lwidth(), pu.lheight() ); + candidateSearchIntra( tu.cu, pu.lwidth(), pu.lheight() ); + generateTMPrediction( piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), foundCandiNum ); #endif CHECK( foundCandiNum < 1, "" ); } @@ -5094,9 +5094,9 @@ bool IntraSearch::xRecurIntraCodingACTQT(CodingStructure &cs, Partitioner &parti if( PU::isTmp( pu, chType ) ) { int foundCandiNum; - m_pcTrQuant->getTargetTemplate( pu.cu, pu.lwidth(), pu.lheight() ); - m_pcTrQuant->candidateSearchIntra( pu.cu, pu.lwidth(), pu.lheight() ); - m_pcTrQuant->generateTMPrediction( piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), foundCandiNum ); + getTargetTemplate( pu.cu, pu.lwidth(), pu.lheight() ); + candidateSearchIntra( pu.cu, pu.lwidth(), pu.lheight() ); + generateTMPrediction( piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), foundCandiNum ); CHECK( foundCandiNum < 1, "" ); } -- GitLab