From 606d73c54cea55af53e372d94351d0337eab5a6d Mon Sep 17 00:00:00 2001 From: druizcoll <druizcoll@ofinno.com> Date: Fri, 7 Jun 2024 17:49:41 -0400 Subject: [PATCH 1/3] JVET-AI0129 (EE2-2.6c): IntraTMP candidates with overlapping refinement window enhanced --- source/Lib/CommonLib/CommonDef.h | 9 + source/Lib/CommonLib/IntraPrediction.cpp | 1020 ++++++++++++++++++++-- source/Lib/CommonLib/IntraPrediction.h | 56 +- source/Lib/CommonLib/TypeDef.h | 1 + source/Lib/CommonLib/UnitTools.cpp | 93 ++ source/Lib/CommonLib/UnitTools.h | 3 + source/Lib/CommonLib/x86/IntraX86.h | 468 +++++++++- source/Lib/DecoderLib/DecCu.cpp | 4 + source/Lib/EncoderLib/IntraSearch.cpp | 8 + 9 files changed, 1592 insertions(+), 70 deletions(-) diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h index b9d6834f6..7b0000884 100644 --- a/source/Lib/CommonLib/CommonDef.h +++ b/source/Lib/CommonLib/CommonDef.h @@ -2178,6 +2178,15 @@ static const int TMP_NUM_MERGE_CANDS = 10; static const int NUM_TMP_ARBVP = 20; static const int EBVP_RANGE = 1; #endif + +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT +static const int NUM_TMP_ARBVP_S = 5; +static const int TMP_MRG_REG_ID = 6; +static const int TMP_MRG_REF_WIND = 5; +static const int TMP_AR_REF_WIND = EBVP_RANGE; +static const int INIT_TL_POS = (MTMP_NUM - TL_NUM_SPARSE); +#endif + #if JVET_AG0152_SGPM_ITMP_IBC static const int SGPM_NUM_BVS = 6; // maximum BVs to be considered into the list for Itmp-Sgpm static const int SGPM_BV_START_IDX = NUM_LUMA_MODE; diff --git a/source/Lib/CommonLib/IntraPrediction.cpp b/source/Lib/CommonLib/IntraPrediction.cpp index afef96be8..dbf9606d3 100644 --- a/source/Lib/CommonLib/IntraPrediction.cpp +++ b/source/Lib/CommonLib/IntraPrediction.cpp @@ -128,6 +128,9 @@ IntraPrediction::IntraPrediction() #if JVET_W0123_TIMD_FUSION m_timdSatdCost = nullptr; #endif +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + m_itmpSatdCost = nullptr; +#endif #if JVET_AC0071_DBV m_dbvSadCost = nullptr; #endif @@ -217,6 +220,9 @@ void IntraPrediction::destroy() #if JVET_W0123_TIMD_FUSION delete m_timdSatdCost; #endif +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + delete m_itmpSatdCost; +#endif #if JVET_AC0071_DBV delete m_dbvSadCost; #endif @@ -291,7 +297,7 @@ void IntraPrediction::destroy() } m_tempBuffer.clear(); -#if JVET_V0130_INTRA_TMP +#if JVET_V0130_INTRA_TMP && !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT if( m_pppTarPatch != NULL ) { for( unsigned int uiDepth = 0; uiDepth < USE_MORE_BLOCKSIZE_DEPTH_MAX; uiDepth++ ) @@ -414,6 +420,12 @@ void IntraPrediction::init(ChromaFormat chromaFormatIDC, const unsigned bitDepth m_timdSatdCost = new RdCost; } #endif +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + if (m_itmpSatdCost == nullptr) + { + m_itmpSatdCost = new RdCost; + } +#endif #if JVET_AC0071_DBV if (m_dbvSadCost == nullptr) { @@ -574,6 +586,7 @@ void IntraPrediction::init(ChromaFormat chromaFormatIDC, const unsigned bitDepth m_calcAeipGroupSum = calcAeipGroupSum; #endif #if JVET_V0130_INTRA_TMP +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT unsigned int blkSize; if( m_pppTarPatch == NULL ) { @@ -590,6 +603,7 @@ void IntraPrediction::init(ChromaFormat chromaFormatIDC, const unsigned bitDepth } } } +#endif m_calcTemplateDiff = calcTemplateDiff; #if JVET_AG0136_INTRA_TMP_LIC @@ -7941,6 +7955,13 @@ int IntraPrediction::getBestNonAnglularMode(const CPelBuf& recoBuf, const CompAr distParamSad[0].useMR = false; distParamSad[1].applyWeight = false; distParamSad[1].useMR = false; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + DistParam distParamSadItmp[2]; + distParamSadItmp[0].applyWeight = false; + distParamSadItmp[0].useMR = false; + distParamSadItmp[1].applyWeight = false; + distParamSadItmp[1].useMR = false; +#endif if (eTempType == LEFT_ABOVE_NEIGHBOR) { m_timdSatdCost->setTimdDistParam(distParamSad[0], piOrg + iTempWidth, piPred + iTempWidth, iOrgStride, @@ -7949,16 +7970,26 @@ int IntraPrediction::getBestNonAnglularMode(const CPelBuf& recoBuf, const CompAr m_timdSatdCost->setTimdDistParam(distParamSad[1], piOrg + iTempHeight * iOrgStride, piPred + iTempHeight * uiPredStride, iOrgStride, uiPredStride, channelBitDepth, COMPONENT_Y, iTempWidth, uiHeight, 0, 1, true); // Use HAD (SATD) cost +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + m_itmpSatdCost->setTimdDistParam(distParamSadItmp[0], piOrg + iTempWidth, piOrg + iTempWidth, iOrgStride, iOrgStride, channelBitDepth, COMPONENT_Y, uiWidth, iTempHeight, 0, 1, true); + m_itmpSatdCost->setTimdDistParam(distParamSadItmp[1], piOrg + iTempHeight * iOrgStride, piOrg + iTempHeight * uiPredStride, iOrgStride, iOrgStride, channelBitDepth, COMPONENT_Y, iTempWidth, uiHeight, 0, 1, true); +#endif } else if (eTempType == LEFT_NEIGHBOR) { m_timdSatdCost->setTimdDistParam(distParamSad[1], piOrg, piPred, iOrgStride, uiPredStride, channelBitDepth, COMPONENT_Y, iTempWidth, uiHeight, 0, 1, true); +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + m_itmpSatdCost->setTimdDistParam(distParamSadItmp[1], piOrg, piPred, iOrgStride, iOrgStride, channelBitDepth, COMPONENT_Y, iTempWidth, uiHeight, 0, 1, true); +#endif } else if (eTempType == ABOVE_NEIGHBOR) { m_timdSatdCost->setTimdDistParam(distParamSad[0], piOrg, piPred, iOrgStride, uiPredStride, channelBitDepth, COMPONENT_Y, uiWidth, iTempHeight, 0, 1, true); +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + m_itmpSatdCost->setTimdDistParam(distParamSadItmp[0], piOrg, piOrg, iOrgStride, iOrgStride, channelBitDepth, COMPONENT_Y, uiWidth, iTempHeight, 0, 1, true); +#endif } initTimdIntraPatternLuma(cu, area, eTempType != ABOVE_NEIGHBOR ? iTempWidth : 0, eTempType != LEFT_NEIGHBOR ? iTempHeight : 0, uiRefWidth, uiRefHeight); @@ -7987,19 +8018,34 @@ int IntraPrediction::getBestNonAnglularMode(const CPelBuf& recoBuf, const CompAr initPredTimdIntraParams(pu, area, i); predTimdIntraAng(COMPONENT_Y, pu, i, piPred, uiPredStride, uiRealW, uiRealH, eTempType, (eTempType == ABOVE_NEIGHBOR) ? 0 : iTempWidth, (eTempType == LEFT_NEIGHBOR) ? 0 : iTempHeight); +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tmpCost0 = distParamSad[0].distFunc(distParamSad[0]); + tmpCost1 = distParamSad[1].distFunc(distParamSad[1]); + uiCost = tmpCost0 + tmpCost1; +#endif } else { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + distParamSadItmp[0].cur.buf = piOrg + BVs[i - 1].hor + BVs[i - 1].ver * iOrgStride + iTempWidth; + tmpCost0 = distParamSadItmp[0].distFunc(distParamSadItmp[0]); + distParamSadItmp[1].cur.buf = piOrg + BVs[i - 1].hor + (BVs[i - 1].ver + iTempHeight) * iOrgStride; + tmpCost1 = distParamSadItmp[1].distFunc(distParamSadItmp[1]); + uiCost = tmpCost0 + tmpCost1; +#else predTimdIbcItmp(COMPONENT_Y, pu, BVs[i - 1], piPred, uiPredStride, uiRealW, uiRealH, eTempType, (eTempType == ABOVE_NEIGHBOR) ? 0 : iTempWidth, (eTempType == LEFT_NEIGHBOR) ? 0 : iTempHeight, piOrg, iOrgStride); +#endif } +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT tmpCost0 = distParamSad[0].distFunc(distParamSad[0]); tmpCost1 = distParamSad[1].distFunc(distParamSad[1]); uiCost = tmpCost0 + tmpCost1; +#endif if (uiCost < uiBestCost) { uiBestCost = uiCost; @@ -14358,8 +14404,13 @@ void insertNode( int diff, int& iXOffset, int& iYOffset, int& pDiff, int& pX, in void clipMvIntraConstraint( CodingUnit* pcCU, int regionId, int& iHorMin, int& iHorMax, int& iVerMin, int& iVerMax, unsigned int uiTemplateSize, unsigned int uiBlkWidth, unsigned int uiBlkHeight, int iCurrY, int iCurrX, int offsetLCUY, int offsetLCUX, RefTemplateType tempType ) { #if JVET_AD0086_ENHANCED_INTRA_TMP +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const int searchRangeWidth = std::max((uiBlkWidth == 64 ? TMP_SEARCH_RANGE_MULT_FACTOR - 1 : TMP_SEARCH_RANGE_MULT_FACTOR) * static_cast<int>(uiBlkWidth), TMP_MINSR); + const int searchRangeHeight = std::max((uiBlkHeight == 64 ? TMP_SEARCH_RANGE_MULT_FACTOR - 1 : TMP_SEARCH_RANGE_MULT_FACTOR) * static_cast<int>(uiBlkHeight), TMP_MINSR); +#else int searchRangeWidth = std::max(TMP_SEARCH_RANGE_MULT_FACTOR * (int) uiBlkWidth, TMP_MINSR); int searchRangeHeight = std::max(TMP_SEARCH_RANGE_MULT_FACTOR * (int) uiBlkHeight, TMP_MINSR); +#endif #else int searchRangeWidth = TMP_SEARCH_RANGE_MULT_FACTOR * uiBlkWidth; int searchRangeHeight = TMP_SEARCH_RANGE_MULT_FACTOR * uiBlkHeight; @@ -14462,10 +14513,25 @@ void clipMvIntraConstraint( CodingUnit* pcCU, int regionId, int& iHorMin, int& i void clipMvIntraConstraintRefine(int& iHorMin, int& iHorMax, int& iVerMin, int& iVerMax,int pX, int pY, int refinementRange #if JVET_AG0136_INTRA_TMP_LIC , const int tmpSampling +#endif +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + , const bool isTransferredLeft, const bool isTransferredTop, const int bestRegionId, const bool isExtLeft, const bool isExtTop, const bool isExceptionAllowed #endif ) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + if (isTransferredLeft) + { + iHorMin = isExceptionAllowed && (bestRegionId == 3 || bestRegionId == 5) && isExtLeft ? pX : std::max(iHorMin, pX); + iHorMax = isExceptionAllowed && bestRegionId == 3 ? pX + tmpSampling - 1 : std::min(iHorMax, pX + tmpSampling - 1); + } + else + { +#endif #if JVET_AD0086_ENHANCED_INTRA_TMP +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + iHorMin = isExceptionAllowed && (bestRegionId == 3 || bestRegionId == 5) && isExtLeft ? pX - refinementRange : std::max(iHorMin, pX - refinementRange); +#else iHorMin = std::max(iHorMin, pX - refinementRange + ( #if JVET_AG0136_INTRA_TMP_LIC tmpSampling @@ -14473,7 +14539,36 @@ void clipMvIntraConstraintRefine(int& iHorMin, int& iHorMax, int& iVerMin, int& TMP_SAMPLING #endif % 2 ? 0 : 1)); +#endif +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + iHorMax = isExceptionAllowed && bestRegionId == 3 ? pX + refinementRange - ( +#if JVET_AG0136_INTRA_TMP_LIC + tmpSampling +#else + TMP_SAMPLING +#endif + % 2 ? 0 : 1) + : std::min(iHorMax, pX + refinementRange - ( +#if JVET_AG0136_INTRA_TMP_LIC + tmpSampling +#else + TMP_SAMPLING +#endif + % 2 ? 0 : 1)); +#else iHorMax = std::min(iHorMax, pX + refinementRange); +#endif +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + } + if (isTransferredTop) + { + iVerMin = isExceptionAllowed && bestRegionId == 4 && isExtTop ? pY : std::max(iVerMin, pY); + iVerMax = isExceptionAllowed && bestRegionId == 1 ? pY + tmpSampling - 1 : std::min(iVerMax, pY + tmpSampling - 1); + } + else + { + iVerMin = isExceptionAllowed && bestRegionId == 4 && isExtTop ? pY - refinementRange : std::max(iVerMin, pY - refinementRange); +#else iVerMin = std::max(iVerMin, pY - refinementRange + ( #if JVET_AG0136_INTRA_TMP_LIC tmpSampling @@ -14481,7 +14576,28 @@ void clipMvIntraConstraintRefine(int& iHorMin, int& iHorMax, int& iVerMin, int& TMP_SAMPLING #endif % 2 ? 0 : 1)); +#endif +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + iVerMax = isExceptionAllowed && bestRegionId == 1 ? pY + refinementRange - ( +#if JVET_AG0136_INTRA_TMP_LIC + tmpSampling +#else + TMP_SAMPLING +#endif + % 2 ? 0 : 1) + : std::min(iVerMax, pY + refinementRange - ( +#if JVET_AG0136_INTRA_TMP_LIC + tmpSampling +#else + TMP_SAMPLING +#endif + % 2 ? 0 : 1)); +#else iVerMax = std::min(iVerMax, pY + refinementRange); +#endif +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + } +#endif #else iHorMin = std::max(iHorMin, pX - refinementRange); iHorMax = std::min(iHorMax, pX + refinementRange); @@ -14517,22 +14633,31 @@ void IntraPrediction::getTargetTemplate( CodingUnit* pcCU, unsigned int uiBlkWid #endif { const ComponentID compID = COMPONENT_Y; +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT unsigned int uiPatchWidth = uiBlkWidth + TMP_TEMPLATE_SIZE; unsigned int uiPatchHeight = uiBlkHeight + TMP_TEMPLATE_SIZE; unsigned int uiTarDepth = floorLog2( std::max( uiBlkHeight, uiBlkWidth ) ) - 2; Pel** tarPatch = m_pppTarPatch[uiTarDepth]; +#endif CompArea area = pcCU->blocks[compID]; Pel* pCurrStart = pcCU->cs->picture->getRecoBuf( area ).buf; unsigned int uiPicStride = pcCU->cs->picture->getRecoBuf( compID ).stride; +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT unsigned int uiY, uiX; +#endif //fill template //up-left & up +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT Pel* tarTemp; +#endif #if JVET_W0069_TMP_BOUNDARY if( tempType == L_SHAPE_TEMPLATE ) { #endif +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + m_pppTarPatch = pCurrStart - TMP_TEMPLATE_SIZE * uiPicStride - TMP_TEMPLATE_SIZE; +#else Pel* pCurrTemp = pCurrStart - TMP_TEMPLATE_SIZE * uiPicStride - TMP_TEMPLATE_SIZE; for( uiY = 0; uiY < TMP_TEMPLATE_SIZE; uiY++ ) { @@ -14553,10 +14678,14 @@ void IntraPrediction::getTargetTemplate( CodingUnit* pcCU, unsigned int uiBlkWid } pCurrTemp += uiPicStride; } +#endif #if JVET_W0069_TMP_BOUNDARY } else if( tempType == ABOVE_TEMPLATE ) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + m_pppTarPatch = pCurrStart - TMP_TEMPLATE_SIZE * uiPicStride; +#else Pel* pCurrTemp = pCurrStart - TMP_TEMPLATE_SIZE * uiPicStride; for( uiY = 0; uiY < TMP_TEMPLATE_SIZE; uiY++ ) { @@ -14567,9 +14696,13 @@ void IntraPrediction::getTargetTemplate( CodingUnit* pcCU, unsigned int uiBlkWid } pCurrTemp += uiPicStride; } +#endif } else if( tempType == LEFT_TEMPLATE ) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + m_pppTarPatch = pCurrStart - TMP_TEMPLATE_SIZE; +#else Pel* pCurrTemp = pCurrStart - TMP_TEMPLATE_SIZE; for( uiY = TMP_TEMPLATE_SIZE; uiY < uiPatchHeight; uiY++ ) { @@ -14580,6 +14713,7 @@ void IntraPrediction::getTargetTemplate( CodingUnit* pcCU, unsigned int uiBlkWid } pCurrTemp += uiPicStride; } +#endif } #endif } @@ -14596,9 +14730,15 @@ void IntraPrediction::candidateSearchIntra( CodingUnit* pcCU, unsigned int uiBlk { unsigned int uiPatchWidth = uiBlkWidth + TMP_TEMPLATE_SIZE; unsigned int uiPatchHeight = uiBlkHeight + TMP_TEMPLATE_SIZE; +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT unsigned int uiTarDepth = floorLog2( std::max( uiBlkWidth, uiBlkHeight ) ) - 2; +#endif +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + Pel* tarPatch = getTargetPatch(); +#else Pel** tarPatch = getTargetPatch( uiTarDepth ); +#endif //Initialize the library for saving the best candidates #if !JVET_AD0086_ENHANCED_INTRA_TMP const ComponentID compID = COMPONENT_Y; @@ -14638,7 +14778,11 @@ void IntraPrediction::candidateSearchIntra( CodingUnit* pcCU, unsigned int uiBlk } #if JVET_W0069_TMP_BOUNDARY +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT +void IntraPrediction::searchCandidateFromOnePicIntra(CodingUnit* pcCU, Pel* tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, RefTemplateType tempType +#else void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, RefTemplateType tempType +#endif #if JVET_AG0136_INTRA_TMP_LIC , const bool useMR #endif @@ -14718,6 +14862,7 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta setRefPicUsed( ref ); //facilitate the access of each candidate point setStride( refStride ); +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT Mv cTmpMvPred; cTmpMvPred.setZero(); @@ -14727,10 +14872,17 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta int blkY = 0; int iCurrY = uiCUPelY + blkY; int iCurrX = uiCUPelX + blkX; +#else + const int iCurrY = area.pos().y; + const int iCurrX = area.pos().x; +#endif Position ctuRsAddr = CU::getCtuXYAddr(*pcCU); int offsetLCUY = iCurrY - ctuRsAddr.y; int offsetLCUX = iCurrX - ctuRsAddr.x; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const int offset = TMP_SAMPLING >> 1; +#endif int iYOffset, iXOffset; #if JVET_AD0086_ENHANCED_INTRA_TMP @@ -14756,6 +14908,18 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta #endif Pel* refCurr; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const int iRefine = 1; + const int iRefineRange = TMP_SAMPLING >> 1; + const int TMP_REG_REF_WIND = iRefineRange; + const Mv bvOffSparseTL = Mv(TMP_REG_REF_WIND, TMP_REG_REF_WIND); + const Mv bvOffSparseBR = Mv(TMP_REG_REF_WIND, TMP_REG_REF_WIND); + const Mv bvOffMerge = Mv(TMP_MRG_REF_WIND, TMP_MRG_REF_WIND); + const Mv bvOffArbvp = Mv(TMP_AR_REF_WIND, TMP_AR_REF_WIND); + Mv regTL, regBR, bvMrg; + Mv iMrgWindTL, iMrgWindBR; + bool bRegOverlap = false; +#endif #if JVET_AD0086_ENHANCED_INTRA_TMP #if JVET_AE0077_EXT_INTRATMP const int regionNum = 6; @@ -14801,7 +14965,11 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta log2SizeTop = floorLog2(TMP_TEMPLATE_SIZE * uiBlkWidth); log2SizeLeft = floorLog2(uiBlkHeight * TMP_TEMPLATE_SIZE); sizeTopLeft = (uiBlkHeight + uiPatchWidth) * TMP_TEMPLATE_SIZE; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + m_calcTargetMean(tarPatch, m_uiPicStride, uiPatchWidth, uiPatchHeight, tempType, needTopLeft ? 3 : 0, log2SizeTop, log2SizeLeft, sizeTopLeft, topTargetMean, leftTargetMean); +#else m_calcTargetMean(tarPatch, uiPatchWidth, uiPatchHeight, tempType, needTopLeft ? 3 : 0, log2SizeTop, log2SizeLeft, sizeTopLeft, topTargetMean, leftTargetMean); +#endif #if JVET_AH0200_INTRA_TMP_BV_REORDER m_log2SizeTop = log2SizeTop ; m_log2SizeLeft = log2SizeLeft; @@ -14854,19 +15022,96 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta } } #endif +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const int numInitMrg = static_cast<int>(m_bvBasedMergeCandidates.size()); + if (uiBlkWidth <= 8 && uiBlkHeight <= 8) + { + regTL = Mv(mvXMins[regionId], mvYMins[regionId]); + regBR = Mv(mvXMaxs[regionId], mvYMaxs[regionId]); + bRegOverlap = false; + + for (int mrgIdx = 0; mrgIdx < numInitMrg; mrgIdx++) + { + bvMrg = m_bvBasedMergeCandidates[mrgIdx]; + iMrgWindTL = bvMrg - bvOffMerge; + iMrgWindBR = bvMrg + bvOffMerge; + + if ((regTL.hor >= iMrgWindTL.hor) && (regTL.ver >= iMrgWindTL.ver) && (regBR.hor <= iMrgWindBR.hor) && (regBR.ver <= iMrgWindBR.ver)) + { + bRegOverlap = true; + break; + } + } + if (bRegOverlap) continue; + } +#endif #if JVET_AB0130_ITMP_SAMPLING +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + int shiftX = 0; for (iYOffset = mvYMax; iYOffset >= mvYMin; iYOffset -= TMP_SAMPLING) { + shiftX = (mvXMax != mvXMin) ? (shiftX % TMP_SAMPLING) : 0; + bool isAvailablePairFound{false}; + for (iXOffset = mvXMax - shiftX++; iXOffset >= mvXMin; iXOffset -= TMP_SAMPLING) + { + int iXOffset_metric = iXOffset + offset; + int iYOffset_metric = iYOffset + offset; + bool isTransferredLeft = false; + bool isTransferredTop = false; + if (iXOffset_metric > mvXMax) + { + iXOffset_metric = iXOffset; + isTransferredLeft = true; + } + if (iYOffset_metric > mvYMax) + { + iYOffset_metric = iYOffset; + isTransferredTop = true; + } +#else + for (iYOffset = mvYMax; iYOffset >= mvYMin; iYOffset -= TMP_SAMPLING) + { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + bool isAvailablePairFound{false}; +#endif for (iXOffset = mvXMax; iXOffset >= mvXMin; iXOffset -= TMP_SAMPLING) { +#endif #if JVET_AE0077_EXT_INTRATMP +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + if ((regionId == 4 || regionId == 5) && !isAvailablePairFound) +#else if (regionId == 4 || regionId == 5) +#endif { Position bottomRight(iCurrX + iXOffset + uiBlkWidth - 1, iCurrY + iYOffset + uiBlkHeight - 1); if (!pcCU->cs->isDecomp(bottomRight, CHANNEL_TYPE_LUMA)) { continue; } +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + else + { + if (bJointCalc || useMR) + { + if (!pcCU->cs->isDecomp(Position(iCurrX + iXOffset_metric + uiBlkWidth - 1, iCurrY + iYOffset_metric + uiBlkHeight - 1), CHANNEL_TYPE_LUMA)) + { + iXOffset_metric = iXOffset; + iYOffset_metric = iYOffset; + isTransferredLeft = true; + isTransferredTop = true; + } + else + { + isAvailablePairFound = true; + } + } + else + { + isAvailablePairFound = true; + } + } +#endif } #endif #else @@ -14876,16 +15121,39 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta { #endif refCurr = ref + iYOffset * refStride + iXOffset; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + int licShift = 0; + if (!isTransferredLeft) + { + licShift += offset; + } + if (!isTransferredTop) + { + licShift += offset * m_uiPicStride; + } +#endif #if JVET_AG0136_INTRA_TMP_LIC if (bJointCalc) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + m_calcTemplateDiffJointSadMrsad(refCurr, refStride, tarPatch, m_uiPicStride, uiPatchWidth, uiPatchHeight, diff, diffSupp, pDiff, pDiffSupp, tempType, log2SizeTop, log2SizeLeft, sizeTopLeft, topTargetMean, leftTargetMean, licShift); +#else m_calcTemplateDiffJointSadMrsad(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, diff, diffSupp, pDiff, pDiffSupp, tempType, log2SizeTop, log2SizeLeft, sizeTopLeft, topTargetMean, leftTargetMean); +#endif +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + for (int temIdx = 0; temIdx < (needTopLeft ? 3 : 1); temIdx++) +#else for (int temIdx = 0; temIdx < 3; temIdx++) +#endif { if (diffSupp[temIdx] < pDiffSupp[temIdx]) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + updateCandList(TempLibFast(iXOffset_metric, iYOffset_metric, Mv(iXOffset_metric, iYOffset_metric) - bvOffSparseTL, Mv(iXOffset_metric, iYOffset_metric) + bvOffSparseBR, isTransferredLeft, isTransferredTop, regionId), diffSupp[temIdx], sparseMtmpCandListSupp[temIdx], sparseMtmpCostListSupp[temIdx], mtmpNumSparseForLic[temIdx]); +#else updateCandList(TempLibFast(iXOffset, iYOffset, regionId), diffSupp[temIdx], sparseMtmpCandListSupp[temIdx], sparseMtmpCostListSupp[temIdx], mtmpNumSparseForLic[temIdx]); +#endif if (sparseMtmpCandListSupp[temIdx].size() == mtmpNumSparseForLic[temIdx]) { pDiffSupp[temIdx] = std::min((int) sparseMtmpCostListSupp[temIdx][mtmpNumSparseForLic[temIdx] - 1], pDiffSupp[temIdx]); @@ -14895,18 +15163,33 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta } else { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + Pel* ref_tmp = pcCU->tmpLicFlag ? refCurr + licShift : refCurr; + m_calcTemplateDiff(ref_tmp, refStride, tarPatch, m_uiPicStride, uiPatchWidth, uiPatchHeight, diff, pDiff, tempType, needTopLeft ? 3 : 0, useMR, log2SizeTop, log2SizeLeft, sizeTopLeft, topTargetMean, leftTargetMean); +#else m_calcTemplateDiff(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, diff, pDiff, tempType, needTopLeft ? 3 : 0, useMR, log2SizeTop, log2SizeLeft, sizeTopLeft, topTargetMean, leftTargetMean); +#endif } #else m_calcTemplateDiff(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, diff, pDiff, tempType, needTopLeft ? 3 : 0); #endif +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + for (int temIdx = 0; temIdx < (needTopLeft ? 3 : 1); temIdx++) +#else for (int temIdx = 0; temIdx < 3; temIdx++) +#endif { if (diff[temIdx] < pDiff[temIdx]) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + int curXoffset = (!bJointCalc && (useMR)) ? iXOffset_metric : iXOffset; + int curYoffset = (!bJointCalc && (useMR)) ? iYOffset_metric : iYOffset; + updateCandList(TempLibFast(curXoffset, curYoffset, Mv(curXoffset, curYoffset) - bvOffSparseTL, Mv(curXoffset, curYoffset) + bvOffSparseBR, !bJointCalc && (useMR) ? isTransferredLeft : false, !bJointCalc && (useMR) ? isTransferredTop : false, regionId), diff[temIdx], sparseMtmpCandList[temIdx], sparseMtmpCostList[temIdx], mtmpNumSparse[temIdx]); +#else updateCandList(TempLibFast(iXOffset, iYOffset, regionId), diff[temIdx], sparseMtmpCandList[temIdx], sparseMtmpCostList[temIdx], mtmpNumSparse[temIdx]); +#endif if (sparseMtmpCandList[temIdx].size() == mtmpNumSparse[temIdx]) { pDiff[temIdx] = std::min((int) sparseMtmpCostList[temIdx][mtmpNumSparse[temIdx] - 1], pDiff[temIdx]); @@ -15024,6 +15307,26 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta #if JVET_AG0151_INTRA_TMP_MERGE_MODE std::vector<Mv> bvBasedMergeCandidatesITMP; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + bvBasedMergeCandidatesITMP = m_bvBasedMergeCandidates; + std::vector<Mv> bvBasedMergeCandidatesOut; + std::vector<Mv> bvBasedMergeCandidatesIn; + + for (int iBv = 0; iBv < bvRegionIdList.size(); iBv++) + { + if (bvRegionIdList[iBv] == TMP_MRG_REG_ID) + { + bvBasedMergeCandidatesOut.push_back(m_bvBasedMergeCandidates[iBv]); + } + else + { + bvBasedMergeCandidatesIn.push_back(m_bvBasedMergeCandidates[iBv]); + } + } + bvBasedMergeCandidatesOut.insert(bvBasedMergeCandidatesOut.end(), bvBasedMergeCandidatesIn.begin(), bvBasedMergeCandidatesIn.end()); + bvBasedMergeCandidatesITMP = bvBasedMergeCandidatesOut; + +#else if (m_bvBasedMergeCandidates.size() > TMP_NUM_MERGE_CANDS) { std::vector<Mv> bvBasedMergeCandidatesOut; @@ -15059,6 +15362,7 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta { bvBasedMergeCandidatesITMP = m_bvBasedMergeCandidates; } +#endif #if JVET_AH0055_INTRA_TMP_ARBVP const int numNeighborMerge = (int) bvBasedMergeCandidatesITMP.size(); const int totalNum = numNeighborMerge + NUM_TMP_ARBVP; @@ -15157,64 +15461,188 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta } } while (bvBasedMergeCandidatesITMP.size() > end && bvBasedMergeCandidatesITMP.size() < totalNum); #endif -#if JVET_AG0136_INTRA_TMP_LIC - bool isBvAddedReg = false; - bool isBvAddedSupp = false; -#endif +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + // Add ARBVPs based on sparse candidates + std::vector<Mv> bvBasedMergeCandidatesITMPSupp; + bvBasedMergeCandidatesITMPSupp = bvBasedMergeCandidatesITMP; + + PU::getSparseArBvMergeCandidate(pu, bvBasedMergeCandidatesITMP, sparseMtmpCandList[0]); + if (bJointCalc) + { + PU::getSparseArBvMergeCandidate(pu, bvBasedMergeCandidatesITMPSupp, sparseMtmpCandListSupp[0]); + } + + // Clustering of the Merge and ARBVP candidates based on refinement window + bool ClustMrgReg[3] = { false, false, false }; + bool ClustMrgSupp[3] = { false, false, false }; + Mv iMergeWindTL, iMergeWindBR; + Mv iSparseWindTL, iSparseWindBR; + Mv mergeCand, sparseCand; + bool bOverlap = false; + for (int iM = 0; iM < bvBasedMergeCandidatesITMP.size(); iM++) { -#if JVET_AH0055_INTRA_TMP_ARBVP - regionId = iM < numNeighborMerge ? regionNum: regionNum + 1; -#else - regionId = regionNum; -#endif - iYOffset = bvBasedMergeCandidatesITMP[iM].ver; - iXOffset = bvBasedMergeCandidatesITMP[iM].hor; - refCurr = ref + iYOffset * refStride + iXOffset; -#if JVET_AG0136_INTRA_TMP_LIC - if (bJointCalc) + mergeCand = bvBasedMergeCandidatesITMP[iM]; + iMergeWindTL = (iM < numNeighborMerge) ? mergeCand - bvOffMerge : mergeCand - bvOffArbvp; + iMergeWindBR = (iM < numNeighborMerge) ? mergeCand + bvOffMerge : mergeCand + bvOffArbvp; + regionId = (iM < numNeighborMerge) ? TMP_MRG_REG_ID : TMP_MRG_REG_ID + 1; + refCurr = ref + mergeCand.ver * refStride + mergeCand.hor; + + for (int temIdx = 0; temIdx < (needTopLeft ? 3 : 1); temIdx++) { - m_calcTemplateDiffJointSadMrsad(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, diff, diffSupp, pDiff, pDiffSupp, tempType, log2SizeTop, log2SizeLeft, sizeTopLeft, topTargetMean, leftTargetMean); - for (int temIdx = 0; temIdx < 3; temIdx++) + m_calcTemplateDiff(refCurr, refStride, tarPatch, m_uiPicStride, uiPatchWidth, uiPatchHeight, diff, pDiff, tempType, needTopLeft ? 3 : 0, useMR, log2SizeTop, log2SizeLeft, sizeTopLeft, topTargetMean, leftTargetMean); + bOverlap = false; + for (int i = 0; i < sparseMtmpCandList[temIdx].size(); i++) { - bool bRedundant = false; - for (int i = 0; i < sparseMtmpCandListSupp[temIdx].size(); i++) + sparseCand = Mv(sparseMtmpCandList[temIdx][i].m_pX, sparseMtmpCandList[temIdx][i].m_pY); + if (mergeCand == sparseCand) { - if (iYOffset == sparseMtmpCandListSupp[temIdx][i].m_pY && iXOffset == sparseMtmpCandListSupp[temIdx][i].m_pX) - { - bRedundant = true; - break; - } + ClustMrgReg[temIdx] = true; + bOverlap = true; + break; } - if (bRedundant) + + iSparseWindTL = (sparseMtmpCandList[temIdx][i].m_windTL); + iSparseWindBR = (sparseMtmpCandList[temIdx][i].m_windBR); + if (!((iMergeWindBR.hor < iSparseWindTL.hor) || (iMergeWindTL.hor > iSparseWindBR.hor)) + && !((iMergeWindBR.ver < iSparseWindTL.ver) || (iMergeWindTL.ver > iSparseWindBR.ver))) { - continue; + iSparseWindTL = + Mv(std::min(iSparseWindTL.hor, iMergeWindTL.hor), std::min(iSparseWindTL.ver, iMergeWindTL.ver)); + iSparseWindBR = + Mv(std::max(iSparseWindBR.hor, iMergeWindBR.hor), std::max(iSparseWindBR.ver, iMergeWindBR.ver)); + + if (diff[temIdx] < sparseMtmpCostList[temIdx][i]) + { + sparseMtmpCandList[temIdx].erase(sparseMtmpCandList[temIdx].begin() + i); + sparseMtmpCostList[temIdx].erase(sparseMtmpCostList[temIdx].begin() + i); + updateCandList(TempLibFast(mergeCand.hor, mergeCand.ver, iSparseWindTL, iSparseWindBR, false, false, regionId), + diff[temIdx], sparseMtmpCandList[temIdx], sparseMtmpCostList[temIdx], mtmpNumSparse[temIdx]); + if (sparseMtmpCandList[temIdx].size() == mtmpNumSparse[temIdx]) + { + pDiff[temIdx] = std::min(static_cast<int>(sparseMtmpCostList[temIdx][mtmpNumSparse[temIdx] - 1]), pDiff[temIdx]); + } + } + else + { + sparseMtmpCandList[temIdx][i].m_windTL = iSparseWindTL; + sparseMtmpCandList[temIdx][i].m_windBR = iSparseWindBR; + sparseMtmpCandList[temIdx][i].m_rId = regionId; + } + ClustMrgReg[temIdx] = true; + bOverlap = true; + break; } - if (diffSupp[temIdx] < pDiffSupp[temIdx]) + } + if (!bOverlap) + { + if (diff[temIdx] < pDiff[temIdx]) { - updateCandList(TempLibFast(iXOffset, iYOffset, regionId), diffSupp[temIdx], sparseMtmpCandListSupp[temIdx], sparseMtmpCostListSupp[temIdx], mtmpNumSparseForLic[temIdx]); - if (sparseMtmpCandListSupp[temIdx].size() == mtmpNumSparseForLic[temIdx]) + updateCandList(TempLibFast(mergeCand.hor, mergeCand.ver, iMergeWindTL, iMergeWindBR, false, false, regionId), diff[temIdx], + sparseMtmpCandList[temIdx], sparseMtmpCostList[temIdx], mtmpNumSparse[temIdx]); + if (sparseMtmpCandList[temIdx].size() == mtmpNumSparse[temIdx]) { - pDiffSupp[temIdx] = std::min((int)sparseMtmpCostListSupp[temIdx][mtmpNumSparseForLic[temIdx] - 1], pDiffSupp[temIdx]); + pDiff[temIdx] = std::min(static_cast<int>(sparseMtmpCostList[temIdx][mtmpNumSparse[temIdx] - 1]), pDiff[temIdx]); } - isBvAddedSupp = true; + ClustMrgReg[temIdx] = true; } } } - else + } + + if (bJointCalc) + { + for (int iM = 0; iM < bvBasedMergeCandidatesITMPSupp.size(); iM++) { - m_calcTemplateDiff(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, diff, pDiff, tempType, needTopLeft ? 3 : 0, useMR, log2SizeTop, log2SizeLeft, sizeTopLeft, topTargetMean, leftTargetMean); + mergeCand = bvBasedMergeCandidatesITMPSupp[iM]; + iMergeWindTL = (iM < numNeighborMerge) ? mergeCand - bvOffMerge : mergeCand - bvOffArbvp; + iMergeWindBR = (iM < numNeighborMerge) ? mergeCand + bvOffMerge : mergeCand + bvOffArbvp; + regionId = (iM < numNeighborMerge) ? TMP_MRG_REG_ID : TMP_MRG_REG_ID + 1; + refCurr = ref + mergeCand.ver * refStride + mergeCand.hor; + + for (int temIdx = 0; temIdx < (needTopLeft ? 3 : 1); temIdx++) + { + m_calcTemplateDiff(refCurr, refStride, tarPatch, m_uiPicStride, uiPatchWidth, uiPatchHeight, diffSupp, pDiffSupp, tempType, needTopLeft ? 3 : 0, true, log2SizeTop, log2SizeLeft, sizeTopLeft, topTargetMean, leftTargetMean); + bOverlap = false; + for (int i = 0; i < sparseMtmpCandListSupp[temIdx].size(); i++) + { + sparseCand = Mv(sparseMtmpCandListSupp[temIdx][i].m_pX, sparseMtmpCandListSupp[temIdx][i].m_pY); + if (mergeCand == sparseCand) + { + ClustMrgSupp[temIdx] = true; + bOverlap = true; + break; + } + + iSparseWindTL = (sparseMtmpCandListSupp[temIdx][i].m_windTL); + iSparseWindBR = (sparseMtmpCandListSupp[temIdx][i].m_windBR); + if (!((iMergeWindBR.hor < iSparseWindTL.hor) || (iMergeWindTL.hor > iSparseWindBR.hor)) && !((iMergeWindBR.ver < iSparseWindTL.ver) || (iMergeWindTL.ver > iSparseWindBR.ver))) + { + iSparseWindTL = Mv(std::min(iSparseWindTL.hor, iMergeWindTL.hor), std::min(iSparseWindTL.ver, iMergeWindTL.ver)); + iSparseWindBR = Mv(std::max(iSparseWindBR.hor, iMergeWindBR.hor), std::max(iSparseWindBR.ver, iMergeWindBR.ver)); + + if (diffSupp[temIdx] < sparseMtmpCostListSupp[temIdx][i]) + { + sparseMtmpCandListSupp[temIdx].erase(sparseMtmpCandListSupp[temIdx].begin() + i); + sparseMtmpCostListSupp[temIdx].erase(sparseMtmpCostListSupp[temIdx].begin() + i); + updateCandList(TempLibFast(mergeCand.hor, mergeCand.ver, iSparseWindTL, iSparseWindBR, false, false, regionId), diffSupp[temIdx], sparseMtmpCandListSupp[temIdx], sparseMtmpCostListSupp[temIdx], mtmpNumSparseForLic[temIdx]); + if (sparseMtmpCandListSupp[temIdx].size() == mtmpNumSparseForLic[temIdx]) + { + pDiffSupp[temIdx] = std::min(static_cast<int>(sparseMtmpCostListSupp[temIdx][mtmpNumSparseForLic[temIdx] - 1]), pDiffSupp[temIdx]); + } + } + else + { + sparseMtmpCandListSupp[temIdx][i].m_windTL = iSparseWindTL; + sparseMtmpCandListSupp[temIdx][i].m_windBR = iSparseWindBR; + sparseMtmpCandListSupp[temIdx][i].m_rId = regionId; + } + bOverlap = true; + ClustMrgSupp[temIdx] = true; + break; + } + } + if (!bOverlap) + { + if (diffSupp[temIdx] < pDiffSupp[temIdx]) + { + updateCandList(TempLibFast(mergeCand.hor, mergeCand.ver, iMergeWindTL, iMergeWindBR, false, false, regionId), + diffSupp[temIdx], sparseMtmpCandListSupp[temIdx], sparseMtmpCostListSupp[temIdx], mtmpNumSparseForLic[temIdx]); + if (sparseMtmpCandListSupp[temIdx].size() == mtmpNumSparseForLic[temIdx]) + { + pDiffSupp[temIdx] = std::min(static_cast<int>(sparseMtmpCostListSupp[temIdx][mtmpNumSparseForLic[temIdx] - 1]), pDiffSupp[temIdx]); + } + ClustMrgSupp[temIdx] = true; + } + } + } } + } #else - m_calcTemplateDiff(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, diff, pDiff, tempType, needTopLeft ? 3 : 0); +#if JVET_AG0136_INTRA_TMP_LIC + bool isBvAddedReg = false; + bool isBvAddedSupp = false; #endif + for (int iM = 0; iM < bvBasedMergeCandidatesITMP.size(); iM++) + { +#if JVET_AH0055_INTRA_TMP_ARBVP + regionId = iM < numNeighborMerge ? regionNum : regionNum + 1; +#else + regionId = regionNum; +#endif + iYOffset = bvBasedMergeCandidatesITMP[iM].ver; + iXOffset = bvBasedMergeCandidatesITMP[iM].hor; + refCurr = ref + iYOffset * refStride + iXOffset; +#if JVET_AG0136_INTRA_TMP_LIC + if (bJointCalc) + { + m_calcTemplateDiffJointSadMrsad(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, diff, diffSupp, pDiff, pDiffSupp, tempType, log2SizeTop, log2SizeLeft, sizeTopLeft, topTargetMean, leftTargetMean); for (int temIdx = 0; temIdx < 3; temIdx++) { - // check for redundancy bool bRedundant = false; - for (int i = 0; i < sparseMtmpCandList[temIdx].size(); i++) + for (int i = 0; i < sparseMtmpCandListSupp[temIdx].size(); i++) { - if (iYOffset == sparseMtmpCandList[temIdx][i].m_pY && iXOffset == sparseMtmpCandList[temIdx][i].m_pX) + if (iYOffset == sparseMtmpCandListSupp[temIdx][i].m_pY && iXOffset == sparseMtmpCandListSupp[temIdx][i].m_pX) { bRedundant = true; break; @@ -15224,20 +15652,55 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta { continue; } - if (diff[temIdx] < pDiff[temIdx]) + if (diffSupp[temIdx] < pDiffSupp[temIdx]) { - updateCandList(TempLibFast(iXOffset, iYOffset, regionId), diff[temIdx], - sparseMtmpCandList[temIdx], sparseMtmpCostList[temIdx], mtmpNumSparse[temIdx]); - if (sparseMtmpCandList[temIdx].size() == mtmpNumSparse[temIdx]) + updateCandList(TempLibFast(iXOffset, iYOffset, regionId), diffSupp[temIdx], sparseMtmpCandListSupp[temIdx], sparseMtmpCostListSupp[temIdx], mtmpNumSparseForLic[temIdx]); + if (sparseMtmpCandListSupp[temIdx].size() == mtmpNumSparseForLic[temIdx]) { - pDiff[temIdx] = std::min((int)sparseMtmpCostList[temIdx][mtmpNumSparse[temIdx] - 1], pDiff[temIdx]); + pDiffSupp[temIdx] = std::min((int)sparseMtmpCostListSupp[temIdx][mtmpNumSparseForLic[temIdx] - 1], pDiffSupp[temIdx]); } -#if JVET_AG0136_INTRA_TMP_LIC - isBvAddedReg = true; + isBvAddedSupp = true; + } + } + } + else + { + m_calcTemplateDiff(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, diff, pDiff, tempType, needTopLeft ? 3 : 0, useMR, log2SizeTop, log2SizeLeft, sizeTopLeft, topTargetMean, leftTargetMean); + } +#else + m_calcTemplateDiff(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, diff, pDiff, tempType, needTopLeft ? 3 : 0); #endif + for (int temIdx = 0; temIdx < 3; temIdx++) + { + // check for redundancy + bool bRedundant = false; + for (int i = 0; i < sparseMtmpCandList[temIdx].size(); i++) + { + if (iYOffset == sparseMtmpCandList[temIdx][i].m_pY && iXOffset == sparseMtmpCandList[temIdx][i].m_pX) + { + bRedundant = true; + break; } } + if (bRedundant) + { + continue; + } + if (diff[temIdx] < pDiff[temIdx]) + { + updateCandList(TempLibFast(iXOffset, iYOffset, regionId), diff[temIdx], + sparseMtmpCandList[temIdx], sparseMtmpCostList[temIdx], mtmpNumSparse[temIdx]); + if (sparseMtmpCandList[temIdx].size() == mtmpNumSparse[temIdx]) + { + pDiff[temIdx] = std::min((int)sparseMtmpCostList[temIdx][mtmpNumSparse[temIdx] - 1], pDiff[temIdx]); + } +#if JVET_AG0136_INTRA_TMP_LIC + isBvAddedReg = true; +#endif + } } +} +#endif #endif #if JVET_AD0086_ENHANCED_INTRA_TMP @@ -15270,22 +15733,47 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta pDiff[i] = 0; } } +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + bool bRedundant = false; + Mv refineWindTL, refineWindBR; + int mvYMin = 0; + int mvYMax = 0; + int mvXMin = 0; + int mvXMax = 0; +#endif for (int candIdx = 0; candIdx < sparseMtmpCandList[temIdx].size(); candIdx++) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + refineWindTL = sparseMtmpCandList[temIdx][candIdx].m_windTL; + refineWindBR = sparseMtmpCandList[temIdx][candIdx].m_windBR; +#else int iRefine = 1; int iRefineRange = TMP_SAMPLING >> 1; +#endif bestRegionId = sparseMtmpCandList[temIdx][candIdx].m_rId; #if JVET_AG0151_INTRA_TMP_MERGE_MODE -#if JVET_AG0136_INTRA_TMP_LIC +#if JVET_AG0136_INTRA_TMP_LIC && !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT const int RefineSizeForLic = pcCU->slice->getSPS()->getItmpLicMode() ? 5 : 2; #endif +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT int mvYMin = 0; int mvYMax = 0; int mvXMin = 0; int mvXMax = 0; +#endif +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + if (bestRegionId > 5) +#else if (bestRegionId == 6) +#endif { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + mvYMin = refineWindTL.ver; + mvYMax = refineWindBR.ver; + mvXMin = refineWindTL.hor; + mvXMax = refineWindBR.hor; +#else #if JVET_AG0136_INTRA_TMP_LIC if ((!pcCU->cs->pcv->isEncoder || !bJointCalc) && pcCU->tmpFlag && (pcCU->tmpLicFlag || pcCU->ibcLicFlag)) { @@ -15306,9 +15794,10 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta mvYMax = sparseMtmpCandList[temIdx][candIdx].m_pY + 5; mvXMin = sparseMtmpCandList[temIdx][candIdx].m_pX - 5; mvXMax = sparseMtmpCandList[temIdx][candIdx].m_pX + 5; +#endif #endif } -#if JVET_AH0055_INTRA_TMP_ARBVP +#if JVET_AH0055_INTRA_TMP_ARBVP && !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT else if (bestRegionId == 7) { mvYMin = sparseMtmpCandList[temIdx][candIdx].m_pY - EBVP_RANGE; @@ -15326,6 +15815,9 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta clipMvIntraConstraintRefine(mvXMin, mvXMax, mvYMin, mvYMax, sparseMtmpCandList[temIdx][candIdx].m_pX, sparseMtmpCandList[temIdx][candIdx].m_pY, iRefineRange #if JVET_AG0136_INTRA_TMP_LIC , TMP_SAMPLING +#endif +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + , sparseMtmpCandList[temIdx][candIdx].m_isTransferredLeft, sparseMtmpCandList[temIdx][candIdx].m_isTransferredTop, bestRegionId, iCurrX + mvXMin >= TMP_TEMPLATE_SIZE + iRefineRange, iCurrY + mvYMin >= TMP_TEMPLATE_SIZE + iRefineRange, !pcCU->slice->getSPS()->getItmpLicMode() #endif ); } @@ -15340,28 +15832,51 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta #endif ); #endif +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT if (!(mvYMax < mvYMin || mvXMax < mvXMin)) { +#endif for (iYOffset = mvYMax; iYOffset >= mvYMin; iYOffset -= iRefine) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + bool isAvailablePairFound{false}; +#endif for (iXOffset = mvXMax; iXOffset >= mvXMin; iXOffset -= iRefine) { #if JVET_AE0077_EXT_INTRATMP +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + if ((bestRegionId == 4 || bestRegionId == 5 || (!pcCU->slice->getSPS()->getItmpLicMode() && ((bestRegionId == 3 && iXOffset > 0) || (bestRegionId == 1 && iYOffset > 0)))) && !isAvailablePairFound) +#else if (bestRegionId == 4 || bestRegionId == 5) +#endif { Position bottomRight(iCurrX + iXOffset + uiBlkWidth - 1, iCurrY + iYOffset + uiBlkHeight - 1); if (!pcCU->cs->isDecomp(bottomRight, CHANNEL_TYPE_LUMA)) { continue; } +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + else + { + isAvailablePairFound = true; + } +#endif } #endif #if JVET_AG0151_INTRA_TMP_MERGE_MODE #if JVET_AG0136_INTRA_TMP_LIC +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + if (ClustMrgReg[temIdx]) +#else if (isBvAddedReg) +#endif { #endif +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + bRedundant = false; +#else bool bRedundant = false; +#endif for (int i = 0; i < refineMtmpCandList[temIdx].size(); i++) { if (iYOffset == refineMtmpCandList[temIdx][i].m_pY && iXOffset == refineMtmpCandList[temIdx][i].m_pX) @@ -15397,8 +15912,12 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta else { refCurr = ref + iYOffset * refStride + iXOffset; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + m_calcTemplateDiff(refCurr, refStride, tarPatch, m_uiPicStride, uiPatchWidth, uiPatchHeight, diff, pDiff, tempType, temIdx +#else m_calcTemplateDiff(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, diff, pDiff, tempType, temIdx +#endif #if JVET_AG0136_INTRA_TMP_LIC , useMR, log2SizeTop, log2SizeLeft, sizeTopLeft, topTargetMean, leftTargetMean #endif @@ -15406,8 +15925,12 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta } if (diff[temIdx] < pDiff[temIdx]) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + updateCandList(TempLibFast(iXOffset, iYOffset, refineWindTL, refineWindBR, false, false, bestRegionId), diff[temIdx], refineMtmpCandList[temIdx], refineMtmpCostList[temIdx], mtmpNumRefine[temIdx]); +#else updateCandList(TempLibFast(iXOffset, iYOffset, bestRegionId), diff[temIdx], refineMtmpCandList[temIdx], refineMtmpCostList[temIdx], mtmpNumRefine[temIdx]); +#endif if (refineMtmpCandList[temIdx].size() == mtmpNumRefine[temIdx]) { pDiff[temIdx] = std::min((int) refineMtmpCostList[temIdx][mtmpNumRefine[temIdx] - 1], pDiff[temIdx]); @@ -15415,7 +15938,9 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta } } } +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT } +#endif } } @@ -15438,8 +15963,17 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta if (tempType == L_SHAPE_TEMPLATE && needTopLeft) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + // If the list size is less than INIT_TL_POS the Only-TL candidates are skip + if (refMtmpCandListTemp.size() >= INIT_TL_POS) + { + int cnt = 0; + bool bRedundant = false; + int mvXCur, mvYCur, pos; +#endif for (int temIdx = 2; temIdx >0; temIdx--) { +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT int cnt = 0; for (int candIdx = 0; candIdx < refineMtmpCostList[temIdx].size() && cnt < TL_NUM; candIdx++) { @@ -15448,6 +15982,13 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta int mvXCur = refineMtmpCandList[temIdx][candIdx].m_pX; int mvYCur = refineMtmpCandList[temIdx][candIdx].m_pY; +#else + for (int candIdx = 0; candIdx < refineMtmpCandList[temIdx].size(); candIdx++) + { + bRedundant = false; + mvXCur = refineMtmpCandList[temIdx][candIdx].m_pX; + mvYCur = refineMtmpCandList[temIdx][candIdx].m_pY; +#endif #if JVET_AG0136_INTRA_TMP_LIC for (int crIdx = 0; crIdx < refMtmpCandListTemp.size(); crIdx++) { @@ -15467,8 +16008,12 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta if (!bRedundant) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + pos = (MTMP_NUM - static_cast<int>(refineMtmpCandList[1].size()) - static_cast<int>(refineMtmpCandList[2].size())) + cnt++; +#else cnt++; int pos = MTMP_NUM - 1 - TL_NUM * temIdx + cnt; +#endif #if JVET_AG0136_INTRA_TMP_LIC if (pos < refMtmpCandListTemp.size()) { @@ -15487,9 +16032,19 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta m_mtmpCandList[pos] = refineMtmpCandList[temIdx][candIdx]; #endif } +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + else + { + // TL candidates are appended sequentially if the list size is less than MTMP_NUM + refMtmpCandListTemp.push_back(refineMtmpCandList[temIdx][candIdx]); + } +#endif } } } +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + } +#endif } #else #if JVET_AB0130_ITMP_SAMPLING @@ -15624,8 +16179,16 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta pDiffSparse[i] = pDiffSupp[i] + (sparseMtmpCandListSupp[i].size() < mtmpNumSparseForLic[i] ? 0 : 1); } -#if JVET_AG0151_INTRA_TMP_MERGE_MODE +#if JVET_AG0151_INTRA_TMP_MERGE_MODE && (!JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT) const int RefineSizeForLic = pcCU->slice->getSPS()->getItmpLicMode() ? 5 : 2; +#endif +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + bool bRedundant = false; + Mv refineWindTL, refineWindBR; + int mvYMin = 0; + int mvYMax = 0; + int mvXMin = 0; + int mvXMax = 0; #endif for (int temIdx = 0; temIdx < 3; temIdx++) { @@ -15646,12 +16209,27 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta } for (int candIdx = 0; candIdx < sparseMtmpCandListSupp[temIdx].size(); candIdx++) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + refineWindTL = sparseMtmpCandListSupp[temIdx][candIdx].m_windTL; + refineWindBR = sparseMtmpCandListSupp[temIdx][candIdx].m_windBR; +#endif bestRegionId = sparseMtmpCandListSupp[temIdx][candIdx].m_rId; #if JVET_AG0151_INTRA_TMP_MERGE_MODE +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT int mvYMin = 0; int mvYMax = 0; int mvXMin = 0; int mvXMax = 0; +#endif +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + if (bestRegionId >= TMP_MRG_REG_ID) + { + mvYMin = refineWindTL.ver; + mvYMax = refineWindBR.ver; + mvXMin = refineWindTL.hor; + mvXMax = refineWindBR.hor; + } +#else if (bestRegionId == 6) { mvYMin = sparseMtmpCandListSupp[temIdx][candIdx].m_pY - RefineSizeForLic; @@ -15667,6 +16245,7 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta mvXMin = sparseMtmpCandListSupp[temIdx][candIdx].m_pX - EBVP_RANGE; mvXMax = sparseMtmpCandListSupp[temIdx][candIdx].m_pX + EBVP_RANGE; } +#endif #endif else { @@ -15674,7 +16253,11 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta mvYMax = mvYMaxs[bestRegionId]; mvXMin = mvXMins[bestRegionId]; mvXMax = mvXMaxs[bestRegionId]; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + clipMvIntraConstraintRefine(mvXMin, mvXMax, mvYMin, mvYMax, sparseMtmpCandListSupp[temIdx][candIdx].m_pX, sparseMtmpCandListSupp[temIdx][candIdx].m_pY, TMP_SAMPLING >> 1, TMP_SAMPLING, sparseMtmpCandListSupp[temIdx][candIdx].m_isTransferredLeft, sparseMtmpCandListSupp[temIdx][candIdx].m_isTransferredTop, bestRegionId, iCurrX + mvXMin >= TMP_TEMPLATE_SIZE + (TMP_SAMPLING >> 1), iCurrY + mvYMin >= TMP_TEMPLATE_SIZE + (TMP_SAMPLING >> 1), !pcCU->slice->getSPS()->getItmpLicMode()); +#else clipMvIntraConstraintRefine(mvXMin, mvXMax, mvYMin, mvYMax, sparseMtmpCandListSupp[temIdx][candIdx].m_pX, sparseMtmpCandListSupp[temIdx][candIdx].m_pY, TMP_SAMPLING >> 1, TMP_SAMPLING); +#endif } #else int mvYMin = mvYMins[bestRegionId]; @@ -15687,24 +16270,47 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta #endif ); #endif +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT if (!(mvYMax < mvYMin || mvXMax < mvXMin)) { +#endif for (iYOffset = mvYMax; iYOffset >= mvYMin; iYOffset -= 1) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + bool isAvailablePairFound{false}; +#endif for (iXOffset = mvXMax; iXOffset >= mvXMin; iXOffset -= 1) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + if ((bestRegionId == 4 || bestRegionId == 5 || (!pcCU->slice->getSPS()->getItmpLicMode() && ((bestRegionId == 3 && iXOffset > 0) || (bestRegionId == 1 && iYOffset > 0)))) && !isAvailablePairFound) +#else if (bestRegionId == 4 || bestRegionId == 5) +#endif { Position bottomRight(iCurrX + iXOffset + uiBlkWidth - 1, iCurrY + iYOffset + uiBlkHeight - 1); if (!pcCU->cs->isDecomp(bottomRight, CHANNEL_TYPE_LUMA)) { continue; } +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + else + { + isAvailablePairFound = true; + } +#endif } #if JVET_AG0151_INTRA_TMP_MERGE_MODE +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + if (ClustMrgSupp[temIdx]) +#else if (isBvAddedSupp) +#endif { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + bRedundant = false; +#else bool bRedundant = false; +#endif for (int i = 0; i < refineMtmpCandList[temIdx].size(); i++) { if (iYOffset == refineMtmpCandList[temIdx][i].m_pY && iXOffset == refineMtmpCandList[temIdx][i].m_pX) @@ -15737,11 +16343,19 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta else { refCurr = ref + iYOffset * refStride + iXOffset; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + m_calcTemplateDiff(refCurr, refStride, tarPatch, m_uiPicStride, uiPatchWidth, uiPatchHeight, diffSupp, pDiffSupp, tempType, temIdx, true, log2SizeTop, log2SizeLeft, sizeTopLeft, topTargetMean, leftTargetMean); +#else m_calcTemplateDiff(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, diffSupp, pDiffSupp, tempType, temIdx, true, log2SizeTop, log2SizeLeft, sizeTopLeft, topTargetMean, leftTargetMean); +#endif } if (diffSupp[temIdx] < pDiffSupp[temIdx]) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + updateCandList(TempLibFast(iXOffset, iYOffset, refineWindTL, refineWindBR, false, false, bestRegionId), diffSupp[temIdx], refineMtmpCandList[temIdx], refineMtmpCostList[temIdx], mtmpNumRefine[temIdx]); +#else updateCandList(TempLibFast(iXOffset, iYOffset, bestRegionId), diffSupp[temIdx], refineMtmpCandList[temIdx], refineMtmpCostList[temIdx], mtmpNumRefine[temIdx]); +#endif if (refineMtmpCandList[temIdx].size() == mtmpNumRefine[temIdx]) { pDiffSupp[temIdx] = std::min((int) refineMtmpCostList[temIdx][mtmpNumRefine[temIdx] - 1], pDiffSupp[temIdx]); @@ -15749,7 +16363,9 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta } } } +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT } +#endif } } @@ -15757,15 +16373,31 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta m_mtmpCostListUseMR = refineMtmpCostList[0]; static_vector<TempLibFast, MTMP_NUM>& refMtmpCandListTemp = m_mtmpCandListUseMR; if (tempType == L_SHAPE_TEMPLATE && needTopLeft) +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + // If the list size is less than INIT_TL_POS the Only-TL candidates are skip + if (refMtmpCandListTemp.size() >= INIT_TL_POS) + { + int cnt = 0; + bool bRedundant = false; + int mvXCur, mvYCur, pos; +#endif { for (int temIdx = 2; temIdx > 0; temIdx--) { +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT int cnt = 0; for (int candIdx = 0; candIdx < refineMtmpCostList[temIdx].size() && cnt < TL_NUM; candIdx++) { bool bRedundant = false; const int mvXCur = refineMtmpCandList[temIdx][candIdx].m_pX; const int mvYCur = refineMtmpCandList[temIdx][candIdx].m_pY; +#else + for (int candIdx = 0; candIdx < refineMtmpCostList[temIdx].size(); candIdx++) + { + bRedundant = false; + mvXCur = refineMtmpCandList[temIdx][candIdx].m_pX; + mvYCur = refineMtmpCandList[temIdx][candIdx].m_pY; +#endif for (int crIdx = 0; crIdx < refMtmpCandListTemp.size(); crIdx++) { if (mvXCur == refMtmpCandListTemp[crIdx].m_pX && mvYCur == refMtmpCandListTemp[crIdx].m_pY) @@ -15776,8 +16408,12 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta } if (!bRedundant) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + pos = (MTMP_NUM - static_cast<int>(refineMtmpCandList[1].size()) - static_cast<int>(refineMtmpCandList[2].size())) + cnt++; +#else cnt++; int pos = MTMP_NUM - 1 - TL_NUM * temIdx + cnt; +#endif if (pos < refMtmpCandListTemp.size()) { for (int updatePos = (int) refMtmpCandListTemp.size() - 1; updatePos > pos; updatePos--) @@ -15786,9 +16422,19 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta } refMtmpCandListTemp[pos] = refineMtmpCandList[temIdx][candIdx]; } +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + else + { + // TL candidates are appended sequentially if the list size is less than MTMP_NUM + refMtmpCandListTemp.push_back(refineMtmpCandList[temIdx][candIdx]); + } +#endif } } } +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + } +#endif } m_tmpNumCandUseMR = static_cast<int>(m_mtmpCandListUseMR.size()); for (int i = 0; i < refMtmpCandListTemp.size(); i++) @@ -16359,7 +17005,11 @@ void IntraPrediction::xCalcTmpFlmRefArea(CodingUnit *cu, unsigned int uiBlkWidth int iBlkHeight = uiBlkHeight; int bestPosX = iCurrX + pX; int bestPosY = iCurrY + pY; - if (regionId == 4 || regionId == 5) + if (regionId == 4 || regionId == 5 +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + || regionId == 1 || regionId == 3 +#endif + ) { if (!cu->cs->isDecomp(Position(bestPosX + iBlkWidth - 1, bestPosY + iBlkHeight), CHANNEL_TYPE_LUMA)) { @@ -17517,7 +18167,11 @@ TempLibFracFast::~TempLibFracFast() { } +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT +void IntraPrediction::searchFracCandidate(CodingUnit* pcCU, Pel* tarPatch, RefTemplateType tempType) +#else void IntraPrediction::searchFracCandidate( CodingUnit* pcCU, Pel** tarPatch, RefTemplateType tempType) +#endif { const int tmpIdx = pcCU->tmpIdx; m_mtmpFracCandList[tmpIdx].clear(); @@ -17573,7 +18227,11 @@ void IntraPrediction::searchFracCandidate( CodingUnit* pcCU, Pel** tarPatch, Ref if(pcCU->tmpLicFlag) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + m_calcTemplateDiff(refCurr, predTempStride, tarPatch, m_uiPicStride, pcCU->lwidth() + TMP_TEMPLATE_SIZE, pcCU->lheight() + TMP_TEMPLATE_SIZE, diff, pDiff, tempType, 0, true +#else m_calcTemplateDiff(refCurr, predTempStride, tarPatch, pcCU->lwidth() + TMP_TEMPLATE_SIZE, pcCU->lheight() + TMP_TEMPLATE_SIZE, diff, pDiff, tempType, 0, true +#endif , m_log2SizeTop , m_log2SizeLeft , m_sizeTopLeft @@ -17582,7 +18240,11 @@ void IntraPrediction::searchFracCandidate( CodingUnit* pcCU, Pel** tarPatch, Ref } else { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + m_calcTemplateDiff(refCurr, predTempStride, tarPatch, m_uiPicStride, pcCU->lwidth() + TMP_TEMPLATE_SIZE, pcCU->lheight() + TMP_TEMPLATE_SIZE, diff, pDiff, tempType, 0, false, 0, 0, 0, 0, 0); +#else m_calcTemplateDiff(refCurr, predTempStride, tarPatch, pcCU->lwidth() + TMP_TEMPLATE_SIZE, pcCU->lheight() + TMP_TEMPLATE_SIZE, diff, pDiff, tempType, 0, false, 0, 0, 0, 0, 0); +#endif } diff[0] = (int) (diff[0] * TMP_INT_BV_COST_SCALE); @@ -17611,7 +18273,11 @@ void IntraPrediction::searchFracCandidate( CodingUnit* pcCU, Pel** tarPatch, Ref if(pcCU->tmpLicFlag) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + m_calcTemplateDiff(refCurr, predTempStride, tarPatch, m_uiPicStride, pcCU->lwidth() + TMP_TEMPLATE_SIZE, pcCU->lheight() + TMP_TEMPLATE_SIZE, diff, pDiff, tempType, 0, true +#else m_calcTemplateDiff(refCurr, predTempStride, tarPatch, pcCU->lwidth() + TMP_TEMPLATE_SIZE, pcCU->lheight() + TMP_TEMPLATE_SIZE, diff, pDiff, tempType, 0, true +#endif , m_log2SizeTop , m_log2SizeLeft , m_sizeTopLeft @@ -17620,7 +18286,11 @@ void IntraPrediction::searchFracCandidate( CodingUnit* pcCU, Pel** tarPatch, Ref } else { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + m_calcTemplateDiff(refCurr, predTempStride, tarPatch, m_uiPicStride, pcCU->lwidth() + TMP_TEMPLATE_SIZE, pcCU->lheight() + TMP_TEMPLATE_SIZE, diff, pDiff, tempType, 0, false, 0, 0, 0, 0, 0); +#else m_calcTemplateDiff(refCurr, predTempStride, tarPatch, pcCU->lwidth() + TMP_TEMPLATE_SIZE, pcCU->lheight() + TMP_TEMPLATE_SIZE, diff, pDiff, tempType, 0, false, 0, 0, 0, 0, 0); +#endif } if (diff[0] < pDiff[0]) { @@ -17677,22 +18347,35 @@ bool IntraPrediction::generateTmDcPrediction( Pel* piPred, unsigned int uiStride #endif #if JVET_AG0136_INTRA_TMP_LIC +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT +void IntraPrediction::calcTargetMean(Pel* tarPatch, int tarStride, const unsigned int uiPatchWidth, const unsigned int uiPatchHeight, const RefTemplateType tempType, const int requiredTemplate, const int log2SizeTop, const int log2SizeLeft, const int sizeTopLeft, int& topTargetMean, int& leftTargetMean) +#else void IntraPrediction::calcTargetMean(Pel** tarPatch, const unsigned int uiPatchWidth, const unsigned int uiPatchHeight, const RefTemplateType tempType, const int requiredTemplate, const int log2SizeTop, const int log2SizeLeft, const int sizeTopLeft, int& topTargetMean, int& leftTargetMean) +#endif { topTargetMean = 0; leftTargetMean = 0; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const Pel* tarPatchRow = tarPatch; +#else const Pel* tarPatchRow = nullptr; +#endif if (tempType == L_SHAPE_TEMPLATE) { if (requiredTemplate == 3 || requiredTemplate == 0 || requiredTemplate == 1) { for (int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++) { +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT tarPatchRow = tarPatch[iY]; +#endif for (int iX = TMP_TEMPLATE_SIZE; iX < uiPatchWidth; iX++) { topTargetMean += tarPatchRow[iX]; } +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tarPatchRow += tarStride; +#endif } topTargetMean >>= log2SizeTop; } @@ -17700,11 +18383,16 @@ void IntraPrediction::calcTargetMean(Pel** tarPatch, const unsigned int uiPatchW { for (int iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++) { +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT tarPatchRow = tarPatch[iY]; +#endif for (int iX = 0; iX < TMP_TEMPLATE_SIZE; iX++) { leftTargetMean += tarPatchRow[iX]; } +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tarPatchRow += tarStride; +#endif } leftTargetMean >>= log2SizeLeft; } @@ -17713,11 +18401,16 @@ void IntraPrediction::calcTargetMean(Pel** tarPatch, const unsigned int uiPatchW { for (int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++) { +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT tarPatchRow = tarPatch[iY]; +#endif for (int iX = 0; iX < uiPatchWidth - TMP_TEMPLATE_SIZE; iX++) { topTargetMean += tarPatchRow[iX]; } +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tarPatchRow += tarStride; +#endif } topTargetMean >>= log2SizeTop; } @@ -17725,32 +18418,55 @@ void IntraPrediction::calcTargetMean(Pel** tarPatch, const unsigned int uiPatchW { for (int iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++) { +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT tarPatchRow = tarPatch[iY]; +#endif for (int iX = 0; iX < TMP_TEMPLATE_SIZE; iX++) { leftTargetMean += tarPatchRow[iX]; } +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tarPatchRow += tarStride; +#endif } leftTargetMean >>= log2SizeLeft; } } +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT +void IntraPrediction::calcTemplateDiffJointSadMrsad(const Pel* const ref, const unsigned int uiStride, Pel* tarPatch, int tarStride, const unsigned int uiPatchWidth, const unsigned int uiPatchHeight, int* diffSad, int* diffMrsad, int* iMaxSad, int* iMaxMrsad, const RefTemplateType tempType, const int log2SizeTop, const int log2SizeLeft, const int sizeTopLeft, const int topTargetMean, const int leftTargetMean, const int licShift) +#else void IntraPrediction::calcTemplateDiffJointSadMrsad(const Pel* const ref, const unsigned int uiStride, Pel** tarPatch, const unsigned int uiPatchWidth, const unsigned int uiPatchHeight, int* diffSad, int* diffMrsad, int* iMaxSad, int* iMaxMrsad, const RefTemplateType tempType, const int log2SizeTop, const int log2SizeLeft, const int sizeTopLeft, const int topTargetMean, const int leftTargetMean) +#endif { +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT Pel intermediate = 0; +#endif int diffSumSad = 0; int diffSumMrsad = 0; int topDiffSad = MAX_INT; int topDiffMrsad = MAX_INT; int leftDiffSad = MAX_INT; int leftDiffMrsad = MAX_INT; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const Pel* const refLic = ref + licShift; + const Pel* tarPatchRow = tarPatch; +#else const Pel* tarPatchRow = nullptr; +#endif const Pel* refPatchRow = tempType == L_SHAPE_TEMPLATE ? ref - TMP_TEMPLATE_SIZE * uiStride - TMP_TEMPLATE_SIZE : (tempType == ABOVE_TEMPLATE ? ref - TMP_TEMPLATE_SIZE * uiStride : ref - TMP_TEMPLATE_SIZE); +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const Pel* refPatchRowLic = tempType == L_SHAPE_TEMPLATE ? refLic - TMP_TEMPLATE_SIZE * uiStride - TMP_TEMPLATE_SIZE : (tempType == ABOVE_TEMPLATE ? refLic - TMP_TEMPLATE_SIZE * uiStride : refLic - TMP_TEMPLATE_SIZE); +#endif int topMeanRef = 0; int leftMeanRef = 0; if (tempType == L_SHAPE_TEMPLATE) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const Pel* refPatchRowTemp = refPatchRowLic; +#else const Pel* refPatchRowTemp = refPatchRow; +#endif for (int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRowTemp += uiStride) { for (int iX = TMP_TEMPLATE_SIZE; iX < uiPatchWidth; iX++) @@ -17759,7 +18475,11 @@ void IntraPrediction::calcTemplateDiffJointSadMrsad(const Pel* const ref, const } } topMeanRef >>= log2SizeTop; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + refPatchRowTemp = refLic - TMP_TEMPLATE_SIZE; +#else refPatchRowTemp = ref - TMP_TEMPLATE_SIZE; +#endif for (int iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++, refPatchRowTemp += uiStride) { for (int iX = 0; iX < TMP_TEMPLATE_SIZE; iX++) @@ -17771,7 +18491,11 @@ void IntraPrediction::calcTemplateDiffJointSadMrsad(const Pel* const ref, const } else if (tempType == ABOVE_TEMPLATE) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const Pel* refPatchRowTemp = refPatchRowLic; +#else const Pel* refPatchRowTemp = refPatchRow; +#endif for (int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRowTemp += uiStride) { for (int iX = 0; iX < uiPatchWidth - TMP_TEMPLATE_SIZE; iX++) @@ -17783,7 +18507,11 @@ void IntraPrediction::calcTemplateDiffJointSadMrsad(const Pel* const ref, const } else if (tempType == LEFT_TEMPLATE) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const Pel* refPatchRowTemp = refPatchRowLic; +#else const Pel* refPatchRowTemp = refPatchRow; +#endif for (int iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++, refPatchRowTemp += uiStride) { for (int iX = 0; iX < TMP_TEMPLATE_SIZE; iX++) @@ -17798,8 +18526,10 @@ void IntraPrediction::calcTemplateDiffJointSadMrsad(const Pel* const ref, const #if JVET_AH0200_INTRA_TMP_BV_REORDER int tempDiff1 = 0; int tempDiff2 = 0; +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT int tempDiff3 = 0; int tempDiff4 = 0; +#endif #endif if (tempType == L_SHAPE_TEMPLATE) { @@ -17809,20 +18539,42 @@ void IntraPrediction::calcTemplateDiffJointSadMrsad(const Pel* const ref, const topDiffMrsad = 0; leftDiffSad = 0; leftDiffMrsad = 0; - for (int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRow += uiStride) + for (int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRow += uiStride +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + , refPatchRowLic += uiStride +#endif + ) { +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT tarPatchRow = tarPatch[iY]; +#endif #if JVET_AH0200_INTRA_TMP_BV_REORDER tempDiff1 = 0; tempDiff2 = 0; +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT tempDiff3 = 0; tempDiff4 = 0; #endif +#endif +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + for (int iX = 0; iX < TMP_TEMPLATE_SIZE; iX++) + { + diffSumSad += abs(refPatchRow[iX] - tarPatchRow[iX]); + diffSumMrsad += abs(refPatchRowLic[iX] - tarPatchRow[iX] - topMeanDiff); + } + for (int iX = TMP_TEMPLATE_SIZE; iX < uiPatchWidth; iX++) +#else for (int iX = 0; iX < uiPatchWidth; iX++) +#endif { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + iSumSad = abs(refPatchRow[iX] - tarPatchRow[iX]); + iSumMrsad = abs(refPatchRowLic[iX] - tarPatchRow[iX] - topMeanDiff); +#else intermediate = refPatchRow[iX] - tarPatchRow[iX]; iSumSad = abs(intermediate); iSumMrsad = abs(intermediate - topMeanDiff); +#endif #if JVET_AH0200_INTRA_TMP_BV_REORDER tempDiff1 += iSumSad; tempDiff2 += iSumMrsad; @@ -17830,6 +18582,7 @@ void IntraPrediction::calcTemplateDiffJointSadMrsad(const Pel* const ref, const diffSumSad += iSumSad; diffSumMrsad += iSumMrsad; #endif +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT if (iX >= TMP_TEMPLATE_SIZE) { #if JVET_AH0200_INTRA_TMP_BV_REORDER @@ -17840,35 +18593,61 @@ void IntraPrediction::calcTemplateDiffJointSadMrsad(const Pel* const ref, const topDiffMrsad += iSumMrsad; #endif } +#endif } #if JVET_AH0200_INTRA_TMP_BV_REORDER if (iY == (TMP_TEMPLATE_SIZE - 1)) { tempDiff1 <<= TMP_TEMPLATE_COST_SHIFT; +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT tempDiff3 <<= TMP_TEMPLATE_COST_SHIFT; +#endif tempDiff2 <<= TMP_TEMPLATE_COST_SHIFT; +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT tempDiff4 <<= TMP_TEMPLATE_COST_SHIFT; +#endif } diffSumSad += tempDiff1; diffSumMrsad += tempDiff2; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + topDiffSad += tempDiff1; + topDiffMrsad += tempDiff2; +#else topDiffSad += tempDiff3; topDiffMrsad += tempDiff4; +#endif #endif if (diffSumSad > iMaxSad[0] && topDiffSad > iMaxSad[1] && diffSumMrsad > iMaxMrsad[0] && topDiffMrsad > iMaxMrsad[1]) { break; } +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tarPatchRow += tarStride; +#endif } refPatchRow = ref - TMP_TEMPLATE_SIZE; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + refPatchRowLic = refLic - TMP_TEMPLATE_SIZE; + tarPatchRow = tarPatch + TMP_TEMPLATE_SIZE * tarStride; + for (int iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++, refPatchRow += uiStride, refPatchRowLic += uiStride) +#else for (int iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++, refPatchRow += uiStride) +#endif { +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT tarPatchRow = tarPatch[iY]; +#endif for (int iX = 0; iX < TMP_TEMPLATE_SIZE; iX++) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + iSumSad = abs(refPatchRow[iX] - tarPatchRow[iX]); + iSumMrsad = abs(refPatchRowLic[iX] - tarPatchRow[iX] - leftMeanDiff); +#else intermediate = refPatchRow[iX] - tarPatchRow[iX]; iSumSad = abs(intermediate); iSumMrsad = abs(intermediate - leftMeanDiff); +#endif diffSumSad += iSumSad; diffSumMrsad += iSumMrsad; leftDiffSad += iSumSad; @@ -17878,7 +18657,11 @@ void IntraPrediction::calcTemplateDiffJointSadMrsad(const Pel* const ref, const tempDiff1 = (abs(refPatchRow[TMP_TEMPLATE_SIZE - 1] - tarPatchRow[TMP_TEMPLATE_SIZE - 1]))*((1<<TMP_TEMPLATE_COST_SHIFT) - 1); diffSumSad += tempDiff1; leftDiffSad += tempDiff1; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tempDiff1 = (abs(refPatchRowLic[TMP_TEMPLATE_SIZE - 1] - tarPatchRow[TMP_TEMPLATE_SIZE - 1] - leftMeanDiff))*((1<<TMP_TEMPLATE_COST_SHIFT) - 1); +#else tempDiff1 = (abs(refPatchRow[TMP_TEMPLATE_SIZE - 1] - tarPatchRow[TMP_TEMPLATE_SIZE - 1] - leftMeanDiff))*((1<<TMP_TEMPLATE_COST_SHIFT) - 1); +#endif diffSumMrsad += tempDiff1; leftDiffMrsad += tempDiff1; #endif @@ -17886,23 +18669,39 @@ void IntraPrediction::calcTemplateDiffJointSadMrsad(const Pel* const ref, const { break; } +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tarPatchRow += tarStride; +#endif } } else if (tempType == ABOVE_TEMPLATE) { - for (int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRow += uiStride) + for (int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRow += uiStride +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + , refPatchRowLic += uiStride +#endif + ) { +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT tarPatchRow = tarPatch[iY]; +#endif #if JVET_AH0200_INTRA_TMP_BV_REORDER tempDiff1 = 0; tempDiff2 = 0; #endif for (int iX = 0; iX < uiPatchWidth - TMP_TEMPLATE_SIZE; iX++) { +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT intermediate = refPatchRow[iX] - tarPatchRow[iX]; +#endif #if JVET_AH0200_INTRA_TMP_BV_REORDER +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tempDiff1 += abs(refPatchRow[iX] - tarPatchRow[iX]); + tempDiff2 += abs(refPatchRowLic[iX] - tarPatchRow[iX] - topMeanDiff); +#else tempDiff1 += abs(intermediate); tempDiff2 += abs(intermediate - topMeanDiff); +#endif #else diffSumSad += abs(intermediate); diffSumMrsad += abs(intermediate - topMeanDiff); @@ -17921,29 +18720,50 @@ void IntraPrediction::calcTemplateDiffJointSadMrsad(const Pel* const ref, const { break; } +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tarPatchRow += tarStride; +#endif } } else if (tempType == LEFT_TEMPLATE) { - for (int iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++, refPatchRow += uiStride) + for (int iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++, refPatchRow += uiStride +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + , refPatchRowLic += uiStride +#endif + ) { +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT tarPatchRow = tarPatch[iY]; +#endif for (int iX = 0; iX < TMP_TEMPLATE_SIZE; iX++) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + diffSumSad += abs(refPatchRow[iX] - tarPatchRow[iX]); + diffSumMrsad += abs(refPatchRowLic[iX] - tarPatchRow[iX] - leftMeanDiff); +#else intermediate = refPatchRow[iX] - tarPatchRow[iX]; diffSumSad += abs(intermediate); diffSumMrsad += abs(intermediate - leftMeanDiff); +#endif } #if JVET_AH0200_INTRA_TMP_BV_REORDER tempDiff1 = (abs(refPatchRow[TMP_TEMPLATE_SIZE - 1] - tarPatchRow[TMP_TEMPLATE_SIZE - 1]))*((1<<TMP_TEMPLATE_COST_SHIFT) - 1); diffSumSad += tempDiff1; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tempDiff1 = (abs(refPatchRowLic[TMP_TEMPLATE_SIZE - 1] - tarPatchRow[TMP_TEMPLATE_SIZE - 1] - leftMeanDiff))*((1<<TMP_TEMPLATE_COST_SHIFT) - 1); +#else tempDiff1 = (abs(refPatchRow[TMP_TEMPLATE_SIZE - 1] - tarPatchRow[TMP_TEMPLATE_SIZE - 1] - leftMeanDiff))*((1<<TMP_TEMPLATE_COST_SHIFT) - 1); +#endif diffSumMrsad += tempDiff1; #endif if (diffSumSad > iMaxSad[0] && diffSumMrsad > iMaxMrsad[0]) { break; } +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tarPatchRow += tarStride; +#endif } } diffSad[0] = diffSumSad; @@ -17956,12 +18776,21 @@ void IntraPrediction::calcTemplateDiffJointSadMrsad(const Pel* const ref, const #endif #if JVET_AD0086_ENHANCED_INTRA_TMP +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT +void IntraPrediction::calcTemplateDiff(Pel* ref, unsigned int uiStride, Pel* tarPatch, int tarStride, unsigned int uiPatchWidth, + unsigned int uiPatchHeight, int* diff, int* iMax, RefTemplateType tempType, int requiredTemplate +#if JVET_AG0136_INTRA_TMP_LIC + , const bool isMrSad, const int log2SizeTop, const int log2SizeLeft, const int sizeTopLeft, const int topTargetMean, const int leftTargetMean +#endif +) +#else void IntraPrediction::calcTemplateDiff(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int *diff, int *iMax, RefTemplateType tempType, int requiredTemplate #if JVET_AG0136_INTRA_TMP_LIC , const bool isMrSad, const int log2SizeTop, const int log2SizeLeft, const int sizeTopLeft, const int topTargetMean, const int leftTargetMean #endif ) +#endif { int diffSum = 0; int topDiff = MAX_INT; @@ -17983,7 +18812,11 @@ void IntraPrediction::calcTemplateDiff(Pel *ref, unsigned int uiStride, Pel **ta #else Pel *refPatchRow = ref - TMP_TEMPLATE_SIZE * uiStride - TMP_TEMPLATE_SIZE; #endif +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const Pel* tarPatchRow = tarPatch; +#else Pel *tarPatchRow; +#endif #if JVET_AG0136_INTRA_TMP_LIC int topMeanDiff = 0; @@ -18055,8 +18888,10 @@ void IntraPrediction::calcTemplateDiff(Pel *ref, unsigned int uiStride, Pel **ta #endif #if JVET_AH0200_INTRA_TMP_BV_REORDER int tempDiff1 = 0; +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT int tempDiff2 = 0; #endif +#endif #if JVET_W0069_TMP_BOUNDARY if (tempType == L_SHAPE_TEMPLATE) { @@ -18070,12 +18905,28 @@ void IntraPrediction::calcTemplateDiff(Pel *ref, unsigned int uiStride, Pel **ta { for (int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++) { +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT tarPatchRow = tarPatch[iY]; +#endif #if JVET_AH0200_INTRA_TMP_BV_REORDER tempDiff1 = 0; +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT tempDiff2 = 0; #endif +#endif +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + for (int iX = 0; iX < TMP_TEMPLATE_SIZE; iX++) + { + diffSum += abs(refPatchRow[iX] - tarPatchRow[iX] +#if JVET_AG0136_INTRA_TMP_LIC + - topMeanDiff +#endif + ); + } + for (int iX = TMP_TEMPLATE_SIZE; iX < uiPatchWidth; iX++) +#else for (int iX = 0; iX < uiPatchWidth; iX++) +#endif { #if JVET_AH0200_INTRA_TMP_BV_REORDER tempDiff1 += abs(refPatchRow[iX] - tarPatchRow[iX] @@ -18087,6 +18938,7 @@ void IntraPrediction::calcTemplateDiff(Pel *ref, unsigned int uiStride, Pel **ta #endif ); +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT if (iX >= TMP_TEMPLATE_SIZE) { #if JVET_AH0200_INTRA_TMP_BV_REORDER @@ -18099,15 +18951,22 @@ void IntraPrediction::calcTemplateDiff(Pel *ref, unsigned int uiStride, Pel **ta #endif ); } +#endif } #if JVET_AH0200_INTRA_TMP_BV_REORDER if (iY == (TMP_TEMPLATE_SIZE - 1)) { tempDiff1 <<= TMP_TEMPLATE_COST_SHIFT; +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT tempDiff2 <<= TMP_TEMPLATE_COST_SHIFT; +#endif } diffSum += tempDiff1; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + topDiff += tempDiff1; +#else topDiff += tempDiff2; +#endif #endif if (diffSum > iMax[0] && topDiff > iMax[1]) @@ -18116,14 +18975,22 @@ void IntraPrediction::calcTemplateDiff(Pel *ref, unsigned int uiStride, Pel **ta } refPatchRow += uiStride; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tarPatchRow += tarStride; +#endif } refPatchRow = ref - TMP_TEMPLATE_SIZE; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tarPatchRow = tarPatch + TMP_TEMPLATE_SIZE * tarStride; +#endif // vertical difference for (int iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++) { +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT tarPatchRow = tarPatch[iY]; +#endif for (int iX = 0; iX < TMP_TEMPLATE_SIZE; iX++) { diffSum += abs(refPatchRow[iX] - tarPatchRow[iX] @@ -18153,17 +19020,34 @@ void IntraPrediction::calcTemplateDiff(Pel *ref, unsigned int uiStride, Pel **ta } refPatchRow += uiStride; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tarPatchRow += tarStride; +#endif } } else if (requiredTemplate == 0)//TL { for (int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++) { +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT tarPatchRow = tarPatch[iY]; +#endif #if JVET_AH0200_INTRA_TMP_BV_REORDER tempDiff1 = 0; #endif +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + for (int iX = 0; iX < TMP_TEMPLATE_SIZE; iX++) + { + diffSum += abs(refPatchRow[iX] - tarPatchRow[iX] +#if JVET_AG0136_INTRA_TMP_LIC + - topMeanDiff +#endif + ); + } + for (int iX = TMP_TEMPLATE_SIZE; iX < uiPatchWidth; iX++) +#else for (int iX = 0; iX < uiPatchWidth; iX++) +#endif { #if JVET_AH0200_INTRA_TMP_BV_REORDER tempDiff1 += abs(refPatchRow[iX] - tarPatchRow[iX] @@ -18189,14 +19073,22 @@ void IntraPrediction::calcTemplateDiff(Pel *ref, unsigned int uiStride, Pel **ta } refPatchRow += uiStride; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tarPatchRow += tarStride; +#endif } refPatchRow = ref - TMP_TEMPLATE_SIZE; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tarPatchRow = tarPatch + TMP_TEMPLATE_SIZE * tarStride; +#endif // vertical difference for (int iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++) { +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT tarPatchRow = tarPatch[iY]; +#endif for (int iX = 0; iX < TMP_TEMPLATE_SIZE; iX++) { diffSum += abs(refPatchRow[iX] - tarPatchRow[iX] @@ -18220,13 +19112,18 @@ void IntraPrediction::calcTemplateDiff(Pel *ref, unsigned int uiStride, Pel **ta } refPatchRow += uiStride; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tarPatchRow += tarStride; +#endif } } else if(requiredTemplate == 1) //T { for (int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++) { +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT tarPatchRow = tarPatch[iY]; +#endif #if JVET_AH0200_INTRA_TMP_BV_REORDER tempDiff1 = 0; #endif @@ -18256,16 +19153,24 @@ void IntraPrediction::calcTemplateDiff(Pel *ref, unsigned int uiStride, Pel **ta } refPatchRow += uiStride; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tarPatchRow += tarStride; +#endif } } else // L { refPatchRow = ref - TMP_TEMPLATE_SIZE; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tarPatchRow = tarPatch + TMP_TEMPLATE_SIZE * tarStride; +#endif // vertical difference for (int iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++) { +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT tarPatchRow = tarPatch[iY]; +#endif for (int iX = 0; iX < TMP_TEMPLATE_SIZE; iX++) { leftDiff += abs(refPatchRow[iX] - tarPatchRow[iX] @@ -18289,6 +19194,9 @@ void IntraPrediction::calcTemplateDiff(Pel *ref, unsigned int uiStride, Pel **ta } refPatchRow += uiStride; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tarPatchRow += tarStride; +#endif } } #if JVET_W0069_TMP_BOUNDARY @@ -18298,7 +19206,9 @@ void IntraPrediction::calcTemplateDiff(Pel *ref, unsigned int uiStride, Pel **ta // top template difference for (int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++) { +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT tarPatchRow = tarPatch[iY]; +#endif #if JVET_AH0200_INTRA_TMP_BV_REORDER tempDiff1 = 0; #endif @@ -18326,6 +19236,9 @@ void IntraPrediction::calcTemplateDiff(Pel *ref, unsigned int uiStride, Pel **ta break; } refPatchRow += uiStride; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tarPatchRow += tarStride; +#endif } } else if (tempType == LEFT_TEMPLATE) @@ -18333,7 +19246,9 @@ void IntraPrediction::calcTemplateDiff(Pel *ref, unsigned int uiStride, Pel **ta // left template difference for (int iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++) { +#if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT tarPatchRow = tarPatch[iY]; +#endif for (int iX = 0; iX < TMP_TEMPLATE_SIZE; iX++) { diffSum += abs(refPatchRow[iX] - tarPatchRow[iX] @@ -18355,6 +19270,9 @@ void IntraPrediction::calcTemplateDiff(Pel *ref, unsigned int uiStride, Pel **ta break; } refPatchRow += uiStride; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tarPatchRow += tarStride; +#endif } } #endif diff --git a/source/Lib/CommonLib/IntraPrediction.h b/source/Lib/CommonLib/IntraPrediction.h index faa92c107..3f76cef5a 100644 --- a/source/Lib/CommonLib/IntraPrediction.h +++ b/source/Lib/CommonLib/IntraPrediction.h @@ -78,6 +78,12 @@ class TempLibFast public: int m_pX; //offset X int m_pY; //offset Y +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + Mv m_windTL; // TL offset of refine window + Mv m_windBR; // BR offset of refine window + bool m_isTransferredLeft; + bool m_isTransferredTop; +#endif #if JVET_AD0086_ENHANCED_INTRA_TMP int m_rId; #else @@ -88,10 +94,20 @@ public: TempLibFast(); ~TempLibFast(); #if JVET_AD0086_ENHANCED_INTRA_TMP - TempLibFast(const int pX, const int pY, const int rId) + TempLibFast(const int pX, const int pY +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + , const Mv windTL, const Mv windBR, const bool isTransferredLeft, const bool isTransferredTop +#endif + , const int rId) { m_pX = pX; m_pY = pY; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + m_windTL = windTL; + m_windBR = windBR; + m_isTransferredLeft = isTransferredLeft; + m_isTransferredTop = isTransferredTop; +#endif m_rId = rId; }; #endif @@ -341,6 +357,9 @@ private: static const uint8_t m_aucIntraFilterExt[MAX_INTRA_FILTER_DEPTHS]; RdCost* m_timdSatdCost; #endif +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + RdCost* m_itmpSatdCost; +#endif #if JVET_AC0071_DBV RdCost *m_dbvSadCost; #endif @@ -474,8 +493,12 @@ protected: Picture* m_refPicBuf; unsigned int m_uiPicStride; unsigned int m_uiVaildCandiNum; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + Pel* m_pppTarPatch; +#else Pel*** m_pppTarPatch; #endif +#endif #if TMP_FAST_ENC #if JVET_AD0086_ENHANCED_INTRA_TMP @@ -1073,16 +1096,34 @@ public: #if JVET_W0069_TMP_BOUNDARY #if JVET_AD0086_ENHANCED_INTRA_TMP #if JVET_AG0136_INTRA_TMP_LIC +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + void (*m_calcTemplateDiffJointSadMrsad) (const Pel* const ref, const unsigned int uiStride, Pel* tarPatch, int tarStride, const unsigned int uiPatchWidth, const unsigned int uiPatchHeight, int* diffSad, int* diffMrsad, int* iMaxSad, int* iMaxMrsad, const RefTemplateType tempType, const int log2SizeTop, const int log2SizeLeft, const int sizeTopLeft, const int topTargetMean, const int leftTargetMean, const int licShift); + void(*m_calcTargetMean) (Pel* tarPatch, int tarStride, const unsigned int uiPatchWidth, const unsigned int uiPatchHeight, const RefTemplateType tempType, const int requiredTemplate, const int log2SizeTop, const int log2SizeLeft, const int sizeTopLeft, int& topTargetMean, int& leftTargetMean); + static void calcTemplateDiffJointSadMrsad(const Pel* const ref, const unsigned int uiStride, Pel* tarPatch, int tarStride, const unsigned int uiPatchWidth, const unsigned int uiPatchHeight, int* diffSad, int* diffMrsad, int* iMaxSad, int* iMaxMrsad, const RefTemplateType tempType, const int log2SizeTop, const int log2SizeLeft, const int sizeTopLeft, const int topTargetMean, const int leftTargetMean, const int licShift); + static void calcTargetMean(Pel* tarPatch, int tarStride, const unsigned int uiPatchWidth, const unsigned int uiPatchHeight, const RefTemplateType tempType, const int requiredTemplate, const int log2SizeTop, const int log2SizeLeft, const int sizeTopLeft, int& topTargetMean, int& leftTargetMean); +#else void (*m_calcTemplateDiffJointSadMrsad) (const Pel* const ref, const unsigned int uiStride, Pel** tarPatch, const unsigned int uiPatchWidth, const unsigned int uiPatchHeight, int* diffSad, int* diffMrsad, int* iMaxSad, int* iMaxMrsad, const RefTemplateType tempType, const int log2SizeTop, const int log2SizeLeft, const int sizeTopLeft, const int topTargetMean, const int leftTargetMean); void(*m_calcTargetMean) (Pel** tarPatch, const unsigned int uiPatchWidth, const unsigned int uiPatchHeight, const RefTemplateType tempType, const int requiredTemplate, const int log2SizeTop, const int log2SizeLeft, const int sizeTopLeft, int& topTargetMean, int& leftTargetMean); static void calcTemplateDiffJointSadMrsad(const Pel* const ref, const unsigned int uiStride, Pel** tarPatch, const unsigned int uiPatchWidth, const unsigned int uiPatchHeight, int* diffSad, int* diffMrsad, int* iMaxSad, int* iMaxMrsad, const RefTemplateType tempType, const int log2SizeTop, const int log2SizeLeft, const int sizeTopLeft, const int topTargetMean, const int leftTargetMean); static void calcTargetMean(Pel** tarPatch, const unsigned int uiPatchWidth, const unsigned int uiPatchHeight, const RefTemplateType tempType, const int requiredTemplate, const int log2SizeTop, const int log2SizeLeft, const int sizeTopLeft, int& topTargetMean, int& leftTargetMean); +#endif // #endif +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + void(*m_calcTemplateDiff) (Pel* ref, unsigned int uiStride, Pel* tarPatch, int tarStride, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int* diff, int* iMax, RefTemplateType TempType, int requiredTemplate +#else void(*m_calcTemplateDiff) (Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int *diff, int *iMax, RefTemplateType TempType, int requiredTemplate +#endif #if JVET_AG0136_INTRA_TMP_LIC , const bool isMrSad, const int log2SizeTop, const int log2SizeLeft, const int sizeTopLeft, const int topTargetMean, const int leftTargetMean #endif ); +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + static void calcTemplateDiff(Pel* ref, unsigned int uiStride, Pel* tarPatch, int tarStride, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int* diff, int* iMax, RefTemplateType TempType, int requiredTemplate +#if JVET_AG0136_INTRA_TMP_LIC + , const bool isMrSad, const int log2SizeTop, const int log2SizeLeft, const int sizeTopLeft, const int topTargetMean, const int leftTargetMean +#endif + ); +#else static void calcTemplateDiff(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int *diff, int *iMax, RefTemplateType TempType, int requiredTemplate @@ -1090,6 +1131,7 @@ public: , const bool isMrSad, const int log2SizeTop, const int log2SizeLeft, const int sizeTopLeft, const int topTargetMean, const int leftTargetMean #endif ); +#endif #else int( *m_calcTemplateDiff ) ( Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int iMax, RefTemplateType TempType ); static int calcTemplateDiff ( Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int iMax, RefTemplateType TempType ); @@ -1098,7 +1140,11 @@ public: int( *m_calcTemplateDiff ) (Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int iMax); static int calcTemplateDiff ( Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int iMax ); #endif +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + Pel* getTargetPatch() { return m_pppTarPatch; } +#else Pel** getTargetPatch ( unsigned int uiDepth ) { return m_pppTarPatch[uiDepth]; } +#endif Pel* getRefPicUsed () { return m_refPicUsed; } void setRefPicUsed ( Pel* ref ) { m_refPicUsed = ref; } unsigned int getStride () { return m_uiPicStride; } @@ -1106,7 +1152,11 @@ public: #if JVET_W0069_TMP_BOUNDARY RefTemplateType getRefTemplateType ( CodingUnit& cu, CompArea& area ); +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + void searchCandidateFromOnePicIntra(CodingUnit* pcCU, Pel* tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, RefTemplateType tempType +#else void searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, RefTemplateType tempType +#endif #if JVET_AG0136_INTRA_TMP_LIC , const bool useMR #endif @@ -1122,7 +1172,11 @@ public: #if JVET_AH0200_INTRA_TMP_BV_REORDER void xPadForFracSearchInterpolation (CodingUnit* pcCU, RefTemplateType tempType); void xTmpFracSearchIF(PredictionUnit& pu, Pel* padbf0, unsigned int padStride, Pel* preTmpbf0, unsigned int predTempStride, Pel* tmp0, unsigned int tmpStride, int extUiWidth, int extUiHeight, int fracPrec, int fracDir); +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + void searchFracCandidate(CodingUnit* pcCU, Pel* tarPatch, RefTemplateType tempType); +#else void searchFracCandidate( CodingUnit* pcCU, Pel** tarPatch, RefTemplateType tempType); +#endif InterPrediction *m_pcInterPred; void setInterPrediction( InterPrediction *inter); #endif diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 50cfa23e0..85355e9b7 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -143,6 +143,7 @@ #define JVET_AG0146_DIMD_ITMP_IBC 1 // JVET-AG0146: DIMD with Intra TMP and IBC #define JVET_AH0055_INTRA_TMP_ARBVP 1 // JVET-AH0055: AR-BVP for intra TMP merge candidates #define JVET_AH0200_INTRA_TMP_BV_REORDER 1 // JVET-AH0200: Intra TMP BV reordering +#define JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT 1 // JVET-AI0129: Intra TMP candidates with overlapping refinement window enhanced. #endif #define JVET_W0123_TIMD_FUSION 1 // JVET-W0123: Template based intra mode derivation and fusion diff --git a/source/Lib/CommonLib/UnitTools.cpp b/source/Lib/CommonLib/UnitTools.cpp index 417e2bd8d..85a8389bb 100644 --- a/source/Lib/CommonLib/UnitTools.cpp +++ b/source/Lib/CommonLib/UnitTools.cpp @@ -1454,6 +1454,99 @@ void getNeighBv(const PredictionUnit& puOrg, const PredictionUnit* pu, std::vect } } +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT +void PU::getSparseArBvMergeCandidate(const PredictionUnit& pu, std::vector<Mv>& pBvs, static_vector<TempLibFast, MTMP_NUM_SPARSE> &sparseMtmpCandList) +{ + const int numMrgMArbvp = static_cast<int>(pBvs.size()); + const int totalNum = numMrgMArbvp + NUM_TMP_ARBVP_S; + int end = (int) sparseMtmpCandList.size(); + Position posCand[5] = { pu.Y().center(), pu.Y().topLeft(), pu.Y().topRight(), pu.Y().bottomLeft(), pu.Y().bottomRight() }; + int offsetX = 0, offsetY = 0; + Mv cMv, arbv, arbv2, bv; + + for (int mergeIndex = 0; mergeIndex < end && pBvs.size() < totalNum; mergeIndex++) + { + offsetX = sparseMtmpCandList[mergeIndex].m_pX; + offsetY = sparseMtmpCandList[mergeIndex].m_pY; + cMv = Mv(offsetX, offsetY); + + for (int n = 0; n < 5 && pBvs.size() < totalNum; n++) + { + const PredictionUnit* puCascaded = pu.cs->getPURestricted(posCand[n].offset(offsetX, offsetY), pu, pu.chType); + if (!puCascaded || ((puCascaded->cu->predMode != MODE_IBC) && (!puCascaded->cu->tmpFlag))) + { + continue; + } + + arbv = cMv + puCascaded->bv; + if (PU::validItmpBv(pu, arbv.hor, arbv.ver)) + { + if (!PU::CheckBvAvailable(pBvs, arbv)) + { + pBvs.push_back(arbv); + if (pBvs.size() >= totalNum) + { + break; + } + } + } + if (PU::validItmpBv(pu, puCascaded->bv.hor, puCascaded->bv.ver)) + { + if (!PU::CheckBvAvailable(pBvs, puCascaded->bv)) + { + pBvs.push_back(puCascaded->bv); + if (pBvs.size() >= totalNum) + { + break; + } + } + } + if ((puCascaded->cu->predMode == MODE_IBC && puCascaded->interDir == 3) || (puCascaded->cu->tmpFlag && puCascaded->cu->tmpIdx > 0 +#if JVET_AG0136_INTRA_TMP_LIC + && !puCascaded->cu->tmpLicFlag +#endif + )) + { + if (puCascaded->cu->predMode == MODE_IBC) + { + bv = puCascaded->mv[REF_PIC_LIST_1]; + bv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT); + } + else + { + bv = Mv(puCascaded->cu->tmpXdisp, puCascaded->cu->tmpYdisp); + } + arbv2 = cMv + bv; + if (PU::validItmpBv(pu, arbv2.hor, arbv2.ver)) + { + if (!PU::CheckBvAvailable(pBvs, arbv2)) + { + pBvs.push_back(arbv2); + if (pBvs.size() >= totalNum) + { + break; + } + } + } + if (PU::validItmpBv(pu, bv.hor, bv.ver)) + { + if (!PU::CheckBvAvailable(pBvs, bv)) + { + pBvs.push_back(bv); + + if (pBvs.size() >= totalNum) + { + break; + } + } + } + } + } + } + return; +} +#endif + int PU::getItmpMergeCandidate(const PredictionUnit& pu, std::vector<Mv>& pBvs #if JVET_AH0200_INTRA_TMP_BV_REORDER , std::vector<Mv>& pSgpmMvs diff --git a/source/Lib/CommonLib/UnitTools.h b/source/Lib/CommonLib/UnitTools.h index 1ec488897..2bbce580a 100644 --- a/source/Lib/CommonLib/UnitTools.h +++ b/source/Lib/CommonLib/UnitTools.h @@ -213,6 +213,9 @@ namespace PU #if JVET_AH0200_INTRA_TMP_BV_REORDER bool validIBCItmpMv(const PredictionUnit& pu, Mv curMv, int templateSize); #endif +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + void getSparseArBvMergeCandidate(const PredictionUnit& pu, std::vector<Mv>& pBvs, static_vector<TempLibFast, MTMP_NUM_SPARSE>& sparseMtmpCandList); +#endif #endif #if JVET_AD0184_REMOVAL_OF_DIVISION_OPERATIONS int getMeanValue(int sum, int div); diff --git a/source/Lib/CommonLib/x86/IntraX86.h b/source/Lib/CommonLib/x86/IntraX86.h index b3302bbdb..51c195977 100644 --- a/source/Lib/CommonLib/x86/IntraX86.h +++ b/source/Lib/CommonLib/x86/IntraX86.h @@ -164,11 +164,25 @@ inline int summation16(const short* pSrc, const int start, const int end) #endif template<X86_VEXT vext> -inline void calcDiffDelta4Joint(const short* const pSrc1, const short* const pSrc2, const __m128i delta, const int start, const int end, int& sad, int& mrsad) +inline void calcDiffDelta4Joint(const short* const pSrc1, const short* const pSrc2, const __m128i delta, const int start, const int end, int& sad, int& mrsad +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + , const int licShift +#endif + ) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const __m128i reference_v = _mm_loadl_epi64((const __m128i*) &pSrc2[start]); + const __m128i difference = _mm_sub_epi16(_mm_loadl_epi64((const __m128i*) &pSrc1[start]), reference_v); + const __m128i difference_lic = _mm_sub_epi16(_mm_loadl_epi64((const __m128i*) &pSrc1[start + licShift]), reference_v); +#else const __m128i difference = _mm_sub_epi16(_mm_loadl_epi64((const __m128i*) &pSrc1[start]), _mm_loadl_epi64((const __m128i*) &pSrc2[start])); +#endif const __m128i vsumSad16 = _mm_abs_epi16(difference); +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const __m128i vsumMrsad16 = _mm_abs_epi16(_mm_sub_epi16(difference_lic, delta)); +#else const __m128i vsumMrsad16 = _mm_abs_epi16(_mm_sub_epi16(difference, delta)); +#endif const __m128i vzero = _mm_setzero_si128(); __m128i vsumSad32 = _mm_unpacklo_epi16(vsumSad16, vzero); vsumSad32 = _mm_add_epi32(vsumSad32, _mm_shuffle_epi32(vsumSad32, 0x4e)); @@ -192,12 +206,26 @@ inline int calcDiffDelta4(const short* const pSrc1, const short* const pSrc2, co } template<X86_VEXT vext> -inline void calcDiffDelta8Joint(const short* const pSrc1, const short* const pSrc2, const __m128i delta, const int start, const int end, int& sad, int& mrsad) +inline void calcDiffDelta8Joint(const short* const pSrc1, const short* const pSrc2, const __m128i delta, const int start, const int end, int& sad, int& mrsad +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + , const int licShift +#endif + ) { #if USE_AVX2 +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const __m128i reference_v = _mm_lddqu_si128((const __m128i*) &pSrc2[start]); + const __m128i difference = _mm_sub_epi16(_mm_loadu_si128((const __m128i*) &pSrc1[start]), reference_v); + const __m128i difference_lic = _mm_sub_epi16(_mm_loadu_si128((const __m128i*) &pSrc1[start + licShift]), reference_v); +#else const __m128i difference = _mm_sub_epi16(_mm_loadu_si128((const __m128i*) &pSrc1[start]), _mm_lddqu_si128((const __m128i*) &pSrc2[start])); +#endif const __m128i vsumSad16 = _mm_abs_epi16(difference); +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const __m128i vsumMrsad16 = _mm_abs_epi16(_mm_sub_epi16(difference_lic, delta)); +#else const __m128i vsumMrsad16 = _mm_abs_epi16(_mm_sub_epi16(difference, delta)); +#endif const __m128i vzero = _mm_setzero_si128(); #else const __m128i vzero = _mm_setzero_si128(); @@ -207,9 +235,17 @@ inline void calcDiffDelta8Joint(const short* const pSrc1, const short* const pSr { const __m128i vsrc1 = _mm_loadu_si128((const __m128i*) &pSrc1[iX]); const __m128i vsrc2 = _mm_lddqu_si128((const __m128i*) &pSrc2[iX]); +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const __m128i vsrc1_lic = _mm_loadu_si128((const __m128i*) & pSrc1[iX + licShift]); + const __m128i difference_lic = _mm_sub_epi16(vsrc1_lic, vsrc2); +#endif const __m128i difference = _mm_sub_epi16(vsrc1, vsrc2); vsumSad16 = _mm_add_epi16(vsumSad16, _mm_abs_epi16(difference)); +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + vsumMrsad16 = _mm_add_epi16(vsumMrsad16, _mm_abs_epi16(_mm_sub_epi16(difference_lic, delta))); +#else vsumMrsad16 = _mm_add_epi16(vsumMrsad16, _mm_abs_epi16(_mm_sub_epi16(difference, delta))); +#endif } #endif __m128i vsumSad32 = _mm_add_epi32(_mm_unpacklo_epi16(vsumSad16, vzero), _mm_unpackhi_epi16(vsumSad16, vzero)); @@ -246,7 +282,11 @@ inline int calcDiffDelta8(const short* const pSrc1, const short* const pSrc2, co #if USE_AVX2 template<X86_VEXT vext> -inline void calcDiffDelta16Joint(const short* pSrc1, const short* pSrc2, const __m256i delta, const int start, const int end, int& sad, int& mrsad) +inline void calcDiffDelta16Joint(const short* pSrc1, const short* pSrc2, const __m256i delta, const int start, const int end, int& sad, int& mrsad +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + , const int licShift +#endif + ) { const __m256i vzero = _mm256_setzero_si256(); __m256i vsumSad16 = vzero; @@ -254,10 +294,20 @@ inline void calcDiffDelta16Joint(const short* pSrc1, const short* pSrc2, const _ for (int iX = start; iX < end; iX += 16) { const __m256i vsrc1 = _mm256_loadu_si256((const __m256i*) &pSrc1[iX]); +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const __m256i vsrc1_lic = _mm256_loadu_si256((const __m256i*) & pSrc1[iX + licShift]); +#endif const __m256i vsrc2 = _mm256_lddqu_si256((const __m256i*) &pSrc2[iX]); const __m256i difference = _mm256_sub_epi16(vsrc1, vsrc2); +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const __m256i difference_lic = _mm256_sub_epi16(vsrc1_lic, vsrc2); +#endif vsumSad16 = _mm256_add_epi16(vsumSad16, _mm256_abs_epi16(difference)); +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + vsumMrsad16 = _mm256_add_epi16(vsumMrsad16, _mm256_abs_epi16(_mm256_sub_epi16(difference_lic, delta))); +#else vsumMrsad16 = _mm256_add_epi16(vsumMrsad16, _mm256_abs_epi16(_mm256_sub_epi16(difference, delta))); +#endif } __m256i vsumSad32 = _mm256_add_epi32(_mm256_unpacklo_epi16(vsumSad16, vzero), _mm256_unpackhi_epi16(vsumSad16, vzero)); vsumSad32 = _mm256_add_epi32(vsumSad32, _mm256_shuffle_epi32(vsumSad32, 0x4e)); @@ -288,10 +338,17 @@ inline int calcDiffDelta16(const short* pSrc1, const short* pSrc2, const __m256i #endif template<X86_VEXT vext> +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT +void calcTargetMeanSIMD(Pel* tarPatch, int tarStride, const unsigned int uiPatchWidth, const unsigned int uiPatchHeight, const RefTemplateType tempType, const int requiredTemplate, const int log2SizeTop, const int log2SizeLeft, const int sizeTopLeft, int& topTargetMean, int& leftTargetMean) +#else void calcTargetMeanSIMD(Pel** tarPatch, const unsigned int uiPatchWidth, const unsigned int uiPatchHeight, const RefTemplateType tempType, const int requiredTemplate, const int log2SizeTop, const int log2SizeLeft, const int sizeTopLeft, int& topTargetMean, int& leftTargetMean) +#endif { topTargetMean = 0; leftTargetMean = 0; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const Pel* tmpBuf = tarPatch; +#endif if (tempType == L_SHAPE_TEMPLATE) { if (requiredTemplate == 3 || requiredTemplate == 0 || requiredTemplate == 1) @@ -301,7 +358,12 @@ void calcTargetMeanSIMD(Pel** tarPatch, const unsigned int uiPatchWidth, const u { for (int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + topTargetMean += summation16<vext>((const short*)tmpBuf, TMP_TEMPLATE_SIZE, uiPatchWidth); + tmpBuf += tarStride; +#else topTargetMean += summation16<vext>((const short*) tarPatch[iY], TMP_TEMPLATE_SIZE, uiPatchWidth); +#endif } } else if (((uiPatchWidth - TMP_TEMPLATE_SIZE) & 7) == 0) @@ -311,14 +373,24 @@ void calcTargetMeanSIMD(Pel** tarPatch, const unsigned int uiPatchWidth, const u { for (int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + topTargetMean += summation8<vext>((const short*)tmpBuf, TMP_TEMPLATE_SIZE, uiPatchWidth); + tmpBuf += tarStride; +#else topTargetMean += summation8<vext>((const short*) tarPatch[iY], TMP_TEMPLATE_SIZE, uiPatchWidth); +#endif } } else if (uiPatchWidth == 8) { for (int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + topTargetMean += summation4<vext>((const short*)tmpBuf, TMP_TEMPLATE_SIZE, uiPatchWidth); + tmpBuf += tarStride; +#else topTargetMean += summation4<vext>((const short*) tarPatch[iY], TMP_TEMPLATE_SIZE, uiPatchWidth); +#endif } } topTargetMean >>= log2SizeTop; @@ -327,7 +399,12 @@ void calcTargetMeanSIMD(Pel** tarPatch, const unsigned int uiPatchWidth, const u { for (int iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + leftTargetMean += summation4<vext>((const short*)tmpBuf, 0, TMP_TEMPLATE_SIZE); + tmpBuf += tarStride; +#else leftTargetMean += summation4<vext>((const short*) tarPatch[iY], 0, TMP_TEMPLATE_SIZE); +#endif } leftTargetMean >>= log2SizeLeft; } @@ -339,7 +416,12 @@ void calcTargetMeanSIMD(Pel** tarPatch, const unsigned int uiPatchWidth, const u { for (int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + topTargetMean += summation16<vext>((const short*)tmpBuf, 0, uiPatchWidth - TMP_TEMPLATE_SIZE); + tmpBuf += tarStride; +#else topTargetMean += summation16<vext>((const short*) tarPatch[iY], 0, uiPatchWidth - TMP_TEMPLATE_SIZE); +#endif } } else if (((uiPatchWidth - TMP_TEMPLATE_SIZE) & 7) == 0) @@ -349,14 +431,24 @@ void calcTargetMeanSIMD(Pel** tarPatch, const unsigned int uiPatchWidth, const u { for (int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + topTargetMean += summation8<vext>((const short*)tmpBuf, 0, uiPatchWidth - TMP_TEMPLATE_SIZE); + tmpBuf += tarStride; +#else topTargetMean += summation8<vext>((const short*) tarPatch[iY], 0, uiPatchWidth - TMP_TEMPLATE_SIZE); +#endif } } else if (uiPatchWidth == 8) { for (int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + topTargetMean += summation4<vext>((const short*)tmpBuf, 0, uiPatchWidth - TMP_TEMPLATE_SIZE); + tmpBuf += tarStride; +#else topTargetMean += summation4<vext>((const short*) tarPatch[iY], 0, uiPatchWidth - TMP_TEMPLATE_SIZE); +#endif } } topTargetMean >>= log2SizeTop; @@ -365,7 +457,12 @@ void calcTargetMeanSIMD(Pel** tarPatch, const unsigned int uiPatchWidth, const u { for (int iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + leftTargetMean += summation4<vext>((const short*)tmpBuf, 0, TMP_TEMPLATE_SIZE); + tmpBuf += tarStride; +#else leftTargetMean += summation4<vext>((const short*) tarPatch[iY], 0, TMP_TEMPLATE_SIZE); +#endif } leftTargetMean >>= log2SizeLeft; } @@ -385,7 +482,11 @@ inline __m128i calcMeanRefLeftSIMD(const Pel* const ref, const unsigned int uiPa } template<X86_VEXT vext> +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT +void calcTemplateDiffJointSadMrsadSIMD(const Pel* const ref, const unsigned int uiStride, Pel* tarPatch, int tarStride, const unsigned int uiPatchWidth, const unsigned int uiPatchHeight, int* diffSad, int* diffMrsad, int* iMaxSad, int* iMaxMrsad, const RefTemplateType tempType, const int log2SizeTop, const int log2SizeLeft, const int sizeTopLeft, const int topTargetMean, const int leftTargetMean, const int licShift) +#else void calcTemplateDiffJointSadMrsadSIMD(const Pel* const ref, const unsigned int uiStride, Pel** tarPatch, const unsigned int uiPatchWidth, const unsigned int uiPatchHeight, int* diffSad, int* diffMrsad, int* iMaxSad, int* iMaxMrsad, const RefTemplateType tempType, const int log2SizeTop, const int log2SizeLeft, const int sizeTopLeft, const int topTargetMean, const int leftTargetMean) +#endif { int diffSumSad = 0; int diffSumMrsad = 0; @@ -393,7 +494,14 @@ void calcTemplateDiffJointSadMrsadSIMD(const Pel* const ref, const unsigned int int topDiffMrsad = MAX_INT; int leftDiffSad = MAX_INT; int leftDiffMrsad = MAX_INT; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const Pel* const refLic = ref + licShift; + const Pel* tmpBuf = tarPatch; +#endif const Pel* refPatchRow = tempType == L_SHAPE_TEMPLATE ? ref - TMP_TEMPLATE_SIZE * uiStride - TMP_TEMPLATE_SIZE : (tempType == ABOVE_TEMPLATE ? ref - TMP_TEMPLATE_SIZE * uiStride : ref - TMP_TEMPLATE_SIZE); +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const Pel* refPatchRowLic = tempType == L_SHAPE_TEMPLATE ? refLic - TMP_TEMPLATE_SIZE * uiStride - TMP_TEMPLATE_SIZE : (tempType == ABOVE_TEMPLATE ? refLic - TMP_TEMPLATE_SIZE * uiStride : refLic - TMP_TEMPLATE_SIZE); +#endif int topMeanRef = 0; __m128i topMeanDelta = _mm_setzero_si128(); #if USE_AVX2 @@ -401,7 +509,11 @@ void calcTemplateDiffJointSadMrsadSIMD(const Pel* const ref, const unsigned int #endif if (tempType == L_SHAPE_TEMPLATE) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const Pel* refPatchRowTemp = refPatchRowLic; +#else const Pel* refPatchRowTemp = refPatchRow; +#endif #if USE_AVX2 if (vext >= AVX2 && ((uiPatchWidth - TMP_TEMPLATE_SIZE) & 15) == 0) { @@ -431,7 +543,11 @@ void calcTemplateDiffJointSadMrsadSIMD(const Pel* const ref, const unsigned int } else if (tempType == ABOVE_TEMPLATE) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const Pel* refPatchRowTemp = refPatchRowLic; +#else const Pel* refPatchRowTemp = refPatchRow; +#endif #if USE_AVX2 if (vext >= AVX2 && ((uiPatchWidth - TMP_TEMPLATE_SIZE) & 15) == 0) { @@ -481,9 +597,18 @@ void calcTemplateDiffJointSadMrsadSIMD(const Pel* const ref, const unsigned int for (int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRowTemp += uiStride) { const short* const pSrc1 = (const short*) refPatchRowTemp; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const short* const pSrc2 = (const short*)tmpBuf; + tmpBuf += tarStride; +#else const short* const pSrc2 = (const short*) tarPatch[iY]; - calcDiffDelta4Joint<vext>(pSrc1, pSrc2, topMeanDelta, 0, TMP_TEMPLATE_SIZE, iSumSad, iSumMrsad); -#if JVET_AH0200_INTRA_TMP_BV_REORDER +#endif + calcDiffDelta4Joint<vext>(pSrc1, pSrc2, topMeanDelta, 0, TMP_TEMPLATE_SIZE, iSumSad, iSumMrsad +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + , licShift +#endif + ); +#if JVET_AH0200_INTRA_TMP_BV_REORDER && !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT if (iY == (TMP_TEMPLATE_SIZE - 1)) { iSumSad <<= TMP_TEMPLATE_COST_SHIFT; @@ -492,7 +617,11 @@ void calcTemplateDiffJointSadMrsadSIMD(const Pel* const ref, const unsigned int #endif diffSumSad += iSumSad; diffSumMrsad += iSumMrsad; - calcDiffDelta16Joint<vext>(pSrc1, pSrc2, topMeanDelta256, TMP_TEMPLATE_SIZE, uiPatchWidth, iSumSad, iSumMrsad); + calcDiffDelta16Joint<vext>(pSrc1, pSrc2, topMeanDelta256, TMP_TEMPLATE_SIZE, uiPatchWidth, iSumSad, iSumMrsad +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + , licShift +#endif + ); #if JVET_AH0200_INTRA_TMP_BV_REORDER if (iY == (TMP_TEMPLATE_SIZE - 1)) { @@ -519,9 +648,18 @@ void calcTemplateDiffJointSadMrsadSIMD(const Pel* const ref, const unsigned int for (int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRowTemp += uiStride) { const short* const pSrc1 = (const short*) refPatchRowTemp; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const short* const pSrc2 = (const short*)tmpBuf; + tmpBuf += tarStride; +#else const short* const pSrc2 = (const short*) tarPatch[iY]; - calcDiffDelta4Joint<vext>(pSrc1, pSrc2, topMeanDelta, 0, TMP_TEMPLATE_SIZE, iSumSad, iSumMrsad); -#if JVET_AH0200_INTRA_TMP_BV_REORDER +#endif + calcDiffDelta4Joint<vext>(pSrc1, pSrc2, topMeanDelta, 0, TMP_TEMPLATE_SIZE, iSumSad, iSumMrsad +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + , licShift +#endif + ); +#if JVET_AH0200_INTRA_TMP_BV_REORDER && !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT if (iY == (TMP_TEMPLATE_SIZE - 1)) { iSumSad <<= TMP_TEMPLATE_COST_SHIFT; @@ -530,7 +668,11 @@ void calcTemplateDiffJointSadMrsadSIMD(const Pel* const ref, const unsigned int #endif diffSumSad += iSumSad; diffSumMrsad += iSumMrsad; - calcDiffDelta8Joint<vext>(pSrc1, pSrc2, topMeanDelta, TMP_TEMPLATE_SIZE, uiPatchWidth, iSumSad, iSumMrsad); + calcDiffDelta8Joint<vext>(pSrc1, pSrc2, topMeanDelta, TMP_TEMPLATE_SIZE, uiPatchWidth, iSumSad, iSumMrsad +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + , licShift +#endif + ); #if JVET_AH0200_INTRA_TMP_BV_REORDER if (iY == (TMP_TEMPLATE_SIZE - 1)) { @@ -554,9 +696,18 @@ void calcTemplateDiffJointSadMrsadSIMD(const Pel* const ref, const unsigned int for (int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRowTemp += uiStride) { const short* const pSrc1 = (const short*) refPatchRowTemp; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const short* const pSrc2 = (const short*)tmpBuf; + tmpBuf += tarStride; +#else const short* const pSrc2 = (const short*) tarPatch[iY]; - calcDiffDelta4Joint<vext>(pSrc1, pSrc2, topMeanDelta, 0, TMP_TEMPLATE_SIZE, iSumSad, iSumMrsad); -#if JVET_AH0200_INTRA_TMP_BV_REORDER +#endif + calcDiffDelta4Joint<vext>(pSrc1, pSrc2, topMeanDelta, 0, TMP_TEMPLATE_SIZE, iSumSad, iSumMrsad +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + , licShift +#endif + ); +#if JVET_AH0200_INTRA_TMP_BV_REORDER && !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT if (iY == (TMP_TEMPLATE_SIZE - 1)) { iSumSad <<= TMP_TEMPLATE_COST_SHIFT; @@ -565,7 +716,11 @@ void calcTemplateDiffJointSadMrsadSIMD(const Pel* const ref, const unsigned int #endif diffSumSad += iSumSad; diffSumMrsad += iSumMrsad; - calcDiffDelta4Joint<vext>(pSrc1, pSrc2, topMeanDelta, TMP_TEMPLATE_SIZE, uiPatchWidth, iSumSad, iSumMrsad); + calcDiffDelta4Joint<vext>(pSrc1, pSrc2, topMeanDelta, TMP_TEMPLATE_SIZE, uiPatchWidth, iSumSad, iSumMrsad +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + , licShift +#endif + ); #if JVET_AH0200_INTRA_TMP_BV_REORDER if (iY == (TMP_TEMPLATE_SIZE - 1)) { @@ -583,21 +738,41 @@ void calcTemplateDiffJointSadMrsadSIMD(const Pel* const ref, const unsigned int } } } +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const __m128i leftMeanDelta = calcMeanRefLeftSIMD<vext>(refLic, uiPatchHeight, uiStride, log2SizeLeft, leftTargetMean); +#else const __m128i leftMeanDelta = calcMeanRefLeftSIMD<vext>(ref, uiPatchHeight, uiStride, log2SizeLeft, leftTargetMean); +#endif #if JVET_AH0200_INTRA_TMP_BV_REORDER const short leftMeanDeltaVal = (short)_mm_cvtsi128_si32(leftMeanDelta); #endif refPatchRow = ref - TMP_TEMPLATE_SIZE; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tmpBuf = tarPatch + TMP_TEMPLATE_SIZE * tarStride; +#endif for (int iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++, refPatchRow += uiStride) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + calcDiffDelta4Joint<vext>((const short*)refPatchRow, (const short*)tmpBuf, leftMeanDelta, 0, TMP_TEMPLATE_SIZE, iSumSad, iSumMrsad, licShift); +#else calcDiffDelta4Joint<vext>((const short*) refPatchRow, (const short*) tarPatch[iY], leftMeanDelta, 0, TMP_TEMPLATE_SIZE, iSumSad, iSumMrsad); +#endif #if JVET_AH0200_INTRA_TMP_BV_REORDER const short *pSrc1 = (const short *) refPatchRow; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const short* pSrc2 = tmpBuf; + tmpBuf += tarStride; +#else const short *pSrc2 = (const short *) tarPatch[iY]; +#endif iSumSad += (abs(pSrc1[TMP_TEMPLATE_SIZE - 1] - pSrc2[TMP_TEMPLATE_SIZE - 1]) * ((1 <<TMP_TEMPLATE_COST_SHIFT)-1)); +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + iSumMrsad += (abs(pSrc1[TMP_TEMPLATE_SIZE - 1 + licShift] - pSrc2[TMP_TEMPLATE_SIZE - 1] - leftMeanDeltaVal) * ((1 << TMP_TEMPLATE_COST_SHIFT) - 1)); +#else iSumMrsad += (abs(pSrc1[TMP_TEMPLATE_SIZE - 1] - pSrc2[TMP_TEMPLATE_SIZE - 1] - leftMeanDeltaVal) * ((1 << TMP_TEMPLATE_COST_SHIFT) - 1)); +#endif #endif diffSumSad += iSumSad; diffSumMrsad += iSumMrsad; @@ -617,7 +792,12 @@ void calcTemplateDiffJointSadMrsadSIMD(const Pel* const ref, const unsigned int { for (int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRow += uiStride) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + calcDiffDelta16Joint<vext>((const short*)refPatchRow, tmpBuf, topMeanDelta256, 0, iCols, iSumSad, iSumMrsad, licShift); + tmpBuf += tarStride; +#else calcDiffDelta16Joint<vext>((const short*) refPatchRow, (const short*) tarPatch[iY], topMeanDelta256, 0, iCols, iSumSad, iSumMrsad); +#endif #if JVET_AH0200_INTRA_TMP_BV_REORDER if (iY == (TMP_TEMPLATE_SIZE - 1)) { @@ -640,7 +820,12 @@ void calcTemplateDiffJointSadMrsadSIMD(const Pel* const ref, const unsigned int { for (int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRow += uiStride) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + calcDiffDelta8Joint<vext>((const short*)refPatchRow, (const short*)tmpBuf, topMeanDelta, 0, iCols, iSumSad, iSumMrsad, licShift); + tmpBuf += tarStride; +#else calcDiffDelta8Joint<vext>((const short*) refPatchRow, (const short*) tarPatch[iY], topMeanDelta, 0, iCols, iSumSad, iSumMrsad); +#endif #if JVET_AH0200_INTRA_TMP_BV_REORDER if (iY == (TMP_TEMPLATE_SIZE - 1)) { @@ -660,7 +845,12 @@ void calcTemplateDiffJointSadMrsadSIMD(const Pel* const ref, const unsigned int { for (int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRow += uiStride) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + calcDiffDelta4Joint<vext>((const short*)refPatchRow, tmpBuf, topMeanDelta, 0, iCols, iSumSad, iSumMrsad, licShift); + tmpBuf += tarStride; +#else calcDiffDelta4Joint<vext>((const short*) refPatchRow, (const short*) tarPatch[iY], topMeanDelta, 0, iCols, iSumSad, iSumMrsad); +#endif #if JVET_AH0200_INTRA_TMP_BV_REORDER if (iY == (TMP_TEMPLATE_SIZE - 1)) { @@ -679,20 +869,37 @@ void calcTemplateDiffJointSadMrsadSIMD(const Pel* const ref, const unsigned int } else if (tempType == LEFT_TEMPLATE) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const __m128i leftMeanDelta = calcMeanRefLeftSIMD<vext>(refLic, uiPatchHeight, uiStride, log2SizeLeft, leftTargetMean); +#else const __m128i leftMeanDelta = calcMeanRefLeftSIMD<vext>(ref, uiPatchHeight, uiStride, log2SizeLeft, leftTargetMean); +#endif #if JVET_AH0200_INTRA_TMP_BV_REORDER const short leftMeanDeltaVal = (short)_mm_cvtsi128_si32(leftMeanDelta); #endif for (int iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++, refPatchRow += uiStride) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + calcDiffDelta4Joint<vext>((const short*)refPatchRow, (const short*)tmpBuf, leftMeanDelta, 0, TMP_TEMPLATE_SIZE, iSumSad, iSumMrsad, licShift); +#else calcDiffDelta4Joint<vext>((const short*) refPatchRow, (const short*) tarPatch[iY], leftMeanDelta, 0, TMP_TEMPLATE_SIZE, iSumSad, iSumMrsad); +#endif #if JVET_AH0200_INTRA_TMP_BV_REORDER const short *pSrc1 = (const short *) refPatchRow; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const short* pSrc2 = tmpBuf; + tmpBuf += tarStride; +#else const short *pSrc2 = (const short *) tarPatch[iY]; +#endif iSumSad += (abs(pSrc1[TMP_TEMPLATE_SIZE - 1] - pSrc2[TMP_TEMPLATE_SIZE - 1]) * ((1 << TMP_TEMPLATE_COST_SHIFT) - 1)); +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + iSumMrsad += (abs(pSrc1[TMP_TEMPLATE_SIZE - 1 + licShift] - pSrc2[TMP_TEMPLATE_SIZE - 1] - leftMeanDeltaVal) * ((1 << TMP_TEMPLATE_COST_SHIFT) - 1)); +#else iSumMrsad += (abs(pSrc1[TMP_TEMPLATE_SIZE - 1] - pSrc2[TMP_TEMPLATE_SIZE - 1] - leftMeanDeltaVal) * ((1 << TMP_TEMPLATE_COST_SHIFT) - 1)); +#endif #endif diffSumSad += iSumSad; diffSumMrsad += iSumMrsad; @@ -711,6 +918,15 @@ void calcTemplateDiffJointSadMrsadSIMD(const Pel* const ref, const unsigned int } #endif template<X86_VEXT vext> +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT +void calcTemplateDiffSIMD(Pel* ref, unsigned int uiStride, Pel* tarPatch, int tarStride, unsigned int uiPatchWidth, + unsigned int uiPatchHeight, int* diff, int* iMax, RefTemplateType tempType, + int requiredTemplate +#if JVET_AG0136_INTRA_TMP_LIC + , const bool isMrSad, const int log2SizeTop, const int log2SizeLeft, const int sizeTopLeft, const int topTargetMean, const int leftTargetMean +#endif +) +#else void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int *diff, int *iMax, RefTemplateType tempType, int requiredTemplate @@ -718,11 +934,15 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig , const bool isMrSad, const int log2SizeTop, const int log2SizeLeft, const int sizeTopLeft, const int topTargetMean, const int leftTargetMean #endif ) +#endif { int diffSum = 0; int topDiff = MAX_INT; int leftDiff = MAX_INT; int iY; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + Pel* tmpBuf = tarPatch; +#endif #if JVET_W0069_TMP_BOUNDARY Pel *refPatchRow; if (tempType == L_SHAPE_TEMPLATE) @@ -833,8 +1053,13 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig for (iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRowTemp += uiStride) { const short* const pSrc1 = (const short*) refPatchRowTemp; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const short* const pSrc2 = (const short*)tmpBuf; + tmpBuf += tarStride; +#else const short* const pSrc2 = (const short*) tarPatch[iY]; -#if JVET_AH0200_INTRA_TMP_BV_REORDER +#endif +#if JVET_AH0200_INTRA_TMP_BV_REORDER && !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT uiSum = calcDiffDelta4<vext>(pSrc1, pSrc2, topMeanDelta, 0, TMP_TEMPLATE_SIZE); if (iY == (TMP_TEMPLATE_SIZE-1)) { @@ -868,8 +1093,13 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig for (iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRowTemp += uiStride) { const short* const pSrc1 = (const short*) refPatchRowTemp; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const short* const pSrc2 = (const short*)tmpBuf; + tmpBuf += tarStride; +#else const short* const pSrc2 = (const short*) tarPatch[iY]; -#if JVET_AH0200_INTRA_TMP_BV_REORDER +#endif +#if JVET_AH0200_INTRA_TMP_BV_REORDER && !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT uiSum = calcDiffDelta4<vext>(pSrc1, pSrc2, topMeanDelta, 0, TMP_TEMPLATE_SIZE); if (iY == (TMP_TEMPLATE_SIZE-1)) { @@ -900,8 +1130,13 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig for (iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRowTemp += uiStride) { const short* const pSrc1 = (const short*) refPatchRowTemp; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const short* const pSrc2 = (const short*)tmpBuf; + tmpBuf += tarStride; +#else const short* const pSrc2 = (const short*) tarPatch[iY]; -#if JVET_AH0200_INTRA_TMP_BV_REORDER +#endif +#if JVET_AH0200_INTRA_TMP_BV_REORDER && !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT uiSum = calcDiffDelta4<vext>(pSrc1, pSrc2, topMeanDelta, 0, TMP_TEMPLATE_SIZE); if (iY == (TMP_TEMPLATE_SIZE-1)) { @@ -931,12 +1166,24 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig const short leftMeanDeltaVal = (short)_mm_cvtsi128_si32(leftMeanDelta); #endif refPatchRow = ref - TMP_TEMPLATE_SIZE; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tmpBuf = tarPatch + TMP_TEMPLATE_SIZE * tarStride; +#endif for (iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++, refPatchRow += uiStride) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + uiSum = calcDiffDelta4<vext>((const short*)refPatchRow, (const short*)tmpBuf, leftMeanDelta, 0, TMP_TEMPLATE_SIZE); +#else uiSum = calcDiffDelta4<vext>((const short*) refPatchRow, (const short*) tarPatch[iY], leftMeanDelta, 0, TMP_TEMPLATE_SIZE); +#endif #if JVET_AH0200_INTRA_TMP_BV_REORDER const short *pSrc1 = (const short *) refPatchRow; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const short* pSrc2 = tmpBuf; + tmpBuf += tarStride; +#else const short *pSrc2 = (const short *) tarPatch[iY]; +#endif uiSum += (abs(pSrc1[TMP_TEMPLATE_SIZE - 1] - pSrc2[TMP_TEMPLATE_SIZE - 1] - leftMeanDeltaVal) * ((1 << TMP_TEMPLATE_COST_SHIFT) - 1)); #endif @@ -957,14 +1204,23 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig for (iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRow += uiStride) { const short* const pSrc1 = (const short*) refPatchRow; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const short* const pSrc2 = tmpBuf; + tmpBuf += tarStride; +#else const short* const pSrc2 = (const short*) tarPatch[iY]; +#endif #if JVET_AH0200_INTRA_TMP_BV_REORDER +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + diffSum += calcDiffDelta4<vext>(pSrc1, pSrc2, topMeanDelta, 0, TMP_TEMPLATE_SIZE); +#else uiSum = calcDiffDelta4<vext>(pSrc1, pSrc2, topMeanDelta, 0, TMP_TEMPLATE_SIZE); if (iY == (TMP_TEMPLATE_SIZE-1)) { uiSum <<= TMP_TEMPLATE_COST_SHIFT; } diffSum += uiSum; +#endif uiSum = calcDiffDelta16<vext>(pSrc1, pSrc2, topMeanDelta256, TMP_TEMPLATE_SIZE, uiPatchWidth); if (iY == (TMP_TEMPLATE_SIZE-1)) { @@ -990,14 +1246,23 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig for (iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRow += uiStride) { const short* const pSrc1 = (const short*) refPatchRow; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const short* const pSrc2 = tmpBuf; + tmpBuf += tarStride; +#else const short* const pSrc2 = (const short*) tarPatch[iY]; +#endif #if JVET_AH0200_INTRA_TMP_BV_REORDER +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + diffSum += calcDiffDelta4<vext>(pSrc1, pSrc2, topMeanDelta, 0, TMP_TEMPLATE_SIZE); +#else uiSum = calcDiffDelta4<vext>(pSrc1, pSrc2, topMeanDelta, 0, TMP_TEMPLATE_SIZE); if (iY == (TMP_TEMPLATE_SIZE-1)) { uiSum <<= TMP_TEMPLATE_COST_SHIFT; } diffSum += uiSum; +#endif uiSum = calcDiffDelta8<vext>(pSrc1, pSrc2, topMeanDelta, TMP_TEMPLATE_SIZE, uiPatchWidth); if (iY == (TMP_TEMPLATE_SIZE-1)) { @@ -1020,7 +1285,13 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig for (iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRow += uiStride) { #if JVET_AH0200_INTRA_TMP_BV_REORDER +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + diffSum += calcDiffDelta4<vext>((const short*) refPatchRow, tmpBuf, topMeanDelta, 0, TMP_TEMPLATE_SIZE); + uiSum = calcDiffDelta4<vext>((const short*) refPatchRow, tmpBuf, topMeanDelta, TMP_TEMPLATE_SIZE, uiPatchWidth); + tmpBuf += tarStride; +#else uiSum = calcDiffDelta8<vext>((const short*) refPatchRow, (const short*) tarPatch[iY], topMeanDelta, 0, uiPatchWidth); +#endif if (iY == (TMP_TEMPLATE_SIZE-1)) { uiSum <<= TMP_TEMPLATE_COST_SHIFT; @@ -1043,12 +1314,24 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig const short leftMeanDeltaVal = (short)_mm_cvtsi128_si32(leftMeanDelta); #endif refPatchRow = ref - TMP_TEMPLATE_SIZE; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tmpBuf = tarPatch + TMP_TEMPLATE_SIZE * tarStride; +#endif for (iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++, refPatchRow += uiStride) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + diffSum += calcDiffDelta4<vext>((const short*)refPatchRow, tmpBuf, leftMeanDelta, 0, TMP_TEMPLATE_SIZE); +#else diffSum += calcDiffDelta4<vext>((const short*) refPatchRow, (const short*) tarPatch[iY], leftMeanDelta, 0, TMP_TEMPLATE_SIZE); +#endif #if JVET_AH0200_INTRA_TMP_BV_REORDER const short *pSrc1 = (const short *) refPatchRow; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const short* pSrc2 = tmpBuf; + tmpBuf += tarStride; +#else const short *pSrc2 = (const short *) tarPatch[iY]; +#endif diffSum += (abs(pSrc1[TMP_TEMPLATE_SIZE - 1] - pSrc2[TMP_TEMPLATE_SIZE - 1] - leftMeanDeltaVal) * ((1 << TMP_TEMPLATE_COST_SHIFT) - 1)); #endif @@ -1067,7 +1350,12 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig for (iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRow += uiStride) { #if JVET_AH0200_INTRA_TMP_BV_REORDER +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + uiSum = calcDiffDelta16<vext>((const short*)refPatchRow, tmpBuf, topMeanDelta256, TMP_TEMPLATE_SIZE, uiPatchWidth); + tmpBuf += tarStride; +#else uiSum = calcDiffDelta16<vext>((const short*) refPatchRow, (const short*) tarPatch[iY], topMeanDelta256, TMP_TEMPLATE_SIZE, uiPatchWidth); +#endif if (iY == (TMP_TEMPLATE_SIZE-1)) { uiSum <<= TMP_TEMPLATE_COST_SHIFT; @@ -1090,7 +1378,12 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig for (iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRow += uiStride) { #if JVET_AH0200_INTRA_TMP_BV_REORDER +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + uiSum = calcDiffDelta8<vext>((const short*)refPatchRow, tmpBuf, topMeanDelta, TMP_TEMPLATE_SIZE, uiPatchWidth); + tmpBuf += tarStride; +#else uiSum = calcDiffDelta8<vext>((const short*) refPatchRow, (const short*) tarPatch[iY], topMeanDelta, TMP_TEMPLATE_SIZE, uiPatchWidth); +#endif if (iY == (TMP_TEMPLATE_SIZE-1)) { uiSum <<= TMP_TEMPLATE_COST_SHIFT; @@ -1110,7 +1403,12 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig for (iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRow += uiStride) { #if JVET_AH0200_INTRA_TMP_BV_REORDER +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + uiSum = calcDiffDelta4<vext>((const short*)refPatchRow, tmpBuf, topMeanDelta, TMP_TEMPLATE_SIZE, uiPatchWidth); + tmpBuf += tarStride; +#else uiSum = calcDiffDelta4<vext>((const short*) refPatchRow, (const short*) tarPatch[iY], topMeanDelta, TMP_TEMPLATE_SIZE, uiPatchWidth); +#endif if (iY == (TMP_TEMPLATE_SIZE-1)) { uiSum <<= TMP_TEMPLATE_COST_SHIFT; @@ -1133,12 +1431,24 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig const short leftMeanDeltaVal = (short)_mm_cvtsi128_si32(leftMeanDelta); #endif refPatchRow = ref - TMP_TEMPLATE_SIZE; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tmpBuf = tarPatch + TMP_TEMPLATE_SIZE * tarStride; +#endif for (iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++, refPatchRow += uiStride) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + leftDiff += calcDiffDelta4<vext>((const short*)refPatchRow, tmpBuf, leftMeanDelta, 0, TMP_TEMPLATE_SIZE); +#else leftDiff += calcDiffDelta4<vext>((const short*) refPatchRow, (const short*) tarPatch[iY], leftMeanDelta, 0, TMP_TEMPLATE_SIZE); +#endif #if JVET_AH0200_INTRA_TMP_BV_REORDER const short *pSrc1 = (const short *) refPatchRow; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const short* pSrc2 = tmpBuf; + tmpBuf += tarStride; +#else const short *pSrc2 = (const short *) tarPatch[iY]; +#endif leftDiff += (abs(pSrc1[TMP_TEMPLATE_SIZE - 1] - pSrc2[TMP_TEMPLATE_SIZE - 1] - leftMeanDeltaVal) * ((1 << TMP_TEMPLATE_COST_SHIFT) - 1)); #endif @@ -1158,7 +1468,12 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig for (iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRow += uiStride) { #if JVET_AH0200_INTRA_TMP_BV_REORDER +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + uiSum = calcDiffDelta16<vext>((const short*)refPatchRow, tmpBuf, topMeanDelta256, 0, iCols); + tmpBuf += tarStride; +#else uiSum = calcDiffDelta16<vext>((const short*) refPatchRow, (const short*) tarPatch[iY], topMeanDelta256, 0, iCols); +#endif if (iY == (TMP_TEMPLATE_SIZE-1)) { uiSum <<= TMP_TEMPLATE_COST_SHIFT; @@ -1181,7 +1496,12 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig for (iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRow += uiStride) { #if JVET_AH0200_INTRA_TMP_BV_REORDER +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + uiSum = calcDiffDelta8<vext>((const short*)refPatchRow, tmpBuf, topMeanDelta, 0, iCols); + tmpBuf += tarStride; +#else uiSum = calcDiffDelta8<vext>((const short*) refPatchRow, (const short*) tarPatch[iY], topMeanDelta, 0, iCols); +#endif if (iY == (TMP_TEMPLATE_SIZE-1)) { uiSum <<= TMP_TEMPLATE_COST_SHIFT; @@ -1201,7 +1521,12 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig for (iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRow += uiStride) { #if JVET_AH0200_INTRA_TMP_BV_REORDER +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + uiSum = calcDiffDelta4<vext>((const short*)refPatchRow, tmpBuf, topMeanDelta, 0, iCols); + tmpBuf += tarStride; +#else uiSum = calcDiffDelta4<vext>((const short*) refPatchRow, (const short*) tarPatch[iY], topMeanDelta, 0, iCols); +#endif if (iY == (TMP_TEMPLATE_SIZE-1)) { uiSum <<= TMP_TEMPLATE_COST_SHIFT; @@ -1225,10 +1550,19 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig #endif for (iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++, refPatchRow += uiStride) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + diffSum += calcDiffDelta4<vext>((const short*)refPatchRow, tmpBuf, leftMeanDelta, 0, TMP_TEMPLATE_SIZE); +#else diffSum += calcDiffDelta4<vext>((const short*) refPatchRow, (const short*) tarPatch[iY], leftMeanDelta, 0, TMP_TEMPLATE_SIZE); +#endif #if JVET_AH0200_INTRA_TMP_BV_REORDER const short *pSrc1 = (const short *) refPatchRow; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const short* pSrc2 = tmpBuf; + tmpBuf += tarStride; +#else const short *pSrc2 = (const short *) tarPatch[iY]; +#endif diffSum += (abs(pSrc1[TMP_TEMPLATE_SIZE - 1] - pSrc2[TMP_TEMPLATE_SIZE - 1] - leftMeanDeltaVal) * ((1 << TMP_TEMPLATE_COST_SHIFT) - 1)); #endif @@ -1258,9 +1592,14 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig { for (iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRow += uiStride) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const short* pSrc1 = tmpBuf; + tmpBuf += tarStride; +#else const short* pSrc1 = (const short*) tarPatch[iY]; +#endif const short* pSrc2 = (const short*) refPatchRow; -#if JVET_AH0200_INTRA_TMP_BV_REORDER +#if JVET_AH0200_INTRA_TMP_BV_REORDER && !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT uiSum = calcDiff4<vext>(pSrc1, pSrc2, 0, TMP_TEMPLATE_SIZE); if (iY == (TMP_TEMPLATE_SIZE-1)) { @@ -1292,9 +1631,14 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig { for (iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRow += uiStride) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const short* pSrc1 = tmpBuf; + tmpBuf += tarStride; +#else const short* pSrc1 = (const short*) tarPatch[iY]; +#endif const short* pSrc2 = (const short*) refPatchRow; -#if JVET_AH0200_INTRA_TMP_BV_REORDER +#if JVET_AH0200_INTRA_TMP_BV_REORDER && !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT uiSum = calcDiff4<vext>(pSrc1, pSrc2, 0, TMP_TEMPLATE_SIZE); if (iY == (TMP_TEMPLATE_SIZE-1)) { @@ -1323,9 +1667,14 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig { for (iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRow += uiStride) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const short* pSrc1 = tmpBuf; + tmpBuf += tarStride; +#else const short* pSrc1 = (const short*) tarPatch[iY]; +#endif const short* pSrc2 = (const short*) refPatchRow; -#if JVET_AH0200_INTRA_TMP_BV_REORDER +#if JVET_AH0200_INTRA_TMP_BV_REORDER && !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT uiSum = calcDiff4<vext>(pSrc1, pSrc2, 0, TMP_TEMPLATE_SIZE); if (iY == (TMP_TEMPLATE_SIZE-1)) { @@ -1401,10 +1750,18 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig // vertical difference int iCols = TMP_TEMPLATE_SIZE; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tmpBuf = tarPatch + TMP_TEMPLATE_SIZE * tarStride; +#endif for (iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tarPatchRow = tmpBuf; + tmpBuf += tarStride; +#else tarPatchRow = tarPatch[iY]; +#endif const short *pSrc1 = (const short *) tarPatchRow; const short *pSrc2 = (const short *) refPatchRow; @@ -1439,15 +1796,24 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig { for (iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRow += uiStride) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const short* const pSrc1 = (const short*)tmpBuf; + tmpBuf += tarStride; +#else const short* pSrc1 = (const short*) tarPatch[iY]; +#endif const short* pSrc2 = (const short*) refPatchRow; #if JVET_AH0200_INTRA_TMP_BV_REORDER +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + diffSum += calcDiff4<vext>(pSrc1, pSrc2, 0, TMP_TEMPLATE_SIZE); +#else uiSum = calcDiff4<vext>(pSrc1, pSrc2, 0, TMP_TEMPLATE_SIZE); if (iY == (TMP_TEMPLATE_SIZE-1)) { uiSum <<= TMP_TEMPLATE_COST_SHIFT; } diffSum += uiSum; +#endif uiSum = calcDiff16<vext>(pSrc1, pSrc2, TMP_TEMPLATE_SIZE, uiPatchWidth); if (iY == (TMP_TEMPLATE_SIZE-1)) { @@ -1472,15 +1838,24 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig { for (iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRow += uiStride) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + const short* const pSrc1 = (const short*)tmpBuf; + tmpBuf += tarStride; +#else const short* pSrc1 = (const short*) tarPatch[iY]; +#endif const short* pSrc2 = (const short*) refPatchRow; #if JVET_AH0200_INTRA_TMP_BV_REORDER +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + diffSum += calcDiff4<vext>(pSrc1, pSrc2, 0, TMP_TEMPLATE_SIZE); +#else uiSum = calcDiff4<vext>(pSrc1, pSrc2, 0, TMP_TEMPLATE_SIZE); if (iY == (TMP_TEMPLATE_SIZE-1)) { uiSum <<= TMP_TEMPLATE_COST_SHIFT; } diffSum += uiSum; +#endif uiSum = calcDiff8<vext>(pSrc1, pSrc2, TMP_TEMPLATE_SIZE, uiPatchWidth); if (iY == (TMP_TEMPLATE_SIZE-1)) { @@ -1503,7 +1878,13 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig for (iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRow += uiStride) { #if JVET_AH0200_INTRA_TMP_BV_REORDER +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + diffSum += calcDiff4<vext>(tmpBuf, (const short*)refPatchRow, 0, TMP_TEMPLATE_SIZE); + uiSum = calcDiff4<vext>(tmpBuf, (const short*)refPatchRow, TMP_TEMPLATE_SIZE, uiPatchWidth); + tmpBuf += tarStride; +#else uiSum = calcDiff8<vext>((const short*) tarPatch[iY], (const short*) refPatchRow, 0, uiPatchWidth); +#endif if (iY == (TMP_TEMPLATE_SIZE-1)) { uiSum <<= TMP_TEMPLATE_COST_SHIFT; @@ -1558,10 +1939,18 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig // vertical difference int iCols = TMP_TEMPLATE_SIZE; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tmpBuf = tarPatch + TMP_TEMPLATE_SIZE * tarStride; +#endif for (iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tarPatchRow = tmpBuf; + tmpBuf += tarStride; +#else tarPatchRow = tarPatch[iY]; +#endif const short *pSrc1 = (const short *) tarPatchRow; const short *pSrc2 = (const short *) refPatchRow; @@ -1598,7 +1987,12 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig for (iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRow += uiStride) { #if JVET_AH0200_INTRA_TMP_BV_REORDER +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + uiSum = calcDiff16<vext>(tmpBuf, (const short*)refPatchRow, TMP_TEMPLATE_SIZE, uiPatchWidth); + tmpBuf += tarStride; +#else uiSum = calcDiff16<vext>((const short*) tarPatch[iY], (const short*) refPatchRow, TMP_TEMPLATE_SIZE, uiPatchWidth); +#endif if (iY == (TMP_TEMPLATE_SIZE-1)) { uiSum <<= TMP_TEMPLATE_COST_SHIFT; @@ -1621,7 +2015,12 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig for (iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRow += uiStride) { #if JVET_AH0200_INTRA_TMP_BV_REORDER +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + uiSum = calcDiff8<vext>(tmpBuf, (const short*)refPatchRow, TMP_TEMPLATE_SIZE, uiPatchWidth); + tmpBuf += tarStride; +#else uiSum = calcDiff8<vext>((const short*) tarPatch[iY], (const short*) refPatchRow, TMP_TEMPLATE_SIZE, uiPatchWidth); +#endif if (iY == (TMP_TEMPLATE_SIZE-1)) { uiSum <<= TMP_TEMPLATE_COST_SHIFT; @@ -1641,7 +2040,12 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig for (iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRow += uiStride) { #if JVET_AH0200_INTRA_TMP_BV_REORDER +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + uiSum = calcDiff4<vext>(tmpBuf, (const short*)refPatchRow, TMP_TEMPLATE_SIZE, uiPatchWidth); + tmpBuf += tarStride; +#else uiSum = calcDiff4<vext>((const short*) tarPatch[iY], (const short*) refPatchRow, TMP_TEMPLATE_SIZE, uiPatchWidth); +#endif if (iY == (TMP_TEMPLATE_SIZE-1)) { uiSum <<= TMP_TEMPLATE_COST_SHIFT; @@ -1694,13 +2098,21 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig else // L { refPatchRow = ref - TMP_TEMPLATE_SIZE; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tmpBuf = tarPatch + TMP_TEMPLATE_SIZE * tarStride; +#endif // vertical difference int iCols = TMP_TEMPLATE_SIZE; for (iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tarPatchRow = tmpBuf; + tmpBuf += tarStride; +#else tarPatchRow = tarPatch[iY]; +#endif const short *pSrc1 = (const short *) tarPatchRow; const short *pSrc2 = (const short *) refPatchRow; @@ -1739,7 +2151,12 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig for (iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRow += uiStride) { #if JVET_AH0200_INTRA_TMP_BV_REORDER +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + uiSum = calcDiff16<vext>(tmpBuf, (const short*)refPatchRow, 0, iCols); + tmpBuf += tarStride; +#else uiSum = calcDiff16<vext>((const short*) tarPatch[iY], (const short*) refPatchRow, 0, iCols); +#endif if (iY == (TMP_TEMPLATE_SIZE-1)) { uiSum <<= TMP_TEMPLATE_COST_SHIFT; @@ -1762,7 +2179,12 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig for (iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRow += uiStride) { #if JVET_AH0200_INTRA_TMP_BV_REORDER +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + uiSum = calcDiff8<vext>(tmpBuf, (const short*)refPatchRow, 0, iCols); + tmpBuf += tarStride; +#else uiSum = calcDiff8<vext>((const short*) tarPatch[iY], (const short*) refPatchRow, 0, iCols); +#endif if (iY == (TMP_TEMPLATE_SIZE-1)) { uiSum <<= TMP_TEMPLATE_COST_SHIFT; @@ -1782,7 +2204,12 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig for (iY = 0; iY < TMP_TEMPLATE_SIZE; iY++, refPatchRow += uiStride) { #if JVET_AH0200_INTRA_TMP_BV_REORDER +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + uiSum = calcDiff4<vext>(tmpBuf, (const short*)refPatchRow, 0, iCols); + tmpBuf += tarStride; +#else uiSum = calcDiff4<vext>((const short*) tarPatch[iY], (const short*) refPatchRow, 0, iCols); +#endif if (iY == (TMP_TEMPLATE_SIZE-1)) { uiSum <<= TMP_TEMPLATE_COST_SHIFT; @@ -1841,7 +2268,12 @@ void calcTemplateDiffSIMD(Pel *ref, unsigned int uiStride, Pel **tarPatch, unsig for (iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++) { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + tarPatchRow = tmpBuf; + tmpBuf += tarStride; +#else tarPatchRow = tarPatch[iY]; +#endif const short *pSrc1 = (const short *) tarPatchRow; const short *pSrc2 = (const short *) refPatchRow; diff --git a/source/Lib/DecoderLib/DecCu.cpp b/source/Lib/DecoderLib/DecCu.cpp index 90fb33183..6259742b3 100644 --- a/source/Lib/DecoderLib/DecCu.cpp +++ b/source/Lib/DecoderLib/DecCu.cpp @@ -1026,7 +1026,11 @@ void DecCu::xIntraRecBlk( TransformUnit& tu, const ComponentID compID ) } else { +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + m_pcIntraPred->searchFracCandidate(tu.cu, m_pcIntraPred->getTargetPatch(), tempType); +#else m_pcIntraPred->searchFracCandidate(tu.cu, m_pcIntraPred->getTargetPatch(floorLog2(std::max(pu.lwidth(), pu.lheight())) - 2), tempType); +#endif } } #endif diff --git a/source/Lib/EncoderLib/IntraSearch.cpp b/source/Lib/EncoderLib/IntraSearch.cpp index 309f917a4..162b0dcde 100644 --- a/source/Lib/EncoderLib/IntraSearch.cpp +++ b/source/Lib/EncoderLib/IntraSearch.cpp @@ -1964,7 +1964,11 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c CHECK(cu.tmpLicFlag, "cu.tmpLicFlag == 1"); cu.ibcLicFlag = cu.tmpLicFlag; cu.ibcLicIdx = uiRdModeListTmp[idxInList].tmpLicIdc; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + searchFracCandidate(&cu, getTargetPatch(), templateType); +#else searchFracCandidate(&cu, getTargetPatch(floorLog2(std::max(cu.lwidth(), cu.lheight())) - 2), templateType); +#endif for (int spIdx = 0; spIdx < std::min(2, (int) m_mtmpFracCandList[cu.tmpIdx].size()); spIdx++) { cu.tmpIsSubPel = m_mtmpFracCandList[cu.tmpIdx][spIdx].m_subpel; @@ -2031,7 +2035,11 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c CHECK(!cu.tmpLicFlag, "cu.tmpLicFlag != 0"); cu.ibcLicFlag = cu.tmpLicFlag; cu.ibcLicIdx = uiRdModeListTmpLic[idxInList].tmpLicIdc; +#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT + searchFracCandidate(&cu, getTargetPatch(), templateType); +#else searchFracCandidate(&cu, getTargetPatch(floorLog2(std::max(cu.lwidth(), cu.lheight())) - 2), templateType); +#endif for (int spIdx = 0; spIdx < std::min(2, (int) m_mtmpFracCandList[cu.tmpIdx].size()); spIdx++) { cu.tmpIsSubPel = m_mtmpFracCandList[cu.tmpIdx][spIdx].m_subpel; -- GitLab From 2e93815e90771b6a8cc4358c81122d7e83ce7844 Mon Sep 17 00:00:00 2001 From: Thierry Dumas <thierry.dumas@InterDigital.com> Date: Thu, 25 Jul 2024 08:48:09 +0200 Subject: [PATCH 2/3] Cleanings --- source/Lib/CommonLib/IntraPrediction.cpp | 194 +++++++++++------------ source/Lib/CommonLib/x86/IntraX86.h | 28 ++-- 2 files changed, 109 insertions(+), 113 deletions(-) diff --git a/source/Lib/CommonLib/IntraPrediction.cpp b/source/Lib/CommonLib/IntraPrediction.cpp index dbf9606d3..1f3e76a42 100644 --- a/source/Lib/CommonLib/IntraPrediction.cpp +++ b/source/Lib/CommonLib/IntraPrediction.cpp @@ -15025,25 +15025,27 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta #if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT const int numInitMrg = static_cast<int>(m_bvBasedMergeCandidates.size()); if (uiBlkWidth <= 8 && uiBlkHeight <= 8) - { + { regTL = Mv(mvXMins[regionId], mvYMins[regionId]); regBR = Mv(mvXMaxs[regionId], mvYMaxs[regionId]); bRegOverlap = false; + for (int mrgIdx = 0; mrgIdx < numInitMrg; mrgIdx++) + { + bvMrg = m_bvBasedMergeCandidates[mrgIdx]; + iMrgWindTL = bvMrg - bvOffMerge; + iMrgWindBR = bvMrg + bvOffMerge; - for (int mrgIdx = 0; mrgIdx < numInitMrg; mrgIdx++) - { - bvMrg = m_bvBasedMergeCandidates[mrgIdx]; - iMrgWindTL = bvMrg - bvOffMerge; - iMrgWindBR = bvMrg + bvOffMerge; - - if ((regTL.hor >= iMrgWindTL.hor) && (regTL.ver >= iMrgWindTL.ver) && (regBR.hor <= iMrgWindBR.hor) && (regBR.ver <= iMrgWindBR.ver)) + if ((regTL.hor >= iMrgWindTL.hor) && (regTL.ver >= iMrgWindTL.ver) && (regBR.hor <= iMrgWindBR.hor) && (regBR.ver <= iMrgWindBR.ver)) + { + bRegOverlap = true; + break; + } + } + if (bRegOverlap) { - bRegOverlap = true; - break; + continue; } } - if (bRegOverlap) continue; - } #endif #if JVET_AB0130_ITMP_SAMPLING #if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT @@ -15054,26 +15056,23 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta bool isAvailablePairFound{false}; for (iXOffset = mvXMax - shiftX++; iXOffset >= mvXMin; iXOffset -= TMP_SAMPLING) { - int iXOffset_metric = iXOffset + offset; - int iYOffset_metric = iYOffset + offset; + int iXOffsetMetric = iXOffset + offset; + int iYOffsetMetric = iYOffset + offset; bool isTransferredLeft = false; bool isTransferredTop = false; - if (iXOffset_metric > mvXMax) + if (iXOffsetMetric > mvXMax) { - iXOffset_metric = iXOffset; + iXOffsetMetric = iXOffset; isTransferredLeft = true; } - if (iYOffset_metric > mvYMax) + if (iYOffsetMetric > mvYMax) { - iYOffset_metric = iYOffset; + iYOffsetMetric = iYOffset; isTransferredTop = true; } #else for (iYOffset = mvYMax; iYOffset >= mvYMin; iYOffset -= TMP_SAMPLING) { -#if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT - bool isAvailablePairFound{false}; -#endif for (iXOffset = mvXMax; iXOffset >= mvXMin; iXOffset -= TMP_SAMPLING) { #endif @@ -15094,10 +15093,10 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta { if (bJointCalc || useMR) { - if (!pcCU->cs->isDecomp(Position(iCurrX + iXOffset_metric + uiBlkWidth - 1, iCurrY + iYOffset_metric + uiBlkHeight - 1), CHANNEL_TYPE_LUMA)) + if (!pcCU->cs->isDecomp(Position(iCurrX + iXOffsetMetric + uiBlkWidth - 1, iCurrY + iYOffsetMetric + uiBlkHeight - 1), CHANNEL_TYPE_LUMA)) { - iXOffset_metric = iXOffset; - iYOffset_metric = iYOffset; + iXOffsetMetric = iXOffset; + iYOffsetMetric = iYOffset; isTransferredLeft = true; isTransferredTop = true; } @@ -15150,7 +15149,7 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta if (diffSupp[temIdx] < pDiffSupp[temIdx]) { #if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT - updateCandList(TempLibFast(iXOffset_metric, iYOffset_metric, Mv(iXOffset_metric, iYOffset_metric) - bvOffSparseTL, Mv(iXOffset_metric, iYOffset_metric) + bvOffSparseBR, isTransferredLeft, isTransferredTop, regionId), diffSupp[temIdx], sparseMtmpCandListSupp[temIdx], sparseMtmpCostListSupp[temIdx], mtmpNumSparseForLic[temIdx]); + updateCandList(TempLibFast(iXOffsetMetric, iYOffsetMetric, Mv(iXOffsetMetric, iYOffsetMetric) - bvOffSparseTL, Mv(iXOffsetMetric, iYOffsetMetric) + bvOffSparseBR, isTransferredLeft, isTransferredTop, regionId), diffSupp[temIdx], sparseMtmpCandListSupp[temIdx], sparseMtmpCostListSupp[temIdx], mtmpNumSparseForLic[temIdx]); #else updateCandList(TempLibFast(iXOffset, iYOffset, regionId), diffSupp[temIdx], sparseMtmpCandListSupp[temIdx], sparseMtmpCostListSupp[temIdx], mtmpNumSparseForLic[temIdx]); #endif @@ -15164,8 +15163,7 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta else { #if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT - Pel* ref_tmp = pcCU->tmpLicFlag ? refCurr + licShift : refCurr; - m_calcTemplateDiff(ref_tmp, refStride, tarPatch, m_uiPicStride, uiPatchWidth, uiPatchHeight, diff, pDiff, tempType, needTopLeft ? 3 : 0, useMR, log2SizeTop, log2SizeLeft, sizeTopLeft, topTargetMean, leftTargetMean); + m_calcTemplateDiff(pcCU->tmpLicFlag ? refCurr + licShift : refCurr, refStride, tarPatch, m_uiPicStride, uiPatchWidth, uiPatchHeight, diff, pDiff, tempType, needTopLeft ? 3 : 0, useMR, log2SizeTop, log2SizeLeft, sizeTopLeft, topTargetMean, leftTargetMean); #else m_calcTemplateDiff(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, diff, pDiff, tempType, needTopLeft ? 3 : 0, useMR, log2SizeTop, log2SizeLeft, sizeTopLeft, topTargetMean, leftTargetMean); #endif @@ -15183,8 +15181,8 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta if (diff[temIdx] < pDiff[temIdx]) { #if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT - int curXoffset = (!bJointCalc && (useMR)) ? iXOffset_metric : iXOffset; - int curYoffset = (!bJointCalc && (useMR)) ? iYOffset_metric : iYOffset; + int curXoffset = (!bJointCalc && (useMR)) ? iXOffsetMetric : iXOffset; + int curYoffset = (!bJointCalc && (useMR)) ? iYOffsetMetric : iYOffset; updateCandList(TempLibFast(curXoffset, curYoffset, Mv(curXoffset, curYoffset) - bvOffSparseTL, Mv(curXoffset, curYoffset) + bvOffSparseBR, !bJointCalc && (useMR) ? isTransferredLeft : false, !bJointCalc && (useMR) ? isTransferredTop : false, regionId), diff[temIdx], sparseMtmpCandList[temIdx], sparseMtmpCostList[temIdx], mtmpNumSparse[temIdx]); #else updateCandList(TempLibFast(iXOffset, iYOffset, regionId), diff[temIdx], @@ -15311,7 +15309,6 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta bvBasedMergeCandidatesITMP = m_bvBasedMergeCandidates; std::vector<Mv> bvBasedMergeCandidatesOut; std::vector<Mv> bvBasedMergeCandidatesIn; - for (int iBv = 0; iBv < bvRegionIdList.size(); iBv++) { if (bvRegionIdList[iBv] == TMP_MRG_REG_ID) @@ -15325,7 +15322,6 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta } bvBasedMergeCandidatesOut.insert(bvBasedMergeCandidatesOut.end(), bvBasedMergeCandidatesIn.begin(), bvBasedMergeCandidatesIn.end()); bvBasedMergeCandidatesITMP = bvBasedMergeCandidatesOut; - #else if (m_bvBasedMergeCandidates.size() > TMP_NUM_MERGE_CANDS) { @@ -15473,8 +15469,8 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta } // Clustering of the Merge and ARBVP candidates based on refinement window - bool ClustMrgReg[3] = { false, false, false }; - bool ClustMrgSupp[3] = { false, false, false }; + bool clustMrgReg[3] = { false, false, false }; + bool clustMrgSupp[3] = { false, false, false }; Mv iMergeWindTL, iMergeWindBR; Mv iSparseWindTL, iSparseWindBR; Mv mergeCand, sparseCand; @@ -15497,13 +15493,13 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta sparseCand = Mv(sparseMtmpCandList[temIdx][i].m_pX, sparseMtmpCandList[temIdx][i].m_pY); if (mergeCand == sparseCand) { - ClustMrgReg[temIdx] = true; + clustMrgReg[temIdx] = true; bOverlap = true; break; } - iSparseWindTL = (sparseMtmpCandList[temIdx][i].m_windTL); - iSparseWindBR = (sparseMtmpCandList[temIdx][i].m_windBR); + iSparseWindTL = sparseMtmpCandList[temIdx][i].m_windTL; + iSparseWindBR = sparseMtmpCandList[temIdx][i].m_windBR; if (!((iMergeWindBR.hor < iSparseWindTL.hor) || (iMergeWindTL.hor > iSparseWindBR.hor)) && !((iMergeWindBR.ver < iSparseWindTL.ver) || (iMergeWindTL.ver > iSparseWindBR.ver))) { @@ -15529,7 +15525,7 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta sparseMtmpCandList[temIdx][i].m_windBR = iSparseWindBR; sparseMtmpCandList[temIdx][i].m_rId = regionId; } - ClustMrgReg[temIdx] = true; + clustMrgReg[temIdx] = true; bOverlap = true; break; } @@ -15544,7 +15540,7 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta { pDiff[temIdx] = std::min(static_cast<int>(sparseMtmpCostList[temIdx][mtmpNumSparse[temIdx] - 1]), pDiff[temIdx]); } - ClustMrgReg[temIdx] = true; + clustMrgReg[temIdx] = true; } } } @@ -15569,13 +15565,13 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta sparseCand = Mv(sparseMtmpCandListSupp[temIdx][i].m_pX, sparseMtmpCandListSupp[temIdx][i].m_pY); if (mergeCand == sparseCand) { - ClustMrgSupp[temIdx] = true; + clustMrgSupp[temIdx] = true; bOverlap = true; break; } - iSparseWindTL = (sparseMtmpCandListSupp[temIdx][i].m_windTL); - iSparseWindBR = (sparseMtmpCandListSupp[temIdx][i].m_windBR); + iSparseWindTL = sparseMtmpCandListSupp[temIdx][i].m_windTL; + iSparseWindBR = sparseMtmpCandListSupp[temIdx][i].m_windBR; if (!((iMergeWindBR.hor < iSparseWindTL.hor) || (iMergeWindTL.hor > iSparseWindBR.hor)) && !((iMergeWindBR.ver < iSparseWindTL.ver) || (iMergeWindTL.ver > iSparseWindBR.ver))) { iSparseWindTL = Mv(std::min(iSparseWindTL.hor, iMergeWindTL.hor), std::min(iSparseWindTL.ver, iMergeWindTL.ver)); @@ -15598,7 +15594,7 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta sparseMtmpCandListSupp[temIdx][i].m_rId = regionId; } bOverlap = true; - ClustMrgSupp[temIdx] = true; + clustMrgSupp[temIdx] = true; break; } } @@ -15612,7 +15608,7 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta { pDiffSupp[temIdx] = std::min(static_cast<int>(sparseMtmpCostListSupp[temIdx][mtmpNumSparseForLic[temIdx] - 1]), pDiffSupp[temIdx]); } - ClustMrgSupp[temIdx] = true; + clustMrgSupp[temIdx] = true; } } } @@ -15626,23 +15622,57 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta for (int iM = 0; iM < bvBasedMergeCandidatesITMP.size(); iM++) { #if JVET_AH0055_INTRA_TMP_ARBVP - regionId = iM < numNeighborMerge ? regionNum : regionNum + 1; + regionId = iM < numNeighborMerge ? regionNum: regionNum + 1; #else - regionId = regionNum; + regionId = regionNum; #endif - iYOffset = bvBasedMergeCandidatesITMP[iM].ver; - iXOffset = bvBasedMergeCandidatesITMP[iM].hor; - refCurr = ref + iYOffset * refStride + iXOffset; + iYOffset = bvBasedMergeCandidatesITMP[iM].ver; + iXOffset = bvBasedMergeCandidatesITMP[iM].hor; + refCurr = ref + iYOffset * refStride + iXOffset; #if JVET_AG0136_INTRA_TMP_LIC - if (bJointCalc) - { - m_calcTemplateDiffJointSadMrsad(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, diff, diffSupp, pDiff, pDiffSupp, tempType, log2SizeTop, log2SizeLeft, sizeTopLeft, topTargetMean, leftTargetMean); + if (bJointCalc) + { + m_calcTemplateDiffJointSadMrsad(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, diff, diffSupp, pDiff, pDiffSupp, tempType, log2SizeTop, log2SizeLeft, sizeTopLeft, topTargetMean, leftTargetMean); + for (int temIdx = 0; temIdx < 3; temIdx++) + { + bool bRedundant = false; + for (int i = 0; i < sparseMtmpCandListSupp[temIdx].size(); i++) + { + if (iYOffset == sparseMtmpCandListSupp[temIdx][i].m_pY && iXOffset == sparseMtmpCandListSupp[temIdx][i].m_pX) + { + bRedundant = true; + break; + } + } + if (bRedundant) + { + continue; + } + if (diffSupp[temIdx] < pDiffSupp[temIdx]) + { + updateCandList(TempLibFast(iXOffset, iYOffset, regionId), diffSupp[temIdx], sparseMtmpCandListSupp[temIdx], sparseMtmpCostListSupp[temIdx], mtmpNumSparseForLic[temIdx]); + if (sparseMtmpCandListSupp[temIdx].size() == mtmpNumSparseForLic[temIdx]) + { + pDiffSupp[temIdx] = std::min((int)sparseMtmpCostListSupp[temIdx][mtmpNumSparseForLic[temIdx] - 1], pDiffSupp[temIdx]); + } + isBvAddedSupp = true; + } + } + } + else + { + m_calcTemplateDiff(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, diff, pDiff, tempType, needTopLeft ? 3 : 0, useMR, log2SizeTop, log2SizeLeft, sizeTopLeft, topTargetMean, leftTargetMean); + } +#else + m_calcTemplateDiff(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, diff, pDiff, tempType, needTopLeft ? 3 : 0); +#endif for (int temIdx = 0; temIdx < 3; temIdx++) { + // check for redundancy bool bRedundant = false; - for (int i = 0; i < sparseMtmpCandListSupp[temIdx].size(); i++) + for (int i = 0; i < sparseMtmpCandList[temIdx].size(); i++) { - if (iYOffset == sparseMtmpCandListSupp[temIdx][i].m_pY && iXOffset == sparseMtmpCandListSupp[temIdx][i].m_pX) + if (iYOffset == sparseMtmpCandList[temIdx][i].m_pY && iXOffset == sparseMtmpCandList[temIdx][i].m_pX) { bRedundant = true; break; @@ -15652,54 +15682,20 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta { continue; } - if (diffSupp[temIdx] < pDiffSupp[temIdx]) + if (diff[temIdx] < pDiff[temIdx]) { - updateCandList(TempLibFast(iXOffset, iYOffset, regionId), diffSupp[temIdx], sparseMtmpCandListSupp[temIdx], sparseMtmpCostListSupp[temIdx], mtmpNumSparseForLic[temIdx]); - if (sparseMtmpCandListSupp[temIdx].size() == mtmpNumSparseForLic[temIdx]) + updateCandList(TempLibFast(iXOffset, iYOffset, regionId), diff[temIdx], + sparseMtmpCandList[temIdx], sparseMtmpCostList[temIdx], mtmpNumSparse[temIdx]); + if (sparseMtmpCandList[temIdx].size() == mtmpNumSparse[temIdx]) { - pDiffSupp[temIdx] = std::min((int)sparseMtmpCostListSupp[temIdx][mtmpNumSparseForLic[temIdx] - 1], pDiffSupp[temIdx]); + pDiff[temIdx] = std::min((int)sparseMtmpCostList[temIdx][mtmpNumSparse[temIdx] - 1], pDiff[temIdx]); } - isBvAddedSupp = true; - } - } - } - else - { - m_calcTemplateDiff(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, diff, pDiff, tempType, needTopLeft ? 3 : 0, useMR, log2SizeTop, log2SizeLeft, sizeTopLeft, topTargetMean, leftTargetMean); - } -#else - m_calcTemplateDiff(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, diff, pDiff, tempType, needTopLeft ? 3 : 0); -#endif - for (int temIdx = 0; temIdx < 3; temIdx++) - { - // check for redundancy - bool bRedundant = false; - for (int i = 0; i < sparseMtmpCandList[temIdx].size(); i++) - { - if (iYOffset == sparseMtmpCandList[temIdx][i].m_pY && iXOffset == sparseMtmpCandList[temIdx][i].m_pX) - { - bRedundant = true; - break; - } - } - if (bRedundant) - { - continue; - } - if (diff[temIdx] < pDiff[temIdx]) - { - updateCandList(TempLibFast(iXOffset, iYOffset, regionId), diff[temIdx], - sparseMtmpCandList[temIdx], sparseMtmpCostList[temIdx], mtmpNumSparse[temIdx]); - if (sparseMtmpCandList[temIdx].size() == mtmpNumSparse[temIdx]) - { - pDiff[temIdx] = std::min((int)sparseMtmpCostList[temIdx][mtmpNumSparse[temIdx] - 1], pDiff[temIdx]); - } #if JVET_AG0136_INTRA_TMP_LIC - isBvAddedReg = true; + isBvAddedReg = true; #endif + } } } -} #endif #endif @@ -15866,7 +15862,7 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta #if JVET_AG0151_INTRA_TMP_MERGE_MODE #if JVET_AG0136_INTRA_TMP_LIC #if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT - if (ClustMrgReg[temIdx]) + if (clustMrgReg[temIdx]) #else if (isBvAddedReg) #endif @@ -16043,7 +16039,7 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta } } #if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT - } + } #endif } #else @@ -16301,7 +16297,7 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta } #if JVET_AG0151_INTRA_TMP_MERGE_MODE #if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT - if (ClustMrgSupp[temIdx]) + if (clustMrgSupp[temIdx]) #else if (isBvAddedSupp) #endif @@ -16373,6 +16369,7 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta m_mtmpCostListUseMR = refineMtmpCostList[0]; static_vector<TempLibFast, MTMP_NUM>& refMtmpCandListTemp = m_mtmpCandListUseMR; if (tempType == L_SHAPE_TEMPLATE && needTopLeft) + { #if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT // If the list size is less than INIT_TL_POS the Only-TL candidates are skip if (refMtmpCandListTemp.size() >= INIT_TL_POS) @@ -16381,7 +16378,6 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta bool bRedundant = false; int mvXCur, mvYCur, pos; #endif - { for (int temIdx = 2; temIdx > 0; temIdx--) { #if !JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT @@ -16433,7 +16429,7 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta } } #if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT - } + } #endif } m_tmpNumCandUseMR = static_cast<int>(m_mtmpCandListUseMR.size()); diff --git a/source/Lib/CommonLib/x86/IntraX86.h b/source/Lib/CommonLib/x86/IntraX86.h index 51c195977..96cf4777f 100644 --- a/source/Lib/CommonLib/x86/IntraX86.h +++ b/source/Lib/CommonLib/x86/IntraX86.h @@ -171,15 +171,15 @@ inline void calcDiffDelta4Joint(const short* const pSrc1, const short* const pSr ) { #if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT - const __m128i reference_v = _mm_loadl_epi64((const __m128i*) &pSrc2[start]); - const __m128i difference = _mm_sub_epi16(_mm_loadl_epi64((const __m128i*) &pSrc1[start]), reference_v); - const __m128i difference_lic = _mm_sub_epi16(_mm_loadl_epi64((const __m128i*) &pSrc1[start + licShift]), reference_v); + const __m128i referenceVec = _mm_loadl_epi64((const __m128i*) &pSrc2[start]); + const __m128i difference = _mm_sub_epi16(_mm_loadl_epi64((const __m128i*) &pSrc1[start]), referenceVec); + const __m128i differenceLic = _mm_sub_epi16(_mm_loadl_epi64((const __m128i*) &pSrc1[start + licShift]), referenceVec); #else const __m128i difference = _mm_sub_epi16(_mm_loadl_epi64((const __m128i*) &pSrc1[start]), _mm_loadl_epi64((const __m128i*) &pSrc2[start])); #endif const __m128i vsumSad16 = _mm_abs_epi16(difference); #if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT - const __m128i vsumMrsad16 = _mm_abs_epi16(_mm_sub_epi16(difference_lic, delta)); + const __m128i vsumMrsad16 = _mm_abs_epi16(_mm_sub_epi16(differenceLic, delta)); #else const __m128i vsumMrsad16 = _mm_abs_epi16(_mm_sub_epi16(difference, delta)); #endif @@ -214,15 +214,15 @@ inline void calcDiffDelta8Joint(const short* const pSrc1, const short* const pSr { #if USE_AVX2 #if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT - const __m128i reference_v = _mm_lddqu_si128((const __m128i*) &pSrc2[start]); - const __m128i difference = _mm_sub_epi16(_mm_loadu_si128((const __m128i*) &pSrc1[start]), reference_v); - const __m128i difference_lic = _mm_sub_epi16(_mm_loadu_si128((const __m128i*) &pSrc1[start + licShift]), reference_v); + const __m128i referenceVec = _mm_lddqu_si128((const __m128i*) &pSrc2[start]); + const __m128i difference = _mm_sub_epi16(_mm_loadu_si128((const __m128i*) &pSrc1[start]), referenceVec); + const __m128i differenceLic = _mm_sub_epi16(_mm_loadu_si128((const __m128i*) &pSrc1[start + licShift]), referenceVec); #else const __m128i difference = _mm_sub_epi16(_mm_loadu_si128((const __m128i*) &pSrc1[start]), _mm_lddqu_si128((const __m128i*) &pSrc2[start])); #endif const __m128i vsumSad16 = _mm_abs_epi16(difference); #if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT - const __m128i vsumMrsad16 = _mm_abs_epi16(_mm_sub_epi16(difference_lic, delta)); + const __m128i vsumMrsad16 = _mm_abs_epi16(_mm_sub_epi16(differenceLic, delta)); #else const __m128i vsumMrsad16 = _mm_abs_epi16(_mm_sub_epi16(difference, delta)); #endif @@ -236,13 +236,13 @@ inline void calcDiffDelta8Joint(const short* const pSrc1, const short* const pSr const __m128i vsrc1 = _mm_loadu_si128((const __m128i*) &pSrc1[iX]); const __m128i vsrc2 = _mm_lddqu_si128((const __m128i*) &pSrc2[iX]); #if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT - const __m128i vsrc1_lic = _mm_loadu_si128((const __m128i*) & pSrc1[iX + licShift]); - const __m128i difference_lic = _mm_sub_epi16(vsrc1_lic, vsrc2); + const __m128i vsrcLic1 = _mm_loadu_si128((const __m128i*) & pSrc1[iX + licShift]); + const __m128i differenceLic = _mm_sub_epi16(vsrcLic1, vsrc2); #endif const __m128i difference = _mm_sub_epi16(vsrc1, vsrc2); vsumSad16 = _mm_add_epi16(vsumSad16, _mm_abs_epi16(difference)); #if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT - vsumMrsad16 = _mm_add_epi16(vsumMrsad16, _mm_abs_epi16(_mm_sub_epi16(difference_lic, delta))); + vsumMrsad16 = _mm_add_epi16(vsumMrsad16, _mm_abs_epi16(_mm_sub_epi16(differenceLic, delta))); #else vsumMrsad16 = _mm_add_epi16(vsumMrsad16, _mm_abs_epi16(_mm_sub_epi16(difference, delta))); #endif @@ -295,16 +295,16 @@ inline void calcDiffDelta16Joint(const short* pSrc1, const short* pSrc2, const _ { const __m256i vsrc1 = _mm256_loadu_si256((const __m256i*) &pSrc1[iX]); #if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT - const __m256i vsrc1_lic = _mm256_loadu_si256((const __m256i*) & pSrc1[iX + licShift]); + const __m256i vsrcLic1 = _mm256_loadu_si256((const __m256i*) & pSrc1[iX + licShift]); #endif const __m256i vsrc2 = _mm256_lddqu_si256((const __m256i*) &pSrc2[iX]); const __m256i difference = _mm256_sub_epi16(vsrc1, vsrc2); #if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT - const __m256i difference_lic = _mm256_sub_epi16(vsrc1_lic, vsrc2); + const __m256i differenceLic = _mm256_sub_epi16(vsrcLic1, vsrc2); #endif vsumSad16 = _mm256_add_epi16(vsumSad16, _mm256_abs_epi16(difference)); #if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT - vsumMrsad16 = _mm256_add_epi16(vsumMrsad16, _mm256_abs_epi16(_mm256_sub_epi16(difference_lic, delta))); + vsumMrsad16 = _mm256_add_epi16(vsumMrsad16, _mm256_abs_epi16(_mm256_sub_epi16(differenceLic, delta))); #else vsumMrsad16 = _mm256_add_epi16(vsumMrsad16, _mm256_abs_epi16(_mm256_sub_epi16(difference, delta))); #endif -- GitLab From 5cda9da7f0d1cbb4ecdb4f8c1e0ea22bfa1d0818 Mon Sep 17 00:00:00 2001 From: Thierry Dumas <thierry.dumas@InterDigital.com> Date: Fri, 26 Jul 2024 11:52:41 +0200 Subject: [PATCH 3/3] Cleanings --- source/Lib/CommonLib/CommonDef.h | 2 -- source/Lib/CommonLib/IntraPrediction.cpp | 10 +++++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h index 7b0000884..2ca0acedd 100644 --- a/source/Lib/CommonLib/CommonDef.h +++ b/source/Lib/CommonLib/CommonDef.h @@ -2182,8 +2182,6 @@ static const int EBVP_RANGE = 1; #if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT static const int NUM_TMP_ARBVP_S = 5; static const int TMP_MRG_REG_ID = 6; -static const int TMP_MRG_REF_WIND = 5; -static const int TMP_AR_REF_WIND = EBVP_RANGE; static const int INIT_TL_POS = (MTMP_NUM - TL_NUM_SPARSE); #endif diff --git a/source/Lib/CommonLib/IntraPrediction.cpp b/source/Lib/CommonLib/IntraPrediction.cpp index 1f3e76a42..99d07f921 100644 --- a/source/Lib/CommonLib/IntraPrediction.cpp +++ b/source/Lib/CommonLib/IntraPrediction.cpp @@ -14911,11 +14911,11 @@ void IntraPrediction::searchCandidateFromOnePicIntra( CodingUnit* pcCU, Pel** ta #if JVET_AI0129_INTRA_TMP_OVERLAPPING_REFINEMENT const int iRefine = 1; const int iRefineRange = TMP_SAMPLING >> 1; - const int TMP_REG_REF_WIND = iRefineRange; - const Mv bvOffSparseTL = Mv(TMP_REG_REF_WIND, TMP_REG_REF_WIND); - const Mv bvOffSparseBR = Mv(TMP_REG_REF_WIND, TMP_REG_REF_WIND); - const Mv bvOffMerge = Mv(TMP_MRG_REF_WIND, TMP_MRG_REF_WIND); - const Mv bvOffArbvp = Mv(TMP_AR_REF_WIND, TMP_AR_REF_WIND); + const int tmpMrgRefWind = 5; + const Mv bvOffSparseTL = Mv(iRefineRange, iRefineRange); + const Mv bvOffSparseBR = Mv(iRefineRange, iRefineRange); + const Mv bvOffMerge = Mv(tmpMrgRefWind, tmpMrgRefWind); + const Mv bvOffArbvp = Mv(EBVP_RANGE, EBVP_RANGE); Mv regTL, regBR, bvMrg; Mv iMrgWindTL, iMrgWindBR; bool bRegOverlap = false; -- GitLab