diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp index b7f45d32aae6c42cb38ab92c06ae4207a3b5534c..34cd83ca8e9b0858bd7ec57dd6b35c6ef4f9a5f4 100644 --- a/source/App/EncoderApp/EncApp.cpp +++ b/source/App/EncoderApp/EncApp.cpp @@ -736,6 +736,9 @@ void EncApp::xInitLibCfg() #endif m_cEncLib.setPROF ( m_PROF ); m_cEncLib.setBIO (m_BIO); +#if ARMC_TM + m_cEncLib.setAML ( m_AML ); +#endif m_cEncLib.setUseLMChroma ( m_LMChroma ); m_cEncLib.setHorCollocatedChromaFlag ( m_horCollocatedChromaFlag ); m_cEncLib.setVerCollocatedChromaFlag ( m_verCollocatedChromaFlag ); diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp index 9d312e8c8656ce6e11391a3800eaa870fd622000..f684965eb91ce3b440f30ff157fb102e75212873 100644 --- a/source/App/EncoderApp/EncAppCfg.cpp +++ b/source/App/EncoderApp/EncAppCfg.cpp @@ -978,6 +978,9 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) #endif ("PROF", m_PROF, false, "Enable Prediction refinement with optical flow for affine mode (0:off, 1:on) [default: off]") ("BIO", m_BIO, false, "Enable bi-directional optical flow") +#if ARMC_TM + ("AML", m_AML, true, "Enable adaptive merge list") +#endif ("IMV", m_ImvMode, 1, "Adaptive MV precision Mode (IMV)\n" "\t0: disabled\n" "\t1: enabled (1/2-Pel, Full-Pel and 4-PEL)\n") @@ -4266,6 +4269,9 @@ void EncAppCfg::xPrintParameter() msg( VERBOSE, "DualITree:%d ", m_dualTree ); msg( VERBOSE, "IMV:%d ", m_ImvMode ); msg( VERBOSE, "BIO:%d ", m_BIO ); +#if ARMC_TM + msg( VERBOSE, "AML:%d ", m_AML ); +#endif msg( VERBOSE, "LMChroma:%d ", m_LMChroma ); msg( VERBOSE, "HorCollocatedChroma:%d ", m_horCollocatedChromaFlag ); msg( VERBOSE, "VerCollocatedChroma:%d ", m_verCollocatedChromaFlag ); diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h index 965161ce8943a1f2b4d519332fb100ed409cd8c6..3a6a3d1b4ddef2838bf9177945a8060638beb078 100644 --- a/source/App/EncoderApp/EncAppCfg.h +++ b/source/App/EncoderApp/EncAppCfg.h @@ -358,6 
+358,9 @@ protected: #endif bool m_PROF; bool m_BIO; +#if ARMC_TM + bool m_AML; +#endif int m_LMChroma; bool m_horCollocatedChromaFlag; bool m_verCollocatedChromaFlag; diff --git a/source/Lib/CommonLib/Buffer.cpp b/source/Lib/CommonLib/Buffer.cpp index 1c62b65a642917ef775f102a4e988400a4724a89..fe9c674101b97287aa7fc7f6976615de495cb5a9 100644 --- a/source/Lib/CommonLib/Buffer.cpp +++ b/source/Lib/CommonLib/Buffer.cpp @@ -913,11 +913,13 @@ void AreaBuf<Pel>::copyClip( const AreaBuf<const Pel> &src, const ClpRng& clpRng const unsigned srcStride = src.stride; const unsigned destStride = stride; +#if !ARMC_TM if( width == 1 ) { THROW( "Blocks of width = 1 not supported" ); } else +#endif { #define RECO_OP( ADDR ) dest[ADDR] = ClipPel( srcp[ADDR], clpRng ) #define RECO_INC \ @@ -1008,6 +1010,19 @@ void AreaBuf<Pel>::linearTransform( const int scale, const int shift, const int const Pel* src = buf; Pel* dst = buf; +#if ARMC_TM /* with ARMC_TM the width == 1 rejection is compiled out here, as it is in copyClip */ +#if ENABLE_SIMD_OPT_BUFFER && defined(TARGET_SIMD_X86) + if ((width & 7) == 0) + { + g_pelBufOP.linTf8(src, stride, dst, stride, width, height, scale, shift, offset, clpRng, bClip); + } + else if ((width & 3) == 0) + { + g_pelBufOP.linTf4(src, stride, dst, stride, width, height, scale, shift, offset, clpRng, bClip); + } + else /* the else lives inside the SIMD guard: without SIMD kernels the generic block below must run unconditionally instead of following an else that has no if */ +#endif +#else if( width == 1 ) { THROW( "Blocks of width = 1 not supported" ); @@ -1021,6 +1036,7 @@ void AreaBuf<Pel>::linearTransform( const int scale, const int shift, const int { g_pelBufOP.linTf4( src, stride, dst, stride, width, height, scale, shift, offset, clpRng, bClip ); } #endif else +#endif /* ARMC_TM: closed after the else so that this else belongs only to the non-ARMC_TM if-chain */ { diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h index 510b0f039f154372866fc88ca2fac83780dc0699..197fc3ceb92883fd6d11a41490da8b12ae8251c9 100644 --- a/source/Lib/CommonLib/CommonDef.h +++ b/source/Lib/CommonLib/CommonDef.h @@ -578,6 +578,12 @@ static const double AMAXBT_TH64 = 30.0; static const double AMAXBT_TH128 = 60.0; #endif +#if ARMC_TM +static const int AML_MERGE_TEMPLATE_SIZE = 1; +static 
const int ADAPTIVE_SUB_GROUP_SIZE = 5; +static const int ADAPTIVE_AFFINE_SUB_GROUP_SIZE = 3; +#endif + // need to know for static memory allocation static const int MAX_DELTA_QP = 7; ///< maximum supported delta QP value static const int MAX_TESTED_QPs = ( 1 + 1 + ( MAX_DELTA_QP << 1 ) ); ///< dqp=0 +- max_delta_qp + lossless mode diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp index be9096990085991782b9590b417a6bf1297cf9d7..1c49d1ec186526497d5167d732f70f3d9dd23704 100644 --- a/source/Lib/CommonLib/InterPrediction.cpp +++ b/source/Lib/CommonLib/InterPrediction.cpp @@ -44,7 +44,7 @@ #include <memory.h> #include <algorithm> -#if INTER_LIC || (TM_AMVP || TM_MRG) +#if INTER_LIC || (TM_AMVP || TM_MRG) || ARMC_TM #include "Reshape.h" #endif @@ -181,6 +181,18 @@ InterPrediction::InterPrediction() CHECK(mvSearchIdx_bilMrg != (2 * BDMVR_INTME_RANGE + 1) * (2 * BDMVR_INTME_RANGE + 1), "this is wrong, mvSearchIdx_bilMrg != (2 * BDMVR_INTME_RANGE + 1) * (2 * BDMVR_INTME_RANGE + 1)"); #endif +#if ARMC_TM + for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++) + { + for (uint32_t tmplt = 0; tmplt < 2; tmplt++) + { + m_acYuvCurAMLTemplate[tmplt][ch] = nullptr; + m_acYuvRefAboveTemplate[tmplt][ch] = nullptr; + m_acYuvRefLeftTemplate[tmplt][ch] = nullptr; + m_acYuvRefAMLTemplate[tmplt][ch] = nullptr; + } + } +#endif } InterPrediction::~InterPrediction() @@ -276,16 +288,32 @@ void InterPrediction::destroy() #if MULTI_HYP_PRED m_additionalHypothesisStorage.destroy(); #endif +#if ARMC_TM + for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++) + { + for (uint32_t tmplt = 0; tmplt < 2; tmplt++) + { + xFree(m_acYuvCurAMLTemplate[tmplt][ch]); + xFree(m_acYuvRefAboveTemplate[tmplt][ch]); + xFree(m_acYuvRefLeftTemplate[tmplt][ch]); + xFree(m_acYuvRefAMLTemplate[tmplt][ch]); + m_acYuvCurAMLTemplate[tmplt][ch] = nullptr; + m_acYuvRefAboveTemplate[tmplt][ch] = nullptr; + m_acYuvRefLeftTemplate[tmplt][ch] = nullptr; + m_acYuvRefAMLTemplate[tmplt][ch] 
= nullptr; + } +} +#endif } -#if INTER_LIC || (TM_AMVP || TM_MRG) +#if INTER_LIC || (TM_AMVP || TM_MRG) || ARMC_TM void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC, const int ctuSize, Reshape* reshape ) #else void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC, const int ctuSize ) #endif { m_pcRdCost = pcRdCost; -#if INTER_LIC || (TM_AMVP || TM_MRG) +#if INTER_LIC || (TM_AMVP || TM_MRG) || ARMC_TM m_pcReshape = reshape; #endif @@ -411,6 +439,18 @@ void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC, cons m_pcLICRecAboveTemplate = (Pel*)xMalloc(Pel, MAX_CU_SIZE); } #endif +#if ARMC_TM /* AML template scratch buffers: each pointer is allocated only while it is still null, so a repeated init() does not overwrite (and leak) a live buffer; destroy() frees them and resets the pointers to nullptr */ + for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++) + { + for (uint32_t tmplt = 0; tmplt < 2; tmplt++) + { + if (m_acYuvCurAMLTemplate[tmplt][ch] == nullptr) { m_acYuvCurAMLTemplate[tmplt][ch] = (Pel*)xMalloc(Pel, MAX_CU_SIZE * MAX_CU_SIZE); } + if (m_acYuvRefAboveTemplate[tmplt][ch] == nullptr) { m_acYuvRefAboveTemplate[tmplt][ch] = (Pel*)xMalloc(Pel, MAX_CU_SIZE * MAX_CU_SIZE); } + if (m_acYuvRefLeftTemplate[tmplt][ch] == nullptr) { m_acYuvRefLeftTemplate[tmplt][ch] = (Pel*)xMalloc(Pel, MAX_CU_SIZE * MAX_CU_SIZE); } + if (m_acYuvRefAMLTemplate[tmplt][ch] == nullptr) { m_acYuvRefAMLTemplate[tmplt][ch] = (Pel*)xMalloc(Pel, MAX_CU_SIZE * MAX_CU_SIZE); } + } + } +#endif if (m_storedMv == nullptr) { @@ -1462,8 +1502,22 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio , bool bilinearMC , Pel *srcPadBuf , int32_t srcPadStride +#if ARMC_TM + , bool AML +#if INTER_LIC + , bool doLic + , Mv mvCurr +#endif +#endif ) { +#if ARMC_TM + int nFilterIdx = AML ? 
0 : 0; + if (bilinearMC) + { + nFilterIdx = 1; + } +#endif JVET_J0090_SET_REF_PICTURE( refPic, compID ); const ChromaFormat chFmt = pu.chromaFormat; const bool rndRes = !bi; @@ -1571,11 +1625,19 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio if( yFrac == 0 ) { +#if ARMC_TM + m_if.filterHor( compID, (Pel*)refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, xFrac, rndRes, chFmt, clpRng, nFilterIdx, bilinearMC, useAltHpelIf ); +#else m_if.filterHor( compID, ( Pel* ) refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, xFrac, rndRes, chFmt, clpRng, bilinearMC, bilinearMC, useAltHpelIf); +#endif } else if( xFrac == 0 ) { +#if ARMC_TM + m_if.filterVer( compID, (Pel*)refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, true, rndRes, chFmt, clpRng, nFilterIdx, bilinearMC, useAltHpelIf ); +#else m_if.filterVer( compID, ( Pel* ) refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, true, rndRes, chFmt, clpRng, bilinearMC, bilinearMC, useAltHpelIf); +#endif } else { @@ -1607,13 +1669,23 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio #else int vFilterSize = isLuma( compID ) ? 
NTAPS_LUMA : NTAPS_CHROMA; #endif +#if ARMC_TM + if (isLuma(compID) && nFilterIdx == 1) +#else if (bilinearMC) +#endif { vFilterSize = NTAPS_BILINEAR; } +#if ARMC_TM + m_if.filterHor(compID, (Pel*)refBuf.buf - ((vFilterSize >> 1) - 1) * refBuf.stride, refBuf.stride, tmpBuf.buf, tmpBuf.stride, backupWidth, backupHeight + vFilterSize - 1, xFrac, false, chFmt, clpRng, nFilterIdx, bilinearMC, useAltHpelIf); + JVET_J0090_SET_CACHE_ENABLE(false); + m_if.filterVer(compID, (Pel*)tmpBuf.buf + ((vFilterSize >> 1) - 1) * tmpBuf.stride, tmpBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, false, rndRes, chFmt, clpRng, nFilterIdx, bilinearMC, useAltHpelIf); +#else m_if.filterHor( compID, ( Pel* ) refBuf.buf - ( ( vFilterSize >> 1 ) - 1 ) * refBuf.stride, refBuf.stride, tmpBuf.buf, tmpBuf.stride, backupWidth, backupHeight + vFilterSize - 1, xFrac, false, chFmt, clpRng, bilinearMC, bilinearMC, useAltHpelIf); JVET_J0090_SET_CACHE_ENABLE( false ); m_if.filterVer( compID, ( Pel* ) tmpBuf.buf + ( ( vFilterSize >> 1 ) - 1 ) * tmpBuf.stride, tmpBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, false, rndRes, chFmt, clpRng, bilinearMC, bilinearMC, useAltHpelIf); +#endif #if SIMD_4x4_12 } #endif @@ -1677,12 +1749,25 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio m_predictionBeforeLIC.bufs[compID].copyFrom( dstBuf ); } +#if ARMC_TM + if (pu.cu->LICFlag && (!pu.ciipFlag || doLic)) +#else if( pu.cu->LICFlag && !pu.ciipFlag ) +#endif { CHECK( pu.cu->geoFlag, "Geometric mode is not used with LIC" ); CHECK( CU::isIBC( *pu.cu ), "IBC mode is not used with LIC" ); CHECK( pu.interDir == 3, "Bi-prediction is not used with LIC" ); +#if !ARMC_TM CHECK( pu.ciipFlag, "CIIP mode is not used with LIC" ); +#endif +#if ARMC_TM + if (AML) + { + xLocalIlluComp(pu, compID, *refPic, mvCurr, bi, dstBuf); + } + else +#endif xLocalIlluComp( pu, compID, *refPic, _mv, bi, dstBuf ); } #endif @@ -2838,6 +2923,204 @@ void 
InterPrediction::xWeightedAverage( } } +#if ARMC_TM +#if !INTER_LIC +template <bool TrueA_FalseL> +void InterPrediction::xGetPredBlkTpl(const CodingUnit& cu, const ComponentID compID, const CPelBuf& refBuf, const Mv& mv, const int posW, const int posH, const int tplSize, Pel* predBlkTpl) +{ + const int lumaShift = 2 + MV_FRACTIONAL_BITS_DIFF; + const int horShift = (lumaShift + ::getComponentScaleX(compID, cu.chromaFormat)); + const int verShift = (lumaShift + ::getComponentScaleY(compID, cu.chromaFormat)); + + const int xInt = mv.getHor() >> horShift; + const int yInt = mv.getVer() >> verShift; + const int xFrac = mv.getHor() & ((1 << horShift) - 1); + const int yFrac = mv.getVer() & ((1 << verShift) - 1); + + const Pel* ref; + Pel* dst; + int refStride, dstStride, bw, bh; + if (TrueA_FalseL) + { + ref = refBuf.bufAt(cu.blocks[compID].pos().offset(xInt + posW, yInt + posH - 1)); + dst = predBlkTpl + posW; + refStride = refBuf.stride; + dstStride = tplSize; + bw = tplSize; + bh = 1; + } + else + { + ref = refBuf.bufAt(cu.blocks[compID].pos().offset(xInt + posW - 1, yInt + posH)); + dst = predBlkTpl + posH; + refStride = refBuf.stride; + dstStride = 1; + bw = 1; + bh = tplSize; + } + + const int nFilterIdx = 0; + const bool useAltHpelIf = false; + + if (yFrac == 0) + { + m_if.filterHor(compID, (Pel*)ref, refStride, dst, dstStride, bw, bh, xFrac, true, cu.chromaFormat, cu.slice->clpRng(compID), nFilterIdx, false, useAltHpelIf); + } + else if (xFrac == 0) + { + m_if.filterVer(compID, (Pel*)ref, refStride, dst, dstStride, bw, bh, yFrac, true, true, cu.chromaFormat, cu.slice->clpRng(compID), nFilterIdx, false, useAltHpelIf); + } + else + { +#if IF_12TAP + const int vFilterSize = isLuma(compID) ? NTAPS_LUMA(0) : NTAPS_CHROMA; +#else + const int vFilterSize = isLuma(compID) ? 
NTAPS_LUMA : NTAPS_CHROMA; +#endif + PelBuf tmpBuf = PelBuf(m_filteredBlockTmp[0][compID], Size(bw, bh + vFilterSize - 1)); + + m_if.filterHor(compID, (Pel*)ref - ((vFilterSize >> 1) - 1)*refStride, refStride, tmpBuf.buf, tmpBuf.stride, bw, bh + vFilterSize - 1, xFrac, false, cu.chromaFormat, cu.slice->clpRng(compID), nFilterIdx, false, useAltHpelIf); + JVET_J0090_SET_CACHE_ENABLE(false); + m_if.filterVer(compID, tmpBuf.buf + ((vFilterSize >> 1) - 1)*tmpBuf.stride, tmpBuf.stride, dst, dstStride, bw, bh, yFrac, false, true, cu.chromaFormat, cu.slice->clpRng(compID), nFilterIdx, false, useAltHpelIf); + JVET_J0090_SET_CACHE_ENABLE(true); + } +} +#endif +void InterPrediction::xWeightedAverageY(const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs) +{ + const int iRefIdx0 = pu.refIdx[0]; + const int iRefIdx1 = pu.refIdx[1]; + + if (iRefIdx0 >= 0 && iRefIdx1 >= 0) + { + if (pu.cu->BcwIdx != BCW_DEFAULT) + { + pcYuvDst.addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, pu.cu->BcwIdx, false, true); + } + else + { + pcYuvDst.addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, false, true); + } + } + else if (iRefIdx0 >= 0 && iRefIdx1 < 0) + { + pcYuvDst.copyClip(pcYuvSrc0, clpRngs, true); + } + else if (iRefIdx0 < 0 && iRefIdx1 >= 0) + { + pcYuvDst.copyClip(pcYuvSrc1, clpRngs, true); + } +} +void InterPrediction::xPredAffineTpl(const PredictionUnit &pu, const RefPicList &eRefPicList, int* numTemplate, Pel* refLeftTemplate, Pel* refAboveTemplate) +{ + int iRefIdx = pu.refIdx[eRefPicList]; + CHECK(iRefIdx < 0, "iRefIdx incorrect."); + const Picture* refPic = pu.cu->slice->getRefPic(eRefPicList, iRefIdx)->unscaledPic; + Mv mvLT = pu.mvAffi[eRefPicList][0]; + Mv mvRT = pu.mvAffi[eRefPicList][1]; + Mv mvLB = pu.mvAffi[eRefPicList][2]; + // get affine sub-block width and height + const int width = pu.Y().width; + const int height = pu.Y().height; + int blockWidth = AFFINE_MIN_BLOCK_SIZE; + 
int blockHeight = AFFINE_MIN_BLOCK_SIZE; + + CHECK(blockWidth > width, "Sub Block width > Block width"); + CHECK(blockHeight > height, "Sub Block height > Block height"); + + const int cxWidth = width; + const int cxHeight = height; + const int iBit = MAX_CU_DEPTH; + int iDMvHorX, iDMvHorY, iDMvVerX, iDMvVerY; + iDMvHorX = (mvRT - mvLT).getHor() << (iBit - floorLog2(width)); + iDMvHorY = (mvRT - mvLT).getVer() << (iBit - floorLog2(width)); + if (pu.cu->affineType == AFFINEMODEL_6PARAM) + { + iDMvVerX = (mvLB - mvLT).getHor() << (iBit - floorLog2(height)); + iDMvVerY = (mvLB - mvLT).getVer() << (iBit - floorLog2(height)); + } + else + { + iDMvVerX = -iDMvHorY; + iDMvVerY = iDMvHorX; + } + int iMvScaleHor = mvLT.getHor() << iBit; + int iMvScaleVer = mvLT.getVer() << iBit; + + const int shift = iBit - 4 + MV_FRACTIONAL_BITS_INTERNAL; +#if !AFFINE_RM_CONSTRAINTS_AND_OPT + const bool subblkMVSpreadOverLimit = isSubblockVectorSpreadOverLimit(iDMvHorX, iDMvHorY, iDMvVerX, iDMvVerY, pu.interDir); +#endif +#if AFFINE_RM_CONSTRAINTS_AND_OPT + if (iDMvHorX == 0 && iDMvHorY == 0) + blockWidth = width; + else + { + int maxDmv = std::max(abs(iDMvHorX), abs(iDMvHorY)) * blockWidth; + int TH = 1 << (iBit - 1); // Half pel + while (maxDmv < TH && blockWidth < width) + { + blockWidth <<= 1; + maxDmv <<= 1; + } + } + if (iDMvVerX == 0 && iDMvVerY == 0) + blockHeight = height; + else + { + int maxDmv = std::max(abs(iDMvVerX), abs(iDMvVerY)) * blockHeight; + int TH = 1 << (iBit - 1); // Half pel + while (maxDmv < TH && blockHeight < height) + { + blockHeight <<= 1; + maxDmv <<= 1; + } + } +#endif + int iMvScaleTmpHor0 = iMvScaleHor + ((iDMvHorX * blockWidth + iDMvVerX * blockHeight) >> 1); + int iMvScaleTmpVer0 = iMvScaleVer + ((iDMvHorY * blockWidth + iDMvVerY * blockHeight) >> 1); + // get prediction block by block + for (int h = 0; h < cxHeight; h += blockHeight) + { + for (int w = 0; w < cxWidth; w += blockWidth) + { + if (w == 0 || h == 0) + { + int iMvScaleTmpHor, iMvScaleTmpVer; 
+ +#if !AFFINE_RM_CONSTRAINTS_AND_OPT + if (!subblkMVSpreadOverLimit) +#endif + { + iMvScaleTmpHor = iMvScaleTmpHor0 + iDMvHorX * w + iDMvVerX * h; + iMvScaleTmpVer = iMvScaleTmpVer0 + iDMvHorY * w + iDMvVerY * h; + } +#if !AFFINE_RM_CONSTRAINTS_AND_OPT + else + { + iMvScaleTmpHor = iMvScaleHor + iDMvHorX * (cxWidth >> 1) + iDMvVerX * (cxHeight >> 1); + iMvScaleTmpVer = iMvScaleVer + iDMvHorY * (cxWidth >> 1) + iDMvVerY * (cxHeight >> 1); + } +#endif + roundAffineMv(iMvScaleTmpHor, iMvScaleTmpVer, shift); + Mv tmpMv(iMvScaleTmpHor, iMvScaleTmpVer); + tmpMv.clipToStorageBitDepth(); + iMvScaleTmpHor = tmpMv.getHor(); + iMvScaleTmpVer = tmpMv.getVer(); + + // clip and scale + if (refPic->isRefScaled(pu.cs->pps) == false) + { + clipMv(tmpMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps); + iMvScaleTmpHor = tmpMv.getHor(); + iMvScaleTmpVer = tmpMv.getVer(); + } + xGetSublkAMLTemplate(*pu.cu, COMPONENT_Y, *refPic, Mv(iMvScaleTmpHor, iMvScaleTmpVer), blockWidth, blockHeight, w, h, numTemplate, refLeftTemplate, refAboveTemplate); + } + } + } +} +#endif void InterPrediction::motionCompensation( PredictionUnit &pu, PelUnitBuf &predBuf, const RefPicList &eRefPicList , const bool luma, const bool chroma @@ -4192,6 +4475,734 @@ void InterPrediction::cacheAssign( CacheModel *cache ) } #endif +#if ARMC_TM +void InterPrediction::adjustInterMergeCandidates(PredictionUnit &pu, MergeCtx& mrgCtx, int mrgCandIdx) +{ + uint32_t RdCandList[MRG_MAX_NUM_CANDS][MRG_MAX_NUM_CANDS]; + Distortion candCostList[MRG_MAX_NUM_CANDS][MRG_MAX_NUM_CANDS]; + + for (uint32_t i = 0; i < MRG_MAX_NUM_CANDS; i++) + { + for (uint32_t j = 0; j < MRG_MAX_NUM_CANDS; j++) + { + RdCandList[i][j] = j; + candCostList[i][j] = MAX_UINT; + } + } + + Distortion uiCost; + + DistParam cDistParam; + cDistParam.applyWeight = false; + + /*const SPS &sps = *pu.cs->sps; + Position puPos = pu.lumaPos();*/ + int nWidth = pu.lumaSize().width; + int nHeight = pu.lumaSize().height; + + if (!xAMLGetCurBlkTemplate(pu, 
nWidth, nHeight)) + { + return; + } + + for (uint32_t uiMergeCand = ((mrgCandIdx < 0) ? 0 : (mrgCandIdx / ADAPTIVE_SUB_GROUP_SIZE)*ADAPTIVE_SUB_GROUP_SIZE); uiMergeCand < (((mrgCandIdx < 0) || ((mrgCandIdx / ADAPTIVE_SUB_GROUP_SIZE + 1)*ADAPTIVE_SUB_GROUP_SIZE > mrgCtx.numValidMergeCand)) ? mrgCtx.numValidMergeCand : ((mrgCandIdx / ADAPTIVE_SUB_GROUP_SIZE + 1)*ADAPTIVE_SUB_GROUP_SIZE)); ++uiMergeCand) + { + bool firstGroup = (uiMergeCand / ADAPTIVE_SUB_GROUP_SIZE) == 0 ? true : false; + bool lastGroup = ((uiMergeCand / ADAPTIVE_SUB_GROUP_SIZE + 1)*ADAPTIVE_SUB_GROUP_SIZE >= mrgCtx.numValidMergeCand) ? true : false; + if (lastGroup && !firstGroup) + { + break; + } + uiCost = 0; + + mrgCtx.setMergeInfo(pu, uiMergeCand); + PU::spanMotionInfo(pu, mrgCtx); + + PelUnitBuf pcBufPredRefTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE))); + PelUnitBuf pcBufPredCurTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE))); + PelUnitBuf pcBufPredRefLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight))); + PelUnitBuf pcBufPredCurLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight))); + + getBlkAMLRefTemplate(pu, pcBufPredRefTop, pcBufPredRefLeft); + + if (m_bAMLTemplateAvailabe[0]) + { + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); + + uiCost += cDistParam.distFunc(cDistParam); + } + + if (m_bAMLTemplateAvailabe[1]) + { + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); + + uiCost += cDistParam.distFunc(cDistParam); + } + updateCandList(uiMergeCand, uiCost, ADAPTIVE_SUB_GROUP_SIZE, RdCandList[uiMergeCand / ADAPTIVE_SUB_GROUP_SIZE], candCostList[uiMergeCand / 
ADAPTIVE_SUB_GROUP_SIZE]); + } + pu.mergeIdx = mrgCandIdx; //restore the merge index + updateCandInfo(mrgCtx, RdCandList + , mrgCandIdx + ); + +} +bool InterPrediction::xAMLGetCurBlkTemplate(PredictionUnit& pu, int nCurBlkWidth, int nCurBlkHeight) +{ + m_bAMLTemplateAvailabe[0] = xAMLIsTopTempAvailable(pu); + m_bAMLTemplateAvailabe[1] = xAMLIsLeftTempAvailable(pu); + + if (!m_bAMLTemplateAvailabe[0] && !m_bAMLTemplateAvailabe[1]) + { + return false; + } + + /* const int lumaShift = 2 + MV_FRACTIONAL_BITS_DIFF; + const int horShift = (lumaShift + ::getComponentScaleX(COMPONENT_Y, pu.chromaFormat)); + const int verShift = (lumaShift + ::getComponentScaleY(COMPONENT_Y, pu.chromaFormat));*/ + const Picture& currPic = *pu.cs->picture; + const CPelBuf recBuf = currPic.getRecoBuf(pu.cs->picture->blocks[COMPONENT_Y]); + std::vector<Pel>& invLUT = m_pcReshape->getInvLUT(); + + if (m_bAMLTemplateAvailabe[0]) + { + const Pel* rec = recBuf.bufAt(pu.blocks[COMPONENT_Y].pos().offset(0, -AML_MERGE_TEMPLATE_SIZE)); + PelBuf pcYBuf = PelBuf(m_acYuvCurAMLTemplate[0][0], nCurBlkWidth, AML_MERGE_TEMPLATE_SIZE); + Pel* pcY = pcYBuf.bufAt(0, 0); + for (int k = 0; k < nCurBlkWidth; k++) + { + for (int l = 0; l < AML_MERGE_TEMPLATE_SIZE; l++) + { + int recVal = rec[k + l * recBuf.stride]; + + if (m_pcReshape->getSliceReshaperInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag()) + { + recVal = invLUT[recVal]; + } + + pcY[k + l * nCurBlkWidth] = recVal; + } + } + } + + if (m_bAMLTemplateAvailabe[1]) + { + PelBuf pcYBuf = PelBuf(m_acYuvCurAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nCurBlkHeight); + Pel* pcY = pcYBuf.bufAt(0, 0); + const Pel* rec = recBuf.bufAt(pu.blocks[COMPONENT_Y].pos().offset(-AML_MERGE_TEMPLATE_SIZE, 0)); + for (int k = 0; k < nCurBlkHeight; k++) + { + for (int l = 0; l < AML_MERGE_TEMPLATE_SIZE; l++) + { + int recVal = rec[recBuf.stride * k + l]; + + if (m_pcReshape->getSliceReshaperInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag()) + { + recVal = 
invLUT[recVal]; + } + + pcY[AML_MERGE_TEMPLATE_SIZE * k + l] = recVal; + } + } + } + + return true; +} + +bool InterPrediction::xAMLIsTopTempAvailable(PredictionUnit& pu) /* Reports whether the above template can be used: true when the PU covering the sample one row above pu's top-right luma corner can be fetched through getPURestricted and belongs to a different CU than pu. */ +{ + const CodingStructure &cs = *pu.cs; + Position posRT = pu.Y().topRight(); + const PredictionUnit *puAbove = cs.getPURestricted(posRT.offset(0, -1), pu, pu.chType); + + return (puAbove && pu.cu != puAbove->cu); +} + +bool InterPrediction::xAMLIsLeftTempAvailable(PredictionUnit& pu) /* Reports whether the left template can be used: true when the PU covering the sample one column left of pu's bottom-left luma corner can be fetched through getPURestricted and belongs to a different CU than pu. */ +{ + const CodingStructure &cs = *pu.cs; + Position posLB = pu.Y().bottomLeft(); + const PredictionUnit *puLeft = cs.getPURestricted(posLB.offset(-1, 0), pu, pu.chType); + + return (puLeft && pu.cu != puLeft->cu); +} +void InterPrediction::updateCandList(uint32_t uiCand, Distortion uiCost, uint32_t uiMrgCandNum, uint32_t* RdCandList, Distortion* CandCostList) /* Inserts the pair (uiCand, uiCost) into the parallel arrays RdCandList / CandCostList, both of length uiMrgCandNum. Tail entries whose cost is strictly greater than uiCost are moved one slot towards the end (the last entry is dropped) and the new pair is written into the freed slot; both arrays are left untouched when uiCost is not below the last stored cost. NOTE(review): presumably CandCostList is kept in non-decreasing order by the callers - confirm. */ +{ + uint32_t i; + uint32_t shift = 0; + + /* count how many entries, scanning from the end, cost more than the new candidate */ while (shift < uiMrgCandNum && uiCost < CandCostList[uiMrgCandNum - 1 - shift]) + { + shift++; + } + + if (shift != 0) + { + /* move the more expensive entries one position down, starting at the last slot */ for (i = 1; i < shift; i++) + { + RdCandList[uiMrgCandNum - i] = RdCandList[uiMrgCandNum - 1 - i]; + CandCostList[uiMrgCandNum - i] = CandCostList[uiMrgCandNum - 1 - i]; + } + RdCandList[uiMrgCandNum - shift] = uiCand; + CandCostList[uiMrgCandNum - shift] = uiCost; + } +} +void InterPrediction::updateCandInfo(MergeCtx& mrgCtx, uint32_t(*RdCandList)[MRG_MAX_NUM_CANDS], int mrgCandIdx) +{ + MergeCtx mrgCtxTmp; + /* reset every slot of the temporary context to default values before candidates are copied into it */ for (uint32_t ui = 0; ui < MRG_MAX_NUM_CANDS; ++ui) + { + mrgCtxTmp.BcwIdx[ui] = BCW_DEFAULT; + mrgCtxTmp.interDirNeighbours[ui] = 0; + mrgCtxTmp.mvFieldNeighbours[(ui << 1)].refIdx = NOT_VALID; + mrgCtxTmp.mvFieldNeighbours[(ui << 1) + 1].refIdx = NOT_VALID; + mrgCtxTmp.useAltHpelIf[ui] = false; +#if INTER_LIC + mrgCtxTmp.LICFlags[ui] = false; +#endif +#if MULTI_HYP_PRED + mrgCtxTmp.addHypNeighbours[ui].clear(); +#endif + } + for (uint32_t uiMergeCand = ((mrgCandIdx < 0) ? 
0 : (mrgCandIdx / ADAPTIVE_SUB_GROUP_SIZE)*ADAPTIVE_SUB_GROUP_SIZE); uiMergeCand < (((mrgCandIdx < 0) || ((mrgCandIdx / ADAPTIVE_SUB_GROUP_SIZE + 1)*ADAPTIVE_SUB_GROUP_SIZE > mrgCtx.numValidMergeCand)) ? mrgCtx.numValidMergeCand : ((mrgCandIdx / ADAPTIVE_SUB_GROUP_SIZE + 1)*ADAPTIVE_SUB_GROUP_SIZE)); ++uiMergeCand) + { + bool firstGroup = (uiMergeCand / ADAPTIVE_SUB_GROUP_SIZE) == 0 ? true : false; + bool lastGroup = ((uiMergeCand / ADAPTIVE_SUB_GROUP_SIZE + 1)*ADAPTIVE_SUB_GROUP_SIZE >= mrgCtx.numValidMergeCand) ? true : false; + if (lastGroup && !firstGroup) + { + break; + } + mrgCtxTmp.BcwIdx[uiMergeCand] = mrgCtx.BcwIdx[uiMergeCand]; + mrgCtxTmp.interDirNeighbours[uiMergeCand] = mrgCtx.interDirNeighbours[uiMergeCand]; + mrgCtxTmp.mvFieldNeighbours[(uiMergeCand << 1)] = mrgCtx.mvFieldNeighbours[(uiMergeCand << 1)]; + mrgCtxTmp.mvFieldNeighbours[(uiMergeCand << 1) + 1] = mrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1]; + mrgCtxTmp.useAltHpelIf[uiMergeCand] = mrgCtx.useAltHpelIf[uiMergeCand]; +#if INTER_LIC + mrgCtxTmp.LICFlags[uiMergeCand] = mrgCtx.LICFlags[uiMergeCand]; +#endif +#if MULTI_HYP_PRED + mrgCtxTmp.addHypNeighbours[uiMergeCand] = mrgCtx.addHypNeighbours[uiMergeCand]; +#endif + } + //update + for (uint32_t uiMergeCand = ((mrgCandIdx < 0) ? 0 : (mrgCandIdx / ADAPTIVE_SUB_GROUP_SIZE)*ADAPTIVE_SUB_GROUP_SIZE); uiMergeCand < (((mrgCandIdx < 0) || ((mrgCandIdx / ADAPTIVE_SUB_GROUP_SIZE + 1)*ADAPTIVE_SUB_GROUP_SIZE > mrgCtx.numValidMergeCand)) ? mrgCtx.numValidMergeCand : ((mrgCandIdx / ADAPTIVE_SUB_GROUP_SIZE + 1)*ADAPTIVE_SUB_GROUP_SIZE)); ++uiMergeCand) + { + bool firstGroup = (uiMergeCand / ADAPTIVE_SUB_GROUP_SIZE) == 0 ? true : false; + bool lastGroup = ((uiMergeCand / ADAPTIVE_SUB_GROUP_SIZE + 1)*ADAPTIVE_SUB_GROUP_SIZE >= mrgCtx.numValidMergeCand) ? 
true : false; + if (lastGroup && !firstGroup) + { + break; + } + + mrgCtx.BcwIdx[uiMergeCand] = mrgCtxTmp.BcwIdx[RdCandList[uiMergeCand / ADAPTIVE_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_SUB_GROUP_SIZE]]; + mrgCtx.interDirNeighbours[uiMergeCand] = mrgCtxTmp.interDirNeighbours[RdCandList[uiMergeCand / ADAPTIVE_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_SUB_GROUP_SIZE]]; + mrgCtx.mvFieldNeighbours[(uiMergeCand << 1)] = mrgCtxTmp.mvFieldNeighbours[(RdCandList[uiMergeCand / ADAPTIVE_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_SUB_GROUP_SIZE] << 1)]; + mrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1] = mrgCtxTmp.mvFieldNeighbours[(RdCandList[uiMergeCand / ADAPTIVE_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_SUB_GROUP_SIZE] << 1) + 1]; + mrgCtx.useAltHpelIf[uiMergeCand] = mrgCtxTmp.useAltHpelIf[RdCandList[uiMergeCand / ADAPTIVE_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_SUB_GROUP_SIZE]]; +#if INTER_LIC + mrgCtx.LICFlags[uiMergeCand] = mrgCtxTmp.LICFlags[RdCandList[uiMergeCand / ADAPTIVE_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_SUB_GROUP_SIZE]]; +#endif +#if MULTI_HYP_PRED + mrgCtx.addHypNeighbours[uiMergeCand] = mrgCtxTmp.addHypNeighbours[RdCandList[uiMergeCand / ADAPTIVE_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_SUB_GROUP_SIZE]]; +#endif + } +} +void InterPrediction::getBlkAMLRefTemplate(PredictionUnit &pu, PelUnitBuf &pcBufPredRefTop, PelUnitBuf &pcBufPredRefLeft) +{ + Mv mvCurr; + const int lumaShift = 2 + MV_FRACTIONAL_BITS_DIFF; + const int horShift = (lumaShift + ::getComponentScaleX(COMPONENT_Y, pu.chromaFormat)); + const int verShift = (lumaShift + ::getComponentScaleY(COMPONENT_Y, pu.chromaFormat)); + + if (xCheckIdenticalMotion(pu)) + { + mvCurr = pu.mv[0]; + /*const int horIntMv = (mvCurr.getHor() + ((1 << horShift) >> 1)) >> horShift; + const int verIntMv = (mvCurr.getVer() + ((1 << verShift) >> 1)) >> verShift; + Mv subPelMv(horIntMv << horShift, verIntMv << verShift);*/ + Mv subPelMv = mvCurr; + clipMv(mvCurr, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps); + CHECK(pu.refIdx[0] < 0, 
"invalid ref idx"); + + if (m_bAMLTemplateAvailabe[0]) + { + Mv mvTop(0, -(AML_MERGE_TEMPLATE_SIZE << verShift)); + mvTop += subPelMv; + + clipMv(mvTop, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps); + +#if INTER_LIC + xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(REF_PIC_LIST_0, pu.refIdx[0]), mvTop, pcBufPredRefTop, + false, pu.cu->slice->clpRng(COMPONENT_Y), false, false, SCALE_1X, 0, 0, false, NULL, 0, true, true, + mvCurr); +#else + xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(REF_PIC_LIST_0, pu.refIdx[0]), mvTop, pcBufPredRefTop, + false, pu.cu->slice->clpRng(COMPONENT_Y), false, false, SCALE_1X, 0, 0, false, NULL, 0, true); +#endif + } + if (m_bAMLTemplateAvailabe[1]) + { + Mv mvLeft(-(AML_MERGE_TEMPLATE_SIZE << horShift), 0); + mvLeft += subPelMv; + + clipMv(mvLeft, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps); + +#if INTER_LIC + xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(REF_PIC_LIST_0, pu.refIdx[0]), mvLeft, pcBufPredRefLeft, + false, pu.cu->slice->clpRng(COMPONENT_Y), false, false, SCALE_1X, 0, 0, false, NULL, 0, true, true, + mvCurr); +#else + xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(REF_PIC_LIST_0, pu.refIdx[0]), mvLeft, pcBufPredRefLeft, + false, pu.cu->slice->clpRng(COMPONENT_Y), false, false, SCALE_1X, 0, 0, false, NULL, 0, true); +#endif + } + } + else + { + for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) + { + if (pu.refIdx[refList] < 0) + { + continue; + } + RefPicList eRefPicList = (refList ? 
REF_PIC_LIST_1 : REF_PIC_LIST_0); + CHECK(pu.refIdx[refList] >= pu.cu->slice->getNumRefIdx(eRefPicList), "Invalid reference index"); + + m_iRefListIdx = refList; + mvCurr = pu.mv[refList]; + /*const int horIntMv = (mvCurr.getHor() + ((1 << horShift) >> 1)) >> horShift; + const int verIntMv = (mvCurr.getVer() + ((1 << verShift) >> 1)) >> verShift; + Mv subPelMv(horIntMv << horShift, verIntMv << verShift);*/ + Mv subPelMv = mvCurr; + clipMv(mvCurr, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps); + + if (m_bAMLTemplateAvailabe[0]) + { + Mv mvTop(0, -(AML_MERGE_TEMPLATE_SIZE << verShift)); + mvTop += subPelMv; + + clipMv(mvTop, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps); + + PelUnitBuf pcMbBuf = + PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAboveTemplate[refList][0], pcBufPredRefTop.Y())); + + if (pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0) + { +#if INTER_LIC + xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[refList]), mvTop, pcMbBuf, true, + pu.cu->slice->clpRng(COMPONENT_Y), false, false, SCALE_1X, 0, 0, false, NULL, 0, true, true, + mvCurr); +#else + xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[refList]), mvTop, pcMbBuf, true, + pu.cu->slice->clpRng(COMPONENT_Y), false, false, SCALE_1X, 0, 0, false, NULL, 0, true); +#endif + } + else + { +#if INTER_LIC + xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[refList]), mvTop, pcMbBuf, + false, pu.cu->slice->clpRng(COMPONENT_Y), false, false, SCALE_1X, 0, 0, false, NULL, 0, true, + true, mvCurr); +#else + xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[refList]), mvTop, pcMbBuf, + false, pu.cu->slice->clpRng(COMPONENT_Y), false, false, SCALE_1X, 0, 0, false, NULL, 0, true); +#endif + } + } + if (m_bAMLTemplateAvailabe[1]) + { + Mv mvLeft(-(AML_MERGE_TEMPLATE_SIZE << horShift), 0); + mvLeft += subPelMv; + + clipMv(mvLeft, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps); + + 
PelUnitBuf pcMbBuf = + PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefLeftTemplate[refList][0], pcBufPredRefLeft.Y())); + + if (pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0) + { +#if INTER_LIC + xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[refList]), mvLeft, pcMbBuf, + true, pu.cu->slice->clpRng(COMPONENT_Y), false, false, SCALE_1X, 0, 0, false, NULL, 0, true, + true, mvCurr); +#else + xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[refList]), mvLeft, pcMbBuf, + true, pu.cu->slice->clpRng(COMPONENT_Y), false, false, SCALE_1X, 0, 0, false, NULL, 0, true); +#endif + } + else + { +#if INTER_LIC + xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[refList]), mvLeft, pcMbBuf, + false, pu.cu->slice->clpRng(COMPONENT_Y), false, false, SCALE_1X, 0, 0, false, NULL, 0, true, + true, mvCurr); +#else + xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[refList]), mvLeft, pcMbBuf, + false, pu.cu->slice->clpRng(COMPONENT_Y), false, false, SCALE_1X, 0, 0, false, NULL, 0, true); +#endif + } + } + } + if (m_bAMLTemplateAvailabe[0]) + { + CPelUnitBuf srcPred0 = CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAboveTemplate[0][0], pcBufPredRefTop.Y())); + CPelUnitBuf srcPred1 = CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAboveTemplate[1][0], pcBufPredRefTop.Y())); + xWeightedAverageY(pu, srcPred0, srcPred1, pcBufPredRefTop, pu.cu->slice->getSPS()->getBitDepths(), + pu.cu->slice->clpRngs()); + } + if (m_bAMLTemplateAvailabe[1]) + { + CPelUnitBuf srcPred0 = CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefLeftTemplate[0][0], pcBufPredRefLeft.Y())); + CPelUnitBuf srcPred1 = CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefLeftTemplate[1][0], pcBufPredRefLeft.Y())); + xWeightedAverageY(pu, srcPred0, srcPred1, pcBufPredRefLeft, pu.cu->slice->getSPS()->getBitDepths(), + pu.cu->slice->clpRngs()); + } + } +} + +void InterPrediction::adjustAffineMergeCandidates(PredictionUnit &pu, 
AffineMergeCtx& affMrgCtx, int mrgCandIdx) +{ + uint32_t RdCandList[AFFINE_MRG_MAX_NUM_CANDS][AFFINE_MRG_MAX_NUM_CANDS]; + Distortion candCostList[AFFINE_MRG_MAX_NUM_CANDS][AFFINE_MRG_MAX_NUM_CANDS]; + + for (uint32_t i = 0; i < AFFINE_MRG_MAX_NUM_CANDS; i++) + { + for (uint32_t j = 0; j < AFFINE_MRG_MAX_NUM_CANDS; j++) + { + RdCandList[i][j] = j; + candCostList[i][j] = MAX_UINT; + } + } + + Distortion uiCost; + + DistParam cDistParam; + cDistParam.applyWeight = false; + + int nWidth = pu.lumaSize().width; + int nHeight = pu.lumaSize().height; + + if (!xAMLGetCurBlkTemplate(pu, nWidth, nHeight)) + { + return; + } + + for (uint32_t uiMergeCand = ((mrgCandIdx < 0) ? 0 : (mrgCandIdx / ADAPTIVE_AFFINE_SUB_GROUP_SIZE)*ADAPTIVE_AFFINE_SUB_GROUP_SIZE); uiMergeCand < (((mrgCandIdx < 0) || ((mrgCandIdx / ADAPTIVE_AFFINE_SUB_GROUP_SIZE + 1)*ADAPTIVE_AFFINE_SUB_GROUP_SIZE > affMrgCtx.maxNumMergeCand)) ? affMrgCtx.maxNumMergeCand : ((mrgCandIdx / ADAPTIVE_AFFINE_SUB_GROUP_SIZE + 1)*ADAPTIVE_AFFINE_SUB_GROUP_SIZE)); ++uiMergeCand) + { + bool firstGroup = (uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE) == 0 ? true : false; + bool lastGroup = ((uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE + 1)*ADAPTIVE_AFFINE_SUB_GROUP_SIZE >= affMrgCtx.maxNumMergeCand) ? 
true : false; + if (lastGroup && !firstGroup) + { + break; + } + uiCost = 0; + + // set merge information + pu.interDir = affMrgCtx.interDirNeighbours[uiMergeCand]; + pu.mergeFlag = true; + pu.regularMergeFlag = false; + pu.mergeIdx = uiMergeCand; + pu.cu->affine = true; + pu.cu->affineType = affMrgCtx.affineType[uiMergeCand]; +#if AFFINE_MMVD + pu.afMmvdFlag = false; +#endif + pu.cu->BcwIdx = affMrgCtx.BcwIdx[uiMergeCand]; +#if INTER_LIC + pu.cu->LICFlag = affMrgCtx.LICFlags[uiMergeCand]; +#endif + + pu.mergeType = affMrgCtx.mergeType[uiMergeCand]; + if (pu.mergeType == MRG_TYPE_SUBPU_ATMVP) + { + pu.refIdx[0] = affMrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 0][0].refIdx; + pu.refIdx[1] = affMrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1][0].refIdx; + PU::spanMotionInfo(pu, *affMrgCtx.mrgCtx); + } + else + { + for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) + { + for (int i = 0; i < 3; i++) + { + pu.mvAffi[refList][i] = affMrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + refList][i].mv; + } + pu.refIdx[refList] = affMrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + refList][0].refIdx; + } + + PelUnitBuf pcBufPredRefTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE))); + PelUnitBuf pcBufPredCurTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE))); + PelUnitBuf pcBufPredRefLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight))); + PelUnitBuf pcBufPredCurLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight))); + + getAffAMLRefTemplate(pu, pcBufPredRefTop, pcBufPredRefLeft); + + if (m_bAMLTemplateAvailabe[0]) + { + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); + + uiCost += cDistParam.distFunc(cDistParam); + } + + if (m_bAMLTemplateAvailabe[1]) + { + 
m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); + + uiCost += cDistParam.distFunc(cDistParam); + } + } + updateCandList(uiMergeCand, uiCost, ADAPTIVE_AFFINE_SUB_GROUP_SIZE, RdCandList[uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE], candCostList[uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE]); + } + pu.mergeIdx = mrgCandIdx; //restore the merge index + updateAffineCandInfo(pu, affMrgCtx, RdCandList + , mrgCandIdx + ); + +} +void InterPrediction::updateAffineCandInfo(PredictionUnit &pu, AffineMergeCtx& affMrgCtx, uint32_t(*RdCandList)[AFFINE_MRG_MAX_NUM_CANDS], int mrgCandIdx) +{ + AffineMergeCtx affMrgCtxTmp; + const uint32_t maxNumAffineMergeCand = pu.cs->slice->getPicHeader()->getMaxNumAffineMergeCand(); + for (int i = 0; i < maxNumAffineMergeCand; i++) + { + for (int mvNum = 0; mvNum < 3; mvNum++) + { + affMrgCtxTmp.mvFieldNeighbours[(i << 1) + 0][mvNum].setMvField(Mv(), -1); + affMrgCtxTmp.mvFieldNeighbours[(i << 1) + 1][mvNum].setMvField(Mv(), -1); + } + affMrgCtxTmp.interDirNeighbours[i] = 0; + affMrgCtxTmp.affineType[i] = AFFINEMODEL_4PARAM; + affMrgCtxTmp.mergeType[i] = MRG_TYPE_DEFAULT_N; + affMrgCtxTmp.BcwIdx[i] = BCW_DEFAULT; +#if INTER_LIC + affMrgCtxTmp.LICFlags[i] = false; +#endif + } + for (uint32_t uiMergeCand = ((mrgCandIdx < 0) ? 0 : (mrgCandIdx / ADAPTIVE_AFFINE_SUB_GROUP_SIZE)*ADAPTIVE_AFFINE_SUB_GROUP_SIZE); uiMergeCand < (((mrgCandIdx < 0) || ((mrgCandIdx / ADAPTIVE_AFFINE_SUB_GROUP_SIZE + 1)*ADAPTIVE_AFFINE_SUB_GROUP_SIZE > affMrgCtx.maxNumMergeCand)) ? affMrgCtx.maxNumMergeCand : ((mrgCandIdx / ADAPTIVE_AFFINE_SUB_GROUP_SIZE + 1)*ADAPTIVE_AFFINE_SUB_GROUP_SIZE)); ++uiMergeCand) + { + bool firstGroup = (uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE) == 0 ? true : false; + bool lastGroup = ((uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE + 1)*ADAPTIVE_AFFINE_SUB_GROUP_SIZE >= affMrgCtx.maxNumMergeCand) ? 
true : false; + if (lastGroup && !firstGroup) + { + break; + } + for (int mvNum = 0; mvNum < 3; mvNum++) + { + affMrgCtxTmp.mvFieldNeighbours[(uiMergeCand << 1) + 0][mvNum] = affMrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 0][mvNum]; + affMrgCtxTmp.mvFieldNeighbours[(uiMergeCand << 1) + 1][mvNum] = affMrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1][mvNum]; + } + affMrgCtxTmp.interDirNeighbours[uiMergeCand] = affMrgCtx.interDirNeighbours[uiMergeCand]; + affMrgCtxTmp.affineType[uiMergeCand] = affMrgCtx.affineType[uiMergeCand]; + affMrgCtxTmp.mergeType[uiMergeCand] = affMrgCtx.mergeType[uiMergeCand]; + affMrgCtxTmp.BcwIdx[uiMergeCand] = affMrgCtx.BcwIdx[uiMergeCand]; +#if INTER_LIC + affMrgCtxTmp.LICFlags[uiMergeCand] = affMrgCtx.LICFlags[uiMergeCand]; +#endif + } + //update + for (uint32_t uiMergeCand = ((mrgCandIdx < 0) ? 0 : (mrgCandIdx / ADAPTIVE_AFFINE_SUB_GROUP_SIZE)*ADAPTIVE_AFFINE_SUB_GROUP_SIZE); uiMergeCand < (((mrgCandIdx < 0) || ((mrgCandIdx / ADAPTIVE_AFFINE_SUB_GROUP_SIZE + 1)*ADAPTIVE_AFFINE_SUB_GROUP_SIZE > affMrgCtx.maxNumMergeCand)) ? affMrgCtx.maxNumMergeCand : ((mrgCandIdx / ADAPTIVE_AFFINE_SUB_GROUP_SIZE + 1)*ADAPTIVE_AFFINE_SUB_GROUP_SIZE)); ++uiMergeCand) + { + bool firstGroup = (uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE) == 0 ? true : false; + bool lastGroup = ((uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE + 1)*ADAPTIVE_AFFINE_SUB_GROUP_SIZE >= affMrgCtx.maxNumMergeCand) ? 
true : false; + if (lastGroup && !firstGroup) + { + break; + } + for (int mvNum = 0; mvNum < 3; mvNum++) + { + affMrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 0][mvNum] = affMrgCtxTmp.mvFieldNeighbours[(RdCandList[uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_AFFINE_SUB_GROUP_SIZE] << 1) + 0][mvNum]; + affMrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1][mvNum] = affMrgCtxTmp.mvFieldNeighbours[(RdCandList[uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_AFFINE_SUB_GROUP_SIZE] << 1) + 1][mvNum]; + } + affMrgCtx.interDirNeighbours[uiMergeCand] = affMrgCtxTmp.interDirNeighbours[RdCandList[uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_AFFINE_SUB_GROUP_SIZE]]; + affMrgCtx.affineType[uiMergeCand] = affMrgCtxTmp.affineType[RdCandList[uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_AFFINE_SUB_GROUP_SIZE]]; + affMrgCtx.mergeType[uiMergeCand] = affMrgCtxTmp.mergeType[RdCandList[uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_AFFINE_SUB_GROUP_SIZE]]; + affMrgCtx.BcwIdx[uiMergeCand] = affMrgCtxTmp.BcwIdx[RdCandList[uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_AFFINE_SUB_GROUP_SIZE]]; +#if INTER_LIC + affMrgCtx.LICFlags[uiMergeCand] = affMrgCtxTmp.LICFlags[RdCandList[uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_AFFINE_SUB_GROUP_SIZE]]; +#endif + } +} +void InterPrediction::xGetSublkAMLTemplate(const CodingUnit& cu, + const ComponentID compID, + const Picture& refPic, + const Mv& mv, + const int sublkWidth, + const int sublkHeight, + const int posW, + const int posH, + int* numTemplate, + Pel* refLeftTemplate, + Pel* refAboveTemplate) +{ + const int bitDepth = cu.cs->sps->getBitDepth(toChannelType(compID)); + const int precShift = std::max(0, bitDepth - 12); + + const CodingUnit* const cuAbove = cu.cs->getCU(cu.blocks[compID].pos().offset(0, -1), toChannelType(compID)); + const CodingUnit* const cuLeft = 
cu.cs->getCU(cu.blocks[compID].pos().offset(-1, 0), toChannelType(compID)); + const CPelBuf refBuf = cuAbove || cuLeft ? refPic.getRecoBuf(refPic.blocks[compID]) : CPelBuf(); + + // above + if (cuAbove && posH == 0) + { + xGetPredBlkTpl<true>(cu, compID, refBuf, mv, posW, posH, sublkWidth, refAboveTemplate); + + for (int k = posW; k < posW + sublkWidth; k++) + { + int refVal = refAboveTemplate[k]; + refVal >>= precShift; + refAboveTemplate[k] = refVal; + numTemplate[0]++; + } + } + + // left + if (cuLeft && posW == 0) + { + xGetPredBlkTpl<false>(cu, compID, refBuf, mv, posW, posH, sublkHeight, refLeftTemplate); + + for (int k = posH; k < posH + sublkHeight; k++) + { + int refVal = refLeftTemplate[k]; + refVal >>= precShift; + refLeftTemplate[k] = refVal; + numTemplate[1]++; + } + } +} +void InterPrediction::getAffAMLRefTemplate(PredictionUnit &pu, PelUnitBuf &pcBufPredRefTop, PelUnitBuf &pcBufPredRefLeft) +{ +#if INTER_LIC + int LICshift[2] = { 0 }; + int scale[2] = { 0 }; + int offset[2] = { 0 }; +#endif + const int bitDepth = pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA); + if (xCheckIdenticalMotion(pu)) + { + Pel * refLeftTemplate = m_acYuvRefAMLTemplate[1][0]; + Pel * refAboveTemplate = m_acYuvRefAMLTemplate[0][0]; + int numTemplate[2] = { 0, 0 }; // 0:Above, 1:Left + const RefPicList eRefPicList = REF_PIC_LIST_0; + xPredAffineTpl(pu, eRefPicList, numTemplate, refLeftTemplate, refAboveTemplate); +#if INTER_LIC + if (pu.cu->LICFlag) + { + Pel *recLeftTemplate = m_acYuvCurAMLTemplate[1][0]; + Pel *recAboveTemplate = m_acYuvCurAMLTemplate[0][0]; + xGetLICParamGeneral(*pu.cu, COMPONENT_Y, numTemplate, refLeftTemplate, refAboveTemplate, recLeftTemplate, + recAboveTemplate, LICshift[0], scale[0], offset[0]); + if (m_bAMLTemplateAvailabe[0]) + { + PelBuf & dstBuf = pcBufPredRefTop.bufs[0]; + const ClpRng &clpRng = pu.cu->cs->slice->clpRng(COMPONENT_Y); + dstBuf.linearTransform(scale[0], LICshift[0], offset[0], true, clpRng); + } + if (m_bAMLTemplateAvailabe[1]) + { + 
PelBuf & dstBuf = pcBufPredRefLeft.bufs[0]; + const ClpRng &clpRng = pu.cu->cs->slice->clpRng(COMPONENT_Y); + dstBuf.linearTransform(scale[0], LICshift[0], offset[0], true, clpRng); + } + } +#endif + } + else + { + for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) + { + if (pu.refIdx[refList] < 0) + { + continue; + } + RefPicList eRefPicList = (refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0); + CHECK(pu.refIdx[refList] >= pu.cu->slice->getNumRefIdx(eRefPicList), "Invalid reference index"); + Pel *refLeftTemplate = m_acYuvRefLeftTemplate[refList][0]; + Pel *refAboveTemplate = m_acYuvRefAboveTemplate[refList][0]; + int numTemplate[2] = { 0, 0 }; // 0:Above, 1:Left + xPredAffineTpl(pu, eRefPicList, numTemplate, refLeftTemplate, refAboveTemplate); +#if INTER_LIC + if (pu.cu->LICFlag) + { + Pel *recLeftTemplate = m_acYuvCurAMLTemplate[1][0]; + Pel *recAboveTemplate = m_acYuvCurAMLTemplate[0][0]; + xGetLICParamGeneral(*pu.cu, COMPONENT_Y, numTemplate, refLeftTemplate, refAboveTemplate, recLeftTemplate, + recAboveTemplate, LICshift[refList], scale[refList], offset[refList]); + } +#endif + } + if (m_bAMLTemplateAvailabe[0]) + { + PelUnitBuf srcPred[2]; + srcPred[0] = PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAboveTemplate[0][0], pcBufPredRefTop.Y())); + srcPred[1] = PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAboveTemplate[1][0], pcBufPredRefTop.Y())); +#if INTER_LIC + if (pu.cu->LICFlag) + { + for (int i = 0; i < 2; i++) + { + if (pu.refIdx[i] >= 0) + { + PelBuf & dstBuf = srcPred[i].bufs[0]; + const ClpRng &clpRng = pu.cu->cs->slice->clpRng(COMPONENT_Y); + dstBuf.linearTransform(scale[i], LICshift[i], offset[i], true, clpRng); + } + } + } +#endif + const int iRefIdx0 = pu.refIdx[0]; + const int iRefIdx1 = pu.refIdx[1]; + if (iRefIdx0 >= 0 && iRefIdx1 >= 0) + { + for (int i = 0; i < 2; i++) + { + PelBuf & dstBuf = srcPred[i].bufs[0]; + const int biShift = IF_INTERNAL_PREC - bitDepth; + const Pel biOffset = -IF_INTERNAL_OFFS; + ClpRng clpRngDummy; + 
dstBuf.linearTransform(1, -biShift, biOffset, false, clpRngDummy); + } + } + xWeightedAverageY(pu, srcPred[0], srcPred[1], pcBufPredRefTop, pu.cu->slice->getSPS()->getBitDepths(), + pu.cu->slice->clpRngs()); + } + if (m_bAMLTemplateAvailabe[1]) + { + PelUnitBuf srcPred[2]; + srcPred[0] = PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefLeftTemplate[0][0], pcBufPredRefLeft.Y())); + srcPred[1] = PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefLeftTemplate[1][0], pcBufPredRefLeft.Y())); +#if INTER_LIC + if (pu.cu->LICFlag) + { + for (int i = 0; i < 2; i++) + { + if (pu.refIdx[i] >= 0) + { + PelBuf & dstBuf = srcPred[i].bufs[0]; + const ClpRng &clpRng = pu.cu->cs->slice->clpRng(COMPONENT_Y); + dstBuf.linearTransform(scale[i], LICshift[i], offset[i], true, clpRng); + } + } + } +#endif + const int iRefIdx0 = pu.refIdx[0]; + const int iRefIdx1 = pu.refIdx[1]; + if (iRefIdx0 >= 0 && iRefIdx1 >= 0) + { + for (int i = 0; i < 2; i++) + { + PelBuf & dstBuf = srcPred[i].bufs[0]; + const int biShift = IF_INTERNAL_PREC - bitDepth; + const Pel biOffset = -IF_INTERNAL_OFFS; + ClpRng clpRngDummy; + dstBuf.linearTransform(1, -biShift, biOffset, false, clpRngDummy); + } + } + xWeightedAverageY(pu, srcPred[0], srcPred[1], pcBufPredRefLeft, pu.cu->slice->getSPS()->getBitDepths(), + pu.cu->slice->clpRngs()); + } + } +} +#endif + void InterPrediction::xFillIBCBuffer(CodingUnit &cu) { for (auto &currPU : CU::traverseTUs(cu)) diff --git a/source/Lib/CommonLib/InterPrediction.h b/source/Lib/CommonLib/InterPrediction.h index 0333d844882cb197cc72f69d681d7e2d92bcc57b..e3a13e149d5df0ef4c2ac9e18857f12d96191376 100644 --- a/source/Lib/CommonLib/InterPrediction.h +++ b/source/Lib/CommonLib/InterPrediction.h @@ -51,7 +51,7 @@ #include "ContextModelling.h" // forward declaration class Mv; -#if INTER_LIC || (TM_AMVP || TM_MRG) +#if INTER_LIC || (TM_AMVP || TM_MRG) || ARMC_TM class Reshape; #endif @@ -70,7 +70,7 @@ public: PelUnitBuf m_predictionBeforeLIC; bool m_storeBeforeLIC; #endif -#if INTER_LIC || 
(TM_AMVP || TM_MRG) // note: already refactor +#if INTER_LIC || (TM_AMVP || TM_MRG) || ARMC_TM // note: already refactor Reshape* m_pcReshape; #endif @@ -216,6 +216,13 @@ protected: , bool bilinearMC = false , Pel *srcPadBuf = NULL , int32_t srcPadStride = 0 +#if ARMC_TM + , bool AML = false +#if INTER_LIC + , bool doLic = false + , Mv mvCurr = Mv(0, 0) +#endif +#endif ); void xAddBIOAvg4 (const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng); @@ -227,6 +234,14 @@ protected: #else void xWeightedAverage ( const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied, const bool lumaOnly = false, const bool chromaOnly = false, PelUnitBuf* yuvDstTmp = NULL ); #endif +#if ARMC_TM +#if !INTER_LIC + template <bool TrueA_FalseL> + void xGetPredBlkTpl(const CodingUnit& cu, const ComponentID compID, const CPelBuf& refBuf, const Mv& mv, const int posW, const int posH, const int tplSize, Pel* predBlkTpl); +#endif + void xWeightedAverageY(const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs); + void xPredAffineTpl(const PredictionUnit &pu, const RefPicList &eRefPicList, int* numTemplate, Pel* refLeftTemplate, Pel* refAboveTemplate); +#endif #if AFFINE_ENC_OPT void xPredAffineBlk ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv* _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng, const bool genChromaMv = false, const std::pair<int, int> scalingRatio = SCALE_1X, const bool calGradient = false); #else @@ -248,6 +263,13 @@ protected: MotionInfo m_SubPuMiBuf[(MAX_CU_SIZE * MAX_CU_SIZE) >> 
(MIN_CU_LOG2 << 1)]; +#if ARMC_TM + Pel* m_acYuvCurAMLTemplate[2][MAX_NUM_COMPONENT]; //0: top, 1: left + bool m_bAMLTemplateAvailabe[2]; + Pel* m_acYuvRefAboveTemplate[2][MAX_NUM_COMPONENT]; //0: list0, 1: list1 + Pel* m_acYuvRefLeftTemplate[2][MAX_NUM_COMPONENT]; //0: list0, 1: list1 + Pel* m_acYuvRefAMLTemplate[2][MAX_NUM_COMPONENT]; //0: top, 1: left +#endif #if JVET_J0090_MEMORY_BANDWITH_MEASURE CacheModel *m_cacheModel; #endif @@ -260,7 +282,7 @@ public: InterPrediction(); virtual ~InterPrediction(); -#if INTER_LIC || (TM_AMVP || TM_MRG) +#if INTER_LIC || (TM_AMVP || TM_MRG) || ARMC_TM void init (RdCost* pcRdCost, ChromaFormat chromaFormatIDC, const int ctuSize, Reshape* reshape); #else void init (RdCost* pcRdCost, ChromaFormat chromaFormatIDC, const int ctuSize); @@ -299,6 +321,19 @@ public: #if !AFFINE_RM_CONSTRAINTS_AND_OPT static bool isSubblockVectorSpreadOverLimit( int a, int b, int c, int d, int predType ); #endif +#if ARMC_TM + void adjustInterMergeCandidates(PredictionUnit &pu, MergeCtx& mrgCtx, int mrgCandIdx = -1); + bool xAMLGetCurBlkTemplate(PredictionUnit& pu, int nCurBlkWidth, int nCurBlkHeight); + bool xAMLIsTopTempAvailable(PredictionUnit& pu); + bool xAMLIsLeftTempAvailable(PredictionUnit& pu); + void updateCandList(uint32_t uiCand, Distortion uiCost, uint32_t uiMrgCandNum, uint32_t* RdCandList, Distortion* CandCostList); + void updateCandInfo(MergeCtx& mrgCtx, uint32_t(*RdCandList)[MRG_MAX_NUM_CANDS], int mrgCandIdx = -1); + void getBlkAMLRefTemplate(PredictionUnit &pu, PelUnitBuf &pcBufPredRefTop, PelUnitBuf &pcBufPredRefLeft); + void adjustAffineMergeCandidates(PredictionUnit &pu, AffineMergeCtx& affMrgCtx, int mrgCandIdx = -1); + void updateAffineCandInfo(PredictionUnit &pu, AffineMergeCtx& affMrgCtx, uint32_t(*RdCandList)[AFFINE_MRG_MAX_NUM_CANDS], int mrgCandIdx = -1); + void xGetSublkAMLTemplate(const CodingUnit& cu, const ComponentID compID, const Picture& refPic, const Mv& mv, const int sublkWidth, const int sublkHeight, const int 
posW, const int posH, int* numTemplate, Pel* refLeftTemplate, Pel* refAboveTemplate); + void getAffAMLRefTemplate(PredictionUnit &pu, PelUnitBuf &pcBufPredRefTop, PelUnitBuf &pcBufPredRefLeft); +#endif #if INTER_LIC void xGetLICParamGeneral (const CodingUnit& cu, const ComponentID compID, int* numTemplate, Pel* refLeftTemplate, Pel* refAboveTemplate, Pel* recLeftTemplate, Pel* recAboveTemplate, int& shift, int& scale, int& offset); void xGetSublkTemplate (const CodingUnit& cu, const ComponentID compID, const Picture& refPic, const Mv& mv, const int sublkWidth, const int sublkHeight, const int posW, const int posH, int* numTemplate, Pel* refLeftTemplate, Pel* refAboveTemplate, Pel* recLeftTemplate, Pel* recAboveTemplate); diff --git a/source/Lib/CommonLib/InterpolationFilter.cpp b/source/Lib/CommonLib/InterpolationFilter.cpp index b2a4f35265b1c6c9062483ffff23bac04ebe269f..ace9eb99c6b902049556c8fe1078e8e8e4faf6b8 100644 --- a/source/Lib/CommonLib/InterpolationFilter.cpp +++ b/source/Lib/CommonLib/InterpolationFilter.cpp @@ -1482,7 +1482,7 @@ void InterpolationFilter::filterHor(const ComponentID compID, Pel const *src, in CHECK(frac < 0 || frac >= LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction"); if (nFilterIdx == 1) { -#if TM_AMVP || TM_MRG +#if TM_AMVP || TM_MRG || ARMC_TM filterHor<NTAPS_BILINEAR>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, (biMCForDMVR ? 
m_bilinearFilterPrec4 : m_bilinearFilter)[frac], biMCForDMVR ); #else filterHor<NTAPS_BILINEAR>(clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_bilinearFilterPrec4[frac], biMCForDMVR); @@ -1528,7 +1528,7 @@ void InterpolationFilter::filterHor(const ComponentID compID, Pel const *src, in CHECK( frac < 0 || frac >= LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction" ); if( nFilterIdx == 1 ) { -#if TM_AMVP || TM_MRG +#if TM_AMVP || TM_MRG || ARMC_TM filterHor<NTAPS_BILINEAR>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, (biMCForDMVR ? m_bilinearFilterPrec4 : m_bilinearFilter)[frac], biMCForDMVR ); #else filterHor<NTAPS_BILINEAR>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_bilinearFilterPrec4[frac], biMCForDMVR ); @@ -1618,7 +1618,7 @@ void InterpolationFilter::filterVer(const ComponentID compID, Pel const *src, in CHECK(frac < 0 || frac >= LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction"); if (nFilterIdx == 1) { -#if TM_AMVP || TM_MRG +#if TM_AMVP || TM_MRG || ARMC_TM filterVer<NTAPS_BILINEAR>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, (biMCForDMVR ? m_bilinearFilterPrec4 : m_bilinearFilter)[frac], biMCForDMVR ); #else filterVer<NTAPS_BILINEAR>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_bilinearFilterPrec4[frac], biMCForDMVR); @@ -1662,7 +1662,7 @@ void InterpolationFilter::filterVer(const ComponentID compID, Pel const *src, in CHECK( frac < 0 || frac >= LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction" ); if( nFilterIdx == 1 ) { -#if TM_AMVP || TM_MRG +#if TM_AMVP || TM_MRG || ARMC_TM filterVer<NTAPS_BILINEAR>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, (biMCForDMVR ? 
m_bilinearFilterPrec4 : m_bilinearFilter)[frac], biMCForDMVR ); #else filterVer<NTAPS_BILINEAR>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_bilinearFilterPrec4[frac], biMCForDMVR ); diff --git a/source/Lib/CommonLib/Slice.cpp b/source/Lib/CommonLib/Slice.cpp index d82e0bd2969d87b2dc6110911f75e9d22e0df70e..19d9e489e2db1ec06a5b29884a00f9f763eb885d 100644 --- a/source/Lib/CommonLib/Slice.cpp +++ b/source/Lib/CommonLib/Slice.cpp @@ -3142,6 +3142,9 @@ SPS::SPS() #endif , m_MRL ( false ) , m_MIP ( false ) +#if ARMC_TM +, m_AML ( false ) +#endif , m_GDREnabledFlag ( true ) , m_SubLayerCbpParametersPresentFlag ( true ) , m_rprEnabledFlag ( false ) diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h index 70d7b8bc6b0106e57954db4266f2769999574c9c..2ad0c76fcd6f912ee8357b6884a11d916a82bf26 100644 --- a/source/Lib/CommonLib/Slice.h +++ b/source/Lib/CommonLib/Slice.h @@ -1691,6 +1691,9 @@ private: #endif bool m_MRL; bool m_MIP; +#if ARMC_TM + bool m_AML; +#endif ChromaQpMappingTable m_chromaQpMappingTable; bool m_GDREnabledFlag; bool m_SubLayerCbpParametersPresentFlag; @@ -2117,6 +2120,10 @@ void setCCALFEnabledFlag( bool b ) void setUseMIP ( bool b ) { m_MIP = b; } bool getUseMIP () const { return m_MIP; } +#if ARMC_TM + void setUseAML ( bool b ) { m_AML = b; } + bool getUseAML () const { return m_AML; } +#endif bool getUseWP () const { return m_useWeightPred; } bool getUseWPBiPred () const { return m_useWeightedBiPred; } void setUseWP ( bool b ) { m_useWeightPred = b; } diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 83d7c38c4c00b07468f3815a5f00b0e51a6bad89..1bfdbdec7b08905d272d241d85a0715091164e6c 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -50,7 +50,7 @@ #include <assert.h> #include <cassert> - +#define ARMC_TM 1 //Adaptive reordering of merge candidates with template matching diff --git a/source/Lib/DecoderLib/DecCu.cpp 
b/source/Lib/DecoderLib/DecCu.cpp index f3c5ec18b4cb47fa5a83083a94c4056233f287b7..0e07891a9c9be8131af12200c91c17a9112a0502 100644 --- a/source/Lib/DecoderLib/DecCu.cpp +++ b/source/Lib/DecoderLib/DecCu.cpp @@ -1298,7 +1298,12 @@ void DecCu::xDeriveCUMV( CodingUnit &cu ) affineMergeCtx.mrgCtx = &mrgCtx; } #if AFFINE_MMVD +#if ARMC_TM + int affMrgIdx = pu.cs->sps->getUseAML() && (((pu.mergeIdx / ADAPTIVE_AFFINE_SUB_GROUP_SIZE + 1)*ADAPTIVE_AFFINE_SUB_GROUP_SIZE < pu.cs->sps->getMaxNumAffineMergeCand()) || (pu.mergeIdx / ADAPTIVE_AFFINE_SUB_GROUP_SIZE) == 0) ? pu.mergeIdx / ADAPTIVE_AFFINE_SUB_GROUP_SIZE * ADAPTIVE_AFFINE_SUB_GROUP_SIZE + ADAPTIVE_AFFINE_SUB_GROUP_SIZE - 1 : pu.mergeIdx; + PU::getAffineMergeCand(pu, affineMergeCtx, (pu.afMmvdFlag ? pu.afMmvdBaseIdx : affMrgIdx), pu.afMmvdFlag); +#else PU::getAffineMergeCand(pu, affineMergeCtx, (pu.afMmvdFlag ? pu.afMmvdBaseIdx : pu.mergeIdx), pu.afMmvdFlag); +#endif if (pu.afMmvdFlag) { @@ -1306,8 +1311,26 @@ void DecCu::xDeriveCUMV( CodingUnit &cu ) CHECK(pu.mergeIdx >= pu.cu->slice->getPicHeader()->getMaxNumAffineMergeCand(), "Affine MMVD mode doesn't have a valid base candidate!"); PU::getAfMmvdMvf(pu, affineMergeCtx, affineMergeCtx.mvFieldNeighbours + (pu.mergeIdx << 1), pu.mergeIdx, pu.afMmvdStep, pu.afMmvdDir); } +#if ARMC_TM + else + { + if (pu.cs->sps->getUseAML()) + { + m_pcInterPred->adjustAffineMergeCandidates(pu, affineMergeCtx, pu.mergeIdx); + } + } +#endif +#else +#if ARMC_TM + int affMrgIdx = pu.cs->sps->getUseAML() && (((pu.mergeIdx / ADAPTIVE_AFFINE_SUB_GROUP_SIZE + 1)*ADAPTIVE_AFFINE_SUB_GROUP_SIZE < pu.cs->sps->getMaxNumAffineMergeCand()) || (pu.mergeIdx / ADAPTIVE_AFFINE_SUB_GROUP_SIZE) == 0) ? 
pu.mergeIdx / ADAPTIVE_AFFINE_SUB_GROUP_SIZE * ADAPTIVE_AFFINE_SUB_GROUP_SIZE + ADAPTIVE_AFFINE_SUB_GROUP_SIZE - 1 : pu.mergeIdx; + PU::getAffineMergeCand(pu, affineMergeCtx, affMrgIdx); + if (pu.cs->sps->getUseAML()) + { + m_pcInterPred->adjustAffineMergeCandidates(pu, affineMergeCtx, pu.mergeIdx); + } #else PU::getAffineMergeCand( pu, affineMergeCtx, pu.mergeIdx ); +#endif #endif pu.interDir = affineMergeCtx.interDirNeighbours[pu.mergeIdx]; pu.cu->affineType = affineMergeCtx.affineType[pu.mergeIdx]; @@ -1345,7 +1368,19 @@ void DecCu::xDeriveCUMV( CodingUnit &cu ) if (CU::isIBC(*pu.cu)) PU::getIBCMergeCandidates(pu, mrgCtx, pu.mergeIdx); else +#if ARMC_TM + if (pu.cs->sps->getUseAML()) + { + PU::getInterMergeCandidates(pu, mrgCtx, 0, pu.cs->sps->getUseAML() && (((pu.mergeIdx / ADAPTIVE_SUB_GROUP_SIZE + 1)*ADAPTIVE_SUB_GROUP_SIZE < pu.cs->sps->getMaxNumMergeCand()) || (pu.mergeIdx / ADAPTIVE_SUB_GROUP_SIZE) == 0) ? pu.mergeIdx / ADAPTIVE_SUB_GROUP_SIZE * ADAPTIVE_SUB_GROUP_SIZE + ADAPTIVE_SUB_GROUP_SIZE - 1 : pu.mergeIdx); + m_pcInterPred->adjustInterMergeCandidates(pu, mrgCtx, pu.mergeIdx); + } + else + { + PU::getInterMergeCandidates(pu, mrgCtx, 0, pu.mergeIdx); + } +#else PU::getInterMergeCandidates(pu, mrgCtx, 0, pu.mergeIdx); +#endif mrgCtx.setMergeInfo( pu, pu.mergeIdx ); #if TM_MRG && !MULTI_PASS_DMVR if (pu.tmMergeFlag) diff --git a/source/Lib/DecoderLib/DecLib.cpp b/source/Lib/DecoderLib/DecLib.cpp index f2808eb733dc8302fd443ecce9314ed72466aee3..ec27df33991409c01a52574c12d9c264bff541b0 100644 --- a/source/Lib/DecoderLib/DecLib.cpp +++ b/source/Lib/DecoderLib/DecLib.cpp @@ -1643,7 +1643,7 @@ void DecLib::xActivateParameterSets( const InputNALUnit nalu ) log2SaoOffsetScaleLuma, log2SaoOffsetScaleChroma ); m_cLoopFilter.create(maxDepth); m_cIntraPred.init( sps->getChromaFormatIdc(), sps->getBitDepth( CHANNEL_TYPE_LUMA ) ); -#if INTER_LIC || (TM_AMVP || TM_MRG) +#if INTER_LIC || (TM_AMVP || TM_MRG) || ARMC_TM m_cInterPred.init( &m_cRdCost, 
sps->getChromaFormatIdc(), sps->getMaxCUHeight(), &m_cReshaper); #else m_cInterPred.init( &m_cRdCost, sps->getChromaFormatIdc(), sps->getMaxCUHeight() ); diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp index 96d62b5f8372b48ce07d3d99461577d8834ff1fd..2eb37b75586f8c745dc16d4355bd0a49b0e2d765 100644 --- a/source/Lib/DecoderLib/VLCReader.cpp +++ b/source/Lib/DecoderLib/VLCReader.cpp @@ -2153,6 +2153,9 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS) READ_FLAG( uiCode, "sps_amvr_enabled_flag" ); pcSPS->setAMVREnabledFlag ( uiCode != 0 ); +#if ARMC_TM + READ_FLAG( uiCode, "sps_aml_enabled_flag"); pcSPS->setUseAML ( uiCode != 0 ); +#endif READ_FLAG( uiCode, "sps_bdof_enabled_flag" ); pcSPS->setBDOFEnabledFlag ( uiCode != 0 ); if (pcSPS->getBDOFEnabledFlag()) { diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h index 14ecf41a012ef787653f69c1d7c6ab738dd6b4ae..84929d8a066564622400a086a33adf84a04c27a1 100644 --- a/source/Lib/EncoderLib/EncCfg.h +++ b/source/Lib/EncoderLib/EncCfg.h @@ -373,6 +373,9 @@ protected: #endif bool m_PROF; bool m_BIO; +#if ARMC_TM + bool m_AML; +#endif bool m_SMVD; bool m_compositeRefEnabled; //composite reference @@ -1204,6 +1207,10 @@ public: bool getPROF () const { return m_PROF; } void setBIO(bool b) { m_BIO = b; } bool getBIO() const { return m_BIO; } +#if ARMC_TM + void setAML(bool b) { m_AML = b; } + bool getAML() const { return m_AML; } +#endif #if ENABLE_OBMC void setUseOBMC ( bool n ) { m_OBMC = n; } bool getUseOBMC () const { return m_OBMC; } diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp index d6dc2b317dd81897ab461b923c9eca00e19e1048..3160705f07a621c6dee2874ef15743a451c32468 100644 --- a/source/Lib/EncoderLib/EncCu.cpp +++ b/source/Lib/EncoderLib/EncCu.cpp @@ -2708,6 +2708,9 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& tempCS->initStructData( encTestMode.qp ); MergeCtx mergeCtx; +#if ARMC_TM + MergeCtx 
mergeCtxtmp; +#endif const SPS &sps = *tempCS->sps; #if MERGE_ENC_OPT @@ -2715,6 +2718,9 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& && !(bestCS->area.lumaSize().width < 8 || bestCS->area.lumaSize().height < 8); AffineMergeCtx affineMergeCtx; +#if ARMC_TM + AffineMergeCtx affineMergeCtxTmp; +#endif MergeCtx mrgCtx; #if TM_MRG MergeCtx tmMrgCtx; @@ -2764,12 +2770,25 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& , 0 ); PU::getInterMMVDMergeCandidates(pu, mergeCtx); +#if ARMC_TM + mergeCtxtmp = mergeCtx; + if (sps.getUseAML()) + { + m_pcInterSearch->adjustInterMergeCandidates(pu, mergeCtx); + } +#endif #if TM_MRG && MERGE_ENC_OPT if (cu.cs->sps->getUseDMVDMode()) { cu.firstPU = &pu; pu.tmMergeFlag = true; PU::getInterMergeCandidates(pu, tmMrgCtx, 0); +#if ARMC_TM + if (sps.getUseAML()) + { + m_pcInterSearch->adjustInterMergeCandidates(pu, tmMrgCtx); + } +#endif for( uint32_t uiMergeCand = 0; uiMergeCand < tmMrgCtx.numValidMergeCand; uiMergeCand++ ) { @@ -2817,6 +2836,13 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& pu.regularMergeFlag = false; cu.affine = true; PU::getAffineMergeCand(pu, affineMergeCtx); +#if ARMC_TM + affineMergeCtxTmp = affineMergeCtx; + if (sps.getUseAML()) + { + m_pcInterSearch->adjustAffineMergeCandidates(pu, affineMergeCtx); + } +#endif cu.affine = false; } #endif @@ -3319,7 +3345,11 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& if ( pu.cs->sps->getUseMMVD() ) { #if MERGE_ENC_OPT +#if ARMC_TM + xCheckSATDCostMmvdMerge(tempCS, cu, pu, mergeCtxtmp, acMergeTempBuffer, singleMergeTempBuffer, uiNumMrgSATDCand, RdModeList, candCostList, distParam, ctxStart); +#else xCheckSATDCostMmvdMerge(tempCS, cu, pu, mergeCtx, acMergeTempBuffer, singleMergeTempBuffer, uiNumMrgSATDCand, RdModeList, candCostList, distParam, ctxStart); +#endif #else cu.mmvdSkip = true; pu.regularMergeFlag = true; @@ -3330,7 +3360,11 
@@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& int refineStep = (mmvdMergeCand - (baseIdx * MMVD_MAX_REFINE_NUM)) / 4; if (refineStep >= m_pcEncCfg->getMmvdDisNum()) continue; +#if ARMC_TM + mergeCtxtmp.setMmvdMergeCandiInfo(pu, mmvdMergeCand); +#else mergeCtx.setMmvdMergeCandiInfo(pu, mmvdMergeCand); +#endif PU::spanMotionInfo(pu, mergeCtx); pu.mvRefine = true; @@ -3393,7 +3427,11 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& #if AFFINE_MMVD if (affineMmvdAvail) { +#if ARMC_TM + xCheckSATDCostAffineMmvdMerge(tempCS, cu, pu, affineMergeCtxTmp, mrgCtx, acMergeTempBuffer, singleMergeTempBuffer, uiNumMrgSATDCand, RdModeList, candCostList, distParam, ctxStart); +#else xCheckSATDCostAffineMmvdMerge(tempCS, cu, pu, affineMergeCtx, mrgCtx, acMergeTempBuffer, singleMergeTempBuffer, uiNumMrgSATDCand, RdModeList, candCostList, distParam, ctxStart); +#endif } #endif #endif @@ -3544,7 +3582,11 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& { cu.mmvdSkip = true; pu.regularMergeFlag = true; +#if ARMC_TM + mergeCtxtmp.setMmvdMergeCandiInfo(pu, uiMergeCand); +#else mergeCtx.setMmvdMergeCandiInfo(pu, uiMergeCand); +#endif } #if MERGE_ENC_OPT #if AFFINE_MMVD @@ -3567,6 +3609,22 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& pu.afMmvdDir = (uint8_t)dirIdx; pu.afMmvdStep = (uint8_t)stepIdx; pu.mergeIdx = (uint8_t)(baseIdx + afMmvdBaseIdxToMergeIdxOffset); +#if ARMC_TM + pu.mergeType = affineMergeCtxTmp.mergeType[pu.mergeIdx]; +#if INTER_LIC + pu.cu->LICFlag = affineMergeCtxTmp.LICFlags[pu.mergeIdx]; +#endif + pu.interDir = affineMergeCtxTmp.interDirNeighbours[pu.mergeIdx]; + pu.cu->affineType = affineMergeCtxTmp.affineType[pu.mergeIdx]; + pu.cu->BcwIdx = affineMergeCtxTmp.BcwIdx[pu.mergeIdx]; + pu.mmvdMergeFlag = false; + pu.ciipFlag = false; + + CHECK(pu.mergeIdx >= affineMergeCtxTmp.numValidMergeCand, "Invalid merge index for 
AffineMMVD"); + + MvField mvfMmvd[2][3]; + PU::getAfMmvdMvf(pu, affineMergeCtxTmp, mvfMmvd, pu.mergeIdx, pu.afMmvdStep, pu.afMmvdDir); +#else pu.mergeType = affineMergeCtx.mergeType [pu.mergeIdx]; #if INTER_LIC pu.cu->LICFlag = affineMergeCtx.LICFlags [pu.mergeIdx]; @@ -3581,6 +3639,7 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& MvField mvfMmvd[2][3]; PU::getAfMmvdMvf(pu, affineMergeCtx, mvfMmvd, pu.mergeIdx, pu.afMmvdStep, pu.afMmvdDir); +#endif for (int i = 0; i < 2; i++) { @@ -5098,6 +5157,12 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct pu.cs = tempCS; pu.regularMergeFlag = false; PU::getAffineMergeCand( pu, affineMergeCtx ); +#if ARMC_TM + if (sps.getUseAML()) + { + m_pcInterSearch->adjustAffineMergeCandidates(pu, affineMergeCtx); + } +#endif if ( affineMergeCtx.numValidMergeCand <= 0 ) { @@ -5736,6 +5801,12 @@ void EncCu::xCheckRDCostTMMerge2Nx2N(CodingStructure *&tempCS, CodingStructure * pu.tmMergeFlag = true; PU::getInterMergeCandidates(pu, mergeCtx, 0); +#if ARMC_TM + if (sps.getUseAML()) + { + m_pcInterSearch->adjustInterMergeCandidates(pu, mergeCtx); + } +#endif for( uint32_t uiMergeCand = 0; uiMergeCand < mergeCtx.numValidMergeCand; uiMergeCand++ ) { diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp index 16965b1ac1592b814f890798740006b84e007d93..65361885e723be5c9a19b1eb9302057b72bd409c 100644 --- a/source/Lib/EncoderLib/EncLib.cpp +++ b/source/Lib/EncoderLib/EncLib.cpp @@ -1370,6 +1370,9 @@ void EncLib::xInitSPS( SPS& sps ) sps.setSbTMVPEnabledFlag(m_sbTmvpEnableFlag); sps.setAMVREnabledFlag ( m_ImvMode != IMV_OFF ); sps.setBDOFEnabledFlag ( m_BIO ); +#if ARMC_TM + sps.setUseAML ( m_AML ); +#endif sps.setMaxNumMergeCand(getMaxNumMergeCand()); sps.setMaxNumAffineMergeCand(getMaxNumAffineMergeCand()); sps.setMaxNumIBCMergeCand(getMaxNumIBCMergeCand()); diff --git a/source/Lib/EncoderLib/InterSearch.cpp b/source/Lib/EncoderLib/InterSearch.cpp index 
2a7145c80f8ea7bacd578ea73f6005b05dc961c7..061b1b82bb58637175d2d97b65e8adcbcd0ada28 100644 --- a/source/Lib/EncoderLib/InterSearch.cpp +++ b/source/Lib/EncoderLib/InterSearch.cpp @@ -275,7 +275,7 @@ void InterSearch::init( EncCfg* pcEncCfg, } const ChromaFormat cform = pcEncCfg->getChromaFormatIdc(); -#if INTER_LIC || (TM_AMVP || TM_MRG) +#if INTER_LIC || (TM_AMVP || TM_MRG) || ARMC_TM InterPrediction::init( pcRdCost, cform, maxCUHeight, m_pcReshape ); #else InterPrediction::init( pcRdCost, cform, maxCUHeight ); diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp index 19aeb75d35f7e95bb82c2507245036e6afbf7d34..aad9ff6f41b04b17a339d32f6fa5cff89f5d970d 100644 --- a/source/Lib/EncoderLib/VLCWriter.cpp +++ b/source/Lib/EncoderLib/VLCWriter.cpp @@ -1307,6 +1307,9 @@ void HLSWriter::codeSPS( const SPS* pcSPS ) WRITE_FLAG( pcSPS->getAMVREnabledFlag() ? 1 : 0, "sps_amvr_enabled_flag" ); +#if ARMC_TM + WRITE_FLAG( pcSPS->getUseAML() ? 1 : 0, "sps_aml_enabled_flag" ); +#endif WRITE_FLAG( pcSPS->getBDOFEnabledFlag() ? 1 : 0, "sps_bdof_enabled_flag" ); if (pcSPS->getBDOFEnabledFlag()) {