From 8f9519a985b2406421bcc1d85e98b2710420fc20 Mon Sep 17 00:00:00 2001 From: Jie Chen <jiechen.cj@alibaba-inc.com> Date: Tue, 1 Nov 2022 17:47:21 +0000 Subject: [PATCH] JVET-AB0112: EE2-2.6 Affine DMVR --- source/Lib/CommonLib/CommonDef.h | 4 + source/Lib/CommonLib/ContextModelling.cpp | 122 +++++ source/Lib/CommonLib/ContextModelling.h | 6 + source/Lib/CommonLib/InterPrediction.cpp | 617 +++++++++++++++++++++- source/Lib/CommonLib/InterPrediction.h | 27 +- source/Lib/CommonLib/TypeDef.h | 2 +- source/Lib/CommonLib/UnitTools.cpp | 5 +- source/Lib/DecoderLib/DecCu.cpp | 40 +- source/Lib/EncoderLib/EncCu.cpp | 57 +- source/Lib/EncoderLib/EncCu.h | 3 + 10 files changed, 863 insertions(+), 20 deletions(-) diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h index f3c86093d..a9d4e4668 100644 --- a/source/Lib/CommonLib/CommonDef.h +++ b/source/Lib/CommonLib/CommonDef.h @@ -852,6 +852,10 @@ static const int BDMVR_CENTER_POSITION = BDMVR_INTME_RANGE * BDMVR_BUF_STRIDE static const int BM_MRG_MAX_NUM_CANDS = 6; ///< maximum number of BM merge candidates (note: should be at most equal to MRG_MAX_NUM_CANDS) static const int BM_MRG_SUB_PU_INT_MAX_SRCH_ROUND = 3; #endif +#if JVET_AB0112_AFFINE_DMVR +static const int AFFINE_DMVR_MAX_NUM_ITERATIONS = 26; +static const int AFFINE_DMVR_SEARCH_RANGE = 3; +#endif #endif #if TM_AMVP || TM_MRG || JVET_Z0084_IBC_TM || MULTI_PASS_DMVR static const int DECODER_SIDE_MV_WEIGHT = 4; ///< lambda for decoder-side derived MVs diff --git a/source/Lib/CommonLib/ContextModelling.cpp b/source/Lib/CommonLib/ContextModelling.cpp index 9494bd3a4..bb791a5c9 100644 --- a/source/Lib/CommonLib/ContextModelling.cpp +++ b/source/Lib/CommonLib/ContextModelling.cpp @@ -1211,7 +1211,129 @@ void MergeCtx::setMmvdMergeCandiInfo(PredictionUnit& pu, int candIdx) PU::restrictBiPredMergeCandsOne(pu); } +#if JVET_AB0112_AFFINE_DMVR +bool AffineMergeCtx::xCheckSimilarMotion(int mergeCandIndex, uint32_t mvdSimilarityThresh) const +{ + if (mvFieldNeighbours[(mergeCandIndex << 1)][0].refIdx < 0 && mvFieldNeighbours[(mergeCandIndex << 1) + 1][0].refIdx < 0) + { + return true; + } + + if (mvdSimilarityThresh > 1) + { + int mvdTh = mvdSimilarityThresh; + for (uint32_t ui = 0; ui < mergeCandIndex; ui++) + { + if (interDirNeighbours[ui] == interDirNeighbours[mergeCandIndex]) + { + if (interDirNeighbours[ui] == 3) + { + if (mvFieldNeighbours[(ui << 1)][0].refIdx == mvFieldNeighbours[(mergeCandIndex << 1)][0].refIdx && + mvFieldNeighbours[(ui << 1) + 1][0].refIdx == mvFieldNeighbours[(mergeCandIndex << 1) + 1][0].refIdx) + { + Mv mvDiff0L0 = mvFieldNeighbours[(ui << 1)][0].mv - mvFieldNeighbours[(mergeCandIndex << 1)][0].mv; + Mv mvDiff0L1 = mvFieldNeighbours[(ui << 1) + 1][0].mv - mvFieldNeighbours[(mergeCandIndex << 1) + 1][0].mv; + + Mv mvDiff1L0 = mvFieldNeighbours[(ui << 1)][1].mv - mvFieldNeighbours[(mergeCandIndex << 1)][1].mv; + Mv mvDiff1L1 = mvFieldNeighbours[(ui << 1) + 1][1].mv - mvFieldNeighbours[(mergeCandIndex << 1) + 1][1].mv; + + Mv mvDiff2L0 = mvFieldNeighbours[(ui << 1)][2].mv - mvFieldNeighbours[(mergeCandIndex << 1)][2].mv; + Mv mvDiff2L1 = mvFieldNeighbours[(ui << 1) + 1][2].mv - mvFieldNeighbours[(mergeCandIndex << 1) + 1][2].mv; + if (mvDiff0L0.getAbsHor() < mvdTh && mvDiff0L0.getAbsVer() < mvdTh + && mvDiff0L1.getAbsHor() < mvdTh && mvDiff0L1.getAbsVer() < mvdTh + &&mvDiff1L0.getAbsHor() < mvdTh && mvDiff1L0.getAbsVer() < mvdTh + && mvDiff1L1.getAbsHor() < mvdTh && mvDiff1L1.getAbsVer() < mvdTh + &&mvDiff2L0.getAbsHor() < mvdTh && mvDiff2L0.getAbsVer() < mvdTh + && mvDiff2L1.getAbsHor() < mvdTh && mvDiff2L1.getAbsVer() < mvdTh + ) + { + return true; + } + } + } + else if (interDirNeighbours[ui] == 1) + { + if (mvFieldNeighbours[(ui << 1)][0].refIdx == mvFieldNeighbours[(mergeCandIndex << 1)][0].refIdx) + { + Mv mvDiff0 = mvFieldNeighbours[(ui << 1)][0].mv - mvFieldNeighbours[(mergeCandIndex << 1)][0].mv; + Mv mvDiff1 = mvFieldNeighbours[(ui << 1)][1].mv - mvFieldNeighbours[(mergeCandIndex << 1)][1].mv; + Mv mvDiff2 = mvFieldNeighbours[(ui << 1)][2].mv - mvFieldNeighbours[(mergeCandIndex << 1)][2].mv; + if (mvDiff0.getAbsHor() < mvdTh && mvDiff0.getAbsVer() < mvdTh + &&mvDiff1.getAbsHor() < mvdTh && mvDiff1.getAbsVer() < mvdTh + &&mvDiff2.getAbsHor() < mvdTh && mvDiff2.getAbsVer() < mvdTh + ) + { + return true; + } + } + } + else if (interDirNeighbours[ui] == 2) + { + if (mvFieldNeighbours[(ui << 1) + 1][0].refIdx == mvFieldNeighbours[(mergeCandIndex << 1) + 1][0].refIdx) + { + Mv mvDiff0 = mvFieldNeighbours[(ui << 1) + 1][0].mv - mvFieldNeighbours[(mergeCandIndex << 1) + 1][0].mv; + Mv mvDiff1 = mvFieldNeighbours[(ui << 1) + 1][1].mv - mvFieldNeighbours[(mergeCandIndex << 1) + 1][1].mv; + Mv mvDiff2 = mvFieldNeighbours[(ui << 1) + 1][2].mv - mvFieldNeighbours[(mergeCandIndex << 1) + 1][2].mv; + if (mvDiff0.getAbsHor() < mvdTh && mvDiff0.getAbsVer() < mvdTh + &&mvDiff1.getAbsHor() < mvdTh && mvDiff1.getAbsVer() < mvdTh + && mvDiff2.getAbsHor() < mvdTh && mvDiff2.getAbsVer() < mvdTh + ) + { + return true; + } + } + } + } + } + return false; + } + for (uint32_t ui = 0; ui < mergeCandIndex; ui++) + { + if (interDirNeighbours[ui] == interDirNeighbours[mergeCandIndex]) + { + if (interDirNeighbours[ui] == 3) + { + if (mvFieldNeighbours[(ui << 1)][0].refIdx == mvFieldNeighbours[(mergeCandIndex << 1)][0].refIdx && + mvFieldNeighbours[(ui << 1) + 1][0].refIdx == mvFieldNeighbours[(mergeCandIndex << 1) + 1][0].refIdx && + mvFieldNeighbours[(ui << 1)][0].mv == mvFieldNeighbours[(mergeCandIndex << 1)][0].mv && + mvFieldNeighbours[(ui << 1) + 1][0].mv == mvFieldNeighbours[(mergeCandIndex << 1) + 1][0].mv&& + mvFieldNeighbours[(ui << 1)][1].mv == mvFieldNeighbours[(mergeCandIndex << 1)][1].mv && + mvFieldNeighbours[(ui << 1) + 1][1].mv == mvFieldNeighbours[(mergeCandIndex << 1) + 1][1].mv&& + mvFieldNeighbours[(ui << 1)][2].mv == mvFieldNeighbours[(mergeCandIndex << 1)][2].mv && + mvFieldNeighbours[(ui << 1) + 1][2].mv == mvFieldNeighbours[(mergeCandIndex << 1) + 1][2].mv + ) + { + return true; + } + } + else if (interDirNeighbours[ui] == 1) + { + if (mvFieldNeighbours[(ui << 1)][0].refIdx == mvFieldNeighbours[(mergeCandIndex << 1)][0].refIdx && + mvFieldNeighbours[(ui << 1)][0].mv == mvFieldNeighbours[(mergeCandIndex << 1)][0].mv&& + mvFieldNeighbours[(ui << 1)][1].mv == mvFieldNeighbours[(mergeCandIndex << 1)][1].mv&& + mvFieldNeighbours[(ui << 1)][2].mv == mvFieldNeighbours[(mergeCandIndex << 1)][2].mv + ) + { + return true; + } + } + else if (interDirNeighbours[ui] == 2) + { + if (mvFieldNeighbours[(ui << 1) + 1][0].refIdx == mvFieldNeighbours[(mergeCandIndex << 1) + 1][0].refIdx && + mvFieldNeighbours[(ui << 1) + 1][0].mv == mvFieldNeighbours[(mergeCandIndex << 1) + 1][0].mv && + mvFieldNeighbours[(ui << 1) + 1][1].mv == mvFieldNeighbours[(mergeCandIndex << 1) + 1][1].mv && + mvFieldNeighbours[(ui << 1) + 1][2].mv == mvFieldNeighbours[(mergeCandIndex << 1) + 1][2].mv + ) + { + return true; + } + } + } + } + return false; +} +#endif #if JVET_AA0061_IBC_MBVD bool MergeCtx::setIbcMbvdMergeCandiInfo(PredictionUnit& pu, int candIdx, int candIdxMaped) { diff --git a/source/Lib/CommonLib/ContextModelling.h b/source/Lib/CommonLib/ContextModelling.h index 2bc404197..81dd14e66 100644 --- a/source/Lib/CommonLib/ContextModelling.h +++ b/source/Lib/CommonLib/ContextModelling.h @@ -659,6 +659,9 @@ public: MergeCtx *mrgCtx; MergeType mergeType[RMVF_AFFINE_MRG_MAX_CAND_LIST_SIZE]; +#if JVET_AB0112_AFFINE_DMVR + bool xCheckSimilarMotion(int mergeCandIndex, uint32_t mvdSimilarityThresh = 1) const; +#endif }; #else class AffineMergeCtx @@ -679,6 +682,9 @@ public: MergeCtx *mrgCtx; MergeType mergeType[AFFINE_MRG_MAX_NUM_CANDS]; +#if JVET_AB0112_AFFINE_DMVR + bool xCheckSimilarMotion(int mergeCandIndex, uint32_t mvdSimilarityThresh = 1) const; +#endif }; #endif diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp index 1077287a8..0a2cfe236 100644 --- a/source/Lib/CommonLib/InterPrediction.cpp +++ b/source/Lib/CommonLib/InterPrediction.cpp @@ -137,6 +137,12 @@ InterPrediction::InterPrediction() m_filteredBlockTmp[i][c] = nullptr; } } +#if JVET_AB0112_AFFINE_DMVR + for (uint32_t i = 0; i < NUM_REF_PIC_LIST_01; i++) + { + m_affineDmvrBlockTmp[i] = nullptr; + } +#endif m_cYuvPredTempDMVRL1 = nullptr; m_cYuvPredTempDMVRL0 = nullptr; for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++) @@ -144,7 +150,6 @@ InterPrediction::InterPrediction() m_cRefSamplesDMVRL0[ch] = nullptr; m_cRefSamplesDMVRL1[ch] = nullptr; } - #if INTER_LIC m_LICMultApprox[0] = 0; for (int k = 1; k < 64; k++) @@ -299,7 +304,13 @@ void InterPrediction::destroy() m_filteredBlockTmp[i][c] = nullptr; } } - +#if JVET_AB0112_AFFINE_DMVR + for (uint32_t i = 0; i < 2; i++) + { + xFree(m_affineDmvrBlockTmp[i]); + m_affineDmvrBlockTmp[i] = nullptr; + } +#endif m_geoPartBuf[0].destroy(); m_geoPartBuf[1].destroy(); m_colorTransResiBuf[0].destroy(); @@ -475,7 +486,18 @@ void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC, cons m_acYuvPred[i][c] = ( Pel* ) xMalloc( Pel, MAX_CU_SIZE * MAX_CU_SIZE ); } } - +#if JVET_AB0112_AFFINE_DMVR + int memBlockWidth = AFFINE_MIN_BLOCK_SIZE + (AFFINE_DMVR_SEARCH_RANGE << 1); + int memBlockHeight = AFFINE_MIN_BLOCK_SIZE + (AFFINE_DMVR_SEARCH_RANGE << 1); + int memBlockWidthOffset = BDMVR_SIMD_IF_FACTOR - (memBlockWidth & (BDMVR_SIMD_IF_FACTOR - 1)); + memBlockWidthOffset &= (BDMVR_SIMD_IF_FACTOR - 1); + int memBlockWidthExt = memBlockWidth + memBlockWidthOffset; // This ensures that iWidthExt is a factor-of-n number, assuming BDMVR_SIMD_IF_FACTOR is equal to n + int memBlockNum = (MAX_CU_SIZE / AFFINE_MIN_BLOCK_SIZE)*(MAX_CU_SIZE / AFFINE_MIN_BLOCK_SIZE); + for (uint32_t i = 0; i < 2; i++) + { + m_affineDmvrBlockTmp[i] = (Pel*)xMalloc(Pel, memBlockWidthExt*memBlockHeight*memBlockNum); + } +#endif m_geoPartBuf[0].create(UnitArea(chromaFormatIDC, Area(0, 0, MAX_CU_SIZE, MAX_CU_SIZE))); m_geoPartBuf[1].create(UnitArea(chromaFormatIDC, Area(0, 0, MAX_CU_SIZE, MAX_CU_SIZE))); m_colorTransResiBuf[0].create(UnitArea(chromaFormatIDC, Area(0, 0, MAX_CU_SIZE, MAX_CU_SIZE))); @@ -12033,7 +12055,42 @@ void InterPrediction::processBDMVRSubPU(PredictionUnit& pu, bool subPURefine) } #endif +#if JVET_AB0112_AFFINE_DMVR +bool InterPrediction::processBDMVR4Affine(PredictionUnit& pu) +{ + if (!pu.cs->slice->getSPS()->getUseDMVDMode() || !pu.cs->slice->isInterB()) + { + return false; + } + CHECK(!pu.mergeFlag, "Merge mode must be used here"); + CHECK(pu.refIdx[0] < 0 || pu.refIdx[1] < 0, "Bilateral DMVR is performed for bi-prediction"); + + const int lumaArea = pu.lumaSize().area(); + Mv mvFinal_PU[2]; + Mv mvInitial_PU[2]; + mvFinal_PU[0].setZero(); + mvFinal_PU[1].setZero(); + mvInitial_PU[0].setZero(); + mvInitial_PU[1].setZero(); + { + Distortion minCost = std::numeric_limits<Distortion>::max(); + bool bUseMR = lumaArea > 64; +#if JVET_Y0089_DMVR_BCW + bUseMR |= (pu.cu->BcwIdx != BCW_DEFAULT); +#endif + minCost = xBDMVRMvSquareSearchAffine<false>(mvFinal_PU, minCost, pu, mvInitial_PU, AFFINE_DMVR_MAX_NUM_ITERATIONS, MV_FRACTIONAL_BITS_INTERNAL, bUseMR, false); + if (minCost > 0) + { + minCost = xBDMVRMvSquareSearchAffine<true>(mvFinal_PU, minCost, pu, mvInitial_PU, 2, MV_FRACTIONAL_BITS_INTERNAL - 1, bUseMR, false); + } + } + // span motion to subPU + m_bdmvrSubPuMvBuf[REF_PIC_LIST_0][0] = mvFinal_PU[0]; + m_bdmvrSubPuMvBuf[REF_PIC_LIST_1][0] = mvFinal_PU[1]; + return true; +} +#endif #if JVET_AA0093_REFINED_MOTION_FOR_ARMC bool InterPrediction::processBDMVR(PredictionUnit& pu, int step, Distortion* tmCost) #else @@ -12292,7 +12349,340 @@ bool InterPrediction::processBDMVR(PredictionUnit& pu) pu.mv[1] = puOrgMv[1]; return true; } +#if JVET_AB0112_AFFINE_DMVR +void InterPrediction::xBDMVRFillBlkPredPelBufferAffine(const PredictionUnit& pu, const Picture& refPic, const Mv(&_mv)[3], PelUnitBuf& dstUnitBuf, const ClpRng& clpRng) +{ + + const ComponentID compID = COMPONENT_Y; + const bool bi = true; + JVET_J0090_SET_REF_PICTURE(refPic, compID); + const ChromaFormat chFmt = pu.chromaFormat; + Mv mvLT = _mv[0]; + Mv mvRT = _mv[1]; + Mv mvLB = _mv[2]; + + const int width = pu.Y().width; + const int height = pu.Y().height; + const int iBit = MAX_CU_DEPTH; + int iDMvHorX, iDMvHorY, iDMvVerX, iDMvVerY; + iDMvHorX = (mvRT - mvLT).getHor() << (iBit - floorLog2(width)); + iDMvHorY = (mvRT - mvLT).getVer() << (iBit - floorLog2(width)); + if (pu.cu->affineType == AFFINEMODEL_6PARAM) + { + iDMvVerX = (mvLB - mvLT).getHor() << (iBit - floorLog2(height)); + iDMvVerY = (mvLB - mvLT).getVer() << (iBit - floorLog2(height)); + } + else + { + iDMvVerX = -iDMvHorY; + iDMvVerY = iDMvHorX; + } + const SPS &sps = *pu.cs->sps; + const int shift = iBit - 4 + MV_FRACTIONAL_BITS_INTERNAL; + bool wrapRef = false; + bool isLast = !bi; + const int MAX_FILTER_SIZE = NTAPS_BILINEAR; + const int dstExtW = ((width + 7) >> 3) << 3; + const int refExtH = height + MAX_FILTER_SIZE - 1; + PelBuf tmpBuf = PelBuf(m_filteredBlockTmp[0][compID], dstExtW, refExtH); + PelBuf &dstBuf = dstUnitBuf.bufs[compID]; + int blockWidth = AFFINE_MIN_BLOCK_SIZE; + int blockHeight = AFFINE_MIN_BLOCK_SIZE; + + if (iDMvHorX == 0 && iDMvHorY == 0) + { + blockWidth = width; + } + else + { + int maxDmv = std::max(abs(iDMvHorX), abs(iDMvHorY)) * blockWidth; + int TH = 1 << (iBit - 1); // Half pel + while (maxDmv < TH && blockWidth < width) + { + blockWidth <<= 1; + maxDmv <<= 1; + } + } + if (iDMvVerX == 0 && iDMvVerY == 0) + { + blockHeight = height; + } + else + { + int maxDmv = std::max(abs(iDMvVerX), abs(iDMvVerY)) * blockHeight; + int TH = 1 << (iBit - 1); // Half pel + while (maxDmv < TH && blockHeight < height) + { + blockHeight <<= 1; + maxDmv <<= 1; + } + } + int iMvScaleHor = mvLT.getHor() << iBit; + int iMvScaleVer = mvLT.getVer() << iBit; + int iMvScaleTmpHor0 = iMvScaleHor + ((iDMvHorX * blockWidth + iDMvVerX * blockHeight) >> 1); + int iMvScaleTmpVer0 = iMvScaleVer + ((iDMvHorY * blockWidth + iDMvVerY * blockHeight) >> 1); + + const bool biMCForDMVR = true; + const int nFilterIdx = 1; + const int vFilterSize = NTAPS_BILINEAR; + for (int h = 0; h < height; h += blockHeight) + { + for (int w = 0; w < width; w += blockWidth) + { + int iMvScaleTmpHor, iMvScaleTmpVer; + { + iMvScaleTmpHor = iMvScaleTmpHor0 + iDMvHorX * w + iDMvVerX * h; + iMvScaleTmpVer = iMvScaleTmpVer0 + iDMvHorY * w + iDMvVerY * h; + } + roundAffineMv(iMvScaleTmpHor, iMvScaleTmpVer, shift); + Mv tmpMv(iMvScaleTmpHor, iMvScaleTmpVer); + tmpMv.clipToStorageBitDepth(); + iMvScaleTmpHor = tmpMv.getHor(); + iMvScaleTmpVer = tmpMv.getVer(); + // clip and scale + if (refPic.isWrapAroundEnabled(pu.cs->pps)) + { + Mv tmpMv(iMvScaleTmpHor, iMvScaleTmpVer); + wrapRef = wrapClipMv(tmpMv, Position(pu.Y().x + w, pu.Y().y + h), Size(blockWidth, blockHeight), &sps, pu.cs->pps); + iMvScaleTmpHor = tmpMv.getHor(); + iMvScaleTmpVer = tmpMv.getVer(); + } + else + { + wrapRef = false; + if (refPic.isRefScaled(pu.cs->pps) == false) + { + clipMv(tmpMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps); + iMvScaleTmpHor = tmpMv.getHor(); + iMvScaleTmpVer = tmpMv.getVer(); + } + } + // get the MV in high precision + int xFrac, yFrac, xInt, yInt; + xInt = iMvScaleTmpHor >> 4; + xFrac = iMvScaleTmpHor & 15; + yInt = iMvScaleTmpVer >> 4; + yFrac = iMvScaleTmpVer & 15; + const CPelBuf refBuf = refPic.getRecoBuf( + CompArea(compID, chFmt, pu.blocks[compID].offset(xInt + w, yInt + h), pu.blocks[compID]), wrapRef); + + Pel *ref = (Pel *)refBuf.buf; + Pel *dst = dstBuf.buf + w + h * dstBuf.stride; + + int refStride = refBuf.stride; + int dstStride = dstBuf.stride; + + int bw = blockWidth; + int bh = blockHeight; + + if (yFrac == 0) + { + m_if.filterHor(compID, (Pel*)ref, refStride, dst, dstStride, bw, bh, xFrac, isLast, chFmt, clpRng, nFilterIdx, biMCForDMVR, false); + } + else if (xFrac == 0) + { + m_if.filterVer(compID, (Pel*)ref, refStride, dst, dstStride, bw, bh, yFrac, true, isLast, chFmt, clpRng, nFilterIdx, biMCForDMVR, false); + } + else + { + m_if.filterHor(compID, (Pel*)ref - ((vFilterSize >> 1) - 1)*refStride, refStride, tmpBuf.buf, tmpBuf.stride, bw, bh + vFilterSize - 1, xFrac, false, chFmt, clpRng, nFilterIdx, biMCForDMVR, false); + JVET_J0090_SET_CACHE_ENABLE(false); + m_if.filterVer(compID, tmpBuf.buf + ((vFilterSize >> 1) - 1)*tmpBuf.stride, tmpBuf.stride, dst, dstStride, bw, bh, yFrac, false, isLast, chFmt, clpRng, nFilterIdx, biMCForDMVR, false); + JVET_J0090_SET_CACHE_ENABLE(true); + } + } + } +} +void InterPrediction::xBDMVRFillBlkPredPelBufferAffineOPT(const PredictionUnit& pu, const Picture& refPic, const RefPicList eRefPicList, const Mv(&_mv)[3], const Mv mvCur, const Mv mvCenter, const bool doInterpolation, PelUnitBuf& dstUnitBuf, const ClpRng& clpRng + , const bool profTh + , const int blockWidth + , const int blockHeight + , const int memBlockWidthExt + , const int memBlockHeight + , const int memHeight + , const int memStride +) +{ + const ComponentID compID = COMPONENT_Y; + const bool bi = true; + JVET_J0090_SET_REF_PICTURE(refPic, compID); + const ChromaFormat chFmt = pu.chromaFormat; + Mv mvOffset = mvCur - mvCenter; + CHECK(mvCenter.hor != 0 || mvCenter.ver != 0, ""); + + // get affine sub-block width and height + const int width = pu.Y().width; + const int height = pu.Y().height; + bool wrapRef = false; + bool isLast = !bi; + const int MAX_FILTER_SIZE = NTAPS_BILINEAR; + PelBuf &dstBuf = dstUnitBuf.bufs[compID]; + PelBuf memBuf = PelBuf(m_affineDmvrBlockTmp[eRefPicList], memStride, memHeight); + const int refExtH = memBlockHeight + MAX_FILTER_SIZE - 1; + PelBuf tmpBuf = PelBuf(m_filteredBlockTmp[0][compID], memBlockWidthExt, refExtH); + // get prediction block by block + const bool biMCForDMVR = true; + const int nFilterIdx = 1; + const int vFilterSize = NTAPS_BILINEAR; + for (int h = 0, memH = 0, hn = 0; h < height; h += blockHeight, memH += memBlockHeight, hn++) + { + for (int w = 0, memW = 0, wn = 0; w < width; w += blockWidth, memW += memBlockWidthExt, wn++) + { + int xOrgFrac, yOrgFrac, xOrgInt, yOrgInt; + xOrgInt = m_affineSbMvIntX[eRefPicList][wn][hn]; + yOrgInt = m_affineSbMvIntY[eRefPicList][wn][hn]; + xOrgFrac = m_affineSbMvFracX[eRefPicList][wn][hn]; + yOrgFrac = m_affineSbMvFracY[eRefPicList][wn][hn]; + if (doInterpolation) + { + const CPelBuf refBuf = refPic.getRecoBuf(CompArea(compID, chFmt, pu.blocks[compID].offset(xOrgInt + w, yOrgInt + h), pu.blocks[compID]), wrapRef); + Pel *ref = (Pel *)refBuf.buf; + Pel *dst = memBuf.buf + memW + memH * memBuf.stride; + int refStride = refBuf.stride; + int dstStride = memBuf.stride; + int bw = memBlockWidthExt; + int bh = memBlockHeight; + if (yOrgFrac == 0) + { + m_if.filterHor(compID, (Pel*)ref, refStride, dst, dstStride, bw, bh, xOrgFrac, isLast, chFmt, clpRng, nFilterIdx, biMCForDMVR, false); + } + else if (xOrgFrac == 0) + { + m_if.filterVer(compID, (Pel*)ref, refStride, dst, dstStride, bw, bh, yOrgFrac, true, isLast, chFmt, clpRng, nFilterIdx, biMCForDMVR, false); + } + else + { + m_if.filterHor(compID, (Pel*)ref - ((vFilterSize >> 1) - 1)*refStride, refStride, tmpBuf.buf, tmpBuf.stride, bw, bh + vFilterSize - 1, xOrgFrac, false, chFmt, clpRng, nFilterIdx, biMCForDMVR, false); + JVET_J0090_SET_CACHE_ENABLE(false); + m_if.filterVer(compID, tmpBuf.buf + ((vFilterSize >> 1) - 1)*tmpBuf.stride, tmpBuf.stride, dst, dstStride, bw, bh, yOrgFrac, false, isLast, chFmt, clpRng, nFilterIdx, biMCForDMVR, false); + JVET_J0090_SET_CACHE_ENABLE(true); + } + } + int xTmpInt, yTmpInt; + xTmpInt = xOrgInt + AFFINE_DMVR_SEARCH_RANGE + (mvOffset.hor >> MV_FRACTIONAL_BITS_INTERNAL); + yTmpInt = yOrgInt + AFFINE_DMVR_SEARCH_RANGE + (mvOffset.ver >> MV_FRACTIONAL_BITS_INTERNAL); + Pel *dst = dstBuf.buf + w + h * dstBuf.stride; + Pel *src = memBuf.buf + (memW + xTmpInt - xOrgInt) + (memH + yTmpInt - yOrgInt) * memBuf.stride; + int dstStride = dstBuf.stride; + int srcStride = memBuf.stride; + for (unsigned y = 0; y < blockHeight; y++) + { + memcpy(dst, src, blockWidth * sizeof(Pel)); + dst += dstStride; + src += srcStride; + } + } + } +} +void InterPrediction::xCalculteAffineParameters(const PredictionUnit& pu, const Picture& refPic, const Mv(&_mv)[3], int reflist, bool& profTH, int& blockWidth, int& blockHeight, int& memBlockWidthExt, int& memBlockHeight, int& memHeight, int& memStride) +{ + Mv mvLT = _mv[0]; + Mv mvRT = _mv[1]; + Mv mvLB = _mv[2]; + + const int width = pu.Y().width; + const int height = pu.Y().height; + const int iBit = MAX_CU_DEPTH; + const int shift = iBit - 4 + MV_FRACTIONAL_BITS_INTERNAL; + int iDMvHorX, iDMvHorY, iDMvVerX, iDMvVerY; + iDMvHorX = (mvRT - mvLT).getHor() << (iBit - floorLog2(width)); + iDMvHorY = (mvRT - mvLT).getVer() << (iBit - floorLog2(width)); + if (pu.cu->affineType == AFFINEMODEL_6PARAM) + { + iDMvVerX = (mvLB - mvLT).getHor() << (iBit - floorLog2(height)); + iDMvVerY = (mvLB - mvLT).getVer() << (iBit - floorLog2(height)); + } + else + { + iDMvVerX = -iDMvHorY; + iDMvVerY = iDMvHorX; + } + blockWidth = AFFINE_MIN_BLOCK_SIZE; + blockHeight = AFFINE_MIN_BLOCK_SIZE; + + if (iDMvHorX == 0 && iDMvHorY == 0) + { + blockWidth = width; + } + else + { + int maxDmv = std::max(abs(iDMvHorX), abs(iDMvHorY)) * blockWidth; + int TH = 1 << (iBit - 1); // Half pel + while (maxDmv < TH && blockWidth < width) + { + blockWidth <<= 1; + maxDmv <<= 1; + } + } + if (iDMvVerX == 0 && iDMvVerY == 0) + { + blockHeight = height; + } + else + { + int maxDmv = std::max(abs(iDMvVerX), abs(iDMvVerY)) * blockHeight; + int TH = 1 << (iBit - 1); // Half pel + while (maxDmv < TH && blockHeight < height) + { + blockHeight <<= 1; + maxDmv <<= 1; + } + } + + memBlockHeight = blockHeight + (AFFINE_DMVR_SEARCH_RANGE << 1); + int memBlockWidth = blockWidth + (AFFINE_DMVR_SEARCH_RANGE << 1); + int memBlockWidthOffset = BDMVR_SIMD_IF_FACTOR - (memBlockWidth & (BDMVR_SIMD_IF_FACTOR - 1)); + memBlockWidthOffset &= (BDMVR_SIMD_IF_FACTOR - 1); + memBlockWidthExt = memBlockWidth + memBlockWidthOffset; // This ensures that iWidthExt is a factor-of-n number, assuming BDMVR_SIMD_IF_FACTOR is equal to n + memStride = memBlockWidthExt * (width / blockWidth); + memHeight = memBlockHeight * (height / blockHeight); + int iMvScaleHor, iMvScaleVer, iMvScaleTmpHor0, iMvScaleTmpVer0; + iMvScaleHor = mvLT.getHor() << iBit; + iMvScaleVer = mvLT.getVer() << iBit; + iMvScaleTmpHor0 = iMvScaleHor + ((iDMvHorX * blockWidth + iDMvVerX * blockHeight) >> 1); + iMvScaleTmpVer0 = iMvScaleVer + ((iDMvHorY * blockWidth + iDMvVerY * blockHeight) >> 1); + for (int h = 0, hn = 0; h < height; h += blockHeight, hn++) + { + for (int w = 0, wn = 0; w < width; w += blockWidth, wn++) + { + int iMvScaleOrgHor, iMvScaleOrgVer; + iMvScaleOrgHor = iMvScaleTmpHor0 + iDMvHorX * w + iDMvVerX * h - (AFFINE_DMVR_SEARCH_RANGE << (MV_FRACTIONAL_BITS_INTERNAL + iBit)); + iMvScaleOrgVer = iMvScaleTmpVer0 + iDMvHorY * w + iDMvVerY * h - (AFFINE_DMVR_SEARCH_RANGE << (MV_FRACTIONAL_BITS_INTERNAL + iBit)); + roundAffineMv(iMvScaleOrgHor, iMvScaleOrgVer, shift); + Mv tmpOrgMv(iMvScaleOrgHor, iMvScaleOrgVer); + tmpOrgMv.clipToStorageBitDepth(); + iMvScaleOrgHor = tmpOrgMv.getHor(); + iMvScaleOrgVer = tmpOrgMv.getVer(); + // clip and scale + if (refPic.isWrapAroundEnabled(pu.cs->pps)) + { + Mv tmpOrgMv(iMvScaleOrgHor, iMvScaleOrgVer); + iMvScaleOrgHor = tmpOrgMv.getHor(); + iMvScaleOrgVer = tmpOrgMv.getVer(); + } + else + { + if (refPic.isRefScaled(pu.cs->pps) == false) + { + clipMv(tmpOrgMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps); + iMvScaleOrgHor = tmpOrgMv.getHor(); + iMvScaleOrgVer = tmpOrgMv.getVer(); + } + } + int xOrgFrac, yOrgFrac, xOrgInt, yOrgInt; + xOrgInt = iMvScaleOrgHor >> 4; + xOrgFrac = iMvScaleOrgHor & 15; + yOrgInt = iMvScaleOrgVer >> 4; + yOrgFrac = iMvScaleOrgVer & 15; + m_affineSbMvIntX[reflist][wn][hn] = xOrgInt; + m_affineSbMvIntY[reflist][wn][hn] = yOrgInt; + m_affineSbMvFracX[reflist][wn][hn] = xOrgFrac; + m_affineSbMvFracY[reflist][wn][hn] = yOrgFrac; + } + } +} +#endif void InterPrediction::xBDMVRFillBlkPredPelBuffer(const PredictionUnit& pu, const Picture& refPic, const Mv &_mv, PelUnitBuf &dstBuf, const ClpRng& clpRng) { const ComponentID compID = COMPONENT_Y; @@ -12785,7 +13175,163 @@ Distortion InterPrediction::xBDMVRMvSquareSearch(Mv (&curBestMv)[2], Distortion return curBestCost; } +#if JVET_AB0112_AFFINE_DMVR +template<bool hPel> +Distortion InterPrediction::xBDMVRMvSquareSearchAffine(Mv(&curBestMv)[2], Distortion curBestCost, PredictionUnit& pu, const Mv(&initialMv)[2], int32_t maxSearchRounds, int32_t searchStepShift, bool useMR, bool useHadmard) +{ +#if !JVET_X0049_BDMVR_SW_OPT + if (curBestCost == 0) + { + return 0; + } +#endif + static const Mv cSearchOffset[8] = { Mv(-1 , 1) , Mv(0 , 1) , Mv(1 , 1) , Mv(1 , 0) , Mv(1 , -1) , Mv(0 , -1) , Mv(-1 , -1) , Mv(-1 , 0) }; + int nDirectStart = 0; + int nDirectEnd = 7; + const int nDirectRounding = 8; + const int nDirectMask = 0x07; + bool doPreInterpolation = searchStepShift == MV_FRACTIONAL_BITS_INTERNAL; + bool profTh[2] = { true,true }; + int blockWidth[2] = { 0,0 }, blockHeight[2] = { 0,0 }, memBlockWidthExt[2] = { 0,0 }, memBlockHeight[2] = { 0,0 }, memHeight[2] = { 0,0 }, memStride[2] = { 0,0 }; + // Calculate TM cost of initial MVs, if it is not set + if (curBestCost == std::numeric_limits<Distortion>::max()) + { + CHECK(searchStepShift < MV_FRACTIONAL_BITS_INTERNAL - 1, "this is not possible"); + Mv cpMV[2][3] = { { pu.mvAffi[0][0] , pu.mvAffi[0][1] , pu.mvAffi[0][2] },{ pu.mvAffi[1][0] , pu.mvAffi[1][1] , pu.mvAffi[1][2] } }; + if (hPel) + { + Distortion tmCost = getDecoderSideDerivedMvCost(initialMv[0], curBestMv[0], AFFINE_DMVR_SEARCH_RANGE + (MV_FRACTIONAL_BITS_INTERNAL - searchStepShift), DECODER_SIDE_MV_WEIGHT); + for (int i = 0; i < 2; i++) + { + for (int j = 0; j < 3; j++) + { + cpMV[i][j] += curBestMv[i]; + } + } + curBestCost = xBDMVRGetMatchingErrorAffine(pu, cpMV, curBestMv,initialMv,doPreInterpolation,hPel,useMR, useHadmard,profTh, blockWidth, blockHeight, memBlockWidthExt, memBlockHeight, memHeight, memStride); + if (curBestCost < tmCost) + { + return curBestCost; + } + + curBestCost += tmCost; + } + else + { + CHECK(doPreInterpolation != true, ""); + const Picture& refPic0 = *pu.cu->slice->getRefPic(REF_PIC_LIST_0, pu.refIdx[0])->unscaledPic; + xCalculteAffineParameters(pu, refPic0, cpMV[0],0,profTh[0], blockWidth[0], blockHeight[0], memBlockWidthExt[0], memBlockHeight[0], memHeight[0], memStride[0]); + const Picture& refPic1 = *pu.cu->slice->getRefPic(REF_PIC_LIST_1, pu.refIdx[1])->unscaledPic; + xCalculteAffineParameters(pu, refPic1,cpMV[1],1,profTh[1], blockWidth[1], blockHeight[1], memBlockWidthExt[1], memBlockHeight[1], memHeight[1], memStride[1] ); + curBestCost = xBDMVRGetMatchingErrorAffine(pu, cpMV,curBestMv,initialMv,doPreInterpolation,hPel,useMR, useHadmard, profTh, blockWidth, blockHeight, memBlockWidthExt, memBlockHeight, memHeight, memStride); + } + } + + Distortion localCostArray[9] = { std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max(), + std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max(), + std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max(), curBestCost }; + // Iterative search process + for (uint32_t uiRound = 0; uiRound < maxSearchRounds; uiRound++) + { + int nBestDirect = -1; + Mv mvCurCenter[2] = { curBestMv[0], curBestMv[1] }; + for (int nIdx = nDirectStart; nIdx <= nDirectEnd; nIdx++) + { + int nDirect = (nIdx + nDirectRounding) & nDirectMask; + Mv mvOffset(cSearchOffset[nDirect].getHor() << searchStepShift, cSearchOffset[nDirect].getVer() << searchStepShift); +#if JVET_X0049_BDMVR_SW_OPT + if (hPel && uiRound > 0) +#else + if (searchStepShift == MV_FRACTIONAL_BITS_INTERNAL - 1 && uiRound > 0) +#endif + { + if ((nDirect % 2) == 0) + { + continue; + } + } + Mv mvCand[2] = { mvCurCenter[0] + mvOffset, mvCurCenter[1] - mvOffset }; + Mv cpMV[2][3] = { { pu.mvAffi[0][0] , pu.mvAffi[0][1], pu.mvAffi[0][2] },{ pu.mvAffi[1][0], pu.mvAffi[1][1], pu.mvAffi[1][2]} }; +#if JVET_X0049_BDMVR_SW_OPT + if (!hPel) +#else + if (searchStepShift == MV_FRACTIONAL_BITS_INTERNAL) +#endif + { + int currentIdx = (AFFINE_DMVR_SEARCH_RANGE*2+1)*AFFINE_DMVR_SEARCH_RANGE+ AFFINE_DMVR_SEARCH_RANGE + ((mvCand[0] - initialMv[0]).hor >> searchStepShift) + ((mvCand[0] - initialMv[0]).ver >> searchStepShift) * (2* AFFINE_DMVR_SEARCH_RANGE+1); + if (currentIdx < 0 || currentIdx >= (2*AFFINE_DMVR_SEARCH_RANGE+1)*(2 * AFFINE_DMVR_SEARCH_RANGE + 1)) + { + continue; + } + } + Distortion tmCost = getDecoderSideDerivedMvCost(initialMv[0], mvCand[0], AFFINE_DMVR_SEARCH_RANGE + (MV_FRACTIONAL_BITS_INTERNAL - searchStepShift), DECODER_SIDE_MV_WEIGHT); + if (tmCost > curBestCost) + { + localCostArray[nDirect] = 2 * tmCost; + continue; + } + if (hPel) + { + for (int i = 0; i < 2; i++) + { + for (int j = 0; j < 3; j++) + { + cpMV[i][j] += mvCand[i]; + } + } + } + tmCost += xBDMVRGetMatchingErrorAffine(pu, cpMV, mvCand,initialMv,doPreInterpolation,hPel,useMR, useHadmard, profTh , blockWidth, blockHeight, memBlockWidthExt, memBlockHeight, memHeight, memStride); + localCostArray[nDirect] = tmCost; + + if (hPel && uiRound > 0) + { + continue; + } + + if (tmCost < curBestCost) + { + nBestDirect = nDirect; + curBestCost = tmCost; + curBestMv[0] = mvCand[0]; + curBestMv[1] = mvCand[1]; + } + } + if (nBestDirect == -1) + { + break; + } + int nStep = 2 - (nBestDirect & 0x01); + nDirectStart = nBestDirect - nStep; + nDirectEnd = nBestDirect + nStep; + if ((uiRound + 1) < maxSearchRounds) + { + xBDMVRUpdateSquareSearchCostLog(localCostArray, nBestDirect); + } + } + if (!hPel) + { + return curBestCost; + } + // Model-based fractional MVD optimization + Mv mvDiff = curBestMv[0] - initialMv[0]; + if (localCostArray[8] > 0 && localCostArray[8] == curBestCost && mvDiff.getAbsHor() != (AFFINE_DMVR_SEARCH_RANGE << MV_FRACTIONAL_BITS_INTERNAL) && mvDiff.getAbsVer() != (AFFINE_DMVR_SEARCH_RANGE << MV_FRACTIONAL_BITS_INTERNAL)) + { + uint64_t sadbuffer[5]; + sadbuffer[0] = (uint64_t)localCostArray[8]; // center + sadbuffer[1] = (uint64_t)localCostArray[7]; // left + sadbuffer[2] = (uint64_t)localCostArray[5]; // above + sadbuffer[3] = (uint64_t)localCostArray[3]; // right + sadbuffer[4] = (uint64_t)localCostArray[1]; // bottom + + int32_t tempDeltaMv[2] = { 0, 0 }; + xSubPelErrorSrfc(sadbuffer, tempDeltaMv); + curBestMv[0] += Mv(tempDeltaMv[0], tempDeltaMv[1]); + curBestMv[1] -= Mv(tempDeltaMv[0], tempDeltaMv[1]); + } + return curBestCost; +} +#endif #if JVET_X0049_ADAPT_DMVR template <uint8_t dir> Distortion InterPrediction::xBDMVRMvOneTemplateHPelSquareSearch(Mv(&curBestMv)[2], Distortion curBestCost, PredictionUnit& pu, @@ -12909,7 +13455,72 @@ Distortion InterPrediction::xBDMVRMvOneTemplateHPelSquareSearch(Mv(&curBestMv)[2 return curBestCost; } #endif +#if JVET_AB0112_AFFINE_DMVR +Distortion InterPrediction::xBDMVRGetMatchingErrorAffine(const PredictionUnit& pu, Mv(&mv)[2][3] + ,Mv(&mvOffset)[2] + ,const Mv(&initialMv)[2] + ,bool& doInterpolation + ,bool hPel + ,bool useMR, bool useHadmard + , const bool (&profTh)[2] + , const int(&blockWidth)[2], const int(&blockHeight)[2], const int(&memBlockWidthExt)[2], const int (&memBlockHeight)[2], const int (&memHeight)[2], const int(&memStride)[2] +) +{ + // Fill L0'a and L1's prediction blocks +#if JVET_X0049_BDMVR_SW_OPT + Pel* pelBuffer[2] = { m_filteredBlock[3][REF_PIC_LIST_0][0] + BDMVR_CENTER_POSITION, m_filteredBlock[3][REF_PIC_LIST_1][0] + BDMVR_CENTER_POSITION }; + const SizeType stride = BDMVR_BUF_STRIDE; +#else + Pel* pelBuffer[2] = { m_filteredBlock[3][REF_PIC_LIST_0][0], m_filteredBlock[3][REF_PIC_LIST_1][0] }; + const SizeType stride = pu.lwidth(); +#endif + PelUnitBuf predBuf[2] = { PelUnitBuf(pu.chromaFormat, PelBuf(pelBuffer[REF_PIC_LIST_0], stride, pu.lwidth(), pu.lheight())), + PelUnitBuf(pu.chromaFormat, PelBuf(pelBuffer[REF_PIC_LIST_1], stride, pu.lwidth(), pu.lheight())) }; + + for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) + { + const Picture& refPic = *pu.cu->slice->getRefPic((RefPicList)refList, pu.refIdx[refList])->unscaledPic; + if (!hPel) + { + xBDMVRFillBlkPredPelBufferAffineOPT(pu, refPic, (RefPicList)refList, mv[refList], mvOffset[refList], initialMv[refList], doInterpolation, predBuf[refList], pu.cs->slice->clpRng(COMPONENT_Y), profTh[refList] , blockWidth[refList], blockHeight[refList], memBlockWidthExt[refList],memBlockHeight[refList], memHeight[refList], memStride[refList]); + } + else + { + xBDMVRFillBlkPredPelBufferAffine(pu, refPic, mv[refList], predBuf[refList], pu.cs->slice->clpRng(COMPONENT_Y)); + } + } + doInterpolation = false; + // Compute distortion between L0'a and L1's prediction blocks + DistParam cDistParam; + cDistParam.applyWeight = false; + cDistParam.useMR = useMR; +#if JVET_X0049_BDMVR_SW_OPT + m_pcRdCost->setDistParam(cDistParam, predBuf[0].Y(), predBuf[1].Y(), pu.cu->slice->clpRng(COMPONENT_Y).bd, COMPONENT_Y, useHadmard); +#if FULL_NBIT + if (useHadmard) + { + return cDistParam.distFunc(cDistParam) >> 1; // magic shift, benefit for early terminate + } + else + { + int32_t precisionAdj = cDistParam.bitDepth > 8 ? cDistParam.bitDepth - 8 : 0; + return cDistParam.distFunc(cDistParam) >> precisionAdj; + } +#else + return cDistParam.distFunc(cDistParam); +#endif +#else + m_pcRdCost->setDistParam(cDistParam, predBuf[0].Y(), predBuf[1].Y(), pu.cu->slice->clpRng(COMPONENT_Y).bd, COMPONENT_Y, false); +#if FULL_NBIT + int32_t precisionAdj = cDistParam.bitDepth > 8 ? cDistParam.bitDepth - 8 : 0; + return cDistParam.distFunc(cDistParam) >> precisionAdj; +#else + return cDistParam.distFunc(cDistParam); +#endif +#endif +} +#endif #if JVET_X0049_BDMVR_SW_OPT Distortion InterPrediction::xBDMVRGetMatchingError(const PredictionUnit& pu, const Mv(&mv)[2], bool useMR, bool useHadmard) #else diff --git a/source/Lib/CommonLib/InterPrediction.h b/source/Lib/CommonLib/InterPrediction.h index 90191b11a..ce8c2c3ab 100644 --- a/source/Lib/CommonLib/InterPrediction.h +++ b/source/Lib/CommonLib/InterPrediction.h @@ -116,6 +116,9 @@ protected: Pel* m_acYuvPred [NUM_REF_PIC_LIST_01][MAX_NUM_COMPONENT]; Pel* m_filteredBlock [LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS_SIGNAL][LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS_SIGNAL][MAX_NUM_COMPONENT]; Pel* m_filteredBlockTmp [LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS_SIGNAL][MAX_NUM_COMPONENT]; +#if JVET_AB0112_AFFINE_DMVR + Pel* m_affineDmvrBlockTmp[NUM_REF_PIC_LIST_01]; +#endif #if MULTI_HYP_PRED PelStorage m_additionalHypothesisStorage; @@ -164,6 +167,12 @@ protected: Pel m_gradBuf[2][(AFFINE_MIN_BLOCK_SIZE + 2) * (AFFINE_MIN_BLOCK_SIZE + 2)]; int m_dMvBuf[2][16 * 2]; +#if JVET_AB0112_AFFINE_DMVR + int m_affineSbMvIntX[NUM_REF_PIC_LIST_01][MAX_CU_SIZE / AFFINE_MIN_BLOCK_SIZE][MAX_CU_SIZE / AFFINE_MIN_BLOCK_SIZE]; + int m_affineSbMvIntY[NUM_REF_PIC_LIST_01][MAX_CU_SIZE / AFFINE_MIN_BLOCK_SIZE][MAX_CU_SIZE / AFFINE_MIN_BLOCK_SIZE]; + int m_affineSbMvFracX[NUM_REF_PIC_LIST_01][MAX_CU_SIZE / AFFINE_MIN_BLOCK_SIZE][MAX_CU_SIZE / AFFINE_MIN_BLOCK_SIZE]; + int m_affineSbMvFracY[NUM_REF_PIC_LIST_01][MAX_CU_SIZE / AFFINE_MIN_BLOCK_SIZE][MAX_CU_SIZE / AFFINE_MIN_BLOCK_SIZE]; +#endif bool m_skipPROF; bool m_encOnly; bool m_isBi; @@ -729,11 +738,19 @@ public: #if MULTI_PASS_DMVR private: void xBDMVRFillBlkPredPelBuffer(const PredictionUnit& pu, const Picture& refPic, const Mv &_mv, PelUnitBuf &dstBuf, const ClpRng& clpRng); + +#if JVET_AB0112_AFFINE_DMVR + void xBDMVRFillBlkPredPelBufferAffine(const PredictionUnit& pu, const Picture& refPic, const Mv(&_mv)[3], PelUnitBuf& dstUnitBuf, const ClpRng& clpRng); + void xBDMVRFillBlkPredPelBufferAffineOPT(const PredictionUnit& pu, const Picture& refPic, const RefPicList eRefPicList, const Mv(&_mv)[3], const Mv mvCur, const Mv mvCenter, const bool doInterpolation, PelUnitBuf& dstUnitBuf, const ClpRng& clpRng, const bool profTh,const int blockWidth,const int blockHeight ,const int memBlockWidthExt,const int memBlockHeight,const int memHeight,const int memStride); + void xCalculteAffineParameters(const PredictionUnit& pu, const Picture& refPic, const Mv(&_mv)[3],int refList, bool& profTH, int& blockWidth, int& blockHeight, int& memBlockWidthExt, int& memBlockHeight, int& memHeight, int& memStride); +#endif #if JVET_X0049_ADAPT_DMVR template <uint8_t dir> #endif void xBDMVRPreInterpolation (const PredictionUnit& pu, const Mv (&mvCenter)[2], bool doPreInterpolationFP, bool doPreInterpolationHP); - +#if JVET_AB0112_AFFINE_DMVR + Distortion xBDMVRGetMatchingErrorAffine(const PredictionUnit& pu, Mv(&mv)[2][3],Mv(&mvOffset)[2],const Mv(&initialMv)[2],bool& doInterpolation,bool hPel,bool useMR, bool useHadmard, const bool(&profTh)[2], const int(&blockWidth)[2], const int(&blockHeight)[2], const int(&memBlockWidthExt)[2], const int(&memBlockHeight)[2], const int(&memHeight)[2], const int(&memStride)[2]); +#endif #if JVET_X0049_BDMVR_SW_OPT Distortion xBDMVRGetMatchingError (const PredictionUnit& pu, const Mv (&mv)[2], bool useMR, bool useHadmard = false ); #if JVET_X0049_ADAPT_DMVR @@ -755,8 +772,13 @@ private: const int maxHorOffset, const int maxVerOffset, const bool earlySkip, const Distortion earlyTerminateTh, DistParam &cDistParam, Pel* pelBuffer[2], const int stride); + template<bool hPel> Distortion xBDMVRMvSquareSearch(Mv(&curBestMv)[2], Distortion curBestCost, PredictionUnit& pu, const Mv(&initialMv)[2], int32_t maxSearchRounds, int32_t searchStepShift, bool useMR, bool useHadmard); +#if JVET_AB0112_AFFINE_DMVR + template<bool hPel> + Distortion xBDMVRMvSquareSearchAffine(Mv(&curBestMv)[2], Distortion curBestCost, PredictionUnit& pu, const Mv(&initialMv)[2], int32_t maxSearchRounds, int32_t searchStepShift, bool useMR, bool useHadmard); +#endif #if JVET_X0049_ADAPT_DMVR template <uint8_t dir> Distortion xBDMVRMvOneTemplateHPelSquareSearch(Mv(&curBestMv)[2], Distortion curBestCost, PredictionUnit& pu, @@ -783,6 +805,9 @@ public: #else bool processBDMVR (PredictionUnit& pu); #endif +#if JVET_AB0112_AFFINE_DMVR + bool processBDMVR4Affine(PredictionUnit& pu); +#endif #if JVET_X0049_ADAPT_DMVR bool processBDMVRPU2Dir (PredictionUnit& pu, bool subPURefine[2], Mv(&finalMvDir)[2]); void processBDMVRSubPU (PredictionUnit& pu, bool subPURefine); diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index f165780b8..2ecd1b6f1 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -202,7 +202,7 @@ #endif #define JVET_AA0096_MC_BOUNDARY_PADDING 1 // JVET-AA0096: motion compensated picture boundary padding #define JVET_AB0078_AMVPMERGE_LDB 1 // JVET-AB0078: enabling AmvpMerge for low delay picture - +#define JVET_AB0112_AFFINE_DMVR 1 // JVET-AB0112: DMVR (baseMV refinement) for affine // Inter template matching tools #define ENABLE_INTER_TEMPLATE_MATCHING 1 // It controls whether template matching is enabled for inter prediction #if ENABLE_INTER_TEMPLATE_MATCHING diff --git a/source/Lib/CommonLib/UnitTools.cpp b/source/Lib/CommonLib/UnitTools.cpp index 6076732ee..26ddf3df7 100644 --- a/source/Lib/CommonLib/UnitTools.cpp +++ b/source/Lib/CommonLib/UnitTools.cpp @@ -6888,8 +6888,9 @@ bool PU::checkBDMVRCondition(const PredictionUnit& pu) const bool ref1IsScaled = refIdx1 < 0 || refIdx1 >= MAX_NUM_REF ? false : isResamplingPossible && pu.cu->slice->getRefPic( REF_PIC_LIST_1, refIdx1 )->isRefScaled( pu.cs->pps ); - -#if JVET_X0083_BM_AMVP_MERGE_MODE +#if JVET_AB0112_AFFINE_DMVR + return ((pu.mergeFlag && pu.mergeType == MRG_TYPE_DEFAULT_N && (!pu.cu->affine || !pu.afMmvdFlag)) || (pu.amvpMergeModeFlag[0] || pu.amvpMergeModeFlag[1])) && !pu.ciipFlag && !pu.mmvdMergeFlag +#elif JVET_X0083_BM_AMVP_MERGE_MODE return ((pu.mergeFlag && pu.mergeType == MRG_TYPE_DEFAULT_N) || (pu.amvpMergeModeFlag[0] || pu.amvpMergeModeFlag[1])) && !pu.ciipFlag && !pu.cu->affine && !pu.mmvdMergeFlag #else return pu.mergeFlag && pu.mergeType == MRG_TYPE_DEFAULT_N && !pu.ciipFlag && !pu.cu->affine && !pu.mmvdMergeFlag diff --git a/source/Lib/DecoderLib/DecCu.cpp b/source/Lib/DecoderLib/DecCu.cpp index edaade649..2747a4c63 100644 --- a/source/Lib/DecoderLib/DecCu.cpp +++ b/source/Lib/DecoderLib/DecCu.cpp @@ -51,7 +51,6 @@ #include "CommonLib/ChromaFormat.h" #include "CommonLib/dtrace_blockstatistics.h" #endif - //! \ingroup DecoderLib //! \{ @@ -1924,21 +1923,38 @@ void DecCu::xDeriveCUMV( CodingUnit &cu ) } else { - for( int i = 0; i < 2; ++i ) + for (int i = 0; i < 2; ++i) + { + if (pu.cs->slice->getNumRefIdx(RefPicList(i)) > 0) + { + MvField* mvField = affineMergeCtx.mvFieldNeighbours[(pu.mergeIdx << 1) + i]; + pu.mvpIdx[i] = 0; + pu.mvpNum[i] = 0; + pu.mvd[i] = Mv(); + pu.refIdx[i] = mvField[0].refIdx; + pu.mvAffi[i][0] = mvField[0].mv; + pu.mvAffi[i][1] = mvField[1].mv; + pu.mvAffi[i][2] = mvField[2].mv; + } + } + } +#if JVET_AB0112_AFFINE_DMVR + if (!pu.afMmvdFlag&&pu.mergeType != MRG_TYPE_SUBPU_ATMVP && PU::checkBDMVRCondition(pu)) { - if( pu.cs->slice->getNumRefIdx( RefPicList( i ) ) > 0 ) + m_pcInterPred->setBdmvrSubPuMvBuf(m_mvBufBDMVR[0], m_mvBufBDMVR[1]); + pu.bdmvrRefine = false; + if (!affineMergeCtx.xCheckSimilarMotion(pu.mergeIdx, PU::getBDMVRMvdThreshold(pu))) { - MvField* mvField = affineMergeCtx.mvFieldNeighbours[(pu.mergeIdx << 1) + i]; - pu.mvpIdx[i] = 0; - pu.mvpNum[i] = 0; - pu.mvd[i] = Mv(); - pu.refIdx[i] = mvField[0].refIdx; - pu.mvAffi[i][0] = mvField[0].mv; - pu.mvAffi[i][1] = mvField[1].mv; - pu.mvAffi[i][2] = mvField[2].mv; + m_pcInterPred->processBDMVR4Affine(pu); + pu.mvAffi[0][0] += m_mvBufBDMVR[0][0]; + pu.mvAffi[0][1] += m_mvBufBDMVR[0][0]; + pu.mvAffi[0][2] += m_mvBufBDMVR[0][0]; + pu.mvAffi[1][0] += m_mvBufBDMVR[1][0]; + pu.mvAffi[1][1] += m_mvBufBDMVR[1][0]; + pu.mvAffi[1][2] += m_mvBufBDMVR[1][0]; } } - } +#endif PU::spanMotionInfo( pu, mrgCtx ); } #if JVET_X0141_CIIP_TIMD_TM && TM_MRG diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp index 360b96298..dcccbcf85 100644 --- a/source/Lib/EncoderLib/EncCu.cpp +++ b/source/Lib/EncoderLib/EncCu.cpp @@ -3785,7 +3785,62 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& } } } - +#if JVET_AB0112_AFFINE_DMVR + if (affineMrgAvail) + { + for (uint32_t uiAffMergeCand = 0; uiAffMergeCand < affineMergeCtx.numValidMergeCand; uiAffMergeCand++) + { + m_mvBufBDMVR4AFFINE[uiAffMergeCand << 1][0].setZero(); + m_mvBufBDMVR4AFFINE[(uiAffMergeCand << 1) + 1][0].setZero(); + pu.bdmvrRefine = false; + if (affineMergeCtx.interDirNeighbours[uiAffMergeCand] == 3 && affineMergeCtx.mergeType[uiAffMergeCand] != MRG_TYPE_SUBPU_ATMVP) + { + pu.regularMergeFlag = false; + pu.mergeFlag = true; + pu.mmvdMergeFlag = false; + pu.cu->affine = true; + pu.interDir = affineMergeCtx.interDirNeighbours[uiAffMergeCand]; + pu.cu->imv = 0; + pu.mergeType = affineMergeCtx.mergeType[uiAffMergeCand]; + pu.mv[0].setZero(); + pu.mv[1].setZero(); + pu.mergeIdx = uiAffMergeCand; + cu.affineType = affineMergeCtx.affineType[uiAffMergeCand]; + cu.BcwIdx = affineMergeCtx.BcwIdx[uiAffMergeCand]; + pu.mmvdEncOptMode = 0; +#if INTER_LIC + cu.LICFlag = affineMergeCtx.LICFlags[uiAffMergeCand]; +#endif + pu.refIdx[0] = affineMergeCtx.mvFieldNeighbours[(uiAffMergeCand << 1) + 0][0].refIdx; + pu.refIdx[1] = affineMergeCtx.mvFieldNeighbours[(uiAffMergeCand << 1) + 1][0].refIdx; + pu.mvAffi[REF_PIC_LIST_0][0] = affineMergeCtx.mvFieldNeighbours[(uiAffMergeCand << 1) + 0][0].mv; + pu.mvAffi[REF_PIC_LIST_0][1] = affineMergeCtx.mvFieldNeighbours[(uiAffMergeCand << 1) + 0][1].mv; + pu.mvAffi[REF_PIC_LIST_0][2] = affineMergeCtx.mvFieldNeighbours[(uiAffMergeCand << 1) + 0][2].mv; + pu.mvAffi[REF_PIC_LIST_1][0] = affineMergeCtx.mvFieldNeighbours[(uiAffMergeCand << 1) + 1][0].mv; + pu.mvAffi[REF_PIC_LIST_1][1] = affineMergeCtx.mvFieldNeighbours[(uiAffMergeCand << 1) + 1][1].mv; + pu.mvAffi[REF_PIC_LIST_1][2] = affineMergeCtx.mvFieldNeighbours[(uiAffMergeCand << 1) + 1][2].mv; + if(PU::checkBDMVRCondition(pu)) + { + // set merge information + m_pcInterSearch->setBdmvrSubPuMvBuf(m_mvBufBDMVR4AFFINE[uiAffMergeCand << 1], m_mvBufBDMVR4AFFINE[(uiAffMergeCand << 1) + 1]); + if (!affineMergeCtx.xCheckSimilarMotion(pu.mergeIdx, PU::getBDMVRMvdThreshold(pu))) + { + m_pcInterSearch->processBDMVR4Affine(pu); + } + } + } + } + for (uint32_t uiAffMergeCand = 0; uiAffMergeCand < affineMergeCtx.numValidMergeCand; uiAffMergeCand++) + { + affineMergeCtx.mvFieldNeighbours[(uiAffMergeCand << 1) + 0][0].mv += m_mvBufBDMVR4AFFINE[(uiAffMergeCand << 1) + 0][0]; + affineMergeCtx.mvFieldNeighbours[(uiAffMergeCand << 1) + 0][1].mv += m_mvBufBDMVR4AFFINE[(uiAffMergeCand << 1) + 0][0]; + affineMergeCtx.mvFieldNeighbours[(uiAffMergeCand << 1) + 0][2].mv += m_mvBufBDMVR4AFFINE[(uiAffMergeCand << 1) + 0][0]; + affineMergeCtx.mvFieldNeighbours[(uiAffMergeCand << 1) + 1][0].mv += m_mvBufBDMVR4AFFINE[(uiAffMergeCand << 1) + 1][0]; + affineMergeCtx.mvFieldNeighbours[(uiAffMergeCand << 1) + 1][1].mv += m_mvBufBDMVR4AFFINE[(uiAffMergeCand << 1) + 1][0]; + affineMergeCtx.mvFieldNeighbours[(uiAffMergeCand << 1) + 1][2].mv += m_mvBufBDMVR4AFFINE[(uiAffMergeCand << 1) + 1][0]; + } + } +#endif #if JVET_X0049_ADAPT_DMVR checkBmMrg = PU::isBMMergeFlagCoded(pu); if (checkBmMrg) diff --git a/source/Lib/EncoderLib/EncCu.h b/source/Lib/EncoderLib/EncCu.h index 98ed1f40e..40840fff0 100644 --- a/source/Lib/EncoderLib/EncCu.h +++ b/source/Lib/EncoderLib/EncCu.h @@ -336,6 +336,9 @@ private: #else Mv m_mvBufBDMVR4TM[(TM_MRG_MAX_NUM_CANDS << 1)][MAX_NUM_SUBCU_DMVR]; #endif +#if JVET_AB0112_AFFINE_DMVR + Mv m_mvBufBDMVR4AFFINE[(AFFINE_MRG_MAX_NUM_CANDS << 1)][MAX_NUM_SUBCU_DMVR]; +#endif #endif Mv m_mvBufEncBDOF[MRG_MAX_NUM_CANDS][BDOF_SUBPU_MAX_NUM]; Mv m_mvBufEncBDOF4TM[MRG_MAX_NUM_CANDS][BDOF_SUBPU_MAX_NUM]; -- GitLab