From e8c2ead982b18869653d82d285c6d5b5f1276fa2 Mon Sep 17 00:00:00 2001 From: Xiaoyu Xiu <jamesxxiu@gmail.com> Date: Wed, 21 Jul 2021 04:33:54 +0000 Subject: [PATCH] JVET-W0097: GPM-MMVD and GPM-TM --- source/Lib/CommonLib/Buffer.cpp | 95 +- source/Lib/CommonLib/Buffer.h | 6 +- source/Lib/CommonLib/CommonDef.h | 16 +- source/Lib/CommonLib/ContextModelling.cpp | 112 +- source/Lib/CommonLib/ContextModelling.h | 6 +- source/Lib/CommonLib/Contexts.cpp | 40 + source/Lib/CommonLib/Contexts.h | 4 + source/Lib/CommonLib/InterPrediction.cpp | 84 +- source/Lib/CommonLib/InterPrediction.h | 4 + source/Lib/CommonLib/Reshape.cpp | 1 - source/Lib/CommonLib/Rom.cpp | 19 + source/Lib/CommonLib/Rom.h | 3 + source/Lib/CommonLib/Slice.cpp | 6 + source/Lib/CommonLib/Slice.h | 7 + source/Lib/CommonLib/TypeDef.h | 12 +- source/Lib/CommonLib/Unit.cpp | 30 + source/Lib/CommonLib/Unit.h | 12 +- source/Lib/CommonLib/UnitTools.cpp | 620 +++++++- source/Lib/CommonLib/UnitTools.h | 140 +- source/Lib/CommonLib/x86/BufferX86.h | 208 +++ source/Lib/DecoderLib/CABACReader.cpp | 140 ++ source/Lib/DecoderLib/CABACReader.h | 5 + source/Lib/DecoderLib/DecCu.cpp | 54 + source/Lib/DecoderLib/DecCu.h | 3 + source/Lib/DecoderLib/VLCReader.cpp | 12 + source/Lib/EncoderLib/CABACWriter.cpp | 223 ++- source/Lib/EncoderLib/CABACWriter.h | 13 + source/Lib/EncoderLib/EncCu.cpp | 1561 +++++++++++++++++++-- source/Lib/EncoderLib/EncCu.h | 78 + source/Lib/EncoderLib/EncLib.cpp | 24 + source/Lib/EncoderLib/EncModeCtrl.cpp | 40 +- source/Lib/EncoderLib/EncModeCtrl.h | 41 +- source/Lib/EncoderLib/EncReshape.cpp | 1 - source/Lib/EncoderLib/InterSearch.cpp | 9 + source/Lib/EncoderLib/VLCWriter.cpp | 6 + 35 files changed, 3495 insertions(+), 140 deletions(-) diff --git a/source/Lib/CommonLib/Buffer.cpp b/source/Lib/CommonLib/Buffer.cpp index 1c62b65a6..79852a83f 100644 --- a/source/Lib/CommonLib/Buffer.cpp +++ b/source/Lib/CommonLib/Buffer.cpp @@ -90,7 +90,71 @@ int64_t getSumOfDifferenceCore(const Pel* src0, int 
src0Stride, const Pel* src1, return sum; } #endif +#if JVET_W0097_GPM_MMVD_TM +void roundBDCore(const Pel* srcp, const int srcStride, Pel* dest, const int destStride, int width, int height, const ClpRng& clpRng) +{ + const int32_t clipbd = clpRng.bd; +#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT + const int32_t shiftDefault = IF_INTERNAL_FRAC_BITS(clipbd); +#else + const int32_t shiftDefault = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)); +#endif + const int32_t offsetDefault = (1 << (shiftDefault - 1)) + IF_INTERNAL_OFFS; + + if (width == 1) + { + THROW("Blocks of width = 1 not supported"); + } + else + { +#define RND_OP( ADDR ) dest[ADDR] = ClipPel( rightShift( srcp[ADDR] + offsetDefault, shiftDefault), clpRng ) +#define RND_INC \ + srcp += srcStride; \ + dest += destStride; \ + + SIZE_AWARE_PER_EL_OP(RND_OP, RND_INC); + +#undef RND_OP +#undef RND_INC + } +} + +void weightedAvgCore(const Pel* src0, const unsigned src0Stride, const Pel* src1, const unsigned src1Stride, Pel* dest, const unsigned destStride, const int8_t w0, const int8_t w1, int width, int height, const ClpRng& clpRng) +{ + const int8_t log2WeightBase = g_BcwLog2WeightBase; + const int clipbd = clpRng.bd; +#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT + const int shiftNum = IF_INTERNAL_FRAC_BITS(clipbd) + log2WeightBase; +#else + const int shiftNum = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + log2WeightBase; +#endif + const int offset = (1 << (shiftNum - 1)) + (IF_INTERNAL_OFFS << log2WeightBase); + +#define ADD_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShift( ( src0[ADDR]*w0 + src1[ADDR]*w1 + offset ), shiftNum ), clpRng ) +#define ADD_AVG_INC \ + src0 += src0Stride; \ + src1 += src1Stride; \ + dest += destStride; \ + + SIZE_AWARE_PER_EL_OP(ADD_AVG_OP, ADD_AVG_INC); + +#undef ADD_AVG_OP +#undef ADD_AVG_INC +} +void copyClipCore(const Pel* srcp, const unsigned srcStride, Pel* dest, const unsigned destStride, int width, int height, const ClpRng& clpRng) +{ +#define RECO_OP( ADDR ) dest[ADDR] = ClipPel( 
srcp[ADDR], clpRng ) +#define RECO_INC \ + srcp += srcStride; \ + dest += destStride; \ + + SIZE_AWARE_PER_EL_OP(RECO_OP, RECO_INC); + +#undef RECO_OP +#undef RECO_INC +} +#endif template< typename T > void addAvgCore( const T* src1, int src1Stride, const T* src2, int src2Stride, T* dest, int dstStride, int width, int height, int rshift, int offset, const ClpRng& clpRng ) { @@ -521,6 +585,11 @@ void linTfCore( const T* src, int srcStride, Pel *dst, int dstStride, int width, PelBufferOps::PelBufferOps() { +#if JVET_W0097_GPM_MMVD_TM + roundBD = roundBDCore; + weightedAvg = weightedAvgCore; + copyClip = copyClipCore; +#endif addAvg4 = addAvgCore<Pel>; addAvg8 = addAvgCore<Pel>; @@ -625,6 +694,19 @@ void AreaBuf<Pel>::addHypothesisAndClip(const AreaBuf<const Pel> &other, const i template<> void AreaBuf<Pel>::addWeightedAvg(const AreaBuf<const Pel> &other1, const AreaBuf<const Pel> &other2, const ClpRng& clpRng, const int8_t bcwIdx) { +#if JVET_W0097_GPM_MMVD_TM + const int8_t w0 = getBcwWeight(bcwIdx, REF_PIC_LIST_0); + const int8_t w1 = getBcwWeight(bcwIdx, REF_PIC_LIST_1); + + const Pel* src0 = other1.buf; + const Pel* src1 = other2.buf; + Pel* dest = buf; + const unsigned src0Stride = other1.stride; + const unsigned src1Stride = other2.stride; + const unsigned destStride = stride; + + g_pelBufOP.weightedAvg(src0, src0Stride, src1, src1Stride, dest, destStride, w0, w1, width, height, clpRng); +#else const int8_t w0 = getBcwWeight(bcwIdx, REF_PIC_LIST_0); const int8_t w1 = getBcwWeight(bcwIdx, REF_PIC_LIST_1); const int8_t log2WeightBase = g_BcwLog2WeightBase; @@ -654,6 +736,7 @@ void AreaBuf<Pel>::addWeightedAvg(const AreaBuf<const Pel> &other1, const AreaBu #undef ADD_AVG_OP #undef ADD_AVG_INC +#endif } template<> @@ -919,6 +1002,9 @@ void AreaBuf<Pel>::copyClip( const AreaBuf<const Pel> &src, const ClpRng& clpRng } else { +#if JVET_W0097_GPM_MMVD_TM + g_pelBufOP.copyClip(srcp, srcStride, dest, destStride, width, height, clpRng); +#else #define RECO_OP( ADDR ) 
dest[ADDR] = ClipPel( srcp[ADDR], clpRng ) #define RECO_INC \ srcp += srcStride; \ @@ -928,6 +1014,7 @@ void AreaBuf<Pel>::copyClip( const AreaBuf<const Pel> &src, const ClpRng& clpRng #undef RECO_OP #undef RECO_INC +#endif } } @@ -938,7 +1025,7 @@ void AreaBuf<Pel>::roundToOutputBitdepth( const AreaBuf<const Pel> &src, const C Pel* dest = buf; const unsigned srcStride = src.stride; const unsigned destStride = stride; - +#if !JVET_W0097_GPM_MMVD_TM const int32_t clipbd = clpRng.bd; #if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT const int32_t shiftDefault = IF_INTERNAL_FRAC_BITS(clipbd); @@ -946,13 +1033,16 @@ void AreaBuf<Pel>::roundToOutputBitdepth( const AreaBuf<const Pel> &src, const C const int32_t shiftDefault = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)); #endif const int32_t offsetDefault = (1<<(shiftDefault-1)) + IF_INTERNAL_OFFS; - +#endif if( width == 1 ) { THROW( "Blocks of width = 1 not supported" ); } else { +#if JVET_W0097_GPM_MMVD_TM + g_pelBufOP.roundBD(srcp, srcStride, dest, destStride, width, height, clpRng); +#else #define RND_OP( ADDR ) dest[ADDR] = ClipPel( rightShift( srcp[ADDR] + offsetDefault, shiftDefault), clpRng ) #define RND_INC \ srcp += srcStride; \ @@ -962,6 +1052,7 @@ void AreaBuf<Pel>::roundToOutputBitdepth( const AreaBuf<const Pel> &src, const C #undef RND_OP #undef RND_INC +#endif } } diff --git a/source/Lib/CommonLib/Buffer.h b/source/Lib/CommonLib/Buffer.h index 36f47f694..873dfbaf0 100644 --- a/source/Lib/CommonLib/Buffer.h +++ b/source/Lib/CommonLib/Buffer.h @@ -60,7 +60,11 @@ struct PelBufferOps template<X86_VEXT vext> void _initPelBufOpsX86(); #endif - +#if JVET_W0097_GPM_MMVD_TM + void(*roundBD) (const Pel* srcp, const int srcStride, Pel* dest, const int destStride, int width, int height, const ClpRng& clpRng); + void(*weightedAvg) (const Pel* src0, const unsigned src0Stride, const Pel* src1, const unsigned src1Stride, Pel* dest, const unsigned destStride, const int8_t w0, const int8_t w1, int width, int height, const ClpRng& 
clpRng); + void(*copyClip) (const Pel* srcp, const unsigned srcStride, Pel* dest, const unsigned destStride, int width, int height, const ClpRng& clpRng); +#endif void ( *addAvg4 ) ( const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, int width, int height, int shift, int offset, const ClpRng& clpRng ); void ( *addAvg8 ) ( const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, int width, int height, int shift, int offset, const ClpRng& clpRng ); void ( *reco4 ) ( const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, int width, int height, const ClpRng& clpRng ); diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h index 510b0f039..afc3b0e7d 100644 --- a/source/Lib/CommonLib/CommonDef.h +++ b/source/Lib/CommonLib/CommonDef.h @@ -456,7 +456,14 @@ static const int MMVD_MRG_MAX_RD_NUM = 20; static const int MMVD_MRG_MAX_RD_NUM = MRG_MAX_NUM_CANDS; #endif static const int MMVD_MRG_MAX_RD_BUF_NUM = (MMVD_MRG_MAX_RD_NUM + 1);///< increase buffer size by 1 - +#if JVET_W0097_GPM_MMVD_TM +static const int GPM_MMVD_REFINE_STEP = 8; +static const int GPM_MMVD_REFINE_DIRECTION = 4; +static const int GPM_MMVD_MAX_REFINE_NUM = (GPM_MMVD_REFINE_STEP * GPM_MMVD_REFINE_DIRECTION); +static const int GPM_EXT_MMVD_REFINE_STEP = 9; +static const int GPM_EXT_MMVD_REFINE_DIRECTION = 8; +static const int GPM_EXT_MMVD_MAX_REFINE_NUM = (GPM_EXT_MMVD_REFINE_STEP * GPM_EXT_MMVD_REFINE_DIRECTION); +#endif static const int MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_LUMA = 28; static const int MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_CHROMA = 28; @@ -618,7 +625,14 @@ static const int GEO_NUM_DISTANCES = 4; static const int GEO_NUM_PRESTORED_MASK = 6; static const int GEO_WEIGHT_MASK_SIZE = 3 * (GEO_MAX_CU_SIZE >> 3) * 2 + GEO_MAX_CU_SIZE; static const int GEO_MV_MASK_SIZE = GEO_WEIGHT_MASK_SIZE >> 2; +#if JVET_W0097_GPM_MMVD_TM +static const int 
GEO_MAX_TRY_WEIGHTED_SAD = 70; +#if TM_MRG +static const int GEO_TM_MAX_NUM_CANDS = GEO_MAX_NUM_UNI_CANDS * (GEO_NUM_TM_MV_CAND - 1); +#endif +#else static const int GEO_MAX_TRY_WEIGHTED_SAD = 60; +#endif static const int GEO_MAX_TRY_WEIGHTED_SATD = 8; #if ENABLE_OBMC diff --git a/source/Lib/CommonLib/ContextModelling.cpp b/source/Lib/CommonLib/ContextModelling.cpp index e6db8469e..a86b14b13 100644 --- a/source/Lib/CommonLib/ContextModelling.cpp +++ b/source/Lib/CommonLib/ContextModelling.cpp @@ -513,7 +513,7 @@ void MergeCtx::setMergeInfo( PredictionUnit& pu, int candIdx ) #endif } -#if NON_ADJACENT_MRG_CAND || TM_MRG || MULTI_PASS_DMVR +#if NON_ADJACENT_MRG_CAND || TM_MRG || MULTI_PASS_DMVR || JVET_W0097_GPM_MMVD_TM bool MergeCtx::xCheckSimilarMotion(int mergeCandIndex, uint32_t mvdSimilarityThresh) const { if (mvFieldNeighbours[(mergeCandIndex << 1)].refIdx < 0 && mvFieldNeighbours[(mergeCandIndex << 1) + 1].refIdx < 0) @@ -606,7 +606,117 @@ bool MergeCtx::xCheckSimilarMotion(int mergeCandIndex, uint32_t mvdSimilarityThr return false; } #endif +#if JVET_W0097_GPM_MMVD_TM +void MergeCtx::setGeoMmvdMergeInfo(PredictionUnit& pu, int mergeIdx, int mmvdIdx) +{ + bool extMMVD = pu.cs->picHeader->getGPMMMVDTableFlag(); + CHECK(mergeIdx >= numValidMergeCand, "Merge candidate does not exist"); + CHECK(mmvdIdx >= (extMMVD ? 
GPM_EXT_MMVD_MAX_REFINE_NUM : GPM_MMVD_MAX_REFINE_NUM), "GPM MMVD index is invalid"); + CHECK(!pu.cu->geoFlag || CU::isIBC(*pu.cu), "incorrect GPM setting"); + pu.regularMergeFlag = !(pu.ciipFlag || pu.cu->geoFlag); + pu.mergeFlag = true; + pu.mmvdMergeFlag = false; + pu.interDir = interDirNeighbours[mergeIdx]; + pu.cu->imv = 0; + pu.mergeIdx = mergeIdx; + pu.mergeType = MRG_TYPE_DEFAULT_N; +#if MULTI_PASS_DMVR + pu.bdmvrRefine = false; +#endif + + const int mvShift = MV_FRACTIONAL_BITS_DIFF; + const int refMvdCands[8] = { 1 << mvShift , 2 << mvShift , 4 << mvShift , 8 << mvShift , 16 << mvShift , 32 << mvShift, 64 << mvShift , 128 << mvShift }; + const int refExtMvdCands[9] = { 1 << mvShift , 2 << mvShift , 4 << mvShift , 8 << mvShift , 12 << mvShift , 16 << mvShift, 24 << mvShift, 32 << mvShift, 64 << mvShift }; + int fPosStep = (extMMVD ? (mmvdIdx >> 3) : (mmvdIdx >> 2)); /* step index: the extended table has 8 directions per step, the default table has 4 */ + int fPosPosition = (extMMVD ? (mmvdIdx - (fPosStep << 3)) : (mmvdIdx - (fPosStep << 2))); /* direction index within the selected step */ + int offset = (extMMVD ? 
refExtMvdCands[fPosStep] : refMvdCands[fPosStep]); + Mv mvOffset; + + if (fPosPosition == 0) + { + mvOffset = Mv(offset, 0); + } + else if (fPosPosition == 1) + { + mvOffset = Mv(-offset, 0); + } + else if (fPosPosition == 2) + { + mvOffset = Mv(0, offset); + } + else if (fPosPosition == 3) + { + mvOffset = Mv(0, -offset); + } + else if (fPosPosition == 4) + { + mvOffset = Mv(offset, offset); + } + else if (fPosPosition == 5) + { + mvOffset = Mv(offset, -offset); + } + else if (fPosPosition == 6) + { + mvOffset = Mv(-offset, offset); + } + else if (fPosPosition == 7) + { + mvOffset = Mv(-offset, -offset); + } + + pu.refIdx[REF_PIC_LIST_0] = mvFieldNeighbours[(mergeIdx << 1) + 0].refIdx; + pu.refIdx[REF_PIC_LIST_1] = mvFieldNeighbours[(mergeIdx << 1) + 1].refIdx; + if (pu.refIdx[REF_PIC_LIST_0] >= 0) + { + pu.mv[REF_PIC_LIST_0] = mvFieldNeighbours[(mergeIdx << 1) + 0].mv + mvOffset; + } + else + { + pu.mv[REF_PIC_LIST_0] = Mv(); + } + + if (pu.refIdx[REF_PIC_LIST_1] >= 0) + { + pu.mv[REF_PIC_LIST_1] = mvFieldNeighbours[(mergeIdx << 1) + 1].mv + mvOffset; + } + else + { + pu.mv[REF_PIC_LIST_1] = Mv(); + } + pu.mvd[REF_PIC_LIST_0] = Mv(); + pu.mvd[REF_PIC_LIST_1] = Mv(); + pu.mvpIdx[REF_PIC_LIST_0] = NOT_VALID; + pu.mvpIdx[REF_PIC_LIST_1] = NOT_VALID; + pu.mvpNum[REF_PIC_LIST_0] = NOT_VALID; + pu.mvpNum[REF_PIC_LIST_1] = NOT_VALID; + pu.cu->BcwIdx = (interDirNeighbours[mergeIdx] == 3) ? BcwIdx[mergeIdx] : BCW_DEFAULT; + +#if MULTI_HYP_PRED + pu.addHypData.clear(); + pu.numMergedAddHyps = 0; +#endif + +#if !INTER_RM_SIZE_CONSTRAINTS + PU::restrictBiPredMergeCandsOne(pu); +#endif + pu.mmvdEncOptMode = 0; + +#if INTER_LIC + pu.cu->LICFlag = pu.cs->slice->getUseLIC() ? 
LICFlags[mergeIdx] : false; + if (pu.interDir == 3) + { + CHECK(pu.cu->LICFlag, "LIC is not used with bi-prediction in merge"); + } +#endif +} +void MergeCtx::copyMergeCtx(MergeCtx & orgMergeCtx) +{ + memcpy(interDirNeighbours, orgMergeCtx.interDirNeighbours, MRG_MAX_NUM_CANDS * sizeof(unsigned char)); + memcpy(mvFieldNeighbours, orgMergeCtx.mvFieldNeighbours, (MRG_MAX_NUM_CANDS << 1) * sizeof(MvField)); +} +#endif void MergeCtx::setMmvdMergeCandiInfo(PredictionUnit& pu, int candIdx) { const Slice &slice = *pu.cs->slice; diff --git a/source/Lib/CommonLib/ContextModelling.h b/source/Lib/CommonLib/ContextModelling.h index dd33025f3..eda11d04e 100644 --- a/source/Lib/CommonLib/ContextModelling.h +++ b/source/Lib/CommonLib/ContextModelling.h @@ -568,13 +568,17 @@ public: bool mmvdUseAltHpelIf [ MMVD_BASE_MV_NUM ]; bool useAltHpelIf [ MRG_MAX_NUM_CANDS ]; void setMergeInfo( PredictionUnit& pu, int candIdx ); -#if NON_ADJACENT_MRG_CAND || TM_MRG || MULTI_PASS_DMVR +#if NON_ADJACENT_MRG_CAND || TM_MRG || MULTI_PASS_DMVR || JVET_W0097_GPM_MMVD_TM bool xCheckSimilarMotion(int mergeCandIndex, uint32_t mvdSimilarityThresh = 1) const; #endif #if TM_MRG void copyRegularMergeCand( int dstCandIdx, MergeCtx& srcCtx, int srcCandIdx ); void convertRegularMergeCandToBi(int candIdx); #endif +#if JVET_W0097_GPM_MMVD_TM + void setGeoMmvdMergeInfo(PredictionUnit& pu, int mergeIdx, int mmvdIdx); + void copyMergeCtx(MergeCtx &orgMergeCtx); +#endif }; class AffineMergeCtx diff --git a/source/Lib/CommonLib/Contexts.cpp b/source/Lib/CommonLib/Contexts.cpp index 1bd41905e..b5548efd0 100644 --- a/source/Lib/CommonLib/Contexts.cpp +++ b/source/Lib/CommonLib/Contexts.cpp @@ -853,6 +853,28 @@ const CtxSet ContextSetCfg::MmvdStepMvpIdx = ContextSetCfg::addCtxSet { 0 } }); +#if JVET_W0097_GPM_MMVD_TM +const CtxSet ContextSetCfg::GeoMmvdFlag = ContextSetCfg::addCtxSet +({ + { 25 }, + { 33 }, + { 35 }, + { 5 }, + { 4 }, + { 4 } + }); + +const CtxSet ContextSetCfg::GeoMmvdStepMvpIdx = 
ContextSetCfg::addCtxSet +({ + { 59 }, + { 60 }, + { 35 }, + { 0 }, + { 0 }, + { 0 } + }); +#endif + #if AFFINE_MMVD const CtxSet ContextSetCfg::AfMmvdFlag = ContextSetCfg::addCtxSet ({ @@ -2002,6 +2024,24 @@ const CtxSet ContextSetCfg::MmvdStepMvpIdx = ContextSetCfg::addCtxSet { 0, }, }); +#if JVET_W0097_GPM_MMVD_TM +const CtxSet ContextSetCfg::GeoMmvdFlag = ContextSetCfg::addCtxSet +({ + { 25, }, + { 26, }, + { CNU, }, + { 4, }, + }); + +const CtxSet ContextSetCfg::GeoMmvdStepMvpIdx = ContextSetCfg::addCtxSet +({ + { 59, }, + { 60, }, + { CNU, }, + { 0, }, + }); +#endif + #if AFFINE_MMVD const CtxSet ContextSetCfg::AfMmvdFlag = ContextSetCfg::addCtxSet ({ diff --git a/source/Lib/CommonLib/Contexts.h b/source/Lib/CommonLib/Contexts.h index a49689489..4f49ae343 100644 --- a/source/Lib/CommonLib/Contexts.h +++ b/source/Lib/CommonLib/Contexts.h @@ -257,6 +257,10 @@ public: static const CtxSet MmvdFlag; static const CtxSet MmvdMergeIdx; static const CtxSet MmvdStepMvpIdx; +#if JVET_W0097_GPM_MMVD_TM + static const CtxSet GeoMmvdFlag; + static const CtxSet GeoMmvdStepMvpIdx; +#endif static const CtxSet SubblockMergeFlag; static const CtxSet AffineFlag; static const CtxSet AffineType; diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp index be9096990..3c5a701c4 100644 --- a/source/Lib/CommonLib/InterPrediction.cpp +++ b/source/Lib/CommonLib/InterPrediction.cpp @@ -626,10 +626,21 @@ void InterPrediction::xSubPuBio(PredictionUnit& pu, PelUnitBuf& predBuf, const R int dx = secStep; int dy = fstStep; +#if !JVET_W0097_GPM_MMVD_TM const MotionInfo &curMi = pu.getMotionInfo(Position{ x, y }); +#endif subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(x, y, dx, dy))); +#if JVET_W0097_GPM_MMVD_TM + subPu.interDir = pu.interDir; + for (uint32_t i = 0; i < NUM_REF_PIC_LIST_01; i++) + { + subPu.refIdx[i] = pu.refIdx[i]; + subPu.mv[i] = pu.mv[i]; + } +#else subPu = curMi; +#endif PelUnitBuf subPredBuf = 
predBuf.subBuf(UnitAreaRelative(pu, subPu)); if (yuvDstTmp) @@ -2810,7 +2821,11 @@ void InterPrediction::xWeightedAverage( { if( pu.cu->geoFlag ) { +#if JVET_W0097_GPM_MMVD_TM + pcYuvDst.copyFrom(pcYuvSrc0, lumaOnly, chromaOnly); +#else pcYuvDst.copyFrom( pcYuvSrc0 ); +#endif } else { @@ -2825,7 +2840,11 @@ void InterPrediction::xWeightedAverage( { if( pu.cu->geoFlag ) { +#if JVET_W0097_GPM_MMVD_TM + pcYuvDst.copyFrom(pcYuvSrc1, lumaOnly, chromaOnly); +#else pcYuvDst.copyFrom( pcYuvSrc1 ); +#endif } else { @@ -3489,18 +3508,45 @@ int InterPrediction::rightShiftMSB(int numer, int denom) return numer >> floorLog2(denom); } +#if JVET_W0097_GPM_MMVD_TM && TM_MRG +void InterPrediction::motionCompensationGeo(CodingUnit &cu, MergeCtx &geoMrgCtx, MergeCtx &geoTmMrgCtx0, MergeCtx &geoTmMrgCtx1) +#else void InterPrediction::motionCompensationGeo( CodingUnit &cu, MergeCtx &geoMrgCtx ) +#endif { const uint8_t splitDir = cu.firstPU->geoSplitDir; const uint8_t candIdx0 = cu.firstPU->geoMergeIdx0; const uint8_t candIdx1 = cu.firstPU->geoMergeIdx1; +#if JVET_W0097_GPM_MMVD_TM + const bool geoMMVDFlag0 = cu.firstPU->geoMMVDFlag0; + const uint8_t geoMMVDIdx0 = cu.firstPU->geoMMVDIdx0; + const bool geoMMVDFlag1 = cu.firstPU->geoMMVDFlag1; + const uint8_t geoMMVDIdx1 = cu.firstPU->geoMMVDIdx1; +#if TM_MRG + const bool geoTmFlag0 = cu.firstPU->geoTmFlag0; + const bool geoTmFlag1 = cu.firstPU->geoTmFlag1; +#endif +#endif for( auto &pu : CU::traversePUs( cu ) ) { const UnitArea localUnitArea( cu.cs->area.chromaFormat, Area( 0, 0, pu.lwidth(), pu.lheight() ) ); PelUnitBuf tmpGeoBuf0 = m_geoPartBuf[0].getBuf( localUnitArea ); PelUnitBuf tmpGeoBuf1 = m_geoPartBuf[1].getBuf( localUnitArea ); PelUnitBuf predBuf = cu.cs->getPredBuf( pu ); - +#if JVET_W0097_GPM_MMVD_TM +#if TM_MRG + if (geoTmFlag0) + { + geoTmMrgCtx0.setMergeInfo(pu, candIdx0); + } + else +#endif + if (geoMMVDFlag0) + { + geoMrgCtx.setGeoMmvdMergeInfo(pu, candIdx0, geoMMVDIdx0); + } + else +#endif geoMrgCtx.setMergeInfo( pu, 
candIdx0 ); motionCompensation(pu, tmpGeoBuf0, REF_PIC_LIST_X, true, isChromaEnabled(pu.chromaFormat)); // TODO: check 4:0:0 interaction with weighted prediction. @@ -3508,7 +3554,20 @@ void InterPrediction::motionCompensationGeo( CodingUnit &cu, MergeCtx &geoMrgCtx { printf( "DECODER_GEO_PU: pu motion vector across tile boundaries (%d,%d,%d,%d)\n", pu.lx(), pu.ly(), pu.lwidth(), pu.lheight() ); } - +#if JVET_W0097_GPM_MMVD_TM +#if TM_MRG + if (geoTmFlag1) + { + geoTmMrgCtx1.setMergeInfo(pu, candIdx1); + } + else +#endif + if (geoMMVDFlag1) + { + geoMrgCtx.setGeoMmvdMergeInfo(pu, candIdx1, geoMMVDIdx1); + } + else +#endif geoMrgCtx.setMergeInfo( pu, candIdx1 ); motionCompensation(pu, tmpGeoBuf1, REF_PIC_LIST_X, true, isChromaEnabled(pu.chromaFormat)); // TODO: check 4:0:0 interaction with weighted prediction. @@ -4937,7 +4996,26 @@ bool TplMatchingCtrl::xFillCurTemplate(Pel* tpl) const CPelBuf recBuf = currPic.getRecoBuf(m_cu.cs->picture->blocks[m_compID]); std::vector<Pel>& invLUT = m_interRes.m_pcReshape->getInvLUT(); const bool useLUT = isLuma(m_compID) && m_cu.cs->picHeader->getLmcsEnabledFlag() && m_interRes.m_pcReshape->getCTUFlag(); - +#if JVET_W0097_GPM_MMVD_TM && TM_MRG + if (m_cu.geoFlag) + { + CHECK(m_pu.geoTmType == GEO_TM_OFF, "invalid geo template type value"); + if (m_pu.geoTmType == GEO_TM_SHAPE_A) /* above-only shape: the left template (TrueA_FalseL == 0) is not filled */ + { + if (TrueA_FalseL == 0) + { + return false; + } + } + if (m_pu.geoTmType == GEO_TM_SHAPE_L) /* left-only shape: the above template (TrueA_FalseL == 1) is not filled */ + { + if (TrueA_FalseL == 1) + { + return false; + } + } + } +#endif const Size dstSize = (TrueA_FalseL ? 
Size(m_pu.lwidth(), tplSize) : Size(tplSize, m_pu.lheight())); for (int h = 0; h < (int)dstSize.height; h++) { diff --git a/source/Lib/CommonLib/InterPrediction.h b/source/Lib/CommonLib/InterPrediction.h index 0333d8448..bfd8f5282 100644 --- a/source/Lib/CommonLib/InterPrediction.h +++ b/source/Lib/CommonLib/InterPrediction.h @@ -280,7 +280,11 @@ public: #if ENABLE_OBMC void subBlockOBMC(PredictionUnit &pu, PelUnitBuf *pDst = nullptr); #endif +#if JVET_W0097_GPM_MMVD_TM && TM_MRG + void motionCompensationGeo(CodingUnit &cu, MergeCtx &geoMrgCtx, MergeCtx &geoTmMrgCtx0, MergeCtx &geoTmMrgCtx1); +#else void motionCompensationGeo(CodingUnit &cu, MergeCtx &GeoMrgCtx); +#endif void weightedGeoBlk(PredictionUnit &pu, const uint8_t splitDir, int32_t channel, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1); void xPrefetch(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId, bool forLuma); void xPad(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId); diff --git a/source/Lib/CommonLib/Reshape.cpp b/source/Lib/CommonLib/Reshape.cpp index bfe772ed6..24f11ab00 100644 --- a/source/Lib/CommonLib/Reshape.cpp +++ b/source/Lib/CommonLib/Reshape.cpp @@ -90,7 +90,6 @@ void Reshape::destroy() } - /** compute chroma residuce scale for TU * \param average luma pred of TU * \return chroma residue scale diff --git a/source/Lib/CommonLib/Rom.cpp b/source/Lib/CommonLib/Rom.cpp index fb4317e06..6f327a8c6 100644 --- a/source/Lib/CommonLib/Rom.cpp +++ b/source/Lib/CommonLib/Rom.cpp @@ -4540,4 +4540,23 @@ int8_t g_angle2mirror[GEO_NUM_ANGLES] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, const int g_addHypWeight[MULTI_HYP_PRED_NUM_WEIGHTS] = { 2, -1 }; static_assert(g_BcwLog2WeightBase == MULTI_HYP_PRED_WEIGHT_BITS, "number of bits for gbi and multi-hyp weights do not match"); #endif +#if JVET_W0097_GPM_MMVD_TM && TM_MRG +uint8_t g_geoTmShape[2][GEO_NUM_ANGLES] = { + { GEO_TM_SHAPE_A, 0, GEO_TM_SHAPE_A, GEO_TM_SHAPE_A, + GEO_TM_SHAPE_A, GEO_TM_SHAPE_AL, 0, 0, + 
GEO_TM_SHAPE_AL, 0, 0, GEO_TM_SHAPE_AL, + GEO_TM_SHAPE_AL, GEO_TM_SHAPE_A, GEO_TM_SHAPE_A, 0, + GEO_TM_SHAPE_A, 0, GEO_TM_SHAPE_A, GEO_TM_SHAPE_A, + GEO_TM_SHAPE_A, GEO_TM_SHAPE_AL, 0, 0, + GEO_TM_SHAPE_AL, 0, 0, GEO_TM_SHAPE_AL, + GEO_TM_SHAPE_AL, GEO_TM_SHAPE_A, GEO_TM_SHAPE_A, 0, }, + { GEO_TM_SHAPE_AL, 0, GEO_TM_SHAPE_AL, GEO_TM_SHAPE_AL, + GEO_TM_SHAPE_L, GEO_TM_SHAPE_L, 0, 0, + GEO_TM_SHAPE_L, 0, 0, GEO_TM_SHAPE_L, + GEO_TM_SHAPE_AL, GEO_TM_SHAPE_AL, GEO_TM_SHAPE_AL, 0, + GEO_TM_SHAPE_AL, 0, GEO_TM_SHAPE_AL, GEO_TM_SHAPE_AL, + GEO_TM_SHAPE_L, GEO_TM_SHAPE_L, 0, 0, + GEO_TM_SHAPE_L, 0, 0, GEO_TM_SHAPE_L, + GEO_TM_SHAPE_AL, GEO_TM_SHAPE_AL, GEO_TM_SHAPE_AL, 0, } }; +#endif //! \} diff --git a/source/Lib/CommonLib/Rom.h b/source/Lib/CommonLib/Rom.h index b41f205ba..7158a30c9 100644 --- a/source/Lib/CommonLib/Rom.h +++ b/source/Lib/CommonLib/Rom.h @@ -304,6 +304,9 @@ extern int16_t g_weightOffset [GEO_NUM_PARTITION_MODE][GEO_NUM_CU_SIZE][ extern int8_t g_angle2mask [GEO_NUM_ANGLES]; extern int8_t g_Dis[GEO_NUM_ANGLES]; extern int8_t g_angle2mirror[GEO_NUM_ANGLES]; +#if JVET_W0097_GPM_MMVD_TM && TM_MRG +extern uint8_t g_geoTmShape[2][GEO_NUM_ANGLES]; +#endif #if MULTI_HYP_PRED extern const int g_addHypWeight[MULTI_HYP_PRED_NUM_WEIGHTS]; #endif diff --git a/source/Lib/CommonLib/Slice.cpp b/source/Lib/CommonLib/Slice.cpp index d82e0bd29..77d4e6ba4 100644 --- a/source/Lib/CommonLib/Slice.cpp +++ b/source/Lib/CommonLib/Slice.cpp @@ -2819,6 +2819,9 @@ PicHeader::PicHeader() , m_disDmvrFlag ( 0 ) , m_disProfFlag ( 0 ) , m_jointCbCrSignFlag ( 0 ) +#if JVET_W0097_GPM_MMVD_TM +, m_gpmMMVDTableFlag (false) +#endif , m_qpDelta ( 0 ) , m_numAlfAps ( 0 ) , m_alfApsId ( 0 ) @@ -2911,6 +2914,9 @@ void PicHeader::initPicHeader() m_disDmvrFlag = 0; m_disProfFlag = 0; m_jointCbCrSignFlag = 0; +#if JVET_W0097_GPM_MMVD_TM + m_gpmMMVDTableFlag = 0; +#endif m_qpDelta = 0; m_numAlfAps = 0; m_alfChromaApsId = 0; diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h 
index 70d7b8bc6..d2abdaa22 100644 --- a/source/Lib/CommonLib/Slice.h +++ b/source/Lib/CommonLib/Slice.h @@ -2658,6 +2658,9 @@ private: bool m_disDmvrFlag; //!< picture level DMVR disable flag bool m_disProfFlag; //!< picture level PROF disable flag bool m_jointCbCrSignFlag; //!< joint Cb/Cr residual sign flag +#if JVET_W0097_GPM_MMVD_TM + bool m_gpmMMVDTableFlag; +#endif int m_qpDelta; //!< value of Qp delta bool m_saoEnabledFlag[MAX_NUM_CHANNEL_TYPE]; //!< sao enabled flags for each channel #if ALF_IMPROVEMENT @@ -2786,6 +2789,10 @@ public: bool getDisProfFlag() const { return m_disProfFlag; } void setJointCbCrSignFlag( bool b ) { m_jointCbCrSignFlag = b; } bool getJointCbCrSignFlag() const { return m_jointCbCrSignFlag; } +#if JVET_W0097_GPM_MMVD_TM + void setGPMMMVDTableFlag(bool b) { m_gpmMMVDTableFlag = b; } + bool getGPMMMVDTableFlag() const { return m_gpmMMVDTableFlag; } +#endif void setQpDelta(int b) { m_qpDelta = b; } int getQpDelta() const { return m_qpDelta; } void setSaoEnabledFlag(ChannelType chType, bool b) { m_saoEnabledFlag[chType] = b; } diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 63d251c26..740e8ddfa 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -126,6 +126,7 @@ #define TM_AMVP 1 // Add template matching to non-subblock inter to refine regular AMVP candidates #define TM_MRG 1 // Add template matching to non-subblock inter to refine regular merge candidates #define ENABLE_OBMC 1 // Enable Overlapped Block Motion Compensation +#define JVET_W0097_GPM_MMVD_TM 1 // JVET-W0097: GPM-MMVD and GPM-TM // Transform and coefficient coding #define TCQ_8STATES 1 @@ -715,7 +716,16 @@ enum RefPicList #define L0 REF_PIC_LIST_0 #define L1 REF_PIC_LIST_1 - +#if JVET_W0097_GPM_MMVD_TM && TM_MRG +enum GeoTmMvCand +{ + GEO_TM_OFF = 0, + GEO_TM_SHAPE_AL, + GEO_TM_SHAPE_A, + GEO_TM_SHAPE_L, + GEO_NUM_TM_MV_CAND +}; +#endif /// distortion function index enum DFunc { diff --git 
a/source/Lib/CommonLib/Unit.cpp b/source/Lib/CommonLib/Unit.cpp index 4e28e52a2..9cfac21f2 100644 --- a/source/Lib/CommonLib/Unit.cpp +++ b/source/Lib/CommonLib/Unit.cpp @@ -637,6 +637,17 @@ void PredictionUnit::initData() geoSplitDir = MAX_UCHAR; geoMergeIdx0 = MAX_UCHAR; geoMergeIdx1 = MAX_UCHAR; +#if JVET_W0097_GPM_MMVD_TM + geoMMVDFlag0 = false; + geoMMVDIdx0 = MAX_UCHAR; + geoMMVDFlag1 = false; + geoMMVDIdx1 = MAX_UCHAR; +#if TM_MRG + geoTmFlag0 = false; + geoTmFlag1 = false; + geoTmType = GEO_TM_OFF; /* give the GPM-TM template shape a defined value; xFillCurTemplate() CHECKs it against GEO_TM_OFF */ +#endif +#endif mmvdMergeFlag = false; mmvdMergeIdx = MAX_UCHAR; #if AFFINE_MMVD @@ -717,6 +728,16 @@ PredictionUnit& PredictionUnit::operator=(const InterPredictionData& predData) geoSplitDir = predData.geoSplitDir; geoMergeIdx0 = predData.geoMergeIdx0; geoMergeIdx1 = predData.geoMergeIdx1; +#if JVET_W0097_GPM_MMVD_TM + geoMMVDFlag0 = predData.geoMMVDFlag0; + geoMMVDIdx0 = predData.geoMMVDIdx0; + geoMMVDFlag1 = predData.geoMMVDFlag1; + geoMMVDIdx1 = predData.geoMMVDIdx1; +#if TM_MRG + geoTmFlag0 = predData.geoTmFlag0; + geoTmFlag1 = predData.geoTmFlag1; +#endif +#endif mmvdMergeFlag = predData.mmvdMergeFlag; mmvdMergeIdx = predData.mmvdMergeIdx; #if AFFINE_MMVD @@ -793,6 +814,16 @@ PredictionUnit& PredictionUnit::operator=( const PredictionUnit& other ) geoSplitDir = other.geoSplitDir; geoMergeIdx0 = other.geoMergeIdx0; geoMergeIdx1 = other.geoMergeIdx1; +#if JVET_W0097_GPM_MMVD_TM + geoMMVDFlag0 = other.geoMMVDFlag0; + geoMMVDIdx0 = other.geoMMVDIdx0; + geoMMVDFlag1 = other.geoMMVDFlag1; + geoMMVDIdx1 = other.geoMMVDIdx1; +#if TM_MRG + geoTmFlag0 = other.geoTmFlag0; + geoTmFlag1 = other.geoTmFlag1; +#endif +#endif mmvdMergeFlag = other.mmvdMergeFlag; mmvdMergeIdx = other.mmvdMergeIdx; #if AFFINE_MMVD diff --git a/source/Lib/CommonLib/Unit.h b/source/Lib/CommonLib/Unit.h index 1bd52574d..4e1273375 100644 --- a/source/Lib/CommonLib/Unit.h +++ b/source/Lib/CommonLib/Unit.h @@ -431,6 +431,17 @@ struct InterPredictionData uint8_t geoSplitDir; uint8_t geoMergeIdx0; uint8_t geoMergeIdx1; +#if 
JVET_W0097_GPM_MMVD_TM + bool geoMMVDFlag0; + uint8_t geoMMVDIdx0; + bool geoMMVDFlag1; + uint8_t geoMMVDIdx1; +#if TM_MRG + bool geoTmFlag0; + bool geoTmFlag1; + uint8_t geoTmType; +#endif +#endif bool mmvdMergeFlag; uint8_t mmvdMergeIdx; #if AFFINE_MMVD @@ -527,7 +538,6 @@ struct TransformUnit : public UnitArea CodingStructure *cs; ChannelType chType; int m_chromaResScaleInv; - uint8_t depth; uint8_t mtsIdx [ MAX_NUM_TBLOCKS ]; bool noResidual; diff --git a/source/Lib/CommonLib/UnitTools.cpp b/source/Lib/CommonLib/UnitTools.cpp index d8528c0f7..1e46844cf 100644 --- a/source/Lib/CommonLib/UnitTools.cpp +++ b/source/Lib/CommonLib/UnitTools.cpp @@ -1441,7 +1441,7 @@ void PU::getIBCMergeCandidates(const PredictionUnit &pu, MergeCtx& mrgCtx, const mrgCtx.numValidMergeCand = cnt; } -#if MULTI_PASS_DMVR +#if MULTI_PASS_DMVR || JVET_W0097_GPM_MMVD_TM uint32_t PU::getBDMVRMvdThreshold(const PredictionUnit &pu) { uint32_t numPixels = pu.lwidth() * pu.lheight(); @@ -4777,7 +4777,15 @@ void PU::restrictBiPredMergeCandsOne(PredictionUnit &pu) } } +#if JVET_W0097_GPM_MMVD_TM +#if TM_MRG +void PU::getGeoMergeCandidates(PredictionUnit &pu, MergeCtx& geoMrgCtx, MergeCtx* mergeCtx) +#else +void PU::getGeoMergeCandidates(const PredictionUnit &pu, MergeCtx& geoMrgCtx, MergeCtx* mergeCtx) +#endif +#else void PU::getGeoMergeCandidates( const PredictionUnit &pu, MergeCtx& geoMrgCtx ) +#endif { MergeCtx tmpMergeCtx; @@ -4797,9 +4805,26 @@ void PU::getGeoMergeCandidates( const PredictionUnit &pu, MergeCtx& geoMrgCtx ) geoMrgCtx.LICFlags[i] = false; #endif } - +#if JVET_W0097_GPM_MMVD_TM + if (mergeCtx == NULL) + { +#if TM_MRG + const bool tmMergeFlag = pu.tmMergeFlag; + pu.tmMergeFlag = false; +#endif +#endif PU::getInterMergeCandidates(pu, tmpMergeCtx, 0); - +#if JVET_W0097_GPM_MMVD_TM +#if TM_MRG + pu.tmMergeFlag = tmMergeFlag; +#endif + } + else + { + memcpy(tmpMergeCtx.interDirNeighbours, mergeCtx->interDirNeighbours, maxNumMergeCand * sizeof(unsigned char)); + 
memcpy(tmpMergeCtx.mvFieldNeighbours, mergeCtx->mvFieldNeighbours, (maxNumMergeCand << 1) * sizeof(MvField)); + } +#endif for (int32_t i = 0; i < maxNumMergeCand; i++) { int parity = i & 1; @@ -4810,6 +4835,12 @@ void PU::getGeoMergeCandidates( const PredictionUnit &pu, MergeCtx& geoMrgCtx ) geoMrgCtx.mvFieldNeighbours[(geoMrgCtx.numValidMergeCand << 1) + parity].mv = tmpMergeCtx.mvFieldNeighbours[(i << 1) + parity].mv; geoMrgCtx.mvFieldNeighbours[(geoMrgCtx.numValidMergeCand << 1) + !parity].refIdx = -1; geoMrgCtx.mvFieldNeighbours[(geoMrgCtx.numValidMergeCand << 1) + parity].refIdx = tmpMergeCtx.mvFieldNeighbours[(i << 1) + parity].refIdx; +#if JVET_W0097_GPM_MMVD_TM + if (geoMrgCtx.xCheckSimilarMotion(geoMrgCtx.numValidMergeCand, PU::getBDMVRMvdThreshold(pu))) + { + continue; + } +#endif geoMrgCtx.numValidMergeCand++; if (geoMrgCtx.numValidMergeCand == GEO_MAX_NUM_UNI_CANDS) { @@ -4825,6 +4856,12 @@ void PU::getGeoMergeCandidates( const PredictionUnit &pu, MergeCtx& geoMrgCtx ) geoMrgCtx.mvFieldNeighbours[(geoMrgCtx.numValidMergeCand << 1) + parity].mv = Mv(0, 0); geoMrgCtx.mvFieldNeighbours[(geoMrgCtx.numValidMergeCand << 1) + !parity].refIdx = tmpMergeCtx.mvFieldNeighbours[(i << 1) + !parity].refIdx; geoMrgCtx.mvFieldNeighbours[(geoMrgCtx.numValidMergeCand << 1) + parity].refIdx = -1; +#if JVET_W0097_GPM_MMVD_TM + if (geoMrgCtx.xCheckSimilarMotion(geoMrgCtx.numValidMergeCand, PU::getBDMVRMvdThreshold(pu))) + { + continue; + } +#endif geoMrgCtx.numValidMergeCand++; if (geoMrgCtx.numValidMergeCand == GEO_MAX_NUM_UNI_CANDS) { @@ -4832,6 +4869,148 @@ void PU::getGeoMergeCandidates( const PredictionUnit &pu, MergeCtx& geoMrgCtx ) } } } +#if JVET_W0097_GPM_MMVD_TM + // add more parity based geo candidates, in an opposite parity rule + if (geoMrgCtx.numValidMergeCand < pu.cs->sps->getMaxNumGeoCand()) + { + for (int32_t i = 0; i < maxNumMergeCand; i++) + { + int parity = i & 1; + if (tmpMergeCtx.interDirNeighbours[i] == 3) + { + 
geoMrgCtx.interDirNeighbours[geoMrgCtx.numValidMergeCand] = 2 - parity; + geoMrgCtx.mvFieldNeighbours[(geoMrgCtx.numValidMergeCand << 1) + !parity].mv = tmpMergeCtx.mvFieldNeighbours[(i << 1) + !parity].mv; + geoMrgCtx.mvFieldNeighbours[(geoMrgCtx.numValidMergeCand << 1) + parity].mv = Mv(0, 0); + geoMrgCtx.mvFieldNeighbours[(geoMrgCtx.numValidMergeCand << 1) + !parity].refIdx = tmpMergeCtx.mvFieldNeighbours[(i << 1) + !parity].refIdx; + geoMrgCtx.mvFieldNeighbours[(geoMrgCtx.numValidMergeCand << 1) + parity].refIdx = -1; + if (geoMrgCtx.xCheckSimilarMotion(geoMrgCtx.numValidMergeCand, PU::getBDMVRMvdThreshold(pu))) + { + continue; + } + geoMrgCtx.numValidMergeCand++; + if (geoMrgCtx.numValidMergeCand == pu.cs->sps->getMaxNumGeoCand()) + { + return; + } + } + } + } + + // add at most two average based geo candidates + if (geoMrgCtx.numValidMergeCand < pu.cs->sps->getMaxNumGeoCand()) + { + // add one L0 cand by averaging the first two available L0 candidates + int cnt = 0; + int firstAvailRefIdx = -1; + Mv avgMv; + avgMv.setZero(); + for (int i = 0; i < geoMrgCtx.numValidMergeCand; i++) + { + if (cnt == 2) + { + break; + } + if (geoMrgCtx.interDirNeighbours[i] == 1) + { + avgMv += geoMrgCtx.mvFieldNeighbours[i * 2].mv; + if (firstAvailRefIdx == -1) + { + firstAvailRefIdx = geoMrgCtx.mvFieldNeighbours[i * 2].refIdx; + } + cnt++; + } + } + if (cnt == 2) + { + roundAffineMv(avgMv.hor, avgMv.ver, 1); + geoMrgCtx.interDirNeighbours[geoMrgCtx.numValidMergeCand] = 1; + geoMrgCtx.mvFieldNeighbours[geoMrgCtx.numValidMergeCand * 2].setMvField(avgMv, firstAvailRefIdx); + geoMrgCtx.mvFieldNeighbours[geoMrgCtx.numValidMergeCand * 2 + 1].setMvField(Mv(0, 0), NOT_VALID); + + if (!geoMrgCtx.xCheckSimilarMotion(geoMrgCtx.numValidMergeCand, PU::getBDMVRMvdThreshold(pu))) + { + geoMrgCtx.numValidMergeCand++; + } + if (geoMrgCtx.numValidMergeCand == pu.cs->sps->getMaxNumGeoCand()) + { + return; + } + } + + // add one L1 cand by averaging the first two available L1 candidates + cnt = 0; 
+ firstAvailRefIdx = -1; + avgMv.setZero(); + for (int i = 0; i < geoMrgCtx.numValidMergeCand; i++) + { + if (cnt == 2) + { + break; + } + if (geoMrgCtx.interDirNeighbours[i] == 2) + { + avgMv += geoMrgCtx.mvFieldNeighbours[i * 2 + 1].mv; + if (firstAvailRefIdx == -1) + { + firstAvailRefIdx = geoMrgCtx.mvFieldNeighbours[i * 2 + 1].refIdx; + } + cnt++; + } + } + if (cnt == 2) + { + roundAffineMv(avgMv.hor, avgMv.ver, 1); + geoMrgCtx.interDirNeighbours[geoMrgCtx.numValidMergeCand] = 2; + geoMrgCtx.mvFieldNeighbours[geoMrgCtx.numValidMergeCand * 2 + 1].setMvField(avgMv, firstAvailRefIdx); + geoMrgCtx.mvFieldNeighbours[geoMrgCtx.numValidMergeCand * 2].setMvField(Mv(0, 0), NOT_VALID); + if (!geoMrgCtx.xCheckSimilarMotion(geoMrgCtx.numValidMergeCand, PU::getBDMVRMvdThreshold(pu))) + { + geoMrgCtx.numValidMergeCand++; + } + if (geoMrgCtx.numValidMergeCand == pu.cs->sps->getMaxNumGeoCand()) + { + return; + } + } + } + if (geoMrgCtx.numValidMergeCand < pu.cs->sps->getMaxNumGeoCand()) + { + const Slice &slice = *pu.cs->slice; + int iNumRefIdx = std::min(slice.getNumRefIdx(REF_PIC_LIST_0), slice.getNumRefIdx(REF_PIC_LIST_1)); + + int r = 0; + int refcnt = 0; + + for (int32_t i = geoMrgCtx.numValidMergeCand; i < pu.cs->sps->getMaxNumGeoCand(); i++) + { + int parity = i & 1; + if (0x01 + parity) + { + geoMrgCtx.interDirNeighbours[geoMrgCtx.numValidMergeCand] = 1 + parity; + geoMrgCtx.mvFieldNeighbours[(geoMrgCtx.numValidMergeCand << 1) + !parity].mv = Mv(0, 0); + geoMrgCtx.mvFieldNeighbours[(geoMrgCtx.numValidMergeCand << 1) + parity].mv = Mv(0, 0); + geoMrgCtx.mvFieldNeighbours[(geoMrgCtx.numValidMergeCand << 1) + !parity].refIdx = -1; + geoMrgCtx.mvFieldNeighbours[(geoMrgCtx.numValidMergeCand << 1) + parity].refIdx = r; + + if (refcnt == iNumRefIdx - 1) + { + r = 0; + } + else + { + ++r; + ++refcnt; + } + + geoMrgCtx.numValidMergeCand++; + if (geoMrgCtx.numValidMergeCand == pu.cs->sps->getMaxNumGeoCand()) + { + return; + } + } + } + } +#endif } void PU::spanGeoMotionInfo( 
PredictionUnit &pu, MergeCtx &geoMrgCtx, const uint8_t splitDir, const uint8_t candIdx0, const uint8_t candIdx1) @@ -4950,6 +5129,441 @@ void PU::spanGeoMotionInfo( PredictionUnit &pu, MergeCtx &geoMrgCtx, const uint8 #endif } +#if JVET_W0097_GPM_MMVD_TM +#if TM_MRG +void PU::spanGeoMMVDMotionInfo(PredictionUnit &pu, MergeCtx &geoMrgCtx, MergeCtx &geoTmMrgCtx0, MergeCtx &geoTmMrgCtx1, const uint8_t splitDir, const uint8_t mergeIdx0, const uint8_t mergeIdx1, const bool tmFlag0, const bool mmvdFlag0, const uint8_t mmvdIdx0, const bool tmFlag1, const bool mmvdFlag1, const uint8_t mmvdIdx1) +{ + pu.geoSplitDir = splitDir; + pu.geoMergeIdx0 = mergeIdx0; + pu.geoMergeIdx1 = mergeIdx1; + pu.geoTmFlag0 = tmFlag0; + pu.geoMMVDFlag0 = mmvdFlag0; + pu.geoMMVDIdx0 = mmvdIdx0; + pu.geoTmFlag1 = tmFlag1; + pu.geoMMVDFlag1 = mmvdFlag1; + pu.geoMMVDIdx1 = mmvdIdx1; + + MergeCtx *mergeCtx0 = (tmFlag0 ? &geoTmMrgCtx0 : &geoMrgCtx); + MergeCtx *mergeCtx1 = (tmFlag1 ? &geoTmMrgCtx1 : &geoMrgCtx); + + const int mvShift = MV_FRACTIONAL_BITS_DIFF; + const bool extMMVD = pu.cs->picHeader->getGPMMMVDTableFlag(); + const int MmvdCands[8] = { 1 << mvShift , 2 << mvShift , 4 << mvShift , 8 << mvShift , 16 << mvShift , 32 << mvShift, 64 << mvShift , 128 << mvShift }; + const int ExtMmvdCands[9] = { 1 << mvShift , 2 << mvShift , 4 << mvShift , 8 << mvShift , 12 << mvShift , 16 << mvShift, 24 << mvShift, 32 << mvShift, 64 << mvShift }; + const int* refMvdCands = (extMMVD ? ExtMmvdCands : MmvdCands); + Mv mvOffset0[2], mvOffset1[2], deltaMv; + + if (mmvdFlag0) + { + int fPosStep = (extMMVD ? (mmvdIdx0 >> 3) : (mmvdIdx0 >> 2)); + int fPosPosition = (extMMVD ? 
(mmvdIdx0 - (fPosStep << 3)) : (mmvdIdx0 - (fPosStep << 2))); + + if (fPosPosition == 0) + { + deltaMv = Mv(refMvdCands[fPosStep], 0); + } + else if (fPosPosition == 1) + { + deltaMv = Mv(-refMvdCands[fPosStep], 0); + } + else if (fPosPosition == 2) + { + deltaMv = Mv(0, refMvdCands[fPosStep]); + } + else if (fPosPosition == 3) + { + deltaMv = Mv(0, -refMvdCands[fPosStep]); + } + else if (fPosPosition == 4) + { + deltaMv = Mv(refMvdCands[fPosStep], refMvdCands[fPosStep]); + } + else if (fPosPosition == 5) + { + deltaMv = Mv(refMvdCands[fPosStep], -refMvdCands[fPosStep]); + } + else if (fPosPosition == 6) + { + deltaMv = Mv(-refMvdCands[fPosStep], refMvdCands[fPosStep]); + } + else if (fPosPosition == 7) + { + deltaMv = Mv(-refMvdCands[fPosStep], -refMvdCands[fPosStep]); + } + + if (mergeCtx0->interDirNeighbours[mergeIdx0] == 1) + { + mvOffset0[0] = deltaMv; + } + else + { + mvOffset0[1] = deltaMv; + } + } + + if (mmvdFlag1) + { + int fPosStep = (extMMVD ? (mmvdIdx1 >> 3) : (mmvdIdx1 >> 2)); + int fPosPosition = (extMMVD ? 
(mmvdIdx1 - (fPosStep << 3)) : (mmvdIdx1 - (fPosStep << 2))); + + if (fPosPosition == 0) + { + deltaMv = Mv(refMvdCands[fPosStep], 0); + } + else if (fPosPosition == 1) + { + deltaMv = Mv(-refMvdCands[fPosStep], 0); + } + else if (fPosPosition == 2) + { + deltaMv = Mv(0, refMvdCands[fPosStep]); + } + else if (fPosPosition == 3) + { + deltaMv = Mv(0, -refMvdCands[fPosStep]); + } + else if (fPosPosition == 4) + { + deltaMv = Mv(refMvdCands[fPosStep], refMvdCands[fPosStep]); + } + else if (fPosPosition == 5) + { + deltaMv = Mv(refMvdCands[fPosStep], -refMvdCands[fPosStep]); + } + else if (fPosPosition == 6) + { + deltaMv = Mv(-refMvdCands[fPosStep], refMvdCands[fPosStep]); + } + else if (fPosPosition == 7) + { + deltaMv = Mv(-refMvdCands[fPosStep], -refMvdCands[fPosStep]); + } + + if (mergeCtx1->interDirNeighbours[mergeIdx1] == 1) + { + mvOffset1[0] = deltaMv; + } + else + { + mvOffset1[1] = deltaMv; + } + } + + MotionBuf mb = pu.getMotionBuf(); + + MotionInfo biMv; + biMv.isInter = true; + biMv.sliceIdx = pu.cs->slice->getIndependentSliceIdx(); + + if (mergeCtx0->interDirNeighbours[mergeIdx0] == 1 && mergeCtx1->interDirNeighbours[mergeIdx1] == 2) + { + biMv.interDir = 3; + biMv.mv[0] = mergeCtx0->mvFieldNeighbours[mergeIdx0 << 1].mv + mvOffset0[0]; + biMv.mv[1] = mergeCtx1->mvFieldNeighbours[(mergeIdx1 << 1) + 1].mv + mvOffset1[1]; + biMv.refIdx[0] = mergeCtx0->mvFieldNeighbours[mergeIdx0 << 1].refIdx; + biMv.refIdx[1] = mergeCtx1->mvFieldNeighbours[(mergeIdx1 << 1) + 1].refIdx; + } + else if (mergeCtx0->interDirNeighbours[mergeIdx0] == 2 && mergeCtx1->interDirNeighbours[mergeIdx1] == 1) + { + biMv.interDir = 3; + biMv.mv[0] = mergeCtx1->mvFieldNeighbours[mergeIdx1 << 1].mv + mvOffset1[0]; + biMv.mv[1] = mergeCtx0->mvFieldNeighbours[(mergeIdx0 << 1) + 1].mv + mvOffset0[1]; + biMv.refIdx[0] = mergeCtx1->mvFieldNeighbours[mergeIdx1 << 1].refIdx; + biMv.refIdx[1] = mergeCtx0->mvFieldNeighbours[(mergeIdx0 << 1) + 1].refIdx; + } + else if 
(mergeCtx0->interDirNeighbours[mergeIdx0] == 1 && mergeCtx1->interDirNeighbours[mergeIdx1] == 1) + { + biMv.interDir = 1; + biMv.mv[0] = mergeCtx1->mvFieldNeighbours[mergeIdx1 << 1].mv + mvOffset1[0]; + biMv.mv[1] = Mv(0, 0); + biMv.refIdx[0] = mergeCtx1->mvFieldNeighbours[mergeIdx1 << 1].refIdx; + biMv.refIdx[1] = -1; + } + else if (mergeCtx0->interDirNeighbours[mergeIdx0] == 2 && mergeCtx1->interDirNeighbours[mergeIdx1] == 2) + { + biMv.interDir = 2; + biMv.mv[0] = Mv(0, 0); + biMv.mv[1] = mergeCtx1->mvFieldNeighbours[(mergeIdx1 << 1) + 1].mv + mvOffset1[1]; + biMv.refIdx[0] = -1; + biMv.refIdx[1] = mergeCtx1->mvFieldNeighbours[(mergeIdx1 << 1) + 1].refIdx; + } + + int16_t angle = g_GeoParams[splitDir][0]; + int tpmMask = 0; + int lookUpY = 0, motionIdx = 0; + bool isFlip = angle >= 13 && angle <= 27; + int distanceIdx = g_GeoParams[splitDir][1]; + int distanceX = angle; + int distanceY = (distanceX + (GEO_NUM_ANGLES >> 2)) % GEO_NUM_ANGLES; + int offsetX = (-(int)pu.lwidth()) >> 1; + int offsetY = (-(int)pu.lheight()) >> 1; + if (distanceIdx > 0) + { + if (angle % 16 == 8 || (angle % 16 != 0 && pu.lheight() >= pu.lwidth())) + { + offsetY += angle < 16 ? ((distanceIdx * pu.lheight()) >> 3) : -(int)((distanceIdx * pu.lheight()) >> 3); + } + else + { + offsetX += angle < 16 ? ((distanceIdx * pu.lwidth()) >> 3) : -(int)((distanceIdx * pu.lwidth()) >> 3); + } + } + for (int y = 0; y < mb.height; y++) + { + lookUpY = (((4 * y + offsetY) << 1) + 5) * g_Dis[distanceY]; + for (int x = 0; x < mb.width; x++) + { + motionIdx = (((4 * x + offsetX) << 1) + 5) * g_Dis[distanceX] + lookUpY; + tpmMask = abs(motionIdx) < 32 ? 2 : (motionIdx <= 0 ? 
(1 - isFlip) : isFlip); + if (tpmMask == 2) + { + mb.at(x, y).isInter = true; + mb.at(x, y).interDir = biMv.interDir; + mb.at(x, y).refIdx[0] = biMv.refIdx[0]; + mb.at(x, y).refIdx[1] = biMv.refIdx[1]; + mb.at(x, y).mv[0] = biMv.mv[0]; + mb.at(x, y).mv[1] = biMv.mv[1]; + mb.at(x, y).sliceIdx = biMv.sliceIdx; + } + else if (tpmMask == 0) + { + mb.at(x, y).isInter = true; + mb.at(x, y).interDir = mergeCtx0->interDirNeighbours[mergeIdx0]; + mb.at(x, y).refIdx[0] = mergeCtx0->mvFieldNeighbours[mergeIdx0 << 1].refIdx; + mb.at(x, y).refIdx[1] = mergeCtx0->mvFieldNeighbours[(mergeIdx0 << 1) + 1].refIdx; + mb.at(x, y).mv[0] = mergeCtx0->mvFieldNeighbours[mergeIdx0 << 1].mv + mvOffset0[0]; + mb.at(x, y).mv[1] = mergeCtx0->mvFieldNeighbours[(mergeIdx0 << 1) + 1].mv + mvOffset0[1]; + mb.at(x, y).sliceIdx = biMv.sliceIdx; + } + else + { + mb.at(x, y).isInter = true; + mb.at(x, y).interDir = mergeCtx1->interDirNeighbours[mergeIdx1]; + mb.at(x, y).refIdx[0] = mergeCtx1->mvFieldNeighbours[mergeIdx1 << 1].refIdx; + mb.at(x, y).refIdx[1] = mergeCtx1->mvFieldNeighbours[(mergeIdx1 << 1) + 1].refIdx; + mb.at(x, y).mv[0] = mergeCtx1->mvFieldNeighbours[mergeIdx1 << 1].mv + mvOffset1[0]; + mb.at(x, y).mv[1] = mergeCtx1->mvFieldNeighbours[(mergeIdx1 << 1) + 1].mv + mvOffset1[1]; + mb.at(x, y).sliceIdx = biMv.sliceIdx; + } + } + } +} +#else +void PU::spanGeoMMVDMotionInfo(PredictionUnit &pu, MergeCtx &geoMrgCtx, const uint8_t splitDir, const uint8_t mergeIdx0, const uint8_t mergeIdx1, const bool mmvdFlag0, const uint8_t mmvdIdx0, const bool mmvdFlag1, const uint8_t mmvdIdx1) +{ + pu.geoSplitDir = splitDir; + pu.geoMergeIdx0 = mergeIdx0; + pu.geoMergeIdx1 = mergeIdx1; + pu.geoMMVDFlag0 = mmvdFlag0; + pu.geoMMVDIdx0 = mmvdIdx0; + pu.geoMMVDFlag1 = mmvdFlag1; + pu.geoMMVDIdx1 = mmvdIdx1; + + const int mvShift = MV_FRACTIONAL_BITS_DIFF; + const bool extMMVD = pu.cs->picHeader->getGPMMMVDTableFlag(); + const int MmvdCands[8] = { 1 << mvShift , 2 << mvShift , 4 << mvShift , 8 << mvShift , 16 << 
mvShift , 32 << mvShift, 64 << mvShift , 128 << mvShift }; + const int ExtMmvdCands[9] = { 1 << mvShift , 2 << mvShift , 4 << mvShift , 8 << mvShift , 12 << mvShift , 16 << mvShift, 24 << mvShift, 32 << mvShift, 64 << mvShift }; + const int* refMvdCands = (extMMVD ? ExtMmvdCands : MmvdCands); + Mv mvOffset0[2], mvOffset1[2], deltaMv; + + if (mmvdFlag0) + { + int fPosStep = (extMMVD ? (mmvdIdx0 >> 3) : (mmvdIdx0 >> 2)); + int fPosPosition = (extMMVD ? (mmvdIdx0 - (fPosStep << 3)) : (mmvdIdx0 - (fPosStep << 2))); + + if (fPosPosition == 0) + { + deltaMv = Mv(refMvdCands[fPosStep], 0); + } + else if (fPosPosition == 1) + { + deltaMv = Mv(-refMvdCands[fPosStep], 0); + } + else if (fPosPosition == 2) + { + deltaMv = Mv(0, refMvdCands[fPosStep]); + } + else if (fPosPosition == 3) + { + deltaMv = Mv(0, -refMvdCands[fPosStep]); + } + else if (fPosPosition == 4) + { + deltaMv = Mv(refMvdCands[fPosStep], refMvdCands[fPosStep]); + } + else if (fPosPosition == 5) + { + deltaMv = Mv(refMvdCands[fPosStep], -refMvdCands[fPosStep]); + } + else if (fPosPosition == 6) + { + deltaMv = Mv(-refMvdCands[fPosStep], refMvdCands[fPosStep]); + } + else if (fPosPosition == 7) + { + deltaMv = Mv(-refMvdCands[fPosStep], -refMvdCands[fPosStep]); + } + + if (geoMrgCtx.interDirNeighbours[mergeIdx0] == 1) + { + mvOffset0[0] = deltaMv; + } + else + { + mvOffset0[1] = deltaMv; + } + } + + if (mmvdFlag1) + { + int fPosStep = (extMMVD ? (mmvdIdx1 >> 3) : (mmvdIdx1 >> 2)); + int fPosPosition = (extMMVD ? 
(mmvdIdx1 - (fPosStep << 3)) : (mmvdIdx1 - (fPosStep << 2))); + + if (fPosPosition == 0) + { + deltaMv = Mv(refMvdCands[fPosStep], 0); + } + else if (fPosPosition == 1) + { + deltaMv = Mv(-refMvdCands[fPosStep], 0); + } + else if (fPosPosition == 2) + { + deltaMv = Mv(0, refMvdCands[fPosStep]); + } + else if (fPosPosition == 3) + { + deltaMv = Mv(0, -refMvdCands[fPosStep]); + } + else if (fPosPosition == 4) + { + deltaMv = Mv(refMvdCands[fPosStep], refMvdCands[fPosStep]); + } + else if (fPosPosition == 5) + { + deltaMv = Mv(refMvdCands[fPosStep], -refMvdCands[fPosStep]); + } + else if (fPosPosition == 6) + { + deltaMv = Mv(-refMvdCands[fPosStep], refMvdCands[fPosStep]); + } + else if (fPosPosition == 7) + { + deltaMv = Mv(-refMvdCands[fPosStep], -refMvdCands[fPosStep]); + } + + if (geoMrgCtx.interDirNeighbours[mergeIdx1] == 1) + { + mvOffset1[0] = deltaMv; + } + else + { + mvOffset1[1] = deltaMv; + } + } + + MotionBuf mb = pu.getMotionBuf(); + + MotionInfo biMv; + biMv.isInter = true; + biMv.sliceIdx = pu.cs->slice->getIndependentSliceIdx(); + + if (geoMrgCtx.interDirNeighbours[mergeIdx0] == 1 && geoMrgCtx.interDirNeighbours[mergeIdx1] == 2) + { + biMv.interDir = 3; + biMv.mv[0] = geoMrgCtx.mvFieldNeighbours[mergeIdx0 << 1].mv + mvOffset0[0]; + biMv.mv[1] = geoMrgCtx.mvFieldNeighbours[(mergeIdx1 << 1) + 1].mv + mvOffset1[1]; + biMv.refIdx[0] = geoMrgCtx.mvFieldNeighbours[mergeIdx0 << 1].refIdx; + biMv.refIdx[1] = geoMrgCtx.mvFieldNeighbours[(mergeIdx1 << 1) + 1].refIdx; + } + else if (geoMrgCtx.interDirNeighbours[mergeIdx0] == 2 && geoMrgCtx.interDirNeighbours[mergeIdx1] == 1) + { + biMv.interDir = 3; + biMv.mv[0] = geoMrgCtx.mvFieldNeighbours[mergeIdx1 << 1].mv + mvOffset1[0]; + biMv.mv[1] = geoMrgCtx.mvFieldNeighbours[(mergeIdx0 << 1) + 1].mv + mvOffset0[1]; + biMv.refIdx[0] = geoMrgCtx.mvFieldNeighbours[mergeIdx1 << 1].refIdx; + biMv.refIdx[1] = geoMrgCtx.mvFieldNeighbours[(mergeIdx0 << 1) + 1].refIdx; + } + else if (geoMrgCtx.interDirNeighbours[mergeIdx0] == 1 
&& geoMrgCtx.interDirNeighbours[mergeIdx1] == 1) + { + biMv.interDir = 1; + biMv.mv[0] = geoMrgCtx.mvFieldNeighbours[mergeIdx1 << 1].mv + mvOffset1[0]; + biMv.mv[1] = Mv(0, 0); + biMv.refIdx[0] = geoMrgCtx.mvFieldNeighbours[mergeIdx1 << 1].refIdx; + biMv.refIdx[1] = -1; + } + else if (geoMrgCtx.interDirNeighbours[mergeIdx0] == 2 && geoMrgCtx.interDirNeighbours[mergeIdx1] == 2) + { + biMv.interDir = 2; + biMv.mv[0] = Mv(0, 0); + biMv.mv[1] = geoMrgCtx.mvFieldNeighbours[(mergeIdx1 << 1) + 1].mv + mvOffset1[1]; + biMv.refIdx[0] = -1; + biMv.refIdx[1] = geoMrgCtx.mvFieldNeighbours[(mergeIdx1 << 1) + 1].refIdx; + } + + int16_t angle = g_GeoParams[splitDir][0]; + int tpmMask = 0; + int lookUpY = 0, motionIdx = 0; + bool isFlip = angle >= 13 && angle <= 27; + int distanceIdx = g_GeoParams[splitDir][1]; + int distanceX = angle; + int distanceY = (distanceX + (GEO_NUM_ANGLES >> 2)) % GEO_NUM_ANGLES; + int offsetX = (-(int)pu.lwidth()) >> 1; + int offsetY = (-(int)pu.lheight()) >> 1; + if (distanceIdx > 0) + { + if (angle % 16 == 8 || (angle % 16 != 0 && pu.lheight() >= pu.lwidth())) + { + offsetY += angle < 16 ? ((distanceIdx * pu.lheight()) >> 3) : -(int)((distanceIdx * pu.lheight()) >> 3); + } + else + { + offsetX += angle < 16 ? ((distanceIdx * pu.lwidth()) >> 3) : -(int)((distanceIdx * pu.lwidth()) >> 3); + } + } + for (int y = 0; y < mb.height; y++) + { + lookUpY = (((4 * y + offsetY) << 1) + 5) * g_Dis[distanceY]; + for (int x = 0; x < mb.width; x++) + { + motionIdx = (((4 * x + offsetX) << 1) + 5) * g_Dis[distanceX] + lookUpY; + tpmMask = abs(motionIdx) < 32 ? 2 : (motionIdx <= 0 ? 
(1 - isFlip) : isFlip); + if (tpmMask == 2) + { + mb.at(x, y).isInter = true; + mb.at(x, y).interDir = biMv.interDir; + mb.at(x, y).refIdx[0] = biMv.refIdx[0]; + mb.at(x, y).refIdx[1] = biMv.refIdx[1]; + mb.at(x, y).mv[0] = biMv.mv[0]; + mb.at(x, y).mv[1] = biMv.mv[1]; + mb.at(x, y).sliceIdx = biMv.sliceIdx; + } + else if (tpmMask == 0) + { + mb.at(x, y).isInter = true; + mb.at(x, y).interDir = geoMrgCtx.interDirNeighbours[mergeIdx0]; + mb.at(x, y).refIdx[0] = geoMrgCtx.mvFieldNeighbours[mergeIdx0 << 1].refIdx; + mb.at(x, y).refIdx[1] = geoMrgCtx.mvFieldNeighbours[(mergeIdx0 << 1) + 1].refIdx; + mb.at(x, y).mv[0] = geoMrgCtx.mvFieldNeighbours[mergeIdx0 << 1].mv + mvOffset0[0]; + mb.at(x, y).mv[1] = geoMrgCtx.mvFieldNeighbours[(mergeIdx0 << 1) + 1].mv + mvOffset0[1]; + mb.at(x, y).sliceIdx = biMv.sliceIdx; + } + else + { + mb.at(x, y).isInter = true; + mb.at(x, y).interDir = geoMrgCtx.interDirNeighbours[mergeIdx1]; + mb.at(x, y).refIdx[0] = geoMrgCtx.mvFieldNeighbours[mergeIdx1 << 1].refIdx; + mb.at(x, y).refIdx[1] = geoMrgCtx.mvFieldNeighbours[(mergeIdx1 << 1) + 1].refIdx; + mb.at(x, y).mv[0] = geoMrgCtx.mvFieldNeighbours[mergeIdx1 << 1].mv + mvOffset1[0]; + mb.at(x, y).mv[1] = geoMrgCtx.mvFieldNeighbours[(mergeIdx1 << 1) + 1].mv + mvOffset1[1]; + mb.at(x, y).sliceIdx = biMv.sliceIdx; + } + } + } +} +#endif +#endif + bool CU::hasSubCUNonZeroMVd( const CodingUnit& cu ) { bool bNonZeroMvd = false; diff --git a/source/Lib/CommonLib/UnitTools.h b/source/Lib/CommonLib/UnitTools.h index 0f1fed6c9..7f781108b 100644 --- a/source/Lib/CommonLib/UnitTools.h +++ b/source/Lib/CommonLib/UnitTools.h @@ -155,7 +155,7 @@ namespace PU uint32_t getFinalIntraMode (const PredictionUnit &pu, const ChannelType &chType); uint32_t getCoLocatedIntraLumaMode (const PredictionUnit &pu); int getWideAngle ( const TransformUnit &tu, const uint32_t dirMode, const ComponentID compID ); -#if MULTI_PASS_DMVR +#if MULTI_PASS_DMVR || JVET_W0097_GPM_MMVD_TM uint32_t getBDMVRMvdThreshold (const 
PredictionUnit &pu); #endif #if TM_MRG @@ -236,8 +236,23 @@ namespace PU bool isLMCModeEnabled (const PredictionUnit &pu, unsigned mode); bool isChromaIntraModeCrossCheckMode(const PredictionUnit &pu); +#if JVET_W0097_GPM_MMVD_TM +#if TM_MRG + void getGeoMergeCandidates(PredictionUnit &pu, MergeCtx &GeoMrgCtx, MergeCtx* mergeCtx = NULL); +#else + void getGeoMergeCandidates(const PredictionUnit &pu, MergeCtx &GeoMrgCtx, MergeCtx* mergeCtx = NULL); +#endif +#else void getGeoMergeCandidates (const PredictionUnit &pu, MergeCtx &GeoMrgCtx); +#endif void spanGeoMotionInfo ( PredictionUnit &pu, MergeCtx &GeoMrgCtx, const uint8_t splitDir, const uint8_t candIdx0, const uint8_t candIdx1); +#if JVET_W0097_GPM_MMVD_TM +#if TM_MRG + void spanGeoMMVDMotionInfo(PredictionUnit &pu, MergeCtx &geoMrgCtx, MergeCtx &geoTmMrgCtx0, MergeCtx &geoTmMrgCtx1, const uint8_t splitDir, const uint8_t mergeIdx0, const uint8_t mergeIdx1, const bool tmFlag0, const bool mmvdFlag0, const uint8_t mmvdIdx0, const bool tmFlag1, const bool mmvdFlag1, const uint8_t mmvdIdx1); +#else + void spanGeoMMVDMotionInfo(PredictionUnit &pu, MergeCtx &GeoMrgCtx, const uint8_t splitDir, const uint8_t mergeIdx0, const uint8_t mergeIdx1, const bool mmvdFlag0, const uint8_t mmvdIdx0, const bool mmvdFlag1, const uint8_t mmvdIdx1); +#endif +#endif bool isAddNeighborMv (const Mv& currMv, Mv* neighborMvs, int numNeighborMv); void getIbcMVPsEncOnly(PredictionUnit &pu, Mv* mvPred, int& nbPred); bool getDerivedBV(PredictionUnit &pu, const Mv& currentMv, Mv& derivedMv); @@ -337,5 +352,128 @@ uint32_t updateCandList(T uiMode, double uiCost, static_vector<T, N>& candModeLi } return 0; } +#if JVET_W0097_GPM_MMVD_TM +template<size_t N> +void orderCandList(uint8_t uiMode, bool bNonMMVDListCand, int splitDir, double uiCost, static_vector<uint8_t, N>& candModeList, static_vector<bool, N>& isNonMMVDListIdx, static_vector<int, N>& candSplitDirList, static_vector<double, N>& candCostList, size_t uiFastCandNum = N) +{ + 
CHECK(std::min(uiFastCandNum, candModeList.size()) != std::min(uiFastCandNum, candCostList.size()), "Sizes do not match!"); + CHECK(uiFastCandNum > candModeList.capacity(), "The vector is to small to hold all the candidates!"); + + size_t i; + size_t shift = 0; + size_t currSize = std::min(uiFastCandNum, candCostList.size()); + + while (shift < uiFastCandNum && shift < currSize && uiCost < candCostList[currSize - 1 - shift]) + { + shift++; + } + + if (candModeList.size() >= uiFastCandNum && shift != 0) + { + for (i = 1; i < shift; i++) + { + candModeList[currSize - i] = candModeList[currSize - 1 - i]; + isNonMMVDListIdx[currSize - i] = isNonMMVDListIdx[currSize - 1 - i]; + candSplitDirList[currSize - i] = candSplitDirList[currSize - 1 - i]; + candCostList[currSize - i] = candCostList[currSize - 1 - i]; + } + candModeList[currSize - shift] = uiMode; + isNonMMVDListIdx[currSize - shift] = bNonMMVDListCand; + candSplitDirList[currSize - shift] = splitDir; + candCostList[currSize - shift] = uiCost; + return; + } + else if (currSize < uiFastCandNum) + { + candModeList.insert(candModeList.end() - shift, uiMode); + isNonMMVDListIdx.insert(isNonMMVDListIdx.end() - shift, bNonMMVDListCand); + candSplitDirList.insert(candSplitDirList.end() - shift, splitDir); + candCostList.insert(candCostList.end() - shift, uiCost); + return; + } + return; +} + +template<size_t N> +uint32_t updateGeoMMVDCandList(double uiCost, int splitDir, int mergeCand0, int mergeCand1, int mmvdCand0, int mmvdCand1, + static_vector<double, N>& candCostList, static_vector<int, N>& geoSplitDirList, static_vector<int, N>& geoMergeCand0, static_vector<int, N>& geoMergeCand1, static_vector<int, N>& geoMmvdCand0, static_vector<int, N>& geoMmvdCand1, + size_t uiFastCandNum) +{ + CHECK(std::min(uiFastCandNum, geoSplitDirList.size()) != std::min(uiFastCandNum, candCostList.size()), "Sizes do not match!"); + CHECK(uiFastCandNum > candCostList.capacity(), "The vector is to small to hold all the candidates!"); + + 
size_t i; + size_t shift = 0; + size_t currSize = std::min(uiFastCandNum, candCostList.size()); + + while (shift < uiFastCandNum && shift < currSize && uiCost < candCostList[currSize - 1 - shift]) + { + shift++; + } + + if (candCostList.size() >= uiFastCandNum && shift != 0) + { + for (i = 1; i < shift; i++) + { + geoSplitDirList[currSize - i] = geoSplitDirList[currSize - 1 - i]; + geoMergeCand0[currSize - i] = geoMergeCand0[currSize - 1 - i]; + geoMergeCand1[currSize - i] = geoMergeCand1[currSize - 1 - i]; + geoMmvdCand0[currSize - i] = geoMmvdCand0[currSize - 1 - i]; + geoMmvdCand1[currSize - i] = geoMmvdCand1[currSize - 1 - i]; + candCostList[currSize - i] = candCostList[currSize - 1 - i]; + } + geoSplitDirList[currSize - shift] = splitDir; + geoMergeCand0[currSize - shift] = mergeCand0; + geoMergeCand1[currSize - shift] = mergeCand1; + geoMmvdCand0[currSize - shift] = mmvdCand0; + geoMmvdCand1[currSize - shift] = mmvdCand1; + candCostList[currSize - shift] = uiCost; + return 1; + } + else if (currSize < uiFastCandNum) + { + geoSplitDirList.insert(geoSplitDirList.end() - shift, splitDir); + geoMergeCand0.insert(geoMergeCand0.end() - shift, mergeCand0); + geoMergeCand1.insert(geoMergeCand1.end() - shift, mergeCand1); + geoMmvdCand0.insert(geoMmvdCand0.end() - shift, mmvdCand0); + geoMmvdCand1.insert(geoMmvdCand1.end() - shift, mmvdCand1); + candCostList.insert(candCostList.end() - shift, uiCost); + return 1; + } + return 0; +} +template<size_t N> +void sortCandList(double uiCost, int mergeCand, int mmvdCand, static_vector<double, N>& candCostList, static_vector<int, N>& mergeCandList, static_vector<int, N>& mmvdCandList, int fastCandNum) +{ + size_t i; + size_t shift = 0; + size_t currSize = candCostList.size(); + CHECK(currSize > fastCandNum, "list overflow!"); + + while (shift < currSize && uiCost < candCostList[currSize - 1 - shift]) + { + shift++; + } + + if (currSize == fastCandNum && shift != 0) + { + for (i = 1; i < shift; i++) + { + mergeCandList[currSize 
- i] = mergeCandList[currSize - 1 - i]; + mmvdCandList[currSize - i] = mmvdCandList[currSize - 1 - i]; + candCostList[currSize - i] = candCostList[currSize - 1 - i]; + } + mergeCandList[currSize - shift] = mergeCand; + mmvdCandList[currSize - shift] = mmvdCand; + candCostList[currSize - shift] = uiCost; + } + else if (currSize < fastCandNum) + { + mergeCandList.insert(mergeCandList.end() - shift, mergeCand); + mmvdCandList.insert(mmvdCandList.end() - shift, mmvdCand); + candCostList.insert(candCostList.end() - shift, uiCost); + } +} +#endif #endif diff --git a/source/Lib/CommonLib/x86/BufferX86.h b/source/Lib/CommonLib/x86/BufferX86.h index 0d538ec62..7f69bb735 100644 --- a/source/Lib/CommonLib/x86/BufferX86.h +++ b/source/Lib/CommonLib/x86/BufferX86.h @@ -47,6 +47,209 @@ #if ENABLE_SIMD_OPT_BUFFER #ifdef TARGET_SIMD_X86 +#if JVET_W0097_GPM_MMVD_TM +template< X86_VEXT vext > +void roundBD_SSE(const Pel* srcp, const int srcStride, Pel* dest, const int destStride, int width, int height, const ClpRng& clpRng) +{ + const int32_t clipbd = clpRng.bd; +#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT + const int32_t shiftDefault = IF_INTERNAL_FRAC_BITS(clipbd); +#else + const int32_t shiftDefault = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)); +#endif + const int32_t offsetDefault = (1 << (shiftDefault - 1)) + IF_INTERNAL_OFFS; + +#if USE_AVX2 + if (vext >= AVX2 && (width & 0x0f) == 0) + { + __m256i voffset = _mm256_set1_epi16((short)offsetDefault); + __m256i vibdmin = _mm256_set1_epi16((short)clpRng.min); + __m256i vibdmax = _mm256_set1_epi16((short)clpRng.max); + __m256i vsrc; + for (int row = 0; row < height; row++) + { + for (int col = 0; col < width; col += 16) + { + vsrc = _mm256_lddqu_si256((__m256i *)&srcp[col]); + vsrc = _mm256_adds_epi16(vsrc, voffset); + vsrc = _mm256_srai_epi16(vsrc, shiftDefault); + vsrc = _mm256_min_epi16(vibdmax, _mm256_max_epi16(vibdmin, vsrc)); + _mm256_storeu_si256((__m256i *)&dest[col], vsrc); + } + srcp += srcStride; + dest += destStride; + } + 
} + else + { +#endif + __m128i voffset = _mm_set1_epi16((short)offsetDefault); + __m128i vibdmin = _mm_set1_epi16((short)clpRng.min); + __m128i vibdmax = _mm_set1_epi16((short)clpRng.max); + __m128i vsrc; + for (int row = 0; row < height; row++) + { + int col = 0; + for (; col < ((width >> 3) << 3); col += 8) + { + vsrc = _mm_lddqu_si128((__m128i *)&srcp[col]); + vsrc = _mm_adds_epi16(vsrc, voffset); + vsrc = _mm_srai_epi16(vsrc, shiftDefault); + vsrc = _mm_min_epi16(vibdmax, _mm_max_epi16(vibdmin, vsrc)); + _mm_storeu_si128((__m128i *)&dest[col], vsrc); + } + for (; col < ((width >> 2) << 2); col += 4) + { + vsrc = _mm_loadl_epi64((__m128i *)&srcp[col]); + vsrc = _mm_adds_epi16(vsrc, voffset); + vsrc = _mm_srai_epi16(vsrc, shiftDefault); + vsrc = _mm_min_epi16(vibdmax, _mm_max_epi16(vibdmin, vsrc)); + _mm_storel_epi64((__m128i *)&dest[col], vsrc); + } + for (; col < width; col++) + { + dest[col] = ClipPel(rightShift(srcp[col] + offsetDefault, shiftDefault), clpRng); + } + srcp += srcStride; + dest += destStride; + } +#if USE_AVX2 + } +#endif +} + +template< X86_VEXT vext > +void weightedAvg_SSE(const Pel* src0, const unsigned src0Stride, const Pel* src1, const unsigned src1Stride, Pel* dest, const unsigned destStride, const int8_t w0, const int8_t w1, int width, int height, const ClpRng& clpRng) +{ + const int8_t log2WeightBase = g_BcwLog2WeightBase; + const int clipbd = clpRng.bd; +#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT + const int shiftNum = IF_INTERNAL_FRAC_BITS(clipbd) + log2WeightBase; +#else + const int shiftNum = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + log2WeightBase; +#endif + const int offset = (1 << (shiftNum - 1)) + (IF_INTERNAL_OFFS << log2WeightBase); + +#if USE_AVX2 + if ((vext >= AVX2) && (width & 0x7) == 0) + { + __m256i mw = _mm256_unpacklo_epi16(_mm256_set1_epi16(w0), _mm256_set1_epi16(w1)); + __m256i voffset = _mm256_set1_epi32(offset); + __m256i vibdmin = _mm256_set1_epi16((short)clpRng.min); + __m256i vibdmax = 
_mm256_set1_epi16((short)clpRng.max); + __m256i msrc0, msrc1, msum0, msum1; + + for (int row = 0; row < height; row++) + { + for (int col = 0; col < width; col += 8) + { + msrc0 = _mm256_castsi128_si256(_mm_lddqu_si128((__m128i*)(&src0[col]))); + msrc1 = _mm256_castsi128_si256(_mm_lddqu_si128((__m128i*)(&src1[col]))); + msum0 = _mm256_unpacklo_epi16(msrc0, msrc1); + msum1 = _mm256_unpackhi_epi16(msrc0, msrc1); + msum0 = _mm256_madd_epi16(msum0, mw); + msum1 = _mm256_madd_epi16(msum1, mw); + msum0 = _mm256_add_epi32(msum0, voffset); + msum1 = _mm256_add_epi32(msum1, voffset); + msum0 = _mm256_srai_epi32(msum0, shiftNum); + msum1 = _mm256_srai_epi32(msum1, shiftNum); + msum0 = _mm256_packs_epi32(msum0, msum1); + msum0 = _mm256_min_epi16(vibdmax, _mm256_max_epi16(vibdmin, msum0)); + _mm_storeu_si128((__m128i *)&dest[col], _mm256_castsi256_si128(msum0)); + } + src0 += src0Stride; + src1 += src1Stride; + dest += destStride; + } + } + else + { +#endif + __m128i mw = _mm_unpacklo_epi16(_mm_set1_epi16(w0), _mm_set1_epi16(w1)); + __m128i voffset = _mm_set1_epi32(offset); + __m128i vibdmin = _mm_set1_epi16((short)clpRng.min); + __m128i vibdmax = _mm_set1_epi16((short)clpRng.max); + + for (int row = 0; row < height; row++) + { + int col = 0; + for (; col < ((width >> 2) << 2); col += 4) + { + __m128i msrc = _mm_unpacklo_epi16(_mm_loadl_epi64((__m128i *)&src0[col]), _mm_loadl_epi64((__m128i *)&src1[col])); + msrc = _mm_madd_epi16(msrc, mw); + msrc = _mm_add_epi32(msrc, voffset); + msrc = _mm_srai_epi32(msrc, shiftNum); + msrc = _mm_packs_epi32(msrc, msrc); + msrc = _mm_min_epi16(vibdmax, _mm_max_epi16(vibdmin, msrc)); + _mm_storel_epi64((__m128i *)&dest[col], msrc); + } + for (; col < width; col++) + { + dest[col] = ClipPel(rightShift(src0[col] * w0 + src1[col] * w1 + offset, shiftNum), clpRng); + } + src0 += src0Stride; + src1 += src1Stride; + dest += destStride; + } +#if USE_AVX2 + } +#endif +} + +template< X86_VEXT vext > +void copyClip_SSE(const Pel* srcp, const unsigned 
srcStride, Pel* dest, const unsigned destStride, int width, int height, const ClpRng& clpRng) +{ +#if USE_AVX2 + if (vext >= AVX2 && (width & 0x0f) == 0) + { + __m256i vibdmin = _mm256_set1_epi16((short)clpRng.min); + __m256i vibdmax = _mm256_set1_epi16((short)clpRng.max); + __m256i vsrc; + for (int row = 0; row < height; row++) + { + for (int col = 0; col < width; col += 16) + { + vsrc = _mm256_lddqu_si256((__m256i *)&srcp[col]); + vsrc = _mm256_min_epi16(vibdmax, _mm256_max_epi16(vibdmin, vsrc)); + _mm256_storeu_si256((__m256i *)&dest[col], vsrc); + } + srcp += srcStride; + dest += destStride; + } + } + else + { +#endif + __m128i vibdmin = _mm_set1_epi16((short)clpRng.min); + __m128i vibdmax = _mm_set1_epi16((short)clpRng.max); + __m128i vsrc; + for (int row = 0; row < height; row++) + { + int col = 0; + for (; col < ((width >> 3) << 3); col += 8) + { + vsrc = _mm_lddqu_si128((__m128i *)&srcp[col]); + vsrc = _mm_min_epi16(vibdmax, _mm_max_epi16(vibdmin, vsrc)); + _mm_storeu_si128((__m128i *)&dest[col], vsrc); + } + for (; col < ((width >> 2) << 2); col += 4) + { + vsrc = _mm_loadl_epi64((__m128i *)&srcp[col]); + vsrc = _mm_min_epi16(vibdmax, _mm_max_epi16(vibdmin, vsrc)); + _mm_storel_epi64((__m128i *)&dest[col], vsrc); + } + for (; col < width; col++) + { + dest[col] = ClipPel(srcp[col], clpRng); + } + srcp += srcStride; + dest += destStride; + } +#if USE_AVX2 + } +#endif +} +#endif template< X86_VEXT vext, int W > void addAvg_SSE( const int16_t* src0, int src0Stride, const int16_t* src1, int src1Stride, int16_t *dst, int dstStride, int width, int height, int shift, int offset, const ClpRng& clpRng ) @@ -1767,6 +1970,11 @@ int64_t getSumOfDifference_SSE(const Pel* src0, int src0Stride, const Pel* src1, template<X86_VEXT vext> void PelBufferOps::_initPelBufOpsX86() { +#if JVET_W0097_GPM_MMVD_TM + roundBD = roundBD_SSE<vext>; + weightedAvg = weightedAvg_SSE<vext>; + copyClip = copyClip_SSE<vext>; +#endif addAvg8 = addAvg_SSE<vext, 8>; addAvg4 = addAvg_SSE<vext, 
4>; diff --git a/source/Lib/DecoderLib/CABACReader.cpp b/source/Lib/DecoderLib/CABACReader.cpp index a0ced2ca8..ad78f9274 100644 --- a/source/Lib/DecoderLib/CABACReader.cpp +++ b/source/Lib/DecoderLib/CABACReader.cpp @@ -2958,6 +2958,58 @@ void CABACReader::merge_idx( PredictionUnit& pu ) if (pu.cu->geoFlag) { RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET(STATS__CABAC_BITS__GEO_INDEX); +#if JVET_W0097_GPM_MMVD_TM + pu.geoMMVDFlag0 = m_BinDecoder.decodeBin(Ctx::GeoMmvdFlag()); + if (pu.geoMMVDFlag0) + { + geo_mmvd_idx(pu, REF_PIC_LIST_0); + } + + pu.geoMMVDFlag1 = m_BinDecoder.decodeBin(Ctx::GeoMmvdFlag()); + if (pu.geoMMVDFlag1) + { + geo_mmvd_idx(pu, REF_PIC_LIST_1); + } + +#if TM_MRG + if (!pu.geoMMVDFlag0 && !pu.geoMMVDFlag1) + { + tm_merge_flag(pu); + if (pu.tmMergeFlag) + { + pu.geoTmFlag0 = true; + pu.geoTmFlag1 = true; + geo_merge_idx(pu); + } + else + { + pu.geoTmFlag0 = false; + pu.geoTmFlag1 = false; + geo_merge_idx(pu); + } + } +#else + if (!pu.geoMMVDFlag0 && !pu.geoMMVDFlag1) + { + geo_merge_idx(pu); + } +#endif + else if (pu.geoMMVDFlag0 && pu.geoMMVDFlag1) + { + if (pu.geoMMVDIdx0 == pu.geoMMVDIdx1) + { + geo_merge_idx(pu); + } + else + { + geo_merge_idx1(pu); + } + } + else + { + geo_merge_idx1(pu); + } +#else uint32_t splitDir = 0; xReadTruncBinCode(splitDir, GEO_NUM_PARTITION_MODE); pu.geoSplitDir = splitDir; @@ -2985,6 +3037,7 @@ void CABACReader::merge_idx( PredictionUnit& pu ) DTRACE(g_trace_ctx, D_SYNTAX, "merge_idx() geo_split_dir=%d\n", splitDir); DTRACE(g_trace_ctx, D_SYNTAX, "merge_idx() geo_idx0=%d\n", mergeCand0); DTRACE(g_trace_ctx, D_SYNTAX, "merge_idx() geo_idx1=%d\n", mergeCand1); +#endif return; } @@ -3038,7 +3091,94 @@ void CABACReader::merge_idx( PredictionUnit& pu ) DTRACE( g_trace_ctx, D_SYNTAX, "merge_idx() merge_idx=%d\n", pu.mergeIdx ); } } +#if JVET_W0097_GPM_MMVD_TM +void CABACReader::geo_mmvd_idx(PredictionUnit& pu, RefPicList eRefPicList) +{ + bool extMMVD = pu.cs->picHeader->getGPMMMVDTableFlag(); + int 
numCandminus1_step = (extMMVD ? GPM_EXT_MMVD_REFINE_STEP : GPM_MMVD_REFINE_STEP) - 1; + int step = 0; + if (m_BinDecoder.decodeBin(Ctx::GeoMmvdStepMvpIdx())) + { + step++; + for (; step < numCandminus1_step; step++) + { + if (!m_BinDecoder.decodeBinEP()) + { + break; + } + } + } + + int idxToMMVDStep[GPM_EXT_MMVD_REFINE_STEP] = { 1, 2, 3, 4, 5, 0, 6, 7, 8 }; + step = idxToMMVDStep[step]; + int direction = 0; + int maxMMVDDir = (extMMVD ? GPM_EXT_MMVD_REFINE_DIRECTION : GPM_MMVD_REFINE_DIRECTION); + direction = m_BinDecoder.decodeBinsEP(maxMMVDDir > 4 ? 3 : 2); + int mvpIdx = (step * maxMMVDDir + direction); + if (eRefPicList == REF_PIC_LIST_0) + { + pu.geoMMVDIdx0 = mvpIdx; + } + else + { + pu.geoMMVDIdx1 = mvpIdx; + } +} + + +void CABACReader::geo_merge_idx(PredictionUnit& pu) +{ + uint32_t splitDir = 0; + xReadTruncBinCode(splitDir, GEO_NUM_PARTITION_MODE); + pu.geoSplitDir = splitDir; + const int maxNumGeoCand = pu.cs->sps->getMaxNumGeoCand(); + CHECK(maxNumGeoCand < 2, "Incorrect max number of geo candidates"); + CHECK(pu.cu->lheight() > 64 || pu.cu->lwidth() > 64, "Incorrect block size of geo flag"); + int numCandminus2 = maxNumGeoCand - 2; + pu.mergeIdx = 0; + int mergeCand0 = 0; + int mergeCand1 = 0; + if (m_BinDecoder.decodeBin(Ctx::MergeIdx())) + { + mergeCand0 += unary_max_eqprob(numCandminus2) + 1; + } + if (numCandminus2 > 0) + { + if (m_BinDecoder.decodeBin(Ctx::MergeIdx())) + { + mergeCand1 += unary_max_eqprob(numCandminus2 - 1) + 1; + } + } + mergeCand1 += mergeCand1 >= mergeCand0 ? 
1 : 0; + pu.geoMergeIdx0 = mergeCand0; + pu.geoMergeIdx1 = mergeCand1; +} + +void CABACReader::geo_merge_idx1(PredictionUnit& pu) +{ + uint32_t splitDir = 0; + xReadTruncBinCode(splitDir, GEO_NUM_PARTITION_MODE); + pu.geoSplitDir = splitDir; + const int maxNumGeoCand = pu.cs->sps->getMaxNumGeoCand(); + CHECK(maxNumGeoCand < 2, "Incorrect max number of geo candidates"); + CHECK(pu.cu->lheight() > 64 || pu.cu->lwidth() > 64, "Incorrect block size of geo flag"); + int numCandminus2 = maxNumGeoCand - 2; + pu.mergeIdx = 0; + int mergeCand0 = 0; + int mergeCand1 = 0; + if (m_BinDecoder.decodeBin(Ctx::MergeIdx())) + { + mergeCand0 += unary_max_eqprob(numCandminus2) + 1; + } + if (m_BinDecoder.decodeBin(Ctx::MergeIdx())) + { + mergeCand1 += unary_max_eqprob(numCandminus2) + 1; + } + pu.geoMergeIdx0 = mergeCand0; + pu.geoMergeIdx1 = mergeCand1; +} +#endif void CABACReader::mmvd_merge_idx(PredictionUnit& pu) { RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET(STATS__CABAC_BITS__MERGE_INDEX); diff --git a/source/Lib/DecoderLib/CABACReader.h b/source/Lib/DecoderLib/CABACReader.h index 4a8b362b9..5d0e3329a 100644 --- a/source/Lib/DecoderLib/CABACReader.h +++ b/source/Lib/DecoderLib/CABACReader.h @@ -131,6 +131,11 @@ public: #endif #if TM_MRG void tm_merge_flag ( PredictionUnit& pu ); +#endif +#if JVET_W0097_GPM_MMVD_TM + void geo_mmvd_idx(PredictionUnit& pu, RefPicList eRefPicList); + void geo_merge_idx(PredictionUnit& pu); + void geo_merge_idx1(PredictionUnit& pu); #endif void imv_mode ( CodingUnit& cu, MergeCtx& mrgCtx ); void affine_amvr_mode ( CodingUnit& cu, MergeCtx& mrgCtx ); diff --git a/source/Lib/DecoderLib/DecCu.cpp b/source/Lib/DecoderLib/DecCu.cpp index f3c5ec18b..1e24c5d7e 100644 --- a/source/Lib/DecoderLib/DecCu.cpp +++ b/source/Lib/DecoderLib/DecCu.cpp @@ -908,8 +908,18 @@ void DecCu::xReconInter(CodingUnit &cu) { if( cu.geoFlag ) { +#if JVET_W0097_GPM_MMVD_TM +#if TM_MRG + m_pcInterPred->motionCompensationGeo(cu, m_geoMrgCtx, m_geoTmMrgCtx0, m_geoTmMrgCtx1); + 
PU::spanGeoMMVDMotionInfo(*cu.firstPU, m_geoMrgCtx, m_geoTmMrgCtx0, m_geoTmMrgCtx1, cu.firstPU->geoSplitDir, cu.firstPU->geoMergeIdx0, cu.firstPU->geoMergeIdx1, cu.firstPU->geoTmFlag0, cu.firstPU->geoMMVDFlag0, cu.firstPU->geoMMVDIdx0, cu.firstPU->geoTmFlag1, cu.firstPU->geoMMVDFlag1, cu.firstPU->geoMMVDIdx1); +#else + m_pcInterPred->motionCompensationGeo(cu, m_geoMrgCtx); + PU::spanGeoMMVDMotionInfo(*cu.firstPU, m_geoMrgCtx, cu.firstPU->geoSplitDir, cu.firstPU->geoMergeIdx0, cu.firstPU->geoMergeIdx1, cu.firstPU->geoMMVDFlag0, cu.firstPU->geoMMVDIdx0, cu.firstPU->geoMMVDFlag1, cu.firstPU->geoMMVDIdx1); +#endif +#else m_pcInterPred->motionCompensationGeo( cu, m_geoMrgCtx ); PU::spanGeoMotionInfo( *cu.firstPU, m_geoMrgCtx, cu.firstPU->geoSplitDir, cu.firstPU->geoMergeIdx0, cu.firstPU->geoMergeIdx1 ); +#endif } else { @@ -1285,6 +1295,50 @@ void DecCu::xDeriveCUMV( CodingUnit &cu ) if( pu.cu->geoFlag ) { PU::getGeoMergeCandidates( pu, m_geoMrgCtx ); +#if JVET_W0097_GPM_MMVD_TM && TM_MRG + if (pu.geoTmFlag0) + { + m_geoTmMrgCtx0.numValidMergeCand = m_geoMrgCtx.numValidMergeCand; + m_geoTmMrgCtx0.BcwIdx[pu.geoMergeIdx0] = BCW_DEFAULT; + m_geoTmMrgCtx0.useAltHpelIf[pu.geoMergeIdx0] = false; +#if INTER_LIC + m_geoTmMrgCtx0.LICFlags[pu.geoMergeIdx0] = false; +#endif + m_geoTmMrgCtx0.addHypNeighbours[pu.geoMergeIdx0] = m_geoMrgCtx.addHypNeighbours[pu.geoMergeIdx0]; + m_geoTmMrgCtx0.interDirNeighbours[pu.geoMergeIdx0] = m_geoMrgCtx.interDirNeighbours[pu.geoMergeIdx0]; + m_geoTmMrgCtx0.mvFieldNeighbours[(pu.geoMergeIdx0 << 1)].mv = m_geoMrgCtx.mvFieldNeighbours[(pu.geoMergeIdx0 << 1)].mv; + m_geoTmMrgCtx0.mvFieldNeighbours[(pu.geoMergeIdx0 << 1) + 1].mv = m_geoMrgCtx.mvFieldNeighbours[(pu.geoMergeIdx0 << 1) + 1].mv; + m_geoTmMrgCtx0.mvFieldNeighbours[(pu.geoMergeIdx0 << 1)].refIdx = m_geoMrgCtx.mvFieldNeighbours[(pu.geoMergeIdx0 << 1)].refIdx; + m_geoTmMrgCtx0.mvFieldNeighbours[(pu.geoMergeIdx0 << 1) + 1].refIdx = m_geoMrgCtx.mvFieldNeighbours[(pu.geoMergeIdx0 << 1) + 
1].refIdx; + + m_geoTmMrgCtx0.setMergeInfo(pu, pu.geoMergeIdx0); + pu.geoTmType = g_geoTmShape[0][g_GeoParams[pu.geoSplitDir][0]]; + m_pcInterPred->deriveTMMv(pu); + m_geoTmMrgCtx0.mvFieldNeighbours[(pu.geoMergeIdx0 << 1)].mv.set(pu.mv[0].getHor(), pu.mv[0].getVer()); + m_geoTmMrgCtx0.mvFieldNeighbours[(pu.geoMergeIdx0 << 1) + 1].mv.set(pu.mv[1].getHor(), pu.mv[1].getVer()); + } + if (pu.geoTmFlag1) + { + m_geoTmMrgCtx1.numValidMergeCand = m_geoMrgCtx.numValidMergeCand; + m_geoTmMrgCtx1.BcwIdx[pu.geoMergeIdx1] = BCW_DEFAULT; + m_geoTmMrgCtx1.useAltHpelIf[pu.geoMergeIdx1] = false; +#if INTER_LIC + m_geoTmMrgCtx1.LICFlags[pu.geoMergeIdx1] = false; +#endif + m_geoTmMrgCtx1.addHypNeighbours[pu.geoMergeIdx1] = m_geoMrgCtx.addHypNeighbours[pu.geoMergeIdx1]; + m_geoTmMrgCtx1.interDirNeighbours[pu.geoMergeIdx1] = m_geoMrgCtx.interDirNeighbours[pu.geoMergeIdx1]; + m_geoTmMrgCtx1.mvFieldNeighbours[(pu.geoMergeIdx1 << 1)].mv = m_geoMrgCtx.mvFieldNeighbours[(pu.geoMergeIdx1 << 1)].mv; + m_geoTmMrgCtx1.mvFieldNeighbours[(pu.geoMergeIdx1 << 1) + 1].mv = m_geoMrgCtx.mvFieldNeighbours[(pu.geoMergeIdx1 << 1) + 1].mv; + m_geoTmMrgCtx1.mvFieldNeighbours[(pu.geoMergeIdx1 << 1)].refIdx = m_geoMrgCtx.mvFieldNeighbours[(pu.geoMergeIdx1 << 1)].refIdx; + m_geoTmMrgCtx1.mvFieldNeighbours[(pu.geoMergeIdx1 << 1) + 1].refIdx = m_geoMrgCtx.mvFieldNeighbours[(pu.geoMergeIdx1 << 1) + 1].refIdx; + + m_geoTmMrgCtx1.setMergeInfo(pu, pu.geoMergeIdx1); + pu.geoTmType = g_geoTmShape[1][g_GeoParams[pu.geoSplitDir][0]]; + m_pcInterPred->deriveTMMv(pu); + m_geoTmMrgCtx1.mvFieldNeighbours[(pu.geoMergeIdx1 << 1)].mv.set(pu.mv[0].getHor(), pu.mv[0].getVer()); + m_geoTmMrgCtx1.mvFieldNeighbours[(pu.geoMergeIdx1 << 1) + 1].mv.set(pu.mv[1].getHor(), pu.mv[1].getVer()); + } +#endif } else { diff --git a/source/Lib/DecoderLib/DecCu.h b/source/Lib/DecoderLib/DecCu.h index 91944c63a..33c761acf 100644 --- a/source/Lib/DecoderLib/DecCu.h +++ b/source/Lib/DecoderLib/DecCu.h @@ -105,6 +105,9 @@ private: #endif MergeCtx 
m_geoMrgCtx; +#if JVET_W0097_GPM_MMVD_TM && TM_MRG + MergeCtx m_geoTmMrgCtx0, m_geoTmMrgCtx1; +#endif }; //! \} diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp index 96d62b5f8..c2836ec3b 100644 --- a/source/Lib/DecoderLib/VLCReader.cpp +++ b/source/Lib/DecoderLib/VLCReader.cpp @@ -3576,6 +3576,18 @@ void HLSyntaxReader::parsePictureHeader( PicHeader* picHeader, ParameterSetManag picHeader->setDisFracMMVD(false); } +#if JVET_W0097_GPM_MMVD_TM + if (sps->getUseGeo() && (!pps->getRplInfoInPhFlag() || picHeader->getRPL(1)->getNumRefEntries() > 0)) + { + READ_FLAG(uiCode, "ph_gpm_ext_mmvd_flag"); + picHeader->setGPMMMVDTableFlag(uiCode); + } + else + { + picHeader->setGPMMMVDTableFlag(false); + } +#endif + #if JVET_R0324_REORDER // mvd L1 zero flag if (!pps->getRplInfoInPhFlag() || picHeader->getRPL(1)->getNumRefEntries() > 0) diff --git a/source/Lib/EncoderLib/CABACWriter.cpp b/source/Lib/EncoderLib/CABACWriter.cpp index 4904308cf..ca65ed133 100644 --- a/source/Lib/EncoderLib/CABACWriter.cpp +++ b/source/Lib/EncoderLib/CABACWriter.cpp @@ -2655,6 +2655,57 @@ void CABACWriter::merge_idx( const PredictionUnit& pu ) { if( pu.cu->geoFlag ) { +#if JVET_W0097_GPM_MMVD_TM + m_BinEncoder.encodeBin(pu.geoMMVDFlag0, Ctx::GeoMmvdFlag()); + if (pu.geoMMVDFlag0) + { + geo_mmvd_idx(pu, REF_PIC_LIST_0); + } + m_BinEncoder.encodeBin(pu.geoMMVDFlag1, Ctx::GeoMmvdFlag()); + if (pu.geoMMVDFlag1) + { + geo_mmvd_idx(pu, REF_PIC_LIST_1); + } + +#if TM_MRG + if (!pu.geoMMVDFlag0 && !pu.geoMMVDFlag1) + { + tm_merge_flag(pu); + if (pu.tmMergeFlag) + { + CHECK(!pu.geoTmFlag0 || !pu.geoTmFlag1, "both must be true"); + CHECK(pu.geoMergeIdx0 == pu.geoMergeIdx1, "Incorrect geoMergeIdx0 and geoMergeIdx1"); + geo_merge_idx(pu); + } + else + { + + CHECK(pu.geoTmFlag0 || pu.geoTmFlag1, "both must be false"); + geo_merge_idx(pu); + } + } +#else + if (!pu.geoMMVDFlag0 && !pu.geoMMVDFlag1) + { + geo_merge_idx(pu); + } +#endif + else if (pu.geoMMVDFlag0 && 
pu.geoMMVDFlag1) + { + if (pu.geoMMVDIdx0 == pu.geoMMVDIdx1) + { + geo_merge_idx(pu); + } + else + { + geo_merge_idx1(pu); + } + } + else + { + geo_merge_idx1(pu); + } +#else uint8_t splitDir = pu.geoSplitDir; uint8_t candIdx0 = pu.geoMergeIdx0; uint8_t candIdx1 = pu.geoMergeIdx1; @@ -2681,7 +2732,7 @@ void CABACWriter::merge_idx( const PredictionUnit& pu ) unary_max_eqprob(candIdx1 - 1, numCandminus2 - 1); } } - +#endif return; } int numCandminus1; @@ -2788,6 +2839,176 @@ void CABACWriter::mmvd_merge_idx(const PredictionUnit& pu) DTRACE(g_trace_ctx, D_SYNTAX, "pos() pos=%d\n", var2); DTRACE(g_trace_ctx, D_SYNTAX, "mmvd_merge_idx() mmvd_merge_idx=%d\n", pu.mmvdMergeIdx); } +#if JVET_W0097_GPM_MMVD_TM +void CABACWriter::geo_mmvd_idx(const PredictionUnit& pu, RefPicList eRefPicList) +{ + int geoMMVDIdx = (eRefPicList == REF_PIC_LIST_0) ? pu.geoMMVDIdx0 : pu.geoMMVDIdx1; + bool extMMVD = pu.cs->picHeader->getGPMMMVDTableFlag(); + CHECK(geoMMVDIdx >= (extMMVD ? GPM_EXT_MMVD_MAX_REFINE_NUM : GPM_MMVD_MAX_REFINE_NUM), "invalid GPM MMVD index exist"); + + int step = (extMMVD ? (geoMMVDIdx >> 3) : (geoMMVDIdx >> 2)); + int direction = (extMMVD ? (geoMMVDIdx - (step << 3)) : (geoMMVDIdx - (step << 2))); + + int mmvdStepToIdx[GPM_EXT_MMVD_REFINE_STEP] = { 5, 0, 1, 2, 3, 4, 6, 7, 8 }; + step = mmvdStepToIdx[step]; + + int numCandminus1_step = (extMMVD ? GPM_EXT_MMVD_REFINE_STEP : GPM_MMVD_REFINE_STEP) - 1; + if (numCandminus1_step > 0) + { + if (step == 0) + { + m_BinEncoder.encodeBin(0, Ctx::GeoMmvdStepMvpIdx()); + } + else + { + m_BinEncoder.encodeBin(1, Ctx::GeoMmvdStepMvpIdx()); + for (unsigned idx = 1; idx < numCandminus1_step; idx++) + { + m_BinEncoder.encodeBinEP(step == idx ? 0 : 1); + if (step == idx) + { + break; + } + } + } + } + + int maxMMVDDir = (extMMVD ? GPM_EXT_MMVD_REFINE_DIRECTION : GPM_MMVD_REFINE_DIRECTION); + m_BinEncoder.encodeBinsEP(direction, maxMMVDDir > 4 ? 
3 : 2); +} + +void CABACWriter::geo_merge_idx(const PredictionUnit& pu) +{ + uint8_t splitDir = pu.geoSplitDir; + uint8_t candIdx0 = pu.geoMergeIdx0; + uint8_t candIdx1 = pu.geoMergeIdx1; + + xWriteTruncBinCode(splitDir, GEO_NUM_PARTITION_MODE); + candIdx1 -= candIdx1 < candIdx0 ? 0 : 1; + const int maxNumGeoCand = pu.cs->sps->getMaxNumGeoCand(); + + int numCandminus2 = maxNumGeoCand - 2; + m_BinEncoder.encodeBin(candIdx0 == 0 ? 0 : 1, Ctx::MergeIdx()); + if (candIdx0 > 0) + { + unary_max_eqprob(candIdx0 - 1, numCandminus2); + } + if (numCandminus2 > 0) + { + m_BinEncoder.encodeBin(candIdx1 == 0 ? 0 : 1, Ctx::MergeIdx()); + if (candIdx1 > 0) + { + unary_max_eqprob(candIdx1 - 1, numCandminus2 - 1); + } + } +} + +void CABACWriter::geo_merge_idx1(const PredictionUnit& pu) +{ + uint8_t splitDir = pu.geoSplitDir; + uint8_t candIdx0 = pu.geoMergeIdx0; + uint8_t candIdx1 = pu.geoMergeIdx1; + + xWriteTruncBinCode(splitDir, GEO_NUM_PARTITION_MODE); + const int maxNumGeoCand = pu.cs->sps->getMaxNumGeoCand(); + + int numCandminus2 = maxNumGeoCand - 2; + m_BinEncoder.encodeBin(candIdx0 == 0 ? 0 : 1, Ctx::MergeIdx()); + if (candIdx0 > 0) + { + unary_max_eqprob(candIdx0 - 1, numCandminus2); + } + m_BinEncoder.encodeBin(candIdx1 == 0 ? 0 : 1, Ctx::MergeIdx()); + if (candIdx1 > 0) + { + unary_max_eqprob(candIdx1 - 1, numCandminus2); + } +} + +uint64_t CABACWriter::geo_mode_est(const TempCtx& ctxStart, const int geoMode) +{ + getCtx() = ctxStart; + resetBits(); + + xWriteTruncBinCode(geoMode, GEO_NUM_PARTITION_MODE); + + return getEstFracBits(); +} + +uint64_t CABACWriter::geo_mergeIdx_est(const TempCtx& ctxStart, const int candIdx, const int maxNumGeoCand) +{ + getCtx() = ctxStart; + resetBits(); + + int numCandminus2 = maxNumGeoCand - 2; + m_BinEncoder.encodeBin(candIdx == 0 ? 
0 : 1, Ctx::MergeIdx()); + if (candIdx > 0) + { + unary_max_eqprob(candIdx - 1, numCandminus2); + } + + return getEstFracBits(); +} + +uint64_t CABACWriter::geo_mmvdFlag_est(const TempCtx& ctxStart, const int flag) +{ + getCtx() = ctxStart; + resetBits(); + + m_BinEncoder.encodeBin(flag, Ctx::GeoMmvdFlag()); + + return getEstFracBits(); +} + +#if TM_MRG +uint64_t CABACWriter::geo_tmFlag_est(const TempCtx& ctxStart, const int flag) +{ + getCtx() = ctxStart; + resetBits(); + + m_BinEncoder.encodeBin(flag, Ctx::TMMergeFlag()); + + return getEstFracBits(); +} +#endif + +uint64_t CABACWriter::geo_mmvdIdx_est(const TempCtx& ctxStart, const int geoMMVDIdx, const bool extMMVD) +{ + getCtx() = ctxStart; + resetBits(); + + CHECK(geoMMVDIdx >= (extMMVD ? GPM_EXT_MMVD_MAX_REFINE_NUM : GPM_MMVD_MAX_REFINE_NUM), "invalid GPM MMVD index exist"); + int step = (extMMVD ? (geoMMVDIdx >> 3) : (geoMMVDIdx >> 2)); + int direction = (extMMVD ? (geoMMVDIdx - (step << 3)) : (geoMMVDIdx - (step << 2))); + + int mmvdStepToIdx[GPM_EXT_MMVD_REFINE_STEP] = { 5, 0, 1, 2, 3, 4, 6, 7, 8 }; + step = mmvdStepToIdx[step]; + + int numCandminus1_step = (extMMVD ? GPM_EXT_MMVD_REFINE_STEP : GPM_MMVD_REFINE_STEP) - 1; + if (numCandminus1_step > 0) + { + if (step == 0) + { + m_BinEncoder.encodeBin(0, Ctx::GeoMmvdStepMvpIdx()); + } + else + { + m_BinEncoder.encodeBin(1, Ctx::GeoMmvdStepMvpIdx()); + for (unsigned idx = 1; idx < numCandminus1_step; idx++) + { + m_BinEncoder.encodeBinEP(step == idx ? 0 : 1); + if (step == idx) + { + break; + } + } + } + } + int maxMMVDDir = (extMMVD ? GPM_EXT_MMVD_REFINE_DIRECTION : GPM_MMVD_REFINE_DIRECTION); + m_BinEncoder.encodeBinsEP(direction, maxMMVDDir > 4 ? 
3 : 2); + return getEstFracBits(); +} +#endif void CABACWriter::inter_pred_idc( const PredictionUnit& pu ) { if( !pu.cs->slice->isInterB() ) diff --git a/source/Lib/EncoderLib/CABACWriter.h b/source/Lib/EncoderLib/CABACWriter.h index bae249e50..0b55a2d36 100644 --- a/source/Lib/EncoderLib/CABACWriter.h +++ b/source/Lib/EncoderLib/CABACWriter.h @@ -141,6 +141,19 @@ public: #endif #if TM_MRG void tm_merge_flag ( const PredictionUnit& pu); +#endif +#if JVET_W0097_GPM_MMVD_TM + void geo_mmvd_idx(const PredictionUnit& pu, RefPicList eRefPicList); + void geo_merge_idx(const PredictionUnit& pu); + void geo_merge_idx1(const PredictionUnit& pu); + + uint64_t geo_mode_est(const TempCtx& ctxStart, const int geoMode); + uint64_t geo_mergeIdx_est(const TempCtx& ctxStart, const int candIdx, const int maxNumGeoCand); + uint64_t geo_mmvdFlag_est(const TempCtx& ctxStart, const int flag); + uint64_t geo_mmvdIdx_est(const TempCtx& ctxStart, const int mmvdIdx, const bool extMMVD); +#if TM_MRG + uint64_t geo_tmFlag_est(const TempCtx& ctxStart, const int flag); +#endif #endif void imv_mode ( const CodingUnit& cu ); void affine_amvr_mode ( const CodingUnit& cu ); diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp index 214226047..c988f0054 100644 --- a/source/Lib/EncoderLib/EncCu.cpp +++ b/source/Lib/EncoderLib/EncCu.cpp @@ -85,6 +85,13 @@ EncCu::EncCu() : m_GeoModeTest } } #endif +#if JVET_W0097_GPM_MMVD_TM + fastGpmMmvdSearch = false; + fastGpmMmvdRelatedCU = false; + includeMoreMMVDCandFirstPass = false; + maxNumGPMDirFirstPass = 64; + numCandPerPar = 5; +#endif } void EncCu::create( EncCfg* encCfg ) @@ -197,6 +204,35 @@ void EncCu::create( EncCfg* encCfg ) { m_acGeoWeightedBuffer[ui].create( chromaFormat, Area( 0, 0, uiMaxWidth, uiMaxHeight ) ); } +#if JVET_W0097_GPM_MMVD_TM + for (unsigned ui = 0; ui < MRG_MAX_NUM_CANDS; ui++) + { + for (unsigned vi = 0; vi < GPM_EXT_MMVD_MAX_REFINE_NUM; vi++) + { + m_acGeoMMVDBuffer[ui][vi].create(chromaFormat, Area(0, 
0, uiMaxWidth, uiMaxHeight)); + m_acGeoMMVDTmpBuffer[ui][vi].create(chromaFormat, Area(0, 0, uiMaxWidth, uiMaxHeight)); + } + } + int sourceWidth = encCfg->getSourceWidth(); + int sourceHeight = encCfg->getSourceHeight(); + fastGpmMmvdSearch = (((encCfg->getIntraPeriod() > 0) && ((sourceWidth * sourceHeight) <= (1920 * 1080))) || ((encCfg->getIntraPeriod() < 0) && ((sourceWidth * sourceHeight) >= (1280 * 720)))) && !encCfg->getIBCMode(); +#if TM_MRG + fastGpmMmvdRelatedCU = ((encCfg->getIntraPeriod() < 0) && ((sourceWidth * sourceHeight) >= (1920 * 1080))) && !encCfg->getIBCMode(); +#else + fastGpmMmvdRelatedCU = ((encCfg->getIntraPeriod() < 0) && ((sourceWidth * sourceHeight) >= (1280 * 720))) && !encCfg->getIBCMode(); +#endif + + includeMoreMMVDCandFirstPass = ((encCfg->getIntraPeriod() > 0) || ((encCfg->getIntraPeriod() < 0) && fastGpmMmvdSearch)); + maxNumGPMDirFirstPass = ((encCfg->getIntraPeriod() < 0) ? 50 : (fastGpmMmvdSearch ? 36 : 64)); + numCandPerPar = (fastGpmMmvdSearch ? 4 : 5); +#if TM_MRG + for (uint16_t ui = 0; ui < GEO_TM_MAX_NUM_CANDS; ui++) + { + m_acGeoMergeTmpBuffer[ui].create(chromaFormat, Area(0, 0, uiMaxWidth, uiMaxHeight)); + m_acGeoSADTmpBuffer[ui].create(chromaFormat, Area(0, 0, uiMaxWidth, uiMaxHeight)); + } +#endif +#endif m_ciipBuffer[0].create(chromaFormat, Area(0, 0, uiMaxWidth, uiMaxHeight)); m_ciipBuffer[1].create(chromaFormat, Area(0, 0, uiMaxWidth, uiMaxHeight)); @@ -307,6 +343,23 @@ void EncCu::destroy() { m_acGeoWeightedBuffer[ui].destroy(); } +#if JVET_W0097_GPM_MMVD_TM + for (unsigned ui = 0; ui < MRG_MAX_NUM_CANDS; ui++) + { + for (unsigned vi = 0; vi < GPM_EXT_MMVD_MAX_REFINE_NUM; vi++) + { + m_acGeoMMVDBuffer[ui][vi].destroy(); + m_acGeoMMVDTmpBuffer[ui][vi].destroy(); + } + } +#if TM_MRG + for (uint16_t ui = 0; ui < GEO_TM_MAX_NUM_CANDS; ui++) + { + m_acGeoMergeTmpBuffer[ui].destroy(); + m_acGeoSADTmpBuffer[ui].destroy(); + } +#endif +#endif m_ciipBuffer[0].destroy(); m_ciipBuffer[1].destroy(); } @@ -796,7 +849,9 @@ void 
EncCu::xCompressCU( CodingStructure*& tempCS, CodingStructure*& bestCS, Par m_acGeoWeightedBuffer, GEO_MAX_TRY_WEIGHTED_SAD, localUnitArea); #endif - +#if JVET_W0097_GPM_MMVD_TM + m_mergeCandAvail = false; +#endif double bestIntPelCost = MAX_DOUBLE; if (tempCS->slice->getSPS()->getUseColorTrans()) @@ -945,7 +1000,20 @@ void EncCu::xCompressCU( CodingStructure*& tempCS, CodingStructure*& bestCS, Par } else if( currTestMode.type == ETM_MERGE_GEO ) { +#if JVET_W0097_GPM_MMVD_TM + CodedCUInfo &relatedCU = ((EncModeCtrlMTnoRQT *)m_modeCtrl)->getBlkInfo(partitioner.currArea()); + if (!relatedCU.isGPMTested) + { + xCheckRDCostMergeGeoComb2Nx2N(tempCS, bestCS, partitioner, currTestMode); + relatedCU.isGPMTested = 1; + } + else + { + xCheckRDCostMergeGeoComb2Nx2N(tempCS, bestCS, partitioner, currTestMode, true); + } +#else xCheckRDCostMergeGeo2Nx2N( tempCS, bestCS, partitioner, currTestMode ); +#endif } #if MULTI_HYP_PRED else if (currTestMode.type == ETM_INTER_MULTIHYP) @@ -2746,6 +2814,10 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& PU::getInterMergeCandidates(pu, mergeCtx , 0 ); +#if JVET_W0097_GPM_MMVD_TM + m_mergeCand.copyMergeCtx(mergeCtx); + m_mergeCandAvail = true; +#endif PU::getInterMMVDMergeCandidates(pu, mergeCtx); #if TM_MRG && MERGE_ENC_OPT if (cu.cs->sps->getUseDMVDMode()) @@ -3893,22 +3965,73 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& } } -void EncCu::xCheckRDCostMergeGeo2Nx2N(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode) +#if JVET_W0097_GPM_MMVD_TM +void EncCu::xCheckRDCostMergeGeoComb2Nx2N(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode, bool isSecondPass) { - const Slice &slice = *tempCS->slice; - CHECK(slice.getSliceType() == I_SLICE, "Merge modes not available for I-slices"); + int numSATDCands = ((fastGpmMmvdSearch && isSecondPass) ? 
60 : 70); tempCS->initStructData(encTestMode.qp); - +#if TM_MRG + MergeCtx mergeCtx[GEO_NUM_TM_MV_CAND]; +#else MergeCtx mergeCtx; +#endif const SPS &sps = *tempCS->sps; + CodedCUInfo& relatedCU = ((EncModeCtrlMTnoRQT *)m_modeCtrl)->getBlkInfo(pm.currArea()); + bool extMMVD = tempCS->picHeader->getGPMMMVDTableFlag(); if (sps.getSbTMVPEnabledFlag()) { Size bufSize = g_miScaling.scale(tempCS->area.lumaSize()); +#if TM_MRG + for (int i = 0; i < GEO_NUM_TM_MV_CAND; i++) + { + mergeCtx[i].subPuMvpMiBuf = MotionBuf(m_SubPuMiBuf, bufSize); + } +#else mergeCtx.subPuMvpMiBuf = MotionBuf(m_SubPuMiBuf, bufSize); +#endif + } + + // 1. bit estimation + const double sqrtLambdaFracBits = m_pcRdCost->getMotionLambda() * FRAC_BITS_SCALE; + uint8_t maxNumMergeCandidates = tempCS->sps->getMaxNumGeoCand(); + const TempCtx ctxStart(m_CtxCache, m_CABACEstimator->getCtx()); + + double geoModeCost[GEO_NUM_PARTITION_MODE], geoMergeIdxCost[MRG_MAX_NUM_CANDS], geoMMVDFlagCost[2], geoMMVDIdxCost[GPM_EXT_MMVD_MAX_REFINE_NUM]; +#if TM_MRG + double geoTMFlagCost[2]; +#endif + for (int idx = 0; idx < GEO_NUM_PARTITION_MODE; idx++) + { + uint64_t fracBits = m_CABACEstimator->geo_mode_est(ctxStart, idx); + geoModeCost[idx] = (double)fracBits * sqrtLambdaFracBits; + } + for (int idx = 0; idx < maxNumMergeCandidates; idx++) + { + uint64_t fracBits = m_CABACEstimator->geo_mergeIdx_est(ctxStart, idx, maxNumMergeCandidates); + geoMergeIdxCost[idx] = (double)fracBits * sqrtLambdaFracBits; + } + for (int idx = 0; idx < 2; idx++) + { + uint64_t fracBits = m_CABACEstimator->geo_mmvdFlag_est(ctxStart, idx); + geoMMVDFlagCost[idx] = (double)fracBits * sqrtLambdaFracBits; + } + for (int idx = 0; idx < (extMMVD ? 
GPM_EXT_MMVD_MAX_REFINE_NUM : GPM_MMVD_MAX_REFINE_NUM); idx++) + { + uint64_t fractBits = m_CABACEstimator->geo_mmvdIdx_est(ctxStart, idx, extMMVD); + geoMMVDIdxCost[idx] = (double)fractBits * sqrtLambdaFracBits; + } +#if TM_MRG + for (int idx = 0; idx < 2 && sps.getUseDMVDMode(); idx++) + { + uint64_t fracBits = m_CABACEstimator->geo_tmFlag_est(ctxStart, idx); + geoTMFlagCost[idx] = (double)fracBits * sqrtLambdaFracBits; } +#endif + m_CABACEstimator->getCtx() = ctxStart; + // 2. get SAD for all candidates CodingUnit &cu = tempCS->addCU(tempCS->area, pm.chType); pm.setCUData(cu); cu.predMode = MODE_INTER; @@ -3937,53 +4060,88 @@ void EncCu::xCheckRDCostMergeGeo2Nx2N(CodingStructure *&tempCS, CodingStructure #if TM_MRG pu.tmMergeFlag = false; #endif - PU::getGeoMergeCandidates(pu, mergeCtx); + CHECK(!m_mergeCandAvail, "merge candidates are not available"); +#if TM_MRG + PU::getGeoMergeCandidates(pu, mergeCtx[GEO_TM_OFF], &m_mergeCand); + maxNumMergeCandidates = min((int)maxNumMergeCandidates, mergeCtx[GEO_TM_OFF].numValidMergeCand); +#else + PU::getGeoMergeCandidates(pu, mergeCtx, &m_mergeCand); + maxNumMergeCandidates = min((int)maxNumMergeCandidates, mergeCtx.numValidMergeCand); +#endif - GeoComboCostList comboList; - int bitsCandTB = floorLog2(GEO_NUM_PARTITION_MODE); PelUnitBuf geoBuffer[GEO_MAX_NUM_UNI_CANDS]; PelUnitBuf geoTempBuf[GEO_MAX_NUM_UNI_CANDS]; + PelUnitBuf geoMMVDBuf[GEO_MAX_NUM_UNI_CANDS][GPM_EXT_MMVD_MAX_REFINE_NUM]; + PelUnitBuf geoMMVDTempBuf[GEO_MAX_NUM_UNI_CANDS][GPM_EXT_MMVD_MAX_REFINE_NUM]; PelUnitBuf geoCombinations[GEO_MAX_TRY_WEIGHTED_SAD]; - DistParam distParam; + DistParam distParam; const UnitArea localUnitArea(tempCS->area.chromaFormat, Area(0, 0, tempCS->area.Y().width, tempCS->area.Y().height)); - const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda(); - uint8_t maxNumMergeCandidates = cu.cs->sps->getMaxNumGeoCand(); DistParam distParamWholeBlk; m_pcRdCost->setDistParam(distParamWholeBlk, tempCS->getOrgBuf().Y(), 
m_acMergeBuffer[0].Y().buf, m_acMergeBuffer[0].Y().stride, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y); - Distortion bestWholeBlkSad = MAX_UINT64; - double bestWholeBlkCost = MAX_DOUBLE; + Distortion sadWholeBlk[GEO_MAX_NUM_UNI_CANDS], sadMMVDWholeBlk[GEO_MAX_NUM_UNI_CANDS][GPM_EXT_MMVD_MAX_REFINE_NUM]; - Distortion sadWholeBlk[GEO_MAX_NUM_UNI_CANDS]; - int pocMrg[GEO_MAX_NUM_UNI_CANDS]; - Mv MrgMv[GEO_MAX_NUM_UNI_CANDS]; - bool isSkipThisCand[GEO_MAX_NUM_UNI_CANDS] = { false }; + int pocMrg[GEO_MAX_NUM_UNI_CANDS]; + Mv mrgMv[GEO_MAX_NUM_UNI_CANDS]; + bool mrgDuplicated[GEO_MAX_NUM_UNI_CANDS]; + + double bestMrgCost = MAX_DOUBLE; + double bestNormalMrgCost = MAX_DOUBLE; + + bool isGeoChromaAvail[GEO_MAX_NUM_UNI_CANDS]; + bool isGeoMMVDChromaAvail[GEO_MAX_NUM_UNI_CANDS][GPM_EXT_MMVD_MAX_REFINE_NUM]; + memset(isGeoChromaAvail, false, sizeof(bool) * GEO_MAX_NUM_UNI_CANDS); + memset(isGeoMMVDChromaAvail, false, sizeof(bool) * GEO_MAX_NUM_UNI_CANDS * GPM_EXT_MMVD_MAX_REFINE_NUM); +#if TM_MRG + bool isGeoTmChromaAvail[GEO_TM_MAX_NUM_CANDS]; + memset(isGeoTmChromaAvail, false, sizeof(bool) * GEO_TM_MAX_NUM_CANDS); +#endif for (uint8_t mergeCand = 0; mergeCand < maxNumMergeCandidates; mergeCand++) { - geoBuffer[mergeCand] = m_acMergeBuffer[mergeCand].getBuf(localUnitArea); - mergeCtx.setMergeInfo(pu, mergeCand); - int MrgList = mergeCtx.mvFieldNeighbours[(mergeCand << 1) + 0].refIdx == -1 ? 1 : 0; - RefPicList MrgeRefPicList = (MrgList ? REF_PIC_LIST_1 : REF_PIC_LIST_0); - int MrgrefIdx = mergeCtx.mvFieldNeighbours[(mergeCand << 1) + MrgList].refIdx; - pocMrg[mergeCand] = tempCS->slice->getRefPic(MrgeRefPicList, MrgrefIdx)->getPOC(); - MrgMv[mergeCand] = mergeCtx.mvFieldNeighbours[(mergeCand << 1) + MrgList].mv; - - for( int i = 0; i < mergeCand; i++ ) +#if TM_MRG + int mrgList = mergeCtx[GEO_TM_OFF].mvFieldNeighbours[(mergeCand << 1) + 0].refIdx == -1 ? 
1 : 0; + int mrgRefIdx = mergeCtx[GEO_TM_OFF].mvFieldNeighbours[(mergeCand << 1) + mrgList].refIdx; +#else + int mrgList = mergeCtx.mvFieldNeighbours[(mergeCand << 1) + 0].refIdx == -1 ? 1 : 0; + int mrgRefIdx = mergeCtx.mvFieldNeighbours[(mergeCand << 1) + mrgList].refIdx; +#endif + pocMrg[mergeCand] = tempCS->slice->getRefPic((RefPicList)mrgList, mrgRefIdx)->getPOC(); +#if TM_MRG + mrgMv[mergeCand] = mergeCtx[GEO_TM_OFF].mvFieldNeighbours[(mergeCand << 1) + mrgList].mv; +#else + mrgMv[mergeCand] = mergeCtx.mvFieldNeighbours[(mergeCand << 1) + mrgList].mv; +#endif + mrgDuplicated[mergeCand] = false; + if (mergeCand) { - if( pocMrg[mergeCand] == pocMrg[i] && MrgMv[mergeCand] == MrgMv[i] ) + for (int i = 0; i < mergeCand; i++) { - isSkipThisCand[mergeCand] = true; - break; + if (pocMrg[mergeCand] == pocMrg[i] && mrgMv[mergeCand] == mrgMv[i]) + { + mrgDuplicated[mergeCand] = true; + break; + } } } - +#if !MULTI_HYP_PRED + if (mrgDuplicated[mergeCand]) + { + continue; + } +#endif + geoBuffer[mergeCand] = m_acMergeBuffer[mergeCand].getBuf(localUnitArea); +#if TM_MRG + mergeCtx[GEO_TM_OFF].setMergeInfo(pu, mergeCand); +#else + mergeCtx.setMergeInfo(pu, mergeCand); +#endif if (m_pcEncCfg->getMCTSEncConstraint() && (!(MCTSHelper::checkMvBufferForMCTSConstraint(pu)))) { tempCS->initStructData(encTestMode.qp); return; } - m_pcInterSearch->motionCompensation(pu, geoBuffer[mergeCand]); + m_pcInterSearch->motionCompensation(pu, geoBuffer[mergeCand], REF_PIC_LIST_X, true, false); #if MULTI_HYP_PRED geoTempBuf[mergeCand] = m_acRealMergeBuffer[MRG_MAX_NUM_CANDS + mergeCand].getBuf(localUnitArea); #else @@ -3994,35 +4152,45 @@ void EncCu::xCheckRDCostMergeGeo2Nx2N(CodingStructure *&tempCS, CodingStructure distParamWholeBlk.cur.buf = geoTempBuf[mergeCand].Y().buf; distParamWholeBlk.cur.stride = geoTempBuf[mergeCand].Y().stride; sadWholeBlk[mergeCand] = distParamWholeBlk.distFunc(distParamWholeBlk); - if (sadWholeBlk[mergeCand] < bestWholeBlkSad) + double curCost = 
sadWholeBlk[mergeCand] + geoMergeIdxCost[mergeCand]; + if (curCost < bestNormalMrgCost) { - bestWholeBlkSad = sadWholeBlk[mergeCand]; - bestWholeBlkCost = ( double ) bestWholeBlkSad + ( mergeCand + 1 ) * sqrtLambdaForFirstPass; + bestNormalMrgCost = curCost; + } + curCost += geoMMVDFlagCost[0]; + if (curCost < bestMrgCost) + { + bestMrgCost = curCost; } } #if MULTI_HYP_PRED +#if TM_MRG + m_pcInterSearch->setGeoTmpBuffer(mergeCtx[GEO_TM_OFF]); +#else m_pcInterSearch->setGeoTmpBuffer(mergeCtx); #endif - bool isGeo = true; - for (uint8_t mergeCand = 1; mergeCand < maxNumMergeCandidates; mergeCand++) - { - isGeo &= isSkipThisCand[mergeCand]; - } - if (isGeo) - { - return; - } +#endif int wIdx = floorLog2(cu.lwidth()) - GEO_MIN_CU_LOG2; int hIdx = floorLog2(cu.lheight()) - GEO_MIN_CU_LOG2; + Distortion sadSmall = 0, sadLarge = 0; + int maskStride = 0, maskStride2 = 0, stepX = 1; + Pel* SADmask; + static_vector<int, GEO_NUM_PARTITION_MODE> selGeoModeList; + static_vector<double, GEO_NUM_PARTITION_MODE> selGeoModeRDList; + static_vector<int, 5> mergeCandList0[GEO_NUM_PARTITION_MODE]; + static_vector<int, 5> mergeCandList1[GEO_NUM_PARTITION_MODE]; + static_vector<int, 5> mmvdCandList0[GEO_NUM_PARTITION_MODE]; + static_vector<int, 5> mmvdCandList1[GEO_NUM_PARTITION_MODE]; + static_vector<double, 5> sadCostList0[GEO_NUM_PARTITION_MODE]; + static_vector<double, 5> sadCostList1[GEO_NUM_PARTITION_MODE]; + for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++) { - int maskStride = 0, maskStride2 = 0; - int stepX = 1; - Pel* SADmask; int16_t angle = g_GeoParams[splitDir][0]; if (g_angle2mirror[angle] == 2) { + stepX = 1; maskStride = -GEO_WEIGHT_MASK_SIZE; maskStride2 = -(int)cu.lwidth(); SADmask = &g_globalGeoEncSADmask[g_angle2mask[g_GeoParams[splitDir][0]]][(GEO_WEIGHT_MASK_SIZE - 1 - g_weightOffset[splitDir][hIdx][wIdx][1]) * GEO_WEIGHT_MASK_SIZE + g_weightOffset[splitDir][hIdx][wIdx][0]]; @@ -4036,97 +4204,155 @@ void 
EncCu::xCheckRDCostMergeGeo2Nx2N(CodingStructure *&tempCS, CodingStructure } else { + stepX = 1; maskStride = GEO_WEIGHT_MASK_SIZE; maskStride2 = -(int)cu.lwidth(); SADmask = &g_globalGeoEncSADmask[g_angle2mask[g_GeoParams[splitDir][0]]][g_weightOffset[splitDir][hIdx][wIdx][1] * GEO_WEIGHT_MASK_SIZE + g_weightOffset[splitDir][hIdx][wIdx][0]]; } - Distortion sadSmall = 0, sadLarge = 0; for (uint8_t mergeCand = 0; mergeCand < maxNumMergeCandidates; mergeCand++) { - int bitsCand = mergeCand + 1; - + if (mrgDuplicated[mergeCand]) + { + continue; + } m_pcRdCost->setDistParam(distParam, tempCS->getOrgBuf().Y(), geoTempBuf[mergeCand].Y().buf, geoTempBuf[mergeCand].Y().stride, SADmask, maskStride, stepX, maskStride2, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y); sadLarge = distParam.distFunc(distParam); - m_GeoCostList.insert(splitDir, 0, mergeCand, (double)sadLarge + (double)bitsCand * sqrtLambdaForFirstPass); + double tempCost = (double)sadLarge + geoMergeIdxCost[mergeCand] + geoMMVDFlagCost[0]; + m_GeoMMVDCostList.insert(splitDir, 0, mergeCand, 0, tempCost); + sortCandList(tempCost, mergeCand, 0, sadCostList0[splitDir], mergeCandList0[splitDir], mmvdCandList0[splitDir], numCandPerPar); sadSmall = sadWholeBlk[mergeCand] - sadLarge; - m_GeoCostList.insert(splitDir, 1, mergeCand, (double)sadSmall + (double)bitsCand * sqrtLambdaForFirstPass); + tempCost = (double)sadSmall + geoMergeIdxCost[mergeCand] + geoMMVDFlagCost[0]; + m_GeoMMVDCostList.insert(splitDir, 1, mergeCand, 0, tempCost); + sortCandList(tempCost, mergeCand, 0, sadCostList1[splitDir], mergeCandList1[splitDir], mmvdCandList1[splitDir], numCandPerPar); } + updateCandList(splitDir, (sadCostList0[splitDir][0] + sadCostList1[splitDir][0]), selGeoModeList, selGeoModeRDList, GEO_NUM_PARTITION_MODE); } + static_vector<int, GEO_MAX_TRY_WEIGHTED_SAD> geoSplitDirList; + static_vector<int, GEO_MAX_TRY_WEIGHTED_SAD> geoMergeCand0; + static_vector<int, GEO_MAX_TRY_WEIGHTED_SAD> geoMergeCand1; + static_vector<int, 
GEO_MAX_TRY_WEIGHTED_SAD> geoMmvdCand0; + static_vector<int, GEO_MAX_TRY_WEIGHTED_SAD> geoMmvdCand1; + static_vector<double, GEO_MAX_TRY_WEIGHTED_SAD> geoSADCostList; + for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++) { - for (int GeoMotionIdx = 0; GeoMotionIdx < maxNumMergeCandidates * (maxNumMergeCandidates - 1); GeoMotionIdx++) + int numCandPart0 = min(numCandPerPar, (int)mergeCandList0[splitDir].size()); + int numCandPart1 = min(numCandPerPar, (int)mergeCandList1[splitDir].size()); + for (int candIdx0 = 0; candIdx0 < numCandPart0; candIdx0++) { - unsigned int mergeCand0 = m_GeoModeTest[GeoMotionIdx].m_candIdx0; - unsigned int mergeCand1 = m_GeoModeTest[GeoMotionIdx].m_candIdx1; - double tempCost = m_GeoCostList.singleDistList[0][splitDir][mergeCand0].cost + m_GeoCostList.singleDistList[1][splitDir][mergeCand1].cost; - if( tempCost > bestWholeBlkCost ) + for (int candIdx1 = 0; candIdx1 < numCandPart1; candIdx1++) { - continue; + int mergeCand0 = mergeCandList0[splitDir][candIdx0]; + int mergeCand1 = mergeCandList1[splitDir][candIdx1]; + + if (mergeCand0 == mergeCand1) + { + continue; + } + + double tempCost = m_GeoMMVDCostList.singleDistList[0][splitDir][mergeCand0][0].cost + m_GeoMMVDCostList.singleDistList[1][splitDir][mergeCand1][0].cost; + tempCost = tempCost + geoModeCost[splitDir]; +#if TM_MRG + if (sps.getUseDMVDMode()) + { + tempCost += geoTMFlagCost[0]; + } +#endif + updateGeoMMVDCandList(tempCost, splitDir, mergeCand0, mergeCand1, 0, 0, geoSADCostList, geoSplitDirList, geoMergeCand0, geoMergeCand1, geoMmvdCand0, geoMmvdCand1, numSATDCands); } - tempCost = tempCost + (double)bitsCandTB * sqrtLambdaForFirstPass; - comboList.list.push_back(GeoMergeCombo(splitDir, mergeCand0, mergeCand1, tempCost)); } } - if( comboList.list.empty() ) - { - return; - } - - comboList.sortByCost(); - bool geocandHasNoResidual[GEO_MAX_TRY_WEIGHTED_SAD] = { false }; - bool bestIsSkip = false; - int geoNumCobo = (int)comboList.list.size(); - 
static_vector<uint8_t, GEO_MAX_TRY_WEIGHTED_SAD> geoRdModeList; - static_vector<double, GEO_MAX_TRY_WEIGHTED_SAD> geocandCostList; + static_vector<uint8_t, GEO_MAX_TRY_WEIGHTED_SAD> geoRdModeList; + static_vector<bool, GEO_MAX_TRY_WEIGHTED_SAD> isNonMMVDListIdx; + static_vector<int, GEO_MAX_TRY_WEIGHTED_SAD> geoPartitionModeList; + static_vector<double, GEO_MAX_TRY_WEIGHTED_SAD> geocandCostList; DistParam distParamSAD2; const bool useHadamard = !tempCS->slice->getDisableSATDForRD(); m_pcRdCost->setDistParam(distParamSAD2, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y(), sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, useHadamard); - int geoNumMrgSATDCand = min(GEO_MAX_TRY_WEIGHTED_SATD, geoNumCobo); - for (uint8_t candidateIdx = 0; candidateIdx < min(geoNumCobo, GEO_MAX_TRY_WEIGHTED_SAD); candidateIdx++) + int numberGeoCandChecked = (int)geoSADCostList.size(); + int geoNumMrgSATDCand = min(GEO_MAX_TRY_WEIGHTED_SATD, numberGeoCandChecked); + int numStoredCands = geoNumMrgSATDCand; + for (uint8_t candidateIdx = 0; candidateIdx < numberGeoCandChecked; candidateIdx++) { - int splitDir = comboList.list[candidateIdx].splitDir; - int mergeCand0 = comboList.list[candidateIdx].mergeIdx0; - int mergeCand1 = comboList.list[candidateIdx].mergeIdx1; + int splitDir = geoSplitDirList[candidateIdx]; + int mergeCand0 = geoMergeCand0[candidateIdx]; + int mergeCand1 = geoMergeCand1[candidateIdx]; + bool mmvdFlag0 = false; + bool mmvdFlag1 = false; geoCombinations[candidateIdx] = m_acGeoWeightedBuffer[candidateIdx].getBuf(localUnitArea); m_pcInterSearch->weightedGeoBlk(pu, splitDir, CHANNEL_TYPE_LUMA, geoCombinations[candidateIdx], geoBuffer[mergeCand0], geoBuffer[mergeCand1]); distParamSAD2.cur = geoCombinations[candidateIdx].Y(); Distortion sad = distParamSAD2.distFunc(distParamSAD2); - int mvBits = 2; - mergeCand1 -= mergeCand1 < mergeCand0 ? 
0 : 1; - mvBits += mergeCand0; - mvBits += mergeCand1; - double updateCost = (double)sad + (double)(bitsCandTB + mvBits) * sqrtLambdaForFirstPass; - comboList.list[candidateIdx].cost = updateCost; - updateCandList(candidateIdx, updateCost, geoRdModeList, geocandCostList, geoNumMrgSATDCand); + + double updateCost = geoModeCost[splitDir] + geoMergeIdxCost[mergeCand0] + (fastGpmMmvdSearch ? geoMergeIdxCost[mergeCand1 > mergeCand0 ? (mergeCand1 - 1) : mergeCand1] : geoMergeIdxCost[mergeCand1]) + geoMMVDFlagCost[mmvdFlag0] + geoMMVDFlagCost[mmvdFlag1]; +#if TM_MRG + if (sps.getUseDMVDMode()) + { + updateCost += geoTMFlagCost[0]; + } +#endif + updateCost += (double)sad; + orderCandList(candidateIdx, true, splitDir, updateCost, geoRdModeList, isNonMMVDListIdx, geoPartitionModeList, geocandCostList, numStoredCands); } - for (uint8_t i = 0; i < geoNumMrgSATDCand; i++) + + for (uint8_t i = 1; i < geoNumMrgSATDCand; i++) { +#if MERGE_ENC_OPT + if (geocandCostList[i] > MRG_FAST_RATIO * geocandCostList[0] || geocandCostList[i] > getMergeBestSATDCost()) +#else if (geocandCostList[i] > MRG_FAST_RATIO * geocandCostList[0] || geocandCostList[i] > getMergeBestSATDCost() || geocandCostList[i] > getAFFBestSATDCost()) +#endif { geoNumMrgSATDCand = i; break; } } + for (uint8_t i = 0; i < geoNumMrgSATDCand && isChromaEnabled(pu.chromaFormat); i++) { uint8_t candidateIdx = geoRdModeList[i]; - int splitDir = comboList.list[candidateIdx].splitDir; - int mergeCand0 = comboList.list[candidateIdx].mergeIdx0; - int mergeCand1 = comboList.list[candidateIdx].mergeIdx1; + int splitDir = geoSplitDirList[candidateIdx]; + int mergeCand0 = geoMergeCand0[candidateIdx]; + int mergeCand1 = geoMergeCand1[candidateIdx]; + + if (!isGeoChromaAvail[mergeCand0]) + { +#if TM_MRG + mergeCtx[GEO_TM_OFF].setMergeInfo(pu, mergeCand0); +#else + mergeCtx.setMergeInfo(pu, mergeCand0); +#endif + m_pcInterSearch->motionCompensation(pu, geoBuffer[mergeCand0], REF_PIC_LIST_X, false, true); + isGeoChromaAvail[mergeCand0] = 
true; + } + + if (!isGeoChromaAvail[mergeCand1]) + { +#if TM_MRG + mergeCtx[GEO_TM_OFF].setMergeInfo(pu, mergeCand1); +#else + mergeCtx.setMergeInfo(pu, mergeCand1); +#endif + m_pcInterSearch->motionCompensation(pu, geoBuffer[mergeCand1], REF_PIC_LIST_X, false, true); + isGeoChromaAvail[mergeCand1] = true; + } + geoCombinations[candidateIdx] = m_acGeoWeightedBuffer[candidateIdx].getBuf(localUnitArea); m_pcInterSearch->weightedGeoBlk(pu, splitDir, CHANNEL_TYPE_CHROMA, geoCombinations[candidateIdx], geoBuffer[mergeCand0], geoBuffer[mergeCand1]); } + bool geocandHasNoResidual[GEO_MAX_TRY_WEIGHTED_SAD]; + bool bestIsSkip = false; + std::memset(geocandHasNoResidual, false, GEO_MAX_TRY_WEIGHTED_SAD * sizeof(bool)); + m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false; tempCS->initStructData(encTestMode.qp); - uint8_t iteration; - uint8_t iterationBegin = 0; - iteration = 2; + uint8_t iteration = 2, iterationBegin = 0; for (uint8_t noResidualPass = iterationBegin; noResidualPass < iteration; ++noResidualPass) { for (uint8_t mrgHADIdx = 0; mrgHADIdx < geoNumMrgSATDCand; mrgHADIdx++) @@ -4154,20 +4380,30 @@ void EncCu::xCheckRDCostMergeGeo2Nx2N(CodingStructure *&tempCS, CodingStructure cu.mmvdSkip = false; cu.skip = false; cu.mipFlag = false; -#if JVET_V0130_INTRA_TMP - cu.tmpFlag = false; -#endif cu.bdpcmMode = 0; PredictionUnit &pu = tempCS->addPU(cu, pm.chType); pu.mergeFlag = true; pu.regularMergeFlag = false; - pu.geoSplitDir = comboList.list[candidateIdx].splitDir; - pu.geoMergeIdx0 = comboList.list[candidateIdx].mergeIdx0; - pu.geoMergeIdx1 = comboList.list[candidateIdx].mergeIdx1; + pu.geoSplitDir = geoSplitDirList[candidateIdx]; + pu.geoMergeIdx0 = geoMergeCand0[candidateIdx]; + pu.geoMergeIdx1 = geoMergeCand1[candidateIdx]; +#if TM_MRG + pu.tmMergeFlag = false; + pu.geoTmFlag0 = false; + pu.geoTmFlag1 = false; +#endif + pu.geoMMVDFlag0 = false; + pu.geoMMVDFlag1 = false; + pu.mmvdMergeFlag = false; pu.mmvdMergeIdx = MAX_UCHAR; - - 
PU::spanGeoMotionInfo(pu, mergeCtx, pu.geoSplitDir, pu.geoMergeIdx0, pu.geoMergeIdx1); +#if TM_MRG + MergeCtx *mergeTmCtx0 = nullptr; + MergeCtx *mergeTmCtx1 = nullptr; + PU::spanGeoMMVDMotionInfo(pu, mergeCtx[GEO_TM_OFF], *mergeTmCtx0, *mergeTmCtx1, pu.geoSplitDir, pu.geoMergeIdx0, pu.geoMergeIdx1, pu.geoTmFlag0, pu.geoMMVDFlag0, pu.geoMMVDIdx0, pu.geoTmFlag1, pu.geoMMVDFlag1, pu.geoMMVDIdx1); +#else + PU::spanGeoMMVDMotionInfo(pu, mergeCtx, pu.geoSplitDir, pu.geoMergeIdx0, pu.geoMergeIdx1, pu.geoMMVDFlag0, pu.geoMMVDIdx0, pu.geoMMVDFlag1, pu.geoMMVDIdx1); +#endif tempCS->getPredBuf().copyFrom(geoCombinations[candidateIdx]); #if ENABLE_OBMC cu.isobmcMC = true; @@ -4175,6 +4411,7 @@ void EncCu::xCheckRDCostMergeGeo2Nx2N(CodingStructure *&tempCS, CodingStructure m_pcInterSearch->subBlockOBMC(pu); cu.isobmcMC = false; #endif + xEncodeInterResidual(tempCS, bestCS, pm, encTestMode, noResidualPass, (noResidualPass == 0 ? &geocandHasNoResidual[candidateIdx] : NULL)); if (m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip) @@ -4184,28 +4421,1102 @@ void EncCu::xCheckRDCostMergeGeo2Nx2N(CodingStructure *&tempCS, CodingStructure tempCS->initStructData(encTestMode.qp); } } - if (m_bestModeUpdated && bestCS->cost != MAX_DOUBLE) + + CodingUnit *bestCU = bestCS->getCU(CHANNEL_TYPE_LUMA); + bool skipGPMMMVD = false; + if (geoNumMrgSATDCand > 0) { - xCalDebCost(*bestCS, pm); + if (bestCU->skip && !bestCU->geoFlag && !bestCU->affine && !bestCU->mmvdSkip && !bestCU->firstPU->mmvdMergeFlag) + { + skipGPMMMVD = true; + } + else if (bestCU->affine && bestCU->skip && (bestCU->lwidth() >= 16 || bestCU->lheight() >= 16)) + { + skipGPMMMVD = true; + } } -} -#if MERGE_ENC_OPT -void EncCu::xCheckSATDCostRegularMerge(CodingStructure *&tempCS, CodingUnit &cu, PredictionUnit &pu, MergeCtx mergeCtx, PelUnitBuf *acMergeTempBuffer[MMVD_MRG_MAX_RD_NUM], PelUnitBuf *&singleMergeTempBuffer, PelUnitBuf acMergeTmpBuffer[MRG_MAX_NUM_CANDS] -#if !MULTI_PASS_DMVR - , Mv 
refinedMvdL0[MAX_NUM_PARTS_IN_CTU][MRG_MAX_NUM_CANDS] -#endif - , unsigned& uiNumMrgSATDCand, static_vector<ModeInfo, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM> &RdModeList, static_vector<double, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM> &candCostList, DistParam distParam, const TempCtx &ctxStart -#if MULTI_PASS_DMVR - , bool* applyBDMVR -#endif -) -{ -#if INTER_LIC - cu.LICFlag = false; -#endif - cu.mmvdSkip = false; - cu.geoFlag = false; + bool isBaseMergeCandIncluded[GEO_MAX_NUM_UNI_CANDS]; + std::memset(isBaseMergeCandIncluded, false, GEO_MAX_NUM_UNI_CANDS * sizeof(bool)); + bool isGPMModeIncludedForMMVD[GEO_NUM_PARTITION_MODE]; + std::memset(isGPMModeIncludedForMMVD, false, GEO_NUM_PARTITION_MODE * sizeof(bool)); + + if (!skipGPMMMVD) + { + skipGPMMMVD = (selGeoModeRDList[0] > (bestNormalMrgCost * 1.1)); + } + + if (!skipGPMMMVD) + { + if (isSecondPass) + { + for (int i = 0; i < relatedCU.numGeoDirCand; i++) + { + isGPMModeIncludedForMMVD[relatedCU.geoDirCandList[i]] = true; + if (fastGpmMmvdRelatedCU) + { + isBaseMergeCandIncluded[relatedCU.geoMrgIdx0List[i]] = true; + isBaseMergeCandIncluded[relatedCU.geoMrgIdx1List[i]] = true; + } + } + if (!fastGpmMmvdRelatedCU) + { + std::memset(isBaseMergeCandIncluded, true, GEO_MAX_NUM_UNI_CANDS * sizeof(bool)); + } + } + else + { + double dirCostThresh = (selGeoModeRDList[0] * 1.2); + isGPMModeIncludedForMMVD[selGeoModeList[0]] = true; + isBaseMergeCandIncluded[mergeCandList0[selGeoModeList[0]][0]] = true; + isBaseMergeCandIncluded[mergeCandList1[selGeoModeList[0]][0]] = true; + + for (int i = 1; i < maxNumGPMDirFirstPass; i++) + { + if (selGeoModeRDList[i] > dirCostThresh) + { + break; + } + else + { + isGPMModeIncludedForMMVD[selGeoModeList[i]] = true; + isBaseMergeCandIncluded[mergeCandList0[selGeoModeList[i]][0]] = true; + isBaseMergeCandIncluded[mergeCandList1[selGeoModeList[i]][0]] = true; + } + } + if (includeMoreMMVDCandFirstPass) + { + int num = 0; + // add more cands from best combo results obtained in weighted blended 
nonmmvd combo + num = min((int)geocandCostList.size(), GEO_MAX_TRY_WEIGHTED_SATD); + for (int i = 0; i < num; i++) + { + if (fastGpmMmvdSearch && (geocandCostList[i] > dirCostThresh)) + { + break; + } + else + { + isGPMModeIncludedForMMVD[geoPartitionModeList[i]] = true; + isBaseMergeCandIncluded[geoMergeCand0[geoRdModeList[i]]] = true; + isBaseMergeCandIncluded[geoMergeCand1[geoRdModeList[i]]] = true; + } + } + } + } + } + + if (!skipGPMMMVD) + { + CodingUnit &cu = tempCS->addCU(tempCS->area, pm.chType); + pm.setCUData(cu); + cu.predMode = MODE_INTER; + cu.slice = tempCS->slice; + cu.tileIdx = tempCS->pps->getTileIdx(tempCS->area.lumaPos()); + cu.qp = encTestMode.qp; + cu.affine = false; + cu.mtsFlag = false; +#if INTER_LIC + cu.LICFlag = false; +#endif + cu.BcwIdx = BCW_DEFAULT; + cu.geoFlag = true; + cu.imv = 0; + cu.mmvdSkip = false; + cu.skip = false; + cu.mipFlag = false; + cu.bdpcmMode = 0; + + PredictionUnit &pu = tempCS->addPU(cu, pm.chType); + pu.mergeFlag = true; + pu.regularMergeFlag = false; +#if TM_MRG + pu.tmMergeFlag = false; +#endif + bool simpleGPMMMVDStep = (m_pcEncCfg->getIntraPeriod() == -1); + double mmvdMrgCost[GEO_MAX_NUM_UNI_CANDS][GPM_EXT_MMVD_MAX_REFINE_NUM]; + for (uint8_t mergeCand = 0; mergeCand < maxNumMergeCandidates; mergeCand++) + { + if (mrgDuplicated[mergeCand]) + { + continue; + } + if (!isBaseMergeCandIncluded[mergeCand]) + { + continue; + } + for (uint8_t mmvdCand = 0; mmvdCand < (extMMVD ? GPM_EXT_MMVD_MAX_REFINE_NUM : GPM_MMVD_MAX_REFINE_NUM); mmvdCand++) + { + if (simpleGPMMMVDStep) + { + int mmvdStep = (extMMVD ? 
(mmvdCand >> 3) : (mmvdCand >> 2)); + if (mmvdStep >= 5 && (!fastGpmMmvdSearch || (fastGpmMmvdSearch && !isSecondPass))) + { + continue; + } + } + geoMMVDBuf[mergeCand][mmvdCand] = m_acGeoMMVDBuffer[mergeCand][mmvdCand].getBuf(localUnitArea); +#if TM_MRG + mergeCtx[GEO_TM_OFF].setGeoMmvdMergeInfo(pu, mergeCand, mmvdCand); +#else + mergeCtx.setGeoMmvdMergeInfo(pu, mergeCand, mmvdCand); +#endif + if (m_pcEncCfg->getMCTSEncConstraint() && (!(MCTSHelper::checkMvBufferForMCTSConstraint(pu)))) + { + tempCS->initStructData(encTestMode.qp); + return; + } + m_pcInterSearch->motionCompensation(pu, geoMMVDBuf[mergeCand][mmvdCand], REF_PIC_LIST_X, true, false); + geoMMVDTempBuf[mergeCand][mmvdCand] = m_acGeoMMVDTmpBuffer[mergeCand][mmvdCand].getBuf(localUnitArea); + geoMMVDTempBuf[mergeCand][mmvdCand].Y().copyFrom(geoMMVDBuf[mergeCand][mmvdCand].Y()); + geoMMVDTempBuf[mergeCand][mmvdCand].Y().roundToOutputBitdepth(geoMMVDTempBuf[mergeCand][mmvdCand].Y(), cu.slice->clpRng(COMPONENT_Y)); + distParamWholeBlk.cur.buf = geoMMVDTempBuf[mergeCand][mmvdCand].Y().buf; + distParamWholeBlk.cur.stride = geoMMVDTempBuf[mergeCand][mmvdCand].Y().stride; + sadMMVDWholeBlk[mergeCand][mmvdCand] = distParamWholeBlk.distFunc(distParamWholeBlk); + mmvdMrgCost[mergeCand][mmvdCand] = sadMMVDWholeBlk[mergeCand][mmvdCand] + geoMergeIdxCost[mergeCand] + geoMMVDFlagCost[1] + geoMMVDIdxCost[mmvdCand]; + if (mmvdMrgCost[mergeCand][mmvdCand] < bestMrgCost) + { + bestMrgCost = mmvdMrgCost[mergeCand][mmvdCand]; + } + } + } + + double mrgCostThres = (bestMrgCost * 3.0); + for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++) + { + if (!isGPMModeIncludedForMMVD[splitDir]) + { + continue; + } + int16_t angle = g_GeoParams[splitDir][0]; + if (g_angle2mirror[angle] == 2) + { + stepX = 1; + maskStride = -GEO_WEIGHT_MASK_SIZE; + maskStride2 = -(int)cu.lwidth(); + SADmask = &g_globalGeoEncSADmask[g_angle2mask[g_GeoParams[splitDir][0]]][(GEO_WEIGHT_MASK_SIZE - 1 - 
g_weightOffset[splitDir][hIdx][wIdx][1]) * GEO_WEIGHT_MASK_SIZE + g_weightOffset[splitDir][hIdx][wIdx][0]]; + } + else if (g_angle2mirror[angle] == 1) + { + stepX = -1; + maskStride2 = cu.lwidth(); + maskStride = GEO_WEIGHT_MASK_SIZE; + SADmask = &g_globalGeoEncSADmask[g_angle2mask[g_GeoParams[splitDir][0]]][g_weightOffset[splitDir][hIdx][wIdx][1] * GEO_WEIGHT_MASK_SIZE + (GEO_WEIGHT_MASK_SIZE - 1 - g_weightOffset[splitDir][hIdx][wIdx][0])]; + } + else + { + stepX = 1; + maskStride = GEO_WEIGHT_MASK_SIZE; + maskStride2 = -(int)cu.lwidth(); + SADmask = &g_globalGeoEncSADmask[g_angle2mask[g_GeoParams[splitDir][0]]][g_weightOffset[splitDir][hIdx][wIdx][1] * GEO_WEIGHT_MASK_SIZE + g_weightOffset[splitDir][hIdx][wIdx][0]]; + } + for (uint8_t mergeCand = 0; mergeCand < maxNumMergeCandidates; mergeCand++) + { + if (mrgDuplicated[mergeCand]) + { + continue; + } + if (!isBaseMergeCandIncluded[mergeCand]) + { + continue; + } + for (uint8_t mmvdCand = 0; mmvdCand < (extMMVD ? GPM_EXT_MMVD_MAX_REFINE_NUM : GPM_MMVD_MAX_REFINE_NUM); mmvdCand++) + { + if (simpleGPMMMVDStep) + { + int mmvdStep = (extMMVD ? 
(mmvdCand >> 3) : (mmvdCand >> 2)); + if (mmvdStep >= 5 && (!fastGpmMmvdSearch || (fastGpmMmvdSearch && !isSecondPass))) + { + continue; + } + } + if (mmvdMrgCost[mergeCand][mmvdCand] > mrgCostThres) + { + continue; + } + m_pcRdCost->setDistParam(distParam, tempCS->getOrgBuf().Y(), geoMMVDTempBuf[mergeCand][mmvdCand].Y().buf, geoMMVDTempBuf[mergeCand][mmvdCand].Y().stride, SADmask, maskStride, stepX, maskStride2, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y); + sadLarge = distParam.distFunc(distParam); + double tempCost = (double)sadLarge + geoMergeIdxCost[mergeCand] + geoMMVDFlagCost[1] + geoMMVDIdxCost[mmvdCand]; + m_GeoMMVDCostList.insert(splitDir, 0, mergeCand, (mmvdCand + 1), tempCost); + sortCandList(tempCost, mergeCand, (mmvdCand + 1), sadCostList0[splitDir], mergeCandList0[splitDir], mmvdCandList0[splitDir], numCandPerPar); + + sadSmall = sadMMVDWholeBlk[mergeCand][mmvdCand] - sadLarge; + tempCost = (double)sadSmall + geoMergeIdxCost[mergeCand] + geoMMVDFlagCost[1] + geoMMVDIdxCost[mmvdCand]; + m_GeoMMVDCostList.insert(splitDir, 1, mergeCand, (mmvdCand + 1), tempCost); + sortCandList(tempCost, mergeCand, (mmvdCand + 1), sadCostList1[splitDir], mergeCandList1[splitDir], mmvdCandList1[splitDir], numCandPerPar); + } + } + } + + for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++) + { + int numCandPart0 = min(numCandPerPar, (int)mergeCandList0[splitDir].size()); + int numCandPart1 = min(numCandPerPar, (int)mergeCandList1[splitDir].size()); + for (int candIdx0 = 0; candIdx0 < numCandPart0; candIdx0++) + { + for (int candIdx1 = 0; candIdx1 < numCandPart1; candIdx1++) + { + int mergeCand0 = mergeCandList0[splitDir][candIdx0]; + int mergeCand1 = mergeCandList1[splitDir][candIdx1]; + int mmvdCand0 = mmvdCandList0[splitDir][candIdx0]; + int mmvdCand1 = mmvdCandList1[splitDir][candIdx1]; +#if TM_MRG + bool geoTmFlag0 = (mmvdCand0 == (GPM_EXT_MMVD_MAX_REFINE_NUM + 1)); + bool geoTmFlag1 = (mmvdCand1 == (GPM_EXT_MMVD_MAX_REFINE_NUM + 1)); + 
CHECK(geoTmFlag0 || geoTmFlag1, "GPM TM has not been tested by far"); +#endif + if ((mmvdCand0 == 0) && (mmvdCand1 == 0)) + { + continue; + } + if ((mmvdCand0 == mmvdCand1) && (mmvdCand0 > 0)) + { + if (mergeCand0 == mergeCand1) + { + continue; + } + } + + double tempCost = m_GeoMMVDCostList.singleDistList[0][splitDir][mergeCand0][mmvdCand0].cost + m_GeoMMVDCostList.singleDistList[1][splitDir][mergeCand1][mmvdCand1].cost; + tempCost = tempCost + geoModeCost[splitDir]; +#if TM_MRG + if (sps.getUseDMVDMode()) + { + tempCost += geoTMFlagCost[0]; + } +#endif + updateGeoMMVDCandList(tempCost, splitDir, mergeCand0, mergeCand1, mmvdCand0, mmvdCand1, + geoSADCostList, geoSplitDirList, geoMergeCand0, geoMergeCand1, geoMmvdCand0, geoMmvdCand1, numSATDCands); + } + } + } + +#if TM_MRG + uint8_t maxNumTmMrgCand = maxNumMergeCandidates; + PelUnitBuf geoTmBuffer[GEO_TM_MAX_NUM_CANDS]; + PelUnitBuf geoTmTempBuf[GEO_TM_MAX_NUM_CANDS]; + if (sps.getUseDMVDMode()) + { + for (int i = GEO_TM_SHAPE_AL; i < GEO_NUM_TM_MV_CAND; i++) + { + mergeCtx[i].numValidMergeCand = maxNumTmMrgCand; + for (int idx = 0; idx < maxNumTmMrgCand; idx++) + { + if (mrgDuplicated[idx]) + { + continue; + } + mergeCtx[i].BcwIdx[idx] = BCW_DEFAULT; + mergeCtx[i].useAltHpelIf[idx] = false; +#if INTER_LIC + mergeCtx[i].LICFlags[idx] = false; +#endif + mergeCtx[i].interDirNeighbours[idx] = mergeCtx[GEO_TM_OFF].interDirNeighbours[idx]; + mergeCtx[i].mvFieldNeighbours[(idx << 1)].mv = mergeCtx[GEO_TM_OFF].mvFieldNeighbours[(idx << 1)].mv; + mergeCtx[i].mvFieldNeighbours[(idx << 1) + 1].mv = mergeCtx[GEO_TM_OFF].mvFieldNeighbours[(idx << 1) + 1].mv; + mergeCtx[i].mvFieldNeighbours[(idx << 1)].refIdx = mergeCtx[GEO_TM_OFF].mvFieldNeighbours[(idx << 1)].refIdx; + mergeCtx[i].mvFieldNeighbours[(idx << 1) + 1].refIdx = mergeCtx[GEO_TM_OFF].mvFieldNeighbours[(idx << 1) + 1].refIdx; + } + } + + pu.tmMergeFlag = true; + Distortion sadTmWholeBlk[GEO_TM_MAX_NUM_CANDS]; + for (uint8_t tmType = GEO_TM_SHAPE_AL; tmType < 
GEO_NUM_TM_MV_CAND; tmType++) + { + pu.geoTmType = tmType; + for (uint8_t mrgIdx = 0; mrgIdx < maxNumTmMrgCand; mrgIdx++) + { + if (mrgDuplicated[mrgIdx]) + { + continue; + } + uint8_t mergeCand = mrgIdx + (tmType - 1) * GEO_MAX_NUM_UNI_CANDS; + mergeCtx[tmType].setMergeInfo(pu, mrgIdx); + m_pcInterSearch->deriveTMMv(pu); + mergeCtx[tmType].mvFieldNeighbours[(mrgIdx << 1)].mv = pu.mv[0]; + mergeCtx[tmType].mvFieldNeighbours[(mrgIdx << 1) + 1].mv = pu.mv[1]; + + geoTmBuffer[mergeCand] = m_acGeoMergeTmpBuffer[mergeCand].getBuf(localUnitArea); + m_pcInterSearch->motionCompensation(pu, geoTmBuffer[mergeCand]); + + // calculate SAD for each candidate + geoTmTempBuf[mergeCand] = m_acGeoSADTmpBuffer[mergeCand].getBuf(localUnitArea); + geoTmTempBuf[mergeCand].Y().copyFrom(geoTmBuffer[mergeCand].Y()); + geoTmTempBuf[mergeCand].Y().roundToOutputBitdepth(geoTmTempBuf[mergeCand].Y(), cu.slice->clpRng(COMPONENT_Y)); + distParamWholeBlk.cur.buf = geoTmTempBuf[mergeCand].Y().buf; + distParamWholeBlk.cur.stride = geoTmTempBuf[mergeCand].Y().stride; + sadTmWholeBlk[mergeCand] = distParamWholeBlk.distFunc(distParamWholeBlk); + } + } + pu.tmMergeFlag = false; + + for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++) + { + int16_t angle = g_GeoParams[splitDir][0]; + if (g_angle2mirror[angle] == 2) + { + stepX = 1; + maskStride = -GEO_WEIGHT_MASK_SIZE; + maskStride2 = -(int)cu.lwidth(); + SADmask = &g_globalGeoEncSADmask[g_angle2mask[g_GeoParams[splitDir][0]]][(GEO_WEIGHT_MASK_SIZE - 1 - g_weightOffset[splitDir][hIdx][wIdx][1]) * GEO_WEIGHT_MASK_SIZE + g_weightOffset[splitDir][hIdx][wIdx][0]]; + } + else if (g_angle2mirror[angle] == 1) + { + stepX = -1; + maskStride2 = cu.lwidth(); + maskStride = GEO_WEIGHT_MASK_SIZE; + SADmask = &g_globalGeoEncSADmask[g_angle2mask[g_GeoParams[splitDir][0]]][g_weightOffset[splitDir][hIdx][wIdx][1] * GEO_WEIGHT_MASK_SIZE + (GEO_WEIGHT_MASK_SIZE - 1 - g_weightOffset[splitDir][hIdx][wIdx][0])]; + } + else + { + stepX = 1; + maskStride = 
GEO_WEIGHT_MASK_SIZE; + maskStride2 = -(int)cu.lwidth(); + SADmask = &g_globalGeoEncSADmask[g_angle2mask[g_GeoParams[splitDir][0]]][g_weightOffset[splitDir][hIdx][wIdx][1] * GEO_WEIGHT_MASK_SIZE + g_weightOffset[splitDir][hIdx][wIdx][0]]; + } + for (uint8_t mergeCand = 0; mergeCand < maxNumTmMrgCand; mergeCand++) + { + if (mrgDuplicated[mergeCand]) + { + continue; + } + + uint8_t mergeCand0 = mergeCand + (g_geoTmShape[0][g_GeoParams[splitDir][0]] - 1) * GEO_MAX_NUM_UNI_CANDS; + m_pcRdCost->setDistParam(distParam, tempCS->getOrgBuf().Y(), geoTmTempBuf[mergeCand0].Y().buf, geoTmTempBuf[mergeCand0].Y().stride, SADmask, maskStride, stepX, maskStride2, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y); + sadLarge = distParam.distFunc(distParam); + double tempCost = (double)sadLarge + geoMergeIdxCost[mergeCand] + geoMMVDFlagCost[0]; + m_GeoMMVDCostList.insert(splitDir, 0, mergeCand, (GPM_EXT_MMVD_MAX_REFINE_NUM + 1), tempCost); + + uint8_t mergeCand1 = mergeCand + (g_geoTmShape[1][g_GeoParams[splitDir][0]] - 1) * GEO_MAX_NUM_UNI_CANDS; + m_pcRdCost->setDistParam(distParam, tempCS->getOrgBuf().Y(), geoTmTempBuf[mergeCand1].Y().buf, geoTmTempBuf[mergeCand1].Y().stride, SADmask, maskStride, stepX, maskStride2, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y); + sadSmall = sadTmWholeBlk[mergeCand1] - distParam.distFunc(distParam); + tempCost = (double)sadSmall + geoMergeIdxCost[mergeCand] + geoMMVDFlagCost[0]; + m_GeoMMVDCostList.insert(splitDir, 1, mergeCand, (GPM_EXT_MMVD_MAX_REFINE_NUM + 1), tempCost); + } + } + + for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++) + { + for (int mergeCand0 = 0; mergeCand0 < maxNumTmMrgCand; mergeCand0++) + { + if (mrgDuplicated[mergeCand0]) + { + continue; + } + for (int mergeCand1 = 0; mergeCand1 < maxNumTmMrgCand; mergeCand1++) + { + if (mrgDuplicated[mergeCand1]) + { + continue; + } + if (mergeCand0 == mergeCand1) + { + continue; + } + double tempCost = 
m_GeoMMVDCostList.singleDistList[0][splitDir][mergeCand0][GPM_EXT_MMVD_MAX_REFINE_NUM + 1].cost + m_GeoMMVDCostList.singleDistList[1][splitDir][mergeCand1][GPM_EXT_MMVD_MAX_REFINE_NUM + 1].cost; + tempCost = tempCost + geoModeCost[splitDir] + geoTMFlagCost[1]; + updateGeoMMVDCandList(tempCost, splitDir, mergeCand0, mergeCand1, (GPM_EXT_MMVD_MAX_REFINE_NUM + 1), (GPM_EXT_MMVD_MAX_REFINE_NUM + 1), + geoSADCostList, geoSplitDirList, geoMergeCand0, geoMergeCand1, geoMmvdCand0, geoMmvdCand1, numSATDCands); + } + } + } + } +#endif + + int numberGeoCandChecked = (int)geoSADCostList.size(); + if (numberGeoCandChecked == 0) + { + return; + } + + geoNumMrgSATDCand = min(GEO_MAX_TRY_WEIGHTED_SATD, numberGeoCandChecked); + if (geoRdModeList.size() > geoNumMrgSATDCand) + { + geoRdModeList.resize(geoNumMrgSATDCand); + isNonMMVDListIdx.resize(geoNumMrgSATDCand); + geoPartitionModeList.resize(geoNumMrgSATDCand); + geocandCostList.resize(geoNumMrgSATDCand); + } + for (uint8_t candidateIdx = 0; candidateIdx < numberGeoCandChecked; candidateIdx++) + { + int splitDir = geoSplitDirList[candidateIdx]; + int mergeCand0 = geoMergeCand0[candidateIdx]; + int mergeCand1 = geoMergeCand1[candidateIdx]; +#if TM_MRG + bool tmFlag0 = (geoMmvdCand0[candidateIdx] == (GPM_EXT_MMVD_MAX_REFINE_NUM + 1)); + bool tmFlag1 = (geoMmvdCand1[candidateIdx] == (GPM_EXT_MMVD_MAX_REFINE_NUM + 1)); + bool mmvdFlag0 = (geoMmvdCand0[candidateIdx] >= 1 && geoMmvdCand0[candidateIdx] <= GPM_EXT_MMVD_MAX_REFINE_NUM); + bool mmvdFlag1 = (geoMmvdCand1[candidateIdx] >= 1 && geoMmvdCand1[candidateIdx] <= GPM_EXT_MMVD_MAX_REFINE_NUM); + int mmvdCand0 = (mmvdFlag0 ? (geoMmvdCand0[candidateIdx] - 1) : MAX_INT); + int mmvdCand1 = (mmvdFlag1 ? 
(geoMmvdCand1[candidateIdx] - 1) : MAX_INT); + CHECK(tmFlag0 != tmFlag1, "TM flag cannot be enabled/disabled for two partitions separately"); + + if (!tmFlag0 && !tmFlag1 && !mmvdFlag0 && !mmvdFlag1) + { + continue; + } + if (tmFlag0 && mergeCand0 == mergeCand1) + { + continue; + } +#else + int mmvdCand0 = geoMmvdCand0[candidateIdx] - 1; + int mmvdCand1 = geoMmvdCand1[candidateIdx] - 1; + bool mmvdFlag0 = (mmvdCand0 >= 0); + bool mmvdFlag1 = (mmvdCand1 >= 0); + + if (!mmvdFlag0 && !mmvdFlag1) + { + continue; + } +#endif + + geoCombinations[candidateIdx] = m_acGeoWeightedBuffer[candidateIdx].getBuf(localUnitArea); +#if TM_MRG + PelUnitBuf predSrc0, predSrc1; + if (tmFlag0) + { + int mrgTmCand0 = mergeCand0 + (g_geoTmShape[0][g_GeoParams[splitDir][0]] - 1) * GEO_MAX_NUM_UNI_CANDS; + predSrc0 = geoTmBuffer[mrgTmCand0]; + } + else if (mmvdFlag0) + { + predSrc0 = geoMMVDBuf[mergeCand0][mmvdCand0]; + } + else + { + predSrc0 = geoBuffer[mergeCand0]; + } + + if (tmFlag1) + { + int mrgTmCand1 = mergeCand1 + (g_geoTmShape[1][g_GeoParams[splitDir][0]] - 1) * GEO_MAX_NUM_UNI_CANDS; + predSrc1 = geoTmBuffer[mrgTmCand1]; + } + else if (mmvdFlag1) + { + predSrc1 = geoMMVDBuf[mergeCand1][mmvdCand1]; + } + else + { + predSrc1 = geoBuffer[mergeCand1]; + } + + m_pcInterSearch->weightedGeoBlk(pu, splitDir, CHANNEL_TYPE_LUMA, geoCombinations[candidateIdx], predSrc0, predSrc1); +#else + m_pcInterSearch->weightedGeoBlk(pu, splitDir, CHANNEL_TYPE_LUMA, geoCombinations[candidateIdx], (mmvdFlag0 ? geoMMVDBuf[mergeCand0][mmvdCand0] : geoBuffer[mergeCand0]), (mmvdFlag1 ? 
geoMMVDBuf[mergeCand1][mmvdCand1] : geoBuffer[mergeCand1])); +#endif + distParamSAD2.cur = geoCombinations[candidateIdx].Y(); + Distortion sad = distParamSAD2.distFunc(distParamSAD2); + + double updateCost = geoModeCost[splitDir] + geoMergeIdxCost[mergeCand0] + geoMergeIdxCost[mergeCand1] + geoMMVDFlagCost[mmvdFlag0] + geoMMVDFlagCost[mmvdFlag1]; + if (mmvdFlag0) + { + updateCost += geoMMVDIdxCost[mmvdCand0]; + } + if (mmvdFlag1) + { + updateCost += geoMMVDIdxCost[mmvdCand1]; + } +#if TM_MRG + if (sps.getUseDMVDMode()) + { + if (!mmvdFlag0 && !mmvdFlag1) + { + updateCost += geoTMFlagCost[tmFlag0]; + } + } +#endif + updateCost += (double)sad; + orderCandList(candidateIdx, false, splitDir, updateCost, geoRdModeList, isNonMMVDListIdx, geoPartitionModeList, geocandCostList, geoNumMrgSATDCand); + } + + if (fastGpmMmvdRelatedCU) + { + int cnt = 0; + for (uint8_t i = 0; i < geoNumMrgSATDCand; i++) + { + if (isNonMMVDListIdx[i] == false) + { + relatedCU.geoDirCandList[cnt] = geoPartitionModeList[i]; + relatedCU.geoMrgIdx0List[cnt] = geoMergeCand0[geoRdModeList[i]]; + relatedCU.geoMrgIdx1List[cnt] = geoMergeCand1[geoRdModeList[i]]; + cnt++; + } + } + relatedCU.numGeoDirCand = cnt; + } + else + { + relatedCU.numGeoDirCand = geoNumMrgSATDCand; + for (uint8_t i = 0; i < geoNumMrgSATDCand; i++) + { + relatedCU.geoDirCandList[i] = geoPartitionModeList[i]; + } + } + + for (uint8_t i = 1; i < geoNumMrgSATDCand; i++) + { +#if MERGE_ENC_OPT + if (geocandCostList[i] > MRG_FAST_RATIO * geocandCostList[0] || geocandCostList[i] > getMergeBestSATDCost()) +#else + if (geocandCostList[i] > MRG_FAST_RATIO * geocandCostList[0] || geocandCostList[i] > getMergeBestSATDCost() || geocandCostList[i] > getAFFBestSATDCost()) +#endif + { + geoNumMrgSATDCand = i; + break; + } + } + for (uint8_t i = 0; i < geoNumMrgSATDCand && isChromaEnabled(pu.chromaFormat); i++) + { + if (isNonMMVDListIdx[i]) + { + continue; + } + uint8_t candidateIdx = geoRdModeList[i]; + int splitDir = 
geoSplitDirList[candidateIdx]; + int mergeCand0 = geoMergeCand0[candidateIdx]; + int mergeCand1 = geoMergeCand1[candidateIdx]; +#if TM_MRG + bool tmFlag0 = (geoMmvdCand0[candidateIdx] == (GPM_EXT_MMVD_MAX_REFINE_NUM + 1)); + bool tmFlag1 = (geoMmvdCand1[candidateIdx] == (GPM_EXT_MMVD_MAX_REFINE_NUM + 1)); + bool mmvdFlag0 = (geoMmvdCand0[candidateIdx] >= 1 && geoMmvdCand0[candidateIdx] <= GPM_EXT_MMVD_MAX_REFINE_NUM); + bool mmvdFlag1 = (geoMmvdCand1[candidateIdx] >= 1 && geoMmvdCand1[candidateIdx] <= GPM_EXT_MMVD_MAX_REFINE_NUM); + int mmvdCand0 = (mmvdFlag0 ? (geoMmvdCand0[candidateIdx] - 1) : MAX_INT); + int mmvdCand1 = (mmvdFlag1 ? (geoMmvdCand1[candidateIdx] - 1) : MAX_INT); +#else + int mmvdCand0 = geoMmvdCand0[candidateIdx] - 1; + int mmvdCand1 = geoMmvdCand1[candidateIdx] - 1; + + bool mmvdFlag0 = (mmvdCand0 >= 0); + bool mmvdFlag1 = (mmvdCand1 >= 0); +#endif + +#if TM_MRG + int mrgTmCand0 = MAX_INT, mrgTmCand1 = MAX_INT; + if (tmFlag0) + { + int geoTmType = g_geoTmShape[0][g_GeoParams[splitDir][0]]; + mrgTmCand0 = mergeCand0 + (geoTmType - 1) * GEO_MAX_NUM_UNI_CANDS; + if (!isGeoTmChromaAvail[mrgTmCand0]) + { + mergeCtx[geoTmType].setMergeInfo(pu, mergeCand0); + m_pcInterSearch->motionCompensation(pu, geoTmBuffer[mrgTmCand0], REF_PIC_LIST_X, false, true); + isGeoTmChromaAvail[mrgTmCand0] = true; + } + } + else +#endif + if (mmvdFlag0) + { + if (!isGeoMMVDChromaAvail[mergeCand0][mmvdCand0]) + { +#if TM_MRG + mergeCtx[GEO_TM_OFF].setGeoMmvdMergeInfo(pu, mergeCand0, mmvdCand0); +#else + mergeCtx.setGeoMmvdMergeInfo(pu, mergeCand0, mmvdCand0); +#endif + m_pcInterSearch->motionCompensation(pu, geoMMVDBuf[mergeCand0][mmvdCand0], REF_PIC_LIST_X, false, true); + isGeoMMVDChromaAvail[mergeCand0][mmvdCand0] = true; + } + } + else + { + if (!isGeoChromaAvail[mergeCand0]) + { +#if TM_MRG + mergeCtx[GEO_TM_OFF].setMergeInfo(pu, mergeCand0); +#else + mergeCtx.setMergeInfo(pu, mergeCand0); +#endif + m_pcInterSearch->motionCompensation(pu, geoBuffer[mergeCand0], 
REF_PIC_LIST_X, false, true); + isGeoChromaAvail[mergeCand0] = true; + } + } + +#if TM_MRG + if (tmFlag1) + { + int geoTmType = g_geoTmShape[1][g_GeoParams[splitDir][0]]; + mrgTmCand1 = mergeCand1 + (geoTmType - 1) * GEO_MAX_NUM_UNI_CANDS; + if (!isGeoTmChromaAvail[mrgTmCand1]) + { + mergeCtx[geoTmType].setMergeInfo(pu, mergeCand1); + m_pcInterSearch->motionCompensation(pu, geoTmBuffer[mrgTmCand1], REF_PIC_LIST_X, false, true); + isGeoTmChromaAvail[mrgTmCand1] = true; + } + } + else +#endif + if (mmvdFlag1) + { + if (!isGeoMMVDChromaAvail[mergeCand1][mmvdCand1]) + { +#if TM_MRG + mergeCtx[GEO_TM_OFF].setGeoMmvdMergeInfo(pu, mergeCand1, mmvdCand1); +#else + mergeCtx.setGeoMmvdMergeInfo(pu, mergeCand1, mmvdCand1); +#endif + m_pcInterSearch->motionCompensation(pu, geoMMVDBuf[mergeCand1][mmvdCand1], REF_PIC_LIST_X, false, true); + isGeoMMVDChromaAvail[mergeCand1][mmvdCand1] = true; + } + } + else + { + if (!isGeoChromaAvail[mergeCand1]) + { +#if TM_MRG + mergeCtx[GEO_TM_OFF].setMergeInfo(pu, mergeCand1); +#else + mergeCtx.setMergeInfo(pu, mergeCand1); +#endif + m_pcInterSearch->motionCompensation(pu, geoBuffer[mergeCand1], REF_PIC_LIST_X, false, true); + isGeoChromaAvail[mergeCand1] = true; + } + } + + geoCombinations[candidateIdx] = m_acGeoWeightedBuffer[candidateIdx].getBuf(localUnitArea); +#if TM_MRG + PelUnitBuf predSrc0, predSrc1; + if (tmFlag0) + { + predSrc0 = geoTmBuffer[mrgTmCand0]; + } + else if (mmvdFlag0) + { + predSrc0 = geoMMVDBuf[mergeCand0][mmvdCand0]; + } + else + { + predSrc0 = geoBuffer[mergeCand0]; + } + + if (tmFlag1) + { + predSrc1 = geoTmBuffer[mrgTmCand1]; + } + else if (mmvdFlag1) + { + predSrc1 = geoMMVDBuf[mergeCand1][mmvdCand1]; + } + else + { + predSrc1 = geoBuffer[mergeCand1]; + } + + m_pcInterSearch->weightedGeoBlk(pu, splitDir, CHANNEL_TYPE_CHROMA, geoCombinations[candidateIdx], predSrc0, predSrc1); +#else + m_pcInterSearch->weightedGeoBlk(pu, splitDir, CHANNEL_TYPE_CHROMA, geoCombinations[candidateIdx], (mmvdFlag0 ? 
geoMMVDBuf[mergeCand0][mmvdCand0] : geoBuffer[mergeCand0]), (mmvdFlag1 ? geoMMVDBuf[mergeCand1][mmvdCand1] : geoBuffer[mergeCand1])); +#endif + } + + std::memset(geocandHasNoResidual, false, GEO_MAX_TRY_WEIGHTED_SAD * sizeof(bool)); + tempCS->initStructData(encTestMode.qp); + uint8_t iteration = 2, iterationBegin = 0; + for (uint8_t noResidualPass = iterationBegin; noResidualPass < iteration; ++noResidualPass) + { + for (uint8_t mrgHADIdx = 0; mrgHADIdx < geoNumMrgSATDCand; mrgHADIdx++) + { + if (isNonMMVDListIdx[mrgHADIdx]) + { + continue; + } + uint8_t candidateIdx = geoRdModeList[mrgHADIdx]; + if (((noResidualPass != 0) && geocandHasNoResidual[candidateIdx]) + || ((noResidualPass == 0) && bestIsSkip)) + { + continue; + } + CodingUnit &cu = tempCS->addCU(tempCS->area, pm.chType); + pm.setCUData(cu); + cu.predMode = MODE_INTER; + cu.slice = tempCS->slice; + cu.tileIdx = tempCS->pps->getTileIdx(tempCS->area.lumaPos()); + cu.qp = encTestMode.qp; + cu.affine = false; + cu.mtsFlag = false; +#if INTER_LIC + cu.LICFlag = false; +#endif + cu.BcwIdx = BCW_DEFAULT; + cu.geoFlag = true; + cu.imv = 0; + cu.mmvdSkip = false; + cu.skip = false; + cu.mipFlag = false; + cu.bdpcmMode = 0; + PredictionUnit &pu = tempCS->addPU(cu, pm.chType); + pu.mergeFlag = true; + pu.regularMergeFlag = false; + pu.geoSplitDir = geoSplitDirList[candidateIdx]; + pu.geoMergeIdx0 = geoMergeCand0[candidateIdx]; + pu.geoMergeIdx1 = geoMergeCand1[candidateIdx]; +#if TM_MRG + pu.geoTmFlag0 = (geoMmvdCand0[candidateIdx] == (GPM_EXT_MMVD_MAX_REFINE_NUM + 1)); + pu.geoTmFlag1 = (geoMmvdCand1[candidateIdx] == (GPM_EXT_MMVD_MAX_REFINE_NUM + 1)); + pu.geoMMVDFlag0 = (geoMmvdCand0[candidateIdx] >= 1 && geoMmvdCand0[candidateIdx] <= GPM_EXT_MMVD_MAX_REFINE_NUM); + pu.geoMMVDFlag1 = (geoMmvdCand1[candidateIdx] >= 1 && geoMmvdCand1[candidateIdx] <= GPM_EXT_MMVD_MAX_REFINE_NUM); + CHECK(pu.geoTmFlag0 != pu.geoTmFlag1, "TM flag cannot be enabled/disabled for two partitions separately"); + pu.tmMergeFlag = 
pu.geoTmFlag0; +#else + pu.geoMMVDFlag0 = (geoMmvdCand0[candidateIdx] > 0); + pu.geoMMVDFlag1 = (geoMmvdCand1[candidateIdx] > 0); +#endif + + if (pu.geoMMVDFlag0) + { + pu.geoMMVDIdx0 = (geoMmvdCand0[candidateIdx] - 1); + } + if (pu.geoMMVDFlag1) + { + pu.geoMMVDIdx1 = (geoMmvdCand1[candidateIdx] - 1); + } + pu.mmvdMergeFlag = false; + pu.mmvdMergeIdx = MAX_UCHAR; +#if TM_MRG + MergeCtx* mrgTmCtx0 = (pu.geoTmFlag0 == 0 ? nullptr : &mergeCtx[g_geoTmShape[0][g_GeoParams[pu.geoSplitDir][0]]]); + MergeCtx* mrgTmCtx1 = (pu.geoTmFlag1 == 0 ? nullptr : &mergeCtx[g_geoTmShape[1][g_GeoParams[pu.geoSplitDir][0]]]); + PU::spanGeoMMVDMotionInfo(pu, mergeCtx[GEO_TM_OFF], *mrgTmCtx0, *mrgTmCtx1, pu.geoSplitDir, pu.geoMergeIdx0, pu.geoMergeIdx1, pu.geoTmFlag0, pu.geoMMVDFlag0, pu.geoMMVDIdx0, pu.geoTmFlag1, pu.geoMMVDFlag1, pu.geoMMVDIdx1); +#else + PU::spanGeoMMVDMotionInfo(pu, mergeCtx, pu.geoSplitDir, pu.geoMergeIdx0, pu.geoMergeIdx1, pu.geoMMVDFlag0, pu.geoMMVDIdx0, pu.geoMMVDFlag1, pu.geoMMVDIdx1); +#endif + tempCS->getPredBuf().copyFrom(geoCombinations[candidateIdx]); +#if ENABLE_OBMC + cu.isobmcMC = true; + cu.obmcFlag = true; + m_pcInterSearch->subBlockOBMC(pu); + cu.isobmcMC = false; +#endif + + xEncodeInterResidual(tempCS, bestCS, pm, encTestMode, noResidualPass, (noResidualPass == 0 ? 
&geocandHasNoResidual[candidateIdx] : NULL)); + + if (m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip) + { + bestIsSkip = bestCS->getCU(pm.chType)->rootCbf == 0; + } + tempCS->initStructData(encTestMode.qp); + } + } + } + if (m_bestModeUpdated && bestCS->cost != MAX_DOUBLE) + { + xCalDebCost(*bestCS, pm); + } +} +#else +void EncCu::xCheckRDCostMergeGeo2Nx2N(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode) +{ + const Slice &slice = *tempCS->slice; + CHECK(slice.getSliceType() == I_SLICE, "Merge modes not available for I-slices"); + + tempCS->initStructData(encTestMode.qp); + + MergeCtx mergeCtx; + const SPS &sps = *tempCS->sps; + + if (sps.getSbTMVPEnabledFlag()) + { + Size bufSize = g_miScaling.scale(tempCS->area.lumaSize()); + mergeCtx.subPuMvpMiBuf = MotionBuf(m_SubPuMiBuf, bufSize); + } + + CodingUnit &cu = tempCS->addCU(tempCS->area, pm.chType); + pm.setCUData(cu); + cu.predMode = MODE_INTER; + cu.slice = tempCS->slice; + cu.tileIdx = tempCS->pps->getTileIdx(tempCS->area.lumaPos()); + cu.qp = encTestMode.qp; + cu.affine = false; + cu.mtsFlag = false; +#if INTER_LIC + cu.LICFlag = false; +#endif + cu.BcwIdx = BCW_DEFAULT; + cu.geoFlag = true; + cu.imv = 0; + cu.mmvdSkip = false; + cu.skip = false; + cu.mipFlag = false; +#if JVET_V0130_INTRA_TMP + cu.tmpFlag = false; +#endif + cu.bdpcmMode = 0; + + PredictionUnit &pu = tempCS->addPU(cu, pm.chType); + pu.mergeFlag = true; + pu.regularMergeFlag = false; +#if TM_MRG + pu.tmMergeFlag = false; +#endif + PU::getGeoMergeCandidates(pu, mergeCtx); + + GeoComboCostList comboList; + int bitsCandTB = floorLog2(GEO_NUM_PARTITION_MODE); + PelUnitBuf geoBuffer[GEO_MAX_NUM_UNI_CANDS]; + PelUnitBuf geoTempBuf[GEO_MAX_NUM_UNI_CANDS]; + PelUnitBuf geoCombinations[GEO_MAX_TRY_WEIGHTED_SAD]; + DistParam distParam; + + const UnitArea localUnitArea(tempCS->area.chromaFormat, Area(0, 0, tempCS->area.Y().width, tempCS->area.Y().height)); + const double sqrtLambdaForFirstPass = 
m_pcRdCost->getMotionLambda(); + uint8_t maxNumMergeCandidates = cu.cs->sps->getMaxNumGeoCand(); + DistParam distParamWholeBlk; + m_pcRdCost->setDistParam(distParamWholeBlk, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y().buf, m_acMergeBuffer[0].Y().stride, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y); + Distortion bestWholeBlkSad = MAX_UINT64; + double bestWholeBlkCost = MAX_DOUBLE; + + Distortion sadWholeBlk[GEO_MAX_NUM_UNI_CANDS]; + int pocMrg[GEO_MAX_NUM_UNI_CANDS]; + Mv MrgMv[GEO_MAX_NUM_UNI_CANDS]; + bool isSkipThisCand[GEO_MAX_NUM_UNI_CANDS] = { false }; + + for (uint8_t mergeCand = 0; mergeCand < maxNumMergeCandidates; mergeCand++) + { + geoBuffer[mergeCand] = m_acMergeBuffer[mergeCand].getBuf(localUnitArea); + mergeCtx.setMergeInfo(pu, mergeCand); + int MrgList = mergeCtx.mvFieldNeighbours[(mergeCand << 1) + 0].refIdx == -1 ? 1 : 0; + RefPicList MrgeRefPicList = (MrgList ? REF_PIC_LIST_1 : REF_PIC_LIST_0); + int MrgrefIdx = mergeCtx.mvFieldNeighbours[(mergeCand << 1) + MrgList].refIdx; + pocMrg[mergeCand] = tempCS->slice->getRefPic(MrgeRefPicList, MrgrefIdx)->getPOC(); + MrgMv[mergeCand] = mergeCtx.mvFieldNeighbours[(mergeCand << 1) + MrgList].mv; + + for( int i = 0; i < mergeCand; i++ ) + { + if( pocMrg[mergeCand] == pocMrg[i] && MrgMv[mergeCand] == MrgMv[i] ) + { + isSkipThisCand[mergeCand] = true; + break; + } + } + + if (m_pcEncCfg->getMCTSEncConstraint() && (!(MCTSHelper::checkMvBufferForMCTSConstraint(pu)))) + { + tempCS->initStructData(encTestMode.qp); + return; + } + m_pcInterSearch->motionCompensation(pu, geoBuffer[mergeCand]); +#if MULTI_HYP_PRED + geoTempBuf[mergeCand] = m_acRealMergeBuffer[MRG_MAX_NUM_CANDS + mergeCand].getBuf(localUnitArea); +#else + geoTempBuf[mergeCand] = m_acMergeTmpBuffer[mergeCand].getBuf(localUnitArea); +#endif + geoTempBuf[mergeCand].Y().copyFrom(geoBuffer[mergeCand].Y()); + geoTempBuf[mergeCand].Y().roundToOutputBitdepth(geoTempBuf[mergeCand].Y(), cu.slice->clpRng(COMPONENT_Y)); + distParamWholeBlk.cur.buf = 
geoTempBuf[mergeCand].Y().buf; + distParamWholeBlk.cur.stride = geoTempBuf[mergeCand].Y().stride; + sadWholeBlk[mergeCand] = distParamWholeBlk.distFunc(distParamWholeBlk); + if (sadWholeBlk[mergeCand] < bestWholeBlkSad) + { + bestWholeBlkSad = sadWholeBlk[mergeCand]; + bestWholeBlkCost = ( double ) bestWholeBlkSad + ( mergeCand + 1 ) * sqrtLambdaForFirstPass; + } + } +#if MULTI_HYP_PRED + m_pcInterSearch->setGeoTmpBuffer(mergeCtx); +#endif + bool isGeo = true; + for (uint8_t mergeCand = 1; mergeCand < maxNumMergeCandidates; mergeCand++) + { + isGeo &= isSkipThisCand[mergeCand]; + } + if (isGeo) + { + return; + } + + int wIdx = floorLog2(cu.lwidth()) - GEO_MIN_CU_LOG2; + int hIdx = floorLog2(cu.lheight()) - GEO_MIN_CU_LOG2; + for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++) + { + int maskStride = 0, maskStride2 = 0; + int stepX = 1; + Pel* SADmask; + int16_t angle = g_GeoParams[splitDir][0]; + if (g_angle2mirror[angle] == 2) + { + maskStride = -GEO_WEIGHT_MASK_SIZE; + maskStride2 = -(int)cu.lwidth(); + SADmask = &g_globalGeoEncSADmask[g_angle2mask[g_GeoParams[splitDir][0]]][(GEO_WEIGHT_MASK_SIZE - 1 - g_weightOffset[splitDir][hIdx][wIdx][1]) * GEO_WEIGHT_MASK_SIZE + g_weightOffset[splitDir][hIdx][wIdx][0]]; + } + else if (g_angle2mirror[angle] == 1) + { + stepX = -1; + maskStride2 = cu.lwidth(); + maskStride = GEO_WEIGHT_MASK_SIZE; + SADmask = &g_globalGeoEncSADmask[g_angle2mask[g_GeoParams[splitDir][0]]][g_weightOffset[splitDir][hIdx][wIdx][1] * GEO_WEIGHT_MASK_SIZE + (GEO_WEIGHT_MASK_SIZE - 1 - g_weightOffset[splitDir][hIdx][wIdx][0])]; + } + else + { + maskStride = GEO_WEIGHT_MASK_SIZE; + maskStride2 = -(int)cu.lwidth(); + SADmask = &g_globalGeoEncSADmask[g_angle2mask[g_GeoParams[splitDir][0]]][g_weightOffset[splitDir][hIdx][wIdx][1] * GEO_WEIGHT_MASK_SIZE + g_weightOffset[splitDir][hIdx][wIdx][0]]; + } + Distortion sadSmall = 0, sadLarge = 0; + for (uint8_t mergeCand = 0; mergeCand < maxNumMergeCandidates; mergeCand++) + { + int bitsCand = 
mergeCand + 1; + + m_pcRdCost->setDistParam(distParam, tempCS->getOrgBuf().Y(), geoTempBuf[mergeCand].Y().buf, geoTempBuf[mergeCand].Y().stride, SADmask, maskStride, stepX, maskStride2, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y); + sadLarge = distParam.distFunc(distParam); + m_GeoCostList.insert(splitDir, 0, mergeCand, (double)sadLarge + (double)bitsCand * sqrtLambdaForFirstPass); + sadSmall = sadWholeBlk[mergeCand] - sadLarge; + m_GeoCostList.insert(splitDir, 1, mergeCand, (double)sadSmall + (double)bitsCand * sqrtLambdaForFirstPass); + } + } + + for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++) + { + for (int GeoMotionIdx = 0; GeoMotionIdx < maxNumMergeCandidates * (maxNumMergeCandidates - 1); GeoMotionIdx++) + { + unsigned int mergeCand0 = m_GeoModeTest[GeoMotionIdx].m_candIdx0; + unsigned int mergeCand1 = m_GeoModeTest[GeoMotionIdx].m_candIdx1; + double tempCost = m_GeoCostList.singleDistList[0][splitDir][mergeCand0].cost + m_GeoCostList.singleDistList[1][splitDir][mergeCand1].cost; + if( tempCost > bestWholeBlkCost ) + { + continue; + } + tempCost = tempCost + (double)bitsCandTB * sqrtLambdaForFirstPass; + comboList.list.push_back(GeoMergeCombo(splitDir, mergeCand0, mergeCand1, tempCost)); + } + } + if( comboList.list.empty() ) + { + return; + } + + comboList.sortByCost(); + + bool geocandHasNoResidual[GEO_MAX_TRY_WEIGHTED_SAD] = { false }; + bool bestIsSkip = false; + int geoNumCobo = (int)comboList.list.size(); + static_vector<uint8_t, GEO_MAX_TRY_WEIGHTED_SAD> geoRdModeList; + static_vector<double, GEO_MAX_TRY_WEIGHTED_SAD> geocandCostList; + + DistParam distParamSAD2; + const bool useHadamard = !tempCS->slice->getDisableSATDForRD(); + m_pcRdCost->setDistParam(distParamSAD2, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y(), sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, useHadamard); + int geoNumMrgSATDCand = min(GEO_MAX_TRY_WEIGHTED_SATD, geoNumCobo); + + for (uint8_t candidateIdx = 0; candidateIdx < min(geoNumCobo, 
GEO_MAX_TRY_WEIGHTED_SAD); candidateIdx++) + { + int splitDir = comboList.list[candidateIdx].splitDir; + int mergeCand0 = comboList.list[candidateIdx].mergeIdx0; + int mergeCand1 = comboList.list[candidateIdx].mergeIdx1; + + geoCombinations[candidateIdx] = m_acGeoWeightedBuffer[candidateIdx].getBuf(localUnitArea); + m_pcInterSearch->weightedGeoBlk(pu, splitDir, CHANNEL_TYPE_LUMA, geoCombinations[candidateIdx], geoBuffer[mergeCand0], geoBuffer[mergeCand1]); + distParamSAD2.cur = geoCombinations[candidateIdx].Y(); + Distortion sad = distParamSAD2.distFunc(distParamSAD2); + int mvBits = 2; + mergeCand1 -= mergeCand1 < mergeCand0 ? 0 : 1; + mvBits += mergeCand0; + mvBits += mergeCand1; + double updateCost = (double)sad + (double)(bitsCandTB + mvBits) * sqrtLambdaForFirstPass; + comboList.list[candidateIdx].cost = updateCost; + updateCandList(candidateIdx, updateCost, geoRdModeList, geocandCostList, geoNumMrgSATDCand); + } + for (uint8_t i = 0; i < geoNumMrgSATDCand; i++) + { + if (geocandCostList[i] > MRG_FAST_RATIO * geocandCostList[0] || geocandCostList[i] > getMergeBestSATDCost() || geocandCostList[i] > getAFFBestSATDCost()) + { + geoNumMrgSATDCand = i; + break; + } + } + for (uint8_t i = 0; i < geoNumMrgSATDCand && isChromaEnabled(pu.chromaFormat); i++) + { + uint8_t candidateIdx = geoRdModeList[i]; + int splitDir = comboList.list[candidateIdx].splitDir; + int mergeCand0 = comboList.list[candidateIdx].mergeIdx0; + int mergeCand1 = comboList.list[candidateIdx].mergeIdx1; + geoCombinations[candidateIdx] = m_acGeoWeightedBuffer[candidateIdx].getBuf(localUnitArea); + m_pcInterSearch->weightedGeoBlk(pu, splitDir, CHANNEL_TYPE_CHROMA, geoCombinations[candidateIdx], geoBuffer[mergeCand0], geoBuffer[mergeCand1]); + } + + m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false; + tempCS->initStructData(encTestMode.qp); + uint8_t iteration; + uint8_t iterationBegin = 0; + iteration = 2; + for (uint8_t noResidualPass = iterationBegin; noResidualPass < iteration; 
++noResidualPass) + { + for (uint8_t mrgHADIdx = 0; mrgHADIdx < geoNumMrgSATDCand; mrgHADIdx++) + { + uint8_t candidateIdx = geoRdModeList[mrgHADIdx]; + if (((noResidualPass != 0) && geocandHasNoResidual[candidateIdx]) + || ((noResidualPass == 0) && bestIsSkip)) + { + continue; + } + CodingUnit &cu = tempCS->addCU(tempCS->area, pm.chType); + pm.setCUData(cu); + cu.predMode = MODE_INTER; + cu.slice = tempCS->slice; + cu.tileIdx = tempCS->pps->getTileIdx(tempCS->area.lumaPos()); + cu.qp = encTestMode.qp; + cu.affine = false; + cu.mtsFlag = false; +#if INTER_LIC + cu.LICFlag = false; +#endif + cu.BcwIdx = BCW_DEFAULT; + cu.geoFlag = true; + cu.imv = 0; + cu.mmvdSkip = false; + cu.skip = false; + cu.mipFlag = false; +#if JVET_V0130_INTRA_TMP + cu.tmpFlag = false; +#endif + cu.bdpcmMode = 0; + PredictionUnit &pu = tempCS->addPU(cu, pm.chType); + pu.mergeFlag = true; + pu.regularMergeFlag = false; + pu.geoSplitDir = comboList.list[candidateIdx].splitDir; + pu.geoMergeIdx0 = comboList.list[candidateIdx].mergeIdx0; + pu.geoMergeIdx1 = comboList.list[candidateIdx].mergeIdx1; + pu.mmvdMergeFlag = false; + pu.mmvdMergeIdx = MAX_UCHAR; + + PU::spanGeoMotionInfo(pu, mergeCtx, pu.geoSplitDir, pu.geoMergeIdx0, pu.geoMergeIdx1); + tempCS->getPredBuf().copyFrom(geoCombinations[candidateIdx]); +#if ENABLE_OBMC + cu.isobmcMC = true; + cu.obmcFlag = true; + m_pcInterSearch->subBlockOBMC(pu); + cu.isobmcMC = false; +#endif + xEncodeInterResidual(tempCS, bestCS, pm, encTestMode, noResidualPass, (noResidualPass == 0 ? 
&geocandHasNoResidual[candidateIdx] : NULL)); + + if (m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip) + { + bestIsSkip = bestCS->getCU(pm.chType)->rootCbf == 0; + } + tempCS->initStructData(encTestMode.qp); + } + } + if (m_bestModeUpdated && bestCS->cost != MAX_DOUBLE) + { + xCalDebCost(*bestCS, pm); + } +} +#endif +#if MERGE_ENC_OPT +void EncCu::xCheckSATDCostRegularMerge(CodingStructure *&tempCS, CodingUnit &cu, PredictionUnit &pu, MergeCtx mergeCtx, PelUnitBuf *acMergeTempBuffer[MMVD_MRG_MAX_RD_NUM], PelUnitBuf *&singleMergeTempBuffer, PelUnitBuf acMergeTmpBuffer[MRG_MAX_NUM_CANDS] +#if !MULTI_PASS_DMVR + , Mv refinedMvdL0[MAX_NUM_PARTS_IN_CTU][MRG_MAX_NUM_CANDS] +#endif + , unsigned& uiNumMrgSATDCand, static_vector<ModeInfo, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM> &RdModeList, static_vector<double, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM> &candCostList, DistParam distParam, const TempCtx &ctxStart +#if MULTI_PASS_DMVR + , bool* applyBDMVR +#endif +) +{ +#if INTER_LIC + cu.LICFlag = false; +#endif + cu.mmvdSkip = false; + cu.geoFlag = false; cu.affine = false; cu.imv = 0; @@ -4820,7 +6131,7 @@ void EncCu::xCheckSATDCostAffineMmvdMerge( CodingStructure*& tempCS, pu.afMmvdFlag = false; } #endif - +#if !JVET_W0097_GPM_MMVD_TM void EncCu::xCheckSATDCostGeoMerge(CodingStructure *&tempCS, CodingUnit &cu, PredictionUnit &pu, MergeCtx geoMergeCtx, PelUnitBuf *acMergeTempBuffer[MMVD_MRG_MAX_RD_NUM], PelUnitBuf *&singleMergeTempBuffer , unsigned& uiNumMrgSATDCand, static_vector<ModeInfo, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM> &RdModeList, static_vector<double, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM> &candCostList, DistParam distParam, const TempCtx &ctxStart) { @@ -5028,6 +6339,7 @@ void EncCu::xCheckSATDCostGeoMerge(CodingStructure *&tempCS, CodingUnit &cu, Pre uiNumMrgSATDCand = (unsigned int)RdModeList.size(); } } +#endif #else void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode ) { @@ 
-5122,7 +6434,12 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct { RdModeList.clear(); mrgTempBufSet = true; +#if JVET_W0097_GPM_MMVD_TM + const double sqrtLambdaForFirstPassIntra = m_pcRdCost->getMotionLambda() * FRAC_BITS_SCALE; + const TempCtx ctxStart(m_CtxCache, m_CABACEstimator->getCtx()); +#else const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda( ); +#endif CodingUnit &cu = tempCS->addCU( tempCS->area, partitioner.chType ); @@ -5189,12 +6506,18 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct m_pcInterSearch->motionCompensation( pu, acMergeBuffer[uiMergeCand], REF_PIC_LIST_X, true, false ); Distortion uiSad = distParam.distFunc( distParam ); +#if JVET_W0097_GPM_MMVD_TM + m_CABACEstimator->getCtx() = ctxStart; + uint64_t fracBits = m_pcInterSearch->xCalcPuMeBits(pu); + double cost = (double)uiSad + (double)fracBits * sqrtLambdaForFirstPassIntra; +#else uint32_t uiBitsCand = uiMergeCand + 1; if ( uiMergeCand == tempCS->picHeader->getMaxNumAffineMergeCand() - 1 ) { uiBitsCand--; } double cost = (double)uiSad + (double)uiBitsCand * sqrtLambdaForFirstPass; +#endif #if MULTI_HYP_PRED if (testMHP && pu.addHypData.size() < tempCS->sps->getMaxNumAddHyps()) { @@ -5225,7 +6548,9 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct tempCS->initStructData( encTestMode.qp ); setAFFBestSATDCost(candCostList[0]); - +#if JVET_W0097_GPM_MMVD_TM + m_CABACEstimator->getCtx() = ctxStart; +#endif } else { diff --git a/source/Lib/EncoderLib/EncCu.h b/source/Lib/EncoderLib/EncCu.h index 0f30ed338..46c3a0410 100644 --- a/source/Lib/EncoderLib/EncCu.h +++ b/source/Lib/EncoderLib/EncCu.h @@ -148,7 +148,61 @@ public: } int numGeoTemplatesInitialized; }; +#if JVET_W0097_GPM_MMVD_TM +struct SingleGeoMMVDMergeEntry +{ + int mergeIdx; + int mmvdIdx; // 0 - mmvd OFF; 1 - mmvdIdx = 0; 2 - mmvdIdx = 1; 3 - mmvdIdx = 2; ... 
+ double cost; + SingleGeoMMVDMergeEntry() : mergeIdx(0), mmvdIdx(0), cost(MAX_DOUBLE) {}; + SingleGeoMMVDMergeEntry(int _mergeIdx, int _mmvdIdx, double _cost) : mergeIdx(_mergeIdx), mmvdIdx(_mmvdIdx), cost(_cost) {}; +};
+// Heap-allocated lookup table of single-partition GPM cost entries, indexed as
+// singleDistList[partIdx][splitDir][mergeIdx][mmvdIdx].
+//   partIdx  : 0 or 1 (the two geometric partitions)
+//   splitDir : 0 .. GEO_NUM_PARTITION_MODE - 1
+//   mergeIdx : 0 .. MRG_MAX_NUM_CANDS - 1
+//   mmvdIdx  : 0 .. GPM_EXT_MMVD_MAX_REFINE_NUM      (TM_MRG disabled)
+//              0 .. GPM_EXT_MMVD_MAX_REFINE_NUM + 1  (TM_MRG enabled; the extra slot is
+//              presumably the template-matching candidate - confirm against the callers)
+// The object owns every allocation; all of them are released in the destructor.
+class FastGeoMMVDCostList
+{
+public:
+  // Allocates the full table. Each entry is default-constructed by SingleGeoMMVDMergeEntry().
+  FastGeoMMVDCostList()
+  {
+    for (int partIdx = 0; partIdx < 2; partIdx++)
+    {
+      singleDistList[partIdx] = new SingleGeoMMVDMergeEntry**[GEO_NUM_PARTITION_MODE];
+      for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++)
+      {
+        singleDistList[partIdx][splitDir] = new SingleGeoMMVDMergeEntry*[MRG_MAX_NUM_CANDS];
+        for (int candIdx = 0; candIdx < MRG_MAX_NUM_CANDS; candIdx++)
+        {
+#if JVET_W0097_GPM_MMVD_TM && TM_MRG
+          singleDistList[partIdx][splitDir][candIdx] = new SingleGeoMMVDMergeEntry[GPM_EXT_MMVD_MAX_REFINE_NUM + 2];
+#else
+          singleDistList[partIdx][splitDir][candIdx] = new SingleGeoMMVDMergeEntry[GPM_EXT_MMVD_MAX_REFINE_NUM + 1];
+#endif
+        }
+      }
+    }
+  }
+  // Releases the table innermost-first, mirroring the allocation order in the constructor.
+  ~FastGeoMMVDCostList()
+  {
+    for (int partIdx = 0; partIdx < 2; partIdx++)
+    {
+      for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++)
+      {
+        for (int candIdx = 0; candIdx < MRG_MAX_NUM_CANDS; candIdx++)
+        {
+          delete[] singleDistList[partIdx][splitDir][candIdx];
+        }
+        delete[] singleDistList[partIdx][splitDir];
+      }
+      delete[] singleDistList[partIdx];
+      singleDistList[partIdx] = nullptr;
+    }
+  }
+  // The implicit copy operations would duplicate the raw owning pointers and make two
+  // destructors free the same arrays, so copying is disabled. Declaring these also
+  // suppresses the implicit move operations.
+  FastGeoMMVDCostList(const FastGeoMMVDCostList&) = delete;
+  FastGeoMMVDCostList& operator=(const FastGeoMMVDCostList&) = delete;
+  SingleGeoMMVDMergeEntry*** singleDistList[2];
+  // Stores (mergeIdx, mmvdIdx, cost) at [partIdx][geoIdx][mergeIdx][mmvdIdx].
+  // No bounds checking is performed; the caller must pass in-range indices.
+  void insert(int geoIdx, int partIdx, int mergeIdx, int mmvdIdx, double cost)
+  {
+    singleDistList[partIdx][geoIdx][mergeIdx][mmvdIdx] = SingleGeoMMVDMergeEntry(mergeIdx, mmvdIdx, cost);
+  }
+};
+#endif class EncCu : DecCu { @@ -212,6 +266,20 @@ private: PelStorage m_acGeoWeightedBuffer[GEO_MAX_TRY_WEIGHTED_SAD]; // to store weighted prediction pixels FastGeoCostList m_GeoCostList; +#if JVET_W0097_GPM_MMVD_TM + PelStorage m_acGeoMMVDBuffer[MRG_MAX_NUM_CANDS][GPM_EXT_MMVD_MAX_REFINE_NUM]; + PelStorage m_acGeoMMVDTmpBuffer[MRG_MAX_NUM_CANDS][GPM_EXT_MMVD_MAX_REFINE_NUM]; +
FastGeoMMVDCostList m_GeoMMVDCostList; + bool fastGpmMmvdSearch; + bool fastGpmMmvdRelatedCU; + bool includeMoreMMVDCandFirstPass; + int maxNumGPMDirFirstPass; + int numCandPerPar; +#if TM_MRG + PelStorage m_acGeoMergeTmpBuffer[GEO_TM_MAX_NUM_CANDS]; + PelStorage m_acGeoSADTmpBuffer[GEO_TM_MAX_NUM_CANDS]; +#endif +#endif double m_AFFBestSATDCost; double m_mergeBestSATDCost; MotionInfo m_SubPuMiBuf [( MAX_CU_SIZE * MAX_CU_SIZE ) >> ( MIN_CU_LOG2 << 1 )]; @@ -240,6 +308,10 @@ private: const bool updateRdCostLambda ); #endif double m_sbtCostSave[2]; +#if JVET_W0097_GPM_MMVD_TM + MergeCtx m_mergeCand; + bool m_mergeCandAvail; +#endif public: /// copy parameters from encoder class void init ( EncLib* pcEncLib, const SPS& sps PARL_PARAM( const int jId = 0 ) ); @@ -333,9 +405,11 @@ protected: #endif ); #endif +#if !JVET_W0097_GPM_MMVD_TM void xCheckSATDCostGeoMerge ( CodingStructure *&tempCS, CodingUnit &cu, PredictionUnit &pu, MergeCtx geoMergeCtx, PelUnitBuf *acMergeTempBuffer[MMVD_MRG_MAX_RD_NUM], PelUnitBuf *&singleMergeTempBuffer , unsigned& uiNumMrgSATDCand, static_vector<ModeInfo, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM> &RdModeList, static_vector<double, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM> &candCostList, DistParam distParam, const TempCtx &ctxStart); +#endif #else void xCheckRDCostAffineMerge2Nx2N ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode ); @@ -360,7 +434,11 @@ protected: #if ENABLE_OBMC void xCheckRDCostInterWoOBMC(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode); #endif +#if JVET_W0097_GPM_MMVD_TM + void xCheckRDCostMergeGeoComb2Nx2N(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode, bool isSecondPass = false); +#else void xCheckRDCostMergeGeo2Nx2N(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode); +#endif void xEncodeInterResidual( CodingStructure 
*&tempCS , CodingStructure *&bestCS , Partitioner &partitioner diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp index 16965b1ac..55baaafb0 100644 --- a/source/Lib/EncoderLib/EncLib.cpp +++ b/source/Lib/EncoderLib/EncLib.cpp @@ -2133,6 +2133,30 @@ void EncLib::xInitPicHeader(PicHeader &picHeader, const SPS &sps, const PPS &pps picHeader.setDisBdofFlag(false); picHeader.setDisDmvrFlag(false); picHeader.setDisProfFlag(false); +#if JVET_W0097_GPM_MMVD_TM + if (sps.getUseGeo()) + { +#if TOOLS + if (getIntraPeriod() > 0) + { + if ((getSourceWidth() * getSourceHeight()) > (1920 * 1080)) + { + picHeader.setGPMMMVDTableFlag(false); + } + else + { + picHeader.setGPMMMVDTableFlag(true); + } + } + else + { + picHeader.setGPMMMVDTableFlag(true); + } +#else + picHeader.setGPMMMVDTableFlag(false); +#endif + } +#endif } void EncLib::xInitAPS(APS &aps) diff --git a/source/Lib/EncoderLib/EncModeCtrl.cpp b/source/Lib/EncoderLib/EncModeCtrl.cpp index 007b25ea1..dc656f338 100644 --- a/source/Lib/EncoderLib/EncModeCtrl.cpp +++ b/source/Lib/EncoderLib/EncModeCtrl.cpp @@ -1251,6 +1251,9 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru cuECtx.set( DO_TRIV_SPLIT, 1 ); cuECtx.set( BEST_IMV_COST, MAX_DOUBLE * .5 ); cuECtx.set( BEST_NO_IMV_COST, MAX_DOUBLE * .5 ); +#if JVET_W0097_GPM_MMVD_TM + cuECtx.set( BEST_GPM_COST, MAX_DOUBLE * .5); +#endif cuECtx.set( QT_BEFORE_BT, qtBeforeBt ); cuECtx.set( DID_QUAD_SPLIT, false ); cuECtx.set( IS_BEST_NOSPLIT_SKIP, false ); @@ -1892,6 +1895,12 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt { return false; } +#if JVET_W0097_GPM_MMVD_TM + if (encTestmode.type == ETM_MERGE_GEO && relatedCU.skipGPM) + { + return false; + } +#endif return true; } else if( isModeSplit( encTestmode ) ) @@ -2131,6 +2140,27 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt // assume the non-split modes are done and set the marks for the best 
found mode if( bestCS && bestCU ) { +#if JVET_W0097_GPM_MMVD_TM + if (!slice.isIntra()) + { + double gpmCost = cuECtx.get<double>(BEST_GPM_COST); + if (gpmCost != (MAX_DOUBLE * .5)) + { + if (gpmCost > (bestCS->cost * 1.2)) + { + relatedCU.skipGPM = true; + } + else if (gpmCost > bestCS->cost && bestCU->skip) + { + relatedCU.skipGPM = true; + } + else if (gpmCost > bestCS->cost && (!cuECtx.bestTU->cbf[0] && !cuECtx.bestTU->cbf[1] && !cuECtx.bestTU->cbf[2])) + { + relatedCU.skipGPM = true; + } + } + } +#endif if( CU::isInter( *bestCU ) ) { relatedCU.isInter = true; @@ -2321,7 +2351,15 @@ bool EncModeCtrlMTnoRQT::useModeResult( const EncTestMode& encTestmode, CodingSt } } } - +#if JVET_W0097_GPM_MMVD_TM + if (encTestmode.type == ETM_MERGE_GEO) + { + if (tempCS->cost < cuECtx.get<double>(BEST_GPM_COST)) + { + cuECtx.set(BEST_GPM_COST, tempCS->cost); + } + } +#endif if( encTestmode.type == ETM_SPLIT_QT ) { int maxQtD = 0; diff --git a/source/Lib/EncoderLib/EncModeCtrl.h b/source/Lib/EncoderLib/EncModeCtrl.h index 528ddb8da..ee90073a6 100644 --- a/source/Lib/EncoderLib/EncModeCtrl.h +++ b/source/Lib/EncoderLib/EncModeCtrl.h @@ -462,6 +462,14 @@ struct CodedCUInfo bool isSkip; bool isMMVDSkip; bool isIBC; +#if JVET_W0097_GPM_MMVD_TM + bool skipGPM; + char isGPMTested; + int geoDirCandList[GEO_MAX_TRY_WEIGHTED_SATD]; + int numGeoDirCand; + int geoMrgIdx0List[GEO_MAX_TRY_WEIGHTED_SATD]; + int geoMrgIdx1List[GEO_MAX_TRY_WEIGHTED_SATD]; +#endif bool validMv[NUM_REF_PIC_LIST_01][MAX_STORED_CU_INFO_REFS]; Mv saveMv [NUM_REF_PIC_LIST_01][MAX_STORED_CU_INFO_REFS]; @@ -512,7 +520,9 @@ public: protected: void touch ( const UnitArea& area ); #endif - +#if JVET_W0097_GPM_MMVD_TM +public: +#endif CodedCUInfo& getBlkInfo( const UnitArea& area ); public: @@ -609,13 +619,38 @@ public: // EncModeCtrlMTnoRQT - allows and controls modes introduced by QTBT (inkl. 
multi-type-tree) // - only 2Nx2N, no RQT, additional binary/triary CU splits ////////////////////////////////////////////////////////////////////////// - +#if JVET_W0097_GPM_MMVD_TM +enum ExtraFeatures +{ + DID_HORZ_SPLIT = 0, + DID_VERT_SPLIT, + DID_QUAD_SPLIT, + BEST_HORZ_SPLIT_COST, + BEST_VERT_SPLIT_COST, + BEST_TRIH_SPLIT_COST, + BEST_TRIV_SPLIT_COST, + DO_TRIH_SPLIT, + DO_TRIV_SPLIT, + BEST_NON_SPLIT_COST, + BEST_NO_IMV_COST, + BEST_IMV_COST, + BEST_GPM_COST, + QT_BEFORE_BT, + IS_BEST_NOSPLIT_SKIP, + MAX_QT_SUB_DEPTH, +#if REUSE_CU_RESULTS + IS_REUSING_CU, +#endif + NUM_EXTRA_FEATURES +}; +#endif class EncModeCtrlMTnoRQT : public EncModeCtrl, public CacheBlkInfoCtrl #if REUSE_CU_RESULTS , public BestEncInfoCache #endif , public SaveLoadEncInfoSbt { +#if !JVET_W0097_GPM_MMVD_TM enum ExtraFeatures { DID_HORZ_SPLIT = 0, @@ -638,7 +673,7 @@ class EncModeCtrlMTnoRQT : public EncModeCtrl, public CacheBlkInfoCtrl #endif NUM_EXTRA_FEATURES }; - +#endif unsigned m_skipThreshold; public: diff --git a/source/Lib/EncoderLib/EncReshape.cpp b/source/Lib/EncoderLib/EncReshape.cpp index 1af97f01b..4bfa516f9 100644 --- a/source/Lib/EncoderLib/EncReshape.cpp +++ b/source/Lib/EncoderLib/EncReshape.cpp @@ -104,7 +104,6 @@ void EncReshape::createEnc(int picWidth, int picHeight, uint32_t maxCUWidth, ui { m_chromaAdjHelpLUT.resize(PIC_CODE_CW_BINS, 1<<CSCALE_FP_PREC); } - m_sliceReshapeInfo.setUseSliceReshaper(true); m_sliceReshapeInfo.setSliceReshapeChromaAdj(true); m_sliceReshapeInfo.setSliceReshapeModelPresentFlag(true); diff --git a/source/Lib/EncoderLib/InterSearch.cpp b/source/Lib/EncoderLib/InterSearch.cpp index 2a7145c80..c4ce53564 100644 --- a/source/Lib/EncoderLib/InterSearch.cpp +++ b/source/Lib/EncoderLib/InterSearch.cpp @@ -3414,6 +3414,9 @@ void InterSearch::predInterSearchAdditionalHypothesis(PredictionUnit& pu, const { PU::getGeoMergeCandidates(fakePredData, m_geoMrgCtx); } +#if JVET_W0097_GPM_MMVD_TM + maxNumMergeCandidates = min((int)maxNumMergeCandidates, 
m_geoMrgCtx.numValidMergeCand); +#endif const auto savedAffine = pu.cu->affine; const auto savedIMV = pu.cu->imv; for (int i = 0; i < maxNumMergeCandidates; i++) @@ -3443,6 +3446,12 @@ void InterSearch::predInterSearchAdditionalHypothesis(PredictionUnit& pu, const } setGeoTmpBuffer(); } +#if JVET_W0097_GPM_MMVD_TM + else + { + maxNumMergeCandidates = min((int)maxNumMergeCandidates, m_geoMrgCtx.numValidMergeCand); + } +#endif for (int i = 0; i < maxNumMergeCandidates; i++) { int refList = m_geoMrgCtx.interDirNeighbours[i] - 1; CHECK(refList != 0 && refList != 1, ""); diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp index 19aeb75d3..cb4c13e9f 100644 --- a/source/Lib/EncoderLib/VLCWriter.cpp +++ b/source/Lib/EncoderLib/VLCWriter.cpp @@ -2265,6 +2265,12 @@ void HLSWriter::codePictureHeader( PicHeader* picHeader, bool writeRbspTrailingB { picHeader->setDisFracMMVD(false); } +#if JVET_W0097_GPM_MMVD_TM + if (sps->getUseGeo() && (!pps->getRplInfoInPhFlag() || picHeader->getRPL(1)->getNumRefEntries() > 0)) + { + WRITE_FLAG(picHeader->getGPMMMVDTableFlag(), "ph_gpm_ext_mmvd_flag"); + } +#endif #if JVET_R0324_REORDER // mvd L1 zero flag if (!pps->getRplInfoInPhFlag() || picHeader->getRPL(1)->getNumRefEntries() > 0) -- GitLab