diff --git a/source/Lib/CommonLib/Buffer.h b/source/Lib/CommonLib/Buffer.h index e714d2869888ebeb6cf2a80c2cb0288acdac23a8..216f8b57598cf0b37c6d87620ec93d4ba107fe0d 100644 --- a/source/Lib/CommonLib/Buffer.h +++ b/source/Lib/CommonLib/Buffer.h @@ -686,9 +686,17 @@ struct UnitBuf const AreaBuf<T>& Cr() const { return bufs[2]; } void fill ( const T &val ); +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + void copyFrom ( const UnitBuf<const T> &other, const bool lumaOnly = false, const bool chromaOnly = false ); +#else void copyFrom ( const UnitBuf<const T> &other ); +#endif void reconstruct ( const UnitBuf<const T> &pred, const UnitBuf<const T> &resi, const ClpRngs& clpRngs ); +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + void copyClip ( const UnitBuf<const T> &src, const ClpRngs& clpRngs, const bool lumaOnly = false, const bool chromaOnly = false ); +#else void copyClip ( const UnitBuf<const T> &src, const ClpRngs& clpRngs ); +#endif void subtract ( const UnitBuf<const T> &other ); void addWeightedAvg ( const UnitBuf<const T> &other1, const UnitBuf<const T> &other2, const ClpRngs& clpRngs, const uint8_t gbiIdx = GBI_DEFAULT, const bool chromaOnly = false, const bool lumaOnly = false); void addAvg ( const UnitBuf<const T> &other1, const UnitBuf<const T> &other2, const ClpRngs& clpRngs, const bool chromaOnly = false, const bool lumaOnly = false); @@ -718,11 +726,22 @@ void UnitBuf<T>::fill( const T &val ) } template<typename T> +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP +void UnitBuf<T>::copyFrom(const UnitBuf<const T> &other, const bool lumaOnly, const bool chromaOnly ) +#else void UnitBuf<T>::copyFrom( const UnitBuf<const T> &other ) +#endif { CHECK( chromaFormat != other.chromaFormat, "Incompatible formats" ); +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + CHECK( lumaOnly && chromaOnly, "Not allowed to have both lumaOnly and chromaOnly selected" ); + const size_t compStart = chromaOnly ? 1 : 0; + const size_t compEnd = lumaOnly ? 1 : (unsigned) bufs.size(); + for( size_t i = compStart; i < compEnd; i++ ) +#else for( unsigned i = 0; i < bufs.size(); i++ ) +#endif { bufs[i].copyFrom( other.bufs[i] ); } @@ -742,11 +761,22 @@ void UnitBuf<T>::subtract( const UnitBuf<const T> &other ) } template<typename T> +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP +void UnitBuf<T>::copyClip(const UnitBuf<const T> &src, const ClpRngs &clpRngs, const bool lumaOnly, const bool chromaOnly ) +#else void UnitBuf<T>::copyClip(const UnitBuf<const T> &src, const ClpRngs& clpRngs) +#endif { CHECK( chromaFormat != src.chromaFormat, "Incompatible formats" ); +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + CHECK( lumaOnly && chromaOnly, "Not allowed to have both lumaOnly and chromaOnly selected" ); + const size_t compStart = chromaOnly ? 1 : 0; + const size_t compEnd = lumaOnly ? 1 : bufs.size(); + for( size_t i = compStart; i < compEnd; i++ ) +#else for( unsigned i = 0; i < bufs.size(); i++ ) +#endif { bufs[i].copyClip( src.bufs[i], clpRngs.comp[i] ); } diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp index ef51d6960c16156e393fe28174a96d9b31f482c6..2cb8fc51a357c061b40cfbab162e186fa969bfc5 100644 --- a/source/Lib/CommonLib/InterPrediction.cpp +++ b/source/Lib/CommonLib/InterPrediction.cpp @@ -264,7 +264,11 @@ bool InterPrediction::xCheckIdenticalMotion( const PredictionUnit &pu ) return false; } +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP +void InterPrediction::xSubPuMC( PredictionUnit& pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList /*= REF_PIC_LIST_X*/, const bool luma /*= true*/, const bool chroma /*= true*/) +#else void InterPrediction::xSubPuMC( PredictionUnit& pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList /*= REF_PIC_LIST_X*/ ) +#endif { // compute the location of the current PU @@ -331,7 +335,11 @@ void InterPrediction::xSubPuMC( PredictionUnit& pu, PelUnitBuf& predBuf, const R PelUnitBuf subPredBuf = predBuf.subBuf(UnitAreaRelative(pu, subPu)); subPu.mmvdEncOptMode = 0; subPu.mvRefine = false; +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + motionCompensation(subPu, subPredBuf, eRefPicList, luma, chroma); +#else motionCompensation(subPu, subPredBuf, eRefPicList); +#endif secDim = later - secStep; } } @@ -433,7 +441,12 @@ void InterPrediction::xPredInterUni(const PredictionUnit& pu, const RefPicList& { CHECK( bioApplied, "BIO is not allowed with affine" ); m_iRefListIdx = eRefPicList; +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + bool genChromaMv = (!luma && chroma && compID == COMPONENT_Cb); + xPredAffineBlk( compID, pu, pu.cu->slice->getRefPic( eRefPicList, iRefIdx )->unscaledPic, mv, pcYuvPred, bi, pu.cu->slice->clpRng( compID ), genChromaMv, pu.cu->slice->getScalingRatio( eRefPicList, iRefIdx )); +#else xPredAffineBlk( compID, pu, pu.cu->slice->getRefPic( eRefPicList, iRefIdx )->unscaledPic, mv, pcYuvPred, bi, pu.cu->slice->clpRng( compID ), pu.cu->slice->getScalingRatio( eRefPicList, iRefIdx )); +#endif } else { @@ -452,7 +465,11 @@ void InterPrediction::xPredInterUni(const PredictionUnit& pu, const RefPicList& } } +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP +void InterPrediction::xPredInterBi(PredictionUnit &pu, PelUnitBuf &pcYuvPred, const bool luma, const bool chroma, PelUnitBuf *yuvPredTmp /*= NULL*/) +#else void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred, PelUnitBuf* yuvPredTmp /*= NULL*/) +#endif { const PPS &pps = *pu.cs->pps; const Slice &slice = *pu.cs->slice; @@ -537,12 +554,20 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred, Pe if (dmvrApplied) { if (yuvPredTmp) +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + xPredInterUni(pu, eRefPicList, pcMbBuf, true, false, luma, chroma); +#else xPredInterUni(pu, eRefPicList, pcMbBuf, true, false, true, true); +#endif continue; } xPredInterUni ( pu, eRefPicList, pcMbBuf, true , bioApplied +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + , luma, chroma +#else , true, true +#endif ); } else @@ -551,14 +576,22 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred, Pe { xPredInterUni ( pu, eRefPicList, pcMbBuf, true , bioApplied +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + , luma, chroma +#else , true, true +#endif ); } else { xPredInterUni( pu, eRefPicList, pcMbBuf, pu.cu->triangle , bioApplied +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + , luma, chroma +#else , true, true +#endif ); } } @@ -569,15 +602,27 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred, Pe CPelUnitBuf srcPred1 = ( pu.chromaFormat == CHROMA_400 ? CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvPred.Y())) : CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvPred.Y()), PelBuf(m_acYuvPred[1][1], pcYuvPred.Cb()), PelBuf(m_acYuvPred[1][2], pcYuvPred.Cr())) ); +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + bool lumaOnly = luma && !chroma; + bool chromaOnly = !luma && chroma; +#endif if( !pu.cu->triangle && (!dmvrApplied) && (!bioApplied) && pps.getWPBiPred() && slice.getSliceType() == B_SLICE && pu.cu->GBiIdx==GBI_DEFAULT) { +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + xWeightedPredictionBi( pu, srcPred0, srcPred1, pcYuvPred, m_maxCompIDToPred, lumaOnly, chromaOnly ); +#else xWeightedPredictionBi( pu, srcPred0, srcPred1, pcYuvPred, m_maxCompIDToPred ); +#endif if (yuvPredTmp) yuvPredTmp->copyFrom(pcYuvPred); } else if( !pu.cu->triangle && pps.getUseWP() && slice.getSliceType() == P_SLICE ) { +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + xWeightedPredictionUni( pu, srcPred0, REF_PIC_LIST_0, pcYuvPred, -1, m_maxCompIDToPred, lumaOnly, chromaOnly ); +#else xWeightedPredictionUni( pu, srcPred0, REF_PIC_LIST_0, pcYuvPred, -1, m_maxCompIDToPred ); +#endif if (yuvPredTmp) yuvPredTmp->copyFrom(pcYuvPred); } @@ -593,7 +638,11 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred, Pe } else { - xWeightedAverage( pu, srcPred0, srcPred1, pcYuvPred, slice.getSPS()->getBitDepths(), slice.clpRngs(), bioApplied, yuvPredTmp); +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + xWeightedAverage( pu, srcPred0, srcPred1, pcYuvPred, slice.getSPS()->getBitDepths(), slice.clpRngs(), bioApplied, lumaOnly, chromaOnly, yuvPredTmp ); +#else + xWeightedAverage( pu, srcPred0, srcPred1, pcYuvPred, slice.getSPS()->getBitDepths(), slice.clpRngs(), bioApplied, yuvPredTmp ); +#endif } } } @@ -790,7 +839,11 @@ bool InterPrediction::isSubblockVectorSpreadOverLimit( int a, int b, int c, int return false; } +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP +void InterPrediction::xPredAffineBlk(const ComponentID &compID, const PredictionUnit &pu, const Picture *refPic, const Mv *_mv, PelUnitBuf &dstPic, const bool &bi, const ClpRng &clpRng, bool genChromaMv, const std::pair<int, int> scalingRatio) +#else void InterPrediction::xPredAffineBlk( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv* _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng, const std::pair<int, int> scalingRatio ) +#endif { JVET_J0090_SET_REF_PICTURE( refPic, compID ); @@ -946,6 +999,68 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio #endif } } +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + int iScaleXLuma = ::getComponentScaleX(COMPONENT_Y, chFmt); + int iScaleYLuma = ::getComponentScaleY(COMPONENT_Y, chFmt); + + if (genChromaMv && pu.chromaFormat != CHROMA_444) + { + CHECK(compID == COMPONENT_Y, "Chroma only subblock MV calculation should not apply to Luma"); + int lumaBlockWidth = AFFINE_MIN_BLOCK_SIZE; + int lumaBlockHeight = AFFINE_MIN_BLOCK_SIZE; + + CHECK(lumaBlockWidth > (width >> iScaleXLuma), "Sub Block width > Block width"); + CHECK(lumaBlockHeight > (height >> iScaleYLuma), "Sub Block height > Block height"); + + const int cxWidthLuma = width >> iScaleXLuma; + const int cxHeightLuma = height >> iScaleYLuma; + const int iHalfBWLuma = lumaBlockWidth >> 1; + const int iHalfBHLuma = lumaBlockHeight >> 1; + + int iDMvHorXLuma, iDMvHorYLuma, iDMvVerXLuma, iDMvVerYLuma; + iDMvHorXLuma = (mvRT - mvLT).getHor() << (iBit - floorLog2(cxWidthLuma)); + iDMvHorYLuma = (mvRT - mvLT).getVer() << (iBit - floorLog2(cxWidthLuma)); + if (pu.cu->affineType == AFFINEMODEL_6PARAM) + { + iDMvVerXLuma = (mvLB - mvLT).getHor() << (iBit - floorLog2(cxHeightLuma)); + iDMvVerYLuma = (mvLB - mvLT).getVer() << (iBit - floorLog2(cxHeightLuma)); + } + else + { + iDMvVerXLuma = -iDMvHorYLuma; + iDMvVerYLuma = iDMvHorXLuma; + } + + const bool subblkMVSpreadOverLimitLuma = isSubblockVectorSpreadOverLimit(iDMvHorXLuma, iDMvHorYLuma, iDMvVerXLuma, iDMvVerYLuma, pu.interDir); + + // get prediction block by block + for (int h = 0; h < cxHeightLuma; h += lumaBlockHeight) + { + for (int w = 0; w < cxWidthLuma; w += lumaBlockWidth) + { + int iMvScaleTmpHor, iMvScaleTmpVer; + if (!subblkMVSpreadOverLimitLuma) + { + iMvScaleTmpHor = iMvScaleHor + iDMvHorXLuma * (iHalfBWLuma + w) + iDMvVerXLuma * (iHalfBHLuma + h); + iMvScaleTmpVer = iMvScaleVer + iDMvHorYLuma * (iHalfBWLuma + w) + iDMvVerYLuma * (iHalfBHLuma + h); + } + else + { + iMvScaleTmpHor = iMvScaleHor + iDMvHorXLuma * (cxWidthLuma >> 1) + iDMvVerXLuma * (cxHeightLuma >> 1); + iMvScaleTmpVer = iMvScaleVer + iDMvHorYLuma * (cxWidthLuma >> 1) + iDMvVerYLuma * (cxHeightLuma >> 1); + } + + roundAffineMv(iMvScaleTmpHor, iMvScaleTmpVer, shift); + Mv tmpMv(iMvScaleTmpHor, iMvScaleTmpVer); + tmpMv.clipToStorageBitDepth(); + iMvScaleTmpHor = tmpMv.getHor(); + iMvScaleTmpVer = tmpMv.getVer(); + + m_storedMv[h / AFFINE_MIN_BLOCK_SIZE * MVBUFFER_SIZE + w / AFFINE_MIN_BLOCK_SIZE].set(iMvScaleTmpHor, iMvScaleTmpVer); + } + } + } +#endif // get prediction block by block for ( int h = 0; h < cxHeight; h += blockHeight ) { @@ -1247,8 +1362,16 @@ void InterPrediction::xCalcBlkGradient(int sx, int sy, int *arraysGx2, int g_pelBufOP.calcBlkGradient(sx, sy, arraysGx2, arraysGxGy, arraysGxdI, arraysGy2, arraysGydI, sGx2, sGy2, sGxGy, sGxdI, sGydI, width, height, unitSize); } +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP +void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied, bool lumaOnly, bool chromaOnly, PelUnitBuf* yuvDstTmp /*= NULL*/) +#else void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied, PelUnitBuf* yuvDstTmp /*= NULL*/) +#endif { +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + CHECK( (chromaOnly && lumaOnly), "should not happen" ); +#endif + const int iRefIdx0 = pu.refIdx[0]; const int iRefIdx1 = pu.refIdx[1]; @@ -1264,9 +1387,17 @@ void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitB if( pu.cu->GBiIdx != GBI_DEFAULT && (yuvDstTmp || !pu.mhIntraFlag) ) { CHECK(bioApplied, "GBi is disallowed with BIO"); +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + pcYuvDst.addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, pu.cu->GBiIdx, chromaOnly, lumaOnly); +#else pcYuvDst.addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, pu.cu->GBiIdx); +#endif if (yuvDstTmp) +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + yuvDstTmp->addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, chromaOnly, lumaOnly); +#else yuvDstTmp->addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, false); +#endif return; } if (bioApplied) @@ -1299,13 +1430,30 @@ void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitB getWpScaling(pu.cu->slice, iRefIdx0, iRefIdx1, pwp0, pwp1); if (!bioApplied) { +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + if (!chromaOnly) +#endif addWeightBiComponent(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, pcYuvDst, true, COMPONENT_Y); } - addWeightBiComponent(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, pcYuvDst, true, COMPONENT_Cb); - addWeightBiComponent(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, pcYuvDst, true, COMPONENT_Cr); +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + if (!lumaOnly) + { +#endif + addWeightBiComponent(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, pcYuvDst, true, COMPONENT_Cb); + addWeightBiComponent(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, pcYuvDst, true, COMPONENT_Cr); +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + } +#endif } else { +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + if (!bioApplied && (lumaOnly || chromaOnly)) + { + pcYuvDst.addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, chromaOnly, lumaOnly); + } + else +#endif pcYuvDst.addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, bioApplied); } if (yuvDstTmp) @@ -1316,7 +1464,11 @@ void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitB yuvDstTmp->bufs[2].copyFrom(pcYuvDst.bufs[2]); } else +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + yuvDstTmp->copyFrom(pcYuvDst, lumaOnly, chromaOnly); +#else yuvDstTmp->copyFrom(pcYuvDst); +#endif } } else if( iRefIdx0 >= 0 && iRefIdx1 < 0 ) @@ -1326,9 +1478,17 @@ void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitB pcYuvDst.copyFrom( pcYuvSrc0 ); } else - pcYuvDst.copyClip( pcYuvSrc0, clpRngs ); +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + pcYuvDst.copyClip( pcYuvSrc0, clpRngs, lumaOnly, chromaOnly ); +#else + pcYuvDst.copyClip( pcYuvSrc0, clpRngs ); +#endif if (yuvDstTmp) - yuvDstTmp->copyFrom(pcYuvDst); +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + yuvDstTmp->copyFrom( pcYuvDst, lumaOnly, chromaOnly ); +#else + yuvDstTmp->copyFrom( pcYuvDst ); +#endif } else if( iRefIdx0 < 0 && iRefIdx1 >= 0 ) { @@ -1337,9 +1497,17 @@ void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitB pcYuvDst.copyFrom( pcYuvSrc1 ); } else - pcYuvDst.copyClip( pcYuvSrc1, clpRngs ); +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + pcYuvDst.copyClip( pcYuvSrc1, clpRngs, lumaOnly, chromaOnly ); +#else + pcYuvDst.copyClip( pcYuvSrc1, clpRngs ); +#endif if (yuvDstTmp) +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + yuvDstTmp->copyFrom(pcYuvDst, lumaOnly, chromaOnly); +#else yuvDstTmp->copyFrom(pcYuvDst); +#endif } } @@ -1514,15 +1682,27 @@ void InterPrediction::motionCompensation( PredictionUnit &pu, PelUnitBuf &predBu { xPredInterUni ( pu, eRefPicList, predBuf, true , false +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + , luma, chroma +#else , true, true +#endif + ); + xWeightedPredictionUni( pu, predBuf, eRefPicList, predBuf, -1, m_maxCompIDToPred +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + , (luma && !chroma), (!luma && chroma) +#endif ); - xWeightedPredictionUni( pu, predBuf, eRefPicList, predBuf, -1, m_maxCompIDToPred ); } else { xPredInterUni( pu, eRefPicList, predBuf, false , false +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + , luma, chroma +#else , true, true +#endif ); } } @@ -1593,20 +1773,36 @@ void InterPrediction::motionCompensation( PredictionUnit &pu, PelUnitBuf &predBu if (pu.mergeType != MRG_TYPE_DEFAULT_N && pu.mergeType != MRG_TYPE_IBC) { CHECK(predBufWOBIO != NULL, "the case should not happen!"); +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + xSubPuMC( pu, predBuf, eRefPicList, luma, chroma ); +#else xSubPuMC( pu, predBuf, eRefPicList ); +#endif } else if( xCheckIdenticalMotion( pu ) ) { xPredInterUni( pu, REF_PIC_LIST_0, predBuf, false , false +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + , luma, chroma +#else , true, true +#endif ); if (predBufWOBIO) +#if 0//JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + predBufWOBIO->copyFrom(predBuf, (luma && !chroma), (chroma && !luma)); +#else predBufWOBIO->copyFrom(predBuf); +#endif } else { +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + xPredInterBi(pu, predBuf, luma, chroma, predBufWOBIO); +#else xPredInterBi(pu, predBuf, predBufWOBIO); +#endif } } return; diff --git a/source/Lib/CommonLib/InterPrediction.h b/source/Lib/CommonLib/InterPrediction.h index b28b49dd3c93969837a8ef870ff9575f4ba593e8..0a03a6567f449f0b99e898e982175a6cf9b64f6f 100644 --- a/source/Lib/CommonLib/InterPrediction.h +++ b/source/Lib/CommonLib/InterPrediction.h @@ -124,7 +124,11 @@ protected: , const bool& bioApplied , const bool luma, const bool chroma ); +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + void xPredInterBi ( PredictionUnit& pu, PelUnitBuf &pcYuvPred, const bool luma = true, const bool chroma = true, PelUnitBuf* yuvPredTmp = NULL ); +#else void xPredInterBi ( PredictionUnit& pu, PelUnitBuf &pcYuvPred, PelUnitBuf* yuvPredTmp = NULL ); +#endif void xPredInterBlk ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv& _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng , const bool& bioApplied , bool isIBC @@ -140,15 +144,26 @@ protected: void xBioGradFilter (Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, int bitDepth); void xCalcBIOPar (const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, int bitDepth); void xCalcBlkGradient (int sx, int sy, int *arraysGx2, int *arraysGxGy, int *arraysGxdI, int *arraysGy2, int *arraysGydI, int &sGx2, int &sGy2, int &sGxGy, int &sGxdI, int &sGydI, int width, int height, int unitSize); +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + void xWeightedAverage ( const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied, const bool lumaOnly = false, const bool chromaOnly = false, PelUnitBuf* yuvDstTmp = NULL ); +#else void xWeightedAverage ( const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied, PelUnitBuf* yuvDstTmp = NULL ); +#endif void xApplyBiPROF (const PredictionUnit& pu, const CPelBuf& pcYuvSrc0, const CPelBuf& pcYuvSrc1, PelBuf& pcYuvDst, const ClpRng& clpRng); +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + void xPredAffineBlk ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv* _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng, const bool genChromaMv = false, const std::pair<int, int> scalingRatio = SCALE_1X ); +#else void xPredAffineBlk ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv* _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng, const std::pair<int, int> scalingRatio = SCALE_1X ); - +#endif void xWeightedTriangleBlk ( const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const bool splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 ); static bool xCheckIdenticalMotion( const PredictionUnit& pu ); +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + void xSubPuMC(PredictionUnit& pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList = REF_PIC_LIST_X, const bool luma = true, const bool chroma = true); +#else void xSubPuMC(PredictionUnit& pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList = REF_PIC_LIST_X); +#endif void xSubPuBio(PredictionUnit& pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList = REF_PIC_LIST_X, PelUnitBuf* yuvDstTmp = NULL); void destroy(); diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index fbb7edbfbfc8055154a63dc3c684cf7e9afc3d78..435427f2b2b7067f3a8952d4381a7613a8b19369 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -50,6 +50,8 @@ #include <assert.h> #include <cassert> +#define JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP 1 // JVET-P0445: encoder speed up for sub-block based merge candidate search + #define JVET_P0057_BDOF_PROF_HARMONIZATION 1 // JVET-P0057: harmonization of BDOF and PROF on motion refinement precis #define JVET_P0400_REMOVE_SHARED_MERGE_LIST 1 // JVET-P0400: removeal of shared merge list diff --git a/source/Lib/CommonLib/WeightPrediction.cpp b/source/Lib/CommonLib/WeightPrediction.cpp index 007fb08fc5a487d02b8c18df6826acc73719dfe3..d39c65b4d283324ad6f19a6fe3d3f28b77bad47e 100644 --- a/source/Lib/CommonLib/WeightPrediction.cpp +++ b/source/Lib/CommonLib/WeightPrediction.cpp @@ -161,13 +161,25 @@ void WeightPrediction::addWeightBi(const CPelUnitBuf &pcYuvSrc0, const WPScalingParam *const wp1, PelUnitBuf &rpcYuvDst, const bool bRoundLuma /*= true*/, - const ComponentID maxNumComp) + const ComponentID maxNumComp +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + , bool lumaOnly + , bool chromaOnly +#endif +) { const bool enableRounding[MAX_NUM_COMPONENT] = { bRoundLuma, true, true }; const uint32_t numValidComponent = (const uint32_t)pcYuvSrc0.bufs.size(); +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + CHECK( lumaOnly && chromaOnly, "Not allowed to have both lumaOnly and chromaOnly selected" ); + int firstComponent = chromaOnly ? 1 : 0; + int lastComponent = lumaOnly ? 0 : maxNumComp; + for (int componentIndex = firstComponent; componentIndex < numValidComponent && componentIndex <= lastComponent; componentIndex++) +#else for (int componentIndex = 0; componentIndex < numValidComponent && componentIndex <= maxNumComp; componentIndex++) +#endif { const ComponentID compID = ComponentID(componentIndex); @@ -275,11 +287,24 @@ void WeightPrediction::addWeightUni(const CPelUnitBuf &pcYuvSrc0, const ClpRngs &clpRngs, const WPScalingParam *const wp0, PelUnitBuf &rpcYuvDst, - const ComponentID maxNumComp) + const ComponentID maxNumComp +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + , bool lumaOnly + , bool chromaOnly +#endif +) { const uint32_t numValidComponent = (const uint32_t)pcYuvSrc0.bufs.size(); +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + CHECK( lumaOnly && chromaOnly, "Not allowed to have both lumaOnly and chromaOnly selected" ); + int firstComponent = chromaOnly ? 1 : 0; + int lastComponent = lumaOnly ? 0 : maxNumComp; + for (int componentIndex = firstComponent; componentIndex < numValidComponent && componentIndex <= lastComponent; + componentIndex++) +#else for (int componentIndex = 0; componentIndex < numValidComponent && componentIndex <= maxNumComp; componentIndex++) +#endif { const ComponentID compID = ComponentID(componentIndex); @@ -371,7 +396,12 @@ void WeightPrediction::xWeightedPredictionUni(const PredictionUnit &pu, const RefPicList &eRefPicList, PelUnitBuf &pcYuvPred, const int iRefIdx_input/* = -1*/, - const ComponentID maxNumComp) + const ComponentID maxNumComp +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + , bool lumaOnly + , bool chromaOnly +#endif +) { WPScalingParam *pwp, *pwpTmp; @@ -391,14 +421,23 @@ void WeightPrediction::xWeightedPredictionUni(const PredictionUnit &pu, { getWpScaling(pu.cs->slice, -1, iRefIdx, pwpTmp, pwp, maxNumComp); } +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + addWeightUni(pcYuvSrc, pu.cu->slice->clpRngs(), pwp, pcYuvPred, maxNumComp, lumaOnly, chromaOnly); +#else addWeightUni(pcYuvSrc, pu.cu->slice->clpRngs(), pwp, pcYuvPred, maxNumComp); +#endif } void WeightPrediction::xWeightedPredictionBi(const PredictionUnit &pu, const CPelUnitBuf &pcYuvSrc0, const CPelUnitBuf &pcYuvSrc1, PelUnitBuf &rpcYuvDst, - const ComponentID maxNumComp) + const ComponentID maxNumComp +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + , bool lumaOnly + , bool chromaOnly +#endif +) { const int iRefIdx0 = pu.refIdx[0]; const int iRefIdx1 = pu.refIdx[1]; @@ -413,15 +452,27 @@ void WeightPrediction::xWeightedPredictionBi(const PredictionUnit &pu, if (iRefIdx0 >= 0 && iRefIdx1 >= 0) { +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + addWeightBi(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, rpcYuvDst, true, maxNumComp, lumaOnly, chromaOnly); +#else addWeightBi(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, rpcYuvDst, true, maxNumComp); +#endif } else if (iRefIdx0 >= 0 && iRefIdx1 < 0) { +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + addWeightUni(pcYuvSrc0, pu.cu->slice->clpRngs(), pwp0, rpcYuvDst, maxNumComp, lumaOnly, chromaOnly); +#else addWeightUni(pcYuvSrc0, pu.cu->slice->clpRngs(), pwp0, rpcYuvDst, maxNumComp); +#endif } else if (iRefIdx0 < 0 && iRefIdx1 >= 0) { +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + addWeightUni(pcYuvSrc1, pu.cu->slice->clpRngs(), pwp1, rpcYuvDst, maxNumComp, lumaOnly, chromaOnly); +#else addWeightUni(pcYuvSrc1, pu.cu->slice->clpRngs(), pwp1, rpcYuvDst, maxNumComp); +#endif } else { diff --git a/source/Lib/CommonLib/WeightPrediction.h b/source/Lib/CommonLib/WeightPrediction.h index a038dd2571dbbe69de1b33f475b6ed05e5bfb31c..5df9ad1cc5a100641aeaf9267607a473f1d39681 100644 --- a/source/Lib/CommonLib/WeightPrediction.h +++ b/source/Lib/CommonLib/WeightPrediction.h @@ -69,7 +69,12 @@ public: const WPScalingParam *const wp1, PelUnitBuf &rpcYuvDst, const bool bRoundLuma = true, - const ComponentID maxNumComp = MAX_NUM_COMPONENT ); + const ComponentID maxNumComp = MAX_NUM_COMPONENT +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + , bool lumaOnly = false + , bool chromaOnly = false +#endif + ); void addWeightBiComponent( const CPelUnitBuf &pcYuvSrc0, const CPelUnitBuf &pcYuvSrc1, @@ -84,20 +89,35 @@ public: const ClpRngs &clpRngs, const WPScalingParam *const wp0, PelUnitBuf &rpcYuvDst, - const ComponentID maxNumComp = MAX_NUM_COMPONENT); + const ComponentID maxNumComp = MAX_NUM_COMPONENT +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + , bool lumaOnly = false + , bool chromaOnly = false +#endif + ); void xWeightedPredictionUni( const PredictionUnit &pu, const CPelUnitBuf &pcYuvSrc, const RefPicList &eRefPicList, PelUnitBuf &pcYuvPred, const int iRefIdx=-1, - const ComponentID maxNumComp = MAX_NUM_COMPONENT); + const ComponentID maxNumComp = MAX_NUM_COMPONENT +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + , bool lumaOnly = false + , bool chromaOnly = false +#endif + ); void xWeightedPredictionBi( const PredictionUnit &pu, const CPelUnitBuf &pcYuvSrc0, const CPelUnitBuf &pcYuvSrc1, PelUnitBuf &pcYuvDst, - const ComponentID maxNumComp = MAX_NUM_COMPONENT ); + const ComponentID maxNumComp = MAX_NUM_COMPONENT +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + , bool lumaOnly = false + , bool chromaOnly = false +#endif + ); }; #endif diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp index 10d54df714e00d0fe6efb503aaa74413b5297f2c..5485e57ba51f56b8006fdda253116306f0498aaf 100644 --- a/source/Lib/EncoderLib/EncCu.cpp +++ b/source/Lib/EncoderLib/EncCu.cpp @@ -3149,7 +3149,11 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct distParam.cur = acMergeBuffer[uiMergeCand].Y(); +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + m_pcInterSearch->motionCompensation( pu, acMergeBuffer[uiMergeCand], REF_PIC_LIST_X, true, false ); +#else m_pcInterSearch->motionCompensation( pu, acMergeBuffer[uiMergeCand] ); +#endif Distortion uiSad = distParam.distFunc( distParam ); uint32_t uiBitsCand = uiMergeCand + 1; @@ -3248,7 +3252,12 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct } if ( mrgTempBufSet ) { +#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP + tempCS->getPredBuf().copyFrom(acMergeBuffer[uiMergeCand], true, false); // Copy Luma Only + m_pcInterSearch->motionCompensation(pu, REF_PIC_LIST_X, false, true); +#else tempCS->getPredBuf().copyFrom( acMergeBuffer[uiMergeCand] ); +#endif } else {