diff --git a/cfg/encoder_randomaccess_vtm.cfg b/cfg/encoder_randomaccess_vtm.cfg index 0b61084fb0bdc8c4265d4f2fea0f1c42dca97bad..2f0ff3dad0747818d5482af32a690a4d4119efea 100644 --- a/cfg/encoder_randomaccess_vtm.cfg +++ b/cfg/encoder_randomaccess_vtm.cfg @@ -150,6 +150,7 @@ IBC : 0 # turned off in CTC AllowDisFracMMVD : 1 AffineAmvr : 1 LumaReshapeEnable : 1 # luma reshaping. 0: disable 1:enable +DMVR : 1 # Fast tools PBIntraFast : 1 diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp index a1d5a27c1023d1ccb40377a6676e1ba091433e27..9cc41985218ca698db9c6ec8b5cbe75a52ae22e9 100644 --- a/source/App/EncoderApp/EncApp.cpp +++ b/source/App/EncoderApp/EncApp.cpp @@ -272,6 +272,9 @@ void EncApp::xInitLibCfg() #endif #if JVET_M0247_AFFINE_AMVR_ENCOPT m_cEncLib.setUseAffineAmvrEncOpt ( m_AffineAmvrEncOpt ); +#endif +#if JVET_M0147_DMVR + m_cEncLib.setDMVR ( m_DMVR ); #endif m_cEncLib.setIBCMode ( m_IBCMode ); m_cEncLib.setIBCLocalSearchRangeX ( m_IBCLocalSearchRangeX ); diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp index b233f169029178027f38974a96f66a122fd77dbd..1d6de204bbf5d06ac28d2aae75eba624d99c19a1 100644 --- a/source/App/EncoderApp/EncAppCfg.cpp +++ b/source/App/EncoderApp/EncAppCfg.cpp @@ -879,6 +879,9 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) #endif #if JVET_M0247_AFFINE_AMVR_ENCOPT ("AffineAmvrEncOpt", m_AffineAmvrEncOpt, false, "Enable encoder optimization of affine AMVR") +#endif +#if JVET_M0147_DMVR + ("DMVR", m_DMVR, false, "Decoder-side Motion Vector Refinement") #endif ( "IBC", m_IBCMode, 0u, "IBCMode (0x1:enabled, 0x0:disabled) [default: disabled]") ( "IBCLocalSearchRangeX", m_IBCLocalSearchRangeX, 128u, "Search range of IBC local search in x direction") @@ -1978,6 +1981,9 @@ bool EncAppCfg::xCheckParameter() xConfirmPara( m_GBi, "GBi is only allowed with NEXT profile" ); xConfirmPara( m_GBiFast, "GBiFast is only allowed with NEXT profile" ); xConfirmPara( m_Triangle, "Triangle 
is only allowed with NEXT profile" ); +#if JVET_M0147_DMVR + xConfirmPara(m_DMVR, "DMVR only allowed with NEXT profile"); +#endif // ADD_NEW_TOOL : (parameter check) add a check for next tools here } else @@ -3209,6 +3215,9 @@ void EncAppCfg::xPrintParameter() #if JVET_M0247_AFFINE_AMVR_ENCOPT m_AffineAmvrEncOpt = m_AffineAmvr ? m_AffineAmvrEncOpt : false; msg( VERBOSE, "AffineAmvrEncOpt:%d ", m_AffineAmvrEncOpt ); +#endif +#if JVET_M0147_DMVR + msg(VERBOSE, "DMVR:%d ", m_DMVR); #endif } msg(VERBOSE, "IBC:%d ", m_IBCMode); diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h index 36e079be34fa04220a26480c689e275e519ad86f..f79bcaa44fae23a16aaab54d51b0e96ca273e670 100644 --- a/source/App/EncoderApp/EncAppCfg.h +++ b/source/App/EncoderApp/EncAppCfg.h @@ -252,6 +252,9 @@ protected: #if JVET_M0247_AFFINE_AMVR_ENCOPT bool m_AffineAmvrEncOpt; #endif +#if JVET_M0147_DMVR + bool m_DMVR; +#endif unsigned m_IBCMode; unsigned m_IBCLocalSearchRangeX; diff --git a/source/Lib/CommonLib/Buffer.cpp b/source/Lib/CommonLib/Buffer.cpp index a5c7197e8966d0558b5acfa5103c7d06e593386d..f245bff72d8aa2a3ebe80d1569707cdaf63a5b7d 100644 --- a/source/Lib/CommonLib/Buffer.cpp +++ b/source/Lib/CommonLib/Buffer.cpp @@ -299,6 +299,10 @@ PelBufferOps::PelBufferOps() calcBIOPar = calcBIOParCore; calcBlkGradient = calcBlkGradientCore; +#if JVET_M0147_DMVR + copyBuffer = copyBufferCore; + padding = paddingCore; +#endif #if ENABLE_SIMD_OPT_GBI removeWeightHighFreq8 = removeWeightHighFreq; removeWeightHighFreq4 = removeWeightHighFreq; @@ -313,6 +317,42 @@ PelBufferOps g_pelBufOP = PelBufferOps(); #endif #endif +#if JVET_M0147_DMVR +void copyBufferCore(Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height) +{ + int numBytes = width * sizeof(Pel); + for (int i = 0; i < height; i++) + { + memcpy(dst + i * dstStride, src + i * srcStride, numBytes); + } +} + +void paddingCore(Pel *ptr, int stride, int width, int height, int padSize) +{ + /*left and right 
padding*/ + Pel *ptrTemp1 = ptr; + Pel *ptrTemp2 = ptr + (width - 1); + int offset = 0; + for (int i = 0; i < height; i++) + { + offset = stride * i; + for (int j = 1; j <= padSize; j++) + { + *(ptrTemp1 - j + offset) = *(ptrTemp1 + offset); + *(ptrTemp2 + j + offset) = *(ptrTemp2 + offset); + } + } + /*Top and Bottom padding*/ + int numBytes = (width + padSize + padSize) * sizeof(Pel); + ptrTemp1 = (ptr - padSize); + ptrTemp2 = (ptr + (stride * (height - 1)) - padSize); + for (int i = 1; i <= padSize; i++) + { + memcpy(ptrTemp1 - (i * stride), (ptrTemp1), numBytes); + memcpy(ptrTemp2 + (i * stride), (ptrTemp2), numBytes); + } +} +#endif template<> void AreaBuf<Pel>::addWeightedAvg(const AreaBuf<const Pel> &other1, const AreaBuf<const Pel> &other2, const ClpRng& clpRng, const int8_t gbiIdx) { diff --git a/source/Lib/CommonLib/Buffer.h b/source/Lib/CommonLib/Buffer.h index e80b015059c292db541c232746837caa33656641..3d9c703aba7db39907bbee5f61012bd6bfbefda7 100644 --- a/source/Lib/CommonLib/Buffer.h +++ b/source/Lib/CommonLib/Buffer.h @@ -77,6 +77,10 @@ struct PelBufferOps void(*calcBIOPar) (const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG); #endif void(*calcBlkGradient)(int sx, int sy, int *arraysGx2, int *arraysGxGy, int *arraysGxdI, int *arraysGy2, int *arraysGydI, int &sGx2, int &sGy2, int &sGxGy, int &sGxdI, int &sGydI, int width, int height, int unitSize); +#if JVET_M0147_DMVR + void(*copyBuffer)(Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height); + void(*padding)(Pel *dst, int stride, int width, int height, int padSize); +#endif #if ENABLE_SIMD_OPT_GBI void ( *removeWeightHighFreq8) ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int 
height, int shift, int gbiWeight); void ( *removeWeightHighFreq4) ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height, int shift, int gbiWeight); @@ -90,6 +94,12 @@ extern PelBufferOps g_pelBufOP; #endif #endif + +#if JVET_M0147_DMVR +void paddingCore(Pel *ptr, int stride, int width, int height, int padSize); +void copyBufferCore(Pel *src, int srcStride, Pel *Dst, int dstStride, int width, int height); +#endif + template<typename T> struct AreaBuf : public Size { diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h index 9c1c06d633da7e46e064dd434267a25ec199d25d..b2b7a9135047231cd348114148e5ac13b116ea1a 100644 --- a/source/Lib/CommonLib/CommonDef.h +++ b/source/Lib/CommonLib/CommonDef.h @@ -117,7 +117,7 @@ static const double AFFINE_ME_LIST_MVP_TH = 1.0; // ==================================================================================================================== // Common constants // ==================================================================================================================== - +static const uint64_t MAX_UINT64 = 0xFFFFFFFFFFFFFFFFU; static const uint32_t MAX_UINT = 0xFFFFFFFFU; ///< max. value of unsigned 32-bit integer static const int MAX_INT = 2147483647; ///< max. value of signed 32-bit integer static const uint8_t MAX_UCHAR = 255; @@ -332,6 +332,14 @@ static const uint32_t LUMA_LEVEL_TO_DQP_LUT_MAXSIZE = 1024; ///< #if !JVET_M0464_UNI_MTS static const int NUM_EMT_CU_FLAG_CTX = 6; ///< number of context models for EMT CU-level flag #endif +#if JVET_M0147_DMVR +static const int DMVR_SUBCU_WIDTH = 16; +static const int DMVR_SUBCU_HEIGHT = 16; +static const int DMVR_SUBCU_WIDTH_LOG2 = 4; +static const int DMVR_SUBCU_HEIGHT_LOG2 = 4; +static const int MAX_NUM_SUBCU_DMVR = ((MAX_CU_SIZE * MAX_CU_SIZE) >> (DMVR_SUBCU_WIDTH_LOG2 + DMVR_SUBCU_HEIGHT_LOG2)); +static const int DMVR_NUM_ITERATION = 2; +#endif //QTBT high level parameters //for I slice luma CTB configuration para. 
diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp index cd524e4c7d019bf30961f3722be968b92082887a..0b426e3c004a72e2e0b2734d2d110bed8aed9d27 100644 --- a/source/Lib/CommonLib/InterPrediction.cpp +++ b/source/Lib/CommonLib/InterPrediction.cpp @@ -82,7 +82,15 @@ InterPrediction::InterPrediction() m_filteredBlockTmp[i][c] = nullptr; } } - +#if JVET_M0147_DMVR + m_cYuvPredTempDMVRL1 = nullptr; + m_cYuvPredTempDMVRL0 = nullptr; + for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++) + { + m_cRefSamplesDMVRL0[ch] = nullptr; + m_cRefSamplesDMVRL1[ch] = nullptr; + } +#endif } InterPrediction::~InterPrediction() @@ -128,6 +136,19 @@ void InterPrediction::destroy() xFree(m_gradY0); m_gradY0 = nullptr; xFree(m_gradX1); m_gradX1 = nullptr; xFree(m_gradY1); m_gradY1 = nullptr; +#if JVET_M0147_DMVR + xFree(m_cYuvPredTempDMVRL0); + m_cYuvPredTempDMVRL0 = nullptr; + xFree(m_cYuvPredTempDMVRL1); + m_cYuvPredTempDMVRL1 = nullptr; + for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++) + { + xFree(m_cRefSamplesDMVRL0[ch]); + m_cRefSamplesDMVRL0[ch] = nullptr; + xFree(m_cRefSamplesDMVRL1[ch]); + m_cRefSamplesDMVRL1[ch] = nullptr; + } +#endif } void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC ) @@ -148,6 +169,10 @@ void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC ) { int extWidth = MAX_CU_SIZE + (2 * BIO_EXTEND_SIZE + 2) + 16; int extHeight = MAX_CU_SIZE + (2 * BIO_EXTEND_SIZE + 2) + 1; +#if JVET_M0147_DMVR + extWidth = extWidth > (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + 16) ? extWidth : MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + 16; + extHeight = extHeight > (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + 1) ? 
extHeight : MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + 1; +#endif for( uint32_t i = 0; i < LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS_SIGNAL; i++ ) { m_filteredBlockTmp[i][c] = ( Pel* ) xMalloc( Pel, ( extWidth + 4 ) * ( extHeight + 7 + 4 ) ); @@ -175,6 +200,15 @@ void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC ) m_gradY1 = (Pel*)xMalloc(Pel, BIO_TEMP_BUFFER_SIZE); } +#if JVET_M0147_DMVR + m_cYuvPredTempDMVRL0 = (Pel*)xMalloc(Pel, (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION)) * (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION))); + m_cYuvPredTempDMVRL1 = (Pel*)xMalloc(Pel, (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION)) * (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION))); + for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++) + { + m_cRefSamplesDMVRL0[ch] = (Pel*)xMalloc(Pel, (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA) * (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA)); + m_cRefSamplesDMVRL1[ch] = (Pel*)xMalloc(Pel, (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA) * (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA)); + } +#endif #if !JVET_J0090_MEMORY_BANDWITH_MEASURE m_if.initInterpolationFilter( true ); #endif @@ -328,6 +362,9 @@ void InterPrediction::xSubPuMC( PredictionUnit& pu, PelUnitBuf& predBuf, const R PelUnitBuf subPredBuf = predBuf.subBuf(UnitAreaRelative(pu, subPu)); #if JVET_M0823_MMVD_ENCOPT subPu.mmvdEncOptMode = 0; +#endif +#if JVET_M0147_DMVR + subPu.mvRefine = false; #endif motionCompensation(subPu, subPredBuf, eRefPicList); secDim = later - secStep; @@ -468,6 +505,10 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred) if (pu.mmvdEncOptMode == 2 && pu.mmvdMergeFlag) { bioApplied = false; } +#endif +#if JVET_M0147_DMVR + bool dmvrApplied = false; + dmvrApplied = (pu.mvRefine) && PU::checkDMVRCondition(pu); #endif for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) { @@ -487,6 +528,10 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred) if (pu.refIdx[0] 
>= 0 && pu.refIdx[1] >= 0) { +#if JVET_M0147_DMVR + if (dmvrApplied) + continue; // mc will happen in processDMVR +#endif xPredInterUni ( pu, eRefPicList, pcMbBuf, true , bioApplied , true, true @@ -510,6 +555,12 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred) } } } +#if JVET_M0147_DMVR + if (dmvrApplied) + { + xProcessDMVR(pu, pcYuvPred, slice.clpRngs(), bioApplied); + } +#endif CPelUnitBuf srcPred0 = ( pu.chromaFormat == CHROMA_400 ? @@ -528,14 +579,33 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred) } else { +#if JVET_M0147_DMVR + if (dmvrApplied == false) + { +#endif xWeightedAverage( pu, srcPred0, srcPred1, pcYuvPred, slice.getSPS()->getBitDepths(), slice.clpRngs(), bioApplied ); +#if JVET_M0147_DMVR + } +#endif } } +#if JVET_M0147_DMVR void InterPrediction::xPredInterBlk ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv& _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng , const bool& bioApplied , bool isIBC + , SizeType dmvrWidth + , SizeType dmvrHeight + , bool bilinearMC + , Pel *srcPadBuf + , int32_t srcPadStride ) +#else +void InterPrediction::xPredInterBlk ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv& _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng + , const bool& bioApplied + , bool isIBC + ) +#endif { JVET_J0090_SET_REF_PICTURE( refPic, compID ); const ChromaFormat chFmt = pu.chromaFormat; @@ -559,9 +629,28 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio CPelBuf refBuf; { Position offset = pu.blocks[compID].pos().offset( _mv.getHor() >> shiftHor, _mv.getVer() >> shiftVer ); +#if JVET_M0147_DMVR + if (dmvrWidth) + { + refBuf = refPic->getRecoBuf(CompArea(compID, chFmt, offset, Size(dmvrWidth, dmvrHeight))); + } + else +#endif refBuf = refPic->getRecoBuf( CompArea( compID, chFmt, offset, pu.blocks[compID].size() ) ); } +#if JVET_M0147_DMVR + if (NULL 
!= srcPadBuf) + { + refBuf.buf = srcPadBuf; + refBuf.stride = srcPadStride; + } + if (dmvrWidth) + { + width = dmvrWidth; + height = dmvrHeight; + } +#endif // backup data int backupWidth = width; int backupHeight = height; @@ -580,21 +669,49 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio if( yFrac == 0 ) { +#if JVET_M0147_DMVR + m_if.filterHor(compID, (Pel*)refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, xFrac, rndRes, chFmt, clpRng, bilinearMC, bilinearMC); +#else m_if.filterHor(compID, (Pel*)refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, xFrac, rndRes, chFmt, clpRng); +#endif } else if( xFrac == 0 ) { +#if JVET_M0147_DMVR + m_if.filterVer(compID, (Pel*)refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, true, rndRes, chFmt, clpRng, bilinearMC, bilinearMC); +#else m_if.filterVer(compID, (Pel*)refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, true, rndRes, chFmt, clpRng); +#endif } else { +#if JVET_M0147_DMVR + PelBuf tmpBuf = dmvrWidth ? PelBuf(m_filteredBlockTmp[0][compID], Size(dmvrWidth, dmvrHeight)) : PelBuf(m_filteredBlockTmp[0][compID], pu.blocks[compID]); + if (dmvrWidth == 0) + tmpBuf.stride = dstBuf.stride; +#else PelBuf tmpBuf = PelBuf(m_filteredBlockTmp[0][compID], pu.blocks[compID]); tmpBuf.stride = dstBuf.stride; +#endif int vFilterSize = isLuma(compID) ? 
NTAPS_LUMA : NTAPS_CHROMA; +#if JVET_M0147_DMVR + if (bilinearMC) + { + vFilterSize = NTAPS_BILINEAR; + } +#endif +#if JVET_M0147_DMVR + m_if.filterHor(compID, (Pel*)refBuf.buf - ((vFilterSize >> 1) - 1) * refBuf.stride, refBuf.stride, tmpBuf.buf, tmpBuf.stride, backupWidth, backupHeight + vFilterSize - 1, xFrac, false, chFmt, clpRng, bilinearMC, bilinearMC); +#else m_if.filterHor(compID, (Pel*)refBuf.buf - ((vFilterSize >> 1) - 1) * refBuf.stride, refBuf.stride, tmpBuf.buf, tmpBuf.stride, backupWidth, backupHeight + vFilterSize - 1, xFrac, false, chFmt, clpRng); +#endif JVET_J0090_SET_CACHE_ENABLE( false ); +#if JVET_M0147_DMVR + m_if.filterVer(compID, (Pel*)tmpBuf.buf + ((vFilterSize >> 1) - 1) * tmpBuf.stride, tmpBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, false, rndRes, chFmt, clpRng, bilinearMC, bilinearMC); +#else m_if.filterVer(compID, (Pel*)tmpBuf.buf + ((vFilterSize >> 1) - 1) * tmpBuf.stride, tmpBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, false, rndRes, chFmt, clpRng); +#endif } JVET_J0090_SET_CACHE_ENABLE( true ); if (bioApplied && compID == COMPONENT_Y) @@ -632,6 +749,13 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio } #else refBuf.buf = refBuf.buf - refBuf.stride - 1; +#if JVET_M0147_DMVR + if (srcPadBuf) + { + refBuf.buf = srcPadBuf - srcPadStride - 1; + refBuf.stride = srcPadStride; + } +#endif dstBuf.buf = m_filteredBlockTmp[2 + m_iRefListIdx][compID] + dstBuf.stride + 1; bioSampleExtendBilinearFilter(refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, width - 2, height - 2, 1, xFrac, yFrac, rndRes, chFmt, clpRng); #endif @@ -1250,9 +1374,15 @@ void InterPrediction::motionCompensation( CodingUnit &cu, const RefPicList &eRef for( auto &pu : CU::traversePUs( cu ) ) { PelUnitBuf predBuf = cu.cs->getPredBuf( pu ); +#if JVET_M0147_DMVR + pu.mvRefine = true; +#endif motionCompensation( pu, predBuf, eRefPicList , luma, chroma ); +#if JVET_M0147_DMVR + 
pu.mvRefine = false; +#endif } } @@ -1452,6 +1582,445 @@ void InterPrediction::xWeightedTriangleBlk( const PredictionUnit &pu, const uint } } +#if JVET_M0147_DMVR +void InterPrediction::xPrefetchPad(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId) +{ + int offset, width, height; + int padsize; + Mv cMv; + const Picture* refPic = pu.cu->slice->getRefPic(refId, pu.refIdx[refId]); + int mvShift = (MV_FRACTIONAL_BITS_INTERNAL); + for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++) + { + cMv = Mv(pu.mv[refId].getHor(), pu.mv[refId].getVer()); + pcPad.bufs[compID].stride = (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA); + int filtersize = (compID == (COMPONENT_Y)) ? NTAPS_LUMA : NTAPS_CHROMA; + width = pcPad.bufs[compID].width; + height = pcPad.bufs[compID].height; + offset = (DMVR_NUM_ITERATION) * (pcPad.bufs[compID].stride + 1); + padsize = (DMVR_NUM_ITERATION) >> getComponentScaleX((ComponentID)compID, pu.chromaFormat); + int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat); + width += (filtersize - 1); + height += (filtersize - 1); + cMv += Mv(-(((filtersize >> 1) - 1) << mvshiftTemp), + -(((filtersize >> 1) - 1) << mvshiftTemp)); + clipMv(cMv, pu.lumaPos(), pu.lumaSize(),*pu.cs->sps); + /* Pre-fetch similar to HEVC*/ + { + CPelBuf refBuf; + Position Rec_offset = pu.blocks[compID].pos().offset(cMv.getHor() >> mvshiftTemp, cMv.getVer() >> mvshiftTemp); + refBuf = refPic->getRecoBuf(CompArea((ComponentID)compID, pu.chromaFormat, Rec_offset, pu.blocks[compID].size())); + PelBuf &dstBuf = pcPad.bufs[compID]; + g_pelBufOP.copyBuffer((Pel *)refBuf.buf, refBuf.stride, ((Pel *)dstBuf.buf) + offset, dstBuf.stride, width, height); + } + /*padding on all side of size DMVR_PAD_LENGTH*/ + { + g_pelBufOP.padding(pcPad.bufs[compID].buf + offset, pcPad.bufs[compID].stride, width, height, padsize); + } + } +} +inline int32_t div_for_maxq7(int64_t N, int64_t D) +{ + int32_t sign, q; + sign = 0; + if (N < 0) + { + sign = 1; + N = 
-N; + } + + q = 0; /* q collects a 3-bit quotient: the three compares below test N against 8*D, 4*D and 2*D, giving min(7, N / (2*D)) for D > 0 */ + D = (D << 3); + if (N >= D) + { + N -= D; + q++; + } + q = (q << 1); + + D = (D >> 1); + if (N >= D) + { + N -= D; + q++; + } + q = (q << 1); + + if (N >= (D >> 1)) + q++; + + if (sign) /* re-apply the sign that was stripped from N on entry */ + return (-q); + return(q); +} + +void xSubPelErrorSrfc(uint64_t *sadBuffer, int32_t *deltaMv) /* derives one sub-pel offset per axis from five costs: entry [0] is weighted twice against the [1],[3] pair (result in deltaMv[0]) and against the [2],[4] pair (result in deltaMv[1]); an axis whose denominator is 0 leaves its deltaMv entry untouched */ +{ + int64_t numerator, denominator; + int32_t mvDeltaSubPel; + int32_t mvSubPelLvl = 4;/*1: half pel, 2: Qpel, 3:1/8, 4: 1/16*/ + /*horizontal*/ + numerator = (int64_t)((sadBuffer[1] - sadBuffer[3]) << mvSubPelLvl); + denominator = (int64_t)((sadBuffer[1] + sadBuffer[3] - (sadBuffer[0] << 1))); + + if (0 != denominator) + { + if ((sadBuffer[1] != sadBuffer[0]) && (sadBuffer[3] != sadBuffer[0])) /* neither neighbour ties with entry [0]: use the quotient; a tie falls back to a fixed +/-8 step */ + { + mvDeltaSubPel = div_for_maxq7(numerator, denominator); + deltaMv[0] = (mvDeltaSubPel); + } + else + { + if (sadBuffer[1] == sadBuffer[0]) + { + deltaMv[0] = -8;// half pel + } + else + { + deltaMv[0] = 8;// half pel + } + } + } + + /*vertical*/ + numerator = (int64_t)((sadBuffer[2] - sadBuffer[4]) << mvSubPelLvl); + denominator = (int64_t)((sadBuffer[2] + sadBuffer[4] - (sadBuffer[0] << 1))); + if (0 != denominator) + { + if ((sadBuffer[2] != sadBuffer[0]) && (sadBuffer[4] != sadBuffer[0])) + { + mvDeltaSubPel = div_for_maxq7(numerator, denominator); + deltaMv[1] = (mvDeltaSubPel); + } + else + { + if (sadBuffer[2] == sadBuffer[0]) + { + deltaMv[1] = -8;// half pel + } + else + { + deltaMv[1] = 8;// half pel + } + } + } + return; +} + +void InterPrediction::xBIPMVRefine(int bd, Pel *pRefL0, Pel *pRefL1, uint64_t& minCost, int16_t *deltaMV, uint64_t *pSADsArray, int width, int height) /* walks m_pSearchOffset[SAD_BOTTOM..SAD_TOP_LEFT] around pRefL0/pRefL1 and records in minCost/deltaMV any offset whose cost is lower */ +{ + const int32_t refStrideL0 = m_biLinearBufStride; + const int32_t refStrideL1 = m_biLinearBufStride; + Pel *pRefL0Orig = pRefL0; + Pel *pRefL1Orig = pRefL1; + for (int nIdx = SAD_BOTTOM; nIdx <= SAD_TOP_LEFT; ++nIdx) + { + int32_t sadOffset = ((m_pSearchOffset[nIdx].getVer() * ((2 * DMVR_NUM_ITERATION) + 1)) + m_pSearchOffset[nIdx].getHor()); /* index of this candidate relative to pSADsArray in the (2 * DMVR_NUM_ITERATION + 1)-wide cost grid */ + pRefL0 = pRefL0Orig +
m_pSearchOffset[nIdx].hor + (m_pSearchOffset[nIdx].ver * refStrideL0); + pRefL1 = pRefL1Orig - m_pSearchOffset[nIdx].hor - (m_pSearchOffset[nIdx].ver * refStrideL1); + if (*(pSADsArray + sadOffset) == MAX_UINT64) + { + const uint64_t cost = xDMVRCost(bd, pRefL0, refStrideL0, pRefL1, refStrideL1, width, height); + *(pSADsArray + sadOffset) = cost; + } + if (nIdx == SAD_LEFT) + { + int32_t down = -1, right = -1; + if (pSADsArray[(((2 * DMVR_NUM_ITERATION) + 1))] < pSADsArray[-(((2 * DMVR_NUM_ITERATION) + 1))]) + { + down = 1; + } + if (pSADsArray[1] < pSADsArray[-1]) + { + right = 1; + } + m_pSearchOffset[SAD_TOP_LEFT].set(right, down); + } + if (*(pSADsArray + sadOffset) < minCost) + { + minCost = *(pSADsArray + sadOffset); + deltaMV[0] = m_pSearchOffset[nIdx].getHor(); + deltaMV[1] = m_pSearchOffset[nIdx].getVer(); + } + } +} + +void InterPrediction::xFinalPaddedMCForDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvSrc0, PelUnitBuf &pcYuvSrc1, PelUnitBuf &pcPad0, PelUnitBuf &pcPad1, const bool bioApplied + , const Mv mergeMV[NUM_REF_PIC_LIST_01] +) +{ + int offset, deltaIntMvX, deltaIntMvY; + + PelUnitBuf pcYUVTemp = pcYuvSrc0; + PelUnitBuf pcPadTemp = pcPad0; + /*always high precision MVs are used*/ + int mvShift = MV_FRACTIONAL_BITS_INTERNAL; + + for (int k = 0; k < NUM_REF_PIC_LIST_01; k++) + { + RefPicList refId = (RefPicList)k; + Mv cMv = pu.mv[refId]; + m_iRefListIdx = refId; + const Picture* refPic = pu.cu->slice->getRefPic(refId, pu.refIdx[refId]); + clipMv(cMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps); + + Mv startMv = mergeMV[refId]; + clipMv(startMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps); + + for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++) + { + int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat); + int leftPixelExtra; + if (compID == COMPONENT_Y) + { + leftPixelExtra = (NTAPS_LUMA >> 1) - 1; + } + else + { + leftPixelExtra = (NTAPS_CHROMA >> 1) - 1; + } + + deltaIntMvX = (cMv.getHor() >> mvshiftTemp) - + 
(startMv.getHor() >> mvshiftTemp); + deltaIntMvY = (cMv.getVer() >> mvshiftTemp) - + (startMv.getVer() >> mvshiftTemp); + + CHECK((abs(deltaIntMvX) > DMVR_NUM_ITERATION) || (abs(deltaIntMvY) > DMVR_NUM_ITERATION), "not expected DMVR movement"); + + offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (pcPadTemp.bufs[compID].stride + 1); + offset += (deltaIntMvY)* pcPadTemp.bufs[compID].stride; + offset += (deltaIntMvX); + PelBuf &srcBuf = pcPadTemp.bufs[compID]; + xPredInterBlk((ComponentID)compID, pu, refPic, cMv, pcYUVTemp, true, pu.cs->slice->getClpRngs().comp[compID], + bioApplied, false, 0, 0, 0, (srcBuf.buf + offset), pcPadTemp.bufs[compID].stride); + } + pcYUVTemp = pcYuvSrc1; + pcPadTemp = pcPad1; + } +} + +uint64_t InterPrediction::xDMVRCost(int bitDepth, Pel* pOrg, uint32_t orgStride, const Pel* pRef, uint32_t refStride, int width, int height) /* returns the luma distortion between two width x height blocks using the function chosen by setDistParam; each buffer parameter is named together with the stride that follows it, so the (pOrg, pRef, orgStride, refStride) call below forwards matching buffer/stride pairs even when the two strides differ */ +{ + DistParam cDistParam; + cDistParam.applyWeight = false; + cDistParam.useMR = false; + m_pcRdCost->setDistParam(cDistParam, pOrg, pRef, orgStride, refStride, bitDepth, COMPONENT_Y, width, height, 1); + uint64_t uiCost = cDistParam.distFunc(cDistParam); + return uiCost; +} + +void xDMVRSubPixelErrorSurface(bool notZeroCost, int16_t *totalDeltaMV, int16_t *deltaMV, uint64_t *pSADsArray) /* when notZeroCost is set and deltaMV is (0, 0), collects the cost at pSADsArray[0] plus its -1, -sadStride, +1 and +sadStride neighbours and adds the offset computed by xSubPelErrorSrfc to totalDeltaMV; otherwise does nothing */ +{ + + int sadStride = (((2 * DMVR_NUM_ITERATION) + 1)); + uint64_t sadbuffer[5]; + if (notZeroCost && deltaMV[0] == 0 && deltaMV[1] == 0) + { + int32_t tempDeltaMv[2] = { 0,0 }; + sadbuffer[0] = pSADsArray[0]; + sadbuffer[1] = pSADsArray[-1]; + sadbuffer[2] = pSADsArray[-sadStride]; + sadbuffer[3] = pSADsArray[1]; + sadbuffer[4] = pSADsArray[sadStride]; + xSubPelErrorSrfc(sadbuffer, tempDeltaMv); + totalDeltaMV[0] += tempDeltaMv[0]; + totalDeltaMV[1] += tempDeltaMv[1]; + } +} + +void InterPrediction::xinitMC(PredictionUnit& pu, const ClpRngs &clpRngs) +{ + const int refIdx0 = pu.refIdx[0]; + const int refIdx1 = pu.refIdx[1]; + /*use merge MV as starting MV*/ + Mv mergeMVL0(pu.mv[REF_PIC_LIST_0]); + Mv mergeMVL1(pu.mv[REF_PIC_LIST_1]);
+ + /*Clip the starting MVs*/ + clipMv(mergeMVL0, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps); + clipMv(mergeMVL1, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps); + + /*L0 MC for refinement*/ + { + int offset; + int leftPixelExtra = (NTAPS_LUMA >> 1) - 1; + offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (m_cYuvRefBuffDMVRL0.bufs[COMPONENT_Y].stride + 1); + offset += (-(int)DMVR_NUM_ITERATION)* (int)m_cYuvRefBuffDMVRL0.bufs[COMPONENT_Y].stride; + offset += (-(int)DMVR_NUM_ITERATION); + PelBuf srcBuf = m_cYuvRefBuffDMVRL0.bufs[COMPONENT_Y]; + PelUnitBuf yuvPredTempL0 = PelUnitBuf(pu.chromaFormat, PelBuf(m_cYuvPredTempDMVRL0, + (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION)), pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION))); + + xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(REF_PIC_LIST_0, refIdx0), mergeMVL0, yuvPredTempL0, true, clpRngs.comp[COMPONENT_Y], + false, false, pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION), true, ((Pel *)srcBuf.buf) + offset, srcBuf.stride + ); + } + + /*L1 MC for refinement*/ + { + int offset; + int leftPixelExtra = (NTAPS_LUMA >> 1) - 1; + offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (m_cYuvRefBuffDMVRL1.bufs[COMPONENT_Y].stride + 1); + offset += (-(int)DMVR_NUM_ITERATION)* (int)m_cYuvRefBuffDMVRL1.bufs[COMPONENT_Y].stride; + offset += (-(int)DMVR_NUM_ITERATION); + PelBuf srcBuf = m_cYuvRefBuffDMVRL1.bufs[COMPONENT_Y]; + PelUnitBuf yuvPredTempL1 = PelUnitBuf(pu.chromaFormat, PelBuf(m_cYuvPredTempDMVRL1, + (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION)), pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION))); + + xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(REF_PIC_LIST_1, refIdx1), mergeMVL1, yuvPredTempL1, true, clpRngs.comp[COMPONENT_Y], + false, false, pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION), true, ((Pel *)srcBuf.buf) + offset, srcBuf.stride + ); + } +} + +void 
InterPrediction::xProcessDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvDst, const ClpRngs &clpRngs, const bool bioApplied) +{ + int iterationCount = DMVR_NUM_ITERATION; + /*Always High Precision*/ + int mvShift = MV_FRACTIONAL_BITS_INTERNAL; + + /*use merge MV as starting MV*/ + Mv mergeMv[] = { pu.mv[REF_PIC_LIST_0] , pu.mv[REF_PIC_LIST_1] }; + + m_biLinearBufStride = (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION)); + + int dy = std::min<int>(pu.lumaSize().height, DMVR_SUBCU_HEIGHT); + int dx = std::min<int>(pu.lumaSize().width, DMVR_SUBCU_WIDTH); + /*L0 Padding*/ + m_cYuvRefBuffDMVRL0 = (pu.chromaFormat == CHROMA_400 ? + PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y())) : + PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y()), + PelBuf(m_cRefSamplesDMVRL0[1], pcYuvDst.Cb()), PelBuf(m_cRefSamplesDMVRL0[2], pcYuvDst.Cr()))); + + xPrefetchPad(pu, m_cYuvRefBuffDMVRL0, REF_PIC_LIST_0); + + /*L1 Padding*/ + m_cYuvRefBuffDMVRL1 = (pu.chromaFormat == CHROMA_400 ? 
+ PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y())) : + PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y()), PelBuf(m_cRefSamplesDMVRL1[1], pcYuvDst.Cb()), + PelBuf(m_cRefSamplesDMVRL1[2], pcYuvDst.Cr()))); + + xPrefetchPad(pu, m_cYuvRefBuffDMVRL1, REF_PIC_LIST_1); + + xinitMC(pu, clpRngs); + + // point the MC buffers at the centre position (iterationCount rows and columns in) so each iteration need not recompute the offset from the beginning + Pel *biLinearPredL0 = m_cYuvPredTempDMVRL0 + (iterationCount * m_biLinearBufStride) + iterationCount; + Pel *biLinearPredL1 = m_cYuvPredTempDMVRL1 + (iterationCount * m_biLinearBufStride) + iterationCount; + + Position puPos = pu.lumaPos(); + + int bd = pu.cs->slice->getClpRngs().comp[COMPONENT_Y].bd; + + { + int num = 0; + + int yStart = 0; + for (int y = puPos.y; y < (puPos.y + pu.lumaSize().height); y = y + dy, yStart = yStart + dy) + { + for (int x = puPos.x, xStart = 0; x < (puPos.x + pu.lumaSize().width); x = x + dx, xStart = xStart + dx) + { + uint64_t minCost = MAX_UINT64; + bool notZeroCost = true; + int16_t totalDeltaMV[2] = { 0,0 }; + int16_t deltaMV[2] = { 0, 0 }; + uint64_t *pSADsArray; + for (int i = 0; i < (((2 * DMVR_NUM_ITERATION) + 1) * ((2 * DMVR_NUM_ITERATION) + 1)); i++) + { + m_SADsArray[i] = MAX_UINT64; /* MAX_UINT64 marks a grid position whose cost has not been computed yet; xBIPMVRefine only evaluates such entries */ + } + pSADsArray = &m_SADsArray[(((2 * DMVR_NUM_ITERATION) + 1) * ((2 * DMVR_NUM_ITERATION) + 1)) >> 1]; /* middle element of the cost grid, so neighbours are reached with signed offsets */ + + Pel *addrL0Centre = biLinearPredL0 + yStart * m_biLinearBufStride + xStart; + Pel *addrL1Centre = biLinearPredL1 + yStart * m_biLinearBufStride + xStart; + for (int i = 0; i < iterationCount; i++) + { + deltaMV[0] = 0; + deltaMV[1] = 0; + Pel *addrL0 = addrL0Centre + totalDeltaMV[0] + (totalDeltaMV[1] * m_biLinearBufStride); + Pel *addrL1 = addrL1Centre - totalDeltaMV[0] - (totalDeltaMV[1] * m_biLinearBufStride); /* L1 is displaced by the negated L0 displacement */ + if (i == 0) + { + minCost = xDMVRCost(clpRngs.comp[COMPONENT_Y].bd, addrL0, m_biLinearBufStride, addrL1, m_biLinearBufStride, dx, dy); + if (minCost < ((4 * dx * (dy >> 1/*for alternate
line*/)))) + { + notZeroCost = false; + break; + } + pSADsArray[0] = minCost; + } + if (!minCost) + { + notZeroCost = false; + break; + } + + xBIPMVRefine(bd, addrL0, addrL1, minCost, deltaMV, pSADsArray, dx, dy); + + if (deltaMV[0] == 0 && deltaMV[1] == 0) + { + break; + } + totalDeltaMV[0] += deltaMV[0]; + totalDeltaMV[1] += deltaMV[1]; + pSADsArray += ((deltaMV[1] * (((2 * DMVR_NUM_ITERATION) + 1))) + deltaMV[0]); + } + + totalDeltaMV[0] = (totalDeltaMV[0] << mvShift); + totalDeltaMV[1] = (totalDeltaMV[1] << mvShift); + xDMVRSubPixelErrorSurface(notZeroCost, totalDeltaMV, deltaMV, pSADsArray); + + pu.mvdL0SubPu[num] = Mv(totalDeltaMV[0], totalDeltaMV[1]); + + num++; + } + } + } + + { + PredictionUnit subPu = pu; + subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(puPos.x, puPos.y, dx, dy))); + PelUnitBuf m_cYuvRefBuffSubCuDMVRL0; + PelUnitBuf m_cYuvRefBuffSubCuDMVRL1; + PelUnitBuf srcPred0 = (pu.chromaFormat == CHROMA_400 ? + PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvDst.Y())) : + PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvDst.Y()), PelBuf(m_acYuvPred[0][1], pcYuvDst.Cb()), PelBuf(m_acYuvPred[0][2], pcYuvDst.Cr()))); + PelUnitBuf srcPred1 = (pu.chromaFormat == CHROMA_400 ? 
+ PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvDst.Y())) : + PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvDst.Y()), PelBuf(m_acYuvPred[1][1], pcYuvDst.Cb()), PelBuf(m_acYuvPred[1][2], pcYuvDst.Cr()))); + + srcPred0 = srcPred0.subBuf(UnitAreaRelative(pu, subPu)); + srcPred1 = srcPred1.subBuf(UnitAreaRelative(pu, subPu)); + PelUnitBuf subPredBuf = pcYuvDst.subBuf(UnitAreaRelative(pu, subPu)); + + int x = 0, y = 0; + int xStart = 0, yStart = 0; + int num = 0; + + int dstStride[MAX_NUM_COMPONENT] = { pcYuvDst.bufs[COMPONENT_Y].stride, pcYuvDst.bufs[COMPONENT_Cb].stride, pcYuvDst.bufs[COMPONENT_Cr].stride }; + for (y = puPos.y; y < (puPos.y + pu.lumaSize().height); y = y + dy, yStart = yStart + dy) + { + for (x = puPos.x, xStart = 0; x < (puPos.x + pu.lumaSize().width); x = x + dx, xStart = xStart + dx) + { + subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(x, y, dx, dy))); + + subPu.mv[0] = mergeMv[REF_PIC_LIST_0] + pu.mvdL0SubPu[num]; + subPu.mv[1] = mergeMv[REF_PIC_LIST_1] - pu.mvdL0SubPu[num]; + m_cYuvRefBuffSubCuDMVRL0 = m_cYuvRefBuffDMVRL0.subBuf(UnitAreaRelative(pu, subPu)); + m_cYuvRefBuffSubCuDMVRL1 = m_cYuvRefBuffDMVRL1.subBuf(UnitAreaRelative(pu, subPu)); + xFinalPaddedMCForDMVR(subPu, srcPred0, srcPred1, m_cYuvRefBuffSubCuDMVRL0, m_cYuvRefBuffSubCuDMVRL1, bioApplied, mergeMv); + + subPredBuf.bufs[COMPONENT_Y].buf = pcYuvDst.bufs[COMPONENT_Y].buf + xStart + yStart * dstStride[COMPONENT_Y]; + subPredBuf.bufs[COMPONENT_Cb].buf = pcYuvDst.bufs[COMPONENT_Cb].buf + (xStart >> 1) + ((yStart >> 1) * dstStride[COMPONENT_Cb]); + subPredBuf.bufs[COMPONENT_Cr].buf = pcYuvDst.bufs[COMPONENT_Cr].buf + (xStart >> 1) + ((yStart >> 1) * dstStride[COMPONENT_Cr]); + xWeightedAverage(subPu, srcPred0, srcPred1, subPredBuf, subPu.cu->slice->getSPS()->getBitDepths(), subPu.cu->slice->clpRngs(), bioApplied); + num++; + } + } + } +} +#endif #if JVET_J0090_MEMORY_BANDWITH_MEASURE void InterPrediction::cacheAssign( CacheModel *cache ) { diff --git 
a/source/Lib/CommonLib/InterPrediction.h b/source/Lib/CommonLib/InterPrediction.h index 5574f28e05bdd8a2abb7b1ab9a54ea7a1a26f9c5..d25ef21d7539cebb5b9dd4ecfeb02b4695868863 100644 --- a/source/Lib/CommonLib/InterPrediction.h +++ b/source/Lib/CommonLib/InterPrediction.h @@ -94,6 +94,33 @@ protected: int m_iRefListIdx; PelStorage m_triangleBuf; Mv* m_storedMv; +#if JVET_M0147_DMVR + /*buffers for bilinear Filter data for DMVR refinement*/ + Pel* m_cYuvPredTempDMVRL0; + Pel* m_cYuvPredTempDMVRL1; + int m_biLinearBufStride; + /*buffers for padded data*/ + PelUnitBuf m_cYuvRefBuffDMVRL0; + PelUnitBuf m_cYuvRefBuffDMVRL1; + Pel* m_cRefSamplesDMVRL0[MAX_NUM_COMPONENT]; + Pel* m_cRefSamplesDMVRL1[MAX_NUM_COMPONENT]; + enum SAD_POINT_INDEX + { + NOT_AVAILABLE = -1, + SAD_BOTTOM = 0, + SAD_TOP, + SAD_RIGHT, + SAD_LEFT, + SAD_TOP_LEFT, + SAD_TOP_RIGHT, + SAD_BOTTOM_LEFT, + SAD_BOTTOM_RIGHT, + SAD_CENTER, + SAD_COUNT + }; + Mv m_pSearchOffset[5] = { Mv(0, 1), Mv(0, -1), Mv(1, 0), Mv(-1, 0), Mv(0, 0) }; + uint64_t m_SADsArray[((2 * DMVR_NUM_ITERATION) + 1) * ((2 * DMVR_NUM_ITERATION) + 1)]; +#endif Pel* m_gradX0; Pel* m_gradY0; @@ -112,10 +139,22 @@ protected: , const bool luma, const bool chroma ); void xPredInterBi ( PredictionUnit& pu, PelUnitBuf &pcYuvPred ); +#if JVET_M0147_DMVR void xPredInterBlk ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv& _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng , const bool& bioApplied , bool isIBC + , SizeType dmvrWidth = 0 + , SizeType dmvrHeight = 0 + , bool bilinearMC = false + , Pel *srcPadBuf = NULL + , int32_t srcPadStride = 0 ); +#else + void xPredInterBlk ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv& _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng + , const bool& bioApplied + , bool isIBC + ); +#endif void xAddBIOAvg4 (const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, 
const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng); #if JVET_M0063_BDOF_FIX @@ -169,6 +208,16 @@ public: #else void weightedTriangleBlk ( PredictionUnit &pu, bool weights, const bool splitDir, int32_t channel, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 ); #endif +#if JVET_M0147_DMVR + void xPrefetchPad(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId); + void xFinalPaddedMCForDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvSrc0, PelUnitBuf &pcYuvSrc1, PelUnitBuf &pcPad0, PelUnitBuf &pcPad1, const bool bioApplied + , const Mv startMV[NUM_REF_PIC_LIST_01] + ); + void xBIPMVRefine(int bd, Pel *pRefL0, Pel *pRefL1, uint64_t& minCost, int16_t *deltaMV, uint64_t *pSADsArray, int width, int height); + uint64_t xDMVRCost(int bitDepth, Pel* pRef, uint32_t refStride, const Pel* pOrg, uint32_t orgStride, int width, int height); + void xinitMC(PredictionUnit& pu, const ClpRngs &clpRngs); + void xProcessDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvDst, const ClpRngs &clpRngs, const bool bioApplied ); +#endif #if JVET_J0090_MEMORY_BANDWITH_MEASURE void cacheAssign( CacheModel *cache ); diff --git a/source/Lib/CommonLib/InterpolationFilter.cpp b/source/Lib/CommonLib/InterpolationFilter.cpp index 4daa5056e7e2a603f5773e009e481101d6bde309..3c16052f66e480c48286f60c4eca9fade9f73fff 100644 --- a/source/Lib/CommonLib/InterpolationFilter.cpp +++ b/source/Lib/CommonLib/InterpolationFilter.cpp @@ -131,6 +131,27 @@ const TFilterCoeff InterpolationFilter::m_bilinearFilter[LUMA_INTERPOLATION_FILT { 4, 60, }, }; +#if JVET_M0147_DMVR +const TFilterCoeff InterpolationFilter::m_bilinearFilterPrec4[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_BILINEAR] = +{ + { 16, 0, }, + { 15, 1, }, + { 14, 2, }, + { 13, 3, }, + { 12, 4, }, + { 11, 5, }, + { 10, 6, }, + { 9, 7, }, + { 8, 8, }, + { 7, 9, }, + { 6, 10, }, + { 5, 11, }, + { 4, 12, }, + { 3, 13, }, + { 2, 14, }, + 
{ 1, 15, } +}; +#endif // ==================================================================================================================== // Private member functions // ==================================================================================================================== @@ -197,7 +218,11 @@ InterpolationFilter::InterpolationFilter() // //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// template<bool isFirst, bool isLast> +#if JVET_M0147_DMVR +void InterpolationFilter::filterCopy( const ClpRng& clpRng, const Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height, bool biMCForDMVR) +#else void InterpolationFilter::filterCopy( const ClpRng& clpRng, const Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height ) +#endif { int row, col; @@ -223,6 +248,40 @@ void InterpolationFilter::filterCopy( const ClpRng& clpRng, const Pel *src, int { const int shift = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd)); +#if JVET_M0147_DMVR + if (biMCForDMVR) + { + int shift10BitOut, offset; + if ((clpRng.bd - IF_INTERNAL_PREC_BILINEAR) > 0) + { + shift10BitOut = (clpRng.bd - IF_INTERNAL_PREC_BILINEAR); + offset = (1 << (shift10BitOut - 1)); + for (row = 0; row < height; row++) + { + for (col = 0; col < width; col++) + { + dst[col] = (src[col] + offset) >> shift10BitOut; + } + src += srcStride; + dst += dstStride; + } + } + else + { + shift10BitOut = (IF_INTERNAL_PREC_BILINEAR - clpRng.bd); + for (row = 0; row < height; row++) + { + for (col = 0; col < width; col++) + { + dst[col] = src[col] << shift10BitOut; + } + src += srcStride; + dst += dstStride; + } + } + } + else +#endif for (row = 0; row < height; row++) { for (col = 0; col < width; col++) @@ -240,6 +299,40 @@ void InterpolationFilter::filterCopy( const ClpRng& clpRng, const Pel *src, int { const int shift = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd)); +#if JVET_M0147_DMVR 
+ if (biMCForDMVR) + { + int shift10BitOut, offset; + if ((clpRng.bd - IF_INTERNAL_PREC_BILINEAR) > 0) + { + shift10BitOut = (clpRng.bd - IF_INTERNAL_PREC_BILINEAR); + offset = (1 << (shift10BitOut - 1)); + for (row = 0; row < height; row++) + { + for (col = 0; col < width; col++) + { + dst[col] = (src[col] + offset) >> shift10BitOut; + } + src += srcStride; + dst += dstStride; + } + } + else + { + shift10BitOut = (IF_INTERNAL_PREC_BILINEAR - clpRng.bd); + for (row = 0; row < height; row++) + { + for (col = 0; col < width; col++) + { + dst[col] = src[col] << shift10BitOut; + } + src += srcStride; + dst += dstStride; + } + } + } + else +#endif for (row = 0; row < height; row++) { for (col = 0; col < width; col++) @@ -282,7 +375,11 @@ void InterpolationFilter::filterCopy( const ClpRng& clpRng, const Pel *src, int // //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// template<int N, bool isVertical, bool isFirst, bool isLast> +#if JVET_M0147_DMVR +void InterpolationFilter::filter(const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff, bool biMCForDMVR) +#else void InterpolationFilter::filter(const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff) +#endif { int row, col; @@ -327,6 +424,13 @@ void InterpolationFilter::filter(const ClpRng& clpRng, Pel const *src, int srcSt offset = (isFirst) ? 
-IF_INTERNAL_OFFS << shift : 0; } +#if JVET_M0147_DMVR + if (biMCForDMVR) + { + shift = IF_FILTER_PREC_BILINEAR - (IF_INTERNAL_PREC_BILINEAR - clpRng.bd); + offset = 1 << (shift - 1); + } +#endif for (row = 0; row < height; row++) { for (col = 0; col < width; col++) @@ -387,20 +491,36 @@ void InterpolationFilter::filter(const ClpRng& clpRng, Pel const *src, int srcSt * \param coeff Pointer to filter taps */ template<int N> +#if JVET_M0147_DMVR +void InterpolationFilter::filterHor(const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, bool isLast, TFilterCoeff const *coeff, bool biMCForDMVR) +#else void InterpolationFilter::filterHor(const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, bool isLast, TFilterCoeff const *coeff) +#endif { //#if ENABLE_SIMD_OPT_MCIF if( N == 8 ) { +#if JVET_M0147_DMVR + m_filterHor[0][1][isLast](clpRng, src, srcStride, dst, dstStride, width, height, coeff, biMCForDMVR); +#else m_filterHor[0][1][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, coeff ); +#endif } else if( N == 4 ) { +#if JVET_M0147_DMVR + m_filterHor[1][1][isLast](clpRng, src, srcStride, dst, dstStride, width, height, coeff, biMCForDMVR); +#else m_filterHor[1][1][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, coeff ); +#endif } else if( N == 2 ) { +#if JVET_M0147_DMVR + m_filterHor[2][1][isLast](clpRng, src, srcStride, dst, dstStride, width, height, coeff, biMCForDMVR); +#else m_filterHor[2][1][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, coeff ); +#endif } else { @@ -424,20 +544,36 @@ void InterpolationFilter::filterHor(const ClpRng& clpRng, Pel const *src, int sr * \param coeff Pointer to filter taps */ template<int N> +#if JVET_M0147_DMVR +void InterpolationFilter::filterVer(const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, bool isFirst, bool isLast, TFilterCoeff const *coeff, 
bool biMCForDMVR) +#else void InterpolationFilter::filterVer(const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, bool isFirst, bool isLast, TFilterCoeff const *coeff) +#endif { //#if ENABLE_SIMD_OPT_MCIF if( N == 8 ) { +#if JVET_M0147_DMVR + m_filterVer[0][isFirst][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, coeff, biMCForDMVR); +#else m_filterVer[0][isFirst][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, coeff ); +#endif } else if( N == 4 ) { +#if JVET_M0147_DMVR + m_filterVer[1][isFirst][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, coeff, biMCForDMVR); +#else m_filterVer[1][isFirst][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, coeff ); +#endif } else if( N == 2 ) { +#if JVET_M0147_DMVR + m_filterVer[2][isFirst][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, coeff, biMCForDMVR); +#else m_filterVer[2][isFirst][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, coeff ); +#endif } else{ THROW( "Invalid tap number" ); @@ -463,29 +599,49 @@ void InterpolationFilter::filterVer(const ClpRng& clpRng, Pel const *src, int sr * \param fmt Chroma format * \param bitDepth Bit depth */ +#if JVET_M0147_DMVR +void InterpolationFilter::filterHor( const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx, bool biMCForDMVR) +#else void InterpolationFilter::filterHor( const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx ) +#endif { if( frac == 0 ) { +#if JVET_M0147_DMVR + m_filterCopy[true][isLast](clpRng, src, srcStride, dst, dstStride, width, height, biMCForDMVR); +#else m_filterCopy[true][isLast]( clpRng, src, srcStride, dst, dstStride, width, height ); +#endif } else 
if( isLuma( compID ) ) { CHECK( frac < 0 || frac >= LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction" ); if( nFilterIdx == 1 ) { +#if JVET_M0147_DMVR + filterHor<NTAPS_BILINEAR>(clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_bilinearFilterPrec4[frac], biMCForDMVR); +#else filterHor<NTAPS_BILINEAR>(clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_bilinearFilter[frac]); +#endif } else { +#if JVET_M0147_DMVR + filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilter[frac], biMCForDMVR); +#else filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilter[frac] ); +#endif } } else { const uint32_t csx = getComponentScaleX( compID, fmt ); CHECK( frac < 0 || csx >= 2 || ( frac << ( 1 - csx ) ) >= CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction" ); +#if JVET_M0147_DMVR + filterHor<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_chromaFilter[frac << ( 1 - csx )], biMCForDMVR); +#else filterHor<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_chromaFilter[frac << ( 1 - csx )] ); +#endif } } @@ -506,29 +662,49 @@ void InterpolationFilter::filterHor( const ComponentID compID, Pel const *src, i * \param fmt Chroma format * \param bitDepth Bit depth */ +#if JVET_M0147_DMVR +void InterpolationFilter::filterVer( const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx, bool biMCForDMVR) +#else void InterpolationFilter::filterVer( const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx) +#endif { if( frac == 0 ) { +#if JVET_M0147_DMVR + m_filterCopy[isFirst][isLast](clpRng, src, 
srcStride, dst, dstStride, width, height, biMCForDMVR); +#else m_filterCopy[isFirst][isLast]( clpRng, src, srcStride, dst, dstStride, width, height ); +#endif } else if( isLuma( compID ) ) { CHECK( frac < 0 || frac >= LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction" ); if (nFilterIdx == 1) { +#if JVET_M0147_DMVR + filterVer<NTAPS_BILINEAR>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_bilinearFilterPrec4[frac], biMCForDMVR); +#else filterVer<NTAPS_BILINEAR>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_bilinearFilter[frac]); +#endif } else { +#if JVET_M0147_DMVR + filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilter[frac], biMCForDMVR); +#else filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilter[frac] ); +#endif } } else { const uint32_t csy = getComponentScaleY( compID, fmt ); CHECK( frac < 0 || csy >= 2 || ( frac << ( 1 - csy ) ) >= CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction" ); +#if JVET_M0147_DMVR + filterVer<NTAPS_CHROMA>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_chromaFilter[frac << (1 - csy)], biMCForDMVR); +#else filterVer<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_chromaFilter[frac << ( 1 - csy )] ); +#endif } } diff --git a/source/Lib/CommonLib/InterpolationFilter.h b/source/Lib/CommonLib/InterpolationFilter.h index e4ca95491de7a1f841fb35aed7a2ebf7b2053036..be366a5477bc1ad7a38ee3bcf092684a93f93407 100644 --- a/source/Lib/CommonLib/InterpolationFilter.h +++ b/source/Lib/CommonLib/InterpolationFilter.h @@ -48,7 +48,10 @@ #define IF_INTERNAL_PREC 14 ///< Number of bits for internal precision #define IF_FILTER_PREC 6 ///< Log2 of sum of filter taps #define IF_INTERNAL_OFFS (1<<(IF_INTERNAL_PREC-1)) ///< Offset used internally - +#if JVET_M0147_DMVR +#define 
IF_INTERNAL_PREC_BILINEAR 10 ///< Number of bits for internal precision +#define IF_FILTER_PREC_BILINEAR 4 ///< Bilinear filter coeff precision so that intermediate value will not exceed 16 bit for SIMD - bit exact +#endif /** * \brief Interpolation filter class */ @@ -57,17 +60,36 @@ class InterpolationFilter static const TFilterCoeff m_lumaFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA]; ///< Luma filter taps static const TFilterCoeff m_chromaFilter[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_CHROMA]; ///< Chroma filter taps static const TFilterCoeff m_bilinearFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_BILINEAR]; ///< bilinear filter taps +#if JVET_M0147_DMVR + static const TFilterCoeff m_bilinearFilterPrec4[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_BILINEAR]; ///< bilinear filter taps +#endif public: template<bool isFirst, bool isLast> +#if JVET_M0147_DMVR + static void filterCopy( const ClpRng& clpRng, const Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height, bool biMCForDMVR); +#else static void filterCopy( const ClpRng& clpRng, const Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height ); +#endif template<int N, bool isVertical, bool isFirst, bool isLast> +#if JVET_M0147_DMVR + static void filter(const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff, bool biMCForDMVR); +#else static void filter(const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff); - +#endif template<int N> +#if JVET_M0147_DMVR + void filterHor(const ClpRng& clpRng, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, bool isLast, TFilterCoeff const *coeff, bool biMCForDMVR); +#else void filterHor(const ClpRng& clpRng, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, bool isLast, TFilterCoeff 
const *coeff); +#endif + template<int N> +#if JVET_M0147_DMVR + void filterVer(const ClpRng& clpRng, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, bool isFirst, bool isLast, TFilterCoeff const *coeff, bool biMCForDMVR); +#else void filterVer(const ClpRng& clpRng, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, bool isFirst, bool isLast, TFilterCoeff const *coeff); +#endif protected: #if JVET_J0090_MEMORY_BANDWITH_MEASURE @@ -76,10 +98,21 @@ protected: public: InterpolationFilter(); ~InterpolationFilter() {} - +#if JVET_M0147_DMVR + void( *m_filterHor[3][2][2] )( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff, bool biMCForDMVR); +#else void( *m_filterHor[3][2][2] )( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff ); +#endif +#if JVET_M0147_DMVR + void( *m_filterVer[3][2][2] )( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff, bool biMCForDMVR); +#else void( *m_filterVer[3][2][2] )( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff ); +#endif +#if JVET_M0147_DMVR + void( *m_filterCopy[2][2] ) ( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, bool biMCForDMVR); +#else void( *m_filterCopy[2][2] ) ( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height ); +#endif void initInterpolationFilter( bool enable ); #ifdef TARGET_SIMD_X86 @@ -87,9 +120,16 @@ public: template <X86_VEXT vext> void _initInterpolationFilterX86(); #endif - +#if JVET_M0147_DMVR + void filterHor(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isLast, const 
ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx = 0, bool biMCForDMVR = false); +#else void filterHor(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx = 0); +#endif +#if JVET_M0147_DMVR + void filterVer(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx = 0, bool biMCForDMVR = false); +#else void filterVer(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx = 0); +#endif #if JVET_J0090_MEMORY_BANDWITH_MEASURE void cacheAssign( CacheModel *cache ) { m_cacheModel = cache; } #endif diff --git a/source/Lib/CommonLib/RdCost.cpp b/source/Lib/CommonLib/RdCost.cpp index 097c0cab7f19d722ea5bb92ccc20d43a52b15e82..5ca4883b2e2c9f6a3074e91885fbe8cbad3df0a8 100644 --- a/source/Lib/CommonLib/RdCost.cpp +++ b/source/Lib/CommonLib/RdCost.cpp @@ -336,12 +336,16 @@ void RdCost::setDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY, rcDP.cur.stride = iRefStride; rcDP.cur.width = width; rcDP.cur.height = height; - +#if JVET_M0147_DMVR + rcDP.subShift = subShiftMode; +#endif rcDP.step = step; rcDP.maximumDistortionForEarlyExit = std::numeric_limits<Distortion>::max(); - +#if JVET_M0147_DMVR + CHECK( useHadamard || rcDP.useMR, "only used in xDMVRCost with these default parameters (so far...)" ); +#else CHECK( useHadamard || rcDP.useMR || subShiftMode > 0, "only used in xDirectMCCost with these default parameters (so far...)" ); - +#endif if ( bioApplied ) { rcDP.distFunc = m_afpDistortFunc[ DF_SAD_INTERMEDIATE_BITDEPTH ]; diff --git a/source/Lib/CommonLib/Slice.cpp b/source/Lib/CommonLib/Slice.cpp index 
e8dc0af47d229af8cf994aa4a030a4145325c056..260ca2ecde1e1787b1275fcde2de946a95cc321d 100644 --- a/source/Lib/CommonLib/Slice.cpp +++ b/source/Lib/CommonLib/Slice.cpp @@ -1864,6 +1864,9 @@ SPS::SPS() #if JVET_M0246_AFFINE_AMVR , m_affineAmvrEnabledFlag ( false ) #endif +#if JVET_M0147_DMVR +, m_DMVR ( false ) +#endif #if HEVC_VPS , m_VPSId ( 0) #endif diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h index 298542f744998bba1cb3dd4dcbb3f340fe397349..b4ab4227da91904131bca169551c933b8428997e 100644 --- a/source/Lib/CommonLib/Slice.h +++ b/source/Lib/CommonLib/Slice.h @@ -989,6 +989,9 @@ private: #if JVET_M0246_AFFINE_AMVR bool m_affineAmvrEnabledFlag; #endif +#if JVET_M0147_DMVR + bool m_DMVR; +#endif #if HEVC_VPS int m_VPSId; #endif @@ -1248,7 +1251,10 @@ public: bool getDisFracMmvdEnabledFlag() const { return m_disFracMmvdEnabledFlag; } void setDisFracMmvdEnabledFlag( bool b ) { m_disFracMmvdEnabledFlag = b; } #endif - +#if JVET_M0147_DMVR + bool getUseDMVR()const { return m_DMVR; } + void setUseDMVR(bool b) { m_DMVR = b; } +#endif uint32_t getMaxTLayers() const { return m_uiMaxTLayers; } void setMaxTLayers( uint32_t uiMaxTLayers ) { CHECK( uiMaxTLayers > MAX_TLAYER, "Invalid number T-layers" ); m_uiMaxTLayers = uiMaxTLayers; } diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 5b2baf8cfdfe0619ca4b254f2be18d42f474a14e..97ec5e99a1d7640b0bc298f1b92065eba3bcda8b 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -116,6 +116,8 @@ #define JVET_M0255_FRACMMVD_SWITCH 1 // disable fractional MVD in MMVD adaptively #define JVET_M0823_MMVD_ENCOPT 1 // encoder optimization for MMVD +#define JVET_M0147_DMVR 1 //Decoder side Motion Vector Refinement + #if JVET_M0464_UNI_MTS typedef std::pair<int, bool> TrMode; typedef std::pair<int, int> TrCost; diff --git a/source/Lib/CommonLib/Unit.cpp b/source/Lib/CommonLib/Unit.cpp index 
2f36e96b0d2a65f9441719bf3a30e011c9dcb9aa..138617abf7246764d22a2a6348f93c2ffee6ea68 100644 --- a/source/Lib/CommonLib/Unit.cpp +++ b/source/Lib/CommonLib/Unit.cpp @@ -354,6 +354,13 @@ void PredictionUnit::initData() mergeType = MRG_TYPE_DEFAULT_N; bv.setZero(); bvd.setZero(); +#if JVET_M0147_DMVR + mvRefine = false; + for (uint32_t i = 0; i < MAX_NUM_SUBCU_DMVR; i++) + { + mvdL0SubPu[i].setZero(); + } +#endif for (uint32_t i = 0; i < NUM_REF_PIC_LIST_01; i++) { mvpIdx[i] = MAX_UCHAR; @@ -407,6 +414,13 @@ PredictionUnit& PredictionUnit::operator=(const InterPredictionData& predData) mergeType = predData.mergeType; bv = predData.bv; bvd = predData.bvd; +#if JVET_M0147_DMVR + mvRefine = predData.mvRefine; + for (uint32_t i = 0; i < MAX_NUM_SUBCU_DMVR; i++) + { + mvdL0SubPu[i] = predData.mvdL0SubPu[i]; + } +#endif for (uint32_t i = 0; i < NUM_REF_PIC_LIST_01; i++) { mvpIdx[i] = predData.mvpIdx[i]; @@ -452,6 +466,13 @@ PredictionUnit& PredictionUnit::operator=( const PredictionUnit& other ) mergeType = other.mergeType; bv = other.bv; bvd = other.bvd; +#if JVET_M0147_DMVR + mvRefine = other.mvRefine; + for (uint32_t i = 0; i < MAX_NUM_SUBCU_DMVR; i++) + { + mvdL0SubPu[i] = other.mvdL0SubPu[i]; + } +#endif for (uint32_t i = 0; i < NUM_REF_PIC_LIST_01; i++) { mvpIdx[i] = other.mvpIdx[i]; diff --git a/source/Lib/CommonLib/Unit.h b/source/Lib/CommonLib/Unit.h index 064392af599e821ac9e325f0aad37ee2bae4601d..f2f30097778855feb8d00a7282a7a14ac2544476 100644 --- a/source/Lib/CommonLib/Unit.h +++ b/source/Lib/CommonLib/Unit.h @@ -372,6 +372,10 @@ struct InterPredictionData Mv mv [NUM_REF_PIC_LIST_01]; int16_t refIdx [NUM_REF_PIC_LIST_01]; MergeType mergeType; +#if JVET_M0147_DMVR + bool mvRefine; + Mv mvdL0SubPu[MAX_NUM_SUBCU_DMVR]; +#endif Mv mvdAffi [NUM_REF_PIC_LIST_01][3]; Mv mvAffi[NUM_REF_PIC_LIST_01][3]; bool mhIntraFlag; diff --git a/source/Lib/CommonLib/UnitTools.cpp b/source/Lib/CommonLib/UnitTools.cpp index 
eaa173a840b435c44a48ea45ab0e66fffb0d8299..e73eea309d8b5e5af61d6aa6d47a8cb5689fb522 100644 --- a/source/Lib/CommonLib/UnitTools.cpp +++ b/source/Lib/CommonLib/UnitTools.cpp @@ -65,7 +65,39 @@ UnitArea CS::getArea( const CodingStructure &cs, const UnitArea &area, const Cha { return isDualITree( cs ) ? area.singleChan( chType ) : area; } - +#if JVET_M0147_DMVR +void CS::setRefinedMotionField(CodingStructure &cs) +{ + for (CodingUnit *cu : cs.cus) + { + for (auto &pu : CU::traversePUs(*cu)) + { + PredictionUnit subPu = pu; + int dx, dy, x, y, num = 0; + dy = std::min<int>(pu.lumaSize().height, DMVR_SUBCU_HEIGHT); + dx = std::min<int>(pu.lumaSize().width, DMVR_SUBCU_WIDTH); + Position puPos = pu.lumaPos(); + if (PU::checkDMVRCondition(pu)) + { + for (y = puPos.y; y < (puPos.y + pu.lumaSize().height); y = y + dy) + { + for (x = puPos.x; x < (puPos.x + pu.lumaSize().width); x = x + dx) + { + subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(x, y, dx, dy))); + subPu.mv[0] = pu.mv[0]; + subPu.mv[1] = pu.mv[1]; + subPu.mv[REF_PIC_LIST_0] += pu.mvdL0SubPu[num]; + subPu.mv[REF_PIC_LIST_1] -= pu.mvdL0SubPu[num]; + pu.mvdL0SubPu[num].setZero(); + num++; + PU::spanMotionInfo(subPu); + } + } + } + } + } +} +#endif // CU tools bool CU::isIntra(const CodingUnit &cu) @@ -1426,6 +1458,27 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, } mrgCtx.numValidMergeCand = uiArrayAddr; } +#if JVET_M0147_DMVR +bool PU::checkDMVRCondition(const PredictionUnit& pu) +{ + if (pu.cs->sps->getUseDMVR()) + { + return pu.mergeFlag + && pu.mergeType == MRG_TYPE_DEFAULT_N + && !pu.cu->affine + && !pu.mmvdMergeFlag + && !pu.cu->mmvdSkip + && PU::isBiPredFromDifferentDirEqDistPoc(pu) + && (pu.lheight() >= 8) + && ((pu.lheight() * pu.lwidth()) >= 64) + ; + } + else + { + return false; + } +} +#endif // for ibc pu validation bool PU::isBlockVectorValid(PredictionUnit& pu, int xPos, int yPos, int width, int height, int picWidth, int picHeight, int xStartInCU, int 
yStartInCU, int xBv, int yBv, int ctuSize) { @@ -3820,7 +3873,25 @@ bool PU::isBiPredFromDifferentDir( const PredictionUnit& pu ) return false; } - +#if JVET_M0147_DMVR +bool PU::isBiPredFromDifferentDirEqDistPoc(const PredictionUnit& pu) +{ + if (pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0) + { + const int poc0 = pu.cu->slice->getRefPOC(REF_PIC_LIST_0, pu.refIdx[0]); + const int poc1 = pu.cu->slice->getRefPOC(REF_PIC_LIST_1, pu.refIdx[1]); + const int poc = pu.cu->slice->getPOC(); + if ((poc - poc0)*(poc - poc1) < 0) + { + if (abs(poc - poc0) == abs(poc - poc1)) + { + return true; + } + } + } + return false; +} +#endif void PU::restrictBiPredMergeCands( const PredictionUnit &pu, MergeCtx& mergeCtx ) { if( PU::isBipredRestriction( pu ) ) diff --git a/source/Lib/CommonLib/UnitTools.h b/source/Lib/CommonLib/UnitTools.h index a225e52997ce432461a14de0526fe1e23176e528..2e30af9bb25a4c4d2274ffe0096e6569672f1a7c 100644 --- a/source/Lib/CommonLib/UnitTools.h +++ b/source/Lib/CommonLib/UnitTools.h @@ -49,6 +49,9 @@ namespace CS uint64_t getEstBits ( const CodingStructure &cs ); UnitArea getArea ( const CodingStructure &cs, const UnitArea &area, const ChannelType chType ); bool isDualITree ( const CodingStructure &cs ); +#if JVET_M0147_DMVR + void setRefinedMotionField(CodingStructure &cs); +#endif } @@ -151,6 +154,9 @@ namespace PU ); bool getInterMergeSubPuRecurCand(const PredictionUnit &pu, MergeCtx &mrgCtx, const int count); bool isBiPredFromDifferentDir (const PredictionUnit &pu); +#if JVET_M0147_DMVR + bool isBiPredFromDifferentDirEqDistPoc(const PredictionUnit &pu); +#endif void restrictBiPredMergeCands (const PredictionUnit &pu, MergeCtx& mrgCtx); #if JVET_M0068_M0171_MMVD_CLEANUP void restrictBiPredMergeCandsOne (PredictionUnit &pu); @@ -175,6 +181,9 @@ namespace PU void getIbcMVPsEncOnly(PredictionUnit &pu, Mv* MvPred, int& nbPred); bool getDerivedBV(PredictionUnit &pu, const Mv& currentMv, Mv& derivedMv); bool isBlockVectorValid(PredictionUnit& pu, int xPos, int yPos, 
int width, int height, int picWidth, int picHeight, int xStartInCU, int yStartInCU, int xBv, int yBv, int ctuSize); +#if JVET_M0147_DMVR + bool checkDMVRCondition(const PredictionUnit& pu); +#endif } // TU tools diff --git a/source/Lib/CommonLib/x86/BufferX86.h b/source/Lib/CommonLib/x86/BufferX86.h index e83a00ef15352cbe221bd78f9ae5eeeb7299fd71..15da0b788cc1fa0a03f52f76b9b97756be0d2663 100644 --- a/source/Lib/CommonLib/x86/BufferX86.h +++ b/source/Lib/CommonLib/x86/BufferX86.h @@ -128,6 +128,125 @@ void addAvg_SSE( const int16_t* src0, int src0Stride, const int16_t* src1, int s } } +#if JVET_M0147_DMVR +template<X86_VEXT vext> +void copyBufferSimd(Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height) +{ + __m128i x; +#ifdef USE_AVX2 + __m256i x16; +#endif + int j, temp; + for (int i = 0; i < height; i++) + { + j = 0; + temp = width; +#ifdef USE_AVX2 + while ((temp >> 4) > 0) + { + x16 = _mm256_loadu_si256((const __m256i*)(&src[i * srcStride + j])); + _mm256_storeu_si256((__m256i*)(&dst[i * dstStride + j]), x16); + j += 16; + temp -= 16; + } +#endif + while ((temp >> 3) > 0) + { + x = _mm_loadu_si128((const __m128i*)(&src[ i * srcStride + j])); + _mm_storeu_si128((__m128i*)(&dst[ i * dstStride + j]), x); + j += 8; + temp -= 8; + } + while ((temp >> 2) > 0) + { + x = _mm_loadl_epi64((const __m128i*)(&src[i * srcStride + j])); + _mm_storel_epi64((__m128i*)(&dst[i*dstStride + j]), x); + j += 4; + temp -= 4; + } + while (temp > 0) + { + dst[i * dstStride + j] = src[i * srcStride + j]; + j++; + temp--; + } + } +} + + +template<X86_VEXT vext> +void paddingSimd(Pel *dst, int stride, int width, int height, int padSize) +{ + __m128i x; +#ifdef USE_AVX2 + __m256i x16; +#endif + int temp, j; + for (int i = 1; i <= padSize; i++) + { + j = 0; + temp = width; +#ifdef USE_AVX2 + while ((temp >> 4) > 0) + { + + x16 = _mm256_loadu_si256((const __m256i*)(&(dst[j]))); + _mm256_storeu_si256((__m256i*)(dst + j - i*stride), x16); + x16 = _mm256_loadu_si256((const 
__m256i*)(dst + j + (height - 1)*stride)); + _mm256_storeu_si256((__m256i*)(dst + j + (height - 1 + i)*stride), x16); + + + j = j + 16; + temp = temp - 16; + } +#endif + while ((temp >> 3) > 0) + { + + x = _mm_loadu_si128((const __m128i*)(&(dst[j]))); + _mm_storeu_si128((__m128i*)(dst + j - i*stride), x); + x = _mm_loadu_si128((const __m128i*)(dst + j + (height - 1)*stride)); + _mm_storeu_si128((__m128i*)(dst + j + (height - 1 + i)*stride), x); + + j = j + 8; + temp = temp - 8; + } + while ((temp >> 2) > 0) + { + x = _mm_loadl_epi64((const __m128i*)(&dst[j])); + _mm_storel_epi64((__m128i*)(dst + j - i*stride), x); + x = _mm_loadl_epi64((const __m128i*)(dst + j + (height - 1)*stride)); + _mm_storel_epi64((__m128i*)(dst + j + (height - 1 + i)*stride), x); + + j = j + 4; + temp = temp - 4; + } + while (temp > 0) + { + dst[j - i*stride] = dst[j]; + dst[j + (height - 1 + i)*stride] = dst[j + (height - 1)*stride]; + j++; + temp--; + } + } + + + //Left and Right Padding + Pel* ptr1 = dst - padSize*stride; + Pel* ptr2 = dst - padSize*stride + width - 1; + int offset = 0; + for (int i = 0; i < height + 2 * padSize; i++) + { + offset = stride * i; + for (int j = 1; j <= padSize; j++) + { + *(ptr1 - j + offset) = *(ptr1 + offset); + *(ptr2 + j + offset) = *(ptr2 + offset); + } + + } +} +#endif template< X86_VEXT vext > void addBIOAvg4_SSE(const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng) { @@ -848,6 +967,10 @@ void PelBufferOps::_initPelBufOpsX86() calcBIOPar = calcBIOPar_SSE<vext>; calcBlkGradient = calcBlkGradient_SSE<vext>; +#if JVET_M0147_DMVR + copyBuffer = copyBufferSimd<vext>; + padding = paddingSimd<vext>; +#endif reco8 = reco_SSE<vext, 8>; reco4 = reco_SSE<vext, 4>; diff --git a/source/Lib/CommonLib/x86/InterpolationFilterX86.h 
b/source/Lib/CommonLib/x86/InterpolationFilterX86.h index 1c3b75f5383f4d4d51f27f44f23fb0f354c043eb..4bea013a3de5c0948e0d580980e0adf18c2df25c 100644 --- a/source/Lib/CommonLib/x86/InterpolationFilterX86.h +++ b/source/Lib/CommonLib/x86/InterpolationFilterX86.h @@ -193,7 +193,11 @@ static void fullPelCopyAVX2( const ClpRng& clpRng, const void*_src, int srcStrid template<X86_VEXT vext, bool isFirst, bool isLast> +#if JVET_M0147_DMVR +static void simdFilterCopy( const ClpRng& clpRng, const Pel* src, int srcStride, int16_t* dst, int dstStride, int width, int height, bool biMCForDMVR) +#else static void simdFilterCopy( const ClpRng& clpRng, const Pel* src, int srcStride, int16_t* dst, int dstStride, int width, int height ) +#endif { #if !HM_JEM_CLIP_PEL if( vext >= AVX2 && ( width % 16 ) == 0 ) @@ -211,7 +215,11 @@ static void simdFilterCopy( const ClpRng& clpRng, const Pel* src, int srcStride, else #endif { //Scalar +#if JVET_M0147_DMVR + InterpolationFilter::filterCopy<isFirst, isLast>( clpRng, src, srcStride, dst, dstStride, width, height, biMCForDMVR); +#else InterpolationFilter::filterCopy<isFirst, isLast>( clpRng, src, srcStride, dst, dstStride, width, height ); +#endif } } @@ -979,9 +987,102 @@ static void simdInterpolateN2_M4( const int16_t* src, int srcStride, int16_t *ds dst += dstStride; } } +#if JVET_M0147_DMVR +#ifdef USE_AVX2 +static inline __m256i simdInterpolateLuma10Bit2P16(int16_t const *src1, int srcStride, __m256i *mmCoeff, const __m256i & mmOffset, __m128i &mmShift) +{ + __m256i sumLo; + { + __m256i mmPix = _mm256_loadu_si256((__m256i*)src1); + __m256i mmPix1 = _mm256_loadu_si256((__m256i*)(src1 + srcStride)); + __m256i lo0 = _mm256_mullo_epi16(mmPix, mmCoeff[0]); + __m256i lo1 = _mm256_mullo_epi16(mmPix1, mmCoeff[1]); + sumLo = _mm256_add_epi16(lo0, lo1); + } + sumLo = _mm256_sra_epi16(_mm256_add_epi16(sumLo, mmOffset), mmShift); + return(sumLo); +} +#endif + +static inline __m128i simdInterpolateLuma10Bit2P8(int16_t const *src1, int srcStride, 
__m128i *mmCoeff, const __m128i & mmOffset, __m128i &mmShift) +{ + __m128i sumLo; + { + __m128i mmPix = _mm_loadu_si128((__m128i*)src1); + __m128i mmPix1 = _mm_loadu_si128((__m128i*)(src1 + srcStride)); + __m128i lo0 = _mm_mullo_epi16(mmPix, mmCoeff[0]); + __m128i lo1 = _mm_mullo_epi16(mmPix1, mmCoeff[1]); + sumLo = _mm_add_epi16(lo0, lo1); + } + sumLo = _mm_sra_epi16(_mm_add_epi16(sumLo, mmOffset), mmShift); + return(sumLo); +} + +static inline __m128i simdInterpolateLuma10Bit2P4(int16_t const *src, int srcStride, __m128i *mmCoeff, const __m128i & mmOffset, __m128i &mmShift) +{ + __m128i sumLo; + { + __m128i mmPix = _mm_loadl_epi64((__m128i*)src); + __m128i mmPix1 = _mm_loadl_epi64((__m128i*)(src + srcStride)); + __m128i lo0 = _mm_mullo_epi16(mmPix, mmCoeff[0]); + __m128i lo1 = _mm_mullo_epi16(mmPix1, mmCoeff[1]); + sumLo = _mm_add_epi16(lo0, lo1); + } + sumLo = _mm_sra_epi16(_mm_add_epi16(sumLo, mmOffset), mmShift); + return sumLo; +} + +template<X86_VEXT vext, bool isLast> +static void simdInterpolateN2_10BIT_M4(const int16_t* src, int srcStride, int16_t *dst, int dstStride, int cStride, int width, int height, int shift, int offset, const ClpRng& clpRng, int16_t const *c) +{ + int row, col; + __m128i mmOffset = _mm_set1_epi16(offset); + __m128i mmShift = _mm_set_epi64x(0, shift); + __m128i mmCoeff[2]; + for (int n = 0; n < 2; n++) + mmCoeff[n] = _mm_set1_epi16(c[n]); + + CHECK(isLast, "Not Supported"); + +#if USE_AVX2 + __m256i mm256Offset = _mm256_set1_epi16(offset); + __m256i mm256Coeff[2]; + for (int n = 0; n < 2; n++) + mm256Coeff[n] = _mm256_set1_epi16(c[n]); +#endif + for (row = 0; row < height; row++) + { + col = 0; +#if USE_AVX2 + // multiple of 16 + for (; col < ((width >> 4) << 4); col += 16) + { + __m256i mmFiltered = simdInterpolateLuma10Bit2P16(src + col, cStride, mm256Coeff, mm256Offset, mmShift); + _mm256_storeu_si256((__m256i *)(dst + col), mmFiltered); + } +#endif + // multiple of 8 + for (; col < ((width >> 3) << 3); col += 8) + { + __m128i 
mmFiltered = simdInterpolateLuma10Bit2P8(src + col, cStride, mmCoeff, mmOffset, mmShift); + _mm_storeu_si128((__m128i *)(dst + col), mmFiltered); + } + + // last 4 samples + __m128i mmFiltered = simdInterpolateLuma10Bit2P4(src + col, cStride, mmCoeff, mmOffset, mmShift); + _mm_storel_epi64((__m128i *)(dst + col), mmFiltered); + src += srcStride; + dst += dstStride; + } +} +#endif template<X86_VEXT vext, int N, bool isVertical, bool isFirst, bool isLast> +#if JVET_M0147_DMVR +static void simdFilter( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff, bool biMCForDMVR) +#else static void simdFilter( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff ) +#endif { int row, col; @@ -1027,6 +1128,13 @@ static void simdFilter( const ClpRng& clpRng, Pel const *src, int srcStride, Pel offset = ( isFirst ) ? -IF_INTERNAL_OFFS << shift : 0; } +#if JVET_M0147_DMVR + if (biMCForDMVR) + { + shift = IF_FILTER_PREC_BILINEAR - (IF_INTERNAL_PREC_BILINEAR - clpRng.bd); + offset = 1 << (shift - 1); + } +#endif if( clpRng.bd <= 10 ) { if( N == 8 && !( width & 0x07 ) ) @@ -1075,6 +1183,16 @@ static void simdFilter( const ClpRng& clpRng, Pel const *src, int srcStride, Pel simdInterpolateVerM4<vext, 4, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); return; } +#if JVET_M0147_DMVR + else if (biMCForDMVR) + { + if (N == 2 && !(width & 0x03)) + { + simdInterpolateN2_10BIT_M4<vext, isLast>(src, srcStride, dst, dstStride, cStride, width, height, shift, offset, clpRng, c); + return; + } + } +#endif else if( N == 2 && !( width & 0x07 ) ) { simdInterpolateN2_M8<vext, isLast>( src, srcStride, dst, dstStride, cStride, width, height, shift, offset, clpRng, c ); diff --git a/source/Lib/DecoderLib/DecLib.cpp b/source/Lib/DecoderLib/DecLib.cpp index 
8602e729de947697c5e824f93694aba6285317ec..6bfbc6672e837f7d4a2c5b3cc886794a9e81296a 100644 --- a/source/Lib/DecoderLib/DecLib.cpp +++ b/source/Lib/DecoderLib/DecLib.cpp @@ -525,7 +525,9 @@ void DecLib::executeLoopFilters() #endif // deblocking filter m_cLoopFilter.loopFilterPic( cs ); - +#if JVET_M0147_DMVR + CS::setRefinedMotionField(cs); +#endif if( cs.sps->getSAOEnabledFlag() ) { m_cSAO.SAOProcess( cs, cs.picture->getSAO() ); diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp index 488c20620f6d784d3346ee67f37ecf8617f7c117..da6544a9fea0c271e6a2a38d6f965f786db15417 100644 --- a/source/Lib/DecoderLib/VLCReader.cpp +++ b/source/Lib/DecoderLib/VLCReader.cpp @@ -1101,6 +1101,9 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS) #if JVET_M0246_AFFINE_AMVR READ_FLAG( uiCode, "sps_affine_amvr_enabled_flag" ); pcSPS->setAffineAmvrEnabledFlag ( uiCode != 0 ); #endif +#if JVET_M0147_DMVR + READ_FLAG(uiCode, "dmvr_enable_flag"); pcSPS->setUseDMVR(uiCode != 0); +#endif #if HEVC_USE_SCALING_LISTS READ_FLAG( uiCode, "scaling_list_enabled_flag" ); pcSPS->setScalingListFlag ( uiCode ); if(pcSPS->getScalingListFlag()) diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h index 76452daac7ee0d35f86b6567cc23359504ff8e2a..5f8ff17f7bc8e33045554fcc41a39b1373baca96 100644 --- a/source/Lib/EncoderLib/EncCfg.h +++ b/source/Lib/EncoderLib/EncCfg.h @@ -252,6 +252,9 @@ protected: #endif #if JVET_M0247_AFFINE_AMVR_ENCOPT bool m_AffineAmvrEncOpt; +#endif +#if JVET_M0147_DMVR + bool m_DMVR; #endif unsigned m_IBCMode; unsigned m_IBCLocalSearchRangeX; @@ -790,6 +793,10 @@ public: void setUseAffineAmvrEncOpt ( bool b ) { m_AffineAmvrEncOpt = b; } bool getUseAffineAmvrEncOpt () const { return m_AffineAmvrEncOpt; } #endif +#if JVET_M0147_DMVR + void setDMVR ( bool b ) { m_DMVR = b; } + bool getDMVR () const { return m_DMVR; } +#endif void setIBCMode (unsigned n) { m_IBCMode = n; } unsigned getIBCMode () const { return m_IBCMode; } diff --git 
a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp index ba9f2302649da8286056f1902aabbac78e993efa..4271515528d26af2b720baa47e09598919279742 100644 --- a/source/Lib/EncoderLib/EncCu.cpp +++ b/source/Lib/EncoderLib/EncCu.cpp @@ -1829,6 +1829,9 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& mergeCtx.subPuMvpMiBuf = MotionBuf( m_SubPuMiBuf, bufSize ); } +#if JVET_M0147_DMVR + Mv refinedMvdL0[MAX_NUM_PARTS_IN_CTU][MRG_MAX_NUM_CANDS]; +#endif setMergeBestSATDCost( MAX_DOUBLE ); { @@ -1979,14 +1982,38 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& mergeCtx.setMergeInfo( pu, uiMergeCand ); PU::spanMotionInfo( pu, mergeCtx ); +#if JVET_M0147_DMVR + pu.mvRefine = true; +#endif distParam.cur = singleMergeTempBuffer->Y(); m_pcInterSearch->motionCompensation(pu, *singleMergeTempBuffer); acMergeBuffer[uiMergeCand] = m_acRealMergeBuffer[uiMergeCand].getBuf(localUnitArea); acMergeBuffer[uiMergeCand].copyFrom(*singleMergeTempBuffer); +#if JVET_M0147_DMVR + pu.mvRefine = false; +#endif if( mergeCtx.interDirNeighbours[uiMergeCand] == 3 && mergeCtx.mrgTypeNeighbours[uiMergeCand] == MRG_TYPE_DEFAULT_N ) { mergeCtx.mvFieldNeighbours[2*uiMergeCand].mv = pu.mv[0]; mergeCtx.mvFieldNeighbours[2*uiMergeCand+1].mv = pu.mv[1]; +#if JVET_M0147_DMVR + { + int dx, dy, i, j, num = 0; + dy = std::min<int>(pu.lumaSize().height, DMVR_SUBCU_HEIGHT); + dx = std::min<int>(pu.lumaSize().width, DMVR_SUBCU_WIDTH); + if (PU::checkDMVRCondition(pu)) + { + for (i = 0; i < (pu.lumaSize().height); i += dy) + { + for (j = 0; j < (pu.lumaSize().width); j += dx) + { + refinedMvdL0[num][uiMergeCand] = pu.mvdL0SubPu[num]; + num++; + } + } + } + } +#endif } Distortion uiSad = distParam.distFunc(distParam); @@ -2178,6 +2205,9 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& mergeCtx.setMmvdMergeCandiInfo(pu, mmvdMergeCand); PU::spanMotionInfo(pu, mergeCtx); +#if JVET_M0147_DMVR + pu.mvRefine = 
true; +#endif distParam.cur = singleMergeTempBuffer->Y(); #if JVET_M0823_MMVD_ENCOPT pu.mmvdEncOptMode = (refineStep > 2 ? 2 : 1); @@ -2185,6 +2215,9 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& m_pcInterSearch->motionCompensation(pu, *singleMergeTempBuffer); #if JVET_M0823_MMVD_ENCOPT pu.mmvdEncOptMode = 0; +#endif +#if JVET_M0147_DMVR // store the refined MV + pu.mvRefine = false; #endif Distortion uiSad = distParam.distFunc(distParam); @@ -2345,6 +2378,24 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& if( mrgTempBufSet ) { +#if JVET_M0147_DMVR + { + int dx, dy, i, j, num = 0; + dy = std::min<int>(pu.lumaSize().height, DMVR_SUBCU_HEIGHT); + dx = std::min<int>(pu.lumaSize().width, DMVR_SUBCU_WIDTH); + if (PU::checkDMVRCondition(pu)) + { + for (i = 0; i < (pu.lumaSize().height); i += dy) + { + for (j = 0; j < (pu.lumaSize().width); j += dx) + { + pu.mvdL0SubPu[num] = refinedMvdL0[num][uiMergeCand]; + num++; + } + } + } + } +#endif if (pu.mhIntraFlag) { uint32_t bufIdx = (pu.intraDir[0] > 1) ? (pu.intraDir[0] == HOR_IDX ? 
2 : 3) : pu.intraDir[0]; @@ -2385,8 +2436,13 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& } else { +#if JVET_M0147_DMVR + pu.mvRefine = true; +#endif m_pcInterSearch->motionCompensation( pu ); - +#if JVET_M0147_DMVR + pu.mvRefine = false; +#endif } if (!cu.mmvdSkip && !pu.mhIntraFlag && uiNoResidualPass != 0) { diff --git a/source/Lib/EncoderLib/EncGOP.cpp b/source/Lib/EncoderLib/EncGOP.cpp index 495f2a7335346cebf7a3e776fd4f3890b30c5baa..2811474aab32fd50ccb610a4c0b4ae01ff4f2b4d 100644 --- a/source/Lib/EncoderLib/EncGOP.cpp +++ b/source/Lib/EncoderLib/EncGOP.cpp @@ -2377,6 +2377,9 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, m_pcLoopFilter->loopFilterPic( cs ); +#if JVET_M0147_DMVR + CS::setRefinedMotionField(cs); +#endif DTRACE_UPDATE( g_trace_ctx, ( std::make_pair( "final", 1 ) ) ); if( pcSlice->getSPS()->getSAOEnabledFlag() ) diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp index 8b18cd6f4d8b883b9314e6a0870dfb3acd6d7ffc..5139abba74cb7b065dd995d819235e1736e63703 100644 --- a/source/Lib/EncoderLib/EncLib.cpp +++ b/source/Lib/EncoderLib/EncLib.cpp @@ -933,6 +933,9 @@ void EncLib::xInitSPS(SPS &sps) #endif #if JVET_M0246_AFFINE_AMVR sps.setAffineAmvrEnabledFlag ( m_AffineAmvr ); +#endif +#if JVET_M0147_DMVR + sps.setUseDMVR ( m_DMVR ); #endif sps.getSpsNext().setIBCMode ( m_IBCMode ); diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp index adee3159949ffe85b7bdd1e6e37920378e249262..743c3b4e7ff6e27d1fb5457507ea6cba78311a32 100644 --- a/source/Lib/EncoderLib/VLCWriter.cpp +++ b/source/Lib/EncoderLib/VLCWriter.cpp @@ -771,6 +771,9 @@ void HLSWriter::codeSPS( const SPS* pcSPS ) #if JVET_M0246_AFFINE_AMVR WRITE_FLAG( pcSPS->getAffineAmvrEnabledFlag() ? 1 : 0, "sps_affine_amvr_enabled_flag" ); #endif +#if JVET_M0147_DMVR + WRITE_FLAG( pcSPS->getUseDMVR() ? 
1 : 0, "dmvr_enable_flag" ); +#endif #if HEVC_USE_SCALING_LISTS WRITE_FLAG( pcSPS->getScalingListFlag() ? 1 : 0, "scaling_list_enabled_flag" ); if(pcSPS->getScalingListFlag())