diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index fd188abda903c69a89948fb523e78efd25348b71..a50d8e5dbf39efa078e8056abbc5e4a6b8e8b7fa 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -115,7 +115,6 @@ -#define JVET_P0092_SMVD_SPEED_UP 1 // JVET-P0092: SMVD speed-up #define JVET_P0043_DEBLOCKING_CLEANUP 1 // JVET-P0043: Fix deblocking design inconsistency in the affine and TPM mode diff --git a/source/Lib/CommonLib/x86/BufferX86.h b/source/Lib/CommonLib/x86/BufferX86.h index 91dd0a9720086fccb992231c86b5aed9954d345e..25e6ba302a90aa251103314cd336ac6600f57268 100644 --- a/source/Lib/CommonLib/x86/BufferX86.h +++ b/source/Lib/CommonLib/x86/BufferX86.h @@ -1126,13 +1126,8 @@ void removeWeightHighFreq_SSE(int16_t* src0, int src0Stride, const int16_t* src1 { for (int col = 0; col < width; col += 8) { -#if JVET_P0092_SMVD_SPEED_UP __m128i vsrc0 = _mm_loadu_si128( (const __m128i *)&src0[col] ); __m128i vsrc1 = _mm_loadu_si128( (const __m128i *)&src1[col] ); -#else - __m128i vsrc0 = _mm_load_si128((const __m128i *)&src0[col]); - __m128i vsrc1 = _mm_load_si128((const __m128i *)&src1[col]); -#endif __m128i vtmp, vdst, vsrc; vdst = _mm_cvtepi16_epi32(vsrc0); @@ -1201,13 +1196,8 @@ void removeHighFreq_SSE(int16_t* src0, int src0Stride, const int16_t* src1, int { for (int col = 0; col < width; col += 8) { -#if JVET_P0092_SMVD_SPEED_UP __m128i vsrc0 = _mm_loadu_si128( (const __m128i *)&src0[col] ); __m128i vsrc1 = _mm_loadu_si128( (const __m128i *)&src1[col] ); -#else - __m128i vsrc0 = _mm_load_si128((const __m128i *)&src0[col]); - __m128i vsrc1 = _mm_load_si128((const __m128i *)&src1[col]); -#endif vsrc0 = _mm_sub_epi16(_mm_slli_epi16(vsrc0, 1), vsrc1); _mm_store_si128((__m128i *)&src0[col], vsrc0); diff --git a/source/Lib/EncoderLib/InterSearch.cpp b/source/Lib/EncoderLib/InterSearch.cpp index cbd72e354d5a35bedbb81b8f8792428e24c76b2c..00ae6e269f99b80f0fad90a8ee3b4c4584f5689a 100644 --- a/source/Lib/EncoderLib/InterSearch.cpp +++ b/source/Lib/EncoderLib/InterSearch.cpp @@ -2659,12 +2659,10 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) int refIdxCur = cs.slice->getSymRefIdx( curRefList ); int refIdxTar = cs.slice->getSymRefIdx( tarRefList ); -#if JVET_P0092_SMVD_SPEED_UP if ( aacAMVPInfo[curRefList][refIdxCur].mvCand[0] == aacAMVPInfo[curRefList][refIdxCur].mvCand[1] ) aacAMVPInfo[curRefList][refIdxCur].numCand = 1; if ( aacAMVPInfo[tarRefList][refIdxTar].mvCand[0] == aacAMVPInfo[tarRefList][refIdxTar].mvCand[1] ) aacAMVPInfo[tarRefList][refIdxTar].numCand = 1; -#endif MvField cCurMvField, cTarMvField; Distortion costStart = std::numeric_limits<Distortion>::max(); @@ -2731,10 +2729,8 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) } for (int i = 0; i < m_uniMvListSize; i++) { -#if JVET_P0092_SMVD_SPEED_UP if ( symmvdCands.size() >= 5 ) break; -#endif BlkUniMvInfo* curMvInfo = m_uniMvList + ((m_uniMvListIdx - 1 - i + m_uniMvListMaxSize) % (m_uniMvListMaxSize)); smmvdCandsGen(curMvInfo->uniMvs[curRefList][refIdxCur], true); } @@ -4348,7 +4344,6 @@ Distortion InterSearch::xGetSymmetricCost( PredictionUnit& pu, PelUnitBuf& origB const Picture* picRefA = pu.cu->slice->getRefPic( eCurRefPicList, cCurMvField.refIdx ); Mv mvA = cCurMvField.mv; clipMv( mvA, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps ); -#if JVET_P0092_SMVD_SPEED_UP if ( (mvA.hor & 15) == 0 && (mvA.ver & 15) == 0 ) { Position offset = pu.blocks[COMPONENT_Y].pos().offset( mvA.getHor() >> 4, mvA.getVer() >> 4 ); @@ -4362,16 +4357,12 @@ Distortion InterSearch::xGetSymmetricCost( PredictionUnit& pu, PelUnitBuf& origB { xPredInterBlk( COMPONENT_Y, pu, picRefA, mvA, predBufA, false, pu.cu->slice->clpRng( COMPONENT_Y ), false, false ); } -#else - xPredInterBlk( COMPONENT_Y, pu, picRefA, mvA, predBufA, true, pu.cu->slice->clpRng( COMPONENT_Y ), false, false ); -#endif // get prediction of eTarRefPicList PelUnitBuf predBufB = m_tmpPredStorage[eTarRefPicList].getBuf( UnitAreaRelative( *pu.cu, pu ) ); const Picture* picRefB = pu.cu->slice->getRefPic( eTarRefPicList, cTarMvField.refIdx ); Mv mvB = cTarMvField.mv; clipMv( mvB, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps ); -#if JVET_P0092_SMVD_SPEED_UP if ( (mvB.hor & 15) == 0 && (mvB.ver & 15) == 0 ) { Position offset = pu.blocks[COMPONENT_Y].pos().offset( mvB.getHor() >> 4, mvB.getVer() >> 4 ); @@ -4383,12 +4374,8 @@ Distortion InterSearch::xGetSymmetricCost( PredictionUnit& pu, PelUnitBuf& origB { xPredInterBlk( COMPONENT_Y, pu, picRefB, mvB, predBufB, false, pu.cu->slice->clpRng( COMPONENT_Y ), false, false ); } -#else - xPredInterBlk( COMPONENT_Y, pu, picRefB, mvB, predBufB, true, pu.cu->slice->clpRng( COMPONENT_Y ), false, false ); -#endif PelUnitBuf bufTmp = m_tmpStorageLCU.getBuf( UnitAreaRelative( *pu.cu, pu ) ); -#if JVET_P0092_SMVD_SPEED_UP bufTmp.copyFrom( origBuf ); bufTmp.removeHighFreq( predBufA, m_pcEncCfg->getClipForBiPredMeEnabled(), pu.cu->slice->clpRngs(), getBcwWeight( pu.cu->BcwIdx, eTarRefPicList ) ); double fWeight = xGetMEDistortionWeight( pu.cu->BcwIdx, eTarRefPicList ); @@ -4396,16 +4383,6 @@ Distortion InterSearch::xGetSymmetricCost( PredictionUnit& pu, PelUnitBuf& origB // calc distortion DFunc distFunc = (!pu.cu->slice->getDisableSATDForRD()) ? DF_HAD : DF_SAD; cost = (Distortion)floor( fWeight * (double)m_pcRdCost->getDistPart( bufTmp.Y(), predBufB.Y(), pu.cs->sps->getBitDepth( CHANNEL_TYPE_LUMA ), COMPONENT_Y, distFunc ) ); -#else - if (bcwIdx != BCW_DEFAULT) - bufTmp.Y().addWeightedAvg(predBufA.Y(), predBufB.Y(), pu.cu->slice->clpRng(COMPONENT_Y), bcwIdx); - else - bufTmp.Y().addAvg( predBufA.Y(), predBufB.Y(), pu.cu->slice->clpRng( COMPONENT_Y ) ); - - // calc distortion - DFunc distFunc = (!pu.cu->slice->getDisableSATDForRD()) ? DF_HAD : DF_SAD; - cost = m_pcRdCost->getDistPart(bufTmp.Y(), origBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, distFunc); -#endif return(cost); } @@ -7908,7 +7885,6 @@ void InterSearch::symmvdCheckBestMvp( const Picture* picRefA = pu.cu->slice->getRefPic(curRefList, cCurMvField.refIdx); Mv mvA = cCurMvField.mv; clipMv( mvA, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps ); -#if JVET_P0092_SMVD_SPEED_UP if ( (mvA.hor & 15) == 0 && (mvA.ver & 15) == 0 ) { Position offset = pu.blocks[COMPONENT_Y].pos().offset( mvA.getHor() >> 4, mvA.getVer() >> 4 ); @@ -7925,9 +7901,6 @@ void InterSearch::symmvdCheckBestMvp( bufTmp.removeHighFreq( predBufA, m_pcEncCfg->getClipForBiPredMeEnabled(), pu.cu->slice->clpRngs(), getBcwWeight( pu.cu->BcwIdx, tarRefList ) ); double fWeight = xGetMEDistortionWeight( pu.cu->BcwIdx, tarRefList ); -#else - xPredInterBlk(COMPONENT_Y, pu, picRefA, mvA, predBufA, true, pu.cu->slice->clpRng(COMPONENT_Y), false, false); -#endif int32_t skipMvpIdx[2]; skipMvpIdx[0] = skip ? mvpIdxSym[0] : -1; @@ -7947,7 +7920,6 @@ void InterSearch::symmvdCheckBestMvp( const Picture* picRefB = pu.cu->slice->getRefPic(tarRefList, cTarMvField.refIdx); Mv mvB = cTarMvField.mv; clipMv( mvB, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps ); -#if JVET_P0092_SMVD_SPEED_UP if ( (mvB.hor & 15) == 0 && (mvB.ver & 15) == 0 ) { Position offset = pu.blocks[COMPONENT_Y].pos().offset( mvB.getHor() >> 4, mvB.getVer() >> 4 ); @@ -7962,19 +7934,6 @@ void InterSearch::symmvdCheckBestMvp( // calc distortion DFunc distFunc = (!pu.cu->slice->getDisableSATDForRD()) ? DF_HAD : DF_SAD; Distortion cost = (Distortion)floor( fWeight * (double)m_pcRdCost->getDistPart( bufTmp.Y(), predBufB.Y(), pu.cs->sps->getBitDepth( CHANNEL_TYPE_LUMA ), COMPONENT_Y, distFunc ) ); -#else - xPredInterBlk(COMPONENT_Y, pu, picRefB, mvB, predBufB, true, pu.cu->slice->clpRng(COMPONENT_Y), false, false); - - PelUnitBuf bufTmp = m_tmpStorageLCU.getBuf(UnitAreaRelative(*pu.cu, pu)); - if (bcwIdx != BCW_DEFAULT) - bufTmp.Y().addWeightedAvg(predBufA.Y(), predBufB.Y(), pu.cu->slice->clpRng(COMPONENT_Y), bcwIdx); - else - bufTmp.Y().addAvg(predBufA.Y(), predBufB.Y(), pu.cu->slice->clpRng(COMPONENT_Y)); - - // calc distortion - DFunc distFunc = (!pu.cu->slice->getDisableSATDForRD()) ? DF_HAD : DF_SAD; - Distortion cost = m_pcRdCost->getDistPart(bufTmp.Y(), origBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, distFunc); -#endif Mv pred = amvpCur.mvCand[i]; pred.changeTransPrecInternal2Amvr(pu.cu->imv);