diff --git a/source/Lib/CommonLib/AffineGradientSearch.cpp b/source/Lib/CommonLib/AffineGradientSearch.cpp index 6d564685cf1e88d99a57e8ffeb14bcbf651e72b5..541bb96b280e9e868d623d89185384a2777f6f11 100644 --- a/source/Lib/CommonLib/AffineGradientSearch.cpp +++ b/source/Lib/CommonLib/AffineGradientSearch.cpp @@ -135,26 +135,50 @@ void AffineGradientSearch::xEqualCoeffComputer( Pel *pResidue, int residueStride for ( int j = 0; j != height; j++ ) { +#if JVET_L0260_AFFINE_ME + int cy = ((j >> 2) << 2) + 2; +#endif for ( int k = 0; k != width; k++ ) { int iC[6]; int idx = j * derivateBufStride + k; +#if JVET_L0260_AFFINE_ME + int cx = ((k >> 2) << 2) + 2; +#endif if ( !b6Param ) { iC[0] = ppDerivate[0][idx]; +#if JVET_L0260_AFFINE_ME + iC[1] = cx * ppDerivate[0][idx] + cy * ppDerivate[1][idx]; +#else iC[1] = k * ppDerivate[0][idx] + j * ppDerivate[1][idx]; +#endif iC[2] = ppDerivate[1][idx]; +#if JVET_L0260_AFFINE_ME + iC[3] = cy * ppDerivate[0][idx] - cx * ppDerivate[1][idx]; +#else iC[3] = j * ppDerivate[0][idx] - k * ppDerivate[1][idx]; +#endif } else { iC[0] = ppDerivate[0][idx]; +#if JVET_L0260_AFFINE_ME + iC[1] = cx * ppDerivate[0][idx]; +#else iC[1] = k * ppDerivate[0][idx]; +#endif iC[2] = ppDerivate[1][idx]; +#if JVET_L0260_AFFINE_ME + iC[3] = cx * ppDerivate[1][idx]; + iC[4] = cy * ppDerivate[0][idx]; + iC[5] = cy * ppDerivate[1][idx]; +#else iC[3] = k * ppDerivate[1][idx]; iC[4] = j * ppDerivate[0][idx]; iC[5] = j * ppDerivate[1][idx]; +#endif } for ( int col = 0; col < affineParamNum; col++ ) { diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h index c3b0c5c32f2783ed824ce9ecf3f93454801dbe85..591d9b18f5dc2fa92d210fa552e5e61449c1e5ef 100644 --- a/source/Lib/CommonLib/CommonDef.h +++ b/source/Lib/CommonLib/CommonDef.h @@ -116,6 +116,12 @@ typedef enum AFFINE_MODEL_NUM } EAffineModel; +#if JVET_L0260_AFFINE_ME +static const int AFFINE_ME_LIST_SIZE = 4; +static const int AFFINE_ME_LIST_SIZE_LD = 3; +static const double 
AFFINE_ME_LIST_MVP_TH = 1.0; +#endif + // ==================================================================================================================== // Common constants // ==================================================================================================================== diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 32438e65989c16b13cc6e322121194f8c2b2a589..cca333a455a62d5563d74cce58af2da9fb8a15cc 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -90,6 +90,8 @@ #define L0074_SUBBLOCK_DEBLOCKING 1 +#define JVET_L0260_AFFINE_ME 1 + #define JVET_L0256_BIO 1 #define JVET_L0646_GBI 1 // Generalized bi-prediction (GBi) diff --git a/source/Lib/CommonLib/x86/AffineGradientSearchX86.h b/source/Lib/CommonLib/x86/AffineGradientSearchX86.h index 0ee2b3d9e09c45205585e87613119f75a876e1ac..e1cc1cc1776281b373ad930f1761eefa6109fdd6 100644 --- a/source/Lib/CommonLib/x86/AffineGradientSearchX86.h +++ b/source/Lib/CommonLib/x86/AffineGradientSearchX86.h @@ -198,18 +198,33 @@ static void simdVerticalSobelFilter( Pel *const pPred, const int predStride, int template<X86_VEXT vext> static void simdEqualCoeffComputer( Pel *pResidue, int residueStride, int **ppDerivate, int derivateBufStride, int64_t( *pEqualCoeff )[7], int width, int height, bool b6Param ) { +#if JVET_L0260_AFFINE_ME + __m128i mmFour; +#else __m128i mmTwo, mmFour; +#endif __m128i mmTmp[4]; __m128i mmIntermediate[4]; +#if JVET_L0260_AFFINE_ME + __m128i mmIndxK, mmIndxJ; +#else __m128i mmIndxK, mmIndxJ[2]; +#endif __m128i mmResidue[2]; __m128i mmC[12]; // Add directly to indexes to get new index - mmTwo = _mm_set1_epi32( 2 ); - mmFour = _mm_set1_epi32( 4 ); - mmIndxJ[0] = _mm_set1_epi32( -2 ); - mmIndxJ[1] = _mm_set1_epi32( -1 ); +#if !JVET_L0260_AFFINE_ME + mmTwo = _mm_set1_epi32(2); +#endif + mmFour = _mm_set1_epi32(4); +#if JVET_L0260_AFFINE_ME + mmIndxJ = _mm_set1_epi32(-2); +#else + mmIndxJ[0] = _mm_set1_epi32(-2); + 
mmIndxJ[1] = _mm_set1_epi32(-1); +#endif + int n = b6Param ? 6 : 4; int idx1 = 0, idx2 = 0; @@ -218,9 +233,15 @@ static void simdEqualCoeffComputer( Pel *pResidue, int residueStride, int **ppDe for ( int j = 0; j < height; j += 2 ) { - mmIndxJ[0] = _mm_add_epi32( mmIndxJ[0], mmTwo ); - mmIndxJ[1] = _mm_add_epi32( mmIndxJ[1], mmTwo ); - mmIndxK = _mm_set_epi32( -1, -2, -3, -4 ); +#if JVET_L0260_AFFINE_ME + if (!(j & 3)) + mmIndxJ = _mm_add_epi32(mmIndxJ, mmFour); + mmIndxK = _mm_set1_epi32(-2); +#else + mmIndxJ[0] = _mm_add_epi32(mmIndxJ[0], mmTwo); + mmIndxJ[1] = _mm_add_epi32(mmIndxJ[1], mmTwo); + mmIndxK = _mm_set_epi32(-1, -2, -3, -4); +#endif idx1 += (derivateBufStride << 1); idx2 += (derivateBufStride << 1); @@ -237,16 +258,26 @@ static void simdEqualCoeffComputer( Pel *pResidue, int residueStride, int **ppDe mmC[2] = _mm_loadu_si128( (const __m128i*)&ppDerivate[1][idx1] ); mmC[1] = _mm_mullo_epi32( mmIndxK, mmC[0] ); mmC[3] = _mm_mullo_epi32( mmIndxK, mmC[2] ); - mmC[4] = _mm_mullo_epi32( mmIndxJ[0], mmC[0] ); - mmC[5] = _mm_mullo_epi32( mmIndxJ[0], mmC[2] ); +#if JVET_L0260_AFFINE_ME + mmC[4] = _mm_mullo_epi32(mmIndxJ, mmC[0]); + mmC[5] = _mm_mullo_epi32(mmIndxJ, mmC[2]); +#else + mmC[4] = _mm_mullo_epi32(mmIndxJ[0], mmC[0]); + mmC[5] = _mm_mullo_epi32(mmIndxJ[0], mmC[2]); +#endif // mmC[6-11] for iC[0-5] of 2nd row of pixels mmC[6] = _mm_loadu_si128( (const __m128i*)&ppDerivate[0][idx2] ); mmC[8] = _mm_loadu_si128( (const __m128i*)&ppDerivate[1][idx2] ); mmC[7] = _mm_mullo_epi32( mmIndxK, mmC[6] ); mmC[9] = _mm_mullo_epi32( mmIndxK, mmC[8] ); - mmC[10] = _mm_mullo_epi32( mmIndxJ[1], mmC[6] ); - mmC[11] = _mm_mullo_epi32( mmIndxJ[1], mmC[8] ); +#if JVET_L0260_AFFINE_ME + mmC[10] = _mm_mullo_epi32(mmIndxJ, mmC[6]); + mmC[11] = _mm_mullo_epi32(mmIndxJ, mmC[8]); +#else + mmC[10] = _mm_mullo_epi32(mmIndxJ[1], mmC[6]); + mmC[11] = _mm_mullo_epi32(mmIndxJ[1], mmC[8]); +#endif } else { @@ -254,9 +285,13 @@ static void simdEqualCoeffComputer( Pel *pResidue, int 
residueStride, int **ppDe mmC[0] = _mm_loadu_si128( (const __m128i*)&ppDerivate[0][idx1] ); mmC[2] = _mm_loadu_si128( (const __m128i*)&ppDerivate[1][idx1] ); mmC[1] = _mm_mullo_epi32( mmIndxK, mmC[0] ); - mmC[3] = _mm_mullo_epi32( mmIndxJ[0], mmC[0] ); - - mmTmp[0] = _mm_mullo_epi32( mmIndxJ[0], mmC[2] ); +#if JVET_L0260_AFFINE_ME + mmC[3] = _mm_mullo_epi32(mmIndxJ, mmC[0]); + mmTmp[0] = _mm_mullo_epi32(mmIndxJ, mmC[2]); +#else + mmC[3] = _mm_mullo_epi32(mmIndxJ[0], mmC[0]); + mmTmp[0] = _mm_mullo_epi32(mmIndxJ[0], mmC[2]); +#endif mmTmp[1] = _mm_mullo_epi32( mmIndxK, mmC[2] ); mmC[1] = _mm_add_epi32( mmC[1], mmTmp[0] ); mmC[3] = _mm_sub_epi32( mmC[3], mmTmp[1] ); @@ -265,9 +300,13 @@ static void simdEqualCoeffComputer( Pel *pResidue, int residueStride, int **ppDe mmC[4] = _mm_loadu_si128( (const __m128i*)&ppDerivate[0][idx2] ); mmC[6] = _mm_loadu_si128( (const __m128i*)&ppDerivate[1][idx2] ); mmC[5] = _mm_mullo_epi32( mmIndxK, mmC[4] ); - mmC[7] = _mm_mullo_epi32( mmIndxJ[1], mmC[4] ); - - mmTmp[2] = _mm_mullo_epi32( mmIndxJ[1], mmC[6] ); +#if JVET_L0260_AFFINE_ME + mmC[7] = _mm_mullo_epi32(mmIndxJ, mmC[4]); + mmTmp[2] = _mm_mullo_epi32(mmIndxJ, mmC[6]); +#else + mmC[7] = _mm_mullo_epi32(mmIndxJ[1], mmC[4]); + mmTmp[2] = _mm_mullo_epi32(mmIndxJ[1], mmC[6]); +#endif mmTmp[3] = _mm_mullo_epi32( mmIndxK, mmC[6] ); mmC[5] = _mm_add_epi32( mmC[5], mmTmp[2] ); mmC[7] = _mm_sub_epi32( mmC[7], mmTmp[3] ); diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp index ca76b7b2394b0102935a83d394398822a8185486..4adba2919f8bd3d87fc7e7f8bcf600dd86b2fd4f 100644 --- a/source/Lib/EncoderLib/EncCu.cpp +++ b/source/Lib/EncoderLib/EncCu.cpp @@ -1090,6 +1090,11 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, m_CurrCtx++; tempCS->getRecoBuf().fill( 0 ); +#if JVET_L0260_AFFINE_ME + AffineMVInfo tmpMVInfo; + bool isAffMVInfoSaved; + m_pcInterSearch->savePrevAffMVInfo(0, tmpMVInfo, isAffMVInfoSaved); +#endif do { @@ -1252,6 
+1257,11 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, #endif xCheckBestMode( tempCS, bestCS, partitioner, encTestMode ); +#if JVET_L0260_AFFINE_ME + if (isAffMVInfoSaved) + m_pcInterSearch->addAffMVInfo(tmpMVInfo); +#endif + #if JVET_L0266_HMVP if (!slice.isIntra()) { diff --git a/source/Lib/EncoderLib/EncSlice.cpp b/source/Lib/EncoderLib/EncSlice.cpp index 8ffc8812adc0226a833be9e16e6b0c527f84c261..b6d08d0b3d6b43f5619e44e8fe5c5c11691ee55e 100644 --- a/source/Lib/EncoderLib/EncSlice.cpp +++ b/source/Lib/EncoderLib/EncSlice.cpp @@ -1453,6 +1453,9 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c const SPS *sps = pcSlice->getSPS(); CHECK(sps == 0, "No SPS present"); writeBlockStatisticsHeader(sps); +#endif +#if JVET_L0260_AFFINE_ME + m_pcInterSearch->resetAffineMVList(); #endif encodeCtus( pcPic, bCompressEntireSlice, bFastDeltaQP, startCtuTsAddr, boundingCtuTsAddr, m_pcLib ); diff --git a/source/Lib/EncoderLib/InterSearch.cpp b/source/Lib/EncoderLib/InterSearch.cpp index 3e1fdbd7069187a3561ad5e73d21a059f975bfe0..8d6f8cb2462ae1c88aacd74d48f0a01fd1c15452 100644 --- a/source/Lib/EncoderLib/InterSearch.cpp +++ b/source/Lib/EncoderLib/InterSearch.cpp @@ -107,6 +107,11 @@ InterSearch::InterSearch() } setWpScalingDistParam( -1, REF_PIC_LIST_X, nullptr ); +#if JVET_L0260_AFFINE_ME + m_affMVList = nullptr; + m_affMVListSize = 0; + m_affMVListIdx = 0; +#endif } @@ -142,6 +147,15 @@ void InterSearch::destroy() { delete[] m_tmpAffiDeri[1]; } +#if JVET_L0260_AFFINE_ME + if (m_affMVList) + { + delete[] m_affMVList; + m_affMVList = nullptr; + } + m_affMVListIdx = 0; + m_affMVListSize = 0; +#endif m_isInitialized = false; } @@ -231,7 +245,13 @@ void InterSearch::init( EncCfg* pcEncCfg, m_tmpAffiDeri[0] = new int[MAX_CU_SIZE * MAX_CU_SIZE]; m_tmpAffiDeri[1] = new int[MAX_CU_SIZE * MAX_CU_SIZE]; m_pTempPel = new Pel[maxCUWidth*maxCUHeight]; - +#if JVET_L0260_AFFINE_ME + m_affMVListMaxSize = 
(pcEncCfg->getIntraPeriod() == (uint32_t)-1) ? AFFINE_ME_LIST_SIZE_LD : AFFINE_ME_LIST_SIZE; + if (!m_affMVList) + m_affMVList = new AffineMVInfo[m_affMVListMaxSize]; + m_affMVListIdx = 0; + m_affMVListSize = 0; +#endif m_isInitialized = true; } @@ -1309,6 +1329,9 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) #endif ) { +#if JVET_L0260_AFFINE_ME + m_hevcCost = uiHevcCost; +#endif // save normal hevc result uint32_t uiMRGIndex = pu.mergeIdx; bool bMergeFlag = pu.mergeFlag; @@ -1663,7 +1686,7 @@ Distortion InterSearch::xGetTemplateCost( const PredictionUnit& pu, // calc distortion - uiCost = m_pcRdCost->getDistPart( origBuf.Y(), predBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_SAD ); + uiCost = m_pcRdCost->getDistPart(origBuf.Y(), predBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_SAD); uiCost += m_pcRdCost->getCost( m_auiMVPIdxCost[iMVPIdx][iMVPNum] ); return uiCost; @@ -1696,7 +1719,13 @@ Distortion InterSearch::xGetAffineTemplateCost( PredictionUnit& pu, PelUnitBuf& // calc distortion - uiCost = m_pcRdCost->getDistPart( origBuf.Y(), predBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_SAD ); + uiCost = m_pcRdCost->getDistPart( origBuf.Y(), predBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y +#if JVET_L0260_AFFINE_ME + , DF_HAD +#else + , DF_SAD +#endif + ); uiCost += m_pcRdCost->getCost( m_auiMVPIdxCost[iMVPIdx][iMVPNum] ); DTRACE( g_trace_ctx, D_COMMON, " (%d) affineTemplateCost=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiCost ); return uiCost; @@ -2714,6 +2743,76 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, Distortion uiCandCost = xGetAffineTemplateCost(pu, origBuf, predBuf, mvHevc, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdxTemp); +#if JVET_L0260_AFFINE_ME + if (pu.cu->affineType == AFFINEMODEL_4PARAM && m_affMVListSize +#if JVET_L0646_GBI + && (!pu.cu->cs->sps->getSpsNext().getUseGBi() || 
gbiIdx == GBI_DEFAULT) +#endif + ) + { + int shift = MAX_CU_DEPTH; + for (int i = 0; i < m_affMVListSize; i++) + { + AffineMVInfo *mvInfo = m_affMVList + ((m_affMVListIdx - i - 1 + m_affMVListMaxSize) % (m_affMVListMaxSize)); + //check; + int j = 0; + for (; j < i; j++) + { + AffineMVInfo *prevMvInfo = m_affMVList + ((m_affMVListIdx - j - 1 + m_affMVListMaxSize) % (m_affMVListMaxSize)); + if ((mvInfo->affMVs[iRefList][iRefIdxTemp][0] == prevMvInfo->affMVs[iRefList][iRefIdxTemp][0]) && + (mvInfo->affMVs[iRefList][iRefIdxTemp][1] == prevMvInfo->affMVs[iRefList][iRefIdxTemp][1]) + && (mvInfo->x == prevMvInfo->x) && (mvInfo->y == prevMvInfo->y) + && (mvInfo->w == prevMvInfo->w) + ) + { + break; + } + } + if (j < i) + continue; + + Mv mvTmp[3], *nbMv = mvInfo->affMVs[iRefList][iRefIdxTemp]; + int vx, vy; + int dMvHorX, dMvHorY, dMvVerX, dMvVerY; + int mvScaleHor = nbMv[0].getHor() << shift; + int mvScaleVer = nbMv[0].getVer() << shift; + Mv dMv = nbMv[1] - nbMv[0]; +#if REMOVE_MV_ADAPT_PREC + mvScaleHor <<= VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE; + mvScaleVer <<= VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE; + dMv <<= VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE; +#endif + dMvHorX = dMv.getHor() << (shift - g_aucLog2[mvInfo->w]); + dMvHorY = dMv.getVer() << (shift - g_aucLog2[mvInfo->w]); + dMvVerX = -dMvHorY; + dMvVerY = dMvHorX; + vx = mvScaleHor + dMvHorX * (pu.Y().x - mvInfo->x) + dMvVerX * (pu.Y().y - mvInfo->y); + vy = mvScaleVer + dMvHorY * (pu.Y().x - mvInfo->x) + dMvVerY * (pu.Y().y - mvInfo->y); + roundAffineMv(vx, vy, shift); + mvTmp[0] = Mv(vx, vy); + clipMv(mvTmp[0], pu.cu->lumaPos(), *pu.cs->sps); + mvTmp[0].roundMV2SignalPrecision(); + vx = mvScaleHor + dMvHorX * (pu.Y().x + pu.Y().width - mvInfo->x) + dMvVerX * (pu.Y().y - mvInfo->y); + vy = mvScaleVer + dMvHorY * (pu.Y().x + pu.Y().width - mvInfo->x) + dMvVerY * (pu.Y().y - mvInfo->y); + roundAffineMv(vx, vy, shift); + mvTmp[1] = Mv(vx, vy); + clipMv(mvTmp[1], pu.cu->lumaPos(), *pu.cs->sps); + 
mvTmp[1].roundMV2SignalPrecision(); +#if REMOVE_MV_ADAPT_PREC + mvTmp[0].hor = mvTmp[0].hor >> VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE; + mvTmp[0].ver = mvTmp[0].ver >> VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE; + mvTmp[1].hor = mvTmp[1].hor >> VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE; + mvTmp[1].ver = mvTmp[1].ver >> VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE; +#endif + Distortion tmpCost = xGetAffineTemplateCost(pu, origBuf, predBuf, mvTmp, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdxTemp); + if (tmpCost < uiCandCost) + { + uiCandCost = tmpCost; + std::memcpy(mvHevc, mvTmp, 3 * sizeof(Mv)); + } + } + } +#endif if ( pu.cu->affineType == AFFINEMODEL_6PARAM ) { Mv mvFour[3]; @@ -2824,8 +2923,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, if(pu.cu->cs->sps->getSpsNext().getUseGBi() && pu.cu->GBiIdx == GBI_DEFAULT && pu.cu->slice->isInterB()) { m_uniMotions.setReadModeAffine(true, (uint8_t)iRefList, (uint8_t)iRefIdxTemp, pu.cu->affineType); - m_uniMotions.copyAffineMvFrom(cMvTemp[iRefList][iRefIdxTemp], uiCostTemp - m_pcRdCost->getCost(uiBitsTemp), (uint8_t)iRefList, (uint8_t)iRefIdxTemp, pu.cu->affineType - ); + m_uniMotions.copyAffineMvFrom(cMvTemp[iRefList][iRefIdxTemp], uiCostTemp - m_pcRdCost->getCost(uiBitsTemp), (uint8_t)iRefList, (uint8_t)iRefIdxTemp, pu.cu->affineType); } #endif // Set best AMVP Index @@ -2863,6 +2961,41 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, if ( pu.cu->affineType == AFFINEMODEL_4PARAM ) { ::memcpy( mvAffine4Para, cMvTemp, sizeof( cMvTemp ) ); +#if JVET_L0260_AFFINE_ME +#if JVET_L0646_GBI + if (!pu.cu->cs->sps->getSpsNext().getUseGBi() || gbiIdx == GBI_DEFAULT) + { +#endif + AffineMVInfo *affMVInfo = m_affMVList + m_affMVListIdx; + + //check; + int j = 0; + for (; j < m_affMVListSize; j++) + { + AffineMVInfo *prevMvInfo = m_affMVList + ((m_affMVListIdx - j - 1 + m_affMVListMaxSize) % (m_affMVListMaxSize)); + if ((pu.Y().x == prevMvInfo->x) && (pu.Y().y == 
prevMvInfo->y) && (pu.Y().width == prevMvInfo->w) && (pu.Y().height == prevMvInfo->h)) + { + break; + } + } + if (j < m_affMVListSize) + affMVInfo = m_affMVList + ((m_affMVListIdx - j - 1 + m_affMVListMaxSize) % (m_affMVListMaxSize)); + + ::memcpy(affMVInfo->affMVs, cMvTemp, sizeof(cMvTemp)); + + if (j == m_affMVListSize) + { + affMVInfo->x = pu.Y().x; + affMVInfo->y = pu.Y().y; + affMVInfo->w = pu.Y().width; + affMVInfo->h = pu.Y().height; + m_affMVListSize = std::min(m_affMVListSize + 1, m_affMVListMaxSize); + m_affMVListIdx = (m_affMVListIdx + 1) % (m_affMVListMaxSize); + } +#if JVET_L0646_GBI + } +#endif +#endif } // Bi-directional prediction @@ -3357,7 +3490,7 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu, Mv acMv[3], uint32_t& ruiBits, Distortion& ruiCost, - bool bBi ) + bool bBi) { #if JVET_L0646_GBI if( pu.cu->cs->sps->getSpsNext().getUseGBi() && pu.cu->GBiIdx != GBI_DEFAULT && !bBi && xReadBufferedAffineUniMv(pu, eRefPicList, iRefIdxPred, acMvPred, acMv, ruiBits, ruiCost) ) @@ -3661,6 +3794,118 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu, memcpy( acMv, acMvTemp, sizeof(Mv) * 3 ); } } + +#if JVET_L0260_AFFINE_ME + auto checkCPMVRdCost = [&](Mv ctrlPtMv[3]) + { + xPredAffineBlk(COMPONENT_Y, pu, refPic, ctrlPtMv, predBuf, false, pu.cu->slice->clpRng(COMPONENT_Y)); + // get error + Distortion costTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_HAD); + // get cost with mv + m_pcRdCost->setCostScale(0); + uint32_t bitsTemp = ruiBits; + for (int i = 0; i < mvNum; i++) + { + m_pcRdCost->setPredictor(acMvPred[i]); +#if REMOVE_MV_ADAPT_PREC + Mv mv0; + mv0.hor = ctrlPtMv[0].hor >> VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE; + mv0.ver = ctrlPtMv[0].ver >> VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE; + const int shift = VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE; +#else + const int shift = ctrlPtMv[i].highPrec ? 
VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE : 0; +#endif + Mv secondPred; + if (i != 0) + { +#if REMOVE_MV_ADAPT_PREC + secondPred.hor = acMvPred[i].hor + mv0.hor - acMvPred[0].hor; + secondPred.ver = acMvPred[i].ver + mv0.ver - acMvPred[0].ver; +#else + secondPred = acMvPred[i] + (ctrlPtMv[0] - acMvPred[0]); +#endif + m_pcRdCost->setPredictor(secondPred); + } + bitsTemp += m_pcRdCost->getBitsOfVectorWithPredictor(ctrlPtMv[i].getHor() >> shift, ctrlPtMv[i].getVer() >> shift, 0); + } + costTemp = (Distortion)(floor(fWeight * (double)costTemp) + (double)m_pcRdCost->getCost(bitsTemp)); + // store best cost and mv + if (costTemp < uiCostBest) + { + uiCostBest = costTemp; + uiBitsBest = bitsTemp; + ::memcpy(acMv, ctrlPtMv, sizeof(Mv) * 3); + } + }; + + if (uiCostBest <= AFFINE_ME_LIST_MVP_TH*m_hevcCost) + { + Mv mvPredTmp[3] = { acMvPred[0], acMvPred[1], acMvPred[2] }; +#if REMOVE_MV_ADAPT_PREC + mvPredTmp[0] <<= VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE; + mvPredTmp[1] <<= VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE; + mvPredTmp[2] <<= VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE; +#endif + Mv mvME[3]; + ::memcpy(mvME, acMv, sizeof(Mv) * 3); + Mv dMv = mvME[0] - mvPredTmp[0]; + + for (int j = 0; j < mvNum; j++) + { + if ((!j && mvME[j] != mvPredTmp[j]) || (j && mvME[j] != (mvPredTmp[j] + dMv))) + { + ::memcpy(acMvTemp, mvME, sizeof(Mv) * 3); + acMvTemp[j] = mvPredTmp[j]; + + if (j) + acMvTemp[j] += dMv; + + checkCPMVRdCost(acMvTemp); + } + } + + //keep the rotation/zoom; + if (mvME[0] != mvPredTmp[0]) + { + ::memcpy(acMvTemp, mvME, sizeof(Mv) * 3); + for (int i = 1; i < mvNum; i++) + { + acMvTemp[i] -= dMv; + } + acMvTemp[0] = mvPredTmp[0]; + + checkCPMVRdCost(acMvTemp); + } + + //keep the translation; + if (pu.cu->affineType == AFFINEMODEL_6PARAM && mvME[1] != (mvPredTmp[1] + dMv) && mvME[2] != (mvPredTmp[2] + dMv)) + { + ::memcpy(acMvTemp, mvME, sizeof(Mv) * 3); + + acMvTemp[1] = mvPredTmp[1] + dMv; + acMvTemp[2] = mvPredTmp[2] + dMv; + + checkCPMVRdCost(acMvTemp); + } + + { + 
dMv = acMv[1] - acMv[0]; + if (pu.cu->affineType == AFFINEMODEL_4PARAM && (dMv.getAbsHor() > 4 || dMv.getAbsVer() > 4)) + { + int testPos[4][2] = { { -1, 0 },{ 0, -1 },{ 0, 1 },{ 1, 0 } }; + Mv centerMv[3]; + ::memcpy(centerMv, acMv, sizeof(Mv) * 3); + acMvTemp[0] = centerMv[0]; + for (int i = 0; i < 4; i++) + { + acMvTemp[1].set(centerMv[1].getHor() + (testPos[i][0] << VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE), centerMv[1].getVer() + (testPos[i][1] << VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE)); + checkCPMVRdCost(acMvTemp); + } + } + } + } +#endif + #if REMOVE_MV_ADAPT_PREC const int nShift = VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE; const int nOffset = 1 << (nShift - 1); @@ -3671,10 +3916,11 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu, acMv[2].hor = acMv[2].hor >= 0 ? (acMv[2].hor + nOffset) >> nShift : -((-acMv[2].hor + nOffset) >> nShift); acMv[2].ver = acMv[2].ver >= 0 ? (acMv[2].ver + nOffset) >> nShift : -((-acMv[2].ver + nOffset) >> nShift); #endif + // free buffer - for ( int i=0; i<iParaNum; i++ ) - delete []pdEqualCoeff[i]; - delete []pdEqualCoeff; + for (int i = 0; i<iParaNum; i++) + delete[]pdEqualCoeff[i]; + delete[]pdEqualCoeff; ruiBits = uiBitsBest; ruiCost = uiCostBest; @@ -4810,6 +5056,7 @@ bool InterSearch::xReadBufferedUniMv(PredictionUnit& pu, RefPicList eRefPicList, } return false; } + bool InterSearch::xReadBufferedAffineUniMv(PredictionUnit& pu, RefPicList eRefPicList, int32_t iRefIdx, Mv acMvPred[3], Mv acMv[3], uint32_t& ruiBits, Distortion& ruiCost) { if (m_uniMotions.isReadModeAffine((uint32_t)eRefPicList, (uint32_t)iRefIdx, pu.cu->affineType)) diff --git a/source/Lib/EncoderLib/InterSearch.h b/source/Lib/EncoderLib/InterSearch.h index 3422a82a876c6457fb60522fbea41eecd896e434..098014d1c1128bd4dc14454c28342105c5ca67e5 100644 --- a/source/Lib/EncoderLib/InterSearch.h +++ b/source/Lib/EncoderLib/InterSearch.h @@ -63,6 +63,14 @@ static const uint32_t MAX_IDX_ADAPT_SR = 33; static const uint32_t NUM_MV_PREDICTORS = 3; 
class EncModeCtrl; +#if JVET_L0260_AFFINE_ME +struct AffineMVInfo /* One cached affine motion-estimation result: the control-point MVs found for a block plus that block's luma position and size. */ +{ + Mv affMVs[2][33][3]; /* indexed [ref list][ref idx][control point]; filled by memcpy(sizeof(cMvTemp)) from cMvTemp, so presumably the dimensions must match that array - confirm */ + int x, y, w, h; /* pu.Y().x / .y / .width / .height of the block the MVs were estimated for */ +}; +#endif + /// encoder search class class InterSearch : public InterPrediction, CrossComponentPrediction, AffineGradientSearch { @@ -87,6 +95,14 @@ private: bool m_affineModeSelected; #endif +#if JVET_L0260_AFFINE_ME + AffineMVInfo *m_affMVList; /* circular list of recent affine ME results; allocated in init(), released in destroy() */ + int m_affMVListIdx; /* slot the next new entry is written to; advances modulo m_affMVListMaxSize */ + int m_affMVListSize; /* number of valid entries, never more than m_affMVListMaxSize */ + int m_affMVListMaxSize; /* capacity of m_affMVList, chosen in init() */ + Distortion m_hevcCost; /* uiHevcCost recorded by predInterSearch; xAffineMotionEstimation compares its best cost against it, scaled by AFFINE_ME_LIST_MVP_TH */ +#endif + protected: // interface to option EncCfg* m_pcEncCfg; @@ -141,6 +157,40 @@ public: #if JVET_L0646_GBI void setAffineModeSelected ( bool flag) { m_affineModeSelected = flag; } #endif +#if JVET_L0260_AFFINE_ME + void resetAffineMVList() { m_affMVListIdx = 0; m_affMVListSize = 0; } /* Empties the list by resetting write index and count; the storage itself is kept. */ + void savePrevAffMVInfo(int idx, AffineMVInfo &tmpMVInfo, bool& isSaved) /* Copies the entry idx steps behind the most recently appended one (0 = last appended) into tmpMVInfo and sets isSaved; when the list holds idx or fewer entries, isSaved is false and tmpMVInfo is left untouched. */ + { + if (m_affMVListSize > idx) + { + tmpMVInfo = m_affMVList[(m_affMVListIdx - 1 - idx + m_affMVListMaxSize) % m_affMVListMaxSize]; /* adding m_affMVListMaxSize keeps the left operand of % non-negative when stepping back past slot 0 */ + isSaved = true; + } + else + isSaved = false; + } + void addAffMVInfo(AffineMVInfo &tmpMVInfo) /* Stores tmpMVInfo: overwrites an existing entry with the same x, y, w and h if one is found, otherwise writes it to the current slot and advances the list. */ + { + int j = 0; + AffineMVInfo *prevInfo = nullptr; + for (; j < m_affMVListSize; j++) /* walk the valid entries backwards, starting at the most recently appended slot */ + { + prevInfo = m_affMVList + ((m_affMVListIdx - j - 1 + m_affMVListMaxSize) % (m_affMVListMaxSize)); + if ((tmpMVInfo.x == prevInfo->x) && (tmpMVInfo.y == prevInfo->y) && (tmpMVInfo.w == prevInfo->w) && (tmpMVInfo.h == prevInfo->h)) /* same block position and size */ + { + break; + } + } + if (j < m_affMVListSize) /* loop stopped early, so prevInfo points at the matching entry */ + *prevInfo = tmpMVInfo; + else + { + m_affMVList[m_affMVListIdx] = tmpMVInfo; /* no match: take the current write slot, which holds the oldest appended entry once the list is full */ + m_affMVListIdx = (m_affMVListIdx + 1) % m_affMVListMaxSize; + m_affMVListSize = std::min(m_affMVListSize + 1, m_affMVListMaxSize); /* count saturates at capacity */ + } + } +#endif protected: /// sub-function for motion vector refinement used in fractional-pel accuracy