diff --git a/source/Lib/CommonLib/Buffer.cpp b/source/Lib/CommonLib/Buffer.cpp index 878ecbcd40ccef8e106085c02ab112a6102c4d8b..974bf49321d6803af835075d32d7008ec52465f7 100644 --- a/source/Lib/CommonLib/Buffer.cpp +++ b/source/Lib/CommonLib/Buffer.cpp @@ -45,8 +45,10 @@ void applyPROFCore(Pel* dst, int dstStride, const Pel* src, int srcStride, int width, int height, const Pel* gradX, const Pel* gradY, int gradStride, const int* dMvX, const int* dMvY, int dMvStride, int shiftNum, Pel offset, const ClpRng& clpRng) { int idx = 0; +#if !JVET_P0057_BDOF_PROF_HARMONIZATION const int dIshift = 1; const int dIoffset = 1 << (dIshift - 1); +#endif #if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING const int dILimit = 1 << std::max<int>(clpRng.bd + 1, 13); @@ -56,7 +58,9 @@ void applyPROFCore(Pel* dst, int dstStride, const Pel* src, int srcStride, int w for (int w = 0; w < width; w++) { int32_t dI = dMvX[idx] * gradX[w] + dMvY[idx] * gradY[w]; +#if !JVET_P0057_BDOF_PROF_HARMONIZATION dI = (dI + dIoffset) >> dIshift; +#endif #if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING dI = Clip3(-dILimit, dILimit - 1, dI); #endif @@ -79,8 +83,10 @@ void applyBiPROFCore (Pel* dst, int dstStride, const Pel* src0, const Pel* src1, int idx = 16; int32_t dI0 = 0; int32_t dI1 = 0; +#if !JVET_P0057_BDOF_PROF_HARMONIZATION const int dIshift = 1; const int dIoffset = 1 << (dIshift - 1); +#endif const int clipbd = clpRng.bd; const int shiftNum = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + g_GbiLog2WeightBase; @@ -101,14 +107,18 @@ void applyBiPROFCore (Pel* dst, int dstStride, const Pel* src0, const Pel* src1, { if (!(w & 3)) idx -= 4; dI0 = dMvX0[idx] * gradX0[w] + dMvY0[idx] * gradY0[w]; +#if !JVET_P0057_BDOF_PROF_HARMONIZATION dI0 = (dI0 + dIoffset) >> dIshift; +#endif #if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING dI0 = Clip3(-dILimit, dILimit - 1, dI0); #endif if (l1PROFEnabled) { dI1 = dMvX1[idx] * gradX1[w] + dMvY1[idx] * gradY1[w]; +#if !JVET_P0057_BDOF_PROF_HARMONIZATION dI1 = (dI1 + dIoffset) >> dIshift; +#endif #if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING dI1 = Clip3(-dILimit, dILimit - 1, dI1); #endif diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp index ddbde068634d7f5e02d56761b527b8598909b372..ef51d6960c16156e393fe28174a96d9b31f482c6 100644 --- a/source/Lib/CommonLib/InterPrediction.cpp +++ b/source/Lib/CommonLib/InterPrediction.cpp @@ -913,14 +913,23 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio dMvV += blockWidth; } +#if JVET_P0057_BDOF_PROF_HARMONIZATION + const int mvShift = shift + MV_FRACTIONAL_BITS_INTERNAL + 2 - std::max<int>(5, clpRng.bd - 7); + const int dmvLimit = (1 << (std::max<int>(5, clpRng.bd - 7))); +#else const int bdlimit = std::max<int>(6, clpRng.bd - 6); const int dmvLimit = 1 << bdlimit; +#endif if (!g_pelBufOP.roundIntVector) { for (int idx = 0; idx < blockWidth * blockHeight; idx++) { +#if JVET_P0057_BDOF_PROF_HARMONIZATION + roundAffineMv(dMvScaleHor[idx], dMvScaleVer[idx], mvShift); +#else roundAffineMv(dMvScaleHor[idx], dMvScaleVer[idx], shift); +#endif dMvScaleHor[idx] = Clip3(-dmvLimit, dmvLimit - 1, dMvScaleHor[idx]); dMvScaleVer[idx] = Clip3(-dmvLimit, dmvLimit - 1, dMvScaleVer[idx]); } @@ -928,8 +937,13 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio else { int sz = blockWidth * blockHeight; +#if JVET_P0057_BDOF_PROF_HARMONIZATION + g_pelBufOP.roundIntVector(dMvScaleHor, sz, mvShift, dmvLimit); + g_pelBufOP.roundIntVector(dMvScaleVer, sz, mvShift, dmvLimit); +#else g_pelBufOP.roundIntVector(dMvScaleHor, sz, shift, dmvLimit); g_pelBufOP.roundIntVector(dMvScaleVer, sz, shift, dmvLimit); +#endif } } // get prediction block by block @@ -1182,14 +1196,22 @@ void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf g_pelBufOP.calcBIOSums(SrcY0Tmp, SrcY1Tmp, pGradX0Tmp, pGradX1Tmp, pGradY0Tmp, pGradY1Tmp, xu, yu, src0Stride, src1Stride, widthG, bitDepth, &sumAbsGX, &sumAbsGY, &sumDIX, &sumDIY, &sumSignGY_GX); tmpx = (sumAbsGX == 0 ? 0 : rightShiftMSB(sumDIX << 3, sumAbsGX)); +#if JVET_P0057_BDOF_PROF_HARMONIZATION + tmpx = Clip3(-limit, limit - 1, tmpx); +#else tmpx = Clip3(-limit, limit, tmpx); +#endif int mainsGxGy = sumSignGY_GX >> 12; int secsGxGy = sumSignGY_GX & ((1 << 12) - 1); int tmpData = tmpx * mainsGxGy; tmpData = ((tmpData << 12) + tmpx*secsGxGy) >> 1; tmpy = (sumAbsGY == 0 ? 0 : rightShiftMSB(((sumDIY << 3) - tmpData), sumAbsGY)); +#if JVET_P0057_BDOF_PROF_HARMONIZATION + tmpy = Clip3(-limit, limit - 1, tmpy); +#else tmpy = Clip3(-limit, limit, tmpy); +#endif srcY0Temp = srcY0 + (stridePredMC + 1) + ((yu*src0Stride + xu) << 2); srcY1Temp = srcY1 + (stridePredMC + 1) + ((yu*src0Stride + xu) << 2); gradX0 = m_gradX0 + offsetPos + ((yu*widthG + xu) << 2); @@ -1332,8 +1354,13 @@ void InterPrediction::xApplyBiPROF(const PredictionUnit &pu, const CPelBuf& pcYu const int bit = MAX_CU_DEPTH; const int shift = bit - 4 + MV_FRACTIONAL_BITS_INTERNAL; +#if JVET_P0057_BDOF_PROF_HARMONIZATION + const int mvShift = shift + MV_FRACTIONAL_BITS_INTERNAL + 2 - std::max<int>(5, clpRng.bd - 7); + const int dmvLimit = (1 << (std::max<int>(5, clpRng.bd - 7))); +#else const int bdlimit = std::max<int>(6, clpRng.bd - 6); const int dmvLimit = 1 << bdlimit; +#endif for (int list = 0; list < 2; list++) { @@ -1393,7 +1420,11 @@ void InterPrediction::xApplyBiPROF(const PredictionUnit &pu, const CPelBuf& pcYu { for (int idx = 0; idx < blockWidth * blockHeight; idx++) { +#if JVET_P0057_BDOF_PROF_HARMONIZATION + roundAffineMv(dMvScaleHor[idx], dMvScaleVer[idx], mvShift); +#else roundAffineMv(dMvScaleHor[idx], dMvScaleVer[idx], shift); +#endif dMvScaleHor[idx] = Clip3(-dmvLimit, dmvLimit - 1, dMvScaleHor[idx]); dMvScaleVer[idx] = Clip3(-dmvLimit, dmvLimit - 1, dMvScaleVer[idx]); } @@ -1401,8 +1432,13 @@ void InterPrediction::xApplyBiPROF(const PredictionUnit &pu, const CPelBuf& pcYu else { int sz = blockWidth * blockHeight; +#if JVET_P0057_BDOF_PROF_HARMONIZATION + g_pelBufOP.roundIntVector(dMvScaleHor, sz, mvShift, dmvLimit); + g_pelBufOP.roundIntVector(dMvScaleVer, sz, mvShift, dmvLimit); +#else g_pelBufOP.roundIntVector(dMvScaleHor, sz, shift, dmvLimit); g_pelBufOP.roundIntVector(dMvScaleVer, sz, shift, dmvLimit); +#endif } } } diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 2789b0c211025aba7b3e75319e79475f31bc913e..fbb7edbfbfc8055154a63dc3c684cf7e9afc3d78 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -50,6 +50,8 @@ #include <assert.h> #include <cassert> +#define JVET_P0057_BDOF_PROF_HARMONIZATION 1 // JVET-P0057: harmonization of BDOF and PROF on motion refinement precis + #define JVET_P0400_REMOVE_SHARED_MERGE_LIST 1 // JVET-P0400: removeal of shared merge list #define JVET_P0436_CQP_OFFSET_SIGNALLING 1 // JVET_P0436: CU chroma QP offset signalling consistent with VPDU and bugfix diff --git a/source/Lib/CommonLib/x86/BufferX86.h b/source/Lib/CommonLib/x86/BufferX86.h index 37a478b1bb668b8827e6ceb588c2a420c0714f6e..f59493329627805430988a1152118bb6fef09925 100644 --- a/source/Lib/CommonLib/x86/BufferX86.h +++ b/source/Lib/CommonLib/x86/BufferX86.h @@ -345,7 +345,9 @@ void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride, CHECKD((width & 3), "block width error!"); __m128i mm_dmvx, mm_dmvy, mm_gradx, mm_grady, mm_dI, mm_src; +#if !JVET_P0057_BDOF_PROF_HARMONIZATION __m128i mm_dIoffset = _mm_set1_epi32(1); +#endif __m128i mm_offset = _mm_set1_epi32(offset); __m128i vibdimin = _mm_set1_epi32(clpRng.min); __m128i vibdimax = _mm_set1_epi32(clpRng.max); @@ -375,7 +377,9 @@ void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride, mm_src = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)src)); mm_dI = _mm_add_epi32(_mm_mullo_epi32(mm_dmvx, mm_gradx), _mm_mullo_epi32(mm_dmvy, mm_grady)); +#if !JVET_P0057_BDOF_PROF_HARMONIZATION mm_dI = _mm_srai_epi32(_mm_add_epi32(mm_dI, mm_dIoffset), 1); +#endif #if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING mm_dI = _mm_min_epi32(vdImax, _mm_max_epi32(vdImin, mm_dI)); #endif @@ -417,7 +421,9 @@ void applyBiPROF_SSE(Pel* dst, int dstStride, const Pel* src0, const Pel* src1, __m128i mm_dmvx0, mm_dmvy0, mm_dmvx1, mm_dmvy1, mm_gradx0, mm_grady0, mm_gradx1, mm_grady1, mm_src0, mm_src1; __m128i mm_dI0, mm_dI1, mm_dI; +#if !JVET_P0057_BDOF_PROF_HARMONIZATION __m128i mm_dIoffset = _mm_set1_epi32(1); +#endif const int *mmMvX0, *mmMvY0, *mmMvX1, *mmMvY1; const Pel *gX0, *gY0, *gX1, *gY1; @@ -458,7 +464,9 @@ void applyBiPROF_SSE(Pel* dst, int dstStride, const Pel* src0, const Pel* src1, mm_gradx0 = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gX0)); mm_grady0 = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gY0)); mm_dI0 = _mm_add_epi32(_mm_mullo_epi32(mm_dmvx0, mm_gradx0), _mm_mullo_epi32(mm_dmvy0, mm_grady0)); +#if !JVET_P0057_BDOF_PROF_HARMONIZATION mm_dI0 = _mm_srai_epi32(_mm_add_epi32(mm_dI0, mm_dIoffset), 1); +#endif #if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING mm_dI0 = _mm_min_epi32(vdImax, _mm_max_epi32(vdImin, mm_dI0)); #endif @@ -470,7 +478,9 @@ void applyBiPROF_SSE(Pel* dst, int dstStride, const Pel* src0, const Pel* src1, mm_gradx1 = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gX1)); mm_grady1 = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gY1)); mm_dI1 = _mm_add_epi32(_mm_mullo_epi32(mm_dmvx1, mm_gradx1), _mm_mullo_epi32(mm_dmvy1, mm_grady1)); +#if !JVET_P0057_BDOF_PROF_HARMONIZATION mm_dI1 = _mm_srai_epi32(_mm_add_epi32(mm_dI1, mm_dIoffset), 1); +#endif #if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING mm_dI1 = _mm_min_epi32(vdImax, _mm_max_epi32(vdImin, mm_dI1)); #endif