From c08e49737afef541736c2f4f9b519e271e4e177f Mon Sep 17 00:00:00 2001 From: Frank Bossen <fbossen@gmail.com> Date: Fri, 17 Jan 2020 15:29:44 +0100 Subject: [PATCH] remove macro JVET_P0057_BDOF_PROF_HARMONIZATION --- source/Lib/CommonLib/Buffer.cpp | 17 ------------ source/Lib/CommonLib/InterPrediction.cpp | 28 -------------------- source/Lib/CommonLib/TypeDef.h | 1 - source/Lib/CommonLib/x86/BufferX86.h | 33 ------------------------ 4 files changed, 79 deletions(-) diff --git a/source/Lib/CommonLib/Buffer.cpp b/source/Lib/CommonLib/Buffer.cpp index 20f995645..f4b18c135 100644 --- a/source/Lib/CommonLib/Buffer.cpp +++ b/source/Lib/CommonLib/Buffer.cpp @@ -49,10 +49,6 @@ void applyPROFCore(Pel* dst, int dstStride, const Pel* src, int srcStride, int w #endif { int idx = 0; -#if !JVET_P0057_BDOF_PROF_HARMONIZATION - const int dIshift = 1; - const int dIoffset = 1 << (dIshift - 1); -#endif #if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING const int dILimit = 1 << std::max<int>(clpRng.bd + 1, 13); @@ -62,9 +58,6 @@ void applyPROFCore(Pel* dst, int dstStride, const Pel* src, int srcStride, int w for (int w = 0; w < width; w++) { int32_t dI = dMvX[idx] * gradX[w] + dMvY[idx] * gradY[w]; -#if !JVET_P0057_BDOF_PROF_HARMONIZATION - dI = (dI + dIoffset) >> dIshift; -#endif #if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING dI = Clip3(-dILimit, dILimit - 1, dI); dst[w] = src[w] + dI; @@ -94,10 +87,6 @@ void applyBiPROFCore (Pel* dst, int dstStride, const Pel* src0, const Pel* src1, int idx = 16; int32_t dI0 = 0; int32_t dI1 = 0; -#if !JVET_P0057_BDOF_PROF_HARMONIZATION - const int dIshift = 1; - const int dIoffset = 1 << (dIshift - 1); -#endif const int clipbd = clpRng.bd; const int shiftNum = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + g_BcwLog2WeightBase; @@ -118,18 +107,12 @@ void applyBiPROFCore (Pel* dst, int dstStride, const Pel* src0, const Pel* src1, { if (!(w & 3)) idx -= 4; dI0 = dMvX0[idx] * gradX0[w] + dMvY0[idx] * gradY0[w]; -#if !JVET_P0057_BDOF_PROF_HARMONIZATION - dI0 = (dI0 + dIoffset) >> dIshift; -#endif #if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING dI0 = Clip3(-dILimit, dILimit - 1, dI0); #endif if (l1PROFEnabled) { dI1 = dMvX1[idx] * gradX1[w] + dMvY1[idx] * gradY1[w]; -#if !JVET_P0057_BDOF_PROF_HARMONIZATION - dI1 = (dI1 + dIoffset) >> dIshift; -#endif #if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING dI1 = Clip3(-dILimit, dILimit - 1, dI1); #endif diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp index 164489e40..bd7d590d6 100644 --- a/source/Lib/CommonLib/InterPrediction.cpp +++ b/source/Lib/CommonLib/InterPrediction.cpp @@ -992,24 +992,15 @@ void InterPrediction::xPredAffineBlk(const ComponentID &compID, const Prediction const int mvShift = 8; const int dmvLimit = ( 1 << 5 ) - 1; #else -#if JVET_P0057_BDOF_PROF_HARMONIZATION const int mvShift = shift + MV_FRACTIONAL_BITS_INTERNAL + 2 - std::max<int>(5, clpRng.bd - 7); const int dmvLimit = (1 << (std::max<int>(5, clpRng.bd - 7))); -#else - const int bdlimit = std::max<int>(6, clpRng.bd - 6); - const int dmvLimit = 1 << bdlimit; -#endif #endif if (!g_pelBufOP.roundIntVector) { for (int idx = 0; idx < blockWidth * blockHeight; idx++) { -#if JVET_P0057_BDOF_PROF_HARMONIZATION roundAffineMv(dMvScaleHor[idx], dMvScaleVer[idx], mvShift); -#else - roundAffineMv(dMvScaleHor[idx], dMvScaleVer[idx], shift); -#endif dMvScaleHor[idx] = Clip3( -dmvLimit, dmvLimit, dMvScaleHor[idx] ); dMvScaleVer[idx] = Clip3( -dmvLimit, dmvLimit, dMvScaleVer[idx] ); } @@ -1017,13 +1008,8 @@ void InterPrediction::xPredAffineBlk(const ComponentID &compID, const Prediction else { int sz = blockWidth * blockHeight; -#if JVET_P0057_BDOF_PROF_HARMONIZATION g_pelBufOP.roundIntVector(dMvScaleHor, sz, mvShift, dmvLimit); g_pelBufOP.roundIntVector(dMvScaleVer, sz, mvShift, dmvLimit); -#else - g_pelBufOP.roundIntVector(dMvScaleHor, sz, shift, dmvLimit); - g_pelBufOP.roundIntVector(dMvScaleVer, sz, shift, dmvLimit); -#endif } } int scaleXLuma = ::getComponentScaleX(COMPONENT_Y, chFmt); @@ -1523,13 +1509,8 @@ void InterPrediction::xApplyBiPROF(const PredictionUnit &pu, const CPelBuf& pcYu const int dmvLimit = ( 1 << 5 ) - 1; #else const int shift = bit - 4 + MV_FRACTIONAL_BITS_INTERNAL; -#if JVET_P0057_BDOF_PROF_HARMONIZATION const int mvShift = shift + MV_FRACTIONAL_BITS_INTERNAL + 2 - std::max<int>(5, clpRng.bd - 7); const int dmvLimit = (1 << (std::max<int>(5, clpRng.bd - 7))); -#else - const int bdlimit = std::max<int>(6, clpRng.bd - 6); - const int dmvLimit = 1 << bdlimit; -#endif #endif for (int list = 0; list < 2; list++) @@ -1590,11 +1571,7 @@ void InterPrediction::xApplyBiPROF(const PredictionUnit &pu, const CPelBuf& pcYu { for (int idx = 0; idx < blockWidth * blockHeight; idx++) { -#if JVET_P0057_BDOF_PROF_HARMONIZATION roundAffineMv(dMvScaleHor[idx], dMvScaleVer[idx], mvShift); -#else - roundAffineMv(dMvScaleHor[idx], dMvScaleVer[idx], shift); -#endif dMvScaleHor[idx] = Clip3( -dmvLimit, dmvLimit, dMvScaleHor[idx] ); dMvScaleVer[idx] = Clip3( -dmvLimit, dmvLimit, dMvScaleVer[idx] ); } @@ -1602,13 +1579,8 @@ void InterPrediction::xApplyBiPROF(const PredictionUnit &pu, const CPelBuf& pcYu else { int sz = blockWidth * blockHeight; -#if JVET_P0057_BDOF_PROF_HARMONIZATION g_pelBufOP.roundIntVector(dMvScaleHor, sz, mvShift, dmvLimit); g_pelBufOP.roundIntVector(dMvScaleVer, sz, mvShift, dmvLimit); -#else - g_pelBufOP.roundIntVector(dMvScaleHor, sz, shift, dmvLimit); - g_pelBufOP.roundIntVector(dMvScaleVer, sz, shift, dmvLimit); -#endif } } } diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index deae1aed0..f8d6780a0 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -118,7 +118,6 @@ -#define JVET_P0057_BDOF_PROF_HARMONIZATION 1 // JVET-P0057: harmonization of BDOF and PROF on motion refinement precision #define JVET_P0653_BDOF_PROF_PARA_DEV 1 // JVET-P0653/P0281: fixed shift operations for BDOF and PROF parameter derivation diff --git a/source/Lib/CommonLib/x86/BufferX86.h b/source/Lib/CommonLib/x86/BufferX86.h index 25e6ba302..d61bcae99 100644 --- a/source/Lib/CommonLib/x86/BufferX86.h +++ b/source/Lib/CommonLib/x86/BufferX86.h @@ -358,9 +358,6 @@ void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride, #ifdef USE_AVX2 __m256i mm_dmvx, mm_dmvy, mm_gradx, mm_grady, mm_dI, mm_dI0, mm_src; -#if !JVET_P0057_BDOF_PROF_HARMONIZATION - __m256i mm_dIoffset = _mm256_set1_epi32(1); -#endif __m256i mm_offset = _mm256_set1_epi16(offset); __m256i vibdimin = _mm256_set1_epi16(clpRng.min); __m256i vibdimax = _mm256_set1_epi16(clpRng.max); @@ -368,9 +365,6 @@ void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride, __m256i mm_dimax = _mm256_set1_epi32(dILimit - 1); #else __m128i mm_dmvx, mm_dmvy, mm_gradx, mm_grady, mm_dI, mm_dI0; -#if !JVET_P0057_BDOF_PROF_HARMONIZATION - __m128i mm_dIoffset = _mm_set1_epi32(1); -#endif __m128i mm_offset = _mm_set1_epi16(offset); __m128i vibdimin = _mm_set1_epi16(clpRng.min); __m128i vibdimax = _mm_set1_epi16(clpRng.max); @@ -385,9 +379,6 @@ void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride, #endif #else __m128i mm_dmvx, mm_dmvy, mm_gradx, mm_grady, mm_dI, mm_src; -#if !JVET_P0057_BDOF_PROF_HARMONIZATION - __m128i mm_dIoffset = _mm_set1_epi32(1); -#endif __m128i mm_offset = _mm_set1_epi32(offset); __m128i vibdimin = _mm_set1_epi32(clpRng.min); __m128i vibdimax = _mm_set1_epi32(clpRng.max); @@ -420,9 +411,6 @@ void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride, _mm256_castsi128_si256(_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gY0))), _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gY0 + gradStride))), 1); mm_dI0 = _mm256_add_epi32(_mm256_mullo_epi32(mm_dmvx, mm_gradx), _mm256_mullo_epi32(mm_dmvy, mm_grady)); -#if !JVET_P0057_BDOF_PROF_HARMONIZATION - mm_dI0 = _mm256_srai_epi32(_mm256_add_epi32(mm_dI0, mm_dIoffset), 1); -#endif mm_dI0 = _mm256_min_epi32(mm_dimax, _mm256_max_epi32(mm_dimin, mm_dI0)); // next two rows @@ -436,9 +424,6 @@ void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride, _mm256_castsi128_si256(_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gY0))), _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gY0 + gradStride))), 1); mm_dI = _mm256_add_epi32(_mm256_mullo_epi32(mm_dmvx, mm_gradx), _mm256_mullo_epi32(mm_dmvy, mm_grady)); -#if !JVET_P0057_BDOF_PROF_HARMONIZATION - mm_dI = _mm256_srai_epi32(_mm256_add_epi32(mm_dI, mm_dIoffset), 1); -#endif mm_dI = _mm256_min_epi32(mm_dimax, _mm256_max_epi32(mm_dimin, mm_dI)); // combine four rows @@ -470,9 +455,6 @@ void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride, mm_gradx = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gX)); mm_grady = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gY)); mm_dI0 = _mm_add_epi32(_mm_mullo_epi32(mm_dmvx, mm_gradx), _mm_mullo_epi32(mm_dmvy, mm_grady)); -#if !JVET_P0057_BDOF_PROF_HARMONIZATION - mm_dI0 = _mm_srai_epi32(_mm_add_epi32(mm_dI0, mm_dIoffset), 1); -#endif mm_dI0 = _mm_min_epi32(mm_dimax, _mm_max_epi32(mm_dimin, mm_dI0)); // second row @@ -481,9 +463,6 @@ void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride, mm_gradx = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gX + gradStride))); mm_grady = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gY + gradStride))); mm_dI = _mm_add_epi32(_mm_mullo_epi32(mm_dmvx, mm_gradx), _mm_mullo_epi32(mm_dmvy, mm_grady)); -#if !JVET_P0057_BDOF_PROF_HARMONIZATION - mm_dI = _mm_srai_epi32(_mm_add_epi32(mm_dI, mm_dIoffset), 1); -#endif mm_dI = _mm_min_epi32(mm_dimax, _mm_max_epi32(mm_dimin, mm_dI)); // combine both rows @@ -506,9 +485,6 @@ void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride, mm_src = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)src)); mm_dI = _mm_add_epi32(_mm_mullo_epi32(mm_dmvx, mm_gradx), _mm_mullo_epi32(mm_dmvy, mm_grady)); -#if !JVET_P0057_BDOF_PROF_HARMONIZATION - mm_dI = _mm_srai_epi32(_mm_add_epi32(mm_dI, mm_dIoffset), 1); -#endif mm_dI = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(mm_dI, mm_src), mm_offset), shiftNum); mm_dI = _mm_packs_epi32(_mm_min_epi32(vibdimax, _mm_max_epi32(vibdimin, mm_dI)), vzero); @@ -569,9 +545,6 @@ void applyBiPROF_SSE(Pel* dst, int dstStride, const Pel* src0, const Pel* src1, __m128i mm_dmvx0, mm_dmvy0, mm_dmvx1, mm_dmvy1, mm_gradx0, mm_grady0, mm_gradx1, mm_grady1, mm_src0, mm_src1; __m128i mm_dI0, mm_dI1, mm_dI; -#if !JVET_P0057_BDOF_PROF_HARMONIZATION - __m128i mm_dIoffset = _mm_set1_epi32(1); -#endif const int *mmMvX0, *mmMvY0, *mmMvX1, *mmMvY1; const Pel *gX0, *gY0, *gX1, *gY1; @@ -612,9 +585,6 @@ void applyBiPROF_SSE(Pel* dst, int dstStride, const Pel* src0, const Pel* src1, mm_gradx0 = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gX0)); mm_grady0 = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gY0)); mm_dI0 = _mm_add_epi32(_mm_mullo_epi32(mm_dmvx0, mm_gradx0), _mm_mullo_epi32(mm_dmvy0, mm_grady0)); -#if !JVET_P0057_BDOF_PROF_HARMONIZATION - mm_dI0 = _mm_srai_epi32(_mm_add_epi32(mm_dI0, mm_dIoffset), 1); -#endif #if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING mm_dI0 = _mm_min_epi32(vdImax, _mm_max_epi32(vdImin, mm_dI0)); #endif @@ -626,9 +596,6 @@ void applyBiPROF_SSE(Pel* dst, int dstStride, const Pel* src0, const Pel* src1, mm_gradx1 = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gX1)); mm_grady1 = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gY1)); mm_dI1 = _mm_add_epi32(_mm_mullo_epi32(mm_dmvx1, mm_gradx1), _mm_mullo_epi32(mm_dmvy1, mm_grady1)); -#if !JVET_P0057_BDOF_PROF_HARMONIZATION - mm_dI1 = _mm_srai_epi32(_mm_add_epi32(mm_dI1, mm_dIoffset), 1); -#endif #if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING mm_dI1 = _mm_min_epi32(vdImax, _mm_max_epi32(vdImin, mm_dI1)); #endif -- GitLab