From c08e49737afef541736c2f4f9b519e271e4e177f Mon Sep 17 00:00:00 2001
From: Frank Bossen <fbossen@gmail.com>
Date: Fri, 17 Jan 2020 15:29:44 +0100
Subject: [PATCH] remove macro JVET_P0057_BDOF_PROF_HARMONIZATION

---
 source/Lib/CommonLib/Buffer.cpp          | 17 ------------
 source/Lib/CommonLib/InterPrediction.cpp | 28 --------------------
 source/Lib/CommonLib/TypeDef.h           |  1 -
 source/Lib/CommonLib/x86/BufferX86.h     | 33 ------------------------
 4 files changed, 79 deletions(-)

diff --git a/source/Lib/CommonLib/Buffer.cpp b/source/Lib/CommonLib/Buffer.cpp
index 20f995645..f4b18c135 100644
--- a/source/Lib/CommonLib/Buffer.cpp
+++ b/source/Lib/CommonLib/Buffer.cpp
@@ -49,10 +49,6 @@ void applyPROFCore(Pel* dst, int dstStride, const Pel* src, int srcStride, int w
 #endif
 {
   int idx = 0;
-#if !JVET_P0057_BDOF_PROF_HARMONIZATION 
-  const int dIshift = 1;
-  const int dIoffset = 1 << (dIshift - 1);
-#endif
 
 #if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING
   const int dILimit = 1 << std::max<int>(clpRng.bd + 1, 13);
@@ -62,9 +58,6 @@ void applyPROFCore(Pel* dst, int dstStride, const Pel* src, int srcStride, int w
     for (int w = 0; w < width; w++)
     {
       int32_t dI = dMvX[idx] * gradX[w] + dMvY[idx] * gradY[w];
-#if !JVET_P0057_BDOF_PROF_HARMONIZATION 
-      dI = (dI + dIoffset) >> dIshift;
-#endif
 #if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING
       dI = Clip3(-dILimit, dILimit - 1, dI);
       dst[w] = src[w] + dI;
@@ -94,10 +87,6 @@ void applyBiPROFCore (Pel* dst, int dstStride, const Pel* src0, const Pel* src1,
   int idx = 16;
   int32_t dI0 = 0;
   int32_t dI1 = 0;
-#if !JVET_P0057_BDOF_PROF_HARMONIZATION 
-  const int dIshift = 1;
-  const int dIoffset = 1 << (dIshift - 1);
-#endif
 
   const int clipbd = clpRng.bd;
   const int shiftNum = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + g_BcwLog2WeightBase;
@@ -118,18 +107,12 @@ void applyBiPROFCore (Pel* dst, int dstStride, const Pel* src0, const Pel* src1,
     {
       if (!(w & 3)) idx -= 4;
       dI0 = dMvX0[idx] * gradX0[w] + dMvY0[idx] * gradY0[w];
-#if !JVET_P0057_BDOF_PROF_HARMONIZATION
-      dI0 = (dI0 + dIoffset) >> dIshift;
-#endif
 #if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING
       dI0 = Clip3(-dILimit, dILimit - 1, dI0);
 #endif
       if (l1PROFEnabled)
       {
         dI1 = dMvX1[idx] * gradX1[w] + dMvY1[idx] * gradY1[w];
-#if !JVET_P0057_BDOF_PROF_HARMONIZATION 
-        dI1 = (dI1 + dIoffset) >> dIshift;
-#endif
 #if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING
         dI1 = Clip3(-dILimit, dILimit - 1, dI1);
 #endif
diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp
index 164489e40..bd7d590d6 100644
--- a/source/Lib/CommonLib/InterPrediction.cpp
+++ b/source/Lib/CommonLib/InterPrediction.cpp
@@ -992,24 +992,15 @@ void InterPrediction::xPredAffineBlk(const ComponentID &compID, const Prediction
     const int mvShift  = 8;
     const int dmvLimit = ( 1 << 5 ) - 1;
 #else
-#if JVET_P0057_BDOF_PROF_HARMONIZATION 
     const int mvShift = shift + MV_FRACTIONAL_BITS_INTERNAL + 2 - std::max<int>(5, clpRng.bd - 7);
     const int dmvLimit = (1 << (std::max<int>(5, clpRng.bd - 7)));
-#else
-    const int bdlimit = std::max<int>(6, clpRng.bd - 6);
-    const int dmvLimit = 1 << bdlimit;
-#endif
 #endif
 
     if (!g_pelBufOP.roundIntVector)
     {
       for (int idx = 0; idx < blockWidth * blockHeight; idx++)
       {
-#if JVET_P0057_BDOF_PROF_HARMONIZATION 
         roundAffineMv(dMvScaleHor[idx], dMvScaleVer[idx], mvShift);
-#else
-        roundAffineMv(dMvScaleHor[idx], dMvScaleVer[idx], shift);
-#endif
         dMvScaleHor[idx] = Clip3( -dmvLimit, dmvLimit, dMvScaleHor[idx] );
         dMvScaleVer[idx] = Clip3( -dmvLimit, dmvLimit, dMvScaleVer[idx] );
       }
@@ -1017,13 +1008,8 @@ void InterPrediction::xPredAffineBlk(const ComponentID &compID, const Prediction
     else
     {
       int sz = blockWidth * blockHeight;
-#if JVET_P0057_BDOF_PROF_HARMONIZATION 
       g_pelBufOP.roundIntVector(dMvScaleHor, sz, mvShift, dmvLimit);
       g_pelBufOP.roundIntVector(dMvScaleVer, sz, mvShift, dmvLimit);
-#else
-      g_pelBufOP.roundIntVector(dMvScaleHor, sz, shift, dmvLimit);
-      g_pelBufOP.roundIntVector(dMvScaleVer, sz, shift, dmvLimit);
-#endif
     }
   }
   int scaleXLuma = ::getComponentScaleX(COMPONENT_Y, chFmt);
@@ -1523,13 +1509,8 @@ void InterPrediction::xApplyBiPROF(const PredictionUnit &pu, const CPelBuf& pcYu
   const int dmvLimit = ( 1 << 5 ) - 1;
 #else
   const int shift = bit - 4 + MV_FRACTIONAL_BITS_INTERNAL;
-#if JVET_P0057_BDOF_PROF_HARMONIZATION 
   const int mvShift = shift + MV_FRACTIONAL_BITS_INTERNAL + 2 - std::max<int>(5, clpRng.bd - 7);
   const int dmvLimit = (1 << (std::max<int>(5, clpRng.bd - 7)));
-#else
-  const int bdlimit = std::max<int>(6, clpRng.bd - 6);
-  const int dmvLimit = 1 << bdlimit;
-#endif
 #endif
 
   for (int list = 0; list < 2; list++)
@@ -1590,11 +1571,7 @@ void InterPrediction::xApplyBiPROF(const PredictionUnit &pu, const CPelBuf& pcYu
       {
         for (int idx = 0; idx < blockWidth * blockHeight; idx++)
         {
-#if JVET_P0057_BDOF_PROF_HARMONIZATION 
           roundAffineMv(dMvScaleHor[idx], dMvScaleVer[idx], mvShift);
-#else
-          roundAffineMv(dMvScaleHor[idx], dMvScaleVer[idx], shift);
-#endif
           dMvScaleHor[idx] = Clip3( -dmvLimit, dmvLimit, dMvScaleHor[idx] );
           dMvScaleVer[idx] = Clip3( -dmvLimit, dmvLimit, dMvScaleVer[idx] );
         }
@@ -1602,13 +1579,8 @@ void InterPrediction::xApplyBiPROF(const PredictionUnit &pu, const CPelBuf& pcYu
       else
       {
         int sz = blockWidth * blockHeight;
-#if JVET_P0057_BDOF_PROF_HARMONIZATION 
         g_pelBufOP.roundIntVector(dMvScaleHor, sz, mvShift, dmvLimit);
         g_pelBufOP.roundIntVector(dMvScaleVer, sz, mvShift, dmvLimit);
-#else
-        g_pelBufOP.roundIntVector(dMvScaleHor, sz, shift, dmvLimit);
-        g_pelBufOP.roundIntVector(dMvScaleVer, sz, shift, dmvLimit);
-#endif
       }
     }
   }
diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
index deae1aed0..f8d6780a0 100644
--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -118,7 +118,6 @@
 
 
 
-#define JVET_P0057_BDOF_PROF_HARMONIZATION                1 // JVET-P0057: harmonization of BDOF and PROF on motion refinement precision
 
 #define JVET_P0653_BDOF_PROF_PARA_DEV                     1 // JVET-P0653/P0281: fixed shift operations for BDOF and PROF parameter derivation
 
diff --git a/source/Lib/CommonLib/x86/BufferX86.h b/source/Lib/CommonLib/x86/BufferX86.h
index 25e6ba302..d61bcae99 100644
--- a/source/Lib/CommonLib/x86/BufferX86.h
+++ b/source/Lib/CommonLib/x86/BufferX86.h
@@ -358,9 +358,6 @@ void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride,
 
 #ifdef USE_AVX2
   __m256i mm_dmvx, mm_dmvy, mm_gradx, mm_grady, mm_dI, mm_dI0, mm_src;
-#if !JVET_P0057_BDOF_PROF_HARMONIZATION
-  __m256i mm_dIoffset = _mm256_set1_epi32(1);
-#endif
   __m256i mm_offset = _mm256_set1_epi16(offset);
   __m256i vibdimin = _mm256_set1_epi16(clpRng.min);
   __m256i vibdimax = _mm256_set1_epi16(clpRng.max);
@@ -368,9 +365,6 @@ void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride,
   __m256i mm_dimax = _mm256_set1_epi32(dILimit - 1);
 #else
   __m128i mm_dmvx, mm_dmvy, mm_gradx, mm_grady, mm_dI, mm_dI0;
-#if !JVET_P0057_BDOF_PROF_HARMONIZATION
-  __m128i mm_dIoffset = _mm_set1_epi32(1);
-#endif
   __m128i mm_offset = _mm_set1_epi16(offset);
   __m128i vibdimin = _mm_set1_epi16(clpRng.min);
   __m128i vibdimax = _mm_set1_epi16(clpRng.max);
@@ -385,9 +379,6 @@ void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride,
 #endif
 #else
   __m128i mm_dmvx, mm_dmvy, mm_gradx, mm_grady, mm_dI, mm_src;
-#if !JVET_P0057_BDOF_PROF_HARMONIZATION
-  __m128i mm_dIoffset = _mm_set1_epi32(1);
-#endif
   __m128i mm_offset = _mm_set1_epi32(offset);
   __m128i vibdimin  = _mm_set1_epi32(clpRng.min);
   __m128i vibdimax  = _mm_set1_epi32(clpRng.max);
@@ -420,9 +411,6 @@ void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride,
         _mm256_castsi128_si256(_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gY0))),
         _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gY0 + gradStride))), 1);
       mm_dI0 = _mm256_add_epi32(_mm256_mullo_epi32(mm_dmvx, mm_gradx), _mm256_mullo_epi32(mm_dmvy, mm_grady));
-#if !JVET_P0057_BDOF_PROF_HARMONIZATION
-      mm_dI0 = _mm256_srai_epi32(_mm256_add_epi32(mm_dI0, mm_dIoffset), 1);
-#endif
       mm_dI0 = _mm256_min_epi32(mm_dimax, _mm256_max_epi32(mm_dimin, mm_dI0));
 
       // next two rows
@@ -436,9 +424,6 @@ void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride,
         _mm256_castsi128_si256(_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gY0))),
         _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gY0 + gradStride))), 1);
       mm_dI = _mm256_add_epi32(_mm256_mullo_epi32(mm_dmvx, mm_gradx), _mm256_mullo_epi32(mm_dmvy, mm_grady));
-#if !JVET_P0057_BDOF_PROF_HARMONIZATION
-      mm_dI = _mm256_srai_epi32(_mm256_add_epi32(mm_dI, mm_dIoffset), 1);
-#endif
       mm_dI = _mm256_min_epi32(mm_dimax, _mm256_max_epi32(mm_dimin, mm_dI));
 
       // combine four rows
@@ -470,9 +455,6 @@ void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride,
       mm_gradx = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gX));
       mm_grady = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gY));
       mm_dI0 = _mm_add_epi32(_mm_mullo_epi32(mm_dmvx, mm_gradx), _mm_mullo_epi32(mm_dmvy, mm_grady));
-#if !JVET_P0057_BDOF_PROF_HARMONIZATION
-      mm_dI0 = _mm_srai_epi32(_mm_add_epi32(mm_dI0, mm_dIoffset), 1);
-#endif
       mm_dI0 = _mm_min_epi32(mm_dimax, _mm_max_epi32(mm_dimin, mm_dI0));
 
       // second row
@@ -481,9 +463,6 @@ void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride,
       mm_gradx = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gX + gradStride)));
       mm_grady = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gY + gradStride)));
       mm_dI = _mm_add_epi32(_mm_mullo_epi32(mm_dmvx, mm_gradx), _mm_mullo_epi32(mm_dmvy, mm_grady));
-#if !JVET_P0057_BDOF_PROF_HARMONIZATION
-      mm_dI = _mm_srai_epi32(_mm_add_epi32(mm_dI, mm_dIoffset), 1);
-#endif
       mm_dI = _mm_min_epi32(mm_dimax, _mm_max_epi32(mm_dimin, mm_dI));
 
       // combine both rows
@@ -506,9 +485,6 @@ void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride,
       mm_src = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)src));
 
       mm_dI = _mm_add_epi32(_mm_mullo_epi32(mm_dmvx, mm_gradx), _mm_mullo_epi32(mm_dmvy, mm_grady));
-#if !JVET_P0057_BDOF_PROF_HARMONIZATION 
-      mm_dI = _mm_srai_epi32(_mm_add_epi32(mm_dI, mm_dIoffset), 1);
-#endif
 
       mm_dI = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(mm_dI, mm_src), mm_offset), shiftNum);
       mm_dI = _mm_packs_epi32(_mm_min_epi32(vibdimax, _mm_max_epi32(vibdimin, mm_dI)), vzero);
@@ -569,9 +545,6 @@ void applyBiPROF_SSE(Pel* dst, int dstStride, const Pel* src0, const Pel* src1,
 
   __m128i mm_dmvx0, mm_dmvy0, mm_dmvx1, mm_dmvy1, mm_gradx0, mm_grady0, mm_gradx1, mm_grady1, mm_src0, mm_src1;
   __m128i mm_dI0, mm_dI1, mm_dI;
-#if !JVET_P0057_BDOF_PROF_HARMONIZATION 
-  __m128i mm_dIoffset = _mm_set1_epi32(1);
-#endif
   const int *mmMvX0, *mmMvY0, *mmMvX1, *mmMvY1;
   const Pel *gX0, *gY0, *gX1, *gY1;
 
@@ -612,9 +585,6 @@ void applyBiPROF_SSE(Pel* dst, int dstStride, const Pel* src0, const Pel* src1,
       mm_gradx0 = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gX0));
       mm_grady0 = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gY0));
       mm_dI0 = _mm_add_epi32(_mm_mullo_epi32(mm_dmvx0, mm_gradx0), _mm_mullo_epi32(mm_dmvy0, mm_grady0));
-#if !JVET_P0057_BDOF_PROF_HARMONIZATION 
-      mm_dI0 = _mm_srai_epi32(_mm_add_epi32(mm_dI0, mm_dIoffset), 1);
-#endif
 #if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING
       mm_dI0 = _mm_min_epi32(vdImax, _mm_max_epi32(vdImin, mm_dI0));
 #endif
@@ -626,9 +596,6 @@ void applyBiPROF_SSE(Pel* dst, int dstStride, const Pel* src0, const Pel* src1,
         mm_gradx1 = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gX1));
         mm_grady1 = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gY1));
         mm_dI1 = _mm_add_epi32(_mm_mullo_epi32(mm_dmvx1, mm_gradx1), _mm_mullo_epi32(mm_dmvy1, mm_grady1));
-#if !JVET_P0057_BDOF_PROF_HARMONIZATION 
-        mm_dI1 = _mm_srai_epi32(_mm_add_epi32(mm_dI1, mm_dIoffset), 1);
-#endif
 #if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING
         mm_dI1 = _mm_min_epi32(vdImax, _mm_max_epi32(vdImin, mm_dI1));
 #endif
-- 
GitLab