From db3813d9c8e68cabafaca42d8297a5a0b21030bc Mon Sep 17 00:00:00 2001
From: HongbinLiu <liuhongbin.01@bytedance.com>
Date: Sun, 20 Oct 2019 18:22:18 +0800
Subject: [PATCH] JVET-P0491: clip the MVD in BDOF/PROF to [-31 31]

---
 source/Lib/CommonLib/InterPrediction.cpp | 26 ++++++++++++++++++++++--
 source/Lib/CommonLib/TypeDef.h           |  2 ++
 source/Lib/CommonLib/x86/BufferX86.h     | 12 +++++++++++
 3 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp
index 22dcc819e..b0baa9c19 100644
--- a/source/Lib/CommonLib/InterPrediction.cpp
+++ b/source/Lib/CommonLib/InterPrediction.cpp
@@ -915,7 +915,11 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio
 
 #if JVET_P0653_BDOF_PROF_PARA_DEV
     const int mvShift  = 8;
+#if JVET_P0491_BDOFPROF_MVD_RANGE
+    const int dmvLimit = ( 1 << 5 ) - 1;
+#else
     const int dmvLimit = (1 << 5);
+#endif
 #else
 #if JVET_P0057_BDOF_PROF_HARMONIZATION 
     const int mvShift = shift + MV_FRACTIONAL_BITS_INTERNAL + 2 - std::max<int>(5, clpRng.bd - 7);
@@ -935,8 +939,13 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio
 #else
         roundAffineMv(dMvScaleHor[idx], dMvScaleVer[idx], shift);
 #endif
+#if JVET_P0491_BDOFPROF_MVD_RANGE
+        dMvScaleHor[idx] = Clip3( -dmvLimit, dmvLimit, dMvScaleHor[idx] );
+        dMvScaleVer[idx] = Clip3( -dmvLimit, dmvLimit, dMvScaleVer[idx] );
+#else
         dMvScaleHor[idx] = Clip3(-dmvLimit, dmvLimit - 1, dMvScaleHor[idx]);
         dMvScaleVer[idx] = Clip3(-dmvLimit, dmvLimit - 1, dMvScaleVer[idx]);
+#endif
       }
     }
     else
@@ -1176,7 +1185,11 @@ void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf
   const int   shiftNum = IF_INTERNAL_PREC + 1 - bitDepth;
   const int   offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
 #if JVET_P0653_BDOF_PROF_PARA_DEV
+#if JVET_P0491_BDOFPROF_MVD_RANGE
+  const int   limit = ( 1 << 5 ) - 1;
+#else
   const int   limit = (1 << 5);
+#endif
 #else
   const int   limit = (1<<(std::max<int>(5, bitDepth - 7)));
 #endif
@@ -1205,7 +1218,7 @@ void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf
 
       g_pelBufOP.calcBIOSums(SrcY0Tmp, SrcY1Tmp, pGradX0Tmp, pGradX1Tmp, pGradY0Tmp, pGradY1Tmp, xu, yu, src0Stride, src1Stride, widthG, bitDepth, &sumAbsGX, &sumAbsGY, &sumDIX, &sumDIY, &sumSignGY_GX);
       tmpx = (sumAbsGX == 0 ? 0 : rightShiftMSB(sumDIX << 3, sumAbsGX));
-#if JVET_P0057_BDOF_PROF_HARMONIZATION 
+#if JVET_P0057_BDOF_PROF_HARMONIZATION && !JVET_P0491_BDOFPROF_MVD_RANGE
       tmpx = Clip3(-limit, limit - 1, tmpx);
 #else
       tmpx = Clip3(-limit, limit, tmpx);
@@ -1216,7 +1229,7 @@ void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf
       int     tmpData = tmpx * mainsGxGy;
       tmpData = ((tmpData << 12) + tmpx*secsGxGy) >> 1;
       tmpy = (sumAbsGY == 0 ? 0 : rightShiftMSB(((sumDIY << 3) - tmpData), sumAbsGY));
-#if JVET_P0057_BDOF_PROF_HARMONIZATION 
+#if JVET_P0057_BDOF_PROF_HARMONIZATION && !JVET_P0491_BDOFPROF_MVD_RANGE
       tmpy = Clip3(-limit, limit - 1, tmpy);
 #else
       tmpy = Clip3(-limit, limit, tmpy);
@@ -1364,7 +1377,11 @@ void InterPrediction::xApplyBiPROF(const PredictionUnit &pu, const CPelBuf& pcYu
   const int bit = MAX_CU_DEPTH;
 #if JVET_P0653_BDOF_PROF_PARA_DEV
   const int mvShift  = 8;
+#if JVET_P0491_BDOFPROF_MVD_RANGE
+  const int dmvLimit = ( 1 << 5 ) - 1;
+#else
   const int dmvLimit = (1 << 5);
+#endif
 #else
   const int shift = bit - 4 + MV_FRACTIONAL_BITS_INTERNAL;
 #if JVET_P0057_BDOF_PROF_HARMONIZATION 
@@ -1439,8 +1456,13 @@ void InterPrediction::xApplyBiPROF(const PredictionUnit &pu, const CPelBuf& pcYu
 #else
           roundAffineMv(dMvScaleHor[idx], dMvScaleVer[idx], shift);
 #endif
+#if JVET_P0491_BDOFPROF_MVD_RANGE
+          dMvScaleHor[idx] = Clip3( -dmvLimit, dmvLimit, dMvScaleHor[idx] );
+          dMvScaleVer[idx] = Clip3( -dmvLimit, dmvLimit, dMvScaleVer[idx] );
+#else
           dMvScaleHor[idx] = Clip3(-dmvLimit, dmvLimit - 1, dMvScaleHor[idx]);
           dMvScaleVer[idx] = Clip3(-dmvLimit, dmvLimit - 1, dMvScaleVer[idx]);
+#endif
         }
       }
       else
diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
index bd2c51b8c..841a7f0f4 100644
--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -50,6 +50,8 @@
 #include <assert.h>
 #include <cassert>
 
+#define JVET_P0491_BDOFPROF_MVD_RANGE                     1 // JVET-P0491: clip the MVD in BDOF/PROF to [-31 31]
+
 #define JVET_P0460_PLT_TS_MIN_QP                          1 // JVET-P0460: Use TS min QP for Palette Escape mode
 
 #define JVET_P1001_DEBLOCKING_CHROMAQP_FIX                1 //JVET-P1001/P1002: Align Chroma QP used in deblocking with the one used in Transform/invTransform
diff --git a/source/Lib/CommonLib/x86/BufferX86.h b/source/Lib/CommonLib/x86/BufferX86.h
index e6459f4a9..e9df4e455 100644
--- a/source/Lib/CommonLib/x86/BufferX86.h
+++ b/source/Lib/CommonLib/x86/BufferX86.h
@@ -529,7 +529,11 @@ void roundIntVector_SIMD(int* v, int size, unsigned int nShift, const int dmvLim
   if (vext >= AVX512 && size >= 16)
   {
     __m512i dMvMin = _mm256_set1_epi32(-dmvLimit);
+#if JVET_P0491_BDOFPROF_MVD_RANGE
+    __m512i dMvMax = _mm512_set1_epi32( dmvLimit ); // 512-bit broadcast so the intrinsic matches the declared __m512i type
+#else
     __m512i dMvMax = _mm256_set1_epi32(dmvLimit - 1 );
+#endif
     __m512i nOffset = _mm512_set1_epi32((1 << (nShift - 1)));
     __m512i vones = _mm512_set1_epi32(1);
     __m512i vzero = _mm512_setzero_si512();
@@ -549,7 +553,11 @@ void roundIntVector_SIMD(int* v, int size, unsigned int nShift, const int dmvLim
   if (vext >= AVX2 && size >= 8)
   {
     __m256i dMvMin = _mm256_set1_epi32(-dmvLimit);
+#if JVET_P0491_BDOFPROF_MVD_RANGE
+    __m256i dMvMax = _mm256_set1_epi32( dmvLimit );
+#else
     __m256i dMvMax = _mm256_set1_epi32(dmvLimit - 1);
+#endif
     __m256i nOffset = _mm256_set1_epi32(1 << (nShift - 1));
     __m256i vzero = _mm256_setzero_si256();
     for (int i = 0; i < size; i += 8, v += 8)
@@ -565,7 +573,11 @@ void roundIntVector_SIMD(int* v, int size, unsigned int nShift, const int dmvLim
 #endif
   {
     __m128i dMvMin = _mm_set1_epi32(-dmvLimit);
+#if JVET_P0491_BDOFPROF_MVD_RANGE
+    __m128i dMvMax = _mm_set1_epi32( dmvLimit );
+#else
     __m128i dMvMax = _mm_set1_epi32(dmvLimit - 1);
+#endif
     __m128i nOffset = _mm_set1_epi32((1 << (nShift - 1)));
     __m128i vzero = _mm_setzero_si128();
     for (int i = 0; i < size; i += 4, v += 4)
-- 
GitLab