From 24b47fc3a2e434220fa6d93cd268776a93a8ffe1 Mon Sep 17 00:00:00 2001
From: hanhuang <hanhuang@qti.qualcomm.com>
Date: Mon, 21 Oct 2019 15:54:48 -0700
Subject: [PATCH] JVET-P0091: Align sample offset calculation of BDOF and PROF

---
 source/Lib/CommonLib/Buffer.cpp          |  8 ++++++++
 source/Lib/CommonLib/InterPrediction.cpp | 12 ++++++++++++
 source/Lib/CommonLib/TypeDef.h           |  2 ++
 source/Lib/CommonLib/x86/BufferX86.h     |  5 +++++
 4 files changed, 27 insertions(+)

diff --git a/source/Lib/CommonLib/Buffer.cpp b/source/Lib/CommonLib/Buffer.cpp
index a06167006..7c1bb9f1d 100644
--- a/source/Lib/CommonLib/Buffer.cpp
+++ b/source/Lib/CommonLib/Buffer.cpp
@@ -167,19 +167,27 @@ void addBIOAvgCore(const Pel* src0, int src0Stride, const Pel* src1, int src1Str
     for (int x = 0; x < width; x += 4)
     {
       b = tmpx * (gradX0[x] - gradX1[x]) + tmpy * (gradY0[x] - gradY1[x]);
+#if !JVET_P0091_REMOVE_BDOF_OFFSET_SHIFT
       b = ((b + 1) >> 1);
+#endif
       dst[x] = ClipPel((int16_t)rightShift((src0[x] + src1[x] + b + offset), shift), clpRng);
 
       b = tmpx * (gradX0[x + 1] - gradX1[x + 1]) + tmpy * (gradY0[x + 1] - gradY1[x + 1]);
+#if !JVET_P0091_REMOVE_BDOF_OFFSET_SHIFT
       b = ((b + 1) >> 1);
+#endif
       dst[x + 1] = ClipPel((int16_t)rightShift((src0[x + 1] + src1[x + 1] + b + offset), shift), clpRng);
 
       b = tmpx * (gradX0[x + 2] - gradX1[x + 2]) + tmpy * (gradY0[x + 2] - gradY1[x + 2]);
+#if !JVET_P0091_REMOVE_BDOF_OFFSET_SHIFT
       b = ((b + 1) >> 1);
+#endif
       dst[x + 2] = ClipPel((int16_t)rightShift((src0[x + 2] + src1[x + 2] + b + offset), shift), clpRng);
 
       b = tmpx * (gradX0[x + 3] - gradX1[x + 3]) + tmpy * (gradY0[x + 3] - gradY1[x + 3]);
+#if !JVET_P0091_REMOVE_BDOF_OFFSET_SHIFT
       b = ((b + 1) >> 1);
+#endif
       dst[x + 3] = ClipPel((int16_t)rightShift((src0[x + 3] + src1[x + 3] + b + offset), shift), clpRng);
     }
     dst += dstStride;       src0 += src0Stride;     src1 += src1Stride;
diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp
index b0baa9c19..1cbf4f85a 100644
--- a/source/Lib/CommonLib/InterPrediction.cpp
+++ b/source/Lib/CommonLib/InterPrediction.cpp
@@ -1186,7 +1186,11 @@ void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf
   const int   offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
 #if JVET_P0653_BDOF_PROF_PARA_DEV
 #if JVET_P0491_BDOFPROF_MVD_RANGE
+#if JVET_P0091_REMOVE_BDOF_OFFSET_SHIFT
+  const int   limit = ( 1 << 4 ) - 1;   // 15 instead of 31: under this macro tmpx/tmpy are derived with << 2 rather than << 3, i.e. one bit coarser
+#else
   const int   limit = ( 1 << 5 ) - 1;
+#endif
 #else
   const int   limit = (1 << 5);
 #endif
@@ -1217,7 +1221,11 @@ void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf
       const Pel* SrcY0Tmp = srcY0 + (xu << 2) + (yu << 2) * src0Stride;
 
       g_pelBufOP.calcBIOSums(SrcY0Tmp, SrcY1Tmp, pGradX0Tmp, pGradX1Tmp, pGradY0Tmp, pGradY1Tmp, xu, yu, src0Stride, src1Stride, widthG, bitDepth, &sumAbsGX, &sumAbsGY, &sumDIX, &sumDIY, &sumSignGY_GX);
+#if JVET_P0091_REMOVE_BDOF_OFFSET_SHIFT
+      tmpx = (sumAbsGX == 0 ? 0 : rightShiftMSB(sumDIX << 2, sumAbsGX));   // numerator scaled by 4 (legacy: 8); offsets the (b + 1) >> 1 that this macro removes from addBIOAvgCore
+#else
       tmpx = (sumAbsGX == 0 ? 0 : rightShiftMSB(sumDIX << 3, sumAbsGX));
+#endif
 #if JVET_P0057_BDOF_PROF_HARMONIZATION && !JVET_P0491_BDOFPROF_MVD_RANGE
       tmpx = Clip3(-limit, limit - 1, tmpx);
 #else
@@ -1228,7 +1236,11 @@ void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf
       int     secsGxGy = sumSignGY_GX & ((1 << 12) - 1);
       int     tmpData = tmpx * mainsGxGy;
       tmpData = ((tmpData << 12) + tmpx*secsGxGy) >> 1;
+#if JVET_P0091_REMOVE_BDOF_OFFSET_SHIFT
+      tmpy = (sumAbsGY == 0 ? 0 : rightShiftMSB(((sumDIY << 2) - tmpData), sumAbsGY));   // sumDIY scaled by 4 (legacy: 8), same scaling as the sumDIX term used for tmpx
+#else
       tmpy = (sumAbsGY == 0 ? 0 : rightShiftMSB(((sumDIY << 3) - tmpData), sumAbsGY));
+#endif
 #if JVET_P0057_BDOF_PROF_HARMONIZATION && !JVET_P0491_BDOFPROF_MVD_RANGE
       tmpy = Clip3(-limit, limit - 1, tmpy);
 #else
diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
index a840ed860..8374dd669 100644
--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -50,6 +50,8 @@
 #include <assert.h>
 #include <cassert>
 
+#define JVET_P0091_REMOVE_BDOF_OFFSET_SHIFT               1 // JVET-P0091: Align sample offset calculation of BDOF and PROF
+
 #define JVET_P0512_SIMD_HIGH_BITDEPTH                     1 // JVET-P0512: MC SIMD support for high internal bit-depthf
 
 #define JVET_P0491_BDOFPROF_MVD_RANGE                     1 // JVET-P0491: clip the MVD in BDOF/PROF to [-31 31]
diff --git a/source/Lib/CommonLib/x86/BufferX86.h b/source/Lib/CommonLib/x86/BufferX86.h
index dfce5c6d3..b91ef72f2 100644
--- a/source/Lib/CommonLib/x86/BufferX86.h
+++ b/source/Lib/CommonLib/x86/BufferX86.h
@@ -248,9 +248,14 @@ void addBIOAvg4_SSE(const Pel* src0, int src0Stride, const Pel* src1, int src1St
 
       a   = _mm_unpacklo_epi16(_mm_loadl_epi64((const __m128i *) (src0 + x)),
                              _mm_loadl_epi64((const __m128i *) (src1 + x)));
+#if JVET_P0091_REMOVE_BDOF_OFFSET_SHIFT
+      sum = _mm_add_epi32(sum, _mm_madd_epi16(a, _mm_set1_epi16(1)));   // a interleaves src0/src1, so madd by 1 adds src0 + src1 per lane
+      sum = _mm_sra_epi32(_mm_add_epi32(sum, _mm_set1_epi32(offset)), _mm_cvtsi32_si128(shift));   // (src0 + src1 + b + offset) >> shift, as in addBIOAvgCore; assumes sum held b on entry
+#else
       sum = _mm_add_epi32(sum, _mm_madd_epi16(a, _mm_set1_epi16(2)));
       sum = _mm_add_epi32(sum, _mm_set1_epi32(2 * offset + 1));
       sum = _mm_sra_epi32(sum, _mm_cvtsi32_si128(shift + 1));
+#endif
       sum = _mm_packs_epi32(sum, sum);
       sum = _mm_max_epi16(sum, vibdimin);
       sum = _mm_min_epi16(sum, vibdimax);
-- 
GitLab