From a341e90d89c9925cc0c642784f1430af8c393f55 Mon Sep 17 00:00:00 2001
From: Anand Meher Kotra <anand.meher.kotra@huawei.com>
Date: Wed, 23 Oct 2019 22:41:51 +0200
Subject: [PATCH] JVET-P0081: Apply asymmetric long tap deblocking (1 + 3)
 filter at Horizontal Chroma CTB boundaries

---
 source/Lib/CommonLib/LoopFilter.cpp | 102 +++++++++++++++++++++++++---
 source/Lib/CommonLib/LoopFilter.h   |  18 ++++-
 source/Lib/CommonLib/TypeDef.h      |   3 +
 3 files changed, 110 insertions(+), 13 deletions(-)

diff --git a/source/Lib/CommonLib/LoopFilter.cpp b/source/Lib/CommonLib/LoopFilter.cpp
index 66817bcb1..c7b4d9fd2 100644
--- a/source/Lib/CommonLib/LoopFilter.cpp
+++ b/source/Lib/CommonLib/LoopFilter.cpp
@@ -1217,6 +1217,9 @@ void LoopFilter::xEdgeFilterChroma(const CodingUnit& cu, const DeblockEdgeDir ed
       const int maxFilterLengthP = m_maxFilterLengthP[COMPONENT_Cb][(pos.x-m_ctuXLumaSamples)>>m_shiftHor][(pos.y-m_ctuYLumaSamples)>>m_shiftVer];
       const int maxFilterLengthQ = m_maxFilterLengthQ[COMPONENT_Cb][(pos.x-m_ctuXLumaSamples)>>m_shiftHor][(pos.y-m_ctuYLumaSamples)>>m_shiftVer];
       bool largeBoundary         = false;
+#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX
+      bool isChromaHorCTBBoundary = false;
+#endif
       if ( maxFilterLengthP >= 3 && maxFilterLengthQ >= 3 )
       {
         largeBoundary = true;
@@ -1224,7 +1227,11 @@ void LoopFilter::xEdgeFilterChroma(const CodingUnit& cu, const DeblockEdgeDir ed
 
       if (edgeDir == EDGE_HOR && pos.y % cuP.slice->getSPS()->getCTUSize() == 0)
       {
+#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX
+        isChromaHorCTBBoundary = true;
+#else
         largeBoundary = false;
+#endif 
       }
 
       for( int chromaIdx = 0; chromaIdx < 2; chromaIdx++ )
@@ -1266,10 +1273,18 @@ void LoopFilter::xEdgeFilterChroma(const CodingUnit& cu, const DeblockEdgeDir ed
         const int indexB = Clip3<int>(0, MAX_QP, iQP + (betaOffsetDiv2 << 1));
         const int beta = sm_betaTable[indexB] * iBitdepthScale;
 
+#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX
+        const int dp0 = xCalcDP(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 0), iOffset, isChromaHorCTBBoundary);
+#else
         const int dp0 = xCalcDP(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 0), iOffset);
+#endif 
         const int dq0 = xCalcDQ(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 0), iOffset);
         const int subSamplingShift = ( edgeDir == EDGE_VER ) ? m_shiftVer : m_shiftHor;
-        const int dp3 = ( subSamplingShift == 1 ) ? xCalcDP(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 1), iOffset) : xCalcDP(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 3), iOffset);
+#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX
+        const int dp3 = (subSamplingShift == 1) ? xCalcDP(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 1), iOffset, isChromaHorCTBBoundary) : xCalcDP(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 3), iOffset, isChromaHorCTBBoundary);
+#else
+        const int dp3 = (subSamplingShift == 1) ? xCalcDP(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 1), iOffset) : xCalcDP(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 3), iOffset);
+#endif
         const int dq3 = ( subSamplingShift == 1 ) ? xCalcDQ(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 1), iOffset) : xCalcDQ(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 3), iOffset);
 
         const int d0 = dp0 + dq0;
@@ -1279,12 +1294,24 @@ void LoopFilter::xEdgeFilterChroma(const CodingUnit& cu, const DeblockEdgeDir ed
           if (d < beta)
           {
             useLongFilter = true;
+#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX
+            const bool sw = xUseStrongFiltering(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 0), iOffset, 2 * d0, beta, iTc, false, false, 7, 7, isChromaHorCTBBoundary)
+#else
             const bool sw = xUseStrongFiltering(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 0), iOffset, 2 * d0, beta, iTc)
-                && xUseStrongFiltering(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + ( ( subSamplingShift == 1 ) ? 1 : 3 ) ), iOffset, 2 * d3, beta, iTc);
+#endif
+#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX
+              && xUseStrongFiltering(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + ((subSamplingShift == 1) ? 1 : 3)), iOffset, 2 * d3, beta, iTc, false, false, 7, 7, isChromaHorCTBBoundary);
+#else
+              && xUseStrongFiltering(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + ((subSamplingShift == 1) ? 1 : 3)), iOffset, 2 * d3, beta, iTc);
+#endif  
 
             for (unsigned step = 0; step < uiLoopLength; step++)
             {
+#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX
+              xPelFilterChroma(piTmpSrcChroma + iSrcStep*(step + iIdx*uiLoopLength), iOffset, iTc, sw, bPartPNoFilter, bPartQNoFilter, clpRng, largeBoundary, isChromaHorCTBBoundary);
+#else
               xPelFilterChroma(piTmpSrcChroma + iSrcStep*(step + iIdx*uiLoopLength), iOffset, iTc, sw, bPartPNoFilter, bPartQNoFilter, clpRng, largeBoundary);
+#endif 
             }
           }
         }
@@ -1292,7 +1319,11 @@ void LoopFilter::xEdgeFilterChroma(const CodingUnit& cu, const DeblockEdgeDir ed
         {
           for (unsigned step = 0; step < uiLoopLength; step++)
           {
+#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX
+            xPelFilterChroma(piTmpSrcChroma + iSrcStep*(step + iIdx*uiLoopLength), iOffset, iTc, false, bPartPNoFilter, bPartQNoFilter, clpRng, largeBoundary, isChromaHorCTBBoundary);
+#else
             xPelFilterChroma(piTmpSrcChroma + iSrcStep*(step + iIdx*uiLoopLength), iOffset, iTc, false, bPartPNoFilter, bPartQNoFilter, clpRng, largeBoundary);
+#endif
           }
         }
         }
@@ -1512,7 +1543,11 @@ inline void LoopFilter::xPelFilterLuma(Pel* piSrc, const int iOffset, const int
  \param bPartQNoFilter  indicator to disable filtering on partQ
  \param bitDepthChroma  chroma bit depth
  */
-inline void LoopFilter::xPelFilterChroma( Pel* piSrc, const int iOffset, const int tc, const bool sw, const bool bPartPNoFilter, const bool bPartQNoFilter, const ClpRng& clpRng, const bool largeBoundary ) const
+#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX
+inline void LoopFilter::xPelFilterChroma(Pel* piSrc, const int iOffset, const int tc, const bool sw, const bool bPartPNoFilter, const bool bPartQNoFilter, const ClpRng& clpRng, const bool largeBoundary, const bool isChromaHorCTBBoundary) const
+#else
+inline void LoopFilter::xPelFilterChroma(Pel* piSrc, const int iOffset, const int tc, const bool sw, const bool bPartPNoFilter, const bool bPartQNoFilter, const ClpRng& clpRng, const bool largeBoundary) const
+#endif
 {
   int delta;
 
@@ -1527,12 +1562,31 @@ inline void LoopFilter::xPelFilterChroma( Pel* piSrc, const int iOffset, const i
 
   if (sw)
   {
+#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX
+    if (isChromaHorCTBBoundary)
+    {
+      piSrc[-iOffset * 1] = Clip3(m3 - tc, m3 + tc, ((3 * m2 + 2 * m3 + m4 + m5 + m6 + 4) >> 3)); // p0
+      piSrc[0] = Clip3(m4 - tc, m4 + tc, ((2 * m2 + m3 + 2 * m4 + m5 + m6 + m7 + 4) >> 3)); // q0
+      piSrc[iOffset * 1] = Clip3(m5 - tc, m5 + tc, ((m2 + m3 + m4 + 2 * m5 + m6 + 2 * m7 + 4) >> 3));  // q1
+      piSrc[iOffset * 2] = Clip3(m6 - tc, m6 + tc, ((m3 + m4 + m5 + 2 * m6 + 3 * m7 + 4) >> 3));       // q2
+    }
+    else
+    {
       piSrc[-iOffset * 3] = Clip3(m1 - tc, m1 + tc, ((3 * m0 + 2 * m1 + m2 + m3 + m4 + 4) >> 3));       // p2
       piSrc[-iOffset * 2] = Clip3(m2 - tc, m2 + tc, ((2 * m0 + m1 + 2 * m2 + m3 + m4 + m5 + 4) >> 3));  // p1
       piSrc[-iOffset * 1] = Clip3(m3 - tc, m3 + tc, ((m0 + m1 + m2 + 2 * m3 + m4 + m5 + m6 + 4) >> 3)); // p0
-      piSrc[0]            = Clip3(m4 - tc, m4 + tc, ((m1 + m2 + m3 + 2 * m4 + m5 + m6 + m7 + 4) >> 3)); // q0
-      piSrc[iOffset * 1]  = Clip3(m5 - tc, m5 + tc, ((m2 + m3 + m4 + 2 * m5 + m6 + 2 * m7 + 4) >> 3));  // q1
-      piSrc[iOffset * 2]  = Clip3(m6 - tc, m6 + tc, ((m3 + m4 + m5 + 2 * m6 + 3 * m7 + 4) >> 3));       // q2
+      piSrc[0] = Clip3(m4 - tc, m4 + tc, ((m1 + m2 + m3 + 2 * m4 + m5 + m6 + m7 + 4) >> 3)); // q0
+      piSrc[iOffset * 1] = Clip3(m5 - tc, m5 + tc, ((m2 + m3 + m4 + 2 * m5 + m6 + 2 * m7 + 4) >> 3));  // q1
+      piSrc[iOffset * 2] = Clip3(m6 - tc, m6 + tc, ((m3 + m4 + m5 + 2 * m6 + 3 * m7 + 4) >> 3));       // q2
+    }
+#else
+    piSrc[-iOffset * 3] = Clip3(m1 - tc, m1 + tc, ((3 * m0 + 2 * m1 + m2 + m3 + m4 + 4) >> 3));       // p2
+    piSrc[-iOffset * 2] = Clip3(m2 - tc, m2 + tc, ((2 * m0 + m1 + 2 * m2 + m3 + m4 + m5 + 4) >> 3));  // p1
+    piSrc[-iOffset * 1] = Clip3(m3 - tc, m3 + tc, ((m0 + m1 + m2 + 2 * m3 + m4 + m5 + m6 + 4) >> 3)); // p0
+    piSrc[0] = Clip3(m4 - tc, m4 + tc, ((m1 + m2 + m3 + 2 * m4 + m5 + m6 + m7 + 4) >> 3)); // q0
+    piSrc[iOffset * 1] = Clip3(m5 - tc, m5 + tc, ((m2 + m3 + m4 + 2 * m5 + m6 + 2 * m7 + 4) >> 3));  // q1
+    piSrc[iOffset * 2] = Clip3(m6 - tc, m6 + tc, ((m3 + m4 + m5 + 2 * m6 + 3 * m7 + 4) >> 3));       // q2
+#endif
   }
   else
   {
@@ -1571,13 +1625,26 @@ inline void LoopFilter::xPelFilterChroma( Pel* piSrc, const int iOffset, const i
  \param tc              tc value
  \param piSrc           pointer to picture data
  */
-inline bool LoopFilter::xUseStrongFiltering( Pel* piSrc, const int iOffset, const int d, const int beta, const int tc, bool sidePisLarge, bool sideQisLarge, int maxFilterLengthP, int maxFilterLengthQ ) const
+#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX
+inline bool LoopFilter::xUseStrongFiltering(Pel* piSrc, const int iOffset, const int d, const int beta, const int tc, bool sidePisLarge, bool sideQisLarge, int maxFilterLengthP, int maxFilterLengthQ, bool isChromaHorCTBBoundary) const
+#else
+inline bool LoopFilter::xUseStrongFiltering(Pel* piSrc, const int iOffset, const int d, const int beta, const int tc, bool sidePisLarge, bool sideQisLarge, int maxFilterLengthP, int maxFilterLengthQ) const
+#endif
 {
   const Pel m4 = piSrc[ 0          ];
   const Pel m3 = piSrc[-iOffset    ];
   const Pel m7 = piSrc[ iOffset * 3];
   const Pel m0 = piSrc[-iOffset * 4];
-  int       sp3      = abs(m0 - m3);
+#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX
+  const Pel m2 = piSrc[-iOffset * 2];
+  int       sp3 = abs(m0 - m3);
+  if (isChromaHorCTBBoundary)
+  {
+    sp3 = abs(m2 - m3);
+  }
+#else
+  int       sp3 = abs(m0 - m3);
+#endif
   int       sq3      = abs(m7 - m4);
   const int d_strong = sp3 + sq3;
 
@@ -1616,9 +1683,24 @@ inline bool LoopFilter::xUseStrongFiltering( Pel* piSrc, const int iOffset, cons
   return ( ( d_strong < ( beta >> 3 ) ) && ( d < ( beta >> 2 ) ) && ( abs( m3 - m4 ) < ( ( tc * 5 + 1 ) >> 1 ) ) );
 }
 
-inline int LoopFilter::xCalcDP( Pel* piSrc, const int iOffset ) const
+#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX
+inline int LoopFilter::xCalcDP(Pel* piSrc, const int iOffset, const bool isChromaHorCTBBoundary) const
+#else
+inline int LoopFilter::xCalcDP(Pel* piSrc, const int iOffset) const
+#endif
 {
-  return abs( piSrc[-iOffset * 3] - 2 * piSrc[-iOffset * 2] + piSrc[-iOffset] );
+#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX
+  if (isChromaHorCTBBoundary) // this branch reads only the two P-side samples at -iOffset and -2*iOffset
+  {
+    return abs(piSrc[-iOffset * 2] - 2 * piSrc[-iOffset * 2] + piSrc[-iOffset]); // NOTE(review): the repeated -2*iOffset looks deliberate (it stands in for -3*iOffset, matching sp3 = abs(m2 - m3) in xUseStrongFiltering); value equals abs(piSrc[-iOffset] - piSrc[-iOffset * 2]) - confirm against JVET-P0081
+  }
+  else
+  {
+    return abs(piSrc[-iOffset * 3] - 2 * piSrc[-iOffset * 2] + piSrc[-iOffset]); // unchanged second difference over the three P-side samples nearest the edge
+  }
+#else
+  return abs(piSrc[-iOffset * 3] - 2 * piSrc[-iOffset * 2] + piSrc[-iOffset]);
+#endif 
 }
 
 inline int LoopFilter::xCalcDQ( Pel* piSrc, const int iOffset ) const
diff --git a/source/Lib/CommonLib/LoopFilter.h b/source/Lib/CommonLib/LoopFilter.h
index 0b2e05f6a..4191d7146 100644
--- a/source/Lib/CommonLib/LoopFilter.h
+++ b/source/Lib/CommonLib/LoopFilter.h
@@ -91,15 +91,27 @@ private:
   inline void xBilinearFilter     ( Pel* srcP, Pel* srcQ, int offset, int refMiddle, int refP, int refQ, int numberPSide, int numberQSide, const int* dbCoeffsP, const int* dbCoeffsQ, int tc ) const;
   inline void xFilteringPandQ     ( Pel* src, int offset, int numberPSide, int numberQSide, int tc ) const;
   inline void xPelFilterLuma      ( Pel* piSrc, const int iOffset, const int tc, const bool sw, const bool bPartPNoFilter, const bool bPartQNoFilter, const int iThrCut, const bool bFilterSecondP, const bool bFilterSecondQ, const ClpRng& clpRng, bool sidePisLarge = false, bool sideQisLarge = false, int maxFilterLengthP = 7, int maxFilterLengthQ = 7 ) const;
-  inline void xPelFilterChroma    ( Pel* piSrc, const int iOffset, const int tc, const bool sw, const bool bPartPNoFilter, const bool bPartQNoFilter, const ClpRng& clpRng, const bool largeBoundary ) const;
-  inline bool xUseStrongFiltering ( Pel* piSrc, const int iOffset, const int d, const int beta, const int tc, bool sidePisLarge = false, bool sideQisLarge = false, int maxFilterLengthP = 7, int maxFilterLengthQ = 7 ) const;//move the computation outside the function
+#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX
+  inline void xPelFilterChroma(Pel* piSrc, const int iOffset, const int tc, const bool sw, const bool bPartPNoFilter, const bool bPartQNoFilter, const ClpRng& clpRng, const bool largeBoundary, const bool isChromaHorCTBBoundary) const; // with sw set, isChromaHorCTBBoundary == true writes only piSrc[-iOffset] on the P side (plus q0..q2); no default value, so every caller must pass it
+#else
+  inline void xPelFilterChroma(Pel* piSrc, const int iOffset, const int tc, const bool sw, const bool bPartPNoFilter, const bool bPartQNoFilter, const ClpRng& clpRng, const bool largeBoundary) const;
+#endif
+#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX
+  inline bool xUseStrongFiltering(Pel* piSrc, const int iOffset, const int d, const int beta, const int tc, bool sidePisLarge = false, bool sideQisLarge = false, int maxFilterLengthP = 7, int maxFilterLengthQ = 7, bool isChromaHorCTBBoundary = false) const;//move the computation outside the function; isChromaHorCTBBoundary == true makes the P-side term sp3 compare the samples at -2*iOffset and -iOffset, the default (false) keeps abs(m0 - m3)
+#else
+  inline bool xUseStrongFiltering(Pel* piSrc, const int iOffset, const int d, const int beta, const int tc, bool sidePisLarge = false, bool sideQisLarge = false, int maxFilterLengthP = 7, int maxFilterLengthQ = 7) const;//move the computation outside the function
+#endif
   inline unsigned BsSet(unsigned val, const ComponentID compIdx) const;
   inline unsigned BsGet(unsigned val, const ComponentID compIdx) const;
 
   inline bool isCrossedByVirtualBoundaries ( const int xPos, const int yPos, const int width, const int height, int& numHorVirBndry, int& numVerVirBndry, int horVirBndryPos[], int verVirBndryPos[], const PPS* pps );
   inline void xDeriveEdgefilterParam       ( const int xPos, const int yPos, const int numVerVirBndry, const int numHorVirBndry, const int verVirBndryPos[], const int horVirBndryPos[], bool &verEdgeFilter, bool &horEdgeFilter );
 
-  inline int xCalcDP              ( Pel* piSrc, const int iOffset ) const;
+#if  JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX
+  inline int xCalcDP(Pel* piSrc, const int iOffset, const bool isChromaHorCTBBoundary = false) const; // default false selects the same three-sample expression as before this change, so existing two-argument callers are unaffected
+#else
+  inline int xCalcDP(Pel* piSrc, const int iOffset) const;
+#endif
   inline int xCalcDQ              ( Pel* piSrc, const int iOffset ) const;
   static const uint16_t sm_tcTable[MAX_QP + 3];
   static const uint8_t sm_betaTable[MAX_QP + 1];
diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
index e51683753..3721b62dd 100644
--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -50,6 +50,7 @@
 #include <assert.h>
 #include <cassert>
 
+
 #define JVET_P0578_MINIMUM_CU_SIZE_CONSTRAINT             1 // JVET-P0578: minimum CU size constraint
 
 #define JVET_P0091_REMOVE_BDOF_OFFSET_SHIFT               1 // JVET-P0091: Align sample offset calculation of BDOF and PROF
@@ -98,6 +99,8 @@
 
 #define JVET_P0329_PLANAR_SIMPLIFICATION                  1 // JVET-P0329: simplify planar prediction by comparison removal
 
+#define JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX             1 // JVET-P0081: Apply asymmetric long tap deblocking (1 + 3) filter at horizontal CTB boundaries for Chroma
+
 #define JVET_P0516_PLT_BINARIZATION                       1 // JVET-P0516: PLT is always signaled when pred mode is euqal to 1 (intra mode) 
 
 #define JVET_P0562_TS_RESIDUAL_CODING_SIMP                1 // JVET-P0562: Fix the Rice parameter equal to 1 for the remainder of TS residual coding
-- 
GitLab