Merge branch 'saintspear/VVCSoftware_VTM-JVET_L0147_ALF_SUBSAMPLED_LAPLACIAN'

34a16b14 · Xiang Li · bf0e9b65 · d09359fa · 34a16b14 · 34a16b14
Commit 34a16b14 authored 6 years ago by Xiang Li
--- a/source/Lib/CommonLib/AdaptiveLoopFilter.cpp
+++ b/source/Lib/CommonLib/AdaptiveLoopFilter.cpp
@@ -337,14 +337,23 @@ void AdaptiveLoopFilter::deriveClassificationBlk( AlfClassifier** classifier, in
      const Pel* pYup2 = src3 + pixY;

      const Pel y0 = pY[0] << 1;
+#if !JVET_L0147_ALF_SUBSAMPLED_LAPLACIAN
      const Pel y1 = pY[1] << 1;
      const Pel yup0 = pYup[0] << 1;
+#endif
      const Pel yup1 = pYup[1] << 1;

+#if JVET_L0147_ALF_SUBSAMPLED_LAPLACIAN
+      pYver[j] = abs( y0 - pYdown[0] - pYup[0] ) + abs( yup1 - pY[1] - pYup2[1] );
+      pYhor[j] = abs( y0 - pY[1] - pY[-1] ) + abs( yup1 - pYup[2] - pYup[0] );
+      pYdig0[j] = abs( y0 - pYdown[-1] - pYup[1] ) + abs( yup1 - pY[0] - pYup2[2] );
+      pYdig1[j] = abs( y0 - pYup[-1] - pYdown[1] ) + abs( yup1 - pYup2[0] - pY[2] );
+#else
      pYver[j] = abs( y0 - pYdown[0] - pYup[0] ) + abs( y1 - pYdown[1] - pYup[1] ) + abs( yup0 - pY[0] - pYup2[0] ) + abs( yup1 - pY[1] - pYup2[1] );
      pYhor[j] = abs( y0 - pY[1] - pY[-1] ) + abs( y1 - pY[2] - pY[0] ) + abs( yup0 - pYup[1] - pYup[-1] ) + abs( yup1 - pYup[2] - pYup[0] );
      pYdig0[j] = abs( y0 - pYdown[-1] - pYup[1] ) + abs( y1 - pYdown[0] - pYup[2] ) + abs( yup0 - pY[-1] - pYup2[1] ) + abs( yup1 - pY[0] - pYup2[2] );
      pYdig1[j] = abs( y0 - pYup[-1] - pYdown[1] ) + abs( y1 - pYup[0] - pYdown[2] ) + abs( yup0 - pYup2[-1] - pY[1] ) + abs( yup1 - pYup2[0] - pY[2] );
+#endif

      if( j > 4 && ( j - 6 ) % 4 == 0 )
      {
@@ -394,7 +403,11 @@ void AdaptiveLoopFilter::deriveClassificationBlk( AlfClassifier** classifier, in
      int sumD1 = pYdig1[j] + pYdig12[j] + pYdig14[j] + pYdig16[j];

      int tempAct = sumV + sumH;
+#if JVET_L0147_ALF_SUBSAMPLED_LAPLACIAN
+      int activity = (Pel)Clip3<int>( 0, maxActivity, ( tempAct * 64 ) >> shift );
+#else
      int activity = (Pel)Clip3<int>( 0, maxActivity, ( tempAct * 32 ) >> shift );
+#endif
      int classIdx = th[activity];

      int hv1, hv0, d1, d0, hvd1, hvd0;

--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -50,6 +50,8 @@
 #include <assert.h>
 #include <cassert>

+#define JVET_L0147_ALF_SUBSAMPLED_LAPLACIAN               1 // ALF classification: Laplacian gradients use 2 of every 2x2 samples (subsampled); activity multiplier is 64 instead of 32
+
 #define JVET_L0191_LM_WO_LMS                              1 // NO LMS regression. min/max are used instead

 #define JVET_L0090_PAIR_AVG                               1 // Add pairwise average candidates, replace HEVC combined candidates

--- a/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h
+++ b/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h
@@ -105,6 +105,33 @@ static void simdDeriveClassificationBlk( AlfClassifier** classifier, int** lapla
      __m128i xmm4 = _mm_slli_epi16( _mm_alignr_epi8( xmm1_next, xmm1, 2 ), 1 );
      __m128i xmm5 = _mm_slli_epi16( _mm_alignr_epi8( xmm2_next, xmm2, 2 ), 1 );

+#if JVET_L0147_ALF_SUBSAMPLED_LAPLACIAN
+      __m128i xmm15 = _mm_setzero_si128();
+
+      //dig0
+      __m128i xmm6 = _mm_add_epi16( _mm_alignr_epi8( xmm2_next, xmm2, 4 ), xmm0 );
+      xmm6 = _mm_sub_epi16( _mm_blend_epi16 ( xmm4, xmm15, 0xAA ), _mm_blend_epi16 ( xmm6, xmm15, 0xAA ) );
+      __m128i xmm8 = _mm_add_epi16( _mm_alignr_epi8( xmm3_next, xmm3, 4 ), xmm1 );
+      xmm8 = _mm_sub_epi16( _mm_blend_epi16 ( xmm5, xmm15, 0x55 ), _mm_blend_epi16 ( xmm8, xmm15, 0x55 ) );
+
+      //dig1
+      __m128i xmm9 = _mm_add_epi16( _mm_alignr_epi8( xmm0_next, xmm0, 4 ), xmm2 );
+      xmm9 = _mm_sub_epi16( _mm_blend_epi16 ( xmm4, xmm15, 0xAA ), _mm_blend_epi16 ( xmm9, xmm15, 0xAA ) );
+      __m128i xmm10 = _mm_add_epi16( _mm_alignr_epi8( xmm1_next, xmm1, 4 ), xmm3 );
+      xmm10 = _mm_sub_epi16( _mm_blend_epi16 ( xmm5, xmm15, 0x55 ), _mm_blend_epi16 ( xmm10, xmm15, 0x55 ) );
+
+      //hor
+      __m128i xmm13 = _mm_add_epi16( _mm_alignr_epi8( xmm1_next, xmm1, 4 ), xmm1 );
+      xmm13 = _mm_sub_epi16( _mm_blend_epi16 ( xmm4, xmm15, 0xAA ), _mm_blend_epi16 ( xmm13, xmm15, 0xAA ) );
+      __m128i xmm14 = _mm_add_epi16( _mm_alignr_epi8( xmm2_next, xmm2, 4 ), xmm2 );
+      xmm14 = _mm_sub_epi16( _mm_blend_epi16 ( xmm5, xmm15, 0x55 ), _mm_blend_epi16 ( xmm14, xmm15, 0x55 ) );
+
+      //ver
+      __m128i xmm11 = _mm_add_epi16( _mm_alignr_epi8( xmm0_next, xmm0, 2 ), _mm_alignr_epi8( xmm2_next, xmm2, 2 ) );
+      xmm11 = _mm_sub_epi16( _mm_blend_epi16 ( xmm4, xmm15, 0xAA ), _mm_blend_epi16 ( xmm11, xmm15, 0xAA ) );
+      __m128i xmm12 = _mm_add_epi16( _mm_alignr_epi8( xmm1_next, xmm1, 2 ), _mm_alignr_epi8( xmm3_next, xmm3, 2 ) );
+      xmm12 = _mm_sub_epi16( _mm_blend_epi16 ( xmm5, xmm15, 0x55 ), _mm_blend_epi16 ( xmm12, xmm15, 0x55 ) );
+#else
      //dig0
      __m128i xmm6 = _mm_add_epi16( _mm_alignr_epi8( xmm2_next, xmm2, 4 ), xmm0 );
      xmm6 = _mm_sub_epi16( xmm4, xmm6 );
@@ -128,6 +155,7 @@ static void simdDeriveClassificationBlk( AlfClassifier** classifier, int** lapla
      xmm11 = _mm_sub_epi16( xmm4, xmm11 );
      __m128i xmm12 = _mm_add_epi16( _mm_alignr_epi8( xmm1_next, xmm1, 2 ), _mm_alignr_epi8( xmm3_next, xmm3, 2 ) );
      xmm12 = _mm_sub_epi16( xmm5, xmm12 );
+#endif

      xmm6 = _mm_abs_epi16( xmm6 );
      xmm8 = _mm_abs_epi16( xmm8 );
@@ -202,7 +230,11 @@ static void simdDeriveClassificationBlk( AlfClassifier** classifier, int** lapla
      __m128i xmm12 = _mm_blend_epi16( xmm4, _mm_shuffle_epi32( xmm0, 0x40 ), 0xF0 );
      __m128i xmm10 = _mm_shuffle_epi32( xmm12, 0xB1 );
      xmm12 = _mm_add_epi32( xmm10, xmm12 );
+#if JVET_L0147_ALF_SUBSAMPLED_LAPLACIAN
+      xmm12 = _mm_srai_epi32( xmm12, shift - 6 );
+#else
      xmm12 = _mm_srai_epi32( xmm12, shift - 5 );
+#endif
      xmm12 = _mm_min_epi32( xmm12, xmm13 );

      xmm12 = _mm_and_si128( xmm12, mm_15 );