Commit 34a16b14 authored by Xiang Li

Merge branch 'saintspear/VVCSoftware_VTM-JVET_L0147_ALF_SUBSAMPLED_LAPLACIAN'

parents bf0e9b65 d09359fa
Pipeline #164 passed with stage
...@@ -337,14 +337,23 @@ void AdaptiveLoopFilter::deriveClassificationBlk( AlfClassifier** classifier, in ...@@ -337,14 +337,23 @@ void AdaptiveLoopFilter::deriveClassificationBlk( AlfClassifier** classifier, in
const Pel* pYup2 = src3 + pixY; const Pel* pYup2 = src3 + pixY;
const Pel y0 = pY[0] << 1; const Pel y0 = pY[0] << 1;
#if !JVET_L0147_ALF_SUBSAMPLED_LAPLACIAN
const Pel y1 = pY[1] << 1; const Pel y1 = pY[1] << 1;
const Pel yup0 = pYup[0] << 1; const Pel yup0 = pYup[0] << 1;
#endif
const Pel yup1 = pYup[1] << 1; const Pel yup1 = pYup[1] << 1;
#if JVET_L0147_ALF_SUBSAMPLED_LAPLACIAN
pYver[j] = abs( y0 - pYdown[0] - pYup[0] ) + abs( yup1 - pY[1] - pYup2[1] );
pYhor[j] = abs( y0 - pY[1] - pY[-1] ) + abs( yup1 - pYup[2] - pYup[0] );
pYdig0[j] = abs( y0 - pYdown[-1] - pYup[1] ) + abs( yup1 - pY[0] - pYup2[2] );
pYdig1[j] = abs( y0 - pYup[-1] - pYdown[1] ) + abs( yup1 - pYup2[0] - pY[2] );
#else
pYver[j] = abs( y0 - pYdown[0] - pYup[0] ) + abs( y1 - pYdown[1] - pYup[1] ) + abs( yup0 - pY[0] - pYup2[0] ) + abs( yup1 - pY[1] - pYup2[1] ); pYver[j] = abs( y0 - pYdown[0] - pYup[0] ) + abs( y1 - pYdown[1] - pYup[1] ) + abs( yup0 - pY[0] - pYup2[0] ) + abs( yup1 - pY[1] - pYup2[1] );
pYhor[j] = abs( y0 - pY[1] - pY[-1] ) + abs( y1 - pY[2] - pY[0] ) + abs( yup0 - pYup[1] - pYup[-1] ) + abs( yup1 - pYup[2] - pYup[0] ); pYhor[j] = abs( y0 - pY[1] - pY[-1] ) + abs( y1 - pY[2] - pY[0] ) + abs( yup0 - pYup[1] - pYup[-1] ) + abs( yup1 - pYup[2] - pYup[0] );
pYdig0[j] = abs( y0 - pYdown[-1] - pYup[1] ) + abs( y1 - pYdown[0] - pYup[2] ) + abs( yup0 - pY[-1] - pYup2[1] ) + abs( yup1 - pY[0] - pYup2[2] ); pYdig0[j] = abs( y0 - pYdown[-1] - pYup[1] ) + abs( y1 - pYdown[0] - pYup[2] ) + abs( yup0 - pY[-1] - pYup2[1] ) + abs( yup1 - pY[0] - pYup2[2] );
pYdig1[j] = abs( y0 - pYup[-1] - pYdown[1] ) + abs( y1 - pYup[0] - pYdown[2] ) + abs( yup0 - pYup2[-1] - pY[1] ) + abs( yup1 - pYup2[0] - pY[2] ); pYdig1[j] = abs( y0 - pYup[-1] - pYdown[1] ) + abs( y1 - pYup[0] - pYdown[2] ) + abs( yup0 - pYup2[-1] - pY[1] ) + abs( yup1 - pYup2[0] - pY[2] );
#endif
if( j > 4 && ( j - 6 ) % 4 == 0 ) if( j > 4 && ( j - 6 ) % 4 == 0 )
{ {
...@@ -394,7 +403,11 @@ void AdaptiveLoopFilter::deriveClassificationBlk( AlfClassifier** classifier, in ...@@ -394,7 +403,11 @@ void AdaptiveLoopFilter::deriveClassificationBlk( AlfClassifier** classifier, in
int sumD1 = pYdig1[j] + pYdig12[j] + pYdig14[j] + pYdig16[j]; int sumD1 = pYdig1[j] + pYdig12[j] + pYdig14[j] + pYdig16[j];
int tempAct = sumV + sumH; int tempAct = sumV + sumH;
#if JVET_L0147_ALF_SUBSAMPLED_LAPLACIAN
int activity = (Pel)Clip3<int>( 0, maxActivity, ( tempAct * 64 ) >> shift );
#else
int activity = (Pel)Clip3<int>( 0, maxActivity, ( tempAct * 32 ) >> shift ); int activity = (Pel)Clip3<int>( 0, maxActivity, ( tempAct * 32 ) >> shift );
#endif
int classIdx = th[activity]; int classIdx = th[activity];
int hv1, hv0, d1, d0, hvd1, hvd0; int hv1, hv0, d1, d0, hvd1, hvd0;
......
...@@ -50,6 +50,8 @@ ...@@ -50,6 +50,8 @@
#include <assert.h> #include <assert.h>
#include <cassert> #include <cassert>
#define JVET_L0147_ALF_SUBSAMPLED_LAPLACIAN 1 // Subsampled Laplacian calculation
#define JVET_L0191_LM_WO_LMS 1 // NO LMS regression. min/max are used instead #define JVET_L0191_LM_WO_LMS 1 // NO LMS regression. min/max are used instead
#define JVET_L0090_PAIR_AVG 1 // Add pairwise average candidates, replace HEVC combined candidates #define JVET_L0090_PAIR_AVG 1 // Add pairwise average candidates, replace HEVC combined candidates
......
...@@ -105,6 +105,33 @@ static void simdDeriveClassificationBlk( AlfClassifier** classifier, int** lapla ...@@ -105,6 +105,33 @@ static void simdDeriveClassificationBlk( AlfClassifier** classifier, int** lapla
__m128i xmm4 = _mm_slli_epi16( _mm_alignr_epi8( xmm1_next, xmm1, 2 ), 1 ); __m128i xmm4 = _mm_slli_epi16( _mm_alignr_epi8( xmm1_next, xmm1, 2 ), 1 );
__m128i xmm5 = _mm_slli_epi16( _mm_alignr_epi8( xmm2_next, xmm2, 2 ), 1 ); __m128i xmm5 = _mm_slli_epi16( _mm_alignr_epi8( xmm2_next, xmm2, 2 ), 1 );
#if JVET_L0147_ALF_SUBSAMPLED_LAPLACIAN
__m128i xmm15 = _mm_setzero_si128();
//dig0
__m128i xmm6 = _mm_add_epi16( _mm_alignr_epi8( xmm2_next, xmm2, 4 ), xmm0 );
xmm6 = _mm_sub_epi16( _mm_blend_epi16 ( xmm4, xmm15, 0xAA ), _mm_blend_epi16 ( xmm6, xmm15, 0xAA ) );
__m128i xmm8 = _mm_add_epi16( _mm_alignr_epi8( xmm3_next, xmm3, 4 ), xmm1 );
xmm8 = _mm_sub_epi16( _mm_blend_epi16 ( xmm5, xmm15, 0x55 ), _mm_blend_epi16 ( xmm8, xmm15, 0x55 ) );
//dig1
__m128i xmm9 = _mm_add_epi16( _mm_alignr_epi8( xmm0_next, xmm0, 4 ), xmm2 );
xmm9 = _mm_sub_epi16( _mm_blend_epi16 ( xmm4, xmm15, 0xAA ), _mm_blend_epi16 ( xmm9, xmm15, 0xAA ) );
__m128i xmm10 = _mm_add_epi16( _mm_alignr_epi8( xmm1_next, xmm1, 4 ), xmm3 );
xmm10 = _mm_sub_epi16( _mm_blend_epi16 ( xmm5, xmm15, 0x55 ), _mm_blend_epi16 ( xmm10, xmm15, 0x55 ) );
//hor
__m128i xmm13 = _mm_add_epi16( _mm_alignr_epi8( xmm1_next, xmm1, 4 ), xmm1 );
xmm13 = _mm_sub_epi16( _mm_blend_epi16 ( xmm4, xmm15, 0xAA ), _mm_blend_epi16 ( xmm13, xmm15, 0xAA ) );
__m128i xmm14 = _mm_add_epi16( _mm_alignr_epi8( xmm2_next, xmm2, 4 ), xmm2 );
xmm14 = _mm_sub_epi16( _mm_blend_epi16 ( xmm5, xmm15, 0x55 ), _mm_blend_epi16 ( xmm14, xmm15, 0x55 ) );
//ver
__m128i xmm11 = _mm_add_epi16( _mm_alignr_epi8( xmm0_next, xmm0, 2 ), _mm_alignr_epi8( xmm2_next, xmm2, 2 ) );
xmm11 = _mm_sub_epi16( _mm_blend_epi16 ( xmm4, xmm15, 0xAA ), _mm_blend_epi16 ( xmm11, xmm15, 0xAA ) );
__m128i xmm12 = _mm_add_epi16( _mm_alignr_epi8( xmm1_next, xmm1, 2 ), _mm_alignr_epi8( xmm3_next, xmm3, 2 ) );
xmm12 = _mm_sub_epi16( _mm_blend_epi16 ( xmm5, xmm15, 0x55 ), _mm_blend_epi16 ( xmm12, xmm15, 0x55 ) );
#else
//dig0 //dig0
__m128i xmm6 = _mm_add_epi16( _mm_alignr_epi8( xmm2_next, xmm2, 4 ), xmm0 ); __m128i xmm6 = _mm_add_epi16( _mm_alignr_epi8( xmm2_next, xmm2, 4 ), xmm0 );
xmm6 = _mm_sub_epi16( xmm4, xmm6 ); xmm6 = _mm_sub_epi16( xmm4, xmm6 );
...@@ -128,6 +155,7 @@ static void simdDeriveClassificationBlk( AlfClassifier** classifier, int** lapla ...@@ -128,6 +155,7 @@ static void simdDeriveClassificationBlk( AlfClassifier** classifier, int** lapla
xmm11 = _mm_sub_epi16( xmm4, xmm11 ); xmm11 = _mm_sub_epi16( xmm4, xmm11 );
__m128i xmm12 = _mm_add_epi16( _mm_alignr_epi8( xmm1_next, xmm1, 2 ), _mm_alignr_epi8( xmm3_next, xmm3, 2 ) ); __m128i xmm12 = _mm_add_epi16( _mm_alignr_epi8( xmm1_next, xmm1, 2 ), _mm_alignr_epi8( xmm3_next, xmm3, 2 ) );
xmm12 = _mm_sub_epi16( xmm5, xmm12 ); xmm12 = _mm_sub_epi16( xmm5, xmm12 );
#endif
xmm6 = _mm_abs_epi16( xmm6 ); xmm6 = _mm_abs_epi16( xmm6 );
xmm8 = _mm_abs_epi16( xmm8 ); xmm8 = _mm_abs_epi16( xmm8 );
...@@ -202,7 +230,11 @@ static void simdDeriveClassificationBlk( AlfClassifier** classifier, int** lapla ...@@ -202,7 +230,11 @@ static void simdDeriveClassificationBlk( AlfClassifier** classifier, int** lapla
__m128i xmm12 = _mm_blend_epi16( xmm4, _mm_shuffle_epi32( xmm0, 0x40 ), 0xF0 ); __m128i xmm12 = _mm_blend_epi16( xmm4, _mm_shuffle_epi32( xmm0, 0x40 ), 0xF0 );
__m128i xmm10 = _mm_shuffle_epi32( xmm12, 0xB1 ); __m128i xmm10 = _mm_shuffle_epi32( xmm12, 0xB1 );
xmm12 = _mm_add_epi32( xmm10, xmm12 ); xmm12 = _mm_add_epi32( xmm10, xmm12 );
#if JVET_L0147_ALF_SUBSAMPLED_LAPLACIAN
xmm12 = _mm_srai_epi32( xmm12, shift - 6 );
#else
xmm12 = _mm_srai_epi32( xmm12, shift - 5 ); xmm12 = _mm_srai_epi32( xmm12, shift - 5 );
#endif
xmm12 = _mm_min_epi32( xmm12, xmm13 ); xmm12 = _mm_min_epi32( xmm12, xmm13 );
xmm12 = _mm_and_si128( xmm12, mm_15 ); xmm12 = _mm_and_si128( xmm12, mm_15 );
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment