Commit da553044 authored by Frank Bossen
Browse files

Merge branch 'master' into 'master'

JVET-N0180: line buffer reduction using symmetric padding

See merge request jvet/VVCSoftware_VTM!462
parents e2e718a8 db93a8ad
......@@ -117,10 +117,24 @@ void AdaptiveLoopFilter::ALFProcess( CodingStructure& cs, AlfSliceParam& alfSlic
deriveClassification( m_classifier, tmpYuv.get( COMPONENT_Y ), blk );
Area blkPCM(xPos, yPos, width, height);
resetPCMBlkClassInfo(cs, m_classifier, tmpYuv.get(COMPONENT_Y), blkPCM);
#if JVET_N0180_ALF_LINE_BUFFER_REDUCTION
#if JVET_N0242_NON_LINEAR_ALF
m_filter7x7Blk( m_classifier, recYuv, tmpYuv, blk, COMPONENT_Y, m_coeffFinal, m_clippFinal, m_clpRngs.comp[COMPONENT_Y], cs );
m_filter7x7Blk(m_classifier, recYuv, tmpYuv, blk, COMPONENT_Y, m_coeffFinal, m_clippFinal, m_clpRngs.comp[COMPONENT_Y], cs
, m_alfVBLumaCTUHeight
, ((yPos + pcv.maxCUHeight >= pcv.lumaHeight) ? pcv.lumaHeight : m_alfVBLumaPos)
);
#else
m_filter7x7Blk(m_classifier, recYuv, tmpYuv, blk, COMPONENT_Y, m_coeffFinal, m_clpRngs.comp[COMPONENT_Y], cs );
m_filter7x7Blk(m_classifier, recYuv, tmpYuv, blk, COMPONENT_Y, m_coeffFinal, m_clpRngs.comp[COMPONENT_Y], cs
, m_alfVBLumaCTUHeight
, ((yPos + pcv.maxCUHeight >= pcv.lumaHeight) ? pcv.lumaHeight : m_alfVBLumaPos)
);
#endif
#else
#if JVET_N0242_NON_LINEAR_ALF
m_filter7x7Blk(m_classifier, recYuv, tmpYuv, blk, COMPONENT_Y, m_coeffFinal, m_clippFinal, m_clpRngs.comp[COMPONENT_Y], cs);
#else
m_filter7x7Blk(m_classifier, recYuv, tmpYuv, blk, COMPONENT_Y, m_coeffFinal, m_clpRngs.comp[COMPONENT_Y], cs);
#endif
#endif
}
......@@ -133,11 +147,25 @@ void AdaptiveLoopFilter::ALFProcess( CodingStructure& cs, AlfSliceParam& alfSlic
if( m_ctuEnableFlag[compIdx][ctuIdx] )
{
Area blk( xPos >> chromaScaleX, yPos >> chromaScaleY, width >> chromaScaleX, height >> chromaScaleY );
#if JVET_N0180_ALF_LINE_BUFFER_REDUCTION
#if JVET_N0242_NON_LINEAR_ALF
m_filter5x5Blk(m_classifier, recYuv, tmpYuv, blk, compID, alfSliceParam.chromaCoeff, m_chromaClippFinal, m_clpRngs.comp[compIdx], cs
, m_alfVBChmaCTUHeight
, ((yPos + pcv.maxCUHeight >= pcv.lumaHeight) ? pcv.lumaHeight : m_alfVBChmaPos)
);
#else
m_filter5x5Blk(m_classifier, recYuv, tmpYuv, blk, compID, alfSliceParam.chromaCoeff, m_clpRngs.comp[compIdx], cs
, m_alfVBChmaCTUHeight
, ((yPos + pcv.maxCUHeight >= pcv.lumaHeight) ? pcv.lumaHeight : m_alfVBChmaPos)
);
#endif
#else
#if JVET_N0242_NON_LINEAR_ALF
m_filter5x5Blk( m_classifier, recYuv, tmpYuv, blk, compID, alfSliceParam.chromaCoeff, m_chromaClippFinal, m_clpRngs.comp[compIdx], cs );
#else
m_filter5x5Blk( m_classifier, recYuv, tmpYuv, blk, compID, alfSliceParam.chromaCoeff, m_clpRngs.comp[compIdx], cs );
#endif
#endif
}
}
......@@ -234,6 +262,13 @@ void AdaptiveLoopFilter::create( const int picWidth, const int picHeight, const
m_numCTUsInPic = m_numCTUsInHeight * m_numCTUsInWidth;
m_filterShapes[CHANNEL_TYPE_LUMA].push_back( AlfFilterShape( 7 ) );
m_filterShapes[CHANNEL_TYPE_CHROMA].push_back( AlfFilterShape( 5 ) );
#if JVET_N0180_ALF_LINE_BUFFER_REDUCTION
m_alfVBLumaPos = m_maxCUHeight - ALF_VB_POS_ABOVE_CTUROW_LUMA;
m_alfVBChmaPos = (m_maxCUHeight >> ((m_chromaFormat == CHROMA_420) ? 1 : 0)) - ALF_VB_POS_ABOVE_CTUROW_CHMA;
m_alfVBLumaCTUHeight = m_maxCUHeight;
m_alfVBChmaCTUHeight = (m_maxCUHeight >> ((m_chromaFormat == CHROMA_420) ? 1 : 0));
#endif
#if JVET_N0242_NON_LINEAR_ALF
static_assert( AlfNumClippingValues[CHANNEL_TYPE_LUMA] > 0, "AlfNumClippingValues[CHANNEL_TYPE_LUMA] must be at least one" );
......@@ -332,8 +367,15 @@ void AdaptiveLoopFilter::deriveClassification( AlfClassifier** classifier, const
for( int j = blk.pos().x; j < width; j += m_CLASSIFICATION_BLK_SIZE )
{
int nWidth = std::min( j + m_CLASSIFICATION_BLK_SIZE, width ) - j;
m_deriveClassificationBlk( classifier, m_laplacian, srcLuma, Area( j, i, nWidth, nHeight ), m_inputBitDepth[CHANNEL_TYPE_LUMA] + 4 );
#if JVET_N0180_ALF_LINE_BUFFER_REDUCTION
m_deriveClassificationBlk(classifier, m_laplacian, srcLuma, Area(j, i, nWidth, nHeight), m_inputBitDepth[CHANNEL_TYPE_LUMA] + 4
, m_alfVBLumaCTUHeight
, ((i + nHeight >= m_picHeight) ? m_picHeight : m_alfVBLumaPos)
);
#else
m_deriveClassificationBlk(classifier, m_laplacian, srcLuma, Area(j, i, nWidth, nHeight), m_inputBitDepth[CHANNEL_TYPE_LUMA] + 4);
#endif
}
}
}
......@@ -387,7 +429,11 @@ void AdaptiveLoopFilter::resetPCMBlkClassInfo(CodingStructure & cs, AlfClassifi
}
}
void AdaptiveLoopFilter::deriveClassificationBlk( AlfClassifier** classifier, int** laplacian[NUM_DIRECTIONS], const CPelBuf& srcLuma, const Area& blk, const int shift )
#if JVET_N0180_ALF_LINE_BUFFER_REDUCTION
void AdaptiveLoopFilter::deriveClassificationBlk(AlfClassifier** classifier, int** laplacian[NUM_DIRECTIONS], const CPelBuf& srcLuma, const Area& blk, const int shift, int vbCTUHeight, int vbPos)
#else
void AdaptiveLoopFilter::deriveClassificationBlk(AlfClassifier** classifier, int** laplacian[NUM_DIRECTIONS], const CPelBuf& srcLuma, const Area& blk, const int shift)
#endif
{
static const int th[16] = { 0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4 };
const int stride = srcLuma.stride;
......@@ -414,7 +460,16 @@ void AdaptiveLoopFilter::deriveClassificationBlk( AlfClassifier** classifier, in
const Pel *src1 = &src[yoffset];
const Pel *src2 = &src[yoffset + stride];
const Pel *src3 = &src[yoffset + stride * 2];
#if JVET_N0180_ALF_LINE_BUFFER_REDUCTION
if (((posY - 2 + i) > 0) && ((posY - 2 + i) % vbCTUHeight) == (vbPos - 2))
{
src3 = &src[yoffset + stride];
}
else if (((posY - 2 + i) > 0) && ((posY - 2 + i) % vbCTUHeight) == vbPos)
{
src0 = &src[yoffset];
}
#endif
int* pYver = laplacian[VER][i];
int* pYhor = laplacian[HOR][i];
int* pYdig0 = laplacian[DIAG0][i];
......@@ -478,13 +533,50 @@ void AdaptiveLoopFilter::deriveClassificationBlk( AlfClassifier** classifier, in
for( int j = 0; j < blk.width; j += clsSizeX )
{
#if JVET_N0180_ALF_LINE_BUFFER_REDUCTION
int sumV = 0; int sumH = 0; int sumD0 = 0; int sumD1 = 0;
if (((i + posY) % vbCTUHeight) == (vbPos - 4))
{
sumV = pYver[j] + pYver2[j] + pYver4[j];
sumH = pYhor[j] + pYhor2[j] + pYhor4[j];
sumD0 = pYdig0[j] + pYdig02[j] + pYdig04[j];
sumD1 = pYdig1[j] + pYdig12[j] + pYdig14[j];
}
else if (((i + posY) % vbCTUHeight) == vbPos)
{
sumV = pYver2[j] + pYver4[j] + pYver6[j];
sumH = pYhor2[j] + pYhor4[j] + pYhor6[j];
sumD0 = pYdig02[j] + pYdig04[j] + pYdig06[j];
sumD1 = pYdig12[j] + pYdig14[j] + pYdig16[j];
}
else
{
sumV = pYver[j] + pYver2[j] + pYver4[j] + pYver6[j];
sumH = pYhor[j] + pYhor2[j] + pYhor4[j] + pYhor6[j];
sumD0 = pYdig0[j] + pYdig02[j] + pYdig04[j] + pYdig06[j];
sumD1 = pYdig1[j] + pYdig12[j] + pYdig14[j] + pYdig16[j];
}
#else
int sumV = pYver[j] + pYver2[j] + pYver4[j] + pYver6[j];
int sumH = pYhor[j] + pYhor2[j] + pYhor4[j] + pYhor6[j];
int sumD0 = pYdig0[j] + pYdig02[j] + pYdig04[j] + pYdig06[j];
int sumD1 = pYdig1[j] + pYdig12[j] + pYdig14[j] + pYdig16[j];
#endif
int tempAct = sumV + sumH;
int activity = (Pel)Clip3<int>( 0, maxActivity, ( tempAct * 64 ) >> shift );
#if JVET_N0180_ALF_LINE_BUFFER_REDUCTION
int activity = 0;
if ((((i + posY) % vbCTUHeight) == (vbPos - 4)) || (((i + posY) % vbCTUHeight) == vbPos))
{
activity = (Pel)Clip3<int>(0, maxActivity, (tempAct * 96) >> shift);
}
else
{
activity = (Pel)Clip3<int>(0, maxActivity, (tempAct * 64) >> shift);
}
#else
int activity = (Pel)Clip3<int>(0, maxActivity, (tempAct * 64) >> shift);
#endif
int classIdx = th[activity];
int hv1, hv0, d1, d0, hvd1, hvd0;
......@@ -559,11 +651,20 @@ void AdaptiveLoopFilter::deriveClassificationBlk( AlfClassifier** classifier, in
}
template<AlfFilterType filtType>
#if JVET_N0180_ALF_LINE_BUFFER_REDUCTION
#if JVET_N0242_NON_LINEAR_ALF
void AdaptiveLoopFilter::filterBlk(AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, short* fClipSet, const ClpRng& clpRng, CodingStructure& cs, int vbCTUHeight, int vbPos)
#else
void AdaptiveLoopFilter::filterBlk(AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs, int vbCTUHeight, int vbPos)
#endif
#else
#if JVET_N0242_NON_LINEAR_ALF
void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, short* fClipSet, const ClpRng& clpRng, CodingStructure& cs )
void AdaptiveLoopFilter::filterBlk(AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, short* fClipSet, const ClpRng& clpRng, CodingStructure& cs)
#else
void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs )
void AdaptiveLoopFilter::filterBlk(AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs)
#endif
#endif
{
const bool bChroma = isChroma( compId );
if( bChroma )
......@@ -757,6 +858,29 @@ void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf
pRec1 = pRec0 + j + ii * dstStride;
#if JVET_N0180_ALF_LINE_BUFFER_REDUCTION
if ((startHeight + i + ii) % vbCTUHeight < vbPos && ((startHeight + i + ii) % vbCTUHeight >= vbPos - (bChroma ? 2 : 4))) //above
{
pImg1 = ((startHeight + i + ii) % vbCTUHeight == vbPos - 1) ? pImg0 : pImg1;
pImg3 = ((startHeight + i + ii) % vbCTUHeight >= vbPos - 2) ? pImg1 : pImg3;
pImg5 = ((startHeight + i + ii) % vbCTUHeight >= vbPos - 3) ? pImg3 : pImg5;
pImg2 = ((startHeight + i + ii) % vbCTUHeight == vbPos - 1) ? pImg0 : pImg2;
pImg4 = ((startHeight + i + ii) % vbCTUHeight >= vbPos - 2) ? pImg2 : pImg4;
pImg6 = ((startHeight + i + ii) % vbCTUHeight >= vbPos - 3) ? pImg4 : pImg6;
}
else if ((startHeight + i + ii) % vbCTUHeight >= vbPos && ((startHeight + i + ii) % vbCTUHeight <= vbPos + (bChroma ? 1 : 3))) //bottom
{
pImg2 = ((startHeight + i + ii) % vbCTUHeight == vbPos) ? pImg0 : pImg2;
pImg4 = ((startHeight + i + ii) % vbCTUHeight <= vbPos + 1) ? pImg2 : pImg4;
pImg6 = ((startHeight + i + ii) % vbCTUHeight <= vbPos + 2) ? pImg4 : pImg6;
pImg1 = ((startHeight + i + ii) % vbCTUHeight == vbPos) ? pImg0 : pImg1;
pImg3 = ((startHeight + i + ii) % vbCTUHeight <= vbPos + 1) ? pImg1 : pImg3;
pImg5 = ((startHeight + i + ii) % vbCTUHeight <= vbPos + 2) ? pImg3 : pImg5;
}
#endif
for( int jj = 0; jj < clsSizeX; jj++ )
{
......
......@@ -89,27 +89,54 @@ public:
void reconstructCoeff( AlfSliceParam& alfSliceParam, ChannelType channel, const bool bRedo = false );
void create( const int picWidth, const int picHeight, const ChromaFormat format, const int maxCUWidth, const int maxCUHeight, const int maxCUDepth, const int inputBitDepth[MAX_NUM_CHANNEL_TYPE] );
void destroy();
static void deriveClassificationBlk( AlfClassifier** classifier, int** laplacian[NUM_DIRECTIONS], const CPelBuf& srcLuma, const Area& blk, const int shift );
#if JVET_N0180_ALF_LINE_BUFFER_REDUCTION
static void deriveClassificationBlk(AlfClassifier** classifier, int** laplacian[NUM_DIRECTIONS], const CPelBuf& srcLuma, const Area& blk, const int shift, int vbCTUHeight, int vbPos);
#else
static void deriveClassificationBlk(AlfClassifier** classifier, int** laplacian[NUM_DIRECTIONS], const CPelBuf& srcLuma, const Area& blk, const int shift);
#endif
void deriveClassification( AlfClassifier** classifier, const CPelBuf& srcLuma, const Area& blk );
void resetPCMBlkClassInfo(CodingStructure & cs, AlfClassifier** classifier, const CPelBuf& srcLuma, const Area& blk);
template<AlfFilterType filtType>
#if JVET_N0180_ALF_LINE_BUFFER_REDUCTION
#if JVET_N0242_NON_LINEAR_ALF
static void filterBlk(AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, short* fClipSet, const ClpRng& clpRng, CodingStructure& cs, int vbCTUHeight, int vbPos);
#else
static void filterBlk(AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs, int vbCTUHeight, int vbPos);
#endif
#else
#if JVET_N0242_NON_LINEAR_ALF
static void filterBlk( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, short* fClipSet, const ClpRng& clpRng, CodingStructure& cs );
static void filterBlk(AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, short* fClipSet, const ClpRng& clpRng, CodingStructure& cs);
#else
static void filterBlk( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs );
static void filterBlk(AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs);
#endif
#endif
/// Returns the maximum Golomb index for the given filter type:
/// 2 when the type is ALF_FILTER_5, and 3 for every other type.
inline static int getMaxGolombIdx( AlfFilterType filterType )
{
  if( filterType == ALF_FILTER_5 )
  {
    return 2;
  }
  return 3;
}
void( *m_deriveClassificationBlk )( AlfClassifier** classifier, int** laplacian[NUM_DIRECTIONS], const CPelBuf& srcLuma, const Area& blk, const int shift );
#if JVET_N0180_ALF_LINE_BUFFER_REDUCTION
void(*m_deriveClassificationBlk)(AlfClassifier** classifier, int** laplacian[NUM_DIRECTIONS], const CPelBuf& srcLuma, const Area& blk, const int shift, int vbCTUHeight, int vbPos);
#else
void(*m_deriveClassificationBlk)(AlfClassifier** classifier, int** laplacian[NUM_DIRECTIONS], const CPelBuf& srcLuma, const Area& blk, const int shift);
#endif
#if JVET_N0180_ALF_LINE_BUFFER_REDUCTION
#if JVET_N0242_NON_LINEAR_ALF
void( *m_filter5x5Blk )( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, short* fClipSet, const ClpRng& clpRng, CodingStructure& cs );
void( *m_filter7x7Blk )( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, short* fClipSet, const ClpRng& clpRng, CodingStructure& cs );
void(*m_filter5x5Blk)(AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, short* fClipSet, const ClpRng& clpRng, CodingStructure& cs, int vbCTUHeight, int vbPos);
void(*m_filter7x7Blk)(AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, short* fClipSet, const ClpRng& clpRng, CodingStructure& cs, int vbCTUHeight, int vbPos);
#else
void( *m_filter5x5Blk )( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs );
void( *m_filter7x7Blk )( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs );
void(*m_filter5x5Blk)(AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs, int vbCTUHeight, int vbPos);
void(*m_filter7x7Blk)(AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs, int vbCTUHeight, int vbPos);
#endif
#else
#if JVET_N0242_NON_LINEAR_ALF
void(*m_filter5x5Blk)(AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, short* fClipSet, const ClpRng& clpRng, CodingStructure& cs);
void(*m_filter7x7Blk)(AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, short* fClipSet, const ClpRng& clpRng, CodingStructure& cs);
#else
void(*m_filter5x5Blk)(AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs);
void(*m_filter7x7Blk)(AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs);
#endif
#endif
#ifdef TARGET_SIMD_X86
......@@ -141,6 +168,12 @@ protected:
int m_numCTUsInWidth;
int m_numCTUsInHeight;
int m_numCTUsInPic;
#if JVET_N0180_ALF_LINE_BUFFER_REDUCTION
// Row position, in luma rows, that create() sets to
// m_maxCUHeight - ALF_VB_POS_ABOVE_CTUROW_LUMA. Passed as the vbPos argument of the
// luma filter and classification calls unless the picture height is substituted.
int m_alfVBLumaPos;
// Chroma counterpart: create() sets it to the chroma CTU height (m_maxCUHeight,
// shifted right by one for CHROMA_420) minus ALF_VB_POS_ABOVE_CTUROW_CHMA.
int m_alfVBChmaPos;
// Set by create() to m_maxCUHeight; passed as vbCTUHeight, the modulus applied to
// row positions in the luma filter/classification code.
int m_alfVBLumaCTUHeight;
// Set by create() to m_maxCUHeight, shifted right by one for CHROMA_420; passed as
// vbCTUHeight to the chroma filter.
int m_alfVBChmaCTUHeight;
#endif
ChromaFormat m_chromaFormat;
ClpRngs m_clpRngs;
};
......
......@@ -327,6 +327,11 @@ static const int GBI_SIZE_CONSTRAINT = 256; ///< disa
static const int MAX_NUM_HMVP_CANDS = (MRG_MAX_NUM_CANDS-1); ///< maximum number of HMVP candidates to be stored and used in merge list
static const int MAX_NUM_HMVP_AVMPCANDS = 4; ///< maximum number of HMVP candidates to be used in AMVP list
#if JVET_N0180_ALF_LINE_BUFFER_REDUCTION
/// Luma row offset subtracted from the maximum CU height in AdaptiveLoopFilter::create()
/// to obtain the ALF "VB" position for luma (presumably "virtual boundary" - confirm).
static const int ALF_VB_POS_ABOVE_CTUROW_LUMA = 4;
/// Chroma row offset subtracted from the chroma CTU height in AdaptiveLoopFilter::create()
/// to obtain the ALF "VB" position for chroma.
static const int ALF_VB_POS_ABOVE_CTUROW_CHMA = 2;
#endif
#if W0038_DB_OPT
static const int MAX_ENCODER_DEBLOCKING_QUALITY_LAYERS = 8 ;
#endif
......
......@@ -117,6 +117,8 @@
#define JVET_N0483_DISABLE_SBT_FOR_TPM 1
#define JVET_N0180_ALF_LINE_BUFFER_REDUCTION 1 // Line buffer reduction for ALF using symmetric padding
#define JVET_N0242_NON_LINEAR_ALF 1 // enable CE5-3.2, Non-linear ALF based on clipping function
#define JVET_N0329_IBC_SEARCH_IMP 1 // IBC encoder-side improvement
......
......@@ -48,7 +48,11 @@
#endif
template<X86_VEXT vext>
static void simdDeriveClassificationBlk( AlfClassifier** classifier, int** laplacian[NUM_DIRECTIONS], const CPelBuf& srcLuma, const Area& blk, const int shift )
#if JVET_N0180_ALF_LINE_BUFFER_REDUCTION
static void simdDeriveClassificationBlk(AlfClassifier** classifier, int** laplacian[NUM_DIRECTIONS], const CPelBuf& srcLuma, const Area& blk, const int shift, int vbCTUHeight, int vbPos)
#else
static void simdDeriveClassificationBlk(AlfClassifier** classifier, int** laplacian[NUM_DIRECTIONS], const CPelBuf& srcLuma, const Area& blk, const int shift)
#endif
{
const int img_stride = srcLuma.stride;
const Pel* srcExt = srcLuma.buf;
......@@ -76,6 +80,18 @@ static void simdDeriveClassificationBlk( AlfClassifier** classifier, int** lapla
const Pel *p_imgY_pad_up = &srcExt[yoffset + img_stride];
const Pel *p_imgY_pad_up2 = &srcExt[yoffset + img_stride * 2];
#if JVET_N0180_ALF_LINE_BUFFER_REDUCTION
// pixel padding for gradient calculation
if (((posY - 2 + i) > 0) && ((posY - 2 + i) % vbCTUHeight) == (vbPos - 2))
{
p_imgY_pad_up2 = &srcExt[yoffset + img_stride];
}
else if (((posY - 2 + i) > 0) && ((posY - 2 + i) % vbCTUHeight) == vbPos)
{
p_imgY_pad_down = &srcExt[yoffset];
}
#endif
__m128i mmStore = _mm_setzero_si128();
for( int j = 2; j < imgWExtended; j += 8 )
......@@ -185,10 +201,36 @@ static void simdDeriveClassificationBlk( AlfClassifier** classifier, int** lapla
{
for( int j = 0; j < blk.width; j += 8 )
{
__m128i xmm0 = _mm_loadu_si128( ( __m128i* )( &( _temp[i + 0][j] ) ) );
__m128i xmm1 = _mm_loadu_si128( ( __m128i* )( &( _temp[i + 1][j] ) ) );
__m128i xmm2 = _mm_loadu_si128( ( __m128i* )( &( _temp[i + 2][j] ) ) );
__m128i xmm3 = _mm_loadu_si128( ( __m128i* )( &( _temp[i + 3][j] ) ) );
#if JVET_N0180_ALF_LINE_BUFFER_REDUCTION
__m128i xmm0, xmm1, xmm2, xmm3;
if ((((i << 1) + posY) % vbCTUHeight) == (vbPos - 4))
{
xmm0 = _mm_loadu_si128((__m128i*)(&(_temp[i + 0][j])));
xmm1 = _mm_loadu_si128((__m128i*)(&(_temp[i + 1][j])));
xmm2 = _mm_loadu_si128((__m128i*)(&(_temp[i + 2][j])));
xmm3 = mm_0;
}
else if ((((i << 1) + posY) % vbCTUHeight) == vbPos)
{
xmm0 = mm_0;
xmm1 = _mm_loadu_si128((__m128i*)(&(_temp[i + 1][j])));
xmm2 = _mm_loadu_si128((__m128i*)(&(_temp[i + 2][j])));
xmm3 = _mm_loadu_si128((__m128i*)(&(_temp[i + 3][j])));
}
else
{
xmm0 = _mm_loadu_si128((__m128i*)(&(_temp[i + 0][j])));
xmm1 = _mm_loadu_si128((__m128i*)(&(_temp[i + 1][j])));
xmm2 = _mm_loadu_si128((__m128i*)(&(_temp[i + 2][j])));
xmm3 = _mm_loadu_si128((__m128i*)(&(_temp[i + 3][j])));
}
#else
__m128i xmm0 = _mm_loadu_si128((__m128i*)(&(_temp[i + 0][j])));
__m128i xmm1 = _mm_loadu_si128((__m128i*)(&(_temp[i + 1][j])));
__m128i xmm2 = _mm_loadu_si128((__m128i*)(&(_temp[i + 2][j])));
__m128i xmm3 = _mm_loadu_si128((__m128i*)(&(_temp[i + 3][j])));
#endif
__m128i xmm4 = _mm_add_epi16( xmm0, xmm1 );
__m128i xmm6 = _mm_add_epi16( xmm2, xmm3 );
......@@ -204,7 +246,18 @@ static void simdDeriveClassificationBlk( AlfClassifier** classifier, int** lapla
__m128i xmm12 = _mm_blend_epi16( xmm4, _mm_shuffle_epi32( xmm0, 0x40 ), 0xF0 );
__m128i xmm10 = _mm_shuffle_epi32( xmm12, 0xB1 );
xmm12 = _mm_add_epi32( xmm10, xmm12 );
xmm12 = _mm_srai_epi32( xmm12, shift - 6 );
#if JVET_N0180_ALF_LINE_BUFFER_REDUCTION
if (((((i << 1) + posY) % vbCTUHeight) == (vbPos - 4)) || ((((i << 1) + posY) % vbCTUHeight) == vbPos))
{
xmm12 = _mm_srai_epi32(_mm_add_epi32(_mm_slli_epi32(xmm12, 5), _mm_slli_epi32(xmm12, 6)), shift);
}
else
{
xmm12 = _mm_srai_epi32(xmm12, shift - 6);
}
#else
xmm12 = _mm_srai_epi32(xmm12, shift - 6);
#endif
xmm12 = _mm_min_epi32( xmm12, xmm13 );
xmm12 = _mm_and_si128( xmm12, mm_15 );
......@@ -317,10 +370,18 @@ static void simdDeriveClassificationBlk( AlfClassifier** classifier, int** lapla
}
template<X86_VEXT vext>
#if JVET_N0180_ALF_LINE_BUFFER_REDUCTION
#if JVET_N0242_NON_LINEAR_ALF
static void simdFilter5x5Blk(AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, short* fClipSet, const ClpRng& clpRng, CodingStructure& cs, int vbCTUHeight, int vbPos)
#else
static void simdFilter5x5Blk(AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs, int vbCTUHeight, int vbPos)
#endif
#else
#if JVET_N0242_NON_LINEAR_ALF
static void simdFilter5x5Blk( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, short* fClipSet, const ClpRng& clpRng, CodingStructure& cs )
static void simdFilter5x5Blk(AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, short* fClipSet, const ClpRng& clpRng, CodingStructure& cs)
#else
static void simdFilter5x5Blk( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs )
static void simdFilter5x5Blk(AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs)
#endif
#endif
{
#if !JVET_N0242_NON_LINEAR_ALF
......@@ -579,15 +640,18 @@ static void simdFilter5x5Blk( AlfClassifier** classifier, const PelUnitBuf &recD
}
#endif
#if !JVET_N0180_ALF_LINE_BUFFER_REDUCTION
pImg0 = pImgYPad0 + j;
pImg1 = pImgYPad1 + j;
pImg2 = pImgYPad2 + j;
pImg3 = pImgYPad3 + j;
pImg4 = pImgYPad4 + j;
#if !JVET_N0242_NON_LINEAR_ALF
#endif
#if !JVET_N0180_ALF_LINE_BUFFER_REDUCTION && !JVET_N0242_NON_LINEAR_ALF
pImg5 = pImgYPad5 + j;
#endif
#if JVET_N0242_NON_LINEAR_ALF
pRec1 = pRec0 + j;
......@@ -623,6 +687,31 @@ static void simdFilter5x5Blk( AlfClassifier** classifier, const PelUnitBuf &recD
for( int ii = 0; ii < clsSizeY; ii++ )
{
#if JVET_N0180_ALF_LINE_BUFFER_REDUCTION
pImg0 = pImgYPad0 + j + ii * srcStride;
pImg1 = pImgYPad1 + j + ii * srcStride;
pImg2 = pImgYPad2 + j + ii * srcStride;
pImg3 = pImgYPad3 + j + ii * srcStride;
pImg4 = pImgYPad4 + j + ii * srcStride;
#endif
#if JVET_N0180_ALF_LINE_BUFFER_REDUCTION
if ((startHeight + i + ii) % vbCTUHeight < vbPos && ((startHeight + i + ii) % vbCTUHeight >= vbPos - (bChroma ? 2 : 4))) //above
{
pImg1 = ((startHeight + i + ii) % vbCTUHeight == vbPos - 1) ? pImg0 : pImg1;
pImg3 = ((startHeight + i + ii) % vbCTUHeight >= vbPos - 2) ? pImg1 : pImg3;
pImg2 = ((startHeight + i + ii) % vbCTUHeight == vbPos - 1) ? pImg0 : pImg2;
pImg4 = ((startHeight + i + ii) % vbCTUHeight >= vbPos - 2) ? pImg2 : pImg4;
}
else if ((startHeight + i + ii) % vbCTUHeight >= vbPos && ((startHeight + i + ii) % vbCTUHeight <= vbPos + (bChroma ? 1 : 3))) //bottom
{
pImg2 = ((startHeight + i + ii) % vbCTUHeight == vbPos) ? pImg0 : pImg2;
pImg4 = ((startHeight + i + ii) % vbCTUHeight <= vbPos + 1) ? pImg2 : pImg4;
pImg1 = ((startHeight + i + ii) % vbCTUHeight == vbPos) ? pImg0 : pImg1;
pImg3 = ((startHeight + i + ii) % vbCTUHeight <= vbPos + 1) ? pImg1 : pImg3;
}
#endif
__m128i clipp, clipm;
__m128i coeffa, coeffb;
__m128i xmmCur = _mm_lddqu_si128( ( __m128i* ) ( pImg0 + 0 ) );
......@@ -797,6 +886,33 @@ static void simdFilter5x5Blk( AlfClassifier** classifier, const PelUnitBuf &recD
#else
for( int k = 0; k < 4; k++ )
{
#if JVET_N0180_ALF_LINE_BUFFER_REDUCTION
pImg0 = pImgYPad0 + j + k * srcStride; //j: width, 4x4 block
pImg1 = pImgYPad1 + j + k * srcStride;
pImg2 = pImgYPad2 + j + k * srcStride;
pImg3 = pImgYPad3 + j + k * srcStride;
pImg4 = pImgYPad4 + j + k * srcStride;
#endif
#if JVET_N0180_ALF_LINE_BUFFER_REDUCTION
if ((startHeight + i + k) % vbCTUHeight < vbPos && ((startHeight + i + k) % vbCTUHeight >= vbPos - (bChroma ? 2 : 4))) //above
{
pImg1 = ((startHeight + i + k) % vbCTUHeight == vbPos - 1) ? pImg0 : pImg1;
pImg3 = ((startHeight + i + k) % vbCTUHeight >= vbPos - 2) ? pImg1 : pImg3;
pImg5 = ((startHeight + i + k) % vbCTUHeight >= vbPos - 3) ? pImg3 : pImg5;
pImg2 = ((startHeight + i + k) % vbCTUHeight == vbPos - 1) ? pImg0 : pImg2;
pImg4 = ((startHeight + i + k) % vbCTUHeight >= vbPos - 2) ? pImg2 : pImg4;
}
else if ((startHeight + i + k) % vbCTUHeight >= vbPos && ((startHeight + i + k) % vbCTUHeight <= vbPos + (bChroma ? 1 : 3))) //bottom
{
pImg2 = ((startHeight + i + k) % vbCTUHeight == vbPos) ? pImg0 : pImg2;
pImg4 = ((startHeight + i + k) % vbCTUHeight <= vbPos + 1) ? pImg2 : pImg4;
pImg1 = ((startHeight + i + k) % vbCTUHeight == vbPos) ? pImg0 : pImg1;
pImg3 = ((startHeight + i + k) % vbCTUHeight <= vbPos + 1) ? pImg1 : pImg3;
pImg5 = ((startHeight + i + k) % vbCTUHeight <= vbPos + 2) ? pImg3 : pImg5;
}
#endif
__m128i xmm4 = _mm_lddqu_si128( ( __m128i* ) ( pImg4 ) );
__m128i xmm2 = _mm_lddqu_si128( ( __m128i* ) ( pImg2 - 1 ) );
__m128i xmm0 = _mm_lddqu_si128( ( __m128i* ) ( pImg0 - 2 ) );
......@@ -877,13 +993,14 @@ static void simdFilter5x5Blk( AlfClassifier** classifier, const PelUnitBuf &recD
pRec += dstStride;
#endif
#if !JVET_N0180_ALF_LINE_BUFFER_REDUCTION
pImg0 += srcStride;
pImg1 += srcStride;
pImg2 += srcStride;
pImg3 += srcStride;
pImg4 += srcStride;
#if !JVET_N0242_NON_LINEAR_ALF
#endif
#if !JVET_N0242_NON_LINEAR_ALF && !JVET_N0180_ALF_LINE_BUFFER_REDUCTION
pImg5 += srcStride;
#endif
......@@ -960,11 +1077,19 @@ static void simdFilter5x5Blk( AlfClassifier** classifier, const PelUnitBuf &recD
}
template<X86_VEXT vext>
#if JVET_N0180_ALF_LINE_BUFFER_REDUCTION
#if JVET_N0242_NON_LINEAR_ALF
static void simdFilter7x7Blk(AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, short* fClipSet, const ClpRng& clpRng, CodingStructure& cs, int vbCTUHeight, int vbPos)
#else
static void simdFilter7x7Blk(AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs, int vbCTUHeight, int vbPos)
#endif
#else
#if JVET_N0242_NON_LINEAR_ALF
static void simdFilter7x7Blk( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, short* fClipSet, const ClpRng& clpRng, CodingStructure& cs )
#else
static void simdFilter7x7Blk( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs )
#endif
#endif
{
#if !JVET_N0242_NON_LINEAR_ALF
static const unsigned char mask0[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 6, 7, 4, 5, 2, 3 };
......@@ -1112,7 +1237,7 @@ static void simdFilter7x7Blk( AlfClassifier** classifier, const PelUnitBuf &recD
#endif