diff --git a/source/Lib/CommonLib/BilateralFilter.cpp b/source/Lib/CommonLib/BilateralFilter.cpp index 9b7137b869443d5ebe96691de5914951f6177fba..7d3b615d807a069a526274e99654f6ea84e5d8c5 100644 --- a/source/Lib/CommonLib/BilateralFilter.cpp +++ b/source/Lib/CommonLib/BilateralFilter.cpp @@ -53,6 +53,9 @@ BilateralFilter::BilateralFilter() { m_bilateralFilterDiamond5x5 = blockBilateralFilterDiamond5x5; +#if JVET_AF0112_BIF_DYNAMIC_SCALING + m_calcMAD = calcMAD; +#endif #if ENABLE_SIMD_BILATERAL_FILTER || JVET_X0071_CHROMA_BILATERAL_FILTER_ENABLE_SIMD #ifdef TARGET_SIMD_X86 @@ -67,14 +70,53 @@ BilateralFilter::~BilateralFilter() void BilateralFilter::create() { +#if JVET_AF0112_BIF_DYNAMIC_SCALING + for (int qp = 0; qp < 26; qp++) + { + for (int i = 0; i < 16; i++) + { +#if JVET_V0094_BILATERAL_FILTER + m_lut[qp][i] = ((int)m_wBIF[qp][i] * m_distFactor[0] + 4) >> 3; + m_lut[qp][16 + i] = ((int)m_wBIF[qp][i] * m_distFactor[1] + 4) >> 3; + m_lut[qp][32 + i] = ((int)m_wBIF[qp][i] * m_distFactor[2] + 4) >> 3; +#endif +#if JVET_X0071_CHROMA_BILATERAL_FILTER + m_lutChroma[qp][i] = ((int)m_wBIFChroma[qp][i] * m_distFactorChroma[0] + 4) >> 3; + m_lutChroma[qp][16 + i] = ((int)m_wBIFChroma[qp][i] * m_distFactorChroma[1] + 4) >> 3; + m_lutChroma[qp][32 + i] = ((int)m_wBIFChroma[qp][i] * m_distFactorChroma[2] + 4) >> 3; +#endif + } + } +#endif } void BilateralFilter::destroy() { } #if JVET_V0094_BILATERAL_FILTER -const char* BilateralFilter::getFilterLutParameters( const int size, const PredMode predMode, const int32_t qp, int& bfac ) +const char* BilateralFilter::getFilterLutParameters(int16_t* block, const int stride, const int width, const int height, const PredMode predMode, const int32_t qp, int& bfac) { +#if JVET_AF0112_BIF_DYNAMIC_SCALING + int w = floorLog2(width); + int h = floorLog2(height); + int mad = m_calcMAD(block, stride, width, height, w + h); + + w = std::min(w, 7); + h = std::min(h, 7); + mad = std::min(mad >> 4, 15); + + bfac = m_tuSizeFactor[predMode == 
MODE_INTER][w * 8 + h]; + if (bfac) // BIF is not applied if tuSizeFactor[(w, h)] = 0 + { + bfac += m_tuMADFactor[predMode == MODE_INTER][mad]; + } + + int sqp = qp - 17; + sqp = std::min(sqp, 25); + sqp = std::max(sqp, 0); + return m_lut[sqp]; +#else + const int size = std::min(width, height); if( size <= 4 ) { bfac = 3; @@ -117,99 +159,81 @@ const char* BilateralFilter::getFilterLutParameters( const int size, const PredM } return m_wBIF[sqp - 17]; +#endif } #endif -void BilateralFilter::blockBilateralFilterDiamond5x5( uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bifRoundAdd, int bifRoundShift, bool isRDO, const char* lutRowPtr, bool noClip ) +inline void bifApplyLut(int diff, int& res, int cutBitsNum, int bitsRound, int bitsRound2, int shift, const char* lutRowPtr, int lutShift) +{ + int sg0 = diff >> shift; + int v0 = (diff + sg0) ^ sg0; +#if JVET_AF0112_BIF_DYNAMIC_SCALING + int idx = (v0 + bitsRound) >> cutBitsNum; + idx = 15 + ((idx - 15) & ((idx - 15) >> shift)); + int idx2 = (v0 + bitsRound2) >> cutBitsNum; + idx2 = 15 + ((idx2 - 15) & ((idx2 - 15) >> shift)); + int w0 = (lutRowPtr[lutShift + idx] + lutRowPtr[lutShift + idx2] + 1) >> 1; +#else + int idx = (v0 + 4) >> 3; + idx = 15 + ((idx - 15) & ((idx - 15) >> shift)); + int w0 = lutRowPtr[idx] >> lutShift; +#endif + res = (w0 + sg0) ^ sg0; +} + +void BilateralFilter::blockBilateralFilterDiamond5x5( uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bifRoundAdd, int bifRoundShift, bool isRDO, const char* lutRowPtr, bool noClip, int cutBitsNum) { - int pad = 2; + int pad = NUMBER_PADDED_SAMPLES; int padwidth = iWidthExtSIMD; - int downbuffer[64]; - int downleftbuffer[65]; - int downrightbuffer[2][65]; - int shift, sg0, v0, idx, w0; - shift = sizeof( int ) * 8 - 1; - downbuffer[0] = 0; + int 
downbuffer[128]; + int downleftbuffer[129]; + int downrightbuffer[2][129]; + int shift = sizeof( int ) * 8 - 1; +#if JVET_AF0112_BIF_DYNAMIC_SCALING + int lutShift1 = 0, lutShift2 = 16, lutShift3 = 32; + int bitsRound = 1 << (cutBitsNum - 2); + int bitsRound2 = bitsRound + (1 << (cutBitsNum - 1)); +#else + int lutShift1 = 0, lutShift2 = 1, lutShift3 = 1; + int bitsRound = 4; + int bitsRound2 = 4; +#endif for( int x = 0; x < uiWidth; x++ ) { int pixel = block[(-1 + pad)*padwidth + x + pad]; int below = block[(-1 + pad + 1)*padwidth + x + pad]; - int diff = below - pixel; - sg0 = diff >> shift; - v0 = (diff + sg0) ^ sg0; - v0 = (v0 + 4) >> 3; - idx = 15 + ((v0 - 15)&((v0 - 15) >> shift)); - w0 = lutRowPtr[idx]; - int mod = (w0 + sg0) ^ sg0; - downbuffer[x] = mod; + bifApplyLut(below - pixel, downbuffer[x], cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift1); int belowright = block[(-1 + pad + 1)*padwidth + x + pad + 1]; - diff = belowright - pixel; - sg0 = diff >> shift; - v0 = (diff + sg0) ^ sg0; - v0 = (v0 + 4) >> 3; - idx = 15 + ((v0 - 15)&((v0 - 15) >> shift)); - w0 = lutRowPtr[idx] >> 1; - mod = (w0 + sg0) ^ sg0; - downrightbuffer[1][x + 1] = mod; + bifApplyLut(belowright - pixel, downrightbuffer[1][x + 1], cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift2); int belowleft = block[(-1 + pad + 1)*padwidth + x + pad - 1]; - diff = belowleft - pixel; - sg0 = diff >> shift; - v0 = (diff + sg0) ^ sg0; - v0 = (v0 + 4) >> 3; - idx = 15 + ((v0 - 15)&((v0 - 15) >> shift)); - w0 = lutRowPtr[idx] >> 1; - mod = (w0 + sg0) ^ sg0; - downleftbuffer[x] = mod; + bifApplyLut(belowleft - pixel, downleftbuffer[x], cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift2); } int width = uiWidth; for( int y = 0; y < uiHeight; y++ ) { - int diff; - int16_t *rowStart = &block[(y + pad)*padwidth + pad]; int pixel = rowStart[-1]; - int right = rowStart[0]; - diff = right - pixel; - sg0 = diff >> shift; - v0 = (diff + sg0) ^ sg0; - v0 = (v0 + 4) >> 3; - idx = 
15 + ((v0 - 15)&((v0 - 15) >> shift)); - w0 = lutRowPtr[idx]; - int mod = (w0 + sg0) ^ sg0; - int rightmod = mod; + int right = rowStart[0], rightmod = 0; + bifApplyLut(right - pixel, rightmod, cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift1); pixel = rowStart[-padwidth - 1]; int belowright = right; - diff = belowright - pixel; - sg0 = diff >> shift; - v0 = (diff + sg0) ^ sg0; - v0 = (v0 + 4) >> 3; - idx = 15 + ((v0 - 15)&((v0 - 15) >> shift)); - w0 = lutRowPtr[idx] >> 1; - mod = (w0 + sg0) ^ sg0; - downrightbuffer[(y + 1) % 2][0] = mod; + bifApplyLut(belowright - pixel, downrightbuffer[(y + 1) % 2][0], cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift2); pixel = rowStart[-padwidth + width]; int belowleft = rowStart[width - 1]; - diff = belowleft - pixel; - sg0 = diff >> shift; - v0 = (diff + sg0) ^ sg0; - v0 = (v0 + 4) >> 3; - idx = 15 + ((v0 - 15)&((v0 - 15) >> shift)); - w0 = lutRowPtr[idx] >> 1; - mod = (w0 + sg0) ^ sg0; - downleftbuffer[width] = mod; + bifApplyLut(belowleft - pixel, downleftbuffer[width], cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift2); for( int x = 0; x < uiWidth; x++ ) { pixel = rowStart[x]; - int modsum = 0; + int modsum = 0, mod = 0; int abovemod = -downbuffer[x]; modsum += abovemod; @@ -218,57 +242,28 @@ void BilateralFilter::blockBilateralFilterDiamond5x5( uint32_t uiWidth, uint32_t modsum += leftmod; right = rowStart[x + 1]; - diff = right - pixel; - sg0 = diff >> shift; - v0 = (diff + sg0) ^ sg0; - v0 = (v0 + 4) >> 3; - idx = 15 + ((v0 - 15)&((v0 - 15) >> shift)); - w0 = lutRowPtr[idx]; - mod = (w0 + sg0) ^ sg0; - + bifApplyLut(right - pixel, mod, cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift1); modsum += mod; rightmod = mod; int below = rowStart[x + padwidth]; - diff = below - pixel; - sg0 = diff >> shift; - v0 = (diff + sg0) ^ sg0; - v0 = (v0 + 4) >> 3; - idx = 15 + ((v0 - 15)&((v0 - 15) >> shift)); - w0 = lutRowPtr[idx]; - mod = (w0 + sg0) ^ sg0; + bifApplyLut(below - pixel, 
mod, cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift1); modsum += mod; downbuffer[x] = mod; int aboverightmod = -downleftbuffer[x + 1]; - // modsum += ((int16_t)((uint16_t)((aboverightmod) >> 1))); modsum += aboverightmod; int aboveleftmod = -downrightbuffer[(y + 1) % 2][x]; - // modsum += ((int16_t)((uint16_t)((aboveleftmod) >> 1))); modsum += aboveleftmod; int belowleft = rowStart[x + padwidth - 1]; - diff = belowleft - pixel; - sg0 = diff >> shift; - v0 = (diff + sg0) ^ sg0; - v0 = (v0 + 4) >> 3; - idx = 15 + ((v0 - 15)&((v0 - 15) >> shift)); - w0 = lutRowPtr[idx] >> 1; - mod = (w0 + sg0) ^ sg0; - // modsum += ((int16_t)((uint16_t)((mod) >> 1))); + bifApplyLut(belowleft - pixel, mod, cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift2); modsum += mod; downleftbuffer[x] = mod; int belowright = rowStart[x + padwidth + 1]; - diff = belowright - pixel; - sg0 = diff >> shift; - v0 = (diff + sg0) ^ sg0; - v0 = (v0 + 4) >> 3; - idx = 15 + ((v0 - 15)&((v0 - 15) >> shift)); - w0 = lutRowPtr[idx] >> 1; - mod = (w0 + sg0) ^ sg0; - //modsum += ((int16_t)((uint16_t)((mod) >> 1))); + bifApplyLut(belowright - pixel, mod, cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift2); modsum += mod; downrightbuffer[y % 2][x + 1] = mod; @@ -277,56 +272,34 @@ void BilateralFilter::blockBilateralFilterDiamond5x5( uint32_t uiWidth, uint32_t // speed when SIMD is turned off. 
int above = rowStart[x - 2 * padwidth]; - diff = above - pixel; - sg0 = diff >> shift; - v0 = (diff + sg0) ^ sg0; - v0 = (v0 + 4) >> 3; - idx = 15 + ((v0 - 15)&((v0 - 15) >> shift)); - w0 = lutRowPtr[idx] >> 1; - mod = (w0 + sg0) ^ sg0; + bifApplyLut(above - pixel, mod, cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift3); modsum += mod; below = rowStart[x + 2 * padwidth]; - diff = below - pixel; - sg0 = diff >> shift; - v0 = (diff + sg0) ^ sg0; - v0 = (v0 + 4) >> 3; - idx = 15 + ((v0 - 15)&((v0 - 15) >> shift)); - w0 = lutRowPtr[idx] >> 1; - mod = (w0 + sg0) ^ sg0; + bifApplyLut(below - pixel, mod, cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift3); modsum += mod; int left = rowStart[x - 2]; - diff = left - pixel; - sg0 = diff >> shift; - v0 = (diff + sg0) ^ sg0; - v0 = (v0 + 4) >> 3; - idx = 15 + ((v0 - 15)&((v0 - 15) >> shift)); - w0 = lutRowPtr[idx] >> 1; - mod = (w0 + sg0) ^ sg0; + bifApplyLut(left - pixel, mod, cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift3); modsum += mod; right = rowStart[x + 2]; - diff = right - pixel; - sg0 = diff >> shift; - v0 = (diff + sg0) ^ sg0; - v0 = (v0 + 4) >> 3; - idx = 15 + ((v0 - 15)&((v0 - 15) >> shift)); - w0 = lutRowPtr[idx] >> 1; - mod = (w0 + sg0) ^ sg0; + bifApplyLut(right - pixel, mod, cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift3); modsum += mod; - blkFilt[(y + pad)*(padwidth + 4) + x + pad] = (( int16_t ) (( uint16_t ) ((modsum*bfac + bifRoundAdd) >> bifRoundShift))); +#if JVET_AF0112_BIF_DYNAMIC_SCALING + blkFilt[(y + pad) * padwidth + x + pad] = ((int16_t)((uint16_t)((modsum * bfac + (bifRoundAdd << 3)) >> (bifRoundShift + 3)))); +#else + blkFilt[(y + pad) * padwidth + x + pad] = (( int16_t ) (( uint16_t ) ((modsum*bfac + bifRoundAdd) >> bifRoundShift))); +#endif } } // Copy back - Pel *tempBlockPtr = ( short* ) blkFilt + (((padwidth + 4) << 1) + 2); - int tempBlockStride = padwidth + 4; + Pel* tempBlockPtr = blkFilt + pad * padwidth + pad; if( isRDO ) { - Pel 
*srcBlockPtr = ( short* ) block + (((padwidth) << 1) + 2); - int srcBlockStride = padwidth; + Pel *srcBlockPtr = block + pad * padwidth + pad; for( uint32_t yy = 0; yy < uiHeight; yy++ ) { for( uint32_t xx = 0; xx < uiWidth; xx++ ) @@ -334,8 +307,8 @@ void BilateralFilter::blockBilateralFilterDiamond5x5( uint32_t uiWidth, uint32_t recPtr[xx] = ClipPel( srcBlockPtr[xx] + tempBlockPtr[xx], clpRng ); } recPtr += recStride; - tempBlockPtr += tempBlockStride; - srcBlockPtr += srcBlockStride; + tempBlockPtr += padwidth; + srcBlockPtr += padwidth; } } else if( noClip ) @@ -349,7 +322,7 @@ void BilateralFilter::blockBilateralFilterDiamond5x5( uint32_t uiWidth, uint32_t recPtr[xx] = recPtr[xx] + tempBlockPtr[xx]; // clipping is done jointly for SAO/BIF/CCSAO } recPtr += recStride; - tempBlockPtr += tempBlockStride; + tempBlockPtr += padwidth; } } else @@ -362,41 +335,44 @@ void BilateralFilter::blockBilateralFilterDiamond5x5( uint32_t uiWidth, uint32_t recPtr[xx] = ClipPel<int>( recPtr[xx] + tempBlockPtr[xx], clpRng ); } recPtr += recStride; - tempBlockPtr += tempBlockStride; + tempBlockPtr += padwidth; } } } -#if JVET_V0094_BILATERAL_FILTER -void BilateralFilter::bilateralFilterRDOdiamond5x5(const ComponentID compID, PelBuf& resiBuf, const CPelBuf& predBuf, PelBuf& recoBuf, int32_t qp, const CPelBuf& recIPredBuf, const ClpRng& clpRng, TransformUnit & currTU, bool useReco, bool doReshape, std::vector<Pel>* pLUT) -{ - uint32_t uiWidth = predBuf.width; - uint32_t uiHeight = predBuf.height; - - int bfac = 1; - const int bifRoundAdd = BIF_ROUND_ADD >> currTU.cs->pps->getBIFStrength(); - const int bifRoundShift = BIF_ROUND_SHIFT - currTU.cs->pps->getBIFStrength(); - - const char* lutRowPtr = nullptr; - if( isLuma( compID ) ) +#if JVET_AF0112_BIF_DYNAMIC_SCALING +int BilateralFilter::calcMAD(int16_t* block, int stride, int width, int height, int whlog2) +{ + int average = 0; + for (int i = 0; i < height; i++) { - lutRowPtr = getFilterLutParameters( std::min( uiWidth, uiHeight ), 
currTU.cu->predMode, qp + currTU.cs->pps->getBIFQPOffset(), bfac ); + for (int j = 0; j < width; j++) + { + average += block[j]; + } + block += stride; } - else + block -= stride * height; + average = (average + (1 << (whlog2 - 1))) >> whlog2; + int mad = 0; + for (int i = 0; i < height; i++) { - int widthForStrength = currTU.blocks[compID].width; - int heightForStrength = currTU.blocks[compID].height; - - if( currTU.blocks[COMPONENT_Y].valid() ) + for (int j = 0; j < width; j++) { - widthForStrength = currTU.blocks[COMPONENT_Y].width; - heightForStrength = currTU.blocks[COMPONENT_Y].height; + mad += std::abs(block[j] - average); } - - lutRowPtr = getFilterLutParametersChroma( std::min( uiWidth, uiHeight ), currTU.cu->predMode, qp + currTU.cs->pps->getChromaBIFQPOffset(), bfac, widthForStrength, heightForStrength, currTU.blocks[COMPONENT_Y].valid() ); - - CHECK( doReshape, "Reshape domain is not used for chroma" ); + block += stride; } + mad = (mad + (1 << (whlog2 - 1))) >> whlog2; + return mad; +} +#endif + +#if JVET_V0094_BILATERAL_FILTER +void BilateralFilter::bilateralFilterRDOdiamond5x5(const ComponentID compID, PelBuf& resiBuf, const CPelBuf& predBuf, PelBuf& recoBuf, int32_t qp, const CPelBuf& recIPredBuf, const ClpRng& clpRng, TransformUnit & currTU, bool useReco, bool doReshape, std::vector<Pel>* pLUT) +{ + uint32_t uiWidth = predBuf.width; + uint32_t uiHeight = predBuf.height; const unsigned uiPredStride = predBuf.stride; const unsigned uiStrideRes = resiBuf.stride; @@ -416,11 +392,7 @@ void BilateralFilter::bilateralFilterRDOdiamond5x5(const ComponentID compID, Pel const uint32_t uiWidthExt = uiWidth + (NUMBER_PADDED_SAMPLES << 1); const uint32_t uiHeightExt = uiHeight + (NUMBER_PADDED_SAMPLES << 1); - int iWidthExtSIMD = uiWidthExt | 0x04; - if( uiWidth < 8 ) - { - iWidthExtSIMD = 8 + (NUMBER_PADDED_SAMPLES << 1); - } + int iWidthExtSIMD = uiWidthExt + 16; memset(tempblock, 0, iWidthExtSIMD*uiHeightExt * sizeof(Pel)); Pel *tempBlockPtr = tempblock + 
NUMBER_PADDED_SAMPLES* iWidthExtSIMD + NUMBER_PADDED_SAMPLES; @@ -589,7 +561,38 @@ void BilateralFilter::bilateralFilterRDOdiamond5x5(const ComponentID compID, Pel std::copy( tempblock + iWidthExtSIMD, tempblock + iWidthExtSIMD + uiWidthExt, tempblock ); std::copy( tempblock + iWidthExtSIMD * ( uiHeightExt - 2 ), tempblock + iWidthExtSIMD * ( uiHeightExt - 2 ) + uiWidthExt, tempblock + iWidthExtSIMD * ( uiHeightExt - 1 ) ); - m_bilateralFilterDiamond5x5(uiWidth, uiHeight, tempblock, tempblockFiltered, clpRng, piReco, uiRecStride, iWidthExtSIMD, bfac, bifRoundAdd, bifRoundShift, true, lutRowPtr, false ); + int bfac = 1; + const int bifRoundAdd = BIF_ROUND_ADD >> currTU.cs->pps->getBIFStrength(); + const int bifRoundShift = BIF_ROUND_SHIFT - currTU.cs->pps->getBIFStrength(); + + const char* lutRowPtr = nullptr; + int cutBitsNum = 3; + + if (isLuma(compID)) + { + lutRowPtr = getFilterLutParameters(tempblock + iWidthExtSIMD * 2 + 2, iWidthExtSIMD, uiWidth, uiHeight, currTU.cu->predMode, qp + currTU.cs->pps->getBIFQPOffset(), bfac); + } + else + { +#if JVET_X0071_CHROMA_BILATERAL_FILTER +#if JVET_AF0112_BIF_DYNAMIC_SCALING + cutBitsNum = 2; +#endif + int widthForStrength = currTU.blocks[compID].width; + int heightForStrength = currTU.blocks[compID].height; + + if (currTU.blocks[COMPONENT_Y].valid()) + { + widthForStrength = currTU.blocks[COMPONENT_Y].width; + heightForStrength = currTU.blocks[COMPONENT_Y].height; + } + + lutRowPtr = getFilterLutParametersChroma(tempblock + iWidthExtSIMD * 2 + 2, iWidthExtSIMD, uiWidth, uiHeight, currTU.cu->predMode, qp + currTU.cs->pps->getChromaBIFQPOffset(), bfac, widthForStrength, heightForStrength, currTU.blocks[COMPONENT_Y].valid()); + + CHECK(doReshape, "Reshape domain is not used for chroma"); +#endif + } + m_bilateralFilterDiamond5x5(uiWidth, uiHeight, tempblock, tempblockFiltered, clpRng, piReco, uiRecStride, iWidthExtSIMD, bfac, bifRoundAdd, bifRoundShift, true, lutRowPtr, false, cutBitsNum); if( !useReco ) { @@ -659,29 +662,6 
@@ void BilateralFilter::bilateralFilterDiamond5x5( const ComponentID compID, const int recStride = rec.get( compID ).stride; Pel *recPtr = rec.get( compID ).bufAt(blkDst); - int bfac = 1; - const char *lutRowPtr = nullptr; - if( isLuma( compID ) ) - { - lutRowPtr = getFilterLutParameters( std::min( width, height ), currTU.cu->predMode, qp + currTU.cs->pps->getBIFQPOffset(), bfac ); - } - else - { - int widthForStrength = currTU.blocks[compID].width; - int heightForStrength = currTU.blocks[compID].height; - - if( currTU.blocks[COMPONENT_Y].valid() ) - { - widthForStrength = currTU.blocks[COMPONENT_Y].width; - heightForStrength = currTU.blocks[COMPONENT_Y].height; - } - - lutRowPtr = getFilterLutParametersChroma(std::min( uiWidth, uiHeight ), currTU.cu->predMode, qp + currTU.cs->pps->getChromaBIFQPOffset(), bfac, widthForStrength, heightForStrength, currTU.blocks[COMPONENT_Y].valid() ); - } - - int bifRoundAdd = BIF_ROUND_ADD >> currTU.cs->pps->getBIFStrength(); - int bifRoundShift = BIF_ROUND_SHIFT - currTU.cs->pps->getBIFStrength(); - bool topAltAvailable = !clipT; bool leftAltAvailable = !clipL; @@ -708,11 +688,7 @@ void BilateralFilter::bilateralFilterDiamond5x5( const ComponentID compID, const uint32_t uiWidthExt = uiWidth + (NUMBER_PADDED_SAMPLES << 1); uint32_t uiHeightExt = uiHeight + (NUMBER_PADDED_SAMPLES << 1); - int iWidthExtSIMD = uiWidthExt | 0x04; - if (uiWidth < 8) - { - iWidthExtSIMD = 8 + (NUMBER_PADDED_SAMPLES << 1); - } + int iWidthExtSIMD = uiWidthExt + 16; Pel *tempBlockPtr; @@ -897,7 +873,36 @@ void BilateralFilter::bilateralFilterDiamond5x5( const ComponentID compID, const } } - m_bilateralFilterDiamond5x5( uiWidth, uiHeight, tempblock, tempblockFiltered, clpRng, recPtr, recStride, iWidthExtSIMD, bfac, bifRoundAdd, bifRoundShift, false, lutRowPtr, false ); + int bfac = 1; + const char* lutRowPtr = nullptr; + int cutBitsNum = 3; + if (isLuma(compID)) + { + lutRowPtr = getFilterLutParameters(tempblock + iWidthExtSIMD * 2 + 2, iWidthExtSIMD, 
uiWidth, uiHeight, currTU.cu->predMode, qp + currTU.cs->pps->getBIFQPOffset(), bfac); + } + else + { +#if JVET_X0071_CHROMA_BILATERAL_FILTER +#if JVET_AF0112_BIF_DYNAMIC_SCALING + cutBitsNum = 2; +#endif + int widthForStrength = currTU.blocks[compID].width; + int heightForStrength = currTU.blocks[compID].height; + + if (currTU.blocks[COMPONENT_Y].valid()) + { + widthForStrength = currTU.blocks[COMPONENT_Y].width; + heightForStrength = currTU.blocks[COMPONENT_Y].height; + } + + lutRowPtr = getFilterLutParametersChroma(tempblock + iWidthExtSIMD * 2 + 2, iWidthExtSIMD, uiWidth, uiHeight, currTU.cu->predMode, qp + currTU.cs->pps->getChromaBIFQPOffset(), bfac, widthForStrength, heightForStrength, currTU.blocks[COMPONENT_Y].valid()); +#endif + } + + int bifRoundAdd = BIF_ROUND_ADD >> currTU.cs->pps->getBIFStrength(); + int bifRoundShift = BIF_ROUND_SHIFT - currTU.cs->pps->getBIFStrength(); + + m_bilateralFilterDiamond5x5(uiWidth, uiHeight, tempblock, tempblockFiltered, clpRng, recPtr, recStride, iWidthExtSIMD, bfac, bifRoundAdd, bifRoundShift, false, lutRowPtr, false, cutBitsNum); xStart = xEnd; } @@ -923,31 +928,6 @@ void BilateralFilter::bilateralFilterDiamond5x5( const ComponentID compID, const int recStride = rec.get( compID ).stride; Pel *recPtr = rec.get( compID ).bufAt(compArea); - int bfac = 1; - - const char *lutRowPtr = nullptr; - - if( isLuma( compID ) ) - { - lutRowPtr = getFilterLutParameters(std::min(uiWidth, uiHeight), currTU.cu->predMode, qp + currTU.cs->pps->getBIFQPOffset(), bfac); - } - else - { - int widthForStrength = currTU.blocks[compID].width; - int heightForStrength = currTU.blocks[compID].height; - - if( currTU.blocks[COMPONENT_Y].valid() ) - { - widthForStrength = currTU.blocks[COMPONENT_Y].width; - heightForStrength = currTU.blocks[COMPONENT_Y].height; - } - - lutRowPtr = getFilterLutParametersChroma( std::min( uiWidth, uiHeight ), currTU.cu->predMode, qp + currTU.cs->pps->getChromaBIFQPOffset(), bfac, widthForStrength, heightForStrength, 
currTU.blocks[COMPONENT_Y].valid() ); - } - - int bifRoundAdd = BIF_ROUND_ADD >> currTU.cs->pps->getBIFStrength(); - int bifRoundShift = BIF_ROUND_SHIFT - currTU.cs->pps->getBIFStrength(); - const CompArea &myArea = currTU.blocks[compID]; topAltAvailable = myArea.y - NUMBER_PADDED_SAMPLES >= 0; leftAltAvailable = myArea.x - NUMBER_PADDED_SAMPLES >= 0; @@ -962,11 +942,7 @@ void BilateralFilter::bilateralFilterDiamond5x5( const ComponentID compID, const uint32_t uiWidthExt = uiWidth + (NUMBER_PADDED_SAMPLES << 1); uint32_t uiHeightExt = uiHeight + (NUMBER_PADDED_SAMPLES << 1); - int iWidthExtSIMD = uiWidthExt | 0x04; - if (uiWidth < 8) - { - iWidthExtSIMD = 8 + (NUMBER_PADDED_SAMPLES << 1); - } + int iWidthExtSIMD = uiWidthExt + 16; Pel *tempBlockPtr; @@ -1163,7 +1139,37 @@ void BilateralFilter::bilateralFilterDiamond5x5( const ComponentID compID, const } } - m_bilateralFilterDiamond5x5(uiWidth, uiHeight, tempblock, tempblockFiltered, clpRng, recPtr, recStride, iWidthExtSIMD, bfac, bifRoundAdd, bifRoundShift, false, lutRowPtr, noClip); + int bfac = 1; + const char* lutRowPtr = nullptr; + int cutBitsNum = 3; + + if (isLuma(compID)) + { + lutRowPtr = getFilterLutParameters(tempblock + iWidthExtSIMD * 2 + 2, iWidthExtSIMD, uiWidth, uiHeight, currTU.cu->predMode, qp + currTU.cs->pps->getBIFQPOffset(), bfac); + } + else + { +#if JVET_X0071_CHROMA_BILATERAL_FILTER +#if JVET_AF0112_BIF_DYNAMIC_SCALING + cutBitsNum = 2; +#endif + int widthForStrength = currTU.blocks[compID].width; + int heightForStrength = currTU.blocks[compID].height; + + if (currTU.blocks[COMPONENT_Y].valid()) + { + widthForStrength = currTU.blocks[COMPONENT_Y].width; + heightForStrength = currTU.blocks[COMPONENT_Y].height; + } + + lutRowPtr = getFilterLutParametersChroma(tempblock + iWidthExtSIMD * 2 + 2, iWidthExtSIMD, uiWidth, uiHeight, currTU.cu->predMode, qp + currTU.cs->pps->getChromaBIFQPOffset(), bfac, widthForStrength, heightForStrength, currTU.blocks[COMPONENT_Y].valid()); +#endif + } + + int 
bifRoundAdd = BIF_ROUND_ADD >> currTU.cs->pps->getBIFStrength(); + int bifRoundShift = BIF_ROUND_SHIFT - currTU.cs->pps->getBIFStrength(); + + m_bilateralFilterDiamond5x5(uiWidth, uiHeight, tempblock, tempblockFiltered, clpRng, recPtr, recStride, iWidthExtSIMD, bfac, bifRoundAdd, bifRoundShift, false, lutRowPtr, noClip, cutBitsNum); } } void BilateralFilter::clipNotBilaterallyFilteredBlocks(const ComponentID compID, const CPelUnitBuf& src, PelUnitBuf& rec, const ClpRng& clpRng, TransformUnit & currTU) @@ -1395,8 +1401,6 @@ void BilateralFilter::bilateralFilterPicRDOperCTU( const ComponentID compID, Cod for( auto &currCU : cs.traverseCUs( CS::getArea( cs, ctuArea, chType ), chType ) ) { - bool isInter = ( currCU.predMode == MODE_INTER ) ? true : false; - bool valid = isLuma( compID ) ? true: currCU.blocks[compID].valid(); if( !valid ) { @@ -1405,6 +1409,10 @@ void BilateralFilter::bilateralFilterPicRDOperCTU( const ComponentID compID, Cod for (auto &currTU : CU::traverseTUs(currCU)) { +#if JVET_AF0112_BIF_DYNAMIC_SCALING + bool applyBIF = getApplyBIF(currTU, compID); +#else + bool isInter = (currCU.predMode == MODE_INTER) ? true : false; bool applyBIF = true; if( isLuma( compID ) ) { @@ -1430,6 +1438,7 @@ void BilateralFilter::bilateralFilterPicRDOperCTU( const ComponentID compID, Cod applyBIF = ( tuCBF || isInter == false ) && ( currTU.cu->qp > 17 ); } } +#endif // We should ideally also check the CTU-BIF-flag here. 
However, given that this function // is only called by the encoder, and the encoder always has CTU-BIF-flag on, there is no @@ -1579,8 +1588,28 @@ void BilateralFilter::bilateralFilterPicRDOperCTU( const ComponentID compID, Cod #endif #if JVET_X0071_CHROMA_BILATERAL_FILTER -const char* BilateralFilter::getFilterLutParametersChroma( const int size, const PredMode predMode, const int32_t qp, int& bfac, int widthForStrength, int heightForStrength, bool isLumaValid) +const char* BilateralFilter::getFilterLutParametersChroma(int16_t* block, const int stride, const int width, const int height, const PredMode predMode, const int32_t qp, int& bfac, int widthForStrength, int heightForStrength, bool isLumaValid) { +#if JVET_AF0112_BIF_DYNAMIC_SCALING + int w = floorLog2(widthForStrength); + int h = floorLog2(heightForStrength); + + int mad = m_calcMAD(block, stride, width, height, floorLog2(width) + floorLog2(height)); + + w = std::min(w, 7); + h = std::min(h, 7); + mad = std::min(mad >> 4, 15); + bfac = m_tuSizeFactorChroma[predMode == MODE_INTER][w * 8 + h]; + if (bfac) // BIF is not applied if tuSizeFactor[(w, h)] = 0 + { + bfac += m_tuMADFactorChroma[predMode == MODE_INTER][mad]; + } + + int sqp = qp - 17; + sqp = std::min(sqp, 25); + sqp = std::max(sqp, 0); + return m_lutChroma[sqp]; +#else int conditionForStrength = std::min(widthForStrength, heightForStrength); int T1 = 4; int T2 = 16; @@ -1632,6 +1661,39 @@ const char* BilateralFilter::getFilterLutParametersChroma( const int size, const sqp = maxQP; } return m_wBIFChroma[sqp - 17]; +#endif +} +#endif + +#if JVET_AF0112_BIF_DYNAMIC_SCALING +bool BilateralFilter::getApplyBIF(const TransformUnit& currTU, ComponentID compID) +{ + bool applyBIF = currTU.blocks[compID].valid() && (TU::getCbf(currTU, compID) || currTU.cu->predMode != MODE_INTER) && (currTU.cu->qp > 17); + if (applyBIF) + { + int w = currTU.blocks[compID].width, h = currTU.blocks[compID].height; + char(*factor)[64] = nullptr; +#if JVET_V0094_BILATERAL_FILTER 
+ if (isLuma(compID)) + { + factor = m_tuSizeFactor; + } +#endif +#if JVET_X0071_CHROMA_BILATERAL_FILTER + if (!isLuma(compID)) + { + if (currTU.blocks[COMPONENT_Y].valid()) + { + w = currTU.blocks[COMPONENT_Y].width, h = currTU.blocks[COMPONENT_Y].height; + } + factor = m_tuSizeFactorChroma; + } +#endif + w = floorLog2(w), h = floorLog2(h); + w = std::min(w, 7), h = std::min(h, 7); + applyBIF = factor && factor[currTU.cu->predMode == MODE_INTER][w * 8 + h]; + } + return applyBIF; } #endif diff --git a/source/Lib/CommonLib/BilateralFilter.h b/source/Lib/CommonLib/BilateralFilter.h index 6fa4504564d336f61133b749211433a971dd2965..5bf80a14228949eda6bec0ed4dbb5f1699f921d0 100644 --- a/source/Lib/CommonLib/BilateralFilter.h +++ b/source/Lib/CommonLib/BilateralFilter.h @@ -55,27 +55,52 @@ public: class BilateralFilter { private: -#ifdef TARGET_SIMD_X86 - __m128i tempblockSIMD[2320]; - __m128i tempblockFilteredSIMD[2320]; -#else - int64_t tempblockSIMD[2 * 2320]; - int64_t tempblockFilteredSIMD[2 * 2320]; -#endif - Pel *tempblock = (Pel*) tempblockSIMD; - Pel *tempblockFilteredTemp = (Pel*) (&tempblockFilteredSIMD[1]); - // SIMD method writes to tempblockFiltered + 4 so that address - // must be 128-aligned. Hence tempblockFilteredSIMD is a bit bigger than - // it otherwise would need to be and we don't use the first 14 bytes. - // We pad 8 bytes so required sizes is (128+8)*(128+8)*2+16 bytes = 37008 bytes - // = 2313 128-bit words which has been rounded up to 2320 above. - Pel *tempblockFiltered = &tempblockFilteredTemp[-2]; + // 128x128 is the max TU size, 4 is the padding for the considered neighborhood, 16 is the AVX buffer size. + // (128 + 4 + 16) * (128 + 4) = 19536. 
+ Pel tempblockSIMD[19536]; + Pel tempblockFilteredSIMD[19536]; + + Pel *tempblock = (Pel*)tempblockSIMD; + Pel* tempblockFiltered = (Pel*)tempblockFilteredSIMD; - void (*m_bilateralFilterDiamond5x5)( uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bifRoundAdd, int bifRoundShift, bool isRDO, const char* lutRowPtr, bool noClip); - static void blockBilateralFilterDiamond5x5( uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bifRoundAdd, int bifRoundShift, bool isRDO, const char* lutRowPtr, bool noClip ); + void (*m_bilateralFilterDiamond5x5)(uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bifRoundAdd, int bifRoundShift, bool isRDO, const char* lutRowPtr, bool noClip, int cutBitsNum); + static void blockBilateralFilterDiamond5x5(uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bifRoundAdd, int bifRoundShift, bool isRDO, const char* lutRowPtr, bool noClip, int cutBitsNum); + +#if JVET_AF0112_BIF_DYNAMIC_SCALING + int (*m_calcMAD)(int16_t* block, int stride, int width, int height, int whlog2); + static int calcMAD(int16_t* block, int stride, int width, int height, int whlog2); +#endif #if JVET_V0094_BILATERAL_FILTER char m_wBIF[26][16] = { +#if JVET_AF0112_BIF_DYNAMIC_SCALING + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* 17 */ + { 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* 18 */ + { 0, 2, 2, 2, 2, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* 19 */ + { 0, 3, 4, 4, 4, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, -1, }, /* 20 */ + { 0, 4, 5, 5, 5, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, -1, }, /* 21 */ + { 0, 5, 6, 6, 6, 4, 3, 3, 1, 1, 1, 1, 0, 0, 0, -1, }, /* 
22 */ + { 0, 6, 7, 7, 7, 5, 4, 4, 2, 2, 1, 1, 1, 0, 0, -1, }, /* 23 */ + { 0, 6, 8, 8, 8, 6, 5, 4, 3, 3, 2, 2, 1, 1, 0, -2, }, /* 24 */ + { 0, 7, 8, 9, 9, 7, 7, 5, 4, 4, 2, 2, 2, 1, 1, -2, }, /* 25 */ + { 0, 7, 9, 10, 10, 8, 8, 5, 5, 5, 3, 3, 2, 2, 1, -3, }, /* 26 */ + { 0, 8, 10, 11, 11, 9, 9, 6, 6, 6, 3, 3, 3, 2, 1, -3, }, /* 27 */ + { 0, 9, 11, 13, 13, 11, 11, 8, 8, 8, 5, 4, 4, 3, 2, -3, }, /* 28 */ + { 0, 10, 12, 14, 14, 13, 13, 10, 9, 9, 6, 5, 5, 4, 3, -4, }, /* 29 */ + { 0, 11, 13, 16, 16, 15, 15, 13, 11, 11, 8, 6, 6, 4, 4, -4, }, /* 30 */ + { 0, 12, 14, 17, 17, 17, 17, 15, 12, 12, 9, 7, 7, 5, 5, -5, }, /* 31 */ + { 0, 13, 15, 19, 19, 19, 19, 17, 14, 14, 11, 8, 8, 6, 6, -5, }, /* 32 */ + { 0, 14, 17, 20, 21, 21, 21, 19, 17, 16, 13, 10, 10, 7, 7, -5, }, /* 33 */ + { 0, 15, 19, 22, 23, 24, 23, 22, 20, 18, 15, 12, 11, 9, 7, -6, }, /* 34 */ + { 0, 17, 20, 23, 26, 26, 26, 24, 23, 20, 18, 15, 13, 10, 8, -6, }, /* 35 */ + { 0, 18, 22, 25, 28, 29, 28, 27, 26, 22, 20, 17, 14, 12, 8, -7, }, /* 36 */ + { 0, 19, 24, 26, 30, 31, 30, 29, 29, 24, 22, 19, 16, 13, 9, -7, }, /* 37 */ + { 0, 20, 26, 29, 32, 33, 32, 31, 31, 27, 24, 21, 17, 14, 11, -8, }, /* 38 */ + { 0, 21, 28, 31, 34, 35, 35, 34, 34, 30, 26, 23, 19, 15, 13, -8, }, /* 39 */ + { 0, 21, 30, 34, 36, 38, 37, 36, 36, 32, 29, 25, 20, 17, 15, -9, }, /* 40 */ + { 0, 22, 32, 36, 38, 40, 40, 39, 39, 35, 31, 27, 22, 18, 17, -9, }, /* 41 */ + { 0, 23, 34, 39, 40, 42, 42, 41, 41, 38, 33, 29, 23, 19, 19, -10, }, /* 42 */ +#else { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, { 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, { 0, 2, 2, 2, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, @@ -102,10 +127,49 @@ private: { 0, 15, 22, 31, 35, 39, 42, 42, 43, 41, 37, 25, 21, 17, 15, -3, }, { 0, 16, 23, 32, 36, 40, 43, 43, 44, 42, 39, 26, 21, 17, 15, -3, }, { 0, 17, 23, 33, 37, 41, 44, 44, 45, 44, 42, 27, 22, 17, 15, -3, }, +#endif + }; + char m_lut[26][3 * 16]; + char m_distFactor[3] = { 16, 12, 11, }; + char m_tuSizeFactor[2][64] = { 
+ { 10,10,10,9,8,6,5,4,10,10,10,9,8,6,5,4,10,10,10,9,8,6,5,4,9,9,9,7,6,5,5,4,8,8,8,6,5,5,5,4,6,6,6,5,5,5,5,4,5,5,5,5,5,5,5,4,4,4,4,4,4,4,4,4, }, + { 10,10,10,8,6,6,4,0,10,10,10,8,6,6,4,0,10,10,10,8,6,6,4,0,8,8,8,6,5,4,4,0,6,6,6,5,4,4,4,0,6,6,6,4,4,0,0,0,4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, }, + }; + char m_tuMADFactor[2][16] = { + { 0,0,0,1,2,3,4,5,6,6,6,6,7,7,7,7, }, + { 0,0,0,0,0,0,1,2,2,4,4,4,4,5,5,5, }, }; #endif #if JVET_X0071_CHROMA_BILATERAL_FILTER char m_wBIFChroma[26][16] = { +#if JVET_AF0112_BIF_DYNAMIC_SCALING + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* 17 */ + { 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 0, }, /* 18 */ + { 0, 2, 3, 3, 4, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 0, }, /* 19 */ + { 0, 3, 5, 5, 6, 5, 5, 5, 5, 5, 4, 2, 2, 2, 2, 1, }, /* 20 */ + { 0, 4, 6, 6, 8, 6, 6, 6, 6, 6, 6, 3, 3, 3, 3, 1, }, /* 21 */ + { 0, 5, 8, 8, 10, 8, 8, 8, 8, 8, 7, 4, 4, 4, 4, 1, }, /* 22 */ + { 0, 5, 8, 8, 10, 9, 9, 9, 9, 9, 8, 5, 5, 5, 4, 1, }, /* 23 */ + { 0, 5, 8, 8, 10, 9, 10, 10, 9, 9, 9, 6, 6, 6, 4, 2, }, /* 24 */ + { 0, 5, 8, 8, 10, 10, 10, 10, 10, 10, 9, 8, 6, 6, 5, 2, }, /* 25 */ + { 0, 5, 8, 8, 10, 10, 11, 11, 10, 10, 10, 9, 7, 7, 5, 3, }, /* 26 */ + { 0, 5, 8, 8, 10, 11, 12, 12, 11, 11, 11, 10, 8, 8, 5, 3, }, /* 27 */ + { 0, 5, 8, 9, 11, 12, 13, 13, 13, 13, 13, 13, 11, 11, 8, 5, }, /* 28 */ + { 0, 5, 8, 9, 11, 13, 14, 15, 15, 15, 16, 15, 13, 13, 11, 6, }, /* 29 */ + { 0, 6, 9, 10, 12, 13, 16, 16, 16, 16, 18, 18, 16, 16, 15, 8, }, /* 30 */ + { 0, 6, 9, 10, 12, 14, 17, 18, 18, 18, 21, 20, 18, 18, 18, 9, }, /* 31 */ + { 0, 6, 9, 11, 13, 15, 18, 19, 20, 20, 23, 23, 21, 21, 21, 11, }, /* 32 */ + { 0, 6, 9, 12, 14, 16, 18, 20, 21, 21, 24, 24, 22, 24, 21, 13, }, /* 33 */ + { 0, 6, 9, 13, 14, 17, 19, 21, 21, 21, 25, 25, 23, 27, 22, 14, }, /* 34 */ + { 0, 6, 10, 13, 15, 17, 19, 21, 22, 22, 25, 25, 25, 29, 22, 16, }, /* 35 */ + { 0, 6, 10, 14, 15, 18, 20, 22, 22, 22, 26, 26, 26, 32, 23, 17, }, /* 36 */ + { 0, 6, 10, 15, 16, 19, 20, 23, 23, 23, 27, 27, 27, 35, 
23, 19, }, /* 37 */ + { 0, 6, 11, 15, 17, 20, 21, 24, 25, 25, 28, 29, 29, 36, 26, 21, }, /* 38 */ + { 0, 6, 11, 15, 18, 21, 22, 25, 26, 26, 29, 30, 32, 37, 29, 24, }, /* 39 */ + { 0, 7, 12, 16, 18, 21, 22, 27, 28, 28, 29, 32, 34, 37, 33, 26, }, /* 40 */ + { 0, 7, 12, 16, 19, 22, 23, 28, 29, 29, 30, 33, 37, 38, 36, 29, }, /* 41 */ + { 0, 7, 13, 16, 20, 23, 24, 29, 31, 31, 31, 35, 39, 39, 39, 31, }, /* 42 */ +#else { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, { 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, { 0, 2, 2, 2, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, @@ -132,6 +196,17 @@ private: { 0, 12, 17, 24, 27, 30, 33, 33, 34, 32, 29, 20, 16, 13, 12, -2, }, { 0, 12, 18, 25, 28, 31, 34, 34, 34, 33, 30, 20, 16, 13, 12, -2, }, { 0, 13, 18, 26, 29, 32, 34, 34, 35, 34, 33, 21, 17, 13, 12, -2, }, +#endif + }; + char m_lutChroma[26][3 * 16]; + char m_distFactorChroma[3] = { 16, 8, 6, }; + char m_tuSizeFactorChroma[2][64] = { + { 9,9,9,9,8,8,8,0,9,9,9,9,8,8,8,0,9,9,8,8,8,8,8,0,9,9,8,8,8,8,8,0,8,8,8,8,8,8,8,0,8,8,8,8,8,8,8,0,8,8,8,8,8,8,8,0,0,0,0,0,0,0,0,0, }, + { 13,13,13,13,9,9,9,0,13,13,13,13,9,9,9,0,13,13,13,13,9,9,9,0,13,13,13,9,9,9,9,0,9,9,9,9,9,9,9,0,9,9,9,9,9,0,0,0,9,9,9,9,9,0,0,0,0,0,0,0,0,0,0,0, }, + }; + char m_tuMADFactorChroma[2][16] = { + { 0,1,3,4,5,7,9,11,11,11,11,11,11,11,11,11, }, + { 0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1, }, }; #endif public: @@ -142,26 +217,34 @@ public: void destroy(); #if JVET_V0094_BILATERAL_FILTER void bilateralFilterRDOdiamond5x5(const ComponentID compID, PelBuf& resiBuf, const CPelBuf& predBuf, PelBuf& recoBuf, int32_t qp, const CPelBuf& recIPredBuf, const ClpRng& clpRng, TransformUnit & currTU, bool useReco, bool doReshape = false, std::vector<Pel>* pLUT = nullptr); - void bilateralFilterPicRDOperCTU( const ComponentID compID, CodingStructure& cs, PelUnitBuf& src,BIFCabacEst* bifCABACEstimator); - void bilateralFilterDiamond5x5( const ComponentID compID, const CPelUnitBuf& src, PelUnitBuf& rec, int32_t qp, const ClpRng& clpRng, 
TransformUnit & currTU, bool noClip + void bilateralFilterPicRDOperCTU(const ComponentID compID, CodingStructure& cs, PelUnitBuf& src,BIFCabacEst* bifCABACEstimator); + void bilateralFilterDiamond5x5(const ComponentID compID, const CPelUnitBuf& src, PelUnitBuf& rec, int32_t qp, const ClpRng& clpRng, TransformUnit & currTU, bool noClip #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY , bool isCtuCrossedByVirtualBoundaries, int horVirBndryPos[], int verVirBndryPos[], int numHorVirBndry, int numVerVirBndry , bool clipTop, bool clipBottom, bool clipLeft, bool clipRight #endif ); - const char* getFilterLutParameters( const int size, const PredMode predMode, const int qp, int& bfac ); - void clipNotBilaterallyFilteredBlocks( const ComponentID compID, const CPelUnitBuf& src, PelUnitBuf& rec, const ClpRng& clpRng, TransformUnit & currTU); -#if JVET_X0071_CHROMA_BILATERAL_FILTER - const char* getFilterLutParametersChroma( const int size, const PredMode predMode, const int qp, int& bfac, int widthForStrength, int heightForStrength, bool isLumaValid); + const char* getFilterLutParameters(int16_t* block, const int stride, const int width, const int height, const PredMode predMode, const int qp, int& bfac); + void clipNotBilaterallyFilteredBlocks(const ComponentID compID, const CPelUnitBuf& src, PelUnitBuf& rec, const ClpRng& clpRng, TransformUnit & currTU); #endif + +#if JVET_X0071_CHROMA_BILATERAL_FILTER + const char* getFilterLutParametersChroma(int16_t* block, const int stride, const int width, const int height, const PredMode predMode, const int qp, int& bfac, int widthForStrength, int heightForStrength, bool isLumaValid); #endif +#if JVET_AF0112_BIF_DYNAMIC_SCALING + bool getApplyBIF(const TransformUnit& currTU, ComponentID compID); +#endif #if ENABLE_SIMD_BILATERAL_FILTER || JVET_X0071_CHROMA_BILATERAL_FILTER_ENABLE_SIMD #ifdef TARGET_SIMD_X86 template<X86_VEXT vext> - static void simdFilterDiamond5x5( uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], 
const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bifRoundAdd, int bifRoundShift, bool isRDO, const char* lutRowPtr, bool noClip ); + static void simdFilterDiamond5x5(uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bifRoundAdd, int bifRoundShift, bool isRDO, const char* lutRowPtr, bool noClip, int cutBitsNum); +#if JVET_AF0112_BIF_DYNAMIC_SCALING + template<X86_VEXT vext> + static int simdCalcMAD(int16_t* block, int stride, int width, int height, int whlog2); +#endif void initBilateralFilterX86(); template <X86_VEXT vext> diff --git a/source/Lib/CommonLib/Picture.h b/source/Lib/CommonLib/Picture.h index 4f0803624f908b98cd8cb395b16350385191f923..86c8084b573e09182d4b6a89e43f00e27c3ae2fa 100644 --- a/source/Lib/CommonLib/Picture.h +++ b/source/Lib/CommonLib/Picture.h @@ -364,7 +364,7 @@ public: #if JVET_X0071_CHROMA_BILATERAL_FILTER BifParams m_bifParams[MAX_NUM_COMPONENT]; #else - BifParams m_bifParams; + BifParams m_bifParams[1]; #endif #endif std::vector<uint8_t> m_alfCtuEnableFlag[MAX_NUM_COMPONENT]; diff --git a/source/Lib/CommonLib/SampleAdaptiveOffset.cpp b/source/Lib/CommonLib/SampleAdaptiveOffset.cpp index 97d8143d91f8034d7a21d7b1454d049118db233f..04f37c0117543433c742b93226b96c869510a502 100644 --- a/source/Lib/CommonLib/SampleAdaptiveOffset.cpp +++ b/source/Lib/CommonLib/SampleAdaptiveOffset.cpp @@ -174,6 +174,9 @@ void SampleAdaptiveOffset::create( int picWidth, int picHeight, ChromaFormat for ::memset(m_ccSaoControl[compIdx], 0, sizeof(uint8_t) * m_numCTUsInPic); } #endif +#if JVET_V0094_BILATERAL_FILTER || JVET_X0071_CHROMA_BILATERAL_FILTER + m_bilateralFilter.create(); +#endif } void SampleAdaptiveOffset::destroy() @@ -1156,8 +1159,17 @@ void SampleAdaptiveOffset::SAOProcess( CodingStructure& cs, SAOBlkParam* saoBlkP { for (auto& currTU : CU::traverseTUs(currCU)) { - bool isInter = (currCU.predMode == MODE_INTER) ? 
true : false; - if (bifParams.ctuOn[ctuRsAddr] && ((TU::getCbf(currTU, COMPONENT_Y) || isInter == false) && (currTU.cu->qp > 17)) && (128 > std::max(currTU.lumaSize().width, currTU.lumaSize().height)) && ((isInter == false) || (32 > std::min(currTU.lumaSize().width, currTU.lumaSize().height)))) + bool applyBIF = bifParams.ctuOn[ctuRsAddr]; + if (applyBIF) + { +#if JVET_AF0112_BIF_DYNAMIC_SCALING + applyBIF = m_bilateralFilter.getApplyBIF(currTU, COMPONENT_Y); +#else + bool isInter = (currCU.predMode == MODE_INTER) ? true : false; + applyBIF = ((TU::getCbf(currTU, COMPONENT_Y) || isInter == false) && (currTU.cu->qp > 17)) && (128 > std::max(currTU.lumaSize().width, currTU.lumaSize().height)) && ((isInter == false) || (32 > std::min(currTU.lumaSize().width, currTU.lumaSize().height))); +#endif + } + if (applyBIF) { #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY bool clipTop = false, clipBottom = false, clipLeft = false, clipRight = false; @@ -1181,8 +1193,6 @@ void SampleAdaptiveOffset::SAOProcess( CodingStructure& cs, SAOBlkParam* saoBlkP #if JVET_X0071_CHROMA_BILATERAL_FILTER if(cs.pps->getUseChromaBIF()) { - bool tuValid = false; - bool tuCBF = false; bool isDualTree = CS::isDualITree(cs); ChannelType chType = isDualTree ? CH_C : CH_L; bool applyChromaBIF = false; @@ -1202,7 +1212,6 @@ void SampleAdaptiveOffset::SAOProcess( CodingStructure& cs, SAOBlkParam* saoBlkP } for (auto &currTU : CU::traverseTUs(currCU)) { - bool isInter = (currCU.predMode == MODE_INTER) ? 
true : false; for(int compIdx = COMPONENT_Cb; compIdx < MAX_NUM_COMPONENT; compIdx++) { applyChromaBIF = false; @@ -1210,6 +1219,12 @@ void SampleAdaptiveOffset::SAOProcess( CodingStructure& cs, SAOBlkParam* saoBlkP BifParams& chromaBifParams = cs.picture->getBifParam( compID ); bool ctuEnableChromaBIF = chromaBifParams.ctuOn[ctuRsAddr]; +#if JVET_AF0112_BIF_DYNAMIC_SCALING + applyChromaBIF = ctuEnableChromaBIF && m_bilateralFilter.getApplyBIF(currTU, compID); +#else + bool tuValid = false; + bool tuCBF = false; + bool isInter = (currCU.predMode == MODE_INTER) ? true : false; if(!isDualTree) { tuValid = currTU.blocks[compIdx].valid(); @@ -1225,6 +1240,7 @@ void SampleAdaptiveOffset::SAOProcess( CodingStructure& cs, SAOBlkParam* saoBlkP tuCBF = TU::getCbf(currTU, compID); applyChromaBIF = (ctuEnableChromaBIF && ((tuCBF || isInter == false) && (currTU.cu->qp > 17))); } +#endif if(applyChromaBIF) { @@ -1301,20 +1317,29 @@ void SampleAdaptiveOffset::SAOProcess( CodingStructure& cs, SAOBlkParam* saoBlkP for (auto &currTU : CU::traverseTUs(currCU)) { - bool isInter = (currCU.predMode == MODE_INTER) ? true : false; - if ( bifParams.ctuOn[ctuRsAddr] && ((TU::getCbf(currTU, COMPONENT_Y) || isInter == false) && (currTU.cu->qp > 17)) && (128 > std::max(currTU.lumaSize().width, currTU.lumaSize().height)) && ((isInter == false) || (32 > std::min(currTU.lumaSize().width, currTU.lumaSize().height)))) + bool applyBIF = bifParams.ctuOn[ctuRsAddr]; + if (applyBIF) + { +#if JVET_AF0112_BIF_DYNAMIC_SCALING + applyBIF = m_bilateralFilter.getApplyBIF(currTU, COMPONENT_Y); +#else + bool isInter = (currCU.predMode == MODE_INTER) ? 
true : false; + applyBIF = ((TU::getCbf(currTU, COMPONENT_Y) || isInter == false) && (currTU.cu->qp > 17)) && (128 > std::max(currTU.lumaSize().width, currTU.lumaSize().height)) && ((isInter == false) || (32 > std::min(currTU.lumaSize().width, currTU.lumaSize().height))); +#endif + } + if (applyBIF) { #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY bool clipTop = false, clipBottom = false, clipLeft = false, clipRight = false; int numHorVirBndry = 0, numVerVirBndry = 0; int horVirBndryPos[] = { 0, 0, 0 }; int verVirBndryPos[] = { 0, 0, 0 }; - bool isTUCrossedByVirtualBoundaries = bilateralFilter.isCrossedByVirtualBoundaries( + bool isTUCrossedByVirtualBoundaries = m_bilateralFilter.isCrossedByVirtualBoundaries( cs, currTU.Y().x, currTU.Y().y, currTU.lumaSize().width, currTU.lumaSize().height, clipTop, clipBottom, clipLeft, clipRight, numHorVirBndry, numVerVirBndry, horVirBndryPos, verVirBndryPos); #endif - m_bilateralFilter.bilateralFilterDiamond5x5( COMPONENT_Y, m_tempBuf, rec, currTU.cu->qp, cs.slice->clpRng(COMPONENT_Y), currTU + m_bilateralFilter.bilateralFilterDiamond5x5( COMPONENT_Y, m_tempBuf, rec, currTU.cu->qp, cs.slice->clpRng(COMPONENT_Y), currTU, false #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY , isTUCrossedByVirtualBoundaries, horVirBndryPos, verVirBndryPos, numHorVirBndry, numVerVirBndry , clipTop, clipBottom, clipLeft, clipRight @@ -1347,9 +1372,7 @@ void SampleAdaptiveOffset::SAOProcess( CodingStructure& cs, SAOBlkParam* saoBlkP } } if(cs.pps->getUseChromaBIF()) - { - bool tuValid = false; - bool tuCBF = false; + { bool isDualTree = CS::isDualITree(cs); ChannelType chType = isDualTree ? CH_C : CH_L; bool applyChromaBIF = false; @@ -1363,12 +1386,17 @@ void SampleAdaptiveOffset::SAOProcess( CodingStructure& cs, SAOBlkParam* saoBlkP } for (auto &currTU : CU::traverseTUs(currCU)) { - bool isInter = (currCU.predMode == MODE_INTER) ? 
true : false; for(int compIdx = COMPONENT_Cb; compIdx < MAX_NUM_COMPONENT; compIdx++) { ComponentID compID = ComponentID( compIdx ); BifParams& chromaBifParams = cs.picture->getBifParam( compID ); bool ctuEnableChromaBIF = chromaBifParams.ctuOn[ctuRsAddr]; +#if JVET_AF0112_BIF_DYNAMIC_SCALING + applyChromaBIF = ctuEnableChromaBIF && m_bilateralFilter.getApplyBIF(currTU, compID); +#else + bool tuValid = false; + bool tuCBF = false; + bool isInter = (currCU.predMode == MODE_INTER) ? true : false; applyChromaBIF = false; if(!isDualTree) { @@ -1385,6 +1413,7 @@ void SampleAdaptiveOffset::SAOProcess( CodingStructure& cs, SAOBlkParam* saoBlkP tuCBF = TU::getCbf(currTU, compID); applyChromaBIF = (ctuEnableChromaBIF && ((tuCBF || isInter == false) && (currTU.cu->qp > 17))); } +#endif if(applyChromaBIF) { #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY @@ -1402,7 +1431,7 @@ void SampleAdaptiveOffset::SAOProcess( CodingStructure& cs, SAOBlkParam* saoBlkP clipLeft, clipRight, numHorVirBndry, numVerVirBndry, horVirBndryPos, verVirBndryPos); #endif - m_bilateralFilter.bilateralFilterDiamond5x5( compID, m_tempBuf, rec, currTU.cu->qp, cs.slice->clpRng(compID), currTU, false, + m_bilateralFilter.bilateralFilterDiamond5x5( compID, m_tempBuf, rec, currTU.cu->qp, cs.slice->clpRng(compID), currTU, false #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY , isTUCrossedByVirtualBoundaries, horVirBndryPos, verVirBndryPos, numHorVirBndry, numVerVirBndry, clipTop, clipBottom, clipLeft, clipRight #endif @@ -1517,6 +1546,9 @@ void SampleAdaptiveOffset::SAOProcess( CodingStructure& cs, SAOBlkParam* saoBlkP BifParams& chromaBifParams = cs.picture->getBifParam( compID ); bool ctuEnableChromaBIF = chromaBifParams.ctuOn[ctuRsAddr]; +#if JVET_AF0112_BIF_DYNAMIC_SCALING + applyChromaBIF = ctuEnableChromaBIF && m_bilateralFilter.getApplyBIF(currTU, compID); +#else applyChromaBIF = false; if(!isDualTree) { @@ -1533,6 +1565,7 @@ void SampleAdaptiveOffset::SAOProcess( CodingStructure& cs, SAOBlkParam* saoBlkP 
tuCBF = TU::getCbf(currTU, compID); applyChromaBIF = (ctuEnableChromaBIF && ((tuCBF || isInter == false) && (currTU.cu->qp > 17))); } +#endif if(applyChromaBIF) { #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY @@ -1723,8 +1756,13 @@ void SampleAdaptiveOffset::jointClipSaoBifCcSao(CodingStructure& cs) { for( auto& currTU : CU::traverseTUs( currCU ) ) { +#if JVET_AF0112_BIF_DYNAMIC_SCALING + bool applyBIF = bifParams.ctuOn[ctuRsAddr] && m_bilateralFilter.getApplyBIF(currTU, compID); +#else bool isInter = ( currCU.predMode == MODE_INTER ) ? true : false; - if( bifParams.ctuOn[ctuRsAddr] && ( ( TU::getCbf( currTU, compID ) || isInter == false ) && ( currTU.cu->qp > 17 ) ) && ( 128 > std::max( currTU.lumaSize().width, currTU.lumaSize().height ) ) && ( ( isInter == false ) || ( 32 > std::min( currTU.lumaSize().width, currTU.lumaSize().height ) ) ) ) + bool applyBIF = bifParams.ctuOn[ctuRsAddr] && ((TU::getCbf(currTU, compID) || isInter == false) && (currTU.cu->qp > 17)) && (128 > std::max(currTU.lumaSize().width, currTU.lumaSize().height)) && ((isInter == false) || (32 > std::min(currTU.lumaSize().width, currTU.lumaSize().height))); +#endif + if (applyBIF) { m_bilateralFilter.clipNotBilaterallyFilteredBlocks( compID, m_tempBuf, dstYuv, cs.slice->clpRng( compID ), currTU ); } @@ -1734,8 +1772,6 @@ void SampleAdaptiveOffset::jointClipSaoBifCcSao(CodingStructure& cs) #if JVET_X0071_CHROMA_BILATERAL_FILTER if( cs.pps->getUseChromaBIF() && isChroma( compID ) ) { - bool tuValid = false; - bool tuCBF = false; bool ctuEnableChromaBIF = false; bool isDualTree = CS::isDualITree( cs ); ChannelType chType = isDualTree ? CH_C : CH_L; @@ -1749,12 +1785,17 @@ void SampleAdaptiveOffset::jointClipSaoBifCcSao(CodingStructure& cs) } for( auto &currTU : CU::traverseTUs( currCU ) ) { - bool isInter = ( currCU.predMode == MODE_INTER ) ? 
true : false; //Cb or Cr applyChromaBIF = false; BifParams& chromaBifParams = cs.picture->getBifParam( compID ); ctuEnableChromaBIF = chromaBifParams.ctuOn[ctuRsAddr]; +#if JVET_AF0112_BIF_DYNAMIC_SCALING + applyChromaBIF = ctuEnableChromaBIF && m_bilateralFilter.getApplyBIF(currTU, compID); +#else + bool tuValid = false; + bool tuCBF = false; + bool isInter = (currCU.predMode == MODE_INTER) ? true : false; if( !isDualTree ) { tuValid = currTU.blocks[compIdx].valid(); @@ -1770,6 +1811,7 @@ void SampleAdaptiveOffset::jointClipSaoBifCcSao(CodingStructure& cs) tuCBF = TU::getCbf( currTU, compID ); applyChromaBIF = ( ctuEnableChromaBIF && ( ( tuCBF || isInter == false ) && ( currTU.cu->qp > 17 ) ) ); } +#endif if( applyChromaBIF ) { m_bilateralFilter.clipNotBilaterallyFilteredBlocks( compID, m_tempBuf, dstYuv, cs.slice->clpRng( compID ), currTU ); diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index a03ea17bb02bb233c5b455567b50ac847c0666f8..b976de3bac19fc37ce44269b7a2093bce917d260 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -397,6 +397,7 @@ #define JVET_AE0139_ALF_IMPROVED_FIXFILTER 1 // JVET-AE0139: Improved ALF fixed filter #define JVET_AE0151_CCSAO_HISTORY_OFFSETS_AND_EXT_EO 1 // JVET-AE0151: CCSAO with history offsets and extended edge classifiers #define JVET_AF0197_LUMA_RESIDUAL_TAP_IN_CCALF 1 // JVET-AF0197: Luma Residual Tap in CCALF +#define JVET_AF0112_BIF_DYNAMIC_SCALING 1 // JVET-AF0112: Dynamic TU scale factor for BIF with LUTs interpolation // SIMD optimizations #if IF_12TAP diff --git a/source/Lib/CommonLib/x86/BilateralFilterX86.h b/source/Lib/CommonLib/x86/BilateralFilterX86.h index 7f39a283fa644e9880eff236cd11733f4fc4e4c3..9e3262765f16f6344f0319d555a55582150dfcff 100644 --- a/source/Lib/CommonLib/x86/BilateralFilterX86.h +++ b/source/Lib/CommonLib/x86/BilateralFilterX86.h @@ -44,33 +44,317 @@ #endif #if ENABLE_SIMD_BILATERAL_FILTER || JVET_X0071_CHROMA_BILATERAL_FILTER_ENABLE_SIMD + 
+#if USE_AVX2 + +#if JVET_AF0112_BIF_DYNAMIC_SCALING +inline void simdBifApplyLut(__m256i& val, __m256i& acc, int cutBitsNum, __m256i& bitsRound, __m256i& bitsRound2, __m256i& lut) +#else +inline void simdBifApplyLut(__m256i& val, __m256i& acc, __m256i& lut, int lutShift) +#endif +{ + __m256i diffabs = _mm256_abs_epi16(val); /* absolute value */ +#if JVET_AF0112_BIF_DYNAMIC_SCALING + __m256i diffabs2 = _mm256_add_epi16(diffabs, bitsRound2); /* + bitsRound2 */ + diffabs2 = _mm256_srli_epi16(diffabs2, cutBitsNum); /* >> cutBitsNum */ + diffabs = _mm256_add_epi16(diffabs, bitsRound); /* + bitsRound */ + diffabs = _mm256_srli_epi16(diffabs, cutBitsNum); /* >> cutBitsNum */ + diffabs = _mm256_packus_epi16(diffabs2, diffabs); /* convert to 8 bits */ + diffabs = _mm256_permute4x64_epi64(diffabs, 0xD8); /* permute bytes */ + diffabs = _mm256_min_epu8(diffabs, _mm256_set1_epi8(15)); /* min(x,15) */ + diffabs = _mm256_shuffle_epi8(lut, diffabs); /* lut */ + diffabs2 = _mm256_cvtepi8_epi16(*((__m128i*) & diffabs)); /* back to 16-bit */ + diffabs = _mm256_cvtepi8_epi16(*((__m128i*) & diffabs + 1)); /* back to 16-bit */ + diffabs = _mm256_srai_epi16(_mm256_add_epi16(_mm256_add_epi16(diffabs, diffabs2), _mm256_set1_epi16(1)), 1); /* (v1 + v2 + 1) >> 1 */ +#else + diffabs = _mm256_add_epi16(diffabs, _mm256_set1_epi16(4)); /* +4 */ + diffabs = _mm256_srai_epi16(diffabs, 3); /* >> 3 */ + diffabs = _mm256_min_epi16(diffabs, _mm256_set1_epi16(15)); /* min(x,15) */ + diffabs = _mm256_packus_epi16(diffabs, _mm256_permute2f128_si256(diffabs, diffabs, 0x01)); /* convert to 8 */ + diffabs = _mm256_shuffle_epi8(lut, diffabs); /* lut */ + diffabs = _mm256_cvtepi8_epi16(_mm256_castsi256_si128(diffabs)); /* back to 16-bit */ + diffabs = _mm256_srai_epi16(diffabs, lutShift); /* diagonal shift! 
*/ +#endif + diffabs = _mm256_sign_epi16(diffabs, val); /* add original sign */ + acc = _mm256_add_epi16(diffabs, acc); /* add to acc */ +} + template<X86_VEXT vext> -void BilateralFilter::simdFilterDiamond5x5( uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bifRoundAdd, int bifRoundShift, bool isRDO, const char* lutRowPtr, bool noClip ) +void BilateralFilter::simdFilterDiamond5x5(uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bifRoundAdd, int bifRoundShift, bool isRDO, const char* lutRowPtr, bool noClip, int cutBitsNum) +{ + //if( uiWidth < 4 || ( uiWidth < 8 && isRDO ) ) + if (uiWidth < 4) + { + return blockBilateralFilterDiamond5x5(uiWidth, uiHeight, block, blkFilt, clpRng, recPtr, recStride, iWidthExtSIMD, bfac, bifRoundAdd, bifRoundShift, isRDO, lutRowPtr, noClip, cutBitsNum); + } + + int pad = 2; + int padwidth = iWidthExtSIMD; + + __m256i center, left, right, up, down, lu, ld, ru, rd, acc, roundAdd, clipmin, clipmax, inputVals; + __m256i ll, rr, uu, dd; + __m128i lutTmp; + + clipmin = _mm256_set1_epi16(clpRng.min); + clipmax = _mm256_set1_epi16(clpRng.max); + + acc = _mm256_set1_epi32(0); +#if JVET_AF0112_BIF_DYNAMIC_SCALING + lutTmp = _mm_loadu_si128((__m128i*)(lutRowPtr)); + __m256i lut1 = _mm256_set_m128i(lutTmp, lutTmp); + lutTmp = _mm_loadu_si128((__m128i*)(lutRowPtr + 16)); + __m256i lut2 = _mm256_set_m128i(lutTmp, lutTmp); + lutTmp = _mm_loadu_si128((__m128i*)(lutRowPtr + 32)); + __m256i lut3 = _mm256_set_m128i(lutTmp, lutTmp); + __m256i mmBfac = _mm256_set1_epi16(bfac); + roundAdd = _mm256_set1_epi16(bifRoundAdd << 3); + __m256i bitsRound = _mm256_set1_epi16(1 << (cutBitsNum - 2)); + __m256i bitsRound2 = _mm256_set1_epi16((1 << (cutBitsNum - 2)) + (1 << (cutBitsNum - 1))); +#else + lutTmp = _mm_loadu_si128((__m128i*)(lutRowPtr)); + 
__m256i lut = _mm256_set_m128i(lutTmp, lutTmp); + int lutShift1 = 0, lutShift2 = 1, lutShift3 = 1; + roundAdd = _mm256_set1_epi16(bifRoundAdd); +#endif + + for (int row = 0; row < uiHeight; row++) + { + for (int col = 0; col < uiWidth; col += 16) + { + acc = _mm256_set1_epi32(0); + int16_t* point = &block[(row + pad) * padwidth + pad + col]; + + center = _mm256_loadu_si256((__m256i*)(point)); + + //load neighbours + left = _mm256_loadu_si256((__m256i*)(point - 1)); + right = _mm256_loadu_si256((__m256i*)(point + 1)); + up = _mm256_loadu_si256((__m256i*)(point - padwidth)); + down = _mm256_loadu_si256((__m256i*)(point + padwidth)); + + lu = _mm256_loadu_si256((__m256i*)(point - 1 - padwidth)); + ld = _mm256_loadu_si256((__m256i*)(point - 1 + padwidth)); + ru = _mm256_loadu_si256((__m256i*)(point + 1 - padwidth)); + rd = _mm256_loadu_si256((__m256i*)(point + 1 + padwidth)); + + ll = _mm256_loadu_si256((__m256i*)(point - 2)); + rr = _mm256_loadu_si256((__m256i*)(point + 2)); + uu = _mm256_loadu_si256((__m256i*)(point - 2 * padwidth)); + dd = _mm256_loadu_si256((__m256i*)(point + 2 * padwidth)); + + //calculate diffs + left = _mm256_sub_epi16(left, center); + right = _mm256_sub_epi16(right, center); + up = _mm256_sub_epi16(up, center); + down = _mm256_sub_epi16(down, center); + + lu = _mm256_sub_epi16(lu, center); + ld = _mm256_sub_epi16(ld, center); + ru = _mm256_sub_epi16(ru, center); + rd = _mm256_sub_epi16(rd, center); + + ll = _mm256_sub_epi16(ll, center); + rr = _mm256_sub_epi16(rr, center); + uu = _mm256_sub_epi16(uu, center); + dd = _mm256_sub_epi16(dd, center); + + // apply LUT +#if JVET_AF0112_BIF_DYNAMIC_SCALING + simdBifApplyLut(left, acc, cutBitsNum, bitsRound, bitsRound2, lut1); + simdBifApplyLut(right, acc, cutBitsNum, bitsRound, bitsRound2, lut1); + simdBifApplyLut(up, acc, cutBitsNum, bitsRound, bitsRound2, lut1); + simdBifApplyLut(down, acc, cutBitsNum, bitsRound, bitsRound2, lut1); + + simdBifApplyLut(lu, acc, cutBitsNum, bitsRound, bitsRound2, 
lut2); + simdBifApplyLut(ld, acc, cutBitsNum, bitsRound, bitsRound2, lut2); + simdBifApplyLut(ru, acc, cutBitsNum, bitsRound, bitsRound2, lut2); + simdBifApplyLut(rd, acc, cutBitsNum, bitsRound, bitsRound2, lut2); + + simdBifApplyLut(ll, acc, cutBitsNum, bitsRound, bitsRound2, lut3); + simdBifApplyLut(rr, acc, cutBitsNum, bitsRound, bitsRound2, lut3); + simdBifApplyLut(uu, acc, cutBitsNum, bitsRound, bitsRound2, lut3); + simdBifApplyLut(dd, acc, cutBitsNum, bitsRound, bitsRound2, lut3); +#else + simdBifApplyLut(left, acc, lut, lutShift1); + simdBifApplyLut(right, acc, lut, lutShift1); + simdBifApplyLut(up, acc, lut, lutShift1); + simdBifApplyLut(down, acc, lut, lutShift1); + + simdBifApplyLut(lu, acc, lut, lutShift2); + simdBifApplyLut(ld, acc, lut, lutShift2); + simdBifApplyLut(ru, acc, lut, lutShift2); + simdBifApplyLut(rd, acc, lut, lutShift2); + + simdBifApplyLut(ll, acc, lut, lutShift3); + simdBifApplyLut(rr, acc, lut, lutShift3); + simdBifApplyLut(uu, acc, lut, lutShift3); + simdBifApplyLut(dd, acc, lut, lutShift3); +#endif + + // TU scaling +#if JVET_AF0112_BIF_DYNAMIC_SCALING + acc = _mm256_mullo_epi16(acc, mmBfac); + acc = _mm256_adds_epi16(acc, roundAdd); + acc = _mm256_srai_epi16(acc, bifRoundShift + 3); +#else + if (bfac == 2) + { + acc = _mm256_slli_epi16(acc, 1); // Shift left to get 2* + } + else if (bfac == 3) + { + acc = _mm256_add_epi16(acc, _mm256_slli_epi16(acc, 1)); // Multiply by two by shifting left and add original value to get 3* + } + + // Add 16 and shift 5 + acc = _mm256_add_epi16(acc, roundAdd); + acc = _mm256_srai_epi16(acc, bifRoundShift); +#endif + + // Instead we add our input values to the delta + if (isRDO) + { + acc = _mm256_add_epi16(acc, center); + } + else + { + int16_t* recpoint = &recPtr[row * recStride + col]; + inputVals = _mm256_loadu_si256((__m256i*)(recpoint)); + acc = _mm256_add_epi16(acc, inputVals); + } + + // Clip +#if JVET_W0066_CCSAO + if (isRDO || !noClip) +#endif + { + acc = _mm256_max_epi16(acc, clipmin); + acc 
= _mm256_min_epi16(acc, clipmax); + } + + _mm256_storeu_si256((__m256i*)(blkFilt + (row + pad) * padwidth + col + pad), acc); + } + } + + // Copy back from tempbufFilter to recBuf + int onerow = uiWidth * sizeof(Pel); + // Copy back parameters + Pel* tempBlockPtr = blkFilt + pad * padwidth + pad; + for (uint32_t yy = 0; yy < uiHeight; yy++) + { + std::memcpy(recPtr, tempBlockPtr, onerow); + recPtr += recStride; + tempBlockPtr += padwidth; + } +} + +#if JVET_AF0112_BIF_DYNAMIC_SCALING +template<X86_VEXT vext> +int BilateralFilter::simdCalcMAD(int16_t* block, int stride, int width, int height, int whlog2) +{ + if (width < 8) + { + return calcMAD(block, stride, width, height, whlog2); + } + int sum32[8]; + __m256i acc32, val, average; + acc32 = _mm256_setzero_si256(); + for (int i = 0; i < height; i++) + { + for (int j = 0; j < width; j += 8) + { + val = _mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i*)(block + j))); + acc32 = _mm256_add_epi32(acc32, val); + } + block += stride; + } + block -= stride * height; + acc32 = _mm256_hadd_epi32(acc32, acc32); + acc32 = _mm256_hadd_epi32(acc32, acc32); + _mm256_storeu_si256((__m256i*)sum32, acc32); + average = _mm256_set1_epi32((sum32[0] + sum32[4] + (1 << (whlog2 - 1))) >> whlog2); + acc32 = _mm256_setzero_si256(); + for (int i = 0; i < height; i++) + { + for (int j = 0; j < width; j += 8) + { + val = _mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i*)(block + j))); + acc32 = _mm256_add_epi32(acc32, _mm256_abs_epi32(_mm256_sub_epi32(val, average))); + } + block += stride; + } + acc32 = _mm256_hadd_epi32(acc32, acc32); + acc32 = _mm256_hadd_epi32(acc32, acc32); + _mm256_storeu_si256((__m256i*)sum32, acc32); + return (sum32[0] + sum32[4] + (1 << (whlog2 - 1))) >> whlog2; +} +#endif + +#else // USE_AVX2 + +#if JVET_AF0112_BIF_DYNAMIC_SCALING +inline void simdBifApplyLut(__m128i& val, __m128i& acc, int cutBitsNum, __m128i& bitsRound, __m128i& bitsRound2, __m128i& lut) +#else +inline void simdBifApplyLut(__m128i& val, __m128i& acc, 
__m128i& lut, int lutShift) +#endif +{ + __m128i diffabs = _mm_abs_epi16(val); /* absolute value */ +#if JVET_AF0112_BIF_DYNAMIC_SCALING + __m128i diffabs2 = _mm_add_epi16(diffabs, bitsRound2); /* + bitsRound2 */ + diffabs2 = _mm_srli_epi16(diffabs2, cutBitsNum); /* >> cutBitsNum */ + diffabs = _mm_add_epi16(diffabs, bitsRound); /* + bitsRound */ + diffabs = _mm_srli_epi16(diffabs, cutBitsNum); /* >> cutBitsNum */ + diffabs = _mm_packus_epi16(diffabs2, diffabs); /* convert to 8 bits */ + diffabs = _mm_min_epu8(diffabs, _mm_set1_epi8(15)); /* min(x,15) */ + diffabs = _mm_shuffle_epi8(lut, diffabs); /* lut */ + diffabs2 = _mm_cvtepi8_epi16(diffabs); /* back to 16-bit */ + diffabs = _mm_cvtepi8_epi16(_mm_shuffle_epi32(diffabs, 0x4e)); /* back to 16-bit */ + diffabs = _mm_srai_epi16(_mm_add_epi16(_mm_add_epi16(diffabs, diffabs2), _mm_set1_epi16(1)), 1); /* (v1 + v2 + 1) >> 1 */ +#else + diffabs = _mm_add_epi16(diffabs, _mm_set1_epi16(4)); /* +4 */ + diffabs = _mm_srai_epi16(diffabs, 3); /* >> 3 */ + diffabs = _mm_min_epi16(diffabs, _mm_set1_epi16(15)); /* min(x,15) */ + diffabs = _mm_packus_epi16(diffabs, diffabs); /* convert to 8 */ + diffabs = _mm_shuffle_epi8(lut, diffabs); /* lut */ + diffabs = _mm_cvtepi8_epi16(diffabs); /* back to 16-bit */ + diffabs = _mm_srai_epi16(diffabs, lutShift); /* diagonal shift! 
*/ +#endif + diffabs = _mm_sign_epi16(diffabs, val); /* add original sign */ + acc = _mm_add_epi16(diffabs, acc); /* add to acc */ +} + +template<X86_VEXT vext> +void BilateralFilter::simdFilterDiamond5x5(uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bifRoundAdd, int bifRoundShift, bool isRDO, const char* lutRowPtr, bool noClip, int cutBitsNum) { //if( uiWidth < 4 || ( uiWidth < 8 && isRDO ) ) if( uiWidth < 4 ) { - return blockBilateralFilterDiamond5x5( uiWidth, uiHeight, block, blkFilt, clpRng, recPtr, recStride, iWidthExtSIMD, bfac, bifRoundAdd, bifRoundShift, isRDO, lutRowPtr, noClip ); + return blockBilateralFilterDiamond5x5(uiWidth, uiHeight, block, blkFilt, clpRng, recPtr, recStride, iWidthExtSIMD, bfac, bifRoundAdd, bifRoundShift, isRDO, lutRowPtr, noClip, cutBitsNum); } int pad = 2; int padwidth = iWidthExtSIMD; - __m128i center, left, right, up, down, lu, ld, ru, rd, diffabs, four, fifteen, lut, acc, temp, roundAdd, clipmin, clipmax, inputVals; + __m128i center, left, right, up, down, lu, ld, ru, rd, acc, roundAdd, clipmin, clipmax, inputVals; __m128i ll, rr, uu, dd; - four = _mm_set1_epi16(4); - fifteen = _mm_set1_epi16(15); - roundAdd = _mm_set1_epi16(bifRoundAdd); clipmin = _mm_set1_epi16(clpRng.min); clipmax = _mm_set1_epi16(clpRng.max); - lut = _mm_loadu_si128((__m128i*)(lutRowPtr)); acc = _mm_set1_epi32(0); - - // Copy back parameters - Pel *tempBlockPtr = (short*)blkFilt + (((padwidth+4) << 1) + 2); - int tempBlockStride = padwidth+4; +#if JVET_AF0112_BIF_DYNAMIC_SCALING + __m128i lut1 = _mm_loadu_si128((__m128i*)(lutRowPtr)); + __m128i lut2 = _mm_loadu_si128((__m128i*)(lutRowPtr + 16)); + __m128i lut3 = _mm_loadu_si128((__m128i*)(lutRowPtr + 32)); + __m128i mmBfac = _mm_set1_epi16(bfac); + roundAdd = _mm_set1_epi16(bifRoundAdd << 3); + __m128i bitsRound = _mm_set1_epi16(1 << (cutBitsNum - 2)); + __m128i bitsRound2 = _mm_set1_epi16((1 << 
(cutBitsNum - 2)) + (1 << (cutBitsNum - 1))); +#else + __m128i lut = _mm_loadu_si128((__m128i*)(lutRowPtr)); + int lutShift1 = 0, lutShift2 = 1, lutShift3 = 1; + roundAdd = _mm_set1_epi16(bifRoundAdd); +#endif for (int col = 0; col < uiWidth; col += 8) { @@ -113,164 +397,59 @@ void BilateralFilter::simdFilterDiamond5x5( uint32_t uiWidth, uint32_t uiHeight, uu = _mm_sub_epi16(uu, center); dd = _mm_sub_epi16(dd, center); - //LEFT! - //calculate abs - diffabs = _mm_abs_epi16(left); //abs - diffabs = _mm_add_epi16(diffabs, four); //+4 - diffabs = _mm_srai_epi16(diffabs, 3); //>>3 - diffabs = _mm_min_epi16(diffabs, fifteen); //min(x,15) - diffabs = _mm_packus_epi16(diffabs, diffabs); //convert to 8 - diffabs = _mm_shuffle_epi8(lut, diffabs);//lut - diffabs = _mm_cvtepi8_epi16(diffabs);//back to 16-bit - diffabs = _mm_sign_epi16(diffabs, left);//fix sign! - acc = _mm_add_epi16(diffabs, acc); //add to acc - //RIGHT! - //calculate abs - diffabs = _mm_abs_epi16(right); //abs - diffabs = _mm_add_epi16(diffabs, four); //+4 - diffabs = _mm_srai_epi16(diffabs, 3); //>>3 - diffabs = _mm_min_epi16(diffabs, fifteen); //min(x,15) - diffabs = _mm_packus_epi16(diffabs, diffabs); //convert to 8 - diffabs = _mm_shuffle_epi8(lut, diffabs);//lut - diffabs = _mm_cvtepi8_epi16(diffabs);//back to 16-bit - diffabs = _mm_sign_epi16(diffabs, right);//fix sign! - acc = _mm_add_epi16(diffabs, acc); //add to acc - //UP! - //calculate abs - diffabs = _mm_abs_epi16(up); //abs - diffabs = _mm_add_epi16(diffabs, four); //+4 - diffabs = _mm_srai_epi16(diffabs, 3); //>>3 - diffabs = _mm_min_epi16(diffabs, fifteen); //min(x,15) - diffabs = _mm_packus_epi16(diffabs, diffabs); //convert to 8 - diffabs = _mm_shuffle_epi8(lut, diffabs);//lut - diffabs = _mm_cvtepi8_epi16(diffabs);//back to 16-bit - diffabs = _mm_sign_epi16(diffabs, up);//fix sign! - acc = _mm_add_epi16(diffabs, acc); //add to acc - - //DOWN! 
- //calculate abs - diffabs = _mm_abs_epi16(down); //abs - diffabs = _mm_add_epi16(diffabs, four); //+4 - diffabs = _mm_srai_epi16(diffabs, 3); //>>3 - diffabs = _mm_min_epi16(diffabs, fifteen); //min(x,15) - diffabs = _mm_packus_epi16(diffabs, diffabs); //convert to 8 - diffabs = _mm_shuffle_epi8(lut, diffabs);//lut - diffabs = _mm_cvtepi8_epi16(diffabs);//back to 16-bit - diffabs = _mm_sign_epi16(diffabs, down);//fix sign! - acc = _mm_add_epi16(diffabs, acc); //add to acc - - //lu! - //calculate abs - diffabs = _mm_abs_epi16(lu); //abs - diffabs = _mm_add_epi16(diffabs, four); //+4 - diffabs = _mm_srai_epi16(diffabs, 3); //>>3 - diffabs = _mm_min_epi16(diffabs, fifteen); //min(x,15) - diffabs = _mm_packus_epi16(diffabs, diffabs); //convert to 8 - diffabs = _mm_shuffle_epi8(lut, diffabs);//lut - diffabs = _mm_cvtepi8_epi16(diffabs);//back to 16-bit - diffabs = _mm_srai_epi16(diffabs, 1);//diagonal shift! - diffabs = _mm_sign_epi16(diffabs, lu);//fix sign! - acc = _mm_add_epi16(diffabs, acc); //add to acc - //ld! - //calculate abs - diffabs = _mm_abs_epi16(ld); //abs - diffabs = _mm_add_epi16(diffabs, four); //+4 - diffabs = _mm_srai_epi16(diffabs, 3); //>>3 - diffabs = _mm_min_epi16(diffabs, fifteen); //min(x,15) - diffabs = _mm_packus_epi16(diffabs, diffabs); //convert to 8 - diffabs = _mm_shuffle_epi8(lut, diffabs);//lut - diffabs = _mm_cvtepi8_epi16(diffabs);//back to 16-bit - diffabs = _mm_srai_epi16(diffabs, 1);//diagonal shift! - diffabs = _mm_sign_epi16(diffabs, ld);//fix sign! - acc = _mm_add_epi16(diffabs, acc); //add to acc - //ru! - //calculate abs - diffabs = _mm_abs_epi16(ru); //abs - diffabs = _mm_add_epi16(diffabs, four); //+4 - diffabs = _mm_srai_epi16(diffabs, 3); //>>3 - diffabs = _mm_min_epi16(diffabs, fifteen); //min(x,15) - diffabs = _mm_packus_epi16(diffabs, diffabs); //convert to 8 - diffabs = _mm_shuffle_epi8(lut, diffabs);//lut - diffabs = _mm_cvtepi8_epi16(diffabs);//back to 16-bit - diffabs = _mm_srai_epi16(diffabs, 1);//diagonal shift! 
- diffabs = _mm_sign_epi16(diffabs, ru);//fix sign! - acc = _mm_add_epi16(diffabs, acc); //add to acc - //rd! - //calculate abs - diffabs = _mm_abs_epi16(rd); //abs - diffabs = _mm_add_epi16(diffabs, four); //+4 - diffabs = _mm_srai_epi16(diffabs, 3); //>>3 - diffabs = _mm_min_epi16(diffabs, fifteen); //min(x,15) - diffabs = _mm_packus_epi16(diffabs, diffabs); //convert to 8 - diffabs = _mm_shuffle_epi8(lut, diffabs);//lut - diffabs = _mm_cvtepi8_epi16(diffabs);//back to 16-bit - diffabs = _mm_srai_epi16(diffabs, 1);//diagonal shift! - diffabs = _mm_sign_epi16(diffabs, rd);//fix sign! - acc = _mm_add_epi16(diffabs, acc); //add to acc - - //ll! - //calculate abs - diffabs = _mm_abs_epi16(ll); //abs - diffabs = _mm_add_epi16(diffabs, four); //+4 - diffabs = _mm_srai_epi16(diffabs, 3); //>>3 - diffabs = _mm_min_epi16(diffabs, fifteen); //min(x,15) - diffabs = _mm_packus_epi16(diffabs, diffabs); //convert to 8 - diffabs = _mm_shuffle_epi8(lut, diffabs);//lut - diffabs = _mm_cvtepi8_epi16(diffabs);//back to 16-bit - diffabs = _mm_srai_epi16(diffabs, 1);//diagonal shift! - diffabs = _mm_sign_epi16(diffabs, ll);//fix sign! - acc = _mm_add_epi16(diffabs, acc); //add to acc - //rr! - //calculate abs - diffabs = _mm_abs_epi16(rr); //abs - diffabs = _mm_add_epi16(diffabs, four); //+4 - diffabs = _mm_srai_epi16(diffabs, 3); //>>3 - diffabs = _mm_min_epi16(diffabs, fifteen); //min(x,15) - diffabs = _mm_packus_epi16(diffabs, diffabs); //convert to 8 - diffabs = _mm_shuffle_epi8(lut, diffabs);//lut - diffabs = _mm_cvtepi8_epi16(diffabs);//back to 16-bit - diffabs = _mm_srai_epi16(diffabs, 1);//diagonal shift! - diffabs = _mm_sign_epi16(diffabs, rr);//fix sign! - acc = _mm_add_epi16(diffabs, acc); //add to acc - //uu! 
- //calculate abs - diffabs = _mm_abs_epi16(uu); //abs - diffabs = _mm_add_epi16(diffabs, four); //+4 - diffabs = _mm_srai_epi16(diffabs, 3); //>>3 - diffabs = _mm_min_epi16(diffabs, fifteen); //min(x,15) - diffabs = _mm_packus_epi16(diffabs, diffabs); //convert to 8 - diffabs = _mm_shuffle_epi8(lut, diffabs);//lut - diffabs = _mm_cvtepi8_epi16(diffabs);//back to 16-bit - diffabs = _mm_srai_epi16(diffabs, 1);//diagonal shift! - diffabs = _mm_sign_epi16(diffabs, uu);//fix sign! - acc = _mm_add_epi16(diffabs, acc); //add to acc - //dd! - //calculate abs - diffabs = _mm_abs_epi16(dd); //abs - diffabs = _mm_add_epi16(diffabs, four); //+4 - diffabs = _mm_srai_epi16(diffabs, 3); //>>3 - diffabs = _mm_min_epi16(diffabs, fifteen); //min(x,15) - diffabs = _mm_packus_epi16(diffabs, diffabs); //convert to 8 - diffabs = _mm_shuffle_epi8(lut, diffabs);//lut - diffabs = _mm_cvtepi8_epi16(diffabs);//back to 16-bit - diffabs = _mm_srai_epi16(diffabs, 1);//diagonal shift! - diffabs = _mm_sign_epi16(diffabs, dd);//fix sign! 
- acc = _mm_add_epi16(diffabs, acc); //add to acc + // apply LUT +#if JVET_AF0112_BIF_DYNAMIC_SCALING + simdBifApplyLut(left, acc, cutBitsNum, bitsRound, bitsRound2, lut1); + simdBifApplyLut(right, acc, cutBitsNum, bitsRound, bitsRound2, lut1); + simdBifApplyLut(up, acc, cutBitsNum, bitsRound, bitsRound2, lut1); + simdBifApplyLut(down, acc, cutBitsNum, bitsRound, bitsRound2, lut1); + + simdBifApplyLut(lu, acc, cutBitsNum, bitsRound, bitsRound2, lut2); + simdBifApplyLut(ld, acc, cutBitsNum, bitsRound, bitsRound2, lut2); + simdBifApplyLut(ru, acc, cutBitsNum, bitsRound, bitsRound2, lut2); + simdBifApplyLut(rd, acc, cutBitsNum, bitsRound, bitsRound2, lut2); + + simdBifApplyLut(ll, acc, cutBitsNum, bitsRound, bitsRound2, lut3); + simdBifApplyLut(rr, acc, cutBitsNum, bitsRound, bitsRound2, lut3); + simdBifApplyLut(uu, acc, cutBitsNum, bitsRound, bitsRound2, lut3); + simdBifApplyLut(dd, acc, cutBitsNum, bitsRound, bitsRound2, lut3); +#else + simdBifApplyLut(left, acc, lut, lutShift1); + simdBifApplyLut(right, acc, lut, lutShift1); + simdBifApplyLut(up, acc, lut, lutShift1); + simdBifApplyLut(down, acc, lut, lutShift1); + + simdBifApplyLut(lu, acc, lut, lutShift2); + simdBifApplyLut(ld, acc, lut, lutShift2); + simdBifApplyLut(ru, acc, lut, lutShift2); + simdBifApplyLut(rd, acc, lut, lutShift2); + + simdBifApplyLut(ll, acc, lut, lutShift3); + simdBifApplyLut(rr, acc, lut, lutShift3); + simdBifApplyLut(uu, acc, lut, lutShift3); + simdBifApplyLut(dd, acc, lut, lutShift3); +#endif + // TU scaling +#if JVET_AF0112_BIF_DYNAMIC_SCALING + acc = _mm_mullo_epi16(acc, mmBfac); + acc = _mm_adds_epi16(acc, roundAdd); + acc = _mm_srai_epi16(acc, bifRoundShift + 3); +#else if (bfac == 2) { acc = _mm_slli_epi16(acc, 1); // Shift left to get 2* } else if (bfac == 3) { - temp = _mm_slli_epi16(acc, 1); // Multiply by two by shifting left - acc = _mm_add_epi16(acc, temp); // Add original value to get 3* + acc = _mm_add_epi16(acc, _mm_slli_epi16(acc, 1)); // Multiply by two by shifting left 
and add original value to get 3* } // Add 16 and shift 5 acc = _mm_add_epi16(acc, roundAdd); acc = _mm_srai_epi16(acc, bifRoundShift); - +#endif + // Instead we add our input values to the delta if(isRDO) { @@ -292,26 +471,76 @@ void BilateralFilter::simdFilterDiamond5x5( uint32_t uiWidth, uint32_t uiHeight, acc = _mm_min_epi16( acc, clipmax ); } - _mm_store_si128((__m128i*)(blkFilt + (row + pad) * (padwidth + 4) + col + pad), acc); + _mm_storeu_si128((__m128i*)(blkFilt + (row + pad) * padwidth + col + pad), acc); } } // Copy back from tempbufFilter to recBuf int onerow = uiWidth * sizeof(Pel); - for(uint32_t yy = 0; yy < uiHeight; yy++) + // Copy back parameters + Pel* tempBlockPtr = (short*)blkFilt + pad * padwidth + pad; + for (uint32_t yy = 0; yy < uiHeight; yy++) { std::memcpy(recPtr, tempBlockPtr, onerow); recPtr += recStride; - tempBlockPtr += tempBlockStride; + tempBlockPtr += padwidth; + } +} + +#if JVET_AF0112_BIF_DYNAMIC_SCALING +template<X86_VEXT vext> +int BilateralFilter::simdCalcMAD(int16_t* block, int stride, int width, int height, int whlog2) +{ + if (width < 4) + { + return calcMAD(block, stride, width, height, whlog2); + } + int sum32[4]; + __m128i acc32, val, average; + acc32 = _mm_setzero_si128(); + for (int i = 0; i < height; i++) + { + for (int j = 0; j < width; j += 4) + { + val = _mm_loadl_epi64((__m128i*)(block + j)); + val = _mm_cvtepi16_epi32(val); + acc32 = _mm_add_epi32(acc32, val); + } + block += stride; } + block -= stride * height; + acc32 = _mm_hadd_epi32(acc32, acc32); + _mm_storeu_si128((__m128i*)sum32, acc32); + average = _mm_set1_epi32((sum32[0] + sum32[1] + (1 << (whlog2 - 1))) >> whlog2); + acc32 = _mm_setzero_si128(); + for (int i = 0; i < height; i++) + { + for (int j = 0; j < width; j += 4) + { + val = _mm_loadl_epi64((__m128i*)(block + j)); + val = _mm_cvtepi16_epi32(val); + acc32 = _mm_add_epi32(acc32, _mm_abs_epi32(_mm_sub_epi32(val, average))); + } + block += stride; + } + acc32 = _mm_hadd_epi32(acc32, acc32); + 
_mm_storeu_si128((__m128i*)sum32, acc32); + return (sum32[0] + sum32[1] + (1 << (whlog2 - 1))) >> whlog2; } +#endif + +#endif // USE_AVX2 template <X86_VEXT vext> void BilateralFilter::_initBilateralFilterX86() { - m_bilateralFilterDiamond5x5 = simdFilterDiamond5x5<vext>; + m_bilateralFilterDiamond5x5 = simdFilterDiamond5x5<vext>; +#if JVET_AF0112_BIF_DYNAMIC_SCALING + m_calcMAD = simdCalcMAD<vext>; +#endif } template void BilateralFilter::_initBilateralFilterX86<SIMDX86>(); + #endif #endif // TARGET_SIMD_X86 diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp index 0640e4a2a3af3fb2b0a02b441633c052048b8fc8..ca0e53773812991c707ab37594bc204d420879a1 100644 --- a/source/Lib/EncoderLib/EncCu.cpp +++ b/source/Lib/EncoderLib/EncCu.cpp @@ -103,6 +103,11 @@ EncCu::EncCu() : m_GeoModeTest void EncCu::create( EncCfg* encCfg ) { +#if JVET_V0094_BILATERAL_FILTER || JVET_X0071_CHROMA_BILATERAL_FILTER + m_bilateralFilter = new BilateralFilter();; + m_bilateralFilter->create(); +#endif + unsigned uiMaxWidth = encCfg->getMaxCUWidth(); unsigned uiMaxHeight = encCfg->getMaxCUHeight(); ChromaFormat chromaFormat = encCfg->getChromaFormatIdc(); @@ -296,6 +301,7 @@ void EncCu::destroy() unsigned numHeights = gp_sizeIdxInfo->numHeights(); #if JVET_V0094_BILATERAL_FILTER || JVET_X0071_CHROMA_BILATERAL_FILTER + m_bilateralFilter->destroy(); delete m_bilateralFilter; #endif @@ -16993,17 +16999,19 @@ void EncCu::xCalDebCost( CodingStructure &cs, Partitioner &partitioner, bool cal for (auto &currTU : CU::traverseTUs(*cu)) { +#if JVET_AF0112_BIF_DYNAMIC_SCALING + bool applyBIF = m_bilateralFilter->getApplyBIF(currTU, COMPONENT_Y); +#else bool isInter = (cu->predMode == MODE_INTER) ? 
true : false; - if ((TU::getCbf(currTU, COMPONENT_Y) || isInter == false) && (currTU.cu->qp > 17) && (128 > std::max(currTU.lumaSize().width, currTU.lumaSize().height))) + bool applyBIF = ((TU::getCbf(currTU, COMPONENT_Y) || isInter == false) && (currTU.cu->qp > 17)) && (128 > std::max(currTU.lumaSize().width, currTU.lumaSize().height)) && ((isInter == false) || (32 > std::min(currTU.lumaSize().width, currTU.lumaSize().height))); +#endif + if (applyBIF) { - if ((isInter == false) || (32 > std::min(currTU.lumaSize().width, currTU.lumaSize().height))) - { - CompArea &compArea = currTU.block(COMPONENT_Y); - PelBuf recBuf = picDbBuf.getBuf(compArea); - PelBuf recIPredBuf = recBuf; - std::vector<Pel> invLUT; - m_bilateralFilter->bilateralFilterRDOdiamond5x5( COMPONENT_Y, recBuf, recBuf, recBuf, currTU.cu->qp, recIPredBuf, cs.slice->clpRng(COMPONENT_Y), currTU, true, false, &invLUT ); - } + CompArea& compArea = currTU.block(COMPONENT_Y); + PelBuf recBuf = picDbBuf.getBuf(compArea); + PelBuf recIPredBuf = recBuf; + std::vector<Pel> invLUT; + m_bilateralFilter->bilateralFilterRDOdiamond5x5(COMPONENT_Y, recBuf, recBuf, recBuf, currTU.cu->qp, recIPredBuf, cs.slice->clpRng(COMPONENT_Y), currTU, true, false, &invLUT); } } } @@ -17011,10 +17019,6 @@ void EncCu::xCalDebCost( CodingStructure &cs, Partitioner &partitioner, bool cal #if JVET_X0071_CHROMA_BILATERAL_FILTER if(cs.pps->getUseChromaBIF()) { - bool tuValid = false; - bool tuCBF = false; - bool isDualTree = CS::isDualITree(cs); - bool chromaValid = cu->Cb().valid() && cu->Cr().valid(); bool applyChromaBIF = false; if (leftEdgeAvai && topEdgeAvai) @@ -17030,10 +17034,17 @@ void EncCu::xCalDebCost( CodingStructure &cs, Partitioner &partitioner, bool cal for (auto &currTU : CU::traverseTUs(*cu)) { - bool isInter = (cu->predMode == MODE_INTER) ? 
true : false; for(int compIdx = COMPONENT_Cb; compIdx < MAX_NUM_COMPONENT; compIdx++) { ComponentID compID = ComponentID( compIdx ); +#if JVET_AF0112_BIF_DYNAMIC_SCALING + applyChromaBIF = m_bilateralFilter->getApplyBIF(currTU, compID); +#else + bool tuValid = false; + bool tuCBF = false; + bool isInter = (cu->predMode == MODE_INTER) ? true : false; + bool isDualTree = CS::isDualITree(cs); + bool chromaValid = cu->Cb().valid() && cu->Cr().valid(); applyChromaBIF = false; if(!isDualTree && chromaValid) { @@ -17051,6 +17062,7 @@ void EncCu::xCalDebCost( CodingStructure &cs, Partitioner &partitioner, bool cal tuCBF = TU::getCbf(currTU, compID); applyChromaBIF = ((tuCBF || isInter == false) && (currTU.cu->qp > 17)); } +#endif if (applyChromaBIF) { CompArea &compArea = currTU.block(compID); @@ -17799,17 +17811,20 @@ void EncCu::xReuseCachedResult( CodingStructure *&tempCS, CodingStructure *&best tmpRecLuma.rspSignal(m_pcReshape->getInvLUT()); } } - if(tempCS->pps->getUseBIF() && isLuma(compID) && cu.qp > 17) + if(tempCS->pps->getUseBIF() && isLuma(compID)) { for (auto &currTU : CU::traverseTUs(cu)) { - Position tuPosInCu = currTU.lumaPos() - cu.lumaPos(); - PelBuf tmpSubBuf = tmpRecLuma.subBuf(tuPosInCu, currTU.lumaSize()); - - bool isInter = (cu.predMode == MODE_INTRA) ? false : true; - - if ((TU::getCbf(currTU, compID ) || isInter == false) && (currTU.cu->qp > 17) && (128 > std::max(currTU.lumaSize().width, currTU.lumaSize().height)) && ((isInter == false) || (32 > std::min(currTU.lumaSize().width, currTU.lumaSize().height)))) +#if JVET_AF0112_BIF_DYNAMIC_SCALING + bool applyBIF = m_bilateralFilter->getApplyBIF(currTU, compID); +#else + bool isInter = (cu.predMode == MODE_INTER) ? 
true : false; + bool applyBIF = ((TU::getCbf(currTU, COMPONENT_Y) || isInter == false) && (currTU.cu->qp > 17)) && (128 > std::max(currTU.lumaSize().width, currTU.lumaSize().height)) && ((isInter == false) || (32 > std::min(currTU.lumaSize().width, currTU.lumaSize().height))); +#endif + if (applyBIF) { + Position tuPosInCu = currTU.lumaPos() - cu.lumaPos(); + PelBuf tmpSubBuf = tmpRecLuma.subBuf(tuPosInCu, currTU.lumaSize()); CompArea compArea = currTU.blocks[compID]; PelBuf recIPredBuf = tempCS->slice->getPic()->getRecoBuf(compArea); // Do we need to use clipArea? @@ -17837,18 +17852,19 @@ void EncCu::xReuseCachedResult( CodingStructure *&tempCS, CodingStructure *&best tmpRecChroma.copyFrom(reco); } - if(tempCS->pps->getUseChromaBIF() && isChroma(compID) && (cu.qp > 17)) + if(tempCS->pps->getUseChromaBIF() && isChroma(compID)) { - bool tuValid = false; - bool tuCBF = false; - bool isDualTree = CS::isDualITree(*tempCS); - bool chromaValid = cu.Cb().valid() && cu.Cr().valid(); bool applyChromaBIF = false; for (auto &currTU : CU::traverseTUs(cu)) { - Position tuPosInCu = currTU.chromaPos() - cu.chromaPos(); - PelBuf tmpSubBuf = tmpRecChroma.subBuf(tuPosInCu, currTU.chromaSize()); +#if JVET_AF0112_BIF_DYNAMIC_SCALING + applyChromaBIF = m_bilateralFilter->getApplyBIF(currTU, compID); +#else + bool tuValid = false; + bool tuCBF = false; bool isInter = (cu.predMode == MODE_INTER) ? 
true : false; + bool isDualTree = CS::isDualITree(*tempCS); + bool chromaValid = cu.Cb().valid() && cu.Cr().valid(); applyChromaBIF = false; if(!isDualTree && chromaValid) { @@ -17865,9 +17881,12 @@ void EncCu::xReuseCachedResult( CodingStructure *&tempCS, CodingStructure *&best { applyChromaBIF = ((TU::getCbf(currTU, compID) || isInter == false) && (currTU.cu->qp > 17)); } +#endif if(applyChromaBIF) { + Position tuPosInCu = currTU.chromaPos() - cu.chromaPos(); + PelBuf tmpSubBuf = tmpRecChroma.subBuf(tuPosInCu, currTU.chromaSize()); CompArea compArea = currTU.blocks[compID]; PelBuf recIPredBuf = tempCS->slice->getPic()->getRecoBuf(compArea); m_bilateralFilter->bilateralFilterRDOdiamond5x5( compID, tmpSubBuf, tmpSubBuf, tmpSubBuf, currTU.cu->qp, recIPredBuf, tempCS->slice->clpRng(compID), currTU, true ); @@ -17917,7 +17936,7 @@ void EncCu::xReuseCachedResult( CodingStructure *&tempCS, CodingStructure *&best tmpRecChroma.copyFrom(reco); } - if(tempCS->pps->getUseChromaBIF() && isChroma(compID) && (cu.qp > 17)) + if(tempCS->pps->getUseChromaBIF() && isChroma(compID)) { bool tuValid = false; bool tuCBF = false; @@ -17927,9 +17946,9 @@ void EncCu::xReuseCachedResult( CodingStructure *&tempCS, CodingStructure *&best for (auto &currTU : CU::traverseTUs(cu)) { - Position tuPosInCu = currTU.chromaPos() - cu.chromaPos(); - PelBuf tmpSubBuf = tmpRecChroma.subBuf(tuPosInCu, currTU.chromaSize()); - +#if JVET_AF0112_BIF_DYNAMIC_SCALING + applyChromaBIF = m_bilateralFilter->getApplyBIF(currTU, compID); +#else bool isInter = (cu.predMode == MODE_INTER) ? 
true : false; applyChromaBIF = false; if(!isDualTree && chromaValid) @@ -17946,8 +17965,11 @@ void EncCu::xReuseCachedResult( CodingStructure *&tempCS, CodingStructure *&best { applyChromaBIF = ((TU::getCbf(currTU, compID) || isInter == false) && (currTU.cu->qp > 17)); } +#endif if(applyChromaBIF) { + Position tuPosInCu = currTU.chromaPos() - cu.chromaPos(); + PelBuf tmpSubBuf = tmpRecChroma.subBuf(tuPosInCu, currTU.chromaSize()); CompArea compArea = currTU.blocks[compID]; PelBuf recIPredBuf = tempCS->slice->getPic()->getRecoBuf(compArea); m_bilateralFilter->bilateralFilterRDOdiamond5x5( compID, tmpSubBuf, tmpSubBuf, tmpSubBuf, currTU.cu->qp, recIPredBuf, tempCS->slice->clpRng(compID), currTU, true ); diff --git a/source/Lib/EncoderLib/EncCu.h b/source/Lib/EncoderLib/EncCu.h index 5721eef609961649593b208c1f90c69d7ff97a2b..f1dd78ab07b5c5f5772cae100a312d793bbd78fa 100644 --- a/source/Lib/EncoderLib/EncCu.h +++ b/source/Lib/EncoderLib/EncCu.h @@ -433,7 +433,7 @@ public: EncModeCtrl* getModeCtrl () { return m_modeCtrl; } #if JVET_V0094_BILATERAL_FILTER || JVET_X0071_CHROMA_BILATERAL_FILTER - BilateralFilter *m_bilateralFilter = new BilateralFilter(); + BilateralFilter *m_bilateralFilter; #endif void setMergeBestSATDCost(double cost) { m_mergeBestSATDCost = cost; } diff --git a/source/Lib/EncoderLib/EncSampleAdaptiveOffset.cpp b/source/Lib/EncoderLib/EncSampleAdaptiveOffset.cpp index 74df2581a152cb798c517853c2f0700e21cc72c5..117a3a5a2ae3e13a13a448f2d4d4acec0cb363fe 100644 --- a/source/Lib/EncoderLib/EncSampleAdaptiveOffset.cpp +++ b/source/Lib/EncoderLib/EncSampleAdaptiveOffset.cpp @@ -1423,9 +1423,13 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn { for (auto& currTU : CU::traverseTUs(currCU)) { - +#if JVET_AF0112_BIF_DYNAMIC_SCALING + bool applyBIF = bifParams.ctuOn[ctuRsAddr] && m_bilateralFilter.getApplyBIF(currTU, COMPONENT_Y); +#else bool isInter = (currCU.predMode == MODE_INTER) ? 
true : false; - if (bifParams.ctuOn[ctuRsAddr] && ((TU::getCbf(currTU, COMPONENT_Y) || isInter == false) && (currTU.cu->qp > 17)) && (128 > std::max(currTU.lumaSize().width, currTU.lumaSize().height)) && ((isInter == false) || (32 > std::min(currTU.lumaSize().width, currTU.lumaSize().height)))) + bool applyBIF = bifParams.ctuOn[ctuRsAddr] && ((TU::getCbf(currTU, COMPONENT_Y) || isInter == false) && (currTU.cu->qp > 17)) && (128 > std::max(currTU.lumaSize().width, currTU.lumaSize().height)) && ((isInter == false) || (32 > std::min(currTU.lumaSize().width, currTU.lumaSize().height))); +#endif + if (applyBIF) { #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY bool clipTop = false, clipBottom = false, clipLeft = false, clipRight = false; @@ -1449,8 +1453,6 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn #if JVET_X0071_CHROMA_BILATERAL_FILTER if(cs.pps->getUseChromaBIF()) { - bool tuValid = false; - bool tuCBF = false; bool isDualTree = CS::isDualITree(cs); ChannelType chType = isDualTree ? CH_C : CH_L; bool applyChromaBIF = false; @@ -1469,13 +1471,18 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn } for (auto &currTU : CU::traverseTUs(currCU)) { - bool isInter = (currCU.predMode == MODE_INTER) ? true : false; for(int compIdx = COMPONENT_Cb ; compIdx < MAX_NUM_COMPONENT; compIdx++) { ComponentID compID = ComponentID( compIdx ); BifParams& chromaBifParams = cs.picture->getBifParam( compID ); bool ctuEnableChromaBIF = chromaBifParams.ctuOn[ctuRsAddr]; +#if JVET_AF0112_BIF_DYNAMIC_SCALING + applyChromaBIF = ctuEnableChromaBIF && m_bilateralFilter.getApplyBIF(currTU, compID); +#else + bool tuValid = false; + bool tuCBF = false; + bool isInter = (currCU.predMode == MODE_INTER) ? 
true : false; applyChromaBIF = false; if(!isDualTree) { @@ -1492,6 +1499,7 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn tuCBF = TU::getCbf(currTU, compID); applyChromaBIF = (ctuEnableChromaBIF && ((tuCBF || isInter == false) && (currTU.cu->qp > 17))); } +#endif if(applyChromaBIF) { #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY @@ -1534,7 +1542,7 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn // We don't need to clip if SAO was not performed on luma. SAOBlkParam mySAOblkParam = cs.picture->getSAO()[ctuRsAddr]; SAOOffset& myCtbOffset = mySAOblkParam[0]; - BifParams& bifParams = cs.picture->getBifParam(); + BifParams& bifParams = cs.picture->getBifParam(COMPONENT_Y); bool clipLumaIfNoBilat = false; if(myCtbOffset.modeIdc != SAO_MODE_OFF) @@ -1561,9 +1569,14 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn { for (auto &currTU : CU::traverseTUs(currCU)) { - +#if JVET_AF0112_BIF_DYNAMIC_SCALING + bool applyBIF = bifParams.ctuOn[ctuRsAddr] && m_bilateralFilter.getApplyBIF(currTU, COMPONENT_Y); +#else bool isInter = (currCU.predMode == MODE_INTER) ? 
true : false; - if ( bifParams.ctuOn[ctuRsAddr] && ((TU::getCbf(currTU, COMPONENT_Y) || isInter == false) && (currTU.cu->qp > 17)) && (128 > std::max(currTU.lumaSize().width, currTU.lumaSize().height)) && ((isInter == false) || (32 > std::min(currTU.lumaSize().width, currTU.lumaSize().height)))) + bool applyBIF = bifParams.ctuOn[ctuRsAddr] && ((TU::getCbf(currTU, COMPONENT_Y) || isInter == false) && (currTU.cu->qp > 17)) && (128 > std::max(currTU.lumaSize().width, currTU.lumaSize().height)) && ((isInter == false) || (32 > std::min(currTU.lumaSize().width, currTU.lumaSize().height))); +#endif + + if (applyBIF) { #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY bool clipTop = false, clipBottom = false, clipLeft = false, clipRight = false; @@ -1574,7 +1587,7 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn cs, currTU.Y().x, currTU.Y().y, currTU.lumaSize().width, currTU.lumaSize().height, clipTop, clipBottom, clipLeft, clipRight, numHorVirBndry, numVerVirBndry, horVirBndryPos, verVirBndryPos); #endif - bilateralFilter.bilateralFilterDiamond5x5( COMPONENT_Y, srcYuv, resYuv, currTU.cu->qp, cs.slice->clpRng(COMPONENT_Y), currTU + bilateralFilter.bilateralFilterDiamond5x5( COMPONENT_Y, srcYuv, resYuv, currTU.cu->qp, cs.slice->clpRng(COMPONENT_Y), currTU, false #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY , isTUCrossedByVirtualBoundaries, horVirBndryPos, verVirBndryPos, numHorVirBndry, numVerVirBndry , clipTop, clipBottom, clipLeft, clipRight @@ -1608,8 +1621,6 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn } if(cs.pps->getUseChromaBIF()) { - bool tuValid = false; - bool tuCBF = false; bool isDualTree = CS::isDualITree(cs); ChannelType chType = isDualTree ? CH_C : CH_L; bool applyChromaBIF = false; @@ -1623,12 +1634,17 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn } for (auto &currTU : CU::traverseTUs(currCU)) { - bool isInter = (currCU.predMode == MODE_INTER) ? 
true : false; for(int compIdx = COMPONENT_Cb; compIdx < MAX_NUM_COMPONENT; compIdx++) { ComponentID compID = ComponentID( compIdx ); BifParams& chromaBifParams = cs.picture->getBifParam( compID ); bool ctuEnableChromaBIF = chromaBifParams.ctuOn[ctuRsAddr]; +#if JVET_AF0112_BIF_DYNAMIC_SCALING + applyChromaBIF = ctuEnableChromaBIF && m_bilateralFilter.getApplyBIF(currTU, compID); +#else + bool tuValid = false; + bool tuCBF = false; + bool isInter = (currCU.predMode == MODE_INTER) ? true : false; applyChromaBIF = false; if(!isDualTree) { @@ -1645,6 +1661,7 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn tuCBF = TU::getCbf(currTU, compID); applyChromaBIF = (ctuEnableChromaBIF && ((tuCBF || isInter == false) && (currTU.cu->qp > 17))); } +#endif if(applyChromaBIF) { #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY @@ -1662,7 +1679,7 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn clipLeft, clipRight, numHorVirBndry, numVerVirBndry, horVirBndryPos, verVirBndryPos); #endif - m_bilateralFilter.bilateralFilterDiamond5x5(compID, srcYuv, resYuv, currTU.cu->qp, cs.slice->clpRng(compID), currTU, false, + m_bilateralFilter.bilateralFilterDiamond5x5(compID, srcYuv, resYuv, currTU.cu->qp, cs.slice->clpRng(compID), currTU, false #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY , isTUCrossedByVirtualBoundaries, horVirBndryPos, verVirBndryPos, numHorVirBndry, numVerVirBndry, clipTop, clipBottom, clipLeft, clipRight #endif @@ -1771,6 +1788,9 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn ComponentID compID = ComponentID( compIdx ); BifParams& chromaBifParams = cs.picture->getBifParam( compID ); bool ctuEnableChromaBIF = chromaBifParams.ctuOn[ctuRsAddr]; +#if JVET_AF0112_BIF_DYNAMIC_SCALING + applyChromaBIF = ctuEnableChromaBIF && m_bilateralFilter.getApplyBIF(currTU, compID); +#else applyChromaBIF = false; if(!isDualTree) { @@ -1787,6 +1807,7 @@ void 
EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn tuCBF = TU::getCbf(currTU, compID); applyChromaBIF = (ctuEnableChromaBIF && ((tuCBF || isInter == false) && (currTU.cu->qp > 17))); } +#endif if(applyChromaBIF) { #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY @@ -1892,9 +1913,13 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn { for (auto& currTU : CU::traverseTUs(currCU)) { - +#if JVET_AF0112_BIF_DYNAMIC_SCALING + bool applyBIF = bifParams.ctuOn[ctuRsAddr] && m_bilateralFilter.getApplyBIF(currTU, COMPONENT_Y); +#else bool isInter = (currCU.predMode == MODE_INTER) ? true : false; - if (bifParams.ctuOn[ctuRsAddr] && ((TU::getCbf(currTU, COMPONENT_Y) || isInter == false) && (currTU.cu->qp > 17)) && (128 > std::max(currTU.lumaSize().width, currTU.lumaSize().height)) && ((isInter == false) || (32 > std::min(currTU.lumaSize().width, currTU.lumaSize().height)))) + bool applyBIF = bifParams.ctuOn[ctuRsAddr] && ((TU::getCbf(currTU, COMPONENT_Y) || isInter == false) && (currTU.cu->qp > 17)) && (128 > std::max(currTU.lumaSize().width, currTU.lumaSize().height)) && ((isInter == false) || (32 > std::min(currTU.lumaSize().width, currTU.lumaSize().height))); +#endif + if (applyBIF) { #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY bool clipTop = false, clipBottom = false, clipLeft = false, clipRight = false; @@ -1919,8 +1944,6 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn #if JVET_X0071_CHROMA_BILATERAL_FILTER if(cs.pps->getUseChromaBIF()) { - bool tuValid = false; - bool tuCBF = false; bool isDualTree = CS::isDualITree(cs); ChannelType chType = isDualTree ? CH_C : CH_L; bool applyChromaBIF = false; @@ -1939,13 +1962,18 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn for (auto &currTU : CU::traverseTUs(currCU)) { - bool isInter = (currCU.predMode == MODE_INTER) ? 
true : false; for(int compIdx = COMPONENT_Cb; compIdx < MAX_NUM_COMPONENT; compIdx++) { ComponentID compID = ComponentID( compIdx ); BifParams& chromaBifParams = cs.picture->getBifParam( compID ); bool ctuEnableChromaBIF = chromaBifParams.ctuOn[ctuRsAddr]; +#if JVET_AF0112_BIF_DYNAMIC_SCALING + applyChromaBIF = ctuEnableChromaBIF && m_bilateralFilter.getApplyBIF(currTU, compID); +#else + bool tuValid = false; + bool tuCBF = false; + bool isInter = (currCU.predMode == MODE_INTER) ? true : false; applyChromaBIF = false; if(!isDualTree) { @@ -1962,6 +1990,7 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn tuCBF = TU::getCbf(currTU, compID); applyChromaBIF = (ctuEnableChromaBIF && ((tuCBF || isInter == false) && (currTU.cu->qp > 17))); } +#endif if(applyChromaBIF) { #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY @@ -2022,8 +2051,13 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn { for (auto &currTU : CU::traverseTUs(currCU)) { +#if JVET_AF0112_BIF_DYNAMIC_SCALING + bool applyBIF = bifParams.ctuOn[ctuRsAddr] && m_bilateralFilter.getApplyBIF(currTU, COMPONENT_Y); +#else bool isInter = (currCU.predMode == MODE_INTER) ? 
true : false; - if ( bifParams.ctuOn[ctuRsAddr] && ((TU::getCbf(currTU, COMPONENT_Y) || isInter == false) && (currTU.cu->qp > 17)) && (128 > std::max(currTU.lumaSize().width, currTU.lumaSize().height)) && ((isInter == false) || (32 > std::min(currTU.lumaSize().width, currTU.lumaSize().height)))) + bool applyBIF = bifParams.ctuOn[ctuRsAddr] && ((TU::getCbf(currTU, COMPONENT_Y) || isInter == false) && (currTU.cu->qp > 17)) && (128 > std::max(currTU.lumaSize().width, currTU.lumaSize().height)) && ((isInter == false) || (32 > std::min(currTU.lumaSize().width, currTU.lumaSize().height))); +#endif + if (applyBIF) { #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY bool clipTop = false, clipBottom = false, clipLeft = false, clipRight = false; @@ -2034,7 +2068,7 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn cs, currTU.Y().x, currTU.Y().y, currTU.lumaSize().width, currTU.lumaSize().height, clipTop, clipBottom, clipLeft, clipRight, numHorVirBndry, numVerVirBndry, horVirBndryPos, verVirBndryPos); #endif - bilateralFilter.bilateralFilterDiamond5x5( COMPONENT_Y, srcYuv, resYuv, currTU.cu->qp, cs.slice->clpRng(COMPONENT_Y), currTU + bilateralFilter.bilateralFilterDiamond5x5( COMPONENT_Y, srcYuv, resYuv, currTU.cu->qp, cs.slice->clpRng(COMPONENT_Y), currTU, false #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY , isTUCrossedByVirtualBoundaries, horVirBndryPos, verVirBndryPos, numHorVirBndry, numVerVirBndry , clipTop, clipBottom, clipLeft, clipRight @@ -2068,8 +2102,6 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn if(cs.pps->getUseChromaBIF()) { - bool tuValid = false; - bool tuCBF = false; bool isDualTree = CS::isDualITree(cs); ChannelType chType = isDualTree ? CH_C : CH_L; bool applyChromaBIF = false; @@ -2083,13 +2115,18 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn } for (auto &currTU : CU::traverseTUs(currCU)) { - bool isInter = (currCU.predMode == MODE_INTER) ? 
true : false; for(int compIdx = COMPONENT_Cb; compIdx < MAX_NUM_COMPONENT; compIdx++) { ComponentID compID = ComponentID( compIdx ); BifParams& chromaBifParams = cs.picture->getBifParam( compID ); bool ctuEnableChromaBIF = chromaBifParams.ctuOn[ctuRsAddr]; +#if JVET_AF0112_BIF_DYNAMIC_SCALING + applyChromaBIF = ctuEnableChromaBIF && m_bilateralFilter.getApplyBIF(currTU, compID); +#else + bool tuValid = false; + bool tuCBF = false; + bool isInter = (currCU.predMode == MODE_INTER) ? true : false; applyChromaBIF = false; if(!isDualTree) { @@ -2106,6 +2143,7 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn tuCBF = TU::getCbf(currTU, compID); applyChromaBIF = (ctuEnableChromaBIF && ((tuCBF || isInter == false) && (currTU.cu->qp > 17))); } +#endif if(applyChromaBIF) { #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY @@ -2123,7 +2161,7 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn clipLeft, clipRight, numHorVirBndry, numVerVirBndry, horVirBndryPos, verVirBndryPos); #endif - bilateralFilter.bilateralFilterDiamond5x5(compID, srcYuv, resYuv, currTU.cu->qp, cs.slice->clpRng(compID), currTU, false, + bilateralFilter.bilateralFilterDiamond5x5(compID, srcYuv, resYuv, currTU.cu->qp, cs.slice->clpRng(compID), currTU, false #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY ,isTUCrossedByVirtualBoundaries, horVirBndryPos, verVirBndryPos, numHorVirBndry, numVerVirBndry, clipTop, clipBottom, clipLeft, clipRight #endif @@ -2195,9 +2233,13 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn { for (auto &currTU : CU::traverseTUs(currCU)) { - +#if JVET_AF0112_BIF_DYNAMIC_SCALING + bool applyBIF = bifParams.ctuOn[ctuRsAddr] && m_bilateralFilter.getApplyBIF(currTU, COMPONENT_Y); +#else bool isInter = (currCU.predMode == MODE_INTER) ? 
true : false; - if ( bifParams.ctuOn[ctuRsAddr] && ((TU::getCbf(currTU, COMPONENT_Y) || isInter == false) && (currTU.cu->qp > 17)) && (128 > std::max(currTU.lumaSize().width, currTU.lumaSize().height)) && ((isInter == false) || (32 > std::min(currTU.lumaSize().width, currTU.lumaSize().height)))) + bool applyBIF = bifParams.ctuOn[ctuRsAddr] && ((TU::getCbf(currTU, COMPONENT_Y) || isInter == false) && (currTU.cu->qp > 17)) && (128 > std::max(currTU.lumaSize().width, currTU.lumaSize().height)) && ((isInter == false) || (32 > std::min(currTU.lumaSize().width, currTU.lumaSize().height))); +#endif + if (applyBIF) { #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY bool clipTop = false, clipBottom = false, clipLeft = false, clipRight = false; @@ -2209,7 +2251,7 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn clipBottom, clipLeft, clipRight, numHorVirBndry, numVerVirBndry, horVirBndryPos, verVirBndryPos); #endif - bilateralFilter.bilateralFilterDiamond5x5( COMPONENT_Y, srcYuv, resYuv, currTU.cu->qp, cs.slice->clpRng(COMPONENT_Y), currTU + bilateralFilter.bilateralFilterDiamond5x5( COMPONENT_Y, srcYuv, resYuv, currTU.cu->qp, cs.slice->clpRng(COMPONENT_Y), currTU, false #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY , isTUCrossedByVirtualBoundaries, horVirBndryPos, verVirBndryPos, numHorVirBndry, numVerVirBndry , clipTop, clipBottom, clipLeft, clipRight @@ -2292,6 +2334,9 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn ComponentID compID = ComponentID( compIdx ); BifParams& chromaBifParams = cs.picture->getBifParam( compID ); bool ctuEnableChromaBIF = chromaBifParams.ctuOn[ctuRsAddr]; +#if JVET_AF0112_BIF_DYNAMIC_SCALING + applyChromaBIF = ctuEnableChromaBIF && m_bilateralFilter.getApplyBIF(currTU, compID); +#else applyChromaBIF = false; if(!isDualTree) { @@ -2308,6 +2353,7 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn tuCBF = TU::getCbf(currTU, compID); applyChromaBIF = 
(ctuEnableChromaBIF && ((tuCBF || isInter == false) && (currTU.cu->qp > 17))); } +#endif if(applyChromaBIF) { #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY diff --git a/source/Lib/EncoderLib/InterSearch.cpp b/source/Lib/EncoderLib/InterSearch.cpp index aaeb87461e89e877287385cf628be316be757704..bfba7fdf823b5cc62d0523dafe14139403ed2431 100644 --- a/source/Lib/EncoderLib/InterSearch.cpp +++ b/source/Lib/EncoderLib/InterSearch.cpp @@ -13196,9 +13196,14 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par } #if JVET_V0094_BILATERAL_FILTER - // getCbf() is going to be 1 since currAbsSum > 0 here, according to the if-statement a couple of lines up. +#if JVET_AF0112_BIF_DYNAMIC_SCALING + bool applyBIF = cs.pps->getUseBIF() && isLuma(compID) && m_bilateralFilter->getApplyBIF(tu, compID); +#else bool isInter = (cu.predMode == MODE_INTER) ? true : false; - if( cs.pps->getUseBIF() && isLuma( compID ) && tu.cu->qp > 17 && 128 > std::max( tu.lumaSize().width, tu.lumaSize().height ) && ( !isInter || 32 > std::min( tu.lumaSize().width, tu.lumaSize().height ) ) ) + // getCbf() is going to be 1 since currAbsSum > 0 here, according to the if-statement a couple of lines up. 
+ bool applyBIF = cs.pps->getUseBIF() && isLuma(compID) && tu.cu->qp > 17 && 128 > std::max(tu.lumaSize().width, tu.lumaSize().height) && (!isInter || 32 > std::min(tu.lumaSize().width, tu.lumaSize().height)); +#endif + if(applyBIF) { CompArea tmpArea1( compID, tu.chromaFormat, Position(0, 0), Size(resiBuf.width, resiBuf.height)); PelBuf tmpRecLuma = m_tmpStorageLCU.getBuf(tmpArea1); @@ -13217,7 +13222,12 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par #if JVET_X0071_CHROMA_BILATERAL_FILTER if(isChroma(compID)) { - if (cs.pps->getUseChromaBIF() && isChroma(compID) && (tu.cu->qp > 17)) +#if JVET_AF0112_BIF_DYNAMIC_SCALING + bool applyChromaBIF = cs.pps->getUseChromaBIF() && m_bilateralFilter->getApplyBIF(tu, compID); +#else + bool applyChromaBIF = cs.pps->getUseChromaBIF() && isChroma(compID) && (tu.cu->qp > 17); +#endif + if (applyChromaBIF) { //chroma and bilateral CompArea tmpArea1(compID, tu.chromaFormat, Position(0, 0), Size(resiBuf.width, resiBuf.height)); @@ -14674,16 +14684,20 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa tmpRecLuma.rspSignal(m_pcReshape->getInvLUT()); } - if(cs.pps->getUseBIF() && isLuma(compID) && (cu.qp > 17)) + if(cs.pps->getUseBIF() && isLuma(compID)) { for (auto &currTU : CU::traverseTUs(cu)) - { - Position tuPosInCu = currTU.lumaPos() - cu.lumaPos(); - PelBuf tmpSubBuf = tmpRecLuma.subBuf(tuPosInCu, currTU.lumaSize()); - + { +#if JVET_AF0112_BIF_DYNAMIC_SCALING + bool applyBIF = m_bilateralFilter->getApplyBIF(currTU, compID); +#else bool isInter = (cu.predMode == MODE_INTER) ? 
true : false; - if( ( TU::getCbf( currTU, compID ) || !isInter ) && currTU.cu->qp > 17 && 128 > std::max( currTU.lumaSize().width, currTU.lumaSize().height ) && ( !isInter || 32 > std::min( currTU.lumaSize().width, currTU.lumaSize().height ) ) ) + bool applyBIF = ((TU::getCbf(currTU, compID) || !isInter) && currTU.cu->qp > 17 && 128 > std::max(currTU.lumaSize().width, currTU.lumaSize().height) && (!isInter || 32 > std::min(currTU.lumaSize().width, currTU.lumaSize().height))); +#endif + Position tuPosInCu = currTU.lumaPos() - cu.lumaPos(); + if(applyBIF) { + PelBuf tmpSubBuf = tmpRecLuma.subBuf(tuPosInCu, currTU.lumaSize()); CompArea compArea = currTU.blocks[compID]; PelBuf recIPredBuf = cs.slice->getPic()->getRecoBuf(compArea); @@ -14716,14 +14730,19 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa tmpRecChroma = m_tmpStorageLCU.getBuf(tmpArea2); tmpRecChroma.copyFrom(reco); - if(cs.pps->getUseChromaBIF() && isChroma(compID) && (cu.qp > 17)) + if(cs.pps->getUseChromaBIF() && isChroma(compID)) { for (auto &currTU : CU::traverseTUs(cu)) { +#if JVET_AF0112_BIF_DYNAMIC_SCALING + bool applyChromaBIF = m_bilateralFilter->getApplyBIF(currTU, compID); +#else + bool isInter = (cu.predMode == MODE_INTER) ? true : false; + bool applyChromaBIF = (TU::getCbf(currTU, compID) || isInter == false) && (cu.qp > 17); +#endif Position tuPosInCu = currTU.chromaPos() - cu.chromaPos(); PelBuf tmpSubBuf = tmpRecChroma.subBuf(tuPosInCu, currTU.chromaSize()); - bool isInter = (cu.predMode == MODE_INTER) ? 
true : false; - if ((TU::getCbf(currTU, compID) || isInter == false)) + if (applyChromaBIF) { CompArea compArea = currTU.blocks[compID]; PelBuf recIPredBuf = cs.slice->getPic()->getRecoBuf(compArea); @@ -14786,15 +14805,20 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa CompArea tmpArea2( compID, area.chromaFormat, Position(0, 0), area.size()); tmpRecChroma = m_tmpStorageLCU.getBuf(tmpArea2); tmpRecChroma.copyFrom(reco); - if(cs.pps->getUseChromaBIF() && isChroma(compID) && (cu.qp > 17)) + if(cs.pps->getUseChromaBIF() && isChroma(compID)) { for (auto &currTU : CU::traverseTUs(cu)) { - Position tuPosInCu = currTU.chromaPos() - cu.chromaPos(); - PelBuf tmpSubBuf = tmpRecChroma.subBuf(tuPosInCu, currTU.chromaSize()); +#if JVET_AF0112_BIF_DYNAMIC_SCALING + bool applyChromaBIF = m_bilateralFilter->getApplyBIF(currTU, compID); +#else bool isInter = (cu.predMode == MODE_INTER) ? true : false; - if ((TU::getCbf( currTU, compID ) || isInter == false)) + bool applyChromaBIF = (TU::getCbf(currTU, compID) || isInter == false) && (cu.qp > 17); +#endif + if (applyChromaBIF) { + Position tuPosInCu = currTU.chromaPos() - cu.chromaPos(); + PelBuf tmpSubBuf = tmpRecChroma.subBuf(tuPosInCu, currTU.chromaSize()); CompArea compArea = currTU.blocks[compID]; PelBuf recIPredBuf = cs.slice->getPic()->getRecoBuf(compArea); m_bilateralFilter->bilateralFilterRDOdiamond5x5( compID, tmpSubBuf, tmpSubBuf, tmpSubBuf, currTU.cu->qp, recIPredBuf, cs.slice->clpRng(compID), currTU, true ); diff --git a/source/Lib/EncoderLib/IntraSearch.cpp b/source/Lib/EncoderLib/IntraSearch.cpp index b48065f4d12deb16c8c37311744e11fa9e564d01..98a86a537a8622c1db850174096289f9c7f1d237 100644 --- a/source/Lib/EncoderLib/IntraSearch.cpp +++ b/source/Lib/EncoderLib/IntraSearch.cpp @@ -8032,7 +8032,12 @@ void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID &comp tmpRecLuma.rspSignal(m_pcReshape->getInvLUT()); } - if( pps.getUseBIF() /*&& (uiAbsSum > 0)*/ && 
tu.cu->qp > 17 && 128 > std::max( tu.lumaSize().width, tu.lumaSize().height ) ) +#if JVET_AF0112_BIF_DYNAMIC_SCALING + bool applyBIF = pps.getUseBIF() && m_bilateralFilter->getApplyBIF(tu, compID); +#else + bool applyBIF = pps.getUseBIF() /*&& (uiAbsSum > 0)*/ && tu.cu->qp > 17 && 128 > std::max(tu.lumaSize().width, tu.lumaSize().height); +#endif + if(applyBIF) { CompArea compArea = tu.blocks[compID]; PelBuf recIPredBuf = cs.slice->getPic()->getRecoBuf(compArea); @@ -8052,7 +8057,12 @@ void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID &comp else { #if JVET_X0071_CHROMA_BILATERAL_FILTER - if(pps.getUseChromaBIF() && isChroma(compID) && tu.cu->qp > 17) +#if JVET_AF0112_BIF_DYNAMIC_SCALING + bool applyChromaBIF = pps.getUseChromaBIF() && m_bilateralFilter->getApplyBIF(tu, compID); +#else + bool applyChromaBIF = pps.getUseChromaBIF() && tu.cu->qp > 17; +#endif + if(applyChromaBIF) { CompArea compArea = tu.blocks[compID]; PelBuf recIPredBuf = cs.slice->getPic()->getRecoBuf(compArea); @@ -8084,7 +8094,12 @@ void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID &comp { if(isLuma(compID)) { - if( pps.getUseBIF() /*&& (uiAbsSum > 0)*/ && tu.cu->qp > 17 && 128 > std::max( tu.lumaSize().width, tu.lumaSize().height ) ) +#if JVET_AF0112_BIF_DYNAMIC_SCALING + bool applyBIF = pps.getUseBIF() && m_bilateralFilter->getApplyBIF(tu, compID); +#else + bool applyBIF = pps.getUseBIF() /*&& (uiAbsSum > 0)*/ && tu.cu->qp > 17 && 128 > std::max(tu.lumaSize().width, tu.lumaSize().height); +#endif + if(applyBIF) { CompArea compArea = tu.blocks[compID]; PelBuf recIPredBuf = cs.slice->getPic()->getRecoBuf(compArea); @@ -8097,7 +8112,12 @@ void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID &comp else { #if JVET_X0071_CHROMA_BILATERAL_FILTER - if (pps.getUseChromaBIF() && isChroma(compID) && (tu.cu->qp > 17)) +#if JVET_AF0112_BIF_DYNAMIC_SCALING + bool applyChromaBIF = pps.getUseChromaBIF() && 
m_bilateralFilter->getApplyBIF(tu, compID); +#else + bool applyChromaBIF = pps.getUseChromaBIF() && isChroma(compID) && (tu.cu->qp > 17); +#endif + if (applyChromaBIF) { CompArea compArea = tu.blocks[compID]; PelBuf recIPredBuf = cs.slice->getPic()->getRecoBuf(compArea); @@ -8159,7 +8179,12 @@ void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID &comp } else { - if(pps.getUseChromaBIF() && isChroma(compID) && (tu.cu->qp > 17)) +#if JVET_AF0112_BIF_DYNAMIC_SCALING + bool applyChromaBIF = pps.getUseChromaBIF() && m_bilateralFilter->getApplyBIF(tu, compID); +#else + bool applyChromaBIF = pps.getUseChromaBIF() && tu.cu->qp > 17; +#endif + if (applyChromaBIF) { CompArea compArea = tu.blocks[compID]; PelBuf recIPredBuf = cs.slice->getPic()->getRecoBuf(compArea); @@ -8188,7 +8213,12 @@ void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID &comp } else { - if (pps.getUseChromaBIF() && isChroma(compID) && (tu.cu->qp > 17)) +#if JVET_AF0112_BIF_DYNAMIC_SCALING + bool applyChromaBIF = pps.getUseChromaBIF() && m_bilateralFilter->getApplyBIF(tu, compID); +#else + bool applyChromaBIF = pps.getUseChromaBIF() && tu.cu->qp > 17; +#endif + if (applyChromaBIF) { CompArea compArea = tu.blocks[compID]; PelBuf recIPredBuf = cs.slice->getPic()->getRecoBuf(compArea);