From ac34a8df47429b6526f7d958ad7e337b2718928e Mon Sep 17 00:00:00 2001 From: Vadim Seregin <vseregin@qti.qualcomm.com> Date: Thu, 9 Feb 2023 15:03:27 +0000 Subject: [PATCH] Format naming and code cleanup --- source/Lib/CommonLib/Buffer.cpp | 82 ++++++------- source/Lib/CommonLib/Buffer.h | 10 +- source/Lib/CommonLib/CodingStructure.cpp | 44 ++++--- source/Lib/CommonLib/InterPrediction.cpp | 149 ++++++++++++----------- source/Lib/CommonLib/InterPrediction.h | 14 +-- source/Lib/CommonLib/IntraPrediction.cpp | 2 +- source/Lib/CommonLib/IntraPrediction.h | 2 +- source/Lib/CommonLib/Picture.cpp | 26 ++-- source/Lib/CommonLib/x86/BufferX86.h | 140 ++++++++++----------- source/Lib/DecoderLib/DecCu.cpp | 4 +- source/Lib/EncoderLib/IntraSearch.cpp | 22 ++-- 11 files changed, 249 insertions(+), 246 deletions(-) diff --git a/source/Lib/CommonLib/Buffer.cpp b/source/Lib/CommonLib/Buffer.cpp index d211a000e..c739d7c02 100644 --- a/source/Lib/CommonLib/Buffer.cpp +++ b/source/Lib/CommonLib/Buffer.cpp @@ -282,7 +282,7 @@ void addBIOAvgCore(const Pel* src0, int src0Stride, const Pel* src1, int src1Str } #if MULTI_PASS_DMVR || SAMPLE_BASED_BDOF -void calcBIOParameterCore(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, Pel* gradX1, Pel* gradY0, Pel* gradY1, int width, int height, const int src0Stride, const int src1Stride, const int widthG, const int bitDepth, Pel* absGX, Pel* absGY, Pel* dIX, Pel* dIY, Pel* signGY_GX, Pel* dI) +void calcBIOParameterCore(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, Pel* gradX1, Pel* gradY0, Pel* gradY1, int width, int height, const int src0Stride, const int src1Stride, const int widthG, const int bitDepth, Pel* absGX, Pel* absGY, Pel* dIX, Pel* dIY, Pel* signGyGx, Pel* dI) { width -= 2; height -= 2; @@ -293,7 +293,7 @@ void calcBIOParameterCore(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, gradY0 += bioParamOffset; gradY1 += bioParamOffset; absGX += bioParamOffset; absGY += bioParamOffset; dIX += bioParamOffset; dIY += bioParamOffset; - signGY_GX += bioParamOffset; + signGyGx += bioParamOffset; int shift4 = 4; int shift5 = 1; if (dI) @@ -311,7 +311,7 @@ void calcBIOParameterCore(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, absGY[x] = (tmpGY < 0 ? -tmpGY : tmpGY); dIX[x] = (tmpGX < 0 ? -tmpDI : (tmpGX == 0 ? 0 : tmpDI)); dIY[x] = (tmpGY < 0 ? -tmpDI : (tmpGY == 0 ? 0 : tmpDI)); - signGY_GX[x] = (tmpGY < 0 ? -tmpGX : (tmpGY == 0 ? 0 : tmpGX)); + signGyGx[x] = (tmpGY < 0 ? -tmpGX : (tmpGY == 0 ? 0 : tmpGX)); } srcY0Tmp += src0Stride; srcY1Tmp += src1Stride; @@ -324,7 +324,7 @@ void calcBIOParameterCore(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, dI += widthG; dIX += widthG; dIY += widthG; - signGY_GX += widthG; + signGyGx += widthG; } return; @@ -341,7 +341,7 @@ void calcBIOParameterCore(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, absGY[x] = (tmpGY < 0 ? -tmpGY : tmpGY); dIX[x] = (tmpGX < 0 ? -tmpDI : (tmpGX == 0 ? 0 : tmpDI)); dIY[x] = (tmpGY < 0 ? -tmpDI : (tmpGY == 0 ? 0 : tmpDI)); - signGY_GX[x] = (tmpGY < 0 ? -tmpGX : (tmpGY == 0 ? 0 : tmpGX)); + signGyGx[x] = (tmpGY < 0 ? -tmpGX : (tmpGY == 0 ? 0 : tmpGX)); } srcY0Tmp += src0Stride; srcY1Tmp += src1Stride; @@ -353,55 +353,55 @@ void calcBIOParameterCore(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, absGY += widthG; dIX += widthG; dIY += widthG; - signGY_GX += widthG; + signGyGx += widthG; } } -void calcBIOParamSum5Core(Pel* absGX, Pel* absGY, Pel* dIX, Pel* dIY, Pel* signGY_GX, const int widthG, const int width, const int height, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY, int* sumSignGY_GX) +void calcBIOParamSum5Core(Pel* absGX, Pel* absGY, Pel* dIX, Pel* dIY, Pel* signGyGx, const int widthG, const int width, const int height, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY, int* sumSignGyGx) { for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) { - const int pixel_idx = y * width + x; - sumAbsGX[pixel_idx] = 0; - sumAbsGY[pixel_idx] = 0; - sumDIX[pixel_idx] = 0; - sumDIY[pixel_idx] = 0; - sumSignGY_GX[pixel_idx] = 0; + const int sampleIdx = y * width + x; + sumAbsGX[sampleIdx] = 0; + sumAbsGY[sampleIdx] = 0; + sumDIX[sampleIdx] = 0; + sumDIY[sampleIdx] = 0; + sumSignGyGx[sampleIdx] = 0; for (int yy = 0; yy < 5; yy++) { for (int xx = 0; xx < 5; xx++) { - sumAbsGX[pixel_idx] += absGX[xx]; - sumAbsGY[pixel_idx] += absGY[xx]; - sumDIX[pixel_idx] += dIX[xx]; - sumDIY[pixel_idx] += dIY[xx]; - sumSignGY_GX[pixel_idx] += signGY_GX[xx]; + sumAbsGX[sampleIdx] += absGX[xx]; + sumAbsGY[sampleIdx] += absGY[xx]; + sumDIX[sampleIdx] += dIX[xx]; + sumDIY[sampleIdx] += dIY[xx]; + sumSignGyGx[sampleIdx] += signGyGx[xx]; } absGX += widthG; absGY += widthG; dIX += widthG; dIY += widthG; - signGY_GX += widthG; + signGyGx += widthG; } - sumDIX[pixel_idx] <<= 2; - sumDIY[pixel_idx] <<= 2; + sumDIX[sampleIdx] <<= 2; + sumDIY[sampleIdx] <<= 2; absGX += (1 - 5 * widthG); absGY += (1 - 5 * widthG); dIX += (1 - 5 * widthG); dIY += (1 - 5 * widthG); - signGY_GX += (1 - 5 * widthG); + signGyGx += (1 - 5 * widthG); } absGX += (widthG - width); absGY += (widthG - width); dIX += (widthG - width); dIY += (widthG - width); - signGY_GX += (widthG - width); + signGyGx += (widthG - width); } } -void calcBIOParamSum4Core(Pel* absGX, Pel* absGY, Pel* dIX, Pel* dIY, Pel* signGY_GX, int width, int height, const int widthG, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY, int* sumSignGY_GX) +void calcBIOParamSum4Core(Pel* absGX, Pel* absGY, Pel* dIX, Pel* dIY, Pel* signGyGx, int width, int height, const int widthG, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY, int* sumSignGyGx) { for (int y = 0; y < height; y++) { @@ -411,30 +411,30 @@ void calcBIOParamSum4Core(Pel* absGX, Pel* absGY, Pel* dIX, Pel* dIY, Pel* signG *sumAbsGY += absGY[x]; *sumDIX += dIX[x]; *sumDIY += dIY[x]; - *sumSignGY_GX += signGY_GX[x]; + *sumSignGyGx += signGyGx[x]; } absGX += widthG; absGY += widthG; dIX += widthG; dIY += widthG; - signGY_GX += widthG; + signGyGx += widthG; } } -void calcBIOClippedVxVyCore(int* sumDIX_pixel_32bit, int* sumAbsGX_pixel_32bit, int* sumDIY_pixel_32bit, int* sumAbsGY_pixel_32bit, int* sumSignGY_GX_pixel_32bit, const int limit, const int bioSubblockSize, int* tmpx_pixel_32bit, int* tmpy_pixel_32bit) +void calcBIOClippedVxVyCore(int* sumDIXSample32bit, int* sumAbsGxSample32bit, int* sumDIYSample32bit, int* sumAbsGySample32bit, int* sumSignGyGxSample32bit, const int limit, const int bioSubblockSize, int* tmpxSample32bit, int* tmpySample32bit) { for (int idx = 0; idx < bioSubblockSize; idx++) { - *tmpx_pixel_32bit = Clip3(-limit, limit, (*sumDIX_pixel_32bit) >> (*sumAbsGX_pixel_32bit)); - int tmpData = ((*sumSignGY_GX_pixel_32bit) * (*tmpx_pixel_32bit)) >> 1; - *tmpy_pixel_32bit = Clip3(-limit, limit, (((*sumDIY_pixel_32bit) - tmpData) >> (*sumAbsGY_pixel_32bit))); - sumDIX_pixel_32bit++; - sumAbsGX_pixel_32bit++; - sumDIY_pixel_32bit++; - sumAbsGY_pixel_32bit++; - sumSignGY_GX_pixel_32bit++; - tmpx_pixel_32bit++; - tmpy_pixel_32bit++; + *tmpxSample32bit = Clip3(-limit, limit, (*sumDIXSample32bit) >> (*sumAbsGxSample32bit)); + int tmpData = ((*sumSignGyGxSample32bit) * (*tmpxSample32bit)) >> 1; + *tmpySample32bit = Clip3(-limit, limit, (((*sumDIYSample32bit) - tmpData) >> (*sumAbsGySample32bit))); + sumDIXSample32bit++; + sumAbsGxSample32bit++; + sumDIYSample32bit++; + sumAbsGySample32bit++; + sumSignGyGxSample32bit++; + tmpxSample32bit++; + tmpySample32bit++; } } #if JVET_Z0136_OOB @@ -542,7 +542,7 @@ void calAbsSumCore(const Pel* diff, int stride, int width, int height, int* absS } #endif -template<bool PAD = true> +template<bool pad = true> void gradFilterCore(Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, const int bitDepth) { Pel* srcTmp = pSrc + srcStride + 1; @@ -569,7 +569,7 @@ void gradFilterCore(Pel* pSrc, int srcStride, int width, int height, int gradStr } #if !MULTI_PASS_DMVR && !SAMPLE_BASED_BDOF - if (PAD) + if (pad) { gradXTmp = gradX + gradStride + 1; gradYTmp = gradY + gradStride + 1; @@ -594,7 +594,7 @@ void gradFilterCore(Pel* pSrc, int srcStride, int width, int height, int gradStr #endif } -void calcBIOSumsCore(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, Pel* gradX1, Pel* gradY0, Pel* gradY1, int xu, int yu, const int src0Stride, const int src1Stride, const int widthG, const int bitDepth, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY, int* sumSignGY_GX) +void calcBIOSumsCore(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, Pel* gradX1, Pel* gradY0, Pel* gradY1, int xu, int yu, const int src0Stride, const int src1Stride, const int widthG, const int bitDepth, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY, int* sumSignGyGx) { int shift4 = 4; int shift5 = 1; @@ -610,7 +610,7 @@ void calcBIOSumsCore(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, Pel* *sumAbsGY += (tmpGY < 0 ? -tmpGY : tmpGY); *sumDIX += (tmpGX < 0 ? -tmpDI : (tmpGX == 0 ? 0 : tmpDI)); *sumDIY += (tmpGY < 0 ? -tmpDI : (tmpGY == 0 ? 0 : tmpDI)); - *sumSignGY_GX += (tmpGY < 0 ? -tmpGX : (tmpGY == 0 ? 0 : tmpGX)); + *sumSignGyGx += (tmpGY < 0 ? -tmpGX : (tmpGY == 0 ? 0 : tmpGX)); } srcY1Tmp += src1Stride; diff --git a/source/Lib/CommonLib/Buffer.h b/source/Lib/CommonLib/Buffer.h index 943cb213f..daf28f03b 100644 --- a/source/Lib/CommonLib/Buffer.h +++ b/source/Lib/CommonLib/Buffer.h @@ -80,18 +80,18 @@ struct PelBufferOps void(*bioGradFilter) (Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, const int bitDepth); void(*calcBIOPar) (const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, const int bitDepth); #if MULTI_PASS_DMVR || SAMPLE_BASED_BDOF - void(*calcBIOParameter) (const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, Pel* gradX1, Pel* gradY0, Pel* gradY1, int width, int height, const int src0Stride, const int src1Stride, const int widthG, const int bitDepth, Pel* absGX, Pel* absGY, Pel* dIX, Pel* dIY, Pel* signGY_GX, Pel* dI); + void(*calcBIOParameter) (const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, Pel* gradX1, Pel* gradY0, Pel* gradY1, int width, int height, const int src0Stride, const int src1Stride, const int widthG, const int bitDepth, Pel* absGX, Pel* absGY, Pel* dIX, Pel* dIY, Pel* signGyGx, Pel* dI); void(*calAbsSum) (const Pel* diff, int stride, int width, int height, int* absDiff); - void(*calcBIOParamSum5) (Pel* absGX, Pel* absGY, Pel* dIX, Pel* dIY, Pel* signGY_GX, const int widthG, const int width, const int height, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY, int* sumSignGY_GX); - void(*calcBIOParamSum4) (Pel* absGX, Pel* absGY, Pel* dIX, Pel* dIY, Pel* signGY_GX, int width, int height, const int widthG, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY, int* sumSignGY_GX); + void(*calcBIOParamSum5) (Pel* absGX, Pel* absGY, Pel* dIX, Pel* dIY, Pel* signGyGx, const int widthG, const int width, const int height, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY, int* sumSignGyGx); + void(*calcBIOParamSum4) (Pel* absGX, Pel* absGY, Pel* dIX, Pel* dIY, Pel* signGyGx, int width, int height, const int widthG, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY, int* sumSignGyGx); #if JVET_Z0136_OOB void(*addBIOAvgN) (const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel *gradY1, int gradStride, int width, int height, int* tmpx, int* tmpy, int shift, int offset, const ClpRng& clpRng, bool *mcMask[2], int mcStride, bool *isOOB); #else void(*addBIOAvgN) (const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel *gradY1, int gradStride, int width, int height, int* tmpx, int* tmpy, int shift, int offset, const ClpRng& clpRng); #endif - void(*calcBIOClippedVxVy) (int* sumDIX_pixel_32bit, int* sumAbsGX_pixel_32bit, int* sumDIY_pixel_32bit, int* sumAbsGY_pixel_32bit, int* sumSignGY_GX_pixel_32bit, const int limit, const int bioSubblockSize, int* tmpx_pixel_32bit, int* tmpy_pixel_32bit); + void(*calcBIOClippedVxVy) (int* sumDIXSample32bit, int* sumAbsGxSample32bit, int* sumDIYSample32bit, int* sumAbsGySample32bit, int* sumSignGyGxSample32bit, const int limit, const int bioSubblockSize, int* tmpxSample32bit, int* tmpySample32bit); #endif - void(*calcBIOSums) (const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, Pel* gradX1, Pel* gradY0, Pel* gradY1, int xu, int yu, const int src0Stride, const int src1Stride, const int widthG, const int bitDepth, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY, int* sumSignGY_GX); + void(*calcBIOSums) (const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, Pel* gradX1, Pel* gradY0, Pel* gradY1, int xu, int yu, const int src0Stride, const int src1Stride, const int widthG, const int bitDepth, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY, int* sumSignGyGx); void(*calcBlkGradient)(int sx, int sy, int *arraysGx2, int *arraysGxGy, int *arraysGxdI, int *arraysGy2, int *arraysGydI, int &sGx2, int &sGy2, int &sGxGy, int &sGxdI, int &sGydI, int width, int height, int unitSize); void(*copyBuffer)(Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height); void(*padding)(Pel *dst, int stride, int width, int height, int padSize); diff --git a/source/Lib/CommonLib/CodingStructure.cpp b/source/Lib/CommonLib/CodingStructure.cpp index a21ab886c..e8b069d86 100644 --- a/source/Lib/CommonLib/CodingStructure.cpp +++ b/source/Lib/CommonLib/CodingStructure.cpp @@ -2294,35 +2294,47 @@ void CodingStructure::useSubStructure( const CodingStructure& subStruct, const C if( parent ) { // copy data to picture - if( cpyPred ) getPredBuf ( clippedArea ).copyFrom( subPredBuf ); -#if JVET_AC0162_ALF_RESIDUAL_SAMPLES_INPUT + if( cpyPred ) + { + getPredBuf( clippedArea ).copyFrom( subPredBuf ); + } + if (cpyResi) { getResiBuf(clippedArea).copyFrom(subResiBuf); } +#if JVET_AC0162_ALF_RESIDUAL_SAMPLES_INPUT else { getResiBuf(clippedArea).copyFrom(subStruct.getResiBuf(clippedArea), true); } -#else - if( cpyResi ) getResiBuf ( clippedArea ).copyFrom( subResiBuf ); #endif - if( cpyReco ) getRecoBuf ( clippedArea ).copyFrom( subRecoBuf ); - if( cpyOrgResi ) getOrgResiBuf( clippedArea ).copyFrom( subStruct.getOrgResiBuf( clippedArea ) ); + + if( cpyReco ) + { + getRecoBuf( clippedArea ).copyFrom( subRecoBuf ); + } + + if( cpyOrgResi ) + { + getOrgResiBuf( clippedArea ).copyFrom( subStruct.getOrgResiBuf( clippedArea ) ); + } + } + + if( cpyPred ) + { + picture->getPredBuf( clippedArea ).copyFrom( subPredBuf ); } - if( cpyPred ) picture->getPredBuf( clippedArea ).copyFrom( subPredBuf ); -#if JVET_AC0162_ALF_RESIDUAL_SAMPLES_INPUT if (cpyResi) { picture->getResiBuf(clippedArea).copyFrom(subResiBuf); } +#if JVET_AC0162_ALF_RESIDUAL_SAMPLES_INPUT else { picture->getResiBuf(clippedArea).copyFrom(subStruct.getResiBuf(clippedArea), true); } -#else - if( cpyResi ) picture->getResiBuf( clippedArea ).copyFrom( subResiBuf ); #endif #if JVET_Z0118_GDR @@ -3022,17 +3034,13 @@ PelBuf CodingStructure::getBuf( const CompArea &blk, const PictureType &type ) #if !KEEP_PRED_AND_RESI_SIGNALS #if JVET_AC0162_ALF_RESIDUAL_SAMPLES_INPUT if (!parent && (type == PIC_PREDICTION)) - { - cFinal.x &= (pcv->maxCUWidthMask >> getComponentScaleX(blk.compID, blk.chromaFormat)); - cFinal.y &= (pcv->maxCUHeightMask >> getComponentScaleY(blk.compID, blk.chromaFormat)); - } #else if( !parent && ( type == PIC_RESIDUAL || type == PIC_PREDICTION ) ) +#endif { cFinal.x &= ( pcv->maxCUWidthMask >> getComponentScaleX( blk.compID, blk.chromaFormat ) ); cFinal.y &= ( pcv->maxCUHeightMask >> getComponentScaleY( blk.compID, blk.chromaFormat ) ); } -#endif #endif return buf->getBuf( cFinal ); @@ -3067,17 +3075,13 @@ const CPelBuf CodingStructure::getBuf( const CompArea &blk, const PictureType &t #if !KEEP_PRED_AND_RESI_SIGNALS #if JVET_AC0162_ALF_RESIDUAL_SAMPLES_INPUT if (!parent && (type == PIC_PREDICTION)) - { - cFinal.x &= (pcv->maxCUWidthMask >> getComponentScaleX(blk.compID, blk.chromaFormat)); - cFinal.y &= (pcv->maxCUHeightMask >> getComponentScaleY(blk.compID, blk.chromaFormat)); - } #else if( !parent && ( type == PIC_RESIDUAL || type == PIC_PREDICTION ) ) +#endif { cFinal.x &= ( pcv->maxCUWidthMask >> getComponentScaleX( blk.compID, blk.chromaFormat ) ); cFinal.y &= ( pcv->maxCUHeightMask >> getComponentScaleY( blk.compID, blk.chromaFormat ) ); } -#endif #endif return buf->getBuf( cFinal ); diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp index 30bc4d6b8..45fec4b89 100644 --- a/source/Lib/CommonLib/InterPrediction.cpp +++ b/source/Lib/CommonLib/InterPrediction.cpp @@ -107,13 +107,13 @@ InterPrediction::InterPrediction() , m_dIy(nullptr) , m_dI(nullptr) , m_signGxGy(nullptr) -, m_tmpx_pixel_32bit(nullptr) -, m_tmpy_pixel_32bit(nullptr) -, m_sumAbsGX_pixel_32bit(nullptr) -, m_sumAbsGY_pixel_32bit(nullptr) -, m_sumDIX_pixel_32bit(nullptr) -, m_sumDIY_pixel_32bit(nullptr) -, m_sumSignGY_GX_pixel_32bit(nullptr) +, m_tmpxSample32bit(nullptr) +, m_tmpySample32bit(nullptr) +, m_sumAbsGxSample32bit(nullptr) +, m_sumAbsGySample32bit(nullptr) +, m_sumDIXSample32bit(nullptr) +, m_sumDIYSample32bit(nullptr) +, m_sumSignGyGxSample32bit(nullptr) #endif , m_subPuMC(false) { @@ -332,15 +332,15 @@ void InterPrediction::destroy() xFree(m_absGy); m_absGy = nullptr; xFree(m_dIx); m_dIx = nullptr; xFree(m_dIy); m_dIy = nullptr; - xFree(m_dI); m_dI = nullptr; + xFree(m_dI); m_dI = nullptr; xFree(m_signGxGy); m_signGxGy = nullptr; - xFree(m_tmpx_pixel_32bit); m_tmpx_pixel_32bit = nullptr; - xFree(m_tmpy_pixel_32bit); m_tmpy_pixel_32bit = nullptr; - xFree(m_sumAbsGX_pixel_32bit); m_sumAbsGX_pixel_32bit = nullptr; - xFree(m_sumAbsGY_pixel_32bit); m_sumAbsGY_pixel_32bit = nullptr; - xFree(m_sumDIX_pixel_32bit); m_sumDIX_pixel_32bit = nullptr; - xFree(m_sumDIY_pixel_32bit); m_sumDIY_pixel_32bit = nullptr; - xFree(m_sumSignGY_GX_pixel_32bit); m_sumSignGY_GX_pixel_32bit = nullptr; + xFree(m_tmpxSample32bit); m_tmpxSample32bit = nullptr; + xFree(m_tmpySample32bit); m_tmpySample32bit = nullptr; + xFree(m_sumAbsGxSample32bit); m_sumAbsGxSample32bit = nullptr; + xFree(m_sumAbsGySample32bit); m_sumAbsGySample32bit = nullptr; + xFree(m_sumDIXSample32bit); m_sumDIXSample32bit = nullptr; + xFree(m_sumDIYSample32bit); m_sumDIYSample32bit = nullptr; + xFree(m_sumSignGyGxSample32bit); m_sumSignGyGxSample32bit = nullptr; #endif #if ENABLE_OBMC m_tmpObmcBufL0.destroy(); @@ -520,13 +520,13 @@ void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC, cons m_dIy = (Pel*)xMalloc(Pel, BIO_TEMP_BUFFER_SIZE); m_dI = (Pel*)xMalloc(Pel, BIO_TEMP_BUFFER_SIZE); m_signGxGy = (Pel*)xMalloc(Pel, BIO_TEMP_BUFFER_SIZE); - m_tmpx_pixel_32bit = (int*)xMalloc(int, BDOF_SUBPU_SIZE); - m_tmpy_pixel_32bit = (int*)xMalloc(int, BDOF_SUBPU_SIZE); - m_sumAbsGX_pixel_32bit = (int*)xMalloc(int, BDOF_SUBPU_SIZE); - m_sumAbsGY_pixel_32bit = (int*)xMalloc(int, BDOF_SUBPU_SIZE); - m_sumDIX_pixel_32bit = (int*)xMalloc(int, BDOF_SUBPU_SIZE); - m_sumDIY_pixel_32bit = (int*)xMalloc(int, BDOF_SUBPU_SIZE); - m_sumSignGY_GX_pixel_32bit = (int*)xMalloc(int, BDOF_SUBPU_SIZE); + m_tmpxSample32bit = (int*)xMalloc(int, BDOF_SUBPU_SIZE); + m_tmpySample32bit = (int*)xMalloc(int, BDOF_SUBPU_SIZE); + m_sumAbsGxSample32bit = (int*)xMalloc(int, BDOF_SUBPU_SIZE); + m_sumAbsGySample32bit = (int*)xMalloc(int, BDOF_SUBPU_SIZE); + m_sumDIXSample32bit = (int*)xMalloc(int, BDOF_SUBPU_SIZE); + m_sumDIYSample32bit = (int*)xMalloc(int, BDOF_SUBPU_SIZE); + m_sumSignGyGxSample32bit = (int*)xMalloc(int, BDOF_SUBPU_SIZE); #endif #if ENABLE_OBMC m_tmpObmcBufL0.create(UnitArea(chromaFormatIDC, Area(0, 0, 4, MAX_CU_SIZE))); @@ -1097,8 +1097,10 @@ void InterPrediction::xPredInterBiSubPuBDOF(PredictionUnit &pu, PelUnitBuf &pcYu if (bioMv.hor == 0 && bioMv.ver == 0) { // only chroma MC - if (!lumaOnly) - xPredInterUni ( subPu, eRefPicList, pcMbBuf, true, bioApplied, false, chroma, false ); + if( !lumaOnly ) + { + xPredInterUni( subPu, eRefPicList, pcMbBuf, true, bioApplied, false, chroma, false ); + } } else { @@ -1170,7 +1172,9 @@ void InterPrediction::xPredInterBiSubPuBDOF(PredictionUnit &pu, PelUnitBuf &pcYu } #else if (!lumaOnly) + { xWeightedAverage( false/*isBdofMvRefine*/, 0/*bdofBlockOffset*/, subPu, srcSubPred0, srcSubPred1, subYuvPredBuf, slice.getSPS()->getBitDepths(), slice.clpRngs(), false/*bioApplied*/, lumaOnly, true/*chromaOnly*/, NULL/*yuvPredTmp*/ ); + } #endif } else @@ -3617,26 +3621,35 @@ void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf continue; } - int sumAbsGX_block = 0, sumAbsGY_block = 0, sumDIX_block = 0, sumDIY_block = 0, sumSignGY_GX_block = 0; + int sumAbsGxBlock = 0, sumAbsGyBlock = 0, sumDIXblock = 0, sumDIYblock = 0, sumSignGyGxBlock = 0; g_pelBufOP.calcBIOParamSum4(m_absGx + bioBlockParamOffset, m_absGy + bioBlockParamOffset, m_dIx + bioBlockParamOffset, m_dIy + bioBlockParamOffset, m_signGxGy + bioBlockParamOffset, bioDx + 4, bioDy + 4, widthG, - &sumAbsGX_block, &sumAbsGY_block, &sumDIX_block, &sumDIY_block, &sumSignGY_GX_block); + &sumAbsGxBlock, &sumAbsGyBlock, &sumDIXblock, &sumDIYblock, &sumSignGyGxBlock); - int tmpx_block = (sumAbsGX_block == 0 ? 0 : rightShiftMSB(sumDIX_block << 3, sumAbsGX_block)); - int tmpData_block = ((tmpx_block * sumSignGY_GX_block) >> 1); - int tmpy_block = (sumAbsGY_block == 0 ? 0 : rightShiftMSB(((sumDIY_block << 3) - tmpData_block), sumAbsGY_block)); - tmpx_block = Clip3(-256, 256, tmpx_block); - tmpy_block = Clip3(-256, 256, tmpy_block); + int tmpXblock = (sumAbsGxBlock == 0 ? 0 : rightShiftMSB(sumDIXblock << 3, sumAbsGxBlock)); + int tmpDataBlock = ((tmpXblock * sumSignGyGxBlock) >> 1); + int tmpYblock = (sumAbsGyBlock == 0 ? 0 : rightShiftMSB(((sumDIYblock << 3) - tmpDataBlock), sumAbsGyBlock)); + tmpXblock = Clip3(-256, 256, tmpXblock); + tmpYblock = Clip3(-256, 256, tmpYblock); Mv bioMv; - if (tmpx_block >= 0) - bioMv.hor = ((tmpx_block + 4) >> 3); + if( tmpXblock >= 0 ) + { + bioMv.hor = ((tmpXblock + 4) >> 3); + } else - bioMv.hor = (-1) * ((((-1) * tmpx_block) + 4) >> 3); - if (tmpy_block >= 0) - bioMv.ver = ((tmpy_block + 4) >> 3); + { + bioMv.hor = (-1) * ((((-1) * tmpXblock) + 4) >> 3); + } + + if( tmpYblock >= 0 ) + { + bioMv.ver = ((tmpYblock + 4) >> 3); + } else - bioMv.ver = (-1) * ((((-1) * tmpy_block) + 4) >> 3); + { + bioMv.ver = (-1) * ((((-1) * tmpYblock) + 4) >> 3); + } m_bdofSubPuMvOffset[bdofBlockOffset + bioSubPuMvIndex] = bioMv; if (bioMv.hor == 0 && bioMv.ver == 0) @@ -3734,9 +3747,9 @@ void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf gradY0 = m_gradY0; gradY1 = m_gradY1; Pel *pGradX0Tmp, *pGradX1Tmp, *pGradY0Tmp, *pGradY1Tmp; - const Pel *SrcY0Tmp, *SrcY1Tmp; + const Pel *srcY0Tmp, *srcY1Tmp; int tmpx = 0, tmpy = 0; - int sumAbsGX = 0, sumAbsGY = 0, sumDIX = 0, sumDIY = 0, sumSignGY_GX = 0; + int sumAbsGX = 0, sumAbsGY = 0, sumDIX = 0, sumDIY = 0, sumSignGyGx = 0; int gradOfst, srcOfst, dstOfst, gradLineOfst = 0, srcLineOfst = 0, dstLineOfst = 0; for (int yu = 0; yu < yUnit; yu++) @@ -3747,27 +3760,27 @@ void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf for (int xu = 0; xu < xUnit; xu++) { - sumAbsGX = 0; sumAbsGY = 0; sumDIX = 0; sumDIY = 0, sumSignGY_GX = 0; + sumAbsGX = 0; sumAbsGY = 0; sumDIX = 0; sumDIY = 0, sumSignGyGx = 0; pGradX0Tmp = m_gradX0 + gradOfst; pGradX1Tmp = m_gradX1 + gradOfst; pGradY0Tmp = m_gradY0 + gradOfst; pGradY1Tmp = m_gradY1 + gradOfst; - SrcY1Tmp = srcY1 + srcOfst; - SrcY0Tmp = srcY0 + srcOfst; + srcY1Tmp = srcY1 + srcOfst; + srcY0Tmp = srcY0 + srcOfst; - g_pelBufOP.calcBIOSums(SrcY0Tmp, SrcY1Tmp, pGradX0Tmp, pGradX1Tmp, pGradY0Tmp, pGradY1Tmp, xu, yu, src0Stride, src1Stride, widthG, bitDepth, &sumAbsGX, &sumAbsGY, &sumDIX, &sumDIY, &sumSignGY_GX); + g_pelBufOP.calcBIOSums(srcY0Tmp, srcY1Tmp, pGradX0Tmp, pGradX1Tmp, pGradY0Tmp, pGradY1Tmp, xu, yu, src0Stride, src1Stride, widthG, bitDepth, &sumAbsGX, &sumAbsGY, &sumDIX, &sumDIY, &sumSignGyGx); tmpx = (sumAbsGX == 0 ? 0 : rightShiftMSB(sumDIX << 2, sumAbsGX)); tmpx = Clip3(-limit, limit, tmpx); - int mainsGxGy = sumSignGY_GX >> 12; - int secsGxGy = sumSignGY_GX & ((1 << 12) - 1); + int mainsGxGy = sumSignGyGx >> 12; + int secsGxGy = sumSignGyGx & ((1 << 12) - 1); int tmpData = tmpx * mainsGxGy; tmpData = ((tmpData << 12) + tmpx*secsGxGy) >> 1; tmpy = (sumAbsGY == 0 ? 0 : rightShiftMSB(((sumDIY << 2) - tmpData), sumAbsGY)); tmpy = Clip3(-limit, limit, tmpy); - srcY0Temp = SrcY0Tmp + ( stridePredMC + 1 ); - srcY1Temp = SrcY1Tmp + ( stridePredMC + 1 ); + srcY0Temp = srcY0Tmp + ( stridePredMC + 1 ); + srcY1Temp = srcY1Tmp + ( stridePredMC + 1 ); gradX0 = pGradX0Tmp + offsetPos; gradX1 = pGradX1Tmp + offsetPos; gradY0 = pGradY0Tmp + offsetPos; @@ -3797,32 +3810,32 @@ void InterPrediction::subBlockBiOptFlow(Pel* dstY, const int dstStride, const Pe #if SAMPLE_BASED_BDOF g_pelBufOP.calcBIOParamSum5(m_absGx + bioParamOffset, m_absGy + bioParamOffset, m_dIx + bioParamOffset, m_dIy + bioParamOffset, m_signGxGy + bioParamOffset, bioParamStride, width, height, - m_sumAbsGX_pixel_32bit, m_sumAbsGY_pixel_32bit, m_sumDIX_pixel_32bit, m_sumDIY_pixel_32bit, m_sumSignGY_GX_pixel_32bit); + m_sumAbsGxSample32bit, m_sumAbsGySample32bit, m_sumDIXSample32bit, m_sumDIYSample32bit, m_sumSignGyGxSample32bit); // sumDIX and sumDIY left shift by 2 is calculated in previous step const int bioSubblockSize = width * height; - for (int pixel_index = 0; pixel_index < bioSubblockSize; pixel_index++) + for (int sampleIndex = 0; sampleIndex < bioSubblockSize; sampleIndex++) { - if (m_sumAbsGX_pixel_32bit[pixel_index] == 0) + if (m_sumAbsGxSample32bit[sampleIndex] == 0) { - m_sumDIX_pixel_32bit[pixel_index] = 0; - m_sumAbsGX_pixel_32bit[pixel_index] = 32; + m_sumDIXSample32bit[sampleIndex] = 0; + m_sumAbsGxSample32bit[sampleIndex] = 32; } else { - m_sumAbsGX_pixel_32bit[pixel_index] = floorLog2(m_sumAbsGX_pixel_32bit[pixel_index]); + m_sumAbsGxSample32bit[sampleIndex] = floorLog2(m_sumAbsGxSample32bit[sampleIndex]); } - if (m_sumAbsGY_pixel_32bit[pixel_index] == 0) + if (m_sumAbsGySample32bit[sampleIndex] == 0) { - m_sumDIY_pixel_32bit[pixel_index] = 0; - m_sumSignGY_GX_pixel_32bit[pixel_index] = 0; - m_sumAbsGY_pixel_32bit[pixel_index] = 32; + m_sumDIYSample32bit[sampleIndex] = 0; + m_sumSignGyGxSample32bit[sampleIndex] = 0; + m_sumAbsGySample32bit[sampleIndex] = 32; } else { - m_sumAbsGY_pixel_32bit[pixel_index] = floorLog2(m_sumAbsGY_pixel_32bit[pixel_index]); + m_sumAbsGySample32bit[sampleIndex] = floorLog2(m_sumAbsGySample32bit[sampleIndex]); } } - g_pelBufOP.calcBIOClippedVxVy(m_sumDIX_pixel_32bit, m_sumAbsGX_pixel_32bit, m_sumDIY_pixel_32bit, m_sumAbsGY_pixel_32bit, m_sumSignGY_GX_pixel_32bit, limit, bioSubblockSize, m_tmpx_pixel_32bit, m_tmpy_pixel_32bit); + g_pelBufOP.calcBIOClippedVxVy(m_sumDIXSample32bit, m_sumAbsGxSample32bit, m_sumDIYSample32bit, m_sumAbsGySample32bit, m_sumSignGyGxSample32bit, limit, bioSubblockSize, m_tmpxSample32bit, m_tmpySample32bit); bioParamOffset += ((bioParamStride + 1) << 1); #else bioParamOffset += ((bioParamStride + 1) << 1); @@ -3832,7 +3845,7 @@ void InterPrediction::subBlockBiOptFlow(Pel* dstY, const int dstStride, const Pe for (int xUnit = 0; xUnit < width; xUnit += unitSize) { int subTmpx = 0, subTmpy = 0; - int subSumGx = 0, subSumGy = 0, subSumDIX = 0, subSumDIY = 0, subSumSignGY_GX = 0; + int subSumGx = 0, subSumGy = 0, subSumDIX = 0, subSumDIY = 0, subSumSignGyGx = 0; int subBioParamOffset = bioParamOffset + (yUnit - extendSize) * bioParamStride + xUnit; for (int ySub = -extendSize; ySub < (extendSize + unitSize); ySub++) { @@ -3842,16 +3855,16 @@ void InterPrediction::subBlockBiOptFlow(Pel* dstY, const int dstStride, const Pe subSumGy += m_absGy[subBioParamOffset + xSub]; subSumDIX += m_dIx[subBioParamOffset + xSub]; subSumDIY += m_dIy[subBioParamOffset + xSub]; - subSumSignGY_GX += m_signGxGy[subBioParamOffset + xSub]; + subSumSignGyGx += m_signGxGy[subBioParamOffset + xSub]; } subBioParamOffset += bioParamStride; } subTmpx = (subSumGx == 0 ? 0 : rightShiftMSB(subSumDIX << 2, subSumGx)); subTmpx = Clip3(-limit, limit, subTmpx); - int mainsGxGy = subSumSignGY_GX >> 12; - int secsGxGy = subSumSignGY_GX & ((1 << 12) - 1); - int tmpData = subTmpx * mainsGxGy; + int mainsGxGy = subSumSignGyGx >> 12; + int secsGxGy = subSumSignGyGx & ((1 << 12) - 1); + int tmpData = subTmpx * mainsGxGy; tmpData = ((tmpData << 12) + subTmpx*secsGxGy) >> 1; subTmpy = (subSumGy == 0 ? 0 : rightShiftMSB(((subSumDIY << 2) - tmpData), subSumGy)); subTmpy = Clip3(-limit, limit, subTmpy); @@ -3860,8 +3873,8 @@ void InterPrediction::subBlockBiOptFlow(Pel* dstY, const int dstStride, const Pe { for (int xSub = 0; xSub < unitSize; xSub++) { - m_tmpx_pixel_32bit[curSubIdx + xSub] = subTmpx; - m_tmpy_pixel_32bit[curSubIdx + xSub] = subTmpy; + m_tmpxSample32bit[curSubIdx + xSub] = subTmpx; + m_tmpySample32bit[curSubIdx + xSub] = subTmpy; } curSubIdx += width; } @@ -3869,9 +3882,9 @@ void InterPrediction::subBlockBiOptFlow(Pel* dstY, const int dstStride, const Pe } #endif #if JVET_Z0136_OOB - g_pelBufOP.addBIOAvgN(src0, src0Stride, src1, src1Stride, dstY, dstStride, m_gradX0 + bioParamOffset, m_gradX1 + bioParamOffset, m_gradY0 + bioParamOffset, m_gradY1 + bioParamOffset, bioParamStride, width, height, m_tmpx_pixel_32bit, m_tmpy_pixel_32bit, shiftNum, offset, clpRng, mcMask, mcStride, isOOB); + g_pelBufOP.addBIOAvgN(src0, src0Stride, src1, src1Stride, dstY, dstStride, m_gradX0 + bioParamOffset, m_gradX1 + bioParamOffset, m_gradY0 + bioParamOffset, m_gradY1 + bioParamOffset, bioParamStride, width, height, m_tmpxSample32bit, m_tmpySample32bit, shiftNum, offset, clpRng, mcMask, mcStride, isOOB); #else - g_pelBufOP.addBIOAvgN(src0, src0Stride, src1, src1Stride, dstY, dstStride, m_gradX0 + bioParamOffset, m_gradX1 + bioParamOffset, m_gradY0 + bioParamOffset, m_gradY1 + bioParamOffset, bioParamStride, width, height, m_tmpx_pixel_32bit, m_tmpy_pixel_32bit, shiftNum, offset, clpRng); + g_pelBufOP.addBIOAvgN(src0, src0Stride, src1, src1Stride, dstY, dstStride, m_gradX0 + bioParamOffset, m_gradX1 + bioParamOffset, m_gradY0 + bioParamOffset, m_gradY1 + bioParamOffset, bioParamStride, width, height, m_tmpxSample32bit, m_tmpySample32bit, shiftNum, offset, clpRng); #endif } #endif diff --git a/source/Lib/CommonLib/InterPrediction.h b/source/Lib/CommonLib/InterPrediction.h index b92d16c9f..42925e79b 100644 --- a/source/Lib/CommonLib/InterPrediction.h +++ b/source/Lib/CommonLib/InterPrediction.h @@ -219,13 +219,13 @@ protected: Pel* m_dIy; Pel* m_dI; Pel* m_signGxGy; - int* m_tmpx_pixel_32bit; - int* m_tmpy_pixel_32bit; - int* m_sumAbsGX_pixel_32bit; - int* m_sumAbsGY_pixel_32bit; - int* m_sumDIX_pixel_32bit; - int* m_sumDIY_pixel_32bit; - int* m_sumSignGY_GX_pixel_32bit; + int* m_tmpxSample32bit; + int* m_tmpySample32bit; + int* m_sumAbsGxSample32bit; + int* m_sumAbsGySample32bit; + int* m_sumDIXSample32bit; + int* m_sumDIYSample32bit; + int* m_sumSignGyGxSample32bit; bool m_bdofMvRefined; Mv m_bdofSubPuMvOffset[BDOF_SUBPU_MAX_NUM]; #endif diff --git a/source/Lib/CommonLib/IntraPrediction.cpp b/source/Lib/CommonLib/IntraPrediction.cpp index 29ed70022..28292146a 100644 --- a/source/Lib/CommonLib/IntraPrediction.cpp +++ b/source/Lib/CommonLib/IntraPrediction.cpp @@ -1841,7 +1841,7 @@ void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, co } #if JVET_AC0071_DBV -void IntraPrediction::PredIntraDbv(const ComponentID compId, PelBuf &piPred, const PredictionUnit &pu) +void IntraPrediction::predIntraDbv(const ComponentID compId, PelBuf &piPred, const PredictionUnit &pu) { const int shiftSampleHor = ::getComponentScaleX(compId, pu.chromaFormat); const int shiftSampleVer = ::getComponentScaleY(compId, pu.chromaFormat); diff --git a/source/Lib/CommonLib/IntraPrediction.h b/source/Lib/CommonLib/IntraPrediction.h index 9f0e06831..179517a9b 100644 --- a/source/Lib/CommonLib/IntraPrediction.h +++ b/source/Lib/CommonLib/IntraPrediction.h @@ -580,7 +580,7 @@ public: #if JVET_AC0071_DBV // Direct Block Vector - void PredIntraDbv(const ComponentID compId, PelBuf &piPred, const PredictionUnit &pu); + void predIntraDbv(const ComponentID compId, PelBuf &piPred, const PredictionUnit &pu); Mv refineChromaBv(const ComponentID compId, const PredictionUnit &pu); #endif diff --git a/source/Lib/CommonLib/Picture.cpp b/source/Lib/CommonLib/Picture.cpp index d30510dad..7c109be02 100644 --- a/source/Lib/CommonLib/Picture.cpp +++ b/source/Lib/CommonLib/Picture.cpp @@ -297,6 +297,9 @@ void Picture::createTempBuffers( const unsigned _maxCUSize ) const Area a( Position{ 0, 0 }, lumaSize() ); #else const Area a = m_ctuArea.Y(); +#if JVET_AC0162_ALF_RESIDUAL_SAMPLES_INPUT + const Area aOld( Position{ 0, 0 }, lumaSize() ); +#endif #endif #if ENABLE_SPLIT_PARALLELISM @@ -306,9 +309,8 @@ void Picture::createTempBuffers( const unsigned _maxCUSize ) #endif { M_BUFS( jId, PIC_PREDICTION ).create( chromaFormat, a, _maxCUSize ); -#if JVET_AC0162_ALF_RESIDUAL_SAMPLES_INPUT - const Area aOld(Position{ 0, 0 }, lumaSize()); - M_BUFS(jId, PIC_RESIDUAL).create(chromaFormat, aOld, _maxCUSize); +#if JVET_AC0162_ALF_RESIDUAL_SAMPLES_INPUT && !KEEP_PRED_AND_RESI_SIGNALS + M_BUFS( jId, PIC_RESIDUAL ).create(chromaFormat, aOld, _maxCUSize ); #else M_BUFS( jId, PIC_RESIDUAL ).create( chromaFormat, a, _maxCUSize ); #endif @@ -1532,15 +1534,9 @@ PelBuf Picture::getBuf( const CompArea &blk, const PictureType &type ) #if !KEEP_PRED_AND_RESI_SIGNALS #if JVET_AC0162_ALF_RESIDUAL_SAMPLES_INPUT if (type == PIC_PREDICTION) - { - CompArea localBlk = blk; - localBlk.x &= (cs->pcv->maxCUWidthMask >> getComponentScaleX(blk.compID, blk.chromaFormat)); - localBlk.y &= (cs->pcv->maxCUHeightMask >> getComponentScaleY(blk.compID, blk.chromaFormat)); - - return M_BUFS(jId, type).getBuf(localBlk); - } #else if( type == PIC_RESIDUAL || type == PIC_PREDICTION ) +#endif { CompArea localBlk = blk; localBlk.x &= ( cs->pcv->maxCUWidthMask >> getComponentScaleX( blk.compID, blk.chromaFormat ) ); @@ -1548,7 +1544,6 @@ PelBuf Picture::getBuf( const CompArea &blk, const PictureType &type ) return M_BUFS( jId, type ).getBuf( localBlk ); } -#endif #endif return M_BUFS( jId, type ).getBuf( blk ); @@ -1568,15 +1563,9 @@ const CPelBuf Picture::getBuf( const CompArea &blk, const PictureType &type ) co #if !KEEP_PRED_AND_RESI_SIGNALS #if JVET_AC0162_ALF_RESIDUAL_SAMPLES_INPUT if (type == PIC_PREDICTION) - { - CompArea localBlk = blk; - localBlk.x &= (cs->pcv->maxCUWidthMask >> getComponentScaleX(blk.compID, blk.chromaFormat)); - localBlk.y &= (cs->pcv->maxCUHeightMask >> getComponentScaleY(blk.compID, blk.chromaFormat)); - - return M_BUFS(jId, type).getBuf(localBlk); - } #else if( type == PIC_RESIDUAL || type == PIC_PREDICTION ) +#endif { CompArea localBlk = blk; localBlk.x &= ( cs->pcv->maxCUWidthMask >> getComponentScaleX( blk.compID, blk.chromaFormat ) ); @@ -1584,7 +1573,6 @@ const CPelBuf Picture::getBuf( const CompArea &blk, const PictureType &type ) co return M_BUFS( jId, type ).getBuf( localBlk ); } -#endif #endif return M_BUFS( jId, type ).getBuf( blk ); diff --git a/source/Lib/CommonLib/x86/BufferX86.h b/source/Lib/CommonLib/x86/BufferX86.h index 61d74d361..ff94e1136 100644 --- a/source/Lib/CommonLib/x86/BufferX86.h +++ b/source/Lib/CommonLib/x86/BufferX86.h @@ -579,7 +579,7 @@ void addBIOAvg4_SSE(const Pel* src0, int src0Stride, const Pel* src1, int src1St #if MULTI_PASS_DMVR || SAMPLE_BASED_BDOF template< X86_VEXT vext > -void calcBIOParameter_SSE(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, Pel* gradX1, Pel* gradY0, Pel* gradY1, int width, int height, const int src0Stride, const int src1Stride, const int widthG, const int bitDepth, Pel* absGX, Pel* absGY, Pel* dIX, Pel* dIY, Pel* signGY_GX, Pel* dI) +void calcBIOParameter_SSE(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, Pel* gradX1, Pel* gradY0, Pel* gradY1, int width, int height, const int src0Stride, const int src1Stride, const int widthG, const int bitDepth, Pel* absGX, Pel* absGY, Pel* dIX, Pel* dIY, Pel* signGyGx, Pel* dI) { width -= 2; height -= 2; @@ -590,7 +590,7 @@ void calcBIOParameter_SSE(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, gradY0 += bioParamOffset; gradY1 += bioParamOffset; absGX += bioParamOffset; absGY += bioParamOffset; dIX += bioParamOffset; dIY += bioParamOffset; - signGY_GX += bioParamOffset; + signGyGx += bioParamOffset; if (dI) { dI += bioParamOffset; @@ -619,13 +619,13 @@ void calcBIOParameter_SSE(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, } __m128i dIX_tmp = _mm_sign_epi16(subTemp1, packTempX ); __m128i dIY_tmp = _mm_sign_epi16(subTemp1, packTempY ); - __m128i signGY_GX_tmp = _mm_sign_epi16(packTempX, packTempY ); + __m128i signGyGxTmp = _mm_sign_epi16(packTempX, packTempY ); _mm_storeu_si128( ( __m128i * )absGX, gX_tmp ); _mm_storeu_si128( ( __m128i * )absGY, gY_tmp ); _mm_storeu_si128( ( __m128i * )dIX, dIX_tmp ); _mm_storeu_si128( ( __m128i * )dIY, dIY_tmp ); - _mm_storeu_si128( ( __m128i * )signGY_GX, signGY_GX_tmp ); + _mm_storeu_si128( ( __m128i * )signGyGx, signGyGxTmp ); srcY0Tmp += src0Stride; srcY1Tmp += src1Stride; gradX0 += widthG; @@ -640,7 +640,7 @@ void calcBIOParameter_SSE(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, } dIX += widthG; dIY += widthG; - signGY_GX += widthG; + signGyGx += widthG; } } else // width = 12, 20, 36, 68, 132, 260 @@ -666,13 +666,13 @@ void calcBIOParameter_SSE(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, } __m256i dIX_tmp = _mm256_sign_epi16(subTemp1, packTempX ); __m256i dIY_tmp = _mm256_sign_epi16(subTemp1, packTempY ); - __m256i signGY_GX_tmp = _mm256_sign_epi16(packTempX, packTempY ); + __m256i signGyGxTmp = _mm256_sign_epi16(packTempX, packTempY ); _mm256_storeu_si256( ( __m256i * ) ( absGX + x ), gX_tmp ); _mm256_storeu_si256( ( __m256i * ) ( absGY + x ), gY_tmp ); _mm256_storeu_si256( ( __m256i * ) ( dIX + x ), dIX_tmp ); _mm256_storeu_si256( ( __m256i * ) ( dIY + x ), dIY_tmp ); - _mm256_storeu_si256( ( __m256i * ) ( signGY_GX + x ), signGY_GX_tmp ); + _mm256_storeu_si256( ( __m256i * ) ( signGyGx + x ), signGyGxTmp ); } srcY0Tmp += src0Stride; srcY1Tmp += src1Stride; @@ -688,7 +688,7 @@ void calcBIOParameter_SSE(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, } dIX += widthG; dIY += widthG; - signGY_GX += widthG; + signGyGx += widthG; } } #else @@ -713,13 +713,13 @@ void calcBIOParameter_SSE(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, } __m128i dIX_tmp = _mm_sign_epi16(subTemp1, packTempX ); __m128i dIY_tmp = _mm_sign_epi16(subTemp1, packTempY ); - __m128i signGY_GX_tmp = _mm_sign_epi16(packTempX, packTempY ); + __m128i signGyGxTmp = _mm_sign_epi16(packTempX, packTempY ); _mm_storeu_si128( ( __m128i * ) ( absGX + x ), gX_tmp ); _mm_storeu_si128( ( __m128i * ) ( absGY + x ), gY_tmp ); _mm_storeu_si128( ( __m128i * ) ( dIX + x ), dIX_tmp ); _mm_storeu_si128( ( __m128i * ) ( dIY + x ), dIY_tmp ); - _mm_storeu_si128( ( __m128i * ) ( signGY_GX + x ), signGY_GX_tmp ); + _mm_storeu_si128( ( __m128i * ) ( signGyGx + x ), signGyGxTmp ); } srcY0Tmp += src0Stride; srcY1Tmp += src1Stride; @@ -735,12 +735,12 @@ void calcBIOParameter_SSE(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, { dI += widthG; } - signGY_GX += widthG; + signGyGx += widthG; } #endif } template< X86_VEXT vext > -void calcBIOParamSum5_SSE(Pel* absGX, Pel* absGY, Pel* dIX, Pel* dIY, Pel* signGY_GX, const int widthG, const int width, const int height, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY, int* sumSignGY_GX) +void calcBIOParamSum5_SSE(Pel* absGX, Pel* absGY, Pel* dIX, Pel* dIY, Pel* signGyGx, const int widthG, const int width, const int height, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY, int* sumSignGyGx) { __m128i vzero = _mm_setzero_si128(); __m128i vmask = _mm_setr_epi16(1, 1, 1, 1, 1, 0, 0, 0); @@ -782,9 +782,9 @@ void calcBIOParamSum5_SSE(Pel* absGX, Pel* absGY, Pel* dIX, Pel* dIY, Pel* signG sumDIYTmp16 = _mm_add_epi16(_mm_loadu_si128((const __m128i*)dIY), _mm_loadu_si128((const __m128i*)(dIY + widthG))); sumDIYTmp16 = _mm_add_epi16(sumDIYTmp16, _mm_loadu_si128((const __m128i*)(dIY + widthG_2))); sumDIYTmp16 = _mm_add_epi16(sumDIYTmp16, _mm_loadu_si128((const __m128i*)(dIY + widthG_3))); - sumSignGyGxTmp16 = _mm_add_epi16(_mm_loadu_si128((const __m128i*)signGY_GX), _mm_loadu_si128((const __m128i*)(signGY_GX + widthG))); - sumSignGyGxTmp16 = _mm_add_epi16(sumSignGyGxTmp16, _mm_loadu_si128((const __m128i*)(signGY_GX + widthG_2))); - sumSignGyGxTmp16 = _mm_add_epi16(sumSignGyGxTmp16, _mm_loadu_si128((const __m128i*)(signGY_GX + widthG_3))); + sumSignGyGxTmp16 = _mm_add_epi16(_mm_loadu_si128((const __m128i*)signGyGx), _mm_loadu_si128((const __m128i*)(signGyGx + widthG))); + sumSignGyGxTmp16 = _mm_add_epi16(sumSignGyGxTmp16, _mm_loadu_si128((const __m128i*)(signGyGx + widthG_2))); + sumSignGyGxTmp16 = _mm_add_epi16(sumSignGyGxTmp16, _mm_loadu_si128((const __m128i*)(signGyGx + widthG_3))); __m128i absGXOneRow = vzero; __m128i dIXOneRow = vzero; @@ -806,8 +806,8 @@ void calcBIOParamSum5_SSE(Pel* absGX, Pel* absGY, Pel* dIX, Pel* dIY, Pel* signG dIYOneRow = _mm_loadu_si128((const __m128i*)dIY); sumDIYTmp16 = _mm_add_epi16(sumDIYTmp16, _mm_loadu_si128((const __m128i*)(dIY + widthG_4))); sumSignGyGxTmp16 = _mm_sub_epi16(sumSignGyGxTmp16, signGyGxOneRow); - signGyGxOneRow = _mm_loadu_si128((const __m128i*)signGY_GX); - sumSignGyGxTmp16 = _mm_add_epi16(sumSignGyGxTmp16, _mm_loadu_si128((const __m128i*)(signGY_GX+ widthG_4))); + signGyGxOneRow = _mm_loadu_si128((const __m128i*)signGyGx); + sumSignGyGxTmp16 = _mm_add_epi16(sumSignGyGxTmp16, _mm_loadu_si128((const __m128i*)(signGyGx+ widthG_4))); sumAbsGXTmp32 = _mm_madd_epi16(sumAbsGXTmp16, vmask); sumAbsGYTmp32 = _mm_madd_epi16(sumAbsGYTmp16, vmask); @@ -830,33 +830,33 @@ void calcBIOParamSum5_SSE(Pel* absGX, Pel* absGY, Pel* dIX, Pel* dIY, Pel* signG sumSignGyGxTmp32 = _mm_madd_epi16(sumSignGyGxTmp16, vmask); sumSignGyGxTmp32 = _mm_add_epi32(sumSignGyGxTmp32, _mm_shuffle_epi32(sumSignGyGxTmp32, 0x4e)); // 01001110 sumSignGyGxTmp32 = _mm_add_epi32(sumSignGyGxTmp32, _mm_shuffle_epi32(sumSignGyGxTmp32, 0xb1)); // 10110001 - *sumSignGY_GX = _mm_cvtsi128_si32(sumSignGyGxTmp32); + *sumSignGyGx = _mm_cvtsi128_si32(sumSignGyGxTmp32); // bio parameter increment absGX += widthG; absGY += widthG; dIX += widthG; dIY += widthG; - signGY_GX += widthG; + signGyGx += widthG; // sum parameter increment sumAbsGX += width; sumAbsGY += width; sumDIX += width; sumDIY += width; - sumSignGY_GX += width; + sumSignGyGx += width; } // bio parameter back to first row absGX += widthG_N; absGY += widthG_N; dIX += widthG_N; dIY += widthG_N; - signGY_GX += widthG_N; + signGyGx += widthG_N; // sum parameter back to first row sumAbsGX += width_N; sumAbsGY += width_N; sumDIX += width_N; sumDIY += width_N; - sumSignGY_GX += width_N; + sumSignGyGx += width_N; } sumDIX -= width; sumDIY -= width; @@ -883,7 +883,7 @@ void calcBIOParamSum5_SSE(Pel* absGX, Pel* absGY, Pel* dIX, Pel* dIY, Pel* signG #endif } template< X86_VEXT vext > -void calcBIOParamSum4_SSE(Pel* absGX, Pel* absGY, Pel* dIX, Pel* dIY, Pel* signGY_GX, int width, int height, const int widthG, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY, int* sumSignGY_GX) +void calcBIOParamSum4_SSE(Pel* absGX, Pel* absGY, Pel* dIX, Pel* dIY, Pel* signGyGx, int width, int height, const int widthG, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY, int* sumSignGyGx) { __m128i vzero = _mm_setzero_si128(); @@ -900,13 +900,13 @@ void calcBIOParamSum4_SSE(Pel* absGX, Pel* absGY, Pel* dIX, Pel* dIY, Pel* signG sumDIXTmp16 = _mm_add_epi16(sumDIXTmp16, _mm_loadu_si128((const __m128i*)dIX)); sumAbsGYTmp16 = _mm_add_epi16(sumAbsGYTmp16, _mm_loadu_si128((const __m128i*)absGY)); sumDIYTmp16 = _mm_add_epi16(sumDIYTmp16, _mm_loadu_si128((const __m128i*)dIY)); - sumSignGyGxTmp16 = _mm_add_epi16(sumSignGyGxTmp16, _mm_loadu_si128((const __m128i*)signGY_GX)); + sumSignGyGxTmp16 = _mm_add_epi16(sumSignGyGxTmp16, _mm_loadu_si128((const __m128i*)signGyGx)); // bio parameter increment absGX += widthG; absGY += widthG; dIX += widthG; dIY += widthG; - signGY_GX += widthG; + signGyGx += widthG; } } else // (width == 12) @@ -921,14 +921,14 @@ void calcBIOParamSum4_SSE(Pel* absGX, Pel* absGY, Pel* dIX, Pel* dIY, Pel* signG sumAbsGYTmp16 = _mm_add_epi16(sumAbsGYTmp16, _mm_loadl_epi64((const __m128i*)(absGY + 8))); sumDIYTmp16 = _mm_add_epi16(sumDIYTmp16, _mm_loadu_si128((const __m128i*)dIY)); sumDIYTmp16 = _mm_add_epi16(sumDIYTmp16, _mm_loadl_epi64((const __m128i*)(dIY + 8))); - sumSignGyGxTmp16 = _mm_add_epi16(sumSignGyGxTmp16, _mm_loadu_si128((const __m128i*)signGY_GX)); - sumSignGyGxTmp16 = _mm_add_epi16(sumSignGyGxTmp16, _mm_loadl_epi64((const __m128i*)(signGY_GX + 8))); + sumSignGyGxTmp16 = _mm_add_epi16(sumSignGyGxTmp16, _mm_loadu_si128((const __m128i*)signGyGx)); + sumSignGyGxTmp16 = _mm_add_epi16(sumSignGyGxTmp16, _mm_loadl_epi64((const __m128i*)(signGyGx + 8))); // bio parameter increment absGX += widthG; absGY += widthG; dIX += widthG; dIY += widthG; - signGY_GX += widthG; + signGyGx += widthG; } } @@ -954,11 +954,11 @@ void calcBIOParamSum4_SSE(Pel* absGX, Pel* absGY, Pel* dIX, Pel* dIY, Pel* signG __m128i sumSignGyGxTmp32 = _mm_madd_epi16(sumSignGyGxTmp16, _mm_set1_epi16(1)); sumSignGyGxTmp32 = _mm_add_epi32(sumSignGyGxTmp32, _mm_shuffle_epi32(sumSignGyGxTmp32, 0x4e)); // 01001110 sumSignGyGxTmp32 = _mm_add_epi32(sumSignGyGxTmp32, _mm_shuffle_epi32(sumSignGyGxTmp32, 0xb1)); // 10110001 - *sumSignGY_GX = _mm_cvtsi128_si32(sumSignGyGxTmp32); + *sumSignGyGx = _mm_cvtsi128_si32(sumSignGyGxTmp32); } template< X86_VEXT vext > -void calcBIOClippedVxVy_SSE(int* sumDIX_pixel_32bit, int* sumAbsGX_pixel_32bit, int* sumDIY_pixel_32bit, int* sumAbsGY_pixel_32bit, int* sumSignGY_GX_pixel_32bit, const int limit, const int bioSubblockSize, int* tmpx_pixel_32bit, int* tmpy_pixel_32bit) +void calcBIOClippedVxVy_SSE(int* sumDIXSample32bit, int* sumAbsGxSample32bit, int* sumDIYSample32bit, int* sumAbsGySample32bit, int* sumSignGyGxSample32bit, const int limit, const int bioSubblockSize, int* tmpxSample32bit, int* tmpySample32bit) { #ifdef USE_AVX2 __m256i vibdimin = _mm256_set1_epi32(-limit); @@ -967,25 +967,25 @@ void calcBIOClippedVxVy_SSE(int* sumDIX_pixel_32bit, int* sumAbsGX_pixel_32bit, for (int idx = 0; idx < bioSubblockSize; idx += 8) { - tmp256 = _mm256_loadu_si256((const __m256i*)sumDIX_pixel_32bit); - tmp256 = _mm256_srav_epi32(tmp256, _mm256_loadu_si256((const __m256i*)sumAbsGX_pixel_32bit)); + tmp256 = _mm256_loadu_si256((const __m256i*)sumDIXSample32bit); + tmp256 = _mm256_srav_epi32(tmp256, _mm256_loadu_si256((const __m256i*)sumAbsGxSample32bit)); tmp256 = _mm256_max_epi32(tmp256, vibdimin); tmp256 = _mm256_min_epi32(tmp256, vibdimax); - _mm256_storeu_si256( ( __m256i * )tmpx_pixel_32bit, tmp256); - tmp256 = _mm256_mullo_epi32(tmp256, _mm256_loadu_si256((const __m256i*)sumSignGY_GX_pixel_32bit)); + _mm256_storeu_si256( ( __m256i * )tmpxSample32bit, tmp256); + tmp256 = _mm256_mullo_epi32(tmp256, _mm256_loadu_si256((const __m256i*)sumSignGyGxSample32bit)); tmp256 = _mm256_srai_epi32(tmp256, 1); - tmp256 = _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)sumDIY_pixel_32bit), tmp256); - tmp256 = _mm256_srav_epi32(tmp256, _mm256_loadu_si256((const __m256i*)sumAbsGY_pixel_32bit)); + tmp256 = _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)sumDIYSample32bit), tmp256); + tmp256 = _mm256_srav_epi32(tmp256, _mm256_loadu_si256((const __m256i*)sumAbsGySample32bit)); tmp256 = _mm256_max_epi32(tmp256, vibdimin); tmp256 = _mm256_min_epi32(tmp256, vibdimax); - _mm256_storeu_si256( ( __m256i * )tmpy_pixel_32bit, tmp256); - sumDIX_pixel_32bit += 8; - sumAbsGX_pixel_32bit += 8; - sumDIY_pixel_32bit += 8; - sumAbsGY_pixel_32bit += 8; - sumSignGY_GX_pixel_32bit += 8; - tmpx_pixel_32bit += 8; - tmpy_pixel_32bit += 8; + _mm256_storeu_si256( ( __m256i * )tmpySample32bit, tmp256); + sumDIXSample32bit += 8; + sumAbsGxSample32bit += 8; + sumDIYSample32bit += 8; + sumAbsGySample32bit += 8; + sumSignGyGxSample32bit += 8; + tmpxSample32bit += 8; + tmpySample32bit += 8; } #else __m128i vibdimin = _mm_set1_epi32(-limit); @@ -994,33 +994,33 @@ void calcBIOClippedVxVy_SSE(int* sumDIX_pixel_32bit, int* sumAbsGX_pixel_32bit, for (int idx = 0; idx < bioSubblockSize; idx += 4) { - *sumDIX_pixel_32bit = (*sumDIX_pixel_32bit) >> (*sumAbsGX_pixel_32bit); - *(sumDIX_pixel_32bit + 1) = (*(sumDIX_pixel_32bit + 1)) >> (*(sumAbsGX_pixel_32bit + 1)); - *(sumDIX_pixel_32bit + 2) = (*(sumDIX_pixel_32bit + 2)) >> (*(sumAbsGX_pixel_32bit + 2)); - *(sumDIX_pixel_32bit + 3) = (*(sumDIX_pixel_32bit + 3)) >> (*(sumAbsGX_pixel_32bit + 3)); - tmp128 = _mm_loadu_si128((const __m128i*)sumDIX_pixel_32bit); + *sumDIXSample32bit = (*sumDIXSample32bit) >> (*sumAbsGxSample32bit); + *(sumDIXSample32bit + 1) = (*(sumDIXSample32bit + 1)) >> (*(sumAbsGxSample32bit + 1)); + *(sumDIXSample32bit + 2) = (*(sumDIXSample32bit + 2)) >> (*(sumAbsGxSample32bit + 2)); + *(sumDIXSample32bit + 3) = (*(sumDIXSample32bit + 3)) >> (*(sumAbsGxSample32bit + 3)); + tmp128 = _mm_loadu_si128((const __m128i*)sumDIXSample32bit); tmp128 = _mm_max_epi32(tmp128, vibdimin); tmp128 = _mm_min_epi32(tmp128, vibdimax); - _mm_storeu_si128( ( __m128i * )tmpx_pixel_32bit, tmp128); - tmp128 = _mm_mullo_epi32(tmp128, _mm_loadu_si128((const __m128i*)sumSignGY_GX_pixel_32bit)); + _mm_storeu_si128( ( __m128i * )tmpxSample32bit, tmp128); + tmp128 = _mm_mullo_epi32(tmp128, _mm_loadu_si128((const __m128i*)sumSignGyGxSample32bit)); tmp128 = _mm_srai_epi32(tmp128, 1); - tmp128 = _mm_sub_epi32(_mm_loadu_si128((const __m128i*)sumDIY_pixel_32bit), tmp128); - _mm_storeu_si128( ( __m128i * )sumDIY_pixel_32bit, tmp128); - *sumDIY_pixel_32bit = (*sumDIY_pixel_32bit) >> (*sumAbsGY_pixel_32bit); - *(sumDIY_pixel_32bit + 1) = (*(sumDIY_pixel_32bit + 1)) >> (*(sumAbsGY_pixel_32bit + 1)); - *(sumDIY_pixel_32bit + 2) = (*(sumDIY_pixel_32bit + 2)) >> (*(sumAbsGY_pixel_32bit + 2)); - *(sumDIY_pixel_32bit + 3) = (*(sumDIY_pixel_32bit + 3)) >> (*(sumAbsGY_pixel_32bit + 3)); - tmp128 = _mm_loadu_si128((const __m128i*)sumDIY_pixel_32bit); + tmp128 = _mm_sub_epi32(_mm_loadu_si128((const __m128i*)sumDIYSample32bit), tmp128); + _mm_storeu_si128( ( __m128i * )sumDIYSample32bit, tmp128); + *sumDIYSample32bit = (*sumDIYSample32bit) >> (*sumAbsGySample32bit); + *(sumDIYSample32bit + 1) = (*(sumDIYSample32bit + 1)) >> (*(sumAbsGySample32bit + 1)); + *(sumDIYSample32bit + 2) = (*(sumDIYSample32bit + 2)) >> (*(sumAbsGySample32bit + 2)); + *(sumDIYSample32bit + 3) = (*(sumDIYSample32bit + 3)) >> (*(sumAbsGySample32bit + 3)); + tmp128 = _mm_loadu_si128((const __m128i*)sumDIYSample32bit); tmp128 = _mm_max_epi32(tmp128, vibdimin); tmp128 = _mm_min_epi32(tmp128, vibdimax); - _mm_storeu_si128( ( __m128i * )tmpy_pixel_32bit, tmp128); - sumDIX_pixel_32bit += 4; - sumAbsGX_pixel_32bit += 4; - sumDIY_pixel_32bit += 4; - sumAbsGY_pixel_32bit += 4; - sumSignGY_GX_pixel_32bit += 4; - tmpx_pixel_32bit += 4; - tmpy_pixel_32bit += 4; + _mm_storeu_si128( ( __m128i * )tmpySample32bit, tmp128); + sumDIXSample32bit += 4; + sumAbsGxSample32bit += 4; + sumDIYSample32bit += 4; + sumAbsGySample32bit += 4; + sumSignGyGxSample32bit += 4; + tmpxSample32bit += 4; + tmpySample32bit += 4; } #endif } @@ -1219,7 +1219,7 @@ void calAbsSum_SSE(const Pel* diff, int stride, int width, int height, int* absS #endif template< X86_VEXT vext > -void calcBIOSums_SSE(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, Pel* gradX1, Pel* gradY0, Pel* gradY1, int xu, int yu, const int src0Stride, const int src1Stride, const int widthG, const int bitDepth, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY, int* sumSignGY_GX) +void calcBIOSums_SSE(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, Pel* gradX1, Pel* gradY0, Pel* gradY1, int xu, int yu, const int src0Stride, const int src1Stride, const int widthG, const int bitDepth, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY, int* sumSignGyGx) { int shift4 = 4; @@ -1254,13 +1254,13 @@ void calcBIOSums_SSE(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, Pel* __m128i gY = _mm_abs_epi16(packTempY); __m128i dIX = _mm_sign_epi16(subTemp1, packTempX ); __m128i dIY = _mm_sign_epi16(subTemp1, packTempY ); - __m128i signGY_GX = _mm_sign_epi16(packTempX, packTempY ); + __m128i signGyGx = _mm_sign_epi16(packTempX, packTempY ); sumAbsGXTmp = _mm_add_epi16(sumAbsGXTmp, gX); sumDIXTmp = _mm_add_epi16(sumDIXTmp, dIX); sumAbsGYTmp = _mm_add_epi16(sumAbsGYTmp, gY); sumDIYTmp = _mm_add_epi16(sumDIYTmp, dIY); - sumSignGyGxTmp = _mm_add_epi16(sumSignGyGxTmp, signGY_GX); + sumSignGyGxTmp = _mm_add_epi16(sumSignGyGxTmp, signGyGx); srcY0Tmp += src0Stride; srcY1Tmp += src1Stride; gradX0 += widthG; @@ -1293,7 +1293,7 @@ void calcBIOSums_SSE(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, Pel* sumSignGyGxTmp = _mm_add_epi32(sumSignGyGxTmp, _mm_shuffle_epi32(sumSignGyGxTmp, 0x4e)); // 01001110 sumSignGyGxTmp = _mm_add_epi32(sumSignGyGxTmp, _mm_shuffle_epi32(sumSignGyGxTmp, 0xb1)); // 10110001 - *sumSignGY_GX = _mm_cvtsi128_si32(sumSignGyGxTmp); + *sumSignGyGx = _mm_cvtsi128_si32(sumSignGyGxTmp); } template< X86_VEXT vext > diff --git a/source/Lib/DecoderLib/DecCu.cpp b/source/Lib/DecoderLib/DecCu.cpp index 9e21ed608..35aeabfe5 100644 --- a/source/Lib/DecoderLib/DecCu.cpp +++ b/source/Lib/DecoderLib/DecCu.cpp @@ -710,7 +710,7 @@ void DecCu::xIntraRecBlk( TransformUnit& tu, const ComponentID compID ) #if JVET_AC0071_DBV if (compID != COMPONENT_Y && uiChFinalMode == DBV_CHROMA_IDX) { - m_pcIntraPred->PredIntraDbv(compID, piPred, pu); + m_pcIntraPred->predIntraDbv(compID, piPred, pu); } else #endif @@ -749,7 +749,7 @@ void DecCu::xIntraRecBlk( TransformUnit& tu, const ComponentID compID ) #if JVET_AC0071_DBV if (uiChFinalMode == DBV_CHROMA_IDX) { - m_pcIntraPred->PredIntraDbv(COMPONENT_Cr, piPredCr, pu); + m_pcIntraPred->predIntraDbv(COMPONENT_Cr, piPredCr, pu); } else #endif diff --git a/source/Lib/EncoderLib/IntraSearch.cpp b/source/Lib/EncoderLib/IntraSearch.cpp index 15508e553..248da80da 100644 --- a/source/Lib/EncoderLib/IntraSearch.cpp +++ b/source/Lib/EncoderLib/IntraSearch.cpp @@ -7660,16 +7660,15 @@ bool IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par saveCS.getPredBuf( tu.Y() ).copyFrom( csFull->getPredBuf( tu.Y() ) ); saveCS.getRecoBuf( tu.Y() ).copyFrom( csFull->getRecoBuf( tu.Y() ) ); - if( KEEP_PRED_AND_RESI_SIGNALS ) + if( KEEP_PRED_AND_RESI_SIGNALS || JVET_AC0162_ALF_RESIDUAL_SAMPLES_INPUT ) { saveCS.getResiBuf ( tu.Y() ).copyFrom( csFull->getResiBuf ( tu.Y() ) ); + } + if( KEEP_PRED_AND_RESI_SIGNALS ) + { saveCS.getOrgResiBuf( tu.Y() ).copyFrom( csFull->getOrgResiBuf( tu.Y() ) ); } -#if JVET_AC0162_ALF_RESIDUAL_SAMPLES_INPUT - saveCS.getResiBuf(tu.Y()).copyFrom(csFull->getResiBuf(tu.Y())); -#endif - tmpTU->copyComponentFrom( tu, COMPONENT_Y ); ctxBest = m_CABACEstimator->getCtx(); @@ -7693,16 +7692,15 @@ bool IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par csFull->getPredBuf( tu.Y() ).copyFrom( saveCS.getPredBuf( tu.Y() ) ); csFull->getRecoBuf( tu.Y() ).copyFrom( saveCS.getRecoBuf( tu.Y() ) ); - if( KEEP_PRED_AND_RESI_SIGNALS ) + if( KEEP_PRED_AND_RESI_SIGNALS || JVET_AC0162_ALF_RESIDUAL_SAMPLES_INPUT ) { csFull->getResiBuf ( tu.Y() ).copyFrom( saveCS.getResiBuf ( tu.Y() ) ); + } + if( KEEP_PRED_AND_RESI_SIGNALS ) + { csFull->getOrgResiBuf( tu.Y() ).copyFrom( saveCS.getOrgResiBuf( tu.Y() ) ); } -#if JVET_AC0162_ALF_RESIDUAL_SAMPLES_INPUT - csFull->getResiBuf(tu.Y()).copyFrom(saveCS.getResiBuf(tu.Y())); -#endif - tu.copyComponentFrom( *tmpTU, COMPONENT_Y ); if( !bCheckSplit ) @@ -8833,8 +8831,8 @@ ChromaCbfs IntraSearch::xRecurIntraChromaCodingQT( CodingStructure &cs, Partitio #if JVET_AC0071_DBV if (predMode == DBV_CHROMA_IDX) { - PredIntraDbv(COMPONENT_Cb, piPredCb, pu); - PredIntraDbv(COMPONENT_Cr, piPredCr, pu); + predIntraDbv(COMPONENT_Cb, piPredCb, pu); + predIntraDbv(COMPONENT_Cr, piPredCr, pu); } else { -- GitLab