diff --git a/source/Lib/CommonLib/Buffer.cpp b/source/Lib/CommonLib/Buffer.cpp index 974bf49321d6803af835075d32d7008ec52465f7..a06167006a134f2ad5d51725cf7a2ff76f0fbfda 100644 --- a/source/Lib/CommonLib/Buffer.cpp +++ b/source/Lib/CommonLib/Buffer.cpp @@ -193,7 +193,11 @@ void gradFilterCore(Pel* pSrc, int srcStride, int width, int height, int gradStr Pel* srcTmp = pSrc + srcStride + 1; Pel* gradXTmp = gradX + gradStride + 1; Pel* gradYTmp = gradY + gradStride + 1; +#if JVET_P0653_BDOF_PROF_PARA_DEV + int shift1 = 6; +#else int shift1 = std::max<int>(6, (bitDepth - 6)); +#endif for (int y = 0; y < (height - 2 * BIO_EXTEND_SIZE); y++) { @@ -233,8 +237,13 @@ void gradFilterCore(Pel* pSrc, int srcStride, int width, int height, int gradStr void calcBIOSumsCore(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, Pel* gradX1, Pel* gradY0, Pel* gradY1, int xu, int yu, const int src0Stride, const int src1Stride, const int widthG, const int bitDepth, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY, int* sumSignGY_GX) { +#if JVET_P0653_BDOF_PROF_PARA_DEV + int shift4 = 4; + int shift5 = 1; +#else int shift4 = std::max<int>(4, (bitDepth - 8)); int shift5 = std::max<int>(1, (bitDepth - 11)); +#endif for (int y = 0; y < 6; y++) { @@ -259,6 +268,7 @@ void calcBIOSumsCore(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, Pel* } } +#if !JVET_P0653_BDOF_PROF_PARA_DEV void calcBIOParCore(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, const int bitDepth) { int shift4 = std::max<int>(4, (bitDepth - 8)); @@ -289,6 +299,7 @@ void calcBIOParCore(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX dotProductTemp6 += widthG; } } +#endif void calcBlkGradientCore(int sx, int sy, int *arraysGx2, int *arraysGxGy, int *arraysGxdI, int *arraysGy2, int *arraysGydI, int &sGx2, int &sGy2, int &sGxGy, int &sGxdI, int &sGydI, int width, int height, int unitSize) { diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp index ef51d6960c16156e393fe28174a96d9b31f482c6..22dcc819e57e47ba9e5c6389a9eeaffa32463e78 100644 --- a/source/Lib/CommonLib/InterPrediction.cpp +++ b/source/Lib/CommonLib/InterPrediction.cpp @@ -913,12 +913,17 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio dMvV += blockWidth; } +#if JVET_P0653_BDOF_PROF_PARA_DEV + const int mvShift = 8; + const int dmvLimit = (1 << 5); +#else #if JVET_P0057_BDOF_PROF_HARMONIZATION const int mvShift = shift + MV_FRACTIONAL_BITS_INTERNAL + 2 - std::max<int>(5, clpRng.bd - 7); const int dmvLimit = (1 << (std::max<int>(5, clpRng.bd - 7))); #else const int bdlimit = std::max<int>(6, clpRng.bd - 6); const int dmvLimit = 1 << bdlimit; +#endif #endif if (!g_pelBufOP.roundIntVector) @@ -1170,7 +1175,11 @@ void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf const int bitDepth = clipBitDepths.recon[toChannelType(COMPONENT_Y)]; const int shiftNum = IF_INTERNAL_PREC + 1 - bitDepth; const int offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; +#if JVET_P0653_BDOF_PROF_PARA_DEV + const int limit = (1 << 5); +#else const int limit = (1<<(std::max<int>(5, bitDepth - 7))); +#endif int xUnit = (width >> 2); int yUnit = (height >> 2); @@ -1353,6 +1362,10 @@ void InterPrediction::xApplyBiPROF(const PredictionUnit &pu, const CPelBuf& pcYu const int height = pu.Y().height; const int bit = MAX_CU_DEPTH; +#if JVET_P0653_BDOF_PROF_PARA_DEV + const int mvShift = 8; + const int dmvLimit = (1 << 5); +#else const int shift = bit - 4 + MV_FRACTIONAL_BITS_INTERNAL; #if JVET_P0057_BDOF_PROF_HARMONIZATION const int mvShift = shift + MV_FRACTIONAL_BITS_INTERNAL + 2 - std::max<int>(5, clpRng.bd - 7); @@ -1360,6 +1373,7 @@ void InterPrediction::xApplyBiPROF(const PredictionUnit &pu, const CPelBuf& pcYu #else const int bdlimit = std::max<int>(6, clpRng.bd - 6); const int dmvLimit = 1 << bdlimit; +#endif #endif for (int list = 0; list < 2; list++) diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 302852907f374259f327b71f8275d0984e336c89..22ec38d4b25987880728225a67a107710dabd6c4 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -52,7 +52,9 @@ #define JVET_P0273_MTSIntraMaxCand 1 // JVET-P0273: Use MTSIntraMaxCand if LFNST is used -#define JVET_P0057_BDOF_PROF_HARMONIZATION 1 // JVET-P0057: harmonization of BDOF and PROF on motion refinement precis +#define JVET_P0057_BDOF_PROF_HARMONIZATION 1 // JVET-P0057: harmonization of BDOF and PROF on motion refinement precision + +#define JVET_P0653_BDOF_PROF_PARA_DEV 1 // JVET-P0653/P0281: fixed shift operations for BDOF and PROF parameter derivation #define JVET_P0400_REMOVE_SHARED_MERGE_LIST 1 // JVET-P0400: removeal of shared merge list diff --git a/source/Lib/CommonLib/x86/BufferX86.h b/source/Lib/CommonLib/x86/BufferX86.h index f59493329627805430988a1152118bb6fef09925..e6459f4a9033da5dc374c869ae01afad16c6339e 100644 --- a/source/Lib/CommonLib/x86/BufferX86.h +++ b/source/Lib/CommonLib/x86/BufferX86.h @@ -265,8 +265,13 @@ template< X86_VEXT vext > void calcBIOSums_SSE(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, Pel* gradX1, Pel* gradY0, Pel* gradY1, int xu, int yu, const int src0Stride, const int src1Stride, const int widthG, const int bitDepth, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY, int* sumSignGY_GX) { +#if JVET_P0653_BDOF_PROF_PARA_DEV + int shift4 = 4; + int shift5 = 1; +#else int shift4 = std::max<int>(4, (bitDepth - 8)); int shift5 = std::max<int>(1, (bitDepth - 11)); +#endif __m128i sumAbsGXTmp = _mm_setzero_si128(); __m128i sumDIXTmp = _mm_setzero_si128(); @@ -583,7 +588,11 @@ void gradFilter_SSE(Pel* src, int srcStride, int width, int height, int gradStri int widthInside = width - 2 * BIO_EXTEND_SIZE; int heightInside = height - 2 * BIO_EXTEND_SIZE; +#if JVET_P0653_BDOF_PROF_PARA_DEV + int shift1 = 6; +#else int shift1 = std::max<int>(6, bitDepth - 6); +#endif __m128i mmShift1 = _mm_cvtsi32_si128( shift1 ); assert((widthInside & 3) == 0); @@ -658,6 +667,7 @@ void gradFilter_SSE(Pel* src, int srcStride, int width, int height, int gradStri } } +#if !JVET_P0653_BDOF_PROF_PARA_DEV template< X86_VEXT vext > void calcBIOPar_SSE(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, const int bitDepth) { @@ -804,6 +814,7 @@ void calcBIOPar_SSE(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX dotProductTemp6 += widthG; } } +#endif template< X86_VEXT vext > void calcBlkGradient_SSE(int sx, int sy, int *arraysGx2, int *arraysGxGy, int *arraysGxdI, int *arraysGy2, int *arraysGydI, int &sGx2, int &sGy2, int &sGxGy, int &sGxdI, int &sGydI, int width, int height, int unitSize)