diff --git a/source/Lib/CommonLib/Buffer.cpp b/source/Lib/CommonLib/Buffer.cpp index 759cd09f836c35805eb45d59ef2342f13f19e770..878ecbcd40ccef8e106085c02ab112a6102c4d8b 100644 --- a/source/Lib/CommonLib/Buffer.cpp +++ b/source/Lib/CommonLib/Buffer.cpp @@ -48,12 +48,18 @@ void applyPROFCore(Pel* dst, int dstStride, const Pel* src, int srcStride, int w const int dIshift = 1; const int dIoffset = 1 << (dIshift - 1); +#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING + const int dILimit = 1 << std::max<int>(clpRng.bd + 1, 13); +#endif for (int h = 0; h < height; h++) { for (int w = 0; w < width; w++) { int32_t dI = dMvX[idx] * gradX[w] + dMvY[idx] * gradY[w]; dI = (dI + dIoffset) >> dIshift; +#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING + dI = Clip3(-dILimit, dILimit - 1, dI); +#endif dI = (src[w] + dI + offset) >> shiftNum; dst[w] = (Pel)ClipPel(dI, clpRng); @@ -80,6 +86,10 @@ void applyBiPROFCore (Pel* dst, int dstStride, const Pel* src0, const Pel* src1, const int shiftNum = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + g_GbiLog2WeightBase; const int offset = (1 << (shiftNum - 1)) + (IF_INTERNAL_OFFS << g_GbiLog2WeightBase); +#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING + const int dILimit = 1 << std::max<int>(clpRng.bd + 1, 13); +#endif + const int8_t w1 = g_GbiWeightBase - w0; for (int h = 0; h < height; h++) @@ -92,10 +102,16 @@ void applyBiPROFCore (Pel* dst, int dstStride, const Pel* src0, const Pel* src1, if (!(w & 3)) idx -= 4; dI0 = dMvX0[idx] * gradX0[w] + dMvY0[idx] * gradY0[w]; dI0 = (dI0 + dIoffset) >> dIshift; +#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING + dI0 = Clip3(-dILimit, dILimit - 1, dI0); +#endif if (l1PROFEnabled) { dI1 = dMvX1[idx] * gradX1[w] + dMvY1[idx] * gradY1[w]; dI1 = (dI1 + dIoffset) >> dIshift; +#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING + dI1 = Clip3(-dILimit, dILimit - 1, dI1); +#endif dst[w] = (Pel)ClipPel(rightShift(((src0[w] + dI0) * w0 + (src1[w] + dI1) * w1 + offset), shiftNum), clpRng); } else diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 56871e4b7a7d14dad32945cf69736cc399a4c9bd..5be14fbea8252bfba4cb438c6ff0f7cb34b0eb0b 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -50,6 +50,8 @@ #include <assert.h> #include <cassert> +#define JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING 1 // JVET-P0154/P0094/P0172/P0413/P0518/P0281: Clip the PROF sample offset to 14-bit + #define JVET_O0145_ENTRYPOINT_SIGNALLING 0 // JVET-O0145: Not signalling num_entry_point_offsets but derive it at decoder #define JVET_O0625_ALF_PADDING 1 // JVET-O0625/O0654/O0662: Unified padding method in ALF diff --git a/source/Lib/CommonLib/x86/BufferX86.h b/source/Lib/CommonLib/x86/BufferX86.h index be2d68c96b024ebace4e4684e6d58fe37cc36045..37a478b1bb668b8827e6ceb588c2a420c0714f6e 100644 --- a/source/Lib/CommonLib/x86/BufferX86.h +++ b/source/Lib/CommonLib/x86/BufferX86.h @@ -351,6 +351,12 @@ void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride, __m128i vibdimax = _mm_set1_epi32(clpRng.max); __m128i vzero = _mm_setzero_si128(); +#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING + const int dILimit = 1 << std::max<int>(clpRng.bd + 1, 13); + __m128i vdImin = _mm_set1_epi32(-dILimit); + __m128i vdImax = _mm_set1_epi32(dILimit - 1); +#endif + for (int h = 0; h < height; h++) { const int* vX = dMvX; @@ -370,6 +376,9 @@ void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride, mm_dI = _mm_add_epi32(_mm_mullo_epi32(mm_dmvx, mm_gradx), _mm_mullo_epi32(mm_dmvy, mm_grady)); mm_dI = _mm_srai_epi32(_mm_add_epi32(mm_dI, mm_dIoffset), 1); +#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING + mm_dI = _mm_min_epi32(vdImax, _mm_max_epi32(vdImin, mm_dI)); +#endif mm_dI = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(mm_dI, mm_src), mm_offset), shiftNum); mm_dI = _mm_packs_epi32(_mm_min_epi32(vibdimax, _mm_max_epi32(vibdimin, mm_dI)), vzero); _mm_storel_epi64((__m128i *)dst, mm_dI); @@ -400,6 +409,12 @@ void applyBiPROF_SSE(Pel* dst, int dstStride, const Pel* src0, const Pel* src1, __m128i vibdimax = _mm_set1_epi32(clpRng.max); __m128i vzero = _mm_setzero_si128(); +#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING + const int dILimit = 1 << std::max<int>(clpRng.bd + 1, 13); + __m128i vdImin = _mm_set1_epi32(-dILimit); + __m128i vdImax = _mm_set1_epi32(dILimit - 1); +#endif + __m128i mm_dmvx0, mm_dmvy0, mm_dmvx1, mm_dmvy1, mm_gradx0, mm_grady0, mm_gradx1, mm_grady1, mm_src0, mm_src1; __m128i mm_dI0, mm_dI1, mm_dI; __m128i mm_dIoffset = _mm_set1_epi32(1); @@ -444,6 +459,9 @@ void applyBiPROF_SSE(Pel* dst, int dstStride, const Pel* src0, const Pel* src1, mm_grady0 = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gY0)); mm_dI0 = _mm_add_epi32(_mm_mullo_epi32(mm_dmvx0, mm_gradx0), _mm_mullo_epi32(mm_dmvy0, mm_grady0)); mm_dI0 = _mm_srai_epi32(_mm_add_epi32(mm_dI0, mm_dIoffset), 1); +#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING + mm_dI0 = _mm_min_epi32(vdImax, _mm_max_epi32(vdImin, mm_dI0)); +#endif mm_dI0 = _mm_mullo_epi32(_mm_add_epi32(mm_src0, mm_dI0), mm_w0); gX0 += 4; gY0 += 4; @@ -453,6 +471,9 @@ void applyBiPROF_SSE(Pel* dst, int dstStride, const Pel* src0, const Pel* src1, mm_grady1 = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gY1)); mm_dI1 = _mm_add_epi32(_mm_mullo_epi32(mm_dmvx1, mm_gradx1), _mm_mullo_epi32(mm_dmvy1, mm_grady1)); mm_dI1 = _mm_srai_epi32(_mm_add_epi32(mm_dI1, mm_dIoffset), 1); +#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING + mm_dI1 = _mm_min_epi32(vdImax, _mm_max_epi32(vdImin, mm_dI1)); +#endif mm_dI1 = _mm_mullo_epi32(_mm_add_epi32(mm_src1, mm_dI1), mm_w1); gX1 += 4; gY1 += 4; }