diff --git a/source/Lib/CommonLib/InterpolationFilter.cpp b/source/Lib/CommonLib/InterpolationFilter.cpp index 1b0a09738af87c7ae5ee3df93342434857f1d6fe..9ff4a501217b7a83e4d85589292e0633d1078b6d 100644 --- a/source/Lib/CommonLib/InterpolationFilter.cpp +++ b/source/Lib/CommonLib/InterpolationFilter.cpp @@ -697,33 +697,69 @@ void InterpolationFilter::xWeightedTriangleBlk( const PredictionUnit &pu, const const int32_t offsetDefault = (1<<(shiftDefault-1)) + IF_INTERNAL_OFFS; const int32_t shiftWeighted = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + log2WeightBase; const int32_t offsetWeighted = (1 << (shiftWeighted - 1)) + (IF_INTERNAL_OFFS << log2WeightBase); +#if JVET_P0530_TPM_WEIGHT_ALIGN + int32_t logSubWidthC = getChannelTypeScaleX(CHANNEL_TYPE_CHROMA, pu.chromaFormat); + int32_t logSubHeightC = getChannelTypeScaleY(CHANNEL_TYPE_CHROMA, pu.chromaFormat); + int32_t stepX = (compIdx == 0) ? 1 : (1 << logSubWidthC); + int32_t stepY = (compIdx == 0) ? 1 : (1 << logSubHeightC); + + int32_t widthY = (compIdx == 0) ? width : (width << logSubWidthC); + int32_t heightY = (compIdx == 0) ? height : (height << logSubHeightC); + + int32_t ratioWH = (widthY > heightY) ? (widthY / heightY) : 1; + int32_t ratioHW = (widthY > heightY) ? 1 : (heightY / widthY); + + int32_t weightedLength = 7; + int32_t weightedStartPos = (splitDir == 0) ? (0 - (weightedLength >> 1) * ratioWH) : (widthY - ((weightedLength + 1) >> 1) * ratioWH); +#else const int32_t ratioWH = (width > height) ? (width / height) : 1; const int32_t ratioHW = (width > height) ? 1 : (height / width); const bool longWeight = (compIdx == COMPONENT_Y); const int32_t weightedLength = longWeight ? 7 : 3; int32_t weightedStartPos = ( splitDir == 0 ) ? ( 0 - (weightedLength >> 1) * ratioWH ) : ( width - ((weightedLength + 1) >> 1) * ratioWH ); +#endif int32_t weightedEndPos = weightedStartPos + weightedLength * ratioWH - 1; int32_t weightedPosoffset = ( splitDir == 0 ) ? 
ratioWH : -ratioWH; Pel tmpPelWeighted; int32_t weightIdx; int32_t x, y, tmpX, tmpY, tmpWeightedStart, tmpWeightedEnd; - +#if JVET_P0530_TPM_WEIGHT_ALIGN + for (y = 0; y < heightY; y += ratioHW) + { + if (y % stepY != 0) + { + weightedStartPos += weightedPosoffset; + weightedEndPos += weightedPosoffset; + continue; + } + for (tmpY = ratioHW; tmpY > 0; tmpY -= stepY) + { + for (x = 0; x < weightedStartPos; x++) + { + if (x % stepX != 0) + continue; +#else for( y = 0; y < height; y+= ratioHW ) { for( tmpY = ratioHW; tmpY > 0; tmpY-- ) { for( x = 0; x < weightedStartPos; x++ ) { +#endif *dst++ = ClipPel( rightShift( (splitDir == 0 ? *src1 : *src0) + offsetDefault, shiftDefault), clipRng ); src0++; src1++; } tmpWeightedStart = std::max((int32_t)0, weightedStartPos); +#if JVET_P0530_TPM_WEIGHT_ALIGN + tmpWeightedEnd = std::min(weightedEndPos, (int32_t)(widthY - 1)); +#else tmpWeightedEnd = std::min(weightedEndPos, (int32_t)(width - 1)); +#endif weightIdx = 1; if( weightedStartPos < 0 ) { @@ -731,17 +767,36 @@ void InterpolationFilter::xWeightedTriangleBlk( const PredictionUnit &pu, const } for( x = tmpWeightedStart; x <= tmpWeightedEnd; x+= ratioWH ) { +#if JVET_P0530_TPM_WEIGHT_ALIGN + if (x % stepX != 0) + { + weightIdx++; + continue; + } + + for (tmpX = ratioWH; tmpX > 0; tmpX -= stepX) + { + tmpPelWeighted = Clip3(1, 7, weightIdx); +#else for( tmpX = ratioWH; tmpX > 0; tmpX-- ) { tmpPelWeighted = Clip3( 1, 7, longWeight ? weightIdx : (weightIdx * 2)); +#endif tmpPelWeighted = splitDir ? ( 8 - tmpPelWeighted ) : tmpPelWeighted; *dst++ = ClipPel( rightShift( (tmpPelWeighted*(*src0++) + ((8 - tmpPelWeighted) * (*src1++)) + offsetWeighted), shiftWeighted ), clipRng ); } weightIdx ++; } +#if JVET_P0530_TPM_WEIGHT_ALIGN + for (x = weightedEndPos + 1; x < widthY; x++) + { + if (x % stepX != 0) + continue; +#else for( x = weightedEndPos + 1; x < width; x++ ) { +#endif *dst++ = ClipPel( rightShift( (splitDir == 0 ? 
*src0 : *src1) + offsetDefault, shiftDefault ), clipRng ); src0++; src1++; diff --git a/source/Lib/CommonLib/Rom.cpp b/source/Lib/CommonLib/Rom.cpp index 3250e138c7601afaf12c9d6c386de18e17e44bec..dd7a0331ba42f4b591f9bc5e9d74e2bbf5491c84 100644 --- a/source/Lib/CommonLib/Rom.cpp +++ b/source/Lib/CommonLib/Rom.cpp @@ -434,15 +434,32 @@ void initROM() g_triangleWeights[0][1][idxH][idxW] = new int16_t[nCbH * nCbW + 4]; g_triangleWeights[1][0][idxH][idxW] = new int16_t[nCbH * nCbW + 4]; g_triangleWeights[1][1][idxH][idxW] = new int16_t[nCbH * nCbW + 4]; - +#if JVET_P0530_TPM_WEIGHT_ALIGN + g_triangleWeights[2][0][idxH][idxW] = new int16_t[nCbH * nCbW + 4]; + g_triangleWeights[2][1][idxH][idxW] = new int16_t[nCbH * nCbW + 4]; + g_triangleWeights[3][0][idxH][idxW] = new int16_t[nCbH * nCbW + 4]; + g_triangleWeights[3][1][idxH][idxW] = new int16_t[nCbH * nCbW + 4]; + int nCbR_422 = (nCbW * 2 > nCbH) ? (nCbW * 2) / nCbH : nCbH / (nCbW * 2); +#endif for (int y = 0; y < nCbH; y++) { for (int x = 0; x < nCbW; x++) { g_triangleWeights[0][0][idxH][idxW][y*nCbW + x] = (nCbW > nCbH) ? Clip3(0, 8, x / nCbR - y + 4) : Clip3(0, 8, x - y / nCbR + 4); g_triangleWeights[0][1][idxH][idxW][y*nCbW + x] = (nCbW > nCbH) ? Clip3(0, 8, nCbH - 1 - x / nCbR - y + 4) : Clip3(0, 8, nCbW - 1 - x - y / nCbR + 4); +#if JVET_P0530_TPM_WEIGHT_ALIGN + g_triangleWeights[CHROMA_420][0][idxH][idxW][y*nCbW + x] = (nCbW > nCbH) ? Clip3(0, 8, (x * 2) / nCbR - (y * 2) + 4) : Clip3(0, 8, (x * 2) - (y * 2) / nCbR + 4); + g_triangleWeights[CHROMA_420][1][idxH][idxW][y*nCbW + x] = (nCbW > nCbH) ? Clip3(0, 8, nCbH * 2 - 1 - (x * 2) / nCbR - (y * 2) + 4) : Clip3(0, 8, nCbW * 2 - 1 - (x * 2) - (y * 2) / nCbR + 4); + + g_triangleWeights[CHROMA_422][0][idxH][idxW][y*nCbW + x] = (nCbW * 2 > nCbH) ? Clip3(0, 8, (x * 2) / nCbR_422 - y + 4) : Clip3(0, 8, (x * 2) - y / nCbR_422 + 4); + g_triangleWeights[CHROMA_422][1][idxH][idxW][y*nCbW + x] = (nCbW * 2 > nCbH) ? 
Clip3(0, 8, nCbH - 1 - (x * 2) / nCbR_422 - y + 4) : Clip3(0, 8, nCbW * 2 - 1 - (x * 2) - y / nCbR_422 + 4); + + g_triangleWeights[CHROMA_444][0][idxH][idxW][y*nCbW + x] = g_triangleWeights[0][0][idxH][idxW][y*nCbW + x]; + g_triangleWeights[CHROMA_444][1][idxH][idxW][y*nCbW + x] = g_triangleWeights[0][1][idxH][idxW][y*nCbW + x]; +#else g_triangleWeights[1][0][idxH][idxW][y*nCbW + x] = (nCbW > nCbH) ? Clip3(0, 4, x / nCbR - y + 2) * 2 : Clip3(0, 4, x - y / nCbR + 2) * 2; g_triangleWeights[1][1][idxH][idxW][y*nCbW + x] = (nCbW > nCbH) ? Clip3(0, 4, nCbH - 1 - x / nCbR - y + 2) * 2 : Clip3(0, 4, nCbW - 1 - x - y / nCbR + 2) * 2; +#endif } } } @@ -486,6 +503,16 @@ void destroyROM() g_triangleWeights[0][1][idxH][idxW] = nullptr; g_triangleWeights[1][0][idxH][idxW] = nullptr; g_triangleWeights[1][1][idxH][idxW] = nullptr; +#if JVET_P0530_TPM_WEIGHT_ALIGN + delete[] g_triangleWeights[2][0][idxH][idxW]; + delete[] g_triangleWeights[2][1][idxH][idxW]; + delete[] g_triangleWeights[3][0][idxH][idxW]; + delete[] g_triangleWeights[3][1][idxH][idxW]; + g_triangleWeights[2][0][idxH][idxW] = nullptr; + g_triangleWeights[2][1][idxH][idxW] = nullptr; + g_triangleWeights[3][0][idxH][idxW] = nullptr; + g_triangleWeights[3][1][idxH][idxW] = nullptr; +#endif } } } @@ -728,7 +755,11 @@ const uint32_t g_scalingListSizeX[SCALING_LIST_SIZE_NUM] = { 1, 2, 4, 8, 16, uint8_t g_triangleMvStorage[TRIANGLE_DIR_NUM][MAX_CU_DEPTH - MIN_CU_LOG2 + 1][MAX_CU_DEPTH - MIN_CU_LOG2 + 1][MAX_CU_SIZE >> MIN_CU_LOG2][MAX_CU_SIZE >> MIN_CU_LOG2]; +#if JVET_P0530_TPM_WEIGHT_ALIGN +int16_t *g_triangleWeights[4][TRIANGLE_DIR_NUM][MAX_CU_DEPTH - MIN_CU_LOG2 + 2][MAX_CU_DEPTH - MIN_CU_LOG2 + 2]; +#else int16_t *g_triangleWeights[2][TRIANGLE_DIR_NUM][MAX_CU_DEPTH - MIN_CU_LOG2 + 2][MAX_CU_DEPTH - MIN_CU_LOG2 + 2]; +#endif Mv g_reusedUniMVs[32][32][8][8][2][33]; bool g_isReusedUniMVsFilled[32][32][8][8]; diff --git a/source/Lib/CommonLib/Rom.h b/source/Lib/CommonLib/Rom.h index 
ac70317eb1f943cbaf6b4b33035e79f830a5d237..af8b3d519d7458e1facf71320552afec5183a2e3 100644 --- a/source/Lib/CommonLib/Rom.h +++ b/source/Lib/CommonLib/Rom.h @@ -211,8 +211,11 @@ constexpr uint8_t g_tbMax[257] = { 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, extern uint8_t g_triangleMvStorage[TRIANGLE_DIR_NUM][MAX_CU_DEPTH - MIN_CU_LOG2 + 1][MAX_CU_DEPTH - MIN_CU_LOG2 + 1][MAX_CU_SIZE >> MIN_CU_LOG2][MAX_CU_SIZE >> MIN_CU_LOG2]; -// 7-tap/3-tap, direction, 2/4/8/16/32/64/128 +// weight set (JVET_P0530_TPM_WEIGHT_ALIGN: 0 = luma, otherwise the chroma format idc; without it: 7-tap/3-tap), direction, 2/4/8/16/32/64/128 +#if JVET_P0530_TPM_WEIGHT_ALIGN +extern int16_t *g_triangleWeights[4][TRIANGLE_DIR_NUM][MAX_CU_DEPTH - MIN_CU_LOG2 + 2][MAX_CU_DEPTH - MIN_CU_LOG2 + 2]; +#else extern int16_t *g_triangleWeights[2][TRIANGLE_DIR_NUM][MAX_CU_DEPTH - MIN_CU_LOG2 + 2][MAX_CU_DEPTH - MIN_CU_LOG2 + 2]; - +#endif extern bool g_mctsDecCheckEnabled; class Mv; diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 841a7f0f43b1a53f68c3f889ef6c9a8702c9e4e7..21995d14b2dafd76a8fed80b609d80dc179e5352 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -50,6 +50,8 @@ #include <assert.h> #include <cassert> +#define JVET_P0530_TPM_WEIGHT_ALIGN 1 // JVET-P0530: align chroma weights with luma weights for TPM blending + #define JVET_P0491_BDOFPROF_MVD_RANGE 1 // JVET-P0491: clip the MVD in BDOF/PROF to [-31 31] #define JVET_P0460_PLT_TS_MIN_QP 1 // JVET-P0460: Use TS min QP for Palette Escape mode diff --git a/source/Lib/CommonLib/x86/InterpolationFilterX86.h b/source/Lib/CommonLib/x86/InterpolationFilterX86.h index 6d94bd153fe2273170fc05df1dfb41ea19842777..d78580bbc1a7a6f34080625b7c7030a03276df20 100644 --- a/source/Lib/CommonLib/x86/InterpolationFilterX86.h +++ b/source/Lib/CommonLib/x86/InterpolationFilterX86.h @@ -1233,10 +1233,15 @@ void xWeightedTriangleBlk_SSE(const PredictionUnit &pu, const uint32_t width, co const ClpRng clpRng = pu.cu->slice->clpRngs().comp[compIdx]; const int32_t shiftWeighted = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd)) + log2WeightBase; 
const int32_t offsetWeighted = (1 << (shiftWeighted - 1)) + (IF_INTERNAL_OFFS << log2WeightBase); +#if JVET_P0530_TPM_WEIGHT_ALIGN + int wIdx = (compIdx == COMPONENT_Y) ? 0 : pu.cs->sps->getChromaFormatIdc(); + int16_t *weight = g_triangleWeights[wIdx][splitDir][log2Height][log2Width]; +#else const bool longWeight = (compIdx == COMPONENT_Y); const bool shortWeight = !longWeight; int16_t *weight = g_triangleWeights[shortWeight][splitDir][log2Height][log2Width]; +#endif const __m128i mmEight = _mm_set1_epi16(8); const __m128i mmOffset = _mm_set1_epi32(offsetWeighted);