diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp index a2881df31554040e244d372d2ca78c24a3912e29..b149ac464cb2e05f3ac342cd13db708cde4e417c 100644 --- a/source/App/EncoderApp/EncApp.cpp +++ b/source/App/EncoderApp/EncApp.cpp @@ -943,6 +943,13 @@ void EncApp::xInitLibCfg() #endif #if JVET_W0123_TIMD_FUSION m_cEncLib.setUseTimd ( m_timd ); +#if JVET_AJ0061_TIMD_MERGE + m_cEncLib.setUseTimdMrg ( m_timdMrg ); + if (!m_timd) + { + m_cEncLib.setUseTimdMrg ( false ); + } +#endif #endif #if JVET_AB0155_SGPM m_cEncLib.setUseSgpm ( m_sgpm ); diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp index 737460da0d59fcf4437eb1a11f688ece2af21650..e9efe75120028a9f72c7ecbd3bf430cf88e4c953 100644 --- a/source/App/EncoderApp/EncAppCfg.cpp +++ b/source/App/EncoderApp/EncAppCfg.cpp @@ -1161,6 +1161,9 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) #endif #if JVET_W0123_TIMD_FUSION ( "TIMD", m_timd, true, "Enable template based intra mode derivation\n" ) +#if JVET_AJ0061_TIMD_MERGE + ( "TIMDMerge", m_timdMrg, true, "Enable merge mode for TIMD\n" ) +#endif #endif #if JVET_AB0155_SGPM ( "SGPM", m_sgpm, true, "Enable spatial geometric partitioning mode\n" ) @@ -2037,6 +2040,13 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) } #endif +#if JVET_AJ0061_TIMD_MERGE + if (!m_timd) + { + m_timdMrg = false; + } +#endif + #if JVET_AG0136_INTRA_TMP_LIC m_itmpLicMode = (m_iIntraPeriod != 1) ? 1 : 0; #endif @@ -4119,6 +4129,13 @@ bool EncAppCfg::xCheckParameter() msg(WARNING, "TIMD is forcefully disabled since the enable flag of non-inter-TM tools is set off. \n"); m_timd = false; } +#if JVET_AJ0061_TIMD_MERGE + if (!m_timd) + { + msg(WARNING, "TIMDMerge is forcefully disabled since timd mode is set to off. \n"); + m_timdMrg = false; + } +#endif #endif #if JVET_AB0155_SGPM if (m_sgpm) @@ -5922,6 +5939,9 @@ void EncAppCfg::xPrintParameter() #endif #if JVET_W0123_TIMD_FUSION msg(VERBOSE, "TIMD:%d ", m_timd); +#if JVET_AJ0061_TIMD_MERGE + msg(VERBOSE, "TIMDMerge:%d ", m_timdMrg); +#endif #endif #if JVET_AB0155_SGPM msg(VERBOSE, "SGPM:%d ", m_sgpm); diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h index 9a478c16a0901ac17536b60ee3dbf14f38c0769d..e999b0988c8ed2dac680a9253c967c39b3d77195 100644 --- a/source/App/EncoderApp/EncAppCfg.h +++ b/source/App/EncoderApp/EncAppCfg.h @@ -507,6 +507,9 @@ protected: #endif #if JVET_W0123_TIMD_FUSION bool m_timd; +#if JVET_AJ0061_TIMD_MERGE + bool m_timdMrg; +#endif #endif #if JVET_AB0155_SGPM bool m_sgpm; diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h index a5a6c32fc81feba3cbfcafb07e19305a1a398b66..c083a2fda2ace3b91055336b80d3443ab96b608e 100644 --- a/source/Lib/CommonLib/CommonDef.h +++ b/source/Lib/CommonLib/CommonDef.h @@ -637,6 +637,26 @@ static const int NUM_EXT_LUMA_MODE = 30; static const int NUM_EXT_LUMA_MODE = 28; #endif +#if JVET_AJ0061_TIMD_MERGE +static const int TIMDM_IDX = 251; // index for intra TIMD merge mode +static const int NUM_TIMD_MERGE_CUS = 99 + 13; +static const int NUM_TIMD_MERGE_MODES = 1; +static const int NUM_TIMD_MRL_MODES = 2; +static const size_t TIMD_MERGE_MAX_NONADJACENT = 42; +enum TimdMode +{ + Timd = 0, + TimdMrg = 1, + TimdMrl1 = 2, + TimdMrl3 = 3, + NumTimdMode = 4 +}; +static inline TimdMode getTimdMode(bool timdm, int refIdx) +{ + return (timdm ? TimdMrg : (refIdx == 0 ? Timd : (refIdx == 1 ? TimdMrl1 : TimdMrl3))); +} +#endif + static const int NUM_DIR = (((NUM_LUMA_MODE - 3) >> 2) + 1); static const int PLANAR_IDX = 0; ///< index for intra PLANAR mode static const int DC_IDX = 1; ///< index for intra DC mode diff --git a/source/Lib/CommonLib/Contexts.h b/source/Lib/CommonLib/Contexts.h index dfb8a866464d38a71502563f153410df18584909..190e3d8fee48393abf5031b9a2cf76855fde503d 100644 --- a/source/Lib/CommonLib/Contexts.h +++ b/source/Lib/CommonLib/Contexts.h @@ -742,6 +742,9 @@ public: #endif #if JVET_W0123_TIMD_FUSION static const CtxSet TimdFlag; +#if JVET_AJ0061_TIMD_MERGE + static const CtxSet TimdMrgFlag; +#endif #endif #if JVET_AB0155_SGPM static const CtxSet SgpmFlag; diff --git a/source/Lib/CommonLib/Contexts_ecm14.0.inl b/source/Lib/CommonLib/Contexts_ecm14.0.inl index 6503c2c742ae8953f9d1a94c36f5625066466343..7d99a221a160db0c132375c47251a8dd9f6114f9 100644 --- a/source/Lib/CommonLib/Contexts_ecm14.0.inl +++ b/source/Lib/CommonLib/Contexts_ecm14.0.inl @@ -5279,6 +5279,33 @@ const CtxSet ContextSetCfg::TimdFlag = ContextSetCfg::addCtxSet({ { 134, 195, 227 }, }); +#if JVET_AJ0061_TIMD_MERGE +const CtxSet ContextSetCfg::TimdMrgFlag = ContextSetCfg::addCtxSet +// ctx 1470 1471 +({ + { 35, 35, }, + { 35, 35, }, + { 19, 34, }, + { 35, 35, }, + { 8, 8, }, + { 8, 8, }, + { 8, 8, }, + { 8, 8, }, + { 18, 18, }, + { 18, 18, }, + { 18, 18, }, + { 18, 18, }, + { 119, 119, }, + { 119, 119, }, + { 119, 119, }, + { 119, 119, }, + { 102, 120, }, + { 122, 132, }, + { 119, 119, }, + { 119, 119, }, + }); +#endif + const CtxSet ContextSetCfg::SgpmFlag = ContextSetCfg::addCtxSet({ // ctx 1465 1467 { 26, 26, 26 }, diff --git a/source/Lib/CommonLib/IntraPrediction.cpp b/source/Lib/CommonLib/IntraPrediction.cpp index d4e4c4d135f4b8bcd4cdbbf0a19ec02dd4d6f30b..704bdf30d872f092d162f10e50dbe7ccf32cb13c 100644 --- a/source/Lib/CommonLib/IntraPrediction.cpp +++ b/source/Lib/CommonLib/IntraPrediction.cpp @@ -1530,6 +1530,12 @@ void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, co int weightMode = ((pu.cu->timd && pu.cu->timdIsBlended) || !applyFusion) ? 4 : 3; #else int weightMode = ((pu.cu->timd && pu.cu->timdIsBlended) || !applyFusion || (PU::isSgpm(pu, CHANNEL_TYPE_LUMA))) ? 4 : 3; +#if JVET_AJ0061_TIMD_MERGE + if (pu.cu->timd && pu.cu->timdMrg && isLuma(compID)) + { + weightMode = ((pu.cu->timd && pu.cu->timdMrgIsBlended[pu.cu->timdMrg - 1]) || !applyFusion || (PU::isSgpm(pu, CHANNEL_TYPE_LUMA))) ? 4 : 3; + } +#endif #endif #if JVET_AC0112_IBC_CIIP if (pu.ibcCiipFlag) @@ -2146,7 +2152,11 @@ void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, co } #endif #if JVET_W0123_TIMD_FUSION +#if JVET_AJ0061_TIMD_MERGE + if (((pu.cu->timd && !pu.cu->timdMrg && pu.cu->timdIsBlended) || (pu.cu->timdMrg && pu.cu->timdMrgIsBlended[pu.cu->timdMrg - 1])) && isLuma(compID)) +#else if (pu.cu->timd && pu.cu->timdIsBlended && isLuma(compID)) +#endif { int width = piPred.width; int height = piPred.height; @@ -2154,16 +2164,30 @@ void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, co CPelBuf srcBuf3rd; #if JVET_AG0092_ENHANCED_TIMD_FUSION PelBuf nonAngBuffer = m_tempBuffer[0].getBuf( localUnitArea.Y() ); +#if JVET_AJ0061_TIMD_MERGE + int nonAngFusionWeight = pu.cu->timdMrg ? pu.cu->timdMrgFusionWeight[pu.cu->timdMrg - 1][2] : pu.cu->timdFusionWeight[2]; + if (nonAngFusionWeight > 0) +#else if( pu.cu->timdFusionWeight[2] > 0 ) +#endif { PredictionUnit pu3 = pu; +#if JVET_AJ0061_TIMD_MERGE + int timdModeNonAng = pu.cu->timdMrg ? pu.cu->timdMrgList[pu.cu->timdMrg - 1][2] : pu.cu->timdModeNonAng; + pu3.intraDir[0] = timdModeNonAng; +#else pu3.intraDir[0] = pu.cu->timdModeNonAng; +#endif int tmpTimdMode = pu3.cu->timdMode; pu3.cu->timdMode = INVALID_TIMD_IDX; initPredIntraParams(pu3, pu.Y(), *(pu.cs->sps)); srcBuf3rd = CPelBuf(getPredictorPtr(compID), srcStride, srcHStride); pu3.cu->timdMode = tmpTimdMode; +#if JVET_AJ0061_TIMD_MERGE + switch (timdModeNonAng) +#else switch (pu.cu->timdModeNonAng) +#endif { #if JVET_AC0105_DIRECTIONAL_PLANAR case (PLANAR_IDX): xPredIntraPlanar(srcBuf3rd, nonAngBuffer, 0); break; @@ -2175,7 +2199,11 @@ void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, co } #if JVET_X0148_TIMD_PDPC #if CIIP_PDPC +#if JVET_AJ0061_TIMD_MERGE + if( (m_ipaParam.applyPDPC || pu.ciipPDPC) && ( timdModeNonAng == PLANAR_IDX || timdModeNonAng == DC_IDX) ) +#else if( (m_ipaParam.applyPDPC || pu.ciipPDPC) && ( pu.cu->timdModeNonAng == PLANAR_IDX || pu.cu->timdModeNonAng == DC_IDX) ) +#endif { xIntraPredPlanarDcPdpc( srcBuf3rd, m_tempBuffer[0].getBuf( localUnitArea.Y() ).buf, m_tempBuffer[0].getBuf( localUnitArea.Y() ).stride, iWidth, iHeight, pu.ciipPDPC ); } @@ -2194,7 +2222,12 @@ void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, co const bool applyPdpc = m_ipaParam.applyPDPC; PredictionUnit pu2 = pu; #if JVET_AC0094_REF_SAMPLES_OPT +#if JVET_AJ0061_TIMD_MERGE + int timdModeSecondary = pu.cu->timdMrg ? pu.cu->timdMrgList[pu.cu->timdMrg - 1][1] : pu.cu->timdModeSecondary; + pu2.intraDir[0] = timdModeSecondary; +#else pu2.intraDir[0] = pu.cu->timdModeSecondary; +#endif int tmpTimdMode = pu2.cu->timdMode; pu2.cu->timdMode = INVALID_TIMD_IDX; #else @@ -2209,7 +2242,11 @@ void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, co #else srcBuf3rd = srcBuf; #endif +#if JVET_AJ0061_TIMD_MERGE + switch (timdModeSecondary) +#else switch (pu.cu->timdModeSecondary) +#endif { #if JVET_AC0105_DIRECTIONAL_PLANAR case (PLANAR_IDX): xPredIntraPlanar(srcBuf3rd, predFusion, 0); break; @@ -2226,7 +2263,11 @@ void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, co #if JVET_X0148_TIMD_PDPC #if CIIP_PDPC +#if JVET_AJ0061_TIMD_MERGE + if( (m_ipaParam.applyPDPC || pu.ciipPDPC) && (timdModeSecondary == PLANAR_IDX || timdModeSecondary == DC_IDX) ) +#else if( (m_ipaParam.applyPDPC || pu.ciipPDPC) && (pu.cu->timdModeSecondary == PLANAR_IDX || pu.cu->timdModeSecondary == DC_IDX) ) +#endif { xIntraPredPlanarDcPdpc( srcBuf3rd, m_tempBuffer[1].getBuf( localUnitArea.Y() ).buf, m_tempBuffer[1].getBuf( localUnitArea.Y() ).stride, iWidth, iHeight, pu.ciipPDPC ); } @@ -2268,7 +2309,12 @@ void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, co #else Pel *pelPred = piPred.buf; Pel *pelPredFusion = predFusion.buf; +#if JVET_AJ0061_TIMD_MERGE + int timdFusionWeight = pu.cu->timdMrg ? pu.cu->timdMrgFusionWeight[pu.cu->timdMrg - 1][2] : pu.cu->timdFusionWeight[2]; + Pel *pelPredNonAng = timdFusionWeight > 0 ? nonAngBuffer.buf : nullptr; +#else Pel *pelPredNonAng = pu.cu->timdFusionWeight[2] > 0 ? nonAngBuffer.buf : nullptr; +#endif #if JVET_AG0092_ENHANCED_TIMD_FUSION PelBuf predAngNonLocDep = m_tempBuffer[4].getBuf( localUnitArea.Y() ); PelBuf predAngVer = m_tempBuffer[2].getBuf( localUnitArea.Y() ); @@ -2283,6 +2329,10 @@ void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, co bool useLocDepBlending = false; int weightVer = 0, weightHor = 0, weightNonLocDep = 0; +#if JVET_AJ0061_TIMD_MERGE + if (!pu.cu->timdMrg) + { +#endif for (int i = 0; i < TIMD_FUSION_NUM; i++) { if (pu.cu->timdLocDep[i] == 1) @@ -2298,6 +2348,27 @@ void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, co weightNonLocDep += pu.cu->timdFusionWeight[i]; } } +#if JVET_AJ0061_TIMD_MERGE + } + else + { + for (int i = 0; i < TIMD_FUSION_NUM; i++) + { + if (pu.cu->timdMrgLocDep[pu.cu->timdMrg - 1][i] == 1) + { + weightVer += pu.cu->timdMrgFusionWeight[pu.cu->timdMrg - 1][i]; + } + else if (pu.cu->timdMrgLocDep[pu.cu->timdMrg - 1][i] == 2) + { + weightHor += pu.cu->timdMrgFusionWeight[pu.cu->timdMrg - 1][i]; + } + else + { + weightNonLocDep += pu.cu->timdMrgFusionWeight[pu.cu->timdMrg - 1][i]; + } + } + } +#endif if(weightHor || weightVer) { @@ -2319,10 +2390,24 @@ void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, co } int weights[TIMD_FUSION_NUM] = {0}; +#if JVET_AJ0061_TIMD_MERGE + if (!pu.cu->timdMrg) + { +#endif for (int i = 0; i < TIMD_FUSION_NUM ; i++) { weights[i] = (pu.cu->timdLocDep[i] == locDep) ? pu.cu->timdFusionWeight[i] : 0; } +#if JVET_AJ0061_TIMD_MERGE + } + else + { + for (int i = 0; i < TIMD_FUSION_NUM ; i++) + { + weights[i] = (pu.cu->timdMrgLocDep[pu.cu->timdMrg - 1][i] == locDep) ? pu.cu->timdMrgFusionWeight[pu.cu->timdMrg - 1][i] : 0; + } + } +#endif int num2blend = 0; int blendIndexes[3] = {0}; @@ -3561,7 +3646,12 @@ void IntraPrediction::initPredIntraParams(const PredictionUnit & pu, const CompA #if JVET_AC0094_REF_SAMPLES_OPT #if JVET_AD0085_TMRL_EXTENSION bool checkWideAngle = !bExtIntraDir ? true : (pu.cu->timdMode != INVALID_TIMD_IDX ? pu.cu->timdModeCheckWA : pu.cu->timdModeSecondaryCheckWA); - +#if JVET_AJ0061_TIMD_MERGE + if (pu.cu->timd && pu.cu->timdMrg) + { + checkWideAngle = !bExtIntraDir ? true : (pu.cu->timdMode != INVALID_TIMD_IDX ? pu.cu->timdMrgModeCheckWA[pu.cu->timdMrg - 1][0] : pu.cu->timdMrgModeCheckWA[pu.cu->timdMrg - 1][1]); + } +#endif int predMode = checkWideAngle ? (bExtIntraDir ? getWideAngleExt(blockSize.width, blockSize.height, dirMode) : getModifiedWideAngle(blockSize.width, blockSize.height, dirMode)) : (bExtIntraDir ? getTimdWideAngleExt(blockSize.width, blockSize.height, dirMode) : getTimdWideAngle(blockSize.width, blockSize.height, dirMode)); if (pu.cu->tmrlFlag && isLuma(chType)) @@ -3732,7 +3822,11 @@ void IntraPrediction::initPredIntraParams(const PredictionUnit & pu, const CompA #if JVET_AB0157_INTRA_FUSION #if JVET_AG0128_REF_LINE_OPT_TIMD_FUSION +#if JVET_AJ0061_TIMD_MERGE + if (pu.cu->timd && pu.cu->timdIsBlended && !pu.cu->timdMrg) +#else if (pu.cu->timd && pu.cu->timdIsBlended) +#endif { if (pu.cu->timdMode != INVALID_TIMD_IDX) { @@ -3755,6 +3849,27 @@ void IntraPrediction::initPredIntraParams(const PredictionUnit & pu, const CompA } } } +#if JVET_AJ0061_TIMD_MERGE + else if (pu.cu->timd && pu.cu->timdMrg) + { + m_ipaParam.applyFusion = false; + if (pu.cu->timdMode != INVALID_TIMD_IDX && pu.cu->timdMrgIsBlended[pu.cu->timdMrg - 1]) + { + m_ipaParam.fetchRef2nd = true; + m_ipaParam.applyFusion = true; + } + else if (m_ipaParam.fetchRef2nd) + { + bool isAngularMode = !(dirMode == PLANAR_IDX || dirMode == DC_IDX); +#if JVET_W0123_TIMD_FUSION + const bool isIntSlope = bExtIntraDir ? isIntegerSlopeExt(absAng) : isIntegerSlope(absAng); +#else + const bool isIntSlope = isIntegerSlope(absAng); +#endif + m_ipaParam.applyFusion = isAngularMode && !isIntSlope && !useISP; + } + } +#endif else { #endif @@ -8624,6 +8739,9 @@ void IntraPrediction::deriveMPMSorted(const PredictionUnit& pu, uint8_t* mpm, in assert(0); } +#if JVET_AJ0061_TIMD_MERGE + g_timdMrgCost[iMode] = uiCost; +#endif if (uiCostList.size() < iBestN || (uiCostList.size() >= iBestN && uiCost < uiCostList.back())) { updateCandList(mpm[i], uiCost, uiModeList, uiCostList, iBestN); @@ -8796,6 +8914,444 @@ int IntraPrediction::getBestNonAnglularMode(const CPelBuf& recoBuf, const CompAr } #endif +#if JVET_AJ0061_TIMD_MERGE +void IntraPrediction::searchTimdMrgCus(const CodingUnit &cu, static_vector<const CodingUnit*, NUM_TIMD_MERGE_CUS> &cuNeighbours) +{ + /* ----------------------------------------------------------------- + ----------- Collect adjacent neighbour cands ---------------------- + ----------------------------------------------------------------- */ + const int numCUs = NUM_TIMD_MERGE_CUS; + const int step = 4; + const CodingUnit *cuLeft = NULL; + for (int i = 0; i <= cu.lheight(); i += step) + { + cuLeft = cu.cs->getCURestricted(cu.lumaPos().offset(-1, i), cu, CH_L); + if (cuLeft && CU::isIntra(*cuLeft) && cuLeft->timd && cuNeighbours.size() < numCUs - 1) + { + cuNeighbours.push_back(cuLeft); + } + } + const CodingUnit *cuTop = NULL; + for (int i = 0; i <= cu.lwidth(); i += step) + { + cuTop = cu.cs->getCURestricted(cu.lumaPos().offset(i, -1), cu, CH_L); + if (cuTop && CU::isIntra(*cuTop) && cuTop->timd && cuNeighbours.size() < numCUs - 1) + { + cuNeighbours.push_back(cuTop); + } + } + const CodingUnit* cuNeighbour = cu.cs->getCURestricted(cu.lumaPos().offset(-1, -1), cu, CH_L); + if (cuNeighbour && CU::isIntra(*cuNeighbour) && cuNeighbour->timd && cuNeighbours.size() < numCUs - 1) + { + cuNeighbours.push_back(cuNeighbour); + } + const CodingUnit *cuLeft2 = cuLeft ? cu.cs->getCURestricted(cuLeft->lumaPos().offset(cuLeft->lwidth() - 1, cuLeft->lheight()), cu, CH_L) : NULL; + const CodingUnit *cuTop2 = cuTop ? cu.cs->getCURestricted(cuTop->lumaPos().offset(cuTop->lwidth(), cuTop->lheight() - 1), cu, CH_L) : NULL; + cuNeighbour = cuLeft2; + if (cuNeighbour && CU::isIntra(*cuNeighbour) && cuNeighbour->timd && cuNeighbours.size() < numCUs - 1) + { + cuNeighbours.push_back(cuNeighbour); + } + cuNeighbour = cuTop2; + if (cuNeighbour && CU::isIntra(*cuNeighbour) && cuNeighbour->timd && cuNeighbours.size() < numCUs - 1) + { + cuNeighbours.push_back(cuNeighbour); + } + cuNeighbour = cuLeft2 ? cu.cs->getCURestricted(cuLeft2->lumaPos().offset(cuLeft2->lwidth() - 1, cuLeft2->lheight()), cu, CH_L) : NULL; + if (cuNeighbour && CU::isIntra(*cuNeighbour) && cuNeighbour->timd && cuNeighbours.size() < numCUs - 1) + { + cuNeighbours.push_back(cuNeighbour); + } + cuNeighbour = cuTop2 ? cu.cs->getCURestricted(cuTop2->lumaPos().offset(cuTop2->lwidth(), cuTop2->lheight() - 1), cu, CH_L) : NULL; + if (cuNeighbour && CU::isIntra(*cuNeighbour) && cuNeighbour->timd && cuNeighbours.size() < numCUs - 1) + { + cuNeighbours.push_back(cuNeighbour); + } + + /* ----------------------------------------------------------------- + ---------- Collect non-adjacent neighbour cands------------- + ----------------------------------------------------------------- */ + for (const CodingUnit *nonAdjacentNeighbour : PU::timdMergeNonAdjacentNeighbours(*cu.firstPU)) + { + if (nonAdjacentNeighbour && CU::isIntra(*nonAdjacentNeighbour) && nonAdjacentNeighbour->timd && cuNeighbours.size() < numCUs) + { + cuNeighbours.push_back(nonAdjacentNeighbour); + } + } + + // Deduplicate candidates + static_vector<const CodingUnit *, numCUs> cuNeighboursFiltered; + for (int i = 0; i < cuNeighbours.size(); i++) + { + bool useNeighbour = true; + for (int j = i-1; j >= 0; j--) + { + if (cuNeighbours[i]->lx() == cuNeighbours[j]->lx() && cuNeighbours[i]->ly() == cuNeighbours[j]->ly()) + { + useNeighbour = false; + break; + } + } + if (useNeighbour) + { + cuNeighboursFiltered.push_back(cuNeighbours[i]); + } + } + cuNeighbours = std::move(cuNeighboursFiltered); + + /* ----------------------------------------------------------------- + ---------------- Sort neighbours by distance ----------------------- + ----------------------------------------------------------------- */ + size_t limitMaxNeigh = 5; + static_vector<size_t, numCUs> neighboursInDistOrder; + // Calculate the distance to each neighbour and store in dists. + static_vector<int, numCUs> dists; + for (size_t i = 0; i < cuNeighbours.size(); i++) + { + const int dx = abs((int)(cu.lx()) - (int)(cuNeighbours[i]->lx())); + const int dy = abs((int)(cu.ly()) - (int)(cuNeighbours[i]->ly())); + const int d = dx + dy; + dists.push_back(d); + } + + // Sort neighbours in ascending order of distance. + auto neighbourComp = [&](size_t a, size_t b) + { + return dists[a] < dists[b] + || (dists[a] == dists[b] && a < b); + }; + for (size_t i = 0; i < cuNeighbours.size(); i++) + { + neighboursInDistOrder.push_back(i); + } + std::sort(neighboursInDistOrder.begin(), neighboursInDistOrder.end(), neighbourComp); + CHECK(cuNeighbours.size() != neighboursInDistOrder.size(), + "cuNeighbours and neighboursInDistOrder not correctly associated"); + + // Discard all but the nearest limitMaxNeigh neighbours. + cuNeighboursFiltered.clear(); + const size_t numToMix = std::min(cuNeighbours.size(), limitMaxNeigh); + for (int i = 0; i < cuNeighbours.size() && cuNeighboursFiltered.size() < numToMix; i++) + { + for (int j = 0; j < numToMix; j++) + { + if (neighboursInDistOrder[j] == i) + { + cuNeighboursFiltered.push_back(cuNeighbours[i]); + } + } + } + cuNeighbours = std::move(cuNeighboursFiltered); + + return; +} + +template<typename T, size_t N> +void IntraPrediction::calcTimdMrgCandCosts( + const CodingUnit &cu, + static_vector<TimdMergeInfo, NUM_TIMD_MERGE_CUS> &timdMrgList, + static_vector<T, N>& uiModeList, + static_vector<uint64_t, N>& candCostList) +{ + const CompArea &area = cu.Y(); + SizeType uiWidth = cu.lwidth(); + SizeType uiHeight = cu.lheight(); + int iCurX = cu.lx(); + int iCurY = cu.ly(); + int iRefX = -1, iRefY = -1; + uint32_t uiRefWidth = 0, uiRefHeight = 0; + int iTempWidth = 1, iTempHeight = 1; + TemplateType eTempType = CU::deriveTimdRefType(iCurX, iCurY, uiWidth, uiHeight, iTempWidth, iTempHeight, iRefX, + iRefY, uiRefWidth, uiRefHeight); + + int channelBitDepth = cu.slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA); + auto & pu = *cu.firstPU; + uint32_t uiRealW = uiRefWidth + (eTempType == LEFT_NEIGHBOR ? iTempWidth : 0); + uint32_t uiRealH = uiRefHeight + (eTempType == ABOVE_NEIGHBOR ? iTempHeight : 0); + uint32_t uiRealW2 = uiRefWidth + (eTempType == LEFT_NEIGHBOR ? iTempWidth : 0); + uint32_t uiRealH2 = uiRefHeight + (eTempType == ABOVE_NEIGHBOR ? iTempHeight : 0); + const UnitArea localUnitArea(pu.chromaFormat, Area(0, 0, uiRealW2, uiRealH2)); + uint32_t uiPredStride = m_intraPredBuffer[0].getBuf(localUnitArea.Y()).stride; + Pel *piPred = m_intraPredBuffer[0].getBuf(localUnitArea.Y()).buf; + const CodingStructure &cs = *cu.cs; + m_ipaParam.multiRefIndex = iTempWidth; + Pel *piOrg = cs.picture->getRecoBuf(area).buf; + int iOrgStride = cs.picture->getRecoBuf(area).stride; + piOrg += (iRefY - iCurY) * iOrgStride + (iRefX - iCurX); + DistParam distParamSad[2]; // above, left + distParamSad[0].applyWeight = false; + distParamSad[0].useMR = false; + distParamSad[1].applyWeight = false; + distParamSad[1].useMR = false; + if (eTempType == LEFT_ABOVE_NEIGHBOR) + { + m_timdSatdCost->setTimdDistParam(distParamSad[0], piOrg + iTempWidth, piPred + iTempWidth, iOrgStride, + uiPredStride, channelBitDepth, COMPONENT_Y, uiWidth, iTempHeight, 0, 1, + false); // Use HAD (SATD) cost + m_timdSatdCost->setTimdDistParam(distParamSad[1], piOrg + iTempHeight * iOrgStride, + piPred + iTempHeight * uiPredStride, iOrgStride, uiPredStride, channelBitDepth, + COMPONENT_Y, iTempWidth, uiHeight, 0, 1, false); // Use HAD (SATD) cost + } + else if (eTempType == LEFT_NEIGHBOR) + { + m_timdSatdCost->setTimdDistParam(distParamSad[1], piOrg, piPred, iOrgStride, uiPredStride, channelBitDepth, + COMPONENT_Y, iTempWidth, uiHeight, 0, 1, false); + } + else if (eTempType == ABOVE_NEIGHBOR) + { + m_timdSatdCost->setTimdDistParam(distParamSad[0], piOrg, piPred, iOrgStride, uiPredStride, channelBitDepth, + COMPONENT_Y, uiWidth, iTempHeight, 0, 1, false); + } + initTimdIntraPatternLuma(cu, area, eTempType != ABOVE_NEIGHBOR ? iTempWidth : 0, + eTempType != LEFT_NEIGHBOR ? iTempHeight : 0, uiRefWidth, uiRefHeight); + + int iNum = (int)timdMrgList.size(); + + // create and store all predictions + std::array<bool, EXT_VDIA_IDX + 1> predDone; + predDone.fill(false); + std::array<uint64_t, EXT_VDIA_IDX + 1> predCost; + predCost.fill(MAX_UINT64); + + auto calculateCost = [&](int mode) + { + uint64_t uiCostTmp; + initPredTimdIntraParams(pu, area, mode); + predTimdIntraAng(COMPONENT_Y, pu, mode, piPred, uiPredStride, uiRealW, uiRealH, eTempType, + (eTempType == ABOVE_NEIGHBOR) ? 0 : iTempWidth, + (eTempType == LEFT_NEIGHBOR) ? 0 : iTempHeight); + if (eTempType == LEFT_ABOVE_NEIGHBOR) + { + uiCostTmp = distParamSad[0].distFunc(distParamSad[0]) + + distParamSad[1].distFunc(distParamSad[1]); + } + else if (eTempType == LEFT_NEIGHBOR) + { + uiCostTmp = distParamSad[1].distFunc(distParamSad[1]); + } + else if (eTempType == ABOVE_NEIGHBOR) + { + uiCostTmp = distParamSad[0].distFunc(distParamSad[0]); + } + else + { + CHECK(true, "At least one template side must be available."); + } + return uiCostTmp; + }; + + const bool isEncoder = cu.cs->pcv->isEncoder; + auto getCost = [&](int mode) + { + uint64_t cost; + if (isEncoder && g_timdMrgCost[mode] != MAX_UINT64) + { + cost = g_timdMrgCost[mode]; + } + else + { + cost = calculateCost(mode); + } + + predCost[mode] = cost; + predDone[mode] = true; + }; + + for (int iMode = 0; iMode < iNum; iMode++) + { + const int (&modes)[TIMD_FUSION_NUM] = timdMrgList[iMode].timdMode; + if (!predDone[modes[0]]) + { + getCost(modes[0]); + } + } + + /* ----------------------------------------------------------------- + ----------- Sort neighbour cands by weighted sum ----------- + ------------- of their individual modes' template cost ------------- + ----------------------------------------------------------------- */ + for (int iMode = 0; iMode < iNum; iMode++) + { + int mode1 = timdMrgList[iMode].timdMode[0]; + const uint64_t uiCost = predCost[mode1]; + updateCandList(iMode, uiCost, uiModeList, candCostList, NUM_TIMD_MERGE_MODES + 1); + } + if (uiModeList.size() > 1 && candCostList.size() > 1 && uiModeList[0] >= 0 && uiModeList[1] >= 0 && candCostList[1] < 1.1 * candCostList[0]) + { + std::swap(uiModeList[0], uiModeList[1]); + std::swap(candCostList[0], candCostList[1]); + } + return; +} +void IntraPrediction::genTimdMrgList( + const CodingUnit &cu, + const static_vector<const CodingUnit*, NUM_TIMD_MERGE_CUS> &cuNeighbours, + static_vector<TimdMergeInfo, NUM_TIMD_MERGE_CUS> &timdMrgList) +{ + /* ----------------------------------------------------------------- + ------------------- Build Timd-Merge list ------------------ + --------------------- from remaining neighbours -------------------- + ----------------------------------------------------------------- */ + for (int i = 0; i < cuNeighbours.size(); i++) + { + if (cuNeighbours[i]->timdMrg) + { + for (int k = 0; k < NUM_TIMD_MERGE_MODES; k++) + { + bool isThere = false; + for (int idx = 0; idx < timdMrgList.size(); idx++) + { + if (timdMrgList[idx].timdMode[0] == cuNeighbours[i]->timdMrgList[k][0] && timdMrgList[idx].timdMode[1] == cuNeighbours[i]->timdMrgList[k][1]) + { + isThere = true; + break; + } + } + if (!isThere && cuNeighbours[i]->timdMrgList[k][0] != INVALID_TIMD_IDX) + { + TimdMergeInfo m = TimdMergeInfo(); + m.timdMode[0] = cuNeighbours[i]->timdMrgList[k][0]; + m.timdMode[1] = cuNeighbours[i]->timdMrgList[k][1]; + m.timdMode[2] = cuNeighbours[i]->timdMrgList[k][2]; + m.timdFusionWeight[0] = cuNeighbours[i]->timdMrgFusionWeight[k][0]; + m.timdFusionWeight[1] = cuNeighbours[i]->timdMrgFusionWeight[k][1]; + m.timdFusionWeight[2] = cuNeighbours[i]->timdMrgFusionWeight[k][2]; + m.timdModeCheckWA[0] = true; + m.timdModeCheckWA[1] = true; + m.timdModeCheckWA[2] = true; + m.timdIsBlended = cuNeighbours[i]->timdMrgIsBlended[k]; + m.timdLocDep[0] = cuNeighbours[i]->timdMrgLocDep[k][0]; + m.timdLocDep[1] = cuNeighbours[i]->timdMrgLocDep[k][1]; + m.timdLocDep[2] = cuNeighbours[i]->timdMrgLocDep[k][2]; + m.timdmTrType[0] = !CS::isDualITree(*cu.cs) ? TransType::DCT2 : cuNeighbours[i]->timdmTrType[k][0]; + m.timdmTrType[1] = !CS::isDualITree(*cu.cs) ? TransType::DCT2 : cuNeighbours[i]->timdmTrType[k][1]; + timdMrgList.push_back(m); + } + } + } + else + { + bool isThere = false; + for (int idx = 0; idx < timdMrgList.size(); idx++) + { + if (timdMrgList[idx].timdMode[0] == cuNeighbours[i]->timdMode && timdMrgList[idx].timdMode[1] == cuNeighbours[i]->timdModeSecondary) + { + isThere = true; + break; + } + } + if (!isThere) + { + TimdMergeInfo m = TimdMergeInfo(); + m.timdMode[0] = cuNeighbours[i]->timdMode; + m.timdMode[1] = cuNeighbours[i]->timdModeSecondary; + m.timdMode[2] = cuNeighbours[i]->timdModeNonAng; + m.timdFusionWeight[0] = cuNeighbours[i]->timdFusionWeight[0]; + m.timdFusionWeight[1] = cuNeighbours[i]->timdFusionWeight[1]; + m.timdFusionWeight[2] = cuNeighbours[i]->timdFusionWeight[2]; + m.timdModeCheckWA[0] = true; + m.timdModeCheckWA[1] = true; + m.timdModeCheckWA[2] = true; + m.timdLocDep[0] = cuNeighbours[i]->timdLocDep[0]; + m.timdLocDep[1] = cuNeighbours[i]->timdLocDep[1]; + m.timdLocDep[2] = cuNeighbours[i]->timdLocDep[2]; + m.timdIsBlended = cuNeighbours[i]->timdIsBlended; + m.timdmTrType[0] = !CS::isDualITree(*cu.cs) ? TransType::DCT2 : cuNeighbours[i]->timdmTrType[NUM_TIMD_MERGE_MODES][0]; + m.timdmTrType[1] = !CS::isDualITree(*cu.cs) ? TransType::DCT2 : cuNeighbours[i]->timdmTrType[NUM_TIMD_MERGE_MODES][1]; + timdMrgList.push_back(m); + } + } + } + return; +} +void IntraPrediction::deriveTimdMergeModes(const CPelBuf &recoBuf, const CompArea &area, CodingUnit &cu) +{ + /* ----------------------------------------------------------------------------------- + Step 1: Collect adjacent neighbour cands + Step 2: Collect non-adjacent neighbour cands + Step 3: Sort neighbours by distance + Step 4: Build Timd-Merge list from remaining neighbours + Step 5: Compute template cost of individual modes in the merge list + Step 6: Sort neighbour cands by weighted sum of their individual modes' template cost + Step 7: Store best neighbour cands in CU + ------------------------------------------------------------------------------------ */ + cu.timdMrgCand = 0; + for (int i = 0; i < NUM_TIMD_MERGE_MODES; i++) + { + cu.timdMrgIsBlended[i] = false; + for (int j = 0; j < TIMD_FUSION_NUM; j++) + { + cu.timdMrgList[i][j] = INVALID_TIMD_IDX; + cu.timdMrgFusionWeight[i][j] = -1; + cu.timdMrgModeCheckWA[i][j] = true; + cu.timdMrgLocDep[i][j] = 0; + } + } + if (!cu.slice->getSPS()->getUseTimd()) + { + return; + } + + SizeType uiWidth = cu.lwidth(); + SizeType uiHeight = cu.lheight(); + int iCurX = cu.lx(); + int iCurY = cu.ly(); + int iRefX = -1, iRefY = -1; + uint32_t uiRefWidth = 0, uiRefHeight = 0; + int iTempWidth = 1, iTempHeight = 1; + TemplateType eTempType = CU::deriveTimdRefType(iCurX, iCurY, uiWidth, uiHeight, iTempWidth, iTempHeight, iRefX, + iRefY, uiRefWidth, uiRefHeight); + if (eTempType == NO_NEIGHBOR) + { + return; + } + static_vector<const CodingUnit*, NUM_TIMD_MERGE_CUS> cuNeighbours; + static_vector<TimdMergeInfo, NUM_TIMD_MERGE_CUS> timdMrgList; + searchTimdMrgCus(cu, cuNeighbours); + cu.timdMrgCand = (int)cuNeighbours.size(); + genTimdMrgList(cu, cuNeighbours, timdMrgList); + + if (!timdMrgList.size()) + { + return; + } + if (timdMrgList.size() == 1) + { + std::memcpy(cu.timdMrgList[0], timdMrgList[0].timdMode, TIMD_FUSION_NUM * sizeof(int)); + std::memcpy(cu.timdMrgFusionWeight[0], timdMrgList[0].timdFusionWeight, TIMD_FUSION_NUM * sizeof(int)); + std::memcpy(cu.timdMrgLocDep[0], timdMrgList[0].timdLocDep, TIMD_FUSION_NUM * sizeof(int8_t)); + std::memcpy(cu.timdmTrType[0], timdMrgList[0].timdmTrType, 2 * sizeof(int)); + std::memcpy(cu.timdMrgModeCheckWA[0], timdMrgList[0].timdModeCheckWA, TIMD_FUSION_NUM * sizeof(bool)); + cu.timdMrgIsBlended[0] = timdMrgList[0].timdIsBlended; + return; + } + /* ----------------------------------------------------------------- + ------------------- Compute template cost ----------------- + ----------------- individual modes in the merge list --------------- + ----------------------------------------------------------------- */ + static_vector<int, NUM_TIMD_MERGE_MODES + 1> uiModeList; + static_vector<uint64_t, NUM_TIMD_MERGE_MODES + 1> uiCostList; + calcTimdMrgCandCosts(cu, timdMrgList, uiModeList, uiCostList); + + /* ----------------------------------------------------------------- + -------------- Store best neighbour cands in CU ------------ + ----------------------------------------------------------------- */ + for (int i = 0; i < NUM_TIMD_MERGE_MODES; i++) + { + if (uiModeList[i] < 0) + { + break; + } + std::memcpy(cu.timdMrgList[i], timdMrgList[uiModeList[i]].timdMode, TIMD_FUSION_NUM * sizeof(int)); + std::memcpy(cu.timdMrgFusionWeight[i], timdMrgList[uiModeList[i]].timdFusionWeight, TIMD_FUSION_NUM * sizeof(int)); + std::memcpy(cu.timdMrgLocDep[i], timdMrgList[uiModeList[i]].timdLocDep, TIMD_FUSION_NUM * sizeof(int8_t)); + std::memcpy(cu.timdmTrType[i], timdMrgList[uiModeList[i]].timdmTrType, 2 * sizeof(int)); + std::memcpy(cu.timdMrgModeCheckWA[i], timdMrgList[uiModeList[i]].timdModeCheckWA, TIMD_FUSION_NUM * sizeof(bool)); + cu.timdMrgIsBlended[i] = timdMrgList[uiModeList[i]].timdIsBlended; + } +} +#endif + #if JVET_AH0076_OBIC void IntraPrediction::deriveObicMode( const CPelBuf &recoBuf, const CompArea &area, CodingUnit &cu ) { @@ -8991,7 +9547,11 @@ int IntraPrediction::getBestNonAnglularMode(const CPelBuf& recoBuf, const CompAr continue; } int numSamples = cuNeighbours[i]->lumaSize().width * cuNeighbours[i]->lumaSize().height; +#if JVET_AJ0061_TIMD_MERGE + if (cuNeighbours[i]->timd && !cuNeighbours[i]->timdMrg) +#else if (cuNeighbours[i]->timd) +#endif { int m = MAP131TO67(cuNeighbours[i]->timdMode); histogram[m] += numSamples; @@ -9006,6 +9566,23 @@ int IntraPrediction::getBestNonAnglularMode(const CPelBuf& recoBuf, const CompAr } } } +#if JVET_AJ0061_TIMD_MERGE + else if (cuNeighbours[i]->timd && cuNeighbours[i]->timdMrg) + { + int m = MAP131TO67(cuNeighbours[i]->timdMrgList[cuNeighbours[i]->timdMrg - 1][0]); + histogram[m] += numSamples; + if (cuNeighbours[i]->timdMrgIsBlended[cuNeighbours[i]->timdMrg - 1] && cuNeighbours[i]->timdMrgFusionWeight[cuNeighbours[i]->timdMrg - 1][1] > 0) + { + int m = MAP131TO67(cuNeighbours[i]->timdMrgList[cuNeighbours[i]->timdMrg - 1][1]); + histogram[m] += numSamples; + if (cuNeighbours[i]->timdMrgFusionWeight[cuNeighbours[i]->timdMrg - 1][2] > 0) + { + int m = MAP131TO67(cuNeighbours[i]->timdMrgList[cuNeighbours[i]->timdMrg - 1][2]); + histogram[m] += numSamples; + } + } + } +#endif else if (cuNeighbours[i]->dimd && !cuNeighbours[i]->obicFlag) { int m = cuNeighbours[i]->dimdMode; diff --git a/source/Lib/CommonLib/IntraPrediction.h b/source/Lib/CommonLib/IntraPrediction.h index d702673e5d69fac349f2498b6bc0786ede1d6a7d..99df6297331f417380c1f11ab6f8d1cc9b428b96 100644 --- a/source/Lib/CommonLib/IntraPrediction.h +++ b/source/Lib/CommonLib/IntraPrediction.h @@ -252,6 +252,18 @@ private: }; #endif +#if JVET_AJ0061_TIMD_MERGE +struct TimdMergeInfo +{ + int timdMode[TIMD_FUSION_NUM]; + int timdFusionWeight[TIMD_FUSION_NUM]; + bool timdModeCheckWA[TIMD_FUSION_NUM]; + int8_t timdLocDep[TIMD_FUSION_NUM]; + bool timdIsBlended; + int timdmTrType[2]; +}; +#endif + class IntraPrediction { public: @@ -880,6 +892,13 @@ public: ); #else void initPredTimdIntraParams (const PredictionUnit & pu, const CompArea area, int dirMode); +#endif +#if JVET_AJ0061_TIMD_MERGE + void deriveTimdMergeModes ( const CPelBuf &recoBuf, const CompArea &area, CodingUnit &cu); + void searchTimdMrgCus ( const CodingUnit &cu, static_vector<const CodingUnit*, NUM_TIMD_MERGE_CUS> &cuNeighbours ); + void genTimdMrgList ( const CodingUnit &cu, const static_vector<const CodingUnit*, NUM_TIMD_MERGE_CUS> &cuNeighbours, static_vector<TimdMergeInfo, NUM_TIMD_MERGE_CUS> &timdMrgList ); + template<typename T, size_t N> + void calcTimdMrgCandCosts ( const CodingUnit &cu, static_vector<TimdMergeInfo, NUM_TIMD_MERGE_CUS> &timdMrgList, static_vector<T, N>& uiModeList, static_vector<uint64_t, N>& candCostList ); #endif void predTimdIntraAng ( const ComponentID compId, const PredictionUnit &pu, uint32_t uiDirMode, Pel* pPred, uint32_t uiStride, uint32_t iWidth, uint32_t iHeight, TemplateType eTempType, int32_t iTemplateWidth, int32_t iTemplateHeight); #if JVET_AG0146_DIMD_ITMP_IBC diff --git a/source/Lib/CommonLib/Rom.cpp b/source/Lib/CommonLib/Rom.cpp index af927378a9fc4e2a5090819b789f63634d02beec..d8ca4d6a47a4aa7407072dd4abb6164bf90638c7 100644 --- a/source/Lib/CommonLib/Rom.cpp +++ b/source/Lib/CommonLib/Rom.cpp @@ -6189,4 +6189,97 @@ uint32_t g_picAmvpSbTmvpEnabledArea = 0; #if JVET_AG0276_LIC_SLOPE_ADJUST const int g_licSlopeDeltaSet[LIC_SLOPE_MAX_NUM_DELTA + 1] = {0, 1, -1}; #endif + +#if JVET_AJ0061_TIMD_MERGE +uint64_t g_timdMrgCost[EXT_VDIA_IDX + 1]; +static constexpr std::array<std::array<PosType, TIMD_MERGE_MAX_NONADJACENT>, MAX_CU_DEPTH - MIN_CU_LOG2 + 1> timdMergeOffsetXTable() +{ + std::array<std::array<PosType, TIMD_MERGE_MAX_NONADJACENT>, MAX_CU_DEPTH - MIN_CU_LOG2 + 1> tab {{{0}}}; + for (size_t widthLog2MinusMinCU = 0; widthLog2MinusMinCU < MAX_CU_DEPTH - MIN_CU_LOG2 + 1; widthLog2MinusMinCU++) + { + size_t i = 0; + const PosType width = 1 << (widthLog2MinusMinCU + MIN_CU_LOG2); + int offsetX = 0; + int offsetX2 = width >> 1; + const int numNACandidate[7] = { 11, 13, 10, 2, 2, 2, 2 }; + const int idxMap[7][15] = { {0, 1, 2, 3, 4, 7, 8, 9, 10, 11, 12}, {0, 1, 2, 3, 4, 7, 8, 9, 10, 11, 12, 13, 14}, {0, 1, 3, 4, 5, 6, 7, 8, 11, 12}, {0, 1}, {0, 1}, {0, 1}, {0, 1} }; + for (int iDistanceIndex = 0; iDistanceIndex < 7 ; iDistanceIndex++) + { + const int iNADistanceHor = width * (iDistanceIndex + 1); + const int offsetX0 = -iNADistanceHor - 1; + const int offsetX1 = width + iNADistanceHor - 1; + for (int iNASPIdx = 0; iNASPIdx < numNACandidate[iDistanceIndex]; iNASPIdx++) + { + switch (idxMap[iDistanceIndex][iNASPIdx]) + { // Angle CCW from (1,0), approx. + case 0: offsetX = offsetX2; break; // 90° + case 1: offsetX = offsetX0; break; // 180° + case 2: offsetX = offsetX0; break; // 135° + case 3: offsetX = offsetX0; break; // 157.5° + case 4: offsetX = (offsetX0 + offsetX2) >> 1; break; // 112.5° + case 5: offsetX = offsetX0; break; // 146.25° + case 6: offsetX = ((offsetX2 + (offsetX0 + offsetX2)) >> 1) >> 1; break; // 123.75° + case 7: offsetX = offsetX1; break; // 0° + case 8: offsetX = -1; break; // 270° + case 9: offsetX = offsetX1; break; // 45° + case 10: offsetX = offsetX0; break; // 215° + case 11: offsetX = (offsetX2 + offsetX1) >> 1; break; // 67.5° + case 12: offsetX = offsetX0; break; // 197.5° + case 13: offsetX = offsetX1; break; // 22.5° + case 14: offsetX = (offsetX0 + offsetX2) >> 1; break; // 242.5° + default: printf("error!"); exit(0); break; + } + tab[widthLog2MinusMinCU][i++] = offsetX; + } + } + } + return tab; +} +const std::array<std::array<PosType, TIMD_MERGE_MAX_NONADJACENT>, MAX_CU_DEPTH - MIN_CU_LOG2 + 1> g_timdMergeOffsetXTable = timdMergeOffsetXTable(); + +static constexpr std::array<std::array<PosType, TIMD_MERGE_MAX_NONADJACENT>, MAX_CU_DEPTH - MIN_CU_LOG2 + 1> timdMergeOffsetYTable() +{ + std::array<std::array<PosType, TIMD_MERGE_MAX_NONADJACENT>, MAX_CU_DEPTH - MIN_CU_LOG2 + 1> tab {{{0}}}; + for (size_t heightLog2MinusMinCU = 0; heightLog2MinusMinCU < MAX_CU_DEPTH - MIN_CU_LOG2 + 1; heightLog2MinusMinCU++) + { + size_t i = 0; + const PosType height = 1 << (heightLog2MinusMinCU + MIN_CU_LOG2); + int offsetY = 0; + int offsetY2 = height >> 1; + const int numNACandidate[7] = { 11, 13, 10, 2, 2, 2, 2 }; + const int idxMap[7][15] = { {0, 1, 2, 3, 4, 7, 8, 9, 10, 11, 12}, {0, 1, 2, 3, 4, 7, 8, 9, 10, 11, 12, 13, 14}, {0, 1, 3, 4, 5, 6, 7, 8, 11, 12}, {0, 1}, {0, 1}, {0, 1}, {0, 1} }; + for (int iDistanceIndex = 0; iDistanceIndex < 7 ; iDistanceIndex++) + { + const int iNADistanceVer = height * (iDistanceIndex + 1); + const int offsetY0 = height + iNADistanceVer - 1; + const int offsetY1 = -iNADistanceVer - 1; + for (int iNASPIdx = 0; iNASPIdx < numNACandidate[iDistanceIndex]; iNASPIdx++) + { + switch (idxMap[iDistanceIndex][iNASPIdx]) + { // Angle CCW from (1,0), approx. + case 0: offsetY = offsetY1; break; // 90° + case 1: offsetY = offsetY2; break; // 180° + case 2: offsetY = offsetY1; break; // 135° + case 3: offsetY = (offsetY1 + offsetY2) >> 1; break; // 157.5° + case 4: offsetY = offsetY1; break; // 112.5° + case 5: offsetY = ((offsetY2 + (offsetY1 + offsetY2)) >> 1) >> 1; break; // 146.25° + case 6: offsetY = offsetY1; break; // 123.75° + case 7: offsetY = -1; break; // 0° + case 8: offsetY = offsetY0; break; // 270° + case 9: offsetY = offsetY1; break; // 45° + case 10: offsetY = offsetY0; break; // 215° + case 11: offsetY = offsetY1; break; // 67.5° + case 12: offsetY = (offsetY2 + offsetY0) >> 1; break; // 197.5° + case 13: offsetY = (offsetY1 + offsetY2) >> 1; break; // 22.5° + case 14: offsetY = offsetY0; break; // 242.5° + default: printf("error!"); exit(0); break; + } + tab[heightLog2MinusMinCU][i++] = offsetY; + } + } + } + return tab; +} +const std::array<std::array<PosType, TIMD_MERGE_MAX_NONADJACENT>, MAX_CU_DEPTH - MIN_CU_LOG2 + 1> g_timdMergeOffsetYTable = timdMergeOffsetYTable(); +#endif //! \} diff --git a/source/Lib/CommonLib/Rom.h b/source/Lib/CommonLib/Rom.h index 17e845988e7b032a4e08036c61c803129cc628a5..196da7eb980abb28ad0c8e646d85d1ebc81fd472 100644 --- a/source/Lib/CommonLib/Rom.h +++ b/source/Lib/CommonLib/Rom.h @@ -502,5 +502,10 @@ extern const EIPInfo g_eipInfoLut[4][4][9]; #if JVET_AG0276_LIC_SLOPE_ADJUST extern const int g_licSlopeDeltaSet[LIC_SLOPE_MAX_NUM_DELTA + 1]; #endif +#if JVET_AJ0061_TIMD_MERGE +extern uint64_t g_timdMrgCost[EXT_VDIA_IDX + 1]; +extern const std::array<std::array<PosType, TIMD_MERGE_MAX_NONADJACENT>, MAX_CU_DEPTH - MIN_CU_LOG2 + 1> g_timdMergeOffsetXTable; +extern const std::array<std::array<PosType, TIMD_MERGE_MAX_NONADJACENT>, MAX_CU_DEPTH - MIN_CU_LOG2 + 1> g_timdMergeOffsetYTable; +#endif #endif //__TCOMROM__ diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h index 4e132f78c0cf89d346a997752c0b1f695e89ea10..d7a6c5d1d68ce300416146f4785e831de5ed6f29 100644 --- a/source/Lib/CommonLib/Slice.h +++ b/source/Lib/CommonLib/Slice.h @@ -1821,6 +1821,9 @@ private: #endif #if JVET_W0123_TIMD_FUSION bool m_timd; +#if JVET_AJ0061_TIMD_MERGE + bool m_timdMrg; +#endif #endif #if JVET_AB0155_SGPM bool m_sgpm; @@ -2578,6 +2581,10 @@ void setCCALFEnabledFlag( bool b ) #if JVET_W0123_TIMD_FUSION void setUseTimd ( bool b ) { m_timd = b; } bool getUseTimd () const { return m_timd; } +#if JVET_AJ0061_TIMD_MERGE + void setUseTimdMrg ( bool b ) { m_timdMrg = b; } + bool getUseTimdMrg () const { return m_timdMrg; } +#endif #endif #if JVET_AB0155_SGPM void setUseSgpm (bool b) { m_sgpm = b; } diff --git a/source/Lib/CommonLib/TrQuant.cpp b/source/Lib/CommonLib/TrQuant.cpp index 079c9c4e20992d18f63889cc8b0c801bc02845c5..480e72ad46bd4e5114026c968b89d073b370cb24 100644 --- a/source/Lib/CommonLib/TrQuant.cpp +++ b/source/Lib/CommonLib/TrQuant.cpp @@ -1442,6 +1442,16 @@ void TrQuant::xT( const TransformUnit &tu, const ComponentID &compID, const CPel int trTypeHor = DCT2; int trTypeVer = DCT2; +#if JVET_AJ0061_TIMD_MERGE + if (tu.cu->timdMrg && !tu.cu->lfnstIdx) + { + // Timd-Mrg CUs inherit transform type from their cands + int implicitDst7 = PU::canTimdMergeImplicitDst7(tu); + trTypeHor = (implicitDst7 & 2) ? DST7 : tu.cu->timdmTrType[tu.cu->timdMrg - 1][0]; + trTypeVer = (implicitDst7 & 1) ? DST7 : tu.cu->timdmTrType[tu.cu->timdMrg - 1][1]; + } + else +#endif getTrTypes ( tu, compID, trTypeHor, trTypeVer ); #if TU_256 int skipWidth = width > JVET_C0024_ZERO_OUT_TH ? width - JVET_C0024_ZERO_OUT_TH : 0; @@ -1579,6 +1589,16 @@ void TrQuant::xIT( const TransformUnit &tu, const ComponentID &compID, const CCo int trTypeHor = DCT2; int trTypeVer = DCT2; +#if JVET_AJ0061_TIMD_MERGE + if (tu.cu->timdMrg && !tu.cu->lfnstIdx) + { + // Timd-Mrg CUs inherit transform type from their cands + int implicitDst7 = PU::canTimdMergeImplicitDst7(tu); + trTypeHor = (implicitDst7 & 2) ? DST7 : tu.cu->timdmTrType[tu.cu->timdMrg - 1][0]; + trTypeVer = (implicitDst7 & 1) ? DST7 : tu.cu->timdmTrType[tu.cu->timdMrg - 1][1]; + } + else +#endif getTrTypes ( tu, compID, trTypeHor, trTypeVer ); #if TU_256 int skipWidth = width > JVET_C0024_ZERO_OUT_TH ? width - JVET_C0024_ZERO_OUT_TH : 0; @@ -2542,6 +2562,16 @@ void TrQuant::predCoeffSigns(TransformUnit &tu, const ComponentID compID, const else { int trHor, trVer; +#if JVET_AJ0061_TIMD_MERGE + if (tu.cu->timdMrg && !tu.cu->lfnstIdx) + { + // Timd-Mrg CUs inherit transform type from their cands + int implicitDst7 = PU::canTimdMergeImplicitDst7(tu); + trHor = (implicitDst7 & 2) ? DST7 : tu.cu->timdmTrType[tu.cu->timdMrg - 1][0]; + trVer = (implicitDst7 & 1) ? DST7 : tu.cu->timdmTrType[tu.cu->timdMrg - 1][1]; + } + else +#endif getTrTypes(tu, residCompID, trHor, trVer); #if JVET_W0103_INTRA_MTS actualTrIdx = trHor * NUM_TRANS_TYPE + trVer; diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index c2c5e192c06ced35c0536cd3819d17e6a412e109..cecac60bf30e775ec762867667720aa7206f2a7f 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -151,6 +151,7 @@ #define JVET_X0148_TIMD_PDPC 1 // JVET-X0148: PDPC handling for TIMD #define JVET_AG0092_ENHANCED_TIMD_FUSION 1 // JVET-AG0092: TIMD fusion with non-angular predictor #define JVET_AG0128_REF_LINE_OPT_TIMD_FUSION 1 // JVET-AG0128: TIMD fusion reference line determination +#define JVET_AJ0061_TIMD_MERGE 1 // TIMD merge mode #if ENABLE_DIMD #define JVET_AC0098_LOC_DEP_DIMD 1 // JVET-AC0098: Location-dependent Decoder-side Intra Mode Derivation diff --git a/source/Lib/CommonLib/Unit.cpp b/source/Lib/CommonLib/Unit.cpp index f5668d98a98cb365544e8830446436b7079f65ea..0450bbebf36742595609920e8aa043bd714c1d05 100644 --- a/source/Lib/CommonLib/Unit.cpp +++ b/source/Lib/CommonLib/Unit.cpp @@ -407,6 +407,26 @@ CodingUnit& CodingUnit::operator=( const CodingUnit& other ) timdFusionWeight[0] = other.timdFusionWeight[0]; timdFusionWeight[1] = other.timdFusionWeight[1]; #endif +#if JVET_AJ0061_TIMD_MERGE + timdMrg = other.timdMrg; + for (int i = 0; i <= NUM_TIMD_MERGE_MODES; i++) + { + timdmTrType[i][0] = other.timdmTrType[i][0]; + timdmTrType[i][1] = other.timdmTrType[i][1]; + } + for (int i = 0; i < NUM_TIMD_MERGE_MODES; i++) + { + timdMrgIsBlended[i] = other.timdMrgIsBlended[i]; + for (int j = 0; j < TIMD_FUSION_NUM; j++) + { + timdMrgList[i][j] = other.timdMrgList[i][j]; + timdMrgFusionWeight[i][j] = other.timdMrgFusionWeight[i][j]; + timdMrgModeCheckWA[i][j] = other.timdMrgModeCheckWA[i][j]; + timdMrgLocDep[i][j] = other.timdMrgLocDep[i][j]; + } + } + timdMrgCand = other.timdMrgCand; +#endif #endif #if JVET_AB0155_SGPM timdHor = other.timdHor; @@ -707,6 +727,26 @@ void CodingUnit::initData() timdFusionWeight[0] = -1; timdFusionWeight[1] = -1; #endif +#if JVET_AJ0061_TIMD_MERGE + timdMrg = 0; + for (int i = 0; i <= NUM_TIMD_MERGE_MODES; i++) + { + timdmTrType[i][0] = TransType::DCT2; + timdmTrType[i][1] = TransType::DCT2; + } + for (int i = 0; i < NUM_TIMD_MERGE_MODES; i++) + { + timdMrgIsBlended[i] = false; + for (int j = 0; j < TIMD_FUSION_NUM; j++) + { + timdMrgList[i][j] = INVALID_TIMD_IDX; + timdMrgFusionWeight[i][j] = -1; + timdMrgModeCheckWA[i][j] = true; + timdMrgLocDep[i][j] = 0; + } + } + timdMrgCand = -1; +#endif #endif #if JVET_AB0155_SGPM timdHor = -1; diff --git a/source/Lib/CommonLib/Unit.h b/source/Lib/CommonLib/Unit.h index ff1fb6048eea2839f3ef92499c8f3a360c762d51..e611f6378ea33f4426a3678f2909b60b393bd7e5 100644 --- a/source/Lib/CommonLib/Unit.h +++ b/source/Lib/CommonLib/Unit.h @@ -408,6 +408,16 @@ struct CodingUnit : public UnitArea #else int8_t timdFusionWeight[2]; #endif +#if JVET_AJ0061_TIMD_MERGE + int timdMrg; + int timdMrgList[NUM_TIMD_MERGE_MODES][TIMD_FUSION_NUM]; + bool timdMrgIsBlended[NUM_TIMD_MERGE_MODES]; + int timdMrgFusionWeight[NUM_TIMD_MERGE_MODES][TIMD_FUSION_NUM]; + bool timdMrgModeCheckWA[NUM_TIMD_MERGE_MODES][TIMD_FUSION_NUM]; + int8_t timdMrgLocDep[NUM_TIMD_MERGE_MODES][TIMD_FUSION_NUM]; + int timdMrgCand; + int timdmTrType[NUM_TIMD_MERGE_MODES + 1][2]; +#endif #endif #if JVET_AB0155_SGPM int timdHor; diff --git a/source/Lib/CommonLib/UnitTools.cpp b/source/Lib/CommonLib/UnitTools.cpp index c44b9b1a3f1a687bd3a79aaa222718466ee90dab..427a05caf631d2cb5648fa7cb1ba056c0d1e6314 100644 --- a/source/Lib/CommonLib/UnitTools.cpp +++ b/source/Lib/CommonLib/UnitTools.cpp @@ -5659,6 +5659,161 @@ uint32_t PU::getCoLocatedIntraLumaMode(const PredictionUnit &pu) } #endif +#if JVET_AJ0061_TIMD_MERGE +int PU::canTimdMergeImplicitDst7(const TransformUnit &tu) +{ + if(!CS::isDualITree(*tu.cs)) + { + return 0; + } + const int minTuSizeDst7 = 4; + const int maxTuSizeDst7 = 16; + const int compID = COMPONENT_Y; + int width = tu.blocks[compID].width; + int height = tu.blocks[compID].height; + bool widthDstOk = width >= minTuSizeDst7 && width <= maxTuSizeDst7; + bool heightDstOk = height >= minTuSizeDst7 && height <= maxTuSizeDst7; + /* + * 0: no + * 1: only ver + * 2: only hor + * 3: both hor and ver + */ + return (2 * widthDstOk + heightDstOk); +} + +bool PU::canTimdMerge(const PredictionUnit &pu) +{ + if (!pu.cu->cs->sps->getUseTimdMrg()) + { + return false; + } + if (pu.cu->predMode != MODE_INTRA) + { + return false; + } + if (!isLuma(pu.chType)) + { + return false; + } + if (!pu.cu->slice->getSPS()->getUseTimd()) + { + return false; + } + if (!pu.Y().valid()) + { + return false; + } + if (pu.Y().area() <= 16) + { + return false; + } + if (pu.Y().area() > 1024 && pu.cu->slice->getSliceType() == I_SLICE) + { + return false; + } + if (!PU::hasTimdMergeCandidate(pu)) + { + return false; + } + return true; +} + +static inline const CodingUnit* getTimdMergeNeighbour(const PredictionUnit &pu, size_t idx) +{ + static_assert(g_timdMergeOffsetXTable.size() == g_timdMergeOffsetYTable.size()); + CHECK(!(idx < TIMD_MERGE_MAX_NONADJACENT), "Invalid TIMD-Merge non-adjacent neighbour index"); + const int widthLog2MinusMinCU = floorLog2(pu.cu->lwidth()) - MIN_CU_LOG2; + const int heightLog2MinusMinCU = floorLog2(pu.cu->lheight()) - MIN_CU_LOG2; + const int dx = g_timdMergeOffsetXTable[widthLog2MinusMinCU][idx]; + const int dy = g_timdMergeOffsetYTable[heightLog2MinusMinCU][idx]; + return pu.cu->cs->getCURestricted(pu.cu->lumaPos().offset(dx, dy), *pu.cu, CH_L); +} + +std::array<const CodingUnit *, TIMD_MERGE_MAX_NONADJACENT> PU::timdMergeNonAdjacentNeighbours(const PredictionUnit &pu) +{ + std::array<const CodingUnit *, TIMD_MERGE_MAX_NONADJACENT> neighbours; + for (size_t i = 0; i < neighbours.size(); i++) + { + neighbours[i] = getTimdMergeNeighbour(pu, i); + } + return neighbours; +} + +// Returns true if at least one of the CUs from the TIMD-merge +// neighbour map is a suitable candidate for TIMD-merge, false otherwise. +bool PU::hasTimdMergeCandidate(const PredictionUnit &pu) +{ + if (pu.cu->cs->pcv->isEncoder && pu.cu->timdMrgCand >= 0) + { + return pu.cu->timdMrgCand > 0; + } + else + { + int step = 4; + const CodingUnit *cuLeft = NULL; + const CodingUnit *cuTop = NULL; + for (int i = 0; i <= pu.cu->lheight(); i += step) + { + cuLeft = pu.cu->cs->getCURestricted(pu.cu->lumaPos().offset(-1, i), *pu.cu, CH_L); + if (cuLeft && CU::isIntra(*cuLeft) && cuLeft->timd) + { + return true; + } + } + + for (int i = 0; i <= pu.cu->lwidth(); i += step) + { + cuTop = pu.cu->cs->getCURestricted(pu.cu->lumaPos().offset(i, -1), *pu.cu, CH_L); + if (cuTop && CU::isIntra(*cuTop) && cuTop->timd) + { + return true; + } + } + + const CodingUnit* cuNeighbour = pu.cu->cs->getCURestricted(pu.cu->lumaPos().offset(-1, -1), *pu.cu, CH_L); + if (cuNeighbour && CU::isIntra(*cuNeighbour) && cuNeighbour->timd) + { + return true; + } + + const CodingUnit *cuLeft2 = cuLeft ? pu.cu->cs->getCURestricted(cuLeft->lumaPos().offset(cuLeft->lwidth() - 1, cuLeft->lheight()), *pu.cu, CH_L) : NULL; + const CodingUnit *cuTop2 = cuTop ? pu.cu->cs->getCURestricted(cuTop->lumaPos().offset(cuTop->lwidth(), cuTop->lheight() - 1), *pu.cu, CH_L) : NULL; + cuNeighbour = cuLeft2; + if (cuNeighbour && CU::isIntra(*cuNeighbour) && cuNeighbour->timd) + { + return true; + } + cuNeighbour = cuTop2; + if (cuNeighbour && CU::isIntra(*cuNeighbour) && cuNeighbour->timd) + { + return true; + } + + cuNeighbour = cuLeft2 ? pu.cu->cs->getCURestricted(cuLeft2->lumaPos().offset(cuLeft2->lwidth() - 1, cuLeft2->lheight()), *pu.cu, CH_L) : NULL; + if (cuNeighbour && CU::isIntra(*cuNeighbour) && cuNeighbour->timd) + { + return true; + } + cuNeighbour = cuTop2 ? pu.cu->cs->getCURestricted(cuTop2->lumaPos().offset(cuTop2->lwidth(), cuTop2->lheight() - 1), *pu.cu, CH_L) : NULL; + if (cuNeighbour && CU::isIntra(*cuNeighbour) && cuNeighbour->timd) + { + return true; + } + + for (const CodingUnit *nonAdjacentNeighbour : PU::timdMergeNonAdjacentNeighbours(pu)) + { + if (nonAdjacentNeighbour && CU::isIntra(*nonAdjacentNeighbour) && nonAdjacentNeighbour->timd) + { + return true; + } + } + + return false; + } +} +#endif + #if JVET_AC0071_DBV #if JVET_AH0136_CHROMA_REORDERING bool PU::isDbvMode(int mode) @@ -31429,6 +31584,9 @@ bool CU::isMTSAllowed(const CodingUnit &cu, const ComponentID compID) #endif #if JVET_W0123_TIMD_FUSION mtsAllowed &= !(cu.timd && cu.firstPU->multiRefIdx); +#endif +#if JVET_AJ0061_TIMD_MERGE + mtsAllowed &= !cu.timdMrg; // Timd-Mrg CUs inherit transform type from their cands #endif mtsAllowed &= !(cu.bdpcmMode && cuWidth <= tsMaxSize && cuHeight <= tsMaxSize); @@ -31830,7 +31988,9 @@ bool TU::isTSAllowed(const TransformUnit &tu, const ComponentID compID) tsAllowed &= !(tu.cu->bdpcmModeChroma && isChroma(compID)); tsAllowed &= tu.blocks[compID].width <= transformSkipMaxSize && tu.blocks[compID].height <= transformSkipMaxSize; tsAllowed &= !tu.cu->sbtInfo; - +#if JVET_AJ0061_TIMD_MERGE + tsAllowed &= !tu.cu->timdMrg; +#endif return tsAllowed; } diff --git a/source/Lib/CommonLib/UnitTools.h b/source/Lib/CommonLib/UnitTools.h index cee2761cc416394ccd0ec33fb7f2cfe96fc753f0..d6434defe59c80db3482ba42b86d0cea5c54684c 100644 --- a/source/Lib/CommonLib/UnitTools.h +++ b/source/Lib/CommonLib/UnitTools.h @@ -212,6 +212,12 @@ namespace CU // PU tools namespace PU { +#if JVET_AJ0061_TIMD_MERGE + int canTimdMergeImplicitDst7(const TransformUnit &tu); + bool canTimdMerge(const PredictionUnit &pu); + std::array<const CodingUnit *, TIMD_MERGE_MAX_NONADJACENT> timdMergeNonAdjacentNeighbours(const PredictionUnit &pu); + bool hasTimdMergeCandidate(const PredictionUnit &pu); +#endif #if (JVET_AG0146_DIMD_ITMP_IBC || JVET_AG0152_SGPM_ITMP_IBC || JVET_AG0151_INTRA_TMP_MERGE_MODE) int getItmpMergeCandidate (const PredictionUnit& pu, std::vector<Mv>& pBvs #if JVET_AH0200_INTRA_TMP_BV_REORDER diff --git a/source/Lib/DecoderLib/CABACReader.cpp b/source/Lib/DecoderLib/CABACReader.cpp index 920b902b64ad4ef254b8913d4ab439144bc97ebd..7ccccfe645fec3770f913756f45b68556630deff 100644 --- a/source/Lib/DecoderLib/CABACReader.cpp +++ b/source/Lib/DecoderLib/CABACReader.cpp @@ -2478,6 +2478,12 @@ void CABACReader::intra_luma_pred_modes( CodingUnit &cu ) } #if JVET_W0123_TIMD_FUSION cu_timd_flag(cu); +#if JVET_AJ0061_TIMD_MERGE + if (cu.timdMrg) + { + return; + } +#endif #endif #if JVET_AG0058_EIP cu_eip_flag(cu); @@ -2817,7 +2823,27 @@ void CABACReader::cu_timd_flag( CodingUnit& cu ) unsigned ctxId = DeriveCtx::CtxTimdFlag( cu ); cu.timd = m_BinDecoder.decodeBin( Ctx::TimdFlag(ctxId) ); DTRACE(g_trace_ctx, D_SYNTAX, "cu_timd_flag() ctx=%d pos=(%d,%d) timd=%d\n", ctxId, cu.lumaPos().x, cu.lumaPos().y, cu.timd); +#if JVET_AJ0061_TIMD_MERGE + cu_timd_merge_flag(cu); +#endif } + +#if JVET_AJ0061_TIMD_MERGE +void CABACReader::cu_timd_merge_flag( CodingUnit& cu) +{ + cu.timdMrg = 0; + if (!cu.timd || cu.dimd) + { + return; + } + if (!PU::canTimdMerge(*cu.firstPU)) + { + return; + } + int ctxId = cu.lwidth() * cu.lheight() >= 64 ? 0 : 1; + cu.timdMrg = m_BinDecoder.decodeBin( Ctx::TimdMrgFlag(ctxId) ) ? 1 : 0; +} +#endif #endif #if JVET_AB0155_SGPM diff --git a/source/Lib/DecoderLib/CABACReader.h b/source/Lib/DecoderLib/CABACReader.h index 8826032155389aebb889c28790f665cf8ef7b74e..92e1dbb500c27e55b2959e67e74fb65df1f150ca 100644 --- a/source/Lib/DecoderLib/CABACReader.h +++ b/source/Lib/DecoderLib/CABACReader.h @@ -128,6 +128,9 @@ public: void intra_luma_pred_modes ( CodingUnit& cu ); #if JVET_W0123_TIMD_FUSION void cu_timd_flag ( CodingUnit& cu ); +#if JVET_AJ0061_TIMD_MERGE + void cu_timd_merge_flag ( CodingUnit& cu ); +#endif #endif #if JVET_AB0155_SGPM void sgpm_flag ( CodingUnit& cu ); diff --git a/source/Lib/DecoderLib/DecCu.cpp b/source/Lib/DecoderLib/DecCu.cpp index b6d14b321e96f237a5feec76e86a2e88f3bd8d24..bcbcc9fe27550bd1dd4b3edb039f4a7e1e964a51 100644 --- a/source/Lib/DecoderLib/DecCu.cpp +++ b/source/Lib/DecoderLib/DecCu.cpp @@ -372,6 +372,29 @@ void DecCu::decompressCtu( CodingStructure& cs, const UnitArea& ctuArea ) #if JVET_W0123_TIMD_FUSION else if (currCU.timd) { +#if JVET_AJ0061_TIMD_MERGE + PredictionUnit *pu = currCU.firstPU; + const CompArea &area = currCU.Y(); + if (currCU.timdMrg) + { + m_pcIntraPred->deriveTimdMergeModes(currCU.cs->picture->getRecoBuf(area), area, currCU); + CHECK(currCU.timdMrgList[currCU.timdMrg - 1][0] < 0, "Wrong timd-merge mode!"); + pu->intraDir[0] = currCU.timdMrgList[currCU.timdMrg - 1][0]; + currCU.timdMode = currCU.timdMrgList[currCU.timdMrg - 1][0]; // temporary + } + else + { +#if SECONDARY_MPM + IntraPrediction::deriveDimdMode(currCU.cs->picture->getRecoBuf(area), area, currCU); +#endif + currCU.timdMode = m_pcIntraPred->deriveTimdMode(currCU.cs->picture->getRecoBuf(area), area, currCU); + pu->intraDir[0] = currCU.timdMode; + } + if (!currCU.timdMrg && !currCU.lfnstIdx) + { + m_pcTrQuant->getTrTypes(*currCU.firstTU, COMPONENT_Y, currCU.timdmTrType[NUM_TIMD_MERGE_MODES][0], currCU.timdmTrType[NUM_TIMD_MERGE_MODES][1]); + } +#else PredictionUnit *pu = currCU.firstPU; const CompArea &area = currCU.Y(); #if SECONDARY_MPM @@ -379,6 +402,7 @@ void DecCu::decompressCtu( CodingStructure& cs, const UnitArea& ctuArea ) #endif currCU.timdMode = m_pcIntraPred->deriveTimdMode(currCU.cs->picture->getRecoBuf(area), area, currCU); pu->intraDir[0] = currCU.timdMode; +#endif } #endif diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp index f974db44538cd4898d9ee7e5f2a9a49e3424a7e4..2ccf15a4c244805749c05eb816e28d7fcdf0dc58 100644 --- a/source/Lib/DecoderLib/VLCReader.cpp +++ b/source/Lib/DecoderLib/VLCReader.cpp @@ -2775,6 +2775,16 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS) #endif #if JVET_W0123_TIMD_FUSION READ_FLAG(uiCode, "sps_timd_enabled_flag"); pcSPS->setUseTimd( uiCode != 0 ); +#if JVET_AJ0061_TIMD_MERGE + if (pcSPS->getUseTimd()) + { + READ_FLAG(uiCode, "sps_timd_merge_enabled_flag"); pcSPS->setUseTimdMrg( uiCode != 0 ); + } + else + { + pcSPS->setUseTimdMrg( false ); + } +#endif #endif #if JVET_X0141_CIIP_TIMD_TM && JVET_W0123_TIMD_FUSION if (pcSPS->getUseCiip() && pcSPS->getUseTimd()) diff --git a/source/Lib/EncoderLib/CABACWriter.cpp b/source/Lib/EncoderLib/CABACWriter.cpp index 3c5c6027663a50e40b9c1e720e64bdc6c865d5f6..0bd43aeb169e319aad16d6da6c52f9c18250db8d 100644 --- a/source/Lib/EncoderLib/CABACWriter.cpp +++ b/source/Lib/EncoderLib/CABACWriter.cpp @@ -2061,6 +2061,12 @@ void CABACWriter::intra_luma_pred_modes( const CodingUnit& cu ) } #if JVET_W0123_TIMD_FUSION cu_timd_flag(cu); +#if JVET_AJ0061_TIMD_MERGE + if (cu.timdMrg) + { + return; + } +#endif #endif #if JVET_AG0058_EIP cu_eip_flag(cu); @@ -2287,6 +2293,12 @@ void CABACWriter::intra_luma_pred_mode( const PredictionUnit& pu ) } #if JVET_W0123_TIMD_FUSION cu_timd_flag(*pu.cu); +#if JVET_AJ0061_TIMD_MERGE + if (pu.cu->timdMrg) + { + return; + } +#endif #endif #if JVET_AG0058_EIP cu_eip_flag(*pu.cu); @@ -2507,7 +2519,26 @@ void CABACWriter::cu_timd_flag( const CodingUnit& cu ) unsigned ctxId = DeriveCtx::CtxTimdFlag(cu); m_BinEncoder.encodeBin(cu.timd, Ctx::TimdFlag(ctxId)); DTRACE(g_trace_ctx, D_SYNTAX, "cu_timd_flag() ctx=%d pos=(%d,%d) timd=%d\n", ctxId, cu.lumaPos().x, cu.lumaPos().y, cu.timd); +#if JVET_AJ0061_TIMD_MERGE + cu_timd_merge_flag(cu); +#endif } + +#if JVET_AJ0061_TIMD_MERGE +void CABACWriter::cu_timd_merge_flag( const CodingUnit& cu ) +{ + if (!cu.timd || cu.dimd) + { + return; + } + if (!PU::canTimdMerge(*cu.firstPU)) + { + return; + } + int ctxId = cu.lwidth() * cu.lheight() >= 64 ? 0 : 1; + m_BinEncoder.encodeBin(cu.timdMrg ? 1 : 0, Ctx::TimdMrgFlag(ctxId)); +} +#endif #endif #if JVET_AB0155_SGPM diff --git a/source/Lib/EncoderLib/CABACWriter.h b/source/Lib/EncoderLib/CABACWriter.h index cc52dfb8f4e7afdd9bf9fdf2e383b8156759369a..b9e466c017fe5fa437b88c5628c393e19d30fd27 100644 --- a/source/Lib/EncoderLib/CABACWriter.h +++ b/source/Lib/EncoderLib/CABACWriter.h @@ -164,6 +164,9 @@ public: #endif #if JVET_W0123_TIMD_FUSION void cu_timd_flag ( const CodingUnit& cu ); +#if JVET_AJ0061_TIMD_MERGE + void cu_timd_merge_flag ( const CodingUnit& cu ); +#endif #endif #if JVET_AB0155_SGPM void sgpm_flag (const CodingUnit& cu ); diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h index 616369020a22ab7bf9eddaa436d81378558df2ae..9a2cd70cc0510aaa95464412e48905c13fd6d219 100644 --- a/source/Lib/EncoderLib/EncCfg.h +++ b/source/Lib/EncoderLib/EncCfg.h @@ -511,6 +511,9 @@ protected: #endif #if JVET_W0123_TIMD_FUSION bool m_timd; +#if JVET_AJ0061_TIMD_MERGE + bool m_timdMrg; +#endif #endif #if JVET_AB0155_SGPM bool m_sgpm; @@ -1800,6 +1803,10 @@ public: #if JVET_W0123_TIMD_FUSION void setUseTimd ( bool b ) { m_timd = b; } bool getUseTimd () const { return m_timd; } +#if JVET_AJ0061_TIMD_MERGE + void setUseTimdMrg ( bool b ) { m_timdMrg = b; } + bool getUseTimdMrg () const { return m_timdMrg; } +#endif #endif #if JVET_AB0155_SGPM void setUseSgpm (bool b) { m_sgpm = b; } diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp index 4b27948fbd737786ae9a2356c6f0043142b9d13e..9382e7c45e731ecd5bae8b3ba605fe2b43ef11b6 100644 --- a/source/Lib/EncoderLib/EncCu.cpp +++ b/source/Lib/EncoderLib/EncCu.cpp @@ -3426,6 +3426,31 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS int8_t dimdChromaMode = -1; int8_t dimdChromaModeSecond = -1; #endif +#if JVET_AJ0061_TIMD_MERGE + int timdmTrTypes[NUM_TIMD_MERGE_MODES + 1][2]; + int timdMrgCand = -1; + int timdMrgList[NUM_TIMD_MERGE_MODES][TIMD_FUSION_NUM]; + bool timdMrgIsBlended[NUM_TIMD_MERGE_MODES]; + int timdMrgFusionWeight[NUM_TIMD_MERGE_MODES][TIMD_FUSION_NUM]; + bool timdMrgModeCheckWA[NUM_TIMD_MERGE_MODES][TIMD_FUSION_NUM]; + int8_t timdMrgLocDep[NUM_TIMD_MERGE_MODES][TIMD_FUSION_NUM]; + for (int i = 0; i <= NUM_TIMD_MERGE_MODES; i++) + { + timdmTrTypes[i][0] = TransType::DCT2; + timdmTrTypes[i][1] = TransType::DCT2; + } + for (int i = 0; i < NUM_TIMD_MERGE_MODES; i++) + { + timdMrgIsBlended[i] = false; + for (int j = 0; j < TIMD_FUSION_NUM; j++) + { + timdMrgList[i][j] = INVALID_TIMD_IDX; + timdMrgFusionWeight[i][j] = -1; + timdMrgModeCheckWA[i][j] = true; + timdMrgLocDep[i][j] = 0; + } + } +#endif #if JVET_AH0136_CHROMA_REORDERING int8_t dimdBlendModeChroma[DIMD_FUSION_NUM - 1] = { 0 }; int chromaList[7] = { -1 }; @@ -3490,6 +3515,12 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS dimdRelWeight[2] = cu.dimdRelWeight[2]; #endif } +#if JVET_AJ0061_TIMD_MERGE + for (int i = 0; i <= EXT_VDIA_IDX; i++) + { + g_timdMrgCost[i] = MAX_UINT64; + } +#endif #if SECONDARY_MPM #if JVET_AD0085_MPM_SORTING @@ -3595,6 +3626,20 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS #endif #if JVET_W0123_TIMD_FUSION bool timdDerived = false; +#if JVET_AJ0061_TIMD_MERGE + bool timdMrgDerived = false; + m_pcIntraSearch->m_skipTimdMrgLfnstMtsPass = false; + m_pcIntraSearch->m_skipObicMode = false; + m_pcIntraSearch->m_skipDimdMode = false; + m_pcIntraSearch->m_satdCostOBIC = MAX_UINT64; + m_pcIntraSearch->m_satdCostDIMD = MAX_UINT64; + for (int i = 0; i < NumTimdMode; i++) + { + m_pcIntraSearch->m_skipTimdMode[i] = false; + m_pcIntraSearch->m_satdCostTIMD[i][0] = MAX_UINT64; + m_pcIntraSearch->m_satdCostTIMD[i][1] = MAX_UINT64; + } +#endif #endif #if TMP_FAST_ENC bool tmpDerived = 0; @@ -3877,6 +3922,55 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS } } #endif +#if JVET_AJ0061_TIMD_MERGE + cu.timdMrg = 0; + if (PU::canTimdMerge(*cu.firstPU)) + { + if (!timdMrgDerived) + { + const CompArea &area = cu.Y(); + m_pcIntraSearch->deriveTimdMergeModes(bestCS->picture->getRecoBuf(area), area, cu); + for (int i = 0; i <= NUM_TIMD_MERGE_MODES; i++) + { + timdmTrTypes[i][0] = cu.timdmTrType[i][0]; + timdmTrTypes[i][1] = cu.timdmTrType[i][1]; + } + for (int i = 0; i < NUM_TIMD_MERGE_MODES; i++) + { + timdMrgIsBlended[i] = cu.timdMrgIsBlended[i]; + for (int j = 0; j < TIMD_FUSION_NUM; j++) + { + timdMrgList[i][j] = cu.timdMrgList[i][j]; + timdMrgFusionWeight[i][j] = cu.timdMrgFusionWeight[i][j]; + timdMrgModeCheckWA[i][j] = cu.timdMrgModeCheckWA[i][j]; + timdMrgLocDep[i][j] = cu.timdMrgLocDep[i][j]; + } + } + timdMrgCand = cu.timdMrgCand; + timdMrgDerived = true; + } + else + { + for (int i = 0; i <= NUM_TIMD_MERGE_MODES; i++) + { + cu.timdmTrType[i][0] = timdmTrTypes[i][0]; + cu.timdmTrType[i][1] = timdmTrTypes[i][1]; + } + for (int i = 0; i < NUM_TIMD_MERGE_MODES; i++) + { + cu.timdMrgIsBlended[i] = timdMrgIsBlended[i]; + for (int j = 0; j < TIMD_FUSION_NUM; j++) + { + cu.timdMrgList[i][j] = timdMrgList[i][j]; + cu.timdMrgFusionWeight[i][j] = timdMrgFusionWeight[i][j]; + cu.timdMrgModeCheckWA[i][j] = timdMrgModeCheckWA[i][j]; + cu.timdMrgLocDep[i][j] = timdMrgLocDep[i][j]; + } + } + cu.timdMrgCand = timdMrgCand; + } + } +#endif #if JVET_AB0157_TMRL cu.tmrlFlag = false; if (CU::allowTmrl(cu)) @@ -24991,7 +25085,11 @@ void EncCu::xReuseCachedResult( CodingStructure *&tempCS, CodingStructure *&best pu->intraDir[0] = cu.dimdMode; } } +#if JVET_AJ0061_TIMD_MERGE + if(cu.timd && !cu.timdMrg) +#else if(cu.timd) +#endif { PredictionUnit *pu = cu.firstPU; const CompArea &area = cu.Y(); diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp index d3209d48c5d0393c47f9c28c089ee38f79578fb2..39a51779191e80fee1e8590523512efea8728ac4 100644 --- a/source/Lib/EncoderLib/EncLib.cpp +++ b/source/Lib/EncoderLib/EncLib.cpp @@ -2022,6 +2022,9 @@ void EncLib::xInitSPS( SPS& sps ) #endif #if JVET_W0123_TIMD_FUSION sps.setUseTimd ( m_timd ); +#if JVET_AJ0061_TIMD_MERGE + sps.setUseTimdMrg ( m_timdMrg ); +#endif #endif #if JVET_X0141_CIIP_TIMD_TM && JVET_W0123_TIMD_FUSION sps.setUseCiipTimd ( m_ciipTimd ); diff --git a/source/Lib/EncoderLib/IntraSearch.cpp b/source/Lib/EncoderLib/IntraSearch.cpp index 4d0e0cfda87dca5297611fd97f54b670c430e8ff..3baded6a64d05bf7775c99d7b7482c8495cc74bf 100644 --- a/source/Lib/EncoderLib/IntraSearch.cpp +++ b/source/Lib/EncoderLib/IntraSearch.cpp @@ -104,6 +104,12 @@ IntraSearch::IntraSearch() m_eipMergePredBuf[i] = nullptr; } #endif +#if JVET_AJ0061_TIMD_MERGE + for (int i = 0; i < NumTimdMode ; i++) + { + m_timdPredBuf[i] = nullptr; + } +#endif #if JVET_AH0076_OBIC m_dimdPredBuf = nullptr; m_obicPredBuf = nullptr; @@ -304,6 +310,13 @@ void IntraSearch::destroy() m_eipMergePredBuf[i] = nullptr; } #endif +#if JVET_AJ0061_TIMD_MERGE + for (int i = 0; i < NumTimdMode ; i++) + { + delete[] m_timdPredBuf[i]; + m_timdPredBuf[i] = nullptr; + } +#endif #if JVET_AH0076_OBIC delete[] m_dimdPredBuf; m_dimdPredBuf = nullptr; @@ -481,6 +494,12 @@ void IntraSearch::init( EncCfg* pcEncCfg, m_eipMergePredBuf[i] = new Pel[MAX_EIP_SIZE * MAX_EIP_SIZE]; } #endif +#if JVET_AJ0061_TIMD_MERGE + for (int i = 0; i < NumTimdMode; i++) + { + m_timdPredBuf[i] = new Pel[(MAX_CU_SIZE>>1) * (MAX_CU_SIZE>>1)]; + } +#endif #if JVET_AH0076_OBIC m_dimdPredBuf = new Pel[(MAX_CU_SIZE>>1) * (MAX_CU_SIZE>>1)]; m_obicPredBuf = new Pel[(MAX_CU_SIZE>>1) * (MAX_CU_SIZE>>1)]; @@ -614,6 +633,19 @@ void IntraSearch::init( EncCfg* pcEncCfg, #if INTRA_TRANS_ENC_OPT m_skipTimdLfnstMtsPass = false; #endif +#if JVET_AJ0061_TIMD_MERGE + m_skipTimdMrgLfnstMtsPass = false; + m_skipObicMode = false; + m_skipDimdMode = false; + m_satdCostOBIC = MAX_UINT64; + m_satdCostDIMD = MAX_UINT64; + for (int i = 0; i < NumTimdMode ; i++) + { + m_skipTimdMode[i] = false; + m_satdCostTIMD[i][0] = MAX_UINT64; + m_satdCostTIMD[i][1] = MAX_UINT64; + } +#endif #if JVET_AH0076_OBIC m_skipObicLfnstMtsPass = false; m_skipDimdLfnstMtsPass = false; @@ -714,6 +746,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c #endif #if JVET_W0123_TIMD_FUSION const TempCtx ctxStartTimdFlag ( m_ctxCache, SubCtx( Ctx::TimdFlag, m_CABACEstimator->getCtx() ) ); +#if JVET_AJ0061_TIMD_MERGE + const TempCtx ctxStartTimdMrgFlag( m_ctxCache, SubCtx( Ctx::TimdMrgFlag, m_CABACEstimator->getCtx() ) ); +#endif #endif #if JVET_AB0155_SGPM const TempCtx ctxStartSgpmFlag ( m_ctxCache, SubCtx(Ctx::SgpmFlag, m_CABACEstimator->getCtx())); @@ -752,6 +787,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c #endif #if JVET_W0123_TIMD_FUSION m_CABACEstimator->getCtx() = SubCtx(Ctx::TimdFlag, ctxStartTimdFlag); +#if JVET_AJ0061_TIMD_MERGE + m_CABACEstimator->getCtx() = SubCtx(Ctx::TimdMrgFlag, ctxStartTimdMrgFlag); +#endif #endif #if JVET_AB0155_SGPM m_CABACEstimator->getCtx() = SubCtx(Ctx::SgpmFlag, ctxStartSgpmFlag); @@ -782,6 +820,16 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c #endif }; +#if JVET_AJ0061_TIMD_MERGE + bool isTimdValid = cu.slice->getSPS()->getUseTimd() && !(cu.lwidth() * cu.lheight() > 1024 && cu.slice->getSliceType() == I_SLICE); + static_vector<ModeInfo, NumTimdMode> timdModes; + static_vector<double, NumTimdMode> timdCosts; + static_vector<double, NumTimdMode> timdSadCosts; + int numTimdSatd = 0; + timdModes.clear(); + timdCosts.clear(); + timdSadCosts.clear(); +#endif CHECK( !cu.firstPU, "CU has no PUs" ); #if JVET_AH0103_LOW_DELAY_LFNST_NSPT bool spsIntraLfnstEnabled = ( ( cu.slice->getSliceType() == I_SLICE && cu.cs->sps->getUseIntraLFNSTISlice() ) || @@ -924,6 +972,21 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c bool setSkipTimdControl = (m_pcEncCfg->getIntraPeriod() == 1) && !cu.lfnstIdx && !cu.mtsFlag; double timdAngCost = MAX_DOUBLE; #endif +#if JVET_AJ0061_TIMD_MERGE + bool setSkipTimdMrgControl = (m_pcEncCfg->getIntraPeriod() == 1) && !cu.lfnstIdx && !cu.mtsFlag; + double timdMrgAngCost[NUM_TIMD_MERGE_MODES]; + for (int i = 0; i < NUM_TIMD_MERGE_MODES; i++) + { + timdMrgAngCost[i] = MAX_DOUBLE; + } + bool testTimd = cu.slice->getSPS()->getUseTimd(); + if (cu.lwidth() * cu.lheight() > 1024 && cu.slice->getSliceType() == I_SLICE) + { + testTimd = false; + } + bool testTimdMerge = testTimd && PU::canTimdMerge(*cu.firstPU) && cu.timdMrgList[0][0] != INVALID_TIMD_IDX; + bool testTimdMrl = false; +#endif #if JVET_AH0076_OBIC double obicAngCost = MAX_DOUBLE, dimdAngCost = MAX_DOUBLE; bool setSkipDimdControl = (m_pcEncCfg->getIntraPeriod() == 1) && !cu.lfnstIdx && !cu.mtsFlag; @@ -933,6 +996,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c static_vector<double, FAST_UDI_MAX_RDMODE_NUM> candCostList; static_vector<double, FAST_UDI_MAX_RDMODE_NUM> candHadList; +#if JVET_AJ0061_TIMD_MERGE + double mipHadCostStore[MAX_NUM_MIP_MODE] = { MAX_DOUBLE }; +#endif auto &pu = *cu.firstPU; bool validReturn = false; { @@ -985,6 +1051,10 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c #endif #if JVET_W0123_TIMD_FUSION bool bestTimdMode = false; +#if JVET_AJ0061_TIMD_MERGE + int bestTimdMrgMode = 0; + int bestTimdTrType[2] = { TransType::DCT2, TransType::DCT2 }; +#endif #endif #if JVET_AC0105_DIRECTIONAL_PLANAR uint8_t bestPlMode = 0; @@ -1587,6 +1657,99 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c } #endif #endif + +#if JVET_AJ0061_TIMD_MERGE + const int numTool = 2; // Timd, TimdMrl + bool isTimd, isTimdMrl; + /*-------------*/ + /* TIMD */ + int numPassTimd = (testTimd ? 1 : 0) + (testTimdMerge ? 1 : 0); + CHECK(!testTimd && testTimdMerge, "something went wrong"); + + /*-------------*/ + /* TIMD-MRL */ + bool isFirstLineOfCtu = pu.block(COMPONENT_Y).y == 0; + bool isTimdMrlAllowed = (lfnstIdx == 0 && !cu.mtsFlag) && isTimdValid; + numOfPassesExtendRef = !isTimdMrlAllowed ? 0 : (((!sps.getUseMRL() || isFirstLineOfCtu) ? 1 : 3) - 1); + + const int numToolPass[numTool] = { numPassTimd, numOfPassesExtendRef}; + + // Shared variables + int multiRefIdx; + uint8_t intraDir = 0; + uint32_t modeId = 0; + uint8_t modeRefIdx = 0; + + ModeInfo currMode; + for (int tool = 0; tool < numTool; tool++) + { + isTimd = tool == 0; + isTimdMrl = tool == 1; + + for (int pass = 0; pass < numToolPass[tool]; pass++) + { + if (isTimd) + { + cu.timd = true; + cu.timdMrg = pass == 1; + intraDir = !cu.timdMrg ? cu.timdMode : cu.timdMrgList[0][0]; + modeId = !cu.timdMrg ? TIMD_IDX : TIMDM_IDX; + modeRefIdx = 0; + multiRefIdx = 0; + } + + if (isTimdMrl) + { + multiRefIdx = MULTI_REF_LINE_IDX[pass + 1]; + modeRefIdx = multiRefIdx; + intraDir = cu.timdMode; + modeId = TIMD_IDX; + cu.timd = true; + cu.timdMrg = false; + testTimdMrl = true; + } + + // PU/CU init + pu.intraDir[0] = intraDir; + pu.cu->tmrlFlag = false; + pu.multiRefIdx = multiRefIdx; + pu.cu->timd = isTimd || isTimdMrl; + + // Init intra pattern + initIntraPatternChType(cu, pu.Y()); + + // Init IPM parameters + initPredIntraParams(pu, pu.Y(), sps); + + // Prediction + predIntraAng(COMPONENT_Y, piPred, pu); + + // Cost calculation + Distortion sadCost = distParamSad.distFunc(distParamSad); + Distortion minSadHad = std::min(sadCost * 2, distParamHad.distFunc(distParamHad)); + + loadStartStates(); + uint64_t fracModeBits = xFracModeBitsIntra(pu, intraDir, CHANNEL_TYPE_LUMA); + + double cost = (double) minSadHad + (double) fracModeBits * sqrtLambdaForFirstPass; + currMode = ModeInfo(false, false, modeRefIdx, NOT_INTRA_SUBPARTITIONS, modeId); + // Update lists + numTimdSatd++; + timdModes.push_back(currMode); + timdCosts.push_back(cost); + timdSadCosts.push_back(static_cast<double>(minSadHad)); + TimdMode mode = getTimdMode(cu.timdMrg, multiRefIdx); + PelBuf timdSaveBuf(m_timdPredBuf[mode], pu.Y()); + timdSaveBuf.copyFrom(piPred); + m_satdCostTIMD[mode][0] = static_cast<uint64_t>(cost); + m_satdCostTIMD[mode][1] = minSadHad; + } + } + cu.tmrlFlag = false; + cu.timd = false; + cu.timdMrg = false; +#endif + CHECKD(uiRdModeList.size() != numModesForFullRD, "Error: RD mode list size"); #if JVET_V0130_INTRA_TMP && JVET_AB0130_ITMP_SAMPLING @@ -2317,6 +2480,7 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c 0.8 * double(minSadHad), uiHadModeList, candHadList, numHadCand); } +#if !JVET_AJ0061_TIMD_MERGE const double thresholdHadCost = 1.0 + 1.4 / sqrt((double) (pu.lwidth() * pu.lheight())); reduceHadCandList(uiRdModeList, candCostList, numModesForFullRD, thresholdHadCost, mipHadCost, pu, fastMip @@ -2327,6 +2491,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c , dirPlanarCostList #endif ); +#else + std::memcpy(mipHadCostStore, mipHadCost, MAX_NUM_MIP_MODE * sizeof(double)); +#endif } #if JVET_AH0076_OBIC if (obicSaveFlag || dimdSaveFlag) @@ -2422,6 +2589,148 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c } } } +#endif +#if JVET_AJ0061_TIMD_MERGE + m_skipDimdMode = !testDimd; + m_skipObicMode = !testObic; + m_skipDimdLfnstMtsPass = !testDimd; + m_skipObicLfnstMtsPass = !testObic; + m_skipTimdMode[Timd] = !testTimd; + m_skipTimdMode[TimdMrg] = !testTimdMerge; + m_skipTimdMode[TimdMrl1] = !testTimdMrl; + m_skipTimdMode[TimdMrl3] = !testTimdMrl; + m_skipTimdLfnstMtsPass = !testTimd; + m_skipTimdMrgLfnstMtsPass = !testTimdMerge; +#if JVET_AH0076_OBIC + if (obicSaveFlag || dimdSaveFlag) + { + cu.dimd = true; + cu.obicFlag = false; + cu.timd = false; + cu.mipFlag = false; + cu.tmpFlag = false; + cu.tmrlFlag = false; + cu.firstPU->multiRefIdx = 0; + cu.ispMode = NOT_INTRA_SUBPARTITIONS; + int iWidth = cu.lwidth(); + int iHeight = cu.lheight(); + if (obicSaveFlag) + { + cu.obicFlag = true; + int obicMode = cu.obicMode[0]; + pu.intraDir[CHANNEL_TYPE_LUMA] = obicMode; + bool blendModes[OBIC_FUSION_NUM - 1] = {false}; + PelBuf predFusion[OBIC_FUSION_NUM - 1]; +#if JVET_AH0209_PDP + CHECK(!m_intraModeReady[obicMode] && !m_pdpIntraPredReady[obicMode], "OBIC mode is not ready!"); +#else + CHECK(!m_intraModeReady[obicMode], "OBIC mode is not ready!"); +#endif + const UnitArea localUnitArea( pu.chromaFormat, Area( 0, 0, iWidth, iHeight ) ); +#if JVET_AH0209_PDP + PelBuf predBuf(m_pdpIntraPredReady[obicMode]? m_pdpIntraPredBuf[obicMode]: m_intraPredBuf[obicMode], pu.Y()); +#else + PelBuf predBuf(m_intraPredBuf[obicMode], pu.Y()); +#endif + piPred.copyFrom(predBuf); + int planarIdx = 0; + for (int idx = 0; idx < OBIC_FUSION_NUM - 1; idx++) + { + blendModes[idx] = false; + predFusion[idx] = m_tempBuffer[idx].getBuf( localUnitArea.Y() ); + int iMode = cu.obicMode[idx + 1]; + if (iMode >= 0) + { + blendModes[idx] = true; + CHECK(!m_intraModeReady[iMode], "OBIC mode is not ready!"); + PelBuf predBufTmp(m_intraPredBuf[iMode], pu.Y()); + predFusion[idx].copyFrom(predBufTmp); + if (iMode == PLANAR_IDX) + { + planarIdx = idx; + } + } + else + { + PelBuf planarBuf(m_intraPredBuf[PLANAR_IDX], pu.Y()); + predFusion[idx].copyFrom(planarBuf); + } + } + if (cu.obicIsBlended) + { + generateObicBlending(piPred, pu, predFusion, blendModes, planarIdx); + } + else + { + initIntraPatternChType(cu, pu.Y(), false); + predIntraAng(COMPONENT_Y, piPred, pu); + } + PelBuf obicSaveBuf(m_obicPredBuf, pu.Y()); + obicSaveBuf.copyFrom(piPred); + Distortion sadCost = distParamSad.distFunc(distParamSad); + Distortion minSadHadObic = std::min(sadCost * 2, distParamHad.distFunc(distParamHad)); + m_satdCostOBIC = minSadHadObic; + } + if (dimdSaveFlag) + { + cu.obicFlag = false; + int dimdMode = cu.dimdMode; + pu.intraDir[CHANNEL_TYPE_LUMA] = dimdMode; + if (cu.dimdBlending) + { +#if JVET_AH0209_PDP + PelBuf predBuf(m_pdpIntraPredReady[dimdMode]? m_pdpIntraPredBuf[dimdMode] : m_intraPredBuf[dimdMode], tmpArea); +#else + PelBuf predBuf(m_intraPredBuf[dimdMode], tmpArea); +#endif + piPred.copyFrom(predBuf); +#if JVET_AH0209_PDP + dimdMode = cu.dimdBlendMode[0] > 0 ? cu.dimdBlendMode[0] : PLANAR_IDX; + PelBuf blendBuf0(dimdMode && m_pdpIntraPredReady[dimdMode] ? m_pdpIntraPredBuf[dimdMode] : m_intraPredBuf[dimdMode], tmpArea); + dimdMode = cu.dimdBlendMode[1] > 0 ? cu.dimdBlendMode[1] : PLANAR_IDX; + PelBuf blendBuf1(dimdMode && m_pdpIntraPredReady[dimdMode] ? m_pdpIntraPredBuf[dimdMode] : m_intraPredBuf[dimdMode], tmpArea); + dimdMode = cu.dimdBlendMode[2] > 0 ? cu.dimdBlendMode[2] : PLANAR_IDX; + PelBuf blendBuf2(dimdMode && m_pdpIntraPredReady[dimdMode] ? m_pdpIntraPredBuf[dimdMode] : m_intraPredBuf[dimdMode], tmpArea); + dimdMode = cu.dimdBlendMode[3] > 0 ? cu.dimdBlendMode[3] : PLANAR_IDX; + PelBuf blendBuf3(dimdMode && m_pdpIntraPredReady[dimdMode] ? m_pdpIntraPredBuf[dimdMode] : m_intraPredBuf[dimdMode], tmpArea); + PelBuf planarBuf(m_intraPredBuf[PLANAR_IDX], tmpArea); + #else + PelBuf blendBuf0((m_intraPredBuf[cu.dimdBlendMode[0] > 0 ?cu.dimdBlendMode[0] : PLANAR_IDX]), tmpArea); + PelBuf blendBuf1((m_intraPredBuf[cu.dimdBlendMode[1] > 0 ?cu.dimdBlendMode[1] : PLANAR_IDX]), tmpArea); + PelBuf blendBuf2((m_intraPredBuf[cu.dimdBlendMode[2] > 0 ?cu.dimdBlendMode[2] : PLANAR_IDX]), tmpArea); + PelBuf blendBuf3((m_intraPredBuf[cu.dimdBlendMode[3] > 0 ?cu.dimdBlendMode[3] : PLANAR_IDX]), tmpArea); + PelBuf planarBuf(m_intraPredBuf[PLANAR_IDX], tmpArea); + #endif + generateDimdBlending(piPred, pu, blendBuf0, blendBuf1, blendBuf2, blendBuf3, planarBuf); + } + else + { + initIntraPatternChType(cu, pu.Y(), false); + predIntraAng(COMPONENT_Y, piPred, pu); + } + PelBuf dimdSaveBuf(m_dimdPredBuf, pu.Y()); + dimdSaveBuf.copyFrom(piPred); + Distortion sadCost = distParamSad.distFunc(distParamSad); + Distortion minSadHadDimd = std::min(sadCost * 2, distParamHad.distFunc(distParamHad)); + m_satdCostDIMD = minSadHadDimd; + } + } + cu.dimd = false; + cu.obicFlag = false; +#endif + if (!cu.lfnstIdx && !cu.mtsFlag && testMip) + { + // now reduce the candidates + const double thresholdHadCost = 1.0 + 1.4 / sqrt((double) (pu.lwidth() * pu.lheight())); + reduceHadCandList(uiRdModeList, candCostList, numModesForFullRD, thresholdHadCost, mipHadCostStore, pu, fastMip +#if JVET_AB0157_TMRL + , tmrlCostList +#endif +#if JVET_AC0105_DIRECTIONAL_PLANAR + , dirPlanarCostList +#endif + ); + } #endif if (sps.getUseMIP() && LFNSTSaveFlag) { @@ -2433,6 +2742,38 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c m_uiSavedHadModeListLFNST = uiHadModeList; m_dSavedHadListLFNST = candHadList; LFNSTSaveFlag = false; +#if JVET_AJ0061_TIMD_MERGE + m_uiSavedRdModeListTimd = timdModes; + m_uiSavedModeCostTimd = timdCosts; + for (int i = numTimdSatd - 1; i >= 0; i--) + { + if(timdModes[i].mRefId > 0) + { + m_uiSavedRdModeListTimd.erase(m_uiSavedRdModeListTimd.begin() + i); + m_uiSavedModeCostTimd.erase(m_uiSavedModeCostTimd.begin() + i); + } + } + ModeInfo m; + for (int i = numModesForFullRD - 1; i >= 0; i--) + { + m = m_uiSavedRdModeListLFNST.at(i); + if (m.modeId == TIMD_IDX && m.mRefId) + { + m_uiSavedRdModeListLFNST.erase(m_uiSavedRdModeListLFNST.begin() + i); + m_dSavedModeCostLFNST.erase(m_dSavedModeCostLFNST.begin() + i); + m_uiSavedNumRdModesLFNST--; + } + } + for (int i = (int)(uiHadModeList.size()) - 1; i >= 0; i--) + { + m = m_uiSavedHadModeListLFNST.at(i); + if (m.modeId == TIMD_IDX && m.mRefId) + { + m_uiSavedHadModeListLFNST.erase(m_uiSavedHadModeListLFNST.begin() + i); + m_dSavedHadListLFNST.erase(m_dSavedHadListLFNST.begin() + i); + } + } +#endif } } else // if( sps.getUseMIP() && LFNSTLoadFlag) @@ -2444,8 +2785,23 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c // PBINTRA fast uiHadModeList = m_uiSavedHadModeListLFNST; candHadList = m_dSavedHadListLFNST; +#if JVET_AJ0061_TIMD_MERGE + timdModes = m_uiSavedRdModeListTimd; + timdCosts = m_uiSavedModeCostTimd; + if (cu.mtsFlag) + { + for (int i = (int)timdModes.size() - 1; i >= 0; i--) + { + if (timdModes[i].modeId == TIMDM_IDX) + { + timdModes.erase(timdModes.begin() + i); + timdCosts.erase(timdCosts.begin() + i); + } + } + } +#endif } -#if JVET_AH0076_OBIC +#if JVET_AH0076_OBIC && !JVET_AJ0061_TIMD_MERGE if (obicSaveFlag || dimdSaveFlag) { cu.dimd = true; @@ -2939,8 +3295,31 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c #endif { // Store the modes to be checked with RD +#if JVET_AJ0061_TIMD_MERGE + if (!cu.mtsFlag && !cu.lfnstIdx) + { + static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> uiRdModeListNoTimdMrl = uiRdModeList; + for (int i = (int)uiRdModeList.size() - 1; i >= 0 ; i--) + { + int refIdx = uiRdModeListNoTimdMrl.at(i).mRefId; + int mode = uiRdModeListNoTimdMrl.at(i).modeId; + if ( refIdx > 0 && mode == TIMD_IDX ) + { + uiRdModeListNoTimdMrl.erase(uiRdModeListNoTimdMrl.begin() + i); + } + } + m_savedNumRdModes[lfnstIdx] = (int)uiRdModeList.size() - ((int)uiRdModeList.size() - (int)uiRdModeListNoTimdMrl.size()); + std::copy_n(uiRdModeListNoTimdMrl.begin(), m_savedNumRdModes[lfnstIdx], m_savedRdModeList[lfnstIdx]); + } + else + { + m_savedNumRdModes[lfnstIdx] = numModesForFullRD; + std::copy_n(uiRdModeList.begin(), numModesForFullRD, m_savedRdModeList[lfnstIdx]); + } +#else m_savedNumRdModes[lfnstIdx] = numModesForFullRD; std::copy_n(uiRdModeList.begin(), numModesForFullRD, m_savedRdModeList[lfnstIdx]); +#endif } } else // mtsUsage = 2 (here we potentially reduce the number of modes that will be full-RD checked) @@ -2978,6 +3357,22 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c numModesForFullRD++; } } +#endif +#if JVET_AJ0061_TIMD_MERGE + timdModes = m_uiSavedRdModeListTimd; + timdCosts = m_uiSavedModeCostTimd; + if (cu.mtsFlag) + { + for (int i = (int)timdModes.size() - 1; i >= 0; i--) + { + if (timdModes[i].modeId == TIMDM_IDX) + { + timdModes.erase(timdModes.begin() + i); + timdCosts.erase(timdCosts.begin() + i); + } + } + } + numTimdSatd = (int)timdModes.size(); #endif } else // this is necessary because we skip the candidates list calculation, since it was already obtained for @@ -2992,7 +3387,11 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c } #if ENABLE_DIMD +#if JVET_AJ0061_TIMD_MERGE + bool isDimdValid = cu.slice->getSPS()->getUseDimd() && !m_skipDimdMode; +#else bool isDimdValid = cu.slice->getSPS()->getUseDimd(); +#endif if (isDimdValid) { cu.dimd = false; @@ -3014,7 +3413,11 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c #if JVET_AH0076_OBIC cu.obicFlag = false; +#if JVET_AJ0061_TIMD_MERGE + if (testObic && !m_skipObicMode) +#else if (testObic) +#endif { ModeInfo m = ModeInfo( false, false, 0, NOT_INTRA_SUBPARTITIONS, OBIC_IDX ); uiRdModeList.push_back(m); @@ -3028,6 +3431,21 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c } #endif +#if JVET_AJ0061_TIMD_MERGE + cu.timdMrg = 0; + if (testTimdMerge && !cu.mtsFlag /* No MTS loop for Timd-Mrg CUs, as they inherit transform type from their cands*/) + { + int iNum = std::min(NUM_TIMD_MERGE_MODES, cu.timdMrgCand); + for (int idx = 0; idx < iNum; idx++) + { + if (cu.timdMrgList[idx][0] != INVALID_TIMD_IDX && !m_skipTimdMode[TimdMrg]) + { + ModeInfo m = ModeInfo( false, false, 0, NOT_INTRA_SUBPARTITIONS, TIMDM_IDX + idx ); + uiRdModeList.push_back(m); + } + } + } +#endif // after this point, don't use numModesForFullRD // PBINTRA fast if (m_pcEncCfg->getUsePbIntraFast() && !cs.slice->isIntra() && uiRdModeList.size() < numModesAvailable @@ -3104,12 +3522,16 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c #endif int numNonISPModes = (int)uiRdModeList.size(); #if JVET_W0123_TIMD_FUSION +#if !JVET_AJ0061_TIMD_MERGE bool isTimdValid = cu.slice->getSPS()->getUseTimd(); if (cu.lwidth() * cu.lheight() > 1024 && cu.slice->getSliceType() == I_SLICE) { isTimdValid = false; } if (isTimdValid) +#else + if (isTimdValid && !m_skipTimdMode[Timd]) +#endif { cu.timd = false; uiRdModeList.push_back( ModeInfo( false, false, 0, NOT_INTRA_SUBPARTITIONS, TIMD_IDX ) ); @@ -3156,8 +3578,17 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c for (int mRefNum = 1; mRefNum < numOfPassesExtendRef; mRefNum++) { int multiRefIdx = MULTI_REF_LINE_IDX[mRefNum]; +#if JVET_AJ0061_TIMD_MERGE + TimdMode mode = getTimdMode(false, multiRefIdx); + if (!m_skipTimdMode[mode]) + { + uiRdModeList.push_back( ModeInfo( false, false, multiRefIdx, NOT_INTRA_SUBPARTITIONS, TIMD_IDX ) ); + numNonISPModes++; + } +#else uiRdModeList.push_back( ModeInfo( false, false, multiRefIdx, NOT_INTRA_SUBPARTITIONS, TIMD_IDX ) ); numNonISPModes++; +#endif } } } @@ -3178,7 +3609,11 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c } } #if JVET_W0123_TIMD_FUSION +#if JVET_AJ0061_TIMD_MERGE + if (isTimdValid && !m_skipTimdMode[Timd] && sps.getUseISP() && CU::canUseISP(width, height, cu.cs->sps->getMaxTbSize()) && lfnstIdx == 0 && !cu.mtsFlag) +#else if (isTimdValid && sps.getUseISP() && CU::canUseISP(width, height, cu.cs->sps->getMaxTbSize()) && lfnstIdx == 0 && !cu.mtsFlag) +#endif { uiRdModeList.push_back( ModeInfo( false, false, 0, HOR_INTRA_SUBPARTITIONS, TIMD_IDX ) ); uiRdModeList.push_back( ModeInfo( false, false, 0, VER_INTRA_SUBPARTITIONS, TIMD_IDX ) ); @@ -3278,10 +3713,17 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c if ((m_pcEncCfg->getIntraPeriod() == 1) && cu.slice->getSPS()->getUseDimd() && mode >= 0 && !cu.dimdBlending && uiOrgMode.ispMod == 0 && uiOrgMode.mRefId == 0 && uiOrgMode.modeId != TIMD_IDX && uiOrgMode.modeId != DIMD_IDX #if JVET_AH0076_OBIC && uiOrgMode.modeId != OBIC_IDX +#endif +#if JVET_AJ0061_TIMD_MERGE + && uiOrgMode.modeId != TIMDM_IDX #endif ) { +#if JVET_AJ0061_TIMD_MERGE + bool modeDuplicated = (uiOrgMode.modeId == cu.dimdMode) && !m_skipDimdMode; +#else bool modeDuplicated = (uiOrgMode.modeId == cu.dimdMode); +#endif if (modeDuplicated) { m_modeCostStore[lfnstIdx][mode] = MAX_DOUBLE / 2.0; @@ -3296,7 +3738,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c #if JVET_AH0076_OBIC if (m_skipDimdLfnstMtsPass) { +#if !JVET_AJ0061_TIMD_MERGE CHECK(!cu.lfnstIdx && !cu.mtsFlag, "invalid logic"); +#endif continue; } #endif @@ -3310,7 +3754,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c { if (m_skipObicLfnstMtsPass) { +#if !JVET_AJ0061_TIMD_MERGE CHECK(!cu.lfnstIdx && !cu.mtsFlag, "invalid logic"); +#endif continue; } cu.obicFlag = true; @@ -3410,6 +3856,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c #endif #if JVET_W0123_TIMD_FUSION cu.timd = false; +#if JVET_AJ0061_TIMD_MERGE + cu.timdMrg = 0; +#endif if (mode >= 0 && uiOrgMode.modeId == TIMD_IDX) { if (cu.ispMode) @@ -3432,7 +3881,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c #if INTRA_TRANS_ENC_OPT else if (m_skipTimdLfnstMtsPass) { +#if !JVET_AJ0061_TIMD_MERGE CHECK(!cu.lfnstIdx && !cu.mtsFlag, "invalid logic"); +#endif continue; } #endif @@ -3452,7 +3903,21 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c cu.tmrlFlag = true; } #endif - +#if JVET_AJ0061_TIMD_MERGE + cu.timdMrg = 0; + if( mode >= 0 && uiOrgMode.modeId >= TIMDM_IDX && uiOrgMode.modeId < TIMDM_IDX + NUM_TIMD_MERGE_MODES + 1) + { + if (m_skipTimdMrgLfnstMtsPass) + { + continue; + } + cu.timdMrg = uiOrgMode.modeId - TIMDM_IDX + 1; + cu.timd = true; + uiOrgMode.modeId = cu.timdMrgList[uiOrgMode.modeId - TIMDM_IDX][0]; + pu.intraDir[CHANNEL_TYPE_LUMA] = uiOrgMode.modeId; + pu.multiRefIdx = uiOrgMode.mRefId; + } +#endif CHECK(cu.mipFlag && pu.multiRefIdx, "Error: combination of MIP and MRL not supported"); #if JVET_W0123_TIMD_FUSION if (!cu.timd) @@ -3568,7 +4033,11 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c } #if JVET_Y0142_ADAPT_INTRA_MTS #if JVET_W0123_TIMD_FUSION - if (!cu.mtsFlag && !lfnstIdx && mode < numNonISPModes && !(cu.timd && pu.multiRefIdx)) + if (!cu.mtsFlag && !lfnstIdx && mode < numNonISPModes && !(cu.timd && pu.multiRefIdx) +#if JVET_AJ0061_TIMD_MERGE + && !cu.timdMrg +#endif + ) #else if( !cu.mtsFlag && !lfnstIdx && mode < numNonISPModes && !pu.multiRefIdx ) #endif @@ -3682,7 +4151,11 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c } #endif #if JVET_W0123_TIMD_FUSION +#if JVET_AJ0061_TIMD_MERGE + if (cu.timd && !cu.timdMrg) +#else if (cu.timd) +#endif { if (csTemp->cost < timdAngCost) { @@ -3692,6 +4165,15 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c #endif } #endif +#if JVET_AJ0061_TIMD_MERGE + if (setSkipTimdMrgControl && cu.timdMrg) + { + if (csTemp->cost < timdMrgAngCost[cu.timdMrg - 1]) + { + timdMrgAngCost[cu.timdMrg - 1] = csTemp->cost; + } + } +#endif #if JVET_AH0076_OBIC if (setSkipDimdControl && cu.dimd) { @@ -3727,6 +4209,13 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c #if JVET_W0123_TIMD_FUSION bestTimdMode = cu.timd; #endif +#if JVET_AJ0061_TIMD_MERGE + bestTimdMrgMode = cu.timdMrg; + if (cu.timd && !cu.timdMrg) + { + m_pcTrQuant->getTrTypes(*csBest->cus[0]->firstTU, COMPONENT_Y, bestTimdTrType[0], bestTimdTrType[1]); + } +#endif #if JVET_AC0105_DIRECTIONAL_PLANAR bestPlMode = cu.plIdx; #endif @@ -3829,6 +4318,19 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c } } #endif +#if JVET_AJ0061_TIMD_MERGE + for (int i = 0; i < NUM_TIMD_MERGE_MODES && setSkipTimdMrgControl; i++) + { + if (timdMrgAngCost[i] != MAX_DOUBLE && (regAngCost * 1.3 < timdMrgAngCost[i] || timdAngCost * 1.3 < timdMrgAngCost[i])) + { + m_skipTimdMrgLfnstMtsPass = true; + } + if (timdMrgAngCost[i] != MAX_DOUBLE && timdAngCost != MAX_DOUBLE && timdMrgAngCost[i] * 2 < timdAngCost) + { + m_skipTimdLfnstMtsPass = true; + } + } +#endif #if JVET_AH0076_OBIC if (setSkipDimdControl) { @@ -3914,8 +4416,23 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c if (cu.timd) { pu.intraDir[ CHANNEL_TYPE_LUMA ] = cu.timdMode; +#if JVET_AJ0061_TIMD_MERGE + if (!cu.timdMrg) + { + cu.timdmTrType[NUM_TIMD_MERGE_MODES][0] = bestTimdTrType[0]; + cu.timdmTrType[NUM_TIMD_MERGE_MODES][1] = bestTimdTrType[1]; + } +#endif + } +#if JVET_AJ0061_TIMD_MERGE + cu.timdMrg = bestTimdMrgMode; + if (cu.timdMrg) + { + pu.intraDir[ CHANNEL_TYPE_LUMA ] = cu.timdMrgList[cu.timdMrg - 1][0]; + cu.timd = true; } #endif +#endif #if JVET_AB0155_SGPM cu.sgpm = uiBestPUMode.sgpmFlag; if (cu.sgpm) @@ -9703,6 +10220,14 @@ void IntraSearch::xSelectAMTForFullRD(TransformUnit &tu PelBuf predBuf(m_pdpIntraPredBuf[uiDirMode], tmpArea); piPred.copyFrom(predBuf); } +#if JVET_AJ0061_TIMD_MERGE + else if (pu.cu->timd && chType == CHANNEL_TYPE_LUMA && pu.cu->ispMode == NOT_INTRA_SUBPARTITIONS) + { + TimdMode mode = getTimdMode(pu.cu->timdMrg, pu.multiRefIdx); + const CPelBuf timdSaveBuf(m_timdPredBuf[mode], pu.Y()); + piPred.copyFrom(timdSaveBuf); + } +#endif else #endif { @@ -10089,7 +10614,15 @@ void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID &comp } else #endif - +#if JVET_AJ0061_TIMD_MERGE + if (pu.cu->timd && chType == CHANNEL_TYPE_LUMA && pu.cu->ispMode == NOT_INTRA_SUBPARTITIONS) + { + TimdMode mode = getTimdMode(pu.cu->timdMrg, pu.multiRefIdx); + const CPelBuf timdSaveBuf(m_timdPredBuf[mode], pu.Y()); + piPred.copyFrom(timdSaveBuf); + } + else +#endif predIntraAng(compID, piPred, pu); } #if JVET_Z0050_DIMD_CHROMA_FUSION @@ -13542,6 +14075,10 @@ void IntraSearch::reduceHadCandList(static_vector<T, N>& candModeList, static_ve #endif ) { +#if JVET_AJ0061_TIMD_MERGE + static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> allModes; + static_vector<double, FAST_UDI_MAX_RDMODE_NUM> allCosts; +#endif const int maxCandPerType = numModesForFullRD >> 1; static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> tempRdModeList; static_vector<double, FAST_UDI_MAX_RDMODE_NUM> tempCandCostList; @@ -13566,6 +14103,10 @@ void IntraSearch::reduceHadCandList(static_vector<T, N>& candModeList, static_ve keepOneMip = false; numMip += addMode ? 1:0; } +#if JVET_AJ0061_TIMD_MERGE + allModes.push_back(orgMode); + allCosts.push_back(candCostList[idx]); +#endif if( addMode ) { tempRdModeList.push_back(orgMode); @@ -13586,6 +14127,19 @@ void IntraSearch::reduceHadCandList(static_vector<T, N>& candModeList, static_ve } // Append MIP mode to RD mode list +#if JVET_AJ0061_TIMD_MERGE + for (int idx = 0; idx < 3; idx++) + { + const bool isTransposed = (sortedMipModes[idx] >= transpOff ? true : false); + const uint32_t mipIdx = (isTransposed ? sortedMipModes[idx] - transpOff : sortedMipModes[idx]); + const ModeInfo mipMode = ModeInfo(true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, mipIdx); + if (std::find(allModes.begin(), allModes.end(), mipMode) == allModes.end()) + { + allModes.push_back(mipMode); + allCosts.push_back(sortedMipCost[idx]); + } + } +#endif const int modeListSize = int(tempRdModeList.size()); for (int idx = 0; idx < 3; idx++) { @@ -13628,6 +14182,18 @@ void IntraSearch::reduceHadCandList(static_vector<T, N>& candModeList, static_ve } // Append TMRL mode to RD mode list +#if JVET_AJ0061_TIMD_MERGE + for (int idx = 0; idx < 3; idx++) + { + const uint8_t tmrlListIdx = sortedTmrlModes[idx]; + const ModeInfo tmrlMode(false, false, tmrlListIdx + MAX_REF_LINE_IDX, NOT_INTRA_SUBPARTITIONS, 0); + if (std::find(allModes.begin(), allModes.end(), tmrlMode) == allModes.end()) + { + allModes.push_back(tmrlMode); + allCosts.push_back(sortedTmrlCost[idx]); + } + } +#endif const int modeListSize = int(tempRdModeList.size()); for (int idx = 0; idx < 3; idx++) { @@ -13669,6 +14235,18 @@ void IntraSearch::reduceHadCandList(static_vector<T, N>& candModeList, static_ve updateCandList(idx, dirPlanarCostList[idx], sortedDirPlanarModes, sortedDirPlanarCost, 2); } +#if JVET_AJ0061_TIMD_MERGE + for (int idx = 0; idx < 2; idx++) + { + const uint8_t dirPlanarListIdx = sortedDirPlanarModes[idx]; + const ModeInfo dirPlanarMode(false, false, 0, NOT_INTRA_SUBPARTITIONS, dirPlanarListIdx == 0 ? PL_HOR_IDX : PL_VER_IDX); + if (std::find(allModes.begin(), allModes.end(), dirPlanarMode) == allModes.end()) + { + allModes.push_back(dirPlanarMode); + allCosts.push_back(sortedDirPlanarCost[idx]); + } + } +#endif const int modeListSize = int(tempRdModeList.size()); for (int idx = 0; idx < 2; idx++) { @@ -13694,9 +14272,147 @@ void IntraSearch::reduceHadCandList(static_vector<T, N>& candModeList, static_ve } #endif +#if JVET_AJ0061_TIMD_MERGE + numModesForFullRD = int(tempRdModeList.size()); + CHECK(numModesForFullRD > (int) allModes.size(), "something went wrong"); + tempRdModeList.clear(); + tempCandCostList.clear(); + ModeInfo mode; + double cost, bestCost = MAX_DOUBLE; + bool earlyExist = false; + for (int idx = 0; idx < numModesForFullRD && !earlyExist; idx++) + { + mode = allModes[idx]; + cost = allCosts[idx]; + updateCandList(mode, cost, tempRdModeList, tempCandCostList, numModesForFullRD); + if (cost < bestCost) + { + bestCost = cost; + } + } + + bool testDimd = !m_skipDimdMode && m_satdCostDIMD != MAX_UINT64; + bool testObic = !m_skipObicMode && m_satdCostOBIC != MAX_UINT64; + bool testTimd = !m_skipTimdMode[Timd] && m_satdCostTIMD[Timd][0] != MAX_UINT64; + bool testTimdMerge = !m_skipTimdMode[TimdMrg] && m_satdCostTIMD[TimdMrg][0] != MAX_UINT64; + bool testTimdMrl1 = !m_skipTimdMode[TimdMrl1] && m_satdCostTIMD[TimdMrl1][0] != MAX_UINT64; + bool testTimdMrl3 = !m_skipTimdMode[TimdMrl3] && m_satdCostTIMD[TimdMrl3][0] != MAX_UINT64; + const double timdDimdCostMult = 1.3; + const double angCostMult = 2.0; + const double multMrl = 1.0; + const double bestAngCost = numModesForFullRD ? tempCandCostList[0] : MAX_DOUBLE; + int numTotalRD = !m_skipObicMode + !m_skipDimdMode + !m_skipTimdMode[Timd] + !m_skipTimdMode[TimdMrg]; + + if (bestAngCost != MAX_DOUBLE) + { + if (testTimdMerge && bestAngCost * angCostMult < m_satdCostTIMD[TimdMrg][0]) + { + m_skipTimdMrgLfnstMtsPass = true; + } + if (testTimd && bestAngCost * angCostMult < m_satdCostTIMD[Timd][0]) + { + m_skipTimdLfnstMtsPass = true; + } + } + + if ((testObic && testTimd && timdDimdCostMult * m_satdCostTIMD[Timd][0] < m_satdCostOBIC) || + (testObic && testTimdMerge && timdDimdCostMult * m_satdCostTIMD[TimdMrg][0] < m_satdCostOBIC) ) + { + m_skipObicMode = true; + testObic = false; + } + if ((testDimd && testTimd && timdDimdCostMult * m_satdCostTIMD[Timd][0] < m_satdCostDIMD) || + (testDimd && testTimdMerge && timdDimdCostMult * m_satdCostTIMD[TimdMrg][0] < m_satdCostDIMD) ) + { + m_skipDimdMode = true; + testDimd = false; + } + if (testTimdMerge && + ( (testDimd && timdDimdCostMult * m_satdCostDIMD < m_satdCostTIMD[TimdMrg][1]) || + (testObic && timdDimdCostMult * m_satdCostOBIC < m_satdCostTIMD[TimdMrg][1]) )) + { + m_skipTimdMode[TimdMrg] = true; + testTimdMerge = false; + } + if (testTimd && + ( (testDimd && timdDimdCostMult * m_satdCostDIMD < m_satdCostTIMD[Timd][1]) || + (testObic && timdDimdCostMult * m_satdCostOBIC < m_satdCostTIMD[Timd][1]))) + { + m_skipTimdMode[Timd] = true; + testTimd = false; + } + if (testDimd && testObic) + { + if (!m_skipObicMode && timdDimdCostMult * m_satdCostDIMD < m_satdCostOBIC) + { + m_skipObicMode = true; + testObic = false; + } + if (!m_skipDimdMode && timdDimdCostMult * m_satdCostOBIC < m_satdCostDIMD) + { + m_skipDimdMode = true; + testDimd = false; + } + } + + if (testTimdMrl1 && + ((testDimd && timdDimdCostMult * m_satdCostDIMD < m_satdCostTIMD[TimdMrl1][1]) || + (testObic && timdDimdCostMult * m_satdCostOBIC < m_satdCostTIMD[TimdMrl1][1]) || + (testTimd && multMrl * m_satdCostTIMD[Timd][1] < m_satdCostTIMD[TimdMrl1][1]) || + (testTimdMerge && multMrl * m_satdCostTIMD[TimdMrg][1] < m_satdCostTIMD[TimdMrl1][1]) + )) + { + m_skipTimdMode[TimdMrl1] = true; + testTimdMrl1 = false; + } + if (testTimdMrl3 && + ((testDimd && timdDimdCostMult * m_satdCostDIMD < m_satdCostTIMD[TimdMrl3][1]) || + (testObic && timdDimdCostMult * m_satdCostOBIC < m_satdCostTIMD[TimdMrl3][1]) || + (testTimd && multMrl * m_satdCostTIMD[Timd][1] < m_satdCostTIMD[TimdMrl3][1]) || + (testTimdMerge && multMrl * m_satdCostTIMD[TimdMrg][1] < m_satdCostTIMD[TimdMrl3][1]) + )) + { + m_skipTimdMode[TimdMrl3] = true; + testTimdMrl3 = false; + } + int numTotalRD2 = !m_skipObicMode + !m_skipDimdMode + !m_skipTimdMode[Timd] + !m_skipTimdMode[TimdMrg]; // Don't involve TimdMrl in this + if (numModesForFullRD > 0 && numTotalRD == numTotalRD2 && numTotalRD > 2) // If nothing was decided to be skipped, conditionally pop-back the worse (only one) + { + const double dimdCostMul2 = 2.0; + if (testTimd && tempCandCostList[numModesForFullRD - 1] > timdDimdCostMult * m_satdCostTIMD[Timd][0]) + { + tempRdModeList.pop_back(); + tempCandCostList.pop_back(); + numModesForFullRD--; + } + else if (testTimdMerge && tempCandCostList[numModesForFullRD - 1] > timdDimdCostMult * m_satdCostTIMD[TimdMrg][0]) + { + tempRdModeList.pop_back(); + tempCandCostList.pop_back(); + numModesForFullRD--; + } + else if (testDimd && tempCandCostList[numModesForFullRD - 1] > dimdCostMul2 * m_satdCostDIMD) + { + tempRdModeList.pop_back(); + tempCandCostList.pop_back(); + numModesForFullRD--; + } + else if (testObic && tempCandCostList[numModesForFullRD - 1] > dimdCostMul2 * m_satdCostOBIC) + { + tempRdModeList.pop_back(); + tempCandCostList.pop_back(); + numModesForFullRD--; + } + } + + CHECK(numModesForFullRD != (int)tempRdModeList.size(), "something went wrong"); + candModeList = tempRdModeList; + candCostList = tempCandCostList; +#else candModeList = tempRdModeList; candCostList = tempCandCostList; numModesForFullRD = int(candModeList.size()); +#endif } // It decides which modes from the ISP lists can be full RD tested diff --git a/source/Lib/EncoderLib/IntraSearch.h b/source/Lib/EncoderLib/IntraSearch.h index 731b16b60011d7046f05d8729bc06da9db8dc334..627219c310d4b0ebaeed85b0f98265e1553f61a1 100644 --- a/source/Lib/EncoderLib/IntraSearch.h +++ b/source/Lib/EncoderLib/IntraSearch.h @@ -590,6 +590,10 @@ private: int64_t m_coeffAbsSumDCT2; #endif #endif +#if JVET_AJ0061_TIMD_MERGE + static_vector<ModeInfo, NumTimdMode> m_uiSavedRdModeListTimd; + static_vector<double, NumTimdMode> m_uiSavedModeCostTimd; +#endif #if JVET_AE0169_BIPREDICTIVE_IBC double m_bestIntraSADHADCost; #endif @@ -639,6 +643,9 @@ private: static_vector<double, NUM_DERIVED_EIP + MAX_MERGE_EIP> m_dSavedModeCostEip; static_vector<double, NUM_DERIVED_EIP + MAX_MERGE_EIP> m_dSavedHadListEip; #endif +#if JVET_AJ0061_TIMD_MERGE + Pel* m_timdPredBuf[NumTimdMode]; +#endif #if JVET_AH0076_OBIC Pel* m_dimdPredBuf; Pel* m_obicPredBuf; @@ -728,6 +735,15 @@ public: #if INTRA_TRANS_ENC_OPT bool m_skipTimdLfnstMtsPass; #endif +#if JVET_AJ0061_TIMD_MERGE + bool m_skipTimdMrgLfnstMtsPass; + bool m_skipObicMode; + bool m_skipDimdMode; + uint64_t m_satdCostOBIC; + uint64_t m_satdCostDIMD; + bool m_skipTimdMode[NumTimdMode]; + uint64_t m_satdCostTIMD[NumTimdMode][2]; +#endif #if JVET_AH0076_OBIC bool m_skipObicLfnstMtsPass; bool m_skipDimdLfnstMtsPass; diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp index bdb2d71d23654628a9d1001ee3451c316293773d..92e638e5519b7375790bb75a150f3192f3d1c4e8 100644 --- a/source/Lib/EncoderLib/VLCWriter.cpp +++ b/source/Lib/EncoderLib/VLCWriter.cpp @@ -1793,6 +1793,12 @@ void HLSWriter::codeSPS( const SPS* pcSPS ) #if JVET_W0123_TIMD_FUSION WRITE_FLAG( pcSPS->getUseTimd() ? 1 : 0, "sps_timd_enabled_flag"); +#if JVET_AJ0061_TIMD_MERGE + if (pcSPS->getUseTimd()) + { + WRITE_FLAG( pcSPS->getUseTimdMrg() ? 1 : 0, "sps_timd_merge_enabled_flag"); + } +#endif #endif #if JVET_X0141_CIIP_TIMD_TM && JVET_W0123_TIMD_FUSION if (pcSPS->getUseCiip() && pcSPS->getUseTimd())