diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp index 378561a7a65918bc4935c41dab9ab3711c3d642f..b7f45d32aae6c42cb38ab92c06ae4207a3b5534c 100644 --- a/source/App/EncoderApp/EncApp.cpp +++ b/source/App/EncoderApp/EncApp.cpp @@ -403,6 +403,10 @@ void EncApp::xInitLibCfg() m_cEncLib.setNoDimdConstraintFlag(m_noDimdConstraintFlag); CHECK(m_noDimdConstraintFlag && m_dimd, "DIMD shall be deactivated when m_noDimdConstraintFlag is equal to 1"); #endif +#if JVET_W0123_TIMD_FUSION + m_cEncLib.setNoTimdConstraintFlag(m_noTimdConstraintFlag); + CHECK(m_noTimdConstraintFlag && m_timd, "TIMD shall be deactivated when m_noTimdConstraintFlag is equal to 1"); +#endif #if ENABLE_OBMC m_cEncLib.setNoObmcConstraintFlag(m_noObmcConstraintFlag); CHECK(m_noObmcConstraintFlag && m_OBMC, "OBMC shall be deactivated when m_noObmcConstraintFlag is equal to 1"); @@ -534,6 +538,9 @@ void EncApp::xInitLibCfg() #if ENABLE_DIMD m_cEncLib.setNoDimdConstraintFlag(false); #endif +#if JVET_W0123_TIMD_FUSION + m_cEncLib.setNoTimdConstraintFlag(false); +#endif #if ENABLE_OBMC m_cEncLib.setNoObmcConstraintFlag(false); #endif @@ -761,6 +768,9 @@ void EncApp::xInitLibCfg() #if ENABLE_DIMD m_cEncLib.setUseDimd ( m_dimd ); #endif +#if JVET_W0123_TIMD_FUSION + m_cEncLib.setUseTimd ( m_timd ); +#endif #if ENABLE_OBMC m_cEncLib.setUseObmc ( m_OBMC ); #endif diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp index 15e65b15408c26752f056c2500942d84478eb132..9d312e8c8656ce6e11391a3800eaa870fd622000 100644 --- a/source/App/EncoderApp/EncAppCfg.cpp +++ b/source/App/EncoderApp/EncAppCfg.cpp @@ -888,6 +888,9 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) #if ENABLE_DIMD ("NoDimdConstraintFlag", m_noDimdConstraintFlag, false, "Indicate that DIMD is deactivated") #endif +#if JVET_W0123_TIMD_FUSION + ("NoTimdConstraintFlag", m_noTimdConstraintFlag, false, "Indicate that TIMD is deactivated") +#endif #if ENABLE_OBMC ("NoObmcConstraintFlag", 
m_noObmcConstraintFlag, false, "Indicate that OBMC is deactivated") #endif @@ -1012,6 +1015,9 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) #if ENABLE_DIMD ( "DIMD", m_dimd, true, "Enable decoder side intra mode derivation\n" ) #endif +#if JVET_W0123_TIMD_FUSION + ( "TIMD", m_timd, true, "Enable template based intra mode derivation\n" ) +#endif #if ENABLE_OBMC ("OBMC", m_OBMC, true, "Overlapping Block Motion Compensation") #endif @@ -4276,6 +4282,9 @@ void EncAppCfg::xPrintParameter() #if ENABLE_DIMD msg(VERBOSE, "DIMD:%d ", m_dimd); #endif +#if JVET_W0123_TIMD_FUSION + msg(VERBOSE, "TIMD:%d ", m_timd); +#endif #if ENABLE_OBMC msg(VERBOSE, "OBMC:%d ", m_OBMC); #endif diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h index 83b5967d6cfb4d41bb9ed786457ce56db66ab722..965161ce8943a1f2b4d519332fb100ed409cd8c6 100644 --- a/source/App/EncoderApp/EncAppCfg.h +++ b/source/App/EncoderApp/EncAppCfg.h @@ -187,6 +187,9 @@ protected: #if ENABLE_DIMD bool m_noDimdConstraintFlag; #endif +#if JVET_W0123_TIMD_FUSION + bool m_noTimdConstraintFlag; +#endif #if ENABLE_OBMC bool m_noObmcConstraintFlag; #endif @@ -377,6 +380,9 @@ protected: #if ENABLE_DIMD bool m_dimd; #endif +#if JVET_W0123_TIMD_FUSION + bool m_timd; +#endif #if ENABLE_OBMC bool m_OBMC; #endif diff --git a/source/Lib/CommonLib/Buffer.h b/source/Lib/CommonLib/Buffer.h index f6aa142477825ace6caeea4e5e0c6f3e62d6f9d5..36f47f6942e943622ee5fc94353b5e4a9e85eecd 100644 --- a/source/Lib/CommonLib/Buffer.h +++ b/source/Lib/CommonLib/Buffer.h @@ -190,6 +190,10 @@ typedef AreaBuf<const TCoeff> CCoeffBuf; typedef AreaBuf< MotionInfo> MotionBuf; typedef AreaBuf<const MotionInfo> CMotionBuf; +#if JVET_W0123_TIMD_FUSION +typedef AreaBuf< uint8_t> IpmBuf; +typedef AreaBuf<const uint8_t> CIpmBuf; +#endif typedef AreaBuf< TCoeff> PLTescapeBuf; typedef AreaBuf<const TCoeff> CPLTescapeBuf; diff --git a/source/Lib/CommonLib/CodingStructure.cpp b/source/Lib/CommonLib/CodingStructure.cpp index 
7c7628c628b57dc30f726e09fba3bc19c5859793..80ad1a359a832ad0a78923801dcd9486555658e0 100644 --- a/source/Lib/CommonLib/CodingStructure.cpp +++ b/source/Lib/CommonLib/CodingStructure.cpp @@ -101,6 +101,9 @@ CodingStructure::CodingStructure(CUCache& cuCache, PUCache& puCache, TUCache& tu } m_motionBuf = nullptr; +#if JVET_W0123_TIMD_FUSION + m_ipmBuf = nullptr; +#endif features.resize( NUM_ENC_FEATURES ); #if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS treeType = TREE_D; @@ -140,6 +143,10 @@ void CodingStructure::destroy() delete[] m_motionBuf; m_motionBuf = nullptr; +#if JVET_W0123_TIMD_FUSION + delete[] m_ipmBuf; + m_ipmBuf = nullptr; +#endif m_tuCache.cache( tus ); @@ -1007,6 +1014,9 @@ void CodingStructure::createInternals(const UnitArea& _unit, const bool isTopLay unsigned _lumaAreaScaled = g_miScaling.scale( area.lumaSize() ).area(); m_motionBuf = new MotionInfo[_lumaAreaScaled]; +#if JVET_W0123_TIMD_FUSION + m_ipmBuf = new uint8_t[_lumaAreaScaled]; +#endif initStructData(); } @@ -1345,6 +1355,14 @@ void CodingStructure::useSubStructure( const CodingStructure& subStruct, const C motionLut = subStruct.motionLut; } +#if JVET_W0123_TIMD_FUSION + if (!subStruct.m_isTuEnc && chType != CHANNEL_TYPE_CHROMA) + { + IpmBuf ownIB = getIpmBuf ( clippedArea ); + CIpmBuf subIB = subStruct.getIpmBuf( clippedArea ); + ownIB.copyFrom( subIB ); + } +#endif prevPLT = subStruct.prevPLT; @@ -1469,6 +1487,11 @@ void CodingStructure::copyStructure( const CodingStructure& other, const Channel motionLut = other.motionLut; } +#if JVET_W0123_TIMD_FUSION + IpmBuf ownIB = getIpmBuf(); + CIpmBuf subIB = other.getIpmBuf(); + ownIB.copyFrom( subIB ); +#endif prevPLT = other.prevPLT; if( copyTUs ) @@ -1536,6 +1559,9 @@ void CodingStructure::initStructData( const int &QP, const bool &skipMotBuf ) { getMotionBuf().memset(0); } +#if JVET_W0123_TIMD_FUSION + getIpmBuf().memset(0); +#endif fracBits = 0; dist = 0; @@ -1653,6 +1679,56 @@ const MotionInfo& CodingStructure::getMotionInfo( const Position& pos 
) const return *( m_motionBuf + miPos.y * stride + miPos.x ); } +#if JVET_W0123_TIMD_FUSION +IpmBuf CodingStructure::getIpmBuf( const Area& _area ) +{ + const CompArea& _luma = area.Y(); + + CHECKD( !_luma.contains( _area ), "Trying to access intra prediction mode information outside of this coding structure" ); + + const Area miArea = g_miScaling.scale( _area ); + const Area selfArea = g_miScaling.scale( _luma ); + + return IpmBuf( m_ipmBuf + rsAddr( miArea.pos(), selfArea.pos(), selfArea.width ), selfArea.width, miArea.size() ); +} + +const CIpmBuf CodingStructure::getIpmBuf( const Area& _area ) const +{ + const CompArea& _luma = area.Y(); + + CHECKD( !_luma.contains( _area ), "Trying to access intra prediction mode information outside of this coding structure" ); + + const Area miArea = g_miScaling.scale( _area ); + const Area selfArea = g_miScaling.scale( _luma ); + + return IpmBuf( m_ipmBuf + rsAddr( miArea.pos(), selfArea.pos(), selfArea.width ), selfArea.width, miArea.size() ); +} + +uint8_t& CodingStructure::getIpmInfo( const Position& pos ) +{ + CHECKD( !area.Y().contains( pos ), "Trying to access intra prediction mode information outside of this coding structure" ); + + //return getIpmBuf().at( g_miScaling.scale( pos - area.lumaPos() ) ); + // bypass the intra prediction mode buf calling and get the value directly + const unsigned stride = g_miScaling.scaleHor( area.lumaSize().width ); + const Position miPos = g_miScaling.scale( pos - area.lumaPos() ); + + return *( m_ipmBuf + miPos.y * stride + miPos.x ); +} + +const uint8_t& CodingStructure::getIpmInfo( const Position& pos ) const +{ + CHECKD( !area.Y().contains( pos ), "Trying to access intra prediction mode information outside of this coding structure" ); + + //return getIpmBuf().at( g_miScaling.scale( pos - area.lumaPos() ) ); + // bypass the intra prediction mode buf calling and get the value directly + const unsigned stride = g_miScaling.scaleHor( area.lumaSize().width ); + const Position miPos = g_miScaling.scale( pos - area.lumaPos() ); + + return *(
m_ipmBuf + miPos.y * stride + miPos.x ); +} +#endif + // data accessors PelBuf CodingStructure::getPredBuf(const CompArea &blk) { return getBuf(blk, PIC_PREDICTION); } diff --git a/source/Lib/CommonLib/CodingStructure.h b/source/Lib/CommonLib/CodingStructure.h index 097a9e2a025cfad77840236141dc3128625609be..aa745489463cbe3c7e2db87da607efe9de2dda6e 100644 --- a/source/Lib/CommonLib/CodingStructure.h +++ b/source/Lib/CommonLib/CodingStructure.h @@ -252,6 +252,9 @@ private: int m_offsets[ MAX_NUM_COMPONENT ]; MotionInfo *m_motionBuf; +#if JVET_W0123_TIMD_FUSION + uint8_t *m_ipmBuf; +#endif public: CodingStructure *bestParent; @@ -272,6 +275,19 @@ public: MotionInfo& getMotionInfo( const Position& pos ); const MotionInfo& getMotionInfo( const Position& pos ) const; +#if JVET_W0123_TIMD_FUSION + IpmBuf getIpmBuf( const Area& _area ); + IpmBuf getIpmBuf( const UnitArea& _area ) { return getIpmBuf( _area.Y() ); } + IpmBuf getIpmBuf() { return getIpmBuf( area.Y() ); } + + const CIpmBuf getIpmBuf( const Area& _area ) const; + const CIpmBuf getIpmBuf( const UnitArea& _area ) const { return getIpmBuf( _area.Y() ); } + const CIpmBuf getIpmBuf() const { return getIpmBuf( area.Y() ); } + + uint8_t& getIpmInfo( const Position& pos ); + const uint8_t& getIpmInfo( const Position& pos ) const; +#endif + public: // --------------------------------------------------------------------------- diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h index 7a26296aafdd575c98bcc7e5a92fffb67e651a08..510b0f039f154372866fc88ca2fac83780dc0699 100644 --- a/source/Lib/CommonLib/CommonDef.h +++ b/source/Lib/CommonLib/CommonDef.h @@ -300,13 +300,27 @@ static const int HOR_IDX = (1 * (NUM_DIR - 1) + 2); ///< inde static const int DIA_IDX = (2 * (NUM_DIR - 1) + 2); ///< index for intra DIAGONAL mode static const int VER_IDX = (3 * (NUM_DIR - 1) + 2); ///< index for intra VERTICAL mode static const int VDIA_IDX = (4 * (NUM_DIR - 1) + 2); ///< index for intra VDIAGONAL mode 
+#if JVET_W0123_TIMD_FUSION +static const int BDPCM_IDX = 162; +#else static const int BDPCM_IDX = (5 * (NUM_DIR - 1) + 2); ///< index for intra VDIAGONAL mode +#endif static const int NOMODE_IDX = MAX_UCHAR; ///< indicating uninitialized elements static const int NUM_CHROMA_MODE = (5 + NUM_LMC_MODE); ///< total number of chroma modes static const int LM_CHROMA_IDX = NUM_LUMA_MODE; ///< chroma mode index for derived from LM mode #if ENABLE_DIMD static const int DIMD_IDX = 99; ///< index for intra DIMD mode #endif +#if JVET_W0123_TIMD_FUSION +static const int TIMD_IDX = 199; ///< index for intra TIMD mode +static const int DIMD_MAX_TEMP_SIZE = 4; +static const int EXT_HOR_IDX = 34; +static const int EXT_DIA_IDX = 66; +static const int EXT_VER_IDX = 98; +static const int EXT_VDIA_IDX = 130; +#define MAP131TO67( mode ) ((mode)<2?(mode):(((mode)>>1)+1)) +#define MAP67TO131( mode ) ((mode)<2?(mode):(((mode)<<1)-2)) +#endif #if MMLM static const int MMLM_CHROMA_IDX = LM_CHROMA_IDX + 1; ///< MDLM_L static const int MDLM_L_IDX = LM_CHROMA_IDX + 2; ///< MDLM_L diff --git a/source/Lib/CommonLib/ContextModelling.cpp b/source/Lib/CommonLib/ContextModelling.cpp index 895e739d4da85b93e85c63642f9cebff728cca06..e6db8469e2e0819aa65227a22515c948bdabdf74 100644 --- a/source/Lib/CommonLib/ContextModelling.cpp +++ b/source/Lib/CommonLib/ContextModelling.cpp @@ -357,6 +357,19 @@ unsigned DeriveCtx::CtxDIMDFlag(const CodingUnit& cu) } #endif +#if JVET_W0123_TIMD_FUSION +unsigned DeriveCtx::CtxTimdFlag(const CodingUnit& cu) +{ + const CodingStructure *cs = cu.cs; + unsigned ctxId = 0; + const CodingUnit *cuLeft = cs->getCURestricted( cu.lumaPos().offset( -1, 0 ), cu, CH_L ); + ctxId = (cuLeft && cuLeft->timd) ? 1 : 0; + const CodingUnit *cuAbove = cs->getCURestricted( cu.lumaPos().offset( 0, -1 ), cu, CH_L ); + ctxId += (cuAbove && cuAbove->timd) ?
1 : 0; + return ctxId; +} +#endif + unsigned DeriveCtx::CtxPredModeFlag( const CodingUnit& cu ) { const CodingUnit *cuLeft = cu.cs->getCURestricted(cu.lumaPos().offset(-1, 0), cu, CH_L); diff --git a/source/Lib/CommonLib/ContextModelling.h b/source/Lib/CommonLib/ContextModelling.h index 834e03af34a3c377352cdbcc5d6afa0f90870959..dd33025f3b5b0e6faa8424b0022261c99671eee5 100644 --- a/source/Lib/CommonLib/ContextModelling.h +++ b/source/Lib/CommonLib/ContextModelling.h @@ -618,6 +618,9 @@ unsigned CtxPltCopyFlag( const unsigned prevRunType, const unsigned dist ); #if ENABLE_DIMD unsigned CtxDIMDFlag(const CodingUnit& cu); #endif +#if JVET_W0123_TIMD_FUSION +unsigned CtxTimdFlag( const CodingUnit& cu ); +#endif } #endif // __CONTEXTMODELLING__ diff --git a/source/Lib/CommonLib/Contexts.cpp b/source/Lib/CommonLib/Contexts.cpp index dfce0eb5fa9ac8c61ce60203397cadb392f9f0dd..1bd41905edc89da556daaa387d9d01f3b84b1857 100644 --- a/source/Lib/CommonLib/Contexts.cpp +++ b/source/Lib/CommonLib/Contexts.cpp @@ -909,12 +909,21 @@ const CtxSet ContextSetCfg::PredMode = ContextSetCfg::addCtxSet const CtxSet ContextSetCfg::MultiRefLineIdx = ContextSetCfg::addCtxSet ({ +#if JVET_W0123_TIMD_FUSION + { 25, 59, 25, 59 }, + { 25, 58, 25, 58 }, + { 25, 60, 25, 60 }, + { 6, 5, 6, 5 }, + { 6, 5, 6, 5 }, + { 6, 8, 6, 8 } +#else { 25, 59 }, { 25, 58 }, { 25, 60 }, { 6, 5 }, { 6, 5 }, { 6, 8 } +#endif }); const CtxSet ContextSetCfg::IntraLumaMpmFlag = ContextSetCfg::addCtxSet @@ -1575,12 +1584,21 @@ const CtxSet ContextSetCfg::MTSIdx = ContextSetCfg::addCtxSet const CtxSet ContextSetCfg::ISPMode = ContextSetCfg::addCtxSet ({ +#if JVET_W0123_TIMD_FUSION + { 33, 43, 33 }, + { 33, 43, 33 }, + { 33, 43, 33 }, + { 9, 2, 9 }, + { 9, 3, 9 }, + { 9, 2, 9 } +#else { 33, 43 }, { 33, 43 }, { 33, 43 }, { 9, 2 }, { 9, 3 }, { 9, 2 } +#endif }); const CtxSet ContextSetCfg::SbtFlag = ContextSetCfg::addCtxSet @@ -1645,6 +1663,18 @@ const CtxSet ContextSetCfg::DimdFlag = ContextSetCfg::addCtxSet }); #endif +#if 
JVET_W0123_TIMD_FUSION +const CtxSet ContextSetCfg::TimdFlag = ContextSetCfg::addCtxSet +({ + { 48, 56, 56 }, + { 41, 49, 49 }, + { 33, 49, 49 }, + { 5, 1, 1 }, + { 5, 1, 1 }, + { 2, 1, 1 } +}); +#endif + #if ENABLE_OBMC const CtxSet ContextSetCfg::ObmcFlag = ContextSetCfg::addCtxSet ({ @@ -2016,10 +2046,17 @@ const CtxSet ContextSetCfg::PredMode = ContextSetCfg::addCtxSet const CtxSet ContextSetCfg::MultiRefLineIdx = ContextSetCfg::addCtxSet ({ +#if JVET_W0123_TIMD_FUSION + { 25, 59, 25, 59, }, + { 25, 58, 25, 58, }, + { 25, 60, 25, 60, }, + { 5, 8, 5, 8, }, +#else { 25, 59, }, { 25, 58, }, { 25, 60, }, { 5, 8, }, +#endif }); const CtxSet ContextSetCfg::IntraLumaMpmFlag = ContextSetCfg::addCtxSet @@ -2553,10 +2590,17 @@ const CtxSet ContextSetCfg::MTSIdx = ContextSetCfg::addCtxSet const CtxSet ContextSetCfg::ISPMode = ContextSetCfg::addCtxSet ({ +#if JVET_W0123_TIMD_FUSION + { 33, 43, 33, }, + { 33, 36, 33, }, + { 33, 43, 33, }, + { 9, 2, 9, }, +#else { 33, 43, }, { 33, 36, }, { 33, 43, }, { 9, 2, }, +#endif }); const CtxSet ContextSetCfg::SbtFlag = ContextSetCfg::addCtxSet @@ -2607,6 +2651,17 @@ const CtxSet ContextSetCfg::DimdFlag = ContextSetCfg::addCtxSet { 5, 1, 1 } }); #endif + +#if JVET_W0123_TIMD_FUSION +const CtxSet ContextSetCfg::TimdFlag = ContextSetCfg::addCtxSet +({ + { 48, 56, 56 }, + { 41, 49, 49 }, + { 33, 49, 49 }, + { 5, 1, 1 } +}); +#endif + #if ENABLE_OBMC const CtxSet ContextSetCfg::ObmcFlag = ContextSetCfg::addCtxSet ({ diff --git a/source/Lib/CommonLib/Contexts.h b/source/Lib/CommonLib/Contexts.h index 8993e11bbff8dccf4930e823bf6bb63ff935030c..a49689489b0a084f862925857c22cfed25f8c04c 100644 --- a/source/Lib/CommonLib/Contexts.h +++ b/source/Lib/CommonLib/Contexts.h @@ -315,6 +315,9 @@ public: #if ENABLE_DIMD static const CtxSet DimdFlag; #endif +#if JVET_W0123_TIMD_FUSION + static const CtxSet TimdFlag; +#endif #if ENABLE_OBMC static const CtxSet ObmcFlag; #endif diff --git a/source/Lib/CommonLib/InterpolationFilter.cpp 
b/source/Lib/CommonLib/InterpolationFilter.cpp index d8b3d01ba7fa6cde2971eb894786f9372592fee1..b2a4f35265b1c6c9062483ffff23bac04ebe269f 100644 --- a/source/Lib/CommonLib/InterpolationFilter.cpp +++ b/source/Lib/CommonLib/InterpolationFilter.cpp @@ -490,6 +490,211 @@ const TFilterCoeff InterpolationFilter::m_lumaIntraFilter[CHROMA_INTERPOLATION_F { 0, -4, 17, 249, -7, 1 }, // 30/32 position { 0, - 2, 9, 253, -4, 0 }, // 31/32 position }; + +#if JVET_W0123_TIMD_FUSION +const TFilterCoeff InterpolationFilter::m_lumaIntraFilterExt[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS<<1][6] = +{ + { 0, 0, 256, 0, 0, 0 }, + { 0, -2, 255, 4, -1, 0 }, + { 0, -4, 253, 9, -2, 0 }, + { 0, -5, 251, 13, -3, 0 }, + { 1, -7, 249, 17, -4, 0 }, + { 1, -9, 247, 21, -5, 1 }, + { 1, -10, 245, 25, -6, 1 }, + { 1, -12, 243, 30, -7, 1 }, + { 1, -13, 241, 34, -8, 1 }, + { 2, -15, 238, 39, -9, 1 }, + { 2, -16, 235, 44, -10, 1 }, + { 2, -17, 232, 49, -11, 1 }, + { 2, -18, 229, 53, -12, 2 }, + { 2, -19, 226, 58, -13, 2 }, + { 2, -20, 223, 63, -14, 2 }, + { 2, -21, 220, 68, -15, 2 }, + { 2, -22, 217, 72, -15, 2 }, + { 2, -23, 213, 78, -16, 2 }, + { 3, -23, 209, 82, -17, 2 }, + { 3, -24, 205, 88, -18, 2 }, + { 3, -24, 202, 92, -19, 2 }, + { 3, -24, 198, 97, -20, 2 }, + { 3, -25, 194, 101, -20, 3 }, + { 3, -25, 189, 106, -20, 3 }, + { 3, -25, 185, 111, -21, 3 }, + { 3, -25, 181, 116, -22, 3 }, + { 3, -26, 178, 121, -23, 3 }, + { 3, -26, 173, 126, -23, 3 }, + { 3, -25, 168, 131, -24, 3 }, + { 3, -25, 163, 137, -25, 3 }, + { 3, -25, 159, 141, -25, 3 }, + { 3, -25, 155, 145, -25, 3 }, + { 3, -25, 150, 150, -25, 3 }, + { 3, -25, 145, 155, -25, 3 }, + { 3, -25, 141, 159, -25, 3 }, + { 3, -25, 137, 163, -25, 3 }, + { 3, -24, 131, 168, -25, 3 }, + { 3, -24, 126, 173, -25, 3 }, + { 3, -23, 121, 178, -26, 3 }, + { 3, -22, 116, 181, -25, 3 }, + { 3, -21, 111, 185, -25, 3 }, + { 3, -21, 106, 190, -25, 3 }, + { 3, -20, 101, 194, -25, 3 }, + { 2, -20, 97, 198, -24, 3 }, + { 2, -19, 92, 202, -24, 3 }, + { 2,
-18, 86, 206, -23, 3 }, + { 2, -17, 82, 209, -23, 3 }, + { 2, -16, 77, 213, -23, 3 }, + { 2, -15, 72, 217, -22, 2 }, + { 2, -15, 68, 220, -21, 2 }, + { 2, -14, 63, 223, -20, 2 }, + { 2, -13, 58, 226, -19, 2 }, + { 2, -12, 53, 229, -18, 2 }, + { 2, -11, 48, 232, -17, 2 }, + { 1, -10, 44, 235, -16, 2 }, + { 1, -9, 39, 238, -15, 2 }, + { 1, -8, 34, 241, -13, 1 }, + { 1, -7, 29, 243, -11, 1 }, + { 1, -6, 25, 245, -10, 1 }, + { 0, -5, 21, 247, -8, 1 }, + { 0, -4, 17, 249, -7, 1 }, + { 0, -3, 13, 251, -5, 0 }, + { 0, -2, 9, 253, -4, 0 }, + { 0, -1, 5, 255, -3, 0 }, +}; +#endif +#endif + +#if JVET_W0123_TIMD_FUSION +const TFilterCoeff InterpolationFilter::g_aiExtIntraCubicFilter[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS<<1][NTAPS_CHROMA] = { + { 0, 256, 0, 0 }, + { -1, 254, 4, -1 }, + { -3, 252, 8, -1 }, + { -4, 250, 12, -2 }, + { -5, 247, 17, -3 }, + { -6, 244, 21, -3 }, + { -7, 242, 25, -4 }, + { -8, 239, 29, -4 }, + { -9, 236, 34, -5 }, + { -9, 233, 38, -6 }, + { -10, 230, 43, -7 }, + { -11, 227, 47, -7 }, + { -12, 224, 52, -8 }, + { -12, 220, 56, -8 }, + { -13, 217, 61, -9 }, + { -14, 214, 65, -9 }, + { -14, 210, 70, -10 }, + { -14, 206, 75, -11 }, + { -15, 203, 79, -11 }, + { -15, 199, 84, -12 }, + { -16, 195, 89, -12 }, + { -16, 191, 93, -12 }, + { -16, 187, 98, -13 }, + { -16, 183, 102, -13 }, + { -16, 179, 107, -14 }, + { -16, 174, 112, -14 }, + { -16, 170, 116, -14 }, + { -16, 166, 121, -15 }, + { -17, 162, 126, -15 }, + { -16, 157, 130, -15 }, + { -16, 153, 135, -16 }, + { -16, 148, 140, -16 }, + { -16, 144, 144, -16 }, + { -16, 140, 148, -16}, + { -16, 135, 153, -16}, + { -15, 130, 157, -16}, + { -15, 126, 162, -17}, + { -15, 121, 166, -16}, + { -14, 116, 170, -16}, + { -14, 112, 174, -16}, + { -14, 107, 179, -16}, + { -13, 102, 183, -16}, + { -13, 98, 187, -16}, + { -12, 93, 191, -16}, + { -12, 89, 195, -16}, + { -12, 84, 199, -15}, + { -11, 79, 203, -15}, + { -11, 75, 206, -14}, + { -10, 70, 210, -14}, + { -9, 65, 214, -14}, + { -9, 61, 217, -13}, + { 
-8, 56, 220, -12}, + { -8, 52, 224, -12}, + { -7, 47, 227, -11}, + { -7, 43, 230, -10}, + { -6, 38, 233, -9}, + { -5, 34, 236, -9}, + { -4, 29, 239, -8}, + { -4, 25, 242, -7}, + { -3, 21, 244, -6}, + { -3, 17, 247, -5}, + { -2, 12, 250, -4}, + { -1, 8, 252, -3}, + { -1, 4, 254, -1}, +}; +const TFilterCoeff InterpolationFilter::g_aiExtIntraGaussFilter[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS<<1][NTAPS_CHROMA] = { + { 47, 161, 47, 1 }, + { 45, 161, 49, 1 }, + { 43, 161, 51, 1 }, + { 42, 160, 52, 2 }, + { 40, 160, 54, 2 }, + { 38, 160, 56, 2 }, + { 37, 159, 58, 2 }, + { 35, 158, 61, 2 }, + { 34, 158, 62, 2 }, + { 32, 157, 65, 2 }, + { 31, 156, 67, 2 }, + { 29, 155, 69, 3 }, + { 28, 154, 71, 3 }, + { 27, 153, 73, 3 }, + { 26, 151, 76, 3 }, + { 25, 150, 78, 3 }, + { 23, 149, 80, 4 }, + { 22, 147, 83, 4 }, + { 21, 146, 85, 4 }, + { 20, 144, 87, 5 }, + { 19, 142, 90, 5 }, + { 18, 141, 92, 5 }, + { 17, 139, 94, 6 }, + { 16, 137, 97, 6 }, + { 16, 135, 99, 6 }, + { 15, 133, 101, 7 }, + { 14, 131, 104, 7 }, + { 13, 129, 106, 8 }, + { 13, 127, 108, 8 }, + { 12, 125, 111, 8 }, + { 11, 123, 113, 9 }, + { 11, 120, 116, 9 }, + { 10, 118, 118, 10 }, + { 9, 116, 120, 11}, + { 9, 113, 123, 11}, + { 8, 111, 125, 12}, + { 8, 108, 127, 13}, + { 8, 106, 129, 13}, + { 7, 104, 131, 14}, + { 7, 101, 133, 15}, + { 6, 99, 135, 16}, + { 6, 97, 137, 16}, + { 6, 94, 139, 17}, + { 5, 92, 141, 18}, + { 5, 90, 142, 19}, + { 5, 87, 144, 20}, + { 4, 85, 146, 21}, + { 4, 83, 147, 22}, + { 4, 80, 149, 23}, + { 3, 78, 150, 25}, + { 3, 76, 151, 26}, + { 3, 73, 153, 27}, + { 3, 71, 154, 28}, + { 3, 69, 155, 29}, + { 2, 67, 156, 31}, + { 2, 65, 157, 32}, + { 2, 62, 158, 34}, + { 2, 61, 158, 35}, + { 2, 58, 159, 37}, + { 2, 56, 160, 38}, + { 2, 54, 160, 40}, + { 2, 52, 160, 42}, + { 1, 51, 161, 43}, + { 1, 49, 161, 45}, +}; #endif //1.5x diff --git a/source/Lib/CommonLib/InterpolationFilter.h b/source/Lib/CommonLib/InterpolationFilter.h index 
cb41d2d4643251f5316091e9dd427e420af403b9..f1bb512b013566d6478908b5b3054e115bfb26b8 100644 --- a/source/Lib/CommonLib/InterpolationFilter.h +++ b/source/Lib/CommonLib/InterpolationFilter.h @@ -75,6 +75,9 @@ public: #if INTRA_6TAP static const TFilterCoeff m_lumaIntraFilter[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][6]; ///< Chroma filter 6 taps static const TFilterCoeff m_weak4TapFilter[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_CHROMA]; ///< Weak filter 4 taps +#if JVET_W0123_TIMD_FUSION + static const TFilterCoeff m_lumaIntraFilterExt[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS << 1][6]; ///< Chroma filter 6 taps +#endif #endif static const TFilterCoeff m_lumaFilterRPR1[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][8]; ///< Luma filter taps 1.5x static const TFilterCoeff m_lumaFilterRPR2[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][8]; ///< Luma filter taps 2x @@ -101,6 +104,10 @@ private: static const TFilterCoeff m_affineLumaFilterRPR2[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA]; ///< Luma filter taps 2x private: static const TFilterCoeff m_lumaAltHpelIFilter[NTAPS_LUMA]; ///< Luma filter taps +#endif +#if JVET_W0123_TIMD_FUSION + static const TFilterCoeff g_aiExtIntraCubicFilter[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS<<1][NTAPS_CHROMA]; ///< Chroma filter taps + static const TFilterCoeff g_aiExtIntraGaussFilter[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS<<1][NTAPS_CHROMA]; ///< Chroma filter taps #endif static const TFilterCoeff m_bilinearFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_BILINEAR]; ///< bilinear filter taps static const TFilterCoeff m_bilinearFilterPrec4[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_BILINEAR]; ///< bilinear filter taps @@ -157,8 +164,15 @@ public: #if INTRA_6TAP static TFilterCoeff const * const getIntraLumaFilterTable(const int deltaFract) { return m_lumaIntraFilter[deltaFract]; }; static TFilterCoeff const * const getWeak4TapFilterTable(const int 
deltaFract) { return m_weak4TapFilter[deltaFract]; }; +#if JVET_W0123_TIMD_FUSION + static TFilterCoeff const * const getIntraLumaFilterTableExt(const int deltaFract) { return m_lumaIntraFilterExt[deltaFract]; }; +#endif #endif static TFilterCoeff const * const getChromaFilterTable(const int deltaFract) { return m_chromaFilter[deltaFract]; }; +#if JVET_W0123_TIMD_FUSION + static TFilterCoeff const * const getExtIntraCubicFilter(const int deltaFract) { return g_aiExtIntraCubicFilter[deltaFract]; }; + static TFilterCoeff const * const getExtIntraGaussFilter(const int deltaFract) { return g_aiExtIntraGaussFilter[deltaFract]; }; +#endif }; //! \} diff --git a/source/Lib/CommonLib/IntraPrediction.cpp b/source/Lib/CommonLib/IntraPrediction.cpp index bc5c898ed0d09a8e270f0db2f8d5176041b8e4dc..b74d3927f1a5b6f7c37d39f1ff25477cd53517e4 100644 --- a/source/Lib/CommonLib/IntraPrediction.cpp +++ b/source/Lib/CommonLib/IntraPrediction.cpp @@ -67,6 +67,19 @@ const uint8_t IntraPrediction::m_aucIntraFilter[MAX_INTRA_FILTER_DEPTHS] = 0 // 128xn }; +#if JVET_W0123_TIMD_FUSION +const uint8_t IntraPrediction::m_aucIntraFilterExt[MAX_INTRA_FILTER_DEPTHS] = +{ + 48, // 1xn + 48, // 2xn + 48, // 4xn + 28, // 8xn + 4, // 16xn + 0, // 32xn + 0, // 64xn + 0 // 128xn +}; +#endif // ==================================================================================================================== // Constructor / destructor / initialize @@ -86,6 +99,9 @@ IntraPrediction::IntraPrediction() } #endif +#if JVET_W0123_TIMD_FUSION + m_timdSatdCost = nullptr; +#endif m_piTemp = nullptr; m_pMdlmTemp = nullptr; #if MMLM @@ -111,6 +127,9 @@ void IntraPrediction::destroy() } #endif +#if JVET_W0123_TIMD_FUSION + delete m_timdSatdCost; m_timdSatdCost = nullptr; // reset so init() re-allocates and a repeated destroy() is a no-op +#endif delete[] m_piTemp; m_piTemp = nullptr; delete[] m_pMdlmTemp; @@ -163,6 +182,12 @@ void IntraPrediction::init(ChromaFormat chromaFormatIDC, const unsigned bitDepth } #endif +#if JVET_W0123_TIMD_FUSION + if (m_timdSatdCost == nullptr) + { + m_timdSatdCost = new
RdCost; + } +#endif if (m_piTemp == nullptr) { m_piTemp = new Pel[(MAX_CU_SIZE + 1) * (MAX_CU_SIZE + 1)]; @@ -187,6 +212,175 @@ void IntraPrediction::init(ChromaFormat chromaFormatIDC, const unsigned bitDepth } } +#if JVET_W0123_TIMD_FUSION +void IntraPrediction::xIntraPredTimdAngPdpc(Pel* pDsty,const int dstStride,Pel* refSide,const int width,const int height, int xOffset, int yOffset, int scale,int invAngle) +{ + int xlim = std::min(3 << scale, width); + for (int y = yOffset; y<height; y++) + { + int invAngleSum = 256; + if (width < 4) + { + for (int x = xOffset; x < 2; x++) + { + invAngleSum += invAngle; + int wL = 32 >> (2 * x >> scale); + Pel left = refSide[y + (invAngleSum >> 9) + 1]; + pDsty[x] = pDsty[x] + ((wL * (left - pDsty[x]) + 32) >> 6); + } + } + else + { + for (int x = xOffset; x < xlim; x++) + { + invAngleSum += invAngle; + int wL = 32 >> (2 * x >> scale); + Pel left = refSide[y + (invAngleSum >> 9) + 1]; + pDsty[x] = pDsty[x] + ((wL * (left - pDsty[x]) + 32) >> 6); + } + } + pDsty += dstStride; + } +} + +#if GRAD_PDPC +void IntraPrediction::xIntraPredTimdAngGradPdpc(Pel* pDsty, const int dstStride, Pel* refMain, Pel* refSide, const int width, const int height, int xOffset, int yOffset, int scale, int deltaPos, int intraPredAngle, const ClpRng& clpRng) +{ + for (int y = yOffset; y<height; y++) + { + const int deltaInt = deltaPos >> 6; + const int deltaFract = deltaPos & 63; + const Pel left = refSide[1 + y]; + const Pel topLeft = refMain[deltaInt] + ((deltaFract * (refMain[deltaInt + 1] - refMain[deltaInt]) + 32) >> 6); + for (int x = xOffset; x < std::min(3 << scale, width); x++) + { + int wL = 32 >> (2 * (x - xOffset) >> scale); + pDsty[x] = ClipPel(pDsty[x] + ((wL * (left - topLeft) + 32) >> 6), clpRng); + } + pDsty += dstStride; + deltaPos += intraPredAngle; + } +} +#endif + +void IntraPrediction::xIntraPredTimdHorVerPdpc(Pel* pDsty,const int dstStride,Pel* refSide,const int width,const int height, int xOffset, int yOffset, int scale,const Pel* 
refMain, const ClpRng& clpRng) +{ + const Pel topLeft = refMain[0]; + + for( int y = yOffset; y < height; y++ ) + { + memcpy(pDsty,&refMain[1],width*sizeof(Pel)); + const Pel left = refSide[1 + y]; + for (int x = xOffset; x < std::min(3 << scale, width); x++) + { + const int wL = 32 >> (2 * x >> scale); + const Pel val = pDsty[x]; + pDsty[x] = ClipPel(val + ((wL * (left - topLeft) + 32) >> 6), clpRng); + } + pDsty += dstStride; + } +} + +void IntraPrediction::xIntraPredTimdPlanarDcPdpc(const CPelBuf &pSrc, Pel* pDst, int iDstStride, int width, int height, TEMPLATE_TYPE eTempType, int iTemplateWidth, int iTemplateHeight) +{ + if (eTempType == LEFT_ABOVE_NEIGHBOR) + { + int xOffset = 0; + int yOffset = 0; + // PDPC for above template + { + const int iWidth = width; + const int iHeight = iTemplateHeight; + xOffset = iTemplateWidth; + const int scale = ((floorLog2(width) - 2 + floorLog2(height) - 2 + 2) >> 2); + for (int y = 0; y < iHeight; y++) + { + const int wT = 32 >> std::min(31, ((y << 1) >> scale)); + const Pel left = pSrc.at(y + 1, 1); + for (int x = xOffset; x < iWidth; x++) + { + const int wL = 32 >> std::min(31, ((x << 1) >> scale)); + const Pel top = pSrc.at(x + 1, 0); + const Pel val = pDst[y * iDstStride + x]; + pDst[y * iDstStride + x] = val + ((wL * (left - val) + wT * (top - val) + 32) >> 6); + } + } + } + + // PDPC for left template + { + const int iWidth = iTemplateWidth; + const int iHeight = height; + yOffset = iTemplateHeight; + const int scale = ((floorLog2(width) - 2 + floorLog2(height) - 2 + 2) >> 2); + for (int y = yOffset; y < iHeight; y++) + { + const int wT = 32 >> std::min(31, ((y << 1) >> scale)); + const Pel left = pSrc.at(y + 1, 1); + for (int x = 0; x < iWidth; x++) + { + const int wL = 32 >> std::min(31, ((x << 1) >> scale)); + const Pel top = pSrc.at(x + 1, 0); + const Pel val = pDst[y * iDstStride + x]; + pDst[y * iDstStride + x] = val + ((wL * (left - val) + wT * (top - val) + 32) >> 6); + } + } + } + } + else if (eTempType == 
LEFT_NEIGHBOR) + { + const int iHeight = height; + const int scale = ((floorLog2(width) - 2 + floorLog2(height) - 2 + 2) >> 2); + for (int y = 0; y < iHeight; y++) + { + const int wT = 32 >> std::min(31, ((y << 1) >> scale)); + const Pel left = pSrc.at(y + 1, 1); + for (int x = 0; x < iTemplateWidth; x++) + { + const int wL = 32 >> std::min(31, ((x << 1) >> scale)); + const Pel top = pSrc.at(x + 1, 0); + const Pel val = pDst[y * iDstStride + x]; + pDst[y * iDstStride + x] = val + ((wL * (left - val) + wT * (top - val) + 32) >> 6); + } + } + } + else // eTempType == ABOVE_NEIGHBOR + { + const int iWidth = width; + const int scale = ((floorLog2(width) - 2 + floorLog2(height) - 2 + 2) >> 2); + for (int y = 0; y < iTemplateHeight; y++) + { + const int wT = 32 >> std::min(31, ((y << 1) >> scale)); + const Pel left = pSrc.at(y + 1, 1); + for (int x = 0; x < iWidth; x++) + { + const int wL = 32 >> std::min(31, ((x << 1) >> scale)); + const Pel top = pSrc.at(x + 1, 0); + const Pel val = pDst[y * iDstStride + x]; + pDst[y * iDstStride + x] = val + ((wL * (left - val) + wT * (top - val) + 32) >> 6); + } + } + } +} + +void IntraPrediction::xIntraPredTimdAngLuma(Pel* pDstBuf, const ptrdiff_t dstStride, Pel* refMain, int width, int height, int deltaPos, int intraPredAngle, const ClpRng& clpRng, int xOffset, int yOffset) +{ + for (int y = yOffset; y<height; y++ ) + { + const int deltaInt = deltaPos >> 6; + const int deltaFract = deltaPos & 63; + const TFilterCoeff* const f = InterpolationFilter::getExtIntraCubicFilter(deltaFract); + int refMainIndex = deltaInt + 1 + xOffset; + for( int x = xOffset; x < width; x++, refMainIndex++ ) + { + pDstBuf[y*dstStride + x] = (f[0] * refMain[refMainIndex - 1] + f[1] * refMain[refMainIndex] + f[2] * refMain[refMainIndex + 1] + f[3] * refMain[refMainIndex + 2] + 128) >> 8; + pDstBuf[y*dstStride + x] = ClipPel( pDstBuf[y*dstStride + x], clpRng ); // always clip even though not always needed + } + deltaPos += intraPredAngle; + } +} +#endif + // 
==================================================================================================================== // Public member functions // ==================================================================================================================== @@ -249,6 +443,26 @@ int IntraPrediction::getModifiedWideAngle( int width, int height, int predMode ) return predMode; } +#if JVET_W0123_TIMD_FUSION +int IntraPrediction::getWideAngleExt( int width, int height, int predMode ) +{ + if ( predMode > DC_IDX && predMode <= EXT_VDIA_IDX ) + { + int modeShift[] = { 0, 11, 19, 23, 27, 29 }; + int deltaSize = abs(floorLog2(width) - floorLog2(height)); + if (width > height && predMode < 2 + modeShift[deltaSize]) + { + predMode += (EXT_VDIA_IDX - 1); + } + else if (height > width && predMode > EXT_VDIA_IDX - modeShift[deltaSize]) + { + predMode -= (EXT_VDIA_IDX - 1); + } + } + return predMode; +} +#endif + void IntraPrediction::setReferenceArrayLengths( const CompArea &area ) { // set Top and Left reference samples length @@ -274,6 +488,9 @@ void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, co CHECK(PU::isMIP(pu, toChannelType(compId)), "We should not get here for MIP."); const uint32_t uiDirMode = isLuma( compId ) && pu.cu->bdpcmMode ? BDPCM_IDX : !isLuma(compId) && pu.cu->bdpcmModeChroma ? BDPCM_IDX : PU::getFinalIntraMode(pu, channelType); +#if JVET_W0123_TIMD_FUSION + bool bExtIntraDir = pu.cu->timd && isLuma( compId ); +#endif CHECK( floorLog2(iWidth) < 2 && pu.cs->pcv->noChroma2x2, "Size not allowed" ); CHECK( floorLog2(iWidth) > 7, "Size not allowed" ); @@ -292,7 +509,11 @@ void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, co case(PLANAR_IDX): xPredIntraPlanar(srcBuf, piPred); break; case(DC_IDX): xPredIntraDc(srcBuf, piPred, channelType, false); break; case(BDPCM_IDX): xPredIntraBDPCM(srcBuf, piPred, isLuma(compID) ? 
pu.cu->bdpcmMode : pu.cu->bdpcmModeChroma, clpRng); break; +#if JVET_W0123_TIMD_FUSION + default: xPredIntraAng(srcBuf, piPred, channelType, clpRng, bExtIntraDir); break; +#else default: xPredIntraAng(srcBuf, piPred, channelType, clpRng); break; +#endif } #if CIIP_PDPC } @@ -316,7 +537,11 @@ void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, co pu2.intraDir[0] = pu.cu->dimdBlendMode[0]; initPredIntraParams(pu2, pu.Y(), *(pu.cs->sps)); +#if JVET_W0123_TIMD_FUSION + xPredIntraAng(srcBuf, predAng, channelType, clpRng, false); +#else xPredIntraAng(srcBuf, predAng, channelType, clpRng); +#endif #else const bool useISP = NOT_INTRA_SUBPARTITIONS != pu.cu->ispMode && isLuma( CHANNEL_TYPE_LUMA );//ok const Size cuSize = Size( pu.cu->blocks[compId].width, pu.cu->blocks[compId].height ); //ok @@ -395,7 +620,11 @@ void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, co } } +#if JVET_W0123_TIMD_FUSION + xPredIntraAng( srcBuf, predAng, channelType, clpRng, false ); +#else xPredIntraAng( srcBuf, predAng, channelType, clpRng ); +#endif #endif m_ipaParam.applyPDPC = applyPdpc; @@ -423,6 +652,50 @@ void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, co } #endif +#if JVET_W0123_TIMD_FUSION + if (pu.cu->timd && pu.cu->timdIsBlended && isLuma(compID)) + { + int width = piPred.width; + int height = piPred.height; + const UnitArea localUnitArea( pu.chromaFormat, Area( 0, 0, width, height ) ); + + PelBuf predFusion = m_tempBuffer[1].getBuf( localUnitArea.Y() ); + + const bool applyPdpc = m_ipaParam.applyPDPC; + PredictionUnit pu2 = pu; + pu2.intraDir[0] = pu.cu->timdModeSecondary; + initPredIntraParams(pu2, pu.Y(), *(pu.cs->sps)); + + switch (pu.cu->timdModeSecondary) + { + case(PLANAR_IDX): xPredIntraPlanar(srcBuf, predFusion); break; + case(DC_IDX): xPredIntraDc(srcBuf, predFusion, channelType, false); break; + default: xPredIntraAng(srcBuf, predFusion, channelType, clpRng, bExtIntraDir); break; + } + + 
m_ipaParam.applyPDPC = applyPdpc; + + // do blending + const int log2WeightSum = 6; + Pel *pelPred = piPred.buf; + Pel *pelPredFusion = predFusion.buf; + int w0 = pu.cu->timdFusionWeight[0], w1 = pu.cu->timdFusionWeight[1]; + + for( int y = 0; y < height; y++ ) + { + for( int x = 0; x < width; x++ ) + { + int blend = pelPred[x] * w0; + blend += pelPredFusion[x] * w1; + pelPred[x] = (Pel)(blend >> log2WeightSum); + } + + pelPred += piPred.stride; + pelPredFusion += predFusion.stride; + } + } +#endif + #if CIIP_PDPC if (m_ipaParam.applyPDPC || pu.ciipPDPC) #else @@ -625,6 +898,9 @@ void IntraPrediction::initPredIntraParams(const PredictionUnit & pu, const CompA { const ComponentID compId = area.compID; const ChannelType chType = toChannelType(compId); +#if JVET_W0123_TIMD_FUSION + bool bExtIntraDir = pu.cu->timd && isLuma( chType ); +#endif const bool useISP = NOT_INTRA_SUBPARTITIONS != pu.cu->ispMode && isLuma( chType ); @@ -632,31 +908,58 @@ void IntraPrediction::initPredIntraParams(const PredictionUnit & pu, const CompA const Size puSize = Size( area.width, area.height ); const Size& blockSize = useISP ? cuSize : puSize; const int dirMode = PU::getFinalIntraMode(pu, chType); +#if JVET_W0123_TIMD_FUSION + const int predMode = bExtIntraDir ? getWideAngleExt( blockSize.width, blockSize.height, dirMode ) : getModifiedWideAngle( blockSize.width, blockSize.height, dirMode ); +#else const int predMode = getModifiedWideAngle( blockSize.width, blockSize.height, dirMode ); +#endif +#if JVET_W0123_TIMD_FUSION + m_ipaParam.isModeVer = bExtIntraDir ? (predMode >= EXT_DIA_IDX) : (predMode >= DIA_IDX); +#else m_ipaParam.isModeVer = predMode >= DIA_IDX; +#endif m_ipaParam.multiRefIndex = isLuma (chType) ? 
pu.multiRefIdx : 0 ; m_ipaParam.refFilterFlag = false; m_ipaParam.interpolationFlag = false; m_ipaParam.applyPDPC = (puSize.width >= MIN_TB_SIZEY && puSize.height >= MIN_TB_SIZEY) && m_ipaParam.multiRefIndex == 0; +#if JVET_W0123_TIMD_FUSION + const int intraPredAngleMode = (m_ipaParam.isModeVer) ? (predMode - (bExtIntraDir? EXT_VER_IDX : VER_IDX)) : (-(predMode - (bExtIntraDir ? EXT_HOR_IDX : HOR_IDX))); +#else const int intraPredAngleMode = (m_ipaParam.isModeVer) ? predMode - VER_IDX : -(predMode - HOR_IDX); +#endif int absAng = 0; +#if JVET_W0123_TIMD_FUSION + if (dirMode > DC_IDX && dirMode < (bExtIntraDir ? EXT_VDIA_IDX + 1 : NUM_LUMA_MODE)) // intraPredAngle for directional modes +#else if (dirMode > DC_IDX && dirMode < NUM_LUMA_MODE) // intraPredAngle for directional modes +#endif { static const int angTable[32] = { 0, 1, 2, 3, 4, 6, 8, 10, 12, 14, 16, 18, 20, 23, 26, 29, 32, 35, 39, 45, 51, 57, 64, 73, 86, 102, 128, 171, 256, 341, 512, 1024 }; static const int invAngTable[32] = { 0, 16384, 8192, 5461, 4096, 2731, 2048, 1638, 1365, 1170, 1024, 910, 819, 712, 630, 565, 512, 468, 420, 364, 321, 287, 256, 224, 191, 161, 128, 96, 64, 48, 32, 16 }; // (512 * 32) / Angle +#if JVET_W0123_TIMD_FUSION + static const int extAngTable[64] = { 0, 1, 2, 3, 4, 5, 6,7, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 43, 46, 49, 52, 55, 58, 61, 64, 67, 70, 74, 78, 84, 90, 96, 102, 108, 114, 121, 128, 137, 146, 159, 172, 188, 204, 230, 256, 299, 342, 427, 512, 597, 682, 853, 1024, 1536, 2048, 3072 }; + static const int extInvAngTable[64] = { + 0, 32768, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3277, 2731, 2341, 2048, 1820, 1638, 1489, 1365, 1260, 1170, 1092, 1024, 964, 910, 862, 819, 762, 712, 669, 630, 596, 565, 537, 512, 489, 468, 443, 420, 390, 364, 341, 321, 303, 287, 271, 256, 239, 224, 206, 191, 174, 161, 142, 128, 110, 96, 77, 64, 55, 48, 38, 32, 21, 16, 11 + }; // (512 * 64) / Angle +#endif const int absAngMode = abs(intraPredAngleMode); const 
int signAng = intraPredAngleMode < 0 ? -1 : 1; +#if JVET_W0123_TIMD_FUSION + absAng = bExtIntraDir ? extAngTable[absAngMode] : angTable[absAngMode]; + m_ipaParam.absInvAngle = bExtIntraDir ? extInvAngTable[absAngMode] : invAngTable[absAngMode]; +#else absAng = angTable [absAngMode]; m_ipaParam.absInvAngle = invAngTable[absAngMode]; +#endif m_ipaParam.intraPredAngle = signAng * absAng; if (intraPredAngleMode < 0) { @@ -707,16 +1010,28 @@ void IntraPrediction::initPredIntraParams(const PredictionUnit & pu, const CompA { bool filterFlag = false; { +#if JVET_W0123_TIMD_FUSION + const int diff = std::min<int>( abs( predMode - (bExtIntraDir ? EXT_HOR_IDX : HOR_IDX) ), abs( predMode - (bExtIntraDir ? EXT_VER_IDX : VER_IDX) ) ); +#else const int diff = std::min<int>( abs( predMode - HOR_IDX ), abs( predMode - VER_IDX ) ); +#endif const int log2Size = ((floorLog2(puSize.width) + floorLog2(puSize.height)) >> 1); CHECK( log2Size >= MAX_INTRA_FILTER_DEPTHS, "Size not supported" ); +#if JVET_W0123_TIMD_FUSION + filterFlag = (diff > (bExtIntraDir ? m_aucIntraFilterExt[log2Size] : m_aucIntraFilter[log2Size])); +#else filterFlag = (diff > m_aucIntraFilter[log2Size]); +#endif } // Selelection of either ([1 2 1] / 4 ) refrence filter OR Gaussian 4-tap interpolation filter if (filterFlag) { +#if JVET_W0123_TIMD_FUSION + const bool isRefFilter = bExtIntraDir ? 
isIntegerSlopeExt(absAng) : isIntegerSlope(absAng); +#else const bool isRefFilter = isIntegerSlope(absAng); +#endif CHECK( puSize.width * puSize.height <= 32, "DCT-IF interpolation filter is always used for 4x4, 4x8, and 8x4 luma CB" ); m_ipaParam.refFilterFlag = isRefFilter; m_ipaParam.interpolationFlag = !isRefFilter; @@ -737,7 +1052,11 @@ void IntraPrediction::initPredIntraParams(const PredictionUnit & pu, const CompA */ //NOTE: Bit-Limit - 25-bit source +#if JVET_W0123_TIMD_FUSION +void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const ClpRng& clpRng, const bool bExtIntraDir) +#else void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const ClpRng& clpRng) +#endif { int width =int(pDst.width); int height=int(pDst.height); @@ -783,7 +1102,11 @@ void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const Ch // left extend by 1 for (int k = -(sizeSide + 1); k <= -1; k++) { +#if JVET_W0123_TIMD_FUSION + int frac32precision = bExtIntraDir ? ((-k * absInvAngle + 16) >> 5) : ((-k * absInvAngle + 8) >> 4); +#else int frac32precision = (-k * absInvAngle + 8) >> 4; +#endif int intpel = frac32precision >> 5; int fracpel = frac32precision & 31; //std::cout << " fracPel: " << fracpel << std::endl; @@ -836,7 +1159,11 @@ void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const Ch // Extend main reference to right using replication const int log2Ratio = floorLog2(width) - floorLog2(height); const int s = std::max<int>(0, bIsModeVer ? log2Ratio : -log2Ratio); +#if JVET_W0123_TIMD_FUSION + const int maxIndex = (multiRefIdx << s) + 6; +#else const int maxIndex = (multiRefIdx << s) + 2; +#endif const int refLength = bIsModeVer ? 
m_topRefLength : m_leftRefLength; const Pel val = refMain[refLength + multiRefIdx]; // right extended by 1 (z range) @@ -860,7 +1187,11 @@ void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const Ch // Extend main reference to right using replication const int log2Ratio = floorLog2(width) - floorLog2(height); const int s = std::max<int>(0, bIsModeVer ? log2Ratio : -log2Ratio); +#if JVET_W0123_TIMD_FUSION + const int maxIndex = (multiRefIdx << s) + 6; +#else const int maxIndex = (multiRefIdx << s) + 2; +#endif const int refLength = bIsModeVer ? m_topRefLength : m_leftRefLength; const Pel val = refMain[refLength + multiRefIdx]; for (int z = 1; z <= maxIndex; z++) @@ -911,10 +1242,20 @@ void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const Ch { for (int y = 0, deltaPos = intraPredAngle * (1 + multiRefIdx); y<height; y++, deltaPos += intraPredAngle, pDsty += dstStride) { +#if JVET_W0123_TIMD_FUSION + const int deltaInt = bExtIntraDir ? deltaPos >> 6 : deltaPos >> 5; + const int deltaFract = bExtIntraDir ? deltaPos & 63 : deltaPos & 31; +#else const int deltaInt = deltaPos >> 5; const int deltaFract = deltaPos & 31; +#endif +#if JVET_W0123_TIMD_FUSION + bool bIntSlope = bExtIntraDir ? 
isIntegerSlopeExt( abs(intraPredAngle) ) : isIntegerSlope( abs(intraPredAngle) ); + if ( !bIntSlope ) +#else if ( !isIntegerSlope( abs(intraPredAngle) ) ) +#endif { if( isLuma(channelType) ) { @@ -924,14 +1265,29 @@ void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const Ch const TFilterCoeff intraSmoothingFilter[6] = { TFilterCoeff(0), TFilterCoeff(64 - (deltaFract << 1)), TFilterCoeff(128 - (deltaFract << 1)), TFilterCoeff(64 + (deltaFract << 1)), TFilterCoeff(deltaFract << 1), TFilterCoeff(0) }; const TFilterCoeff intraSmoothingFilter2[6] = { TFilterCoeff(16 - (deltaFract >> 1)), TFilterCoeff(64 - 3*(deltaFract >> 1)), TFilterCoeff(96 - (deltaFract)), TFilterCoeff(64 + (deltaFract)), TFilterCoeff(16 + 3*(deltaFract >> 1)), TFilterCoeff((deltaFract >> 1)) }; +#if JVET_W0123_TIMD_FUSION + const TFilterCoeff intraSmoothingFilterExt[6] = { TFilterCoeff(0), TFilterCoeff(64 - (deltaFract)), TFilterCoeff(128 - (deltaFract)), TFilterCoeff(64 + (deltaFract)), TFilterCoeff(deltaFract), TFilterCoeff(0) }; + const TFilterCoeff intraSmoothingFilter2Ext[6] = { TFilterCoeff(16 - (deltaFract >> 2)), TFilterCoeff(64 - 3*(deltaFract >> 2)), TFilterCoeff(96 - (deltaFract >> 1)), TFilterCoeff(64 + (deltaFract >> 1)), + TFilterCoeff(16 + 3*(deltaFract >> 2)), TFilterCoeff((deltaFract >> 2)) }; + const TFilterCoeff* const f = (useCubicFilter) ? ( bExtIntraDir ? InterpolationFilter::getIntraLumaFilterTableExt(deltaFract) : InterpolationFilter::getIntraLumaFilterTable(deltaFract)) : (width >=32 && height >=32)? (bExtIntraDir ? intraSmoothingFilter2Ext : intraSmoothingFilter2) : (bExtIntraDir ? intraSmoothingFilterExt : intraSmoothingFilter); +#else const TFilterCoeff* const f = (useCubicFilter) ? InterpolationFilter::getIntraLumaFilterTable(deltaFract) : (width >=32 && height >=32)? 
intraSmoothingFilter2 : intraSmoothingFilter; +#endif #else #if IF_12TAP const TFilterCoeff intraSmoothingFilter[4] = { TFilterCoeff(64 - (deltaFract << 1)), TFilterCoeff(128 - (deltaFract << 1)), TFilterCoeff(64 + (deltaFract << 1)), TFilterCoeff(deltaFract << 1) }; +#if JVET_W0123_TIMD_FUSION + const TFilterCoeff intraSmoothingFilterExt[4] = { TFilterCoeff(64 - (deltaFract)), TFilterCoeff(128 - (deltaFract)), TFilterCoeff(64 + (deltaFract)), TFilterCoeff(deltaFract) }; +#endif #else const TFilterCoeff intraSmoothingFilter[4] = {TFilterCoeff(16 - (deltaFract >> 1)), TFilterCoeff(32 - (deltaFract >> 1)), TFilterCoeff(16 + (deltaFract >> 1)), TFilterCoeff(deltaFract >> 1)}; -#endif +#endif + +#if JVET_W0123_TIMD_FUSION + const TFilterCoeff* const f = (useCubicFilter) ? (bExtIntraDir ? InterpolationFilter::getExtIntraCubicFilter(deltaFract) : InterpolationFilter::getChromaFilterTable(deltaFract)) : (bExtIntraDir ? InterpolationFilter::getExtIntraGaussFilter(deltaFract) : intraSmoothingFilter); +#else const TFilterCoeff* const f = (useCubicFilter) ? 
InterpolationFilter::getChromaFilterTable(deltaFract) : intraSmoothingFilter; +#endif #endif for (int x = 0; x < width; x++) @@ -950,9 +1306,20 @@ void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const Ch #if IF_12TAP Pel val = ( f[0] * p[0] + f[1] * p[1] + f[2] * p[2] + f[3] * p[3] + 128 ) >> 8; +#else +#if JVET_W0123_TIMD_FUSION + int tOffset = 32; + int tShift = 6; + if (bExtIntraDir) + { + tOffset = 128; + tShift = 8; + } + Pel val = (f[0] * p[0] + f[1] * p[1] + f[2] * p[2] + f[3] * p[3] + tOffset) >> tShift; #else Pel val = (f[0] * p[0] + f[1] * p[1] + f[2] * p[2] + f[3] * p[3] + 32) >> 6; #endif +#endif #endif pDsty[x] = ClipPel(val, clpRng); // always clip even though not always needed @@ -978,7 +1345,18 @@ void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const Ch { const int scale = m_ipaParam.angularScale; const Pel left = refSide[1 + y]; +#if JVET_W0123_TIMD_FUSION + int gradOffset = 16; + int gradShift = 5; + if (bExtIntraDir) + { + gradOffset = 32; + gradShift = 6; + } + const Pel topLeft = refMain[deltaInt] + ((deltaFract * (refMain[deltaInt + 1] - refMain[deltaInt]) + gradOffset) >> gradShift); +#else const Pel topLeft = refMain[deltaInt] + ((deltaFract * (refMain[deltaInt + 1] - refMain[deltaInt]) + 16) >> 5); +#endif for (int x = 0; x < std::min(3 << scale, width); x++) { @@ -1724,6 +2102,1339 @@ void IntraPrediction::xFilterReferenceSamples(const Pel *refBufUnfiltered, Pel * } refBufFiltered[predHSize] = refBufUnfiltered[predHSize]; } + +#if JVET_W0123_TIMD_FUSION +Pel IntraPrediction::xGetPredTimdValDc( const CPelBuf &pSrc, const Size &dstSize, TEMPLATE_TYPE eTempType, int iTempHeight, int iTempWidth ) +{ + int idx, sum = 0; + Pel dcVal; + const int width = dstSize.width; + const int height = dstSize.height; + auto denom = (width == height) ? 
(width << 1) : std::max(width,height); + auto divShift = floorLog2(denom); + auto divOffset = (denom >> 1); + + if (eTempType == LEFT_NEIGHBOR) + { + denom = height; + divShift = floorLog2(denom); + divOffset = (denom >> 1); + for(idx = 0; idx < height; idx++) + sum += pSrc.at(1 + idx, 1); + dcVal = (sum + divOffset) >> divShift; + return dcVal; + } + else if (eTempType == ABOVE_NEIGHBOR) + { + denom = width; + divShift = floorLog2(denom); + divOffset = (denom >> 1); + for(idx = 0; idx < width; idx++) + sum += pSrc.at( 1 + idx, 0); + dcVal = (sum + divOffset) >> divShift; + return dcVal; + } + + if ( width >= height ) + { + for( idx = 0; idx < width; idx++ ) + { + sum += pSrc.at(iTempWidth + 1 + idx, 0); + } + } + if ( width <= height ) + { + for( idx = 0; idx < height; idx++ ) + { + sum += pSrc.at(iTempHeight + 1 + idx, 1); + } + } + dcVal = (sum + divOffset) >> divShift; + return dcVal; +} + +void IntraPrediction::predTimdIntraAng( const ComponentID compId, const PredictionUnit &pu, uint32_t uiDirMode, Pel* pPred, uint32_t uiStride, uint32_t iWidth, uint32_t iHeight, TEMPLATE_TYPE eTempType, int32_t iTemplateWidth, int32_t iTemplateHeight) +{ + const ComponentID compID = MAP_CHROMA( compId ); + + const int srcStride = m_refBufferStride[compID]; + const int srcHStride = 2; + + const CPelBuf & srcBuf = CPelBuf(getPredictorPtr(compID), srcStride, srcHStride); + const ClpRng& clpRng(pu.cu->cs->slice->clpRng(compID)); + + switch (uiDirMode) + { + case(PLANAR_IDX): xPredTimdIntraPlanar(srcBuf, pPred, uiStride, iWidth, iHeight, eTempType, iTemplateWidth, iTemplateHeight); break; + case(DC_IDX): xPredTimdIntraDc(pu, srcBuf, pPred, uiStride, iWidth, iHeight, eTempType, iTemplateWidth, iTemplateHeight); break; + default: xPredTimdIntraAng(srcBuf, clpRng, pPred, uiStride, iWidth, iHeight, eTempType, iTemplateWidth, iTemplateHeight, uiDirMode); break; + } + + if (m_ipaParam.applyPDPC && (uiDirMode == PLANAR_IDX || uiDirMode == DC_IDX)) + { + 
xIntraPredTimdPlanarDcPdpc(srcBuf, pPred, uiStride, iWidth, iHeight, eTempType, iTemplateWidth, iTemplateHeight); + } +} + +void IntraPrediction::xPredTimdIntraPlanar( const CPelBuf &pSrc, Pel* rpDst, int iDstStride, int width, int height, TEMPLATE_TYPE eTempType, int iTemplateWidth, int iTemplateHeight ) +{ + static int leftColumn[MAX_CU_SIZE+DIMD_MAX_TEMP_SIZE+1] = {0}, topRow[MAX_CU_SIZE+DIMD_MAX_TEMP_SIZE+1] ={0}, bottomRow[MAX_CU_SIZE+DIMD_MAX_TEMP_SIZE] = {0}, rightColumn[MAX_CU_SIZE+DIMD_MAX_TEMP_SIZE]={0}; + if(eTempType == LEFT_ABOVE_NEIGHBOR) + { + //predict above template + { + uint32_t w = width - iTemplateWidth; + const uint32_t log2W = floorLog2( w ); + const uint32_t log2H = floorLog2( iTemplateHeight ); + const uint32_t offset = 1 << (log2W + log2H); + for(int k = 0; k < w + 1; k++) + { + topRow[k] = pSrc.at( k + iTemplateWidth + 1, 0 ); + } + for (int k=0; k < iTemplateHeight + 1; k++) + { + leftColumn[k] = pSrc.at( k + 1, 1 ); + } + + int bottomLeft = leftColumn[iTemplateHeight]; + int topRight = topRow[w]; + for(int k = 0; k < w; k++) + { + bottomRow[k] = bottomLeft - topRow[k]; + topRow[k] = topRow[k] << log2H; + } + for(int k = 0; k < iTemplateHeight; k++) + { + rightColumn[k] = topRight - leftColumn[k]; + leftColumn[k] = leftColumn[k] << log2W; + } + + const uint32_t finalShift = 1 + log2W + log2H; + for (int y = 0; y < iTemplateHeight; y++) + { + int horPred = leftColumn[y]; + for (int x = 0; x < w; x++) + { + horPred += rightColumn[y]; + topRow[x] += bottomRow[x]; + int vertPred = topRow[x]; + rpDst[y*iDstStride+x + iTemplateWidth] = ( ( horPred << log2H ) + ( vertPred << log2W ) + offset ) >> finalShift; + } + } + } + + //predict left template + { + uint32_t h = height - iTemplateHeight; + const uint32_t log2W = floorLog2( iTemplateWidth ); + const uint32_t log2H = floorLog2( h ); + const uint32_t offset = 1 << (log2W + log2H); + for (int k = 0; k < h + 1; k++) + { + leftColumn[k] = pSrc.at( k + iTemplateHeight + 1, 1 ); + } + for(int k = 
0; k < iTemplateWidth + 1; k++) + { + topRow[k] = pSrc.at( k + 1, 0 ); + } + int bottomLeft = leftColumn[h]; + int topRight = topRow[iTemplateWidth]; + for(int k = 0; k < iTemplateWidth; k++) + { + bottomRow[k] = bottomLeft - topRow[k]; + topRow[k] = topRow[k] << log2H; + } + for(int k = 0; k < h; k++) + { + rightColumn[k] = topRight - leftColumn[k]; + leftColumn[k] = leftColumn[k] << log2W; + } + const uint32_t finalShift = 1 + log2W + log2H; + for (int y = 0; y < height; y++) + { + int horPred = leftColumn[y]; + for (int x = 0; x < iTemplateWidth; x++) + { + horPred += rightColumn[y]; + topRow[x] += bottomRow[x]; + int vertPred = topRow[x]; + rpDst[(y + iTemplateHeight)*iDstStride+x] = ( ( horPred << log2H ) + ( vertPred << log2W ) + offset ) >> finalShift; + } + } + } + } + else if(eTempType == LEFT_NEIGHBOR) + { + const uint32_t log2W = floorLog2( iTemplateWidth ); + const uint32_t log2H = floorLog2( height ); + const uint32_t offset = 1 << (log2W + log2H); + for (int k = 0; k < height + 1; k++) + { + leftColumn[k] = pSrc.at( k + iTemplateHeight + 1, 1 ); + } + for(int k = 0; k < iTemplateWidth + 1; k++) + { + topRow[k] = pSrc.at( k + 1, 0 ); + } + + int bottomLeft = leftColumn[height]; + int topRight = topRow[iTemplateWidth]; + for(int k = 0; k < iTemplateWidth; k++) + { + bottomRow[k] = bottomLeft - topRow[k]; + topRow[k] = topRow[k] << log2H; + } + for(int k = 0; k < height; k++) + { + rightColumn[k] = topRight - leftColumn[k]; + leftColumn[k] = leftColumn[k] << log2W; + } + + const uint32_t finalShift = 1 + log2W + log2H; + for (int y = 0; y < height; y++) + { + int horPred = leftColumn[y]; + for (int x = 0; x < iTemplateWidth; x++) + { + horPred += rightColumn[y]; + topRow[x] += bottomRow[x]; + int vertPred = topRow[x]; + rpDst[y*iDstStride+x] = ( ( horPred << log2H ) + ( vertPred << log2W ) + offset ) >> finalShift; + } + } + } + else if(eTempType == ABOVE_NEIGHBOR) + { + const uint32_t log2W = floorLog2( width ); + const uint32_t log2H = floorLog2( 
iTemplateHeight ); + const uint32_t offset = 1 << (log2W + log2H); + for(int k = 0; k < width + 1; k++) + { + topRow[k] = pSrc.at( k + iTemplateWidth + 1, 0 ); + } + for (int k=0; k < iTemplateHeight + 1; k++) + { + leftColumn[k] = pSrc.at( k + 1, 1 ); + } + + int bottomLeft = leftColumn[iTemplateHeight]; + int topRight = topRow[width]; + for(int k=0;k<width;k++) + { + bottomRow[k] = bottomLeft - topRow[k]; + topRow[k] = topRow[k] << log2H; + } + for(int k = 0; k < iTemplateHeight; k++) + { + rightColumn[k] = topRight - leftColumn[k]; + leftColumn[k] = leftColumn[k] << log2W; + } + + const uint32_t finalShift = 1 + log2W + log2H; + for (int y = 0; y < iTemplateHeight; y++) + { + int horPred = leftColumn[y]; + for (int x = 0; x < width; x++) + { + horPred += rightColumn[y]; + topRow[x] += bottomRow[x]; + int vertPred = topRow[x]; + rpDst[y*iDstStride+x] = ( ( horPred << log2H ) + ( vertPred << log2W ) + offset ) >> finalShift; + } + } + } + else + { + assert(0); + } +} + +void IntraPrediction::xPredTimdIntraDc( const PredictionUnit &pu, const CPelBuf &pSrc, Pel* pDst, int iDstStride, int iWidth, int iHeight, TEMPLATE_TYPE eTempType, int iTemplateWidth, int iTemplateHeight ) +{ + const Size &dstSize = Size(pu.lwidth(), pu.lheight()); + const Pel dcval = xGetPredTimdValDc( pSrc, dstSize, eTempType, iTemplateHeight, iTemplateWidth ); + if(eTempType == LEFT_ABOVE_NEIGHBOR) + { + for (int y = 0; y < iHeight; y++,pDst += iDstStride) + { + if(y < iTemplateHeight) + { + for (int x = iTemplateWidth; x < iWidth; x++) + { + pDst[x] = dcval; + } + } + else + { + for (int x = 0; x < iTemplateWidth; x++) + { + pDst[x] = dcval; + } + } + } + } + else if(eTempType == LEFT_NEIGHBOR) + { + for (int y = 0; y < iHeight; y++, pDst += iDstStride) + { + for (int x = 0; x < iTemplateWidth; x++) + { + pDst[x] = dcval; + } + } + } + else if(eTempType == ABOVE_NEIGHBOR) + { + for (int y = 0; y < iTemplateHeight; y++, pDst+=iDstStride) + { + for (int x = 0; x < iWidth; x++) + { + pDst[x] = 
dcval; + } + } + } + else + { + assert(0); + } +} + +void IntraPrediction::initPredTimdIntraParams(const PredictionUnit & pu, const CompArea area, int dirMode) +{ + const Size puSize = Size( area.width, area.height ); + const Size& blockSize = puSize; + const int predMode = getWideAngleExt( blockSize.width, blockSize.height, dirMode ); + + m_ipaParam.isModeVer = predMode >= EXT_DIA_IDX; + m_ipaParam.refFilterFlag = false; + m_ipaParam.interpolationFlag = false; + m_ipaParam.applyPDPC = puSize.width >= MIN_TB_SIZEY && puSize.height >= MIN_TB_SIZEY; + const int intraPredAngleMode = (m_ipaParam.isModeVer) ? predMode - EXT_VER_IDX : -(predMode - EXT_HOR_IDX); + + int absAng = 0; + static const int extAngTable[64] = { 0, 1, 2, 3, 4, 5, 6,7, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 43, 46, 49, 52, 55, 58, 61, 64, 67, 70, 74, 78, 84, 90, 96, 102, 108, 114, 121, 128, 137, 146, 159, 172, 188, 204, 230, 256, 299, 342, 427, 512, 597, 682, 853, 1024, 1536, 2048, 3072 }; + static const int extInvAngTable[64] = { 0, 32768, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3277, 2731, 2341, 2048, 1820, 1638, 1489, 1365, 1260, 1170, 1092, 1024, 964, 910, 862, 819, 762, 712, 669, 630, 596, 565, 537, 512, 489, 468, 443, 420, 390, 364, 341, 321, 303, 287, 271, 256, 239, 224, 206, 191, 174, 161, 142, 128, 110, 96, 77, 64, 55, 48, 38, 32, 21, 16, 11 }; // (512 * 64) / Angle + + const int absAngMode = abs(intraPredAngleMode); + const int signAng = intraPredAngleMode < 0 ? -1 : 1; + absAng = extAngTable [absAngMode]; + + m_ipaParam.absInvAngle = extInvAngTable[absAngMode]; + m_ipaParam.intraPredAngle = signAng * absAng; + + if (dirMode > 1) + { + if (intraPredAngleMode < 0) + { + m_ipaParam.applyPDPC = false; + } + else if (intraPredAngleMode > 0) + { + const int sideSize = m_ipaParam.isModeVer ? 
puSize.height : puSize.width; + const int maxScale = 2; +#if GRAD_PDPC + m_ipaParam.useGradPDPC = false; +#endif + m_ipaParam.angularScale = std::min(maxScale, floorLog2(sideSize) - (floorLog2(3 * m_ipaParam.absInvAngle - 2) - 8)); +#if GRAD_PDPC + if (m_ipaParam.angularScale < 0) + { + m_ipaParam.angularScale = (floorLog2(puSize.width) + floorLog2(puSize.height) - 2) >> 2; + m_ipaParam.useGradPDPC = true; + } +#endif + m_ipaParam.applyPDPC &= m_ipaParam.angularScale >= 0; + } + } +} + +void IntraPrediction::xPredTimdIntraAng( const CPelBuf &pSrc, const ClpRng& clpRng, Pel* pTrueDst, int iDstStride, int iWidth, int iHeight, TEMPLATE_TYPE eTempType, int iTemplateWidth , int iTemplateHeight, uint32_t dirMode) +{ + int width = iWidth; + int height = iHeight; + const bool bIsModeVer = m_ipaParam.isModeVer; + const int intraPredAngle = m_ipaParam.intraPredAngle; + const int invAngle = m_ipaParam.absInvAngle; + Pel* refMain; + Pel* refSide; + static Pel refAbove[2 * MAX_CU_SIZE + 5 + 33 * MAX_REF_LINE_IDX]; + static Pel refLeft[2 * MAX_CU_SIZE + 5 + 33 * MAX_REF_LINE_IDX]; + + // Initialize the Main and Left reference array. + if (intraPredAngle < 0) + { + for (int x = 0; x <= width + 1; x++) + { + refAbove[x + height] = pSrc.at(x, 0); + } + for (int y = 0; y <= height + 1; y++) + { + refLeft[y + width] = pSrc.at(y, 1); + } + refMain = bIsModeVer ? refAbove + height : refLeft + width; + refSide = bIsModeVer ? refLeft + width : refAbove + height; + // Extend the Main reference to the left. + int sizeSide = bIsModeVer ? height : width; + for (int k = -sizeSide; k <= -1; k++) + { + refMain[k] = refSide[std::min((-k * invAngle + 256) >> 9, sizeSide)]; + } + } + else + { + for (int x = 0; x <= m_topRefLength; x++) + { + refAbove[x] = pSrc.at(x, 0); + } + for (int y = 0; y <= m_leftRefLength; y++) + { + refLeft[y] = pSrc.at(y, 1); + } + refMain = bIsModeVer ? refAbove : refLeft; + refSide = bIsModeVer ? 
refLeft : refAbove; + // Extend main reference to right using replication + const int log2Ratio = floorLog2(width - iTemplateWidth) - floorLog2(height - iTemplateHeight); + const int s = std::max<int>(0, bIsModeVer ? log2Ratio : -log2Ratio); + const int maxIndex = (std::max(iTemplateWidth, iTemplateHeight) << s) + 2 + std::max(iTemplateWidth, iTemplateHeight); + const int refLength = bIsModeVer ? m_topRefLength : m_leftRefLength; + const Pel val = refMain[refLength]; + for (int z = 1; z <= maxIndex; z++) + { + refMain[refLength + z] = val; + } + } + + // swap width/height if we are doing a horizontal mode: + static Pel tempArray[(MAX_CU_SIZE+DIMD_MAX_TEMP_SIZE)*(MAX_CU_SIZE+DIMD_MAX_TEMP_SIZE)]; ///< buffer size may not be big enough + const int dstStride = bIsModeVer ? iDstStride : (MAX_CU_SIZE+DIMD_MAX_TEMP_SIZE); + Pel *pDst = bIsModeVer ? pTrueDst : tempArray; + if (!bIsModeVer) + { + std::swap(width, height); + std::swap(iTemplateWidth, iTemplateHeight); + } + + if( intraPredAngle == 0 ) // pure vertical or pure horizontal + { + if(eTempType == LEFT_ABOVE_NEIGHBOR) + { + if (m_ipaParam.applyPDPC) + { + int scale = (floorLog2(width) + floorLog2(height) - 2) >> 2; + xIntraPredTimdHorVerPdpc(pDst, dstStride, refSide, width, iTemplateHeight, iTemplateWidth, 0, scale, refMain, clpRng); + xIntraPredTimdHorVerPdpc(pDst+iTemplateHeight*iDstStride, dstStride, refSide, iTemplateWidth, height, 0, iTemplateHeight, scale, refMain, clpRng); + } + else + { + for (int y = 0; y < iTemplateHeight; y++) + { + memcpy(pDst + y * dstStride + iTemplateWidth, &refMain[iTemplateWidth + 1], (width - iTemplateWidth) * sizeof(Pel)); + } + for (int y = iTemplateHeight; y < height; y++) + { + memcpy(pDst + y * dstStride, &refMain[1], iTemplateWidth * sizeof(Pel)); + } + } + } + else if(eTempType == LEFT_NEIGHBOR || eTempType == ABOVE_NEIGHBOR) + { + if((eTempType == LEFT_NEIGHBOR && bIsModeVer)||(eTempType == ABOVE_NEIGHBOR && !bIsModeVer)) + { + if (m_ipaParam.applyPDPC) + { + const int 
scale = (floorLog2(width) + floorLog2(height) - 2) >> 2; + xIntraPredTimdHorVerPdpc(pDst, dstStride, refSide, iTemplateWidth, height, 0, 0, scale, refMain, clpRng); + } + else + { + for (int y = 0; y < height; y++) + { + for (int x = 0; x < iTemplateWidth; x++) + { + pDst[y * dstStride+x] = refMain[x + 1]; + } + } + } + } + else + { + if (m_ipaParam.applyPDPC) + { + const int scale = (floorLog2(width) + floorLog2(height) - 2) >> 2; + xIntraPredTimdHorVerPdpc(pDst, dstStride, refSide, width, iTemplateHeight, 0, 0, scale, refMain, clpRng); + } + else + { + for (int y = 0; y < iTemplateHeight; y++) + { + memcpy(pDst + y * dstStride, &refMain[1], width * sizeof(Pel)); + } + } + } + } + else + { + assert(0); + } + } + else + { + Pel *pDsty=pDst; + if ( !isIntegerSlopeExt( abs(intraPredAngle) ) ) + { + int deltaPos = intraPredAngle; + if (eTempType == LEFT_ABOVE_NEIGHBOR) + { + Pel *pDsty=pDst; + // Above template + xIntraPredTimdAngLuma(pDsty, dstStride, refMain, width, iTemplateHeight, deltaPos, intraPredAngle, clpRng, iTemplateWidth, 0); + // Left template + for (int y = 0; y < iTemplateHeight; y++) + deltaPos += intraPredAngle; + xIntraPredTimdAngLuma(pDsty, dstStride, refMain, iTemplateWidth, height, deltaPos, intraPredAngle, clpRng, 0, iTemplateHeight); +#if GRAD_PDPC + if (m_ipaParam.applyPDPC && m_ipaParam.useGradPDPC) + { + int deltaPos2 = intraPredAngle; + const int scale = m_ipaParam.angularScale; + xIntraPredTimdAngGradPdpc(pDst, dstStride, refMain, refSide, width, iTemplateHeight, iTemplateWidth, 0, scale, deltaPos2, intraPredAngle, clpRng); + for (int y = 0; y < iTemplateHeight; y++) + deltaPos2 += intraPredAngle; + xIntraPredTimdAngGradPdpc(pDst+iTemplateHeight*dstStride, dstStride, refMain, refSide, iTemplateWidth, height, 0, iTemplateHeight, scale, deltaPos2, intraPredAngle, clpRng); + } + else +#endif + if (m_ipaParam.applyPDPC) + { + const int scale = m_ipaParam.angularScale; + xIntraPredTimdAngPdpc(pDst, dstStride, refSide, width, iTemplateHeight, 
iTemplateWidth, 0, scale, invAngle); + xIntraPredTimdAngPdpc(pDst+iTemplateHeight*dstStride, dstStride, refSide, iTemplateWidth, height, 0, iTemplateHeight, scale, invAngle); + } + } + else if (eTempType == LEFT_NEIGHBOR || eTempType == ABOVE_NEIGHBOR) + { + int iRegionWidth, iRegionHeight; + if((eTempType == LEFT_NEIGHBOR && bIsModeVer)||(eTempType == ABOVE_NEIGHBOR && !bIsModeVer)) + { + iRegionWidth = iTemplateWidth; + iRegionHeight = height; + } + else + { + iRegionWidth = width; + iRegionHeight = iTemplateHeight; + } + xIntraPredTimdAngLuma(pDsty, dstStride, refMain, iRegionWidth, iRegionHeight, deltaPos, intraPredAngle, clpRng, 0, 0); +#if GRAD_PDPC + if (m_ipaParam.applyPDPC && m_ipaParam.useGradPDPC) + { + int deltaPos2 = intraPredAngle; + const int scale = m_ipaParam.angularScale; + xIntraPredTimdAngGradPdpc(pDst, dstStride, refMain, refSide, iRegionWidth, iRegionHeight, 0, 0, scale, deltaPos2, intraPredAngle, clpRng); + } + else +#endif + if (m_ipaParam.applyPDPC) + { + const int scale = m_ipaParam.angularScale; + xIntraPredTimdAngPdpc(pDst, dstStride, refSide, iRegionWidth, iRegionHeight, 0, 0, scale, invAngle); + } + } + } + else + { + if(eTempType == LEFT_ABOVE_NEIGHBOR) + { + Pel *pDsty=pDst; + for (int y = 0, deltaPos = intraPredAngle; y<height; y++, deltaPos += intraPredAngle, pDsty += dstStride) + { + const int deltaInt = deltaPos >> 6; + int iStartIdx, iEndIdx; + if(y < iTemplateHeight) + { + iStartIdx = iTemplateWidth; + iEndIdx = width - 1; + } + else + { + iStartIdx = 0; + iEndIdx = iTemplateWidth - 1; + } + memcpy(pDsty + iStartIdx, &refMain[iStartIdx + deltaInt + 1], (iEndIdx - iStartIdx + 1) * sizeof(Pel)); + } +#if GRAD_PDPC + if (m_ipaParam.applyPDPC && m_ipaParam.useGradPDPC) + { + int deltaPos2 = intraPredAngle; + const int scale = m_ipaParam.angularScale; + xIntraPredTimdAngGradPdpc(pDst, dstStride, refMain, refSide, width, iTemplateHeight, iTemplateWidth, 0, scale, deltaPos2, intraPredAngle, clpRng); + for (int y = 0; y < 
iTemplateHeight; y++) + deltaPos2 += intraPredAngle; + xIntraPredTimdAngGradPdpc(pDst+iTemplateHeight*dstStride, dstStride, refMain, refSide, iTemplateWidth, height, 0, iTemplateHeight, scale, deltaPos2, intraPredAngle, clpRng); + } + else +#endif + if (m_ipaParam.applyPDPC) + { + const int scale = m_ipaParam.angularScale; + xIntraPredTimdAngPdpc(pDst, dstStride, refSide, width, iTemplateHeight, iTemplateWidth, 0, scale, invAngle); + xIntraPredTimdAngPdpc(pDst+iTemplateHeight*dstStride, dstStride, refSide, iTemplateWidth, height, 0, iTemplateHeight, scale, invAngle); + } + } + else // if (eTempType == LEFT_NEIGHBOR || eTempType == ABOVE_NEIGHBOR) + { + Pel *pDsty=pDst; + assert(eTempType == LEFT_NEIGHBOR || eTempType == ABOVE_NEIGHBOR); + int iRegionWidth, iRegionHeight; + if((eTempType == LEFT_NEIGHBOR && bIsModeVer)||(eTempType == ABOVE_NEIGHBOR && !bIsModeVer)) + { + iRegionWidth = iTemplateWidth; + iRegionHeight = height; + } + else + { + iRegionWidth = width; + iRegionHeight = iTemplateHeight; + } + for (int y = 0, deltaPos = intraPredAngle; y<iRegionHeight; y++, deltaPos += intraPredAngle, pDsty += dstStride) + { + const int deltaInt = deltaPos >> 6; + memcpy(pDsty, &refMain[deltaInt + 1], iRegionWidth * sizeof(Pel)); + } +#if GRAD_PDPC + if (m_ipaParam.applyPDPC && m_ipaParam.useGradPDPC) + { + int deltaPos2 = intraPredAngle; + const int scale = m_ipaParam.angularScale; + xIntraPredTimdAngGradPdpc(pDst, dstStride, refMain, refSide, iRegionWidth, iRegionHeight, 0, 0, scale, deltaPos2, intraPredAngle, clpRng); + } + else +#endif + if (m_ipaParam.applyPDPC) + { + const int scale = m_ipaParam.angularScale; + xIntraPredTimdAngPdpc(pDst, dstStride, refSide, iRegionWidth, iRegionHeight, 0, 0, scale, invAngle); + } + } + } + } + + // Flip the block if this is the horizontal mode + if (!bIsModeVer) + { + if(eTempType == LEFT_ABOVE_NEIGHBOR) + { + for (int y = 0; y < height; y++) + { + int iStartIdx, iEndIdx; + if(y < iTemplateHeight) + { + iStartIdx = iTemplateWidth; 
+ iEndIdx = width - 1; + } + else + { + iStartIdx = 0; + iEndIdx = iTemplateWidth - 1; + } + for (int x = iStartIdx; x <= iEndIdx; x++) + { + pTrueDst[x*iDstStride+y] = pDst[y*dstStride+x]; + } + } + } + else if(eTempType == LEFT_NEIGHBOR) + { + for (int y = 0; y < iTemplateHeight; y++) + { + for (int x = 0; x < width; x++) + { + pTrueDst[x*iDstStride+y] = pDst[y*dstStride+x]; + } + } + } + else if(eTempType == ABOVE_NEIGHBOR) + { + for (int y = 0; y < height; y++) + { + for (int x = 0; x < iTemplateWidth; x++) + { + pTrueDst[x*iDstStride+y] = pDst[y*dstStride+x]; + } + } + } + else + { + assert(0); + } + } +} + +void IntraPrediction::initTimdIntraPatternLuma(const CodingUnit &cu, const CompArea &area, int iTemplateWidth, int iTemplateHeight, uint32_t uiRefWidth, uint32_t uiRefHeight) +{ + const CodingStructure& cs = *cu.cs; + Pel *refBufUnfiltered = m_refBuffer[area.compID][PRED_BUF_UNFILTERED]; + bool bLeftAbove = iTemplateHeight > 0 && iTemplateWidth > 0; + m_leftRefLength = bLeftAbove ? (uiRefHeight << 1) : ((uiRefHeight + iTemplateHeight) << 1); + m_topRefLength = bLeftAbove ? 
(uiRefWidth << 1) : ((uiRefWidth + iTemplateWidth) << 1); + xFillTimdReferenceSamples(cs.picture->getRecoBuf(area), refBufUnfiltered, area, cu, iTemplateWidth, iTemplateHeight); +} + +void IntraPrediction::xFillTimdReferenceSamples(const CPelBuf &recoBuf, Pel* refBufUnfiltered, const CompArea &area, const CodingUnit &cu, int iTemplateWidth, int iTemplateHeight) +{ + const ChannelType chType = toChannelType( area.compID ); + const CodingStructure &cs = *cu.cs; + const SPS &sps = *cs.sps; + const PreCalcValues &pcv = *cs.pcv; + + const int tuWidth = area.width; + const int tuHeight = area.height; + const int predSize = m_topRefLength; + const int predHSize = m_leftRefLength; + const int predStride = predSize + 1; + m_refBufferStride[area.compID] = predStride; + + const bool noShift = pcv.noChroma2x2 && area.width == 4; // don't shift on the lowest level (chroma not-split) + const int unitWidth = tuWidth <= 2 && cu.ispMode && isLuma(area.compID) ? tuWidth : pcv.minCUWidth >> (noShift ? 0 : getComponentScaleX(area.compID, sps.getChromaFormatIdc())); + const int unitHeight = tuHeight <= 2 && cu.ispMode && isLuma(area.compID) ? tuHeight : pcv.minCUHeight >> (noShift ? 
0 : getComponentScaleY(area.compID, sps.getChromaFormatIdc())); + int leftTempUnitNum = 0; + int aboveTempUnitNum = 0; + if (iTemplateHeight >= 4) + { + leftTempUnitNum = iTemplateHeight / unitHeight; + } + if (iTemplateWidth >= 4) + { + aboveTempUnitNum = iTemplateWidth / unitWidth; + } + + const int totalAboveUnits = (predSize + (unitWidth - 1)) / unitWidth - aboveTempUnitNum; + const int totalLeftUnits = (predHSize + (unitHeight - 1)) / unitHeight - leftTempUnitNum; + const int totalUnits = totalAboveUnits + totalLeftUnits + 1 + aboveTempUnitNum + leftTempUnitNum; //+1 for top-left + const int numAboveUnits = std::max<int>( tuWidth / unitWidth, 1 ); + const int numLeftUnits = std::max<int>( tuHeight / unitHeight, 1 ); + const int numAboveRightUnits = totalAboveUnits - numAboveUnits; + const int numLeftBelowUnits = totalLeftUnits - numLeftUnits; + + // ----- Step 1: analyze neighborhood ----- + const Position posLT = area; + const Position posRT = area.topRight(); + const Position posLB = area.bottomLeft(); + + bool neighborFlags[4 * MAX_NUM_PART_IDXS_IN_CTU_WIDTH + 1]; + int numIntraNeighbor = 0; + + memset( neighborFlags, 0, totalUnits ); + + neighborFlags[totalLeftUnits] = isAboveLeftAvailable( cu, chType, posLT.offset(-iTemplateWidth, -iTemplateHeight) ); + neighborFlags[totalLeftUnits + leftTempUnitNum] = neighborFlags[totalLeftUnits]; + neighborFlags[totalLeftUnits + leftTempUnitNum + aboveTempUnitNum] = neighborFlags[totalLeftUnits]; + numIntraNeighbor += neighborFlags[totalLeftUnits] ? 1 : 0; + numIntraNeighbor += leftTempUnitNum > 0 && neighborFlags[totalLeftUnits] ? 1 : 0; + numIntraNeighbor += aboveTempUnitNum > 0 && neighborFlags[totalLeftUnits] ? 
1 : 0; + numIntraNeighbor += isAboveAvailable ( cu, chType, posLT.offset(0, -iTemplateHeight), numAboveUnits, unitWidth, (neighborFlags + totalLeftUnits + 1 + leftTempUnitNum + aboveTempUnitNum) ); + numIntraNeighbor += isAboveRightAvailable( cu, chType, posRT.offset(0, -iTemplateHeight), numAboveRightUnits, unitWidth, (neighborFlags + totalLeftUnits + 1 + leftTempUnitNum + aboveTempUnitNum + numAboveUnits) ); + numIntraNeighbor += isLeftAvailable ( cu, chType, posLT.offset(-iTemplateWidth, 0), numLeftUnits, unitHeight, (neighborFlags + totalLeftUnits - 1) ); + numIntraNeighbor += isBelowLeftAvailable ( cu, chType, posLB.offset(-iTemplateWidth, 0), numLeftBelowUnits, unitHeight, (neighborFlags + totalLeftUnits - 1 - numLeftUnits) ); + + // ----- Step 2: fill reference samples (depending on neighborhood) ----- + + const Pel* srcBuf = recoBuf.buf; + const int srcStride = recoBuf.stride; + Pel* ptrDst = refBufUnfiltered; + const Pel* ptrSrc; + const Pel valueDC = 1 << (sps.getBitDepth( chType ) - 1); + + + if( numIntraNeighbor == 0 ) + { + // Fill border with DC value + for (int j = 0; j <= predSize; j++) { ptrDst[j] = valueDC; } + for (int i = 0; i <= predHSize; i++) + { + ptrDst[i + predStride] = valueDC; + } + } + else if( numIntraNeighbor == totalUnits ) + { + // Fill top-left border and top and top right with rec. 
samples + ptrSrc = srcBuf - (1 + iTemplateHeight) * srcStride - (1 + iTemplateWidth); + for (int j = 0; j <= predSize; j++) + { + ptrDst[j] = ptrSrc[j]; + } + for (int i = 0; i <= predHSize; i++) + { + ptrDst[i + predStride] = ptrSrc[i * srcStride]; + } + } + else // reference samples are partially available + { + // Fill top-left sample(s) if available + ptrSrc = srcBuf - (1 + iTemplateHeight) * srcStride - (1 + iTemplateWidth); + ptrDst = refBufUnfiltered; + if (neighborFlags[totalLeftUnits]) + { + for (int i = 0; i <= iTemplateWidth; i++) + ptrDst[i] = ptrSrc[i]; + for (int i = 0; i <= iTemplateHeight; i++) + ptrDst[i + predStride] = ptrSrc[i * srcStride]; + } + + // Fill left & below-left samples if available (downwards) + ptrSrc += (1 + iTemplateHeight) * srcStride; + ptrDst += (1 + iTemplateHeight) + predStride; + for (int unitIdx = totalLeftUnits - 1; unitIdx > 0; unitIdx--) + { + if (neighborFlags[unitIdx]) + { + for (int i = 0; i < unitHeight; i++) + { + ptrDst[i] = ptrSrc[i * srcStride]; + } + } + ptrSrc += unitHeight * srcStride; + ptrDst += unitHeight; + } + // Fill last below-left sample(s) + if (neighborFlags[0]) + { + int lastSample = ((predHSize - iTemplateHeight) % unitHeight == 0) ? unitHeight : (predHSize - iTemplateHeight) % unitHeight; + for (int i = 0; i < lastSample; i++) + { + ptrDst[i] = ptrSrc[i * srcStride]; + } + } + + // Fill above & above-right samples if available (left-to-right) + ptrSrc = srcBuf - srcStride * (1 + iTemplateHeight); + ptrDst = refBufUnfiltered + 1 + iTemplateWidth; + for (int unitIdx = totalLeftUnits + 1 + leftTempUnitNum + aboveTempUnitNum; unitIdx < totalUnits - 1; unitIdx++) + { + if (neighborFlags[unitIdx]) + { + for (int j = 0; j < unitWidth; j++) + { + ptrDst[j] = ptrSrc[j]; + } + } + ptrSrc += unitWidth; + ptrDst += unitWidth; + } + // Fill last above-right sample(s) + if (neighborFlags[totalUnits - 1]) + { + int lastSample = ((predSize - iTemplateWidth) % unitWidth == 0) ? 
unitWidth : (predSize - iTemplateWidth) % unitWidth; + for (int j = 0; j < lastSample; j++) + { + ptrDst[j] = ptrSrc[j]; + } + } + + // pad from first available down to the last below-left + ptrDst = refBufUnfiltered; + int lastAvailUnit = 0; + if (!neighborFlags[0]) + { + int firstAvailUnit = 1; + while (firstAvailUnit < totalUnits && !neighborFlags[firstAvailUnit]) + { + firstAvailUnit++; + } + + // first available sample + int firstAvailRow = -1; + int firstAvailCol = 0; + if (firstAvailUnit < totalLeftUnits) + { + firstAvailRow = (totalLeftUnits - firstAvailUnit) * unitHeight + iTemplateHeight; + } + else if (firstAvailUnit == totalLeftUnits) + { + firstAvailRow = iTemplateHeight; + } + else + { + firstAvailCol = (firstAvailUnit - (totalLeftUnits + leftTempUnitNum + aboveTempUnitNum) - 1) * unitWidth + 1 + iTemplateWidth; + } + const Pel firstAvailSample = ptrDst[firstAvailRow < 0 ? firstAvailCol : firstAvailRow + predStride]; + + // last sample below-left (n.a.) + int lastRow = predHSize; + + // fill left column + for (int i = lastRow; i > firstAvailRow; i--) + { + ptrDst[i + predStride] = firstAvailSample; + } + // fill top row + if (firstAvailCol > 0) + { + for (int j = 0; j < firstAvailCol; j++) + { + ptrDst[j] = firstAvailSample; + } + } + lastAvailUnit = firstAvailUnit; + } + + // pad all other reference samples. + int currUnit = lastAvailUnit + 1; + while (currUnit < totalUnits) + { + if (!neighborFlags[currUnit]) // samples not available + { + // last available sample + int lastAvailRow = -1; + int lastAvailCol = 0; + if (lastAvailUnit < totalLeftUnits) + { + lastAvailRow = (totalLeftUnits - lastAvailUnit - 1) * unitHeight + iTemplateHeight + 1; + } + else if (lastAvailUnit == totalLeftUnits) + { + lastAvailCol = iTemplateWidth; + } + else + { + lastAvailCol = (lastAvailUnit - (totalLeftUnits + leftTempUnitNum + aboveTempUnitNum)) * unitWidth + iTemplateWidth; + } + const Pel lastAvailSample = ptrDst[lastAvailRow < 0 ? 
lastAvailCol : lastAvailRow + predStride]; + + // fill current unit with last available sample + if (currUnit < totalLeftUnits) + { + for (int i = lastAvailRow - 1; i >= lastAvailRow - unitHeight; i--) + { + ptrDst[i + predStride] = lastAvailSample; + } + } + else if (currUnit == totalLeftUnits) + { + for (int i = 0; i < iTemplateHeight + 1; i++) + { + ptrDst[i + predStride] = lastAvailSample; + } + for (int j = 0; j < iTemplateWidth + 1; j++) + { + ptrDst[j] = lastAvailSample; + } + } + else + { + int numSamplesInUnit = (currUnit == totalUnits - 1) ? (((predSize - iTemplateWidth) % unitWidth == 0) ? unitWidth : (predSize - iTemplateWidth) % unitWidth) : unitWidth; + for (int j = lastAvailCol + 1; j <= lastAvailCol + numSamplesInUnit; j++) + { + ptrDst[j] = lastAvailSample; + } + } + } + lastAvailUnit = currUnit; + currUnit++; + } + } +} + +int IntraPrediction::deriveTimdMode( const CPelBuf &recoBuf, const CompArea &area, CodingUnit &cu ) +{ + int channelBitDepth = cu.slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA); + SizeType uiWidth = cu.lwidth(); + SizeType uiHeight = cu.lheight(); + + static Pel PredLuma[(MAX_CU_SIZE + DIMD_MAX_TEMP_SIZE) * (MAX_CU_SIZE + DIMD_MAX_TEMP_SIZE)]; + memset(PredLuma, 0, (MAX_CU_SIZE + DIMD_MAX_TEMP_SIZE) * (MAX_CU_SIZE + DIMD_MAX_TEMP_SIZE) * sizeof(Pel)); + Pel* piPred = PredLuma; + uint32_t uiPredStride = MAX_CU_SIZE + DIMD_MAX_TEMP_SIZE; + + int iCurX = cu.lx(); + int iCurY = cu.ly(); + int iRefX = -1, iRefY = -1; + uint32_t uiRefWidth = 0, uiRefHeight = 0; + + int iTempWidth = 4, iTempHeight = 4; + if(uiWidth <= 8) + { + iTempWidth = 2; + } + if(uiHeight <= 8) + { + iTempHeight = 2; + } + + TEMPLATE_TYPE eTempType = CU::deriveTimdRefType(iCurX, iCurY, uiWidth, uiHeight, iTempWidth, iTempHeight, iRefX, iRefY, uiRefWidth, uiRefHeight); + + if (eTempType != NO_NEIGHBOR) + { + const CodingStructure& cs = *cu.cs; + m_ipaParam.multiRefIndex = iTempWidth; + Pel* piOrg = cs.picture->getRecoBuf( area ).buf; + int iOrgStride = 
cs.picture->getRecoBuf( area ).stride; + piOrg += (iRefY - iCurY) * iOrgStride + (iRefX - iCurX); + DistParam distParamSad[2]; // above, left + distParamSad[0].applyWeight = false; + distParamSad[0].useMR = false; + distParamSad[1].applyWeight = false; + distParamSad[1].useMR = false; + if(eTempType == LEFT_ABOVE_NEIGHBOR) + { + m_timdSatdCost->setTimdDistParam(distParamSad[0], piOrg + iTempWidth, piPred + iTempWidth, iOrgStride, uiPredStride, channelBitDepth, COMPONENT_Y, uiWidth, iTempHeight, 0, 1, true); // Use HAD (SATD) cost + m_timdSatdCost->setTimdDistParam(distParamSad[1], piOrg + iTempHeight * iOrgStride, piPred + iTempHeight * uiPredStride, iOrgStride, uiPredStride, channelBitDepth, COMPONENT_Y, iTempWidth, uiHeight, 0, 1, true); // Use HAD (SATD) cost + } + else if(eTempType == LEFT_NEIGHBOR) + { + m_timdSatdCost->setTimdDistParam(distParamSad[1], piOrg, piPred, iOrgStride, uiPredStride, channelBitDepth, COMPONENT_Y, iTempWidth, uiHeight, 0, 1, true); + } + else if(eTempType == ABOVE_NEIGHBOR) + { + m_timdSatdCost->setTimdDistParam(distParamSad[0], piOrg, piPred, iOrgStride, uiPredStride, channelBitDepth, COMPONENT_Y, uiWidth, iTempHeight, 0, 1, true); + } + initTimdIntraPatternLuma(cu, area, eTempType != ABOVE_NEIGHBOR ? iTempWidth : 0, eTempType != LEFT_NEIGHBOR ? iTempHeight : 0, uiRefWidth, uiRefHeight); + + uint32_t uiIntraDirNeighbor[5] = {0}, modeIdx = 0; + bool includedMode[EXT_VDIA_IDX + 1]; + memset(includedMode, false, (EXT_VDIA_IDX + 1) * sizeof(bool)); + auto &pu = *cu.firstPU; + uint32_t uiRealW = uiRefWidth + (eTempType == LEFT_NEIGHBOR? iTempWidth : 0); + uint32_t uiRealH = uiRefHeight + (eTempType == ABOVE_NEIGHBOR? 
iTempHeight : 0); + uint64_t maxCost = (uint64_t)(iTempWidth * cu.lheight() + iTempHeight * cu.lwidth()); + + uint64_t uiBestCost = MAX_UINT64; + int iBestMode = PLANAR_IDX; + uint64_t uiSecondaryCost = MAX_UINT64; + int iSecondaryMode = PLANAR_IDX; + + const Position posLTx = pu.Y().topLeft(); + const Position posRTx = pu.Y().topRight(); + const Position posLBx = pu.Y().bottomLeft(); + + // left + const PredictionUnit *puLeftx = pu.cs->getPURestricted(posLBx.offset(-1, 0), pu, pu.chType); + if (puLeftx && CU::isIntra(*puLeftx->cu)) + { + uiIntraDirNeighbor[modeIdx] = PU::getIntraDirLuma( *puLeftx ); + if (!puLeftx->cu->timd) + { + uiIntraDirNeighbor[modeIdx] = MAP67TO131(uiIntraDirNeighbor[modeIdx]); + } + if( !includedMode[uiIntraDirNeighbor[modeIdx]] ) + { + includedMode[uiIntraDirNeighbor[modeIdx]] = true; + modeIdx++; + } + } + // above + const PredictionUnit *puAbovex = pu.cs->getPURestricted(posRTx.offset(0, -1), pu, pu.chType); + if (puAbovex && CU::isIntra(*puAbovex->cu) && CU::isSameCtu(*pu.cu, *puAbovex->cu)) + { + uiIntraDirNeighbor[modeIdx] =PU::getIntraDirLuma( *puAbovex ); + if (!puAbovex->cu->timd) + { + uiIntraDirNeighbor[modeIdx] = MAP67TO131(uiIntraDirNeighbor[modeIdx]); + } + if( !includedMode[uiIntraDirNeighbor[modeIdx]] ) + { + includedMode[uiIntraDirNeighbor[modeIdx]] = true; + modeIdx++; + } + } + // below left + const PredictionUnit *puLeftBottomx = cs.getPURestricted( posLBx.offset( -1, 1 ), pu, pu.chType ); + if (puLeftBottomx && CU::isIntra(*puLeftBottomx->cu)) + { + uiIntraDirNeighbor[modeIdx] = PU::getIntraDirLuma( *puLeftBottomx ); + if (!puLeftBottomx->cu->timd) + { + uiIntraDirNeighbor[modeIdx] = MAP67TO131(uiIntraDirNeighbor[modeIdx]); + } + if( !includedMode[uiIntraDirNeighbor[modeIdx]] ) + { + includedMode[uiIntraDirNeighbor[modeIdx]] = true; + modeIdx++; + } + } + // above right + const PredictionUnit *puAboveRightx = cs.getPURestricted( posRTx.offset( 1, -1 ), pu, pu.chType ); + if (puAboveRightx && 
CU::isIntra(*puAboveRightx->cu)) + { + uiIntraDirNeighbor[modeIdx] = PU::getIntraDirLuma( *puAboveRightx ); + if (!puAboveRightx->cu->timd) + { + uiIntraDirNeighbor[modeIdx] = MAP67TO131(uiIntraDirNeighbor[modeIdx]); + } + if( !includedMode[uiIntraDirNeighbor[modeIdx]] ) + { + includedMode[uiIntraDirNeighbor[modeIdx]] = true; + modeIdx++; + } + } + //above left + const PredictionUnit *puAboveLeftx = cs.getPURestricted( posLTx.offset( -1, -1 ), pu, pu.chType ); + if( puAboveLeftx && CU::isIntra(*puAboveLeftx->cu) ) + { + uiIntraDirNeighbor[modeIdx] = PU::getIntraDirLuma( *puAboveLeftx ); + if (!puAboveLeftx->cu->timd) + { + uiIntraDirNeighbor[modeIdx] = MAP67TO131(uiIntraDirNeighbor[modeIdx]); + } + if( !includedMode[uiIntraDirNeighbor[modeIdx]] ) + { + includedMode[uiIntraDirNeighbor[modeIdx]] = true; + modeIdx++; + } + } + bool bNoAngular = false; + if(modeIdx >= 2) + { + bNoAngular = true; + for(uint32_t i = 0; i < modeIdx; i++) + { + if(uiIntraDirNeighbor[i] > DC_IDX) + { + bNoAngular = false; + break; + } + } + } + + if (bNoAngular) + { + for(int iMode = 0; iMode <= 1; iMode ++) + { + uint64_t uiCost = 0; + initPredTimdIntraParams(pu, area, iMode); + predTimdIntraAng(COMPONENT_Y, pu, iMode, piPred, uiPredStride, uiRealW, uiRealH, eTempType, (eTempType == ABOVE_NEIGHBOR)? 0: iTempWidth, (eTempType == LEFT_NEIGHBOR)? 
0: iTempHeight); + if(eTempType == LEFT_ABOVE_NEIGHBOR) + { + uiCost += distParamSad[0].distFunc(distParamSad[0]); + uiCost += distParamSad[1].distFunc(distParamSad[1]); + } + else if(eTempType == LEFT_NEIGHBOR) + { + uiCost = distParamSad[1].distFunc(distParamSad[1]); + } + else if(eTempType == ABOVE_NEIGHBOR) + { + uiCost += distParamSad[0].distFunc(distParamSad[0]); + } + else + { + assert(0); + } + + if(uiCost < uiBestCost) + { + uiBestCost = uiCost; + iBestMode = iMode; + } + if(uiBestCost <= maxCost) + { + break; + } + } + cu.timdMode = iBestMode; + cu.timdIsBlended = false; + + return iBestMode; + } +#if SECONDARY_MPM + uint8_t mpmList[NUM_MOST_PROBABLE_MODES]; + uint8_t intraNonMPM[NUM_NON_MPM_MODES]; + PU::getIntraMPMs(pu, mpmList, intraNonMPM); +#else + unsigned mpmList[NUM_MOST_PROBABLE_MODES]; + PU::getIntraMPMs(pu, mpmList); +#endif + unsigned mpmExtraList[NUM_MOST_PROBABLE_MODES + 3]; // +DC/VER/HOR + int maxModeNum = NUM_MOST_PROBABLE_MODES; + unsigned modeCandList[3] = {DC_IDX, HOR_IDX, VER_IDX}; + bool bNotExist[3] = {true, true, true}; + for (int i = 0; i < NUM_MOST_PROBABLE_MODES; i++) + { + mpmExtraList[i] = mpmList[i]; + if (bNotExist[0] && mpmList[i] == DC_IDX) + { + bNotExist[0] = false; + } + if (bNotExist[1] && mpmList[i] == HOR_IDX) + { + bNotExist[1] = false; + } + if (bNotExist[2] && mpmList[i] == VER_IDX) + { + bNotExist[2] = false; + } + } + for (int i = 0; i < 3; i++) + { + if (bNotExist[i]) + { + mpmExtraList[maxModeNum++] = modeCandList[i]; + } + } + for(int i = 0; i < maxModeNum; i ++) + { + uint64_t uiCost = 0; + int iMode = mpmExtraList[i]; + if (iMode > DC_IDX) + { + iMode = MAP67TO131(iMode); + } + initPredTimdIntraParams(pu, area, iMode); + predTimdIntraAng(COMPONENT_Y, pu, iMode, piPred, uiPredStride, uiRealW, uiRealH, eTempType, (eTempType == ABOVE_NEIGHBOR)? 0: iTempWidth, (eTempType == LEFT_NEIGHBOR)? 
0: iTempHeight); + if(eTempType == LEFT_ABOVE_NEIGHBOR) + { + uiCost += distParamSad[0].distFunc(distParamSad[0]); + uiCost += distParamSad[1].distFunc(distParamSad[1]); + } + else if(eTempType == LEFT_NEIGHBOR) + { + uiCost = distParamSad[1].distFunc(distParamSad[1]); + } + else if(eTempType == ABOVE_NEIGHBOR) + { + uiCost += distParamSad[0].distFunc(distParamSad[0]); + } + else + { + assert(0); + } + + if( uiCost < uiBestCost ) + { + uiSecondaryCost = uiBestCost; + iSecondaryMode = iBestMode; + uiBestCost = uiCost; + iBestMode = iMode; + } + else if (uiCost < uiSecondaryCost) + { + uiSecondaryCost = uiCost; + iSecondaryMode = iMode; + } + if (uiSecondaryCost <= maxCost) + { + break; + } + } + + int midMode = iBestMode; + if (midMode > DC_IDX && uiBestCost > maxCost) + { + for (int i = -1; i <= 1; i+=2) + { + int iMode = midMode + i; + if (iMode <= DC_IDX || iMode > EXT_VDIA_IDX) + { + continue; + } + initPredTimdIntraParams(pu, area, iMode); + predTimdIntraAng(COMPONENT_Y, pu, iMode, piPred, uiPredStride, uiRealW, uiRealH, eTempType, (eTempType == ABOVE_NEIGHBOR)? 0: iTempWidth, (eTempType == LEFT_NEIGHBOR)? 
0: iTempHeight); + uint64_t uiCost = 0; + if(eTempType == LEFT_ABOVE_NEIGHBOR) + { + uiCost += distParamSad[0].distFunc(distParamSad[0]); + uiCost += distParamSad[1].distFunc(distParamSad[1]); + } + else if(eTempType == LEFT_NEIGHBOR) + { + uiCost = distParamSad[1].distFunc(distParamSad[1]); + } + else if(eTempType == ABOVE_NEIGHBOR) + { + uiCost += distParamSad[0].distFunc(distParamSad[0]); + } + else + { + assert(0); + } + + if(uiCost < uiBestCost) + { + uiBestCost = uiCost; + iBestMode = iMode; + } + if(uiBestCost <= maxCost) + { + break; + } + } + } + + midMode = iSecondaryMode; + if (midMode > DC_IDX && uiSecondaryCost > maxCost) + { + for (int i = -1; i <= 1; i+=2) + { + int iMode = midMode + i; + if (iMode <= DC_IDX || iMode > EXT_VDIA_IDX) + { + continue; + } + initPredTimdIntraParams(pu, area, iMode); + predTimdIntraAng(COMPONENT_Y, pu, iMode, piPred, uiPredStride, uiRealW, uiRealH, eTempType, (eTempType == ABOVE_NEIGHBOR)? 0: iTempWidth, (eTempType == LEFT_NEIGHBOR)? 0: iTempHeight); + uint64_t uiCost = 0; + if(eTempType == LEFT_ABOVE_NEIGHBOR) + { + uiCost += distParamSad[0].distFunc(distParamSad[0]); + uiCost += distParamSad[1].distFunc(distParamSad[1]); + } + else if(eTempType == LEFT_NEIGHBOR) + { + uiCost = distParamSad[1].distFunc(distParamSad[1]); + } + else if(eTempType == ABOVE_NEIGHBOR) + { + uiCost += distParamSad[0].distFunc(distParamSad[0]); + } + else + { + assert(0); + } + + if(uiCost < uiSecondaryCost) + { + uiSecondaryCost = uiCost; + iSecondaryMode = iMode; + } + if(uiSecondaryCost <= maxCost) + { + break; + } + } + } + + if ((uiSecondaryCost - uiBestCost) < uiBestCost) + { + cu.timdMode = iBestMode; + cu.timdIsBlended = true; + cu.timdModeSecondary = iSecondaryMode; + + const int blend_sum_weight = 6; + int sum_weight = 1 << blend_sum_weight; + + double dRatio = 0.0; + dRatio = (double) uiSecondaryCost / (double) (uiBestCost + uiSecondaryCost); + int iRatio = static_cast<int>(dRatio * sum_weight + 0.5); + cu.timdFusionWeight[0] = 
iRatio; + cu.timdFusionWeight[1] = sum_weight - iRatio; + } + else + { + cu.timdMode = iBestMode; + cu.timdIsBlended = false; + } + + return iBestMode; + } + else + { + cu.timdMode = PLANAR_IDX; + cu.timdIsBlended = false; + + return PLANAR_IDX; + } +} +#endif #if ENABLE_DIMD void IntraPrediction::deriveDimdMode(const CPelBuf &recoBuf, const CompArea &area, CodingUnit &cu) { diff --git a/source/Lib/CommonLib/IntraPrediction.h b/source/Lib/CommonLib/IntraPrediction.h index 95a98729e8b67700dfec4de8d5f5f4ad4614de51..83612248be99bd78020ffd203b73dcb169023b66 100644 --- a/source/Lib/CommonLib/IntraPrediction.h +++ b/source/Lib/CommonLib/IntraPrediction.h @@ -43,6 +43,9 @@ #include "Unit.h" #include "Buffer.h" #include "Picture.h" +#if JVET_W0123_TIMD_FUSION +#include "RdCost.h" +#endif #include "MatrixIntraPrediction.h" @@ -81,6 +84,10 @@ private: #endif static const uint8_t m_aucIntraFilter[MAX_INTRA_FILTER_DEPTHS]; +#if JVET_W0123_TIMD_FUSION + static const uint8_t m_aucIntraFilterExt[MAX_INTRA_FILTER_DEPTHS]; + RdCost* m_timdSatdCost; +#endif #if LMS_LINEAR_MODEL unsigned m_auShiftLM[32]; // Table for substituting division operation by multiplication #endif @@ -136,11 +143,18 @@ protected: // prediction void xPredIntraPlanar ( const CPelBuf &pSrc, PelBuf &pDst ); void xPredIntraDc ( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const bool enableBoundaryFilter = true ); +#if JVET_W0123_TIMD_FUSION + void xPredIntraAng ( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const ClpRng& clpRng, const bool bExtIntraDir); +#else void xPredIntraAng ( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const ClpRng& clpRng); +#endif void initPredIntraParams ( const PredictionUnit & pu, const CompArea compArea, const SPS& sps ); static bool isIntegerSlope(const int absAng) { return (0 == (absAng & 0x1F)); } +#if JVET_W0123_TIMD_FUSION + static bool isIntegerSlopeExt(const int absAng) { return (0 == (absAng & 0x3F)); } +#endif 
void xPredIntraBDPCM ( const CPelBuf &pSrc, PelBuf &pDst, const uint32_t dirMode, const ClpRng& clpRng ); Pel xGetPredValDc ( const CPelBuf &pSrc, const Size &dstSize ); @@ -154,6 +168,9 @@ protected: ); static int getModifiedWideAngle ( int width, int height, int predMode ); +#if JVET_W0123_TIMD_FUSION + static int getWideAngleExt ( int width, int height, int predMode ); +#endif void setReferenceArrayLengths ( const CompArea &area ); void destroy (); @@ -189,6 +206,24 @@ public: void init (ChromaFormat chromaFormatIDC, const unsigned bitDepthY); #if ENABLE_DIMD static void deriveDimdMode (const CPelBuf &recoBuf, const CompArea &area, CodingUnit &cu); +#endif +#if JVET_W0123_TIMD_FUSION + void xIntraPredTimdHorVerPdpc (Pel* pDsty,const int dstStride, Pel* refSide, const int width, const int height, int xOffset, int yOffset, int scale, const Pel* refMain, const ClpRng& clpRng); + void xPredTimdIntraPlanar (const CPelBuf &pSrc, Pel* pDst, int iDstStride, int width, int height, TEMPLATE_TYPE eTempType, int iTemplateWidth , int iTemplateHeight); + void xPredTimdIntraDc ( const PredictionUnit &pu, const CPelBuf &pSrc, Pel* pDst, int iDstStride, int iWidth, int iHeight, TEMPLATE_TYPE eTempType, int iTemplateWidth , int iTemplateHeight); + void xPredTimdIntraAng ( const CPelBuf &pSrc, const ClpRng& clpRng, Pel* pTrueDst, int iDstStride, int iWidth, int iHeight, TEMPLATE_TYPE eTempType, int iTemplateWidth , int iTemplateHeight, uint32_t dirMode); + void xIntraPredTimdAngLuma(Pel* pDstBuf, const ptrdiff_t dstStride, Pel* refMain, int width, int height, int deltaPos, int intraPredAngle, const ClpRng& clpRng, int xOffset, int yOffset); + void xIntraPredTimdPlanarDcPdpc (const CPelBuf &pSrc, Pel* pDst, int iDstStride, int width, int height, TEMPLATE_TYPE eTempType, int iTemplateWidth , int iTemplateHeight); + void xIntraPredTimdAngPdpc(Pel* pDsty,const int dstStride,Pel* refSide,const int width,const int height, int xOffset, int yOffset, int scale, int invAngle); + void 
xFillTimdReferenceSamples ( const CPelBuf &recoBuf, Pel* refBufUnfiltered, const CompArea &area, const CodingUnit &cu, int iTemplateWidth, int iTemplateHeight ); + Pel xGetPredTimdValDc ( const CPelBuf &pSrc, const Size &dstSize, TEMPLATE_TYPE eTempType, int iTempHeight, int iTempWidth ); + void initPredTimdIntraParams (const PredictionUnit & pu, const CompArea area, int dirMode); + void predTimdIntraAng ( const ComponentID compId, const PredictionUnit &pu, uint32_t uiDirMode, Pel* pPred, uint32_t uiStride, uint32_t iWidth, uint32_t iHeight, TEMPLATE_TYPE eTempType, int32_t iTemplateWidth, int32_t iTemplateHeight); + int deriveTimdMode ( const CPelBuf &recoBuf, const CompArea &area, CodingUnit &cu ); + void initTimdIntraPatternLuma (const CodingUnit &cu, const CompArea &area, int iTemplateWidth, int iTemplateHeight, uint32_t uiRefWidth, uint32_t uiRefHeight); +#if GRAD_PDPC + void xIntraPredTimdAngGradPdpc (Pel* pDsty, const int dstStride, Pel* refMain, Pel* refSide, const int width, const int height, int xOffset, int yOffset, int scale, int deltaPos, int intraPredAngle, const ClpRng& clpRng); +#endif #endif // Angular Intra void predIntraAng ( const ComponentID compId, PelBuf &piPred, const PredictionUnit &pu); diff --git a/source/Lib/CommonLib/RdCost.cpp b/source/Lib/CommonLib/RdCost.cpp index 7470c66ce645ad92378171c3d6f5860233e6906d..b5018fb3c399eee6730efe4260d6a786d19717ad 100644 --- a/source/Lib/CommonLib/RdCost.cpp +++ b/source/Lib/CommonLib/RdCost.cpp @@ -446,6 +446,64 @@ void RdCost::setDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY, } }

#if JVET_W0123_TIMD_FUSION
/**
 * Set up a distortion parameter block for TIMD template matching.
 *
 * Describes the original and predicted template regions (same width/height,
 * individual strides) and selects the distortion function from
 * m_afpDistortFunc according to the template width.
 *
 * rcDP.useMR is read here and is not written, so the caller has to initialise
 * it before the call.
 *
 * \param rcDP          distortion parameters to fill
 * \param pOrg          first sample of the original (reconstructed) template
 * \param piRefY        first sample of the predicted template
 * \param iOrgStride    stride of pOrg
 * \param iRefStride    stride of piRefY
 * \param bitDepth      sample bit depth
 * \param compID        colour component
 * \param width         template width in samples
 * \param height        template height in samples
 * \param subShiftMode  stored in rcDP.subShift
 * \param step          stored in rcDP.step
 * \param useHadamard   select a DF_HAD based function instead of a DF_SAD based one
 */
void RdCost::setTimdDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY, int iOrgStride, int iRefStride, int bitDepth, ComponentID compID, int width, int height, int subShiftMode, int step, bool useHadamard )
{
  rcDP.bitDepth = bitDepth;
  rcDP.compID   = compID;

  // original (reconstructed) template
  rcDP.org.buf    = pOrg;
  rcDP.org.stride = iOrgStride;
  rcDP.org.width  = width;
  rcDP.org.height = height;

  // predicted template
  rcDP.cur.buf    = piRefY;
  rcDP.cur.stride = iRefStride;
  rcDP.cur.width  = width;
  rcDP.cur.height = height;

  rcDP.subShift = subShiftMode;
  rcDP.step     = step;
  rcDP.maximumDistortionForEarlyExit = std::numeric_limits<Distortion>::max();

  // Offset that switches from the plain to the mean-removed function family.
  const int mrShift = rcDP.useMR ? DF_MRSAD - DF_SAD : 0;

  // Width-dependent index inside a function family; evaluated only where needed.
  const auto widthClass = [width]() -> int
  {
#if CTU_256
    return std::min<int>( 7, floorLog2( width ) );
#else
    return floorLog2( width );
#endif
  };

  int funcIdx = DF_SAD + mrShift; // generic SAD, used for widths without a dedicated function
  if( useHadamard )
  {
    funcIdx = DF_HAD + mrShift + widthClass();
  }
  else
  {
    switch( width )
    {
    case 12:
      funcIdx = DF_SAD12 + mrShift;
      break;
    case 24:
      funcIdx = DF_SAD24 + mrShift;
      break;
    case 48:
      funcIdx = DF_SAD48 + mrShift;
      break;
    default:
      if( isPowerOf2( width ) )
      {
        funcIdx += widthClass();
      }
      break;
    }
  }
  rcDP.distFunc = m_afpDistortFunc[ funcIdx ];
}
#endif

 #if TM_AMVP || TM_MRG void RdCost::setDistParam( DistParam &rcDP, const CPelBuf &org, const CPelBuf &cur, int bitDepth, bool TrueA_FalseL, int wIdx, int subShift, ComponentID compID ) { diff --git a/source/Lib/CommonLib/RdCost.h b/source/Lib/CommonLib/RdCost.h index 8e194a9e30f78d50aa01a1782cf8981d2bf41084..4cbe0cf75c868a115683c7b17e71f30bd3534bcc 100644 --- a/source/Lib/CommonLib/RdCost.h +++ b/source/Lib/CommonLib/RdCost.h @@ -185,6 +185,9 @@ public: void setDistParam( DistParam &rcDP, const CPelBuf &org, const Pel* piRefY , int iRefStride, int bitDepth, ComponentID compID, int subShiftMode = 0, int step = 1, bool useHadamard = false ); void setDistParam( DistParam &rcDP, const CPelBuf &org, const CPelBuf &cur, int bitDepth, ComponentID compID, bool useHadamard = false ); void setDistParam( DistParam &rcDP, const Pel*
pOrg, const Pel* piRefY, int iOrgStride, int iRefStride, int bitDepth, ComponentID compID, int width, int height, int subShiftMode = 0, int step = 1, bool useHadamard = false, bool bioApplied = false ); +#if JVET_W0123_TIMD_FUSION + void setTimdDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY, int iOrgStride, int iRefStride, int bitDepth, ComponentID compID, int width, int height, int subShiftMode = 0, int step = 1, bool useHadamard = false ); +#endif void setDistParam( DistParam &rcDP, const CPelBuf &org, const Pel* piRefY, int iRefStride, const Pel* mask, int iMaskStride, int stepX, int iMaskStride2, int bitDepth, ComponentID compID); #if TM_AMVP || TM_MRG void setDistParam( DistParam &rcDP, const CPelBuf &org, const CPelBuf &cur, int bitDepth, bool TrueA_FalseL, int wIdx, int subShift, ComponentID compID ); diff --git a/source/Lib/CommonLib/Slice.cpp b/source/Lib/CommonLib/Slice.cpp index 934d14c39ac50bc84650f081b54be54510cfe0e7..d82e0bd2969d87b2dc6110911f75e9d22e0df70e 100644 --- a/source/Lib/CommonLib/Slice.cpp +++ b/source/Lib/CommonLib/Slice.cpp @@ -3113,6 +3113,9 @@ SPS::SPS() #if ENABLE_DIMD , m_dimd ( false ) #endif +#if JVET_W0123_TIMD_FUSION +, m_timd ( false ) +#endif #if JVET_V0130_INTRA_TMP , m_intraTMP ( false ) , m_intraTmpMaxSize ( 64 ) @@ -4768,6 +4771,9 @@ bool operator == (const ConstraintInfo& op1, const ConstraintInfo& o #if ENABLE_DIMD if( op1.m_noDimdConstraintFlag != op2.m_noDimdConstraintFlag ) return false; #endif +#if JVET_W0123_TIMD_FUSION + if( op1.m_noTimdConstraintFlag != op2.m_noTimdConstraintFlag ) return false; +#endif #if ENABLE_OBMC if( op1.m_noObmcConstraintFlag != op2.m_noObmcConstraintFlag ) return false; #endif diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h index f53244b1dbbedad876acfc6f5385cf719faa4399..70d7b8bc6b0106e57954db4266f2769999574c9c 100644 --- a/source/Lib/CommonLib/Slice.h +++ b/source/Lib/CommonLib/Slice.h @@ -319,6 +319,9 @@ class ConstraintInfo #if ENABLE_DIMD bool 
m_noDimdConstraintFlag; #endif +#if JVET_W0123_TIMD_FUSION + bool m_noTimdConstraintFlag; +#endif #if ENABLE_OBMC bool m_noObmcConstraintFlag; #endif @@ -434,6 +437,9 @@ public: #if ENABLE_DIMD , m_noDimdConstraintFlag (false) #endif +#if JVET_W0123_TIMD_FUSION + , m_noTimdConstraintFlag (false) +#endif #if ENABLE_OBMC , m_noObmcConstraintFlag (false) #endif @@ -615,6 +621,10 @@ public: bool getNoDimdConstraintFlag() const { return m_noDimdConstraintFlag; } void setNoDimdConstraintFlag(bool bVal) { m_noDimdConstraintFlag = bVal; } #endif +#if JVET_W0123_TIMD_FUSION + bool getNoTimdConstraintFlag() const { return m_noTimdConstraintFlag; } + void setNoTimdConstraintFlag(bool bVal) { m_noTimdConstraintFlag = bVal; } +#endif #if ENABLE_OBMC bool getNoObmcConstraintFlag() const { return m_noObmcConstraintFlag; } void setNoObmcConstraintFlag(bool bVal) { m_noObmcConstraintFlag = bVal; } @@ -1651,6 +1661,9 @@ private: #if ENABLE_DIMD bool m_dimd; #endif +#if JVET_W0123_TIMD_FUSION + bool m_timd; +#endif #if JVET_V0130_INTRA_TMP bool m_intraTMP; ///< intra Template Matching unsigned m_intraTmpMaxSize; ///< max CU size for which intra TMP is allowed @@ -2083,6 +2096,10 @@ void setCCALFEnabledFlag( bool b ) void setIntraTMPMaxSize (unsigned n) { m_intraTmpMaxSize = n; } unsigned getIntraTMPMaxSize () const { return m_intraTmpMaxSize; } #endif +#if JVET_W0123_TIMD_FUSION + void setUseTimd ( bool b ) { m_timd = b; } + bool getUseTimd () const { return m_timd; } +#endif #if ENABLE_OBMC void setUseOBMC ( bool b ) { m_OBMC = b; } bool getUseOBMC () const { return m_OBMC; } diff --git a/source/Lib/CommonLib/TrQuant.cpp b/source/Lib/CommonLib/TrQuant.cpp index a6252225bc409c79d9f3b79d81fea4f4c714b783..26fbda3252bb36edb6394600b284812be9f215d9 100644 --- a/source/Lib/CommonLib/TrQuant.cpp +++ b/source/Lib/CommonLib/TrQuant.cpp @@ -831,7 +831,11 @@ void TrQuant::xInvLfnst( const TransformUnit &tu, const ComponentID compID ) const ScanElement * scan = whge3 ? 
g_coefTopLeftDiagScan8x8[ gp_sizeIdxInfo->idxFrom( width ) ] : g_scanOrder[ SCAN_GROUPED_4x4 ][ SCAN_DIAG ][ gp_sizeIdxInfo->idxFrom( width ) ][ gp_sizeIdxInfo->idxFrom( height ) ]; uint32_t intraMode = PU::getFinalIntraMode( *tu.cs->getPU( area.pos(), toChannelType( compID ) ), toChannelType( compID ) ); +#if JVET_W0123_TIMD_FUSION + if( compID != COMPONENT_Y && PU::isLMCMode( tu.cs->getPU( area.pos(), toChannelType( compID ) )->intraDir[ toChannelType( compID ) ] ) ) +#else if( PU::isLMCMode( tu.cs->getPU( area.pos(), toChannelType( compID ) )->intraDir[ toChannelType( compID ) ] ) ) +#endif { intraMode = PU::getCoLocatedIntraLumaMode( *tu.cs->getPU( area.pos(), toChannelType( compID ) ) ); } @@ -844,6 +848,12 @@ void TrQuant::xInvLfnst( const TransformUnit &tu, const ComponentID compID ) { intraMode = PLANAR_IDX; } +#endif +#if JVET_W0123_TIMD_FUSION + if (tu.cu->timd && compID == COMPONENT_Y) + { + intraMode = MAP131TO67(intraMode); + } #endif CHECK( intraMode >= NUM_INTRA_MODE - 1, "Invalid intra mode" ); @@ -977,7 +987,11 @@ void TrQuant::xFwdLfnst( const TransformUnit &tu, const ComponentID compID, cons const ScanElement * scan = whge3 ? 
g_coefTopLeftDiagScan8x8[ gp_sizeIdxInfo->idxFrom( width ) ] : g_scanOrder[ SCAN_GROUPED_4x4 ][ SCAN_DIAG ][ gp_sizeIdxInfo->idxFrom( width ) ][ gp_sizeIdxInfo->idxFrom( height ) ]; uint32_t intraMode = PU::getFinalIntraMode( *tu.cs->getPU( area.pos(), toChannelType( compID ) ), toChannelType( compID ) ); +#if JVET_W0123_TIMD_FUSION + if( compID != COMPONENT_Y && PU::isLMCMode( tu.cs->getPU( area.pos(), toChannelType( compID ) )->intraDir[ toChannelType( compID ) ] ) ) +#else if( PU::isLMCMode( tu.cs->getPU( area.pos(), toChannelType( compID ) )->intraDir[ toChannelType( compID ) ] ) ) +#endif { intraMode = PU::getCoLocatedIntraLumaMode( *tu.cs->getPU( area.pos(), toChannelType( compID ) ) ); } @@ -990,6 +1004,12 @@ void TrQuant::xFwdLfnst( const TransformUnit &tu, const ComponentID compID, cons { intraMode = PLANAR_IDX; } +#endif +#if JVET_W0123_TIMD_FUSION + if (tu.cu->timd && compID == COMPONENT_Y) + { + intraMode = MAP131TO67(intraMode); + } #endif CHECK( intraMode >= NUM_INTRA_MODE - 1, "Invalid intra mode" ); diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 9cb01a37576d9d26be8fbad57ed503f2b3f6a32d..63d251c2662434aba131f547bb649856a53767d5 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -112,6 +112,8 @@ #define JVET_V0087_DIMD_NO_ISP ENABLE_DIMD // disallow combination of DIMD and ISP #define JVET_V0130_INTRA_TMP 1 // JVET-V0130: template matching prediction +#define JVET_W0123_TIMD_FUSION 1 // Template based intra mode derivation and fusion + // Inter #define CIIP_PDPC 1 // apply pdpc to megre prediction as a new CIIP mode (CIIP_PDPC) additional to CIIP mode #define SAMPLE_BASED_BDOF 1 // Sample based BDOF @@ -565,6 +567,16 @@ enum ISPType INTRA_SUBPARTITIONS_RESERVED = 4 }; +#if JVET_W0123_TIMD_FUSION +enum TEMPLATE_TYPE +{ + NO_NEIGHBOR = 0, + LEFT_NEIGHBOR = 1, + ABOVE_NEIGHBOR = 2, + LEFT_ABOVE_NEIGHBOR = 3 +}; +#endif + enum SbtIdx { SBT_OFF_DCT = 0, diff --git 
a/source/Lib/CommonLib/Unit.cpp b/source/Lib/CommonLib/Unit.cpp index 0d5f7f2a3ca47b9c20ed0a550dc52c9a4388ad19..4e28e52a2aab1fa207cd8164b43ee596e90fc97f 100644 --- a/source/Lib/CommonLib/Unit.cpp +++ b/source/Lib/CommonLib/Unit.cpp @@ -290,6 +290,14 @@ CodingUnit& CodingUnit::operator=( const CodingUnit& other ) dimdRelWeight[i] = other.dimdRelWeight[i]; } #endif +#if JVET_W0123_TIMD_FUSION + timd = other.timd; + timdMode = other.timdMode; + timdModeSecondary = other.timdModeSecondary; + timdIsBlended = other.timdIsBlended; + timdFusionWeight[0] = other.timdFusionWeight[0]; + timdFusionWeight[1] = other.timdFusionWeight[1]; +#endif #if ENABLE_OBMC obmcFlag = other.obmcFlag; isobmcMC = other.isobmcMC; @@ -375,6 +383,14 @@ void CodingUnit::initData() dimdRelWeight[i] = -1; } #endif +#if JVET_W0123_TIMD_FUSION + timd = false; + timdMode = -1; + timdModeSecondary = -1; + timdIsBlended = false; + timdFusionWeight[0] = -1; + timdFusionWeight[1] = -1; +#endif #if ENABLE_OBMC obmcFlag = true; isobmcMC = false; @@ -607,7 +623,7 @@ void PredictionUnit::initData() intraDir[1] = PLANAR_IDX; mipTransposedFlag = false; multiRefIdx = 0; -#if ENABLE_DIMD +#if ENABLE_DIMD || JVET_W0123_TIMD_FUSION parseLumaMode = false; candId = -1; parseChromaMode = false; @@ -683,7 +699,7 @@ PredictionUnit& PredictionUnit::operator=(const IntraPredictionData& predData) } mipTransposedFlag = predData.mipTransposedFlag; multiRefIdx = predData.multiRefIdx; -#if ENABLE_DIMD +#if ENABLE_DIMD || JVET_W0123_TIMD_FUSION parseLumaMode = predData.parseLumaMode; candId = predData.candId; parseChromaMode = predData.parseChromaMode; @@ -767,7 +783,7 @@ PredictionUnit& PredictionUnit::operator=( const PredictionUnit& other ) mergeFlag = other.mergeFlag; regularMergeFlag = other.regularMergeFlag; mergeIdx = other.mergeIdx; -#if ENABLE_DIMD +#if ENABLE_DIMD || JVET_W0123_TIMD_FUSION parseLumaMode = other.parseLumaMode; candId = other.candId; parseChromaMode = other.parseChromaMode; @@ -864,6 +880,29 @@ 
CMotionBuf PredictionUnit::getMotionBuf() const return cs->getMotionBuf( *this ); } +#if JVET_W0123_TIMD_FUSION +const uint8_t& PredictionUnit::getIpmInfo() const +{ + return cs->getIpmInfo( lumaPos() ); +} + +const uint8_t& PredictionUnit::getIpmInfo( const Position& pos ) const +{ + CHECKD( !Y().contains( pos ), "Trying to access motion info outsied of PU" ); + return cs->getIpmInfo( pos ); +} + +IpmBuf PredictionUnit::getIpmBuf() +{ + return cs->getIpmBuf( *this ); +} + +CIpmBuf PredictionUnit::getIpmBuf() const +{ + return cs->getIpmBuf( *this ); +} +#endif + // --------------------------------------------------------------------------- // transform unit method definitions diff --git a/source/Lib/CommonLib/Unit.h b/source/Lib/CommonLib/Unit.h index 31fd09ac0aff9313a9fc4a67fee1363dc61d2db2..1bd52574d6a60507ba79d8616fac45eacfb3b573 100644 --- a/source/Lib/CommonLib/Unit.h +++ b/source/Lib/CommonLib/Unit.h @@ -325,6 +325,13 @@ struct CodingUnit : public UnitArea int8_t dimdBlendMode[2]; // max number of blend modes (the main mode is not counter) --> incoherent with dimdRelWeight int8_t dimdRelWeight[3]; // max number of predictions to blend #endif +#if JVET_W0123_TIMD_FUSION + bool timd; + int timdMode; + int timdModeSecondary; + bool timdIsBlended; + int8_t timdFusionWeight[2]; +#endif #if ENABLE_OBMC bool obmcFlag; bool isobmcMC; @@ -399,7 +406,7 @@ struct CodingUnit : public UnitArea struct IntraPredictionData { -#if ENABLE_DIMD +#if ENABLE_DIMD || JVET_W0123_TIMD_FUSION bool parseLumaMode = false; int8_t candId = -1; bool parseChromaMode = false; @@ -496,6 +503,13 @@ struct PredictionUnit : public UnitArea, public IntraPredictionData, public Inte MotionBuf getMotionBuf(); CMotionBuf getMotionBuf() const; +#if JVET_W0123_TIMD_FUSION + const uint8_t& getIpmInfo() const; + const uint8_t& getIpmInfo( const Position& pos ) const; + IpmBuf getIpmBuf(); + CIpmBuf getIpmBuf() const; +#endif + #if ENABLE_SPLIT_PARALLELISM int64_t cacheId; diff --git 
a/source/Lib/CommonLib/UnitTools.cpp b/source/Lib/CommonLib/UnitTools.cpp index 3616f90b49ea1c4d5c4ed31583bb6d69bb9131c8..d8528c0f71ab01fd68b6164c9f585e4146f5cf5e 100644 --- a/source/Lib/CommonLib/UnitTools.cpp +++ b/source/Lib/CommonLib/UnitTools.cpp @@ -94,7 +94,11 @@ void CS::setRefinedMotionField(CodingStructure &cs) subPu.mv[REF_PIC_LIST_1].clipToStorageBitDepth(); pu.mvdL0SubPu[num].setZero(); num++; +#if JVET_W0123_TIMD_FUSION + PU::spanMotionInfo2(subPu); +#else PU::spanMotionInfo(subPu); +#endif } } } @@ -581,6 +585,48 @@ bool CU::allLumaCBFsAreZero(const CodingUnit& cu) } } +#if JVET_W0123_TIMD_FUSION +TEMPLATE_TYPE CU::deriveTimdRefType( int iCurX, int iCurY, uint32_t uiCurWidth, uint32_t uiCurHeight, int iTemplateWidth, int iTemplateHeight, int& iRefX, int& iRefY, uint32_t& uiRefWidth, uint32_t& uiRefHeight ) +{ + if(iCurX == 0 && iCurY == 0) + { + return NO_NEIGHBOR; + } + + TEMPLATE_TYPE eTempType = NO_NEIGHBOR; + iRefX = iRefY = -1; + if(iCurX > 0 && iCurY > 0) + { + iRefX = iCurX - iTemplateWidth; + iRefY = iCurY - iTemplateHeight; + uiRefWidth = uiCurWidth + iTemplateWidth; + uiRefHeight = uiCurHeight + iTemplateHeight; + eTempType = LEFT_ABOVE_NEIGHBOR; + } + else if(iCurX == 0 && iCurY > 0) + { + iRefX = iCurX; + iRefY = iCurY - iTemplateHeight; + uiRefWidth = uiCurWidth; + uiRefHeight = uiCurHeight; + eTempType = ABOVE_NEIGHBOR; + } + else if(iCurX > 0 && iCurY == 0) + { + iRefX = iCurX - iTemplateWidth; + iRefY = iCurY; + uiRefWidth = uiCurWidth; + uiRefHeight = uiCurHeight; + eTempType = LEFT_NEIGHBOR; + } + else + { + assert(0); + } + return eTempType; +} +#endif + PUTraverser CU::traversePUs( CodingUnit& cu ) { @@ -642,15 +688,31 @@ int PU::getIntraMPMs(const PredictionUnit &pu, unsigned* mpm, const ChannelType if (puLeft && CU::isIntra(*puLeft->cu)) { #if SECONDARY_MPM +#if JVET_W0123_TIMD_FUSION + mpm[numValidMPM] = puLeft->cu->timd ? 
MAP131TO67(PU::getIntraDirLuma(*puLeft)) : PU::getIntraDirLuma(*puLeft); +#else mpm[numValidMPM] = PU::getIntraDirLuma(*puLeft); +#endif if( !includedMode[mpm[numValidMPM]] ) { includedMode[mpm[numValidMPM++]] = true; } #else leftIntraDir = PU::getIntraDirLuma( *puLeft ); +#if JVET_W0123_TIMD_FUSION + if (puLeft->cu->timd) + { + leftIntraDir = MAP131TO67(leftIntraDir); + } +#endif #endif } +#if JVET_W0123_TIMD_FUSION && !SECONDARY_MPM + if (puLeft && CU::isInter(*puLeft->cu)) + { + leftIntraDir = puLeft->getIpmInfo(posLB.offset(-1, 0)); + } +#endif // Get intra direction of above PU #if SECONDARY_MPM @@ -663,22 +725,60 @@ int PU::getIntraMPMs(const PredictionUnit &pu, unsigned* mpm, const ChannelType if (puAbove && CU::isIntra(*puAbove->cu) && CU::isSameCtu(*pu.cu, *puAbove->cu)) { #if SECONDARY_MPM +#if JVET_W0123_TIMD_FUSION + mpm[numValidMPM] = puAbove->cu->timd ? MAP131TO67(PU::getIntraDirLuma(*puAbove)) : PU::getIntraDirLuma(*puAbove); +#else mpm[numValidMPM] = PU::getIntraDirLuma(*puAbove); - if( !includedMode[mpm[numValidMPM]] ) +#endif + if (!includedMode[mpm[numValidMPM]]) { includedMode[mpm[numValidMPM++]] = true; } #else - aboveIntraDir = PU::getIntraDirLuma( *puAbove ); + aboveIntraDir = PU::getIntraDirLuma(*puAbove); +#if JVET_W0123_TIMD_FUSION + if (puAbove->cu->timd) + { + aboveIntraDir = MAP131TO67(aboveIntraDir); + } +#endif #endif } +#if JVET_W0123_TIMD_FUSION && !SECONDARY_MPM + if (puAbove && CU::isInter(*puAbove->cu)) + { + aboveIntraDir = puAbove->getIpmInfo(posRT.offset(0, -1)); + } +#endif #if SECONDARY_MPM +#if JVET_W0123_TIMD_FUSION + if (puLeft && CU::isInter(*puLeft->cu)) + { + mpm[numValidMPM] = puLeft->getIpmInfo(pu.lheight() >= pu.lwidth() ? posRT.offset(0, -1) : posLB.offset(-1, 0)); + if( !includedMode[mpm[numValidMPM]] ) + { + includedMode[mpm[numValidMPM++]] = true; + } + } + if (puAbove && CU::isInter(*puAbove->cu)) + { + mpm[numValidMPM] = puAbove->getIpmInfo(pu.lheight() >= pu.lwidth() ? 
posLB.offset(-1, 0) : posRT.offset(0, -1)); + if( !includedMode[mpm[numValidMPM]] ) + { + includedMode[mpm[numValidMPM++]] = true; + } + } +#endif // Get intra direction of below-left PU const PredictionUnit *puBelowLeft = pu.cs->getPURestricted(posLB.offset(-1, 1), pu, channelType); if (puBelowLeft && CU::isIntra(*puBelowLeft->cu)) { +#if JVET_W0123_TIMD_FUSION + mpm[numValidMPM] = puBelowLeft->cu->timd ? MAP131TO67(PU::getIntraDirLuma(*puBelowLeft)) : PU::getIntraDirLuma(*puBelowLeft); +#else mpm[numValidMPM] = PU::getIntraDirLuma(*puBelowLeft); +#endif if( !includedMode[mpm[numValidMPM]] ) { includedMode[mpm[numValidMPM++]] = true; @@ -689,7 +789,11 @@ int PU::getIntraMPMs(const PredictionUnit &pu, unsigned* mpm, const ChannelType const PredictionUnit *puAboveRight = pu.cs->getPURestricted(posRT.offset(1, -1), pu, channelType); if (puAboveRight && CU::isIntra(*puAboveRight->cu) && CU::isSameCtu(*pu.cu, *puAboveRight->cu)) { +#if JVET_W0123_TIMD_FUSION + mpm[numValidMPM] = puAboveRight->cu->timd ? MAP131TO67(PU::getIntraDirLuma(*puAboveRight)) : PU::getIntraDirLuma(*puAboveRight); +#else mpm[numValidMPM] = PU::getIntraDirLuma(*puAboveRight); +#endif if( !includedMode[mpm[numValidMPM]] ) { includedMode[mpm[numValidMPM++]] = true; @@ -701,12 +805,42 @@ int PU::getIntraMPMs(const PredictionUnit &pu, unsigned* mpm, const ChannelType const PredictionUnit *puAboveLeft = pu.cs->getPURestricted(posTL.offset(-1, -1), pu, channelType); if (puAboveLeft && CU::isIntra(*puAboveLeft->cu) && CU::isSameCtu(*pu.cu, *puAboveLeft->cu)) { +#if JVET_W0123_TIMD_FUSION + mpm[numValidMPM] = puAboveLeft->cu->timd ? 
MAP131TO67(PU::getIntraDirLuma(*puAboveLeft)) : PU::getIntraDirLuma(*puAboveLeft); +#else mpm[numValidMPM] = PU::getIntraDirLuma(*puAboveLeft); +#endif + if( !includedMode[mpm[numValidMPM]] ) + { + includedMode[mpm[numValidMPM++]] = true; + } + } +#if JVET_W0123_TIMD_FUSION + if (puBelowLeft && CU::isInter(*puBelowLeft->cu)) + { + mpm[numValidMPM] = puBelowLeft->getIpmInfo(posLB.offset(-1, 1)); + if( !includedMode[mpm[numValidMPM]] ) + { + includedMode[mpm[numValidMPM++]] = true; + } + } + if (puAboveRight && CU::isInter(*puAboveRight->cu)) + { + mpm[numValidMPM] = puAboveRight->getIpmInfo(posRT.offset(1, -1)); + if( !includedMode[mpm[numValidMPM]] ) + { + includedMode[mpm[numValidMPM++]] = true; + } + } + if (puAboveLeft && CU::isInter(*puAboveLeft->cu)) + { + mpm[numValidMPM] = puAboveLeft->getIpmInfo(posTL.offset(-1, -1)); if( !includedMode[mpm[numValidMPM]] ) { includedMode[mpm[numValidMPM++]] = true; } } +#endif #endif CHECK(2 >= numMPMs, "Invalid number of most probable modes"); @@ -1093,6 +1227,12 @@ const PredictionUnit &PU::getCoLocatedLumaPU(const PredictionUnit &pu) uint32_t PU::getCoLocatedIntraLumaMode(const PredictionUnit &pu) { +#if JVET_W0123_TIMD_FUSION + if (PU::getCoLocatedLumaPU(pu).cu->timd) + { + return MAP131TO67(PU::getIntraDirLuma(PU::getCoLocatedLumaPU(pu))); + } +#endif return PU::getIntraDirLuma(PU::getCoLocatedLumaPU(pu)); } @@ -4213,6 +4353,9 @@ void PU::spanMotionInfo( PredictionUnit &pu, const MergeCtx &mrgCtx ) #if !MULTI_PASS_DMVR MotionBuf mb = pu.getMotionBuf(); #endif +#if JVET_W0123_TIMD_FUSION + IpmBuf ib = pu.getIpmBuf(); +#endif if (!pu.mergeFlag || pu.mergeType == MRG_TYPE_DEFAULT_N || pu.mergeType == MRG_TYPE_IBC) { @@ -4280,10 +4423,23 @@ void PU::spanMotionInfo( PredictionUnit &pu, const MergeCtx &mrgCtx ) { PU::setAllAffineMv(pu, pu.mvAffi[1][0], pu.mvAffi[1][1], pu.mvAffi[1][2], REF_PIC_LIST_1); } +#if JVET_W0123_TIMD_FUSION + spanIpmInfoInter(pu, mb, ib); +#endif } else { mb.fill(mi); +#if JVET_W0123_TIMD_FUSION + if 
(mi.isIBCmot) + { + ib.fill(PLANAR_IDX); + } + else + { + spanIpmInfoInter(pu, mb, ib); + } +#endif } } else if (pu.mergeType == MRG_TYPE_SUBPU_ATMVP) @@ -4293,9 +4449,237 @@ void PU::spanMotionInfo( PredictionUnit &pu, const MergeCtx &mrgCtx ) MotionBuf mb = pu.getMotionBuf(); #endif mb.copyFrom(mrgCtx.subPuMvpMiBuf); +#if JVET_W0123_TIMD_FUSION + spanIpmInfoInter(pu, mb, ib); +#endif } } +#if JVET_W0123_TIMD_FUSION +#if MULTI_PASS_DMVR +void PU::spanMotionInfo2( PredictionUnit &pu, const MergeCtx &mrgCtx, Mv* bdmvrSubPuMv0, Mv* bdmvrSubPuMv1, Mv* bdofSubPuMvOffset) +#else +void PU::spanMotionInfo2( PredictionUnit &pu, const MergeCtx &mrgCtx ) +#endif +{ +#if !MULTI_PASS_DMVR + MotionBuf mb = pu.getMotionBuf(); +#endif + + if (!pu.mergeFlag || pu.mergeType == MRG_TYPE_DEFAULT_N || pu.mergeType == MRG_TYPE_IBC) + { + MotionInfo mi; + + mi.isInter = !CU::isIntra(*pu.cu); + mi.isIBCmot = CU::isIBC(*pu.cu); + mi.sliceIdx = pu.cu->slice->getIndependentSliceIdx(); +#if INTER_LIC + mi.usesLIC = pu.cu->LICFlag; +#endif + + if( mi.isInter ) + { + mi.interDir = pu.interDir; + mi.useAltHpelIf = pu.cu->imv == IMV_HPEL; + for( int i = 0; i < NUM_REF_PIC_LIST_01; i++ ) + { + mi.mv[i] = pu.mv[i]; + mi.refIdx[i] = pu.refIdx[i]; + } + if (mi.isIBCmot) + { + mi.bv = pu.bv; + } + } + +#if MULTI_PASS_DMVR + if (pu.bdmvrRefine) + { + CHECK(bdmvrSubPuMv0 == nullptr, "this is not possible"); + const int dx = std::min<int>(pu.lwidth (), BDOF_SUBPU_DIM); + const int dy = std::min<int>(pu.lheight(), BDOF_SUBPU_DIM); + int subPuIdx = 0; + const int bioSubPuIdxStrideIncr = BDOF_SUBPU_STRIDE - std::max(1, (int)(pu.lwidth() >> BDOF_SUBPU_DIM_LOG2)); + + for (int yStart = 0; yStart < pu.lheight(); yStart += dy) + { + for (int xStart = 0; xStart < pu.lwidth(); xStart += dx) + { + const int bdmvrSubPuIdx = (yStart >> DMVR_SUBCU_HEIGHT_LOG2) * DMVR_SUBPU_STRIDE + (xStart >> DMVR_SUBCU_WIDTH_LOG2); + mi.mv[0] = bdmvrSubPuMv0[bdmvrSubPuIdx] + bdofSubPuMvOffset[subPuIdx]; + mi.mv[1] = 
bdmvrSubPuMv1[bdmvrSubPuIdx] - bdofSubPuMvOffset[subPuIdx]; + + subPuIdx++; + MotionBuf mb = pu.cs->getMotionBuf(Area(pu.lx() + xStart, pu.ly() + yStart, dx, dy)); + mb.fill(mi); + } + subPuIdx += bioSubPuIdxStrideIncr; + } + return; + } + MotionBuf mb = pu.getMotionBuf(); +#endif + if (pu.cu->affine) + { + mi.mv[0].setZero(); // to make sure filling of MV in unused reference list + mi.mv[1].setZero(); + mb.fill(mi); + if (pu.refIdx[0] >= 0) + { + PU::setAllAffineMv(pu, pu.mvAffi[0][0], pu.mvAffi[0][1], pu.mvAffi[0][2], REF_PIC_LIST_0); + } + if (pu.refIdx[1] >= 0) + { + PU::setAllAffineMv(pu, pu.mvAffi[1][0], pu.mvAffi[1][1], pu.mvAffi[1][2], REF_PIC_LIST_1); + } + } + else + { + mb.fill(mi); + } + } + else if (pu.mergeType == MRG_TYPE_SUBPU_ATMVP) + { + CHECK(mrgCtx.subPuMvpMiBuf.area() == 0 || !mrgCtx.subPuMvpMiBuf.buf, "Buffer not initialized"); +#if MULTI_PASS_DMVR + MotionBuf mb = pu.getMotionBuf(); +#endif + mb.copyFrom(mrgCtx.subPuMvpMiBuf); + } +} + +void PU::spanIpmInfoIntra( PredictionUnit &pu) +{ + int ipm = PU::getIntraDirLuma(pu); + if (pu.cu->timd) + { + ipm = MAP131TO67(ipm); + } + IpmBuf ib = pu.getIpmBuf(); + ib.fill(ipm); +} + +void PU::spanIpmInfoInter( PredictionUnit &pu, MotionBuf &mb, IpmBuf &ib) +{ + const unsigned scale = 4 * std::max<int>(1, 4 * AMVP_DECIMATION_FACTOR / 4); + const unsigned mask = ~(scale - 1); + Mv cMv; + RefPicList refList; + int refIdx; + Position PosY; + MotionInfo tempMi; + MotionInfo mi0; + MotionInfo mi1; + Position PosY0; + Position PosY1; + Mv cMv0; + Mv cMv1; + Picture* pRefPic0; + Picture* pRefPic1; + uint8_t* ii = ib.buf; + int ibH = pu.Y().height >> MIN_CU_LOG2; + int ibW = pu.Y().width >> MIN_CU_LOG2; + for (int y = 0; y < ibH; y++) + { + for (int x = 0; x < ibW; x++) + { + uint8_t ipm = PLANAR_IDX; + tempMi = mb.at(x, y); + if (tempMi.interDir != 3) + { + if (tempMi.interDir != 2) + { + cMv = tempMi.mv[0]; + refList = REF_PIC_LIST_0; + refIdx = tempMi.refIdx[0]; + } + else + { + cMv = tempMi.mv[1]; + refList 
= REF_PIC_LIST_1; + refIdx = tempMi.refIdx[1]; + } + if (refList < 0 || refIdx < 0) + { + ipm = PLANAR_IDX; + } + else + { + cMv.changePrecision(MV_PRECISION_SIXTEENTH, MV_PRECISION_INT); + PosY.x = pu.Y().x + (x << MIN_CU_LOG2) + cMv.getHor(); + PosY.y = pu.Y().y + (y << MIN_CU_LOG2) + cMv.getVer(); + clipColPos(PosY.x, PosY.y, pu); + PosY.x = (PosY.x & mask); + PosY.y = (PosY.y & mask); + ipm = pu.cu->slice->getRefPic(refList, refIdx)->cs->getIpmInfo(PosY); + } + } + else + { + pRefPic0 = pu.cu->slice->getRefPic(REF_PIC_LIST_0, tempMi.refIdx[0]); + cMv0 = tempMi.mv[0]; + cMv0.changePrecision(MV_PRECISION_SIXTEENTH, MV_PRECISION_INT); + PosY0.x = pu.Y().x + (x << MIN_CU_LOG2) + cMv0.getHor(); + PosY0.y = pu.Y().y + (y << MIN_CU_LOG2) + cMv0.getVer(); + clipColPos(PosY0.x, PosY0.y, pu); + PosY0.x = (PosY0.x & mask); + PosY0.y = (PosY0.y & mask); + mi0 = pRefPic0->cs->getMotionInfo(PosY0); + int ipm0 = pRefPic0->cs->getIpmInfo(PosY0); + int pocDiff0 = abs(pRefPic0->getPOC() - pu.cu->slice->getPOC()); + + pRefPic1 = pu.cu->slice->getRefPic(REF_PIC_LIST_1, tempMi.refIdx[1]); + cMv1 = tempMi.mv[1]; + cMv1.changePrecision(MV_PRECISION_SIXTEENTH, MV_PRECISION_INT); + PosY1.x = pu.Y().x + (x << MIN_CU_LOG2) + cMv1.getHor(); + PosY1.y = pu.Y().y + (y << MIN_CU_LOG2) + cMv1.getVer(); + clipColPos(PosY1.x, PosY1.y, pu); + PosY1.x = (PosY1.x & mask); + PosY1.y = (PosY1.y & mask); + mi1 = pRefPic1->cs->getMotionInfo(PosY1); + int ipm1 = pRefPic1->cs->getIpmInfo(PosY1); + int pocDiff1 = abs(pRefPic1->getPOC() - pu.cu->slice->getPOC()); + + if (!mi0.isInter && mi1.isInter) + { + ipm = ipm0; + } + else if (!mi1.isInter && mi0.isInter) + { + ipm = ipm1; + } + else if (ipm0 > DC_IDX && ipm1 <= DC_IDX) + { + ipm = ipm0; + } + else if (ipm0 <= DC_IDX && ipm1 > DC_IDX) + { + ipm = ipm1; + } + else if (pocDiff0 < pocDiff1) + { + ipm = ipm0; + } + else if (pocDiff1 < pocDiff0) + { + ipm = ipm1; + } + else if (pRefPic0->m_prevQP[0] > pRefPic1->m_prevQP[0]) + { + ipm = ipm1; + } + else + 
{ + ipm = ipm0; + } + } + ii[x] = ipm; + } + ii += ib.stride; + } +} +#endif + void PU::applyImv( PredictionUnit& pu, MergeCtx &mrgCtx, InterPrediction *interPred ) { if( !pu.mergeFlag ) @@ -4456,6 +4840,9 @@ void PU::spanGeoMotionInfo( PredictionUnit &pu, MergeCtx &geoMrgCtx, const uint8 pu.geoMergeIdx0 = candIdx0; pu.geoMergeIdx1 = candIdx1; MotionBuf mb = pu.getMotionBuf(); +#if JVET_W0123_TIMD_FUSION + IpmBuf ib = pu.getIpmBuf(); +#endif MotionInfo biMv; biMv.isInter = true; @@ -4558,6 +4945,9 @@ void PU::spanGeoMotionInfo( PredictionUnit &pu, MergeCtx &geoMrgCtx, const uint8 } motionInfo += mb.stride; } +#if JVET_W0123_TIMD_FUSION + spanIpmInfoInter(pu, mb, ib); +#endif } bool CU::hasSubCUNonZeroMVd( const CodingUnit& cu ) @@ -4987,6 +5377,9 @@ bool CU::isMTSAllowed(const CodingUnit &cu, const ComponentID compID) mtsAllowed &= !cu.sbtInfo; #if JVET_V0130_INTRA_TMP mtsAllowed &= !cu.tmpFlag; +#endif +#if JVET_W0123_TIMD_FUSION + mtsAllowed &= !(cu.timd && cu.firstPU->multiRefIdx); #endif mtsAllowed &= !(cu.bdpcmMode && cuWidth <= tsMaxSize && cuHeight <= tsMaxSize); return mtsAllowed; diff --git a/source/Lib/CommonLib/UnitTools.h b/source/Lib/CommonLib/UnitTools.h index ab2c96231fb0bfa826aa208253437a3eac1656c5..0f1fed6c9838dd5f7e74da88f7ca2a25aa53f411 100644 --- a/source/Lib/CommonLib/UnitTools.h +++ b/source/Lib/CommonLib/UnitTools.h @@ -109,6 +109,9 @@ namespace CU bool canUseLfnstWithISP ( const CodingUnit& cu, const ChannelType chType ); uint32_t getISPSplitDim ( const int width, const int height, const PartSplit ispType ); bool allLumaCBFsAreZero ( const CodingUnit& cu ); +#if JVET_W0123_TIMD_FUSION + TEMPLATE_TYPE deriveTimdRefType ( int iCurX, int iCurY, uint32_t uiCurWidth, uint32_t uiCurHeight, int iTemplateWidth, int iTemplateHeight, int& iRefX, int& iRefY, uint32_t& uiRefWidth, uint32_t& uiRefHeight ); +#endif PUTraverser traversePUs ( CodingUnit& cu); TUTraverser traverseTUs ( CodingUnit& cu); @@ -191,6 +194,15 @@ namespace PU void spanMotionInfo ( 
PredictionUnit &pu, const MergeCtx &mrgCtx = MergeCtx(), Mv* bdmvrSubPuMv0 = nullptr, Mv* bdmvrSubPuMv1 = nullptr, Mv* bdofSubPuMvOffset = nullptr ); #else void spanMotionInfo ( PredictionUnit &pu, const MergeCtx &mrgCtx = MergeCtx() ); +#endif +#if JVET_W0123_TIMD_FUSION +#if MULTI_PASS_DMVR + void spanMotionInfo2 ( PredictionUnit &pu, const MergeCtx &mrgCtx = MergeCtx(), Mv* bdmvrSubPuMv0 = nullptr, Mv* bdmvrSubPuMv1 = nullptr, Mv* bdofSubPuMvOffset = nullptr ); +#else + void spanMotionInfo2 ( PredictionUnit &pu, const MergeCtx &mrgCtx = MergeCtx() ); +#endif + void spanIpmInfoIntra ( PredictionUnit &pu ); + void spanIpmInfoInter ( PredictionUnit &pu, MotionBuf &mb, IpmBuf &ib ); #endif void applyImv ( PredictionUnit &pu, MergeCtx &mrgCtx, InterPrediction *interPred = NULL ); void getAffineControlPointCand(const PredictionUnit &pu, MotionInfo mi[4], bool isAvailable[4], int verIdx[4], int8_t bcwIdx, int modelIdx, int verNum, AffineMergeCtx& affMrgCtx); diff --git a/source/Lib/DecoderLib/CABACReader.cpp b/source/Lib/DecoderLib/CABACReader.cpp index 0539150a948c6a7aab9dad25b7c846d7787f8e4a..a0ced2ca8a07d59063fdaf837ccf7d5bb7354ec7 100644 --- a/source/Lib/DecoderLib/CABACReader.cpp +++ b/source/Lib/DecoderLib/CABACReader.cpp @@ -1582,10 +1582,18 @@ void CABACReader::extend_ref_line(CodingUnit& cu) if (MRL_NUM_REF_LINES > 1) { +#if JVET_W0123_TIMD_FUSION + multiRefIdx = m_BinDecoder.decodeBin(cu.timd ? Ctx::MultiRefLineIdx(2) : Ctx::MultiRefLineIdx(0)) == 1 ? MULTI_REF_LINE_IDX[1] : MULTI_REF_LINE_IDX[0]; +#else multiRefIdx = m_BinDecoder.decodeBin(Ctx::MultiRefLineIdx(0)) == 1 ? MULTI_REF_LINE_IDX[1] : MULTI_REF_LINE_IDX[0]; +#endif if (MRL_NUM_REF_LINES > 2 && multiRefIdx != MULTI_REF_LINE_IDX[0]) { +#if JVET_W0123_TIMD_FUSION + multiRefIdx = m_BinDecoder.decodeBin(cu.timd ? Ctx::MultiRefLineIdx(3) : Ctx::MultiRefLineIdx(1)) == 1 ? MULTI_REF_LINE_IDX[2] : MULTI_REF_LINE_IDX[1]; +#else multiRefIdx = m_BinDecoder.decodeBin(Ctx::MultiRefLineIdx(1)) == 1 ? 
MULTI_REF_LINE_IDX[2] : MULTI_REF_LINE_IDX[1]; +#endif } } @@ -1627,6 +1635,9 @@ void CABACReader::intra_luma_pred_modes( CodingUnit &cu ) mip_pred_modes(cu); return; } +#if JVET_W0123_TIMD_FUSION + cu_timd_flag(cu); +#endif extend_ref_line( cu ); isp_mode( cu ); #if ENABLE_DIMD @@ -1634,6 +1645,12 @@ void CABACReader::intra_luma_pred_modes( CodingUnit &cu ) { return; } +#endif +#if JVET_W0123_TIMD_FUSION + if (cu.timd) + { + return; + } #endif RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__INTRA_DIR_ANG, cu.lumaSize(), CHANNEL_TYPE_LUMA ); @@ -1669,7 +1686,7 @@ void CABACReader::intra_luma_pred_modes( CodingUnit &cu ) #else PU::getIntraMPMs(*pu, mpm_pred); #endif -#if ENABLE_DIMD +#if ENABLE_DIMD || JVET_W0123_TIMD_FUSION pu->parseLumaMode = true; pu->mpmFlag = mpmFlag[k]; #endif @@ -1711,7 +1728,7 @@ void CABACReader::intra_luma_pred_modes( CodingUnit &cu ) ipred_idx += m_BinDecoder.decodeBinEP(); } } -#if ENABLE_DIMD +#if ENABLE_DIMD || JVET_W0123_TIMD_FUSION pu->ipred_idx = ipred_idx; #endif pu->intraDir[0] = mpm_pred[ipred_idx]; @@ -1724,7 +1741,7 @@ void CABACReader::intra_luma_pred_modes( CodingUnit &cu ) #if SECONDARY_MPM if (m_BinDecoder.decodeBin(Ctx::IntraLumaSecondMpmFlag())) { -#if ENABLE_DIMD +#if ENABLE_DIMD || JVET_W0123_TIMD_FUSION int idx = m_BinDecoder.decodeBinsEP(4) + NUM_PRIMARY_MOST_PROBABLE_MODES; ipred_mode = mpm_pred[idx]; pu->secondMpmFlag = true; @@ -1736,7 +1753,7 @@ void CABACReader::intra_luma_pred_modes( CodingUnit &cu ) else { xReadTruncBinCode(ipred_mode, NUM_LUMA_MODE - NUM_MOST_PROBABLE_MODES); -#if ENABLE_DIMD +#if ENABLE_DIMD || JVET_W0123_TIMD_FUSION pu->secondMpmFlag = false; pu->ipred_idx = ipred_mode; #endif @@ -1744,7 +1761,7 @@ void CABACReader::intra_luma_pred_modes( CodingUnit &cu ) } #else xReadTruncBinCode(ipred_mode, NUM_LUMA_MODE - NUM_MOST_PROBABLE_MODES); -#if ENABLE_DIMD +#if ENABLE_DIMD || JVET_W0123_TIMD_FUSION pu->ipred_idx = ipred_mode; #endif #endif @@ -1779,6 +1796,38 @@ void 
CABACReader::cu_dimd_flag(CodingUnit& cu) cu.dimd = m_BinDecoder.decodeBin(Ctx::DimdFlag(ctxId)); } #endif + +#if JVET_W0123_TIMD_FUSION +void CABACReader::cu_timd_flag( CodingUnit& cu ) +{ + if (!cu.cs->sps->getUseTimd()) + { + cu.timd = false; + return; + } + if (cu.lwidth() * cu.lheight() > 1024 && cu.slice->getSliceType() == I_SLICE) + { + cu.timd = false; + return; + } +#if ENABLE_DIMD + if (cu.dimd) + { + cu.timd = false; + return; + } +#endif + if (!cu.Y().valid() || cu.predMode != MODE_INTRA || !isLuma(cu.chType)) + { + cu.timd = false; + return; + } + + unsigned ctxId = DeriveCtx::CtxTimdFlag( cu ); + cu.timd = m_BinDecoder.decodeBin( Ctx::TimdFlag(ctxId) ); +} +#endif + void CABACReader::intra_chroma_pred_modes( CodingUnit& cu ) { #if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS @@ -1875,7 +1924,7 @@ void CABACReader::intra_chroma_pred_mode(PredictionUnit& pu) pu.intraDir[1] = DM_CHROMA_IDX; return; } -#if ENABLE_DIMD +#if ENABLE_DIMD || JVET_W0123_TIMD_FUSION pu.parseChromaMode = true; #endif unsigned candId = m_BinDecoder.decodeBinsEP(2); @@ -1888,7 +1937,7 @@ void CABACReader::intra_chroma_pred_mode(PredictionUnit& pu) CHECK(chromaCandModes[candId] == DM_CHROMA_IDX, "The intra dir cannot be DM_CHROMA for this path"); pu.intraDir[1] = chromaCandModes[candId]; -#if ENABLE_DIMD +#if ENABLE_DIMD || JVET_W0123_TIMD_FUSION pu.candId = candId; #endif } @@ -3848,7 +3897,11 @@ void CABACReader::isp_mode( CodingUnit& cu ) RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET(STATS__CABAC_BITS__ISP_MODE_FLAG); +#if JVET_W0123_TIMD_FUSION + int symbol = m_BinDecoder.decodeBin(cu.timd ? 
Ctx::ISPMode(2) : Ctx::ISPMode(0)); +#else int symbol = m_BinDecoder.decodeBin(Ctx::ISPMode(0)); +#endif if( symbol ) { @@ -3881,6 +3934,13 @@ void CABACReader::residual_lfnst_mode( CodingUnit& cu, CUCtx& cuCtx ) { return; } +#if JVET_W0123_TIMD_FUSION + if (cu.timd && (cu.ispMode || cu.firstPU->multiRefIdx)) + { + cu.lfnstIdx = 0; + return; + } +#endif RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__LFNST ); diff --git a/source/Lib/DecoderLib/CABACReader.h b/source/Lib/DecoderLib/CABACReader.h index 59607c833b83baa691603e354f34bc03f1ea175e..4a8b362b9e25f8f294bdae22d20cfb62495cd478 100644 --- a/source/Lib/DecoderLib/CABACReader.h +++ b/source/Lib/DecoderLib/CABACReader.h @@ -96,6 +96,9 @@ public: void cu_bcw_flag ( CodingUnit& cu ); void extend_ref_line (CodingUnit& cu); void intra_luma_pred_modes ( CodingUnit& cu ); +#if JVET_W0123_TIMD_FUSION + void cu_timd_flag ( CodingUnit& cu ); +#endif void intra_chroma_pred_modes ( CodingUnit& cu ); bool intra_chroma_lmc_mode ( PredictionUnit& pu ); void intra_chroma_pred_mode ( PredictionUnit& pu ); diff --git a/source/Lib/DecoderLib/DecCu.cpp b/source/Lib/DecoderLib/DecCu.cpp index 19f5affab59e9e932206a0c7ec8a21a8304935e8..f3c5ec18b4cb47fa5a83083a94c4056233f287b7 100644 --- a/source/Lib/DecoderLib/DecCu.cpp +++ b/source/Lib/DecoderLib/DecCu.cpp @@ -161,12 +161,84 @@ void DecCu::decompressCtu( CodingStructure& cs, const UnitArea& ctuArea ) IntraPrediction::deriveDimdMode(currCU.cs->picture->getRecoBuf(area), area, currCU); pu->intraDir[0] = currCU.dimdMode; } +#if JVET_W0123_TIMD_FUSION + else if (currCU.timd) + { + PredictionUnit *pu = currCU.firstPU; + const CompArea &area = currCU.Y(); +#if SECONDARY_MPM + IntraPrediction::deriveDimdMode(currCU.cs->picture->getRecoBuf(area), area, currCU); +#endif + currCU.timdMode = m_pcIntraPred->deriveTimdMode(currCU.cs->picture->getRecoBuf(area), area, currCU); + pu->intraDir[0] = currCU.timdMode; + } +#endif else if (currCU.firstPU->parseLumaMode) { const CompArea 
&area = currCU.Y(); IntraPrediction::deriveDimdMode(currCU.cs->picture->getRecoBuf(area), area, currCU); } + //redo prediction dir derivation + if (currCU.firstPU->parseLumaMode) + { +#if SECONDARY_MPM + uint8_t* mpm_pred = currCU.firstPU->intraMPM; // mpm_idx / rem_intra_luma_pred_mode + uint8_t* non_mpm_pred = currCU.firstPU->intraNonMPM; + PU::getIntraMPMs( *currCU.firstPU, mpm_pred, non_mpm_pred ); +#else + unsigned int mpm_pred[NUM_MOST_PROBABLE_MODES]; // mpm_idx / rem_intra_luma_pred_mode + PU::getIntraMPMs(*currCU.firstPU, mpm_pred); +#endif + if (currCU.firstPU->mpmFlag) + { + currCU.firstPU->intraDir[0] = mpm_pred[currCU.firstPU->ipred_idx]; + } + else + { +#if SECONDARY_MPM + if (currCU.firstPU->secondMpmFlag) + { + currCU.firstPU->intraDir[0] = mpm_pred[currCU.firstPU->ipred_idx]; + } + else + { + currCU.firstPU->intraDir[0] = non_mpm_pred[currCU.firstPU->ipred_idx]; + } +#else + //postponed sorting of MPMs (only in remaining branch) + std::sort(mpm_pred, mpm_pred + NUM_MOST_PROBABLE_MODES); + unsigned ipred_mode = currCU.firstPU->ipred_idx; + + for (uint32_t i = 0; i < NUM_MOST_PROBABLE_MODES; i++) + { + ipred_mode += (ipred_mode >= mpm_pred[i]); + } + currCU.firstPU->intraDir[0] = ipred_mode; +#endif + } + } + if (currCU.firstPU->parseChromaMode) + { + unsigned chromaCandModes[NUM_CHROMA_MODE]; + PU::getIntraChromaCandModes(*currCU.firstPU, chromaCandModes); + + CHECK(currCU.firstPU->candId >= NUM_CHROMA_MODE, "Chroma prediction mode index out of bounds"); + CHECK(PU::isLMCMode(chromaCandModes[currCU.firstPU->candId]), "The intra dir cannot be LM_CHROMA for this path"); + CHECK(chromaCandModes[currCU.firstPU->candId] == DM_CHROMA_IDX, "The intra dir cannot be DM_CHROMA for this path"); + + currCU.firstPU->intraDir[1] = chromaCandModes[currCU.firstPU->candId]; + } +#else +#if JVET_W0123_TIMD_FUSION + if (currCU.timd) + { + PredictionUnit *pu = currCU.firstPU; + const CompArea &area = currCU.Y(); + currCU.timdMode = 
m_pcIntraPred->deriveTimdMode(currCU.cs->picture->getRecoBuf(area), area, currCU); + pu->intraDir[0] = currCU.timdMode; + } + //redo prediction dir derivation if (currCU.firstPU->parseLumaMode) { @@ -217,6 +289,7 @@ void DecCu::decompressCtu( CodingStructure& cs, const UnitArea& ctuArea ) currCU.firstPU->intraDir[1] = chromaCandModes[currCU.firstPU->candId]; } +#endif #endif xReconIntraQT( currCU ); break; @@ -670,6 +743,12 @@ void DecCu::xReconIntraQT( CodingUnit &cu ) } } } +#if JVET_W0123_TIMD_FUSION + if (cu.blocks[CHANNEL_TYPE_LUMA].valid()) + { + PU::spanIpmInfoIntra(*cu.firstPU); + } +#endif } void DecCu::xReconPLT(CodingUnit &cu, ComponentID compBegin, uint32_t numComp) diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp index b41bffd482821ebc8174ffe69f8abc181fe6215b..96d62b5f8372b48ce07d3d99461577d8834ff1fd 100644 --- a/source/Lib/DecoderLib/VLCReader.cpp +++ b/source/Lib/DecoderLib/VLCReader.cpp @@ -2266,6 +2266,9 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS) { READ_UVLC(uiCode, "sps_log2_intra_tmp_max_size"); pcSPS->setIntraTMPMaxSize(1 << uiCode); } +#endif +#if JVET_W0123_TIMD_FUSION + READ_FLAG(uiCode, "sps_timd_enabled_flag"); pcSPS->setUseTimd( uiCode != 0 ); #endif if( pcSPS->getChromaFormatIdc() != CHROMA_400) { @@ -5006,6 +5009,9 @@ void HLSyntaxReader::parseConstraintInfo(ConstraintInfo *cinfo) READ_FLAG(symbol, "gci_no_cclm_constraint_flag"); cinfo->setNoCclmConstraintFlag(symbol > 0 ? true : false); #if ENABLE_DIMD READ_FLAG(symbol, "gci_no_dimd_constraint_flag"); cinfo->setNoDimdConstraintFlag(symbol > 0 ? true : false); +#endif +#if JVET_W0123_TIMD_FUSION + READ_FLAG(symbol, "gci_no_timd_constraint_flag"); cinfo->setNoTimdConstraintFlag(symbol > 0 ? true : false); #endif /* inter */ READ_FLAG(symbol, "gci_no_ref_pic_resampling_constraint_flag"); cinfo->setNoRprConstraintFlag(symbol > 0 ? 
true : false); diff --git a/source/Lib/EncoderLib/CABACWriter.cpp b/source/Lib/EncoderLib/CABACWriter.cpp index 391f28a2fbac914005f1b5bf19360e80dc8806a9..4904308cf850d1f40e3d256220d054e1791b8dc5 100644 --- a/source/Lib/EncoderLib/CABACWriter.cpp +++ b/source/Lib/EncoderLib/CABACWriter.cpp @@ -1140,10 +1140,18 @@ void CABACWriter::extend_ref_line(const PredictionUnit& pu) int multiRefIdx = pu.multiRefIdx; if (MRL_NUM_REF_LINES > 1) { +#if JVET_W0123_TIMD_FUSION + m_BinEncoder.encodeBin(multiRefIdx != MULTI_REF_LINE_IDX[0], cu.timd ? Ctx::MultiRefLineIdx(2) : Ctx::MultiRefLineIdx(0)); +#else m_BinEncoder.encodeBin(multiRefIdx != MULTI_REF_LINE_IDX[0], Ctx::MultiRefLineIdx(0)); +#endif if (MRL_NUM_REF_LINES > 2 && multiRefIdx != MULTI_REF_LINE_IDX[0]) { +#if JVET_W0123_TIMD_FUSION + m_BinEncoder.encodeBin(multiRefIdx != MULTI_REF_LINE_IDX[1], cu.timd ? Ctx::MultiRefLineIdx(3) : Ctx::MultiRefLineIdx(1)); +#else m_BinEncoder.encodeBin(multiRefIdx != MULTI_REF_LINE_IDX[1], Ctx::MultiRefLineIdx(1)); +#endif } } } @@ -1177,10 +1185,18 @@ void CABACWriter::extend_ref_line(const CodingUnit& cu) int multiRefIdx = pu->multiRefIdx; if (MRL_NUM_REF_LINES > 1) { +#if JVET_W0123_TIMD_FUSION + m_BinEncoder.encodeBin(multiRefIdx != MULTI_REF_LINE_IDX[0], cu.timd ? Ctx::MultiRefLineIdx(2) : Ctx::MultiRefLineIdx(0)); +#else m_BinEncoder.encodeBin(multiRefIdx != MULTI_REF_LINE_IDX[0], Ctx::MultiRefLineIdx(0)); +#endif if (MRL_NUM_REF_LINES > 2 && multiRefIdx != MULTI_REF_LINE_IDX[0]) { +#if JVET_W0123_TIMD_FUSION + m_BinEncoder.encodeBin(multiRefIdx != MULTI_REF_LINE_IDX[1], cu.timd ? 
Ctx::MultiRefLineIdx(3) : Ctx::MultiRefLineIdx(1)); +#else m_BinEncoder.encodeBin(multiRefIdx != MULTI_REF_LINE_IDX[1], Ctx::MultiRefLineIdx(1)); +#endif } } @@ -1217,6 +1233,9 @@ void CABACWriter::intra_luma_pred_modes( const CodingUnit& cu ) mip_pred_modes(cu); return; } +#if JVET_W0123_TIMD_FUSION + cu_timd_flag(cu); +#endif extend_ref_line( cu ); isp_mode( cu ); @@ -1226,6 +1245,12 @@ void CABACWriter::intra_luma_pred_modes( const CodingUnit& cu ) return; } #endif +#if JVET_W0123_TIMD_FUSION + if (cu.timd) + { + return; + } +#endif #if SECONDARY_MPM const int numMPMs = NUM_PRIMARY_MOST_PROBABLE_MODES; #else @@ -1410,6 +1435,9 @@ void CABACWriter::intra_luma_pred_mode( const PredictionUnit& pu ) mip_pred_mode(pu); return; } +#if JVET_W0123_TIMD_FUSION + cu_timd_flag(*pu.cu); +#endif extend_ref_line( pu ); isp_mode( *pu.cu ); #if ENABLE_DIMD @@ -1417,6 +1445,12 @@ void CABACWriter::intra_luma_pred_mode( const PredictionUnit& pu ) { return; } +#endif +#if JVET_W0123_TIMD_FUSION + if (pu.cu->timd) + { + return; + } #endif // prev_intra_luma_pred_flag #if SECONDARY_MPM @@ -1540,6 +1574,33 @@ void CABACWriter::intra_luma_pred_mode( const PredictionUnit& pu ) } } +#if JVET_W0123_TIMD_FUSION +void CABACWriter::cu_timd_flag( const CodingUnit& cu ) +{ + if (!cu.cs->sps->getUseTimd()) + { + return; + } + if (cu.lwidth() * cu.lheight() > 1024 && cu.slice->getSliceType() == I_SLICE) + { + return; + } +#if ENABLE_DIMD + if (cu.dimd) + { + return; + } +#endif + if (!cu.Y().valid() || cu.predMode != MODE_INTRA || !isLuma(cu.chType)) + { + return; + } + + unsigned ctxId = DeriveCtx::CtxTimdFlag(cu); + m_BinEncoder.encodeBin(cu.timd, Ctx::TimdFlag(ctxId)); +} +#endif + #if ENABLE_DIMD void CABACWriter::cu_dimd_flag(const CodingUnit& cu) { @@ -3578,11 +3639,19 @@ void CABACWriter::isp_mode( const CodingUnit& cu ) } if ( cu.ispMode == NOT_INTRA_SUBPARTITIONS ) { +#if JVET_W0123_TIMD_FUSION + m_BinEncoder.encodeBin( 0, cu.timd ? 
Ctx::ISPMode( 2 ) : Ctx::ISPMode( 0 ) ); +#else m_BinEncoder.encodeBin( 0, Ctx::ISPMode( 0 ) ); +#endif } else { +#if JVET_W0123_TIMD_FUSION + m_BinEncoder.encodeBin( 1, cu.timd ? Ctx::ISPMode( 2 ) : Ctx::ISPMode( 0 ) ); +#else m_BinEncoder.encodeBin( 1, Ctx::ISPMode( 0 ) ); +#endif m_BinEncoder.encodeBin( cu.ispMode - 1, Ctx::ISPMode( 1 ) ); } DTRACE( g_trace_ctx, D_SYNTAX, "intra_subPartitions() etype=%d pos=(%d,%d) ispIdx=%d\n", cu.chType, cu.blocks[cu.chType].x, cu.blocks[cu.chType].y, (int)cu.ispMode ); @@ -3611,6 +3680,12 @@ void CABACWriter::residual_lfnst_mode( const CodingUnit& cu, CUCtx& cuCtx ) { return; } +#if JVET_W0123_TIMD_FUSION + if (cu.timd && (cu.ispMode || cu.firstPU->multiRefIdx)) + { + return; + } +#endif if( cu.cs->sps->getUseLFNST() && CU::isIntra( cu ) ) { diff --git a/source/Lib/EncoderLib/CABACWriter.h b/source/Lib/EncoderLib/CABACWriter.h index afbf6ed7c1a280ea67cfcaadaabca11236e8cdb6..bae249e5075fba6e03134e6374d1d8a0345d2c1b 100644 --- a/source/Lib/EncoderLib/CABACWriter.h +++ b/source/Lib/EncoderLib/CABACWriter.h @@ -107,6 +107,9 @@ public: void intra_luma_pred_mode ( const PredictionUnit& pu ); #if ENABLE_DIMD void cu_dimd_flag ( const CodingUnit& cu ); +#endif +#if JVET_W0123_TIMD_FUSION + void cu_timd_flag ( const CodingUnit& cu ); #endif void intra_chroma_pred_modes ( const CodingUnit& cu ); void intra_chroma_lmc_mode ( const PredictionUnit& pu ); diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h index a0740d6c0da4a2a4250e539130baa9fa9b919772..14ecf41a012ef787653f69c1d7c6ab738dd6b4ae 100644 --- a/source/Lib/EncoderLib/EncCfg.h +++ b/source/Lib/EncoderLib/EncCfg.h @@ -232,6 +232,9 @@ protected: #if ENABLE_DIMD bool m_noDimdConstraintFlag; #endif +#if JVET_W0123_TIMD_FUSION + bool m_noTimdConstraintFlag; +#endif #if ENABLE_OBMC bool m_noObmcConstraintFlag; #endif @@ -384,6 +387,9 @@ protected: #if ENABLE_DIMD bool m_dimd; #endif +#if JVET_W0123_TIMD_FUSION + bool m_timd; +#endif #if ENABLE_OBMC bool m_OBMC; 
#endif @@ -976,6 +982,10 @@ public: bool getNoDimdConstraintFlag() const { return m_noDimdConstraintFlag; } void setNoDimdConstraintFlag(bool val) { m_noDimdConstraintFlag = val; } #endif +#if JVET_W0123_TIMD_FUSION + bool getNoTimdConstraintFlag() const { return m_noTimdConstraintFlag; } + void setNoTimdConstraintFlag(bool val) { m_noTimdConstraintFlag = val; } +#endif #if ENABLE_OBMC bool getNoObmcConstraintFlag() const { return m_noObmcConstraintFlag; } void setNoObmcConstraintFlag(bool bVal) { m_noObmcConstraintFlag = bVal; } @@ -1238,6 +1248,10 @@ public: void setUseDimd ( bool b ) { m_dimd = b; } bool getUseDimd () const { return m_dimd; } #endif +#if JVET_W0123_TIMD_FUSION + void setUseTimd ( bool b ) { m_timd = b; } + bool getUseTimd () const { return m_timd; } +#endif #if ENABLE_OBMC void setUseObmc ( bool b ) { m_OBMC = b; } bool getUseObmc () const { return m_OBMC; } diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp index 59dbb6c05e0ec253279e2b27f0db3d0cb8cbef52..2142260474e0634d82ada0799cc17cc8bad0f82a 100644 --- a/source/Lib/EncoderLib/EncCu.cpp +++ b/source/Lib/EncoderLib/EncCu.cpp @@ -1816,6 +1816,12 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS double maxCostAllowedForChroma = MAX_DOUBLE; const CodingUnit *bestCU = bestCS->getCU( partitioner.chType ); Distortion interHad = m_modeCtrl->getInterHad(); +#if JVET_W0123_TIMD_FUSION + int timdMode = 0; + int timdModeSecondary = 0; + bool timdIsBlended = false; + int timdFusionWeight[2] = { 0 }; +#endif double dct2Cost = MAX_DOUBLE; @@ -1923,6 +1929,9 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS } #endif +#if JVET_W0123_TIMD_FUSION + bool timdDerived = false; +#endif for( int trGrpIdx = 0; trGrpIdx < grpNumMax; trGrpIdx++ ) { const uint8_t startMtsFlag = trGrpIdx > 0; @@ -1979,6 +1988,35 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS cu.colorTransform = 
adaptiveColorTrans; CU::addPUs( cu ); +#if JVET_W0123_TIMD_FUSION + cu.timd = false; + if (isLuma(partitioner.chType) && cu.slice->getSPS()->getUseTimd()) + { + if (cu.lwidth() * cu.lheight() > 1024 && cu.slice->getSliceType() == I_SLICE) + { + timdDerived = true; + } + if (!timdDerived) + { + const CompArea &area = cu.Y(); + cu.timdMode = m_pcIntraSearch->deriveTimdMode(bestCS->picture->getRecoBuf(area), area, cu); + timdMode = cu.timdMode; + timdDerived = true; + timdModeSecondary = cu.timdModeSecondary; + timdIsBlended = cu.timdIsBlended; + timdFusionWeight[0] = cu.timdFusionWeight[0]; + timdFusionWeight[1] = cu.timdFusionWeight[1]; + } + else + { + cu.timdMode = timdMode; + cu.timdModeSecondary = timdModeSecondary; + cu.timdIsBlended = timdIsBlended; + cu.timdFusionWeight[0] = timdFusionWeight[0]; + cu.timdFusionWeight[1] = timdFusionWeight[1]; + } + } +#endif tempCS->interHad = interHad; @@ -2003,7 +2041,14 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS { continue; } +#if JVET_W0123_TIMD_FUSION + PU::spanIpmInfoIntra(*cu.firstPU); +#endif +#if JVET_W0123_TIMD_FUSION + if (m_pcEncCfg->getUseFastISP() && validCandRet && !mtsFlag && !lfnstIdx && !cu.colorTransform && !cu.timd) +#else if (m_pcEncCfg->getUseFastISP() && validCandRet && !mtsFlag && !lfnstIdx && !cu.colorTransform) +#endif { m_modeCtrl->setISPMode(cu.ispMode); m_modeCtrl->setISPLfnstIdx(cu.lfnstIdx); diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp index b76d81cf34849a886353165abf4603baf1914d66..16965b1ac1592b814f890798740006b84e007d93 100644 --- a/source/Lib/EncoderLib/EncLib.cpp +++ b/source/Lib/EncoderLib/EncLib.cpp @@ -1288,6 +1288,9 @@ void EncLib::xInitSPS( SPS& sps ) #if ENABLE_DIMD cinfo->setNoDimdConstraintFlag(m_noDimdConstraintFlag); #endif +#if JVET_W0123_TIMD_FUSION + cinfo->setNoTimdConstraintFlag(m_noTimdConstraintFlag); +#endif #if ENABLE_OBMC cinfo->setNoObmcConstraintFlag(m_noObmcConstraintFlag); #endif @@ -1408,6 
+1411,9 @@ void EncLib::xInitSPS( SPS& sps ) #if ENABLE_DIMD sps.setUseDimd ( m_dimd ); #endif +#if JVET_W0123_TIMD_FUSION + sps.setUseTimd ( m_timd ); +#endif #if ENABLE_OBMC sps.setUseOBMC ( m_OBMC ); #endif diff --git a/source/Lib/EncoderLib/IntraSearch.cpp b/source/Lib/EncoderLib/IntraSearch.cpp index 46474299c2704bb447f3af87fb6398c124173acd..aa634aa9dcd750952c764c140dd7b54ea5af8cd0 100644 --- a/source/Lib/EncoderLib/IntraSearch.cpp +++ b/source/Lib/EncoderLib/IntraSearch.cpp @@ -396,6 +396,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c const TempCtx ctxStartMipFlag ( m_CtxCache, SubCtx( Ctx::MipFlag, m_CABACEstimator->getCtx() ) ); #if JVET_V0130_INTRA_TMP const TempCtx ctxStartTpmFlag(m_CtxCache, SubCtx(Ctx::TmpFlag, m_CABACEstimator->getCtx())); +#endif +#if JVET_W0123_TIMD_FUSION + const TempCtx ctxStartTimdFlag ( m_CtxCache, SubCtx( Ctx::TimdFlag, m_CABACEstimator->getCtx() ) ); #endif const TempCtx ctxStartIspMode ( m_CtxCache, SubCtx( Ctx::ISPMode, m_CABACEstimator->getCtx() ) ); #if SECONDARY_MPM @@ -509,6 +512,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c #endif #if ENABLE_DIMD bool bestDimdMode = false; +#endif +#if JVET_W0123_TIMD_FUSION + bool bestTimdMode = false; #endif if (isSecondColorSpace) { @@ -621,6 +627,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c m_CABACEstimator->getCtx() = SubCtx( Ctx::TmpFlag, ctxStartTpmFlag ); #endif m_CABACEstimator->getCtx() = SubCtx( Ctx::MipFlag, ctxStartMipFlag ); +#if JVET_W0123_TIMD_FUSION + m_CABACEstimator->getCtx() = SubCtx( Ctx::TimdFlag, ctxStartTimdFlag ); +#endif m_CABACEstimator->getCtx() = SubCtx( Ctx::ISPMode, ctxStartIspMode ); #if SECONDARY_MPM m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMPMIdx, ctxStartMPMIdxFlag); @@ -698,6 +707,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c m_CABACEstimator->getCtx() = SubCtx( Ctx::TmpFlag, 
ctxStartTpmFlag ); #endif m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag); +#if JVET_W0123_TIMD_FUSION + m_CABACEstimator->getCtx() = SubCtx( Ctx::TimdFlag, ctxStartTimdFlag ); +#endif m_CABACEstimator->getCtx() = SubCtx(Ctx::ISPMode, ctxStartIspMode); #if SECONDARY_MPM m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMPMIdx, ctxStartMPMIdxFlag); @@ -766,6 +778,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c m_CABACEstimator->getCtx() = SubCtx( Ctx::TmpFlag, ctxStartTpmFlag ); #endif m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag); +#if JVET_W0123_TIMD_FUSION + m_CABACEstimator->getCtx() = SubCtx( Ctx::TimdFlag, ctxStartTimdFlag ); +#endif m_CABACEstimator->getCtx() = SubCtx(Ctx::ISPMode, ctxStartIspMode); #if SECONDARY_MPM m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMPMIdx, ctxStartMPMIdxFlag); @@ -1096,6 +1111,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c m_CABACEstimator->getCtx() = SubCtx( Ctx::TmpFlag, ctxStartTpmFlag ); #endif m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag); +#if JVET_W0123_TIMD_FUSION + m_CABACEstimator->getCtx() = SubCtx( Ctx::TimdFlag, ctxStartTimdFlag ); +#endif m_CABACEstimator->getCtx() = SubCtx(Ctx::ISPMode, ctxStartIspMode); #if SECONDARY_MPM m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMPMIdx, ctxStartMPMIdxFlag); @@ -1113,6 +1131,30 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c } int numNonISPModes = (int)uiRdModeList.size(); +#if JVET_W0123_TIMD_FUSION + bool isTimdValid = cu.slice->getSPS()->getUseTimd(); + if (cu.lwidth() * cu.lheight() > 1024 && cu.slice->getSliceType() == I_SLICE) + { + isTimdValid = false; + } + if (isTimdValid) + { + cu.timd = false; + uiRdModeList.push_back( ModeInfo( false, false, 0, NOT_INTRA_SUBPARTITIONS, TIMD_IDX ) ); + numNonISPModes++; + if (lfnstIdx == 0 && !cu.mtsFlag) + { + bool isFirstLineOfCtu = 
(((pu.block(COMPONENT_Y).y) & ((pu.cs->sps)->getMaxCUWidth() - 1)) == 0); + int numOfPassesExtendRef = ((!sps.getUseMRL() || isFirstLineOfCtu) ? 1 : MRL_NUM_REF_LINES); + for (int mRefNum = 1; mRefNum < numOfPassesExtendRef; mRefNum++) + { + int multiRefIdx = MULTI_REF_LINE_IDX[mRefNum]; + uiRdModeList.push_back( ModeInfo( false, false, multiRefIdx, NOT_INTRA_SUBPARTITIONS, TIMD_IDX ) ); + numNonISPModes++; + } + } + } +#endif if ( testISP ) { @@ -1124,6 +1166,13 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c uiRdModeList.push_back( ModeInfo( false, false, 0, INTRA_SUBPARTITIONS_RESERVED, 0 ) ); } } +#if JVET_W0123_TIMD_FUSION + if (isTimdValid && sps.getUseISP() && CU::canUseISP(width, height, cu.cs->sps->getMaxTbSize()) && lfnstIdx == 0) + { + uiRdModeList.push_back( ModeInfo( false, false, 0, HOR_INTRA_SUBPARTITIONS, TIMD_IDX ) ); + uiRdModeList.push_back( ModeInfo( false, false, 0, VER_INTRA_SUBPARTITIONS, TIMD_IDX ) ); + } +#endif //===== check modes (using r-d costs) ===== ModeInfo uiBestPUMode; @@ -1176,9 +1225,29 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c { if (m_pcEncCfg->getUseFastISP()) { +#if JVET_W0123_TIMD_FUSION + if (bestTimdMode) + { + m_modeCtrl->setBestPredModeDCT2(MAP131TO67(uiBestPUMode.modeId)); + } + else + { + m_modeCtrl->setBestPredModeDCT2(uiBestPUMode.modeId); + } +#else m_modeCtrl->setBestPredModeDCT2(uiBestPUMode.modeId); +#endif } +#if JVET_W0123_TIMD_FUSION + ModeInfo tempBestPUMode = uiBestPUMode; + if (bestTimdMode) + { + tempBestPUMode.modeId = MAP131TO67(tempBestPUMode.modeId); + } + if (!xSortISPCandList(bestCurrentCost, csBest->cost, tempBestPUMode)) +#else if (!xSortISPCandList(bestCurrentCost, csBest->cost, uiBestPUMode)) +#endif { break; } @@ -1207,9 +1276,43 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c cu.ispMode = uiOrgMode.ispMod; pu.multiRefIdx = uiOrgMode.mRefId; pu.intraDir[CHANNEL_TYPE_LUMA] = 
uiOrgMode.modeId; +#if JVET_W0123_TIMD_FUSION + cu.timd = false; + int modeDiff = uiOrgMode.modeId - MAP131TO67(cu.dimdMode); + if (isTimdValid && lfnstIdx == 0 && uiOrgMode.ispMod > 0 && modeDiff == 0) + { + continue; + } + if (isTimdValid && uiOrgMode.mRefId > 0 && lfnstIdx == 0 && cu.mtsFlag == 0 && modeDiff == 0) + { + continue; + } + if (mode >= 0 && uiOrgMode.modeId == TIMD_IDX) + { + if (cu.ispMode) + { + cu.lfnstIdx = lfnstIdx; + if (cu.ispMode == VER_INTRA_SUBPARTITIONS && uiBestPUMode.ispMod == 0 && !bestTimdMode) + { + continue; + } + } + uiOrgMode.modeId = cu.timdMode; + pu.intraDir[CHANNEL_TYPE_LUMA] = uiOrgMode.modeId; + cu.timd = true; + } +#endif CHECK(cu.mipFlag && pu.multiRefIdx, "Error: combination of MIP and MRL not supported"); - CHECK(pu.multiRefIdx && (pu.intraDir[0] == PLANAR_IDX), "Error: combination of MRL and Planar mode not supported"); +#if JVET_W0123_TIMD_FUSION + if (!cu.timd) + { +#endif + CHECK(pu.multiRefIdx && (pu.intraDir[0] == PLANAR_IDX), + "Error: combination of MRL and Planar mode not supported"); +#if JVET_W0123_TIMD_FUSION + } +#endif CHECK(cu.ispMode && cu.mipFlag, "Error: combination of ISP and MIP not supported"); CHECK(cu.ispMode && pu.multiRefIdx, "Error: combination of ISP and MRL not supported"); CHECK(cu.ispMode&& cu.colorTransform, "Error: combination of ISP and ACT not supported"); @@ -1244,7 +1347,14 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c continue; } // we save the data for future tests +#if JVET_W0123_TIMD_FUSION + if (!cu.timd) + { +#endif m_ispTestedModes[m_curIspLfnstIdx].setModeResults((ISPType)cu.ispMode, (int)uiOrgMode.modeId, (int)csTemp->tus.size(), csTemp->cus[0]->firstTU->cbf[COMPONENT_Y] ? csTemp->cost : MAX_DOUBLE, csBest->cost); +#if JVET_W0123_TIMD_FUSION + } +#endif csTemp->cost = !tmpValidReturn ? 
MAX_DOUBLE : csTemp->cost; } else @@ -1261,9 +1371,17 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c } } #if JVET_V0130_INTRA_TMP +#if JVET_W0123_TIMD_FUSION + if (!cu.ispMode && !cu.mtsFlag && !cu.lfnstIdx && !cu.bdpcmMode && !pu.multiRefIdx && !cu.mipFlag && !cu.tmpFlag && testISP && !cu.timd) +#else if( !cu.ispMode && !cu.mtsFlag && !cu.lfnstIdx && !cu.bdpcmMode && !pu.multiRefIdx && !cu.mipFlag && !cu.tmpFlag && testISP ) +#endif +#else +#if JVET_W0123_TIMD_FUSION + if (!cu.ispMode && !cu.mtsFlag && !cu.lfnstIdx && !cu.bdpcmMode && !pu.multiRefIdx && !cu.mipFlag && testISP && !cu.timd) #else if (!cu.ispMode && !cu.mtsFlag && !cu.lfnstIdx && !cu.bdpcmMode && !pu.multiRefIdx && !cu.mipFlag && testISP) +#endif #endif { #if JVET_V0130_INTRA_TMP @@ -1281,7 +1399,11 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c } validReturn |= tmpValidReturn; +#if JVET_W0123_TIMD_FUSION + if( sps.getUseLFNST() && mtsUsageFlag == 1 && !cu.ispMode && mode >= 0 && !cu.timd ) +#else if( sps.getUseLFNST() && mtsUsageFlag == 1 && !cu.ispMode && mode >= 0 ) +#endif { m_modeCostStore[lfnstIdx][mode] = tmpValidReturn ? csTemp->cost : (MAX_DOUBLE / 2.0); //(MAX_DOUBLE / 2.0) ?? 
} @@ -1313,6 +1435,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c bestBDPCMMode = cu.bdpcmMode; #if ENABLE_DIMD bestDimdMode = cu.dimd; +#endif +#if JVET_W0123_TIMD_FUSION + bestTimdMode = cu.timd; #endif if( sps.getUseLFNST() && mtsUsageFlag == 1 && !cu.ispMode ) { @@ -1405,6 +1530,13 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c } #endif cu.bdpcmMode = bestBDPCMMode; +#if JVET_W0123_TIMD_FUSION + cu.timd = bestTimdMode; + if (cu.timd) + { + pu.intraDir[ CHANNEL_TYPE_LUMA ] = cu.timdMode; + } +#endif if (cu.colorTransform) { CHECK(pu.intraDir[CHANNEL_TYPE_CHROMA] != DM_CHROMA_IDX, "chroma should use DM mode for adaptive color transform"); @@ -6217,6 +6349,9 @@ void IntraSearch::xGetNextISPMode(ModeInfo& modeInfo, const ModeInfo* lastMode, if ( #if ENABLE_DIMD && !JVET_V0087_DIMD_NO_ISP candidate.modeId != DIMD_IDX && +#endif +#if JVET_W0123_TIMD_FUSION + candidate.modeId != TIMD_IDX && #endif maxNumSubPartitions > 2 && (curIspLfnstIdx > 0 || (candidate.modeId >= DC_IDX && ispTestedModes.numTestedModes[nextISPcandSplitType - 1] >= 2))) { @@ -6433,6 +6568,9 @@ bool IntraSearch::xSortISPCandList(double bestCostSoFar, double bestNonISPCost, if ( #if ENABLE_DIMD && !JVET_V0087_DIMD_NO_ISP origHadList.at(k).modeId == DIMD_IDX || +#endif +#if JVET_W0123_TIMD_FUSION + origHadList.at(k).modeId == TIMD_IDX || #endif !modeIsInList[origHadList.at(k).modeId]) { @@ -6484,6 +6622,12 @@ void IntraSearch::xSortISPCandListLFNST() { continue; } +#endif +#if JVET_W0123_TIMD_FUSION + if( candList[i].modeId == TIMD_IDX ) + { + continue; + } #endif const int candSubParts = ispTestedModesRef.getNumCompletedSubParts(ispMode, candList[i].modeId); const double candCost = ispTestedModesRef.getRDCost(ispMode, candList[i].modeId); diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp index cec7da12760889e3721451dbd17fe3b50b56380e..19aeb75d35f7e95bb82c2507245036e6afbf7d34 100644 --- 
a/source/Lib/EncoderLib/VLCWriter.cpp +++ b/source/Lib/EncoderLib/VLCWriter.cpp @@ -1392,6 +1392,9 @@ void HLSWriter::codeSPS( const SPS* pcSPS ) } #endif +#if JVET_W0123_TIMD_FUSION + WRITE_FLAG( pcSPS->getUseTimd() ? 1 : 0, "sps_timd_enabled_flag"); +#endif if( pcSPS->getChromaFormatIdc() != CHROMA_400) { WRITE_FLAG( pcSPS->getUseLMChroma() ? 1 : 0, "sps_cclm_enabled_flag"); @@ -2979,6 +2982,9 @@ void HLSWriter::codeConstraintInfo ( const ConstraintInfo* cinfo ) #if ENABLE_DIMD WRITE_FLAG(cinfo->getNoDimdConstraintFlag() ? 1 : 0, "gci_no_dimd_constraint_flag"); #endif +#if JVET_W0123_TIMD_FUSION + WRITE_FLAG(cinfo->getNoTimdConstraintFlag() ? 1 : 0, "gci_no_timd_constraint_flag" ); +#endif /* inter */ WRITE_FLAG(cinfo->getNoRprConstraintFlag() ? 1 : 0, "gci_no_ref_pic_resampling_constraint_flag");