diff --git a/cfg/encoder_intra_vtm.cfg b/cfg/encoder_intra_vtm.cfg index 267efc02569004930a7a1f198b8db8d5e4c3d4d4..4fa54177913d571faa85c22b8897b8b9818a81d3 100644 --- a/cfg/encoder_intra_vtm.cfg +++ b/cfg/encoder_intra_vtm.cfg @@ -98,6 +98,7 @@ MTS : 1 MTSIntraMaxCand : 3 MTSInterMaxCand : 4 SBT : 1 +LFNST : 1 ISP : 1 Affine : 1 SubPuMvp : 1 @@ -118,6 +119,7 @@ ISPFast : 1 FastMrg : 1 AMaxBT : 1 FastMIP : 1 +FastLFNST : 1 # Encoder optimization tools AffineAmvrEncOpt : 0 diff --git a/cfg/encoder_randomaccess_vtm.cfg b/cfg/encoder_randomaccess_vtm.cfg index ab866f6b1ff0e34b067ea2b48487624d201ad3ab..86dcefd4a1addeb27ef8439a57c5c95992671968 100644 --- a/cfg/encoder_randomaccess_vtm.cfg +++ b/cfg/encoder_randomaccess_vtm.cfg @@ -128,6 +128,7 @@ MTS : 1 MTSIntraMaxCand : 3 MTSInterMaxCand : 4 SBT : 1 +LFNST : 1 ISP : 1 MMVD : 1 Affine : 1 @@ -156,6 +157,7 @@ ISPFast : 1 FastMrg : 1 AMaxBT : 1 FastMIP : 0 +FastLFNST : 0 # Encoder optimization tools AffineAmvrEncOpt : 1 diff --git a/doc/software-manual.tex b/doc/software-manual.tex index aaa6870eb52d24c392e51934eefa93f7690a0591..0d3bdcb708f0474b1d9680d669fb1cc293dd0788 100644 --- a/doc/software-manual.tex +++ b/doc/software-manual.tex @@ -2040,6 +2040,18 @@ Enables or disables symmetric MVD mode. Enables or disables RDPCM coding mode. \\ +\Option{LFNST} & +%\ShortOption{\None} & +\Default{false} & +Enables or disables the use of low frequency non-separable transform (LFNST). +\\ + +\Option{FastLFNST} & +%\ShortOption{\None} & +\Default{false} & +Enables or disables the fast encoding of low frequency non-separable transform (LFNST). 
+\\ + \end{OptionTableNoShorthand} %% diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp index 916b4b816ca795b46656161db38590da75814635..a66de87e6c7d84bc14ca1fc181e60b7c933d76a5 100644 --- a/source/App/EncoderApp/EncApp.cpp +++ b/source/App/EncoderApp/EncApp.cpp @@ -248,6 +248,10 @@ void EncApp::xInitLibCfg() m_cEncLib.setMinQTSizes ( m_uiMinQT ); m_cEncLib.setMaxBTDepth ( m_uiMaxBTDepth, m_uiMaxBTDepthI, m_uiMaxBTDepthIChroma ); m_cEncLib.setDualITree ( m_dualTree ); +#if JVET_N0193_LFNST + m_cEncLib.setLFNST ( m_LFNST ); + m_cEncLib.setUseFastLFNST ( m_useFastLFNST ); +#endif m_cEncLib.setSubPuMvpMode ( m_SubPuMvpMode ); m_cEncLib.setAffine ( m_Affine ); m_cEncLib.setAffineType ( m_AffineType ); diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp index 51699ffb66eb3089de0d0d6ed91855ab29e6d39b..1999011a00990b39c7c7c94a7a2f31664eb55d92 100644 --- a/source/App/EncoderApp/EncAppCfg.cpp +++ b/source/App/EncoderApp/EncAppCfg.cpp @@ -834,6 +834,10 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) ("MaxBTDepthISliceL", m_uiMaxBTDepthI, 3u, "MaxBTDepthISliceL") ("MaxBTDepthISliceC", m_uiMaxBTDepthIChroma, 3u, "MaxBTDepthISliceC") ("DualITree", m_dualTree, false, "Use separate QTBT trees for intra slice luma and chroma channel types") +#if JVET_N0193_LFNST + ( "LFNST", m_LFNST, false, "Enable LFNST (0:off, 1:on) [default: off]" ) + ( "FastLFNST", m_useFastLFNST, false, "Fast methods for LFNST" ) +#endif ("SubPuMvp", m_SubPuMvpMode, 0, "Enable Sub-PU temporal motion vector prediction (0:off, 1:ATMVP, 2:STMVP, 3:ATMVP+STMVP) [default: off]") #if JVET_N0127_MMVD_SPS_FLAG ("MMVD", m_MMVD, true, "Enable Merge mode with Motion Vector Difference (0:off, 1:on) [default: 1]") @@ -3159,6 +3163,9 @@ void EncAppCfg::xPrintParameter() if( m_profile == Profile::NEXT ) { msg( VERBOSE, "\nNEXT TOOL CFG: " ); +#if JVET_N0193_LFNST + msg( VERBOSE, "LFNST:%d ", m_LFNST ); +#endif #if JVET_N0127_MMVD_SPS_FLAG msg( VERBOSE, 
"MMVD:%d ", m_MMVD); #endif @@ -3238,6 +3245,9 @@ void EncAppCfg::xPrintParameter() if( m_ISP ) msg( VERBOSE, "ISPFast:%d ", m_useFastISP ); #else msg( VERBOSE, "ISPFast:%d ", m_useFastISP ); +#endif +#if JVET_N0193_LFNST + if( m_LFNST ) msg( VERBOSE, "FastLFNST:%d ", m_useFastLFNST ); #endif msg( VERBOSE, "AMaxBT:%d ", m_useAMaxBT ); msg( VERBOSE, "E0023FastEnc:%d ", m_e0023FastEnc ); diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h index bda717ec4b356f71ab4db4438bda11ef77ff55a7..ee5da61d316eb3c1f45e84cc861e87094381cb52 100644 --- a/source/App/EncoderApp/EncAppCfg.h +++ b/source/App/EncoderApp/EncAppCfg.h @@ -238,6 +238,10 @@ protected: unsigned m_uiMaxBTDepthI; unsigned m_uiMaxBTDepthIChroma; bool m_dualTree; +#if JVET_N0193_LFNST + bool m_LFNST; + bool m_useFastLFNST; +#endif int m_SubPuMvpMode; bool m_Affine; bool m_AffineType; diff --git a/source/Lib/CommonLib/CodingStatistics.h b/source/Lib/CommonLib/CodingStatistics.h index 196124911c55a1df940b2e988bbaa00d30e14c7a..f51f82d15595255dbcb3b5dd0ca637caa85e7738 100644 --- a/source/Lib/CommonLib/CodingStatistics.h +++ b/source/Lib/CommonLib/CodingStatistics.h @@ -91,6 +91,9 @@ enum CodingStatisticsType STATS__CABAC_BITS__SAO, STATS__CABAC_BITS__ALF, STATS__CABAC_TRM_BITS, +#if JVET_N0193_LFNST + STATS__CABAC_BITS__LFNST, +#endif STATS__CABAC_FIXED_BITS, STATS__CABAC_PCM_ALIGN_BITS, STATS__CABAC_PCM_CODE_BITS, @@ -109,6 +112,9 @@ enum CodingStatisticsType STATS__CABAC_BITS__TRIANGLE_FLAG, STATS__CABAC_BITS__TRIANGLE_INDEX, STATS__CABAC_BITS__MULTI_REF_LINE, +#if JVET_N0193_LFNST + STATS__TOOL_LFNST, +#endif STATS__CABAC_BITS__SYMMVD_FLAG, STATS__TOOL_TOTAL_FRAME,// This is a special case and is not included in the report. 
STATS__TOOL_AFF, @@ -173,6 +179,9 @@ static inline const char* getName(CodingStatisticsType name) "CABAC_BITS__SIGN_BIT", "CABAC_BITS__ESCAPE_BITS", "CABAC_BITS__SAO", "CABAC_BITS__ALF", "CABAC_TRM_BITS", +#if JVET_N0193_LFNST + "CABAC_BITS__LFNST", +#endif "CABAC_FIXED_BITS", @@ -197,6 +206,9 @@ static inline const char* getName(CodingStatisticsType name) "TOOL_FRAME", "TOOL_AFFINE", "TOOL_EMT", +#if JVET_N0193_LFNST + "TOOL_LFNST", +#endif "TOOL_TOTAL" }; CHECK( STATS__NUM_STATS != sizeof( statNames ) / sizeof( char* ) || name >= STATS__NUM_STATS, "stats out of range" ); diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h index b56f3e958caeb9c8161c0276951cfa70854f7a4d..345a7a4afa78a73daa1cb6c44fe25b45bf546833 100644 --- a/source/Lib/CommonLib/CommonDef.h +++ b/source/Lib/CommonLib/CommonDef.h @@ -217,6 +217,10 @@ static const int NUM_LUMA_MODE = 67; ///< Plan static const int NUM_LMC_MODE = 1 + 2; ///< LMC + MDLM_T + MDLM_L static const int NUM_INTRA_MODE = (NUM_LUMA_MODE + NUM_LMC_MODE); +#if JVET_N0193_LFNST +static const int NUM_EXT_LUMA_MODE = 28; +#endif + static const int NUM_DIR = (((NUM_LUMA_MODE - 3) >> 2) + 1); static const int PLANAR_IDX = 0; ///< index for intra PLANAR mode static const int DC_IDX = 1; ///< index for intra DC mode @@ -253,6 +257,15 @@ static const int FAST_UDI_MAX_RDMODE_NUM = (NUM_LUMA_MODE + MAX_NUM_MIP_MODE); / static const int FAST_UDI_MAX_RDMODE_NUM = NUM_LUMA_MODE; ///< maximum number of RD comparison in fast-UDI estimation loop #endif +#if JVET_N0193_LFNST +static const int MAX_LFNST_COEF_NUM = 16; + +static const int LFNST_SIG_NZ_LUMA = 1; +static const int LFNST_SIG_NZ_CHROMA = 1; + +static const int NUM_LFNST_NUM_PER_SET = 3; +#endif + static const int MDCS_ANGLE_LIMIT = 9; ///< 0 = Horizontal/vertical only, 1 = Horizontal/vertical +/- 1, 2 = Horizontal/vertical +/- 2 etc... 
static const int MDCS_MAXIMUM_WIDTH = 8; ///< (measured in pixels) TUs with width greater than this can only use diagonal scan diff --git a/source/Lib/CommonLib/Contexts.cpp b/source/Lib/CommonLib/Contexts.cpp index 488de6e61dacd88dc2b4502343bf8ffc3a0f1b24..944154732650e667364559982a5936bb09fecccd 100644 --- a/source/Lib/CommonLib/Contexts.cpp +++ b/source/Lib/CommonLib/Contexts.cpp @@ -693,6 +693,16 @@ const CtxSet ContextSetCfg::TransquantBypassFlag = ContextSetCfg::addCtxSet { DWS, } }); +#if JVET_N0193_LFNST +const CtxSet ContextSetCfg::LFNSTIdx = ContextSetCfg::addCtxSet +( { + { CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, }, + { CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, }, + { CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, }, + { DWS, DWS, DWS, DWS, DWS, DWS, DWS, DWS, }, +} ); +#endif + const CtxSet ContextSetCfg::RdpcmFlag = ContextSetCfg::addCtxSet ({ { 139, 139,}, diff --git a/source/Lib/CommonLib/Contexts.h b/source/Lib/CommonLib/Contexts.h index 4157171c065fea63ca6d6a4b3fc400b348235043..f4f9c6775448024bcc2e1367091b9995c9b79c0f 100644 --- a/source/Lib/CommonLib/Contexts.h +++ b/source/Lib/CommonLib/Contexts.h @@ -250,6 +250,9 @@ public: static const CtxSet SaoTypeIdx; static const CtxSet MTSIndex; static const CtxSet TransquantBypassFlag; +#if JVET_N0193_LFNST + static const CtxSet LFNSTIdx; +#endif static const CtxSet RdpcmFlag; static const CtxSet RdpcmDir; static const CtxSet SbtFlag; diff --git a/source/Lib/CommonLib/DepQuant.cpp b/source/Lib/CommonLib/DepQuant.cpp index 2ef44c47c65832dfb5b562dc6090aa2d094eaa72..eb3e987819524df7a1d7892484badcad7d07c0cd 100644 --- a/source/Lib/CommonLib/DepQuant.cpp +++ b/source/Lib/CommonLib/DepQuant.cpp @@ -1003,75 +1003,119 @@ namespace DQIntern m_goRiceZero = 0; } +#if JVET_N0193_LFNST + void checkRdCosts( const ScanPosType spt, const PQData& pqDataA, const PQData& pqDataB, Decision& decisionA, Decision& decisionB, bool zeroOut ) const +#else void checkRdCosts( const ScanPosType spt, const PQData& pqDataA, const PQData& 
pqDataB, Decision& decisionA, Decision& decisionB) const +#endif { const int32_t* goRiceTab = g_goRiceBits[m_goRicePar]; int64_t rdCostA = m_rdCost + pqDataA.deltaDist; int64_t rdCostB = m_rdCost + pqDataB.deltaDist; int64_t rdCostZ = m_rdCost; - if( m_remRegBins >= 4 ) +#if JVET_N0193_LFNST + if( zeroOut ) { - if( pqDataA.absLevel < 4 ) - rdCostA += m_coeffFracBits.bits[pqDataA.absLevel]; - else + rdCostZ = m_rdCost; + if( m_remRegBins >= 4 ) { - const unsigned value = (pqDataA.absLevel - 4) >> 1; - rdCostA += m_coeffFracBits.bits[pqDataA.absLevel - (value << 1)] + goRiceTab[value<RICEMAX ? value : RICEMAX-1]; + if( spt == SCAN_ISCSBB ) + { + rdCostZ += m_sigFracBits.intBits[ 0 ]; + } + else if( spt == SCAN_SOCSBB ) + { + rdCostZ += m_sbbFracBits.intBits[ 1 ] + m_sigFracBits.intBits[ 0 ]; + } + else if( m_numSigSbb ) + { + rdCostZ += m_sigFracBits.intBits[ 0 ]; + } + else + { + rdCostZ = decisionA.rdCost; + } } - if( pqDataB.absLevel < 4 ) - rdCostB += m_coeffFracBits.bits[pqDataB.absLevel]; else { - const unsigned value = (pqDataB.absLevel - 4) >> 1; - rdCostB += m_coeffFracBits.bits[pqDataB.absLevel - (value << 1)] + goRiceTab[value<RICEMAX ? 
value : RICEMAX-1]; + rdCostZ += goRiceTab[ m_goRiceZero ]; } - if( spt == SCAN_ISCSBB ) + if( rdCostZ < decisionA.rdCost ) { - rdCostA += m_sigFracBits.intBits[1]; - rdCostB += m_sigFracBits.intBits[1]; - rdCostZ += m_sigFracBits.intBits[0]; + decisionA.rdCost = rdCostZ; + decisionA.absLevel = 0; + decisionA.prevId = m_stateId; } - else if( spt == SCAN_SOCSBB ) + } + else + { +#endif + if( m_remRegBins >= 4 ) { - rdCostA += m_sbbFracBits.intBits[1] + m_sigFracBits.intBits[1]; - rdCostB += m_sbbFracBits.intBits[1] + m_sigFracBits.intBits[1]; - rdCostZ += m_sbbFracBits.intBits[1] + m_sigFracBits.intBits[0]; + if( pqDataA.absLevel < 4 ) + rdCostA += m_coeffFracBits.bits[ pqDataA.absLevel ]; + else + { + const unsigned value = ( pqDataA.absLevel - 4 ) >> 1; + rdCostA += m_coeffFracBits.bits[ pqDataA.absLevel - ( value << 1 ) ] + goRiceTab[ value < RICEMAX ? value : RICEMAX - 1 ]; + } + if( pqDataB.absLevel < 4 ) + rdCostB += m_coeffFracBits.bits[ pqDataB.absLevel ]; + else + { + const unsigned value = ( pqDataB.absLevel - 4 ) >> 1; + rdCostB += m_coeffFracBits.bits[ pqDataB.absLevel - ( value << 1 ) ] + goRiceTab[ value < RICEMAX ? 
value : RICEMAX - 1 ]; + } + if( spt == SCAN_ISCSBB ) + { + rdCostA += m_sigFracBits.intBits[ 1 ]; + rdCostB += m_sigFracBits.intBits[ 1 ]; + rdCostZ += m_sigFracBits.intBits[ 0 ]; + } + else if( spt == SCAN_SOCSBB ) + { + rdCostA += m_sbbFracBits.intBits[ 1 ] + m_sigFracBits.intBits[ 1 ]; + rdCostB += m_sbbFracBits.intBits[ 1 ] + m_sigFracBits.intBits[ 1 ]; + rdCostZ += m_sbbFracBits.intBits[ 1 ] + m_sigFracBits.intBits[ 0 ]; + } + else if( m_numSigSbb ) + { + rdCostA += m_sigFracBits.intBits[ 1 ]; + rdCostB += m_sigFracBits.intBits[ 1 ]; + rdCostZ += m_sigFracBits.intBits[ 0 ]; + } + else + { + rdCostZ = decisionA.rdCost; + } } - else if( m_numSigSbb ) + else { - rdCostA += m_sigFracBits.intBits[1]; - rdCostB += m_sigFracBits.intBits[1]; - rdCostZ += m_sigFracBits.intBits[0]; + rdCostA += ( 1 << SCALE_BITS ) + goRiceTab[ pqDataA.absLevel <= m_goRiceZero ? pqDataA.absLevel - 1 : ( pqDataA.absLevel < RICEMAX ? pqDataA.absLevel : RICEMAX - 1 ) ]; + rdCostB += ( 1 << SCALE_BITS ) + goRiceTab[ pqDataB.absLevel <= m_goRiceZero ? pqDataB.absLevel - 1 : ( pqDataB.absLevel < RICEMAX ? pqDataB.absLevel : RICEMAX - 1 ) ]; + rdCostZ += goRiceTab[ m_goRiceZero ]; } - else + if( rdCostA < decisionA.rdCost ) { - rdCostZ = decisionA.rdCost; + decisionA.rdCost = rdCostA; + decisionA.absLevel = pqDataA.absLevel; + decisionA.prevId = m_stateId; } + if( rdCostZ < decisionA.rdCost ) + { + decisionA.rdCost = rdCostZ; + decisionA.absLevel = 0; + decisionA.prevId = m_stateId; + } + if( rdCostB < decisionB.rdCost ) + { + decisionB.rdCost = rdCostB; + decisionB.absLevel = pqDataB.absLevel; + decisionB.prevId = m_stateId; + } +#if JVET_N0193_LFNST } - else - { - rdCostA += (1 << SCALE_BITS) + goRiceTab[pqDataA.absLevel <= m_goRiceZero ? pqDataA.absLevel - 1 : (pqDataA.absLevel<RICEMAX ? pqDataA.absLevel : RICEMAX-1)]; - rdCostB += (1 << SCALE_BITS) + goRiceTab[pqDataB.absLevel <= m_goRiceZero ? pqDataB.absLevel - 1 : (pqDataB.absLevel<RICEMAX ? 
pqDataB.absLevel : RICEMAX-1)]; - rdCostZ += goRiceTab[m_goRiceZero]; - } - if( rdCostA < decisionA.rdCost ) - { - decisionA.rdCost = rdCostA; - decisionA.absLevel = pqDataA.absLevel; - decisionA.prevId = m_stateId; - } - if( rdCostZ < decisionA.rdCost ) - { - decisionA.rdCost = rdCostZ; - decisionA.absLevel = 0; - decisionA.prevId = m_stateId; - } - if( rdCostB < decisionB.rdCost ) - { - decisionB.rdCost = rdCostB; - decisionB.absLevel = pqDataB.absLevel; - decisionB.prevId = m_stateId; - } +#endif } inline void checkRdCostStart(int32_t lastOffset, const PQData &pqData, Decision &decision) const @@ -1479,6 +1523,7 @@ namespace DQIntern { ::memcpy( decisions, startDec, 8*sizeof(Decision) ); +#if !JVET_N0193_LFNST if( zeroOut ) { if( spt==SCAN_EOCSBB ) @@ -1490,22 +1535,51 @@ namespace DQIntern } return; } +#endif PQData pqData[4]; m_quant.preQuantCoeff( absCoeff, pqData ); +#if JVET_N0193_LFNST + m_prevStates[0].checkRdCosts( spt, pqData[0], pqData[2], decisions[0], decisions[2], zeroOut ); + m_prevStates[1].checkRdCosts( spt, pqData[0], pqData[2], decisions[2], decisions[0], zeroOut ); + m_prevStates[2].checkRdCosts( spt, pqData[3], pqData[1], decisions[1], decisions[3], zeroOut ); + m_prevStates[3].checkRdCosts( spt, pqData[3], pqData[1], decisions[3], decisions[1], zeroOut ); +#else m_prevStates[0].checkRdCosts( spt, pqData[0], pqData[2], decisions[0], decisions[2]); m_prevStates[1].checkRdCosts( spt, pqData[0], pqData[2], decisions[2], decisions[0]); m_prevStates[2].checkRdCosts( spt, pqData[3], pqData[1], decisions[1], decisions[3]); m_prevStates[3].checkRdCosts( spt, pqData[3], pqData[1], decisions[3], decisions[1]); +#endif if( spt==SCAN_EOCSBB ) { - m_skipStates[0].checkRdCostSkipSbb( decisions[0] ); - m_skipStates[1].checkRdCostSkipSbb( decisions[1] ); - m_skipStates[2].checkRdCostSkipSbb( decisions[2] ); - m_skipStates[3].checkRdCostSkipSbb( decisions[3] ); +#if JVET_N0193_LFNST + if( zeroOut ) + { + m_skipStates[0].checkRdCostSkipSbbZeroOut( decisions[0] 
); + m_skipStates[1].checkRdCostSkipSbbZeroOut( decisions[1] ); + m_skipStates[2].checkRdCostSkipSbbZeroOut( decisions[2] ); + m_skipStates[3].checkRdCostSkipSbbZeroOut( decisions[3] ); + } + else + { +#endif + m_skipStates[0].checkRdCostSkipSbb( decisions[0] ); + m_skipStates[1].checkRdCostSkipSbb( decisions[1] ); + m_skipStates[2].checkRdCostSkipSbb( decisions[2] ); + m_skipStates[3].checkRdCostSkipSbb( decisions[3] ); +#if JVET_N0193_LFNST + } +#endif } +#if JVET_N0193_LFNST + if( !zeroOut ) + { +#endif m_startState.checkRdCostStart( lastOffset, pqData[0], decisions[0] ); m_startState.checkRdCostStart( lastOffset, pqData[2], decisions[2] ); +#if JVET_N0193_LFNST + } +#endif } void DepQuant::xDecideAndUpdate( const TCoeff absCoeff, const ScanInfo& scanInfo, bool zeroOut ) @@ -1527,7 +1601,11 @@ namespace DQIntern m_currStates[3].updateStateEOS( scanInfo, m_prevStates, m_skipStates, decisions[3] ); ::memcpy( decisions+4, decisions, 4*sizeof(Decision) ); } +#if !JVET_N0193_LFNST else if( !zeroOut ) +#else + else +#endif { switch( scanInfo.nextNbInfoSbb.num ) { @@ -1590,8 +1668,21 @@ namespace DQIntern ::memset( tu.getCoeffs( compID ).buf, 0x00, numCoeff*sizeof(TCoeff) ); absSum = 0; +#if JVET_N0193_LFNST + const CompArea& area = tu.blocks[ compID ]; + const uint32_t width = area.width; + const uint32_t height = area.height; + const uint32_t lfnstIdx = tu.cu->lfnstIdx; +#endif + //===== find first test position ===== int firstTestPos = numCoeff - 1; +#if JVET_N0193_LFNST + if( lfnstIdx > 0 && tu.mtsIdx != 1 && ( ( width == 4 && height == 4 ) || ( width == 8 && height == 8 ) ) ) + { + firstTestPos = 7; + } +#endif const TCoeff thres = m_quant.getLastThreshold(); for( ; firstTestPos >= 0; firstTestPos-- ) { @@ -1627,7 +1718,13 @@ namespace DQIntern for( int scanIdx = firstTestPos; scanIdx >= 0; scanIdx-- ) { const ScanInfo& scanInfo = tuPars.m_scanInfo[ scanIdx ]; +#if JVET_N0193_LFNST + bool lfnstZeroOut = lfnstIdx > 0 && tu.mtsIdx != 1 && width >= 4 && height >= 4 
&& + ( ( ( ( width >= 8 && height >= 8 ) && scanIdx >= 16 ) || ( ( ( width == 4 && height == 4 ) || ( width == 8 && height == 8 ) ) && scanIdx >= 8 ) ) && scanIdx < 48 ); + xDecideAndUpdate( abs( tCoeff[ scanInfo.rasterPos ] ), scanInfo, ( zeroOut && ( scanInfo.posX >= effWidth || scanInfo.posY >= effHeight ) ) || lfnstZeroOut ); +#else xDecideAndUpdate( abs( tCoeff[ scanInfo.rasterPos ] ), scanInfo, zeroOut && ( scanInfo.posX >= effWidth || scanInfo.posY >= effHeight ) ); +#endif } //===== find best path ===== diff --git a/source/Lib/CommonLib/IntraPrediction.h b/source/Lib/CommonLib/IntraPrediction.h index 51541c017c59a3cef7f742cfc34037ab4af72350..22a970c67f606577771e858184fba038d8ce3f68 100644 --- a/source/Lib/CommonLib/IntraPrediction.h +++ b/source/Lib/CommonLib/IntraPrediction.h @@ -165,7 +165,6 @@ public: /// set parameters from CU data for accessing intra data void initIntraPatternChType (const CodingUnit &cu, const CompArea &area, const bool forceRefFilterFlag = false); // use forceRefFilterFlag to get both filtered and unfiltered buffers - #if JVET_N0217_MATRIX_INTRAPRED // Matrix-based intra prediction void initIntraMip (const PredictionUnit &pu); @@ -173,6 +172,9 @@ public: #endif static bool useFilteredIntraRefSamples( const ComponentID &compID, const PredictionUnit &pu, bool modeSpecific, const UnitArea &tuArea ); +#if HM_MDIS_AS_IN_JEM && JVET_N0193_LFNST + static bool getPlanarMDISCondition( const UnitArea &tuArea ) { return abs( PLANAR_IDX - HOR_IDX ) > m_aucIntraFilter[ CHANNEL_TYPE_LUMA ][ ( ( g_aucLog2[ tuArea.Y().width ] + g_aucLog2[ tuArea.Y().height ] ) >> 1 ) ]; } +#endif static bool useDPCMForFirstPassIntraEstimation(const PredictionUnit &pu, const uint32_t &uiDirMode); void geneWeightedPred (const ComponentID compId, PelBuf &pred, const PredictionUnit &pu, Pel *srcBuf); diff --git a/source/Lib/CommonLib/QuantRDOQ.cpp b/source/Lib/CommonLib/QuantRDOQ.cpp index 
07183059915ed2467f249ad0585b65dfdf079fa3..966ad23cce1fa2245723f6ff6b4ae085e819c0ec 100644 --- a/source/Lib/CommonLib/QuantRDOQ.cpp +++ b/source/Lib/CommonLib/QuantRDOQ.cpp @@ -759,15 +759,36 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, DTRACE( g_trace_ctx, D_RDOQ, "%d: %3d, %3d, %dx%d, comp=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), rect.x, rect.y, rect.width, rect.height, compID ); #endif +#if JVET_N0193_LFNST + const uint32_t lfnstIdx = tu.cu->lfnstIdx; +#endif for (int subSetId = iCGNum - 1; subSetId >= 0; subSetId--) { cctx.initSubblock( subSetId ); +#if JVET_N0193_LFNST + uint32_t maxNonZeroPosInCG = iCGSizeM1; + if( lfnstIdx > 0 && ( ( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8 && cctx.cgPosX() == 0 && cctx.cgPosY() == 0 ) ) ) + { + maxNonZeroPosInCG = 7; + } +#endif + memset( &rdStats, 0, sizeof (coeffGroupRDStats)); +#if JVET_N0193_LFNST + for( int iScanPosinCG = iCGSizeM1; iScanPosinCG > maxNonZeroPosInCG; iScanPosinCG-- ) + { + iScanPos = cctx.minSubPos() + iScanPosinCG; + uint32_t blkPos = cctx.blockPos( iScanPos ); + piDstCoeff[ blkPos ] = 0; + } + for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- ) +#else for (int iScanPosinCG = iCGSizeM1; iScanPosinCG >= 0; iScanPosinCG--) +#endif { iScanPos = cctx.minSubPos() + iScanPosinCG; //===== quantization ===== @@ -970,7 +991,11 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup,0); } // reset coeffs to 0 in this block +#if JVET_N0193_LFNST + for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- ) +#else for (int iScanPosinCG = iCGSizeM1; iScanPosinCG >= 0; iScanPosinCG--) +#endif { iScanPos = cctx.minSubPos() + iScanPosinCG; uint32_t uiBlkPos = cctx.blockPos( iScanPos ); @@ -1088,7 +1113,16 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, 
d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ]; if (cctx.isSigGroup( iCGScanPos ) ) { +#if JVET_N0193_LFNST + uint32_t maxNonZeroPosInCG = iCGSizeM1; + if( lfnstIdx > 0 && ( ( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8 && cctx.cgPosX() == 0 && cctx.cgPosY() == 0 ) ) ) + { + maxNonZeroPosInCG = 7; + } + for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- ) +#else for (int iScanPosinCG = iCGSizeM1; iScanPosinCG >= 0; iScanPosinCG--) +#endif { iScanPos = iCGScanPos * (iCGSizeM1 + 1) + iScanPosinCG; diff --git a/source/Lib/CommonLib/Rom.cpp b/source/Lib/CommonLib/Rom.cpp index 20e8363a6c07c19d033ab47e2b81c29e957f6c36..b8cd5148ccd41823bd4470726f7bc3502e3694e7 100644 --- a/source/Lib/CommonLib/Rom.cpp +++ b/source/Lib/CommonLib/Rom.cpp @@ -467,6 +467,32 @@ void initROM() } #endif +#if JVET_N0193_LFNST + // initialize CoefTopLeftDiagScan8x8 for LFNST + for( uint32_t blockWidthIdx = 0; blockWidthIdx < sizeInfo.numAllWidths(); blockWidthIdx++ ) + { + const uint32_t blockWidth = sizeInfo.sizeFrom( blockWidthIdx ); + + const static uint8_t g_auiXYDiagScan8x8[ 64 ][ 2 ] = + { + { 0, 0 }, { 0, 1 }, { 1, 0 }, { 0, 2 }, { 1, 1 }, { 2, 0 }, { 0, 3 }, { 1, 2 }, + { 2, 1 }, { 3, 0 }, { 1, 3 }, { 2, 2 }, { 3, 1 }, { 2, 3 }, { 3, 2 }, { 3, 3 }, + { 0, 4 }, { 0, 5 }, { 1, 4 }, { 0, 6 }, { 1, 5 }, { 2, 4 }, { 0, 7 }, { 1, 6 }, + { 2, 5 }, { 3, 4 }, { 1, 7 }, { 2, 6 }, { 3, 5 }, { 2, 7 }, { 3, 6 }, { 3, 7 }, + { 4, 0 }, { 4, 1 }, { 5, 0 }, { 4, 2 }, { 5, 1 }, { 6, 0 }, { 4, 3 }, { 5, 2 }, + { 6, 1 }, { 7, 0 }, { 5, 3 }, { 6, 2 }, { 7, 1 }, { 6, 3 }, { 7, 2 }, { 7, 3 }, + { 4, 4 }, { 4, 5 }, { 5, 4 }, { 4, 6 }, { 5, 5 }, { 6, 4 }, { 4, 7 }, { 5, 6 }, + { 6, 5 }, { 7, 4 }, { 5, 7 }, { 6, 6 }, { 7, 5 }, { 6, 7 }, { 7, 6 }, { 7, 7 } + }; + for( int i = 0; i < 64; i++ ) + { + g_coefTopLeftDiagScan8x8[ blockWidthIdx ][ i ].idx = g_auiXYDiagScan8x8[ i ][ 0 ] + g_auiXYDiagScan8x8[ i ][ 1 ] * blockWidth; + g_coefTopLeftDiagScan8x8[ blockWidthIdx 
][ i ].x = g_auiXYDiagScan8x8[ i ][ 0 ]; + g_coefTopLeftDiagScan8x8[ blockWidthIdx ][ i ].y = g_auiXYDiagScan8x8[ i ][ 1 ]; + } + } +#endif + for( int idxH = MAX_CU_DEPTH - MIN_CU_LOG2; idxH >= 0; --idxH ) { for( int idxW = MAX_CU_DEPTH - MIN_CU_LOG2; idxW >= 0; --idxW ) @@ -667,6 +693,9 @@ ScanElement *g_scanOrder[SCAN_NUMBER_OF_GROUP_TYPES][SCAN_NUMBER_OF_TYPES][MAX_C #else ScanElement *g_scanOrder[2][SCAN_NUMBER_OF_GROUP_TYPES][SCAN_NUMBER_OF_TYPES][MAX_CU_SIZE / 2 + 1][MAX_CU_SIZE / 2 + 1]; #endif +#if JVET_N0193_LFNST +ScanElement g_coefTopLeftDiagScan8x8[ MAX_CU_SIZE / 2 + 1 ][ 64 ]; +#endif const uint32_t g_uiMinInGroup[LAST_SIGNIFICANT_GROUPS] = { 0,1,2,3,4,6,8,12,16,24,32,48,64,96 }; const uint32_t g_uiGroupIdx[MAX_TB_SIZEY] = { 0,1,2,3,4,4,5,5,6,6,6,6,7,7,7,7,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9, 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11 }; diff --git a/source/Lib/CommonLib/Rom.h b/source/Lib/CommonLib/Rom.h index a82dd2c554e78f1b201dd09f876eb829f3679f02..b35701dc19582441ad684c761500943f9d5efd34 100644 --- a/source/Lib/CommonLib/Rom.h +++ b/source/Lib/CommonLib/Rom.h @@ -76,6 +76,9 @@ extern uint32_t g_log2SbbSize [2][MAX_CU_DEPTH+1][MAX_CU_DEPTH+1][2]; extern ScanElement *g_scanOrder[2][SCAN_NUMBER_OF_GROUP_TYPES][SCAN_NUMBER_OF_TYPES][MAX_CU_SIZE / 2 + 1][MAX_CU_SIZE / 2 + 1]; #endif +#if JVET_N0193_LFNST +extern ScanElement g_coefTopLeftDiagScan8x8[ MAX_CU_SIZE / 2 + 1 ][ 64 ]; +#endif #if JVET_N0246_MODIFIED_QUANTSCALES extern const int g_quantScales [2/*0=4^n blocks, 1=2*4^n blocks*/][SCALING_LIST_REM_NUM]; // Q(QP%6) @@ -145,6 +148,12 @@ extern const TMatrixCoeff g_trCoreDST7P8 [TRANSFORM_NUMBER_OF_DIRECTIONS][ 8][ extern const TMatrixCoeff g_trCoreDST7P16 [TRANSFORM_NUMBER_OF_DIRECTIONS][ 16][ 16]; extern const TMatrixCoeff g_trCoreDST7P32 [TRANSFORM_NUMBER_OF_DIRECTIONS][ 32][ 32]; +#if JVET_N0193_LFNST +extern const int8_t g_lfnst8x8[ 4 ][ 2 ][ 16 ][ 48 ]; +extern const int8_t g_lfnst4x4[ 4 ][ 2 ][ 
16 ][ 16 ]; + +extern const uint8_t g_lfnstLut[ NUM_INTRA_MODE + NUM_EXT_LUMA_MODE - 1 ]; +#endif // ==================================================================================================================== // Misc. diff --git a/source/Lib/CommonLib/RomLFNST.cpp b/source/Lib/CommonLib/RomLFNST.cpp new file mode 100644 index 0000000000000000000000000000000000000000..74a42528e1abb7b1e7b2c10f9d1bb9ca3fc8eaf3 --- /dev/null +++ b/source/Lib/CommonLib/RomLFNST.cpp @@ -0,0 +1,368 @@ +/* The copyright in this software is being made available under the BSD + * License, included below. This software may be subject to other third party + * and contributor rights, including patent rights, and no such rights are + * granted under this license. + * + * Copyright (c) 2010-2019, ITU/ISO/IEC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file RomLFNST.cpp + \brief LFNST Tables +*/ + +#include "Rom.h" + +#if JVET_N0193_LFNST + +#include <memory.h> +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include <iomanip> + +// ==================================================================================================================== +// LFNST Tables +// ==================================================================================================================== + +const uint8_t g_lfnstLut[ NUM_INTRA_MODE + NUM_EXT_LUMA_MODE - 1 ] = +{//0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 + 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 +}; + +const int8_t g_lfnst8x8[ 4 ][ 2 ][ 16 ][ 48 ] = { + { //0 + { + { -117, 28, 18, 2, 4, 1, 2, 1, 32, -18, -2, 0, -1, 0, 0, 0, 14, -1, -3, 0, -1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, -1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0 }, + { -29, -91, 47, 1, 9, 0, 3, 0, -54, 26, -8, 3, 0, 1, 0, 0, 33, 5, -9, -1, -2, 0, -1, 0, -3, 3, 0, 0, 0, 0, 0, 0, 7, 2, 
-2, 0, -1, 1, 0, 0, 2, 1, -1, 0, 0, 0, 0, 0 }, + { -10, 62, -11, -8, -2, -2, -1, -1, -95, 3, 32, 0, 4, 0, 2, 0, 32, -30, -4, 4, -1, 1, 0, 0, 6, 2, -5, 0, 0, 0, 0, 0, 6, -3, 0, 0, 2, 0, -1, 0, 2, -1, 0, 0, 1, 0, 0, 0 }, + { -15, 15, -10, -2, 1, 0, 1, 0, 10, 112, -20, -17, -4, -4, -1, -2, -20, -26, 31, 1, 0, 0, 0, 0, 2, -16, -1, 6, 0, 1, 0, 0, 1, -4, 0, 0, 0, -3, 0, 1, 0, -1, 0, 0, 0, -2, 0, 0 }, + { 32, 39, 92, -44, 4, -10, 1, -4, 26, 12, -15, 13, -5, 2, -2, 0, 29, -16, -22, 8, 0, 1, 0, 1, -20, 6, 4, -3, 1, 0, 0, 0, 1, -4, -3, 2, -4, 1, 0, 0, 1, -1, -2, 1, -2, 0, 0, 0 }, + { -10, 1, 50, -15, 2, -3, 1, -1, -28, -15, 14, 6, 1, 1, 1, 0, -99, -4, 9, 5, 5, 2, 2, 1, 44, -10, -11, 1, -2, 0, -1, 0, -5, 4, -3, 0, 8, -1, -2, 0, -2, 1, -1, 0, 4, 0, -1, 0 }, + { 1, -33, -11, -14, 7, -2, 2, 0, 29, -12, 37, -7, -4, 0, -1, 0, 6, -99, 3, 26, -1, 5, 0, 2, 14, 30, -27, -2, 1, -1, 0, -1, -6, 6, 6, -3, 1, 3, -3, 0, -1, 1, 1, 0, 0, 1, -1, 0 }, + { 0, 6, -6, 21, -4, 2, 0, 0, -20, -24, -104, 30, 5, 5, 1, 2, -7, -46, 10, -14, 7, 0, 1, 0, 9, 21, 7, -6, -2, -1, 0, -1, 2, 2, 5, -2, 0, 3, 4, -1, 0, 0, 1, 0, 0, 1, 2, -1 }, + { -13, -13, -37, -101, 29, -11, 8, -3, -12, -15, -20, 2, -11, 5, -2, 1, -12, 10, 26, 12, -6, 0, -1, 0, -32, -2, 11, 3, 3, -1, 1, 0, 11, -5, -1, 6, -4, 2, 1, 0, 3, -1, 1, 2, -1, 0, 0, 0 }, + { 6, 1, -14, -36, 9, -3, 2, 0, 10, 9, -18, -1, -3, 1, 0, 0, 38, 26, -13, -1, -5, -1, -1, 0, 102, 3, -14, -1, -5, -1, -2, 0, -29, 10, 10, 0, 10, -4, -1, 1, -7, 1, 2, 1, 2, -1, 0, 0 }, + { -12, -2, -26, -12, -9, 2, -1, 1, -3, 30, 4, 34, -4, 0, -1, 0, -30, 3, -92, 14, 19, 0, 3, 0, -11, 34, 21, -33, 1, -2, 0, -1, -9, -4, 18, 3, 2, 0, 0, -2, -1, -1, 3, 0, 0, 0, 0, -1 }, + { 0, -3, 0, -4, -15, 6, -3, 1, -7, -15, -28, -86, 19, -5, 4, -1, -5, -17, -41, 42, -6, 2, -1, 1, -1, -40, 37, 13, -4, 2, -1, 1, -10, 13, -1, -4, 4, -4, 3, 4, -2, 2, -1, -1, 1, -1, 1, 2 }, + { -1, 9, 13, 5, 14, -2, 2, -1, -8, 3, -4, -62, 4, 1, 1, 0, -12, 23, 16, -11, -17, 0, -1, 0, -11, 97, -3, -3, 0, -6, 0, -2, -21, -5, 23, 
0, 2, -2, -1, 6, -3, -3, 1, 0, 0, 0, 0, 2 }, + { 6, 2, -3, 2, 10, -1, 2, 0, 8, 3, -1, -20, 0, 1, 0, 0, -4, 4, -16, 0, -2, 0, 1, 0, 34, 23, 6, -7, -4, -2, -1, 0, 108, -5, -30, 6, -27, 10, 7, -2, 11, -3, -1, 1, -4, 1, 0, 1 }, + { 6, 9, -2, 35, 110, -22, 11, -4, -2, 0, -3, 1, -18, 12, -3, 2, -5, -4, -22, 8, -25, 3, 0, 0, -3, -21, 2, -3, 9, -2, 1, 0, -7, 1, 3, -5, 3, 0, -1, 0, 0, 1, 0, -1, 1, 0, 0, 0 }, + { -1, 7, -2, 9, -11, 5, -1, 1, -7, 2, -22, 4, -13, 0, -1, 0, 0, 28, 0, 76, 4, -6, 0, -2, -13, 5, -76, -4, 33, -1, 3, 0, 9, 18, -3, -35, -4, -1, 6, 1, 1, 2, 0, -3, -1, 0, 2, 0 }, + }, + { + { -108, 48, 9, 1, 1, 1, 0, 0, 44, -6, -9, -1, -1, 0, -1, 0, 9, -9, -1, 1, 0, 0, 0, 0, 3, -1, 1, 0, 0, 0, 0, 0, 1, -1, 0, 0, 1, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0 }, + { 55, 66, -37, -5, -6, -1, -2, 0, 67, -30, -20, 4, -2, 0, -1, 0, -31, -19, 14, 4, 1, 1, 1, 0, -6, 3, 5, -2, 0, 0, 0, 0, -7, -1, 1, 0, -1, 1, 1, 0, -2, -1, 1, 0, 0, 0, 0, 0 }, + { 2, 86, -21, -13, -4, -2, -1, -1, -88, 5, 6, 4, 5, 1, 1, 0, 14, -5, 0, 3, 0, 0, 0, 0, 10, -5, -2, 0, -1, 0, 0, 0, 6, -5, 0, 1, 2, -1, 0, 0, 1, -1, 0, 0, 1, 0, 0, 0 }, + { -24, -21, -38, 19, 0, 4, -1, 2, -23, -89, 31, 20, 2, 3, 1, 1, -30, 26, 36, -8, -2, -2, 0, -1, 14, 18, -7, -9, -1, -1, 0, 0, 1, 3, -2, -1, 3, 2, -2, -1, 0, 1, 0, 0, 1, 1, -1, 0 }, + { 9, 20, 98, -26, -3, -5, 0, -2, -9, -26, 15, -16, 2, 0, 1, 0, -61, -3, -2, 3, 7, 1, 1, 0, 12, 16, -6, -1, 0, -1, 0, 0, 2, 0, -8, 1, 3, 1, -1, 1, 0, -1, -2, 0, 1, 0, -1, 0 }, + { -21, -7, -37, 10, 2, 2, -1, 1, -10, 69, -5, -7, -2, -2, 0, -1, -93, 2, 19, 0, 3, 0, 2, 0, 17, 4, 0, 0, -1, 0, 0, 0, 5, -4, -2, 0, 4, -2, 0, 1, 0, 0, 0, 0, 2, -1, 0, 0 }, + { -10, -25, 4, -17, 8, -2, 2, -1, -27, -17, -71, 25, 8, 2, 1, 1, -4, -66, 28, 36, -5, 3, 0, 1, -10, 20, 33, -13, -8, 0, 0, -1, 3, 6, -3, -7, -1, 3, 3, -1, 1, 0, -1, 0, 0, 1, 1, -1 }, + { 2, 5, 10, 64, -9, 4, -3, 1, -4, 8, 62, 3, -17, 1, -2, 0, -3, -75, 5, -14, 1, 4, 0, 1, -36, 3, 18, -4, 4, 0, 1, 0, 1, 14, -2, -8, -2, 1, -3, 0, 2, 2, -1, -2, 0, 1, -1, 0 }, 
+ { -11, -15, -28, -97, 6, -1, 4, -1, 7, 3, 57, -15, 10, -2, 0, -1, -1, -27, 13, 6, 1, -1, 0, 0, -34, -6, 0, 3, 4, 1, 2, 0, -2, 8, 1, 5, -2, 0, -3, 1, 1, 1, 0, 2, -1, 0, -1, 0 }, + { 9, 13, 24, -6, 7, -2, 1, -1, 16, 39, 20, 47, -2, -2, -2, 0, 28, 23, 76, -5, -25, -3, -3, -1, 6, 36, -7, -39, -4, -1, 0, -1, 2, -4, -18, -3, -1, -1, -2, -2, 1, -2, -2, 0, 0, 0, -1, -1 }, + { -7, 11, 12, 7, 2, -1, 0, -1, -14, -1, -24, 11, 2, 0, 0, 0, -20, 48, 11, -13, -5, -2, 0, -1, -105, -19, 17, 0, 6, 2, 3, 0, -14, 8, 8, 2, 1, 2, -1, -2, 3, 0, -1, 0, 0, 0, 0, 0 }, + { 0, 0, 7, -6, 23, -3, 3, -1, 5, 1, 18, 96, 13, -9, -1, -1, -21, -7, -42, 14, -24, -3, 0, 0, 11, -47, -7, 3, -5, 9, 1, 2, 0, -1, 19, -1, 1, 0, -1, -6, -1, 1, 2, 0, 1, 0, 0, -2 }, + { -2, -6, -1, -10, 0, 1, 1, 0, -7, -2, -28, 20, -15, 4, -3, 1, -2, -32, -2, -66, 3, 7, 1, 2, -11, 13, -70, 5, 43, -2, 3, 0, 8, -14, -3, 43, -1, 2, 7, -1, 1, -2, 1, 3, -1, 1, 1, 0 }, + { -1, 6, -16, 0, 24, -3, 1, -1, 2, 6, 6, 16, 18, -7, 1, -1, -3, 11, -63, 9, 4, -5, 2, -1, -22, 94, -4, -6, -4, -4, 1, -2, 10, 23, -19, -5, 0, -6, -4, 6, 3, -2, 1, 1, 0, -1, 0, 0 }, + { -5, -6, -3, -19, -104, 18, -4, 3, 0, 6, 0, 35, -41, 20, -2, 2, -2, 10, -18, 16, 21, 3, -2, 0, -2, 11, 6, -10, 6, -3, -1, 0, -1, 5, -1, -6, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, -1 }, + { -1, -2, 0, 23, -9, 0, -2, 0, 1, 1, 8, -1, 29, 1, 1, 0, 3, -6, 13, 76, 30, -11, -1, -2, -26, -8, -69, 7, -9, -7, 3, -1, -10, -34, -25, 13, -1, 0, 11, 5, 1, -1, 1, -2, 0, 0, 2, 0 }, + } + }, + { //1 + { + { 110, -49, -3, -4, -1, -1, 0, -1, -38, -1, 10, 0, 2, 0, 1, 0, -9, 13, 1, -2, 0, 0, 0, 0, -4, 2, -3, 0, 0, 0, 0, 0, -2, 2, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, -1, 0, 0, 0 }, + { -43, -19, 17, -1, 3, 0, 1, 0, -98, 46, 14, -1, 2, 0, 1, 0, 26, 26, -15, -3, -2, -1, -1, 0, 11, -7, -9, 2, 0, 0, 0, 0, 9, -3, -1, 2, 3, -3, 0, 0, 4, -1, 0, 0, 2, -1, 0, 0 }, + { -19, 17, -7, 3, -2, 1, -1, 0, -32, -59, 29, 3, 4, 0, 2, 0, -72, 43, 34, -9, 3, -2, 1, -1, 13, 36, -18, -10, 0, -2, 0, -1, 3, 0, -12, 3, 6, 1, -3, 2, 1, -1, 
-2, 0, 3, 1, -1, 1 }, + { -35, -103, 39, 1, 7, 0, 2, 0, 38, -13, 25, -6, 1, -1, 0, 0, -1, 7, 6, -7, 1, -1, 0, 0, -13, 14, 2, -4, 2, -1, 0, 0, -2, 11, -6, -2, -2, 4, -3, 0, 0, 3, -2, 0, -1, 1, -1, 0 }, + { 9, 5, -6, -1, -1, 0, -1, 0, 42, 4, 21, -11, 1, -3, 1, -1, 21, 70, -32, -21, 0, -4, -1, -1, 34, -26, -57, 11, 4, 2, 0, 1, -4, -32, 5, 24, 1, -6, 12, 4, -3, -2, 4, -2, 0, -1, 0, 0 }, + { -5, -5, -28, 9, -3, 2, -1, 1, -20, -78, 22, 16, 1, 3, 0, 1, 80, -6, 25, -5, -4, -1, -1, 0, 6, -24, 7, -9, 0, 0, 0, 0, -7, 3, 13, -4, -3, 5, 1, -5, -2, 3, 1, -2, -1, 2, -1, -2 }, + { 14, 17, 27, -12, 1, -3, 1, -1, 8, 19, -13, 4, -2, 1, -1, 0, 48, -1, 48, -15, -4, -2, -1, -1, 1, 60, -28, -42, 5, -6, 1, -2, 11, -11, -51, 11, -2, -10, -2, 13, 2, -6, -4, 4, -2, -3, 2, 2 }, + { 7, 35, 17, -4, -1, 0, 0, 0, 3, 8, 54, -17, 1, -2, 1, -1, 10, 14, -11, -34, 4, -4, 1, -1, -80, -7, -6, 2, 15, 0, 3, 0, -16, 46, 1, 3, 2, 7, -24, 0, 2, -2, -5, 8, 1, -1, -2, 2 }, + { -13, -27, -101, 24, -8, 6, -3, 2, 11, 43, 6, 28, -6, 3, -1, 1, -3, 14, 21, -12, -7, -2, -1, -1, -23, 10, -4, -12, 3, 0, 1, 0, 2, 9, -10, 0, 1, -5, -4, 4, 2, -2, 2, 2, 0, -2, 1, 0 }, + { -11, -13, -3, -10, 3, -1, 1, 0, -19, -19, -37, 8, 4, 2, 0, 1, -12, -30, 3, -9, 5, 0, 1, 0, -56, -9, -47, 8, 21, 1, 4, 1, -11, -30, 10, 59, -2, 8, 41, 8, 2, 5, 6, -7, -1, 3, 5, -2 }, + { -4, -10, -24, -11, 3, -2, 0, -1, -6, -37, -45, -17, 8, -2, 2, -1, 17, 14, -58, 14, 15, 0, 2, 0, -10, 34, -7, 28, 4, -1, 1, 0, 23, 34, -31, 4, 10, -22, -30, 22, 4, -15, 9, 20, 2, -5, 9, 4 }, + { -2, 1, 13, -17, 3, -5, 1, -2, 3, 0, -55, 22, 6, 1, 1, 0, 8, 74, 21, 40, -14, 0, -2, 0, -36, -8, 11, -13, -23, 1, -3, 0, -36, 6, 16, -14, 2, 19, -4, -12, -1, 0, -7, -3, 0, 2, -2, -1 }, + { 3, 1, 5, -15, 1, -2, 1, -1, 7, 4, -7, 29, -1, 2, -1, 1, 8, 3, 12, -14, -9, -1, -1, 0, 4, 29, -15, 31, 10, 4, 1, 1, 61, 22, 55, 14, 13, 3, -9, -65, 1, -11, -21, -7, 0, 0, -1, 3 }, + { -4, -8, -1, -50, 6, -4, 2, -2, -1, 5, -22, 20, 6, 1, 0, 0, -16, -15, 18, -29, -11, 2, -2, 1, 40, -45, -19, -22, 31, 
2, 4, 1, -25, 41, 0, 12, 9, 7, -42, 12, -3, -14, 2, 28, 5, 1, 6, 2 }, + { 5, -1, 26, 102, -13, 12, -4, 4, -4, -2, -40, -7, -23, 3, -5, 1, -1, 5, 8, -23, 7, 2, 1, 1, 10, -11, -13, -3, 12, -3, 2, 0, -9, 23, 4, 9, 14, 9, -14, -4, 0, -12, -7, 6, 3, 0, 6, 3 }, + { -5, -6, -27, -22, -12, 0, -3, 0, -5, 8, -20, -83, 0, 0, 0, 0, 9, 7, 24, -20, 41, 3, 6, 1, 15, 20, 12, 11, 17, -9, 1, -2, -26, -1, 18, -1, -12, 32, 3, -18, -5, 10, -25, -5, -2, 1, -8, 10 }, + }, + { + { 80, -49, 6, -4, 1, -1, 1, -1, -72, 36, 4, 0, 1, 0, 0, 0, 26, 0, -12, 2, -2, 1, -1, 0, -7, -9, 6, 1, 0, 0, 0, 0, 3, 5, -1, -2, -2, -2, -1, 1, 1, 1, 0, 0, -1, -1, 0, 0 }, + { -72, -6, 17, 0, 3, 0, 1, 0, -23, 58, -21, 2, -3, 1, -1, 0, 55, -46, -1, 6, -2, 1, -1, 0, -22, 7, 17, -7, 2, -1, 1, 0, 9, 5, -12, 1, -3, -4, 4, 2, 4, 1, -2, -1, -1, -1, 1, 0 }, + { -50, 19, -15, 4, -1, 1, -1, 1, -58, -2, 30, -3, 4, -1, 2, 0, 6, 57, -34, 0, -2, 0, -1, 0, 34, -48, -2, 14, -4, 3, -1, 1, -10, 7, 21, -10, 6, 1, -11, 0, -1, -1, 4, 2, 3, 0, -2, -1 }, + { -33, -43, 28, -7, 4, -2, 2, -1, -38, 11, -8, 4, 1, 1, 0, 0, -55, 24, 26, -5, 2, -1, 1, 0, 15, 46, -40, -1, -1, 0, -1, 0, 17, -38, 1, 17, -3, 11, 15, -11, 3, -1, -10, 1, 0, 1, 3, 2 }, + { 10, 66, -21, -3, -3, 0, -1, 0, -53, -41, -2, 16, -1, 4, -1, 1, 36, -5, 41, -20, 3, -3, 1, -1, -30, 26, -32, -3, 7, -2, 2, -1, 15, -8, 1, 17, -1, -2, 4, -8, 2, 0, -1, 3, 0, 0, 0, -1 }, + { 18, 14, 13, -9, 2, -2, 1, -1, 34, 32, -31, 12, -5, 2, -2, 1, 40, 4, -4, -9, -3, -2, -1, -1, 27, -31, -43, 19, -2, 3, -1, 1, 7, -49, 52, 10, -11, 22, 7, -26, -1, -6, -9, 6, -2, 2, 4, -2 }, + { 21, 66, -1, 9, -4, 2, -1, 1, -21, 41, -30, -10, 0, -2, 0, -1, -35, -17, -3, 26, -6, 5, -2, 2, 56, 3, 18, -25, -1, -2, -1, -1, -15, -13, -27, 9, 9, -6, 20, 5, -3, 2, -6, -9, 3, -3, 1, 5 }, + { 1, -6, -24, 17, -5, 3, -2, 1, 24, 10, 39, -21, 5, -4, 2, -1, 33, 32, -30, 4, -3, -1, -1, 0, -4, 13, -16, -10, 0, -1, 0, 0, 24, -26, -37, 33, 5, -32, 55, -5, -7, 22, -14, -22, 1, -9, -3, 13 }, + { 9, 33, -24, 1, 4, 0, 1, 0, 6, 50, 26, 1, 
-10, 0, -2, 0, -27, 1, -28, -21, 16, -5, 3, -2, -23, 36, -2, 40, -17, 4, -3, 1, 43, -13, 4, -41, -19, -2, -24, 17, 11, -4, 8, 4, -3, -3, -3, -3 }, + { -7, -9, -32, 14, -3, 3, -1, 1, -23, -28, 0, -5, -1, 0, 0, 0, -36, -59, -24, 14, 4, 2, 1, 1, -23, -26, 23, 26, -3, 5, 0, 2, 10, -26, 38, 7, -12, 11, 42, -22, -5, 20, -14, -15, -1, -2, 1, 6 }, + { 6, 30, 69, -18, 5, -4, 3, -1, -3, -11, -34, -16, 9, -4, 2, -1, -16, 35, -35, 30, -9, 3, -2, 1, -57, -13, 6, 4, -5, 5, -1, 1, 28, 10, 4, 7, 0, -15, 7, -10, -1, 7, -2, 2, 1, -3, 0, 0 }, + { 1, -8, 24, -3, 7, -2, 2, -1, -6, -51, -6, -4, -5, 0, -1, 0, 38, -1, 0, 25, 6, 2, 1, 1, 47, 20, 35, 1, -27, 1, -5, 0, 37, -37, -9, -47, -28, 5, 0, 18, 8, 6, 0, -8, -4, -3, -3, 1 }, + { 4, 10, 4, 17, -9, 4, -2, 1, 5, 14, 32, -15, 9, -3, 2, -1, 7, 13, 19, 15, -8, 1, -1, 0, 3, 25, 30, -18, 1, -2, 0, -1, 11, 24, 22, -11, -3, 37, -13, -58, -5, 12, -63, 26, 9, -15, 11, 8 }, + { -3, -9, -23, 10, -10, 3, -3, 1, -5, -14, -16, -27, 13, -5, 2, -1, -1, -13, -30, 11, -5, 2, -1, 0, -5, -8, -22, -16, 10, 0, 1, 0, 0, -29, -27, 6, -27, -10, -30, 9, -3, -10, -7, 77, 9, -13, 45, -8 }, + { 2, 11, 22, 2, 9, -2, 2, 0, -6, -7, 20, -32, -3, -4, 0, -1, 13, -5, -28, 6, 18, -4, 3, -1, -26, 27, -14, 6, -20, 0, -2, 0, -76, -26, -4, -7, 12, 51, 5, 24, 7, -17, -16, -12, -5, 4, 2, 13 }, + { 2, -3, 8, 14, -5, 3, -1, 1, -2, -11, 5, -18, 8, -3, 2, -1, 12, -23, -19, 22, 2, 0, 1, 0, 23, 41, -7, 35, -10, 4, -1, 1, 5, 7, 23, 5, 69, -38, -8, -32, -15, -31, 24, 11, 2, 18, 11, -15 }, + } + }, + { //2 + { + { -121, 33, 4, 4, 1, 2, 0, 1, -1, -1, 1, 0, 0, 0, 0, 0, 24, -5, -1, -1, 0, 0, 0, 0, 5, -1, 0, 0, 0, 0, 0, 0, 3, -1, 0, 0, 2, -1, 0, 0, 2, -1, 0, 0, 1, 0, 0, 0 }, + { 0, -2, 0, 0, 0, 0, 0, 0, 121, -23, -7, -3, -2, -1, -1, 0, 17, 1, -2, 0, 0, 0, 0, 0, -27, 4, 2, 0, 0, 0, 0, 0, -12, 2, 1, 0, -5, 1, 0, 0, -1, 0, 0, 0, -2, 0, 0, 0 }, + { -20, 19, -5, 2, -1, 1, 0, 0, 16, 3, -2, 0, 0, 0, 0, 0, -120, 14, 8, 1, 3, 1, 1, 0, -18, -2, 3, 0, 1, 0, 0, 0, 17, -3, -1, 0, 6, -1, -1, 0, 2, 0, 0, 0, 
2, 0, 0, 0 }, + { 32, 108, -43, 10, -9, 3, -3, 1, 4, 19, -7, 1, -1, 0, 0, 0, 11, -30, 9, -2, 1, -1, 0, 0, 0, -8, 2, 0, 0, 0, 0, 0, -7, -1, 2, 0, -3, -1, 1, 0, -2, -2, 1, 0, 0, 0, 0, 0 }, + { -3, 0, -1, 0, 0, 0, 0, 0, -29, 11, -2, 1, 0, 0, 0, 0, 12, 7, -1, 0, 0, 0, 0, 0, -117, 12, 9, 1, 3, 0, 1, 0, -32, -3, 3, 0, 12, -2, -1, 0, 7, 0, 0, 0, 1, 0, 0, 0 }, + { -4, -12, -3, 1, -1, 0, 0, 0, 19, 105, -31, 7, -6, 1, -2, 0, 9, 46, -6, 0, 0, 0, 0, 0, 8, -29, 9, -3, 1, 0, 0, 0, -3, -19, 3, 0, -4, -6, 1, 0, 0, 0, 0, 0, 0, -1, 0, 0 }, + { 7, 1, 2, 0, 0, 0, 0, 0, 4, 3, -2, 0, 0, 0, 0, 0, 22, -8, 1, -1, 0, 0, 0, 0, -28, -9, 4, 0, 1, 0, 0, 0, 117, -10, -8, 0, 32, 1, -4, 0, 3, 1, -1, 0, -3, 1, 0, 0 }, + { -8, -31, 14, -4, 3, -1, 1, 0, 9, 43, 0, 1, -1, 0, 0, 0, -13, -105, 17, -2, 2, 0, 0, 0, -8, -25, -3, 0, 0, 0, 0, 0, -7, 32, -5, 1, -1, 4, 0, 0, 2, -1, 0, 0, 1, 0, -1, 0 }, + { -15, -43, -100, 23, -12, 6, -4, 2, -6, -17, -48, 10, -5, 2, -1, 1, 1, -5, 19, -6, 3, -1, 1, 0, 2, 7, 15, -3, 1, -1, 0, 0, 4, 10, 5, -1, 0, 3, 1, 0, -2, 1, 2, 0, -1, 1, 1, 0 }, + { -3, 1, 2, 0, 0, 0, 0, 0, -6, 3, 1, 0, 0, 0, 0, 0, 0, 3, -2, 0, 0, 0, 0, 0, -20, 8, -2, 0, 0, 0, 0, 0, 30, 13, -3, 0, -116, 6, 10, 0, -35, -5, 4, 0, -3, -1, 0, 0 }, + { -1, -6, -3, 2, -1, 0, 0, 0, -6, -35, 9, 0, 2, 0, 0, 0, 1, -6, 11, -2, 2, 0, 1, 0, -9, -100, 17, -1, 1, 0, 0, 0, -10, -63, 1, 2, -17, 3, -4, 0, -1, 9, -1, 0, 3, 4, -1, 0 }, + { -5, -14, -48, 2, -5, 1, -2, 0, 10, 24, 99, -17, 10, -4, 3, -1, 4, 14, 32, 0, 2, 0, 1, 0, -4, 0, -39, 6, -4, 1, -1, 0, 2, -3, -4, 0, 2, -2, -2, 0, 0, 0, -1, 0, 0, -1, -1, 0 }, + { -2, 0, 2, 0, 0, 0, 0, 0, -2, 0, 1, 0, 0, 0, 0, 0, -1, -1, 1, -1, 0, 0, 0, 0, -1, -4, 2, 0, 0, 0, 0, 0, -8, -2, -1, 1, 30, 4, -4, 1, -102, 4, 8, -1, -69, -2, 6, -1 }, + { -2, -10, -4, 0, 0, 0, 0, 0, 3, 11, -1, -1, 0, 0, 0, 0, -6, -40, -15, 6, -2, 1, 0, 0, 5, 57, -6, 2, 0, 0, 0, 0, 1, -95, 18, -6, -10, -34, -2, 0, -4, 17, -2, 0, 0, 2, 1, 0 }, + { -2, -3, -25, -2, -3, 0, -1, 0, -1, -3, -1, 4, -2, 2, 0, 1, -7, -8, -97, 17, 
-9, 3, -3, 1, -8, -26, -61, -1, -3, -1, -1, -1, 2, 10, 24, -7, 5, 9, 19, -1, 0, 1, 4, 0, -2, 0, 1, 0 }, + { 4, -4, 28, 103, -42, 24, -9, 7, 1, 2, 4, 0, 3, -1, 0, 0, -1, 0, -9, -42, 17, -9, 3, -2, -1, 1, -14, 6, -4, 2, -1, 0, -1, -2, -4, 4, 0, 3, 1, -1, 0, 2, 0, -2, 2, 0, 0, 0 }, + }, + { + { 87, -41, 3, -4, 1, -1, 0, -1, -73, 28, 2, 1, 1, 1, 0, 0, 30, -5, -6, 1, -1, 0, 0, 0, -8, -3, 3, 0, 0, 0, 0, 0, 3, 2, -1, 0, -2, -1, 0, 0, 1, 1, 0, 0, -1, 0, 0, 0 }, + { -75, 4, 7, 0, 2, 0, 1, 0, -41, 36, -7, 3, -1, 1, 0, 0, 72, -29, -2, 0, -1, 0, -1, 0, -37, 6, 7, -2, 1, 0, 0, 0, 12, 3, -4, 0, -3, -2, 1, 0, 4, 0, 0, 0, -1, 0, 0, 0 }, + { 26, -44, 22, -6, 4, -2, 1, -1, 77, 24, -22, 2, -4, 0, -1, 0, 7, -38, 10, 0, 1, 0, 0, 0, -51, 27, 4, -3, 2, -1, 1, 0, 31, -5, -8, 3, -14, 0, 5, -1, 6, 1, -3, 0, -4, -1, 1, 0 }, + { -39, -68, 37, -7, 6, -2, 2, 0, -9, 56, -21, 1, -2, 0, -1, 0, -45, 4, -3, 6, -1, 2, 0, 1, 49, -13, 3, -3, -1, 0, 0, 0, -19, 2, 0, 0, 5, 1, 1, 0, -2, 0, -1, 0, 1, 0, 0, 0 }, + { 10, -20, 2, 0, 1, 0, 0, 0, 50, -1, 8, -5, 1, -1, 0, 0, 66, 17, -24, 4, -3, 1, -1, 0, 13, -49, 15, 1, 0, 0, 0, 0, -53, 34, 6, -5, 30, -7, -11, 3, -11, -2, 5, 1, 4, 2, -1, -1 }, + { -21, -45, 8, -2, 3, -1, 1, 0, -7, -30, 26, -8, 3, -1, 1, -1, -9, 69, -33, 5, -2, 0, -1, 0, -44, -31, 10, 7, -2, 2, 0, 1, 49, 7, 2, -6, -23, -3, -2, 2, 9, 4, 0, 0, -2, -1, -1, 0 }, + { -4, -2, -55, 28, -8, 5, -3, 2, -2, 37, 43, -19, 1, -2, 1, -1, -47, -34, -27, 5, 4, -1, 1, 0, -39, -2, 27, 4, -2, 1, 0, 0, -11, 32, -8, -7, 27, -12, -6, 6, -13, 0, 4, -3, 3, -1, -2, 1 }, + { 2, 19, 47, -23, 6, -4, 2, -1, -23, -22, -44, 17, -2, 2, -1, 0, -33, 3, 22, -2, -4, 1, -1, 0, -58, -17, 6, -6, 7, -1, 1, 0, -23, 40, -2, 5, 43, -11, -8, -1, -18, -4, 5, 2, 4, 3, 0, -1 }, + { -19, -62, -9, 3, 0, 0, 0, 0, -12, -56, 27, -7, 3, -1, 1, 0, 7, -8, 16, -6, 4, -2, 1, -1, -15, 54, -23, 2, -1, 0, 0, 0, -42, -25, 4, 6, 34, 8, 2, -2, -15, -1, 0, -1, 3, 2, 0, 1 }, + { 1, 9, -5, 0, -1, 0, 0, 0, 0, 22, -1, 2, 0, 1, 0, 0, -13, 17, 0, -2, 0, -1, 0, 0, 
-46, -10, -10, 4, -1, 1, 0, 0, -80, -27, 20, -4, -66, 23, -2, -2, 20, -3, -2, 3, -14, 2, 3, -1 }, + { 5, 17, -9, 0, -2, 1, 0, 0, 13, 54, -2, 7, -1, 1, 0, 0, 4, 51, -3, -6, -1, -1, 0, 0, -20, 6, -34, 9, -2, 2, -1, 0, 16, -52, 28, 1, 59, 15, -8, -5, -28, -7, 2, 2, 10, 3, 0, -1 }, + { 7, 27, 56, -2, 10, -3, 3, -1, -2, -6, 8, -28, 3, -4, 1, -1, -1, -4, -68, 35, -5, 5, -2, 1, 0, 35, 43, -4, -6, 1, -1, 0, -14, -38, -12, -10, 9, 5, 7, 6, -9, 7, -4, -3, 4, -4, 0, 3 }, + { 0, 0, 19, -4, 3, -2, 2, -1, -3, -13, 10, -4, 1, 0, 0, 0, -6, -37, -18, -5, 2, -2, 1, -1, 6, -6, -7, 25, -6, 4, -1, 1, 16, 10, 55, -24, 15, 46, -52, 1, 35, -43, 10, 12, -23, 13, 5, -8 }, + { -3, 0, -27, -80, 40, -16, 6, -4, 4, 3, 31, 61, -22, 7, -1, 1, -4, -7, -26, -6, -10, 6, -4, 1, 3, 8, 14, -18, 15, -5, 2, -1, -2, -4, -1, 13, 0, 2, -4, -3, 3, -1, 2, 1, -2, 0, -2, -1 }, + { 1, 2, -8, 6, -1, 1, 0, 0, 2, 8, -5, -1, 0, 0, 0, 0, 1, 24, 3, 5, -1, 1, 0, 0, -3, 12, 6, -10, 1, -1, 0, 0, -9, -1, -25, 10, 45, -11, 18, 2, 86, 1, -13, -4, -65, -6, 7, 2 }, + { -4, -18, -57, 8, -8, 1, -3, 0, -5, -20, -69, 7, -6, 2, -2, 1, 1, 4, 0, 33, -7, 5, -2, 1, 0, -9, 53, -22, 3, -1, 0, 0, 4, -27, -2, -9, 5, 36, -13, 5, -7, -17, 1, 2, 4, 6, 4, -1 }, + } + }, + { //3 + { + { -115, 37, 9, 2, 2, 1, 1, 0, 10, -29, 8, 0, 1, 0, 1, 0, 23, -8, -8, 1, -1, 0, 0, 0, 3, 3, -2, -1, 0, 0, 0, 0, 4, 0, 0, -1, 1, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0 }, + { 15, 51, -18, 0, -3, 0, -1, 0, -95, 7, 34, -3, 5, -1, 2, 0, 23, -47, 1, 6, 0, 1, 0, 1, 8, 5, -12, 0, -1, 0, 0, 0, 3, -3, 1, -1, 2, 1, -2, 0, 1, -1, 0, 0, 1, 1, -1, 0 }, + { 29, -22, 16, -6, 3, -2, 1, -1, -4, -80, 12, 15, 0, 3, 0, 1, 45, 7, -59, 7, -2, 1, -1, 0, -15, 41, -3, -16, 2, -3, 0, -1, 1, 0, 7, -2, -3, 6, 1, -2, 0, 0, 1, 0, -1, 2, 0, -1 }, + { -36, -98, 25, 5, 4, 1, 2, 1, -59, 11, -17, 1, 1, 1, 0, 0, 6, -13, 7, -3, 0, 0, 0, 0, 14, -4, -14, 3, -1, 0, 0, 0, 2, 8, -3, -5, 2, 0, 0, 0, 0, 3, 0, -1, 1, 0, 0, 0 }, + { -6, 18, 3, -3, -1, 0, 0, 0, -50, -5, -38, 12, 0, 2, 0, 1, 3, 67, -7, -40, 3, -6, 1, 
-3, -12, -13, 65, -3, -10, 0, -1, 0, 9, -20, -5, 22, -2, 0, 0, -1, 2, -3, -2, 3, -1, 0, 1, 0 }, + { 4, 15, 52, -13, 5, -3, 2, -1, -17, -45, 16, 24, -2, 4, -1, 2, -87, -8, -14, 7, 8, 1, 2, 0, 23, -35, -6, -3, 1, 1, 0, 0, 2, 5, -17, 0, 3, -1, -1, -5, 0, 1, -4, 0, 1, 0, 0, -2 }, + { -20, -7, -43, 4, 0, 1, -1, 1, -7, 35, 0, 12, -4, 1, -1, 0, -51, -2, -57, 5, 15, 0, 4, 0, 7, 39, 5, -55, 1, -7, 1, -3, 1, -10, 41, 2, 4, -3, -2, 3, -1, -2, 7, 1, 1, -1, -1, 0 }, + { 4, 29, 1, 26, -5, 4, -2, 1, -17, -7, -73, 6, 6, 2, 1, 1, -5, 21, -3, 5, -1, -3, 0, -1, -11, 2, -52, -3, 27, -2, 5, 0, 0, 27, 8, -58, 2, -5, 25, 3, 0, 3, 0, -5, 0, -2, 7, 0 }, + { 12, 13, 10, 2, -1, 3, -1, 1, 17, -2, -46, 12, 7, 0, 2, 0, 16, -45, -9, -53, 6, 1, 1, 0, 70, 16, 8, -4, -37, 1, -7, 0, -12, 29, 3, 21, 4, 0, 5, -1, -3, 4, 1, 4, 2, 0, 1, 0 }, + { 5, 20, 90, -17, 4, -3, 2, -1, 6, 66, 8, 28, -7, 3, -1, 1, 29, 5, -19, 12, 9, -1, 1, 0, -10, 14, -1, -13, 7, 0, 1, 0, 0, -6, 13, -4, 0, -4, 1, 5, 0, -1, -1, 1, 0, -1, 0, 0 }, + { -3, -4, -34, -12, 2, -1, -1, 0, 5, 25, 11, 43, -10, 4, -2, 1, 23, 20, -40, 12, 21, -3, 4, -1, 25, -28, -10, 5, 8, 6, 0, 2, -4, 21, -64, -8, -5, 19, 10, -48, 3, -1, 10, -3, 0, 4, 3, -6 }, + { -1, -3, 2, 19, -2, 4, -1, 2, 9, 3, -35, 22, 11, 1, 2, 0, -7, -65, -19, -22, 11, 4, 2, 1, -75, -18, 3, -1, -10, 2, 0, 1, 2, -35, -27, 4, 1, 8, -17, -19, 3, 0, 3, -6, 0, 2, -1, -2 }, + { 10, -4, -6, 12, 5, 1, 1, 0, 11, -9, -12, -2, -7, 0, -1, 0, 33, -10, -4, 18, 18, -4, 4, -1, 28, -72, 1, -49, 15, 2, 2, 1, 56, -23, 22, -1, 4, -1, -15, 26, 6, 4, -10, 0, 0, 2, -3, 2 }, + { 4, 6, 14, 53, -4, 4, 0, 2, 0, -1, -20, -13, 3, 2, -1, 1, -3, 1, -5, 35, -16, -6, -1, -2, 46, 29, 13, 21, 37, -5, 4, -1, -10, -53, -18, 8, 9, 12, -41, -25, -2, 2, 13, -16, 4, 1, -5, 1 }, + { 2, 9, 13, 37, 19, 6, 2, 2, -9, -3, -9, -28, -20, -4, -3, -1, 1, 18, 9, 28, 24, 6, 2, 2, -20, -5, -25, -33, -36, 9, -2, 2, -13, 42, 1, 57, -22, -2, -25, -28, 5, 6, 19, -12, -5, -3, -2, 4 }, + { 3, -3, 12, 84, -12, 8, -2, 3, 6, 13, 50, -1, 45, 1, 7, 
0, -2, 18, -22, -37, -13, 14, 0, 3, 1, -12, -3, 2, -15, -8, 1, -1, 19, 14, -4, -12, -4, 5, 17, 8, 2, -4, -4, 4, -2, 2, 1, 0 }, + }, + { + { 109, -26, -8, -3, -2, -1, -1, 0, -50, 28, 2, 1, 0, 0, 0, 0, -18, -8, 6, 0, 1, 0, 1, 0, 6, -2, -3, 0, 0, 0, 0, 0, -3, 2, 1, -1, 0, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0 }, + { -39, 31, -5, 2, -1, 1, 0, 0, -95, 6, 18, 0, 4, 0, 1, 0, 32, -49, 5, 1, 1, 0, 0, 0, 27, -1, -14, 2, -2, 1, -1, 0, 3, 5, -3, -2, 4, 1, -1, -1, 2, 0, 0, 0, 2, 0, 0, 0 }, + { 29, -3, -2, -2, 0, 0, 0, 0, 0, -41, 9, 0, 2, 0, 1, 0, 86, 4, -33, 2, -6, 1, -2, 0, -32, 58, 1, -7, 0, -2, 0, -1, -14, -8, 20, 0, -2, -3, 0, 4, -1, -1, 0, 0, -1, 1, 0, 0 }, + { 18, 96, -23, 2, -5, 1, -2, 0, -10, 6, 10, -2, 1, -1, 1, 0, -14, 26, 2, -4, 1, -1, 0, 0, -43, -9, 35, -2, 4, -1, 1, 0, 14, -40, 1, 10, 2, 1, -10, 1, 2, -4, -1, -1, 0, 0, -1, 0 }, + { -29, -60, 16, -2, 3, -1, 1, 0, -52, 9, -17, 5, -2, 1, -1, 1, 13, 56, -2, -9, 0, -2, 0, -1, -34, -18, 41, 0, 3, 0, 1, 0, 19, -36, -10, 13, 3, 6, -14, -1, 3, 1, -1, -3, 1, 1, -1, -1 }, + { -23, -5, -15, 5, -2, 1, -1, 1, 2, 79, -13, -4, -2, -1, -1, 0, -9, 1, 5, -1, 1, 0, 0, 0, -4, 49, 2, -14, 1, -3, 0, -1, -31, -14, 56, -1, 13, -37, -4, 20, -2, 2, -10, 0, 2, -4, 0, -1 }, + { -7, -3, 12, -3, 3, -1, 1, 0, -31, -62, 8, 7, 0, 2, 0, 1, -75, 9, -45, 5, -1, 1, -1, 0, 14, 35, 0, -23, 2, -5, 1, -2, 1, -8, 32, -1, 7, -12, -4, 10, 0, 2, -6, -1, 2, 0, 0, -2 }, + { 1, -26, 5, 0, 1, 0, 1, 0, 24, -3, 43, -6, 4, -2, 1, -1, -7, -64, 9, 14, 0, 3, 0, 1, -12, -4, 5, 3, -1, 1, 0, 0, 8, -59, -3, 26, 14, 6, -58, 6, -5, 17, -7, -18, 3, 3, -1, -5 }, + { 11, 14, 6, -3, 1, -1, 1, 0, 10, -7, -9, 3, -2, 1, -1, 0, 22, 21, 1, -21, 2, -4, 1, -2, 92, 1, 53, 0, -9, 1, -2, 0, -21, -11, 1, 40, -5, -4, -24, 5, -4, 5, -6, -5, 0, 0, 0, -3 }, + { -10, -11, -47, 3, -4, 1, -1, 0, 5, 28, 11, -2, -1, 0, 0, 0, -12, -2, -38, 2, 0, 1, 0, 0, 16, 38, 11, -16, -1, -3, 0, -2, 12, -9, -22, 7, -8, 60, 4, -36, -6, -15, 54, 7, 3, -7, -8, 14 }, + { -8, -24, -99, 11, -10, 3, -4, 1, -5, -36, 19, -26, 
4, -5, 1, -2, 0, 25, 41, 5, -3, 1, 0, 0, 10, -5, -7, 12, 2, 1, 0, 0, -1, 1, 9, -3, -3, -14, -3, 12, 2, 4, -13, -2, -1, 3, 2, -4 }, + { -5, 1, -1, 0, 1, 0, 0, 0, -10, -14, -6, 8, 0, 1, 0, 0, -17, -2, 7, -5, 3, -1, 0, 0, -16, 13, 3, 31, -1, 6, 0, 2, -93, -15, -46, -3, 23, -19, 0, -47, 8, 4, 8, 3, 2, 3, 0, 0 }, + { 1, 12, -20, 21, -4, 5, -2, 2, -5, -2, -75, 9, -1, 2, -1, 1, -1, -2, -16, -4, 0, -1, 0, 0, -7, 7, -31, 0, 3, 0, 0, 0, 4, 11, -12, 4, -12, 14, -50, -1, -8, 32, -4, -54, 2, 0, 30, -15 }, + { 2, -9, -18, 8, -3, 3, -1, 1, 3, -25, -62, -6, 0, -2, 0, -1, -6, -61, 14, -51, 2, -6, 0, -2, -19, 0, 40, -7, -17, 0, -3, 0, 13, -4, 11, 9, 17, 0, 24, 5, 1, -12, 4, 28, 0, 0, -15, 8 }, + { 4, 9, 39, 18, 0, 2, 0, 1, -6, -16, -22, -37, 5, -5, 1, -2, -5, 15, 63, 9, -16, 0, -3, 0, 18, 42, -18, 27, 15, 1, 3, 1, 12, -34, 9, -24, 4, 28, -2, 4, -11, -4, 30, 2, 5, -13, -4, 18 }, + { -7, -2, 15, -6, 1, -1, 1, -1, -11, -3, 22, -14, 0, -2, 1, -1, -18, -7, 30, -9, -4, 0, -1, 0, -35, 23, 23, 10, -17, 1, -3, 0, -19, 53, 6, 48, -65, 12, -12, 11, -8, -16, 10, -21, -2, -12, 6, 2 }, + } + } +}; + +const int8_t g_lfnst4x4[ 4 ][ 2 ][ 16 ][ 16 ] = { + { //0 + { + { 108, -44, -15, 1, -44, 19, 7, -1, -11, 6, 2, -1, 0, -1, -1, 0 }, + { -40, -97, 56, 12, -11, 29, -12, -3, 18, 18, -15, -3, -1, -3, 2, 1 }, + { 25, -31, -1, 7, 100, -16, -29, 1, -54, 21, 14, -4, -7, 2, 4, 0 }, + { -32, -39, -92, 51, -6, -16, 36, -8, 3, 22, 18, -15, 4, 1, -5, 2 }, + { 8, -9, 33, -8, -16, -102, 36, 23, -4, 38, -27, -5, 5, 16, -8, -6 }, + { -25, 5, 16, -3, -38, 14, 11, -3, -97, 7, 26, 1, 55, -10, -19, 3 }, + { 8, 9, 16, 1, 37, 36, 94, -38, -7, 3, -47, 11, -6, -13, -17, 10 }, + { 2, 34, -5, 1, -7, 24, -25, -3, 8, 99, -28, -29, 6, -43, 21, 11 }, + { -16, -27, -39, -109, 6, 10, 16, 24, 3, 19, 10, 24, -4, -7, -2, -3 }, + { -9, -10, -34, 4, -9, -5, -29, 5, -33, -26, -96, 33, 14, 4, 39, -14 }, + { -13, 1, 4, -9, -30, -17, -3, -64, -35, 11, 17, 19, -86, 6, 36, 14 }, + { 8, -7, -5, -15, 7, -30, -28, -87, 31, 4, 4, 33, 61, -5, -17, 
22 }, + { -2, 13, -6, -4, -2, 28, -13, -14, -3, 37, -15, -3, -2, 107, -36, -24 }, + { 4, 9, 11, 31, 4, 9, 16, 19, 12, 33, 32, 94, 12, 0, 34, -45 }, + { 2, -2, 8, -16, 8, 5, 28, -17, 6, -7, 18, -45, 40, 36, 97, -8 }, + { 0, -2, 0, -10, -1, -7, -3, -35, -1, -7, -2, -32, -6, -33, -16, -112 }, + }, + { + { 119, -30, -22, -3, -23, -2, 3, 2, -16, 3, 6, 0, -3, 2, 1, 0 }, + { -27, -101, 31, 17, -47, 2, 22, 3, 19, 30, -7, -9, 5, 3, -5, -1 }, + { 0, 58, 22, -15, -102, 2, 38, 2, 10, -13, -5, 4, 14, -1, -9, 0 }, + { 23, 4, 66, -11, 22, 89, -2, -26, 13, -8, -38, -1, -9, -20, -2, 8 }, + { -19, -5, -89, 2, -26, 76, -11, -17, 20, 13, 18, -4, 1, -15, 3, 5 }, + { -10, -1, -1, 6, 23, 25, 87, -7, -74, 4, 39, -5, 0, -1, -20, -1 }, + { -17, -28, 12, -8, -32, 14, -53, -6, -68, -67, 17, 29, 2, 6, 25, 4 }, + { 1, -24, -23, 1, 17, -7, 52, 9, 50, -92, -15, 27, -15, -10, -6, 3 }, + { -6, -17, -2, -111, 7, -17, 8, -42, 9, 18, 16, 25, -4, 2, -1, 11 }, + { 9, 5, 35, 0, 6, 21, -9, 34, 44, -3, 102, 11, -7, 13, 11, -20 }, + { 4, -5, -5, -10, 15, 19, -2, 6, 6, -12, -13, 6, 95, 69, -29, -24 }, + { -6, -4, -9, -39, 1, 22, 0, 102, -19, 19, -32, 30, -16, -14, -8, -23 }, + { 4, -4, 7, 8, 4, -13, -18, 5, 0, 0, 21, 22, 58, -88, -54, 28 }, + { -4, -7, 0, -24, -7, 0, -25, 3, -3, -30, 8, -76, -34, 4, -80, -26 }, + { 0, 6, 0, 30, -6, 1, -13, -23, 1, 20, -2, 80, -44, 37, -68, 1 }, + { 0, 0, -1, 5, -1, -7, 1, -34, -2, 3, -6, 19, 5, -38, 11, -115 }, + } + }, + { //1 + { + { -111, 39, 4, 3, 44, 11, -12, -1, 7, -16, -5, 2, 3, -1, 4, 2 }, + { -47, -27, 15, -1, -92, 43, 20, -2, 20, 39, -16, -5, 10, -5, -13, 2 }, + { -35, -23, 4, 4, -17, -72, 32, 6, -59, 18, 50, -6, 0, 40, 0, -13 }, + { 13, 93, -27, -4, -48, 13, -34, 4, -52, 11, 1, 10, 3, 16, -3, 1 }, + { -11, -27, 1, 2, -47, -4, -36, 10, -2, -85, 14, 29, -20, -2, 57, 4 }, + { 0, -35, 32, -2, 26, 60, -3, -17, -82, 1, -30, 0, -37, 21, 3, 12 }, + { -17, -46, -92, 14, 7, -10, -39, 29, -17, 27, -28, 17, 1, -15, -13, 17 }, + { 4, -10, -23, 4, 16, 58, -17, 26, 30, 21, 67, 
2, -13, 59, 13, -40 }, + { 5, -20, 32, -5, 8, -3, -46, -7, -4, 2, -15, 24, 100, 44, 0, 5 }, + { -4, -1, 38, -18, -7, -42, -63, -6, 33, 34, -23, 15, -65, 33, -20, 2 }, + { -2, -10, 35, -19, 5, 8, -44, 14, -25, 25, 58, 17, 7, -84, -16, -18 }, + { 5, 13, 18, 34, 11, -4, 18, 18, 5, 58, -3, 42, -2, -10, 85, 38 }, + { -5, -7, -34, -83, 2, -1, -4, -73, 4, 20, 15, -12, 4, -3, 44, 12 }, + { 0, 4, -2, -60, 5, 9, 42, 34, 5, -14, 9, 80, -5, 13, -38, 37 }, + { -1, 2, 7, -57, 3, -7, 9, 68, -9, 6, -49, -20, 6, -4, 36, -64 }, + { -1, 0, -12, 23, 1, -4, 17, -53, -3, 4, -21, 72, -4, -8, -3, -83 }, + }, + { + { 88, -55, 6, -3, -66, 27, 9, -2, 11, 11, -13, 1, -2, -7, 1, 2 }, + { -58, -20, 27, -2, -27, 75, -29, 0, 47, -42, -11, 11, -9, -3, 19, -4 }, + { -51, 23, -22, 5, -63, 3, 37, -5, 1, 64, -35, -4, 29, -31, -11, 13 }, + { -27, -76, 49, -2, 40, 14, 9, -17, -56, 36, -25, 6, 14, 3, -6, 8 }, + { 19, -4, -36, 22, 52, 7, 36, -23, 28, -17, -64, 15, -5, -44, 48, 9 }, + { 29, 50, 13, -10, 1, 34, -59, 1, -51, 4, -16, 30, 52, -33, 24, -5 }, + { -12, -21, -74, 43, -13, 39, 18, -5, -58, -35, 27, -5, 19, 26, 6, -5 }, + { 19, 38, -10, -5, 28, 66, 0, -5, -4, 19, -30, -26, -40, 28, -60, 37 }, + { -6, 27, 18, -5, -37, -18, 12, -25, -44, -10, -38, 37, -66, 45, 40, -7 }, + { -13, -28, -45, -39, 0, -5, -39, 69, -23, 16, -12, -18, -50, -31, 24, 13 }, + { -1, 8, 24, -51, -15, -9, 44, 10, -28, -70, -12, -39, 24, -18, -4, 51 }, + { -8, -22, -17, 33, -18, -45, -57, -27, 0, -31, -30, 29, -2, -13, -53, 49 }, + { 1, 12, 32, 51, -8, 8, -2, -31, -22, 4, 46, -39, -49, -67, 14, 17 }, + { 4, 5, 24, 60, -5, -14, -23, 38, 9, 8, -34, -59, 24, 47, 42, 28 }, + { -1, -5, -20, -34, 4, 4, -15, -46, 18, 31, 42, 10, 10, 27, 49, 78 }, + { -3, -7, -22, -34, -5, -11, -36, -69, -1, -3, -25, -73, 5, 4, 4, -49 }, + } + }, + { //2 + { + { -112, 47, -2, 2, -34, 13, 2, 0, 15, -7, 1, 0, 8, -3, -1, 0 }, + { 29, -7, 1, -1, -108, 40, 2, 0, -45, 13, 4, -1, 8, -5, 1, 0 }, + { -36, -87, 69, -10, -17, -33, 26, -2, 7, 14, -11, 2, 6, 8, -7, 0 
}, + { 28, -5, 2, -2, -29, 13, -2, 0, 103, -36, -4, 1, 48, -16, -4, 1 }, + { -12, -24, 15, -3, 26, 80, -61, 9, 15, 54, -36, 2, 0, -4, 6, -2 }, + { 18, 53, 69, -74, 14, 24, 28, -30, -6, -7, -11, 12, -5, -7, -6, 8 }, + { 5, -1, 2, 0, -26, 6, 0, 1, 45, -9, -1, 0, -113, 28, 8, -1 }, + { -13, -32, 18, -2, 15, 34, -27, 7, -25, -80, 47, -1, -16, -50, 28, 2 }, + { -4, -13, -10, 19, 18, 46, 60, -48, 16, 33, 60, -48, 1, 0, 5, -2 }, + { 15, 33, 63, 89, 8, 15, 25, 40, -4, -8, -15, -8, -2, -6, -9, -7 }, + { -8, -24, -27, 15, 12, 41, 26, -29, -17, -50, -39, 27, 0, 35, -67, 26 }, + { -2, -6, -24, 13, -1, -8, 37, -22, 3, 18, -51, 22, -23, -95, 17, 17 }, + { -3, -7, -16, -21, 10, 24, 46, 75, 8, 20, 38, 72, 1, 2, 1, 7 }, + { 2, 6, 10, -3, -5, -16, -31, 12, 7, 24, 41, -16, -16, -41, -89, 49 }, + { 4, 8, 21, 40, -4, -11, -28, -57, 5, 14, 31, 70, 7, 18, 32, 52 }, + { 0, 1, 4, 11, -2, -4, -13, -34, 3, 7, 20, 47, -6, -19, -42, -101 }, + }, + { + { -99, 39, -1, 2, 65, -20, -5, 0, -15, -2, 5, -1, 0, 3, -1, 0 }, + { 58, 42, -33, 3, 33, -63, 23, -1, -55, 32, 3, -5, 21, -2, -8, 3 }, + { -15, 71, -44, 5, -58, -29, 25, 3, 62, -7, -4, -4, -19, 4, 0, 1 }, + { 46, 5, 4, -6, 71, -12, -15, 5, 52, -38, 13, -2, -63, 23, 3, -3 }, + { -14, -54, -29, 29, 25, -9, 61, -29, 27, 44, -48, 5, -27, -21, 12, 7 }, + { -3, 3, 69, -42, -11, -50, -26, 26, 24, 63, -19, -5, -18, -22, 12, 0 }, + { 17, 16, -2, 1, 38, 18, -12, 0, 62, 1, -14, 5, 89, -42, 8, -2 }, + { 15, 54, -8, 6, 6, 60, -26, -8, -30, 17, -38, 22, -43, -45, 42, -7 }, + { -6, -17, -55, -28, 9, 30, -8, 58, 4, 34, 41, -52, -16, -36, -20, 16 }, + { -2, -1, -9, -79, 7, 11, 48, 44, -13, -34, -55, 6, 12, 23, 20, -11 }, + { 7, 29, 14, -6, 12, 53, 10, -11, 14, 59, -15, -3, 5, 71, -54, 13 }, + { -5, -24, -53, 15, -3, -15, -61, 26, 6, 30, -16, 23, 13, 56, 44, -35 }, + { 4, 8, 21, 52, -1, -1, -5, 29, -7, -17, -44, -84, 8, 20, 31, 39 }, + { -2, -11, -25, -4, -4, -21, -53, 2, -5, -26, -64, 19, -8, -19, -73, 39 }, + { -3, -5, -23, -57, -2, -4, -24, -75, 1, 3, 9, -25, 
6, 15, 41, 61 }, + { 1, 1, 7, 18, 1, 2, 16, 47, 2, 5, 24, 67, 3, 9, 25, 88 }, + } + }, + { //3 + { + { -114, 37, 3, 2, -22, -23, 14, 0, 21, -17, -5, 2, 5, 2, -4, -1 }, + { -19, -41, 19, -2, 85, -60, -11, 7, 17, 31, -34, 2, -11, 19, 2, -8 }, + { 36, -25, 18, -2, -42, -53, 35, 5, 46, -60, -25, 19, 8, 21, -33, -1 }, + { -27, -80, 44, -3, -58, 1, -29, 19, -41, 18, -12, -7, 12, -17, 7, -6 }, + { -11, -21, 37, -10, 44, -4, 47, -12, -37, -41, 58, 18, 10, -46, -16, 31 }, + { 15, 47, 10, -6, -16, -44, 42, 10, -80, 25, -40, 21, -23, -2, 3, -14 }, + { 13, 25, 79, -39, -13, 10, 31, -4, 49, 45, 12, -8, 3, -1, 43, 7 }, + { 16, 11, -26, 13, -13, -74, -20, -1, 5, -6, 29, -47, 26, -49, 54, 2 }, + { -8, -34, -26, 7, -26, -19, 29, -37, 1, 22, 46, -9, -81, 37, 14, 20 }, + { -6, -30, -42, -12, -3, 5, 57, -52, -2, 37, -12, 6, 74, 10, 6, -15 }, + { 5, 9, -6, 42, -15, -18, -9, 26, 15, 58, 14, 43, 23, -10, -37, 75 }, + { -5, -23, -23, 36, 3, 22, 36, 40, 27, -4, -16, 56, -25, -46, 56, -24 }, + { 1, 3, 23, 73, 8, 5, 34, 46, -12, 2, 35, -38, 26, 52, 2, -31 }, + { -3, -2, -21, -52, 1, -10, -17, 44, -19, -20, 30, 45, 27, 61, 49, 21 }, + { -2, -7, -33, -56, -4, -6, 21, 63, 15, 31, 32, -22, -10, -26, -52, -38 }, + { -5, -12, -18, -12, 8, 22, 38, 36, -5, -15, -51, -63, -5, 0, 15, 73 }, + }, + { + { -102, 22, 7, 2, 66, -25, -6, -1, -15, 14, 1, -1, 2, -2, 1, 0 }, + { 12, 93, -27, -6, -27, -64, 36, 6, 13, 5, -23, 0, -2, 6, 5, -3 }, + { -59, -24, 17, 1, -62, -2, -3, 2, 83, -12, -17, -2, -24, 14, 7, -2 }, + { -33, 23, -36, 11, -21, 50, 35, -16, -23, -78, 16, 19, 22, 15, -30, -5 }, + { 0, -38, -81, 30, 27, 5, 51, -32, 24, 36, -16, 12, -24, -8, 9, 1 }, + { 28, 38, 8, -9, 62, 32, -13, 2, 51, -32, 15, 5, -66, 28, 0, -1 }, + { 11, -35, 21, -17, 30, -18, 31, 18, -11, -36, -80, 12, 16, 49, 13, -32 }, + { -13, 23, 22, -36, -12, 64, 39, 25, -19, 23, -36, 9, -30, -58, 33, -7 }, + { -9, -20, -55, -83, 3, -2, 1, 62, 8, 2, 27, -28, 7, 15, -11, 5 }, + { -6, 24, -38, 23, -8, 40, -49, 0, -7, 9, -25, -44, 23, 39, 70, 
-3 }, + { 12, 17, 17, 0, 32, 27, 21, 2, 67, 11, -6, -10, 89, -22, -12, 16 }, + { 2, -9, 8, 45, 7, -8, 27, 35, -9, -31, -17, -87, -23, -22, -19, 44 }, + { -1, -9, 28, -24, -1, -10, 49, -30, -8, -7, 40, 1, 4, 33, 65, 67 }, + { 5, -12, -24, -17, 13, -34, -32, -16, 14, -67, -7, 9, 7, -74, 49, 1 }, + { 2, -6, 11, 45, 3, -10, 33, 55, 8, -5, 59, 4, 7, -4, 44, -66 }, + { -1, 1, -14, 36, -1, 2, -20, 69, 0, 0, -15, 72, 3, 4, 5, 65 }, + } + } +}; + +#endif +//-------------------------------------------------------------------------------------------------- diff --git a/source/Lib/CommonLib/Slice.cpp b/source/Lib/CommonLib/Slice.cpp index 21636fa227b6297255090087cbadec4087195b85..18e58b25ee850829ba026d43ebfb8f3e48e53ef1 100644 --- a/source/Lib/CommonLib/Slice.cpp +++ b/source/Lib/CommonLib/Slice.cpp @@ -1854,6 +1854,9 @@ SPS::SPS() , m_cclmCollocatedChromaFlag ( false ) , m_IntraMTS ( false ) , m_InterMTS ( false ) +#if JVET_N0193_LFNST +, m_LFNST ( false ) +#endif , m_Affine ( false ) , m_AffineType ( false ) , m_MHIntra ( false ) diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h index b2918ca4a465d37cca498cbc96a313865be541e5..42e3958f75015295a2dffa3572d83a57b4029cb9 100644 --- a/source/Lib/CommonLib/Slice.h +++ b/source/Lib/CommonLib/Slice.h @@ -1115,6 +1115,9 @@ private: bool m_MTS; bool m_IntraMTS; // 18 bool m_InterMTS; // 19 +#if JVET_N0193_LFNST + bool m_LFNST; +#endif #if JVET_N0235_SMVD_SPS bool m_SMVD; #endif @@ -1393,6 +1396,10 @@ public: bool getUseIntraMTS () const { return m_IntraMTS; } void setUseInterMTS ( bool b ) { m_InterMTS = b; } bool getUseInterMTS () const { return m_InterMTS; } +#if JVET_N0193_LFNST + void setUseLFNST ( bool b ) { m_LFNST = b; } + bool getUseLFNST () const { return m_LFNST; } +#endif #if JVET_N0235_SMVD_SPS void setUseSMVD(bool b) { m_SMVD = b; } bool getUseSMVD() const { return m_SMVD; } diff --git a/source/Lib/CommonLib/TrQuant.cpp b/source/Lib/CommonLib/TrQuant.cpp index 
6f6a684818e3786383e5cb25346595252196313f..f1134a9d15af3e2e7cdad098d12ea6d9ec99d99d 100644 --- a/source/Lib/CommonLib/TrQuant.cpp +++ b/source/Lib/CommonLib/TrQuant.cpp @@ -170,6 +170,293 @@ void TrQuant::init( const Quant* otherQuant, } } +#if JVET_N0193_LFNST +void TrQuant::fwdLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize ) +{ + const int8_t* trMat = ( size > 4 ) ? g_lfnst8x8[ mode ][ index ][ 0 ] : g_lfnst4x4[ mode ][ index ][ 0 ]; + const int trSize = ( size > 4 ) ? 48 : 16; + int coef; + int* out = dst; + + assert( index < 3 ); + + for( int j = 0; j < zeroOutSize; j++ ) + { + int* srcPtr = src; + const int8_t* trMatTmp = trMat; + coef = 0; + for( int i = 0; i < trSize; i++ ) + { + coef += *srcPtr++ * *trMatTmp++; + } + *out++ = ( coef + 64 ) >> 7; + trMat += trSize; + } + + ::memset( out, 0, ( trSize - zeroOutSize ) * sizeof( int ) ); +} + +void TrQuant::invLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize ) +{ + int maxLog2TrDynamicRange = 15; + const TCoeff outputMinimum = -( 1 << maxLog2TrDynamicRange ); + const TCoeff outputMaximum = ( 1 << maxLog2TrDynamicRange ) - 1; + const int8_t* trMat = ( size > 4 ) ? g_lfnst8x8[ mode ][ index ][ 0 ] : g_lfnst4x4[ mode ][ index ][ 0 ]; + const int trSize = ( size > 4 ) ? 
48 : 16; + int resi; + int* out = dst; + + assert( index < 3 ); + + for( int j = 0; j < trSize; j++ ) + { + resi = 0; + const int8_t* trMatTmp = trMat; + int* srcPtr = src; + for( int i = 0; i < zeroOutSize; i++ ) + { + resi += *srcPtr++ * *trMatTmp; + trMatTmp += trSize; + } + *out++ = Clip3( outputMinimum, outputMaximum, ( int ) ( resi + 64 ) >> 7 ); + trMat++; + } +} + +uint32_t TrQuant::getLFNSTIntraMode( int wideAngPredMode ) +{ + uint32_t intraMode; + + if( wideAngPredMode < 0 ) + { + intraMode = ( uint32_t ) ( wideAngPredMode + ( NUM_EXT_LUMA_MODE >> 1 ) + NUM_LUMA_MODE ); + } + else if( wideAngPredMode >= NUM_LUMA_MODE ) + { + intraMode = ( uint32_t ) ( wideAngPredMode + ( NUM_EXT_LUMA_MODE >> 1 ) ); + } + else + { + intraMode = ( uint32_t ) wideAngPredMode; + } + + return intraMode; +} + +bool TrQuant::getTransposeFlag( uint32_t intraMode ) +{ + return ( ( intraMode >= NUM_LUMA_MODE ) && ( intraMode >= ( NUM_LUMA_MODE + ( NUM_EXT_LUMA_MODE >> 1 ) ) ) ) || + ( ( intraMode < NUM_LUMA_MODE ) && ( intraMode > DIA_IDX ) ); +} + +void TrQuant::xInvLfnst( const TransformUnit &tu, const ComponentID compID ) +{ + const CompArea& area = tu.blocks[ compID ]; + const uint32_t width = area.width; + const uint32_t height = area.height; + const uint32_t lfnstIdx = tu.cu->lfnstIdx; + + if( lfnstIdx && tu.mtsIdx != 1 && width >= 4 && height >= 4 ) + { + const bool whge3 = width >= 8 && height >= 8; +#if JVET_N0103_CGSIZE_HARMONIZATION + const ScanElement * scan = whge3 ? g_coefTopLeftDiagScan8x8[ gp_sizeIdxInfo->idxFrom( width ) ] : g_scanOrder[ SCAN_GROUPED_4x4 ][ SCAN_DIAG ][ gp_sizeIdxInfo->idxFrom( width ) ][ gp_sizeIdxInfo->idxFrom( height ) ]; +#else + const ScanElement * scan = whge3 ? 
g_coefTopLeftDiagScan8x8[ gp_sizeIdxInfo->idxFrom( width ) ] : g_scanOrder[ toChannelType( compID ) ][ SCAN_GROUPED_4x4 ][ SCAN_DIAG ][ gp_sizeIdxInfo->idxFrom( width ) ][ gp_sizeIdxInfo->idxFrom( height ) ]; +#endif + uint32_t intraMode = PU::getFinalIntraMode( *tu.cs->getPU( area.pos(), toChannelType( compID ) ), toChannelType( compID ) ); + + if( PU::isLMCMode( tu.cs->getPU( area.pos(), toChannelType( compID ) )->intraDir[ toChannelType( compID ) ] ) ) + { + intraMode = PLANAR_IDX; + } + CHECK( intraMode >= NUM_INTRA_MODE - 1, "Invalid intra mode" ); + + if( lfnstIdx < 3 ) + { + intraMode = getLFNSTIntraMode( PU::getWideAngIntraMode( tu, intraMode, compID ) ); +#if RExt__DECODER_DEBUG_TOOL_STATISTICS + CodingStatistics::IncrementStatisticTool( CodingStatisticsClassType { STATS__TOOL_LFNST, width, height, compID } ); +#endif + bool transposeFlag = getTransposeFlag( intraMode ); + const int sbSize = whge3 ? 8 : 4; + const int subGrpXMax = ( height == 4 && width > 8 ) ? 2 : 1; + const int subGrpYMax = ( width == 4 && height > 8 ) ? 2 : 1; + bool tu4x4Flag = ( width == 4 && height == 4 ); + bool tu8x8Flag = ( width == 8 && height == 8 ); + TCoeff* lfnstTemp; + TCoeff* coeffTemp; + + for( int subGroupX = 0; subGroupX < subGrpXMax; subGroupX++ ) + { + for( int subGroupY = 0; subGroupY < subGrpYMax; subGroupY++ ) + { + const int offsetX = sbSize * subGroupX; + const int offsetY = sbSize * subGroupY * width; + int y; + lfnstTemp = m_tempInMatrix; // inverse spectral rearrangement + coeffTemp = m_plTempCoeff + offsetX + offsetY; + + TCoeff * dst = lfnstTemp; + const ScanElement * scanPtr = scan; + for( y = 0; y < 16; y++ ) + { + *dst++ = coeffTemp[ scanPtr->idx ]; + scanPtr++; + } + + invLfnstNxN( m_tempInMatrix, m_tempOutMatrix, g_lfnstLut[ intraMode ], lfnstIdx - 1, sbSize, ( tu4x4Flag || tu8x8Flag ) ? 
8 : 16 ); + + lfnstTemp = m_tempOutMatrix; // inverse spectral rearrangement + + if( transposeFlag ) + { + if( sbSize == 4 ) + { + for( y = 0; y < 4; y++ ) + { + coeffTemp[ 0 ] = lfnstTemp[ 0 ]; coeffTemp[ 1 ] = lfnstTemp[ 4 ]; + coeffTemp[ 2 ] = lfnstTemp[ 8 ]; coeffTemp[ 3 ] = lfnstTemp[ 12 ]; + lfnstTemp++; + coeffTemp += width; + } + } + else // ( sbSize == 8 ) + { + for( y = 0; y < 8; y++ ) + { + coeffTemp[ 0 ] = lfnstTemp[ 0 ]; coeffTemp[ 1 ] = lfnstTemp[ 8 ]; + coeffTemp[ 2 ] = lfnstTemp[ 16 ]; coeffTemp[ 3 ] = lfnstTemp[ 24 ]; + if( y < 4 ) + { + coeffTemp[ 4 ] = lfnstTemp[ 32 ]; coeffTemp[ 5 ] = lfnstTemp[ 36 ]; + coeffTemp[ 6 ] = lfnstTemp[ 40 ]; coeffTemp[ 7 ] = lfnstTemp[ 44 ]; + } + lfnstTemp++; + coeffTemp += width; + } + } + } + else + { + for( y = 0; y < sbSize; y++ ) + { + uint32_t uiStride = ( y < 4 ) ? sbSize : 4; + ::memcpy( coeffTemp, lfnstTemp, uiStride * sizeof( TCoeff ) ); + lfnstTemp += uiStride; + coeffTemp += width; + } + } + } + } // subGroupX + } + } +} + +void TrQuant::xFwdLfnst( const TransformUnit &tu, const ComponentID compID, const bool loadTr ) +{ + const CompArea& area = tu.blocks[ compID ]; + const uint32_t width = area.width; + const uint32_t height = area.height; + const uint32_t lfnstIdx = tu.cu->lfnstIdx; + + if( lfnstIdx && tu.mtsIdx != 1 && width >= 4 && height >= 4 ) + { + const bool whge3 = width >= 8 && height >= 8; +#if JVET_N0103_CGSIZE_HARMONIZATION + const ScanElement * scan = whge3 ? g_coefTopLeftDiagScan8x8[ gp_sizeIdxInfo->idxFrom( width ) ] : g_scanOrder[ SCAN_GROUPED_4x4 ][ SCAN_DIAG ][ gp_sizeIdxInfo->idxFrom( width ) ][ gp_sizeIdxInfo->idxFrom( height ) ]; +#else + const ScanElement * scan = whge3 ? 
g_coefTopLeftDiagScan8x8[ gp_sizeIdxInfo->idxFrom( width ) ] : g_scanOrder[ toChannelType( compID ) ][ SCAN_GROUPED_4x4 ][ SCAN_DIAG ][ gp_sizeIdxInfo->idxFrom( width ) ][ gp_sizeIdxInfo->idxFrom( height ) ]; +#endif + uint32_t intraMode = PU::getFinalIntraMode( *tu.cs->getPU( area.pos(), toChannelType( compID ) ), toChannelType( compID ) ); + + if( PU::isLMCMode( tu.cs->getPU( area.pos(), toChannelType( compID ) )->intraDir[ toChannelType( compID ) ] ) ) + { + intraMode = PLANAR_IDX; + } + CHECK( intraMode >= NUM_INTRA_MODE - 1, "Invalid intra mode" ); + + if( lfnstIdx < 3 ) + { + intraMode = getLFNSTIntraMode( PU::getWideAngIntraMode( tu, intraMode, compID ) ); + + bool transposeFlag = getTransposeFlag( intraMode ); + const int sbSize = whge3 ? 8 : 4; + const int subGrpXMax = ( height == 4 && width > 8 ) ? 2 : 1; + const int subGrpYMax = ( width == 4 && height > 8 ) ? 2 : 1; + bool tu4x4Flag = ( width == 4 && height == 4 ); + bool tu8x8Flag = ( width == 8 && height == 8 ); + TCoeff* lfnstTemp; + TCoeff* coeffTemp; + TCoeff* tempCoeff = loadTr ? 
m_mtsCoeffs[ tu.mtsIdx ] : m_plTempCoeff; + + for( int subGroupX = 0; subGroupX < subGrpXMax; subGroupX++ ) + { + for( int subGroupY = 0; subGroupY < subGrpYMax; subGroupY++ ) + { + const int offsetX = sbSize * subGroupX; + const int offsetY = sbSize * subGroupY * width; + int y; + lfnstTemp = m_tempInMatrix; // forward low frequency non-separable transform + coeffTemp = tempCoeff + offsetX + offsetY; + + if( transposeFlag ) + { + if( sbSize == 4 ) + { + for( y = 0; y < 4; y++ ) + { + lfnstTemp[ 0 ] = coeffTemp[ 0 ]; lfnstTemp[ 4 ] = coeffTemp[ 1 ]; + lfnstTemp[ 8 ] = coeffTemp[ 2 ]; lfnstTemp[ 12 ] = coeffTemp[ 3 ]; + lfnstTemp++; + coeffTemp += width; + } + } + else // ( sbSize == 8 ) + { + for( y = 0; y < 8; y++ ) + { + lfnstTemp[ 0 ] = coeffTemp[ 0 ]; lfnstTemp[ 8 ] = coeffTemp[ 1 ]; + lfnstTemp[ 16 ] = coeffTemp[ 2 ]; lfnstTemp[ 24 ] = coeffTemp[ 3 ]; + if( y < 4 ) + { + lfnstTemp[ 32 ] = coeffTemp[ 4 ]; lfnstTemp[ 36 ] = coeffTemp[ 5 ]; + lfnstTemp[ 40 ] = coeffTemp[ 6 ]; lfnstTemp[ 44 ] = coeffTemp[ 7 ]; + } + lfnstTemp++; + coeffTemp += width; + } + } + } + else + { + for( y = 0; y < sbSize; y++ ) + { + uint32_t uiStride = ( y < 4 ) ? sbSize : 4; + ::memcpy( lfnstTemp, coeffTemp, uiStride * sizeof( TCoeff ) ); + lfnstTemp += uiStride; + coeffTemp += width; + } + } + + fwdLfnstNxN( m_tempInMatrix, m_tempOutMatrix, g_lfnstLut[ intraMode ], lfnstIdx - 1, sbSize, ( tu4x4Flag || tu8x8Flag ) ? 8 : 16 ); + + lfnstTemp = m_tempOutMatrix; // forward spectral rearrangement + coeffTemp = tempCoeff + offsetX + offsetY; + + const ScanElement * scanPtr = scan; + int lfnstCoeffNum = ( sbSize == 4 ) ? 
sbSize * sbSize : 48; + for( y = 0; y < lfnstCoeffNum; y++ ) + { + coeffTemp[ scanPtr->idx ] = *lfnstTemp++; + scanPtr++; + } + } + } // subGroupX + } + } +} +#endif void TrQuant::invTransformNxN( TransformUnit &tu, const ComponentID &compID, PelBuf &pResi, const QpParam &cQP ) @@ -204,6 +491,13 @@ void TrQuant::invTransformNxN( TransformUnit &tu, const ComponentID &compID, Pel DTRACE_COEFF_BUF( D_TCOEFF, tempCoeff, tu, tu.cu->predMode, compID ); +#if JVET_N0193_LFNST + if( tu.cs->sps->getUseLFNST() ) + { + xInvLfnst( tu, compID ); + } +#endif + if( isLuma(compID) && tu.mtsIdx == 1 ) { xITransformSkip( tempCoeff, pResi, tu, compID ); @@ -661,6 +955,9 @@ void TrQuant::transformNxN( TransformUnit &tu, const ComponentID &compID, const void TrQuant::transformNxN( TransformUnit &tu, const ComponentID &compID, const QpParam &cQP, TCoeff &uiAbsSum, const Ctx &ctx, const bool loadTr, double* diagRatio, double* horVerRatio ) { CodingStructure &cs = *tu.cs; +#if JVET_N0193_LFNST + const SPS &sps = *cs.sps; +#endif const CompArea &rect = tu.blocks[compID]; const uint32_t uiWidth = rect.width; const uint32_t uiHeight = rect.height; @@ -746,6 +1043,14 @@ void TrQuant::transformNxN( TransformUnit &tu, const ComponentID &compID, const //it gets the distribution of the coefficients energy, which will be useful to discard ISP tests xGetCoeffEnergy( tu, compID, tempCoeff, diagRatio, horVerRatio ); } + +#if JVET_N0193_LFNST + if( sps.getUseLFNST() ) + { + xFwdLfnst( tu, compID, loadTr ); + } +#endif + DTRACE_COEFF_BUF( D_TCOEFF, tempCoeff, tu, tu.cu->predMode, compID ); xQuant( tu, compID, tempCoeff, uiAbsSum, cQP, ctx ); diff --git a/source/Lib/CommonLib/TrQuant.h b/source/Lib/CommonLib/TrQuant.h index 61735cec78e4383cf3317f9dc152702182b64ea0..241dabe6fc70c24171e6bc63eff4fa4388c39032 100644 --- a/source/Lib/CommonLib/TrQuant.h +++ b/source/Lib/CommonLib/TrQuant.h @@ -83,8 +83,21 @@ public: void getTrTypes( TransformUnit tu, const ComponentID compID, int &trTypeHor, int &trTypeVer ); 
#endif +#if JVET_N0193_LFNST + void fwdLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize ); + void invLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize ); + + uint32_t getLFNSTIntraMode( int wideAngPredMode ); + bool getTransposeFlag ( uint32_t intraMode ); +#endif + protected: +#if JVET_N0193_LFNST + void xFwdLfnst( const TransformUnit &tu, const ComponentID compID, const bool loadTr = false ); + void xInvLfnst( const TransformUnit &tu, const ComponentID compID ); +#endif + public: void invTransformNxN (TransformUnit &tu, const ComponentID &compID, PelBuf &pResi, const QpParam &cQPs); @@ -124,6 +137,10 @@ protected: private: Quant *m_quant; //!< Quantizer TCoeff** m_mtsCoeffs; +#if JVET_N0193_LFNST + TCoeff m_tempInMatrix [ 48 ]; + TCoeff m_tempOutMatrix[ 48 ]; +#endif // forward Transform diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index c29fbc7c2fb5aa818e052f879c3d8678285864ac..59ab96c1a929595a818ceb3136e1f9bdf393e93d 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -50,6 +50,7 @@ #include <assert.h> #include <cassert> +#define JVET_N0193_LFNST 1 //Low Frequency Non-Separable Transform (LFNST), previously, Reduced Secondary Transform (RST) #define JVET_N0217_MATRIX_INTRAPRED 1 // matrix-based intra prediction (MIP) diff --git a/source/Lib/CommonLib/Unit.cpp b/source/Lib/CommonLib/Unit.cpp index c9145da4dcb456804c39f416c64287fae361eaf3..b6a344b3bd557f51d08654b7dd739e030a0aae45 100644 --- a/source/Lib/CommonLib/Unit.cpp +++ b/source/Lib/CommonLib/Unit.cpp @@ -275,6 +275,10 @@ CodingUnit& CodingUnit::operator=( const CodingUnit& other ) chromaQpAdj = other.chromaQpAdj; rootCbf = other.rootCbf; sbtInfo = other.sbtInfo; +#if JVET_N0193_LFNST + mtsFlag = other.mtsFlag; + lfnstIdx = other.lfnstIdx; +#endif tileIdx = other.tileIdx; imv = other.imv; imvNumCand = other.imvNumCand; @@ 
-315,6 +319,10 @@ void CodingUnit::initData() chromaQpAdj = 0; rootCbf = true; sbtInfo = 0; +#if JVET_N0193_LFNST + mtsFlag = 0; + lfnstIdx = 0; +#endif tileIdx = 0; imv = 0; imvNumCand = 0; diff --git a/source/Lib/CommonLib/Unit.h b/source/Lib/CommonLib/Unit.h index 4c346f03a7386ca2ffddd556a10a59f2e276e817..3fbcfb5eefc6ba1078d5641c0e4a3cbf0e916387 100644 --- a/source/Lib/CommonLib/Unit.h +++ b/source/Lib/CommonLib/Unit.h @@ -313,6 +313,10 @@ struct CodingUnit : public UnitArea bool rootCbf; uint8_t sbtInfo; uint32_t tileIdx; +#if JVET_N0193_LFNST + uint8_t mtsFlag; + uint32_t lfnstIdx; +#endif uint8_t GBiIdx; int refIdxBi[2]; #if JVET_N0217_MATRIX_INTRAPRED diff --git a/source/Lib/CommonLib/UnitTools.cpp b/source/Lib/CommonLib/UnitTools.cpp index f2ec42dbf837812b8745f39db2dce01e3c9f8324..628a4331cbfb63dd6509b9ed704f6dd0786b26be 100644 --- a/source/Lib/CommonLib/UnitTools.cpp +++ b/source/Lib/CommonLib/UnitTools.cpp @@ -252,17 +252,38 @@ bool CU::hasNonTsCodedBlock( const CodingUnit& cu ) return hasAnyNonTSCoded; } +#if JVET_N0193_LFNST +uint32_t CU::getNumNonZeroCoeffNonTs( const CodingUnit& cu, const bool lumaFlag, const bool chromaFlag ) +#else uint32_t CU::getNumNonZeroCoeffNonTs( const CodingUnit& cu ) +#endif { uint32_t count = 0; for( auto &currTU : traverseTUs( cu ) ) { +#if JVET_N0193_LFNST + count += TU::getNumNonZeroCoeffsNonTS( currTU, lumaFlag, chromaFlag ); +#else count += TU::getNumNonZeroCoeffsNonTS( currTU ); +#endif } return count; } +#if JVET_N0193_LFNST +uint32_t CU::getNumNonZeroCoeffNonTsCorner8x8( const CodingUnit& cu, const bool lumaFlag, const bool chromaFlag ) +{ + uint32_t count = 0; + for( auto &currTU : traverseTUs( cu ) ) + { + count += TU::getNumNonZeroCoeffsNonTSCorner8x8( currTU, lumaFlag, chromaFlag ); + } + + return count; +} +#endif + bool CU::divideTuInRows( const CodingUnit &cu ) { CHECK( cu.ispMode != HOR_INTRA_SUBPARTITIONS && cu.ispMode != VER_INTRA_SUBPARTITIONS, "Intra Subpartitions type not recognized!" 
); @@ -1214,6 +1235,36 @@ uint32_t PU::getFinalIntraMode( const PredictionUnit &pu, const ChannelType &chT return uiIntraMode; } +#if JVET_N0193_LFNST +int PU::getWideAngIntraMode( const TransformUnit &tu, const uint32_t dirMode, const ComponentID compID ) +{ + if( dirMode < 2 ) + { + return ( int ) dirMode; + } + + CodingStructure& cs = *tu.cs; + const CompArea& area = tu.blocks[ compID ]; + PelBuf pred = cs.getPredBuf( area ); + int width = int( pred.width ); + int height = int( pred.height ); + int modeShift[ ] = { 0, 6, 10, 12, 14, 15 }; + int deltaSize = abs( g_aucLog2[ width ] - g_aucLog2[ height ] ); + int predMode = dirMode; + + if( width > height && dirMode < 2 + modeShift[ deltaSize ] ) + { + predMode += ( VDIA_IDX - 1 ); + } + else if( height > width && predMode > VDIA_IDX - modeShift[ deltaSize ] ) + { + predMode -= ( VDIA_IDX + 1 ); + } + + return predMode; +} +#endif + bool PU::xCheckSimilarMotion(const int mergeCandIndex, const int prevCnt, const MergeCtx mergeCandList, bool hasPruned[MRG_MAX_NUM_CANDS]) { for (uint32_t ui = 0; ui < prevCnt; ui++) @@ -5824,7 +5875,11 @@ uint32_t TU::getNumNonZeroCoeffsNonTS( const TransformUnit& tu, const bool bLuma uint32_t count = 0; for( uint32_t i = 0; i < ::getNumberValidTBlocks( *tu.cs->pcv ); i++ ) { +#if JVET_N0193_LFNST + if( tu.blocks[ i ].valid() && tu.mtsIdx != 1 && TU::getCbf( tu, ComponentID( i ) ) ) +#else if( tu.blocks[i].valid() && ( isLuma(ComponentID(i)) ? 
tu.mtsIdx !=1 : true ) && TU::getCbf( tu, ComponentID( i ) ) ) +#endif { if( isLuma ( tu.blocks[i].compID ) && !bLuma ) continue; if( isChroma( tu.blocks[i].compID ) && !bChroma ) continue; @@ -5840,6 +5895,50 @@ uint32_t TU::getNumNonZeroCoeffsNonTS( const TransformUnit& tu, const bool bLuma return count; } +#if JVET_N0193_LFNST +uint32_t TU::getNumNonZeroCoeffsNonTSCorner8x8( const TransformUnit& tu, const bool lumaFlag, const bool chromaFlag ) +{ + const uint32_t lumaWidth = tu.blocks[ 0 ].width, chromaWidth = tu.blocks[ 1 ].width; + const uint32_t lumaHeight = tu.blocks[ 0 ].height, chromaHeight = tu.blocks[ 1 ].height; + bool luma4x4TUFlag = lumaWidth == 4 && lumaHeight == 4; + bool chroma4x4TUFlag = chromaWidth == 4 && chromaHeight == 4; + bool luma8x8TUFlag = lumaWidth == 8 && lumaHeight == 8; + bool chroma8x8TUFlag = chromaWidth == 8 && chromaHeight == 8; + bool lumaCountFlag = ( lumaWidth >= 8 && lumaHeight >= 8 ) || luma4x4TUFlag; + bool chromaCountFlag = ( chromaWidth >= 8 && chromaHeight >= 8 ) || chroma4x4TUFlag; + + uint32_t count = 0; + for( uint32_t i = 0; i < ::getNumberValidTBlocks( *tu.cs->pcv ); i++ ) + { + if( tu.blocks[ i ].valid() && tu.mtsIdx != 1 && TU::getCbf( tu, ComponentID( i ) ) ) + { + if( isLuma( tu.blocks[ i ].compID ) && ( !lumaFlag || !lumaCountFlag ) ) continue; + if( isChroma( tu.blocks[ i ].compID ) && ( !chromaFlag || !chromaCountFlag ) ) continue; + + const ScanElement * scan = g_coefTopLeftDiagScan8x8[ gp_sizeIdxInfo->idxFrom( tu.blocks[ i ].width ) ]; + const TCoeff* coeff = tu.getCoeffs( ComponentID( i ) ).buf; + + int startPos = MAX_LFNST_COEF_NUM, endPos = 47; + if( ( isLuma( tu.blocks[ i ].compID ) && luma4x4TUFlag ) || ( isChroma( tu.blocks[ i ].compID ) && chroma4x4TUFlag ) ) + { + startPos = 8; endPos = 15; + } + else if( ( isLuma( tu.blocks[ i ].compID ) && luma8x8TUFlag ) || ( isChroma( tu.blocks[ i ].compID ) && chroma8x8TUFlag ) ) + { + startPos = 8; endPos = 47; + } + const ScanElement *scanPtr = scan + 
startPos; + for( uint32_t j = startPos; j <= endPos; j++ ) + { + count += coeff[ scanPtr->idx ] != 0; + scanPtr++; + } + } + } + return count; +} +#endif + bool TU::needsSqrt2Scale( const TransformUnit &tu, const ComponentID &compID ) { const Size &size=tu.blocks[compID]; diff --git a/source/Lib/CommonLib/UnitTools.h b/source/Lib/CommonLib/UnitTools.h index d2a1166be624ac535ce64a4427cf1062ab3a7ed5..193bedcba39ffe338c0767c67944eb21506ed510 100644 --- a/source/Lib/CommonLib/UnitTools.h +++ b/source/Lib/CommonLib/UnitTools.h @@ -78,7 +78,12 @@ namespace CU PartSplit getSplitAtDepth (const CodingUnit& cu, const unsigned depth); bool hasNonTsCodedBlock (const CodingUnit& cu); +#if JVET_N0193_LFNST + uint32_t getNumNonZeroCoeffNonTs ( const CodingUnit& cu, const bool lumaFlag = true, const bool chromaFlag = true ); + uint32_t getNumNonZeroCoeffNonTsCorner8x8( const CodingUnit& cu, const bool lumaFlag = true, const bool chromaFlag = true ); +#else uint32_t getNumNonZeroCoeffNonTs (const CodingUnit& cu); +#endif bool isGBiIdxCoded (const CodingUnit& cu); uint8_t getValidGbiIdx (const CodingUnit& cu); @@ -134,6 +139,9 @@ namespace PU #endif void getIntraChromaCandModes (const PredictionUnit &pu, unsigned modeList[NUM_CHROMA_MODE]); uint32_t getFinalIntraMode (const PredictionUnit &pu, const ChannelType &chType); +#if JVET_N0193_LFNST + int getWideAngIntraMode ( const TransformUnit &tu, const uint32_t dirMode, const ComponentID compID ); +#endif void getInterMergeCandidates (const PredictionUnit &pu, MergeCtx& mrgCtx, int mmvdList, const int& mrgCandIdx = -1 ); @@ -212,6 +220,9 @@ namespace PU namespace TU { uint32_t getNumNonZeroCoeffsNonTS (const TransformUnit &tu, const bool bLuma = true, const bool bChroma = true); +#if JVET_N0193_LFNST + uint32_t getNumNonZeroCoeffsNonTSCorner8x8( const TransformUnit &tu, const bool bLuma = true, const bool bChroma = true ); +#endif bool isNonTransformedResidualRotated(const TransformUnit &tu, const ComponentID &compID); bool getCbf 
(const TransformUnit &tu, const ComponentID &compID); bool getCbfAtDepth (const TransformUnit &tu, const ComponentID &compID, const unsigned &depth); diff --git a/source/Lib/DecoderLib/CABACReader.cpp b/source/Lib/DecoderLib/CABACReader.cpp index 19ca70a40af36be7a1108aba8344b909855ad452..b34a0521b772a80c5113231a94457232875a8456 100644 --- a/source/Lib/DecoderLib/CABACReader.cpp +++ b/source/Lib/DecoderLib/CABACReader.cpp @@ -1430,6 +1430,10 @@ void CABACReader::cu_residual( CodingUnit& cu, Partitioner &partitioner, CUCtx& { transform_tree( *cu.cs, partitioner, cuCtx, chromaCbfs ); } + +#if JVET_N0193_LFNST + residual_lfnst_mode( cu ); +#endif } void CABACReader::rqt_root_cbf( CodingUnit& cu ) @@ -2862,6 +2866,68 @@ void CABACReader::explicit_rdpcm_mode( TransformUnit& tu, ComponentID compID ) } } +#if JVET_N0193_LFNST +void CABACReader::residual_lfnst_mode( CodingUnit& cu ) +{ +#if JVET_N0217_MATRIX_INTRAPRED + if( cu.ispMode != NOT_INTRA_SUBPARTITIONS || cu.mipFlag == true || +#else + if( cu.ispMode != NOT_INTRA_SUBPARTITIONS || +#endif + ( CS::isDualITree( *cu.cs ) && cu.chType == CHANNEL_TYPE_CHROMA && std::min( cu.blocks[ 1 ].width, cu.blocks[ 1 ].height ) < 4 ) ) + { + return; + } + + RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__LFNST ); + + if( cu.cs->sps->getUseLFNST() && CU::isIntra( cu ) && !CU::isLosslessCoded( cu ) ) + { + const bool lumaFlag = CS::isDualITree( *cu.cs ) ? ( isLuma( cu.chType ) ? true : false ) : true; + const bool chromaFlag = CS::isDualITree( *cu.cs ) ? ( isChroma( cu.chType ) ? true : false ) : true; + bool nonZeroCoeffNonTs; + bool nonZeroCoeffNonTsCorner8x8 = CU::getNumNonZeroCoeffNonTsCorner8x8( cu, lumaFlag, chromaFlag ) > 0; + const int nonZeroCoeffThr = CS::isDualITree( *cu.cs ) ? ( isLuma( cu.chType ) ? 
LFNST_SIG_NZ_LUMA : LFNST_SIG_NZ_CHROMA ) : LFNST_SIG_NZ_LUMA + LFNST_SIG_NZ_CHROMA; + nonZeroCoeffNonTs = CU::getNumNonZeroCoeffNonTs( cu, lumaFlag, chromaFlag ) > nonZeroCoeffThr; + + if( !nonZeroCoeffNonTs || nonZeroCoeffNonTsCorner8x8 ) + { + cu.lfnstIdx = 0; + return; + } + } + else + { + cu.lfnstIdx = 0; + return; + } + + uint32_t ctxOff = 0; + + int intraMode = cu.firstPU->intraDir[ cu.chType ]; + if( intraMode == DM_CHROMA_IDX && !isLuma( cu.chType ) ) + { + intraMode = PLANAR_IDX; + } + if( cu.chromaFormat == CHROMA_422 && !isLuma( cu.chType ) ) + { + intraMode = g_chroma422IntraAngleMappingTable[ intraMode ]; + } + ctxOff = PU::isLMCMode( intraMode ) || intraMode <= DC_IDX; + + unsigned cctx = 0; + if( cu.firstTU->mtsIdx < 2 && CS::isDualITree( *cu.cs ) ) cctx++; + + uint32_t idxLFNST = m_BinDecoder.decodeBin( Ctx::LFNSTIdx( ctxOff + 4 * cctx ) ); + if( idxLFNST ) + { + idxLFNST += m_BinDecoder.decodeBin( Ctx::LFNSTIdx( 2 + ctxOff + 4 * cctx ) ); + } + cu.lfnstIdx = idxLFNST; + + DTRACE( g_trace_ctx, D_SYNTAX, "residual_lfnst_mode() etype=%d pos=(%d,%d) mode=%d\n", COMPONENT_Y, cu.lx(), cu.ly(), ( int ) cu.lfnstIdx ); +} +#endif int CABACReader::last_sig_coeff( CoeffCodingContext& cctx, TransformUnit& tu, ComponentID compID ) { diff --git a/source/Lib/DecoderLib/CABACReader.h b/source/Lib/DecoderLib/CABACReader.h index abffaf06efd2c795b6ddedc47dda3f1f8072aad4..b2b9c796e11e622a0454f2c384749b7d0e37ece7 100644 --- a/source/Lib/DecoderLib/CABACReader.h +++ b/source/Lib/DecoderLib/CABACReader.h @@ -134,6 +134,9 @@ public: // residual coding (clause 7.3.8.11) void residual_coding ( TransformUnit& tu, ComponentID compID ); void mts_coding ( TransformUnit& tu, ComponentID compID ); +#if JVET_N0193_LFNST + void residual_lfnst_mode ( CodingUnit& cu ); +#endif void isp_mode ( CodingUnit& cu ); void explicit_rdpcm_mode ( TransformUnit& tu, ComponentID compID ); int last_sig_coeff ( CoeffCodingContext& cctx, TransformUnit& tu, ComponentID compID ); diff --git 
a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp index 7df613a1c3d99b095f419780f2a142596cac24b1..0caae6b6291560676bc27eda536d86dc90a2dd08 100644 --- a/source/Lib/DecoderLib/VLCReader.cpp +++ b/source/Lib/DecoderLib/VLCReader.cpp @@ -1119,6 +1119,9 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS) READ_FLAG( uiCode, "mts_intra_enabled_flag" ); pcSPS->setUseIntraMTS ( uiCode != 0 ); READ_FLAG( uiCode, "mts_inter_enabled_flag" ); pcSPS->setUseInterMTS ( uiCode != 0 ); } +#if JVET_N0193_LFNST + READ_FLAG( uiCode, "lfnst_enabled_flag" ); pcSPS->setUseLFNST ( uiCode != 0 ); +#endif #if JVET_N0235_SMVD_SPS READ_FLAG(uiCode, "smvd_flag"); pcSPS->setUseSMVD ( uiCode != 0 ); #endif diff --git a/source/Lib/EncoderLib/CABACWriter.cpp b/source/Lib/EncoderLib/CABACWriter.cpp index 8404cd3f9d0e8d591f46baef2d5fdc025b45d592..e92a739ba27d148ad2a5cd114d582a44fee265d4 100644 --- a/source/Lib/EncoderLib/CABACWriter.cpp +++ b/source/Lib/EncoderLib/CABACWriter.cpp @@ -1360,6 +1360,10 @@ void CABACWriter::cu_residual( const CodingUnit& cu, Partitioner& partitioner, C { transform_tree( *cu.cs, partitioner, cuCtx, chromaCbfs ); } + +#if JVET_N0193_LFNST + residual_lfnst_mode( cu, cuCtx ); +#endif } void CABACWriter::rqt_root_cbf( const CodingUnit& cu ) @@ -2773,6 +2777,67 @@ void CABACWriter::explicit_rdpcm_mode( const TransformUnit& tu, ComponentID comp } } +#if JVET_N0193_LFNST +void CABACWriter::residual_lfnst_mode( const CodingUnit& cu, CUCtx& cuCtx ) +{ +#if JVET_N0217_MATRIX_INTRAPRED + if( cu.ispMode != NOT_INTRA_SUBPARTITIONS || cu.mipFlag == true || +#else + if( cu.ispMode != NOT_INTRA_SUBPARTITIONS || +#endif + ( CS::isDualITree( *cu.cs ) && cu.chType == CHANNEL_TYPE_CHROMA && std::min( cu.blocks[ 1 ].width, cu.blocks[ 1 ].height ) < 4 ) ) + { + return; + } + + if( cu.cs->sps->getUseLFNST() && CU::isIntra( cu ) && !CU::isLosslessCoded( cu ) ) + { + const bool lumaFlag = CS::isDualITree( *cu.cs ) ? ( isLuma( cu.chType ) ? 
true : false ) : true; + const bool chromaFlag = CS::isDualITree( *cu.cs ) ? ( isChroma( cu.chType ) ? true : false ) : true; + bool nonZeroCoeffNonTs; + bool nonZeroCoeffNonTsCorner8x8 = CU::getNumNonZeroCoeffNonTsCorner8x8( cu, lumaFlag, chromaFlag ) > 0; + const int nonZeroCoeffThr = CS::isDualITree( *cu.cs ) ? ( isLuma( cu.chType ) ? LFNST_SIG_NZ_LUMA : LFNST_SIG_NZ_CHROMA ) : LFNST_SIG_NZ_LUMA + LFNST_SIG_NZ_CHROMA; + cuCtx.numNonZeroCoeffNonTs = CU::getNumNonZeroCoeffNonTs( cu, lumaFlag, chromaFlag ); + nonZeroCoeffNonTs = cuCtx.numNonZeroCoeffNonTs > nonZeroCoeffThr; + + if( !nonZeroCoeffNonTs || nonZeroCoeffNonTsCorner8x8 ) + { + return; + } + } + else + { + return; + } + + uint32_t ctxOff = 0; + + int intraMode = cu.firstPU->intraDir[ cu.chType ]; + if( intraMode == DM_CHROMA_IDX && !isLuma( cu.chType ) ) + { + intraMode = PLANAR_IDX; + } + if( cu.chromaFormat == CHROMA_422 && !isLuma( cu.chType ) ) + { + intraMode = g_chroma422IntraAngleMappingTable[ intraMode ]; + } + ctxOff = PU::isLMCMode( intraMode ) || intraMode <= DC_IDX; + + unsigned cctx = 0; + if( cu.firstTU->mtsIdx < 2 && CS::isDualITree( *cu.cs ) ) cctx++; + + const uint32_t idxLFNST = cu.lfnstIdx; + assert( idxLFNST < 3 ); + m_BinEncoder.encodeBin( idxLFNST ? 1 : 0, Ctx::LFNSTIdx( ctxOff + 4 * cctx ) ); + + if( idxLFNST ) + { + m_BinEncoder.encodeBin( ( idxLFNST - 1 ) ? 
1 : 0, Ctx::LFNSTIdx( 2 + ctxOff + 4 * cctx ) ); + } + + DTRACE( g_trace_ctx, D_SYNTAX, "residual_lfnst_mode() etype=%d pos=(%d,%d) mode=%d\n", COMPONENT_Y, cu.lx(), cu.ly(), ( int ) cu.lfnstIdx ); +} +#endif void CABACWriter::last_sig_coeff( CoeffCodingContext& cctx, const TransformUnit& tu, ComponentID compID ) { diff --git a/source/Lib/EncoderLib/CABACWriter.h b/source/Lib/EncoderLib/CABACWriter.h index 5e8708768e62b8e425194702f5964eeb5876e8f5..5760dd8c58c478d3a70512868f6e99603ca3a2f1 100644 --- a/source/Lib/EncoderLib/CABACWriter.h +++ b/source/Lib/EncoderLib/CABACWriter.h @@ -148,6 +148,9 @@ public: // residual coding (clause 7.3.8.11) void residual_coding ( const TransformUnit& tu, ComponentID compID ); void mts_coding ( const TransformUnit& tu, ComponentID compID ); +#if JVET_N0193_LFNST + void residual_lfnst_mode ( const CodingUnit& cu, CUCtx& cuCtx ); +#endif void isp_mode ( const CodingUnit& cu ); void explicit_rdpcm_mode ( const TransformUnit& tu, ComponentID compID ); void last_sig_coeff ( CoeffCodingContext& cctx, const TransformUnit& tu, ComponentID compID ); diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h index 1ec657ba9fc6d9f2bc424b5ff8faf051b035a5df..59221a7fed7227b51fb08747a3b7697b4c7f3584 100644 --- a/source/Lib/EncoderLib/EncCfg.h +++ b/source/Lib/EncoderLib/EncCfg.h @@ -218,6 +218,10 @@ protected: int m_InterMTSMaxCand; int m_ImplicitMTS; bool m_SBT; ///< Sub-Block Transform for inter blocks +#if JVET_N0193_LFNST + bool m_LFNST; + bool m_useFastLFNST; +#endif int m_SubPuMvpMode; bool m_Affine; bool m_AffineType; @@ -725,6 +729,13 @@ public: void setDualITree ( bool b ) { m_dualITree = b; } bool getDualITree () const { return m_dualITree; } +#if JVET_N0193_LFNST + void setLFNST ( bool b ) { m_LFNST = b; } + bool getLFNST() const { return m_LFNST; } + void setUseFastLFNST ( bool b ) { m_useFastLFNST = b; } + bool getUseFastLFNST() const { return m_useFastLFNST; } +#endif + void setUseLMChroma ( int n ) { m_LMChroma = 
n; } int getUseLMChroma() const { return m_LMChroma; } void setCclmCollocatedChromaFlag ( bool b ) { m_cclmCollocatedChromaFlag = b; } diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp index 3c735303585986f5d8a534d3e3daf2df46727e7c..b8cac8f9b1edf13866ed043702ba5a5d66f03006 100644 --- a/source/Lib/EncoderLib/EncCu.cpp +++ b/source/Lib/EncoderLib/EncCu.cpp @@ -1368,6 +1368,14 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, void EncCu::xCheckRDCostIntra( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode ) { +#if JVET_N0193_LFNST + double bestInterCost = m_modeCtrl->getBestInterCost(); + double costSize2Nx2NmtsFirstPass = m_modeCtrl->getMtsSize2Nx2NFirstPassCost(); + bool skipSecondMtsPass = m_modeCtrl->getSkipSecondMTSPass(); + const SPS& sps = *tempCS->sps; + const int maxSizeMTS = MTS_INTRA_MAX_CU_SIZE; + uint8_t considerMtsSecondPass = ( sps.getUseIntraMTS() && isLuma( partitioner.chType ) && partitioner.currArea().lwidth() <= maxSizeMTS && partitioner.currArea().lheight() <= maxSizeMTS ) ? 
1 : 0; +#endif const PPS &pps = *tempCS->pps; bool useIntraSubPartitions = false; @@ -1376,136 +1384,319 @@ void EncCu::xCheckRDCostIntra( CodingStructure *&tempCS, CodingStructure *&bestC Distortion interHad = m_modeCtrl->getInterHad(); +#if JVET_N0193_LFNST + double dct2Cost = MAX_DOUBLE; + double trGrpBestCost [ 4 ] = { MAX_DOUBLE, MAX_DOUBLE, MAX_DOUBLE, MAX_DOUBLE }; + double globalBestCost = MAX_DOUBLE; + bool bestSelFlag [ 4 ] = { false, false, false, false }; + bool trGrpCheck [ 4 ] = { true, true, true, true }; + int startMTSIdx [ 4 ] = { 0, 1, 2, 3 }; + int endMTSIdx [ 4 ] = { 0, 1, 2, 3 }; + double trGrpStopThreshold[ 3 ] = { 1.001, 1.001, 1.001 }; + int bestMtsFlag = 0; + int bestLfnstIdx = 0; + + const int maxLfnstIdx = CS::isDualITree( *tempCS ) && partitioner.chType == CHANNEL_TYPE_CHROMA && ( partitioner.currArea().lwidth() < 8 || partitioner.currArea().lheight() < 8 ) ? 0 : 2; + bool skipOtherLfnst = false; + int startLfnstIdx = 0; + int endLfnstIdx = sps.getUseLFNST() ? maxLfnstIdx : 0; + + int grpNumMax = sps.getUseLFNST() ? 4 : 1; + for( int trGrpIdx = 0; trGrpIdx < grpNumMax; trGrpIdx++ ) { + const uint8_t startMtsFlag = trGrpIdx > 0; + const uint8_t endMtsFlag = sps.getUseLFNST() ? 
considerMtsSecondPass : 0; - tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); + if( ( trGrpIdx == 0 || ( !skipSecondMtsPass && considerMtsSecondPass ) ) && trGrpCheck[ trGrpIdx ] ) + { + for( int lfnstIdx = startLfnstIdx; lfnstIdx <= endLfnstIdx; lfnstIdx++ ) + { + for( uint8_t mtsFlag = startMtsFlag; mtsFlag <= endMtsFlag; mtsFlag++ ) +#endif + { +#if JVET_N0193_LFNST + //3) if interHad is 0, only try further modes if some intra mode was already better than inter + if( sps.getUseLFNST() && m_pcEncCfg->getUsePbIntraFast() && !tempCS->slice->isIntra() && bestCU && CU::isInter( *bestCS->getCU( partitioner.chType ) ) && interHad == 0 ) + { + continue; + } +#endif - CodingUnit &cu = tempCS->addCU( CS::getArea( *tempCS, tempCS->area, partitioner.chType ), partitioner.chType ); + tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); - partitioner.setCUData( cu ); - cu.slice = tempCS->slice; - cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() ); - cu.skip = false; - cu.mmvdSkip = false; - cu.predMode = MODE_INTRA; - cu.transQuantBypass = encTestMode.lossless; - cu.chromaQpAdj = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1; - cu.qp = encTestMode.qp; - //cu.ipcm = false; - cu.ispMode = NOT_INTRA_SUBPARTITIONS; + CodingUnit &cu = tempCS->addCU( CS::getArea( *tempCS, tempCS->area, partitioner.chType ), partitioner.chType ); + + partitioner.setCUData( cu ); + cu.slice = tempCS->slice; + cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() ); + cu.skip = false; + cu.mmvdSkip = false; + cu.predMode = MODE_INTRA; + cu.transQuantBypass = encTestMode.lossless; + cu.chromaQpAdj = cu.transQuantBypass ? 
0 : m_cuChromaQpOffsetIdxPlus1; + cu.qp = encTestMode.qp; + //cu.ipcm = false; +#if JVET_N0193_LFNST + cu.lfnstIdx = lfnstIdx; + cu.mtsFlag = mtsFlag; +#endif + cu.ispMode = NOT_INTRA_SUBPARTITIONS; - CU::addPUs( cu ); + CU::addPUs( cu ); - tempCS->interHad = interHad; + tempCS->interHad = interHad; - m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false; + m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false; - if( isLuma( partitioner.chType ) ) - { - //the Intra SubPartitions mode uses the value of the best cost so far (luma if it is the fast version) to avoid test non-necessary lines - const double bestCostSoFar = CS::isDualITree( *tempCS ) ? m_modeCtrl->getBestCostWithoutSplitFlags() : bestCU && bestCU->predMode == MODE_INTRA ? bestCS->lumaCost : bestCS->cost; - m_pcIntraSearch->estIntraPredLumaQT( cu, partitioner, bestCostSoFar ); +#if JVET_N0193_LFNST + bool validCandRet = false; +#endif + if( isLuma( partitioner.chType ) ) + { + //the Intra SubPartitions mode uses the value of the best cost so far (luma if it is the fast version) to avoid test non-necessary lines + const double bestCostSoFar = CS::isDualITree( *tempCS ) ? m_modeCtrl->getBestCostWithoutSplitFlags() : bestCU && bestCU->predMode == MODE_INTRA ? 
bestCS->lumaCost : bestCS->cost; +#if JVET_N0193_LFNST + validCandRet = m_pcIntraSearch->estIntraPredLumaQT( cu, partitioner, bestCostSoFar, mtsFlag, startMTSIdx[ trGrpIdx ], endMTSIdx[ trGrpIdx ], ( trGrpIdx > 1 ) ); + if( sps.getUseLFNST() && ( !validCandRet || ( cu.ispMode && cu.firstTU->cbf[ COMPONENT_Y ] == 0 ) ) ) + { + continue; + } +#else + m_pcIntraSearch->estIntraPredLumaQT( cu, partitioner, bestCostSoFar ); +#endif - useIntraSubPartitions = cu.ispMode != NOT_INTRA_SUBPARTITIONS; - if( !CS::isDualITree( *tempCS ) ) - { - tempCS->lumaCost = m_pcRdCost->calcRdCost( tempCS->fracBits, tempCS->dist ); - if( useIntraSubPartitions ) - { - //the difference between the best cost so far and the current luma cost is stored to avoid testing the Cr component if the cost of luma + Cb is larger than the best cost - maxCostAllowedForChroma = bestCS->cost < MAX_DOUBLE ? bestCS->cost - tempCS->lumaCost : MAX_DOUBLE; - } - } + useIntraSubPartitions = cu.ispMode != NOT_INTRA_SUBPARTITIONS; + if( !CS::isDualITree( *tempCS ) ) + { + tempCS->lumaCost = m_pcRdCost->calcRdCost( tempCS->fracBits, tempCS->dist ); + if( useIntraSubPartitions ) + { + //the difference between the best cost so far and the current luma cost is stored to avoid testing the Cr component if the cost of luma + Cb is larger than the best cost + maxCostAllowedForChroma = bestCS->cost < MAX_DOUBLE ? 
bestCS->cost - tempCS->lumaCost : MAX_DOUBLE; + } + } - if (m_pcEncCfg->getUsePbIntraFast() && tempCS->dist == std::numeric_limits<Distortion>::max() - && tempCS->interHad == 0) - { - interHad = 0; - // JEM assumes only perfect reconstructions can from now on beat the inter mode - m_modeCtrl->enforceInterHad( 0 ); - return; - } + if (m_pcEncCfg->getUsePbIntraFast() && tempCS->dist == std::numeric_limits<Distortion>::max() + && tempCS->interHad == 0) + { + interHad = 0; + // JEM assumes only perfect reconstructions can from now on beat the inter mode + m_modeCtrl->enforceInterHad( 0 ); +#if JVET_N0193_LFNST + continue; +#else + return; +#endif + } - if( !CS::isDualITree( *tempCS ) ) - { - cu.cs->picture->getRecoBuf( cu.Y() ).copyFrom( cu.cs->getRecoBuf( COMPONENT_Y ) ); - cu.cs->picture->getPredBuf(cu.Y()).copyFrom(cu.cs->getPredBuf(COMPONENT_Y)); - } - } + if( !CS::isDualITree( *tempCS ) ) + { + cu.cs->picture->getRecoBuf( cu.Y() ).copyFrom( cu.cs->getRecoBuf( COMPONENT_Y ) ); + cu.cs->picture->getPredBuf(cu.Y()).copyFrom(cu.cs->getPredBuf(COMPONENT_Y)); + } + } - if( tempCS->area.chromaFormat != CHROMA_400 && ( partitioner.chType == CHANNEL_TYPE_CHROMA || !CS::isDualITree( *tempCS ) ) ) - { - TUIntraSubPartitioner subTuPartitioner( partitioner ); - m_pcIntraSearch->estIntraPredChromaQT( cu, ( !useIntraSubPartitions || ( CS::isDualITree( *cu.cs ) && !isLuma( CHANNEL_TYPE_CHROMA ) ) ) ? partitioner : subTuPartitioner, maxCostAllowedForChroma ); - if( useIntraSubPartitions && !cu.ispMode ) - { - //At this point the temp cost is larger than the best cost. Therefore, we can already skip the remaining calculations - return; - } - } + if( tempCS->area.chromaFormat != CHROMA_400 && ( partitioner.chType == CHANNEL_TYPE_CHROMA || !CS::isDualITree( *tempCS ) ) ) + { + TUIntraSubPartitioner subTuPartitioner( partitioner ); + m_pcIntraSearch->estIntraPredChromaQT( cu, ( !useIntraSubPartitions || ( CS::isDualITree( *cu.cs ) && !isLuma( CHANNEL_TYPE_CHROMA ) ) ) ? 
partitioner : subTuPartitioner, maxCostAllowedForChroma ); + if( useIntraSubPartitions && !cu.ispMode ) + { + //At this point the temp cost is larger than the best cost. Therefore, we can already skip the remaining calculations +#if JVET_N0193_LFNST + continue; +#else + return; +#endif + } + } - cu.rootCbf = false; + cu.rootCbf = false; - for( uint32_t t = 0; t < getNumberValidTBlocks( *cu.cs->pcv ); t++ ) - { - cu.rootCbf |= cu.firstTU->cbf[t] != 0; - } + for( uint32_t t = 0; t < getNumberValidTBlocks( *cu.cs->pcv ); t++ ) + { + cu.rootCbf |= cu.firstTU->cbf[t] != 0; + } - // Get total bits for current mode: encode CU - m_CABACEstimator->resetBits(); + // Get total bits for current mode: encode CU + m_CABACEstimator->resetBits(); - if( pps.getTransquantBypassEnabledFlag() ) - { - m_CABACEstimator->cu_transquant_bypass_flag( cu ); - } + if( pps.getTransquantBypassEnabledFlag() ) + { + m_CABACEstimator->cu_transquant_bypass_flag( cu ); + } - if ((!cu.cs->slice->isIntra() || cu.cs->slice->getSPS()->getIBCFlag()) - && cu.Y().valid() - ) - { - m_CABACEstimator->cu_skip_flag ( cu ); - } - m_CABACEstimator->pred_mode ( cu ); - m_CABACEstimator->pcm_data ( cu, partitioner ); + if ((!cu.cs->slice->isIntra() || cu.cs->slice->getSPS()->getIBCFlag()) + && cu.Y().valid() + ) + { + m_CABACEstimator->cu_skip_flag ( cu ); + } + m_CABACEstimator->pred_mode ( cu ); + m_CABACEstimator->pcm_data ( cu, partitioner ); #if !JVET_N0217_MATRIX_INTRAPRED - m_CABACEstimator->extend_ref_line( cu ); - m_CABACEstimator->isp_mode ( cu ); + m_CABACEstimator->extend_ref_line( cu ); + m_CABACEstimator->isp_mode ( cu ); #endif - m_CABACEstimator->cu_pred_data ( cu ); + m_CABACEstimator->cu_pred_data ( cu ); #if JVET_N0413_RDPCM - m_CABACEstimator->bdpcm_mode ( cu, ComponentID(partitioner.chType) ); + m_CABACEstimator->bdpcm_mode ( cu, ComponentID(partitioner.chType) ); #endif - // Encode Coefficients - CUCtx cuCtx; - cuCtx.isDQPCoded = true; - cuCtx.isChromaQpAdjCoded = true; - 
m_CABACEstimator->cu_residual( cu, partitioner, cuCtx ); - tempCS->fracBits = m_CABACEstimator->getEstFracBits(); - tempCS->cost = m_pcRdCost->calcRdCost(tempCS->fracBits, tempCS->dist); + // Encode Coefficients + CUCtx cuCtx; + cuCtx.isDQPCoded = true; + cuCtx.isChromaQpAdjCoded = true; + m_CABACEstimator->cu_residual( cu, partitioner, cuCtx ); - const double tmpCostWithoutSplitFlags = tempCS->cost; - xEncodeDontSplit( *tempCS, partitioner ); + tempCS->fracBits = m_CABACEstimator->getEstFracBits(); + tempCS->cost = m_pcRdCost->calcRdCost(tempCS->fracBits, tempCS->dist); - xCheckDQP( *tempCS, partitioner ); +#if JVET_N0193_LFNST + double bestIspCost = cu.ispMode ? CS::isDualITree( *tempCS ) ? tempCS->cost : tempCS->lumaCost : MAX_DOUBLE; +#endif - if( tempCS->cost < bestCS->cost ) - { - m_modeCtrl->setBestCostWithoutSplitFlags( tmpCostWithoutSplitFlags ); - } + const double tmpCostWithoutSplitFlags = tempCS->cost; + xEncodeDontSplit( *tempCS, partitioner ); - xCalDebCost( *tempCS, partitioner ); - tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt(); + xCheckDQP( *tempCS, partitioner ); + +#if JVET_N0193_LFNST + // Check if low frequency non-separable transform (LFNST) is too expensive + const int nonZeroCoeffThr = CS::isDualITree( *tempCS ) ? ( isLuma( partitioner.chType ) ? 
LFNST_SIG_NZ_LUMA : LFNST_SIG_NZ_CHROMA ) : LFNST_SIG_NZ_LUMA + LFNST_SIG_NZ_CHROMA; + if( lfnstIdx && cuCtx.numNonZeroCoeffNonTs <= nonZeroCoeffThr ) + { + bool isMDIS = false; + { + CHECK( CU::getNumPUs( cu ) > 1, "PLanarPDPC: encoder MDIS condition not defined for multi PU" ); + const PredictionUnit* pu = cu.firstPU; + isMDIS = IntraPrediction::useFilteredIntraRefSamples( COMPONENT_Y, *pu, true, *pu ); +#if HM_MDIS_AS_IN_JEM + if( pu->intraDir[ 0 ] == PLANAR_IDX ) { isMDIS |= IntraPrediction::getPlanarMDISCondition( *pu ); } +#endif + } + + if( cuCtx.numNonZeroCoeffNonTs > 0 || isMDIS ) + { + tempCS->cost = MAX_DOUBLE; + } + } + + if( mtsFlag == 0 && lfnstIdx == 0 ) + { + dct2Cost = tempCS->cost; + } +#endif + + if( tempCS->cost < bestCS->cost ) + { + m_modeCtrl->setBestCostWithoutSplitFlags( tmpCostWithoutSplitFlags ); + } + +#if JVET_N0193_LFNST + if( !mtsFlag ) static_cast< double& >( costSize2Nx2NmtsFirstPass ) = tempCS->cost; + + if( sps.getUseLFNST() && !tempCS->cus.empty() ) + { + skipOtherLfnst = m_modeCtrl->checkSkipOtherLfnst( encTestMode, tempCS, partitioner ); + } +#endif + + xCalDebCost( *tempCS, partitioner ); + tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt(); #if WCG_EXT - DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) ); + DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) ); #else - DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda() ); + DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda() ); #endif - xCheckBestMode( tempCS, bestCS, partitioner, encTestMode ); +#if JVET_N0193_LFNST + if( !sps.getUseLFNST() ) + { + xCheckBestMode( tempCS, bestCS, partitioner, encTestMode ); + } + else + { + if( xCheckBestMode( tempCS, bestCS, partitioner, encTestMode ) ) + { + trGrpBestCost[ trGrpIdx ] = globalBestCost = bestCS->cost; + bestSelFlag [ trGrpIdx ] = true; + bestMtsFlag = mtsFlag; + bestLfnstIdx = lfnstIdx; + if( bestCS->cus.size() == 1 ) + { + CodingUnit &cu = *bestCS->cus.front(); + if( cu.firstTU->mtsIdx == 1 ) + { + if( ( 
g_aucLog2[ cu.firstTU->blocks[ COMPONENT_Y ].width ] + g_aucLog2[ cu.firstTU->blocks[ COMPONENT_Y ].height ] ) >= 6 ) + { + endLfnstIdx = 0; + } + } + } + } + + //we decide to skip the second emt pass or not according to the ISP results + if( considerMtsSecondPass && cu.ispMode && !mtsFlag && tempCS->slice->isIntra() ) + { + double bestCostDct2NoIsp = m_modeCtrl->getMtsFirstPassNoIspCost(); + CHECKD( bestCostDct2NoIsp <= bestIspCost, "wrong cost!" ); + double nSamples = ( double ) ( cu.lwidth() << g_aucLog2[ cu.lheight() ] ); + double threshold = 1 + 1.4 / sqrt( nSamples ); - } //for emtCuFlag + double lfnstThreshold = 1.01 * threshold; + if( bestCostDct2NoIsp > bestIspCost*lfnstThreshold ) + { + endLfnstIdx = lfnstIdx; + } + + if( bestCostDct2NoIsp > bestIspCost*threshold ) + { + skipSecondMtsPass = true; + m_modeCtrl->setSkipSecondMTSPass( true ); + break; + } + } + //now we check whether the second pass of SIZE_2Nx2N and the whole Intra SIZE_NxN should be skipped or not + if( !mtsFlag && !tempCS->slice->isIntra() && bestCU && bestCU->predMode != MODE_INTRA ) + { + const double thEmtInterFastSkipIntra = 1.4; // Skip checking Intra if "2Nx2N using DCT2" is worse than best Inter mode + if( costSize2Nx2NmtsFirstPass > thEmtInterFastSkipIntra * bestInterCost ) + { + skipSecondMtsPass = true; + m_modeCtrl->setSkipSecondMTSPass( true ); + break; + } + } + } +#else + xCheckBestMode( tempCS, bestCS, partitioner, encTestMode ); +#endif + + } //for emtCuFlag +#if JVET_N0193_LFNST + if( skipOtherLfnst ) + { + startLfnstIdx = lfnstIdx; + endLfnstIdx = lfnstIdx; + break; + } + } //for lfnstIdx + } //if (!skipSecondMtsPass && considerMtsSecondPass && trGrpCheck[iGrpIdx]) + + if( sps.getUseLFNST() && trGrpIdx < 3 ) + { + trGrpCheck[ trGrpIdx + 1 ] = false; + + if( bestSelFlag[ trGrpIdx ] && considerMtsSecondPass ) + { + double dCostRatio = dct2Cost / trGrpBestCost[ trGrpIdx ]; + trGrpCheck[ trGrpIdx + 1 ] = ( bestMtsFlag != 0 || bestLfnstIdx != 0 ) && dCostRatio < 
trGrpStopThreshold[ trGrpIdx ]; + } + } + } //trGrpIdx +#endif } void EncCu::xCheckIntraPCM(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode ) diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp index 2466721390ead29ee924b7911f4bb6010f06e90f..861a1bbae43cecb058e4b4639d22e61a98051268 100644 --- a/source/Lib/EncoderLib/EncLib.cpp +++ b/source/Lib/EncoderLib/EncLib.cpp @@ -944,6 +944,9 @@ void EncLib::xInitSPS(SPS &sps) sps.setMinQTSizes ( m_uiMinQT ); sps.setMaxBTDepth ( m_uiMaxBTDepth, m_uiMaxBTDepthI, m_uiMaxBTDepthIChroma ); sps.setUseDualITree ( m_dualITree ); +#if JVET_N0193_LFNST + sps.setUseLFNST ( m_LFNST ); +#endif sps.setSBTMVPEnabledFlag ( m_SubPuMvpMode ); sps.setAMVREnabledFlag ( m_ImvMode != IMV_OFF ); sps.setBDOFEnabledFlag ( m_BIO ); diff --git a/source/Lib/EncoderLib/EncModeCtrl.cpp b/source/Lib/EncoderLib/EncModeCtrl.cpp index 92bb4f3b5f43b3d5ba11bd26317a94aa447c8f8d..41dd02585ff2f77c27c1f25ebfba5a33708f64fa 100644 --- a/source/Lib/EncoderLib/EncModeCtrl.cpp +++ b/source/Lib/EncoderLib/EncModeCtrl.cpp @@ -1893,6 +1893,27 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt } } +#if JVET_N0193_LFNST +bool EncModeCtrlMTnoRQT::checkSkipOtherLfnst( const EncTestMode& encTestmode, CodingStructure*& tempCS, Partitioner& partitioner ) +{ + xExtractFeatures( encTestmode, *tempCS ); + + ComprCUCtx& cuECtx = m_ComprCUCtxList.back(); + bool skipOtherLfnst = false; + + if( encTestmode.type == ETM_INTRA ) + { + if( !cuECtx.bestCS || ( tempCS->cost >= cuECtx.bestCS->cost && cuECtx.bestCS->cus.size() == 1 && CU::isIntra( *cuECtx.bestCS->cus[ 0 ] ) ) + || ( tempCS->cost < cuECtx.bestCS->cost && CU::isIntra( *tempCS->cus[ 0 ] ) ) ) + { + skipOtherLfnst = !tempCS->cus[ 0 ]->rootCbf; + } + } + + return skipOtherLfnst; +} +#endif + bool EncModeCtrlMTnoRQT::useModeResult( const EncTestMode& encTestmode, CodingStructure*& tempCS, Partitioner& 
partitioner ) { xExtractFeatures( encTestmode, *tempCS ); @@ -1916,6 +1937,21 @@ bool EncModeCtrlMTnoRQT::useModeResult( const EncTestMode& encTestmode, CodingSt { cuECtx.set( BEST_TRIV_SPLIT_COST, tempCS->cost ); } +#if JVET_N0193_LFNST + else if( encTestmode.type == ETM_INTRA ) + { + const CodingUnit cu = *tempCS->getCU( partitioner.chType ); + + if( !cu.mtsFlag ) + { + cuECtx.bestMtsSize2Nx2N1stPass = tempCS->cost; + } + if( !cu.ispMode ) + { + cuECtx.bestCostMtsFirstPassNoIsp = tempCS->cost; + } + } +#endif if( m_pcEncCfg->getIMV4PelFast() && m_pcEncCfg->getIMV() && encTestmode.type == ETM_INTER_ME ) { diff --git a/source/Lib/EncoderLib/EncModeCtrl.h b/source/Lib/EncoderLib/EncModeCtrl.h index f8ae006d18aa7f44f82aa8df76e972259d5a5f44..d7c0fc50cec4bd0c87af172ad76d20c117b9278b 100644 --- a/source/Lib/EncoderLib/EncModeCtrl.h +++ b/source/Lib/EncoderLib/EncModeCtrl.h @@ -188,12 +188,21 @@ struct ComprCUCtx , extraFeatures ( ) , extraFeaturesd( ) , bestInterCost ( MAX_DOUBLE ) +#if JVET_N0193_LFNST + , bestMtsSize2Nx2N1stPass + ( MAX_DOUBLE ) + , skipSecondMTSPass + ( false ) +#endif , interHad (std::numeric_limits<Distortion>::max()) #if ENABLE_SPLIT_PARALLELISM , isLevelSplitParallel ( false ) #endif , bestCostWithoutSplitFlags( MAX_DOUBLE ) +#if JVET_N0193_LFNST + , bestCostMtsFirstPassNoIsp( MAX_DOUBLE ) +#endif { getAreaIdx( cs.area.Y(), *cs.pcv, cuX, cuY, cuW, cuH ); partIdx = ( ( cuX << 8 ) | cuY ); @@ -218,11 +227,18 @@ struct ComprCUCtx static_vector<int64_t, 30> extraFeatures; static_vector<double, 30> extraFeaturesd; double bestInterCost; +#if JVET_N0193_LFNST + double bestMtsSize2Nx2N1stPass; + bool skipSecondMTSPass; +#endif Distortion interHad; #if ENABLE_SPLIT_PARALLELISM bool isLevelSplitParallel; #endif double bestCostWithoutSplitFlags; +#if JVET_N0193_LFNST + double bestCostMtsFirstPassNoIsp; +#endif template<typename T> T get( int ft ) const { return typeid(T) == typeid(double) ? 
(T&)extraFeaturesd[ft] : T(extraFeatures[ft]); } template<typename T> void set( int ft, T val ) { extraFeatures [ft] = int64_t( val ); } @@ -268,6 +284,9 @@ protected: public: virtual bool useModeResult ( const EncTestMode& encTestmode, CodingStructure*& tempCS, Partitioner& partitioner ) = 0; +#if JVET_N0193_LFNST + virtual bool checkSkipOtherLfnst ( const EncTestMode& encTestmode, CodingStructure*& tempCS, Partitioner& partitioner ) = 0; +#endif #if ENABLE_SPLIT_PARALLELISM virtual void copyState ( const EncModeCtrl& other, const UnitArea& area ); virtual int getNumParallelJobs ( const CodingStructure &cs, Partitioner& partitioner ) const { return 1; } @@ -299,8 +318,17 @@ public: double getBestInterCost () const { return m_ComprCUCtxList.back().bestInterCost; } Distortion getInterHad () const { return m_ComprCUCtxList.back().interHad; } void enforceInterHad ( Distortion had ) { m_ComprCUCtxList.back().interHad = had; } +#if JVET_N0193_LFNST + double getMtsSize2Nx2NFirstPassCost () const { return m_ComprCUCtxList.back().bestMtsSize2Nx2N1stPass; } + bool getSkipSecondMTSPass () const { return m_ComprCUCtxList.back().skipSecondMTSPass; } + void setSkipSecondMTSPass ( bool b ) { m_ComprCUCtxList.back().skipSecondMTSPass = b; } +#endif double getBestCostWithoutSplitFlags () const { return m_ComprCUCtxList.back().bestCostWithoutSplitFlags; } void setBestCostWithoutSplitFlags ( double cost ) { m_ComprCUCtxList.back().bestCostWithoutSplitFlags = cost; } +#if JVET_N0193_LFNST + double getMtsFirstPassNoIspCost () const { return m_ComprCUCtxList.back().bestCostMtsFirstPassNoIsp; } + void setMtsFirstPassNoIspCost ( double cost ) { m_ComprCUCtxList.back().bestCostMtsFirstPassNoIsp = cost; } +#endif protected: void xExtractFeatures ( const EncTestMode encTestmode, CodingStructure& cs ); @@ -527,6 +555,9 @@ public: virtual bool isParallelSplit ( const CodingStructure &cs, Partitioner& partitioner ) const; virtual bool parallelJobSelector( const EncTestMode& encTestmode, const 
CodingStructure &cs, Partitioner& partitioner ) const; #endif +#if JVET_N0193_LFNST + virtual bool checkSkipOtherLfnst( const EncTestMode& encTestmode, CodingStructure*& tempCS, Partitioner& partitioner ); +#endif }; diff --git a/source/Lib/EncoderLib/EncSlice.cpp b/source/Lib/EncoderLib/EncSlice.cpp index 0b30340524588c67e3c8ff9b382d70c189b7378f..55961314b29f666171fb909a8e8b846ed18502cc 100644 --- a/source/Lib/EncoderLib/EncSlice.cpp +++ b/source/Lib/EncoderLib/EncSlice.cpp @@ -119,7 +119,11 @@ EncSlice::setUpLambda( Slice* slice, const double dLambda, int iQP) int chromaQPOffset = slice->getPPS()->getQpOffset( compID ) + slice->getSliceChromaQpDelta( compID ); int qpc = ( iQP + chromaQPOffset < 0 ) ? iQP : getScaledChromaQP( iQP + chromaQPOffset, m_pcCfg->getChromaFormatIdc() ); double tmpWeight = pow( 2.0, ( iQP - qpc ) / 3.0 ); // takes into account of the chroma qp mapping and chroma qp Offset +#if JVET_N0193_LFNST + if( m_pcCfg->getDepQuantEnabledFlag() && !( m_pcCfg->getLFNST() ) ) +#else if( m_pcCfg->getDepQuantEnabledFlag() ) +#endif { tmpWeight *= ( m_pcCfg->getGOPSize() >= 8 ? 
pow( 2.0, 0.1/3.0 ) : pow( 2.0, 0.2/3.0 ) ); // increase chroma weight for dependent quantization (in order to reduce bit rate shift from chroma to luma) } diff --git a/source/Lib/EncoderLib/IntraSearch.cpp b/source/Lib/EncoderLib/IntraSearch.cpp index 21c0ac5d25b64f4016cd3b61e33ad4959c984d80..9dfc8c4f29cd2e980c794b4a0347dc70e9aa7a4f 100644 --- a/source/Lib/EncoderLib/IntraSearch.cpp +++ b/source/Lib/EncoderLib/IntraSearch.cpp @@ -266,7 +266,11 @@ void IntraSearch::init( EncCfg* pcEncCfg, // INTRA PREDICTION ////////////////////////////////////////////////////////////////////////// +#if JVET_N0193_LFNST +bool IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner, const double bestCostSoFar, bool mtsCheckRangeFlag, int mtsFirstCheckId, int mtsLastCheckId, bool moreProbMTSIdxFirst ) +#else void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner, const double bestCostSoFar ) +#endif { CodingStructure &cs = *cu.cs; const SPS &sps = *cs.sps; @@ -297,16 +301,54 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner, CHECK( !cu.firstPU, "CU has no PUs" ); const bool keepResi = cs.pps->getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() || KEEP_PRED_AND_RESI_SIGNALS; +#if JVET_N0193_LFNST + // variables for saving fast intra modes scan results across multiple LFNST passes + bool LFNSTLoadFlag = sps.getUseLFNST() && cu.lfnstIdx != 0; + bool LFNSTSaveFlag = sps.getUseLFNST() && cu.lfnstIdx == 0; + + LFNSTSaveFlag &= sps.getUseIntraMTS() ? 
cu.mtsFlag == 0 : true; + + const uint32_t lfnstIdx = cu.lfnstIdx; +#endif + #if !JVET_N0217_MATRIX_INTRAPRED uint32_t extraModes = 0; // add two extra modes, which would be used after uiMode <= DC_IDX is removed for cu.nsstIdx == 3 #endif +#if JVET_N0193_LFNST + const int width = partitioner.currArea().lwidth(); + const int height = partitioner.currArea().lheight(); + + // Marking MTS usage for faster MTS + // 0: MTS is either not applicable for current CU (cuWidth > MTS_INTRA_MAX_CU_SIZE or cuHeight > MTS_INTRA_MAX_CU_SIZE), not active in the config file or the fast decision algorithm is not used in this case + // 1: MTS fast algorithm can be applied for the current CU, and the DCT2 is being checked + // 2: MTS is being checked for current CU. Stored results of DCT2 can be utilized for speedup + uint8_t mtsUsageFlag = 0; + const int maxSizeEMT = MTS_INTRA_MAX_CU_SIZE; + if( width <= maxSizeEMT && height <= maxSizeEMT && sps.getUseIntraMTS() ) + { + mtsUsageFlag = ( sps.getUseLFNST() && cu.mtsFlag == 1 ) ? 2 : 1; + } + + if( width * height < 64 && !m_pcEncCfg->getUseFastLFNST() ) + { + mtsUsageFlag = 0; + } +#endif +#if JVET_N0193_LFNST +#if INCLUDE_ISP_CFG_FLAG + int nOptionsForISP = ( sps.getUseISP() && cu.mtsFlag == 0 && cu.lfnstIdx == 0 ) ? NUM_INTRA_SUBPARTITIONS_MODES : 1; +#else + int nOptionsForISP = ( cu.mtsFlag == 0 && cu.lfnstIdx == 0 ) ? NUM_INTRA_SUBPARTITIONS_MODES : 1; +#endif +#else const int width = partitioner.currArea().lwidth(); const int height = partitioner.currArea().lheight(); #if INCLUDE_ISP_CFG_FLAG int nOptionsForISP = sps.getUseISP() ? 
NUM_INTRA_SUBPARTITIONS_MODES : 1; #else int nOptionsForISP = NUM_INTRA_SUBPARTITIONS_MODES; +#endif #endif double bestCurrentCost = bestCostSoFar; @@ -362,12 +404,16 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner, } #if JVET_N0413_RDPCM +#if JVET_N0193_LFNST + const bool testBDPCM = m_pcEncCfg->getRDPCM() && CU::bdpcmAllowed( cu, ComponentID( partitioner.chType ) ) && cu.mtsFlag == 0 && cu.lfnstIdx == 0; +#else const bool testBDPCM = m_pcEncCfg->getRDPCM() && CU::bdpcmAllowed(cu, ComponentID(partitioner.chType)); #endif +#endif #if JVET_N0217_MATRIX_INTRAPRED static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> uiHadModeList; #else - static_vector<uint32_t, FAST_UDI_MAX_RDMODE_NUM> uiHadModeList; + static_vector<uint32_t, FAST_UDI_MAX_RDMODE_NUM> uiHadModeList; #endif static_vector<double, FAST_UDI_MAX_RDMODE_NUM> CandCostList; static_vector<double, FAST_UDI_MAX_RDMODE_NUM> CandHadList; @@ -378,6 +424,9 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner, #endif auto &pu = *cu.firstPU; +#if JVET_N0193_LFNST + bool validReturn = false; +#endif { CandHadList.clear(); CandCostList.clear(); @@ -392,7 +441,11 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner, int numModesAvailable = NUM_LUMA_MODE; // total number of Intra modes #if JVET_N0217_MATRIX_INTRAPRED const bool fastMip = sps.getUseMIP() && m_pcEncCfg->getUseFastMIP(); +#if JVET_N0193_LFNST + const bool mipAllowed = sps.getUseMIP() && ( cu.lfnstIdx == 0 ) && isLuma( partitioner.chType ) && pu.lwidth() <= MIP_MAX_WIDTH && pu.lheight() <= MIP_MAX_HEIGHT; +#else const bool mipAllowed = sps.getUseMIP() && isLuma( partitioner.chType ) && pu.lwidth() <= MIP_MAX_WIDTH && pu.lheight() <= MIP_MAX_HEIGHT; +#endif const bool testMip = mipAllowed && mipModesAvailable( pu.Y() ) && !(fastMip && (cu.lwidth() > 2 * cu.lheight() || cu.lheight() > 2 * cu.lwidth())); static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> uiRdModeList; @@ -407,6 
+460,9 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner, numModesForFullRD = numModesAvailable; #endif +#if JVET_N0193_LFNST + if( mtsUsageFlag != 2 ) +#endif { // this should always be true CHECK( !pu.Y().valid(), "PU is not valid" ); @@ -484,6 +540,9 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner, bool bSatdChecked[NUM_INTRA_MODE]; memset( bSatdChecked, 0, sizeof( bSatdChecked ) ); +#if JVET_N0193_LFNST + if( !LFNSTLoadFlag ) +#endif { for( int modeIdx = 0; modeIdx < numModesAvailable; modeIdx++ ) { @@ -570,7 +629,40 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner, #endif #endif } +#if JVET_N0193_LFNST + if( LFNSTSaveFlag ) + { + // save found best modes + m_uiSavedNumRdModesLFNST = numModesForFullRD; + m_uiSavedRdModeListLFNST = uiRdModeList; + m_dSavedModeCostLFNST = CandCostList; + // PBINTRA fast + m_uiSavedHadModeListLFNST = uiHadModeList; + m_dSavedHadListLFNST = CandHadList; +#if !JVET_N0217_MATRIX_INTRAPRED + m_iSavedExtendRefListLFNST = extendRefList; +#endif + LFNSTSaveFlag = false; + } +#endif } // NSSTFlag +#if JVET_N0193_LFNST + else + { + // restore saved modes + numModesForFullRD = m_uiSavedNumRdModesLFNST; + uiRdModeList = m_uiSavedRdModeListLFNST; + CandCostList = m_dSavedModeCostLFNST; + // PBINTRA fast + uiHadModeList = m_uiSavedHadModeListLFNST; + CandHadList = m_dSavedHadListLFNST; +#if !JVET_N0217_MATRIX_INTRAPRED + extendRefList = m_iSavedExtendRefListLFNST; +#endif + + LFNSTLoadFlag = false; + } // !LFNSTFlag +#endif #if JVET_N0217_MATRIX_INTRAPRED CHECK( uiRdModeList.size() != numModesForFullRD, "Error: RD mode list size" ); @@ -925,9 +1017,56 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner, { uiRdModeList.push_back( i ); } +#endif + } +#if JVET_N0193_LFNST + if( sps.getUseLFNST() && mtsUsageFlag == 1 ) + { + // Store the modes to be checked with RD + m_savedNumRdModes[ lfnstIdx ] = numModesForFullRD; + 
std::copy_n( uiRdModeList.begin(), numModesForFullRD, m_savedRdModeList[ lfnstIdx ] ); +#if !JVET_N0217_MATRIX_INTRAPRED + std::copy_n( extendRefList.begin(), numModesForFullRD, m_savedExtendRefList[ lfnstIdx ] ); +#endif + } +#endif + } +#if JVET_N0193_LFNST + else //mtsUsage = 2 (here we potentially reduce the number of modes that will be full-RD checked) + { + if( m_pcEncCfg->getUseFastLFNST() || !cu.slice->isIntra() ) + { + numModesForFullRD = 0; + + double thresholdSkipMode = 1.0 + ( ( cu.lfnstIdx > 0 ) ? 0.1 : 1.0 ) * ( 1.4 / sqrt( ( double ) ( width*height ) ) ); + + // Skip checking the modes with much larger R-D cost than the best mode + for( int i = 0; i < m_savedNumRdModes[ lfnstIdx ]; i++ ) + { + if( m_modeCostStore[ lfnstIdx ][ i ] <= thresholdSkipMode * m_bestModeCostStore[ lfnstIdx ] ) + { + uiRdModeList.push_back( m_savedRdModeList[ lfnstIdx ][ i ] ); +#if !JVET_N0217_MATRIX_INTRAPRED + extendRefList.push_back( m_savedExtendRefList[ lfnstIdx ][ i ] ); +#endif + numModesForFullRD++; + } + } + } + else //this is necessary because we skip the candidates list calculation, since it was already obtained for the DCT-II. 
Now we load it + { + // Restore the modes to be checked with RD + numModesForFullRD = m_savedNumRdModes[ lfnstIdx ]; + uiRdModeList.resize( numModesForFullRD ); + std::copy_n( m_savedRdModeList[ lfnstIdx ], m_savedNumRdModes[ lfnstIdx ], uiRdModeList.begin() ); + CandCostList.resize( numModesForFullRD ); +#if !JVET_N0217_MATRIX_INTRAPRED + extendRefList.resize( numModesForFullRD ); + std::copy_n( m_savedExtendRefList[ lfnstIdx ], m_savedNumRdModes[ lfnstIdx ], extendRefList.begin() ); #endif } } +#endif if( nOptionsForISP > 1 ) // we remove the non-MPMs from the ISP lists { @@ -983,10 +1122,18 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner, // after this point, don't use numModesForFullRD // PBINTRA fast +#if JVET_N0193_LFNST +#if JVET_N0329_IBC_SEARCH_IMP + if( m_pcEncCfg->getUsePbIntraFast() && !cs.slice->isIntra() && uiRdModeList.size() < numModesAvailable && !cs.slice->getDisableSATDForRD() && ( mtsUsageFlag != 2 || lfnstIdx > 0 ) ) +#else + if( m_pcEncCfg->getUsePbIntraFast() && !cs.slice->isIntra() && uiRdModeList.size() < numModesAvailable && ( mtsUsageFlag != 2 || lfnstIdx > 0 ) ) +#endif +#else #if JVET_N0329_IBC_SEARCH_IMP if (m_pcEncCfg->getUsePbIntraFast() && !cs.slice->isIntra() && uiRdModeList.size() < numModesAvailable && !cs.slice->getDisableSATDForRD()) #else if( m_pcEncCfg->getUsePbIntraFast() && !cs.slice->isIntra() && uiRdModeList.size() < numModesAvailable ) +#endif #endif { #if JVET_N0217_MATRIX_INTRAPRED @@ -1022,42 +1169,67 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner, #endif m_CABACEstimator->getCtx() = SubCtx(Ctx::MultiRefLineIdx, ctxStartMrlIdx); +#if JVET_N0193_LFNST + return false; +#else return; +#endif } +#else +#if JVET_N0193_LFNST + double pbintraRatio = ( lfnstIdx > 0 ) ? 
1.25 : PBINTRA_RATIO; + if( CandHadList.size() < 3 || CandHadList[ 2 ] > cs.interHad * pbintraRatio ) #else if( CandHadList.size() < 3 || CandHadList[2] > cs.interHad * PBINTRA_RATIO ) +#endif { uiRdModeList.resize( std::min<size_t>( uiRdModeList.size(), 2 ) ); +#if !JVET_N0217_MATRIX_INTRAPRED extendRefList.resize( std::min<size_t>( extendRefList.size(), 2 ) ); +#endif if( nOptionsForISP > 1 ) { m_rdModeListWithoutMrlHor.resize( std::min<size_t>( m_rdModeListWithoutMrlHor.size(), 2 ) ); m_rdModeListWithoutMrlVer.resize( std::min<size_t>( m_rdModeListWithoutMrlVer.size(), 2 ) ); } } +#if JVET_N0193_LFNST + if( CandHadList.size() < 2 || CandHadList[ 1 ] > cs.interHad * pbintraRatio ) +#else if( CandHadList.size() < 2 || CandHadList[1] > cs.interHad * PBINTRA_RATIO ) +#endif { uiRdModeList.resize( std::min<size_t>( uiRdModeList.size(), 1 ) ); +#if !JVET_N0217_MATRIX_INTRAPRED extendRefList.resize( std::min<size_t>( extendRefList.size(), 1 ) ); +#endif if( nOptionsForISP > 1 ) { m_rdModeListWithoutMrlHor.resize( std::min<size_t>( m_rdModeListWithoutMrlHor.size(), 1 ) ); m_rdModeListWithoutMrlVer.resize( std::min<size_t>( m_rdModeListWithoutMrlVer.size(), 1 ) ); } } +#if JVET_N0193_LFNST + if( CandHadList.size() < 1 || CandHadList[ 0 ] > cs.interHad * pbintraRatio ) +#else if( CandHadList.size() < 1 || CandHadList[0] > cs.interHad * PBINTRA_RATIO ) +#endif { cs.dist = std::numeric_limits<Distortion>::max(); cs.interHad = 0; //===== reset context models ===== - m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMpmFlag, ctxStartIntraMode); + m_CABACEstimator->getCtx() = SubCtx( Ctx::IntraLumaMpmFlag, ctxStartIntraMode ); #if !JVET_N0302_SIMPLFIED_CIIP m_CABACEstimator->getCtx() = SubCtx( Ctx::MHIntraPredMode, ctxStartMHIntraMode ); #endif m_CABACEstimator->getCtx() = SubCtx( Ctx::MultiRefLineIdx, ctxStartMrlIdx ); +#if JVET_N0193_LFNST + return false; +#else return; +#endif } #endif } @@ -1068,7 +1240,11 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, 
Partitioner &partitioner, auto* firstIspList = ispOptions[1] == HOR_INTRA_SUBPARTITIONS ? &m_rdModeListWithoutMrlHor : &m_rdModeListWithoutMrlVer; auto* secondIspList = ispOptions[1] == HOR_INTRA_SUBPARTITIONS ? &m_rdModeListWithoutMrlVer : &m_rdModeListWithoutMrlHor; +#if JVET_N0193_LFNST + if( !sps.getUseLFNST() && m_pcEncCfg->getUseFastISP() ) +#else if ( m_pcEncCfg->getUseFastISP() ) +#endif { #if JVET_N0217_MATRIX_INTRAPRED CHECKD( uiRdModeList.size() > CandCostList.size(), "Error: CandCostList size" ); @@ -1176,6 +1352,12 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner, uint8_t bestIspOption = NOT_INTRA_SUBPARTITIONS; #endif TUIntraSubPartitioner subTuPartitioner( partitioner ); +#if JVET_N0193_LFNST + if( !cu.ispMode && !cu.mtsFlag ) + { + m_modeCtrl->setMtsFirstPassNoIspCost( MAX_DOUBLE ); + } +#endif bool ispHorAllZeroCbfs = false, ispVerAllZeroCbfs = false; #if JVET_N0217_MATRIX_INTRAPRED @@ -1294,9 +1476,17 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner, // determine residual for partition cs.initSubStructure( *csTemp, partitioner.chType, cs.area, true ); +#if JVET_N0193_LFNST + bool tmpValidReturn = false; +#endif if( cu.ispMode ) { +#if JVET_N0193_LFNST + tmpValidReturn = xRecurIntraCodingLumaQT( *csTemp, subTuPartitioner, bestCurrentCost, 0, intraSubPartitionsProcOrder, false, + mtsCheckRangeFlag, mtsFirstCheckId, mtsLastCheckId, moreProbMTSIdxFirst ); +#else xRecurIntraCodingLumaQT( *csTemp, subTuPartitioner, bestCurrentCost, 0, intraSubPartitionsProcOrder ); +#endif } else { @@ -1305,25 +1495,59 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner, { m_bestCostNonMip = MAX_DOUBLE; } +#if JVET_N0193_LFNST + tmpValidReturn = xRecurIntraCodingLumaQT( *csTemp, partitioner, uiBestPUMode.ispMod ? 
bestCurrentCost : MAX_DOUBLE, -1, TU_NO_ISP, uiBestPUMode.ispMod, + mtsCheckRangeFlag, mtsFirstCheckId, mtsLastCheckId, moreProbMTSIdxFirst ); +#else xRecurIntraCodingLumaQT(*csTemp, partitioner, uiBestPUMode.ispMod ? bestCurrentCost : MAX_DOUBLE, -1, TU_NO_ISP, uiBestPUMode.ispMod); +#endif +#else +#if JVET_N0193_LFNST + tmpValidReturn = xRecurIntraCodingLumaQT( *csTemp, partitioner, bestIspOption ? bestCurrentCost : MAX_DOUBLE, -1, TU_NO_ISP, bestIspOption, + mtsCheckRangeFlag, mtsFirstCheckId, mtsLastCheckId, moreProbMTSIdxFirst ); #else xRecurIntraCodingLumaQT( *csTemp, partitioner, bestIspOption ? bestCurrentCost : MAX_DOUBLE, -1, TU_NO_ISP, bestIspOption ); +#endif #endif } if( cu.ispMode && !csTemp->cus[0]->firstTU->cbf[COMPONENT_Y] ) { - if ( cu.ispMode == HOR_INTRA_SUBPARTITIONS ) - { - ispHorAllZeroCbfs |= ( m_pcEncCfg->getUseFastISP() && csTemp->tus[0]->lheight() > 2 && csTemp->cost >= bestCurrentCost ); - } - else +#if JVET_N0193_LFNST + if( !sps.getUseLFNST() ) { - ispVerAllZeroCbfs |= ( m_pcEncCfg->getUseFastISP() && csTemp->tus[0]->lwidth() > 2 && csTemp->cost >= bestCurrentCost ); +#endif + if ( cu.ispMode == HOR_INTRA_SUBPARTITIONS ) + { + ispHorAllZeroCbfs |= ( m_pcEncCfg->getUseFastISP() && csTemp->tus[0]->lheight() > 2 && csTemp->cost >= bestCurrentCost ); + } + else + { + ispVerAllZeroCbfs |= ( m_pcEncCfg->getUseFastISP() && csTemp->tus[0]->lwidth() > 2 && csTemp->cost >= bestCurrentCost ); + } +#if JVET_N0193_LFNST } +#endif csTemp->cost = MAX_DOUBLE; csTemp->costDbOffset = 0; +#if JVET_N0193_LFNST + tmpValidReturn = false; +#endif } +#if JVET_N0193_LFNST + validReturn |= tmpValidReturn; + +#if JVET_N0413_RDPCM + if( sps.getUseLFNST() && mtsUsageFlag == 1 && !cu.ispMode && mode >= 0 ) + { + m_modeCostStore[ lfnstIdx ][ mode ] = tmpValidReturn ? csTemp->cost : ( MAX_DOUBLE / 2.0 ); //(MAX_DOUBLE / 2.0) ?? +#else + if( sps.getUseLFNST() && mtsUsageFlag == 1 && !cu.ispMode ) + { + m_modeCostStore[ lfnstIdx ][ uiMode ] = tmpValidReturn ? 
csTemp->cost : ( MAX_DOUBLE / 2.0 ); //(MAX_DOUBLE / 2.0) ?? +#endif + } +#endif #if JVET_N0217_MATRIX_INTRAPRED @@ -1332,37 +1556,58 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner, DTRACE( g_trace_ctx, D_INTRA_COST, "IntraCost T %f (%d) \n", csTemp->cost, uiOrgMode ); #endif - // check r-d cost - if( csTemp->cost < csBest->cost ) + +#if JVET_N0193_LFNST + if( tmpValidReturn ) { - std::swap( csTemp, csBest ); +#endif + // check r-d cost + if( csTemp->cost < csBest->cost ) + { + std::swap( csTemp, csBest ); - uiBestPUMode = uiOrgMode; + uiBestPUMode = uiOrgMode; #if !JVET_N0217_MATRIX_INTRAPRED - bestExtendRef = multiRefIdx; - bestIspOption = cu.ispMode; + bestExtendRef = multiRefIdx; + bestIspOption = cu.ispMode; #endif #if JVET_N0413_RDPCM - bestBDPCMMode = cu.bdpcmMode; + bestBDPCMMode = cu.bdpcmMode; #endif - if( csBest->cost < bestCurrentCost ) - { - bestCurrentCost = csBest->cost; - } +#if JVET_N0193_LFNST + if( sps.getUseLFNST() && mtsUsageFlag == 1 && !cu.ispMode ) + { + m_bestModeCostStore[ lfnstIdx ] = csBest->cost; //cs.cost; + } +#endif + if( csBest->cost < bestCurrentCost ) + { + bestCurrentCost = csBest->cost; + } #if !JVET_N0413_RDPCM - if( !cu.ispMode ) + if( !cu.ispMode ) + { + bestNormalIntraModeIndex = uiMode; + } +#endif +#if JVET_N0193_LFNST + if( !cu.ispMode && !cu.mtsFlag ) + { + m_modeCtrl->setMtsFirstPassNoIspCost( csBest->cost ); + } +#endif + } +#if JVET_N0413_RDPCM + if( !cu.ispMode && !cu.bdpcmMode && csBest->cost < bestCostNonBDPCM ) { - bestNormalIntraModeIndex = uiMode; + bestCostNonBDPCM = csBest->cost; + bestNormalIntraModeIndex = mode; } #endif - } -#if JVET_N0413_RDPCM - if( !cu.ispMode && !cu.bdpcmMode && csBest->cost < bestCostNonBDPCM ) - { - bestCostNonBDPCM = csBest->cost; - bestNormalIntraModeIndex = mode; +#if JVET_N0193_LFNST } #endif + csTemp->releaseIntermediateData(); } // Mode loop #if JVET_N0217_MATRIX_INTRAPRED @@ -1371,24 +1616,42 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit 
&cu, Partitioner &partitioner, cu.ispMode = bestIspOption; #endif - cs.useSubStructure(*csBest, partitioner.chType, pu.singleChan(CHANNEL_TYPE_LUMA), true, true, keepResi, keepResi); +#if JVET_N0193_LFNST + if( validReturn ) + { +#endif + cs.useSubStructure( *csBest, partitioner.chType, pu.singleChan( CHANNEL_TYPE_LUMA ), true, true, keepResi, keepResi ); +#if JVET_N0193_LFNST + } +#endif csBest->releaseIntermediateData(); - //=== update PU data ==== +#if JVET_N0193_LFNST + if( validReturn ) + { +#endif + //=== update PU data ==== #if JVET_N0217_MATRIX_INTRAPRED - cu.mipFlag = uiBestPUMode.mipFlg; - pu.multiRefIdx = uiBestPUMode.mRefId; - pu.intraDir[CHANNEL_TYPE_LUMA] = uiBestPUMode.modeId; + cu.mipFlag = uiBestPUMode.mipFlg; + pu.multiRefIdx = uiBestPUMode.mRefId; + pu.intraDir[ CHANNEL_TYPE_LUMA ] = uiBestPUMode.modeId; #else - pu.intraDir[0] = uiBestPUMode; - pu.multiRefIdx = bestExtendRef; + pu.intraDir[ 0 ] = uiBestPUMode; + pu.multiRefIdx = bestExtendRef; #endif #if JVET_N0413_RDPCM - cu.bdpcmMode = bestBDPCMMode; + cu.bdpcmMode = bestBDPCMMode; +#endif +#if JVET_N0193_LFNST + } #endif } //===== reset context models ===== m_CABACEstimator->getCtx() = ctxStart; + +#if JVET_N0193_LFNST + return validReturn; +#endif } void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner, const double maxCostAllowed ) @@ -2343,13 +2606,20 @@ void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID &comp } } +#if JVET_N0193_LFNST +bool IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &partitioner, const double bestCostSoFar, const int subTuIdx, const PartSplit ispType, const bool ispIsCurrentWinner, bool mtsCheckRangeFlag, int mtsFirstCheckId, int mtsLastCheckId, bool moreProbMTSIdxFirst ) +#else void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &partitioner, const double bestCostSoFar, const int subTuIdx, const PartSplit ispType, const bool ispIsCurrentWinner ) +#endif { int 
subTuCounter = subTuIdx; const UnitArea &currArea = partitioner.currArea(); const CodingUnit &cu = *cs.getCU( currArea.lumaPos(), partitioner.chType ); bool earlySkipISP = false; uint32_t currDepth = partitioner.currTrDepth; +#if JVET_N0193_LFNST + const SPS &sps = *cs.sps; +#endif const PPS &pps = *cs.pps; const bool keepResi = pps.getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() || KEEP_PRED_AND_RESI_SIGNALS; bool bCheckFull = true; @@ -2364,10 +2634,20 @@ void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par } uint32_t numSig = 0; +#if JVET_N0193_LFNST + double dSingleCost = MAX_DOUBLE; + Distortion uiSingleDistLuma = 0; + uint64_t singleFracBits = 0; + bool checkTransformSkip = pps.getUseTransformSkip(); + int bestModeId[ MAX_NUM_COMPONENT ] = { 0, 0, 0 }; + uint8_t nNumTransformCands = cu.mtsFlag ? 4 : 1; + uint8_t numTransformIndexCands = nNumTransformCands; +#else double dSingleCost = MAX_DOUBLE; Distortion uiSingleDistLuma = 0; uint64_t singleFracBits = 0; int bestModeId[MAX_NUM_COMPONENT] = { 0, 0, 0 }; +#endif const TempCtx ctxStart ( m_CtxCache, m_CABACEstimator->getCtx() ); TempCtx ctxBest ( m_CtxCache ); @@ -2384,6 +2664,10 @@ void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par csFull = &cs; } +#if JVET_N0193_LFNST + bool validReturnFull = false; +#endif + if( bCheckFull ) { csFull->cost = 0.0; @@ -2391,6 +2675,43 @@ void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par TransformUnit &tu = csFull->addTU( CS::getArea( *csFull, currArea, partitioner.chType ), partitioner.chType ); tu.depth = currDepth; +#if JVET_N0193_LFNST + const bool tsAllowed = TU::isTSAllowed( tu, COMPONENT_Y ); + const bool mtsAllowed = TU::isMTSAllowed( tu, COMPONENT_Y ); + std::vector<TrMode> trModes; + + if( sps.getUseLFNST() ) + { + checkTransformSkip &= tsAllowed; + checkTransformSkip &= !cu.mtsFlag; + checkTransformSkip &= !cu.lfnstIdx; + + if( !cu.mtsFlag && checkTransformSkip ) 
+ { + trModes.push_back( TrMode( 0, true ) ); //DCT2 + trModes.push_back( TrMode( 1, true ) ); //TS + } + } + else + { + nNumTransformCands = 1 + ( tsAllowed ? 1 : 0 ) + ( mtsAllowed ? 4 : 0 ); // DCT + TS + 4 MTS = 6 tests + + trModes.push_back( TrMode( 0, true ) ); //DCT2 + if( tsAllowed ) + { + trModes.push_back( TrMode( 1, true ) ); + } + if( mtsAllowed ) + { + for( int i = 2; i < 6; i++ ) + { + trModes.push_back( TrMode( i, true ) ); + } + } + } + + CHECK( !tu.Y().valid(), "Invalid TU" ); +#else const bool tsAllowed = TU::isTSAllowed ( tu, COMPONENT_Y ); const bool mtsAllowed = TU::isMTSAllowed( tu, COMPONENT_Y ); uint8_t nNumTransformCands = 1 + ( tsAllowed ? 1 : 0 ) + ( mtsAllowed ? 4 : 0 ); // DCT + TS + 4 MTS = 6 tests @@ -2409,6 +2730,7 @@ void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par } CHECK( !tu.Y().valid(), "Invalid TU" ); +#endif CodingStructure &saveCS = *m_pSaveCS[0]; @@ -2417,10 +2739,21 @@ void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par Distortion singleDistTmpLuma = 0; uint64_t singleTmpFracBits = 0; double singleCostTmp = 0; +#if JVET_N0193_LFNST + int firstCheckId = ( sps.getUseLFNST() && mtsCheckRangeFlag && cu.mtsFlag ) ? mtsFirstCheckId : 0; +#else int firstCheckId = 0; +#endif +#if JVET_N0193_LFNST + //we add the MTS candidates to the loop. TransformSkip will still be the last one to be checked (when modeId == lastCheckId) as long as checkTransformSkip is true + int lastCheckId = sps.getUseLFNST() ? ( ( mtsCheckRangeFlag && cu.mtsFlag ) ? ( mtsLastCheckId + ( int ) checkTransformSkip ) : ( numTransformIndexCands - ( firstCheckId + 1 ) + ( int ) checkTransformSkip ) ) : + trModes[ nNumTransformCands - 1 ].first; + bool isNotOnlyOneMode = sps.getUseLFNST() ? 
lastCheckId != firstCheckId : nNumTransformCands != 1; +#else int lastCheckId = trModes[nNumTransformCands-1].first; bool isNotOnlyOneMode = nNumTransformCands != 1; +#endif if( isNotOnlyOneMode ) { @@ -2431,10 +2764,48 @@ void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par tmpTU = &saveCS.addTU(currArea, partitioner.chType); } +#if JVET_N0193_LFNST + bool cbfBestMode = false; + bool cbfBestModeValid = false; +#endif bool cbfDCT2 = true; double bestDCT2cost = MAX_DOUBLE; double threshold = m_pcEncCfg->getUseFastISP() && !cu.ispMode && ispIsCurrentWinner && nNumTransformCands > 1 ? 1 + 1.4 / sqrt( cu.lwidth() * cu.lheight() ) : 1; +#if JVET_N0193_LFNST + for( int modeId = firstCheckId; modeId <= ( sps.getUseLFNST() ? lastCheckId : ( nNumTransformCands - 1 ) ); modeId++ ) + { + uint8_t transformIndex = modeId; + + if( sps.getUseLFNST() ) + { + if( ( transformIndex < lastCheckId ) || ( ( transformIndex == lastCheckId ) && !checkTransformSkip ) ) //we avoid this if the mode is transformSkip + { + // Skip checking other transform candidates if zero CBF is encountered and it is the best transform so far + if( m_pcEncCfg->getUseFastLFNST() && transformIndex && !cbfBestMode && cbfBestModeValid ) + { + continue; + } + } + } + else + { + if( !cbfDCT2 || ( m_pcEncCfg->getUseTransformSkipFast() && bestModeId[ COMPONENT_Y ] == 1 ) ) + { + break; + } + if( !trModes[ modeId ].second ) + { + continue; + } + //we compare the DCT-II cost against the best ISP cost so far (except for TS) + if( m_pcEncCfg->getUseFastISP() && !cu.ispMode && ispIsCurrentWinner && trModes[ modeId ].first != 0 && ( trModes[ modeId ].first != 1 || !tsAllowed ) && bestDCT2cost > bestCostSoFar * threshold ) + { + continue; + } + tu.mtsIdx = trModes[ modeId ].first; + } +#else for( int modeId = firstCheckId; modeId < nNumTransformCands; modeId++ ) { if( !cbfDCT2 || ( m_pcEncCfg->getUseTransformSkipFast() && bestModeId[COMPONENT_Y] == 1 ) ) @@ -2451,6 +2822,7 @@ void 
IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par continue; } tu.mtsIdx = trModes[modeId].first; +#endif #if JVET_N0217_MATRIX_INTRAPRED //we compare the best cost for non lwip @@ -2469,18 +2841,110 @@ void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par int default0Save1Load2 = 0; singleDistTmpLuma = 0; +#if JVET_N0193_LFNST + if( modeId == firstCheckId && ( sps.getUseLFNST() ? ( modeId != lastCheckId ) : ( nNumTransformCands > 1 ) ) ) +#else if( modeId == firstCheckId && nNumTransformCands > 1 ) +#endif { default0Save1Load2 = 1; } else if (modeId != firstCheckId) { +#if JVET_N0193_LFNST + if( sps.getUseLFNST() && !cbfBestModeValid ) + { + default0Save1Load2 = 1; + } + else + { + default0Save1Load2 = 2; + } +#else default0Save1Load2 = 2; +#endif } if( cu.ispMode ) { default0Save1Load2 = 0; } +#if JVET_N0193_LFNST + if( sps.getUseLFNST() ) + { + if( cu.mtsFlag ) + { + if( moreProbMTSIdxFirst ) + { + const ChannelType chType = toChannelType( COMPONENT_Y ); + const CompArea& area = tu.blocks[ COMPONENT_Y ]; + const PredictionUnit& pu = *cs.getPU( area.pos(), chType ); + uint32_t uiIntraMode = pu.intraDir[ chType ]; + + if( transformIndex == 1 ) + { + tu.mtsIdx = ( uiIntraMode < 34 ) ? 2 : 1; //(DST7,DCT8) : (DCT8,DST7) + } + else if( transformIndex == 2 ) + { + tu.mtsIdx = ( uiIntraMode < 34 ) ? 1 : 2; //(DCT8,DST7) : (DST7,DCT8) + } + else + { + tu.mtsIdx = transformIndex; + } + } + else + { + tu.mtsIdx = transformIndex; + } + tu.mtsIdx += 2; + } + else + { + tu.mtsIdx = transformIndex; + } + + if( !cu.mtsFlag && checkTransformSkip ) + { + xIntraCodingTUBlock( tu, COMPONENT_Y, false, singleDistTmpLuma, default0Save1Load2, &numSig, modeId == 0 ? 
&trModes : nullptr, true ); + if( modeId == 0 ) + { + for( int i = 0; i < 2; i++ ) + { + if( trModes[ i ].second ) + { + lastCheckId = trModes[ i ].first; + } + } + } + } + else + { + xIntraCodingTUBlock( tu, COMPONENT_Y, false, singleDistTmpLuma, default0Save1Load2, &numSig ); + } + } + else + { + if( nNumTransformCands > 1 ) + { + xIntraCodingTUBlock( tu, COMPONENT_Y, false, singleDistTmpLuma, default0Save1Load2, &numSig, modeId == 0 ? &trModes : nullptr, true ); + if( modeId == 0 ) + { + for( int i = 0; i < nNumTransformCands; i++ ) + { + if( trModes[ i ].second ) + { + lastCheckId = trModes[ i ].first; + } + } + } + } + else + { + xIntraCodingTUBlock( tu, COMPONENT_Y, false, singleDistTmpLuma, default0Save1Load2, &numSig ); + } + } +#else if( nNumTransformCands > 1 ) { xIntraCodingTUBlock( tu, COMPONENT_Y, false, singleDistTmpLuma, default0Save1Load2, &numSig, modeId == 0 ? &trModes : nullptr, true ); @@ -2499,9 +2963,14 @@ void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par { xIntraCodingTUBlock( tu, COMPONENT_Y, false, singleDistTmpLuma, default0Save1Load2, &numSig ); } +#endif //----- determine rate and r-d cost ----- +#if JVET_N0193_LFNST + if( ( sps.getUseLFNST() ? ( modeId == lastCheckId && modeId != 0 && checkTransformSkip ) : ( trModes[ modeId ].first != 0 ) ) && !TU::getCbfAtDepth( tu, COMPONENT_Y, currDepth ) ) +#else if( ( trModes[modeId].first != 0 && !TU::getCbfAtDepth( tu, COMPONENT_Y, currDepth ) ) ) +#endif { //In order not to code TS flag when cbf is zero, the case for TS with cbf being zero is forbidden. 
singleCostTmp = MAX_DOUBLE; @@ -2536,11 +3005,25 @@ void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par uiSingleDistLuma = singleDistTmpLuma; singleFracBits = singleTmpFracBits; - bestModeId[COMPONENT_Y] = trModes[modeId].first; - if( trModes[modeId].first == 0 ) +#if JVET_N0193_LFNST + if( sps.getUseLFNST() ) { - cbfDCT2 = TU::getCbfAtDepth( tu, COMPONENT_Y, currDepth ); + bestModeId[ COMPONENT_Y ] = modeId; + cbfBestMode = TU::getCbfAtDepth( tu, COMPONENT_Y, currDepth ); + cbfBestModeValid = true; + validReturnFull = true; } + else + { +#endif + bestModeId[ COMPONENT_Y ] = trModes[ modeId ].first; + if( trModes[ modeId ].first == 0 ) + { + cbfDCT2 = TU::getCbfAtDepth( tu, COMPONENT_Y, currDepth ); + } +#if JVET_N0193_LFNST + } +#endif if( bestModeId[COMPONENT_Y] != lastCheckId ) { @@ -2560,34 +3043,53 @@ void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par } } - if( bestModeId[COMPONENT_Y] != lastCheckId ) +#if JVET_N0193_LFNST + if( sps.getUseLFNST() && !validReturnFull ) { - csFull->getPredBuf( tu.Y() ).copyFrom( saveCS.getPredBuf( tu.Y() ) ); - csFull->getRecoBuf( tu.Y() ).copyFrom( saveCS.getRecoBuf( tu.Y() ) ); + csFull->cost = MAX_DOUBLE; - if( keepResi ) + if( bCheckSplit ) { - csFull->getResiBuf ( tu.Y() ).copyFrom( saveCS.getResiBuf ( tu.Y() ) ); - csFull->getOrgResiBuf( tu.Y() ).copyFrom( saveCS.getOrgResiBuf( tu.Y() ) ); + ctxBest = m_CABACEstimator->getCtx(); } + } + else + { +#endif + if( bestModeId[COMPONENT_Y] != lastCheckId ) + { + csFull->getPredBuf( tu.Y() ).copyFrom( saveCS.getPredBuf( tu.Y() ) ); + csFull->getRecoBuf( tu.Y() ).copyFrom( saveCS.getRecoBuf( tu.Y() ) ); - tu.copyComponentFrom( *tmpTU, COMPONENT_Y ); + if( keepResi ) + { + csFull->getResiBuf ( tu.Y() ).copyFrom( saveCS.getResiBuf ( tu.Y() ) ); + csFull->getOrgResiBuf( tu.Y() ).copyFrom( saveCS.getOrgResiBuf( tu.Y() ) ); + } + + tu.copyComponentFrom( *tmpTU, COMPONENT_Y ); - if( !bCheckSplit ) + if( !bCheckSplit ) + { + 
m_CABACEstimator->getCtx() = ctxBest; + } + } + else if( bCheckSplit ) { - m_CABACEstimator->getCtx() = ctxBest; + ctxBest = m_CABACEstimator->getCtx(); } - } - else if( bCheckSplit ) - { - ctxBest = m_CABACEstimator->getCtx(); - } - csFull->cost += dSingleCost; - csFull->dist += uiSingleDistLuma; - csFull->fracBits += singleFracBits; + csFull->cost += dSingleCost; + csFull->dist += uiSingleDistLuma; + csFull->fracBits += singleFracBits; +#if JVET_N0193_LFNST + } +#endif } +#if JVET_N0193_LFNST + bool validReturnSplit = false; +#endif if( bCheckSplit ) { //----- store full entropy coding status, load original entropy coding status ----- @@ -2611,8 +3113,18 @@ void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par } do { +#if JVET_N0193_LFNST + bool tmpValidReturnSplit = xRecurIntraCodingLumaQT( *csSplit, partitioner, bestCostSoFar, subTuCounter, ispType, false, mtsCheckRangeFlag, mtsFirstCheckId, mtsLastCheckId ); + subTuCounter += subTuCounter != -1 ? 1 : 0; + if( sps.getUseLFNST() && !tmpValidReturnSplit ) + { + splitIsSelected = false; + break; + } +#else xRecurIntraCodingLumaQT( *csSplit, partitioner, bestCostSoFar, subTuCounter, ispType ); subTuCounter += subTuCounter != -1 ? 
1 : 0; +#endif if( !cu.ispMode ) { @@ -2672,26 +3184,46 @@ void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par //--- update cost --- csSplit->cost = m_pcRdCost->calcRdCost(csSplit->fracBits, csSplit->dist); + +#if JVET_N0193_LFNST + validReturnSplit = true; +#endif } } +#if JVET_N0193_LFNST + bool retVal = false; +#endif if( csFull || csSplit ) { +#if JVET_N0193_LFNST + if( !sps.getUseLFNST() || validReturnFull || validReturnSplit ) { - // otherwise this would've happened in useSubStructure - cs.picture->getRecoBuf( currArea.Y() ).copyFrom( cs.getRecoBuf( currArea.Y() ) ); - cs.picture->getPredBuf( currArea.Y() ).copyFrom( cs.getPredBuf( currArea.Y() ) ); - } +#endif + { + // otherwise this would've happened in useSubStructure + cs.picture->getRecoBuf( currArea.Y() ).copyFrom( cs.getRecoBuf( currArea.Y() ) ); + cs.picture->getPredBuf( currArea.Y() ).copyFrom( cs.getPredBuf( currArea.Y() ) ); + } - if( cu.ispMode && earlySkipISP ) - { - cs.cost = MAX_DOUBLE; - } - else - { - cs.cost = m_pcRdCost->calcRdCost( cs.fracBits, cs.dist ); + if( cu.ispMode && earlySkipISP ) + { + cs.cost = MAX_DOUBLE; + } + else + { + cs.cost = m_pcRdCost->calcRdCost( cs.fracBits, cs.dist ); +#if JVET_N0193_LFNST + retVal = true; +#endif + } +#if JVET_N0193_LFNST } +#endif } +#if JVET_N0193_LFNST + return retVal; +#endif } ChromaCbfs IntraSearch::xRecurIntraChromaCodingQT( CodingStructure &cs, Partitioner& partitioner, const double bestCostSoFar, const PartSplit ispType ) diff --git a/source/Lib/EncoderLib/IntraSearch.h b/source/Lib/EncoderLib/IntraSearch.h index 54c3ddc6e86125c9ceba41af6cec01cbc63e36de..568005bc9ab9b2b1dd22264de4504e6c55c3cc10 100644 --- a/source/Lib/EncoderLib/IntraSearch.h +++ b/source/Lib/EncoderLib/IntraSearch.h @@ -93,15 +93,46 @@ private: static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> m_rdModeListWithoutMrlHor; static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> m_rdModeListWithoutMrlVer; #else - //cost variables for the EMT algorithm and 
new modes list static_vector<uint32_t, FAST_UDI_MAX_RDMODE_NUM> m_rdModeListWithoutMrl; static_vector<uint32_t, FAST_UDI_MAX_RDMODE_NUM> m_rdModeListWithoutMrlHor; static_vector<uint32_t, FAST_UDI_MAX_RDMODE_NUM> m_rdModeListWithoutMrlVer; #endif + //cost variables for the EMT algorithm and new modes list +#if JVET_N0193_LFNST + double m_bestModeCostStore[ NUM_LFNST_NUM_PER_SET ]; // RD cost of the best mode for each PU using DCT2 + double m_modeCostStore[ NUM_LFNST_NUM_PER_SET ][ NUM_LUMA_MODE ]; // RD cost of each mode for each PU using DCT2 +#if JVET_N0217_MATRIX_INTRAPRED + ModeInfo m_savedRdModeList[ NUM_LFNST_NUM_PER_SET ][ NUM_LUMA_MODE ]; + int32_t m_savedNumRdModes[ NUM_LFNST_NUM_PER_SET ]; +#else + uint32_t m_savedRdModeList[ NUM_LFNST_NUM_PER_SET ][ NUM_LUMA_MODE ], m_savedNumRdModes[ NUM_LFNST_NUM_PER_SET ]; +#endif +#if !JVET_N0217_MATRIX_INTRAPRED + int m_savedExtendRefList[ NUM_LFNST_NUM_PER_SET ][ NUM_LUMA_MODE ]; +#endif +#endif + static_vector<double, FAST_UDI_MAX_RDMODE_NUM> m_intraModeDiagRatio; static_vector<double, FAST_UDI_MAX_RDMODE_NUM> m_intraModeHorVerRatio; static_vector<int, FAST_UDI_MAX_RDMODE_NUM> m_intraModeTestedNormalIntra; + +#if JVET_N0193_LFNST +#if JVET_N0217_MATRIX_INTRAPRED + static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> m_uiSavedRdModeListLFNST; + static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> m_uiSavedHadModeListLFNST; +#else + static_vector<uint32_t, FAST_UDI_MAX_RDMODE_NUM> m_uiSavedRdModeListLFNST; + static_vector<uint32_t, FAST_UDI_MAX_RDMODE_NUM> m_uiSavedHadModeListLFNST; +#endif + uint32_t m_uiSavedNumRdModesLFNST; + static_vector<double, FAST_UDI_MAX_RDMODE_NUM> m_dSavedModeCostLFNST; + static_vector<double, FAST_UDI_MAX_RDMODE_NUM> m_dSavedHadListLFNST; +#if !JVET_N0217_MATRIX_INTRAPRED + static_vector<int, FAST_UDI_MAX_RDMODE_NUM> m_iSavedExtendRefListLFNST; +#endif +#endif + PelStorage m_tmpStorageLCU; protected: // interface to option @@ -144,7 +175,11 @@ public: public: +#if JVET_N0193_LFNST + bool 
estIntraPredLumaQT ( CodingUnit &cu, Partitioner& pm, const double bestCostSoFar = MAX_DOUBLE, bool mtsCheckRangeFlag = false, int mtsFirstCheckId = 0, int mtsLastCheckId = 0, bool moreProbMTSIdxFirst = false ); +#else void estIntraPredLumaQT ( CodingUnit &cu, Partitioner& pm, const double bestCostSoFar = MAX_DOUBLE ); +#endif void estIntraPredChromaQT ( CodingUnit &cu, Partitioner& pm, const double maxCostAllowed = MAX_DOUBLE ); void IPCMSearch (CodingStructure &cs, Partitioner& partitioner); uint64_t xFracModeBitsIntra (PredictionUnit &pu, const uint32_t &uiMode, const ChannelType &compID); @@ -173,7 +208,11 @@ protected: void xIntraCodingTUBlock (TransformUnit &tu, const ComponentID &compID, const bool &checkCrossCPrediction, Distortion& ruiDist, const int &default0Save1Load2 = 0, uint32_t* numSig = nullptr, std::vector<TrMode>* trModes=nullptr, const bool loadTr=false ); ChromaCbfs xRecurIntraChromaCodingQT( CodingStructure &cs, Partitioner& pm, const double bestCostSoFar = MAX_DOUBLE, const PartSplit ispType = TU_NO_ISP ); +#if JVET_N0193_LFNST + bool xRecurIntraCodingLumaQT ( CodingStructure &cs, Partitioner& pm, const double bestCostSoFar = MAX_DOUBLE, const int subTuIdx = -1, const PartSplit ispType = TU_NO_ISP, const bool ispIsCurrentWinner = false, bool mtsCheckRangeFlag = false, int mtsFirstCheckId = 0, int mtsLastCheckId = 0, bool moreProbMTSIdxFirst = false ); +#else void xRecurIntraCodingLumaQT ( CodingStructure &cs, Partitioner& pm, const double bestCostSoFar = MAX_DOUBLE, const int subTuIdx = -1, const PartSplit ispType = TU_NO_ISP, const bool ispIsCurrentWinner = false ); +#endif void encPredIntraDPCM( const ComponentID &compID, PelBuf &pOrg, PelBuf &pDst, const uint32_t &uiDirMode ); diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp index 92e9fc2733eae5b796a4c8194d2eb327353160f5..ae4c19c0580fb18c541fcad8a6b2ffd95f39d482 100644 --- a/source/Lib/EncoderLib/VLCWriter.cpp +++ b/source/Lib/EncoderLib/VLCWriter.cpp 
@@ -769,6 +769,9 @@ void HLSWriter::codeSPS( const SPS* pcSPS ) WRITE_FLAG( pcSPS->getUseIntraMTS() ? 1 : 0, "mts_intra_enabled_flag" ); WRITE_FLAG( pcSPS->getUseInterMTS() ? 1 : 0, "mts_inter_enabled_flag" ); } +#if JVET_N0193_LFNST + WRITE_FLAG( pcSPS->getUseLFNST() ? 1 : 0, "lfnst_enabled_flag" ); +#endif #if JVET_N0235_SMVD_SPS WRITE_FLAG( pcSPS->getUseSMVD() ? 1 : 0, "smvd_flag" ); #endif