diff --git a/cfg/encoder_intra_vtm.cfg b/cfg/encoder_intra_vtm.cfg index a8e2850ff60432658ece74697bb70d3f1248917f..7dcb733abeea23f7547d0fb95aac52c3ba058bc9 100644 --- a/cfg/encoder_intra_vtm.cfg +++ b/cfg/encoder_intra_vtm.cfg @@ -104,6 +104,7 @@ MTT : 1 MTS : 1 MTSIntraMaxCand : 3 MTSInterMaxCand : 4 +SBT : 1 Affine : 1 SubPuMvp : 1 MaxNumMergeCand : 6 diff --git a/cfg/encoder_lowdelay_P_vtm.cfg b/cfg/encoder_lowdelay_P_vtm.cfg index 1ad1e8112835ddeb10e8ae61a21313de15e7a9b8..e5fa7ee25ac00d3d3e0209699f94742d5ecff438 100644 --- a/cfg/encoder_lowdelay_P_vtm.cfg +++ b/cfg/encoder_lowdelay_P_vtm.cfg @@ -120,6 +120,7 @@ MTT : 1 MTS : 1 MTSIntraMaxCand : 3 MTSInterMaxCand : 4 +SBT : 1 Affine : 1 SubPuMvp : 1 MaxNumMergeCand : 6 diff --git a/cfg/encoder_lowdelay_vtm.cfg b/cfg/encoder_lowdelay_vtm.cfg index 5e09cae7dcaf24e292f2db9605f4ac4bbabd82b0..dfd581489386ce61b9c84da83bbd2f7736e37aae 100644 --- a/cfg/encoder_lowdelay_vtm.cfg +++ b/cfg/encoder_lowdelay_vtm.cfg @@ -120,6 +120,7 @@ MTT : 1 MTS : 1 MTSIntraMaxCand : 3 MTSInterMaxCand : 4 +SBT : 1 Affine : 1 SubPuMvp : 1 MaxNumMergeCand : 6 diff --git a/cfg/encoder_randomaccess_vtm.cfg b/cfg/encoder_randomaccess_vtm.cfg index 2f0ff3dad0747818d5482af32a690a4d4119efea..397869adb5d938acd130640d61ec7a5cb707a7bf 100644 --- a/cfg/encoder_randomaccess_vtm.cfg +++ b/cfg/encoder_randomaccess_vtm.cfg @@ -134,6 +134,7 @@ MTT : 1 MTS : 1 MTSIntraMaxCand : 3 MTSInterMaxCand : 4 +SBT : 1 Affine : 1 SubPuMvp : 1 MaxNumMergeCand : 6 diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp index 9cc41985218ca698db9c6ec8b5cbe75a52ae22e9..0b844b553dddbbbba4a59cb0648f72f3ea9f12f6 100644 --- a/source/App/EncoderApp/EncApp.cpp +++ b/source/App/EncoderApp/EncApp.cpp @@ -242,6 +242,9 @@ void EncApp::xInitLibCfg() #endif #if JVET_M0303_IMPLICIT_MTS m_cEncLib.setImplicitMTS ( m_MTSImplicit ); +#endif +#if JVET_M0140_SBT + m_cEncLib.setUseSBT ( m_SBT ); #endif m_cEncLib.setUseCompositeRef ( m_compositeRefEnabled ); 
m_cEncLib.setUseGBi ( m_GBi ); diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp index 1d6de204bbf5d06ac28d2aae75eba624d99c19a1..849461a0511b0144fd724101222b89c4bb2b2ece 100644 --- a/source/App/EncoderApp/EncAppCfg.cpp +++ b/source/App/EncoderApp/EncAppCfg.cpp @@ -855,6 +855,9 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) #endif #if JVET_M0303_IMPLICIT_MTS ("MTSImplicit", m_MTSImplicit, 0, "Enable implicit MTS (when explicit MTS is off)\n") +#endif +#if JVET_M0140_SBT + ( "SBT", m_SBT, false, "Enable Sub-Block Transform for inter blocks\n" ) #endif ("CompositeLTReference", m_compositeRefEnabled, false, "Enable Composite Long Term Reference Frame") ("GBi", m_GBi, false, "Enable Generalized Bi-prediction(GBi)") @@ -3197,6 +3200,9 @@ void EncAppCfg::xPrintParameter() msg( VERBOSE, "MTS: %1d(intra) %1d(inter) ", m_MTS & 1, ( m_MTS >> 1 ) & 1 ); #else msg( VERBOSE, "EMT: %1d(intra) %1d(inter) ", m_EMT & 1, ( m_EMT >> 1 ) & 1 ); +#endif +#if JVET_M0140_SBT + msg( VERBOSE, "SBT:%d ", m_SBT ); #endif msg( VERBOSE, "CompositeLTReference:%d ", m_compositeRefEnabled); msg( VERBOSE, "GBi:%d ", m_GBi ); diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h index f79bcaa44fae23a16aaab54d51b0e96ca273e670..3a7b9dfbb53eae02f6c226f7b6585a1c125da2c1 100644 --- a/source/App/EncoderApp/EncAppCfg.h +++ b/source/App/EncoderApp/EncAppCfg.h @@ -227,6 +227,9 @@ protected: #if JVET_M0303_IMPLICIT_MTS int m_MTSImplicit; #endif +#if JVET_M0140_SBT + bool m_SBT; ///< Sub-Block Transform for inter blocks +#endif bool m_compositeRefEnabled; bool m_GBi; diff --git a/source/Lib/CommonLib/CodingStatistics.h b/source/Lib/CommonLib/CodingStatistics.h index 22a9449c3d448cac60ed25adec4ce86f3f38190b..6f2492427618d23e4ba78a53fccecb54adc7e27a 100644 --- a/source/Lib/CommonLib/CodingStatistics.h +++ b/source/Lib/CommonLib/CodingStatistics.h @@ -105,6 +105,9 @@ enum CodingStatisticsType STATS__CABAC_BITS__GBI_IDX, 
STATS__CABAC_BITS__EMT_CU_FLAG, STATS__CABAC_BITS__EMT_TU_INDEX, +#if JVET_M0140_SBT + STATS__CABAC_BITS__SBT_MODE, +#endif STATS__CABAC_BITS__MH_INTRA_FLAG, STATS__CABAC_BITS__TRIANGLE_FLAG, STATS__CABAC_BITS__TRIANGLE_INDEX, @@ -191,6 +194,9 @@ static inline const char* getName(CodingStatisticsType name) "CABAC_BITS__GBI_IDX", "CABAC_BITS__EMT_CU_FLAG", "CABAC_BITS__EMT_TU_INDX", +#if JVET_M0140_SBT + "CABAC_BITS__SBT_MODE", +#endif "CABAC_BITS__MH_INTRA_FLAG", "CABAC_BITS__TRIANGLE_FLAG", "CABAC_BITS__TRIANGLE_INDEX", diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h index b2b7a9135047231cd348114148e5ac13b116ea1a..6e2ade64161fa45bd63ce7f49b470c93b0e9e659 100644 --- a/source/Lib/CommonLib/CommonDef.h +++ b/source/Lib/CommonLib/CommonDef.h @@ -406,6 +406,12 @@ static const int TRIANGLE_MAX_NUM_CANDS = 40; static const int TRIANGLE_MAX_NUM_SATD_CANDS = 3; static const int TRIANGLE_MIN_SIZE = 8 * 8; +#if JVET_M0140_SBT +static const int SBT_MAX_SIZE = 64; ///< maximum CU size for using SBT +static const int SBT_NUM_SL = 10; ///< maximum number of historical PU decision saved for a CU +static const int SBT_NUM_RDO = 2; ///< maximum number of SBT mode tried for a PU +#endif + static const int IBC_MAX_CAND_SIZE = 16; // max block size for ibc search static const int IBC_NUM_CANDIDATES = 64; ///< Maximum number of candidates to store/test static const int CHROMA_REFINEMENT_CANDIDATES = 8; /// 8 candidates BV to choose from diff --git a/source/Lib/CommonLib/Contexts.cpp b/source/Lib/CommonLib/Contexts.cpp index 558720f5c5e86671fef3a7ed66b9b9d1eb50d1ea..85ad6159688dab89b2028f9924d7c0d82372a123 100644 --- a/source/Lib/CommonLib/Contexts.cpp +++ b/source/Lib/CommonLib/Contexts.cpp @@ -1239,6 +1239,48 @@ const CtxSet ContextSetCfg::ISPMode = ContextSetCfg::addCtxSet }); #endif +#if JVET_M0140_SBT +const CtxSet ContextSetCfg::SbtFlag = ContextSetCfg::addCtxSet +( { + { CNU, CNU,}, + { CNU, CNU,}, + { CNU, CNU,}, +#if JVET_M0453_CABAC_ENGINE + { 
DWS, DWS,}, +#endif +} ); + +const CtxSet ContextSetCfg::SbtQuadFlag = ContextSetCfg::addCtxSet +( { + { CNU,}, + { CNU,}, + { CNU,}, +#if JVET_M0453_CABAC_ENGINE + { DWS,}, +#endif +} ); + +const CtxSet ContextSetCfg::SbtHorFlag = ContextSetCfg::addCtxSet +( { + { CNU, CNU, CNU,}, + { CNU, CNU, CNU,}, + { CNU, CNU, CNU,}, +#if JVET_M0453_CABAC_ENGINE + { DWS, DWS, DWS,}, +#endif +} ); + +const CtxSet ContextSetCfg::SbtPosFlag = ContextSetCfg::addCtxSet +( { + { CNU,}, + { CNU,}, + { CNU,}, +#if JVET_M0453_CABAC_ENGINE + { DWS,}, +#endif +} ); +#endif + const CtxSet ContextSetCfg::CrossCompPred = ContextSetCfg::addCtxSet ({ { 154, 154, 154, 154, 154, 154, 154, 154, 154, 154,}, diff --git a/source/Lib/CommonLib/Contexts.h b/source/Lib/CommonLib/Contexts.h index 994d44bae4ad3c85b1ab1a3f3937f06d9f70de6e..00dc798cecfbb1ba1cf5fe2822837b8240b06fd2 100644 --- a/source/Lib/CommonLib/Contexts.h +++ b/source/Lib/CommonLib/Contexts.h @@ -293,6 +293,12 @@ public: #if !JVET_M0464_UNI_MTS static const CtxSet EMTTuIndex; static const CtxSet EMTCuFlag; +#endif +#if JVET_M0140_SBT + static const CtxSet SbtFlag; + static const CtxSet SbtQuadFlag; + static const CtxSet SbtHorFlag; + static const CtxSet SbtPosFlag; #endif static const CtxSet CrossCompPred; static const CtxSet ChromaQpAdjFlag; diff --git a/source/Lib/CommonLib/Slice.cpp b/source/Lib/CommonLib/Slice.cpp index 372aa0899671e6ae4f7e0c0cc042dcbaee4cda06..c14c2b68efbfe8bef9961e3c5f53b2195f64a04f 100644 --- a/source/Lib/CommonLib/Slice.cpp +++ b/source/Lib/CommonLib/Slice.cpp @@ -1900,6 +1900,9 @@ SPSNext::SPSNext( SPS& sps ) #else , m_IntraEMT ( false ) , m_InterEMT ( false ) +#endif +#if JVET_M0140_SBT + , m_SBT ( false ) #endif , m_Affine ( false ) , m_AffineType ( false ) diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h index 6dc1c6302e68ac5879c2f183e8812ce6dbc391ea..f2688be6a1d5e5856199de7a7eb71bb344026251 100644 --- a/source/Lib/CommonLib/Slice.h +++ b/source/Lib/CommonLib/Slice.h @@ -848,6 
+848,10 @@ private: #else bool m_IntraEMT; // 18 bool m_InterEMT; // 19 +#endif +#if JVET_M0140_SBT + bool m_SBT; + uint8_t m_MaxSbtSize; #endif bool m_Affine; bool m_AffineType; @@ -928,6 +932,12 @@ public: bool getUseIntraEMT () const { return m_IntraEMT; } void setUseInterEMT ( bool b ) { m_InterEMT = b; } bool getUseInterEMT () const { return m_InterEMT; } +#endif +#if JVET_M0140_SBT + void setUseSBT ( bool b ) { m_SBT = b; } + bool getUseSBT () const { return m_SBT; } + void setMaxSbtSize ( uint8_t val ) { m_MaxSbtSize = val; } + uint8_t getMaxSbtSize () const { return m_MaxSbtSize; } #endif void setUseGBi ( bool b ) { m_GBi = b; } bool getUseGBi () const { return m_GBi; } diff --git a/source/Lib/CommonLib/TrQuant.cpp b/source/Lib/CommonLib/TrQuant.cpp index a756ed223e23da328717a8a134aad5401ff0c367..af5d1ce22e826e438bc4b6693a4519f9404a3d95 100644 --- a/source/Lib/CommonLib/TrQuant.cpp +++ b/source/Lib/CommonLib/TrQuant.cpp @@ -311,6 +311,41 @@ void TrQuant::getTrTypes ( TransformUnit tu, const ComponentID compID, int &trTy return; } #endif +#if JVET_M0140_SBT + if( tu.cu->sbtInfo && compID == COMPONENT_Y ) + { + uint8_t sbtIdx = tu.cu->getSbtIdx(); + uint8_t sbtPos = tu.cu->getSbtPos(); + + if( sbtIdx == SBT_VER_HALF || sbtIdx == SBT_VER_QUAD ) + { + assert( tu.lwidth() <= MTS_INTER_MAX_CU_SIZE ); + if( tu.lheight() > MTS_INTER_MAX_CU_SIZE ) + { + trTypeHor = trTypeVer = DCT2; + } + else + { + if( sbtPos == SBT_POS0 ) { trTypeHor = DCT8; trTypeVer = DST7; } + else { trTypeHor = DST7; trTypeVer = DST7; } + } + } + else + { + assert( tu.lheight() <= MTS_INTER_MAX_CU_SIZE ); + if( tu.lwidth() > MTS_INTER_MAX_CU_SIZE ) + { + trTypeHor = trTypeVer = DCT2; + } + else + { + if( sbtPos == SBT_POS0 ) { trTypeHor = DST7; trTypeVer = DCT8; } + else { trTypeHor = DST7; trTypeVer = DST7; } + } + } + return; + } +#endif #if JVET_M0464_UNI_MTS if ( mtsActivated ) @@ -568,6 +603,15 @@ void TrQuant::transformNxN(TransformUnit &tu, const ComponentID &compID, const Q { tu.mtsIdx 
= it->first; CoeffBuf tempCoeff( m_mtsCoeffs[tu.mtsIdx], rect ); +#if JVET_M0140_SBT + if( tu.noResidual ) + { + int sumAbs = 0; + trCosts.push_back( TrCost( sumAbs, pos++ ) ); + it++; + continue; + } +#endif if( isLuma(compID) && tu.mtsIdx == 1 ) { @@ -640,6 +684,15 @@ void TrQuant::transformNxN(TransformUnit &tu, const ComponentID &compID, const Q const CPelBuf resiBuf = cs.getResiBuf(rect); CoeffBuf rpcCoeff = tu.getCoeffs(compID); +#if JVET_M0140_SBT + if( tu.noResidual ) + { + uiAbsSum = 0; + TU::setCbfAtDepth( tu, compID, tu.depth, uiAbsSum > 0 ); + return; + } +#endif + RDPCMMode rdpcmMode = RDPCM_OFF; rdpcmNxN(tu, compID, cQP, uiAbsSum, rdpcmMode); diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 9589707dcb7c3e3142f3e28d38e90243deee805b..0e5a45e29c681ba333720c7a0070689c98748d7c 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -115,6 +115,11 @@ #endif #define JVET_M0502_PRED_MODE_CTX 1 +#define JVET_M0140_SBT 1 // Sub-Block transform for Inter blocks +#if JVET_M0140_SBT +#define APPLY_SBT_SL_ON_MTS 1 // apply save & load fast algorithm on inter MTS when SBT is on +#endif + #define JVET_M0407_IBC_RANGE 1 // extend IBC search range to some part of left CTU #define JVET_M0464_UNI_MTS 1 @@ -420,6 +425,39 @@ enum ISPType }; #endif +#if JVET_M0140_SBT +enum SbtIdx +{ + SBT_OFF_DCT = 0, + SBT_VER_HALF = 1, + SBT_HOR_HALF = 2, + SBT_VER_QUAD = 3, + SBT_HOR_QUAD = 4, + NUMBER_SBT_IDX, + SBT_OFF_MTS, //note: must be after all SBT modes, only used in fast algorithm to discern the best mode is inter EMT +}; + +enum SbtPos +{ + SBT_POS0 = 0, + SBT_POS1 = 1, + NUMBER_SBT_POS +}; + +enum SbtMode +{ + SBT_VER_H0 = 0, + SBT_VER_H1 = 1, + SBT_HOR_H0 = 2, + SBT_HOR_H1 = 3, + SBT_VER_Q0 = 4, + SBT_VER_Q1 = 5, + SBT_HOR_Q0 = 6, + SBT_HOR_Q1 = 7, + NUMBER_SBT_MODE +}; +#endif + enum RDPCMMode { RDPCM_OFF = 0, diff --git a/source/Lib/CommonLib/Unit.cpp b/source/Lib/CommonLib/Unit.cpp index 
0ff1f5a6ab864ba27a7359503eebc6faced203e8..a2f4bc6ecd06ce6905415b7aa95fe22be017a974 100644 --- a/source/Lib/CommonLib/Unit.cpp +++ b/source/Lib/CommonLib/Unit.cpp @@ -271,6 +271,9 @@ CodingUnit& CodingUnit::operator=( const CodingUnit& other ) qp = other.qp; chromaQpAdj = other.chromaQpAdj; rootCbf = other.rootCbf; +#if JVET_M0140_SBT + sbtInfo = other.sbtInfo; +#endif #if !JVET_M0464_UNI_MTS emtFlag = other.emtFlag; #endif @@ -317,6 +320,9 @@ void CodingUnit::initData() qp = 0; chromaQpAdj = 0; rootCbf = true; +#if JVET_M0140_SBT + sbtInfo = 0; +#endif #if !JVET_M0464_UNI_MTS emtFlag = 0; #endif @@ -344,6 +350,70 @@ void CodingUnit::initData() #endif } +#if JVET_M0140_SBT +const uint8_t CodingUnit::checkAllowedSbt() const +{ + if( !slice->getSPS()->getSpsNext().getUseSBT() ) + { + return 0; + } + + //check on prediction mode + if( predMode == MODE_INTRA ) //intra + { + return 0; + } + if( firstPU->mhIntraFlag ) + { + return 0; + } + + uint8_t sbtAllowed = 0; + int cuWidth = lwidth(); + int cuHeight = lheight(); + bool allow_type[NUMBER_SBT_IDX]; + memset( allow_type, false, NUMBER_SBT_IDX * sizeof( bool ) ); + + //parameter + int maxSbtCUSize = cs->sps->getSpsNext().getMaxSbtSize(); + int minSbtCUSize = 1 << ( MIN_CU_LOG2 + 1 ); + + //check on size + if( cuWidth > maxSbtCUSize || cuHeight > maxSbtCUSize ) + { + return 0; + } + + allow_type[SBT_VER_HALF] = cuWidth >= minSbtCUSize; + allow_type[SBT_HOR_HALF] = cuHeight >= minSbtCUSize; + allow_type[SBT_VER_QUAD] = cuWidth >= ( minSbtCUSize << 1 ); + allow_type[SBT_HOR_QUAD] = cuHeight >= ( minSbtCUSize << 1 ); + + for( int i = 0; i < NUMBER_SBT_IDX; i++ ) + { + sbtAllowed += (uint8_t)allow_type[i] << i; + } + + return sbtAllowed; +} + +uint8_t CodingUnit::getSbtTuSplit() const +{ + uint8_t sbtTuSplitType = 0; + + switch( getSbtIdx() ) + { + case SBT_VER_HALF: sbtTuSplitType = ( getSbtPos() == SBT_POS0 ? 0 : 1 ) + SBT_VER_HALF_POS0_SPLIT; break; + case SBT_HOR_HALF: sbtTuSplitType = ( getSbtPos() == SBT_POS0 ? 
0 : 1 ) + SBT_HOR_HALF_POS0_SPLIT; break; + case SBT_VER_QUAD: sbtTuSplitType = ( getSbtPos() == SBT_POS0 ? 0 : 1 ) + SBT_VER_QUAD_POS0_SPLIT; break; + case SBT_HOR_QUAD: sbtTuSplitType = ( getSbtPos() == SBT_POS0 ? 0 : 1 ) + SBT_HOR_QUAD_POS0_SPLIT; break; + default: assert( 0 ); break; + } + + assert( sbtTuSplitType <= SBT_HOR_QUAD_POS1_SPLIT && sbtTuSplitType >= SBT_VER_HALF_POS0_SPLIT ); + return sbtTuSplitType; +} +#endif // --------------------------------------------------------------------------- // prediction unit method definitions @@ -594,6 +664,9 @@ void TransformUnit::initData() #else emtIdx = 0; #endif +#if JVET_M0140_SBT + noResidual = false; +#endif #if JVET_M0427_INLOOP_RESHAPER m_chromaResScaleInv = 0; #endif @@ -636,6 +709,9 @@ TransformUnit& TransformUnit::operator=(const TransformUnit& other) mtsIdx = other.mtsIdx; #else emtIdx = other.emtIdx; +#endif +#if JVET_M0140_SBT + noResidual = other.noResidual; #endif return *this; } @@ -667,6 +743,9 @@ void TransformUnit::copyComponentFrom(const TransformUnit& other, const Componen emtIdx = other.emtIdx; } #endif +#if JVET_M0140_SBT + noResidual = other.noResidual; +#endif } CoeffBuf TransformUnit::getCoeffs(const ComponentID id) { return CoeffBuf(m_coeffs[id], blocks[id]); } @@ -674,6 +753,21 @@ const CCoeffBuf TransformUnit::getCoeffs(const ComponentID id) const { return CC PelBuf TransformUnit::getPcmbuf(const ComponentID id) { return PelBuf (m_pcmbuf[id], blocks[id]); } const CPelBuf TransformUnit::getPcmbuf(const ComponentID id) const { return CPelBuf (m_pcmbuf[id], blocks[id]); } + +#if JVET_M0140_SBT +void TransformUnit::checkTuNoResidual( unsigned idx ) +{ + if( CU::getSbtIdx( cu->sbtInfo ) == SBT_OFF_DCT ) + { + return; + } + + if( ( CU::getSbtPos( cu->sbtInfo ) == SBT_POS0 && idx == 1 ) || ( CU::getSbtPos( cu->sbtInfo ) == SBT_POS1 && idx == 0 ) ) + { + noResidual = true; + } +} +#endif #if JVET_M0427_INLOOP_RESHAPER int TransformUnit::getChromaAdj() const { return m_chromaResScaleInv; } 
void TransformUnit::setChromaAdj(int i) { m_chromaResScaleInv = i; } diff --git a/source/Lib/CommonLib/Unit.h b/source/Lib/CommonLib/Unit.h index 78624986944a2e0c34f16755c4d3daadf1ee6705..f25184cb118f0a6a7bb1df126884376dde65ca81 100644 --- a/source/Lib/CommonLib/Unit.h +++ b/source/Lib/CommonLib/Unit.h @@ -308,6 +308,9 @@ struct CodingUnit : public UnitArea bool ipcm; uint8_t imv; bool rootCbf; +#if JVET_M0140_SBT + uint8_t sbtInfo; +#endif #if HEVC_TILES_WPP uint32_t tileIdx; #endif @@ -353,6 +356,14 @@ struct CodingUnit : public UnitArea int64_t cacheId; bool cacheUsed; #endif +#if JVET_M0140_SBT + const uint8_t getSbtIdx() const { assert( ( ( sbtInfo >> 0 ) & 0xf ) < NUMBER_SBT_IDX ); return ( sbtInfo >> 0 ) & 0xf; } + const uint8_t getSbtPos() const { return ( sbtInfo >> 4 ) & 0x3; } + void setSbtIdx( uint8_t idx ) { CHECK( idx >= NUMBER_SBT_IDX, "sbt_idx wrong" ); sbtInfo = ( idx << 0 ) + ( sbtInfo & 0xf0 ); } + void setSbtPos( uint8_t pos ) { CHECK( pos >= 4, "sbt_pos wrong" ); sbtInfo = ( pos << 4 ) + ( sbtInfo & 0xcf ); } + uint8_t getSbtTuSplit() const; + const uint8_t checkAllowedSbt() const; +#endif }; // --------------------------------------------------------------------------- @@ -459,6 +470,9 @@ struct TransformUnit : public UnitArea uint8_t mtsIdx; #else uint8_t emtIdx; +#endif +#if JVET_M0140_SBT + bool noResidual; #endif uint8_t cbf [ MAX_NUM_TBLOCKS ]; RDPCMMode rdpcm [ MAX_NUM_TBLOCKS ]; @@ -483,6 +497,9 @@ struct TransformUnit : public UnitArea TransformUnit& operator=(const TransformUnit& other); void copyComponentFrom (const TransformUnit& other, const ComponentID compID); +#if JVET_M0140_SBT + void checkTuNoResidual( unsigned idx ); +#endif CoeffBuf getCoeffs(const ComponentID id); const CCoeffBuf getCoeffs(const ComponentID id) const; diff --git a/source/Lib/CommonLib/UnitPartitioner.cpp b/source/Lib/CommonLib/UnitPartitioner.cpp index 39be3bb0cc5c48b67b059e1198dec2899d0ffd3a..35a17fc19fe3ff5550d0305858bf67a336eb0ed6 100644 --- 
a/source/Lib/CommonLib/UnitPartitioner.cpp +++ b/source/Lib/CommonLib/UnitPartitioner.cpp @@ -255,6 +255,18 @@ void QTBTPartitioner::splitCurrArea( const PartSplit split, const CodingStructur case TU_MAX_TR_SPLIT: m_partStack.push_back( PartLevel( split, PartitionerImpl::getMaxTuTiling( currArea(), cs ) ) ); break; +#if JVET_M0140_SBT + case SBT_VER_HALF_POS0_SPLIT: + case SBT_VER_HALF_POS1_SPLIT: + case SBT_HOR_HALF_POS0_SPLIT: + case SBT_HOR_HALF_POS1_SPLIT: + case SBT_VER_QUAD_POS0_SPLIT: + case SBT_VER_QUAD_POS1_SPLIT: + case SBT_HOR_QUAD_POS0_SPLIT: + case SBT_HOR_QUAD_POS1_SPLIT: + m_partStack.push_back( PartLevel( split, PartitionerImpl::getSbtTuTiling( currArea(), cs, split ) ) ); + break; +#endif default: THROW( "Unknown split mode" ); break; @@ -269,6 +281,12 @@ void QTBTPartitioner::splitCurrArea( const PartSplit split, const CodingStructur { currTrDepth++; } +#if JVET_M0140_SBT + else if( split >= SBT_VER_HALF_POS0_SPLIT && split <= SBT_HOR_QUAD_POS1_SPLIT ) + { + currTrDepth++; + } +#endif else { currTrDepth = 0; @@ -418,6 +436,18 @@ bool QTBTPartitioner::canSplit( const PartSplit split, const CodingStructure &cs case TU_MAX_TR_SPLIT: return area.width > maxTrSize || area.height > maxTrSize; break; +#if JVET_M0140_SBT + case SBT_VER_HALF_POS0_SPLIT: + case SBT_VER_HALF_POS1_SPLIT: + case SBT_HOR_HALF_POS0_SPLIT: + case SBT_HOR_HALF_POS1_SPLIT: + case SBT_VER_QUAD_POS0_SPLIT: + case SBT_VER_QUAD_POS1_SPLIT: + case SBT_HOR_QUAD_POS0_SPLIT: + case SBT_HOR_QUAD_POS1_SPLIT: + return currTrDepth == 0; + break; +#endif #if JVET_M0421_SPLIT_SIG case CU_QUAD_SPLIT: return canQt; @@ -622,6 +652,13 @@ void QTBTPartitioner::exitCurrSplit() CHECK( currTrDepth == 0, "TR depth is '0', although a TU split was performed" ); currTrDepth--; } +#if JVET_M0140_SBT + else if( currSplit >= SBT_VER_HALF_POS0_SPLIT && currSplit <= SBT_HOR_QUAD_POS1_SPLIT ) + { + CHECK( currTrDepth == 0, "TR depth is '0', although a TU split was performed" ); + currTrDepth--; + } +#endif else { 
CHECK( currTrDepth > 0, "RQT found with QTBT partitioner" ); @@ -1111,3 +1148,64 @@ Partitioning PartitionerImpl::getMaxTuTiling( const UnitArea &cuArea, const Codi return ret; } + +#if JVET_M0140_SBT +Partitioning PartitionerImpl::getSbtTuTiling( const UnitArea& cuArea, const CodingStructure &cs, const PartSplit splitType ) +{ + Partitioning ret; + int numTiles = 2; + int widthFactor, heightFactor, xOffsetFactor, yOffsetFactor; // y = (x * factor) >> 2; + assert( splitType >= SBT_VER_HALF_POS0_SPLIT && splitType <= SBT_HOR_QUAD_POS1_SPLIT ); + + ret.resize( numTiles, cuArea ); + for( int i = 0; i < numTiles; i++ ) + { + if( splitType >= SBT_VER_QUAD_POS0_SPLIT ) + { + if( splitType == SBT_HOR_QUAD_POS0_SPLIT || splitType == SBT_HOR_QUAD_POS1_SPLIT ) + { + widthFactor = 4; + xOffsetFactor = 0; + heightFactor = ( ( i == 0 && splitType == SBT_HOR_QUAD_POS0_SPLIT ) || ( i == 1 && splitType == SBT_HOR_QUAD_POS1_SPLIT ) ) ? 1 : 3; + yOffsetFactor = ( i == 0 ) ? 0 : ( splitType == SBT_HOR_QUAD_POS0_SPLIT ? 1 : 3 ); + } + else + { + widthFactor = ( ( i == 0 && splitType == SBT_VER_QUAD_POS0_SPLIT ) || ( i == 1 && splitType == SBT_VER_QUAD_POS1_SPLIT ) ) ? 1 : 3; + xOffsetFactor = ( i == 0 ) ? 0 : ( splitType == SBT_VER_QUAD_POS0_SPLIT ? 1 : 3 ); + heightFactor = 4; + yOffsetFactor = 0; + } + } + else + { + if( splitType == SBT_HOR_HALF_POS0_SPLIT || splitType == SBT_HOR_HALF_POS1_SPLIT ) + { + widthFactor = 4; + xOffsetFactor = 0; + heightFactor = 2; + yOffsetFactor = ( i == 0 ) ? 0 : 2; + } + else + { + widthFactor = 2; + xOffsetFactor = ( i == 0 ) ? 
0 : 2; + heightFactor = 4; + yOffsetFactor = 0; + } + } + + UnitArea& tile = ret[i]; + for( CompArea &comp : tile.blocks ) + { + if( !comp.valid() ) continue; + comp.x += ( comp.width * xOffsetFactor ) >> 2; + comp.y += ( comp.height * yOffsetFactor ) >> 2; + comp.width = ( comp.width * widthFactor ) >> 2; + comp.height = ( comp.height * heightFactor ) >> 2; + } + } + + return ret; +} +#endif \ No newline at end of file diff --git a/source/Lib/CommonLib/UnitPartitioner.h b/source/Lib/CommonLib/UnitPartitioner.h index 5cf07f530632290737dedb4072f041d1f8804bb8..4b49746d13fdf2c1097661acf7886d942fd9ae22 100644 --- a/source/Lib/CommonLib/UnitPartitioner.h +++ b/source/Lib/CommonLib/UnitPartitioner.h @@ -71,6 +71,16 @@ enum PartSplit TU_NO_ISP, TU_1D_HORZ_SPLIT, TU_1D_VERT_SPLIT, +#endif +#if JVET_M0140_SBT + SBT_VER_HALF_POS0_SPLIT, + SBT_VER_HALF_POS1_SPLIT, + SBT_HOR_HALF_POS0_SPLIT, + SBT_HOR_HALF_POS1_SPLIT, + SBT_VER_QUAD_POS0_SPLIT, + SBT_VER_QUAD_POS1_SPLIT, + SBT_HOR_QUAD_POS0_SPLIT, + SBT_HOR_QUAD_POS1_SPLIT, #endif NUM_PART_SPLIT, CU_MT_SPLIT = 1000, ///< dummy element to indicate the MT (multi-type-tree) split @@ -219,6 +229,9 @@ namespace PartitionerImpl #if JVET_M0102_INTRA_SUBPARTITIONS void getTUIntraSubPartitions( Partitioning &sub, const UnitArea &tuArea, const CodingStructure &cs, const PartSplit splitType ); #endif +#if JVET_M0140_SBT + Partitioning getSbtTuTiling ( const UnitArea& curArea, const CodingStructure &cs, const PartSplit splitType ); +#endif }; #endif diff --git a/source/Lib/CommonLib/UnitTools.cpp b/source/Lib/CommonLib/UnitTools.cpp index e54d24f0e8e1b175571ece392cbbb44e05f2a660..8768b5ade62a1a820114a34c3bfe6bd6c0c6adfc 100644 --- a/source/Lib/CommonLib/UnitTools.cpp +++ b/source/Lib/CommonLib/UnitTools.cpp @@ -5502,6 +5502,121 @@ int CU::getMaxNeighboriMVCandNum( const CodingStructure& cs, const Position& pos return maxImvNumCand; } +#if JVET_M0140_SBT +uint8_t CU::getSbtInfo( uint8_t idx, uint8_t pos ) +{ + return ( pos << 4 ) + ( idx 
<< 0 ); +} + +uint8_t CU::getSbtIdx( const uint8_t sbtInfo ) +{ + return ( sbtInfo >> 0 ) & 0xf; +} + +uint8_t CU::getSbtPos( const uint8_t sbtInfo ) +{ + return ( sbtInfo >> 4 ) & 0x3; +} + +uint8_t CU::getSbtMode( uint8_t sbtIdx, uint8_t sbtPos ) +{ + uint8_t sbtMode = 0; + switch( sbtIdx ) + { + case SBT_VER_HALF: sbtMode = sbtPos + SBT_VER_H0; break; + case SBT_HOR_HALF: sbtMode = sbtPos + SBT_HOR_H0; break; + case SBT_VER_QUAD: sbtMode = sbtPos + SBT_VER_Q0; break; + case SBT_HOR_QUAD: sbtMode = sbtPos + SBT_HOR_Q0; break; + default: assert( 0 ); + } + + assert( sbtMode < NUMBER_SBT_MODE ); + return sbtMode; +} + +uint8_t CU::getSbtIdxFromSbtMode( uint8_t sbtMode ) +{ + if( sbtMode <= SBT_VER_H1 ) + return SBT_VER_HALF; + else if( sbtMode <= SBT_HOR_H1 ) + return SBT_HOR_HALF; + else if( sbtMode <= SBT_VER_Q1 ) + return SBT_VER_QUAD; + else if( sbtMode <= SBT_HOR_Q1 ) + return SBT_HOR_QUAD; + else + { + assert( 0 ); + return 0; + } +} + +uint8_t CU::getSbtPosFromSbtMode( uint8_t sbtMode ) +{ + if( sbtMode <= SBT_VER_H1 ) + return sbtMode - SBT_VER_H0; + else if( sbtMode <= SBT_HOR_H1 ) + return sbtMode - SBT_HOR_H0; + else if( sbtMode <= SBT_VER_Q1 ) + return sbtMode - SBT_VER_Q0; + else if( sbtMode <= SBT_HOR_Q1 ) + return sbtMode - SBT_HOR_Q0; + else + { + assert( 0 ); + return 0; + } +} + +uint8_t CU::targetSbtAllowed( uint8_t sbtIdx, uint8_t sbtAllowed ) +{ + uint8_t val = 0; + switch( sbtIdx ) + { + case SBT_VER_HALF: val = ( ( sbtAllowed >> SBT_VER_HALF ) & 0x1 ); break; + case SBT_HOR_HALF: val = ( ( sbtAllowed >> SBT_HOR_HALF ) & 0x1 ); break; + case SBT_VER_QUAD: val = ( ( sbtAllowed >> SBT_VER_QUAD ) & 0x1 ); break; + case SBT_HOR_QUAD: val = ( ( sbtAllowed >> SBT_HOR_QUAD ) & 0x1 ); break; + default: CHECK( 1, "unknown SBT type" ); + } + return val; +} + +uint8_t CU::numSbtModeRdo( uint8_t sbtAllowed ) +{ + uint8_t num = 0; + uint8_t sum = 0; + num = targetSbtAllowed( SBT_VER_HALF, sbtAllowed ) + targetSbtAllowed( SBT_HOR_HALF, sbtAllowed ); + sum 
+= std::min( SBT_NUM_RDO, ( num << 1 ) ); + num = targetSbtAllowed( SBT_VER_QUAD, sbtAllowed ) + targetSbtAllowed( SBT_HOR_QUAD, sbtAllowed ); + sum += std::min( SBT_NUM_RDO, ( num << 1 ) ); + return sum; +} + +bool CU::isMtsMode( const uint8_t sbtInfo ) +{ + return getSbtIdx( sbtInfo ) == SBT_OFF_MTS; +} + +bool CU::isSbtMode( const uint8_t sbtInfo ) +{ + uint8_t sbtIdx = getSbtIdx( sbtInfo ); + return sbtIdx >= SBT_VER_HALF && sbtIdx <= SBT_HOR_QUAD; +} + +bool CU::isSameSbtSize( const uint8_t sbtInfo1, const uint8_t sbtInfo2 ) +{ + uint8_t sbtIdx1 = getSbtIdxFromSbtMode( sbtInfo1 ); + uint8_t sbtIdx2 = getSbtIdxFromSbtMode( sbtInfo2 ); + if( sbtIdx1 == SBT_HOR_HALF || sbtIdx1 == SBT_VER_HALF ) + return sbtIdx2 == SBT_HOR_HALF || sbtIdx2 == SBT_VER_HALF; + else if( sbtIdx1 == SBT_HOR_QUAD || sbtIdx1 == SBT_VER_QUAD ) + return sbtIdx2 == SBT_HOR_QUAD || sbtIdx2 == SBT_VER_QUAD; + else + return false; +} +#endif + bool CU::isGBiIdxCoded( const CodingUnit &cu ) { if( cu.cs->sps->getSpsNext().getUseGBi() == false ) @@ -5657,6 +5772,9 @@ bool TU::isTSAllowed(const TransformUnit &tu, const ComponentID compID) SizeType transformSkipMaxSize = 1 << maxSize; tsAllowed &= tu.lwidth() <= transformSkipMaxSize && tu.lheight() <= transformSkipMaxSize; +#if JVET_M0140_SBT + tsAllowed &= !tu.cu->sbtInfo; +#endif return tsAllowed; } @@ -5670,6 +5788,9 @@ bool TU::isMTSAllowed(const TransformUnit &tu, const ComponentID compID) mtsAllowed &= ( tu.lwidth() <= maxSize && tu.lheight() <= maxSize ); #if JVET_M0102_INTRA_SUBPARTITIONS mtsAllowed &= !tu.cu->ispMode; +#endif +#if JVET_M0140_SBT + mtsAllowed &= !tu.cu->sbtInfo; #endif return mtsAllowed; } diff --git a/source/Lib/CommonLib/UnitTools.h b/source/Lib/CommonLib/UnitTools.h index 5cdb1fb6f7f9f9b227382a39dab373a00ecbd759..3414c2c5511d1fd7f68af90da51adc59e6aabb5a 100644 --- a/source/Lib/CommonLib/UnitTools.h +++ b/source/Lib/CommonLib/UnitTools.h @@ -115,7 +115,19 @@ namespace CU int getMaxNeighboriMVCandNum (const CodingStructure& 
cs, const Position& pos); void resetMVDandMV2Int ( CodingUnit& cu, InterPrediction *interPred ); - +#if JVET_M0140_SBT + uint8_t getSbtInfo (uint8_t idx, uint8_t pos); + uint8_t getSbtIdx (const uint8_t sbtInfo); + uint8_t getSbtPos (const uint8_t sbtInfo); + uint8_t getSbtMode (const uint8_t sbtIdx, const uint8_t sbtPos); + uint8_t getSbtIdxFromSbtMode (const uint8_t sbtMode); + uint8_t getSbtPosFromSbtMode (const uint8_t sbtMode); + uint8_t targetSbtAllowed (uint8_t idx, uint8_t sbtAllowed); + uint8_t numSbtModeRdo (uint8_t sbtAllowed); + bool isMtsMode (const uint8_t sbtInfo); + bool isSbtMode (const uint8_t sbtInfo); + bool isSameSbtSize (const uint8_t sbtInfo1, const uint8_t sbtInfo2); +#endif } // PU tools namespace PU diff --git a/source/Lib/DecoderLib/CABACReader.cpp b/source/Lib/DecoderLib/CABACReader.cpp index 3ff8a9a798246a5dac50c56d1faa2795063c08fc..78321e38f975acf3910ac8556dba5c05c4f0dcd9 100644 --- a/source/Lib/DecoderLib/CABACReader.cpp +++ b/source/Lib/DecoderLib/CABACReader.cpp @@ -1488,6 +1488,12 @@ void CABACReader::cu_residual( CodingUnit& cu, Partitioner &partitioner, CUCtx& { cu.rootCbf = true; } +#if JVET_M0140_SBT + if( cu.rootCbf ) + { + sbt_mode( cu ); + } +#endif if( !cu.rootCbf ) { TransformUnit& tu = cu.cs->addTU(cu, partitioner.chType); @@ -1528,6 +1534,64 @@ void CABACReader::rqt_root_cbf( CodingUnit& cu ) DTRACE( g_trace_ctx, D_SYNTAX, "rqt_root_cbf() ctx=0 root_cbf=%d pos=(%d,%d)\n", cu.rootCbf ? 1 : 0, cu.lumaPos().x, cu.lumaPos().y ); } +#if JVET_M0140_SBT +void CABACReader::sbt_mode( CodingUnit& cu ) +{ + const uint8_t sbtAllowed = cu.checkAllowedSbt(); + if( !sbtAllowed ) + { + return; + } + + SizeType cuWidth = cu.lwidth(); + SizeType cuHeight = cu.lheight(); + + RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__SBT_MODE ); + //bin - flag + uint8_t ctxIdx = ( cuWidth * cuHeight <= 256 ) ? 
1 : 0; + bool sbtFlag = m_BinDecoder.decodeBin( Ctx::SbtFlag( ctxIdx ) ); + if( !sbtFlag ) + { + return; + } + + uint8_t sbtVerHalfAllow = CU::targetSbtAllowed( SBT_VER_HALF, sbtAllowed ); + uint8_t sbtHorHalfAllow = CU::targetSbtAllowed( SBT_HOR_HALF, sbtAllowed ); + uint8_t sbtVerQuadAllow = CU::targetSbtAllowed( SBT_VER_QUAD, sbtAllowed ); + uint8_t sbtHorQuadAllow = CU::targetSbtAllowed( SBT_HOR_QUAD, sbtAllowed ); + + //bin - type + bool sbtQuadFlag = false; + if( ( sbtHorHalfAllow || sbtVerHalfAllow ) && ( sbtHorQuadAllow || sbtVerQuadAllow ) ) + { + sbtQuadFlag = m_BinDecoder.decodeBin( Ctx::SbtQuadFlag( 0 ) ); + } + else + { + sbtQuadFlag = 0; + } + + //bin - dir + bool sbtHorFlag = false; + if( ( sbtQuadFlag && sbtVerQuadAllow && sbtHorQuadAllow ) || ( !sbtQuadFlag && sbtVerHalfAllow && sbtHorHalfAllow ) ) //both direction allowed + { + uint8_t ctxIdx = ( cuWidth == cuHeight ) ? 0 : ( cuWidth < cuHeight ? 1 : 2 ); + sbtHorFlag = m_BinDecoder.decodeBin( Ctx::SbtHorFlag( ctxIdx ) ); + } + else + { + sbtHorFlag = ( sbtQuadFlag && sbtHorQuadAllow ) || ( !sbtQuadFlag && sbtHorHalfAllow ); + } + cu.setSbtIdx( sbtHorFlag ? ( sbtQuadFlag ? SBT_HOR_QUAD : SBT_HOR_HALF ) : ( sbtQuadFlag ? SBT_VER_QUAD : SBT_VER_HALF ) ); + + //bin - pos + bool sbtPosFlag = m_BinDecoder.decodeBin( Ctx::SbtPosFlag( 0 ) ); + cu.setSbtPos( sbtPosFlag ? 
SBT_POS1 : SBT_POS0 ); + + DTRACE( g_trace_ctx, D_SYNTAX, "sbt_mode() pos=(%d,%d) sbtInfo=%d\n", cu.lx(), cu.ly(), (int)cu.sbtInfo ); +} +#endif + bool CABACReader::end_of_ctu( CodingUnit& cu, CUCtx& cuCtx ) { @@ -2260,6 +2324,11 @@ void CABACReader::transform_tree( CodingStructure &cs, Partitioner &partitioner, void CABACReader::transform_tree( CodingStructure &cs, Partitioner &partitioner, CUCtx& cuCtx, ChromaCbfs& chromaCbfs ) #endif { +#if JVET_M0140_SBT + ChromaCbfs chromaCbfsLastDepth; + chromaCbfsLastDepth.Cb = chromaCbfs.Cb; + chromaCbfsLastDepth.Cr = chromaCbfs.Cr; +#endif const UnitArea& area = partitioner.currArea(); CodingUnit& cu = *cs.getCU( area.blocks[partitioner.chType], partitioner.chType ); @@ -2272,6 +2341,12 @@ void CABACReader::transform_tree( CodingStructure &cs, Partitioner &partitioner, bool split = false; split = partitioner.canSplit( TU_MAX_TR_SPLIT, cs ); +#if JVET_M0140_SBT + if( cu.sbtInfo && partitioner.canSplit( PartSplit( cu.getSbtTuSplit() ), cs ) ) + { + split = true; + } +#endif #if JVET_M0102_INTRA_SUBPARTITIONS if( !split && cu.ispMode ) @@ -2294,19 +2369,31 @@ void CABACReader::transform_tree( CodingStructure &cs, Partitioner &partitioner, const int cbfDepth = chromaCbfISP ? 
trDepth - 1 : trDepth; if( chromaCbfs.Cb ) { +#if JVET_M0140_SBT + if( !( cu.sbtInfo && trDepth == 1 ) ) +#endif chromaCbfs.Cb &= cbf_comp( cs, area.blocks[COMPONENT_Cb], cbfDepth ); } if( chromaCbfs.Cr ) { +#if JVET_M0140_SBT + if( !( cu.sbtInfo && trDepth == 1 ) ) +#endif chromaCbfs.Cr &= cbf_comp( cs, area.blocks[COMPONENT_Cr], cbfDepth, chromaCbfs.Cb ); } #else if( chromaCbfs.Cb ) { +#if JVET_M0140_SBT + if( !( cu.sbtInfo && trDepth == 1 ) ) +#endif chromaCbfs.Cb &= cbf_comp( cs, area.blocks[COMPONENT_Cb], trDepth ); } if( chromaCbfs.Cr ) { +#if JVET_M0140_SBT + if( !( cu.sbtInfo && trDepth == 1 ) ) +#endif chromaCbfs.Cr &= cbf_comp( cs, area.blocks[COMPONENT_Cr], trDepth, chromaCbfs.Cb ); } #endif @@ -2343,6 +2430,12 @@ void CABACReader::transform_tree( CodingStructure &cs, Partitioner &partitioner, { partitioner.splitCurrArea( ispType, cs ); } +#endif +#if JVET_M0140_SBT + else if( cu.sbtInfo && partitioner.canSplit( PartSplit( cu.getSbtTuSplit() ), cs ) ) + { + partitioner.splitCurrArea( PartSplit( cu.getSbtTuSplit() ), cs ); + } #endif else THROW( "Implicit TU split not available!" 
); @@ -2403,6 +2496,11 @@ void CABACReader::transform_tree( CodingStructure &cs, Partitioner &partitioner, { TransformUnit &tu = cs.addTU( CS::getArea( cs, area, partitioner.chType ), partitioner.chType ); unsigned numBlocks = ::getNumberValidTBlocks( *cs.pcv ); +#if JVET_M0140_SBT + tu.checkTuNoResidual( partitioner.currPartIdx() ); + chromaCbfs.Cb &= !tu.noResidual; + chromaCbfs.Cr &= !tu.noResidual; +#endif for( unsigned compID = COMPONENT_Y; compID < numBlocks; compID++ ) { @@ -2421,6 +2519,17 @@ void CABACReader::transform_tree( CodingStructure &cs, Partitioner &partitioner, { TU::setCbfAtDepth( tu, COMPONENT_Y, trDepth, 1 ); } +#if JVET_M0140_SBT + else if( cu.sbtInfo && tu.noResidual ) + { + TU::setCbfAtDepth( tu, COMPONENT_Y, trDepth, 0 ); + } + else if( cu.sbtInfo && !chromaCbfsLastDepth.sigChroma( area.chromaFormat ) ) + { + assert( !tu.noResidual ); + TU::setCbfAtDepth( tu, COMPONENT_Y, trDepth, 1 ); + } +#endif else { #if JVET_M0102_INTRA_SUBPARTITIONS diff --git a/source/Lib/DecoderLib/CABACReader.h b/source/Lib/DecoderLib/CABACReader.h index 941be20b9b125d72668365312bc8fb603a9a2a49..031e01562be66bd5fe37d5420bd557e0013a2ba5 100644 --- a/source/Lib/DecoderLib/CABACReader.h +++ b/source/Lib/DecoderLib/CABACReader.h @@ -96,6 +96,9 @@ public: void intra_chroma_pred_mode ( PredictionUnit& pu ); void cu_residual ( CodingUnit& cu, Partitioner& pm, CUCtx& cuCtx ); void rqt_root_cbf ( CodingUnit& cu ); +#if JVET_M0140_SBT + void sbt_mode ( CodingUnit& cu ); +#endif bool end_of_ctu ( CodingUnit& cu, CUCtx& cuCtx ); // prediction unit (clause 7.3.8.6) diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp index 742391b504a127a88f964f863a22cd36f7fe5657..a94f9710d62db7b0f034d2c77d1f5d7b15628ff0 100644 --- a/source/Lib/DecoderLib/VLCReader.cpp +++ b/source/Lib/DecoderLib/VLCReader.cpp @@ -815,6 +815,13 @@ void HLSyntaxReader::parseSPSNext( SPSNext& spsNext, const bool usePCM ) } #endif +#if JVET_M0140_SBT + READ_FLAG( symbol, 
"sbt_enable_flag" ); spsNext.setUseSBT ( symbol != 0 ); + if( spsNext.getUseSBT() ) + { + READ_FLAG( symbol, "max_sbt_size_64_flag" ); spsNext.setMaxSbtSize ( symbol ? 64 : 32 ); + } +#endif READ_FLAG( symbol, "affine_flag" ); spsNext.setUseAffine ( symbol != 0 ); if ( spsNext.getUseAffine() ) { diff --git a/source/Lib/EncoderLib/CABACWriter.cpp b/source/Lib/EncoderLib/CABACWriter.cpp index 85f257b789db7f4f4a633f9b270623627202681a..84c431da75e0f0597ee3327b78f60bc1bbf71f57 100644 --- a/source/Lib/EncoderLib/CABACWriter.cpp +++ b/source/Lib/EncoderLib/CABACWriter.cpp @@ -1318,6 +1318,12 @@ void CABACWriter::cu_residual( const CodingUnit& cu, Partitioner& partitioner, C { rqt_root_cbf( cu ); } +#if JVET_M0140_SBT + if( cu.rootCbf ) + { + sbt_mode( cu ); + } +#endif if( !cu.rootCbf ) { @@ -1349,6 +1355,64 @@ void CABACWriter::rqt_root_cbf( const CodingUnit& cu ) DTRACE( g_trace_ctx, D_SYNTAX, "rqt_root_cbf() ctx=0 root_cbf=%d pos=(%d,%d)\n", cu.rootCbf ? 1 : 0, cu.lumaPos().x, cu.lumaPos().y ); } +#if JVET_M0140_SBT +void CABACWriter::sbt_mode( const CodingUnit& cu ) +{ + uint8_t sbtAllowed = cu.checkAllowedSbt(); + if( !sbtAllowed ) + { + return; + } + + SizeType cuWidth = cu.lwidth(); + SizeType cuHeight = cu.lheight(); + uint8_t sbtIdx = cu.getSbtIdx(); + uint8_t sbtPos = cu.getSbtPos(); + + //bin - flag + bool sbtFlag = cu.sbtInfo != 0; + uint8_t ctxIdx = ( cuWidth * cuHeight <= 256 ) ? 
1 : 0; + m_BinEncoder.encodeBin( sbtFlag, Ctx::SbtFlag( ctxIdx ) ); + if( !sbtFlag ) + { + return; + } + + bool sbtQuadFlag = sbtIdx == SBT_HOR_QUAD || sbtIdx == SBT_VER_QUAD; + bool sbtHorFlag = sbtIdx == SBT_HOR_HALF || sbtIdx == SBT_HOR_QUAD; + bool sbtPosFlag = sbtPos == SBT_POS1; + + uint8_t sbtVerHalfAllow = CU::targetSbtAllowed( SBT_VER_HALF, sbtAllowed ); + uint8_t sbtHorHalfAllow = CU::targetSbtAllowed( SBT_HOR_HALF, sbtAllowed ); + uint8_t sbtVerQuadAllow = CU::targetSbtAllowed( SBT_VER_QUAD, sbtAllowed ); + uint8_t sbtHorQuadAllow = CU::targetSbtAllowed( SBT_HOR_QUAD, sbtAllowed ); + //bin - type + if( ( sbtHorHalfAllow || sbtVerHalfAllow ) && ( sbtHorQuadAllow || sbtVerQuadAllow ) ) + { + m_BinEncoder.encodeBin( sbtQuadFlag, Ctx::SbtQuadFlag( 0 ) ); + } + else + { + assert( sbtQuadFlag == 0 ); + } + + //bin - dir + if( ( sbtQuadFlag && sbtVerQuadAllow && sbtHorQuadAllow ) || ( !sbtQuadFlag && sbtVerHalfAllow && sbtHorHalfAllow ) ) //both direction allowed + { + uint8_t ctxIdx = ( cuWidth == cuHeight ) ? 0 : ( cuWidth < cuHeight ? 
1 : 2 ); + m_BinEncoder.encodeBin( sbtHorFlag, Ctx::SbtHorFlag( ctxIdx ) ); + } + else + { + assert( sbtHorFlag == ( ( sbtQuadFlag && sbtHorQuadAllow ) || ( !sbtQuadFlag && sbtHorHalfAllow ) ) ); + } + + //bin - pos + m_BinEncoder.encodeBin( sbtPosFlag, Ctx::SbtPosFlag( 0 ) ); + + DTRACE( g_trace_ctx, D_SYNTAX, "sbt_mode() pos=(%d,%d) sbtInfo=%d\n", cu.lx(), cu.ly(), (int)cu.sbtInfo ); +} +#endif void CABACWriter::end_of_ctu( const CodingUnit& cu, CUCtx& cuCtx ) { @@ -2080,6 +2144,11 @@ void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partit void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partitioner, CUCtx& cuCtx, ChromaCbfs& chromaCbfs ) #endif { +#if JVET_M0140_SBT + ChromaCbfs chromaCbfsLastDepth; + chromaCbfsLastDepth.Cb = chromaCbfs.Cb; + chromaCbfsLastDepth.Cr = chromaCbfs.Cr; +#endif const UnitArea& area = partitioner.currArea(); #if JVET_M0102_INTRA_SUBPARTITIONS int subTuCounter = subTuIdx; @@ -2099,6 +2168,12 @@ void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partit { CHECK( !split, "transform split implied" ); } +#if JVET_M0140_SBT + else if( cu.sbtInfo && partitioner.canSplit( PartSplit( cu.getSbtTuSplit() ), cs ) ) + { + CHECK( !split, "transform split implied - sbt" ); + } +#endif else #if JVET_M0102_INTRA_SUBPARTITIONS CHECK( split && !cu.ispMode, "transform split not allowed with QTBT" ); @@ -2120,6 +2195,9 @@ void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partit if( trDepth == 0 || chromaCbfs.Cb || chromaCbfISP ) { chromaCbfs.Cb = TU::getCbfAtDepth( tu, COMPONENT_Cb, trDepth ); +#if JVET_M0140_SBT + if( !( cu.sbtInfo && trDepth == 1 ) ) +#endif cbf_comp( cs, chromaCbfs.Cb, area.blocks[COMPONENT_Cb], cbfDepth ); } else @@ -2130,6 +2208,9 @@ void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partit if( trDepth == 0 || chromaCbfs.Cr || chromaCbfISP ) { chromaCbfs.Cr = TU::getCbfAtDepth( tu, COMPONENT_Cr, trDepth ); +#if 
JVET_M0140_SBT + if( !( cu.sbtInfo && trDepth == 1 ) ) +#endif cbf_comp( cs, chromaCbfs.Cr, area.blocks[COMPONENT_Cr], cbfDepth, chromaCbfs.Cb ); } else @@ -2140,6 +2221,9 @@ void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partit if( trDepth == 0 || chromaCbfs.Cb ) { chromaCbfs.Cb = TU::getCbfAtDepth( tu, COMPONENT_Cb, trDepth ); +#if JVET_M0140_SBT + if( !( cu.sbtInfo && trDepth == 1 ) ) +#endif cbf_comp( cs, chromaCbfs.Cb, area.blocks[COMPONENT_Cb], trDepth ); } else @@ -2150,6 +2234,9 @@ void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partit if( trDepth == 0 || chromaCbfs.Cr ) { chromaCbfs.Cr = TU::getCbfAtDepth( tu, COMPONENT_Cr, trDepth ); +#if JVET_M0140_SBT + if( !( cu.sbtInfo && trDepth == 1 ) ) +#endif cbf_comp( cs, chromaCbfs.Cr, area.blocks[COMPONENT_Cr], trDepth, chromaCbfs.Cb ); } else @@ -2193,6 +2280,12 @@ void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partit { partitioner.splitCurrArea( ispType, cs ); } +#endif +#if JVET_M0140_SBT + else if( cu.sbtInfo && partitioner.canSplit( PartSplit( cu.getSbtTuSplit() ), cs ) ) + { + partitioner.splitCurrArea( PartSplit( cu.getSbtTuSplit() ), cs ); + } #endif else THROW( "Implicit TU split not available" ); @@ -2220,6 +2313,17 @@ void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partit { CHECK( !TU::getCbfAtDepth( tu, COMPONENT_Y, trDepth ), "Luma cbf must be true for inter units with no chroma coeffs" ); } +#if JVET_M0140_SBT + else if( cu.sbtInfo && tu.noResidual ) + { + CHECK( TU::getCbfAtDepth( tu, COMPONENT_Y, trDepth ), "Luma cbf must be false for inter sbt no-residual tu" ); + } + else if( cu.sbtInfo && !chromaCbfsLastDepth.sigChroma( area.chromaFormat ) ) + { + assert( !tu.noResidual ); + CHECK( !TU::getCbfAtDepth( tu, COMPONENT_Y, trDepth ), "Luma cbf must be true for inter sbt residual tu" ); + } +#endif else { #if JVET_M0102_INTRA_SUBPARTITIONS diff --git a/source/Lib/EncoderLib/CABACWriter.h 
b/source/Lib/EncoderLib/CABACWriter.h index 8ddd3c5fcb0eae3260426fcd6a559e17bb859de3..9f45ea0a5099366138eaf2d9dde3ef1e5755faa9 100644 --- a/source/Lib/EncoderLib/CABACWriter.h +++ b/source/Lib/EncoderLib/CABACWriter.h @@ -107,6 +107,9 @@ public: void intra_chroma_pred_mode ( const PredictionUnit& pu ); void cu_residual ( const CodingUnit& cu, Partitioner& pm, CUCtx& cuCtx ); void rqt_root_cbf ( const CodingUnit& cu ); +#if JVET_M0140_SBT + void sbt_mode ( const CodingUnit& cu ); +#endif void end_of_ctu ( const CodingUnit& cu, CUCtx& cuCtx ); // prediction unit (clause 7.3.8.6) diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h index 5f8ff17f7bc8e33045554fcc41a39b1373baca96..c2d2a0a67b631c774f812effbd54ca93d5770c8f 100644 --- a/source/Lib/EncoderLib/EncCfg.h +++ b/source/Lib/EncoderLib/EncCfg.h @@ -217,6 +217,9 @@ protected: #endif #if JVET_M0303_IMPLICIT_MTS int m_ImplicitMTS; +#endif +#if JVET_M0140_SBT + bool m_SBT; ///< Sub-Block Transform for inter blocks #endif bool m_LargeCTU; int m_SubPuMvpMode; @@ -753,6 +756,10 @@ public: void setImplicitMTS ( bool b ) { m_ImplicitMTS = b; } bool getImplicitMTS () const { return m_ImplicitMTS; } #endif +#if JVET_M0140_SBT + void setUseSBT ( bool b ) { m_SBT = b; } + bool getUseSBT () const { return m_SBT; } +#endif void setUseCompositeRef (bool b) { m_compositeRefEnabled = b; } bool getUseCompositeRef () const { return m_compositeRefEnabled; } diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp index 6f6d16822c359eda71c161ce7b5e59166de345e9..b93231088587e50d5fab776cd78de6e3bfdd7015 100644 --- a/source/Lib/EncoderLib/EncCu.cpp +++ b/source/Lib/EncoderLib/EncCu.cpp @@ -713,6 +713,15 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par tempCS->chType = partitioner.chType; bestCS->chType = partitioner.chType; m_modeCtrl->initCULevel( partitioner, *tempCS ); +#if JVET_M0140_SBT + if( partitioner.currQtDepth == 0 && partitioner.currMtDepth == 0 
&& !tempCS->slice->isIntra() && ( sps.getSpsNext().getUseSBT() || sps.getSpsNext().getUseInterMTS() ) ) + { + auto slsSbt = dynamic_cast<SaveLoadEncInfoSbt*>( m_modeCtrl ); + int maxSLSize = sps.getSpsNext().getUseSBT() ? tempCS->slice->getSPS()->getSpsNext().getMaxSbtSize() : MTS_INTER_MAX_CU_SIZE; + slsSbt->resetSaveloadSbt( maxSLSize ); + } + m_sbtCostSave[0] = m_sbtCostSave[1] = MAX_DOUBLE; +#endif m_CurrCtx->start = m_CABACEstimator->getCtx(); @@ -4171,6 +4180,11 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be CodingUnit* cu = tempCS->getCU( partitioner.chType ); double bestCostInternal = MAX_DOUBLE; double bestCost = bestCS->cost; +#if JVET_M0140_SBT + double bestCostBegin = bestCS->cost; + CodingUnit* prevBestCU = bestCS->getCU( partitioner.chType ); + uint8_t prevBestSbt = ( prevBestCU == nullptr ) ? 0 : prevBestCU->sbtInfo; +#endif #if !JVET_M0464_UNI_MTS const SPS& sps = *tempCS->sps; const int maxSizeEMT = EMT_INTER_MAX_CU_WITH_QTBT; @@ -4211,6 +4225,51 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be } } } +#if JVET_M0140_SBT + const bool mtsAllowed = tempCS->sps->getSpsNext().getUseInterMTS() && partitioner.currArea().lwidth() <= MTS_INTER_MAX_CU_SIZE && partitioner.currArea().lheight() <= MTS_INTER_MAX_CU_SIZE; + uint8_t sbtAllowed = cu->checkAllowedSbt(); + uint8_t numRDOTried = 0; + Distortion sbtOffDist = 0; + bool sbtOffRootCbf = 0; + double sbtOffCost = MAX_DOUBLE; + double currBestCost = MAX_DOUBLE; + bool doPreAnalyzeResi = ( sbtAllowed || mtsAllowed ) && residualPass == 0; + + m_pcInterSearch->initTuAnalyzer(); + if( doPreAnalyzeResi ) + { + m_pcInterSearch->calcMinDistSbt( *tempCS, *cu, sbtAllowed ); + } + + auto slsSbt = dynamic_cast<SaveLoadEncInfoSbt*>( m_modeCtrl ); + int slShift = 4 + std::min( (int)gp_sizeIdxInfo->idxFrom( cu->lwidth() ) + (int)gp_sizeIdxInfo->idxFrom( cu->lheight() ), 9 ); + Distortion curPuSse = m_pcInterSearch->getEstDistSbt( NUMBER_SBT_MODE ); + 
uint8_t currBestSbt = 0; + uint8_t currBestTrs = MAX_UCHAR; + uint8_t histBestSbt = MAX_UCHAR; + uint8_t histBestTrs = MAX_UCHAR; + m_pcInterSearch->setHistBestTrs( MAX_UCHAR, MAX_UCHAR ); + if( doPreAnalyzeResi ) + { + if( m_pcInterSearch->getSkipSbtAll() && !mtsAllowed ) //emt is off + { + histBestSbt = 0; //try DCT2 + m_pcInterSearch->setHistBestTrs( histBestSbt, histBestTrs ); + } + else + { + assert( curPuSse != std::numeric_limits<uint64_t>::max() ); + uint16_t compositeSbtTrs = slsSbt->findBestSbt( cu->cs->area, (uint32_t)( curPuSse >> slShift ) ); + histBestSbt = ( compositeSbtTrs >> 0 ) & 0xff; + histBestTrs = ( compositeSbtTrs >> 8 ) & 0xff; + if( m_pcInterSearch->getSkipSbtAll() && CU::isSbtMode( histBestSbt ) ) //special case, skip SBT when loading SBT + { + histBestSbt = 0; //try DCT2 + } + m_pcInterSearch->setHistBestTrs( histBestSbt, histBestTrs ); + } + } +#endif #if !JVET_M0464_UNI_MTS if( emtMode == 2 ) @@ -4254,14 +4313,24 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be #if !JVET_M0464_UNI_MTS cu->emtFlag = curEmtMode; #endif +#if JVET_M0140_SBT + cu->sbtInfo = 0; +#endif const bool skipResidual = residualPass == 1; +#if JVET_M0140_SBT // skip DCT-2 and EMT if historical best transform mode is SBT + if( skipResidual || histBestSbt == MAX_UCHAR || !CU::isSbtMode( histBestSbt ) ) + { +#endif m_pcInterSearch->encodeResAndCalcRdInterCU( *tempCS, partitioner, skipResidual ); - +#if JVET_M0140_SBT + numRDOTried += mtsAllowed ? 
2 : 1; +#endif xEncodeDontSplit( *tempCS, partitioner ); xCheckDQP( *tempCS, partitioner ); +#if !JVET_M0140_SBT //harmonize with GBI fast algorithm (move the code to the end of this function) if( ETM_INTER_ME == encTestMode.type ) { if( equGBiCost != NULL ) @@ -4291,6 +4360,7 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be } } } +#endif #if !JVET_M0464_UNI_MTS double emtFirstPassCost = tempCS->cost; @@ -4318,6 +4388,18 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be return; } } +#if JVET_M0140_SBT + currBestCost = tempCS->cost; + sbtOffCost = tempCS->cost; + sbtOffDist = tempCS->dist; + sbtOffRootCbf = cu->rootCbf; + currBestSbt = CU::getSbtInfo( cu->firstTU->mtsIdx > 1 ? SBT_OFF_MTS : SBT_OFF_DCT, 0 ); + currBestTrs = cu->firstTU->mtsIdx; + if( cu->lwidth() <= MAX_TU_SIZE_FOR_PROFILE && cu->lheight() <= MAX_TU_SIZE_FOR_PROFILE ) + { + CHECK( tempCS->tus.size() != 1, "tu must be only one" ); + } +#endif #if WCG_EXT DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) ); @@ -4340,8 +4422,200 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be maxEMTMode = 0; // do not test EMT } } +#endif +#if JVET_M0140_SBT // skip DCT-2 and EMT + } +#endif + +#if JVET_M0140_SBT //RDO for SBT + uint8_t numSbtRdo = CU::numSbtModeRdo( sbtAllowed ); + //early termination if all SBT modes are not allowed + //normative + if( !sbtAllowed || skipResidual ) + { + numSbtRdo = 0; + } + //fast algorithm + if( ( histBestSbt != MAX_UCHAR && !CU::isSbtMode( histBestSbt ) ) || m_pcInterSearch->getSkipSbtAll() ) + { + numSbtRdo = 0; + } + if( bestCost != MAX_DOUBLE && sbtOffCost != MAX_DOUBLE ) + { + double th = 1.07; + if( !( prevBestSbt == 0 || m_sbtCostSave[0] == MAX_DOUBLE ) ) + { + assert( m_sbtCostSave[1] <= m_sbtCostSave[0] ); + th *= ( m_sbtCostSave[0] / m_sbtCostSave[1] ); + } + if( sbtOffCost > bestCost * th ) + { + numSbtRdo = 0; + } + } + if( !sbtOffRootCbf && sbtOffCost != 
MAX_DOUBLE ) + { + double th = Clip3( 0.05, 0.55, ( 27 - cu->qp ) * 0.02 + 0.35 ); + if( sbtOffCost < m_pcRdCost->calcRdCost( ( cu->lwidth() * cu->lheight() ) << SCALE_BITS, 0 ) * th ) + { + numSbtRdo = 0; + } + } + + if( histBestSbt != MAX_UCHAR && numSbtRdo != 0 ) + { + numSbtRdo = 1; + m_pcInterSearch->initSbtRdoOrder( CU::getSbtMode( CU::getSbtIdx( histBestSbt ), CU::getSbtPos( histBestSbt ) ) ); + } + + for( int sbtModeIdx = 0; sbtModeIdx < numSbtRdo; sbtModeIdx++ ) + { + uint8_t sbtMode = m_pcInterSearch->getSbtRdoOrder( sbtModeIdx ); + uint8_t sbtIdx = CU::getSbtIdxFromSbtMode( sbtMode ); + uint8_t sbtPos = CU::getSbtPosFromSbtMode( sbtMode ); + + //fast algorithm (early skip, save & load) + if( histBestSbt == MAX_UCHAR ) + { + uint8_t skipCode = m_pcInterSearch->skipSbtByRDCost( cu->lwidth(), cu->lheight(), cu->mtDepth, sbtIdx, sbtPos, bestCS->cost, sbtOffDist, sbtOffCost, sbtOffRootCbf ); + if( skipCode != MAX_UCHAR ) + { + continue; + } + + if( sbtModeIdx > 0 ) + { + uint8_t prevSbtMode = m_pcInterSearch->getSbtRdoOrder( sbtModeIdx - 1 ); + //make sure the prevSbtMode is the same size as the current SBT mode (otherwise the estimated dist may not be comparable) + if( CU::isSameSbtSize( prevSbtMode, sbtMode ) ) + { + Distortion currEstDist = m_pcInterSearch->getEstDistSbt( sbtMode ); + Distortion prevEstDist = m_pcInterSearch->getEstDistSbt( prevSbtMode ); + if( currEstDist > prevEstDist * 1.15 ) + { + continue; + } + } + } + } + + //init tempCS and TU + if( bestCost == bestCS->cost ) //The first EMT pass didn't become the bestCS, so we clear the TUs generated + { + tempCS->clearTUs(); + } + else if( false == swapped ) + { + tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); + tempCS->copyStructure( *bestCS, partitioner.chType ); + tempCS->getPredBuf().copyFrom( bestCS->getPredBuf() ); + bestCost = bestCS->cost; + cu = tempCS->getCU( partitioner.chType ); + swapped = true; + } + else + { + tempCS->clearTUs(); + bestCost = bestCS->cost; + cu = 
tempCS->getCU( partitioner.chType ); + } + + //we need to restart the distortion for the new tempCS, the bit count and the cost + tempCS->dist = 0; + tempCS->fracBits = 0; + tempCS->cost = MAX_DOUBLE; + cu->skip = false; + + //set SBT info + cu->setSbtIdx( sbtIdx ); + cu->setSbtPos( sbtPos ); + + //try residual coding + m_pcInterSearch->encodeResAndCalcRdInterCU( *tempCS, partitioner, skipResidual ); + numRDOTried++; + + xEncodeDontSplit( *tempCS, partitioner ); + + xCheckDQP( *tempCS, partitioner ); + + if( imvCS && ( tempCS->cost < imvCS->cost ) ) + { + if( imvCS->cost != MAX_DOUBLE ) + { + imvCS->initStructData( encTestMode.qp, encTestMode.lossless ); + } + imvCS->copyStructure( *tempCS, partitioner.chType ); + } + + if( NULL != bestHasNonResi && ( bestCostInternal > tempCS->cost ) ) + { + bestCostInternal = tempCS->cost; + if( !( tempCS->getPU( partitioner.chType )->mhIntraFlag ) ) + *bestHasNonResi = !cu->rootCbf; + } + + if( tempCS->cost < currBestCost ) + { + currBestSbt = cu->sbtInfo; + currBestTrs = tempCS->tus[cu->sbtInfo ? 
cu->getSbtPos() : 0]->mtsIdx; + assert( currBestTrs == 0 || currBestTrs == 1 ); + currBestCost = tempCS->cost; + } + +#if WCG_EXT + DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) ); +#else + DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda() ); +#endif + xCheckBestMode( tempCS, bestCS, partitioner, encTestMode ); + } + + if( bestCostBegin != bestCS->cost ) + { + m_sbtCostSave[0] = sbtOffCost; + m_sbtCostSave[1] = currBestCost; + } #endif } //end emt loop + +#if JVET_M0140_SBT + if( histBestSbt == MAX_UCHAR && doPreAnalyzeResi && numRDOTried > 1 ) + { + slsSbt->saveBestSbt( cu->cs->area, (uint32_t)( curPuSse >> slShift ), currBestSbt, currBestTrs ); + } +#endif +#if JVET_M0140_SBT //harmonize with GBI fast algorithm (move the code here) + tempCS->cost = currBestCost; + if( ETM_INTER_ME == encTestMode.type ) + { + if( equGBiCost != NULL ) + { + if( tempCS->cost < ( *equGBiCost ) && cu->GBiIdx == GBI_DEFAULT ) + { + ( *equGBiCost ) = tempCS->cost; + } + } + else + { + CHECK( equGBiCost == NULL, "equGBiCost == NULL" ); + } + if( tempCS->slice->getCheckLDC() && !cu->imv && cu->GBiIdx != GBI_DEFAULT && tempCS->cost < m_bestGbiCost[1] ) + { + if( tempCS->cost < m_bestGbiCost[0] ) + { + m_bestGbiCost[1] = m_bestGbiCost[0]; + m_bestGbiCost[0] = tempCS->cost; + m_bestGbiIdx[1] = m_bestGbiIdx[0]; + m_bestGbiIdx[0] = cu->GBiIdx; + } + else + { + m_bestGbiCost[1] = tempCS->cost; + m_bestGbiIdx[1] = cu->GBiIdx; + } + } + } +#endif } diff --git a/source/Lib/EncoderLib/EncCu.h b/source/Lib/EncoderLib/EncCu.h index 3f1bc3b90230c2f5f231da4ae78d076fcfdf524b..89760c8491b9caaee20b70da692b5d4fdea5b113 100644 --- a/source/Lib/EncoderLib/EncCu.h +++ b/source/Lib/EncoderLib/EncCu.h @@ -152,6 +152,9 @@ private: #if SHARP_LUMA_DELTA_QP void updateLambda ( Slice* slice, double dQP ); #endif +#if JVET_M0140_SBT + double m_sbtCostSave[2]; +#endif public: /// copy parameters from encoder class diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp index 
2deda4f064adbf1df0d5e64e3452f0b86d613135..76b3aef55aec3723872343c0bdee53683daead2c 100644 --- a/source/Lib/EncoderLib/EncLib.cpp +++ b/source/Lib/EncoderLib/EncLib.cpp @@ -909,6 +909,13 @@ void EncLib::xInitSPS(SPS &sps) #endif sps.getSpsNext().setUseIntraEMT ( m_IntraEMT ); sps.getSpsNext().setUseInterEMT ( m_InterEMT ); +#endif +#if JVET_M0140_SBT + sps.getSpsNext().setUseSBT ( m_SBT ); + if( sps.getSpsNext().getUseSBT() ) + { + sps.getSpsNext().setMaxSbtSize ( m_iSourceWidth >= 1920 ? 64 : 32 ); + } #endif sps.getSpsNext().setUseCompositeRef ( m_compositeRefEnabled ); sps.getSpsNext().setUseGBi ( m_GBi ); diff --git a/source/Lib/EncoderLib/EncModeCtrl.cpp b/source/Lib/EncoderLib/EncModeCtrl.cpp index dd4b2282e974f1b728279275d411cf224d5b115b..cac92f3864e957079f60ad703f052f1d9dd427e0 100644 --- a/source/Lib/EncoderLib/EncModeCtrl.cpp +++ b/source/Lib/EncoderLib/EncModeCtrl.cpp @@ -510,6 +510,106 @@ bool CacheBlkInfoCtrl::getMv( const UnitArea& area, const RefPicList refPicList, return m_codedCUInfo[idx1][idx2][idx3][idx4]->validMv[refPicList][iRefIdx]; } +#if JVET_M0140_SBT +void SaveLoadEncInfoSbt::init( const Slice &slice ) +{ + m_sliceSbt = &slice; +} + +void SaveLoadEncInfoSbt::create() +{ + int numSizeIdx = gp_sizeIdxInfo->idxFrom( SBT_MAX_SIZE ) - MIN_CU_LOG2 + 1; + int numPosIdx = MAX_CU_SIZE >> MIN_CU_LOG2; + + m_saveLoadSbt = new SaveLoadStructSbt***[numPosIdx]; + + for( int xIdx = 0; xIdx < numPosIdx; xIdx++ ) + { + m_saveLoadSbt[xIdx] = new SaveLoadStructSbt**[numPosIdx]; + for( int yIdx = 0; yIdx < numPosIdx; yIdx++ ) + { + m_saveLoadSbt[xIdx][yIdx] = new SaveLoadStructSbt*[numSizeIdx]; + for( int wIdx = 0; wIdx < numSizeIdx; wIdx++ ) + { + m_saveLoadSbt[xIdx][yIdx][wIdx] = new SaveLoadStructSbt[numSizeIdx]; + } + } + } +} + +void SaveLoadEncInfoSbt::destroy() +{ + int numSizeIdx = gp_sizeIdxInfo->idxFrom( SBT_MAX_SIZE ) - MIN_CU_LOG2 + 1; + int numPosIdx = MAX_CU_SIZE >> MIN_CU_LOG2; + + for( int xIdx = 0; xIdx < numPosIdx; xIdx++ ) + { + for( int 
yIdx = 0; yIdx < numPosIdx; yIdx++ ) + { + for( int wIdx = 0; wIdx < numSizeIdx; wIdx++ ) + { + delete[] m_saveLoadSbt[xIdx][yIdx][wIdx]; + } + delete[] m_saveLoadSbt[xIdx][yIdx]; + } + delete[] m_saveLoadSbt[xIdx]; + } + delete[] m_saveLoadSbt; +} + +uint16_t SaveLoadEncInfoSbt::findBestSbt( const UnitArea& area, const uint32_t curPuSse ) +{ + unsigned idx1, idx2, idx3, idx4; + getAreaIdx( area.Y(), *m_sliceSbt->getPPS()->pcv, idx1, idx2, idx3, idx4 ); + SaveLoadStructSbt* pSbtSave = &m_saveLoadSbt[idx1][idx2][idx3 - MIN_CU_LOG2][idx4 - MIN_CU_LOG2]; + + for( int i = 0; i < pSbtSave->numPuInfoStored; i++ ) + { + if( curPuSse == pSbtSave->puSse[i] ) + { + return pSbtSave->puSbt[i] + ( pSbtSave->puTrs[i] << 8 ); + } + } + + return MAX_UCHAR + ( MAX_UCHAR << 8 ); +} + +bool SaveLoadEncInfoSbt::saveBestSbt( const UnitArea& area, const uint32_t curPuSse, const uint8_t curPuSbt, const uint8_t curPuTrs ) +{ + unsigned idx1, idx2, idx3, idx4; + getAreaIdx( area.Y(), *m_sliceSbt->getPPS()->pcv, idx1, idx2, idx3, idx4 ); + SaveLoadStructSbt* pSbtSave = &m_saveLoadSbt[idx1][idx2][idx3 - MIN_CU_LOG2][idx4 - MIN_CU_LOG2]; + + if( pSbtSave->numPuInfoStored == SBT_NUM_SL ) + { + return false; + } + + pSbtSave->puSse[pSbtSave->numPuInfoStored] = curPuSse; + pSbtSave->puSbt[pSbtSave->numPuInfoStored] = curPuSbt; + pSbtSave->puTrs[pSbtSave->numPuInfoStored] = curPuTrs; + pSbtSave->numPuInfoStored++; + return true; +} + +void SaveLoadEncInfoSbt::resetSaveloadSbt( int maxSbtSize ) +{ + int numSizeIdx = gp_sizeIdxInfo->idxFrom( maxSbtSize ) - MIN_CU_LOG2 + 1; + int numPosIdx = MAX_CU_SIZE >> MIN_CU_LOG2; + + for( int xIdx = 0; xIdx < numPosIdx; xIdx++ ) + { + for( int yIdx = 0; yIdx < numPosIdx; yIdx++ ) + { + for( int wIdx = 0; wIdx < numSizeIdx; wIdx++ ) + { + memset( m_saveLoadSbt[xIdx][yIdx][wIdx], 0, numSizeIdx * sizeof( SaveLoadStructSbt ) ); + } + } + } +} +#endif + bool CacheBlkInfoCtrl::getInter(const UnitArea& area) { unsigned idx1, idx2, idx3, idx4; @@ -952,12 +1052,18 @@ 
void EncModeCtrlMTnoRQT::create( const EncCfg& cfg ) { CacheBlkInfoCtrl::create(); BestEncInfoCache::create( cfg.getChromaFormatIdc() ); +#if JVET_M0140_SBT + SaveLoadEncInfoSbt::create(); +#endif } void EncModeCtrlMTnoRQT::destroy() { CacheBlkInfoCtrl::destroy(); BestEncInfoCache::destroy(); +#if JVET_M0140_SBT + SaveLoadEncInfoSbt::destroy(); +#endif } #endif @@ -967,6 +1073,9 @@ void EncModeCtrlMTnoRQT::initCTUEncoding( const Slice &slice ) #if REUSE_CU_RESULTS BestEncInfoCache::init( slice ); #endif +#if JVET_M0140_SBT + SaveLoadEncInfoSbt::init( slice ); +#endif CHECK( !m_ComprCUCtxList.empty(), "Mode list is not empty at the beginning of a CTU" ); diff --git a/source/Lib/EncoderLib/EncModeCtrl.h b/source/Lib/EncoderLib/EncModeCtrl.h index 145d0c4b96e5875e0c112e04617276ce2aad7993..9aa3abe477e94a5e59c25011bc7fcf0d13b0f0a5 100644 --- a/source/Lib/EncoderLib/EncModeCtrl.h +++ b/source/Lib/EncoderLib/EncModeCtrl.h @@ -355,6 +355,33 @@ protected: ////////////////////////////////////////////////////////////////////////// // some utility interfaces that expose some functionality that can be used without concerning about which particular controller is used ////////////////////////////////////////////////////////////////////////// +#if JVET_M0140_SBT +struct SaveLoadStructSbt +{ + uint8_t numPuInfoStored; + uint32_t puSse[SBT_NUM_SL]; + uint8_t puSbt[SBT_NUM_SL]; + uint8_t puTrs[SBT_NUM_SL]; +}; + +class SaveLoadEncInfoSbt +{ +protected: + void init( const Slice &slice ); + void create(); + void destroy(); + +private: + SaveLoadStructSbt ****m_saveLoadSbt; + Slice const *m_sliceSbt; + +public: + virtual ~SaveLoadEncInfoSbt() { } + void resetSaveloadSbt( int maxSbtSize ); + uint16_t findBestSbt( const UnitArea& area, const uint32_t curPuSse ); + bool saveBestSbt( const UnitArea& area, const uint32_t curPuSse, const uint8_t curPuSbt, const uint8_t curPuTrs ); +}; +#endif static const int MAX_STORED_CU_INFO_REFS = 4; @@ -481,6 +508,9 @@ class EncModeCtrlMTnoRQT : public 
EncModeCtrl, public CacheBlkInfoCtrl #if REUSE_CU_RESULTS , public BestEncInfoCache #endif +#if JVET_M0140_SBT + , public SaveLoadEncInfoSbt +#endif { enum ExtraFeatures { diff --git a/source/Lib/EncoderLib/InterSearch.cpp b/source/Lib/EncoderLib/InterSearch.cpp index cd19a77f979ae2ae17a601d2c6fe16b7ed72c068..22b5d5e4b828bbe9110ecab4f723a92a7f6361b6 100644 --- a/source/Lib/EncoderLib/InterSearch.cpp +++ b/source/Lib/EncoderLib/InterSearch.cpp @@ -113,6 +113,10 @@ InterSearch::InterSearch() m_affMVList = nullptr; m_affMVListSize = 0; m_affMVListIdx = 0; +#if JVET_M0140_SBT + m_histBestSbt = MAX_UCHAR; + m_histBestMtsIdx = MAX_UCHAR; +#endif } @@ -5928,6 +5932,12 @@ void InterSearch::xEncodeInterResidualQT(CodingStructure &cs, Partitioner &parti { CHECK( !bSubdiv, "Not performing the implicit TU split" ); } +#if JVET_M0140_SBT + else if( cu.sbtInfo && partitioner.canSplit( PartSplit( cu.getSbtTuSplit() ), cs ) ) + { + CHECK( !bSubdiv, "Not performing the implicit TU split - sbt" ); + } +#endif else { CHECK( bSubdiv, "transformsplit not supported" ); @@ -5942,17 +5952,27 @@ void InterSearch::xEncodeInterResidualQT(CodingStructure &cs, Partitioner &parti if( firstCbfOfCU || TU::getCbfAtDepth( currTU, COMPONENT_Cb, currDepth - 1 ) ) { const bool chroma_cbf = TU::getCbfAtDepth( currTU, COMPONENT_Cb, currDepth ); +#if JVET_M0140_SBT + if( !( cu.sbtInfo && currDepth == 1 ) ) +#endif m_CABACEstimator->cbf_comp( cs, chroma_cbf, currArea.blocks[COMPONENT_Cb], currDepth ); } if( firstCbfOfCU || TU::getCbfAtDepth( currTU, COMPONENT_Cr, currDepth - 1 ) ) { const bool chroma_cbf = TU::getCbfAtDepth( currTU, COMPONENT_Cr, currDepth ); +#if JVET_M0140_SBT + if( !( cu.sbtInfo && currDepth == 1 ) ) +#endif m_CABACEstimator->cbf_comp( cs, chroma_cbf, currArea.blocks[COMPONENT_Cr], currDepth, TU::getCbfAtDepth( currTU, COMPONENT_Cb, currDepth ) ); } } } +#if JVET_M0140_SBT + if( !bSubdiv && !( cu.sbtInfo && currTU.noResidual ) ) +#else if( !bSubdiv ) +#endif { 
m_CABACEstimator->cbf_comp( cs, TU::getCbfAtDepth( currTU, COMPONENT_Y, currDepth ), currArea.Y(), currDepth ); } @@ -5983,6 +6003,12 @@ void InterSearch::xEncodeInterResidualQT(CodingStructure &cs, Partitioner &parti { partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs ); } +#if JVET_M0140_SBT + else if( cu.sbtInfo && partitioner.canSplit( PartSplit( cu.getSbtTuSplit() ), cs ) ) + { + partitioner.splitCurrArea( PartSplit( cu.getSbtTuSplit() ), cs ); + } +#endif else THROW( "Implicit TU split not available!" );
@@ -5996,6 +6022,253 @@ void InterSearch::xEncodeInterResidualQT(CodingStructure &cs, Partitioner &parti
   }
 }
 
+#if JVET_M0140_SBT
+void InterSearch::calcMinDistSbt( CodingStructure &cs, const CodingUnit& cu, const uint8_t sbtAllowed )
+{ // fills m_estMinDistSbt[] (entry NUMBER_SBT_MODE holds the SSE of the whole CU) and m_sbtRdoOrder[]; may set m_skipSbtAll
+  if( !sbtAllowed )
+  {
+    m_estMinDistSbt[NUMBER_SBT_MODE] = 0; // no SBT mode allowed: only the whole-CU org-vs-pred SSE is computed
+    for( int comp = 0; comp < getNumberValidTBlocks( *cs.pcv ); comp++ )
+    {
+      const ComponentID compID = ComponentID( comp );
+      CPelBuf pred = cs.getPredBuf( compID );
+      CPelBuf org = cs.getOrgBuf( compID );
+      m_estMinDistSbt[NUMBER_SBT_MODE] += m_pcRdCost->getDistPart( org, pred, cs.sps->getBitDepth( toChannelType( compID ) ), compID, DF_SSE );
+    }
+    return;
+  }
+
+  //SBT fast algorithm 2.1 : estimate a minimum RD cost of a SBT mode based on the luma distortion of uncoded part and coded part (assuming distortion can be reduced to 1/16);
+  // if this cost is larger than the best cost, no need to try a specific SBT mode
+  int cuWidth = cu.lwidth();
+  int cuHeight = cu.lheight();
+  int numPartX = cuWidth >= 16 ? 4 : ( cuWidth == 4 ? 1 : 2 ); // 4 parts for width >= 16, 1 part for width 4, otherwise 2
+  int numPartY = cuHeight >= 16 ? 4 : ( cuHeight == 4 ? 1 : 2 ); // same rule applied to the height
+  Distortion dist[4][4]; // SSE per sub part, indexed [row j][column i]; summed over all components
+  memset( dist, 0, sizeof( Distortion ) * 16 );
+
+  for( uint32_t c = 0; c < getNumberValidTBlocks( *cs.pcv ); c++ )
+  {
+    const ComponentID compID = ComponentID( c );
+    const CompArea& compArea = cu.blocks[compID];
+    const CPelBuf orgPel = cs.getOrgBuf( compArea );
+    const CPelBuf predPel = cs.getPredBuf( compArea );
+    int lengthX = compArea.width / numPartX;
+    int lengthY = compArea.height / numPartY;
+    int strideOrg = orgPel.stride;
+    int stridePred = predPel.stride;
+    uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT( ( cs.sps->getBitDepth( toChannelType( compID ) ) - 8 ) << 1 ); // cs.sps is a pointer, accessed the same way as in the early-return branch above
+    Intermediate_Int iTemp;
+
+    //calc distY of 16 sub parts
+    for( int j = 0; j < numPartY; j++ )
+    {
+      for( int i = 0; i < numPartX; i++ )
+      {
+        int posX = i * lengthX;
+        int posY = j * lengthY;
+        const Pel* ptrOrg = orgPel.bufAt( posX, posY );
+        const Pel* ptrPred = predPel.bufAt( posX, posY );
+        Distortion uiSum = 0;
+        for( int n = 0; n < lengthY; n++ )
+        {
+          for( int m = 0; m < lengthX; m++ )
+          {
+            iTemp = ptrOrg[m] - ptrPred[m];
+            uiSum += Distortion( ( iTemp * iTemp ) >> uiShift );
+          }
+          ptrOrg += strideOrg;
+          ptrPred += stridePred;
+        }
+        if( isChroma( compID ) )
+        {
+          uiSum = (Distortion)( uiSum * m_pcRdCost->getChromaWeight() );
+        }
+        dist[j][i] += uiSum;
+      }
+    }
+  }
+
+  //SSE of a CU
+  m_estMinDistSbt[NUMBER_SBT_MODE] = 0;
+  for( int j = 0; j < numPartY; j++ )
+  {
+    for( int i = 0; i < numPartX; i++ )
+    {
+      m_estMinDistSbt[NUMBER_SBT_MODE] += dist[j][i];
+    }
+  }
+  //init per-mode dist
+  for( int i = SBT_VER_H0; i < NUMBER_SBT_MODE; i++ )
+  {
+    m_estMinDistSbt[i] = std::numeric_limits<uint64_t>::max();
+  }
+
+  //SBT fast algorithm 1: not try SBT if the residual is too small to compensate bits for encoding residual info
+  uint64_t minNonZeroResiFracBits = 12 << SCALE_BITS;
+  if( m_pcRdCost->calcRdCost( 0, m_estMinDistSbt[NUMBER_SBT_MODE] ) < m_pcRdCost->calcRdCost( minNonZeroResiFracBits, 0 ) )
+  {
+    m_skipSbtAll = true;
+    return;
+  }
+
+  //derive estimated minDist of SBT = zero-residual part distortion + non-zero residual part distortion / 16
+  int shift = 5; // NOTE(review): >> 5 divides by 32 although the comments say 1/16 -- confirm the intended factor
+  Distortion distResiPart = 0, distNoResiPart = 0;
+
+  if( CU::targetSbtAllowed( SBT_VER_HALF, sbtAllowed ) )
+  {
+    int offsetResiPart = 0;
+    int offsetNoResiPart = numPartX / 2;
+    distResiPart = distNoResiPart = 0;
+    assert( numPartX >= 2 );
+    for( int j = 0; j < numPartY; j++ )
+    {
+      for( int i = 0; i < numPartX / 2; i++ )
+      {
+        distResiPart += dist[j][i + offsetResiPart];
+        distNoResiPart += dist[j][i + offsetNoResiPart];
+      }
+    }
+    m_estMinDistSbt[SBT_VER_H0] = ( distResiPart >> shift ) + distNoResiPart;
+    m_estMinDistSbt[SBT_VER_H1] = ( distNoResiPart >> shift ) + distResiPart;
+  }
+
+  if( CU::targetSbtAllowed( SBT_HOR_HALF, sbtAllowed ) )
+  {
+    int offsetResiPart = 0;
+    int offsetNoResiPart = numPartY / 2;
+    assert( numPartY >= 2 );
+    distResiPart = distNoResiPart = 0;
+    for( int j = 0; j < numPartY / 2; j++ )
+    {
+      for( int i = 0; i < numPartX; i++ )
+      {
+        distResiPart += dist[j + offsetResiPart][i];
+        distNoResiPart += dist[j + offsetNoResiPart][i];
+      }
+    }
+    m_estMinDistSbt[SBT_HOR_H0] = ( distResiPart >> shift ) + distNoResiPart;
+    m_estMinDistSbt[SBT_HOR_H1] = ( distNoResiPart >> shift ) + distResiPart;
+  }
+
+  if( CU::targetSbtAllowed( SBT_VER_QUAD, sbtAllowed ) )
+  {
+    assert( numPartX == 4 );
+    m_estMinDistSbt[SBT_VER_Q0] = m_estMinDistSbt[SBT_VER_Q1] = 0;
+    for( int j = 0; j < numPartY; j++ )
+    {
+      m_estMinDistSbt[SBT_VER_Q0] += dist[j][0] + ( ( dist[j][1] + dist[j][2] + dist[j][3] ) << shift );
+      m_estMinDistSbt[SBT_VER_Q1] += dist[j][3] + ( ( dist[j][0] + dist[j][1] + dist[j][2] ) << shift );
+    }
+    m_estMinDistSbt[SBT_VER_Q0] = m_estMinDistSbt[SBT_VER_Q0] >> shift;
+    m_estMinDistSbt[SBT_VER_Q1] = m_estMinDistSbt[SBT_VER_Q1] >> shift;
+  }
+
+  if( CU::targetSbtAllowed( SBT_HOR_QUAD, sbtAllowed ) )
+  {
+    assert( numPartY == 4 );
+    m_estMinDistSbt[SBT_HOR_Q0] = m_estMinDistSbt[SBT_HOR_Q1] = 0;
+    for( int i = 0; i < numPartX; i++ )
+    {
+      m_estMinDistSbt[SBT_HOR_Q0] += dist[0][i] + ( ( dist[1][i] + dist[2][i] + dist[3][i] ) << shift );
+      m_estMinDistSbt[SBT_HOR_Q1] += dist[3][i] + ( ( dist[0][i] + dist[1][i] + dist[2][i] ) << shift );
+    }
+    m_estMinDistSbt[SBT_HOR_Q0] = m_estMinDistSbt[SBT_HOR_Q0] >> shift;
+    m_estMinDistSbt[SBT_HOR_Q1] = m_estMinDistSbt[SBT_HOR_Q1] >> shift;
+  }
+
+  //SBT fast algorithm 5: try N SBT modes with the lowest distortion
+  Distortion temp[NUMBER_SBT_MODE];
+  memcpy( temp, m_estMinDistSbt, sizeof( Distortion ) * NUMBER_SBT_MODE );
+  memset( m_sbtRdoOrder, 255, NUMBER_SBT_MODE ); // 255 marks an unused slot in the RDO order
+  int startIdx = 0, numRDO;
+  numRDO = CU::targetSbtAllowed( SBT_VER_HALF, sbtAllowed ) + CU::targetSbtAllowed( SBT_HOR_HALF, sbtAllowed );
+  numRDO = std::min( ( numRDO << 1 ), SBT_NUM_RDO );
+  for( int i = startIdx; i < startIdx + numRDO; i++ )
+  {
+    Distortion minDist = std::numeric_limits<uint64_t>::max();
+    for( int n = SBT_VER_H0; n <= SBT_HOR_H1; n++ )
+    {
+      if( temp[n] < minDist )
+      {
+        minDist = temp[n];
+        m_sbtRdoOrder[i] = n;
+      }
+    }
+    temp[m_sbtRdoOrder[i]] = std::numeric_limits<uint64_t>::max(); // remove the chosen mode from further selection
+  }
+
+  startIdx += numRDO;
+  numRDO = CU::targetSbtAllowed( SBT_VER_QUAD, sbtAllowed ) + CU::targetSbtAllowed( SBT_HOR_QUAD, sbtAllowed );
+  numRDO = std::min( ( numRDO << 1 ), SBT_NUM_RDO );
+  for( int i = startIdx; i < startIdx + numRDO; i++ )
+  {
+    Distortion minDist = std::numeric_limits<uint64_t>::max();
+    for( int n = SBT_VER_Q0; n <= SBT_HOR_Q1; n++ )
+    {
+      if( temp[n] < minDist )
+      {
+        minDist = temp[n];
+        m_sbtRdoOrder[i] = n;
+      }
+    }
+    temp[m_sbtRdoOrder[i]] = std::numeric_limits<uint64_t>::max(); // remove the chosen mode from further selection
+  }
+}
+
+uint8_t InterSearch::skipSbtByRDCost( int width, int height, int mtDepth, uint8_t sbtIdx, uint8_t sbtPos, double bestCost, Distortion distSbtOff, double costSbtOff, bool rootCbfSbtOff )
+{ // returns 0..3 to identify which early-skip rule fired, or MAX_UCHAR when none applies; width, height and mtDepth are not used here
+  int sbtMode = CU::getSbtMode( sbtIdx, sbtPos );
+
+  //SBT fast algorithm 2.2 : estimate a minimum RD cost of a SBT mode based on the luma distortion of uncoded part and coded part (assuming distortion can be reduced to 1/16);
+  // if this cost is larger than the best cost, no need to try a specific SBT mode
+  if( m_pcRdCost->calcRdCost( 11 << SCALE_BITS, m_estMinDistSbt[sbtMode] ) > bestCost )
+  {
+    return 0; //early skip type 0
+  }
+
+  if( costSbtOff != MAX_DOUBLE )
+  {
+    if( !rootCbfSbtOff )
+    {
+      //SBT fast algorithm 3: skip SBT when the residual is too small (estCost is more accurate than fast algorithm 1, counting PU mode bits)
+      uint64_t minNonZeroResiFracBits = 10 << SCALE_BITS;
+      Distortion distResiPart;
+      if( sbtIdx == SBT_VER_HALF || sbtIdx == SBT_HOR_HALF )
+      {
+        distResiPart = (Distortion)( ( ( m_estMinDistSbt[NUMBER_SBT_MODE] - m_estMinDistSbt[sbtMode] ) * 9 ) >> 4 );
+      }
+      else
+      {
+        distResiPart = (Distortion)( ( ( m_estMinDistSbt[NUMBER_SBT_MODE] - m_estMinDistSbt[sbtMode] ) * 3 ) >> 3 );
+      }
+
+      double estCost = ( costSbtOff - m_pcRdCost->calcRdCost( 0 << SCALE_BITS, distSbtOff ) ) + m_pcRdCost->calcRdCost( minNonZeroResiFracBits, m_estMinDistSbt[sbtMode] + distResiPart );
+      if( estCost > costSbtOff )
+      {
+        return 1;
+      }
+      if( estCost > bestCost )
+      {
+        return 2;
+      }
+    }
+    else
+    {
+      //SBT fast algorithm 4: skip SBT when an estimated RD cost is larger than the bestCost
+      double weight = sbtMode > SBT_HOR_H1 ? 0.4 : 0.6;
+      double estCost = ( ( costSbtOff - m_pcRdCost->calcRdCost( 0 << SCALE_BITS, distSbtOff ) ) * weight ) + m_pcRdCost->calcRdCost( 0 << SCALE_BITS, m_estMinDistSbt[sbtMode] );
+      if( estCost > bestCost )
+      {
+        return 3;
+      }
+    }
+  }
+  return MAX_UCHAR;
+}
+#endif
+
 void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &partitioner, Distortion *puiZeroDist /*= NULL*/
   , const bool luma, const bool chroma
 )
@@ -6011,6 +6284,12 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par const unsigned currDepth = partitioner.currTrDepth; bool bCheckFull = !partitioner.canSplit( TU_MAX_TR_SPLIT, cs ); +#if JVET_M0140_SBT + if( cu.sbtInfo && partitioner.canSplit( PartSplit( cu.getSbtTuSplit() ), cs ) ) + { + bCheckFull = false; + } +#endif bool bCheckSplit = !bCheckFull; // get temporary data @@ -6041,6 +6320,9 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par #else tu.emtIdx = 0; #endif +#if JVET_M0140_SBT + tu.checkTuNoResidual( partitioner.currPartIdx() ); +#endif #if JVET_M0427_INLOOP_RESHAPER const Slice &slice = *cs.slice; @@ -6127,15 +6409,41 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par uint8_t nNumTransformCands = 1 + ( tsAllowed ? 1 : 0 ) + ( mtsAllowed ?
4 : 0 ); // DCT + TS + 4 MTS = 6 tests std::vector<TrMode> trModes; trModes.push_back( TrMode( 0, true ) ); //DCT2 +#if JVET_M0140_SBT + nNumTransformCands = 1; + //for a SBT-no-residual TU, the RDO process should be called once, in order to get the RD cost + if( tsAllowed && !tu.noResidual ) +#else if( tsAllowed ) +#endif { trModes.push_back( TrMode( 1, true ) ); +#if JVET_M0140_SBT + nNumTransformCands++; +#endif } + +#if APPLY_SBT_SL_ON_MTS + //skip MTS if DCT2 is the best + if( mtsAllowed && ( !tu.cu->slice->getSPS()->getSpsNext().getUseSBT() || CU::getSbtIdx( m_histBestSbt ) != SBT_OFF_DCT ) ) +#else if( mtsAllowed ) +#endif { for( int i = 2; i < 6; i++ ) { +#if APPLY_SBT_SL_ON_MTS + //skip the non-best Mts mode + if( !tu.cu->slice->getSPS()->getSpsNext().getUseSBT() || ( m_histBestMtsIdx == MAX_UCHAR || m_histBestMtsIdx == i ) ) + { +#endif trModes.push_back( TrMode( i, true ) ); +#if JVET_M0140_SBT + nNumTransformCands++; +#endif +#if APPLY_SBT_SL_ON_MTS + } +#endif } } #endif @@ -6249,6 +6557,10 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par nonCoeffDist = m_pcRdCost->getDistPart( zeroBuf, orgResi, channelBitDepth, compID, DF_SSE ); // initialized with zero residual distortion } +#if JVET_M0140_SBT + if( !tu.noResidual ) + { +#endif const bool prevCbf = ( compID == COMPONENT_Cr ? 
tu.cbf[COMPONENT_Cb] : false ); m_CABACEstimator->cbf_comp( *csFull, false, compArea, currDepth, prevCbf ); @@ -6256,6 +6568,9 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par { m_CABACEstimator->cross_comp_pred( tu, compID ); } +#if JVET_M0140_SBT + } +#endif nonCoeffFracBits = m_CABACEstimator->getEstFracBits(); #if WCG_EXT @@ -6381,6 +6696,12 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par isLastBest = isLastMode; } +#if JVET_M0140_SBT + if( tu.noResidual ) + { + CHECK( currCompFracBits > 0 || currAbsSum, "currCompFracBits > 0 when tu noResidual" ); + } +#endif } } @@ -6394,7 +6715,10 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par m_CABACEstimator->getCtx() = ctxStart; m_CABACEstimator->resetBits(); - +#if JVET_M0140_SBT + if( !tu.noResidual ) + { +#endif static const ComponentID cbf_getComp[3] = { COMPONENT_Cb, COMPONENT_Cr, COMPONENT_Y }; for( unsigned c = 0; c < numTBlocks; c++) { @@ -6409,6 +6733,9 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par m_CABACEstimator->cbf_comp( *csFull, TU::getCbfAtDepth( tu, compID, currDepth ), tu.blocks[compID], currDepth, prevCbf ); } } +#if JVET_M0140_SBT + } +#endif for (uint32_t ch = 0; ch < numValidComp; ch++) { @@ -6430,6 +6757,12 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par uiSingleDist += uiSingleDistComp[compID]; } } +#if JVET_M0140_SBT + if( tu.noResidual ) + { + CHECK( m_CABACEstimator->getEstFracBits() > 0, "no residual TU's bits shall be 0" ); + } +#endif csFull->fracBits += m_CABACEstimator->getEstFracBits(); csFull->dist += uiSingleDist; @@ -6455,6 +6788,12 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par { partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs ); } +#if JVET_M0140_SBT + else if( cu.sbtInfo && partitioner.canSplit( PartSplit( cu.getSbtTuSplit() ), cs ) ) + { + partitioner.splitCurrArea( 
PartSplit( cu.getSbtTuSplit() ), cs ); + } +#endif else THROW( "Implicit TU split not available!" ); @@ -6574,6 +6913,9 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa { cu.skip = true; cu.rootCbf = false; +#if JVET_M0140_SBT + CHECK( cu.sbtInfo != 0, "sbtInfo shall be 0 if CU has no residual" ); +#endif cs.getResiBuf().fill(0); { cs.getRecoBuf().copyFrom(cs.getPredBuf() ); @@ -6744,6 +7086,9 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa if (zeroCost < cs.cost || !cu.rootCbf) { +#if JVET_M0140_SBT + cu.sbtInfo = 0; +#endif cu.rootCbf = false; cs.clearTUs(); diff --git a/source/Lib/EncoderLib/InterSearch.h b/source/Lib/EncoderLib/InterSearch.h index 178905b3bd93ca8832092b664ffe33844f0acf89..a77004bf25f7e8f9b7a089715edfee1c2a62cb95 100644 --- a/source/Lib/EncoderLib/InterSearch.h +++ b/source/Lib/EncoderLib/InterSearch.h @@ -164,6 +164,14 @@ protected: bool m_isInitialized; unsigned int m_numBVs, m_numBV16s; Mv m_acBVs[IBC_NUM_CANDIDATES]; +#if JVET_M0140_SBT + Distortion m_estMinDistSbt[NUMBER_SBT_MODE + 1]; // estimated minimum SSE value of the PU if using a SBT mode + uint8_t m_sbtRdoOrder[NUMBER_SBT_MODE]; // order of SBT mode in RDO + bool m_skipSbtAll; // to skip all SBT modes for the current PU + uint8_t m_histBestSbt; // historical best SBT mode for PU of certain SSE values + uint8_t m_histBestMtsIdx; // historical best MTS idx for PU of certain SSE values +#endif + public: InterSearch(); virtual ~InterSearch(); @@ -186,6 +194,18 @@ public: void destroy (); +#if JVET_M0140_SBT + void calcMinDistSbt ( CodingStructure &cs, const CodingUnit& cu, const uint8_t sbtAllowed ); + uint8_t skipSbtByRDCost ( int width, int height, int mtDepth, uint8_t sbtIdx, uint8_t sbtPos, double bestCost, Distortion distSbtOff, double costSbtOff, bool rootCbfSbtOff ); + bool getSkipSbtAll () { return m_skipSbtAll; } /* returns the m_skipSbtAll flag */ + void setSkipSbtAll ( bool skipAll ) { m_skipSbtAll = skipAll; } /* overwrites the m_skipSbtAll flag */ + uint8_t getSbtRdoOrder ( 
uint8_t idx ) { assert( m_sbtRdoOrder[idx] < NUMBER_SBT_MODE ); assert( (uint32_t)( m_estMinDistSbt[m_sbtRdoOrder[idx]] >> 2 ) < ( MAX_UINT >> 1 ) ); return m_sbtRdoOrder[idx]; } /* returns the SBT mode stored at slot idx of m_sbtRdoOrder; asserts that the slot holds a valid mode and that its estimate is below the checked bound */ + Distortion getEstDistSbt ( uint8_t sbtMode) { return m_estMinDistSbt[sbtMode]; } /* returns the stored estimate for sbtMode */ + void initTuAnalyzer () { m_estMinDistSbt[NUMBER_SBT_MODE] = std::numeric_limits<uint64_t>::max(); m_skipSbtAll = false; } /* resets the whole-CU entry to the uint64_t maximum and clears m_skipSbtAll */ + void setHistBestTrs ( uint8_t sbtInfo, uint8_t mtsIdx ) { m_histBestSbt = sbtInfo; m_histBestMtsIdx = mtsIdx; } /* stores both values in m_histBestSbt / m_histBestMtsIdx */ + void initSbtRdoOrder ( uint8_t sbtMode ) { m_sbtRdoOrder[0] = sbtMode; m_estMinDistSbt[0] = m_estMinDistSbt[sbtMode]; } /* puts sbtMode in the first RDO slot and copies its estimate into m_estMinDistSbt[0] */ +#endif + void setTempBuffers (CodingStructure ****pSlitCS, CodingStructure ****pFullCS, CodingStructure **pSaveCS ); void resetCtuRecord () { m_ctuRecord.clear(); } #if ENABLE_SPLIT_PARALLELISM diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp index 248ee6db4253939623ff5f674a122e333e233592..823751dc792402248f445100f41d940d689eaec5 100644 --- a/source/Lib/EncoderLib/VLCWriter.cpp +++ b/source/Lib/EncoderLib/VLCWriter.cpp @@ -555,6 +555,13 @@ void HLSWriter::codeSPSNext( const SPSNext& spsNext, const bool usePCM ) } #endif +#if JVET_M0140_SBT + WRITE_FLAG( spsNext.getUseSBT() ? 1 : 0, "sbt_enable_flag" ); + if( spsNext.getUseSBT() ) + { + WRITE_FLAG( spsNext.getMaxSbtSize() == 64 ? 1 : 0, "max_sbt_size_64_flag" ); + } +#endif WRITE_FLAG( spsNext.getUseAffine() ? 1 : 0, "affine_flag" ); if ( spsNext.getUseAffine() ) {