diff --git a/cfg/encoder_intra_vtm.cfg b/cfg/encoder_intra_vtm.cfg index 5de7eca29a7a0e22936e16c0e7123cbacbd2830c..814bf20d9368cd7c9a6675e9840f2ec1ae4c2e13 100644 --- a/cfg/encoder_intra_vtm.cfg +++ b/cfg/encoder_intra_vtm.cfg @@ -48,9 +48,9 @@ InternalBitDepth : 10 # codec operating bit-depth #=========== Coding Tools ================= SAO : 1 # Sample adaptive offset (0: OFF, 1: ON) -AMP : 1 # Asymmetric motion partitions (0: OFF, 1: ON) TransformSkip : 1 # Transform skipping (0: OFF, 1: ON) TransformSkipFast : 1 # Fast Transform skipping (0: OFF, 1: ON) +TransformSkipLog2MaxSize : 5 SAOLcuBoundary : 0 # SAOLcuBoundary using non-deblocked pixels (0: OFF, 1: ON) #============ Slices ================ @@ -77,11 +77,11 @@ PCMFilterDisableFlag : 0 # 0: Enable loop filterin TransquantBypassEnable : 0 # Value of PPS flag. CUTransquantBypassFlagForce: 0 # Force transquant bypass mode, when transquant_bypass_enable_flag is enabled -#============ JEM settings ====================== +#============ VTM settings ====================== LoopFilterTcOffset_div2 : 0 SEIDecodedPictureHash : 0 -CbQpOffset : 0 -CrQpOffset : 0 +CbQpOffset : 1 +CrQpOffset : 1 TemporalSubsampleRatio : 8 #============ NEXT ==================== @@ -101,8 +101,9 @@ MaxBTDepthISliceL : 3 MaxBTDepthISliceC : 3 MTT : 1 -EMT : 1 -EMTFast : 1 +MTS : 1 +MTSIntraMaxCand : 3 +MTSInterMaxCand : 4 Affine : 1 SubPuMvp : 1 MaxNumMergeCand : 6 @@ -110,7 +111,7 @@ LMChroma : 1 # use CCLM only DepQuant : 1 IMV : 2 ALF : 1 -CPR : 0 # turned off in CTC +IBC : 0 # turned off in CTC # Fast tools PBIntraFast : 1 diff --git a/cfg/encoder_lowdelay_P_vtm.cfg b/cfg/encoder_lowdelay_P_vtm.cfg index 16ee4e63f0bb3871d8c170bcda6e269cdb2b6270..132d9db5c55967284a1d2ee5497da6c44ee39ed5 100644 --- a/cfg/encoder_lowdelay_P_vtm.cfg +++ b/cfg/encoder_lowdelay_P_vtm.cfg @@ -56,9 +56,9 @@ InternalBitDepth : 10 # codec operating bit-depth #=========== Coding Tools ================= SAO : 1 # Sample adaptive offset (0: OFF, 1: ON) -AMP : 1 # 
Asymmetric motion partitions (0: OFF, 1: ON) TransformSkip : 1 # Transform skipping (0: OFF, 1: ON) TransformSkipFast : 1 # Fast Transform skipping (0: OFF, 1: ON) +TransformSkipLog2MaxSize : 5 SAOLcuBoundary : 0 # SAOLcuBoundary using non-deblocked pixels (0: OFF, 1: ON) #============ Slices ================ @@ -94,11 +94,11 @@ RCLCUSeparateModel : 1 # Rate control: use LCU l InitialQP : 0 # Rate control: initial QP RCForceIntraQP : 0 # Rate control: force intra QP to be equal to initial QP -#============ JEM settings ====================== +#============ VTM settings ====================== LoopFilterTcOffset_div2 : 0 SEIDecodedPictureHash : 0 -CbQpOffset : 0 -CrQpOffset : 0 +CbQpOffset : 1 +CrQpOffset : 1 #============ NEXT ==================== @@ -117,8 +117,9 @@ MaxBTDepthISliceL : 3 MaxBTDepthISliceC : 3 MTT : 1 -EMT : 1 -EMTFast : 1 +MTS : 1 +MTSIntraMaxCand : 3 +MTSInterMaxCand : 4 Affine : 1 SubPuMvp : 1 MaxNumMergeCand : 6 @@ -127,7 +128,7 @@ DepQuant : 1 IMV : 2 ALF : 1 MHIntra : 1 -CPR : 0 # turned off in CTC +IBC : 0 # turned off in CTC # Fast tools PBIntraFast : 1 diff --git a/cfg/encoder_lowdelay_vtm.cfg b/cfg/encoder_lowdelay_vtm.cfg index 935e259733e48722779be43f555e7d16b5584c91..7d8350f9257c2d87be841e0083792ada75bc11b3 100644 --- a/cfg/encoder_lowdelay_vtm.cfg +++ b/cfg/encoder_lowdelay_vtm.cfg @@ -56,9 +56,9 @@ InternalBitDepth : 10 # codec operating bit-depth #=========== Coding Tools ================= SAO : 1 # Sample adaptive offset (0: OFF, 1: ON) -AMP : 1 # Asymmetric motion partitions (0: OFF, 1: ON) TransformSkip : 1 # Transform skipping (0: OFF, 1: ON) TransformSkipFast : 1 # Fast Transform skipping (0: OFF, 1: ON) +TransformSkipLog2MaxSize : 5 SAOLcuBoundary : 0 # SAOLcuBoundary using non-deblocked pixels (0: OFF, 1: ON) #============ Slices ================ @@ -94,11 +94,11 @@ RCLCUSeparateModel : 1 # Rate control: use LCU l InitialQP : 0 # Rate control: initial QP RCForceIntraQP : 0 # Rate control: force intra QP to be equal to initial 
QP -#============ JEM settings ====================== +#============ VTM settings ====================== LoopFilterTcOffset_div2 : 0 SEIDecodedPictureHash : 0 -CbQpOffset : 0 -CrQpOffset : 0 +CbQpOffset : 1 +CrQpOffset : 1 #============ NEXT ==================== @@ -117,8 +117,9 @@ MaxBTDepthISliceL : 3 MaxBTDepthISliceC : 3 MTT : 1 -EMT : 1 -EMTFast : 1 +MTS : 1 +MTSIntraMaxCand : 3 +MTSInterMaxCand : 4 Affine : 1 SubPuMvp : 1 MaxNumMergeCand : 6 @@ -130,7 +131,7 @@ GBi : 1 GBiFast : 1 MHIntra : 1 Triangle : 1 -CPR : 0 # turned off in CTC +IBC : 0 # turned off in CTC # Fast tools PBIntraFast : 1 diff --git a/cfg/encoder_randomaccess_vtm.cfg b/cfg/encoder_randomaccess_vtm.cfg index 2e938d715f9fb5c2b2b7b93fa69b3d3732944a48..f3404e3f2b958461236f98e56e0387d315808658 100644 --- a/cfg/encoder_randomaccess_vtm.cfg +++ b/cfg/encoder_randomaccess_vtm.cfg @@ -70,9 +70,9 @@ InternalBitDepth : 10 # codec operating bit-depth #=========== Coding Tools ================= SAO : 1 # Sample adaptive offset (0: OFF, 1: ON) -AMP : 1 # Asymmetric motion partitions (0: OFF, 1: ON) TransformSkip : 1 # Transform skipping (0: OFF, 1: ON) TransformSkipFast : 1 # Fast Transform skipping (0: OFF, 1: ON) +TransformSkipLog2MaxSize : 5 SAOLcuBoundary : 0 # SAOLcuBoundary using non-deblocked pixels (0: OFF, 1: ON) #============ Slices ================ @@ -108,11 +108,11 @@ RCLCUSeparateModel : 1 # Rate control: use LCU l InitialQP : 0 # Rate control: initial QP RCForceIntraQP : 0 # Rate control: force intra QP to be equal to initial QP -#============ JEM settings ====================== +#============ VTM settings ====================== LoopFilterTcOffset_div2 : 0 SEIDecodedPictureHash : 0 -CbQpOffset : 0 -CrQpOffset : 0 +CbQpOffset : 1 +CrQpOffset : 1 #============ NEXT ==================== @@ -131,8 +131,9 @@ MaxBTDepthISliceL : 3 MaxBTDepthISliceC : 3 MTT : 1 -EMT : 1 -EMTFast : 1 +MTS : 1 +MTSIntraMaxCand : 3 +MTSInterMaxCand : 4 Affine : 1 SubPuMvp : 1 MaxNumMergeCand : 6 @@ -145,7 +146,7 @@ 
GBiFast : 1 BIO : 1 MHIntra : 1 Triangle : 1 -CPR : 0 # turned off in CTC +IBC : 0 # turned off in CTC # Fast tools PBIntraFast : 1 diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp index 4e47a75dfb014ebec6739b5edf17f62e7ff91491..6afd8db7333f1376da1346246b9cd70f6ae5befa 100644 --- a/source/App/EncoderApp/EncApp.cpp +++ b/source/App/EncoderApp/EncApp.cpp @@ -141,7 +141,6 @@ void EncApp::xInitLibCfg() m_cEncLib.setAccessUnitDelimiter ( m_AccessUnitDelimiter ); m_cEncLib.setMaxTempLayer ( m_maxTempLayer ); - m_cEncLib.setUseAMP( m_enableAMP ); //===== Slice ======== @@ -224,13 +223,23 @@ void EncApp::xInitLibCfg() m_cEncLib.setDisableMotionCompression ( m_DisableMotionCompression ); m_cEncLib.setMTTMode ( m_MTT ); m_cEncLib.setUseLMChroma ( m_LMChroma ); +#if JVET_M0142_CCLM_COLLOCATED_CHROMA + m_cEncLib.setCclmCollocatedChromaFlag ( m_cclmCollocatedChromaFlag ); +#endif #if ENABLE_WPP_PARALLELISM m_cEncLib.setUseAltDQPCoding ( m_AltDQPCoding ); #endif +#if JVET_M0464_UNI_MTS + m_cEncLib.setIntraMTS ( m_MTS & 1 ); + m_cEncLib.setIntraMTSMaxCand ( m_MTSIntraMaxCand ); + m_cEncLib.setInterMTS ( ( m_MTS >> 1 ) & 1 ); + m_cEncLib.setInterMTSMaxCand ( m_MTSInterMaxCand ); +#else m_cEncLib.setIntraEMT ( m_EMT & 1 ); m_cEncLib.setFastIntraEMT ( m_FastEMT & m_EMT & 1 ); m_cEncLib.setInterEMT ( ( m_EMT >> 1 ) & 1 ); m_cEncLib.setFastInterEMT ( ( m_FastEMT >> 1 ) & ( m_EMT >> 1 ) & 1 ); +#endif m_cEncLib.setUseCompositeRef ( m_compositeRefEnabled ); m_cEncLib.setUseGBi ( m_GBi ); m_cEncLib.setUseGBiFast ( m_GBiFast ); @@ -249,13 +258,13 @@ void EncApp::xInitLibCfg() m_cEncLib.setUseMHIntra ( m_MHIntra ); m_cEncLib.setUseTriangle ( m_Triangle ); - m_cEncLib.setCPRMode ( m_CPRMode ); - m_cEncLib.setCPRLocalSearchRangeX ( m_CPRLocalSearchRangeX ); - m_cEncLib.setCPRLocalSearchRangeY ( m_CPRLocalSearchRangeY ); - m_cEncLib.setCPRHashSearch ( m_CPRHashSearch ); - m_cEncLib.setCPRHashSearchMaxCand ( m_CPRHashSearchMaxCand ); - 
m_cEncLib.setCPRHashSearchRange4SmallBlk ( m_CPRHashSearchRange4SmallBlk ); - m_cEncLib.setCPRFastMethod ( m_CPRFastMethod ); + m_cEncLib.setIBCMode ( m_IBCMode ); + m_cEncLib.setIBCLocalSearchRangeX ( m_IBCLocalSearchRangeX ); + m_cEncLib.setIBCLocalSearchRangeY ( m_IBCLocalSearchRangeY ); + m_cEncLib.setIBCHashSearch ( m_IBCHashSearch ); + m_cEncLib.setIBCHashSearchMaxCand ( m_IBCHashSearchMaxCand ); + m_cEncLib.setIBCHashSearchRange4SmallBlk ( m_IBCHashSearchRange4SmallBlk ); + m_cEncLib.setIBCFastMethod ( m_IBCFastMethod ); m_cEncLib.setUseWrapAround ( m_wrapAround ); m_cEncLib.setWrapAroundOffset ( m_wrapAroundOffset ); diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp index d5fff470e0c3445ffd2155c40bb63ef4b479b522..10241f5caae994d3e4c68d692541232a9ccedee5 100644 --- a/source/App/EncoderApp/EncAppCfg.cpp +++ b/source/App/EncoderApp/EncAppCfg.cpp @@ -828,6 +828,20 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) ("LMChroma", m_LMChroma, 1, " LMChroma prediction " "\t0: Disable LMChroma\n" "\t1: Enable LMChroma\n") +#if JVET_M0142_CCLM_COLLOCATED_CHROMA + ("CclmCollocatedChroma", m_cclmCollocatedChromaFlag, false, "Specifies the location of the top-left downsampled luma sample in cross-component linear model intra prediction relative to the top-left luma sample\n" + "\t0: horizontally co-sited, vertically shifted by 0.5 units of luma samples\n" + "\t1: collocated\n") +#endif +#if JVET_M0464_UNI_MTS + ("MTS", m_MTS, 0, "Multiple Transform Set (MTS)\n" + "\t0: Disable MTS\n" + "\t1: Enable only Intra MTS\n" + "\t2: Enable only Inter MTS\n" + "\t3: Enable both Intra & Inter MTS\n") + ("MTSIntraMaxCand", m_MTSIntraMaxCand, 3, "Number of additional candidates to test in encoder search for MTS in intra slices\n") + ("MTSInterMaxCand", m_MTSInterMaxCand, 4, "Number of additional candidates to test in encoder search for MTS in inter slices\n") +#else ("EMT,-emt", m_EMT, 0, "Enhanced Multiple Transform (EMT)\n" "\t0: Disable 
EMT\n" "\t1: Enable only Intra EMT\n" @@ -838,6 +852,7 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) "\t1: Enable fast methods only for Intra EMT\n" "\t2: Enable fast methods only for Inter EMT\n" "\t3: Enable fast methods for both Intra & Inter EMT\n") +#endif ("CompositeLTReference", m_compositeRefEnabled, false, "Enable Composite Long Term Reference Frame") ("GBi", m_GBi, false, "Enable Generalized Bi-prediction(GBi)") ("GBiFast", m_GBiFast, false, "Fast methods for Generalized Bi-prediction(GBi)\n") @@ -850,13 +865,13 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) ("MHIntra", m_MHIntra, false, "Enable MHIntra mode") ("Triangle", m_Triangle, false, "Enable triangular shape motion vector prediction (0:off, 1:on)") - ( "CPR", m_CPRMode, 0u, "CPRMode (0x1:enabled, 0x0:disabled) [default: disabled]") - ( "CPRLocalSearchRangeX", m_CPRLocalSearchRangeX, 128u, "Search range of CPR local search in x direction") - ( "CPRLocalSearchRangeY", m_CPRLocalSearchRangeY, 128u, "Search range of CPR local search in y direction") - ( "CPRHashSearch", m_CPRHashSearch, 1u, "Hash based CPR search") - ( "CPRHashSearchMaxCand", m_CPRHashSearchMaxCand, 256u, "Max candidates for hash based CPR search") - ( "CPRHashSearchRange4SmallBlk", m_CPRHashSearchRange4SmallBlk, 256u, "Small block search range in based CPR search") - ( "CPRFastMethod", m_CPRFastMethod, 6u, "Fast methods for CPR") + ( "IBC", m_IBCMode, 0u, "IBCMode (0x1:enabled, 0x0:disabled) [default: disabled]") + ( "IBCLocalSearchRangeX", m_IBCLocalSearchRangeX, 128u, "Search range of IBC local search in x direction") + ( "IBCLocalSearchRangeY", m_IBCLocalSearchRangeY, 128u, "Search range of IBC local search in y direction") + ( "IBCHashSearch", m_IBCHashSearch, 1u, "Hash based IBC search") + ( "IBCHashSearchMaxCand", m_IBCHashSearchMaxCand, 256u, "Max candidates for hash based IBC search") + ( "IBCHashSearchRange4SmallBlk", m_IBCHashSearchRange4SmallBlk, 256u, "Small block search range in based IBC search") + ( 
"IBCFastMethod", m_IBCFastMethod, 6u, "Fast methods for IBC") ("WrapAround", m_wrapAround, false, "Enable horizontal wrap-around motion compensation for inter prediction (0:off, 1:on) [default: off]") ("WrapAroundOffset", m_wrapAroundOffset, 0u, "Offset in luma samples used for computing the horizontal wrap-around position") @@ -943,8 +958,8 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) ("CbQpOffset,-cbqpofs", m_cbQpOffset, 0, "Chroma Cb QP Offset") ("CrQpOffset,-crqpofs", m_crQpOffset, 0, "Chroma Cr QP Offset") - ("CbQpOffsetDualTree", m_cbQpOffsetDualTree, 1, "Chroma Cb QP Offset for dual tree") - ("CrQpOffsetDualTree", m_crQpOffsetDualTree, 1, "Chroma Cr QP Offset for dual tree") + ("CbQpOffsetDualTree", m_cbQpOffsetDualTree, 0, "Chroma Cb QP Offset for dual tree") + ("CrQpOffsetDualTree", m_crQpOffsetDualTree, 0, "Chroma Cr QP Offset for dual tree") #if ER_CHROMA_QP_WCG_PPS ("WCGPPSEnable", m_wcgChromaQpControl.enabled, false, "1: Enable the WCG PPS chroma modulation scheme. 0 (default) disabled") ("WCGPPSCbQpScale", m_wcgChromaQpControl.chromaCbQpScale, 1.0, "WCG PPS Chroma Cb QP Scale") @@ -983,14 +998,18 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) ("DeblockingFilterMetric", m_DeblockingFilterMetric, false) #endif // Coding tools - ("AMP", m_enableAMP, true, "Enable asymmetric motion partitions") ("CrossComponentPrediction", m_crossComponentPredictionEnabledFlag, false, "Enable the use of cross-component prediction (not valid in V1 profiles)") ("ReconBasedCrossCPredictionEstimate", m_reconBasedCrossCPredictionEstimate, false, "When determining the alpha value for cross-component prediction, use the decoded residual rather than the pre-transform encoder-side residual") ("SaoLumaOffsetBitShift", saoOffsetBitShift[CHANNEL_TYPE_LUMA], 0, "Specify the luma SAO bit-shift. 
If negative, automatically calculate a suitable value based upon bit depth and initial QP") ("SaoChromaOffsetBitShift", saoOffsetBitShift[CHANNEL_TYPE_CHROMA], 0, "Specify the chroma SAO bit-shift. If negative, automatically calculate a suitable value based upon bit depth and initial QP") ("TransformSkip", m_useTransformSkip, false, "Intra transform skipping") +#if JVET_M0464_UNI_MTS + ("TransformSkipFast", m_useTransformSkipFast, false, "Fast encoder search for transform skipping, winner takes it all mode.") + ("TransformSkipLog2MaxSize", m_log2MaxTransformSkipBlockSize, 5U, "Specify transform-skip maximum size. Minimum 2, Maximum 5. (not valid in V1 profiles)") +#else ("TransformSkipFast", m_useTransformSkipFast, false, "Fast intra transform skipping") ("TransformSkipLog2MaxSize", m_log2MaxTransformSkipBlockSize, 2U, "Specify transform-skip maximum size. Minimum 2. (not valid in V1 profiles)") +#endif ("ImplicitResidualDPCM", m_rdpcmEnabledFlag[RDPCM_SIGNAL_IMPLICIT], false, "Enable implicitly signalled residual DPCM for intra (also known as sample-adaptive intra predict) (not valid in V1 profiles)") ("ExplicitResidualDPCM", m_rdpcmEnabledFlag[RDPCM_SIGNAL_EXPLICIT], false, "Enable explicitly signalled residual DPCM for inter (not valid in V1 profiles)") ("ResidualRotation", m_transformSkipRotationEnabledFlag, false, "Enable rotation of transform-skipped and transquant-bypassed TUs through 180 degrees prior to entropy coding (not valid in V1 profiles)") @@ -1922,15 +1941,19 @@ bool EncAppCfg::xCheckParameter() #endif xConfirmPara( m_LMChroma, "LMChroma only allowed with NEXT profile" ); xConfirmPara( m_LargeCTU, "Large CTU is only allowed with NEXT profile" ); - xConfirmPara( m_SubPuMvpMode != 0, "Sub-PU motion vector prediction is only allowed with NEXT profile" ); - xConfirmPara( m_BIO, "BIO only allowed with NEXT profile" ); xConfirmPara( m_DisableMotionCompression, "Disable motion data compression only allowed with NEXT profile" ); xConfirmPara( m_MTT, "Multi 
type tree is only allowed with NEXT profile" ); xConfirmPara( m_ImvMode, "IMV is only allowed with NEXT profile" ); - xConfirmPara(m_CPRMode, "CPR Mode only allowed with NEXT profile"); + xConfirmPara(m_IBCMode, "IBC Mode only allowed with NEXT profile"); xConfirmPara( m_useFastLCTU, "Fast large CTU can only be applied when encoding with NEXT profile" ); +#if JVET_M0464_UNI_MTS + xConfirmPara( m_MTS, "MTS only allowed with NEXT profile" ); + xConfirmPara( m_MTSIntraMaxCand, "MTS only allowed with NEXT profile" ); + xConfirmPara( m_MTSInterMaxCand, "MTS only allowed with NEXT profile" ); +#else xConfirmPara( m_EMT, "EMT only allowed with NEXT profile" ); xConfirmPara( m_FastEMT, "EMT only allowed with NEXT profile" ); +#endif xConfirmPara( m_compositeRefEnabled, "Composite Reference Frame is only allowed with NEXT profile" ); xConfirmPara( m_GBi, "GBi is only allowed with NEXT profile" ); xConfirmPara( m_GBiFast, "GBiFast is only allowed with NEXT profile" ); @@ -2059,7 +2082,11 @@ bool EncAppCfg::xCheckParameter() xConfirmPara(m_profile == Profile::MAINSTILLPICTURE && m_framesToBeEncoded > 1, "Number of frames to be encoded must be 1 when main still picture profile is used."); xConfirmPara(m_crossComponentPredictionEnabledFlag==true, "CrossComponentPrediction must not be used for non main-RExt profiles."); +#if JVET_M0464_UNI_MTS + xConfirmPara(m_log2MaxTransformSkipBlockSize>=6, "Transform Skip Log2 Max Size must be less or equal to 5."); +#else xConfirmPara(m_log2MaxTransformSkipBlockSize!=2, "Transform Skip Log2 Max Size must be 2 for V1 profiles."); +#endif xConfirmPara(m_transformSkipRotationEnabledFlag==true, "UseResidualRotation must not be enabled for non main-RExt profiles."); xConfirmPara(m_transformSkipContextEnabledFlag==true, "UseSingleSignificanceMapContext must not be enabled for non main-RExt profiles."); xConfirmPara(m_rdpcmEnabledFlag[RDPCM_SIGNAL_IMPLICIT]==true, "ImplicitResidualDPCM must not be enabled for non main-RExt profiles."); @@ -2148,6 
+2175,7 @@ bool EncAppCfg::xCheckParameter() xConfirmPara (m_log2MaxTransformSkipBlockSize < 2, "Transform Skip Log2 Max Size must be at least 2 (4x4)"); +#if !JVET_M0464_UNI_MTS if (m_log2MaxTransformSkipBlockSize!=2 && m_useTransformSkipFast) { msg( WARNING, "***************************************************************************\n"); @@ -2156,6 +2184,7 @@ bool EncAppCfg::xCheckParameter() msg( WARNING, "** It may be better to disable transform skip fast mode **\n"); msg( WARNING, "***************************************************************************\n"); } +#endif xConfirmPara( m_quadtreeTULog2MaxSize * m_tuLog2MaxSize >= 0, "Setting of TULog2MaxSize and QuadtreeTULog2MaxSize is mutually exclusive - use only one of the parameters" ); @@ -2268,8 +2297,14 @@ bool EncAppCfg::xCheckParameter() m_maxNumAffineMergeCand = m_SubPuMvpMode; } +#if JVET_M0464_UNI_MTS + xConfirmPara( m_MTS < 0 || m_MTS > 3, "MTS must be greater than 0 smaller than 4" ); + xConfirmPara( m_MTSIntraMaxCand < 0 || m_MTSIntraMaxCand > 5, "m_MTSIntraMaxCand must be greater than 0 and smaller than 6" ); + xConfirmPara( m_MTSInterMaxCand < 0 || m_MTSInterMaxCand > 5, "m_MTSInterMaxCand must be greater than 0 and smaller than 6" ); +#else xConfirmPara( m_EMT < 0 || m_EMT >3, "EMT must be 0, 1, 2 or 3" ); xConfirmPara( m_FastEMT < 0 || m_FastEMT >3, "FEMT must be 0, 1, 2 or 3" ); +#endif if( m_usePCM) { for (uint32_t channelType = 0; channelType < MAX_NUM_CHANNEL_TYPE; channelType++) @@ -3101,8 +3136,18 @@ void EncAppCfg::xPrintParameter() msg( VERBOSE, "AltDQPCoding:%d ", m_AltDQPCoding ); #endif msg( VERBOSE, "LMChroma:%d ", m_LMChroma ); +#if JVET_M0142_CCLM_COLLOCATED_CHROMA + if( m_LMChroma && m_chromaFormatIDC == CHROMA_420 ) + { + msg( VERBOSE, "CclmCollocatedChroma:%d ", m_cclmCollocatedChromaFlag ); + } +#endif +#if JVET_M0464_UNI_MTS + msg( VERBOSE, "MTS: %1d(intra) %1d(inter) ", m_MTS & 1, ( m_MTS >> 1 ) & 1 ); +#else msg( VERBOSE, "EMT: %1d(intra) %1d(inter) ", m_EMT & 1, ( 
m_EMT >> 1 ) & 1 ); - msg(VERBOSE, "CompositeLTReference:%d ", m_compositeRefEnabled); +#endif + msg( VERBOSE, "CompositeLTReference:%d ", m_compositeRefEnabled); msg( VERBOSE, "GBi:%d ", m_GBi ); msg( VERBOSE, "GBiFast:%d ", m_GBiFast ); #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET @@ -3111,7 +3156,7 @@ void EncAppCfg::xPrintParameter() msg(VERBOSE, "MHIntra:%d ", m_MHIntra); msg( VERBOSE, "Triangle:%d ", m_Triangle ); } - msg(VERBOSE, "CPR:%d ", m_CPRMode); + msg(VERBOSE, "IBC:%d ", m_IBCMode); msg( VERBOSE, "WrapAround:%d ", m_wrapAround); if( m_wrapAround ) { @@ -3127,7 +3172,11 @@ void EncAppCfg::xPrintParameter() msg( VERBOSE, "FastMrg:%d ", m_useFastMrg ); msg( VERBOSE, "PBIntraFast:%d ", m_usePbIntraFast ); if( m_ImvMode == 2 ) msg( VERBOSE, "IMV4PelFast:%d ", m_Imv4PelFast ); +#if JVET_M0464_UNI_MTS + if( m_MTS ) msg( VERBOSE, "MTSMaxCand: %1d(intra) %1d(inter) ", m_MTSIntraMaxCand, m_MTSInterMaxCand ); +#else if( m_EMT ) msg( VERBOSE, "EMTFast: %1d(intra) %1d(inter) ", ( m_FastEMT & m_EMT & 1 ), ( m_FastEMT >> 1 ) & ( m_EMT >> 1 ) & 1 ); +#endif msg( VERBOSE, "AMaxBT:%d ", m_useAMaxBT ); msg( VERBOSE, "E0023FastEnc:%d ", m_e0023FastEnc ); msg( VERBOSE, "ContentBasedFastQtbt:%d ", m_contentBasedFastQtbt ); diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h index 94bedb0c472ea8232629567dcc2dd166985bae70..8b67d948decc874a2639cc402a27ee0d900cf67b 100644 --- a/source/App/EncoderApp/EncAppCfg.h +++ b/source/App/EncoderApp/EncAppCfg.h @@ -148,7 +148,6 @@ protected: bool m_transformSkipRotationEnabledFlag; ///< control flag for transform-skip/transquant-bypass residual rotation bool m_transformSkipContextEnabledFlag; ///< control flag for transform-skip/transquant-bypass single significance map context bool m_rdpcmEnabledFlag[NUMBER_OF_RDPCM_SIGNALLING_MODES];///< control flags for residual DPCM - bool m_enableAMP; bool m_persistentRiceAdaptationEnabledFlag; ///< control flag for Golomb-Rice parameter adaptation over each slice 
bool m_cabacBypassAlignmentEnabledFlag; @@ -214,8 +213,17 @@ protected: bool m_AltDQPCoding; #endif int m_LMChroma; +#if JVET_M0142_CCLM_COLLOCATED_CHROMA + bool m_cclmCollocatedChromaFlag; +#endif +#if JVET_M0464_UNI_MTS + int m_MTS; ///< XZ: Multiple Transform Set + int m_MTSIntraMaxCand; ///< XZ: Number of additional candidates to test + int m_MTSInterMaxCand; ///< XZ: Number of additional candidates to test +#else int m_EMT; ///< XZ: Enhanced Multiple Transform int m_FastEMT; ///< XZ: Fast Methods of Enhanced Multiple Transform +#endif bool m_compositeRefEnabled; bool m_GBi; @@ -231,13 +239,13 @@ protected: bool m_Triangle; - unsigned m_CPRMode; - unsigned m_CPRLocalSearchRangeX; - unsigned m_CPRLocalSearchRangeY; - unsigned m_CPRHashSearch; - unsigned m_CPRHashSearchMaxCand; - unsigned m_CPRHashSearchRange4SmallBlk; - unsigned m_CPRFastMethod; + unsigned m_IBCMode; + unsigned m_IBCLocalSearchRangeX; + unsigned m_IBCLocalSearchRangeY; + unsigned m_IBCHashSearch; + unsigned m_IBCHashSearchMaxCand; + unsigned m_IBCHashSearchRange4SmallBlk; + unsigned m_IBCFastMethod; bool m_wrapAround; unsigned m_wrapAroundOffset; diff --git a/source/Lib/CommonLib/BinaryDecisionTree.cpp b/source/Lib/CommonLib/BinaryDecisionTree.cpp index 2fa1f97cc03e0b766e1b0895b02f4550ac3c7ddb..db6492688bd70ea46f6b41b511cf7d3e4597f400 100644 --- a/source/Lib/CommonLib/BinaryDecisionTree.cpp +++ b/source/Lib/CommonLib/BinaryDecisionTree.cpp @@ -38,6 +38,8 @@ #include "BinaryDecisionTree.h" #include "CommonDef.h" +#if !REMOVE_BIN_DECISION_TREE + #include <algorithm> struct DecisionTreeBuilder @@ -183,3 +185,5 @@ void DecisionTree::reduce( unsigned offset /*= 0*/, int depth /*= -1 */ ) isAvail[offset] = avail; } + +#endif diff --git a/source/Lib/CommonLib/BinaryDecisionTree.h b/source/Lib/CommonLib/BinaryDecisionTree.h index 271be4529e69cf35bf6e71f9d0bc1da5c5f39811..7ab0d6558cd9ece144e9545591a441480d742214 100644 --- a/source/Lib/CommonLib/BinaryDecisionTree.h +++ 
b/source/Lib/CommonLib/BinaryDecisionTree.h @@ -40,6 +40,8 @@ #include "CommonDef.h" +#if !REMOVE_BIN_DECISION_TREE + #define MAX_DEPTH_DECISION_TREE 5 #define MAX_NODES_DECISION_TREE ( 2 * ( 1 << MAX_DEPTH_DECISION_TREE ) ) @@ -92,3 +94,5 @@ DecisionTreeBuilder* decision( unsigned id, DecisionTreeBuilder* sub0, DecisionT DecisionTreeBuilder* decision( unsigned id, unsigned id0, DecisionTreeBuilder* sub1 ); #endif + +#endif diff --git a/source/Lib/CommonLib/CodingStatistics.h b/source/Lib/CommonLib/CodingStatistics.h index 7754f0a40d7240e120c459399fe19fec7d850b4e..748138fdf7a5339d69a21cab167088f17ac5e40c 100644 --- a/source/Lib/CommonLib/CodingStatistics.h +++ b/source/Lib/CommonLib/CodingStatistics.h @@ -108,6 +108,9 @@ enum CodingStatisticsType STATS__TOOL_TOTAL_FRAME,// This is a special case and is not included in the report. STATS__TOOL_AFF, STATS__TOOL_EMT, +#if JVET_M0444_SMVD + STATS__CABAC_BITS__SYMMVD_FLAG, +#endif STATS__TOOL_TOTAL, STATS__NUM_STATS }; @@ -184,6 +187,9 @@ static inline const char* getName(CodingStatisticsType name) "CABAC_BITS__TRIANGLE_FLAG", "CABAC_BITS__TRIANGLE_INDEX", "CABAC_BITS__MULTI_REF_LINE", +#if JVET_M0444_SMVD + "CABAC_BITS__SYMMVD_FLAG", +#endif "TOOL_FRAME", "TOOL_AFFINE", "TOOL_EMT", diff --git a/source/Lib/CommonLib/CodingStructure.cpp b/source/Lib/CommonLib/CodingStructure.cpp index 9f3226fc7d193fff292d3274b9469aaaface4146..8d5013fcc154824f7131c04f8dc76f23b3576441 100644 --- a/source/Lib/CommonLib/CodingStructure.cpp +++ b/source/Lib/CommonLib/CodingStructure.cpp @@ -1299,7 +1299,7 @@ const TransformUnit* CodingStructure::getTURestricted( const Position &pos, cons } } -CprLumaCoverage CodingStructure::getCprLumaCoverage(const CompArea& chromaArea) const +IbcLumaCoverage CodingStructure::getIbcLumaCoverage(const CompArea& chromaArea) const { CHECK(chType != CHANNEL_TYPE_CHROMA, "Error"); @@ -1307,7 +1307,7 @@ CprLumaCoverage CodingStructure::getCprLumaCoverage(const CompArea& chromaArea) CompArea lumaArea = 
CompArea(COMPONENT_Y, chromaArea.chromaFormat, chromaArea.lumaPos(), recalcSize(chromaArea.chromaFormat, CHANNEL_TYPE_CHROMA, CHANNEL_TYPE_LUMA, chromaArea.size())); lumaArea = clipArea(lumaArea, picture->block(COMPONENT_Y)); const unsigned int fullArea = lumaArea.area(); - unsigned int cprArea = 0; + unsigned int ibcArea = 0; for (SizeType y = 0; y < lumaArea.height; y += MIN_PU_SIZE) { for (SizeType x = 0; x < lumaArea.width; x += MIN_PU_SIZE) @@ -1315,19 +1315,19 @@ CprLumaCoverage CodingStructure::getCprLumaCoverage(const CompArea& chromaArea) Position pos = lumaArea.offset(x, y); if (picture->cs->getMotionInfo(pos).isInter) // need to change if inter slice allows dualtree { - cprArea += unitAreaSubBlock; + ibcArea += unitAreaSubBlock; } } } - CprLumaCoverage coverage = CPR_LUMA_COVERAGE_FULL; - if (cprArea == 0) + IbcLumaCoverage coverage = IBC_LUMA_COVERAGE_FULL; + if (ibcArea == 0) { - coverage = CPR_LUMA_COVERAGE_NONE; + coverage = IBC_LUMA_COVERAGE_NONE; } - else if (cprArea < fullArea) + else if (ibcArea < fullArea) { - coverage = CPR_LUMA_COVERAGE_PARTIAL; + coverage = IBC_LUMA_COVERAGE_PARTIAL; } return coverage; diff --git a/source/Lib/CommonLib/CodingStructure.h b/source/Lib/CommonLib/CodingStructure.h index 451e1f13db3b12674aed680282b2cc4c1fe5a08b..3727cc1fd33e8af22c8d7a44362b79ba00a14524 100644 --- a/source/Lib/CommonLib/CodingStructure.h +++ b/source/Lib/CommonLib/CodingStructure.h @@ -58,12 +58,12 @@ enum PictureType PIC_ORG_RESI, NUM_PIC_TYPES }; -enum CprLumaCoverage +enum IbcLumaCoverage { - CPR_LUMA_COVERAGE_FULL = 0, - CPR_LUMA_COVERAGE_PARTIAL, - CPR_LUMA_COVERAGE_NONE, - NUM_CPR_LUMA_COVERAGE, + IBC_LUMA_COVERAGE_FULL = 0, + IBC_LUMA_COVERAGE_PARTIAL, + IBC_LUMA_COVERAGE_NONE, + NUM_IBC_LUMA_COVERAGE, }; extern XUCache g_globalUnitCache; @@ -89,6 +89,10 @@ public: int prevQP[MAX_NUM_CHANNEL_TYPE]; int currQP[MAX_NUM_CHANNEL_TYPE]; int chromaQpAdj; +#if JVET_M0170_MRG_SHARELIST + Position sharedBndPos; + Size sharedBndSize; +#endif bool 
isLossless; const SPS *sps; const PPS *pps; @@ -154,7 +158,7 @@ public: cCUTraverser traverseCUs(const UnitArea& _unit, const ChannelType _chType) const; cPUTraverser traversePUs(const UnitArea& _unit, const ChannelType _chType) const; cTUTraverser traverseTUs(const UnitArea& _unit, const ChannelType _chType) const; - CprLumaCoverage getCprLumaCoverage(const CompArea& chromaArea) const; + IbcLumaCoverage getIbcLumaCoverage(const CompArea& chromaArea) const; // --------------------------------------------------------------------------- // encoding search utilities // --------------------------------------------------------------------------- diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h index 986b0cd35278cd112addd6da1da2a08e8e8aa241..27d76437a551bb84a7d284e129c8d27423cb3d9c 100644 --- a/source/Lib/CommonLib/CommonDef.h +++ b/source/Lib/CommonLib/CommonDef.h @@ -211,8 +211,14 @@ static const int DM_CHROMA_IDX = NUM_INTRA_MODE; ///< chro static const uint8_t INTER_MODE_IDX = 255; ///< index for inter modes +#if JVET_M0464_UNI_MTS +static const uint32_t NUM_TRAFO_MODES_MTS = 6; ///< Number of transform modes selectable with MTS +static const uint32_t MTS_INTRA_MAX_CU_SIZE = 32; ///< Max Intra CU size applying MTS, supported values: 8, 16, 32, 64, 128 +static const uint32_t MTS_INTER_MAX_CU_SIZE = 32; ///< Max Inter CU size applying MTS, supported values: 8, 16, 32, 64, 128 +#else static const uint32_t EMT_INTRA_MAX_CU_WITH_QTBT = 32; ///< Max Intra CU size applying EMT, supported values: 8, 16, 32, 64, 128 static const uint32_t EMT_INTER_MAX_CU_WITH_QTBT = 32; ///< Max Inter CU size applying EMT, supported values: 8, 16, 32, 64, 128 +#endif static const int NUM_MOST_PROBABLE_MODES = 6; static const int LM_SYMBOL_NUM = (1 + NUM_LMC_MODE); @@ -321,7 +327,9 @@ static const int MAX_ENCODER_DEBLOCKING_QUALITY_LAYERS = 8 ; static const uint32_t LUMA_LEVEL_TO_DQP_LUT_MAXSIZE = 1024; ///< max LUT size for QP offset 
based on luma #endif +#if !JVET_M0464_UNI_MTS static const int NUM_EMT_CU_FLAG_CTX = 6; ///< number of context models for EMT CU-level flag +#endif //QTBT high level parameters //for I slice luma CTB configuration para. @@ -388,12 +396,12 @@ static const int TRIANGLE_MAX_NUM_CANDS = 40; static const int TRIANGLE_MAX_NUM_SATD_CANDS = 3; static const int TRIANGLE_MIN_SIZE = 8 * 8; -static const int CPR_MAX_CAND_SIZE = 16; // max block size for cpr search -static const int CPR_NUM_CANDIDATES = 64; ///< Maximum number of candidates to store/test +static const int IBC_MAX_CAND_SIZE = 16; // max block size for ibc search +static const int IBC_NUM_CANDIDATES = 64; ///< Maximum number of candidates to store/test static const int CHROMA_REFINEMENT_CANDIDATES = 8; /// 8 candidates BV to choose from -static const int CPR_FAST_METHOD_NOINTRA_CPRCBF0 = 0x01; -static const int CPR_FAST_METHOD_BUFFERBV = 0X02; -static const int CPR_FAST_METHOD_ADAPTIVE_SEARCHRANGE = 0X04; +static const int IBC_FAST_METHOD_NOINTRA_IBCCBF0 = 0x01; +static const int IBC_FAST_METHOD_BUFFERBV = 0X02; +static const int IBC_FAST_METHOD_ADAPTIVE_SEARCHRANGE = 0X04; // ==================================================================================================================== // Macro functions @@ -536,6 +544,45 @@ template <typename ValueType> inline ValueType rightShift (const ValueType template <typename ValueType> inline ValueType leftShift_round (const ValueType value, const int shift) { return (shift >= 0) ? ( value << shift) : ((value + (ValueType(1) << (-shift - 1))) >> -shift); } template <typename ValueType> inline ValueType rightShift_round(const ValueType value, const int shift) { return (shift >= 0) ? 
((value + (ValueType(1) << (shift - 1))) >> shift) : ( value << -shift); } +static inline int floorLog2(uint32_t x) +{ + if (x == 0) + { + return -1; + } +#ifdef __GNUC__ + return 31 - __builtin_clz(x); +#else + int result = 0; + if (x & 0xffff0000) + { + x >>= 16; + result += 16; + } + if (x & 0xff00) + { + x >>= 8; + result += 8; + } + if (x & 0xf0) + { + x >>= 4; + result += 4; + } + if (x & 0xc) + { + x >>= 2; + result += 2; + } + if (x & 0x2) + { + x >>= 1; + result += 1; + } + return result; +#endif +} + //CASE-BREAK for breakpoints #if defined ( _MSC_VER ) && defined ( _DEBUG ) #define _CASE(_x) if(_x) diff --git a/source/Lib/CommonLib/ContextModelling.cpp b/source/Lib/CommonLib/ContextModelling.cpp index 57dd1636b913daa109bea8939a3d05175f594746..d52ea51e00383a3719d16ecc733dd3f968afda60 100644 --- a/source/Lib/CommonLib/ContextModelling.cpp +++ b/source/Lib/CommonLib/ContextModelling.cpp @@ -81,7 +81,11 @@ CoeffCodingContext::CoeffCodingContext(const TransformUnit& tu, ComponentID comp , m_lastOffsetY (0) , m_lastShiftX (0) , m_lastShiftY (0) +#if JVET_M0464_UNI_MTS + , m_TrafoBypass (tu.cs->sps->getSpsRangeExtension().getTransformSkipContextEnabledFlag() && (tu.cu->transQuantBypass || tu.mtsIdx==1)) +#else , m_TrafoBypass (tu.cs->sps->getSpsRangeExtension().getTransformSkipContextEnabledFlag() && (tu.cu->transQuantBypass || tu.transformSkip[m_compID])) +#endif , m_scanPosLast (-1) , m_subSetId (-1) , m_subSetPos (-1) @@ -96,7 +100,9 @@ CoeffCodingContext::CoeffCodingContext(const TransformUnit& tu, ComponentID comp , m_parFlagCtxSet ( Ctx::ParFlag[m_chType] ) , m_gtxFlagCtxSet { Ctx::GtxFlag[m_chType], Ctx::GtxFlag[m_chType+2] } , m_sigCoeffGroupFlag () +#if !JVET_M0464_UNI_MTS , m_emtNumSigCoeff (0) +#endif { // LOGTODO unsigned log2sizeX = m_log2BlockWidth; @@ -150,6 +156,116 @@ void CoeffCodingContext::initSubblock( int SubsetId, bool sigGroupFlag ) + +#if JVET_M0421_SPLIT_SIG +void DeriveCtx::CtxSplit( const CodingStructure& cs, Partitioner& 
partitioner, unsigned& ctxSpl, unsigned& ctxQt, unsigned& ctxHv, unsigned& ctxHorBt, unsigned& ctxVerBt, bool* _canSplit /*= nullptr */ ) +{ + const Position pos = partitioner.currArea().blocks[partitioner.chType]; + const unsigned curSliceIdx = cs.slice->getIndependentSliceIdx(); +#if HEVC_TILES_WPP + const unsigned curTileIdx = cs.picture->tileMap->getTileIdxMap( partitioner.currArea().lumaPos() ); +#endif + + // get left depth +#if HEVC_TILES_WPP + const CodingUnit* cuLeft = cs.getCURestricted( pos.offset( -1, 0 ), curSliceIdx, curTileIdx, partitioner.chType ); +#else + const CodingUnit* cuLeft = cs.getCURestricted( pos.offset( -1, 0 ), curSliceIdx, partitioner.chType ); +#endif + + // get above depth +#if HEVC_TILES_WPP + const CodingUnit* cuAbove = cs.getCURestricted( pos.offset( 0, -1 ), curSliceIdx, curTileIdx, partitioner.chType ); +#else + const CodingUnit* cuAbove = cs.getCURestricted( pos.offset( 0, -1 ), curSliceIdx, partitioner.chType ); +#endif + + bool canSplit[6]; + + if( _canSplit == nullptr ) + { + partitioner.canSplit( cs, canSplit[0], canSplit[1], canSplit[2], canSplit[3], canSplit[4], canSplit[5] ); + } + else + { + memcpy( canSplit, _canSplit, 6 * sizeof( bool ) ); + } + + /////////////////////// + // CTX do split (0-8) + /////////////////////// + const unsigned widthCurr = partitioner.currArea().blocks[partitioner.chType].width; + const unsigned heightCurr = partitioner.currArea().blocks[partitioner.chType].height; + + ctxSpl = 0; + + if( cuLeft ) + { + const unsigned heightLeft = cuLeft->blocks[partitioner.chType].height; + ctxSpl += ( heightLeft < heightCurr ? 1 : 0 ); + } + if( cuAbove ) + { + const unsigned widthAbove = cuAbove->blocks[partitioner.chType].width; + ctxSpl += ( widthAbove < widthCurr ? 
1 : 0 ); + } + + unsigned numSplit = 0; + if( canSplit[1] ) numSplit += 2; + if( canSplit[2] ) numSplit += 1; + if( canSplit[3] ) numSplit += 1; + if( canSplit[4] ) numSplit += 1; + if( canSplit[5] ) numSplit += 1; + + if( numSplit > 0 ) numSplit--; + + ctxSpl += 3 * ( numSplit >> 1 ); + + ////////////////////////// + // CTX is qt split (0-5) + ////////////////////////// + ctxQt = ( cuLeft && cuLeft->qtDepth > partitioner.currQtDepth ) ? 1 : 0; + ctxQt += ( cuAbove && cuAbove->qtDepth > partitioner.currQtDepth ) ? 1 : 0; + ctxQt += partitioner.currQtDepth < 2 ? 0 : 3; + + //////////////////////////// + // CTX is ver split (0-4) + //////////////////////////// + ctxHv = 0; + + const unsigned numHor = ( canSplit[2] ? 1 : 0 ) + ( canSplit[4] ? 1 : 0 ); + const unsigned numVer = ( canSplit[3] ? 1 : 0 ) + ( canSplit[5] ? 1 : 0 ); + + if( numVer == numHor ) + { + const Area& area = partitioner.currArea().blocks[partitioner.chType]; + + const unsigned wAbove = cuAbove ? cuAbove->blocks[partitioner.chType].width : 1; + const unsigned hLeft = cuLeft ? cuLeft ->blocks[partitioner.chType].height : 1; + + const unsigned depAbove = area.width / wAbove; + const unsigned depLeft = area.height / hLeft; + + if( depAbove == depLeft || !cuLeft || !cuAbove ) ctxHv = 0; + else if( depAbove < depLeft ) ctxHv = 1; + else ctxHv = 2; + } + else if( numVer < numHor ) + { + ctxHv = 3; + } + else + { + ctxHv = 4; + } + + ////////////////////////// + // CTX is h/v bt (0-3) + ////////////////////////// + ctxHorBt = ( partitioner.currBtDepth >= 2 ? 1 : 0 ); + ctxVerBt = ( partitioner.currBtDepth >= 2 ? 
3 : 2 ); +} +#else unsigned DeriveCtx::CtxCUsplit( const CodingStructure& cs, Partitioner& partitioner ) { auto adPartitioner = dynamic_cast<AdaptiveDepthPartitioner*>( &partitioner ); @@ -186,6 +302,7 @@ unsigned DeriveCtx::CtxCUsplit( const CodingStructure& cs, Partitioner& partitio return ctxId; } +#endif unsigned DeriveCtx::CtxQtCbf( const ComponentID compID, const unsigned trDepth, const bool prevCbCbf ) { @@ -258,6 +375,7 @@ unsigned DeriveCtx::CtxIMVFlag( const CodingUnit& cu ) return ctxId; } +#if !JVET_M0421_SPLIT_SIG unsigned DeriveCtx::CtxBTsplit(const CodingStructure& cs, Partitioner& partitioner) { const Position pos = partitioner.currArea().blocks[partitioner.chType]; @@ -306,6 +424,7 @@ unsigned DeriveCtx::CtxBTsplit(const CodingStructure& cs, Partitioner& partition return ctx; } +#endif unsigned DeriveCtx::CtxTriangleFlag( const CodingUnit& cu ) { const CodingStructure *cs = cu.cs; @@ -320,6 +439,17 @@ unsigned DeriveCtx::CtxTriangleFlag( const CodingUnit& cu ) return ctxId; } +#if JVET_M0502_PRED_MODE_CTX +unsigned DeriveCtx::CtxPredModeFlag( const CodingUnit& cu ) +{ + const CodingUnit *cuLeft = cu.cs->getCURestricted(cu.lumaPos().offset(-1, 0), cu, CH_L); + const CodingUnit *cuAbove = cu.cs->getCURestricted(cu.lumaPos().offset(0, -1), cu, CH_L); + + unsigned ctxId = ((cuAbove && cuAbove->predMode == MODE_INTRA) || (cuLeft && cuLeft->predMode == MODE_INTRA)) ? 
1 : 0; + + return ctxId; +} +#endif void MergeCtx::setMergeInfo( PredictionUnit& pu, int candIdx ) { @@ -342,12 +472,15 @@ void MergeCtx::setMergeInfo( PredictionUnit& pu, int candIdx ) pu.mvpNum [REF_PIC_LIST_1] = NOT_VALID; if (interDirNeighbours[candIdx] == 1 && pu.cs->slice->getRefPic(REF_PIC_LIST_0, mvFieldNeighbours[candIdx << 1].refIdx)->getPOC() == pu.cs->slice->getPOC()) { - pu.cu->cpr = true; + pu.cu->ibc = true; pu.bv = pu.mv[REF_PIC_LIST_0]; pu.bv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT); // used for only integer resolution } pu.cu->GBiIdx = ( interDirNeighbours[candIdx] == 3 ) ? GBiIdx[candIdx] : GBI_DEFAULT; +#if JVET_M0068_M0171_MMVD_CLEANUP + PU::restrictBiPredMergeCandsOne(pu); +#endif } void MergeCtx::setMmvdMergeCandiInfo(PredictionUnit& pu, int candIdx) { @@ -378,47 +511,78 @@ void MergeCtx::setMmvdMergeCandiInfo(PredictionUnit& pu, int candIdx) const int poc0 = slice.getRefPOC(REF_PIC_LIST_0, refList0); const int poc1 = slice.getRefPOC(REF_PIC_LIST_1, refList1); const int currPoc = slice.getPOC(); +#if !JVET_M0068_M0171_MMVD_CLEANUP int refSign = 1; if ((poc0 - currPoc) * (currPoc - poc1) > 0) { refSign = -1; } +#endif if (fPosPosition == 0) { tempMv[0] = Mv(offset, 0); +#if !JVET_M0068_M0171_MMVD_CLEANUP tempMv[1] = Mv(offset * refSign, 0); +#endif } else if (fPosPosition == 1) { tempMv[0] = Mv(-offset, 0); +#if !JVET_M0068_M0171_MMVD_CLEANUP tempMv[1] = Mv(-offset * refSign, 0); +#endif } else if (fPosPosition == 2) { tempMv[0] = Mv(0, offset); +#if !JVET_M0068_M0171_MMVD_CLEANUP tempMv[1] = Mv(0, offset * refSign); +#endif } else { tempMv[0] = Mv(0, -offset); +#if !JVET_M0068_M0171_MMVD_CLEANUP tempMv[1] = Mv(0, -offset * refSign); +#endif } +#if JVET_M0068_M0171_MMVD_CLEANUP + if ((poc0 - currPoc) == (poc1 - currPoc)) + { + tempMv[1] = tempMv[0]; + } + else if (abs(poc1 - currPoc) > abs(poc0 - currPoc)) +#else if (abs(poc1 - currPoc) > abs(poc0 - currPoc)) +#endif { const int scale = PU::getDistScaleFactor(currPoc, poc0, 
currPoc, poc1); +#if JVET_M0068_M0171_MMVD_CLEANUP + tempMv[1] = tempMv[0]; + tempMv[0] = tempMv[1].scaleMv(scale); +#else if (scale != 4096) { tempMv[0] = tempMv[0].scaleMv(scale); } +#endif } +#if JVET_M0068_M0171_MMVD_CLEANUP + else +#else else if (abs(poc1 - currPoc) < abs(poc0 - currPoc)) +#endif { const int scale = PU::getDistScaleFactor(currPoc, poc1, currPoc, poc0); +#if JVET_M0068_M0171_MMVD_CLEANUP + tempMv[1] = tempMv[0].scaleMv(scale); +#else if (scale != 4096) { tempMv[1] = tempMv[1].scaleMv(scale); } +#endif } pu.interDir = 3; @@ -489,4 +653,8 @@ void MergeCtx::setMmvdMergeCandiInfo(PredictionUnit& pu, int candIdx) pu.mvpNum[REF_PIC_LIST_1] = NOT_VALID; pu.cu->GBiIdx = (interDirNeighbours[fPosBaseIdx] == 3) ? GBiIdx[fPosBaseIdx] : GBI_DEFAULT; + +#if JVET_M0068_M0171_MMVD_CLEANUP + PU::restrictBiPredMergeCandsOne(pu); +#endif } diff --git a/source/Lib/CommonLib/ContextModelling.h b/source/Lib/CommonLib/ContextModelling.h index 76fba82a0d454ebbfd5cd5c9538fdc2e8efb7b6c..4f2c0d90327606deffffdf26773083927762a763 100644 --- a/source/Lib/CommonLib/ContextModelling.h +++ b/source/Lib/CommonLib/ContextModelling.h @@ -107,7 +107,11 @@ public: const int diag = posX + posY; int numPos = 0; int sumAbs = 0; +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS +#define UPDATE(x) {int a=abs(x);sumAbs+=std::min(4+(a&1),a);numPos+=!!a;} +#else #define UPDATE(x) {int a=abs(x);sumAbs+=std::min(2+(a&1),a);numPos+=!!a;} +#endif if( posX < m_width-1 ) { UPDATE( pData[1] ); @@ -183,8 +187,10 @@ public: return std::min(sum, 31); } +#if !JVET_M0464_UNI_MTS unsigned emtNumSigCoeff() const { return m_emtNumSigCoeff; } void setEmtNumSigCoeff( unsigned val ) { m_emtNumSigCoeff = val; } +#endif private: // constant @@ -235,7 +241,9 @@ private: CtxSet m_parFlagCtxSet; CtxSet m_gtxFlagCtxSet[2]; std::bitset<MLS_GRP_NUM> m_sigCoeffGroupFlag; +#if !JVET_M0464_UNI_MTS unsigned m_emtNumSigCoeff; +#endif }; @@ -294,14 +302,21 @@ public: namespace DeriveCtx { +#if JVET_M0421_SPLIT_SIG +void CtxSplit ( 
const CodingStructure& cs, Partitioner& partitioner, unsigned& ctxSpl, unsigned& ctxQt, unsigned& ctxHv, unsigned& ctxHorBt, unsigned& ctxVerBt, bool* canSplit = nullptr ); +#else unsigned CtxCUsplit ( const CodingStructure& cs, Partitioner& partitioner ); unsigned CtxBTsplit ( const CodingStructure& cs, Partitioner& partitioner ); +#endif unsigned CtxQtCbf ( const ComponentID compID, const unsigned trDepth, const bool prevCbCbf ); unsigned CtxInterDir ( const PredictionUnit& pu ); unsigned CtxSkipFlag ( const CodingUnit& cu ); unsigned CtxIMVFlag ( const CodingUnit& cu ); unsigned CtxAffineFlag( const CodingUnit& cu ); unsigned CtxTriangleFlag( const CodingUnit& cu ); +#if JVET_M0502_PRED_MODE_CTX +unsigned CtxPredModeFlag( const CodingUnit& cu ); +#endif } #endif // __CONTEXTMODELLING__ diff --git a/source/Lib/CommonLib/Contexts.cpp b/source/Lib/CommonLib/Contexts.cpp index 84a6947a12bbad73f44d4f0065dc535cd29fb93e..cbc516d354ffa006e10826ec03b36f8bb78fb434 100644 --- a/source/Lib/CommonLib/Contexts.cpp +++ b/source/Lib/CommonLib/Contexts.cpp @@ -349,6 +349,15 @@ std::vector<std::vector<uint8_t>> ContextSetCfg::sm_InitTables( NUMBER_OF_SLICE_ // clang-format off const CtxSet ContextSetCfg::SplitFlag = ContextSetCfg::addCtxSet ({ +#if JVET_M0421_SPLIT_SIG + // |-------- do split ctx -------------------| + { 93, 124, 141, 123, 125, 141, 139, 126, 157, }, + { 108, 139, 156, 138, 140, 141, 139, 141, 143, }, + { 153, 154, 172, 153, 140, 156, 154, 127, 159, }, +#if JVET_M0453_CABAC_ENGINE + { DWS, DWS, DWS, DWS, DWS, DWS, DWS, DWS, DWS, }, +#endif +#else #if JVET_M0453_CABAC_ENGINE { 107, 110, 127, 106, 123, 140,}, { 138, 140, 142, 106, 123, 125,}, @@ -359,8 +368,40 @@ const CtxSet ContextSetCfg::SplitFlag = ContextSetCfg::addCtxSet { 138, 111, 143, 107, 138, 140, }, { 138, 141, 158, 151, 124, 126, }, #endif +#endif +}); + +#if JVET_M0421_SPLIT_SIG +const CtxSet ContextSetCfg::SplitQtFlag = ContextSetCfg::addCtxSet +({ + { 153, 126, 142, 137, 109, 155, }, + { 153, 126, 
157, 122, 138, 140, }, + { 153, 125, 127, 137, 153, 155, }, +#if JVET_M0453_CABAC_ENGINE + { DWS, DWS, DWS, DWS, DWS, DWS, }, +#endif +}); + +const CtxSet ContextSetCfg::SplitHvFlag = ContextSetCfg::addCtxSet +({ + { 154, 168, 155, 153, 155, }, + { 154, 168, 170, 153, 170, }, + { 154, 153, 140, 153, 154, }, +#if JVET_M0453_CABAC_ENGINE + { DWS, DWS, DWS, DWS, DWS, }, +#endif }); +const CtxSet ContextSetCfg::Split12Flag = ContextSetCfg::addCtxSet +({ + { 140, 154, 140, 154, }, + { 155, 169, 140, 154, }, + { 155, 154, 155, 154, }, +#if JVET_M0453_CABAC_ENGINE + { DWS, DWS, DWS, DWS, }, +#endif +}); +#else const CtxSet ContextSetCfg::BTSplitFlag = ContextSetCfg::addCtxSet ({ // |-------- 1st bin, 9 ctx for luma + 3 ctx for chroma------| |--2nd bin--| |3rd bin| @@ -375,6 +416,7 @@ const CtxSet ContextSetCfg::BTSplitFlag = ContextSetCfg::addCtxSet { 139, 141, 157, 139, 155, 142, 153, 125, 141, 154, 154, 154, 154, 154, 154, 140, }, #endif }); +#endif const CtxSet ContextSetCfg::SkipFlag = ContextSetCfg::addCtxSet ({ @@ -477,15 +519,28 @@ const CtxSet ContextSetCfg::PartSize = ContextSetCfg::addCtxSet const CtxSet ContextSetCfg::PredMode = ContextSetCfg::addCtxSet ({ #if JVET_M0453_CABAC_ENGINE +#if JVET_M0502_PRED_MODE_CTX + { 193, 193, }, + { 151, 151, }, + { CNU, CNU, }, + { 1, 1, }, +#else { 193,}, { 151,}, { CNU,}, { 1,}, +#endif +#else +#if JVET_M0502_PRED_MODE_CTX + { 178, 178, }, + { 194, 194, }, + { CNU, CNU, }, #else { 178, }, { 194, }, { CNU, }, #endif +#endif }); const CtxSet ContextSetCfg::MultiRefLineIdx = ContextSetCfg::addCtxSet @@ -1010,6 +1065,18 @@ const CtxSet ContextSetCfg::MVPIdx = ContextSetCfg::addCtxSet #endif }); +#if JVET_M0444_SMVD +const CtxSet ContextSetCfg::SmvdFlag = ContextSetCfg::addCtxSet +( { + { 154, }, + { 110, }, + { CNU, }, +#if JVET_M0453_CABAC_ENGINE + { DWS, } +#endif +} ); +#endif + const CtxSet ContextSetCfg::SaoMergeFlag = ContextSetCfg::addCtxSet ({ #if JVET_M0453_CABAC_ENGINE @@ -1038,6 +1105,7 @@ const CtxSet 
ContextSetCfg::SaoTypeIdx = ContextSetCfg::addCtxSet #endif }); +#if !JVET_M0464_UNI_MTS const CtxSet ContextSetCfg::TransformSkipFlag = ContextSetCfg::addCtxSet ({ #if JVET_M0453_CABAC_ENGINE @@ -1051,6 +1119,7 @@ const CtxSet ContextSetCfg::TransformSkipFlag = ContextSetCfg::addCtxSet { 109, 42, }, #endif }); +#endif const CtxSet ContextSetCfg::TransquantBypassFlag = ContextSetCfg::addCtxSet ({ @@ -1082,6 +1151,21 @@ const CtxSet ContextSetCfg::RdpcmDir = ContextSetCfg::addCtxSet #endif }); +#if JVET_M0464_UNI_MTS +const CtxSet ContextSetCfg::MTSIndex = ContextSetCfg::addCtxSet +({ +#if JVET_M0453_CABAC_ENGINE + { CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, }, + { CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, }, + { CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, }, + { DWS, DWS, DWS, DWS, DWS, DWS, DWS, DWS, DWS, DWS, DWS, }, +#else + { CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, }, + { CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, }, + { CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, }, +#endif +}); +#else const CtxSet ContextSetCfg::EMTTuIndex = ContextSetCfg::addCtxSet ({ #if JVET_M0453_CABAC_ENGINE @@ -1109,6 +1193,7 @@ const CtxSet ContextSetCfg::EMTCuFlag = ContextSetCfg::addCtxSet { CNU, CNU, 140, 155, 155, CNU, }, #endif }); +#endif const CtxSet ContextSetCfg::CrossCompPred = ContextSetCfg::addCtxSet ({ diff --git a/source/Lib/CommonLib/Contexts.h b/source/Lib/CommonLib/Contexts.h index dae5f60a09d173e5164df8e19676f7e36dc99559..fcc59f0152a14eddf03460f23822adff43834592 100644 --- a/source/Lib/CommonLib/Contexts.h +++ b/source/Lib/CommonLib/Contexts.h @@ -245,7 +245,13 @@ class ContextSetCfg public: // context sets: specify offset and size static const CtxSet SplitFlag; +#if JVET_M0421_SPLIT_SIG + static const CtxSet SplitQtFlag; + static const CtxSet SplitHvFlag; + static const CtxSet Split12Flag; +#else static const CtxSet BTSplitFlag; +#endif static const CtxSet SkipFlag; static const CtxSet 
MergeFlag; static const CtxSet MergeIdx; @@ -276,12 +282,18 @@ public: static const CtxSet MVPIdx; static const CtxSet SaoMergeFlag; static const CtxSet SaoTypeIdx; +#if JVET_M0464_UNI_MTS + static const CtxSet MTSIndex; +#else static const CtxSet TransformSkipFlag; +#endif static const CtxSet TransquantBypassFlag; static const CtxSet RdpcmFlag; static const CtxSet RdpcmDir; +#if !JVET_M0464_UNI_MTS static const CtxSet EMTTuIndex; static const CtxSet EMTCuFlag; +#endif static const CtxSet CrossCompPred; static const CtxSet ChromaQpAdjFlag; static const CtxSet ChromaQpAdjIdc; @@ -292,6 +304,9 @@ public: static const CtxSet MHIntraPredMode; static const CtxSet TriangleFlag; static const CtxSet TriangleIdx; +#if JVET_M0444_SMVD + static const CtxSet SmvdFlag; +#endif static const unsigned NumberOfContexts; // combined sets for less complex copying diff --git a/source/Lib/CommonLib/DepQuant.cpp b/source/Lib/CommonLib/DepQuant.cpp index 267f168f0f060c558bb5b4df476be8b218aa5aa3..820920082c726b577d39675070e5048683ce6888 100644 --- a/source/Lib/CommonLib/DepQuant.cpp +++ b/source/Lib/CommonLib/DepQuant.cpp @@ -642,7 +642,11 @@ namespace DQIntern const int channelBitDepth = sps.getBitDepth( chType ); const int maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange( chType ); const int nomTransformShift = getTransformShift( channelBitDepth, area.size(), maxLog2TrDynamicRange ); +#if JVET_M0464_UNI_MTS + const bool clipTransformShift = ( tu.mtsIdx==1 && sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag() ); +#else const bool clipTransformShift = ( tu.transformSkip[ compID ] && sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag() ); +#endif const int transformShift = ( clipTransformShift ? 
std::max<int>( 0, nomTransformShift ) : nomTransformShift ); // quant parameters @@ -726,7 +730,11 @@ namespace DQIntern const TCoeff minTCoeff = -( 1 << maxLog2TrDynamicRange ); const TCoeff maxTCoeff = ( 1 << maxLog2TrDynamicRange ) - 1; const int nomTransformShift = getTransformShift( channelBitDepth, area.size(), maxLog2TrDynamicRange ); +#if JVET_M0464_UNI_MTS + const bool clipTransformShift = ( tu.mtsIdx==1 && sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag() ); +#else const bool clipTransformShift = ( tu.transformSkip[ compID ] && sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag() ); +#endif const int transformShift = ( clipTransformShift ? std::max<int>( 0, nomTransformShift ) : nomTransformShift ); #if HM_QTBT_AS_IN_JEM_QUANT Intermediate_Int shift = IQUANT_SHIFT + 1 - qpPer - transformShift + ( TU::needsBlockSizeTrafoScale( area ) ? ADJ_DEQUANT_SHIFT : 0 ); @@ -855,7 +863,11 @@ namespace DQIntern { m_rdCost = std::numeric_limits<int64_t>::max()>>1; m_numSigSbb = 0; +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS + m_remRegBins = 4; // just large enough for last scan pos +#else m_remRegBins = 3; // just large enough for last scan pos +#endif m_refSbbCtxId = -1; m_sigFracBits = m_sigFracBitsArray[ 0 ]; m_coeffFracBits = m_gtxFracBitsArray[ 0 ]; @@ -869,7 +881,11 @@ namespace DQIntern int64_t rdCostA = m_rdCost + pqDataA.deltaDist; int64_t rdCostB = m_rdCost + pqDataB.deltaDist; int64_t rdCostZ = m_rdCost; +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS + if( m_remRegBins >= 4 ) +#else if( m_remRegBins >= 3 ) +#endif { if( pqDataA.absLevel < 4 ) rdCostA += m_coeffFracBits.bits[pqDataA.absLevel]; @@ -1008,14 +1024,22 @@ namespace DQIntern m_sbbFracBits = prvState->m_sbbFracBits; m_remRegBins = prvState->m_remRegBins - 1; m_goRicePar = prvState->m_goRicePar; +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS + if( m_remRegBins >= 4 ) +#else if( m_remRegBins >= 3 ) +#endif { TCoeff rem = (decision.absLevel - 4) >> 1; if( m_goRicePar < 3 && rem > (3<<m_goRicePar)-1 
) { m_goRicePar++; } +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS + m_remRegBins -= (decision.absLevel < 2 ? decision.absLevel : 3); +#else m_remRegBins -= std::min<TCoeff>( decision.absLevel, 2 ); +#endif } ::memcpy( m_absLevelsAndCtxInit, prvState->m_absLevelsAndCtxInit, 48*sizeof(uint8_t) ); } @@ -1025,11 +1049,19 @@ namespace DQIntern m_refSbbCtxId = -1; if ( scanInfo.sbbSize == 4 ) { +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS + m_remRegBins = MAX_NUM_REG_BINS_2x2SUBBLOCK - (decision.absLevel < 2 ? decision.absLevel : 3); +#else m_remRegBins = MAX_NUM_REG_BINS_2x2SUBBLOCK - MAX_NUM_GT2_BINS_2x2SUBBLOCK - std::min<TCoeff>( decision.absLevel, 2 ); +#endif } else { +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS + m_remRegBins = MAX_NUM_REG_BINS_4x4SUBBLOCK - (decision.absLevel < 2 ? decision.absLevel : 3); +#else m_remRegBins = MAX_NUM_REG_BINS_4x4SUBBLOCK - MAX_NUM_GT2_BINS_4x4SUBBLOCK - std::min<TCoeff>( decision.absLevel, 2 ); +#endif } m_goRicePar = ( ((decision.absLevel - 4) >> 1) > (3<<0)-1 ? 1 : 0 ); ::memset( m_absLevelsAndCtxInit, 0, 48*sizeof(uint8_t) ); @@ -1038,12 +1070,20 @@ namespace DQIntern uint8_t* levels = reinterpret_cast<uint8_t*>(m_absLevelsAndCtxInit); levels[ scanInfo.insidePos ] = (uint8_t)std::min<TCoeff>( 255, decision.absLevel ); +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS + if (m_remRegBins >= 4) +#else if (m_remRegBins >= 3) +#endif { TCoeff tinit = m_absLevelsAndCtxInit[8 + scanInfo.nextInsidePos]; TCoeff sumAbs1 = (tinit >> 3) & 31; TCoeff sumNum = tinit & 7; +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS +#define UPDATE(k) {TCoeff t=levels[scanInfo.nextNbInfoSbb.inPos[k]]; sumAbs1+=std::min<TCoeff>(4+(t&1),t); sumNum+=!!t; } +#else #define UPDATE(k) {TCoeff t=levels[scanInfo.nextNbInfoSbb.inPos[k]]; sumAbs1+=std::min<TCoeff>(2+(t&1),t); sumNum+=!!t; } +#endif if (numIPos == 1) { UPDATE(0); @@ -1174,11 +1214,19 @@ namespace DQIntern currState.m_numSigSbb = 0; if (scanInfo.sbbSize == 4) { +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS + currState.m_remRegBins = 
MAX_NUM_REG_BINS_2x2SUBBLOCK; +#else currState.m_remRegBins = MAX_NUM_REG_BINS_2x2SUBBLOCK - MAX_NUM_GT2_BINS_2x2SUBBLOCK; +#endif } else { +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS + currState.m_remRegBins = MAX_NUM_REG_BINS_4x4SUBBLOCK; +#else currState.m_remRegBins = MAX_NUM_REG_BINS_4x4SUBBLOCK - MAX_NUM_GT2_BINS_4x4SUBBLOCK; +#endif } currState.m_goRicePar = 0; currState.m_refSbbCtxId = currState.m_stateId; @@ -1193,7 +1241,11 @@ namespace DQIntern if( nbOut->num ) { TCoeff sumAbs = 0, sumAbs1 = 0, sumNum = 0; +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS +#define UPDATE(k) {TCoeff t=absLevels[nbOut->outPos[k]]; sumAbs+=t; sumAbs1+=std::min<TCoeff>(4+(t&1),t); sumNum+=!!t; } +#else #define UPDATE(k) {TCoeff t=absLevels[nbOut->outPos[k]]; sumAbs+=t; sumAbs1+=std::min<TCoeff>(2+(t&1),t); sumNum+=!!t; } +#endif UPDATE(0); if( nbOut->num > 1 ) { diff --git a/source/Lib/CommonLib/CprHashMap.cpp b/source/Lib/CommonLib/IbcHashMap.cpp similarity index 94% rename from source/Lib/CommonLib/CprHashMap.cpp rename to source/Lib/CommonLib/IbcHashMap.cpp index 0ed0ac828fe838e947e7ddf40541bfdffbcaf6e5..9d876292deeedcab45d80f3057b14018b8ba17a4 100644 --- a/source/Lib/CommonLib/CprHashMap.cpp +++ b/source/Lib/CommonLib/IbcHashMap.cpp @@ -31,46 +31,46 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -/** \file CprHashMap.cpp - \brief CPR hash map encoder class +/** \file IbcHashMap.cpp + \brief IBC hash map encoder class */ #include "CommonLib/dtrace_codingstruct.h" #include "CommonLib/Picture.h" #include "CommonLib/UnitTools.h" -#include "CprHashMap.h" +#include "IbcHashMap.h" using namespace std; -//! \ingroup CprHashMap +//! \ingroup IbcHashMap //! 
\{ // ==================================================================================================================== // Constructor / destructor / create / destroy // ==================================================================================================================== -CprHashMap::CprHashMap() +IbcHashMap::IbcHashMap() { m_picWidth = 0; m_picHeight = 0; m_pos2Hash = NULL; m_computeCrc32c = xxComputeCrc32c16bit; -#if ENABLE_SIMD_OPT_CPR +#if ENABLE_SIMD_OPT_IBC #ifdef TARGET_SIMD_X86 - initCprHashMapX86(); + initIbcHashMapX86(); #endif #endif } -CprHashMap::~CprHashMap() +IbcHashMap::~IbcHashMap() { destroy(); } -void CprHashMap::init(const int picWidth, const int picHeight) +void IbcHashMap::init(const int picWidth, const int picHeight) { if (picWidth != m_picWidth || picHeight != m_picHeight) { @@ -87,7 +87,7 @@ void CprHashMap::init(const int picWidth, const int picHeight) } } -void CprHashMap::destroy() +void IbcHashMap::destroy() { if (m_pos2Hash != NULL) { @@ -168,7 +168,7 @@ static const uint32_t crc32Table[256] = { 0xBE2DA0A5L, 0x4C4623A6L, 0x5F16D052L, 0xAD7D5351L }; -uint32_t CprHashMap::xxComputeCrc32c16bit(uint32_t crc, const Pel pel) +uint32_t IbcHashMap::xxComputeCrc32c16bit(uint32_t crc, const Pel pel) { const void *buf = &pel; const uint8_t *p = (const uint8_t *)buf; @@ -184,7 +184,7 @@ uint32_t CprHashMap::xxComputeCrc32c16bit(uint32_t crc, const Pel pel) // CRC calculation in C code //////////////////////////////////////////////////////// -unsigned int CprHashMap::xxCalcBlockHash(const Pel* pel, const int stride, const int width, const int height, unsigned int crc) +unsigned int IbcHashMap::xxCalcBlockHash(const Pel* pel, const int stride, const int width, const int height, unsigned int crc) { for (int y = 0; y < height; y++) { @@ -198,7 +198,7 @@ unsigned int CprHashMap::xxCalcBlockHash(const Pel* pel, const int stride, const } template<ChromaFormat chromaFormat> -void CprHashMap::xxBuildPicHashMap(const PelUnitBuf& pic) +void 
IbcHashMap::xxBuildPicHashMap(const PelUnitBuf& pic) { const int chromaScalingX = getChannelTypeScaleX(CHANNEL_TYPE_CHROMA, chromaFormat); const int chromaScalingY = getChannelTypeScaleY(CHANNEL_TYPE_CHROMA, chromaFormat); @@ -243,7 +243,7 @@ void CprHashMap::xxBuildPicHashMap(const PelUnitBuf& pic) } } -void CprHashMap::rebuildPicHashMap(const PelUnitBuf& pic) +void IbcHashMap::rebuildPicHashMap(const PelUnitBuf& pic) { m_hash2Pos.clear(); @@ -267,7 +267,7 @@ void CprHashMap::rebuildPicHashMap(const PelUnitBuf& pic) } } -bool CprHashMap::cprHashMatch(const Area& lumaArea, std::vector<Position>& cand, const CodingStructure& cs, const int maxCand, const int searchRange4SmallBlk) +bool IbcHashMap::ibcHashMatch(const Area& lumaArea, std::vector<Position>& cand, const CodingStructure& cs, const int maxCand, const int searchRange4SmallBlk) { cand.clear(); @@ -332,7 +332,7 @@ bool CprHashMap::cprHashMatch(const Area& lumaArea, std::vector<Position>& cand, return cand.size() > 0; } -int CprHashMap::getHashHitRatio(const Area& lumaArea) +int IbcHashMap::getHashHitRatio(const Area& lumaArea) { int maxX = std::min((int)(lumaArea.x + lumaArea.width), m_picWidth); int maxY = std::min((int)(lumaArea.y + lumaArea.height), m_picHeight); diff --git a/source/Lib/CommonLib/CprHashMap.h b/source/Lib/CommonLib/IbcHashMap.h similarity index 90% rename from source/Lib/CommonLib/CprHashMap.h rename to source/Lib/CommonLib/IbcHashMap.h index 6ac4069b579a061702f130f3cdd8fa9031a7c16e..e343aab6e042abdd15ff0227a3ba38d921f24fdd 100644 --- a/source/Lib/CommonLib/CprHashMap.h +++ b/source/Lib/CommonLib/IbcHashMap.h @@ -31,12 +31,12 @@ * THE POSSIBILITY OF SUCH DAMAGE. 
*/ -/** \file CprHashMap.h - \brief CPR hash map encoder class (header) +/** \file IbcHashMap.h + \brief IBC hash map encoder class (header) */ -#ifndef __CPRHASHMAP__ -#define __CPRHASHMAP__ +#ifndef __IBCHASHMAP__ +#define __IBCHASHMAP__ // Include files #include "CommonLib/CommonDef.h" @@ -55,7 +55,7 @@ // Class definition // ==================================================================================================================== -class CprHashMap +class IbcHashMap { private: int m_picWidth; @@ -73,23 +73,23 @@ private: public: uint32_t (*m_computeCrc32c) (uint32_t crc, const Pel pel); - CprHashMap(); - virtual ~CprHashMap(); + IbcHashMap(); + virtual ~IbcHashMap(); void init(const int picWidth, const int picHeight); void destroy(); void rebuildPicHashMap(const PelUnitBuf& pic); - bool cprHashMatch(const Area& lumaArea, std::vector<Position>& cand, const CodingStructure& cs, const int maxCand, const int searchRange4SmallBlk); + bool ibcHashMatch(const Area& lumaArea, std::vector<Position>& cand, const CodingStructure& cs, const int maxCand, const int searchRange4SmallBlk); int getHashHitRatio(const Area& lumaArea); #ifdef TARGET_SIMD_X86 - void initCprHashMapX86(); + void initIbcHashMapX86(); template <X86_VEXT vext> - void _initCprHashMapX86(); + void _initIbcHashMapX86(); #endif }; //! 
\} -#endif // __CPRHASHMAP__ +#endif // __IBCHASHMAP__ diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp index 923ebb2aa285311d32686dbfac9d5761c7668ed1..205d74394e0cf0adc52f44834c3280f42728888d 100644 --- a/source/Lib/CommonLib/InterPrediction.cpp +++ b/source/Lib/CommonLib/InterPrediction.cpp @@ -374,10 +374,10 @@ void InterPrediction::xPredInterUni(const PredictionUnit& pu, const RefPicList& int iRefIdx = pu.refIdx[eRefPicList]; Mv mv[3]; - bool isCPR = false; + bool isIBC = false; if (pu.cs->slice->getRefPic(eRefPicList, iRefIdx)->getPOC() == pu.cs->slice->getPOC()) { - isCPR = true; + isIBC = true; } if( pu.cu->affine ) { @@ -413,7 +413,7 @@ void InterPrediction::xPredInterUni(const PredictionUnit& pu, const RefPicList& { xPredInterBlk( compID, pu, pu.cu->slice->getRefPic( eRefPicList, iRefIdx ), mv[0], pcYuvPred, bi, pu.cu->slice->clpRng( compID ) , bioApplied - , isCPR + , isIBC ); } @@ -426,7 +426,7 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred) const Slice &slice = *pu.cs->slice; bool bioApplied = false; - if (pu.cs->sps->getSpsNext().getUseBIO()) + if (pu.cs->sps->getBDOFEnabledFlag()) { if (pu.cu->affine || m_subPuMC) { @@ -446,6 +446,13 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred) } } +#if JVET_M0444_SMVD + if (bioApplied && pu.cu->smvdMode) + { + bioApplied = false; + } +#endif + if (pu.cu->cs->sps->getSpsNext().getUseGBi() && bioApplied && pu.cu->GBiIdx != GBI_DEFAULT) { bioApplied = false; @@ -517,7 +524,7 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred) void InterPrediction::xPredInterBlk ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv& _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng , const bool& bioApplied - , bool isCPR + , bool isIBC ) { JVET_J0090_SET_REF_PICTURE( refPic, compID ); @@ -529,7 +536,7 @@ void InterPrediction::xPredInterBlk ( const 
ComponentID& compID, const Predictio int xFrac = _mv.hor & ((1 << shiftHor) - 1); int yFrac = _mv.ver & ((1 << shiftVer) - 1); - if (isCPR) + if (isIBC) { xFrac = yFrac = 0; JVET_J0090_SET_CACHE_ENABLE( false ); @@ -582,9 +589,42 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio JVET_J0090_SET_CACHE_ENABLE( true ); if (bioApplied && compID == COMPONENT_Y) { +#if JVET_M0487_INT_EXTEND + const int shift = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd)); + const Pel* refPel = refBuf.buf - refBuf.stride - 1; + Pel* dstPel = m_filteredBlockTmp[2 + m_iRefListIdx][compID] + dstBuf.stride + 1; + for (int w = 0; w < (width - 2 * BIO_EXTEND_SIZE); w++) + { + Pel val = leftShift_round(refPel[w], shift); + dstPel[w] = val - (Pel)IF_INTERNAL_OFFS; + } + + refPel = refBuf.buf - 1; + dstPel = m_filteredBlockTmp[2 + m_iRefListIdx][compID] + 2 * dstBuf.stride + 1; + for (int h = 0; h < (height - 2 * BIO_EXTEND_SIZE - 2); h++) + { + Pel val = leftShift_round(refPel[0], shift); + dstPel[0] = val - (Pel)IF_INTERNAL_OFFS; + + val = leftShift_round(refPel[width - 3], shift); + dstPel[width - 3] = val - (Pel)IF_INTERNAL_OFFS; + + refPel += refBuf.stride; + dstPel += dstBuf.stride; + } + + refPel = refBuf.buf + (height - 2 * BIO_EXTEND_SIZE - 2)*refBuf.stride - 1; + dstPel = m_filteredBlockTmp[2 + m_iRefListIdx][compID] + (height - 2 * BIO_EXTEND_SIZE)*dstBuf.stride + 1; + for (int w = 0; w < (width - 2 * BIO_EXTEND_SIZE); w++) + { + Pel val = leftShift_round(refPel[w], shift); + dstPel[w] = val - (Pel)IF_INTERNAL_OFFS; + } +#else refBuf.buf = refBuf.buf - refBuf.stride - 1; dstBuf.buf = m_filteredBlockTmp[2 + m_iRefListIdx][compID] + dstBuf.stride + 1; bioSampleExtendBilinearFilter(refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, width - 2, height - 2, 1, xFrac, yFrac, rndRes, chFmt, clpRng); +#endif // restore data width = backupWidth; @@ -686,7 +726,7 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio 
roundAffineMv(iMvScaleTmpHor, iMvScaleTmpVer, shift); // clip and scale - if (sps.getUseWrapAround()) + if (sps.getWrapAroundEnabledFlag()) { m_storedMv[h / AFFINE_MIN_BLOCK_SIZE * MVBUFFER_SIZE + w / AFFINE_MIN_BLOCK_SIZE].set(iMvScaleTmpHor, iMvScaleTmpVer); Mv tmpMv(iMvScaleTmpHor, iMvScaleTmpVer); @@ -710,7 +750,7 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio m_storedMv[((h << iScaleY) / AFFINE_MIN_BLOCK_SIZE + 1)* MVBUFFER_SIZE + ((w << iScaleX) / AFFINE_MIN_BLOCK_SIZE + 1)] + Mv(2, 2)); curMv.set(curMv.getHor() >> 2, curMv.getVer() >> 2); - if (sps.getUseWrapAround()) + if (sps.getWrapAroundEnabledFlag()) { clipMv(curMv, Position(pu.Y().x + (w << iScaleX), pu.Y().y + (h << iScaleY)), Size(blockWidth << iScaleX, blockHeight << iScaleY), sps); } @@ -898,6 +938,7 @@ void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf } // yu } +#if !JVET_M0487_INT_EXTEND void InterPrediction::bioSampleExtendBilinearFilter(Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int dim, int fracX, int fracY, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng) { Pel const* pSrc = NULL; @@ -959,6 +1000,7 @@ void InterPrediction::bioSampleExtendBilinearFilter(Pel const* src, int srcStrid } } } +#endif bool InterPrediction::xCalcBiPredSubBlkDist(const PredictionUnit &pu, const Pel* pYuvSrc0, const int src0Stride, const Pel* pYuvSrc1, const int src1Stride, const BitDepths &clipBitDepths) { @@ -1070,7 +1112,7 @@ void InterPrediction::motionCompensation( PredictionUnit &pu, PelUnitBuf &predBu , const bool luma, const bool chroma ) { - // dual tree handling for CPR as the only ref + // dual tree handling for IBC as the only ref if (!luma || !chroma) { if (!luma && chroma) @@ -1111,7 +1153,7 @@ void InterPrediction::motionCompensation( PredictionUnit &pu, PelUnitBuf &predBu } else { - if (pu.mergeType != MRG_TYPE_DEFAULT_N && pu.mergeType != MRG_TYPE_CPR) + if (pu.mergeType != 
MRG_TYPE_DEFAULT_N && pu.mergeType != MRG_TYPE_IBC) { xSubPuMC( pu, predBuf, eRefPicList ); } diff --git a/source/Lib/CommonLib/InterPrediction.h b/source/Lib/CommonLib/InterPrediction.h index 2f07955698f401aeb58e5de66cc1c30d8540229b..145431e2777417e854efa484d18fe1fba7418ad4 100644 --- a/source/Lib/CommonLib/InterPrediction.h +++ b/source/Lib/CommonLib/InterPrediction.h @@ -63,6 +63,9 @@ class Mv; class InterPrediction : public WeightPrediction { private: +#if JVET_M0170_MRG_SHARELIST + int m_shareState; +#endif Distortion m_bioDistThres; Distortion m_bioSubBlkDistThres; @@ -101,8 +104,9 @@ protected: int rightShiftMSB(int numer, int denom); void applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf &yuvSrc0, const CPelUnitBuf &yuvSrc1, const int &refIdx0, const int &refIdx1, PelUnitBuf &yuvDst, const BitDepths &clipBitDepths); bool xCalcBiPredSubBlkDist(const PredictionUnit &pu, const Pel* yuvSrc0, const int src0Stride, const Pel* yuvSrc1, const int src1Stride, const BitDepths &clipBitDepths); +#if !JVET_M0487_INT_EXTEND void bioSampleExtendBilinearFilter(Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int dim, int fracX, int fracY, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng); - +#endif void xPredInterUni ( const PredictionUnit& pu, const RefPicList& eRefPicList, PelUnitBuf& pcYuvPred, const bool& bi , const bool& bioApplied , const bool luma, const bool chroma @@ -110,7 +114,7 @@ protected: void xPredInterBi ( PredictionUnit& pu, PelUnitBuf &pcYuvPred ); void xPredInterBlk ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv& _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng , const bool& bioApplied - , bool isCPR + , bool isIBC ); void xAddBIOAvg4 (const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int 
tmpy, int shift, int offset, const ClpRng& clpRng); @@ -156,7 +160,9 @@ public: #if JVET_J0090_MEMORY_BANDWITH_MEASURE void cacheAssign( CacheModel *cache ); #endif - +#if JVET_M0170_MRG_SHARELIST + void setShareState(int shareStateIn) {m_shareState = shareStateIn;} +#endif }; //! \} diff --git a/source/Lib/CommonLib/IntraPrediction.cpp b/source/Lib/CommonLib/IntraPrediction.cpp index 3745b4bcc71eed36c838a90c714dccd25cd290d8..e49e92eceee0a95924f7d469f72ff4934e80877e 100644 --- a/source/Lib/CommonLib/IntraPrediction.cpp +++ b/source/Lib/CommonLib/IntraPrediction.cpp @@ -1563,6 +1563,24 @@ void IntraPrediction::xGetLumaRecPixels(const PredictionUnit &pu, CompArea chrom pDst[i] = ( piSrc[2 * i] * 2 + piSrc[2 * i - 1] + piSrc[2 * i + 1] + 2 ) >> 2; } } +#if JVET_M0142_CCLM_COLLOCATED_CHROMA + else if( pu.cs->sps->getSpsNext().getCclmCollocatedChromaFlag() ) + { + piSrc = pRecSrc0 - iRecStride2; + + if( i == 0 && !bLeftAvaillable ) + { + pDst[i] = ( piSrc[2 * i] * 2 + piSrc[2 * i - iRecStride] + piSrc[2 * i + iRecStride] + 2 ) >> 2; + } + else + { + pDst[i] = ( piSrc[2 * i - iRecStride] + + piSrc[2 * i ] * 4 + piSrc[2 * i - 1] + piSrc[2 * i + 1] + + piSrc[2 * i + iRecStride] + + 4 ) >> 3; + } + } +#endif else { piSrc = pRecSrc0 - iRecStride2; @@ -1592,9 +1610,30 @@ void IntraPrediction::xGetLumaRecPixels(const PredictionUnit &pu, CompArea chrom } for (int j = 0; j < uiCHeight + addedLeftBelow; j++) { - pDst[0] = ( ( piSrc[1 ] * 2 + piSrc[0 ] + piSrc[2 ] ) - + ( piSrc[1 + iRecStride] * 2 + piSrc[iRecStride] + piSrc[2 + iRecStride] ) - + 4 ) >> 3; +#if JVET_M0142_CCLM_COLLOCATED_CHROMA + if( pu.cs->sps->getSpsNext().getCclmCollocatedChromaFlag() ) + { + if( j == 0 && !bAboveAvaillable ) + { + pDst[0] = ( piSrc[1] * 2 + piSrc[0] + piSrc[2] + 2 ) >> 2; + } + else + { + pDst[0] = ( piSrc[1 - iRecStride] + + piSrc[1 ] * 4 + piSrc[0] + piSrc[2] + + piSrc[1 + iRecStride] + + 4 ) >> 3; + } + } + else + { +#endif + pDst[0] = ( ( piSrc[1 ] * 2 + piSrc[0 ] + piSrc[2 ] ) + + ( 
piSrc[1 + iRecStride] * 2 + piSrc[iRecStride] + piSrc[2 + iRecStride] ) + + 4 ) >> 3; +#if JVET_M0142_CCLM_COLLOCATED_CHROMA + } +#endif piSrc += iRecStride2; pDst += iDstStride; @@ -1607,16 +1646,48 @@ void IntraPrediction::xGetLumaRecPixels(const PredictionUnit &pu, CompArea chrom { for( int i = 0; i < uiCWidth; i++ ) { - if( i == 0 && !bLeftAvaillable ) +#if JVET_M0142_CCLM_COLLOCATED_CHROMA + if( pu.cs->sps->getSpsNext().getCclmCollocatedChromaFlag() ) { - pDst0[i] = ( pRecSrc0[2 * i] + pRecSrc0[2 * i + iRecStride] + 1 ) >> 1; + if( i == 0 && !bLeftAvaillable ) + { + if( j == 0 && !bAboveAvaillable ) + { + pDst0[i] = pRecSrc0[2 * i]; + } + else + { + pDst0[i] = ( pRecSrc0[2 * i] * 2 + pRecSrc0[2 * i - iRecStride] + pRecSrc0[2 * i + iRecStride] + 2 ) >> 2; + } + } + else if( j == 0 && !bAboveAvaillable ) + { + pDst0[i] = ( pRecSrc0[2 * i] * 2 + pRecSrc0[2 * i - 1] + pRecSrc0[2 * i + 1] + 2 ) >> 2; + } + else + { + pDst0[i] = ( pRecSrc0[2 * i - iRecStride] + + pRecSrc0[2 * i ] * 4 + pRecSrc0[2 * i - 1] + pRecSrc0[2 * i + 1] + + pRecSrc0[2 * i + iRecStride] + + 4 ) >> 3; + } } else { - pDst0[i] = ( pRecSrc0[2 * i ] * 2 + pRecSrc0[2 * i + 1 ] + pRecSrc0[2 * i - 1 ] - + pRecSrc0[2 * i + iRecStride] * 2 + pRecSrc0[2 * i + 1 + iRecStride] + pRecSrc0[2 * i - 1 + iRecStride] - + 4 ) >> 3; +#endif + if( i == 0 && !bLeftAvaillable ) + { + pDst0[i] = ( pRecSrc0[2 * i] + pRecSrc0[2 * i + iRecStride] + 1 ) >> 1; + } + else + { + pDst0[i] = ( pRecSrc0[2 * i ] * 2 + pRecSrc0[2 * i + 1 ] + pRecSrc0[2 * i - 1 ] + + pRecSrc0[2 * i + iRecStride] * 2 + pRecSrc0[2 * i + 1 + iRecStride] + pRecSrc0[2 * i - 1 + iRecStride] + + 4 ) >> 3; + } +#if JVET_M0142_CCLM_COLLOCATED_CHROMA } +#endif } pDst0 += iDstStride; @@ -1784,6 +1855,37 @@ void IntraPrediction::xGetLMParameters(const PredictionUnit &pu, const Component if (leftAvailable || aboveAvailable) { +#if JVET_M0064_CCLM_SIMPLIFICATION + int diff = maxLuma[0] - minLuma[0]; + if (diff > 0) + { + int diffC = maxLuma[1] - minLuma[1]; + 
int x = floorLog2( diff ); + static const uint8_t DivSigTable[1 << 4] = { + // 4bit significands - 8 ( MSB is omitted ) + 0, 7, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 1, 1, 0 + }; + int normDiff = (diff << 4 >> x) & 15; + int v = DivSigTable[normDiff] | 8; + x += normDiff != 0; + + int y = floorLog2( abs( diffC ) ) + 1; + int add = 1 << y >> 1; + a = (diffC * v + add) >> y; + iShift = 3 + x - y; + if ( iShift < 1 ) { + iShift = 1; + a = ( (a == 0)? 0: (a < 0)? -15 : 15 ); // a=Sign(a)*15 + } + b = minLuma[1] - ((a * minLuma[0]) >> iShift); + } + else + { + a = 0; + b = minLuma[1]; + iShift = 0; + } +#else // original a = 0; iShift = 16; int shift = (internalBitDepth > 8) ? internalBitDepth - 9 : 0; @@ -1795,6 +1897,7 @@ void IntraPrediction::xGetLMParameters(const PredictionUnit &pu, const Component a = (((maxLuma[1] - minLuma[1]) * g_aiLMDivTableHigh[diff - 1] + div + add) >> shift); } b = minLuma[1] - ((a * minLuma[0]) >> iShift); +#endif } else { diff --git a/source/Lib/CommonLib/LoopFilter.cpp b/source/Lib/CommonLib/LoopFilter.cpp index ffc117b1f64e07628663ae4b3ba1fb9d866ea34a..f8543f5bc8a63203456999609cfe618e55c3a381 100644 --- a/source/Lib/CommonLib/LoopFilter.cpp +++ b/source/Lib/CommonLib/LoopFilter.cpp @@ -554,7 +554,7 @@ void LoopFilter::xEdgeFilterLuma(const CodingUnit& cu, const DeblockEdgeDir edge unsigned uiBsAbsIdx = 0, uiBs = 0; int iOffset, iSrcStep; - bool bPCMFilter = (sps.getUsePCM() && sps.getPCMFilterDisableFlag()) ? true : false; + bool bPCMFilter = (sps.getPCMEnabledFlag() && sps.getPCMFilterDisableFlag()) ? true : false; bool bPartPNoFilter = false; bool bPartQNoFilter = false; int betaOffsetDiv2 = slice.getDeblockingFilterBetaOffsetDiv2(); @@ -704,7 +704,7 @@ void LoopFilter::xEdgeFilterChroma(const CodingUnit& cu, const DeblockEdgeDir ed int iOffset, iSrcStep; unsigned uiLoopLength; - bool bPCMFilter = (sps.getUsePCM() && sps.getPCMFilterDisableFlag()) ? 
true : false; + bool bPCMFilter = (sps.getPCMEnabledFlag() && sps.getPCMFilterDisableFlag()) ? true : false; bool bPartPNoFilter = false; bool bPartQNoFilter = false; const int tcOffsetDiv2 = slice.getDeblockingFilterTcOffsetDiv2(); diff --git a/source/Lib/CommonLib/Mv.cpp b/source/Lib/CommonLib/Mv.cpp index 91f379414cddf1c4f579c3ccc62eb057ba94b84b..7edfa2c56903738f79ce174ae35d224d855d16b3 100644 --- a/source/Lib/CommonLib/Mv.cpp +++ b/source/Lib/CommonLib/Mv.cpp @@ -61,7 +61,7 @@ void clipMv( Mv& rcMv, const Position& pos, int iVerMax = ( sps.getPicHeightInLumaSamples() + iOffset - ( int ) pos.y - 1 ) << iMvShift; int iVerMin = ( -( int ) sps.getMaxCUHeight() - iOffset - ( int ) pos.y + 1 ) << iMvShift; - if( sps.getUseWrapAround() ) + if( sps.getWrapAroundEnabledFlag() ) { int iHorMax = ( sps.getPicWidthInLumaSamples() + sps.getMaxCUWidth() - size.width + iOffset - ( int ) pos.x - 1 ) << iMvShift; int iHorMin = ( -( int ) sps.getMaxCUWidth() - iOffset - ( int ) pos.x + 1 ) << iMvShift; diff --git a/source/Lib/CommonLib/Mv.h b/source/Lib/CommonLib/Mv.h index 7b2872a9e3de226d00f631e0e636ec46021bf802..56287bec0b64953893c6313ce5b7d1ae3349c595 100644 --- a/source/Lib/CommonLib/Mv.h +++ b/source/Lib/CommonLib/Mv.h @@ -165,8 +165,13 @@ public: const Mv scaleMv( int iScale ) const { +#if JVET_M0479_18BITS_MV_CLIP + const int mvx = Clip3( -131072, 131071, (iScale * getHor() + 127 + (iScale * getHor() < 0)) >> 8 ); + const int mvy = Clip3( -131072, 131071, (iScale * getVer() + 127 + (iScale * getVer() < 0)) >> 8 ); +#else const int mvx = Clip3( -32768, 32767, (iScale * getHor() + 127 + (iScale * getHor() < 0)) >> 8 ); const int mvy = Clip3( -32768, 32767, (iScale * getVer() + 127 + (iScale * getVer() < 0)) >> 8 ); +#endif return Mv( mvx, mvy ); } @@ -201,6 +206,13 @@ public: { roundToPrecision(src, m_amvrPrecision[amvr]); } + +#if JVET_M0444_SMVD + Mv getSymmvdMv(const Mv& curMvPred, const Mv& tarMvPred) + { + return Mv(tarMvPred.hor - hor + curMvPred.hor, tarMvPred.ver - 
ver + curMvPred.ver); + } +#endif };// END CLASS DEFINITION MV namespace std diff --git a/source/Lib/CommonLib/Picture.cpp b/source/Lib/CommonLib/Picture.cpp index 0937fd1bf8ff1f7c45d51d9ee2d68d0995539ad1..4c96307cb2196d52f9886c1189987f91d09c69c8 100644 --- a/source/Lib/CommonLib/Picture.cpp +++ b/source/Lib/CommonLib/Picture.cpp @@ -1004,7 +1004,7 @@ void Picture::extendPicBorder() Pel* pi = piTxt; // do left and right margins - if (cs->sps->getUseWrapAround()) + if (cs->sps->getWrapAroundEnabledFlag()) { int xoffset = cs->sps->getWrapAroundOffset() >> getComponentScaleX( compID, cs->area.chromaFormat ); for (int y = 0; y < p.height; y++) diff --git a/source/Lib/CommonLib/Quant.cpp b/source/Lib/CommonLib/Quant.cpp index bed8fd5d85d0e325b213b748d921275be5672e63..ca477139b28c97e7342b3cae0b7c14d1494355ac 100644 --- a/source/Lib/CommonLib/Quant.cpp +++ b/source/Lib/CommonLib/Quant.cpp @@ -301,7 +301,11 @@ void Quant::dequant(const TransformUnit &tu, CHECK(uiWidth > m_uiMaxTrSize, "Unsupported transformation size"); // Represents scaling through forward transform +#if JVET_M0464_UNI_MTS + const bool bClipTransformShiftTo0 = tu.mtsIdx!=1 && sps->getSpsRangeExtension().getExtendedPrecisionProcessingFlag(); +#else const bool bClipTransformShiftTo0 = (tu.transformSkip[compID] != 0) && sps->getSpsRangeExtension().getExtendedPrecisionProcessingFlag(); +#endif const int originalTransformShift = getTransformShift(channelBitDepth, area.size(), maxLog2TrDynamicRange); const int iTransformShift = bClipTransformShiftTo0 ? 
std::max<int>(0, originalTransformShift) : originalTransformShift; @@ -717,7 +721,11 @@ void Quant::quant(TransformUnit &tu, const ComponentID &compID, const CCoeffBuf const CCoeffBuf &piCoef = pSrc; CoeffBuf piQCoef = tu.getCoeffs(compID); +#if JVET_M0464_UNI_MTS + const bool useTransformSkip = tu.mtsIdx==1; +#else const bool useTransformSkip = tu.transformSkip[compID]; +#endif const int maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(toChannelType(compID)); { @@ -820,7 +828,11 @@ bool Quant::xNeedRDOQ(TransformUnit &tu, const ComponentID &compID, const CCoeff const CCoeffBuf piCoef = pSrc; +#if JVET_M0464_UNI_MTS + const bool useTransformSkip = tu.mtsIdx==1; +#else const bool useTransformSkip = tu.transformSkip[compID]; +#endif const int maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(toChannelType(compID)); #if HEVC_USE_SCALING_LISTS diff --git a/source/Lib/CommonLib/QuantRDOQ.cpp b/source/Lib/CommonLib/QuantRDOQ.cpp index 8204a8c7f3959988ac7bff07cb74a9c81afb980b..bcf25fe5fd2006cf085f2430ca6b3add0080e613 100644 --- a/source/Lib/CommonLib/QuantRDOQ.cpp +++ b/source/Lib/CommonLib/QuantRDOQ.cpp @@ -185,7 +185,11 @@ inline int QuantRDOQ::xGetICRate( const uint32_t uiAbsLevel, const bool useLimitedPrefixLength, const int maxLog2TrDynamicRange ) const { +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS + if( remRegBins < 4 ) +#else if( remRegBins < 3 ) +#endif { int iRate = int( xGetIEPRate() ); // cost of sign bit uint32_t symbol = ( uiAbsLevel == 0 ? goRiceZero : uiAbsLevel <= goRiceZero ? uiAbsLevel-1 : uiAbsLevel ); @@ -226,7 +230,11 @@ inline int QuantRDOQ::xGetICRate( const uint32_t uiAbsLevel, } int iRate = int( xGetIEPRate() ); // cost of sign bit +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS + const uint32_t cthres = 4; +#else const uint32_t cthres = ( remGt2Bins ? 
4 : 2 ); +#endif if( uiAbsLevel >= cthres ) { uint32_t symbol = ( uiAbsLevel - cthres ) >> 1; @@ -266,10 +274,14 @@ inline int QuantRDOQ::xGetICRate( const uint32_t uiAbsLevel, iRate += fracBitsGt1.intBits[1]; iRate += fracBitsPar.intBits[( uiAbsLevel - 2 ) & 1]; +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS + iRate += fracBitsGt2.intBits[1]; +#else if( remGt2Bins ) { iRate += fracBitsGt2.intBits[1]; } +#endif } else if( uiAbsLevel == 1 ) { @@ -542,7 +554,11 @@ void QuantRDOQ::quant(TransformUnit &tu, const ComponentID &compID, const CCoeff const CCoeffBuf &piCoef = pSrc; CoeffBuf piQCoef = tu.getCoeffs(compID); +#if JVET_M0464_UNI_MTS + const bool useTransformSkip = tu.mtsIdx==1; +#else const bool useTransformSkip = tu.transformSkip[compID]; +#endif bool useRDOQ = useTransformSkip ? m_useRDOQTS : m_useRDOQ; @@ -598,7 +614,11 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, // Represents scaling through forward transform int iTransformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange); +#if JVET_M0464_UNI_MTS + if (tu.mtsIdx==1 && extendedPrecision) +#else if (tu.transformSkip[compID] && extendedPrecision) +#endif { iTransformShift = std::max<int>(0, iTransformShift); } @@ -686,7 +706,11 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, bool is2x2subblock = ( iCGSizeM1 == 3 ); int remGt2Bins = ( is2x2subblock ? MAX_NUM_GT2_BINS_2x2SUBBLOCK : MAX_NUM_GT2_BINS_4x4SUBBLOCK ); - int remRegBins = ( is2x2subblock ? MAX_NUM_REG_BINS_2x2SUBBLOCK : MAX_NUM_REG_BINS_4x4SUBBLOCK ) - remGt2Bins; +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS + int remRegBins = ( is2x2subblock ? MAX_NUM_REG_BINS_2x2SUBBLOCK : MAX_NUM_REG_BINS_4x4SUBBLOCK ); +#else + int remRegBins = (is2x2subblock ? 
MAX_NUM_REG_BINS_2x2SUBBLOCK : MAX_NUM_REG_BINS_4x4SUBBLOCK) - remGt2Bins; +#endif uint32_t goRiceParam = 0; double *pdCostCoeffGroupSig = m_pdCostCoeffGroupSig; @@ -762,7 +786,11 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, uint32_t uiGt1Ctx = cctx.greater1CtxIdAbs ( ctxOffset ); uint32_t uiGt2Ctx = cctx.greater2CtxIdAbs ( ctxOffset ); uint32_t goRiceZero = 0; +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS + if( remRegBins < 4 ) +#else if( remRegBins < 3 ) +#endif { unsigned sumAbs = cctx.templateAbsSum( iScanPos, piDstCoeff ); goRiceParam = g_auiGoRiceParsCoeff [ sumAbs ]; @@ -786,7 +814,11 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ], lLevelDouble, uiMaxAbsLevel, &fracBitsSig, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 0, extendedPrecision, maxLog2TrDynamicRange ); #if HEVC_USE_SIGN_HIDING +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS + sigRateDelta[ uiBlkPos ] = ( remRegBins < 4 ? 0 : fracBitsSig.intBits[1] - fracBitsSig.intBits[0] ); +#else sigRateDelta[ uiBlkPos ] = ( remRegBins < 3 ? 
0 : fracBitsSig.intBits[1] - fracBitsSig.intBits[0] ); +#endif #endif } @@ -805,7 +837,11 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, } else // uiLevel == 0 { +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS + if( remRegBins < 4 ) +#else if( remRegBins < 3 ) +#endif { int rateNow = xGetICRate( uiLevel, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ); rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow; @@ -825,18 +861,32 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, remRegBins = ( is2x2subblock ? MAX_NUM_REG_BINS_2x2SUBBLOCK : MAX_NUM_REG_BINS_4x4SUBBLOCK ) - remGt2Bins; goRiceParam = 0; } +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS + else if( remRegBins >= 4 ) +#else else if( remRegBins >= 3 ) +#endif { +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS + const uint32_t baseLevel = 4; +#else const uint32_t baseLevel = ( remGt2Bins ? 4 : 2 ); +#endif if( goRiceParam < 3 && ((uiLevel-baseLevel)>>1) > (3<<goRiceParam)-1 ) { goRiceParam++; } +#if !JVET_M0173_MOVE_GT2_TO_FIRST_PASS if( uiLevel >= 2 && remGt2Bins ) { remGt2Bins--; } +#endif +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS + remRegBins -= (uiLevel < 2 ? 
uiLevel : 3) + (iScanPos != iLastScanPos); +#else remRegBins -= std::min<int>( uiLevel, 2 ) + (iScanPos != iLastScanPos); +#endif } } else diff --git a/source/Lib/CommonLib/RdCost.h b/source/Lib/CommonLib/RdCost.h index 52422e36358f1de2a5be8d17dcd38c5d88c2f4f0..ead8fd5cb20aa52963c84a7c55dc6f1f34f1b880 100644 --- a/source/Lib/CommonLib/RdCost.h +++ b/source/Lib/CommonLib/RdCost.h @@ -118,7 +118,7 @@ private: double m_motionLambda; int m_iCostScale; - double m_dCost; // for cpr + double m_dCost; // for ibc public: RdCost(); virtual ~RdCost(); @@ -162,7 +162,7 @@ public: } void setCostScale ( int iCostScale ) { m_iCostScale = iCostScale; } Distortion getCost ( uint32_t b ) { return Distortion( m_motionLambda * b ); } - // for cpr + // for ibc void getMotionCost(int add, bool isTransquantBypass) { m_dCost = m_dLambdaMotionSAD[(isTransquantBypass && m_costMode == COST_MIXED_LOSSLESS_LOSSY_CODING) ? 1 : 0] + add; } void setPredictors(Mv* pcMv) diff --git a/source/Lib/CommonLib/Rom.cpp b/source/Lib/CommonLib/Rom.cpp index 5ae6612d5944d43ab9242a1f1e84a2c4214b3bea..a485d9fadf0a04f6d7a1aa3e0a9cc7ae868502a9 100644 --- a/source/Lib/CommonLib/Rom.cpp +++ b/source/Lib/CommonLib/Rom.cpp @@ -180,6 +180,7 @@ public: return rtn; } }; +#if !JVET_M0064_CCLM_SIMPLIFICATION int g_aiLMDivTableLow[] = { 0, 0, 21845, 0, 13107, 43690, 18724, 0, 50972, 39321, 53620, 21845, 15123, 9362, 4369, 0, 3855, 58254, 17246, 52428, 49932, 59578, 25644, 43690, 28835, 40329, 16990, 37449, 56496, 34952, 4228, 0, 61564, 34695, @@ -244,6 +245,7 @@ int g_aiLMDivTableHigh[] = { 134, 134, 134, 133, 133, 133, 132, 132, 132, 132, 131, 131, 131, 131, 130, 130, 130, 130, 129, 129, 129, 129, 128, 128, 128, 128, }; +#endif const int8_t g_GbiLog2WeightBase = 3; const int8_t g_GbiWeightBase = (1 << g_GbiLog2WeightBase); const int8_t g_GbiWeights[GBI_NUM] = { -2, 3, 4, 5, 10 }; @@ -619,7 +621,7 @@ const uint8_t g_chroma422IntraAngleMappingTable[NUM_INTRA_MODE] = { 0, 1, 2, 2, 2, 2, 2, 2, 2, 3, 4, 6, 8, 10, 12, 13, 14, 
16, 18, 20, 22, 23, 24, 26, 28, 30, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 44, 44, 45, 46, 46, 46, 47, 48, 48, 48, 49, 50, 51, 52, 52, 52, 53, 54, 54, 54, 55, 56, 56, 56, 57, 58, 59, 60, DM_CHROMA_IDX }; - +#if !REMOVE_BIN_DECISION_TREE // ==================================================================================================================== // Decision tree templates // ==================================================================================================================== @@ -635,6 +637,7 @@ const DecisionTreeTemplate g_mtSplitDTT = compile( /*0*/ DTT_SPLIT_TT_VERT, /*1*/ DTT_SPLIT_BT_VERT ) ) ) ); +#endif // ==================================================================================================================== diff --git a/source/Lib/CommonLib/Rom.h b/source/Lib/CommonLib/Rom.h index df42ecc8f5862ce7f546e1f25162ec795a075213..2a856a448d8bd7b4f06454e42cbf9806a8022191 100644 --- a/source/Lib/CommonLib/Rom.h +++ b/source/Lib/CommonLib/Rom.h @@ -113,7 +113,9 @@ extern const uint8_t g_chroma422IntraAngleMappingTable[NUM_INTRA_MODE]; extern const TMatrixCoeff g_as_DST_MAT_4 [TRANSFORM_NUMBER_OF_DIRECTIONS][4][4]; #endif +#if !JVET_M0464_UNI_MTS extern const uint32_t g_EmtSigNumThr; +#endif extern const TMatrixCoeff g_trCoreDCT2P2 [TRANSFORM_NUMBER_OF_DIRECTIONS][ 2][ 2]; extern const TMatrixCoeff g_trCoreDCT2P4 [TRANSFORM_NUMBER_OF_DIRECTIONS][ 4][ 4]; @@ -132,6 +134,7 @@ extern const TMatrixCoeff g_trCoreDST7P8 [TRANSFORM_NUMBER_OF_DIRECTIONS][ 8][ extern const TMatrixCoeff g_trCoreDST7P16 [TRANSFORM_NUMBER_OF_DIRECTIONS][ 16][ 16]; extern const TMatrixCoeff g_trCoreDST7P32 [TRANSFORM_NUMBER_OF_DIRECTIONS][ 32][ 32]; +#if !REMOVE_BIN_DECISION_TREE // ==================================================================================================================== // Decision tree templates // ==================================================================================================================== @@ 
-155,6 +158,7 @@ extern const DecisionTreeTemplate g_mtSplitDTT; // decision tree for QTBT split extern const DecisionTreeTemplate g_qtbtSplitDTT; +#endif // ==================================================================================================================== // Misc. @@ -210,8 +214,10 @@ extern const uint32_t g_scalingListSizeX[SCALING_LIST_SIZE_NUM]; extern MsgLevel g_verbosity; +#if !JVET_M0064_CCLM_SIMPLIFICATION extern int g_aiLMDivTableLow[]; extern int g_aiLMDivTableHigh[]; +#endif extern const int g_aiNonLMPosThrs[]; diff --git a/source/Lib/CommonLib/RomTr.cpp b/source/Lib/CommonLib/RomTr.cpp index 3f0815e85b0fec545134cc9fa15920ed2cbe5191..2df3c5a131c9bf299a36c6a32995206b127253f8 100644 --- a/source/Lib/CommonLib/RomTr.cpp +++ b/source/Lib/CommonLib/RomTr.cpp @@ -396,13 +396,23 @@ const TMatrixCoeff g_trCoreDCT8P8[TRANSFORM_NUMBER_OF_DIRECTIONS][8][8] = }; const TMatrixCoeff g_trCoreDCT8P16[TRANSFORM_NUMBER_OF_DIRECTIONS][16][16] = { +#if JVET_M0497_FAST_DST7 + DEFINE_DCT8_P16_MATRIX(88, 88, 87, 85, 81, 77, 73, 68, 62, 55, 48, 40, 33, 25, 17, 8), + DEFINE_DCT8_P16_MATRIX(88, 88, 87, 85, 81, 77, 73, 68, 62, 55, 48, 40, 33, 25, 17, 8) +#else DEFINE_DCT8_P16_MATRIX(90, 89, 87, 83, 81, 77, 72, 66, 62, 56, 49, 41, 33, 25, 17, 9), DEFINE_DCT8_P16_MATRIX(90, 89, 87, 83, 81, 77, 72, 66, 62, 56, 49, 41, 33, 25, 17, 9) +#endif }; const TMatrixCoeff g_trCoreDCT8P32[TRANSFORM_NUMBER_OF_DIRECTIONS][32][32] = { +#if JVET_M0497_FAST_DST7 + DEFINE_DCT8_P32_MATRIX(90, 90, 89, 88, 87, 86, 85, 84, 82, 80, 78, 77, 74, 72, 68, 66, 63, 60, 56, 53, 50, 46, 42, 38, 34, 30, 26, 21, 17, 13, 9, 4), + DEFINE_DCT8_P32_MATRIX(90, 90, 89, 88, 87, 86, 85, 84, 82, 80, 78, 77, 74, 72, 68, 66, 63, 60, 56, 53, 50, 46, 42, 38, 34, 30, 26, 21, 17, 13, 9, 4) +#else DEFINE_DCT8_P32_MATRIX(90, 90, 89, 88, 88, 86, 85, 84, 82, 80, 78, 77, 74, 72, 68, 66, 63, 60, 56, 53, 50, 45, 42, 38, 34, 30, 26, 21, 17, 13, 9, 4), DEFINE_DCT8_P32_MATRIX(90, 90, 89, 88, 88, 86, 85, 84, 82, 80, 78, 
77, 74, 72, 68, 66, 63, 60, 56, 53, 50, 45, 42, 38, 34, 30, 26, 21, 17, 13, 9, 4) +#endif }; // DST-7 @@ -418,13 +428,23 @@ const TMatrixCoeff g_trCoreDST7P8[TRANSFORM_NUMBER_OF_DIRECTIONS][8][8] = }; const TMatrixCoeff g_trCoreDST7P16[TRANSFORM_NUMBER_OF_DIRECTIONS][16][16] = { +#if JVET_M0497_FAST_DST7 + DEFINE_DST7_P16_MATRIX(8, 17, 25, 33, 40, 48, 55, 62, 68, 73, 77, 81, 85, 87, 88, 88), + DEFINE_DST7_P16_MATRIX(8, 17, 25, 33, 40, 48, 55, 62, 68, 73, 77, 81, 85, 87, 88, 88) +#else DEFINE_DST7_P16_MATRIX(9, 17, 25, 33, 41, 49, 56, 62, 66, 72, 77, 81, 83, 87, 89, 90), DEFINE_DST7_P16_MATRIX(9, 17, 25, 33, 41, 49, 56, 62, 66, 72, 77, 81, 83, 87, 89, 90) +#endif }; const TMatrixCoeff g_trCoreDST7P32[TRANSFORM_NUMBER_OF_DIRECTIONS][32][32] = { +#if JVET_M0497_FAST_DST7 + DEFINE_DST7_P32_MATRIX(4, 9, 13, 17, 21, 26, 30, 34, 38, 42, 46, 50, 53, 56, 60, 63, 66, 68, 72, 74, 77, 78, 80, 82, 84, 85, 86, 87, 88, 89, 90, 90), + DEFINE_DST7_P32_MATRIX(4, 9, 13, 17, 21, 26, 30, 34, 38, 42, 46, 50, 53, 56, 60, 63, 66, 68, 72, 74, 77, 78, 80, 82, 84, 85, 86, 87, 88, 89, 90, 90) +#else DEFINE_DST7_P32_MATRIX(4, 9, 13, 17, 21, 26, 30, 34, 38, 42, 45, 50, 53, 56, 60, 63, 66, 68, 72, 74, 77, 78, 80, 82, 84, 85, 86, 88, 88, 89, 90, 90), DEFINE_DST7_P32_MATRIX(4, 9, 13, 17, 21, 26, 30, 34, 38, 42, 45, 50, 53, 56, 60, 63, 66, 68, 72, 74, 77, 78, 80, 82, 84, 85, 86, 88, 88, 89, 90, 90) +#endif }; //-------------------------------------------------------------------------------------------------- diff --git a/source/Lib/CommonLib/SampleAdaptiveOffset.cpp b/source/Lib/CommonLib/SampleAdaptiveOffset.cpp index 14763c7f70bc10b327679af805c93509af802360..a048e366d9a302f393798f6aeaa650b54a0f729a 100644 --- a/source/Lib/CommonLib/SampleAdaptiveOffset.cpp +++ b/source/Lib/CommonLib/SampleAdaptiveOffset.cpp @@ -614,7 +614,7 @@ void SampleAdaptiveOffset::SAOProcess( CodingStructure& cs, SAOBlkParam* saoBlkP void SampleAdaptiveOffset::xPCMLFDisableProcess(CodingStructure& cs) { const PreCalcValues& 
pcv = *cs.pcv; - const bool bPCMFilter = (cs.sps->getUsePCM() && cs.sps->getPCMFilterDisableFlag()) ? true : false; + const bool bPCMFilter = (cs.sps->getPCMEnabledFlag() && cs.sps->getPCMFilterDisableFlag()) ? true : false; if( bPCMFilter || cs.pps->getTransquantBypassEnabledFlag() ) { diff --git a/source/Lib/CommonLib/Slice.cpp b/source/Lib/CommonLib/Slice.cpp index fa7f7a33b712149641546842dc329cd17775e176..00f16676bcbef8eece7b89a92ad3b667df5a08cc 100644 --- a/source/Lib/CommonLib/Slice.cpp +++ b/source/Lib/CommonLib/Slice.cpp @@ -74,6 +74,9 @@ Slice::Slice() , m_signDataHidingEnabledFlag ( false ) #endif , m_bCheckLDC ( false ) +#if JVET_M0444_SMVD +, m_biDirPred ( false ) +#endif , m_iSliceQpDelta ( 0 ) , m_iDepth ( 0 ) #if HEVC_VPS @@ -134,6 +137,9 @@ Slice::Slice() , m_uiMaxTTSizeIChroma ( 0 ) , m_uiMaxBTSize ( 0 ) , m_MotionCandLut (NULL) +#if JVET_M0170_MRG_SHARELIST +, m_MotionCandLuTsBkup (NULL) +#endif { for(uint32_t i=0; i<NUM_REF_PIC_LIST_01; i++) { @@ -192,6 +198,12 @@ void Slice::initSlice() m_bCheckLDC = false; +#if JVET_M0444_SMVD + m_biDirPred = false; + m_symRefIdx[0] = -1; + m_symRefIdx[1] = -1; +#endif + for (uint32_t component = 0; component < MAX_NUM_COMPONENT; component++) { m_iSliceChromaQpDelta[component] = 0; @@ -423,7 +435,7 @@ void Slice::setRefPicList( PicList& rcListPic, bool checkNumPocTotalCurr, bool b pcRefPic = xGetLongTermRefPic(rcListPic, m_pRPS->getPOC(i), m_pRPS->getCheckLTMSBPresent(i)); } } - if (getSPS()->getSpsNext().getCPRMode()) + if (getSPS()->getSpsNext().getIBCMode()) { RefPicSetLtCurr[NumPicLtCurr] = getPic(); //getPic()->setIsLongTerm(true); @@ -442,7 +454,7 @@ void Slice::setRefPicList( PicList& rcListPic, bool checkNumPocTotalCurr, bool b // - Otherwise, when the current picture contains a P or B slice, the value of NumPocTotalCurr shall not be equal to 0. 
if (getRapPicFlag()) { - if (getSPS()->getSpsNext().getCPRMode()) + if (getSPS()->getSpsNext().getIBCMode()) { CHECK(numPicTotalCurr != 1, "Invalid state"); } @@ -517,7 +529,7 @@ void Slice::setRefPicList( PicList& rcListPic, bool checkNumPocTotalCurr, bool b m_bIsUsedAsLongTerm[REF_PIC_LIST_1][rIdx] = ( cIdx >= NumPicStCurr0 + NumPicStCurr1 ); } } - if (getSPS()->getSpsNext().getCPRMode()) + if (getSPS()->getSpsNext().getIBCMode()) { m_apcRefPicList[REF_PIC_LIST_0][m_aiNumRefIdx[REF_PIC_LIST_0] - 1] = getPic(); m_bIsUsedAsLongTerm[REF_PIC_LIST_0][m_aiNumRefIdx[REF_PIC_LIST_0] - 1] = true; @@ -552,7 +564,7 @@ int Slice::getNumRpsCurrTempList() const numRpsCurrTempList++; } } - if (getSPS()->getSpsNext().getCPRMode()) + if (getSPS()->getSpsNext().getIBCMode()) { return numRpsCurrTempList + 1; } @@ -764,6 +776,13 @@ void Slice::copySliceInfo(Slice *pSrc, bool cpyAlmostAll) m_bCheckLDC = pSrc->m_bCheckLDC; m_iSliceQpDelta = pSrc->m_iSliceQpDelta; + +#if JVET_M0444_SMVD + m_biDirPred = pSrc->m_biDirPred; + m_symRefIdx[0] = pSrc->m_symRefIdx[0]; + m_symRefIdx[1] = pSrc->m_symRefIdx[1]; +#endif + for (uint32_t component = 0; component < MAX_NUM_COMPONENT; component++) { m_iSliceChromaQpDelta[component] = pSrc->m_iSliceChromaQpDelta[component]; @@ -1595,6 +1614,12 @@ void Slice::initMotionLUTs() m_MotionCandLut->currCnt = 0; m_MotionCandLut->motionCand = nullptr; m_MotionCandLut->motionCand = new MotionInfo[MAX_NUM_HMVP_CANDS]; +#if JVET_M0170_MRG_SHARELIST + m_MotionCandLuTsBkup = new LutMotionCand; + m_MotionCandLuTsBkup->currCnt = 0; + m_MotionCandLuTsBkup->motionCand = nullptr; + m_MotionCandLuTsBkup->motionCand = new MotionInfo[MAX_NUM_HMVP_CANDS]; +#endif } void Slice::destroyMotionLUTs() { @@ -1602,18 +1627,31 @@ void Slice::destroyMotionLUTs() m_MotionCandLut->motionCand = nullptr; delete m_MotionCandLut; m_MotionCandLut = NULL; +#if JVET_M0170_MRG_SHARELIST + delete[] m_MotionCandLuTsBkup->motionCand; + m_MotionCandLuTsBkup->motionCand = nullptr; + delete 
m_MotionCandLuTsBkup; + m_MotionCandLuTsBkup = NULL; +#endif } void Slice::resetMotionLUTs() { m_MotionCandLut->currCnt = 0; +#if JVET_M0170_MRG_SHARELIST + m_MotionCandLuTsBkup->currCnt = 0; +#endif } MotionInfo Slice::getMotionInfoFromLUTs(int MotCandIdx) const { return m_MotionCandLut->motionCand[MotCandIdx]; } - - +#if JVET_M0170_MRG_SHARELIST +MotionInfo Slice::getMotionInfoFromLUTBkup(int MotCandIdx) const +{ + return m_MotionCandLuTsBkup->motionCand[MotCandIdx]; +} +#endif void Slice::addMotionInfoToLUTs(LutMotionCand* lutMC, MotionInfo newMi) { @@ -1733,13 +1771,19 @@ SPSNext::SPSNext( SPS& sps ) , m_NextEnabled ( false ) // disable all tool enabling flags by default , m_LargeCTU ( false ) - , m_SubPuMvp ( false ) , m_IMV ( false ) - , m_BIO ( false ) , m_DisableMotionCompression ( false ) , m_LMChroma ( false ) +#if JVET_M0142_CCLM_COLLOCATED_CHROMA + , m_cclmCollocatedChromaFlag ( false ) +#endif +#if JVET_M0464_UNI_MTS + , m_IntraMTS ( false ) + , m_InterMTS ( false ) +#else , m_IntraEMT ( false ) , m_InterEMT ( false ) +#endif , m_Affine ( false ) , m_AffineType ( false ) , m_MTTEnabled ( false ) @@ -1756,11 +1800,10 @@ SPSNext::SPSNext( SPS& sps ) #endif // default values for additional parameters - , m_subPuMrgMode ( 0 ) , m_ImvMode ( IMV_OFF ) , m_MTTMode ( 0 ) , m_compositeRefEnabled ( false ) - , m_CPRMode ( 0 ) + , m_IBCMode ( 0 ) // ADD_NEW_TOOL : (sps extension) add tool enabling flags here (with "false" as default values) { } @@ -1809,14 +1852,16 @@ SPS::SPS() , m_uiQuadtreeTUMaxDepthInter ( 0) , m_uiQuadtreeTUMaxDepthIntra ( 0) // Tool list -, m_usePCM (false) +, m_pcmEnabledFlag (false) , m_pcmLog2MaxSize ( 5) , m_uiPCMLog2MinSize ( 7) , m_bPCMFilterDisableFlag (false) +, m_sbtmvpEnabledFlag (false) +, m_bdofEnabledFlag (false) , m_uiBitsForPOC ( 8) , m_numLongTermRefPicSPS ( 0) , m_uiMaxTrSize ( 32) -, m_bUseSAO (false) +, m_saoEnabledFlag (false) , m_bTemporalIdNestingFlag (false) #if HEVC_USE_SCALING_LISTS , m_scalingListEnabledFlag 
(false) @@ -1827,7 +1872,7 @@ SPS::SPS() , m_vuiParametersPresentFlag (false) , m_vuiParameters () , m_spsNextExtension (*this) -, m_useWrapAround (false) +, m_wrapAroundEnabledFlag (false) , m_wrapAroundOffset ( 0) { for(int ch=0; ch<MAX_NUM_CHANNEL_TYPE; ch++) diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h index 60b4d5a27c78a5a582e08aa593d92b6992993f68..05b8ea8f046b66ed3bc1e72b0d4288b506ea31dd 100644 --- a/source/Lib/CommonLib/Slice.h +++ b/source/Lib/CommonLib/Slice.h @@ -791,6 +791,8 @@ public: class SPS; + +// Deprecated: SPSNext is going to be removed! Do not add any parameters to SPSNext class SPSNext { private: @@ -799,13 +801,19 @@ private: bool m_NextEnabled; //===== tool enabling flags (4 bytes - NOTE: last flag must be used for new extensions) ===== bool m_LargeCTU; // 5 - bool m_SubPuMvp; bool m_IMV; // 9 - bool m_BIO; bool m_DisableMotionCompression; // 13 bool m_LMChroma; // 17 +#if JVET_M0142_CCLM_COLLOCATED_CHROMA + bool m_cclmCollocatedChromaFlag; +#endif +#if JVET_M0464_UNI_MTS + bool m_IntraMTS; // 18 + bool m_InterMTS; // 19 +#else bool m_IntraEMT; // 18 bool m_InterEMT; // 19 +#endif bool m_Affine; bool m_AffineType; bool m_GBi; // @@ -828,18 +836,13 @@ public: private: //===== additional parameters ===== // qtbt - // sub-pu merging - int m_subPuMrgMode; //imv ImvMode m_ImvMode; // multi type tree (QTBT + triple split) unsigned m_MTTMode; - bool m_compositeRefEnabled; //composite longterm reference - - unsigned m_CPRMode; - - // ADD_NEW_TOOL : (sps extension) add tool enabling flags and associated parameters here + bool m_compositeRefEnabled; //composite longterm reference + unsigned m_IBCMode; public: SPSNext( SPS& sps ); @@ -852,17 +855,12 @@ public: //===== tool enabling flags and extension bit ===== void setUseLargeCTU ( bool b ) { m_LargeCTU = b; } bool getUseLargeCTU () const { return m_LargeCTU; } - bool getUseSubPuMvp() const { return m_SubPuMvp; } - void setSubPuMvpMode(int n) { m_subPuMrgMode = n; m_SubPuMvp 
= n != 0; } - bool getUseATMVP() const { return (m_subPuMrgMode & 1) == 1; } void setUseIMV ( bool b ) { m_IMV = b; } bool getUseIMV () const { return m_IMV; } void setUseAffine ( bool b ) { m_Affine = b; } bool getUseAffine () const { return m_Affine; } void setUseAffineType ( bool b ) { m_AffineType = b; } bool getUseAffineType () const { return m_AffineType; } - void setUseBIO(bool b) { m_BIO = b; } - bool getUseBIO() const { return m_BIO; } void setDisableMotCompress ( bool b ) { m_DisableMotionCompression = b; } bool getDisableMotCompress () const { return m_DisableMotionCompression; } bool getMTTEnabled () const { return m_MTTEnabled; } @@ -872,10 +870,21 @@ public: #endif void setUseLMChroma ( bool b ) { m_LMChroma = b; } bool getUseLMChroma () const { return m_LMChroma; } +#if JVET_M0142_CCLM_COLLOCATED_CHROMA + void setCclmCollocatedChromaFlag( bool b ) { m_cclmCollocatedChromaFlag = b; } + bool getCclmCollocatedChromaFlag() const { return m_cclmCollocatedChromaFlag; } +#endif +#if JVET_M0464_UNI_MTS + void setUseIntraMTS ( bool b ) { m_IntraMTS = b; } + bool getUseIntraMTS () const { return m_IntraMTS; } + void setUseInterMTS ( bool b ) { m_InterMTS = b; } + bool getUseInterMTS () const { return m_InterMTS; } +#else void setUseIntraEMT ( bool b ) { m_IntraEMT = b; } bool getUseIntraEMT () const { return m_IntraEMT; } void setUseInterEMT ( bool b ) { m_InterEMT = b; } bool getUseInterEMT () const { return m_InterEMT; } +#endif void setUseGBi ( bool b ) { m_GBi = b; } bool getUseGBi () const { return m_GBi; } #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET @@ -894,8 +903,6 @@ public: void setImvMode(ImvMode m) { m_ImvMode = m; m_IMV = m != 0; } ImvMode getImvMode () const { return m_ImvMode; } - - // multi type tree unsigned getMTTMode () const { return m_MTTMode; } void setMTTMode ( unsigned mode ) { m_MTTMode = mode; m_MTTEnabled = ( m_MTTMode != 0 ); } @@ -907,10 +914,8 @@ public: bool getUseMHIntra () const { return m_MHIntra; } void setUseTriangle ( bool 
b ) { m_Triangle = b; } bool getUseTriangle () const { return m_Triangle; } - void setCPRMode (unsigned CPRMode) { m_CPRMode = CPRMode; } - unsigned getCPRMode () const { return m_CPRMode; } - // ADD_NEW_TOOL : (sps extension) add access functions for tool enabling flags and associated parameters here - + void setIBCMode (unsigned IBCMode) { m_IBCMode = IBCMode; } + unsigned getIBCMode () const { return m_IBCMode; } }; @@ -973,10 +978,9 @@ private: uint32_t m_uiQuadtreeTULog2MinSize; uint32_t m_uiQuadtreeTUMaxDepthInter; uint32_t m_uiQuadtreeTUMaxDepthIntra; - bool m_usePCM; + bool m_pcmEnabledFlag; uint32_t m_pcmLog2MaxSize; uint32_t m_uiPCMLog2MinSize; - bool m_useAMP; // Parameter BitDepths m_bitDepths; @@ -984,6 +988,9 @@ private: int m_pcmBitDepths[MAX_NUM_CHANNEL_TYPE]; bool m_bPCMFilterDisableFlag; + bool m_sbtmvpEnabledFlag; + bool m_bdofEnabledFlag; + uint32_t m_uiBitsForPOC; uint32_t m_numLongTermRefPicSPS; uint32_t m_ltRefPicPocLsbSps[MAX_NUM_LONG_TERM_REF_PICS]; @@ -991,7 +998,7 @@ private: // Max physical transform size uint32_t m_uiMaxTrSize; - bool m_bUseSAO; + bool m_saoEnabledFlag; bool m_bTemporalIdNestingFlag; // temporal_id_nesting_flag @@ -1017,9 +1024,9 @@ private: static const int m_winUnitY[NUM_CHROMA_FORMAT]; PTL m_pcPTL; - bool m_useALF; + bool m_alfEnabledFlag; - bool m_useWrapAround; + bool m_wrapAroundEnabledFlag; unsigned m_wrapAroundOffset; public: @@ -1135,16 +1142,14 @@ public: uint32_t getMaxCUHeight() const { return m_uiMaxCUHeight; } void setMaxCodingDepth( uint32_t u ) { m_uiMaxCodingDepth = u; } uint32_t getMaxCodingDepth() const { return m_uiMaxCodingDepth; } - void setUsePCM( bool b ) { m_usePCM = b; } - bool getUsePCM() const { return m_usePCM; } + void setPCMEnabledFlag( bool b ) { m_pcmEnabledFlag = b; } + bool getPCMEnabledFlag() const { return m_pcmEnabledFlag; } void setPCMLog2MaxSize( uint32_t u ) { m_pcmLog2MaxSize = u; } uint32_t getPCMLog2MaxSize() const { return m_pcmLog2MaxSize; } void setPCMLog2MinSize( uint32_t 
u ) { m_uiPCMLog2MinSize = u; } uint32_t getPCMLog2MinSize() const { return m_uiPCMLog2MinSize; } void setBitsForPOC( uint32_t u ) { m_uiBitsForPOC = u; } uint32_t getBitsForPOC() const { return m_uiBitsForPOC; } - bool getUseAMP() const { return m_useAMP; } - void setUseAMP( bool b ) { m_useAMP = b; } void setQuadtreeTULog2MaxSize( uint32_t u ) { m_uiQuadtreeTULog2MaxSize = u; } uint32_t getQuadtreeTULog2MaxSize() const { return m_uiQuadtreeTULog2MaxSize; } void setQuadtreeTULog2MinSize( uint32_t u ) { m_uiQuadtreeTULog2MinSize = u; } @@ -1176,11 +1181,20 @@ public: int getQpBDOffset(ChannelType type) const { return m_qpBDOffset[type]; } void setQpBDOffset(ChannelType type, int i) { m_qpBDOffset[type] = i; } - void setUseSAO(bool bVal) { m_bUseSAO = bVal; } - bool getUseSAO() const { return m_bUseSAO; } + void setSAOEnabledFlag(bool bVal) { m_saoEnabledFlag = bVal; } + bool getSAOEnabledFlag() const { return m_saoEnabledFlag; } + + bool getALFEnabledFlag() const { return m_alfEnabledFlag; } + void setALFEnabledFlag( bool b ) { m_alfEnabledFlag = b; } + + bool getSBTMVPEnabledFlag() const { return m_sbtmvpEnabledFlag; } + void setSBTMVPEnabledFlag(bool b) { m_sbtmvpEnabledFlag = b; } + + void setBDOFEnabledFlag(bool b) { m_bdofEnabledFlag = b; } + bool getBDOFEnabledFlag() const { return m_bdofEnabledFlag; } - uint32_t getMaxTLayers() const { return m_uiMaxTLayers; } - void setMaxTLayers( uint32_t uiMaxTLayers ) { CHECK( uiMaxTLayers > MAX_TLAYER, "Invalid number T-layers" ); m_uiMaxTLayers = uiMaxTLayers; } + uint32_t getMaxTLayers() const { return m_uiMaxTLayers; } + void setMaxTLayers( uint32_t uiMaxTLayers ) { CHECK( uiMaxTLayers > MAX_TLAYER, "Invalid number T-layers" ); m_uiMaxTLayers = uiMaxTLayers; } bool getTemporalIdNestingFlag() const { return m_bTemporalIdNestingFlag; } void setTemporalIdNestingFlag( bool bValue ) { m_bTemporalIdNestingFlag = bValue; } @@ -1220,11 +1234,8 @@ public: const SPSNext& getSpsNext() const { return m_spsNextExtension; } 
SPSNext& getSpsNext() { return m_spsNextExtension; } - bool getUseALF() const { return m_useALF; } - void setUseALF( bool b ) { m_useALF = b; } - - void setUseWrapAround(bool b) { m_useWrapAround = b; } - bool getUseWrapAround() const { return m_useWrapAround; } + void setWrapAroundEnabledFlag(bool b) { m_wrapAroundEnabledFlag = b; } + bool getWrapAroundEnabledFlag() const { return m_wrapAroundEnabledFlag; } void setWrapAroundOffset(unsigned offset) { m_wrapAroundOffset = offset; } unsigned getWrapAroundOffset() const { return m_wrapAroundOffset; } }; @@ -1561,6 +1572,11 @@ private: #endif bool m_bCheckLDC; +#if JVET_M0444_SMVD + bool m_biDirPred; + int m_symRefIdx[2]; +#endif + // Data int m_iSliceQpDelta; int m_iSliceChromaQpDelta[MAX_NUM_COMPONENT]; @@ -1649,7 +1665,10 @@ private: AlfSliceParam m_alfSliceParam; LutMotionCand* m_MotionCandLut; - +#if JVET_M0170_MRG_SHARELIST +public: + LutMotionCand* m_MotionCandLuTsBkup; +#endif public: Slice(); virtual ~Slice(); @@ -1749,6 +1768,12 @@ public: void setCheckLDC( bool b ) { m_bCheckLDC = b; } void setMvdL1ZeroFlag( bool b) { m_bLMvdL1Zero = b; } +#if JVET_M0444_SMVD + void setBiDirPred( bool b, int refIdx0, int refIdx1 ) { m_biDirPred = b; m_symRefIdx[0] = refIdx0; m_symRefIdx[1] = refIdx1; } + bool getBiDirPred() const { return m_biDirPred; } + int getSymRefIdx( int refList ) const { return m_symRefIdx[refList]; } +#endif + bool isIntra() const { return m_eSliceType == I_SLICE; } bool isInterB() const { return m_eSliceType == B_SLICE; } bool isInterP() const { return m_eSliceType == P_SLICE; } @@ -1925,6 +1950,10 @@ public: void destroyMotionLUTs (); void resetMotionLUTs(); int getAvailableLUTMrgNum() const { return m_MotionCandLut->currCnt; } +#if JVET_M0170_MRG_SHARELIST + int getAvailableLUTBkupMrgNum() const { return m_MotionCandLuTsBkup->currCnt; } + MotionInfo getMotionInfoFromLUTBkup(int MotCandIdx) const; +#endif MotionInfo getMotionInfoFromLUTs(int MotCandIdx) const; LutMotionCand* getMotionLUTs() { 
return m_MotionCandLut; } diff --git a/source/Lib/CommonLib/TrQuant.cpp b/source/Lib/CommonLib/TrQuant.cpp index 92276925911e26cfe517cc9f236c50e32c3fc62b..f2afd109dbc34b8db435b25fb24882642c21634b 100644 --- a/source/Lib/CommonLib/TrQuant.cpp +++ b/source/Lib/CommonLib/TrQuant.cpp @@ -91,7 +91,13 @@ TrQuant::TrQuant() : m_quant( nullptr ) { // allocate temporary buffers m_plTempCoeff = (TCoeff*) xMalloc( TCoeff, MAX_CU_SIZE * MAX_CU_SIZE ); - +#if JVET_M0464_UNI_MTS + m_mtsCoeffs = new TCoeff*[ NUM_TRAFO_MODES_MTS ]; + for( int i = 0; i < NUM_TRAFO_MODES_MTS; i++ ) + { + m_mtsCoeffs[i] = (TCoeff*) xMalloc( TCoeff, MAX_CU_SIZE * MAX_CU_SIZE ); + } +#endif } TrQuant::~TrQuant() @@ -108,6 +114,18 @@ TrQuant::~TrQuant() xFree( m_plTempCoeff ); m_plTempCoeff = nullptr; } +#if JVET_M0464_UNI_MTS + if( m_mtsCoeffs ) + { + for( int i = 0; i < NUM_TRAFO_MODES_MTS; i++ ) + { + xFree( m_mtsCoeffs[i] ); + m_mtsCoeffs[i] = nullptr; + } + delete[] m_mtsCoeffs; /* the pointer array itself came from new[] in TrQuant(); release it after its xMalloc'd per-mode buffers */ + m_mtsCoeffs = nullptr; + } +#endif } #if ENABLE_SPLIT_PARALLELISM @@ -187,8 +205,11 @@ void TrQuant::invTransformNxN( TransformUnit &tu, const ComponentID &compID, Pel DTRACE_COEFF_BUF( D_TCOEFF, tempCoeff, tu, tu.cu->predMode, compID ); - +#if JVET_M0464_UNI_MTS + if( tu.mtsIdx == 1 ) +#else if( tu.transformSkip[compID] ) +#endif { xITransformSkip( tempCoeff, pResi, tu, compID ); } @@ -207,7 +228,11 @@ void TrQuant::invRdpcmNxN(TransformUnit& tu, const ComponentID &compID, PelBuf & { const CompArea &area = tu.blocks[compID]; +#if JVET_M0464_UNI_MTS + if (CU::isRDPCMEnabled(*tu.cu) && (tu.mtsIdx==1 || tu.cu->transQuantBypass)) +#else if (CU::isRDPCMEnabled(*tu.cu) && ((tu.transformSkip[compID] != 0) || tu.cu->transQuantBypass)) +#endif { const uint32_t uiWidth = area.width; const uint32_t uiHeight = area.height; @@ -267,19 +292,34 @@ void TrQuant::invRdpcmNxN(TransformUnit& tu, const ComponentID &compID, PelBuf & void TrQuant::getTrTypes ( TransformUnit tu, const ComponentID compID, int &trTypeHor, int &trTypeVer ) { +#if JVET_M0464_UNI_MTS + 
bool mtsActivated = CU::isIntra( *tu.cu ) ? tu.cs->sps->getSpsNext().getUseIntraMTS() : tu.cs->sps->getSpsNext().getUseInterMTS(); +#else bool emtActivated = CU::isIntra( *tu.cu ) ? tu.cs->sps->getSpsNext().getUseIntraEMT() : tu.cs->sps->getSpsNext().getUseInterEMT(); +#endif trTypeHor = DCT2; trTypeVer = DCT2; +#if JVET_M0464_UNI_MTS + if ( mtsActivated ) +#else if ( emtActivated ) +#endif { if( compID == COMPONENT_Y ) { +#if JVET_M0464_UNI_MTS + if ( tu.mtsIdx > 1 ) + { + int indHor = ( tu.mtsIdx - 2 ) & 1; + int indVer = ( tu.mtsIdx - 2 ) >> 1; +#else if ( tu.cu->emtFlag ) { int indHor = tu.emtIdx & 1; int indVer = tu.emtIdx >> 1; +#endif trTypeHor = indHor ? DCT8 : DST7; trTypeVer = indVer ? DCT8 : DST7; @@ -437,7 +476,67 @@ void TrQuant::xQuant(TransformUnit &tu, const ComponentID &compID, const CCoeffB m_quant->quant( tu, compID, pSrc, uiAbsSum, cQP, ctx ); } +#if JVET_M0464_UNI_MTS +void TrQuant::transformNxN(TransformUnit &tu, const ComponentID &compID, const QpParam &cQP, std::vector<TrMode>* trModes, const int maxCand) +{ + CodingStructure &cs = *tu.cs; + const SPS &sps = *cs.sps; + const CompArea &rect = tu.blocks[compID]; + const uint32_t width = rect.width; + const uint32_t height = rect.height; + + const CPelBuf resiBuf = cs.getResiBuf(rect); + + CHECK( sps.getMaxTrSize() < width, "Unsupported transformation size" ); + + int pos = 0; + std::vector<TrCost> trCosts; + std::vector<TrMode>::iterator it = trModes->begin(); + const double facBB[] = { 1.2, 1.3, 1.3, 1.4, 1.5 }; + while( it != trModes->end() ) + { + tu.mtsIdx = it->first; + CoeffBuf tempCoeff( m_mtsCoeffs[tu.mtsIdx], rect ); + + if( tu.mtsIdx == 1 ) + { + xTransformSkip( tu, compID, resiBuf, tempCoeff.buf ); + } + else + { + xT( tu, compID, resiBuf, tempCoeff, width, height ); + } + + int sumAbs = 0; + for( int pos = 0; pos < width*height; pos++ ) + { + sumAbs += abs( tempCoeff.buf[pos] ); + } + + trCosts.push_back( TrCost( sumAbs, pos++ ) ); + it++; + } + + int numTests = 0; + 
std::vector<TrCost>::iterator itC = trCosts.begin(); + const double fac = facBB[g_aucLog2[std::max(width, height)]-2]; + const double thr = fac * trCosts.begin()->first; + const double thrTS = trCosts.begin()->first; + while( itC != trCosts.end() ) + { + const bool testTr = itC->first <= ( itC->second == 1 ? thrTS : thr ) && numTests <= maxCand; + trModes->at( itC->second ).second = testTr; + numTests += testTr; + itC++; + } +} +#endif + +#if JVET_M0464_UNI_MTS +void TrQuant::transformNxN(TransformUnit &tu, const ComponentID &compID, const QpParam &cQP, TCoeff &uiAbsSum, const Ctx &ctx, const bool loadTr) +#else void TrQuant::transformNxN(TransformUnit &tu, const ComponentID &compID, const QpParam &cQP, TCoeff &uiAbsSum, const Ctx &ctx) +#endif { CodingStructure &cs = *tu.cs; const SPS &sps = *cs.sps; @@ -483,11 +582,21 @@ void TrQuant::transformNxN(TransformUnit &tu, const ComponentID &compID, const Q { CHECK( sps.getMaxTrSize() < uiWidth, "Unsupported transformation size" ); +#if JVET_M0464_UNI_MTS + CoeffBuf tempCoeff( loadTr ? 
m_mtsCoeffs[tu.mtsIdx] : m_plTempCoeff, rect ); +#else CoeffBuf tempCoeff( m_plTempCoeff, rect ); +#endif DTRACE_PEL_BUF( D_RESIDUALS, resiBuf, tu, tu.cu->predMode, compID ); +#if JVET_M0464_UNI_MTS + if( !loadTr ) + { + if( tu.mtsIdx == 1 ) +#else if( tu.transformSkip[compID] ) +#endif { xTransformSkip( tu, compID, resiBuf, tempCoeff.buf ); } @@ -495,6 +604,9 @@ void TrQuant::transformNxN(TransformUnit &tu, const ComponentID &compID, const Q { xT( tu, compID, resiBuf, tempCoeff, uiWidth, uiHeight ); } +#if JVET_M0464_UNI_MTS + } +#endif DTRACE_COEFF_BUF( D_TCOEFF, tempCoeff, tu, tu.cu->predMode, compID ); @@ -567,7 +679,11 @@ void TrQuant::applyForwardRDPCM(TransformUnit &tu, const ComponentID &compID, co void TrQuant::rdpcmNxN(TransformUnit &tu, const ComponentID &compID, const QpParam &cQP, TCoeff &uiAbsSum, RDPCMMode &rdpcmMode) { +#if JVET_M0464_UNI_MTS + if (!CU::isRDPCMEnabled(*tu.cu) || (tu.mtsIdx!=1 && !tu.cu->transQuantBypass)) +#else if (!CU::isRDPCMEnabled(*tu.cu) || (!tu.transformSkip[compID] && !tu.cu->transQuantBypass)) +#endif { rdpcmMode = RDPCM_OFF; } diff --git a/source/Lib/CommonLib/TrQuant.h b/source/Lib/CommonLib/TrQuant.h index c663f9e76ab738e0139daf5024b520fa9843cd3f..aa58a29981b7f6a8ac09601972a487a8811cc6f5 100644 --- a/source/Lib/CommonLib/TrQuant.h +++ b/source/Lib/CommonLib/TrQuant.h @@ -87,7 +87,12 @@ public: void invTransformNxN (TransformUnit &tu, const ComponentID &compID, PelBuf &pResi, const QpParam &cQPs); +#if JVET_M0464_UNI_MTS + void transformNxN (TransformUnit &tu, const ComponentID &compID, const QpParam &cQP, std::vector<TrMode>* trModes, const int maxCand); + void transformNxN (TransformUnit &tu, const ComponentID &compID, const QpParam &cQP, TCoeff &uiAbsSum, const Ctx &ctx, const bool loadTr=false); +#else void transformNxN (TransformUnit &tu, const ComponentID &compID, const QpParam &cQP, TCoeff &uiAbsSum, const Ctx &ctx); +#endif void rdpcmNxN (TransformUnit &tu, const ComponentID &compID, const QpParam &cQP, TCoeff 
&uiAbsSum, RDPCMMode &rdpcmMode); void applyForwardRDPCM(TransformUnit &tu, const ComponentID &compID, const QpParam &cQP, TCoeff &uiAbsSum, const RDPCMMode &rdpcmMode); @@ -122,6 +127,9 @@ protected: private: Quant *m_quant; //!< Quantizer +#if JVET_M0464_UNI_MTS + TCoeff** m_mtsCoeffs; +#endif // forward Transform diff --git a/source/Lib/CommonLib/TrQuant_EMT.cpp b/source/Lib/CommonLib/TrQuant_EMT.cpp index e717c6c2466c8997d5d700479b2e2cc5a0d3a628..3ea8ce7306e663478e28f20dfac2a5251e1b70ba 100644 --- a/source/Lib/CommonLib/TrQuant_EMT.cpp +++ b/source/Lib/CommonLib/TrQuant_EMT.cpp @@ -947,23 +947,314 @@ void fastInverseDST7_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int void fastForwardDST7_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) { +#if JVET_M0497_FAST_DST7 && !JVET_M0497_MATRIX_MULT + int j, k; + TCoeff a[5], b[5], c[5], d[5], t; + TCoeff add = (shift > 0) ? (1 << (shift - 1)) : 0; + + const TMatrixCoeff *iT = g_trCoreDST7P16[TRANSFORM_FORWARD][0]; + + TCoeff *pCoef = dst; + const int reducedLine = line - iSkipLine; + const int cutoff = 16 - iSkipLine2; + + for (j = 0; j < reducedLine; j++) + { + for (k = 0; k < 5; k++) + { + a[k] = src[ k] + src[11 + k]; + b[k] = src[9 - k] + src[11 + k]; + c[k] = src[ k] - src[ 9 - k]; + d[k] = src[ k] + src[ 9 - k] - src[11 + k]; + } + + t = iT[10] * src[10]; + + dst[ 1 * line] = ( iT[ 2]*d[0] + iT[ 5]*d[1] + iT[ 8]*d[2] + iT[11]*d[3] + iT[14]*d[4] + add) >> shift; + dst[ 4 * line] = ( iT[ 8]*d[0] + iT[14]*d[1] + iT[ 5]*d[2] - iT[ 2]*d[3] - iT[11]*d[4] + add) >> shift; + dst[ 7 * line] = ( iT[14]*d[0] + iT[ 2]*d[1] - iT[11]*d[2] - iT[ 5]*d[3] + iT[ 8]*d[4] + add) >> shift; + dst[10 * line] = ( iT[11]*d[0] - iT[ 8]*d[1] - iT[ 2]*d[2] + iT[14]*d[3] - iT[ 5]*d[4] + add) >> shift; + dst[13 * line] = ( iT[ 5]*d[0] - iT[11]*d[1] + iT[14]*d[2] - iT[ 8]*d[3] + iT[ 2]*d[4] + add) >> shift; + + dst[5 * line] = ( iT[10] * (src[0] + src[1] - src[3] - src[4] + src[6] + src[7] - 
src[9] - src[10] + src[12] + src[13] - src[15]) + add) >> shift; + + dst[ 0 * line] = ( iT[0]*a[0] + iT[9]*b[0] + iT[1]*a[1] + iT[8]*b[1] + iT[2]*a[2] + iT[7]*b[2] + iT[3]*a[3] + iT[6]*b[3] + iT[4]*a[4] + iT[5]*b[4] + t + add ) >> shift; + dst[ 2 * line] = ( iT[4]*c[0] - iT[5]*b[0] + iT[9]*c[1] - iT[0]*b[1] + iT[6]*c[2] + iT[3]*a[2] + iT[1]*c[3] + iT[8]*a[3] + iT[7]*a[4] + iT[2]*b[4] - t + add ) >> shift; + dst[ 3 * line] = ( iT[6]*a[0] + iT[3]*b[0] + iT[2]*c[1] + iT[7]*a[1] + iT[9]*c[2] + iT[0]*a[2] + iT[4]*c[3] - iT[5]*b[3] - iT[1]*a[4] - iT[8]*b[4] + t + add ) >> shift; + dst[ 6 * line] = ( iT[8]*a[0] + iT[1]*c[0] + iT[6]*c[1] - iT[3]*b[1] - iT[5]*a[2] - iT[4]*b[2] - iT[7]*c[3] - iT[2]*a[3] - iT[0]*c[4] + iT[9]*b[4] + t + add ) >> shift; + dst[ 8 * line] = ( iT[4]*c[0] + iT[5]*a[0] - iT[0]*c[1] + iT[9]*b[1] - iT[3]*c[2] - iT[6]*a[2] + iT[1]*c[3] - iT[8]*b[3] + iT[2]*c[4] + iT[7]*a[4] - t + add ) >> shift; + dst[ 9 * line] = ( iT[7]*c[0] + iT[2]*a[0] - iT[4]*a[1] - iT[5]*b[1] - iT[8]*c[2] + iT[1]*b[2] + iT[9]*a[3] + iT[0]*b[3] + iT[3]*c[4] - iT[6]*b[4] + t + add ) >> shift; + dst[11 * line] = ( iT[9]*a[0] + iT[0]*b[0] - iT[8]*c[1] - iT[1]*a[1] + iT[2]*c[2] - iT[7]*b[2] + iT[6]*a[3] + iT[3]*b[3] - iT[5]*c[4] - iT[4]*a[4] - t + add ) >> shift; + dst[12 * line] = ( iT[7]*c[0] - iT[2]*b[0] - iT[5]*c[1] - iT[4]*a[1] + iT[8]*a[2] + iT[1]*b[2] - iT[0]*a[3] - iT[9]*b[3] - iT[6]*c[4] + iT[3]*b[4] + t + add ) >> shift; + dst[14 * line] = ( iT[3]*a[0] + iT[6]*b[0] - iT[7]*a[1] - iT[2]*b[1] + iT[0]*c[2] + iT[9]*a[2] - iT[4]*c[3] - iT[5]*a[3] + iT[8]*c[4] + iT[1]*a[4] - t + add ) >> shift; + dst[15 * line] = ( iT[1]*c[0] - iT[8]*b[0] - iT[3]*c[1] + iT[6]*b[1] + iT[5]*c[2] - iT[4]*b[2] - iT[7]*c[3] + iT[2]*b[3] + iT[9]*c[4] - iT[0]*b[4] + t + add ) >> shift; + + src += 16; + dst++; + } + + if (iSkipLine) + { + dst = pCoef + reducedLine; + for (j = 0; j < cutoff; j++) + { + memset(dst, 0, sizeof(TCoeff)*iSkipLine); + dst += line; + } + } + + if (iSkipLine2) + { + dst = pCoef + 
line * cutoff; + memset(dst, 0, sizeof(TCoeff) * line * iSkipLine2); + } +#else _fastForwardMM< 16 >( src, dst, shift, line, iSkipLine, iSkipLine2, g_trCoreDST7P16[TRANSFORM_FORWARD][0] ); +#endif } void fastInverseDST7_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) { +#if JVET_M0497_FAST_DST7 && !JVET_M0497_MATRIX_MULT + int j, k; + TCoeff a[5], b[5], c[5], d[5], t; + + TCoeff add = (shift > 0) ? (1 << (shift - 1)) : 0; + + const TMatrixCoeff *iT = g_trCoreDST7P16[TRANSFORM_INVERSE][0]; + + const int reducedLine = line - iSkipLine; + + for (j = 0; j < reducedLine; j++) + { + for (k = 0; k < 5; k++) + { + a[k] = src[ k * line] + src[(10 - k) * line]; + b[k] = src[(11 + k) * line] + src[(10 - k) * line]; + c[k] = src[ k * line] - src[(11 + k) * line]; + d[k] = src[ k * line] + src[(11 + k) * line] - src[(10 - k)*line]; + } + + t = iT[10] * src[5 * line]; + + dst[ 2] = Clip3(outputMinimum, outputMaximum, (int)( iT[ 2]*d[0] + iT[ 8]*d[1] + iT[14]*d[2] + iT[11]*d[3] + iT[ 5]*d[4] + add ) >> shift); + dst[ 5] = Clip3(outputMinimum, outputMaximum, (int)( iT[ 5]*d[0] + iT[14]*d[1] + iT[ 2]*d[2] - iT[ 8]*d[3] - iT[11]*d[4] + add ) >> shift); + dst[ 8] = Clip3(outputMinimum, outputMaximum, (int)( iT[ 8]*d[0] + iT[ 5]*d[1] - iT[11]*d[2] - iT[ 2]*d[3] + iT[14]*d[4] + add ) >> shift); + dst[11] = Clip3(outputMinimum, outputMaximum, (int)( iT[11]*d[0] - iT[ 2]*d[1] - iT[ 5]*d[2] + iT[14]*d[3] - iT[ 8]*d[4] + add ) >> shift); + dst[14] = Clip3(outputMinimum, outputMaximum, (int)( iT[14]*d[0] - iT[11]*d[1] + iT[ 8]*d[2] - iT[ 5]*d[3] + iT[ 2]*d[4] + add ) >> shift); + + dst[10] = Clip3(outputMinimum, outputMaximum, (int)( iT[10]*(src[ 0*line]-src[ 2*line]+src[ 3*line]-src[5*line] + +src[ 6*line]-src[ 8*line]+src[ 9*line]-src[11*line] + +src[12*line]-src[14*line]+src[15*line]) + add ) >> shift); + + dst[ 0] = Clip3(outputMinimum, outputMaximum, (int)( iT[0]*a[0] + iT[9]*b[0] + 
iT[2]*a[1] + iT[7]*b[1] + iT[4]*a[2] + iT[5]*b[2] + iT[6]*a[3] + iT[3]*b[3] + iT[8]*a[4] + iT[1]*b[4] + t + add ) >> shift); + dst[ 1] = Clip3(outputMinimum, outputMaximum, (int)( iT[1]*c[0] - iT[8]*b[0] + iT[5]*c[1] - iT[4]*b[1] + iT[9]*c[2] - iT[0]*b[2] + iT[2]*a[3] + iT[7]*c[3] + iT[6]*a[4] + iT[3]*c[4] + t + add ) >> shift); + dst[ 3] = Clip3(outputMinimum, outputMaximum, (int)( iT[3]*a[0] + iT[6]*b[0] + iT[0]*c[1] + iT[9]*a[1] + iT[1]*a[2] + iT[8]*c[2] + iT[4]*c[3] - iT[5]*b[3] - iT[2]*a[4] - iT[7]*b[4] - t + add ) >> shift); + dst[ 4] = Clip3(outputMinimum, outputMaximum, (int)( iT[4]*c[0] - iT[5]*b[0] + iT[6]*c[1] + iT[3]*a[1] + iT[7]*a[2] + iT[2]*b[2] - iT[1]*c[3] + iT[8]*b[3] - iT[9]*c[4] - iT[0]*a[4] - t + add ) >> shift); + dst[ 6] = Clip3(outputMinimum, outputMaximum, (int)( iT[6]*a[0] + iT[3]*b[0] + iT[9]*c[1] + iT[0]*a[1] - iT[1]*a[2] - iT[8]*b[2] - iT[4]*c[3] - iT[5]*a[3] - iT[2]*c[4] + iT[7]*b[4] + t + add ) >> shift); + dst[ 7] = Clip3(outputMinimum, outputMaximum, (int)( iT[7]*c[0] - iT[2]*b[0] + iT[8]*a[1] + iT[1]*b[1] - iT[6]*c[2] + iT[3]*b[2] - iT[9]*a[3] - iT[0]*b[3] + iT[5]*c[4] - iT[4]*b[4] + t + add ) >> shift); + dst[ 9] = Clip3(outputMinimum, outputMaximum, (int)( iT[9]*a[0] + iT[0]*b[0] + iT[2]*c[1] - iT[7]*b[1] - iT[5]*c[2] - iT[4]*a[2] + iT[3]*a[3] + iT[6]*b[3] + iT[8]*c[4] - iT[1]*b[4] - t + add ) >> shift); + dst[12] = Clip3(outputMinimum, outputMaximum, (int)( iT[1]*c[0] + iT[8]*a[0] - iT[5]*a[1] - iT[4]*b[1] - iT[0]*c[2] + iT[9]*b[2] + iT[7]*c[3] - iT[2]*b[3] - iT[6]*c[4] - iT[3]*a[4] + t + add ) >> shift); + dst[13] = Clip3(outputMinimum, outputMaximum, (int)( iT[7]*c[0] + iT[2]*a[0] - iT[8]*c[1] + iT[1]*b[1] + iT[3]*c[2] - iT[6]*b[2] + iT[0]*a[3] + iT[9]*b[3] - iT[5]*a[4] - iT[4]*b[4] + t + add ) >> shift); + dst[15] = Clip3(outputMinimum, outputMaximum, (int)( iT[4]*c[0] + iT[5]*a[0] - iT[3]*c[1] - iT[6]*a[1] + iT[2]*c[2] + iT[7]*a[2] - iT[1]*c[3] - iT[8]*a[3] + iT[0]*c[4] + iT[9]*a[4] - t + add ) >> shift); + + src++; + dst += 
16; + } + + if (iSkipLine) + { + memset(dst, 0, (iSkipLine * 16) * sizeof(TCoeff)); + } +#else _fastInverseMM< 16 >( src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_trCoreDST7P16[TRANSFORM_INVERSE][0]); +#endif } void fastForwardDST7_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) { +#if JVET_M0497_FAST_DST7 && !JVET_M0497_MATRIX_MULT + int j, k; + TCoeff a[10][6]; + TCoeff t[2]; + TCoeff b[6]; + TCoeff c[2]; + + TCoeff add = (shift > 0) ? (1 << (shift - 1)) : 0; + const TMatrixCoeff *iT = g_trCoreDST7P32[TRANSFORM_FORWARD][0]; + TCoeff *pCoef = dst; + const int reducedLine = line - iSkipLine; + const int cutoff = 32 - iSkipLine2; + + for (j = 0; j < reducedLine; j++) + { + for (k = 0; k < 6; k++) + { + a[0][k] = src[ k] - src[11 - k]; + a[1][k] = src[ k] + src[13 + k]; + a[2][k] = src[ k] + src[24 - k]; + a[3][k] = src[ k] - src[26 + k]; + a[4][k] = src[ 6 + k] + src[18 - k]; + a[5][k] = src[ 6 + k] + src[19 + k]; + a[6][k] = src[ 6 + k] - src[31 - k]; + a[7][k] = src[13 + k] - src[24 - k]; + a[8][k] = src[13 + k] + src[26 + k]; + a[9][k] = src[19 + k] + src[31 - k]; + + b[k] = src[k] + src[11 - k] - src[13 + k] - src[24 - k] + src[26 + k]; + } + for (k = 0; k < 2; k++) + { + c[k] = src[k] + src[3 - k] - src[5 + k] - src[8 - k] + src[10 + k] + src[13 - k] - src[15 + k] - src[18 - k] + src[20 + k] + src[23 - k] - src[25 + k] - src[28 - k] + src[30 + k]; + } + + t[0] = iT[12] * src[12] + iT[25] * src[25]; + t[1] = iT[12] * src[25] - iT[25] * src[12]; + + dst[ 0 * line] = ( iT[0] * a[3][0] + iT[11] * a[6][5] + iT[13] * a[8][0] + iT[24] * a[9][5] + iT[1] * a[3][1] + iT[10] * a[6][4] + iT[14] * a[8][1] + iT[23] * a[9][4] + iT[2] * a[3][2] + iT[9] * a[6][3] + iT[15] * a[8][2] + iT[22] * a[9][3] + iT[3] * a[3][3] + iT[8] * a[6][2] + iT[16] * a[8][3] + iT[21] * a[9][2] + iT[4] * a[3][4] + iT[7] * a[6][1] + iT[17] * a[8][4] + iT[20] * a[9][1] + iT[5] * a[3][5] + iT[6] * a[6][0] + iT[18] * a[8][5] + 
iT[19] * a[9][0] + t[0] + add) >> shift; + dst[ 1 * line] = (-iT[0] * a[5][2] + iT[11] * a[0][3] + iT[13] * a[4][2] + iT[24] * a[6][2] + iT[1] * a[9][1] + iT[10] * a[8][4] + iT[14] * a[3][4] + iT[23] * a[6][1] + iT[2] * a[0][0] - iT[9] * a[5][5] + iT[15] * a[6][5] + iT[22] * a[4][5] - iT[3] * a[5][3] + iT[8] * a[0][2] + iT[16] * a[4][3] + iT[21] * a[6][3] + iT[4] * a[9][0] + iT[7] * a[8][5] + iT[17] * a[3][5] + iT[20] * a[6][0] + iT[5] * a[0][1] - iT[6] * a[5][4] + iT[18] * a[6][4] + iT[19] * a[4][4] - t[1] + add) >> shift; + dst[ 3 * line] = (-iT[0] * a[9][4] - iT[11] * a[5][4] + iT[13] * a[2][1] - iT[24] * a[7][1] - iT[1] * a[0][3] - iT[10] * a[1][3] + iT[14] * a[3][3] + iT[23] * a[2][3] + iT[2] * a[8][5] + iT[9] * a[9][0] + iT[15] * a[6][0] + iT[22] * a[3][5] - iT[3] * a[1][4] - iT[8] * a[0][4] + iT[16] * a[2][4] + iT[21] * a[3][4] - iT[4] * a[5][3] - iT[7] * a[9][3] - iT[17] * a[7][2] + iT[20] * a[2][2] + iT[5] * a[8][0] + iT[6] * a[1][0] - iT[18] * a[4][5] - iT[19] * a[7][0] + t[1] + add) >> shift; + dst[ 4 * line] = (-iT[0] * a[3][2] - iT[11] * a[2][2] + iT[13] * a[1][2] + iT[24] * a[0][2] + iT[1] * a[6][0] + iT[10] * a[3][5] + iT[14] * a[9][0] + iT[23] * a[8][5] - iT[2] * a[2][3] - iT[9] * a[3][3] + iT[15] * a[0][3] + iT[22] * a[1][3] - iT[3] * a[7][0] + iT[8] * a[2][0] - iT[16] * a[9][5] - iT[21] * a[5][5] + iT[4] * a[4][4] + iT[7] * a[6][4] + iT[17] * a[0][1] - iT[20] * a[5][4] - iT[5] * a[7][4] - iT[6] * a[4][1] + iT[18] * a[8][4] + iT[19] * a[1][4] - t[0] + add) >> shift; + dst[ 5 * line] = (-iT[0] * a[3][5] - iT[11] * a[6][0] - iT[13] * a[8][5] - iT[24] * a[9][0] + iT[1] * a[6][5] + iT[10] * a[3][0] + iT[14] * a[9][5] + iT[23] * a[8][0] - iT[2] * a[7][4] + iT[9] * a[2][4] - iT[15] * a[9][1] - iT[22] * a[5][1] - iT[3] * a[7][1] - iT[8] * a[4][4] + iT[16] * a[8][1] + iT[21] * a[1][1] + iT[4] * a[6][2] + iT[7] * a[4][2] - iT[17] * a[5][2] + iT[20] * a[0][3] - iT[5] * a[3][2] - iT[6] * a[2][2] + iT[18] * a[1][2] + iT[19] * a[0][2] + t[0] + add) >> shift; + 
dst[ 8 * line] = ( iT[0] * a[9][3] + iT[11] * a[8][2] + iT[13] * a[3][2] + iT[24] * a[6][3] + iT[1] * a[1][5] + iT[10] * a[0][5] - iT[14] * a[2][5] - iT[23] * a[3][5] - iT[2] * a[1][3] - iT[9] * a[8][3] + iT[15] * a[7][3] + iT[22] * a[4][2] - iT[3] * a[9][5] - iT[8] * a[5][5] + iT[16] * a[2][0] - iT[21] * a[7][0] - iT[4] * a[1][1] - iT[7] * a[0][1] + iT[17] * a[2][1] + iT[20] * a[3][1] + iT[5] * a[5][1] + iT[6] * a[9][1] + iT[18] * a[7][4] - iT[19] * a[2][4] + t[1] + add) >> shift; + dst[ 9 * line] = (-iT[0] * a[2][1] - iT[11] * a[3][1] + iT[13] * a[0][1] + iT[24] * a[1][1] + iT[1] * a[7][3] - iT[10] * a[2][3] + iT[14] * a[9][2] + iT[23] * a[5][2] + iT[2] * a[4][0] + iT[9] * a[7][5] - iT[15] * a[1][5] - iT[22] * a[8][5] + iT[3] * a[3][4] + iT[8] * a[2][4] - iT[16] * a[1][4] - iT[21] * a[0][4] + iT[4] * a[6][3] + iT[7] * a[3][2] + iT[17] * a[9][3] + iT[20] * a[8][2] + iT[5] * a[4][5] + iT[6] * a[6][5] + iT[18] * a[0][0] - iT[19] * a[5][5] - t[0] + add) >> shift; + dst[10 * line] = (-iT[0] * a[6][1] - iT[11] * a[4][1] + iT[13] * a[5][1] - iT[24] * a[0][4] + iT[1] * a[2][2] - iT[10] * a[7][2] - iT[14] * a[5][3] - iT[23] * a[9][3] + iT[2] * a[6][4] + iT[9] * a[4][4] - iT[15] * a[5][4] + iT[22] * a[0][1] - iT[3] * a[2][5] + iT[8] * a[7][5] + iT[16] * a[5][0] + iT[21] * a[9][0] - iT[4] * a[7][0] - iT[7] * a[4][5] + iT[17] * a[8][0] + iT[20] * a[1][0] + iT[5] * a[4][2] + iT[6] * a[7][3] - iT[18] * a[1][3] - iT[19] * a[8][3] + t[0] + add) >> shift; + dst[11 * line] = ( iT[0] * a[1][3] + iT[11] * a[0][3] - iT[13] * a[2][3] - iT[24] * a[3][3] + iT[1] * a[9][1] + iT[10] * a[5][1] - iT[14] * a[2][4] + iT[23] * a[7][4] + iT[2] * a[8][0] + iT[9] * a[9][5] + iT[15] * a[6][5] + iT[22] * a[3][0] - iT[3] * a[0][2] + iT[8] * a[5][3] - iT[16] * a[6][3] - iT[21] * a[4][3] - iT[4] * a[5][0] + iT[7] * a[0][5] + iT[17] * a[4][0] + iT[20] * a[6][0] - iT[5] * a[9][4] - iT[6] * a[5][4] + iT[18] * a[2][1] - iT[19] * a[7][1] - t[1] + add) >> shift; + dst[13 * line] = (-iT[0] * a[0][0] - iT[11] 
* a[1][0] + iT[13] * a[3][0] + iT[24] * a[2][0] - iT[1] * a[5][4] + iT[10] * a[0][1] + iT[14] * a[4][4] + iT[23] * a[6][4] + iT[2] * a[9][3] + iT[9] * a[5][3] - iT[15] * a[2][2] + iT[22] * a[7][2] - iT[3] * a[8][3] - iT[8] * a[9][2] - iT[16] * a[6][2] - iT[21] * a[3][3] + iT[4] * a[1][4] + iT[7] * a[8][4] - iT[17] * a[7][4] - iT[20] * a[4][1] - iT[5] * a[0][5] - iT[6] * a[1][5] + iT[18] * a[3][5] + iT[19] * a[2][5] + t[1] + add) >> shift; + dst[14 * line] = ( iT[0] * a[4][2] + iT[11] * a[7][3] - iT[13] * a[1][3] - iT[24] * a[8][3] + iT[1] * a[4][1] + iT[10] * a[6][1] + iT[14] * a[0][4] - iT[23] * a[5][1] - iT[2] * a[3][0] - iT[9] * a[2][0] + iT[15] * a[1][0] + iT[22] * a[0][0] - iT[3] * a[6][3] - iT[8] * a[4][3] + iT[16] * a[5][3] - iT[21] * a[0][2] - iT[4] * a[7][5] - iT[7] * a[4][0] + iT[17] * a[8][5] + iT[20] * a[1][5] + iT[5] * a[6][4] + iT[6] * a[3][1] + iT[18] * a[9][4] + iT[19] * a[8][1] - t[0] + add) >> shift; + dst[15 * line] = (-iT[0] * a[7][4] - iT[11] * a[4][1] + iT[13] * a[8][4] + iT[24] * a[1][4] + iT[1] * a[2][2] + iT[10] * a[3][2] - iT[14] * a[0][2] - iT[23] * a[1][2] + iT[2] * a[2][1] - iT[9] * a[7][1] - iT[15] * a[5][4] - iT[22] * a[9][4] - iT[3] * a[7][5] + iT[8] * a[2][5] - iT[16] * a[9][0] - iT[21] * a[5][0] - iT[4] * a[2][0] - iT[7] * a[3][0] + iT[17] * a[0][0] + iT[20] * a[1][0] - iT[5] * a[2][3] + iT[6] * a[7][3] + iT[18] * a[5][2] + iT[19] * a[9][2] + t[0] + add) >> shift; + dst[16 * line] = (-iT[0] * a[0][1] + iT[11] * a[5][4] - iT[13] * a[6][4] - iT[24] * a[4][4] + iT[1] * a[0][3] - iT[10] * a[5][2] + iT[14] * a[6][2] + iT[23] * a[4][2] - iT[2] * a[0][5] + iT[9] * a[5][0] - iT[15] * a[6][0] - iT[22] * a[4][0] - iT[3] * a[0][4] - iT[8] * a[1][4] + iT[16] * a[3][4] + iT[21] * a[2][4] + iT[4] * a[0][2] + iT[7] * a[1][2] - iT[17] * a[3][2] - iT[20] * a[2][2] - iT[5] * a[0][0] - iT[6] * a[1][0] + iT[18] * a[3][0] + iT[19] * a[2][0] - t[1] + add) >> shift; + dst[18 * line] = ( iT[0] * a[0][5] + iT[11] * a[1][5] - iT[13] * a[3][5] - iT[24] * 
a[2][5] - iT[1] * a[1][0] - iT[10] * a[0][0] + iT[14] * a[2][0] + iT[23] * a[3][0] - iT[2] * a[5][1] + iT[9] * a[0][4] + iT[15] * a[4][1] + iT[22] * a[6][1] - iT[3] * a[8][1] - iT[8] * a[1][1] + iT[16] * a[4][4] + iT[21] * a[7][1] - iT[4] * a[9][2] - iT[7] * a[5][2] + iT[17] * a[2][3] - iT[20] * a[7][3] - iT[5] * a[9][3] - iT[6] * a[8][2] - iT[18] * a[3][2] - iT[19] * a[6][3] + t[1] + add) >> shift; + dst[20 * line] = (-iT[0] * a[4][0] - iT[11] * a[6][0] - iT[13] * a[0][5] + iT[24] * a[5][0] + iT[1] * a[6][5] + iT[10] * a[4][5] - iT[14] * a[5][5] + iT[23] * a[0][0] - iT[2] * a[6][1] - iT[9] * a[3][4] - iT[15] * a[9][1] - iT[22] * a[8][4] + iT[3] * a[4][4] + iT[8] * a[7][1] - iT[16] * a[1][1] - iT[21] * a[8][1] - iT[4] * a[3][3] - iT[7] * a[2][3] + iT[17] * a[1][3] + iT[20] * a[0][3] + iT[5] * a[7][2] - iT[6] * a[2][2] + iT[18] * a[9][3] + iT[19] * a[5][3] + t[0] + add) >> shift; + dst[21 * line] = (-iT[0] * a[1][2] - iT[11] * a[8][2] + iT[13] * a[7][2] + iT[24] * a[4][3] - iT[1] * a[1][5] - iT[10] * a[8][5] + iT[14] * a[7][5] + iT[23] * a[4][0] - iT[2] * a[5][2] - iT[9] * a[9][2] - iT[15] * a[7][3] + iT[22] * a[2][3] - iT[3] * a[5][5] - iT[8] * a[9][5] - iT[16] * a[7][0] + iT[21] * a[2][0] - iT[4] * a[8][1] - iT[7] * a[9][4] - iT[17] * a[6][4] - iT[20] * a[3][1] - iT[5] * a[8][4] - iT[6] * a[9][1] - iT[18] * a[6][1] - iT[19] * a[3][4] - t[1] + add) >> shift; + dst[23 * line] = (-iT[0] * a[8][4] - iT[11] * a[9][1] - iT[13] * a[6][1] - iT[24] * a[3][4] + iT[1] * a[8][2] + iT[10] * a[1][2] - iT[14] * a[4][3] - iT[23] * a[7][2] + iT[2] * a[0][1] + iT[9] * a[1][1] - iT[15] * a[3][1] - iT[22] * a[2][1] - iT[3] * a[5][0] - iT[8] * a[9][0] - iT[16] * a[7][5] + iT[21] * a[2][5] + iT[4] * a[9][5] + iT[7] * a[8][0] + iT[17] * a[3][0] + iT[20] * a[6][5] - iT[5] * a[5][2] + iT[6] * a[0][3] + iT[18] * a[4][2] + iT[19] * a[6][2] + t[1] + add) >> shift; + dst[24 * line] = (-iT[0] * a[2][3] + iT[11] * a[7][3] + iT[13] * a[5][2] + iT[24] * a[9][2] + iT[1] * a[4][1] + iT[10] * 
a[7][4] - iT[14] * a[1][4] - iT[23] * a[8][4] - iT[2] * a[4][5] - iT[9] * a[7][0] + iT[15] * a[1][0] + iT[22] * a[8][0] + iT[3] * a[4][3] + iT[8] * a[6][3] + iT[16] * a[0][2] - iT[21] * a[5][3] - iT[4] * a[2][5] - iT[7] * a[3][5] + iT[17] * a[0][5] + iT[20] * a[1][5] + iT[5] * a[2][1] + iT[6] * a[3][1] - iT[18] * a[0][1] - iT[19] * a[1][1] - t[0] + add) >> shift; + dst[25 * line] = ( iT[0] * a[4][5] + iT[11] * a[6][5] + iT[13] * a[0][0] - iT[24] * a[5][5] + iT[1] * a[3][1] + iT[10] * a[2][1] - iT[14] * a[1][1] - iT[23] * a[0][1] - iT[2] * a[7][2] - iT[9] * a[4][3] + iT[15] * a[8][2] + iT[22] * a[1][2] - iT[3] * a[6][2] - iT[8] * a[3][3] - iT[16] * a[9][2] - iT[21] * a[8][3] - iT[4] * a[2][4] + iT[7] * a[7][4] + iT[17] * a[5][1] + iT[20] * a[9][1] + iT[5] * a[4][0] + iT[6] * a[6][0] + iT[18] * a[0][5] - iT[19] * a[5][0] + t[0] + add) >> shift; + dst[26 * line] = ( iT[0] * a[8][0] + iT[11] * a[1][0] - iT[13] * a[4][5] - iT[24] * a[7][0] + iT[1] * a[5][4] + iT[10] * a[9][4] + iT[14] * a[7][1] - iT[23] * a[2][1] - iT[2] * a[1][2] - iT[9] * a[0][2] + iT[15] * a[2][2] + iT[22] * a[3][2] - iT[3] * a[9][2] - iT[8] * a[8][3] - iT[16] * a[3][3] - iT[21] * a[6][2] + iT[4] * a[0][4] - iT[7] * a[5][1] + iT[17] * a[6][1] + iT[20] * a[4][1] + iT[5] * a[8][5] + iT[6] * a[1][5] - iT[18] * a[4][0] - iT[19] * a[7][5] - t[1] + add) >> shift; + dst[28 * line] = (-iT[0] * a[5][1] - iT[11] * a[9][1] - iT[13] * a[7][4] + iT[24] * a[2][4] + iT[1] * a[8][2] + iT[10] * a[9][3] + iT[14] * a[6][3] + iT[23] * a[3][2] - iT[2] * a[9][4] - iT[9] * a[8][1] - iT[15] * a[3][1] - iT[22] * a[6][4] + iT[3] * a[9][0] + iT[8] * a[5][0] - iT[16] * a[2][5] + iT[21] * a[7][5] - iT[4] * a[5][5] + iT[7] * a[0][0] + iT[17] * a[4][5] + iT[20] * a[6][5] + iT[5] * a[1][3] + iT[6] * a[0][3] - iT[18] * a[2][3] - iT[19] * a[3][3] + t[1] + add) >> shift; + dst[29 * line] = (-iT[0] * a[6][4] - iT[11] * a[3][1] - iT[13] * a[9][4] - iT[24] * a[8][1] + iT[1] * a[7][3] + iT[10] * a[4][2] - iT[14] * a[8][3] - iT[23] * 
a[1][3] + iT[2] * a[3][5] + iT[9] * a[2][5] - iT[15] * a[1][5] - iT[22] * a[0][5] - iT[3] * a[2][4] - iT[8] * a[3][4] + iT[16] * a[0][4] + iT[21] * a[1][4] - iT[4] * a[4][3] - iT[7] * a[7][2] + iT[17] * a[1][2] + iT[20] * a[8][2] + iT[5] * a[3][0] + iT[6] * a[6][5] + iT[18] * a[8][0] + iT[19] * a[9][5] - t[0] + add) >> shift; + dst[30 * line] = (-iT[0] * a[7][2] + iT[11] * a[2][2] - iT[13] * a[9][3] - iT[24] * a[5][3] - iT[1] * a[6][0] - iT[10] * a[4][0] + iT[14] * a[5][0] - iT[23] * a[0][5] - iT[2] * a[4][2] - iT[9] * a[6][2] - iT[15] * a[0][3] + iT[22] * a[5][2] + iT[3] * a[2][0] - iT[8] * a[7][0] - iT[16] * a[5][5] - iT[21] * a[9][5] + iT[4] * a[7][1] - iT[7] * a[2][1] + iT[17] * a[9][4] + iT[20] * a[5][4] + iT[5] * a[6][1] + iT[6] * a[4][1] - iT[18] * a[5][1] + iT[19] * a[0][4] + t[0] + add) >> shift; + dst[31 * line] = (-iT[0] * a[8][5] - iT[11] * a[1][5] + iT[13] * a[4][0] + iT[24] * a[7][5] + iT[1] * a[1][0] + iT[10] * a[8][0] - iT[14] * a[7][0] - iT[23] * a[4][5] + iT[2] * a[8][4] + iT[9] * a[1][4] - iT[15] * a[4][1] - iT[22] * a[7][4] - iT[3] * a[1][1] - iT[8] * a[8][1] + iT[16] * a[7][1] + iT[21] * a[4][4] - iT[4] * a[8][3] - iT[7] * a[1][3] + iT[17] * a[4][2] + iT[20] * a[7][3] + iT[5] * a[1][2] + iT[6] * a[8][2] - iT[18] * a[7][2] - iT[19] * a[4][3] - t[1] + add) >> shift; + + dst[ 2 * line] = (iT[ 4]*b[0] + iT[ 9]*b[1] + iT[14]*b[2] + iT[19]*b[3] + iT[24]*b[4] + iT[29]*b[5] + add) >> shift; + dst[ 7 * line] = (iT[14]*b[0] + iT[29]*b[1] + iT[19]*b[2] + iT[ 4]*b[3] - iT[ 9]*b[4] - iT[24]*b[5] + add) >> shift; + dst[12 * line] = (iT[24]*b[0] + iT[14]*b[1] - iT[ 9]*b[2] - iT[29]*b[3] - iT[ 4]*b[4] + iT[19]*b[5] + add) >> shift; + dst[17 * line] = (iT[29]*b[0] - iT[ 4]*b[1] - iT[24]*b[2] + iT[ 9]*b[3] + iT[19]*b[4] - iT[14]*b[5] + add) >> shift; + dst[22 * line] = (iT[19]*b[0] - iT[24]*b[1] + iT[ 4]*b[2] + iT[14]*b[3] - iT[29]*b[4] + iT[ 9]*b[5] + add) >> shift; + dst[27 * line] = (iT[ 9]*b[0] - iT[19]*b[1] + iT[29]*b[2] - iT[24]*b[3] + iT[14]*b[4] - iT[ 
4]*b[5] + add) >> shift; + + dst[ 6 * line] = (iT[12]*c[0] + iT[25]*c[1] + add) >> shift; + dst[19 * line] = (iT[25]*c[0] - iT[12]*c[1] + add) >> shift; + + src += 32; + dst++; + } + + if (iSkipLine) + { + dst = pCoef + reducedLine; + for (j = 0; j < cutoff; j++) + { + memset(dst, 0, sizeof(TCoeff)*iSkipLine); + dst += line; + } + } + + if (iSkipLine2) + { + dst = pCoef + line * cutoff; + memset(dst, 0, sizeof(TCoeff) * line * iSkipLine2); + } +#else _fastForwardMM< 32 >( src, dst, shift, line, iSkipLine, iSkipLine2, g_trCoreDST7P32[TRANSFORM_FORWARD][0] ); +#endif } void fastInverseDST7_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) { +#if JVET_M0497_FAST_DST7 && !JVET_M0497_MATRIX_MULT + /* Fast inverse 32-point DST-7, compiled when JVET_M0497_FAST_DST7 is set and JVET_M0497_MATRIX_MULT is not; otherwise the generic _fastInverseMM< 32 > call in the #else branch is used. Each loop pass reads 32 inputs from src at stride 'line' and writes 32 consecutive outputs to dst, rounded, shifted and clipped to [outputMinimum, outputMaximum]. iSkipLine2 is not referenced on this path. */ int j, k; + TCoeff a[10][6]; /* pairwise sums/differences of inputs, rebuilt on every pass */ + TCoeff t[2]; /* combinations of src[6*line] and src[19*line] with iT[12] / iT[25] */ + TCoeff b[6]; /* 5-term alternating sums; feed dst[4], [9], [14], [19], [24], [29] */ + TCoeff c[2]; /* 13-term alternating sums; feed dst[12] and dst[25] */ + + TCoeff add = (shift > 0) ? (1 << (shift - 1)) : 0; /* rounding offset: half of (1 << shift), or 0 when shift <= 0 */ + const TMatrixCoeff *iT = g_trCoreDST7P32[TRANSFORM_INVERSE][0]; + const int reducedLine = line - iSkipLine; /* number of passes computed; the remaining iSkipLine * 32 outputs are zero-filled after the loop */ + + for (j = 0; j < reducedLine; j++) + { + for (k = 0; k < 6; k++) + { + a[0][k] = src[ k * line] + src[(12 - k) * line]; + a[1][k] = src[ k * line] - src[(13 + k) * line]; + a[2][k] = src[ k * line] + src[(25 - k) * line]; + a[3][k] = src[ k * line] - src[(26 + k) * line]; + a[4][k] = src[( 7 + k) * line] + src[(18 - k) * line]; + a[5][k] = src[( 7 + k) * line] - src[(20 + k) * line]; + a[6][k] = src[( 7 + k) * line] + src[(31 - k) * line]; + a[7][k] = src[(13 + k) * line] + src[(25 - k) * line]; + a[8][k] = src[(13 + k) * line] - src[(26 + k) * line]; + a[9][k] = src[(20 + k) * line] + src[(31 - k) * line]; + + b[k] = src[k * line] - src[(12-k) * line] + src[(13+k) * line] - src[(25-k) * line] + src[(26+k) * line]; + } + for (k = 0; k < 2; k++) + { + c[k] = src[k * line] - src[(4-k) * line] + src[(5+k) * line] - src[(9-k) * line] + src[(10+k) * line] - src[(14-k) * line] + src[(15+k)*line] - src[(19-k)*line] + src[(20+k)*line] - src[(24-k)*line] +
src[(25+k)*line] - src[(29-k)*line] + src[(30+k)*line]; + } + + t[0] = iT[12] * src[6*line] + iT[25] * src[19*line]; + t[1] = iT[25] * src[6*line] - iT[12] * src[19*line]; /* t[0] / t[1] enter each a[][]-based output below with a + or - sign */ + + dst[ 0] = Clip3(outputMinimum, outputMaximum, (int)( iT[0] * a[1][0] - iT[11] * a[8][0] + iT[13] * a[7][0] + iT[24] * a[4][5] - iT[1] * a[8][5] + iT[10] * a[1][5] + iT[14] * a[4][0] + iT[23] * a[7][5] + iT[2] * a[1][1] - iT[9] * a[8][1] + iT[15] * a[7][1] + iT[22] * a[4][4] - iT[3] * a[8][4] + iT[8] * a[1][4] + iT[16] * a[4][1] + iT[21] * a[7][4] + iT[4] * a[1][2] - iT[7] * a[8][2] + iT[17] * a[7][2] + iT[20] * a[4][3] - iT[5] * a[8][3] + iT[6] * a[1][3] + iT[18] * a[4][2] + iT[19] * a[7][3] + t[0] + add) >> shift); + dst[ 1] = Clip3(outputMinimum, outputMaximum, (int)(-iT[0] * a[4][2] - iT[11] * a[6][2] + iT[13] * a[0][3] + iT[24] * a[5][2] + iT[1] * a[2][0] + iT[10] * a[7][0] + iT[14] * a[5][5] - iT[23] * a[9][5] + iT[2] * a[7][2] + iT[9] * a[2][2] - iT[15] * a[9][3] + iT[22] * a[5][3] - iT[3] * a[6][0] - iT[8] * a[4][0] + iT[16] * a[5][0] + iT[21] * a[0][5] - iT[4] * a[4][1] - iT[7] * a[6][1] + iT[17] * a[0][4] + iT[20] * a[5][1] + iT[5] * a[2][1] + iT[6] * a[7][1] + iT[18] * a[5][4] - iT[19] * a[9][4] + t[1] + add) >> shift); + dst[ 2] = Clip3(outputMinimum, outputMaximum, (int)(-iT[0] * a[2][4] - iT[11] * a[3][4] + iT[13] * a[0][4] + iT[24] * a[1][4] + iT[1] * a[4][3] + iT[10] * a[7][2] + iT[14] * a[1][2] - iT[23] * a[8][2] + iT[2] * a[3][0] - iT[9] * a[6][5] - iT[15] * a[8][0] + iT[22] * a[9][5] - iT[3] * a[6][4] + iT[8] * a[3][1] + iT[16] * a[9][4] - iT[21] * a[8][1] + iT[4] * a[7][3] + iT[7] * a[4][2] - iT[17] * a[8][3] + iT[20] * a[1][3] - iT[5] * a[3][5] - iT[6] * a[2][5] + iT[18] * a[1][5] + iT[19] * a[0][5] + t[1] + add) >> shift); + dst[ 3] = Clip3(outputMinimum, outputMaximum, (int)( iT[0] * a[5][4] + iT[11] * a[0][1] - iT[13] * a[4][4] - iT[24] * a[6][4] - iT[1] * a[1][3] - iT[10] * a[0][3] + iT[14] * a[2][3] + iT[23] * a[3][3] - iT[2] * a[0][4] - iT[9] * a[1][4] + iT[15] *
a[3][4] + iT[22] * a[2][4] + iT[3] * a[0][0] + iT[8] * a[5][5] - iT[16] * a[6][5] - iT[21] * a[4][5] + iT[4] * a[5][0] - iT[7] * a[9][0] + iT[17] * a[7][5] + iT[20] * a[2][5] - iT[5] * a[8][2] + iT[6] * a[9][3] - iT[18] * a[6][3] + iT[19] * a[3][2] + t[0] + add) >> shift); + dst[ 5] = Clip3(outputMinimum, outputMaximum, (int)(-iT[0] * a[1][5] + iT[11] * a[8][5] - iT[13] * a[7][5] - iT[24] * a[4][0] + iT[1] * a[5][1] + iT[10] * a[0][4] - iT[14] * a[4][1] - iT[23] * a[6][1] - iT[2] * a[8][3] + iT[9] * a[9][2] - iT[15] * a[6][2] + iT[22] * a[3][3] - iT[3] * a[0][2] - iT[8] * a[1][2] + iT[16] * a[3][2] + iT[21] * a[2][2] - iT[4] * a[9][4] + iT[7] * a[5][4] + iT[17] * a[2][1] + iT[20] * a[7][1] + iT[5] * a[1][0] - iT[6] * a[8][0] + iT[18] * a[7][0] + iT[19] * a[4][5] - t[0] + add) >> shift); + dst[ 6] = Clip3(outputMinimum, outputMaximum, (int)(-iT[0] * a[7][5] - iT[11] * a[2][5] + iT[13] * a[9][0] - iT[24] * a[5][0] + iT[1] * a[3][4] - iT[10] * a[6][1] - iT[14] * a[8][4] + iT[23] * a[9][1] + iT[2] * a[4][2] + iT[9] * a[7][3] + iT[15] * a[1][3] - iT[22] * a[8][3] - iT[3] * a[2][2] - iT[8] * a[3][2] + iT[16] * a[0][2] + iT[21] * a[1][2] - iT[4] * a[6][4] - iT[7] * a[4][4] + iT[17] * a[5][4] + iT[20] * a[0][1] + iT[5] * a[7][0] + iT[6] * a[2][0] - iT[18] * a[9][5] + iT[19] * a[5][5] - t[1] + add) >> shift); + dst[ 7] = Clip3(outputMinimum, outputMaximum, (int)(-iT[0] * a[6][3] - iT[11] * a[4][3] + iT[13] * a[5][3] + iT[24] * a[0][2] + iT[1] * a[7][1] + iT[10] * a[4][4] - iT[14] * a[8][1] + iT[23] * a[1][1] - iT[2] * a[7][5] - iT[9] * a[4][0] + iT[15] * a[8][5] - iT[22] * a[1][5] + iT[3] * a[7][3] + iT[8] * a[2][3] - iT[16] * a[9][2] + iT[21] * a[5][2] - iT[4] * a[6][5] + iT[7] * a[3][0] + iT[17] * a[9][5] - iT[20] * a[8][0] + iT[5] * a[6][1] - iT[6] * a[3][4] - iT[18] * a[9][1] + iT[19] * a[8][4] - t[1] + add) >> shift); + dst[ 8] = Clip3(outputMinimum, outputMaximum, (int)(-iT[0] * a[1][1] - iT[11] * a[0][1] + iT[13] * a[2][1] + iT[24] * a[3][1] + iT[1] * a[1][3] -
iT[10] * a[8][3] + iT[14] * a[7][3] + iT[23] * a[4][2] - iT[2] * a[9][1] + iT[9] * a[8][4] - iT[15] * a[3][4] + iT[22] * a[6][1] + iT[3] * a[5][5] + iT[8] * a[0][0] - iT[16] * a[4][5] - iT[21] * a[6][5] + iT[4] * a[0][5] + iT[7] * a[1][5] - iT[17] * a[3][5] - iT[20] * a[2][5] + iT[5] * a[5][3] - iT[6] * a[9][3] + iT[18] * a[7][2] + iT[19] * a[2][2] - t[0] + add) >> shift); + dst[10] = Clip3(outputMinimum, outputMaximum, (int)( iT[0] * a[8][3] - iT[11] * a[1][3] - iT[13] * a[4][2] - iT[24] * a[7][3] - iT[1] * a[8][0] + iT[10] * a[1][0] + iT[14] * a[4][5] + iT[23] * a[7][0] + iT[2] * a[5][3] + iT[9] * a[0][2] - iT[15] * a[4][3] - iT[22] * a[6][3] - iT[3] * a[5][0] - iT[8] * a[0][5] + iT[16] * a[4][0] + iT[21] * a[6][0] + iT[4] * a[1][4] + iT[7] * a[0][4] - iT[17] * a[2][4] - iT[20] * a[3][4] - iT[5] * a[1][1] - iT[6] * a[0][1] + iT[18] * a[2][1] + iT[19] * a[3][1] + t[0] + add) >> shift); + dst[11] = Clip3(outputMinimum, outputMaximum, (int)( iT[0] * a[7][0] + iT[11] * a[2][0] - iT[13] * a[9][5] + iT[24] * a[5][5] + iT[1] * a[2][5] + iT[10] * a[7][5] + iT[14] * a[5][0] - iT[23] * a[9][0] - iT[2] * a[2][1] - iT[9] * a[3][1] + iT[15] * a[0][1] + iT[22] * a[1][1] - iT[3] * a[7][4] - iT[8] * a[4][1] + iT[16] * a[8][4] - iT[21] * a[1][4] + iT[4] * a[3][2] - iT[7] * a[6][3] - iT[17] * a[8][2] + iT[20] * a[9][3] + iT[5] * a[4][2] + iT[6] * a[6][2] - iT[18] * a[0][3] - iT[19] * a[5][2] + t[1] + add) >> shift); + dst[13] = Clip3(outputMinimum, outputMaximum, (int)( iT[0] * a[9][5] - iT[11] * a[8][0] + iT[13] * a[3][0] - iT[24] * a[6][5] - iT[1] * a[8][5] + iT[10] * a[9][0] - iT[14] * a[6][0] + iT[23] * a[3][5] + iT[2] * a[5][4] - iT[9] * a[9][4] + iT[15] * a[7][1] + iT[22] * a[2][1] - iT[3] * a[1][4] + iT[8] * a[8][4] - iT[16] * a[7][4] - iT[21] * a[4][1] - iT[4] * a[0][2] - iT[7] * a[5][3] + iT[17] * a[6][3] + iT[20] * a[4][3] + iT[5] * a[0][3] + iT[6] * a[1][3] - iT[18] * a[3][3] - iT[19] * a[2][3] + t[0] + add) >> shift); + dst[15] = Clip3(outputMinimum, outputMaximum,
(int)(-iT[0] * a[9][1] + iT[11] * a[5][1] + iT[13] * a[2][4] + iT[24] * a[7][4] + iT[1] * a[9][3] - iT[10] * a[5][3] - iT[14] * a[2][2] - iT[23] * a[7][2] - iT[2] * a[9][5] + iT[9] * a[5][5] + iT[15] * a[2][0] + iT[22] * a[7][0] + iT[3] * a[9][4] - iT[8] * a[8][1] + iT[16] * a[3][1] - iT[21] * a[6][4] - iT[4] * a[9][2] + iT[7] * a[8][3] - iT[17] * a[3][3] + iT[20] * a[6][2] + iT[5] * a[9][0] - iT[6] * a[8][5] + iT[18] * a[3][5] - iT[19] * a[6][0] - t[0] + add) >> shift); + dst[16] = Clip3(outputMinimum, outputMaximum, (int)( iT[0] * a[4][4] + iT[11] * a[7][1] + iT[13] * a[1][1] - iT[24] * a[8][1] + iT[1] * a[6][2] - iT[10] * a[3][3] - iT[14] * a[9][2] + iT[23] * a[8][3] - iT[2] * a[6][1] - iT[9] * a[4][1] + iT[15] * a[5][1] + iT[22] * a[0][4] - iT[3] * a[4][5] - iT[8] * a[6][5] + iT[16] * a[0][0] + iT[21] * a[5][5] - iT[4] * a[6][0] + iT[7] * a[3][5] + iT[17] * a[9][0] - iT[20] * a[8][5] + iT[5] * a[6][3] + iT[6] * a[4][3] - iT[18] * a[5][3] - iT[19] * a[0][2] - t[1] + add) >> shift); + dst[17] = Clip3(outputMinimum, outputMaximum, (int)(-iT[0] * a[7][2] - iT[11] * a[4][3] + iT[13] * a[8][2] - iT[24] * a[1][2] + iT[1] * a[7][1] + iT[10] * a[2][1] - iT[14] * a[9][4] + iT[23] * a[5][4] - iT[2] * a[3][5] + iT[9] * a[6][0] + iT[15] * a[8][5] - iT[22] * a[9][0] - iT[3] * a[2][3] - iT[8] * a[7][3] - iT[16] * a[5][2] + iT[21] * a[9][2] + iT[4] * a[4][5] + iT[7] * a[7][0] + iT[17] * a[1][0] - iT[20] * a[8][0] - iT[5] * a[2][4] - iT[6] * a[3][4] + iT[18] * a[0][4] + iT[19] * a[1][4] - t[1] + add) >> shift); + dst[18] = Clip3(outputMinimum, outputMaximum, (int)(-iT[0] * a[9][0] + iT[11] * a[8][5] - iT[13] * a[3][5] + iT[24] * a[6][0] + iT[1] * a[5][1] - iT[10] * a[9][1] + iT[14] * a[7][4] + iT[23] * a[2][4] + iT[2] * a[0][3] + iT[9] * a[5][2] - iT[15] * a[6][2] - iT[22] * a[4][2] + iT[3] * a[1][2] + iT[8] * a[0][2] - iT[16] * a[2][2] - iT[21] * a[3][2] - iT[4] * a[8][1] + iT[7] * a[1][1] + iT[17] * a[4][4] + iT[20] * a[7][1] + iT[5] * a[9][5] - iT[6] * a[8][0] + iT[18] *
a[3][0] - iT[19] * a[6][5] - t[0] + add) >> shift); + dst[20] = Clip3(outputMinimum, outputMaximum, (int)( iT[0] * a[8][2] - iT[11] * a[9][3] + iT[13] * a[6][3] - iT[24] * a[3][2] + iT[1] * a[0][1] + iT[10] * a[5][4] - iT[14] * a[6][4] - iT[23] * a[4][4] + iT[2] * a[1][5] + iT[9] * a[0][5] - iT[15] * a[2][5] - iT[22] * a[3][5] - iT[3] * a[9][2] + iT[8] * a[5][2] + iT[16] * a[2][3] + iT[21] * a[7][3] + iT[4] * a[5][5] - iT[7] * a[9][5] + iT[17] * a[7][0] + iT[20] * a[2][0] + iT[5] * a[0][4] + iT[6] * a[5][1] - iT[18] * a[6][1] - iT[19] * a[4][1] + t[0] + add) >> shift); + dst[21] = Clip3(outputMinimum, outputMaximum, (int)(-iT[0] * a[2][1] - iT[11] * a[7][1] - iT[13] * a[5][4] + iT[24] * a[9][4] - iT[1] * a[6][2] - iT[10] * a[4][2] + iT[14] * a[5][2] + iT[23] * a[0][3] - iT[2] * a[2][4] - iT[9] * a[7][4] - iT[15] * a[5][1] + iT[22] * a[9][1] - iT[3] * a[6][5] - iT[8] * a[4][5] + iT[16] * a[5][5] + iT[21] * a[0][0] - iT[4] * a[4][0] - iT[7] * a[7][5] - iT[17] * a[1][5] + iT[20] * a[8][5] - iT[5] * a[7][2] - iT[6] * a[4][3] + iT[18] * a[8][2] - iT[19] * a[1][2] + t[1] + add) >> shift); + dst[22] = Clip3(outputMinimum, outputMaximum, (int)( iT[0] * a[6][1] - iT[11] * a[3][4] - iT[13] * a[9][1] + iT[24] * a[8][4] + iT[1] * a[4][3] + iT[10] * a[6][3] - iT[14] * a[0][2] - iT[23] * a[5][3] + iT[2] * a[7][0] + iT[9] * a[4][5] - iT[15] * a[8][0] + iT[22] * a[1][0] - iT[3] * a[3][1] + iT[8] * a[6][4] + iT[16] * a[8][1] - iT[21] * a[9][4] - iT[4] * a[2][3] - iT[7] * a[3][3] + iT[17] * a[0][3] + iT[20] * a[1][3] - iT[5] * a[7][5] - iT[6] * a[2][5] + iT[18] * a[9][0] - iT[19] * a[5][0] + t[1] + add) >> shift); + dst[23] = Clip3(outputMinimum, outputMaximum, (int)(-iT[0] * a[0][3] - iT[11] * a[1][3] + iT[13] * a[3][3] + iT[24] * a[2][3] - iT[1] * a[8][0] + iT[10] * a[9][5] - iT[14] * a[6][5] + iT[23] * a[3][0] + iT[2] * a[8][2] - iT[9] * a[1][2] - iT[15] * a[4][3] - iT[22] * a[7][2] + iT[3] * a[0][5] + iT[8] * a[5][0] - iT[16] * a[6][0] - iT[21] * a[4][0] + iT[4] * a[8][4] -
iT[7] * a[9][1] + iT[17] * a[6][1] - iT[20] * a[3][4] - iT[5] * a[5][4] - iT[6] * a[0][1] + iT[18] * a[4][4] + iT[19] * a[6][4] + t[0] + add) >> shift); + dst[26] = Clip3(outputMinimum, outputMaximum, (int)(-iT[0] * a[3][0] - iT[11] * a[2][0] + iT[13] * a[1][0] + iT[24] * a[0][0] - iT[1] * a[2][5] - iT[10] * a[3][5] + iT[14] * a[0][5] + iT[23] * a[1][5] + iT[2] * a[4][4] + iT[9] * a[6][4] - iT[15] * a[0][1] - iT[22] * a[5][4] - iT[3] * a[4][1] - iT[8] * a[7][4] - iT[16] * a[1][4] + iT[21] * a[8][4] + iT[4] * a[2][2] + iT[7] * a[7][2] + iT[17] * a[5][3] - iT[20] * a[9][3] + iT[5] * a[3][3] - iT[6] * a[6][2] - iT[18] * a[8][3] + iT[19] * a[9][2] - t[1] + add) >> shift); + dst[27] = Clip3(outputMinimum, outputMaximum, (int)(-iT[0] * a[3][3] + iT[11] * a[6][2] + iT[13] * a[8][3] - iT[24] * a[9][2] - iT[1] * a[2][0] - iT[10] * a[3][0] + iT[14] * a[0][0] + iT[23] * a[1][0] - iT[2] * a[6][3] + iT[9] * a[3][2] + iT[15] * a[9][3] - iT[22] * a[8][2] - iT[3] * a[4][0] - iT[8] * a[6][0] + iT[16] * a[0][5] + iT[21] * a[5][0] - iT[4] * a[7][4] - iT[7] * a[2][4] + iT[17] * a[9][1] - iT[20] * a[5][1] - iT[5] * a[4][4] - iT[6] * a[7][1] - iT[18] * a[1][1] + iT[19] * a[8][1] - t[1] + add) >> shift); + dst[28] = Clip3(outputMinimum, outputMaximum, (int)( iT[0] * a[0][4] + iT[11] * a[5][1] - iT[13] * a[6][1] - iT[24] * a[4][1] + iT[1] * a[9][3] - iT[10] * a[8][2] + iT[14] * a[3][2] - iT[23] * a[6][3] - iT[2] * a[1][0] - iT[9] * a[0][0] + iT[15] * a[2][0] + iT[22] * a[3][0] + iT[3] * a[8][1] - iT[8] * a[9][4] + iT[16] * a[6][4] - iT[21] * a[3][1] - iT[4] * a[5][2] - iT[7] * a[0][3] + iT[17] * a[4][2] + iT[20] * a[6][2] + iT[5] * a[1][5] - iT[6] * a[8][5] + iT[18] * a[7][5] + iT[19] * a[4][0] - t[0] + add) >> shift); + dst[30] = Clip3(outputMinimum, outputMaximum, (int)( iT[0] * a[5][3] - iT[11] * a[9][3] + iT[13] * a[7][2] + iT[24] * a[2][2] + iT[1] * a[0][1] + iT[10] * a[1][1] - iT[14] * a[3][1] - iT[23] * a[2][1] + iT[2] * a[9][0] - iT[9] * a[5][0] - iT[15] * a[2][5] - iT[22] *
a[7][5] - iT[3] * a[5][2] + iT[8] * a[9][2] - iT[16] * a[7][3] - iT[21] * a[2][3] - iT[4] * a[0][0] - iT[7] * a[1][0] + iT[17] * a[3][0] + iT[20] * a[2][0] - iT[5] * a[9][1] + iT[6] * a[5][1] + iT[18] * a[2][4] + iT[19] * a[7][4] + t[0] + add) >> shift); + dst[31] = Clip3(outputMinimum, outputMaximum, (int)( iT[0] * a[3][5] + iT[11] * a[2][5] - iT[13] * a[1][5] - iT[24] * a[0][5] - iT[1] * a[3][4] - iT[10] * a[2][4] + iT[14] * a[1][4] + iT[23] * a[0][4] + iT[2] * a[3][3] + iT[9] * a[2][3] - iT[15] * a[1][3] - iT[22] * a[0][3] - iT[3] * a[3][2] - iT[8] * a[2][2] + iT[16] * a[1][2] + iT[21] * a[0][2] + iT[4] * a[3][1] + iT[7] * a[2][1] - iT[17] * a[1][1] - iT[20] * a[0][1] - iT[5] * a[3][0] - iT[6] * a[2][0] + iT[18] * a[1][0] + iT[19] * a[0][0] + t[1] + add) >> shift); + + dst[ 4] = Clip3(outputMinimum, outputMaximum, (int)(iT[ 4] * b[0] + iT[14] * b[1] + iT[24] * b[2] + iT[29] * b[3] + iT[19] * b[4] + iT[ 9] * b[5] + add) >> shift); + dst[ 9] = Clip3(outputMinimum, outputMaximum, (int)(iT[ 9] * b[0] + iT[29] * b[1] + iT[14] * b[2] - iT[ 4] * b[3] - iT[24] * b[4] - iT[19] * b[5] + add) >> shift); + dst[14] = Clip3(outputMinimum, outputMaximum, (int)(iT[14] * b[0] + iT[19] * b[1] - iT[ 9] * b[2] - iT[24] * b[3] + iT[ 4] * b[4] + iT[29] * b[5] + add) >> shift); + dst[19] = Clip3(outputMinimum, outputMaximum, (int)(iT[19] * b[0] + iT[ 4] * b[1] - iT[29] * b[2] + iT[ 9] * b[3] + iT[14] * b[4] - iT[24] * b[5] + add) >> shift); + dst[24] = Clip3(outputMinimum, outputMaximum, (int)(iT[24] * b[0] - iT[ 9] * b[1] - iT[ 4] * b[2] + iT[19] * b[3] - iT[29] * b[4] + iT[14] * b[5] + add) >> shift); + dst[29] = Clip3(outputMinimum, outputMaximum, (int)(iT[29] * b[0] - iT[24] * b[1] + iT[19] * b[2] - iT[14] * b[3] + iT[ 9] * b[4] - iT[ 4] * b[5] + add) >> shift); + + dst[12] = Clip3(outputMinimum, outputMaximum, (int)(iT[12]*c[0] + iT[25]*c[1] + add) >> shift); + dst[25] = Clip3(outputMinimum, outputMaximum, (int)(iT[25]*c[0] - iT[12]*c[1] + add) >> shift); + + src++; /* inputs of one pass are 'line' apart, so the next pass starts one element later */ + dst += 32; /* outputs of one pass are contiguous */
+ } + + if (iSkipLine) + { + memset(dst, 0, (iSkipLine * 32) * sizeof(TCoeff)); /* dst now points past the computed outputs; zero the iSkipLine * 32 entries the loop did not produce */ + } +#else _fastInverseMM< 32 >( src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_trCoreDST7P32[TRANSFORM_INVERSE][0] ); +#endif } @@ -1049,22 +1340,314 @@ void fastInverseDCT8_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int void fastForwardDCT8_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) { +#if JVET_M0497_FAST_DST7 && !JVET_M0497_MATRIX_MULT + /* Fast forward 16-point DCT-8, compiled when JVET_M0497_FAST_DST7 is set and JVET_M0497_MATRIX_MULT is not; otherwise the generic _fastForwardMM< 16 > call in the #else branch is used. Each loop pass reads 16 consecutive inputs from src and writes 16 outputs to dst at stride 'line', rounded with 'add' and right-shifted by 'shift'. */ int j, k; + TCoeff a[5], b[5], c[5], d[5], t; /* a/b/c: pairwise sums/differences of inputs; d: 3-term combinations; t: iT[10] * src[5] */ + TCoeff add = (shift > 0) ? (1 << (shift - 1)) : 0; /* rounding offset: half of (1 << shift), or 0 when shift <= 0 */ + + const TMatrixCoeff *iT = g_trCoreDST7P16[TRANSFORM_FORWARD][0]; /* coefficients are taken from the DST-7 table on this path; only the #else path uses g_trCoreDCT8P16 */ + + TCoeff *pCoef = dst; /* start of the output buffer, needed by the zero-fill steps after the loop */ + const int reducedLine = line - iSkipLine; /* number of passes actually computed */ + const int cutoff = 16 - iSkipLine2; /* output indices >= cutoff are zeroed by the iSkipLine2 block below */ + + for (j = 0; j < reducedLine; j++) + { + for (k = 0; k < 5; k++) + { + a[k] = src[15 - k] + src[ 4 - k]; + b[k] = src[ 6 + k] + src[ 4 - k]; + c[k] = src[15 - k] - src[ 6 + k]; + d[k] = src[15 - k] + src[ 6 + k] - src[ 4 - k]; + } + + t = iT[10] * src[5]; + + dst[ 1 * line] = ( - iT[ 2]*d[0] - iT[ 5]*d[1] - iT[ 8]*d[2] - iT[11]*d[3] - iT[14]*d[4] + add) >> shift; + dst[ 4 * line] = ( iT[ 8]*d[0] + iT[14]*d[1] + iT[ 5]*d[2] - iT[ 2]*d[3] - iT[11]*d[4] + add) >> shift; + dst[ 7 * line] = ( - iT[14]*d[0] - iT[ 2]*d[1] + iT[11]*d[2] + iT[ 5]*d[3] - iT[ 8]*d[4] + add) >> shift; + dst[10 * line] = ( iT[11]*d[0] - iT[ 8]*d[1] - iT[ 2]*d[2] + iT[14]*d[3] - iT[ 5]*d[4] + add) >> shift; + dst[13 * line] = ( - iT[ 5]*d[0] + iT[11]*d[1] - iT[14]*d[2] + iT[ 8]*d[3] - iT[ 2]*d[4] + add) >> shift; + + dst[ 5 * line] = ( - iT[10] * (src[15] + src[14] - src[12] - src[11] + src[9] + src[8] - src[6] - src[5] + src[3] + src[2] - src[0]) + add) >> shift; + + dst[ 0 * line] = ( iT[0]*a[0] + iT[9]*b[0] + iT[1]*a[1] + iT[8]*b[1] + iT[2]*a[2] + iT[7]*b[2] + iT[3]*a[3] + iT[6]*b[3] + iT[4]*a[4] + iT[5]*b[4] + t + add ) >> shift; + dst[ 2 * line] = ( iT[4]*c[0] - iT[5]*b[0] + iT[9]*c[1] - iT[0]*b[1] + iT[6]*c[2] +
iT[3]*a[2] + iT[1]*c[3] + iT[8]*a[3] + iT[7]*a[4] + iT[2]*b[4] - t + add ) >> shift; + dst[ 3 * line] = ( - iT[6]*a[0] - iT[3]*b[0] - iT[2]*c[1] - iT[7]*a[1] - iT[9]*c[2] - iT[0]*a[2] - iT[4]*c[3] + iT[5]*b[3] + iT[1]*a[4] + iT[8]*b[4] - t + add ) >> shift; + dst[ 6 * line] = ( iT[8]*a[0] + iT[1]*c[0] + iT[6]*c[1] - iT[3]*b[1] - iT[5]*a[2] - iT[4]*b[2] - iT[7]*c[3] - iT[2]*a[3] - iT[0]*c[4] + iT[9]*b[4] + t + add ) >> shift; + dst[ 8 * line] = ( iT[4]*c[0] + iT[5]*a[0] - iT[0]*c[1] + iT[9]*b[1] - iT[3]*c[2] - iT[6]*a[2] + iT[1]*c[3] - iT[8]*b[3] + iT[2]*c[4] + iT[7]*a[4] - t + add ) >> shift; + dst[ 9 * line] = ( - iT[7]*c[0] - iT[2]*a[0] + iT[4]*a[1] + iT[5]*b[1] + iT[8]*c[2] - iT[1]*b[2] - iT[9]*a[3] - iT[0]*b[3] - iT[3]*c[4] + iT[6]*b[4] - t + add ) >> shift; + dst[11 * line] = ( - iT[9]*a[0] - iT[0]*b[0] + iT[8]*c[1] + iT[1]*a[1] - iT[2]*c[2] + iT[7]*b[2] - iT[6]*a[3] - iT[3]*b[3] + iT[5]*c[4] + iT[4]*a[4] + t + add ) >> shift; + dst[12 * line] = ( iT[7]*c[0] - iT[2]*b[0] - iT[5]*c[1] - iT[4]*a[1] + iT[8]*a[2] + iT[1]*b[2] - iT[0]*a[3] - iT[9]*b[3] - iT[6]*c[4] + iT[3]*b[4] + t + add ) >> shift; + dst[14 * line] = ( iT[3]*a[0] + iT[6]*b[0] - iT[7]*a[1] - iT[2]*b[1] + iT[0]*c[2] + iT[9]*a[2] - iT[4]*c[3] - iT[5]*a[3] + iT[8]*c[4] + iT[1]*a[4] - t + add ) >> shift; + dst[15 * line] = ( - iT[1]*c[0] + iT[8]*b[0] + iT[3]*c[1] - iT[6]*b[1] - iT[5]*c[2] + iT[4]*b[2] + iT[7]*c[3] - iT[2]*b[3] - iT[9]*c[4] + iT[0]*b[4] - t + add ) >> shift; + + src += 16; /* inputs of one pass are contiguous */ + dst++; /* outputs of one pass are 'line' apart, so the next pass starts one element later */ + } + + if (iSkipLine) + { + dst = pCoef + reducedLine; /* first output position the loop did not write */ + for (j = 0; j < cutoff; j++) + { + memset(dst, 0, sizeof(TCoeff)*iSkipLine); /* zero the last iSkipLine entries of each of the first 'cutoff' runs of 'line' entries */ + dst += line; + } + } + + if (iSkipLine2) + { + dst = pCoef + line * cutoff; + memset(dst, 0, sizeof(TCoeff) * line * iSkipLine2); /* zero the final iSkipLine2 runs of 'line' entries in full */ + } +#else _fastForwardMM< 16 >( src, dst, shift, line, iSkipLine, iSkipLine2, g_trCoreDCT8P16[TRANSFORM_FORWARD][0] ); +#endif } void fastInverseDCT8_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const
TCoeff outputMinimum, const TCoeff outputMaximum) { +#if JVET_M0497_FAST_DST7 && !JVET_M0497_MATRIX_MULT + /* Fast inverse 16-point DCT-8, compiled when JVET_M0497_FAST_DST7 is set and JVET_M0497_MATRIX_MULT is not; otherwise the generic _fastInverseMM< 16 > call in the #else branch is used. Each loop pass reads 16 inputs from src at stride 'line' and writes 16 consecutive outputs to dst, rounded, shifted and clipped to [outputMinimum, outputMaximum]. iSkipLine2 is not referenced on this path. */ int j, k; + TCoeff a[5], b[5], c[5], d[5], t; /* a/b/c: pairwise sums/differences of inputs; d: 3-term combinations; t: iT[10] * src[5*line] */ + + TCoeff add = (shift > 0) ? (1 << (shift - 1)) : 0; /* rounding offset: half of (1 << shift), or 0 when shift <= 0 */ + + const TMatrixCoeff *iT = g_trCoreDST7P16[TRANSFORM_INVERSE][0]; /* coefficients are taken from the DST-7 table on this path; only the #else path uses g_trCoreDCT8P16 */ + + const int reducedLine = line - iSkipLine; /* number of passes computed; the remaining iSkipLine * 16 outputs are zero-filled after the loop */ + + for (j = 0; j < reducedLine; j++) + { + for (k = 0; k < 5; k++) + { + a[k] = src[(15 - k ) * line] + src[( 4 - k) * line]; + b[k] = src[( 6 + k ) * line] + src[( 4 - k) * line]; + c[k] = src[(15 - k ) * line] - src[( 6 + k) * line]; + d[k] = src[(15 - k ) * line] + src[( 6 + k) * line] - src[(4 - k) * line]; + } + + t = iT[10] * src[5*line]; + + dst[ 1] = Clip3(outputMinimum, outputMaximum, (int)( - iT[ 2]*d[0] - iT[ 5]*d[1] - iT[ 8]*d[2] - iT[11]*d[3] - iT[14]*d[4] + add) >> shift); + dst[ 4] = Clip3(outputMinimum, outputMaximum, (int)( iT[ 8]*d[0] + iT[14]*d[1] + iT[ 5]*d[2] - iT[ 2]*d[3] - iT[11]*d[4] + add) >> shift); + dst[ 7] = Clip3(outputMinimum, outputMaximum, (int)( - iT[14]*d[0] - iT[ 2]*d[1] + iT[11]*d[2] + iT[ 5]*d[3] - iT[ 8]*d[4] + add) >> shift); + dst[10] = Clip3(outputMinimum, outputMaximum, (int)( iT[11]*d[0] - iT[ 8]*d[1] - iT[ 2]*d[2] + iT[14]*d[3] - iT[ 5]*d[4] + add) >> shift); + dst[13] = Clip3(outputMinimum, outputMaximum, (int)( - iT[ 5]*d[0] + iT[11]*d[1] - iT[14]*d[2] + iT[ 8]*d[3] - iT[ 2]*d[4] + add) >> shift); + + dst[ 5] = Clip3(outputMinimum, outputMaximum, (int)( - iT[10] * (src[15 * line] + src[14 * line] - src[12 * line] - src[11 * line] + src[9 * line] + src[8 * line] - src[6 * line] - src[5 * line] + src[3 * line] + src[2 * line] - src[0 * line]) + add) >> shift); + + dst[ 0] = Clip3(outputMinimum, outputMaximum, (int)( iT[0]*a[0] + iT[9]*b[0] + iT[1]*a[1] + iT[8]*b[1] + iT[2]*a[2] + iT[7]*b[2] + iT[3]*a[3] + iT[6]*b[3] + iT[4]*a[4] + iT[5]*b[4] + t + add ) >> shift ); + dst[ 2] = Clip3(outputMinimum, outputMaximum, (int)( iT[4]*c[0] - iT[5]*b[0] + iT[9]*c[1] - iT[0]*b[1] + iT[6]*c[2] + iT[3]*a[2] +
iT[1]*c[3] + iT[8]*a[3] + iT[7]*a[4] + iT[2]*b[4] - t + add ) >> shift ); + dst[ 3] = Clip3(outputMinimum, outputMaximum, (int)( - iT[6]*a[0] - iT[3]*b[0] - iT[2]*c[1] - iT[7]*a[1] - iT[9]*c[2] - iT[0]*a[2] - iT[4]*c[3] + iT[5]*b[3] + iT[1]*a[4] + iT[8]*b[4] - t + add ) >> shift ); + dst[ 6] = Clip3(outputMinimum, outputMaximum, (int)( iT[8]*a[0] + iT[1]*c[0] + iT[6]*c[1] - iT[3]*b[1] - iT[5]*a[2] - iT[4]*b[2] - iT[7]*c[3] - iT[2]*a[3] - iT[0]*c[4] + iT[9]*b[4] + t + add ) >> shift ); + dst[ 8] = Clip3(outputMinimum, outputMaximum, (int)( iT[4]*c[0] + iT[5]*a[0] - iT[0]*c[1] + iT[9]*b[1] - iT[3]*c[2] - iT[6]*a[2] + iT[1]*c[3] - iT[8]*b[3] + iT[2]*c[4] + iT[7]*a[4] - t + add ) >> shift ); + dst[ 9] = Clip3(outputMinimum, outputMaximum, (int)( - iT[7]*c[0] - iT[2]*a[0] + iT[4]*a[1] + iT[5]*b[1] + iT[8]*c[2] - iT[1]*b[2] - iT[9]*a[3] - iT[0]*b[3] - iT[3]*c[4] + iT[6]*b[4] - t + add ) >> shift ); + dst[11] = Clip3(outputMinimum, outputMaximum, (int)( - iT[9]*a[0] - iT[0]*b[0] + iT[8]*c[1] + iT[1]*a[1] - iT[2]*c[2] + iT[7]*b[2] - iT[6]*a[3] - iT[3]*b[3] + iT[5]*c[4] + iT[4]*a[4] + t + add ) >> shift ); + dst[12] = Clip3(outputMinimum, outputMaximum, (int)( iT[7]*c[0] - iT[2]*b[0] - iT[5]*c[1] - iT[4]*a[1] + iT[8]*a[2] + iT[1]*b[2] - iT[0]*a[3] - iT[9]*b[3] - iT[6]*c[4] + iT[3]*b[4] + t + add ) >> shift ); + dst[14] = Clip3(outputMinimum, outputMaximum, (int)( iT[3]*a[0] + iT[6]*b[0] - iT[7]*a[1] - iT[2]*b[1] + iT[0]*c[2] + iT[9]*a[2] - iT[4]*c[3] - iT[5]*a[3] + iT[8]*c[4] + iT[1]*a[4] - t + add ) >> shift ); + dst[15] = Clip3(outputMinimum, outputMaximum, (int)( - iT[1]*c[0] + iT[8]*b[0] + iT[3]*c[1] - iT[6]*b[1] - iT[5]*c[2] + iT[4]*b[2] + iT[7]*c[3] - iT[2]*b[3] - iT[9]*c[4] + iT[0]*b[4] - t + add ) >> shift ); + + src++; /* inputs of one pass are 'line' apart, so the next pass starts one element later */ + dst += 16; /* outputs of one pass are contiguous */ + } + + if (iSkipLine) + { + memset(dst, 0, (iSkipLine * 16) * sizeof(TCoeff)); /* dst now points past the computed outputs; zero the iSkipLine * 16 entries the loop did not produce */ + } +#else _fastInverseMM< 16 >( src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_trCoreDCT8P16[TRANSFORM_INVERSE][0] ); +#endif
} void fastForwardDCT8_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) { +#if JVET_M0497_FAST_DST7 && !JVET_M0497_MATRIX_MULT + int j, k; + TCoeff a[10][6]; + TCoeff t[2]; + TCoeff b[6]; + TCoeff c[2]; + + TCoeff add = (shift > 0) ? (1 << (shift - 1)) : 0; + const TMatrixCoeff *iT = g_trCoreDST7P32[TRANSFORM_FORWARD][0]; + TCoeff *pCoef = dst; + const int reducedLine = line - iSkipLine; + const int cutoff = 32 - iSkipLine2; + + for (j = 0; j < reducedLine; j++) + { + for (k = 0; k < 6; k++) + { + a[0][k] = src[31-k] - src[20+k]; + a[1][k] = src[31-k] + src[18-k]; + a[2][k] = src[31-k] + src[ 7+k]; + a[3][k] = src[31-k] - src[ 5-k]; + a[4][k] = src[25-k] + src[13+k]; + a[5][k] = src[25-k] + src[12-k]; + a[6][k] = src[25-k] - src[ k]; + a[7][k] = src[18-k] - src[ 7+k]; + a[8][k] = src[18-k] + src[ 5-k]; + a[9][k] = src[12-k] + src[ k]; + + b[k] = src[31-k] + src[20+k] - src[18-k] - src[7+k] + src[5-k]; + } + + for (k = 0; k < 2; k++) + { + c[k] = src[31-k] + src[28+k] - src[26-k] - src[23+k] + src[21-k] + src[18+k] - src[16-k] - src[13+k] + src[11-k] + src[8+k] - src[6-k] - src[3+k] + src[1-k]; + } + + t[0] = iT[12] * src[19] + iT[25] * src[6]; + t[1] = iT[12] * src[6] - iT[25] * src[19]; + + dst[ 0 * line] = ( iT[0] * a[3][0] + iT[11] * a[6][5] + iT[13] * a[8][0] + iT[24] * a[9][5] + iT[1] * a[3][1] + iT[10] * a[6][4] + iT[14] * a[8][1] + iT[23] * a[9][4] + iT[2] * a[3][2] + iT[9] * a[6][3] + iT[15] * a[8][2] + iT[22] * a[9][3] + iT[3] * a[3][3] + iT[8] * a[6][2] + iT[16] * a[8][3] + iT[21] * a[9][2] + iT[4] * a[3][4] + iT[7] * a[6][1] + iT[17] * a[8][4] + iT[20] * a[9][1] + iT[5] * a[3][5] + iT[6] * a[6][0] + iT[18] * a[8][5] + iT[19] * a[9][0] + t[0] + add) >> shift; + dst[ 1 * line] = ( iT[0] * a[5][2] - iT[11] * a[0][3] - iT[13] * a[4][2] - iT[24] * a[6][2] - iT[1] * a[9][1] - iT[10] * a[8][4] - iT[14] * a[3][4] - iT[23] * a[6][1] - iT[2] * a[0][0] + iT[9] * a[5][5] - iT[15] * a[6][5] - iT[22] * a[4][5] + iT[3] * a[5][3] -
iT[8] * a[0][2] - iT[16] * a[4][3] - iT[21] * a[6][3] - iT[4] * a[9][0] - iT[7] * a[8][5] - iT[17] * a[3][5] - iT[20] * a[6][0] - iT[5] * a[0][1] + iT[6] * a[5][4] - iT[18] * a[6][4] - iT[19] * a[4][4] + t[1] + add) >> shift; + dst[ 3 * line] = ( iT[0] * a[9][4] + iT[11] * a[5][4] - iT[13] * a[2][1] + iT[24] * a[7][1] + iT[1] * a[0][3] + iT[10] * a[1][3] - iT[14] * a[3][3] - iT[23] * a[2][3] - iT[2] * a[8][5] - iT[9] * a[9][0] - iT[15] * a[6][0] - iT[22] * a[3][5] + iT[3] * a[1][4] + iT[8] * a[0][4] - iT[16] * a[2][4] - iT[21] * a[3][4] + iT[4] * a[5][3] + iT[7] * a[9][3] + iT[17] * a[7][2] - iT[20] * a[2][2] - iT[5] * a[8][0] - iT[6] * a[1][0] + iT[18] * a[4][5] + iT[19] * a[7][0] - t[1] + add) >> shift; + dst[ 4 * line] = ( - iT[0] * a[3][2] - iT[11] * a[2][2] + iT[13] * a[1][2] + iT[24] * a[0][2] + iT[1] * a[6][0] + iT[10] * a[3][5] + iT[14] * a[9][0] + iT[23] * a[8][5] - iT[2] * a[2][3] - iT[9] * a[3][3] + iT[15] * a[0][3] + iT[22] * a[1][3] - iT[3] * a[7][0] + iT[8] * a[2][0] - iT[16] * a[9][5] - iT[21] * a[5][5] + iT[4] * a[4][4] + iT[7] * a[6][4] + iT[17] * a[0][1] - iT[20] * a[5][4] - iT[5] * a[7][4] - iT[6] * a[4][1] + iT[18] * a[8][4] + iT[19] * a[1][4] - t[0] + add) >> shift; + dst[ 5 * line] = ( iT[0] * a[3][5] + iT[11] * a[6][0] + iT[13] * a[8][5] + iT[24] * a[9][0] - iT[1] * a[6][5] - iT[10] * a[3][0] - iT[14] * a[9][5] - iT[23] * a[8][0] + iT[2] * a[7][4] - iT[9] * a[2][4] + iT[15] * a[9][1] + iT[22] * a[5][1] + iT[3] * a[7][1] + iT[8] * a[4][4] - iT[16] * a[8][1] - iT[21] * a[1][1] - iT[4] * a[6][2] - iT[7] * a[4][2] + iT[17] * a[5][2] - iT[20] * a[0][3] + iT[5] * a[3][2] + iT[6] * a[2][2] - iT[18] * a[1][2] - iT[19] * a[0][2] - t[0] + add) >> shift; + dst[ 8 * line] = ( iT[0] * a[9][3] + iT[11] * a[8][2] + iT[13] * a[3][2] + iT[24] * a[6][3] + iT[1] * a[1][5] + iT[10] * a[0][5] - iT[14] * a[2][5] - iT[23] * a[3][5] - iT[2] * a[1][3] - iT[9] * a[8][3] + iT[15] * a[7][3] + iT[22] * a[4][2] - iT[3] * a[9][5] - iT[8] * a[5][5] + iT[16] * a[2][0] - 
iT[21] * a[7][0] - iT[4] * a[1][1] - iT[7] * a[0][1] + iT[17] * a[2][1] + iT[20] * a[3][1] + iT[5] * a[5][1] + iT[6] * a[9][1] + iT[18] * a[7][4] - iT[19] * a[2][4] + t[1] + add) >> shift; + dst[ 9 * line] = ( iT[0] * a[2][1] + iT[11] * a[3][1] - iT[13] * a[0][1] - iT[24] * a[1][1] - iT[1] * a[7][3] + iT[10] * a[2][3] - iT[14] * a[9][2] - iT[23] * a[5][2] - iT[2] * a[4][0] - iT[9] * a[7][5] + iT[15] * a[1][5] + iT[22] * a[8][5] - iT[3] * a[3][4] - iT[8] * a[2][4] + iT[16] * a[1][4] + iT[21] * a[0][4] - iT[4] * a[6][3] - iT[7] * a[3][2] - iT[17] * a[9][3] - iT[20] * a[8][2] - iT[5] * a[4][5] - iT[6] * a[6][5] - iT[18] * a[0][0] + iT[19] * a[5][5] + t[0] + add) >> shift; + dst[10 * line] = ( - iT[0] * a[6][1] - iT[11] * a[4][1] + iT[13] * a[5][1] - iT[24] * a[0][4] + iT[1] * a[2][2] - iT[10] * a[7][2] - iT[14] * a[5][3] - iT[23] * a[9][3] + iT[2] * a[6][4] + iT[9] * a[4][4] - iT[15] * a[5][4] + iT[22] * a[0][1] - iT[3] * a[2][5] + iT[8] * a[7][5] + iT[16] * a[5][0] + iT[21] * a[9][0] - iT[4] * a[7][0] - iT[7] * a[4][5] + iT[17] * a[8][0] + iT[20] * a[1][0] + iT[5] * a[4][2] + iT[6] * a[7][3] - iT[18] * a[1][3] - iT[19] * a[8][3] + t[0] + add) >> shift; + dst[11 * line] = ( - iT[0] * a[1][3] - iT[11] * a[0][3] + iT[13] * a[2][3] + iT[24] * a[3][3] - iT[1] * a[9][1] - iT[10] * a[5][1] + iT[14] * a[2][4] - iT[23] * a[7][4] - iT[2] * a[8][0] - iT[9] * a[9][5] - iT[15] * a[6][5] - iT[22] * a[3][0] + iT[3] * a[0][2] - iT[8] * a[5][3] + iT[16] * a[6][3] + iT[21] * a[4][3] + iT[4] * a[5][0] - iT[7] * a[0][5] - iT[17] * a[4][0] - iT[20] * a[6][0] + iT[5] * a[9][4] + iT[6] * a[5][4] - iT[18] * a[2][1] + iT[19] * a[7][1] + t[1] + add) >> shift; + dst[13 * line] = ( iT[0] * a[0][0] + iT[11] * a[1][0] - iT[13] * a[3][0] - iT[24] * a[2][0] + iT[1] * a[5][4] - iT[10] * a[0][1] - iT[14] * a[4][4] - iT[23] * a[6][4] - iT[2] * a[9][3] - iT[9] * a[5][3] + iT[15] * a[2][2] - iT[22] * a[7][2] + iT[3] * a[8][3] + iT[8] * a[9][2] + iT[16] * a[6][2] + iT[21] * a[3][3] - iT[4] * a[1][4] - 
iT[7] * a[8][4] + iT[17] * a[7][4] + iT[20] * a[4][1] + iT[5] * a[0][5] + iT[6] * a[1][5] - iT[18] * a[3][5] - iT[19] * a[2][5] - t[1] + add) >> shift; + dst[14 * line] = ( iT[0] * a[4][2] + iT[11] * a[7][3] - iT[13] * a[1][3] - iT[24] * a[8][3] + iT[1] * a[4][1] + iT[10] * a[6][1] + iT[14] * a[0][4] - iT[23] * a[5][1] - iT[2] * a[3][0] - iT[9] * a[2][0] + iT[15] * a[1][0] + iT[22] * a[0][0] - iT[3] * a[6][3] - iT[8] * a[4][3] + iT[16] * a[5][3] - iT[21] * a[0][2] - iT[4] * a[7][5] - iT[7] * a[4][0] + iT[17] * a[8][5] + iT[20] * a[1][5] + iT[5] * a[6][4] + iT[6] * a[3][1] + iT[18] * a[9][4] + iT[19] * a[8][1] - t[0] + add) >> shift; + dst[15 * line] = ( iT[0] * a[7][4] + iT[11] * a[4][1] - iT[13] * a[8][4] - iT[24] * a[1][4] - iT[1] * a[2][2] - iT[10] * a[3][2] + iT[14] * a[0][2] + iT[23] * a[1][2] - iT[2] * a[2][1] + iT[9] * a[7][1] + iT[15] * a[5][4] + iT[22] * a[9][4] + iT[3] * a[7][5] - iT[8] * a[2][5] + iT[16] * a[9][0] + iT[21] * a[5][0] + iT[4] * a[2][0] + iT[7] * a[3][0] - iT[17] * a[0][0] - iT[20] * a[1][0] + iT[5] * a[2][3] - iT[6] * a[7][3] - iT[18] * a[5][2] - iT[19] * a[9][2] - t[0] + add) >> shift; + dst[16 * line] = ( - iT[0] * a[0][1] + iT[11] * a[5][4] - iT[13] * a[6][4] - iT[24] * a[4][4] + iT[1] * a[0][3] - iT[10] * a[5][2] + iT[14] * a[6][2] + iT[23] * a[4][2] - iT[2] * a[0][5] + iT[9] * a[5][0] - iT[15] * a[6][0] - iT[22] * a[4][0] - iT[3] * a[0][4] - iT[8] * a[1][4] + iT[16] * a[3][4] + iT[21] * a[2][4] + iT[4] * a[0][2] + iT[7] * a[1][2] - iT[17] * a[3][2] - iT[20] * a[2][2] - iT[5] * a[0][0] - iT[6] * a[1][0] + iT[18] * a[3][0] + iT[19] * a[2][0] - t[1] + add) >> shift; + dst[18 * line] = ( iT[0] * a[0][5] + iT[11] * a[1][5] - iT[13] * a[3][5] - iT[24] * a[2][5] - iT[1] * a[1][0] - iT[10] * a[0][0] + iT[14] * a[2][0] + iT[23] * a[3][0] - iT[2] * a[5][1] + iT[9] * a[0][4] + iT[15] * a[4][1] + iT[22] * a[6][1] - iT[3] * a[8][1] - iT[8] * a[1][1] + iT[16] * a[4][4] + iT[21] * a[7][1] - iT[4] * a[9][2] - iT[7] * a[5][2] + iT[17] * a[2][3] - 
iT[20] * a[7][3] - iT[5] * a[9][3] - iT[6] * a[8][2] - iT[18] * a[3][2] - iT[19] * a[6][3] + t[1] + add) >> shift; + dst[20 * line] = ( - iT[0] * a[4][0] - iT[11] * a[6][0] - iT[13] * a[0][5] + iT[24] * a[5][0] + iT[1] * a[6][5] + iT[10] * a[4][5] - iT[14] * a[5][5] + iT[23] * a[0][0] - iT[2] * a[6][1] - iT[9] * a[3][4] - iT[15] * a[9][1] - iT[22] * a[8][4] + iT[3] * a[4][4] + iT[8] * a[7][1] - iT[16] * a[1][1] - iT[21] * a[8][1] - iT[4] * a[3][3] - iT[7] * a[2][3] + iT[17] * a[1][3] + iT[20] * a[0][3] + iT[5] * a[7][2] - iT[6] * a[2][2] + iT[18] * a[9][3] + iT[19] * a[5][3] + t[0] + add) >> shift; + dst[21 * line] = ( iT[0] * a[1][2] + iT[11] * a[8][2] - iT[13] * a[7][2] - iT[24] * a[4][3] + iT[1] * a[1][5] + iT[10] * a[8][5] - iT[14] * a[7][5] - iT[23] * a[4][0] + iT[2] * a[5][2] + iT[9] * a[9][2] + iT[15] * a[7][3] - iT[22] * a[2][3] + iT[3] * a[5][5] + iT[8] * a[9][5] + iT[16] * a[7][0] - iT[21] * a[2][0] + iT[4] * a[8][1] + iT[7] * a[9][4] + iT[17] * a[6][4] + iT[20] * a[3][1] + iT[5] * a[8][4] + iT[6] * a[9][1] + iT[18] * a[6][1] + iT[19] * a[3][4] + t[1] + add) >> shift; + dst[23 * line] = ( iT[0] * a[8][4] + iT[11] * a[9][1] + iT[13] * a[6][1] + iT[24] * a[3][4] - iT[1] * a[8][2] - iT[10] * a[1][2] + iT[14] * a[4][3] + iT[23] * a[7][2] - iT[2] * a[0][1] - iT[9] * a[1][1] + iT[15] * a[3][1] + iT[22] * a[2][1] + iT[3] * a[5][0] + iT[8] * a[9][0] + iT[16] * a[7][5] - iT[21] * a[2][5] - iT[4] * a[9][5] - iT[7] * a[8][0] - iT[17] * a[3][0] - iT[20] * a[6][5] + iT[5] * a[5][2] - iT[6] * a[0][3] - iT[18] * a[4][2] - iT[19] * a[6][2] - t[1] + add) >> shift; + dst[24 * line] = ( - iT[0] * a[2][3] + iT[11] * a[7][3] + iT[13] * a[5][2] + iT[24] * a[9][2] + iT[1] * a[4][1] + iT[10] * a[7][4] - iT[14] * a[1][4] - iT[23] * a[8][4] - iT[2] * a[4][5] - iT[9] * a[7][0] + iT[15] * a[1][0] + iT[22] * a[8][0] + iT[3] * a[4][3] + iT[8] * a[6][3] + iT[16] * a[0][2] - iT[21] * a[5][3] - iT[4] * a[2][5] - iT[7] * a[3][5] + iT[17] * a[0][5] + iT[20] * a[1][5] + iT[5] * a[2][1] + 
iT[6] * a[3][1] - iT[18] * a[0][1] - iT[19] * a[1][1] - t[0] + add) >> shift; + dst[25 * line] = ( - iT[0] * a[4][5] - iT[11] * a[6][5] - iT[13] * a[0][0] + iT[24] * a[5][5] - iT[1] * a[3][1] - iT[10] * a[2][1] + iT[14] * a[1][1] + iT[23] * a[0][1] + iT[2] * a[7][2] + iT[9] * a[4][3] - iT[15] * a[8][2] - iT[22] * a[1][2] + iT[3] * a[6][2] + iT[8] * a[3][3] + iT[16] * a[9][2] + iT[21] * a[8][3] + iT[4] * a[2][4] - iT[7] * a[7][4] - iT[17] * a[5][1] - iT[20] * a[9][1] - iT[5] * a[4][0] - iT[6] * a[6][0] - iT[18] * a[0][5] + iT[19] * a[5][0] - t[0] + add) >> shift; + dst[26 * line] = ( iT[0] * a[8][0] + iT[11] * a[1][0] - iT[13] * a[4][5] - iT[24] * a[7][0] + iT[1] * a[5][4] + iT[10] * a[9][4] + iT[14] * a[7][1] - iT[23] * a[2][1] - iT[2] * a[1][2] - iT[9] * a[0][2] + iT[15] * a[2][2] + iT[22] * a[3][2] - iT[3] * a[9][2] - iT[8] * a[8][3] - iT[16] * a[3][3] - iT[21] * a[6][2] + iT[4] * a[0][4] - iT[7] * a[5][1] + iT[17] * a[6][1] + iT[20] * a[4][1] + iT[5] * a[8][5] + iT[6] * a[1][5] - iT[18] * a[4][0] - iT[19] * a[7][5] - t[1] + add) >> shift; + dst[28 * line] = ( - iT[0] * a[5][1] - iT[11] * a[9][1] - iT[13] * a[7][4] + iT[24] * a[2][4] + iT[1] * a[8][2] + iT[10] * a[9][3] + iT[14] * a[6][3] + iT[23] * a[3][2] - iT[2] * a[9][4] - iT[9] * a[8][1] - iT[15] * a[3][1] - iT[22] * a[6][4] + iT[3] * a[9][0] + iT[8] * a[5][0] - iT[16] * a[2][5] + iT[21] * a[7][5] - iT[4] * a[5][5] + iT[7] * a[0][0] + iT[17] * a[4][5] + iT[20] * a[6][5] + iT[5] * a[1][3] + iT[6] * a[0][3] - iT[18] * a[2][3] - iT[19] * a[3][3] + t[1] + add) >> shift; + dst[29 * line] = ( iT[0] * a[6][4] + iT[11] * a[3][1] + iT[13] * a[9][4] + iT[24] * a[8][1] - iT[1] * a[7][3] - iT[10] * a[4][2] + iT[14] * a[8][3] + iT[23] * a[1][3] - iT[2] * a[3][5] - iT[9] * a[2][5] + iT[15] * a[1][5] + iT[22] * a[0][5] + iT[3] * a[2][4] + iT[8] * a[3][4] - iT[16] * a[0][4] - iT[21] * a[1][4] + iT[4] * a[4][3] + iT[7] * a[7][2] - iT[17] * a[1][2] - iT[20] * a[8][2] - iT[5] * a[3][0] - iT[6] * a[6][5] - iT[18] * a[8][0] - 
iT[19] * a[9][5] + t[0] + add) >> shift; + dst[30 * line] = ( - iT[0] * a[7][2] + iT[11] * a[2][2] - iT[13] * a[9][3] - iT[24] * a[5][3] - iT[1] * a[6][0] - iT[10] * a[4][0] + iT[14] * a[5][0] - iT[23] * a[0][5] - iT[2] * a[4][2] - iT[9] * a[6][2] - iT[15] * a[0][3] + iT[22] * a[5][2] + iT[3] * a[2][0] - iT[8] * a[7][0] - iT[16] * a[5][5] - iT[21] * a[9][5] + iT[4] * a[7][1] - iT[7] * a[2][1] + iT[17] * a[9][4] + iT[20] * a[5][4] + iT[5] * a[6][1] + iT[6] * a[4][1] - iT[18] * a[5][1] + iT[19] * a[0][4] + t[0] + add) >> shift; + dst[31 * line] = ( iT[0] * a[8][5] + iT[11] * a[1][5] - iT[13] * a[4][0] - iT[24] * a[7][5] - iT[1] * a[1][0] - iT[10] * a[8][0] + iT[14] * a[7][0] + iT[23] * a[4][5] - iT[2] * a[8][4] - iT[9] * a[1][4] + iT[15] * a[4][1] + iT[22] * a[7][4] + iT[3] * a[1][1] + iT[8] * a[8][1] - iT[16] * a[7][1] - iT[21] * a[4][4] + iT[4] * a[8][3] + iT[7] * a[1][3] - iT[17] * a[4][2] - iT[20] * a[7][3] - iT[5] * a[1][2] - iT[6] * a[8][2] + iT[18] * a[7][2] + iT[19] * a[4][3] + t[1] + add) >> shift; + + dst[ 2 * line] = ( iT[ 4] * b[0] + iT[ 9] * b[1] + iT[14] * b[2] + iT[19] * b[3] + iT[24] * b[4] + iT[29] * b[5] + add) >> shift; + dst[ 7 * line] = ( - iT[14] * b[0] - iT[29] * b[1] - iT[19] * b[2] - iT[ 4] * b[3] + iT[ 9] * b[4] + iT[24] * b[5] + add) >> shift; + dst[12 * line] = ( iT[24] * b[0] + iT[14] * b[1] - iT[ 9] * b[2] - iT[29] * b[3] - iT[ 4] * b[4] + iT[19] * b[5] + add) >> shift; + dst[17 * line] = ( - iT[29] * b[0] + iT[ 4] * b[1] + iT[24] * b[2] - iT[ 9] * b[3] - iT[19] * b[4] + iT[14] * b[5] + add) >> shift; + dst[22 * line] = ( iT[19] * b[0] - iT[24] * b[1] + iT[ 4] * b[2] + iT[14] * b[3] - iT[29] * b[4] + iT[ 9] * b[5] + add) >> shift; + dst[27 * line] = ( - iT[ 9] * b[0] + iT[19] * b[1] - iT[29] * b[2] + iT[24] * b[3] - iT[14] * b[4] + iT[ 4] * b[5] + add) >> shift; + + dst[ 6 * line] = ( iT[12] * c[0] + iT[25] * c[1] + add) >> shift; + dst[19 * line] = ( - iT[25] * c[0] + iT[12] * c[1] + add) >> shift; + + src += 32; + dst++; + } + + if 
(iSkipLine) + { + dst = pCoef + reducedLine; + for (j = 0; j < cutoff; j++) + { + memset(dst, 0, sizeof(TCoeff)*iSkipLine); + dst += line; + } + } + + if (iSkipLine2) + { + dst = pCoef + line * cutoff; + memset(dst, 0, sizeof(TCoeff) * line * iSkipLine2); + } +#else _fastForwardMM< 32 >( src, dst, shift, line, iSkipLine, iSkipLine2, g_trCoreDCT8P32[TRANSFORM_FORWARD][0] ); +#endif } void fastInverseDCT8_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) { +#if JVET_M0497_FAST_DST7 && !JVET_M0497_MATRIX_MULT + int j, k; + TCoeff a[10][6]; + TCoeff t[2]; + TCoeff b[6]; + TCoeff c[2]; + TCoeff add = (shift > 0) ? (1 << (shift - 1)) : 0; + + const TMatrixCoeff *iT = g_trCoreDST7P32[TRANSFORM_INVERSE][0]; + + const int reducedLine = line - iSkipLine; + + for (j = 0; j < reducedLine; j++) + { + for (k = 0; k < 6; k++) + { + a[0][k] = src[(31 - k)*line] - src[(20 + k)*line]; + a[1][k] = src[(31 - k)*line] + src[(18 - k)*line]; + a[2][k] = src[(31 - k)*line] + src[( 7 + k)*line]; + a[3][k] = src[(31 - k)*line] - src[( 5 - k)*line]; + a[4][k] = src[(25 - k)*line] + src[(13 + k)*line]; + a[5][k] = src[(25 - k)*line] + src[(12 - k)*line]; + a[6][k] = src[(25 - k)*line] - src[ k *line]; + a[7][k] = src[(18 - k)*line] - src[( 7 + k)*line]; + a[8][k] = src[(18 - k)*line] + src[( 5 - k)*line]; + a[9][k] = src[(12 - k)*line] + src[ k *line]; + + b[k] = src[(31 - k)*line] + src[(20 + k)*line] - src[(18 - k)*line] - src[(7 + k)*line] + src[(5 - k)*line]; + } + + for (k = 0; k < 2; k++) + { + c[k] = src[(31 - k)*line] + src[(28 + k)*line] - src[(26 - k)*line] - src[(23 + k)*line] + src[(21 - k)*line] + src[(18 + k)*line] - src[(16 - k)*line] - src[(13 + k)*line] + src[(11 - k)*line] + src[(8 + k)*line] - src[(6 - k)*line] - src[(3 + k)*line] + src[(1 - k)*line]; + } + + t[0] = iT[12] * src[19 * line] + iT[25] * src[ 6 * line]; + t[1] = iT[12] * src[ 6 * line] - iT[25] * src[19 * line]; + + 
dst[ 0] = Clip3(outputMinimum, outputMaximum, (int)( iT[0] * a[3][0] + iT[11] * a[6][5] + iT[13] * a[8][0] + iT[24] * a[9][5] + iT[1] * a[3][1] + iT[10] * a[6][4] + iT[14] * a[8][1] + iT[23] * a[9][4] + iT[2] * a[3][2] + iT[9] * a[6][3] + iT[15] * a[8][2] + iT[22] * a[9][3] + iT[3] * a[3][3] + iT[8] * a[6][2] + iT[16] * a[8][3] + iT[21] * a[9][2] + iT[4] * a[3][4] + iT[7] * a[6][1] + iT[17] * a[8][4] + iT[20] * a[9][1] + iT[5] * a[3][5] + iT[6] * a[6][0] + iT[18] * a[8][5] + iT[19] * a[9][0] + t[0] + add) >> shift); + dst[ 1] = Clip3(outputMinimum, outputMaximum, (int)( iT[0] * a[5][2] - iT[11] * a[0][3] - iT[13] * a[4][2] - iT[24] * a[6][2] - iT[1] * a[9][1] - iT[10] * a[8][4] - iT[14] * a[3][4] - iT[23] * a[6][1] - iT[2] * a[0][0] + iT[9] * a[5][5] - iT[15] * a[6][5] - iT[22] * a[4][5] + iT[3] * a[5][3] - iT[8] * a[0][2] - iT[16] * a[4][3] - iT[21] * a[6][3] - iT[4] * a[9][0] - iT[7] * a[8][5] - iT[17] * a[3][5] - iT[20] * a[6][0] - iT[5] * a[0][1] + iT[6] * a[5][4] - iT[18] * a[6][4] - iT[19] * a[4][4] + t[1] + add) >> shift); + dst[ 3] = Clip3(outputMinimum, outputMaximum, (int)( iT[0] * a[9][4] + iT[11] * a[5][4] - iT[13] * a[2][1] + iT[24] * a[7][1] + iT[1] * a[0][3] + iT[10] * a[1][3] - iT[14] * a[3][3] - iT[23] * a[2][3] - iT[2] * a[8][5] - iT[9] * a[9][0] - iT[15] * a[6][0] - iT[22] * a[3][5] + iT[3] * a[1][4] + iT[8] * a[0][4] - iT[16] * a[2][4] - iT[21] * a[3][4] + iT[4] * a[5][3] + iT[7] * a[9][3] + iT[17] * a[7][2] - iT[20] * a[2][2] - iT[5] * a[8][0] - iT[6] * a[1][0] + iT[18] * a[4][5] + iT[19] * a[7][0] - t[1] + add) >> shift); + dst[ 4] = Clip3(outputMinimum, outputMaximum, (int)( - iT[0] * a[3][2] - iT[11] * a[2][2] + iT[13] * a[1][2] + iT[24] * a[0][2] + iT[1] * a[6][0] + iT[10] * a[3][5] + iT[14] * a[9][0] + iT[23] * a[8][5] - iT[2] * a[2][3] - iT[9] * a[3][3] + iT[15] * a[0][3] + iT[22] * a[1][3] - iT[3] * a[7][0] + iT[8] * a[2][0] - iT[16] * a[9][5] - iT[21] * a[5][5] + iT[4] * a[4][4] + iT[7] * a[6][4] + iT[17] * a[0][1] - iT[20] * a[5][4] - 
iT[5] * a[7][4] - iT[6] * a[4][1] + iT[18] * a[8][4] + iT[19] * a[1][4] - t[0] + add) >> shift); + dst[ 5] = Clip3(outputMinimum, outputMaximum, (int)( iT[0] * a[3][5] + iT[11] * a[6][0] + iT[13] * a[8][5] + iT[24] * a[9][0] - iT[1] * a[6][5] - iT[10] * a[3][0] - iT[14] * a[9][5] - iT[23] * a[8][0] + iT[2] * a[7][4] - iT[9] * a[2][4] + iT[15] * a[9][1] + iT[22] * a[5][1] + iT[3] * a[7][1] + iT[8] * a[4][4] - iT[16] * a[8][1] - iT[21] * a[1][1] - iT[4] * a[6][2] - iT[7] * a[4][2] + iT[17] * a[5][2] - iT[20] * a[0][3] + iT[5] * a[3][2] + iT[6] * a[2][2] - iT[18] * a[1][2] - iT[19] * a[0][2] - t[0] + add) >> shift); + dst[ 8] = Clip3(outputMinimum, outputMaximum, (int)( iT[0] * a[9][3] + iT[11] * a[8][2] + iT[13] * a[3][2] + iT[24] * a[6][3] + iT[1] * a[1][5] + iT[10] * a[0][5] - iT[14] * a[2][5] - iT[23] * a[3][5] - iT[2] * a[1][3] - iT[9] * a[8][3] + iT[15] * a[7][3] + iT[22] * a[4][2] - iT[3] * a[9][5] - iT[8] * a[5][5] + iT[16] * a[2][0] - iT[21] * a[7][0] - iT[4] * a[1][1] - iT[7] * a[0][1] + iT[17] * a[2][1] + iT[20] * a[3][1] + iT[5] * a[5][1] + iT[6] * a[9][1] + iT[18] * a[7][4] - iT[19] * a[2][4] + t[1] + add) >> shift); + dst[ 9] = Clip3(outputMinimum, outputMaximum, (int)( iT[0] * a[2][1] + iT[11] * a[3][1] - iT[13] * a[0][1] - iT[24] * a[1][1] - iT[1] * a[7][3] + iT[10] * a[2][3] - iT[14] * a[9][2] - iT[23] * a[5][2] - iT[2] * a[4][0] - iT[9] * a[7][5] + iT[15] * a[1][5] + iT[22] * a[8][5] - iT[3] * a[3][4] - iT[8] * a[2][4] + iT[16] * a[1][4] + iT[21] * a[0][4] - iT[4] * a[6][3] - iT[7] * a[3][2] - iT[17] * a[9][3] - iT[20] * a[8][2] - iT[5] * a[4][5] - iT[6] * a[6][5] - iT[18] * a[0][0] + iT[19] * a[5][5] + t[0] + add) >> shift); + dst[10] = Clip3(outputMinimum, outputMaximum, (int)( - iT[0] * a[6][1] - iT[11] * a[4][1] + iT[13] * a[5][1] - iT[24] * a[0][4] + iT[1] * a[2][2] - iT[10] * a[7][2] - iT[14] * a[5][3] - iT[23] * a[9][3] + iT[2] * a[6][4] + iT[9] * a[4][4] - iT[15] * a[5][4] + iT[22] * a[0][1] - iT[3] * a[2][5] + iT[8] * a[7][5] + iT[16] * 
a[5][0] + iT[21] * a[9][0] - iT[4] * a[7][0] - iT[7] * a[4][5] + iT[17] * a[8][0] + iT[20] * a[1][0] + iT[5] * a[4][2] + iT[6] * a[7][3] - iT[18] * a[1][3] - iT[19] * a[8][3] + t[0] + add) >> shift); + dst[11] = Clip3(outputMinimum, outputMaximum, (int)( - iT[0] * a[1][3] - iT[11] * a[0][3] + iT[13] * a[2][3] + iT[24] * a[3][3] - iT[1] * a[9][1] - iT[10] * a[5][1] + iT[14] * a[2][4] - iT[23] * a[7][4] - iT[2] * a[8][0] - iT[9] * a[9][5] - iT[15] * a[6][5] - iT[22] * a[3][0] + iT[3] * a[0][2] - iT[8] * a[5][3] + iT[16] * a[6][3] + iT[21] * a[4][3] + iT[4] * a[5][0] - iT[7] * a[0][5] - iT[17] * a[4][0] - iT[20] * a[6][0] + iT[5] * a[9][4] + iT[6] * a[5][4] - iT[18] * a[2][1] + iT[19] * a[7][1] + t[1] + add) >> shift); + dst[13] = Clip3(outputMinimum, outputMaximum, (int)( iT[0] * a[0][0] + iT[11] * a[1][0] - iT[13] * a[3][0] - iT[24] * a[2][0] + iT[1] * a[5][4] - iT[10] * a[0][1] - iT[14] * a[4][4] - iT[23] * a[6][4] - iT[2] * a[9][3] - iT[9] * a[5][3] + iT[15] * a[2][2] - iT[22] * a[7][2] + iT[3] * a[8][3] + iT[8] * a[9][2] + iT[16] * a[6][2] + iT[21] * a[3][3] - iT[4] * a[1][4] - iT[7] * a[8][4] + iT[17] * a[7][4] + iT[20] * a[4][1] + iT[5] * a[0][5] + iT[6] * a[1][5] - iT[18] * a[3][5] - iT[19] * a[2][5] - t[1] + add) >> shift); + dst[14] = Clip3(outputMinimum, outputMaximum, (int)( iT[0] * a[4][2] + iT[11] * a[7][3] - iT[13] * a[1][3] - iT[24] * a[8][3] + iT[1] * a[4][1] + iT[10] * a[6][1] + iT[14] * a[0][4] - iT[23] * a[5][1] - iT[2] * a[3][0] - iT[9] * a[2][0] + iT[15] * a[1][0] + iT[22] * a[0][0] - iT[3] * a[6][3] - iT[8] * a[4][3] + iT[16] * a[5][3] - iT[21] * a[0][2] - iT[4] * a[7][5] - iT[7] * a[4][0] + iT[17] * a[8][5] + iT[20] * a[1][5] + iT[5] * a[6][4] + iT[6] * a[3][1] + iT[18] * a[9][4] + iT[19] * a[8][1] - t[0] + add) >> shift); + dst[15] = Clip3(outputMinimum, outputMaximum, (int)( iT[0] * a[7][4] + iT[11] * a[4][1] - iT[13] * a[8][4] - iT[24] * a[1][4] - iT[1] * a[2][2] - iT[10] * a[3][2] + iT[14] * a[0][2] + iT[23] * a[1][2] - iT[2] * a[2][1] + 
iT[9] * a[7][1] + iT[15] * a[5][4] + iT[22] * a[9][4] + iT[3] * a[7][5] - iT[8] * a[2][5] + iT[16] * a[9][0] + iT[21] * a[5][0] + iT[4] * a[2][0] + iT[7] * a[3][0] - iT[17] * a[0][0] - iT[20] * a[1][0] + iT[5] * a[2][3] - iT[6] * a[7][3] - iT[18] * a[5][2] - iT[19] * a[9][2] - t[0] + add) >> shift); + dst[16] = Clip3(outputMinimum, outputMaximum, (int)( - iT[0] * a[0][1] + iT[11] * a[5][4] - iT[13] * a[6][4] - iT[24] * a[4][4] + iT[1] * a[0][3] - iT[10] * a[5][2] + iT[14] * a[6][2] + iT[23] * a[4][2] - iT[2] * a[0][5] + iT[9] * a[5][0] - iT[15] * a[6][0] - iT[22] * a[4][0] - iT[3] * a[0][4] - iT[8] * a[1][4] + iT[16] * a[3][4] + iT[21] * a[2][4] + iT[4] * a[0][2] + iT[7] * a[1][2] - iT[17] * a[3][2] - iT[20] * a[2][2] - iT[5] * a[0][0] - iT[6] * a[1][0] + iT[18] * a[3][0] + iT[19] * a[2][0] - t[1] + add) >> shift); + dst[18] = Clip3(outputMinimum, outputMaximum, (int)( iT[0] * a[0][5] + iT[11] * a[1][5] - iT[13] * a[3][5] - iT[24] * a[2][5] - iT[1] * a[1][0] - iT[10] * a[0][0] + iT[14] * a[2][0] + iT[23] * a[3][0] - iT[2] * a[5][1] + iT[9] * a[0][4] + iT[15] * a[4][1] + iT[22] * a[6][1] - iT[3] * a[8][1] - iT[8] * a[1][1] + iT[16] * a[4][4] + iT[21] * a[7][1] - iT[4] * a[9][2] - iT[7] * a[5][2] + iT[17] * a[2][3] - iT[20] * a[7][3] - iT[5] * a[9][3] - iT[6] * a[8][2] - iT[18] * a[3][2] - iT[19] * a[6][3] + t[1] + add) >> shift); + dst[20] = Clip3(outputMinimum, outputMaximum, (int)( - iT[0] * a[4][0] - iT[11] * a[6][0] - iT[13] * a[0][5] + iT[24] * a[5][0] + iT[1] * a[6][5] + iT[10] * a[4][5] - iT[14] * a[5][5] + iT[23] * a[0][0] - iT[2] * a[6][1] - iT[9] * a[3][4] - iT[15] * a[9][1] - iT[22] * a[8][4] + iT[3] * a[4][4] + iT[8] * a[7][1] - iT[16] * a[1][1] - iT[21] * a[8][1] - iT[4] * a[3][3] - iT[7] * a[2][3] + iT[17] * a[1][3] + iT[20] * a[0][3] + iT[5] * a[7][2] - iT[6] * a[2][2] + iT[18] * a[9][3] + iT[19] * a[5][3] + t[0] + add) >> shift); + dst[21] = Clip3(outputMinimum, outputMaximum, (int)( iT[0] * a[1][2] + iT[11] * a[8][2] - iT[13] * a[7][2] - iT[24] * 
a[4][3] + iT[1] * a[1][5] + iT[10] * a[8][5] - iT[14] * a[7][5] - iT[23] * a[4][0] + iT[2] * a[5][2] + iT[9] * a[9][2] + iT[15] * a[7][3] - iT[22] * a[2][3] + iT[3] * a[5][5] + iT[8] * a[9][5] + iT[16] * a[7][0] - iT[21] * a[2][0] + iT[4] * a[8][1] + iT[7] * a[9][4] + iT[17] * a[6][4] + iT[20] * a[3][1] + iT[5] * a[8][4] + iT[6] * a[9][1] + iT[18] * a[6][1] + iT[19] * a[3][4] + t[1] + add) >> shift); + dst[23] = Clip3(outputMinimum, outputMaximum, (int)( iT[0] * a[8][4] + iT[11] * a[9][1] + iT[13] * a[6][1] + iT[24] * a[3][4] - iT[1] * a[8][2] - iT[10] * a[1][2] + iT[14] * a[4][3] + iT[23] * a[7][2] - iT[2] * a[0][1] - iT[9] * a[1][1] + iT[15] * a[3][1] + iT[22] * a[2][1] + iT[3] * a[5][0] + iT[8] * a[9][0] + iT[16] * a[7][5] - iT[21] * a[2][5] - iT[4] * a[9][5] - iT[7] * a[8][0] - iT[17] * a[3][0] - iT[20] * a[6][5] + iT[5] * a[5][2] - iT[6] * a[0][3] - iT[18] * a[4][2] - iT[19] * a[6][2] - t[1] + add) >> shift); + dst[24] = Clip3(outputMinimum, outputMaximum, (int)( - iT[0] * a[2][3] + iT[11] * a[7][3] + iT[13] * a[5][2] + iT[24] * a[9][2] + iT[1] * a[4][1] + iT[10] * a[7][4] - iT[14] * a[1][4] - iT[23] * a[8][4] - iT[2] * a[4][5] - iT[9] * a[7][0] + iT[15] * a[1][0] + iT[22] * a[8][0] + iT[3] * a[4][3] + iT[8] * a[6][3] + iT[16] * a[0][2] - iT[21] * a[5][3] - iT[4] * a[2][5] - iT[7] * a[3][5] + iT[17] * a[0][5] + iT[20] * a[1][5] + iT[5] * a[2][1] + iT[6] * a[3][1] - iT[18] * a[0][1] - iT[19] * a[1][1] - t[0] + add) >> shift); + dst[25] = Clip3(outputMinimum, outputMaximum, (int)( - iT[0] * a[4][5] - iT[11] * a[6][5] - iT[13] * a[0][0] + iT[24] * a[5][5] - iT[1] * a[3][1] - iT[10] * a[2][1] + iT[14] * a[1][1] + iT[23] * a[0][1] + iT[2] * a[7][2] + iT[9] * a[4][3] - iT[15] * a[8][2] - iT[22] * a[1][2] + iT[3] * a[6][2] + iT[8] * a[3][3] + iT[16] * a[9][2] + iT[21] * a[8][3] + iT[4] * a[2][4] - iT[7] * a[7][4] - iT[17] * a[5][1] - iT[20] * a[9][1] - iT[5] * a[4][0] - iT[6] * a[6][0] - iT[18] * a[0][5] + iT[19] * a[5][0] - t[0] + add) >> shift); + dst[26] = 
Clip3(outputMinimum, outputMaximum, (int)( iT[0] * a[8][0] + iT[11] * a[1][0] - iT[13] * a[4][5] - iT[24] * a[7][0] + iT[1] * a[5][4] + iT[10] * a[9][4] + iT[14] * a[7][1] - iT[23] * a[2][1] - iT[2] * a[1][2] - iT[9] * a[0][2] + iT[15] * a[2][2] + iT[22] * a[3][2] - iT[3] * a[9][2] - iT[8] * a[8][3] - iT[16] * a[3][3] - iT[21] * a[6][2] + iT[4] * a[0][4] - iT[7] * a[5][1] + iT[17] * a[6][1] + iT[20] * a[4][1] + iT[5] * a[8][5] + iT[6] * a[1][5] - iT[18] * a[4][0] - iT[19] * a[7][5] - t[1] + add) >> shift); + dst[28] = Clip3(outputMinimum, outputMaximum, (int)( - iT[0] * a[5][1] - iT[11] * a[9][1] - iT[13] * a[7][4] + iT[24] * a[2][4] + iT[1] * a[8][2] + iT[10] * a[9][3] + iT[14] * a[6][3] + iT[23] * a[3][2] - iT[2] * a[9][4] - iT[9] * a[8][1] - iT[15] * a[3][1] - iT[22] * a[6][4] + iT[3] * a[9][0] + iT[8] * a[5][0] - iT[16] * a[2][5] + iT[21] * a[7][5] - iT[4] * a[5][5] + iT[7] * a[0][0] + iT[17] * a[4][5] + iT[20] * a[6][5] + iT[5] * a[1][3] + iT[6] * a[0][3] - iT[18] * a[2][3] - iT[19] * a[3][3] + t[1] + add) >> shift); + dst[29] = Clip3(outputMinimum, outputMaximum, (int)( iT[0] * a[6][4] + iT[11] * a[3][1] + iT[13] * a[9][4] + iT[24] * a[8][1] - iT[1] * a[7][3] - iT[10] * a[4][2] + iT[14] * a[8][3] + iT[23] * a[1][3] - iT[2] * a[3][5] - iT[9] * a[2][5] + iT[15] * a[1][5] + iT[22] * a[0][5] + iT[3] * a[2][4] + iT[8] * a[3][4] - iT[16] * a[0][4] - iT[21] * a[1][4] + iT[4] * a[4][3] + iT[7] * a[7][2] - iT[17] * a[1][2] - iT[20] * a[8][2] - iT[5] * a[3][0] - iT[6] * a[6][5] - iT[18] * a[8][0] - iT[19] * a[9][5] + t[0] + add) >> shift); + dst[30] = Clip3(outputMinimum, outputMaximum, (int)( - iT[0] * a[7][2] + iT[11] * a[2][2] - iT[13] * a[9][3] - iT[24] * a[5][3] - iT[1] * a[6][0] - iT[10] * a[4][0] + iT[14] * a[5][0] - iT[23] * a[0][5] - iT[2] * a[4][2] - iT[9] * a[6][2] - iT[15] * a[0][3] + iT[22] * a[5][2] + iT[3] * a[2][0] - iT[8] * a[7][0] - iT[16] * a[5][5] - iT[21] * a[9][5] + iT[4] * a[7][1] - iT[7] * a[2][1] + iT[17] * a[9][4] + iT[20] * a[5][4] + iT[5] * 
a[6][1] + iT[6] * a[4][1] - iT[18] * a[5][1] + iT[19] * a[0][4] + t[0] + add) >> shift); + dst[31] = Clip3(outputMinimum, outputMaximum, (int)( iT[0] * a[8][5] + iT[11] * a[1][5] - iT[13] * a[4][0] - iT[24] * a[7][5] - iT[1] * a[1][0] - iT[10] * a[8][0] + iT[14] * a[7][0] + iT[23] * a[4][5] - iT[2] * a[8][4] - iT[9] * a[1][4] + iT[15] * a[4][1] + iT[22] * a[7][4] + iT[3] * a[1][1] + iT[8] * a[8][1] - iT[16] * a[7][1] - iT[21] * a[4][4] + iT[4] * a[8][3] + iT[7] * a[1][3] - iT[17] * a[4][2] - iT[20] * a[7][3] - iT[5] * a[1][2] - iT[6] * a[8][2] + iT[18] * a[7][2] + iT[19] * a[4][3] + t[1] + add) >> shift); + + dst[ 2] = Clip3(outputMinimum, outputMaximum, (int)( iT[ 4] * b[0] + iT[ 9] * b[1] + iT[14] * b[2] + iT[19] * b[3] + iT[24] * b[4] + iT[29] * b[5] + add) >> shift); + dst[ 7] = Clip3(outputMinimum, outputMaximum, (int)( - iT[14] * b[0] - iT[29] * b[1] - iT[19] * b[2] - iT[ 4] * b[3] + iT[ 9] * b[4] + iT[24] * b[5] + add) >> shift); + dst[12] = Clip3(outputMinimum, outputMaximum, (int)( iT[24] * b[0] + iT[14] * b[1] - iT[ 9] * b[2] - iT[29] * b[3] - iT[ 4] * b[4] + iT[19] * b[5] + add) >> shift); + dst[17] = Clip3(outputMinimum, outputMaximum, (int)( - iT[29] * b[0] + iT[ 4] * b[1] + iT[24] * b[2] - iT[ 9] * b[3] - iT[19] * b[4] + iT[14] * b[5] + add) >> shift); + dst[22] = Clip3(outputMinimum, outputMaximum, (int)( iT[19] * b[0] - iT[24] * b[1] + iT[ 4] * b[2] + iT[14] * b[3] - iT[29] * b[4] + iT[ 9] * b[5] + add) >> shift); + dst[27] = Clip3(outputMinimum, outputMaximum, (int)( - iT[ 9] * b[0] + iT[19] * b[1] - iT[29] * b[2] + iT[24] * b[3] - iT[14] * b[4] + iT[ 4] * b[5] + add) >> shift); + + dst[ 6] = Clip3(outputMinimum, outputMaximum, (int)( iT[12] * c[0] + iT[25] * c[1] + add) >> shift); + dst[19] = Clip3(outputMinimum, outputMaximum, (int)( - iT[25] * c[0] + iT[12] * c[1] + add) >> shift); + + src++; + dst += 32; + } + + if (iSkipLine) + { + memset(dst, 0, (iSkipLine * 32) * sizeof(TCoeff)); + } +#else _fastInverseMM< 32 >( src, dst, shift, line, 
iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_trCoreDCT8P32[TRANSFORM_INVERSE][0] ); +#endif } diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 1d0452b39117b53d0fda1c2196cd0bb4e042e83a..7e15e14992f67adeb9ea2d1d6e8c0f46e1cda2e2 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -50,9 +50,53 @@ #include <assert.h> #include <cassert> +#define JVET_M0118_M0185_TRIANGLE_FLAG_FIX 1 // Avoid signaling triangle flag if a CU uses MMVD or CIIP + +#define JVET_M0487_INT_EXTEND 1 // CE9.1.1 b: integer reference samples as 1 extended samples + +#define JVET_M0444_SMVD 1 // SMVD mode + +#define JVET_M0170_MRG_SHARELIST 1 +#if JVET_M0170_MRG_SHARELIST +#define MRG_SHARELIST_SHARSIZE 32 +#endif + +#define JVET_M0064_CCLM_SIMPLIFICATION 1 + +#define JVET_M0142_CCLM_COLLOCATED_CHROMA 1 // Adding support for chroma sample location type 2 in CCLM + + +#define JVET_M0479_18BITS_MV_CLIP 1 + +#define JVET_M0497_FAST_DST7 1 +#if JVET_M0497_FAST_DST7 +#define JVET_M0497_MATRIX_MULT 0 // 0: Fast method; 1: Matrix multiplication +#endif +#define JVET_M0502_PRED_MODE_CTX 1 + +#define JVET_M0407_IBC_RANGE 1 // extend IBC search range to some part of left CTU + +#define JVET_M0464_UNI_MTS 1 +#define JVET_M0068_M0171_MMVD_CLEANUP 1 // MMVD cleanup with 1) flip removal, 2) L1 zero vector fix, 3) bi-pred restriction after merge/MMVD + +#if JVET_M0464_UNI_MTS +typedef std::pair<int, bool> TrMode; +typedef std::pair<int, int> TrCost; +#endif + +#define JVET_M0421_SPLIT_SIG 1 + +#define JVET_M0173_MOVE_GT2_TO_FIRST_PASS 1 // Moving the gtr2 flag to the first coding pass + +#define REMOVE_BIN_DECISION_TREE 1 + +#define JVET_M0446_M0888_M0905_VPDU_AT_PIC_BOUNDARY 1 + // clang-format off #define JVET_M0453_CABAC_ENGINE 1 +#define JVET_M0409_ATMVP_FIX 1 + #define JVET_L0090_PAIR_AVG 1 // Add pairwise average candidates, replace HEVC combined candidates #define REUSE_CU_RESULTS 1 // clang-format on @@ -839,7 +883,7 @@ enum 
MergeType { MRG_TYPE_DEFAULT_N = 0, // 0 MRG_TYPE_SUBPU_ATMVP, - MRG_TYPE_CPR, + MRG_TYPE_IBC, NUM_MRG_TYPE // 5 }; @@ -850,6 +894,14 @@ enum TriangleSplit TRIANGLE_DIR_NUM }; +#if JVET_M0170_MRG_SHARELIST +enum SharedMrgState +{ + NO_SHARE = 0, + GEN_ON_SHARED_BOUND = 1, + SHARING = 2 +}; +#endif ////////////////////////////////////////////////////////////////////////// // Encoder modes to try out ////////////////////////////////////////////////////////////////////////// diff --git a/source/Lib/CommonLib/Unit.cpp b/source/Lib/CommonLib/Unit.cpp index 30286edd9bf4c88dd17cf26614ed33172eb188f3..ecaf89ca16990d33b31fcee0e1c6c86d1e0c0d15 100644 --- a/source/Lib/CommonLib/Unit.cpp +++ b/source/Lib/CommonLib/Unit.cpp @@ -261,7 +261,9 @@ CodingUnit& CodingUnit::operator=( const CodingUnit& other ) qp = other.qp; chromaQpAdj = other.chromaQpAdj; rootCbf = other.rootCbf; +#if !JVET_M0464_UNI_MTS emtFlag = other.emtFlag; +#endif #if HEVC_TILES_WPP tileIdx = other.tileIdx; #endif @@ -270,7 +272,16 @@ CodingUnit& CodingUnit::operator=( const CodingUnit& other ) GBiIdx = other.GBiIdx; for (int i = 0; i<2; i++) refIdxBi[i] = other.refIdxBi[i]; - cpr = other.cpr; + +#if JVET_M0170_MRG_SHARELIST + shareParentPos = other.shareParentPos; + shareParentSize = other.shareParentSize; +#endif + ibc = other.ibc; +#if JVET_M0444_SMVD + smvdMode = other.smvdMode; +#endif + return *this; } @@ -292,7 +303,9 @@ void CodingUnit::initData() qp = 0; chromaQpAdj = 0; rootCbf = true; +#if !JVET_M0464_UNI_MTS emtFlag = 0; +#endif #if HEVC_TILES_WPP tileIdx = 0; #endif @@ -301,7 +314,15 @@ void CodingUnit::initData() GBiIdx = GBI_DEFAULT; for (int i = 0; i < 2; i++) refIdxBi[i] = -1; - cpr = false; +#if JVET_M0170_MRG_SHARELIST + shareParentPos = Position(-1, -1); + shareParentSize.width = -1; + shareParentSize.height = -1; +#endif + ibc = false; +#if JVET_M0444_SMVD + smvdMode = 0; +#endif } @@ -345,6 +366,11 @@ void PredictionUnit::initData() } } mhIntraFlag = false; +#if JVET_M0170_MRG_SHARELIST + 
shareParentPos = Position(-1, -1); + shareParentSize.width = -1; + shareParentSize.height = -1; +#endif } PredictionUnit& PredictionUnit::operator=(const IntraPredictionData& predData) @@ -385,7 +411,10 @@ PredictionUnit& PredictionUnit::operator=(const InterPredictionData& predData) } } mhIntraFlag = predData.mhIntraFlag; - +#if JVET_M0170_MRG_SHARELIST + shareParentPos = predData.shareParentPos; + shareParentSize = predData.shareParentSize; +#endif return *this; } @@ -422,7 +451,10 @@ PredictionUnit& PredictionUnit::operator=( const PredictionUnit& other ) } } mhIntraFlag = other.mhIntraFlag; - +#if JVET_M0170_MRG_SHARELIST + shareParentPos = other.shareParentPos; + shareParentSize = other.shareParentSize; +#endif return *this; } @@ -493,12 +525,17 @@ void TransformUnit::initData() { cbf[i] = 0; rdpcm[i] = NUMBER_OF_RDPCM_MODES; +#if !JVET_M0464_UNI_MTS transformSkip[i] = false; +#endif compAlpha[i] = 0; } depth = 0; +#if JVET_M0464_UNI_MTS + mtsIdx = 0; +#else emtIdx = 0; - +#endif } void TransformUnit::init(TCoeff **coeffs, Pel **pcmbuf) @@ -528,11 +565,17 @@ TransformUnit& TransformUnit::operator=(const TransformUnit& other) cbf[i] = other.cbf[i]; rdpcm[i] = other.rdpcm[i]; +#if !JVET_M0464_UNI_MTS transformSkip[i] = other.transformSkip[i]; +#endif compAlpha[i] = other.compAlpha[i]; } depth = other.depth; +#if JVET_M0464_UNI_MTS + mtsIdx = other.mtsIdx; +#else emtIdx = other.emtIdx; +#endif return *this; } @@ -549,15 +592,20 @@ void TransformUnit::copyComponentFrom(const TransformUnit& other, const Componen cbf[i] = other.cbf[i]; rdpcm[i] = other.rdpcm[i]; +#if !JVET_M0464_UNI_MTS transformSkip[i] = other.transformSkip[i]; +#endif compAlpha[i] = other.compAlpha[i]; depth = other.depth; - +#if JVET_M0464_UNI_MTS + mtsIdx = isLuma( i ) ? 
other.mtsIdx : mtsIdx; +#else if( isLuma( i ) ) { emtIdx = other.emtIdx; } +#endif } CoeffBuf TransformUnit::getCoeffs(const ComponentID id) { return CoeffBuf(m_coeffs[id], blocks[id]); } diff --git a/source/Lib/CommonLib/Unit.h b/source/Lib/CommonLib/Unit.h index 761da4cfe37a7b803d5d0a10aab70013849da36f..4eb057dde4a9cfe99bc1f4970d4f1e29e9f9613d 100644 --- a/source/Lib/CommonLib/Unit.h +++ b/source/Lib/CommonLib/Unit.h @@ -305,12 +305,21 @@ struct CodingUnit : public UnitArea #if HEVC_TILES_WPP uint32_t tileIdx; #endif +#if !JVET_M0464_UNI_MTS uint8_t emtFlag; +#endif uint8_t GBiIdx; int refIdxBi[2]; // needed for fast imv mode decisions int8_t imvNumCand; - bool cpr; +#if JVET_M0170_MRG_SHARELIST + Position shareParentPos; + Size shareParentSize; +#endif + bool ibc; +#if JVET_M0444_SMVD + uint8_t smvdMode; +#endif CodingUnit() : chType( CH_L ) { } CodingUnit(const UnitArea &unit); @@ -361,8 +370,13 @@ struct InterPredictionData Mv mvdAffi [NUM_REF_PIC_LIST_01][3]; Mv mvAffi[NUM_REF_PIC_LIST_01][3]; bool mhIntraFlag; - Mv bv; // block vector for CPR - Mv bvd; // block vector difference for CPR + +#if JVET_M0170_MRG_SHARELIST + Position shareParentPos; + Size shareParentSize; +#endif + Mv bv; // block vector for IBC + Mv bvd; // block vector difference for IBC }; struct PredictionUnit : public UnitArea, public IntraPredictionData, public InterPredictionData @@ -384,6 +398,11 @@ struct PredictionUnit : public UnitArea, public IntraPredictionData, public Inte PredictionUnit& operator=(const MotionInfo& mi); unsigned idx; +#if JVET_M0170_MRG_SHARELIST + Position shareParentPos; + Size shareParentSize; +#endif + PredictionUnit *next; // for accessing motion information, which can have higher resolution than PUs (should always be used, when accessing neighboring motion information) @@ -410,11 +429,17 @@ struct TransformUnit : public UnitArea ChannelType chType; uint8_t depth; +#if JVET_M0464_UNI_MTS + uint8_t mtsIdx; +#else uint8_t emtIdx; - uint8_t cbf [ MAX_NUM_TBLOCKS 
]; +#endif + uint8_t cbf [ MAX_NUM_TBLOCKS ]; RDPCMMode rdpcm [ MAX_NUM_TBLOCKS ]; +#if !JVET_M0464_UNI_MTS bool transformSkip[ MAX_NUM_TBLOCKS ]; - int8_t compAlpha [ MAX_NUM_TBLOCKS ]; +#endif + int8_t compAlpha [ MAX_NUM_TBLOCKS ]; TransformUnit() : chType( CH_L ) { } TransformUnit(const UnitArea& unit); diff --git a/source/Lib/CommonLib/UnitPartitioner.cpp b/source/Lib/CommonLib/UnitPartitioner.cpp index be921219b6e50cc53904cdfe371262b894b580cb..dae0769736f028afa368c3d303d82047aeb09b0b 100644 --- a/source/Lib/CommonLib/UnitPartitioner.cpp +++ b/source/Lib/CommonLib/UnitPartitioner.cpp @@ -297,8 +297,97 @@ void QTBTPartitioner::splitCurrArea( const PartSplit split, const CodingStructur } } +#if JVET_M0421_SPLIT_SIG +void QTBTPartitioner::canSplit( const CodingStructure &cs, bool& canNo, bool& canQt, bool& canBh, bool& canBv, bool& canTh, bool& canTv ) +{ + const PartSplit implicitSplit = m_partStack.back().checkdIfImplicit ? m_partStack.back().implicitSplit : getImplicitSplit( cs ); + + const unsigned maxBTD = cs.pcv->getMaxBtDepth( *cs.slice, chType ) + currImplicitBtDepth; + const unsigned maxBtSize = cs.pcv->getMaxBtSize ( *cs.slice, chType ); + const unsigned minBtSize = cs.pcv->getMinBtSize ( *cs.slice, chType ); + const unsigned maxTtSize = cs.pcv->getMaxTtSize ( *cs.slice, chType ); + const unsigned minTtSize = cs.pcv->getMinTtSize ( *cs.slice, chType ); + const unsigned minQtSize = cs.pcv->getMinQtSize ( *cs.slice, chType ); + + canNo = canQt = canBh = canTh = canBv = canTv = true; + bool canBtt = currMtDepth < maxBTD; + + // the minimal and maximal sizes are given in luma samples + const CompArea& area = currArea().Y(); + PartLevel& level = m_partStack.back(); + + const PartSplit lastSplit = level.split; + const PartSplit parlSplit = lastSplit == CU_TRIH_SPLIT ? 
CU_HORZ_SPLIT : CU_VERT_SPLIT; + + // don't allow QT-splitting below a BT split + if( lastSplit != CTU_LEVEL && lastSplit != CU_QUAD_SPLIT ) canQt = false; + if( area.width <= minQtSize ) canQt = false; + + if( implicitSplit != CU_DONT_SPLIT ) + { + canNo = canTh = canTv = false; + + canBh = implicitSplit == CU_HORZ_SPLIT; + canBv = implicitSplit == CU_VERT_SPLIT; + + return; + } + + if( ( lastSplit == CU_TRIH_SPLIT || lastSplit == CU_TRIV_SPLIT ) && currPartIdx() == 1 ) + { + canBh = parlSplit != CU_HORZ_SPLIT; + canBv = parlSplit != CU_VERT_SPLIT; + } + + if( canBtt && ( area.width <= minBtSize && area.height <= minBtSize ) + && ( ( area.width <= minTtSize && area.height <= minTtSize ) + || cs.sps->getSpsNext().getMTTMode() == 0 ) ) + { + canBtt = false; + } + if( canBtt && ( area.width > maxBtSize || area.height > maxBtSize ) + && ( ( area.width > maxTtSize || area.height > maxTtSize ) + || cs.sps->getSpsNext().getMTTMode() == 0 ) ) + { + canBtt = false; + } + + if( !canBtt ) + { + canBh = canTh = canBv = canTv = false; + + return; + } + + // specific check for BT splits + if( area.height <= minBtSize || area.height > maxBtSize ) canBh = false; + if( area.width > MAX_TU_SIZE_FOR_PROFILE && area.height <= MAX_TU_SIZE_FOR_PROFILE ) canBh = false; + + if( area.width <= minBtSize || area.width > maxBtSize ) canBv = false; + if( area.width <= MAX_TU_SIZE_FOR_PROFILE && area.height > MAX_TU_SIZE_FOR_PROFILE ) canBv = false; + + if( ( cs.sps->getSpsNext().getMTTMode() & 1 ) != 1 ) canTh = false; + if( area.height <= 2 * minTtSize || area.height > maxTtSize || area.width > maxTtSize ) + canTh = false; + if( area.width > MAX_TU_SIZE_FOR_PROFILE || area.height > MAX_TU_SIZE_FOR_PROFILE ) canTh = false; + + if( ( cs.sps->getSpsNext().getMTTMode() & 1 ) != 1 ) canTv = false; + if( area.width <= 2 * minTtSize || area.width > maxTtSize || area.height > maxTtSize ) + canTv = false; + if( area.width > MAX_TU_SIZE_FOR_PROFILE || area.height > MAX_TU_SIZE_FOR_PROFILE ) canTv = 
false; +} + +#endif bool QTBTPartitioner::canSplit( const PartSplit split, const CodingStructure &cs ) { +#if JVET_M0421_SPLIT_SIG + const CompArea area = currArea().Y(); + const unsigned maxTrSize = cs.sps->getMaxTrSize(); + + bool canNo, canQt, canBh, canTh, canBv, canTv; + + canSplit( cs, canNo, canQt, canBh, canBv, canTh, canTv ); +#else const PartSplit implicitSplit = getImplicitSplit( cs ); // the minimal and maximal sizes are given in luma samples @@ -319,6 +408,7 @@ bool QTBTPartitioner::canSplit( const PartSplit split, const CodingStructure &cs return false; } +#endif switch( split ) { case CTU_LEVEL: @@ -328,6 +418,20 @@ bool QTBTPartitioner::canSplit( const PartSplit split, const CodingStructure &cs case TU_MAX_TR_SPLIT: return area.width > maxTrSize || area.height > maxTrSize; break; +#if JVET_M0421_SPLIT_SIG + case CU_QUAD_SPLIT: + return canQt; + case CU_DONT_SPLIT: + return canNo; + case CU_HORZ_SPLIT: + return canBh; + case CU_VERT_SPLIT: + return canBv; + case CU_TRIH_SPLIT: + return canTh; + case CU_TRIV_SPLIT: + return canTv; +#else case CU_QUAD_SPLIT: { // don't allow QT-splitting below a BT split @@ -369,8 +473,15 @@ bool QTBTPartitioner::canSplit( const PartSplit split, const CodingStructure &cs } if( implicitSplit == split ) return true; if( implicitSplit != CU_DONT_SPLIT && implicitSplit != split ) return false; +#endif case CU_MT_SPLIT: +#if JVET_M0421_SPLIT_SIG + return ( canBh || canTh || canBv || canTv ); +#endif case CU_BT_SPLIT: +#if JVET_M0421_SPLIT_SIG + return ( canBh || canBv ); +#else { if( currMtDepth >= maxBTD ) return false; if( ( area.width <= minBtSize && area.height <= minBtSize ) @@ -382,13 +493,14 @@ bool QTBTPartitioner::canSplit( const PartSplit split, const CodingStructure &cs return false; } } +#endif break; default: THROW( "Unknown split mode" ); return false; break; } - +#if !JVET_M0421_SPLIT_SIG // specific check for BT splits switch( split ) { @@ -413,6 +525,7 @@ bool QTBTPartitioner::canSplit( const PartSplit 
split, const CodingStructure &cs default: break; } +#endif return true; } @@ -462,6 +575,12 @@ PartSplit QTBTPartitioner::getImplicitSplit( const CodingStructure &cs ) { split = CU_QUAD_SPLIT; } +#if JVET_M0446_M0888_M0905_VPDU_AT_PIC_BOUNDARY + if ((!isBlInPic || !isTrInPic) && (currArea().Y().width > MAX_TU_SIZE_FOR_PROFILE || currArea().Y().height > MAX_TU_SIZE_FOR_PROFILE)) + { + split = CU_QUAD_SPLIT; + } +#endif } m_partStack.back().checkdIfImplicit = true; diff --git a/source/Lib/CommonLib/UnitPartitioner.h b/source/Lib/CommonLib/UnitPartitioner.h index 6b8e354f96a57456a2b3380f65d32fcef3d8a033..222fe99edeb0a83f5ef5ab5462a25f63d1bb1eb5 100644 --- a/source/Lib/CommonLib/UnitPartitioner.h +++ b/source/Lib/CommonLib/UnitPartitioner.h @@ -128,6 +128,9 @@ public: virtual void copyState ( const Partitioner& other ); public: +#if JVET_M0421_SPLIT_SIG + virtual void canSplit ( const CodingStructure &cs, bool& canNo, bool& canQt, bool& canBh, bool& canBv, bool& canTh, bool& canTv ) = 0; +#endif virtual bool canSplit ( const PartSplit split, const CodingStructure &cs ) = 0; virtual bool isSplitImplicit ( const PartSplit split, const CodingStructure &cs ) = 0; virtual PartSplit getImplicitSplit ( const CodingStructure &cs ) = 0; @@ -147,7 +150,10 @@ public: void exitCurrSplit (); bool nextPart ( const CodingStructure &cs, bool autoPop = false ); bool hasNextPart (); - + +#if JVET_M0421_SPLIT_SIG + void canSplit ( const CodingStructure &cs, bool& canNo, bool& canQt, bool& canBh, bool& canBv, bool& canTh, bool& canTv ); +#endif bool canSplit ( const PartSplit split, const CodingStructure &cs ); bool isSplitImplicit ( const PartSplit split, const CodingStructure &cs ); PartSplit getImplicitSplit ( const CodingStructure &cs ); diff --git a/source/Lib/CommonLib/UnitTools.cpp b/source/Lib/CommonLib/UnitTools.cpp index ee952d0c99bd5a5fabb49abf9d8c7a445f5d5e86..5bb6c03629e76d5b62c5bab6d180f5f04acbd58e 100644 --- a/source/Lib/CommonLib/UnitTools.cpp +++ 
b/source/Lib/CommonLib/UnitTools.cpp @@ -236,7 +236,11 @@ bool CU::hasNonTsCodedBlock( const CodingUnit& cu ) { for( uint32_t i = 0; i < ::getNumberValidTBlocks( *cu.cs->pcv ); i++ ) { +#if JVET_M0464_UNI_MTS + hasAnyNonTSCoded |= ( currTU.blocks[i].valid() && ( isLuma(ComponentID(i)) ? currTU.mtsIdx != 1 : true ) && TU::getCbf( currTU, ComponentID( i ) ) ); +#else hasAnyNonTSCoded |= ( currTU.blocks[i].valid() && !currTU.transformSkip[i] && TU::getCbf( currTU, ComponentID( i ) ) ); +#endif } } @@ -708,6 +712,9 @@ bool PU::xCheckSimilarMotion(const int mergeCandIndex, const int prevCnt, const bool PU::addMergeHMVPCand(const Slice &slice, MergeCtx& mrgCtx, bool canFastExit, const int& mrgCandIdx, const uint32_t maxNumMergeCandMin1, int &cnt, const int prevCnt, bool isAvailableSubPu, unsigned subPuMvpPos , int mmvdList +#if JVET_M0170_MRG_SHARELIST + , bool isShared +#endif ) #else @@ -716,7 +723,7 @@ bool PU::addMergeHMVPCand(const Slice &slice, MergeCtx& mrgCtx, bool isCandInter ) #endif { - int mrgCandIdxCPR = mrgCandIdx; + int mrgCandIdxIBC = mrgCandIdx; MotionInfo miNeighbor; bool hasPruned[MRG_MAX_NUM_CANDS]; memset(hasPruned, 0, MRG_MAX_NUM_CANDS * sizeof(bool)); @@ -724,10 +731,19 @@ bool PU::addMergeHMVPCand(const Slice &slice, MergeCtx& mrgCtx, bool isCandInter { hasPruned[subPuMvpPos] = true; } +#if JVET_M0170_MRG_SHARELIST + int num_avai_candInLUT = (isShared ? slice.getAvailableLUTBkupMrgNum() : slice.getAvailableLUTMrgNum()); +#else int num_avai_candInLUT = slice.getAvailableLUTMrgNum(); +#endif + for (int mrgIdx = 1; mrgIdx <= num_avai_candInLUT; mrgIdx++) { +#if JVET_M0170_MRG_SHARELIST + miNeighbor = isShared ? 
slice.getMotionInfoFromLUTBkup(num_avai_candInLUT - mrgIdx) : slice.getMotionInfoFromLUTs(num_avai_candInLUT - mrgIdx); +#else miNeighbor = slice.getMotionInfoFromLUTs(num_avai_candInLUT - mrgIdx); +#endif mrgCtx.interDirNeighbours[cnt] = miNeighbor.interDir; mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miNeighbor.mv[0], miNeighbor.refIdx[0]); if (slice.isInterB()) @@ -741,11 +757,11 @@ bool PU::addMergeHMVPCand(const Slice &slice, MergeCtx& mrgCtx, bool isCandInter #endif if (miNeighbor.interDir == 1 && slice.getRefPic(REF_PIC_LIST_0, miNeighbor.refIdx[0])->getPOC() == slice.getPOC()) { - mrgCtx.mrgTypeNeighbours[cnt] = MRG_TYPE_CPR; + mrgCtx.mrgTypeNeighbours[cnt] = MRG_TYPE_IBC; if(mmvdList != 0 && mrgCandIdx != -1) - mrgCandIdxCPR++; + mrgCandIdxIBC++; } - if (mrgCandIdxCPR == cnt && canFastExit) + if (mrgCandIdxIBC == cnt && canFastExit) { return true; } @@ -791,12 +807,17 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, int cnt = 0; - int mrgCandIdxCPR = mrgCandIdx; + int mrgCandIdxIBC = mrgCandIdx; +#if JVET_M0170_MRG_SHARELIST + const Position posLT = pu.shareParentPos; + const Position posRT = pu.shareParentPos.offset(pu.shareParentSize.width - 1, 0); + const Position posLB = pu.shareParentPos.offset(0, pu.shareParentSize.height - 1); +#else const Position posLT = pu.Y().topLeft(); const Position posRT = pu.Y().topRight(); const Position posLB = pu.Y().bottomLeft(); - +#endif MotionInfo miAbove, miLeft, miAboveLeft, miAboveRight, miBelowLeft; //left @@ -816,11 +837,11 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, mrgCtx.interDirNeighbours[cnt] = miLeft.interDir; mrgCtx.GBiIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? 
puLeft->cu->GBiIdx : GBI_DEFAULT; // get Mv from Left - if (puLeft->cu->cpr) + if (puLeft->cu->ibc) { - mrgCtx.mrgTypeNeighbours[cnt] = MRG_TYPE_CPR; + mrgCtx.mrgTypeNeighbours[cnt] = MRG_TYPE_IBC; if (mmvdList != 0 && mrgCandIdx != -1) - mrgCandIdxCPR++; + mrgCandIdxIBC++; } mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miLeft.mv[0], miLeft.refIdx[0]); @@ -828,7 +849,7 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, { mrgCtx.mvFieldNeighbours[(cnt << 1) + 1].setMvField(miLeft.mv[1], miLeft.refIdx[1]); } - if (mrgCandIdxCPR == cnt && canFastExit) + if (mrgCandIdxIBC == cnt && canFastExit) { return; } @@ -862,11 +883,11 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, mrgCtx.interDirNeighbours[cnt] = miAbove.interDir; // get Mv from Above mrgCtx.GBiIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? puAbove->cu->GBiIdx : GBI_DEFAULT; - if (puAbove->cu->cpr) + if (puAbove->cu->ibc) { - mrgCtx.mrgTypeNeighbours[cnt] = MRG_TYPE_CPR; + mrgCtx.mrgTypeNeighbours[cnt] = MRG_TYPE_IBC; if (mmvdList != 0 && mrgCandIdx != -1) - mrgCandIdxCPR++; + mrgCandIdxIBC++; } mrgCtx.mvFieldNeighbours[cnt << 1].setMvField( miAbove.mv[0], miAbove.refIdx[0] ); @@ -874,7 +895,7 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, { mrgCtx.mvFieldNeighbours[( cnt << 1 ) + 1].setMvField( miAbove.mv[1], miAbove.refIdx[1] ); } - if (mrgCandIdxCPR == cnt && canFastExit) + if (mrgCandIdxIBC == cnt && canFastExit) { return; } @@ -912,11 +933,11 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, mrgCtx.interDirNeighbours[cnt] = miAboveRight.interDir; // get Mv from Above-right mrgCtx.GBiIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? 
puAboveRight->cu->GBiIdx : GBI_DEFAULT; - if (puAboveRight->cu->cpr) + if (puAboveRight->cu->ibc) { - mrgCtx.mrgTypeNeighbours[cnt] = MRG_TYPE_CPR; + mrgCtx.mrgTypeNeighbours[cnt] = MRG_TYPE_IBC; if (mmvdList != 0 && mrgCandIdx != -1) - mrgCandIdxCPR++; + mrgCandIdxIBC++; } mrgCtx.mvFieldNeighbours[cnt << 1].setMvField( miAboveRight.mv[0], miAboveRight.refIdx[0] ); @@ -925,7 +946,7 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, mrgCtx.mvFieldNeighbours[( cnt << 1 ) + 1].setMvField( miAboveRight.mv[1], miAboveRight.refIdx[1] ); } - if (mrgCandIdxCPR == cnt && canFastExit) + if (mrgCandIdxIBC == cnt && canFastExit) { return; } @@ -962,11 +983,11 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, mrgCtx.interDirNeighbours[cnt] = miBelowLeft.interDir; mrgCtx.GBiIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? puLeftBottom->cu->GBiIdx : GBI_DEFAULT; // get Mv from Bottom-Left - if (puLeftBottom->cu->cpr) + if (puLeftBottom->cu->ibc) { - mrgCtx.mrgTypeNeighbours[cnt] = MRG_TYPE_CPR; + mrgCtx.mrgTypeNeighbours[cnt] = MRG_TYPE_IBC; if (mmvdList != 0 && mrgCandIdx != -1) - mrgCandIdxCPR++; + mrgCandIdxIBC++; } mrgCtx.mvFieldNeighbours[cnt << 1].setMvField( miBelowLeft.mv[0], miBelowLeft.refIdx[0] ); @@ -975,7 +996,7 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, mrgCtx.mvFieldNeighbours[( cnt << 1 ) + 1].setMvField( miBelowLeft.mv[1], miBelowLeft.refIdx[1] ); } - if (mrgCandIdxCPR == cnt && canFastExit) + if (mrgCandIdxIBC == cnt && canFastExit) { return; } @@ -1015,11 +1036,11 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, mrgCtx.interDirNeighbours[cnt] = miAboveLeft.interDir; mrgCtx.GBiIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? 
puAboveLeft->cu->GBiIdx : GBI_DEFAULT; // get Mv from Above-Left - if (puAboveLeft->cu->cpr) + if (puAboveLeft->cu->ibc) { - mrgCtx.mrgTypeNeighbours[cnt] = MRG_TYPE_CPR; + mrgCtx.mrgTypeNeighbours[cnt] = MRG_TYPE_IBC; if (mmvdList != 0 && mrgCandIdx != -1) - mrgCandIdxCPR++; + mrgCandIdxIBC++; } mrgCtx.mvFieldNeighbours[cnt << 1].setMvField( miAboveLeft.mv[0], miAboveLeft.refIdx[0] ); @@ -1028,7 +1049,7 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, mrgCtx.mvFieldNeighbours[( cnt << 1 ) + 1].setMvField( miAboveLeft.mv[1], miAboveLeft.refIdx[1] ); } - if (mrgCandIdxCPR == cnt && canFastExit) + if (mrgCandIdxIBC == cnt && canFastExit) { return; } @@ -1047,13 +1068,24 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, { //>> MTK colocated-RightBottom // offset the pos to be sure to "point" to the same position the uiAbsPartIdx would've pointed to +#if JVET_M0170_MRG_SHARELIST + Position posRB = pu.shareParentPos.offset(pu.shareParentSize.width-3, pu.shareParentSize.height - 3); +#else Position posRB = pu.Y().bottomRight().offset(-3, -3); - +#endif const PreCalcValues& pcv = *cs.pcv; Position posC0; +#if JVET_M0170_MRG_SHARELIST + Position posC1 = pu.shareParentPos.offset((pu.shareParentSize.width/2), (pu.shareParentSize.height/2)); +#else Position posC1 = pu.Y().center(); +#endif + bool C0Avail = false; +#if JVET_M0170_MRG_SHARELIST + bool C1Avail = (posC1.x < pcv.lumaWidth) && (posC1.y < pcv.lumaHeight); +#endif if (((posRB.x + pcv.minCUWidth) < pcv.lumaWidth) && ((posRB.y + pcv.minCUHeight) < pcv.lumaHeight)) { @@ -1089,7 +1121,11 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, int dir = 0; unsigned uiArrayAddr = cnt; bool bExistMV = ( C0Avail && getColocatedMVP(pu, REF_PIC_LIST_0, posC0, cColMv, iRefIdx ) ) +#if JVET_M0170_MRG_SHARELIST + || ( C1Avail && getColocatedMVP(pu, REF_PIC_LIST_0, posC1, cColMv, iRefIdx )); +#else || getColocatedMVP(pu, REF_PIC_LIST_0, posC1, 
cColMv, iRefIdx ); +#endif if (bExistMV) { @@ -1100,7 +1136,11 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, if (slice.isInterB()) { bExistMV = ( C0Avail && getColocatedMVP(pu, REF_PIC_LIST_1, posC0, cColMv, iRefIdx ) ) +#if JVET_M0170_MRG_SHARELIST + || (C1Avail && getColocatedMVP(pu, REF_PIC_LIST_1, posC1, cColMv, iRefIdx ) ); +#else || getColocatedMVP(pu, REF_PIC_LIST_1, posC1, cColMv, iRefIdx ); +#endif if (bExistMV) { dir |= 2; @@ -1130,7 +1170,7 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, isCandInter [uiArrayAddr] = true; #endif mrgCtx.GBiIdx[uiArrayAddr] = GBI_DEFAULT; - if (mrgCandIdxCPR == cnt && canFastExit) + if (mrgCandIdxIBC == cnt && canFastExit) { return; } @@ -1152,14 +1192,20 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, bool isAvailableSubPu = false; unsigned subPuMvpPos = 0; #if JVET_L0090_PAIR_AVG +#if JVET_M0170_MRG_SHARELIST + bool isShared = ((pu.Y().lumaSize().width != pu.shareParentSize.width) || (pu.Y().lumaSize().height != pu.shareParentSize.height)); +#endif bool bFound = addMergeHMVPCand(slice, mrgCtx, canFastExit - , (mmvdList != 0 && mrgCandIdx != -1) ? (const int) mrgCandIdxCPR : mrgCandIdx + , (mmvdList != 0 && mrgCandIdx != -1) ? (const int) mrgCandIdxIBC : mrgCandIdx , maxNumMergeCandMin1, cnt, cnt, isAvailableSubPu, subPuMvpPos , mmvdList +#if JVET_M0170_MRG_SHARELIST + , isShared +#endif ); #else bool bFound = addMergeHMVPCand(slice, mrgCtx, isCandInter, canFastExit - , (mmvdList != 0 && mrgCandIdx != -1) ? (const int)mrgCandIdxCPR : mrgCandIdx + , (mmvdList != 0 && mrgCandIdx != -1) ? 
(const int)mrgCandIdxIBC : mrgCandIdx , maxNumMergeCandMin1, cnt, cnt, isAvailableSubPu, subPuMvpPos , mmvdList ); @@ -1190,7 +1236,7 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, // skip when one is BV and one is MV - if (mrgCtx.mrgTypeNeighbours[i] != mrgCtx.mrgTypeNeighbours[j] && pu.cs->sps->getSpsNext().getCPRMode()) + if (mrgCtx.mrgTypeNeighbours[i] != mrgCtx.mrgTypeNeighbours[j] && pu.cs->sps->getSpsNext().getIBCMode()) { continue; } @@ -1220,9 +1266,9 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, - if (mrgCtx.mrgTypeNeighbours[i] == MRG_TYPE_CPR && mrgCtx.mrgTypeNeighbours[j] == MRG_TYPE_CPR && pu.cs->sps->getSpsNext().getCPRMode()) + if (mrgCtx.mrgTypeNeighbours[i] == MRG_TYPE_IBC && mrgCtx.mrgTypeNeighbours[j] == MRG_TYPE_IBC && pu.cs->sps->getSpsNext().getIBCMode()) { - mrgCtx.mrgTypeNeighbours[cnt] = MRG_TYPE_CPR; + mrgCtx.mrgTypeNeighbours[cnt] = MRG_TYPE_IBC; avgMv.setHor((avgMv.getHor() / 16) << 4); avgMv.setVer((avgMv.getVer() / 16) << 4); } @@ -1324,7 +1370,7 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, if ( mrgCtx.interDirNeighbours[uiArrayAddr] == 1 && pu.cs->slice->getRefPic(REF_PIC_LIST_0, mrgCtx.mvFieldNeighbours[uiArrayAddr << 1].refIdx)->getPOC() == pu.cs->slice->getPOC()) { - mrgCtx.mrgTypeNeighbours[uiArrayAddr] = MRG_TYPE_CPR; + mrgCtx.mrgTypeNeighbours[uiArrayAddr] = MRG_TYPE_IBC; } uiArrayAddr++; @@ -1341,7 +1387,7 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, } mrgCtx.numValidMergeCand = uiArrayAddr; } -// for cpr pu validation +// for ibc pu validation bool PU::isBlockVectorValid(PredictionUnit& pu, int xPos, int yPos, int width, int height, int picWidth, int picHeight, int xStartInCU, int yStartInCU, int xBv, int yBv, int ctuSize) { const int ctuSizeLog2 = g_aucLog2[ctuSize]; @@ -1385,9 +1431,28 @@ bool PU::isBlockVectorValid(PredictionUnit& pu, int xPos, int yPos, int width, i } // in the same 
CTU line - +#if JVET_M0407_IBC_RANGE + if ((refRightX >> ctuSizeLog2 <= xPos >> ctuSizeLog2) && (refLeftX >> ctuSizeLog2 >= (xPos >> ctuSizeLog2) - 1)) +#else if ((refRightX >> ctuSizeLog2 <= xPos >> ctuSizeLog2) && (refLeftX >> ctuSizeLog2 >= (xPos >> ctuSizeLog2))) +#endif { + +#if JVET_M0407_IBC_RANGE + // in the same CTU, or left CTU + // if part of ref block is in the left CTU, some area can be referred from the not-yet updated local CTU buffer + if ((refLeftX >> ctuSizeLog2) == ((xPos >> ctuSizeLog2) - 1)) + { + // ref block's collocated block in current CTU + const Position refPosCol = pu.Y().topLeft().offset(xBv + ctuSize, yBv); + int offset64x = (refPosCol.x >> (ctuSizeLog2 - 1)) << (ctuSizeLog2 - 1); + int offset64y = (refPosCol.y >> (ctuSizeLog2 - 1)) << (ctuSizeLog2 - 1); + const Position refPosCol64x64 = {offset64x, offset64y}; + if (pu.cs->isDecomp(refPosCol64x64, toChannelType(COMPONENT_Y))) + return false; + } +#endif +#if !JVET_M0407_IBC_RANGE // in the same CTU, check if the reference block is already coded const Position refPosLT = pu.Y().topLeft().offset(xBv, yBv); const Position refPosBR = pu.Y().bottomRight().offset(xBv, yBv); @@ -1399,11 +1464,24 @@ bool PU::isBlockVectorValid(PredictionUnit& pu, int xPos, int yPos, int width, i return false; } return true; +#endif } else return false; -}// for cpr pu validation +#if JVET_M0407_IBC_RANGE + // in the same CTU, or valid area from left CTU. 
Check if the reference block is already coded + const Position refPosLT = pu.Y().topLeft().offset(xBv, yBv); + const Position refPosBR = pu.Y().bottomRight().offset(xBv, yBv); + const ChannelType chType = toChannelType(COMPONENT_Y); + if (!pu.cs->isDecomp(refPosBR, chType)) + return false; + if (!pu.cs->isDecomp(refPosLT, chType)) + return false; + return true; +#endif + +}// for ibc pu validation static int xGetDistScaleFactor(const int &iCurrPOC, const int &iCurrRefPOC, const int &iColPOC, const int &iColRefPOC) { @@ -1469,8 +1547,14 @@ void PU::getInterMMVDMergeCandidates(const PredictionUnit &pu, MergeCtx& mrgCtx, { for (k = currBaseNum; k < MMVD_BASE_MV_NUM; k++) { +#if JVET_M0068_M0171_MMVD_CLEANUP + mrgCtx.mmvdBaseMv[k][0] = MvField(Mv(0, 0), 0); + const Slice &slice = *pu.cs->slice; + mrgCtx.mmvdBaseMv[k][1] = MvField(Mv(0, 0), (slice.isInterB() ? 0 : -1)); +#else mrgCtx.mmvdBaseMv[k][0] = MvField(Mv(0, 0), 0); mrgCtx.mmvdBaseMv[k][0] = MvField(Mv(0, 0), 0); +#endif } } } @@ -1590,7 +1674,7 @@ bool PU::isDiffMER(const PredictionUnit &pu1, const PredictionUnit &pu2) return false; } -void PU::getCprMVPsEncOnly(PredictionUnit &pu, Mv* MvPred, int& nbPred) +void PU::getIbcMVPsEncOnly(PredictionUnit &pu, Mv* MvPred, int& nbPred) { //-- Get Spatial MV @@ -1603,7 +1687,7 @@ void PU::getCprMVPsEncOnly(PredictionUnit &pu, Mv* MvPred, int& nbPred) //left const PredictionUnit *neibLeftPU = NULL; neibLeftPU = pu.cs->getPURestricted(posLB.offset(-1, 0), pu, pu.cs->chType); - left = (neibLeftPU) ? neibLeftPU->cu->cpr : 0; + left = (neibLeftPU) ? neibLeftPU->cu->ibc : 0; if (left) { @@ -1615,7 +1699,7 @@ void PU::getCprMVPsEncOnly(PredictionUnit &pu, Mv* MvPred, int& nbPred) //above const PredictionUnit *neibAbovePU = NULL; neibAbovePU = pu.cs->getPURestricted(posRT.offset(0, -1), pu, pu.cs->chType); - above = (neibAbovePU) ? neibAbovePU->cu->cpr : 0; + above = (neibAbovePU) ? 
neibAbovePU->cu->ibc : 0; if (above) { @@ -1627,7 +1711,7 @@ void PU::getCprMVPsEncOnly(PredictionUnit &pu, Mv* MvPred, int& nbPred) // Below Left predictor search const PredictionUnit *neibBelowLeftPU = NULL; neibBelowLeftPU = pu.cs->getPURestricted(posLB.offset(-1, 1), pu, pu.cs->chType); - unsigned int belowLeft = (neibBelowLeftPU) ? neibBelowLeftPU->cu->cpr : 0; + unsigned int belowLeft = (neibBelowLeftPU) ? neibBelowLeftPU->cu->ibc : 0; if (belowLeft) { @@ -1640,7 +1724,7 @@ void PU::getCprMVPsEncOnly(PredictionUnit &pu, Mv* MvPred, int& nbPred) // Above Right predictor search const PredictionUnit *neibAboveRightPU = NULL; neibAboveRightPU = pu.cs->getPURestricted(posRT.offset(1, -1), pu, pu.cs->chType); - unsigned int aboveRight = (neibAboveRightPU) ? neibAboveRightPU->cu->cpr : 0; + unsigned int aboveRight = (neibAboveRightPU) ? neibAboveRightPU->cu->ibc : 0; if (aboveRight) { @@ -1653,7 +1737,7 @@ void PU::getCprMVPsEncOnly(PredictionUnit &pu, Mv* MvPred, int& nbPred) // Above Left predictor search const PredictionUnit *neibAboveLeftPU = NULL; neibAboveLeftPU = pu.cs->getPURestricted(posLT.offset(-1, -1), pu, pu.cs->chType); - unsigned int aboveLeft = (neibAboveLeftPU) ? neibAboveLeftPU->cu->cpr : 0; + unsigned int aboveLeft = (neibAboveLeftPU) ? neibAboveLeftPU->cu->ibc : 0; if (aboveLeft) { @@ -1681,13 +1765,13 @@ bool PU::getDerivedBV(PredictionUnit &pu, const Mv& currentMv, Mv& derivedMv) const PredictionUnit *neibRefPU = NULL; neibRefPU = pu.cs->getPURestricted(pu.lumaPos().offset(offsetX, offsetY), pu, pu.cs->chType); - bool isCPR = (neibRefPU) ? neibRefPU->cu->cpr : 0; - if (isCPR) + bool isIBC = (neibRefPU) ? 
neibRefPU->cu->ibc : 0; + if (isIBC) { derivedMv = neibRefPU->bv; derivedMv += currentMv; } - return isCPR; + return isIBC; } /** Constructs a list of candidates for AMVP (See specification, section "Derivation process for motion vector predictor candidates") * \param uiPartIdx @@ -1806,6 +1890,9 @@ void PU::fillMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, const in Position posC0; bool C0Avail = false; Position posC1 = pu.Y().center(); +#if JVET_M0170_MRG_SHARELIST + bool C1Avail = ( posC1.x < pcv.lumaWidth ) && ( posC1.y < pcv.lumaHeight ) ; +#endif Mv cColMv; @@ -1836,7 +1923,11 @@ void PU::fillMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, const in } } +#if JVET_M0170_MRG_SHARELIST + if ((C0Avail && getColocatedMVP(pu, eRefPicList, posC0, cColMv, refIdx_Col)) || (C1Avail && getColocatedMVP(pu, eRefPicList, posC1, cColMv, refIdx_Col))) +#else if ((C0Avail && getColocatedMVP(pu, eRefPicList, posC0, cColMv, refIdx_Col)) || getColocatedMVP(pu, eRefPicList, posC1, cColMv, refIdx_Col)) +#endif { if (pu.cu->imv != 0) { @@ -2153,6 +2244,9 @@ void PU::fillAffineMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, co Position posC0; bool C0Avail = false; Position posC1 = pu.Y().center(); +#if JVET_M0170_MRG_SHARELIST + bool C1Avail = ( posC1.x < pcv.lumaWidth ) && ( posC1.y < pcv.lumaHeight ) ; +#endif Mv cColMv; if ( ((posRB.x + pcv.minCUWidth) < pcv.lumaWidth) && ((posRB.y + pcv.minCUHeight) < pcv.lumaHeight) ) @@ -2182,7 +2276,11 @@ void PU::fillAffineMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, co } } +#if JVET_M0170_MRG_SHARELIST + if ( (C0Avail && getColocatedMVP( pu, eRefPicList, posC0, cColMv, refIdxCol )) || (C1Avail && getColocatedMVP( pu, eRefPicList, posC1, cColMv, refIdxCol ) ) ) +#else if ( (C0Avail && getColocatedMVP( pu, eRefPicList, posC0, cColMv, refIdxCol )) || getColocatedMVP( pu, eRefPicList, posC1, cColMv, refIdxCol ) ) +#endif { cColMv.roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); 
affiAMVPInfo.mvCandLT[affiAMVPInfo.numCand] = cColMv; @@ -2628,11 +2726,13 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx affMrgCtx.numValidMergeCand = 0; affMrgCtx.maxNumMergeCand = maxNumAffineMergeCand; - bool enableSubPuMvp = slice.getSPS()->getSpsNext().getUseSubPuMvp() && !(slice.getPOC() == slice.getRefPic(REF_PIC_LIST_0, 0)->getPOC() && slice.isIRAP()); + bool enableSubPuMvp = slice.getSPS()->getSBTMVPEnabledFlag() && !(slice.getPOC() == slice.getRefPic(REF_PIC_LIST_0, 0)->getPOC() && slice.isIRAP()); bool isAvailableSubPu = false; if ( enableSubPuMvp && slice.getEnableTMVPFlag() ) { - int cntCPR = 0; +#if !JVET_M0409_ATMVP_FIX + int cntIBC = 0; +#endif MergeCtx mrgCtx = *affMrgCtx.mrgCtx; bool tmpLICFlag = false; @@ -2655,10 +2755,12 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx mrgCtx.interDirNeighbours[pos] = miLeft.interDir; // get Mv from Left - if (puLeft->cu->cpr) +#if !JVET_M0409_ATMVP_FIX + if (puLeft->cu->ibc) { - cntCPR++; + cntIBC++; } +#endif mrgCtx.mvFieldNeighbours[pos << 1].setMvField( miLeft.mv[0], miLeft.refIdx[0] ); if ( slice.isInterB() ) @@ -2680,10 +2782,12 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx // get Inter Dir mrgCtx.interDirNeighbours[pos] = miAbove.interDir; // get Mv from Left - if (puAbove->cu->cpr) +#if !JVET_M0409_ATMVP_FIX + if (puAbove->cu->ibc) { - cntCPR++; + cntIBC++; } +#endif mrgCtx.mvFieldNeighbours[pos << 1].setMvField( miAbove.mv[0], miAbove.refIdx[0] ); if ( slice.isInterB() ) @@ -2711,10 +2815,12 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx // get Inter Dir mrgCtx.interDirNeighbours[pos] = miAboveRight.interDir; // get Mv from Left - if (puAboveRight->cu->cpr) +#if !JVET_M0409_ATMVP_FIX + if (puAboveRight->cu->ibc) { - cntCPR++; + cntIBC++; } +#endif mrgCtx.mvFieldNeighbours[pos << 1].setMvField( miAboveRight.mv[0], miAboveRight.refIdx[0] ); if ( 
slice.isInterB() ) @@ -2742,10 +2848,12 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx // get Inter Dir mrgCtx.interDirNeighbours[pos] = miBelowLeft.interDir; // get Mv from Bottom-Left - if (puLeftBottom->cu->cpr) +#if !JVET_M0409_ATMVP_FIX + if (puLeftBottom->cu->ibc) { - cntCPR++; + cntIBC++; } +#endif mrgCtx.mvFieldNeighbours[pos << 1].setMvField( miBelowLeft.mv[0], miBelowLeft.refIdx[0] ); if ( slice.isInterB() ) @@ -2759,7 +2867,9 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx isAvailableSubPu = getInterMergeSubPuMvpCand( pu, mrgCtx, tmpLICFlag, pos , 0 - , cntCPR +#if !JVET_M0409_ATMVP_FIX + , cntIBC +#endif ); if ( isAvailableSubPu ) { @@ -2847,7 +2957,7 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx const PredictionUnit* puNeigh = cs.getPURestricted( pos, pu, pu.chType ); if ( puNeigh && CU::isInter( *puNeigh->cu ) - && !puNeigh->cu->cpr + && !puNeigh->cu->ibc ) { isAvailable[0] = true; @@ -2865,7 +2975,7 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx if ( puNeigh && CU::isInter( *puNeigh->cu ) - && !puNeigh->cu->cpr + && !puNeigh->cu->ibc ) { isAvailable[1] = true; @@ -2883,7 +2993,7 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx if ( puNeigh && CU::isInter( *puNeigh->cu ) - && !puNeigh->cu->cpr + && !puNeigh->cu->ibc ) { isAvailable[2] = true; @@ -3182,11 +3292,15 @@ void clipColPos(int& posX, int& posY, const PredictionUnit& pu) bool PU::getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx& mrgCtx, bool& LICFlag, const int count , int mmvdList - , const int countCPR +#if !JVET_M0409_ATMVP_FIX + , const int countIBC +#endif ) { - if (count == countCPR && pu.cs->slice->getSPS()->getSpsNext().getCPRMode()) +#if !JVET_M0409_ATMVP_FIX + if (count == countIBC && pu.cs->slice->getSPS()->getSpsNext().getIBCMode()) return false; +#endif const Slice &slice = *pu.cs->slice; 
const unsigned scale = 4 * std::max<int>(1, 4 * AMVP_DECIMATION_FACTOR / 4); const unsigned mask = ~(scale - 1); @@ -3301,7 +3415,7 @@ bool PU::getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx& mrgCtx, b mi.isInter = true; mi.sliceIdx = slice.getIndependentSliceIdx(); - if (colMi.isInter && !((colMi.interDir == 1 || colMi.interDir == 3) && (pColPic->cs->slice->getRefPOC(REF_PIC_LIST_0, colMi.refIdx[0]) == pColPic->cs->slice->getPOC()) && pu.cs->sps->getSpsNext().getCPRMode())) + if (colMi.isInter && !((colMi.interDir == 1 || colMi.interDir == 3) && (pColPic->cs->slice->getRefPOC(REF_PIC_LIST_0, colMi.refIdx[0]) == pColPic->cs->slice->getPOC()) && pu.cs->sps->getSpsNext().getIBCMode())) { for (unsigned currRefListId = 0; currRefListId < (bBSlice ? 2 : 1); currRefListId++) { @@ -3343,7 +3457,7 @@ void PU::spanMotionInfo( PredictionUnit &pu, const MergeCtx &mrgCtx ) MotionBuf mb = pu.getMotionBuf(); if( !pu.mergeFlag || pu.mergeType == MRG_TYPE_DEFAULT_N - || pu.mergeType == MRG_TYPE_CPR + || pu.mergeType == MRG_TYPE_IBC ) { MotionInfo mi; @@ -3435,7 +3549,7 @@ void PU::applyImv( PredictionUnit& pu, MergeCtx &mrgCtx, InterPrediction *interP pu.mv[0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); if (pu.interDir == 1 && pu.cs->slice->getRefPic(REF_PIC_LIST_0, pu.refIdx[REF_PIC_LIST_0])->getPOC() == pu.cs->slice->getPOC()) { - pu.cu->cpr = true; + pu.cu->ibc = true; } } @@ -3461,7 +3575,9 @@ void PU::applyImv( PredictionUnit& pu, MergeCtx &mrgCtx, InterPrediction *interP PU::getInterMergeCandidates ( pu, mrgCtx , 0 ); +#if !JVET_M0068_M0171_MMVD_CLEANUP PU::restrictBiPredMergeCands( pu, mrgCtx ); +#endif mrgCtx.setMergeInfo( pu, pu.mergeIdx ); } @@ -3501,6 +3617,22 @@ void PU::restrictBiPredMergeCands( const PredictionUnit &pu, MergeCtx& mergeCtx } } +#if JVET_M0068_M0171_MMVD_CLEANUP +void PU::restrictBiPredMergeCandsOne(PredictionUnit &pu) +{ + if (PU::isBipredRestriction(pu)) + { + if (pu.interDir == 3) + { + pu.interDir = 1; + pu.refIdx[1] 
= -1; + pu.mv[1] = Mv(0, 0); + pu.cu->GBiIdx = GBI_DEFAULT; + } + } +} +#endif + void PU::getTriangleMergeCandidates( const PredictionUnit &pu, MergeCtx& triangleMrgCtx ) { const CodingStructure &cs = *pu.cs; @@ -3530,7 +3662,7 @@ void PU::getTriangleMergeCandidates( const PredictionUnit &pu, MergeCtx& triangl //left const PredictionUnit* puLeft = cs.getPURestricted( posLB.offset( -1, 0 ), pu, pu.chType ); const bool isAvailableA1 = puLeft && isDiffMER( pu, *puLeft ) && pu.cu != puLeft->cu && CU::isInter( *puLeft->cu ) - && !puLeft->cu->cpr + && !puLeft->cu->ibc ; if( isAvailableA1 ) { @@ -3547,7 +3679,7 @@ void PU::getTriangleMergeCandidates( const PredictionUnit &pu, MergeCtx& triangl // above const PredictionUnit *puAbove = cs.getPURestricted( posRT.offset( 0, -1 ), pu, pu.chType ); bool isAvailableB1 = puAbove && isDiffMER( pu, *puAbove ) && pu.cu != puAbove->cu && CU::isInter( *puAbove->cu ) - && !puAbove->cu->cpr + && !puAbove->cu->ibc ; if( isAvailableB1 ) { @@ -3568,7 +3700,7 @@ void PU::getTriangleMergeCandidates( const PredictionUnit &pu, MergeCtx& triangl // above right const PredictionUnit *puAboveRight = cs.getPURestricted( posRT.offset( 1, -1 ), pu, pu.chType ); bool isAvailableB0 = puAboveRight && isDiffMER( pu, *puAboveRight ) && CU::isInter( *puAboveRight->cu ) - && !puAboveRight->cu->cpr + && !puAboveRight->cu->ibc ; if( isAvailableB0 ) @@ -3590,7 +3722,7 @@ void PU::getTriangleMergeCandidates( const PredictionUnit &pu, MergeCtx& triangl //left bottom const PredictionUnit *puLeftBottom = cs.getPURestricted( posLB.offset( -1, 1 ), pu, pu.chType ); bool isAvailableA0 = puLeftBottom && isDiffMER( pu, *puLeftBottom ) && CU::isInter( *puLeftBottom->cu ) - && !puLeftBottom->cu->cpr + && !puLeftBottom->cu->ibc ; if( isAvailableA0 ) { @@ -3611,7 +3743,7 @@ void PU::getTriangleMergeCandidates( const PredictionUnit &pu, MergeCtx& triangl // above left const PredictionUnit *puAboveLeft = cs.getPURestricted( posLT.offset( -1, -1 ), pu, pu.chType ); bool 
isAvailableB2 = puAboveLeft && isDiffMER( pu, *puAboveLeft ) && CU::isInter( *puAboveLeft->cu ) - && !puAboveLeft->cu->cpr + && !puAboveLeft->cu->ibc ; if( isAvailableB2 ) @@ -3639,6 +3771,9 @@ void PU::getTriangleMergeCandidates( const PredictionUnit &pu, MergeCtx& triangl Position posC0; Position posC1 = pu.Y().center(); bool isAvailableC0 = false; +#if JVET_M0170_MRG_SHARELIST + bool isAvailableC1 = (posC1.x < pcv.lumaWidth) && (posC1.y < pcv.lumaHeight); +#endif if (((posRB.x + pcv.minCUWidth) < pcv.lumaWidth) && ((posRB.y + pcv.minCUHeight) < pcv.lumaHeight)) { @@ -3701,7 +3836,11 @@ void PU::getTriangleMergeCandidates( const PredictionUnit &pu, MergeCtx& triangl // C1 temporalMv.interDir = 0; +#if JVET_M0170_MRG_SHARELIST + existMV = isAvailableC1 && getColocatedMVP(pu, REF_PIC_LIST_0, posC1, cColMv, refIdx ); +#else existMV = getColocatedMVP(pu, REF_PIC_LIST_0, posC1, cColMv, refIdx ); +#endif if( existMV ) { temporalMv.isInter = true; @@ -3709,7 +3848,11 @@ void PU::getTriangleMergeCandidates( const PredictionUnit &pu, MergeCtx& triangl temporalMv.mv[0] = cColMv; temporalMv.refIdx[0] = refIdx; } +#if JVET_M0170_MRG_SHARELIST + existMV = isAvailableC1 && getColocatedMVP(pu, REF_PIC_LIST_1, posC1, cColMv, refIdx ); +#else existMV = getColocatedMVP(pu, REF_PIC_LIST_1, posC1, cColMv, refIdx ); +#endif if( existMV ) { temporalMv.interDir |= 2; @@ -4054,7 +4197,9 @@ void CU::resetMVDandMV2Int( CodingUnit& cu, InterPrediction *interPred ) PU::getInterMergeCandidates ( pu, mrgCtx , 0 ); +#if !JVET_M0068_M0171_MMVD_CLEANUP PU::restrictBiPredMergeCands( pu, mrgCtx ); +#endif mrgCtx.setMergeInfo( pu, pu.mergeIdx ); } @@ -4248,6 +4393,31 @@ void TU::setCbfAtDepth(TransformUnit &tu, const ComponentID &compID, const unsig tu.cbf[compID] |= ((cbf ? 
1 : 0) << depth); } +#if JVET_M0464_UNI_MTS +bool TU::isTSAllowed(const TransformUnit &tu, const ComponentID compID) +{ + bool tsAllowed = compID == COMPONENT_Y; + const int maxSize = tu.cs->pps->getPpsRangeExtension().getLog2MaxTransformSkipBlockSize(); + + tsAllowed &= tu.cs->pps->getUseTransformSkip(); + tsAllowed &= !tu.cu->transQuantBypass; + + SizeType transformSkipMaxSize = 1 << maxSize; + tsAllowed &= tu.lwidth() <= transformSkipMaxSize && tu.lheight() <= transformSkipMaxSize; + + return tsAllowed; +} + +bool TU::isMTSAllowed(const TransformUnit &tu, const ComponentID compID) +{ + bool mtsAllowed = compID == COMPONENT_Y; + const int maxSize = CU::isIntra( *tu.cu ) ? MTS_INTRA_MAX_CU_SIZE : MTS_INTER_MAX_CU_SIZE; + + mtsAllowed &= CU::isIntra( *tu.cu ) ? tu.cs->sps->getSpsNext().getUseIntraMTS() : tu.cs->sps->getSpsNext().getUseInterMTS(); + mtsAllowed &= ( tu.lwidth() <= maxSize && tu.lheight() <= maxSize ); + return mtsAllowed; +} +#else bool TU::hasTransformSkipFlag(const CodingStructure& cs, const CompArea& area) { uint32_t transformSkipLog2MaxSize = cs.pps->getPpsRangeExtension().getLog2MaxTransformSkipBlockSize(); @@ -4255,10 +4425,15 @@ bool TU::hasTransformSkipFlag(const CodingStructure& cs, const CompArea& area) SizeType transformSkipMaxSize = 1 << transformSkipLog2MaxSize; return area.width <= transformSkipMaxSize && area.height <= transformSkipMaxSize; } +#endif uint32_t TU::getGolombRiceStatisticsIndex(const TransformUnit &tu, const ComponentID &compID) { +#if JVET_M0464_UNI_MTS + const bool transformSkip = tu.mtsIdx==1; +#else const bool transformSkip = tu.transformSkip[compID]; +#endif const bool transquantBypass = tu.cu->transQuantBypass; //-------- @@ -4340,7 +4515,11 @@ uint32_t TU::getNumNonZeroCoeffsNonTS( const TransformUnit& tu, const bool bLuma uint32_t count = 0; for( uint32_t i = 0; i < ::getNumberValidTBlocks( *tu.cs->pcv ); i++ ) { +#if JVET_M0464_UNI_MTS + if( tu.blocks[i].valid() && ( isLuma(ComponentID(i)) ? 
tu.mtsIdx !=1 : true ) && TU::getCbf( tu, ComponentID( i ) ) ) +#else if( tu.blocks[i].valid() && !tu.transformSkip[i] && TU::getCbf( tu, ComponentID( i ) ) ) +#endif { if( isLuma ( tu.blocks[i].compID ) && !bLuma ) continue; if( isChroma( tu.blocks[i].compID ) && !bChroma ) continue; diff --git a/source/Lib/CommonLib/UnitTools.h b/source/Lib/CommonLib/UnitTools.h index b844652e5da8d9ebdce9fa509b2aff051053152b..0b7c97f00bbcb8f128e2befbc791a9d559ad5034 100644 --- a/source/Lib/CommonLib/UnitTools.h +++ b/source/Lib/CommonLib/UnitTools.h @@ -120,6 +120,9 @@ namespace PU #if JVET_L0090_PAIR_AVG bool addMergeHMVPCand(const Slice &slice, MergeCtx& mrgCtx, bool canFastExit, const int& mrgCandIdx, const uint32_t maxNumMergeCandMin1, int &cnt, const int prevCnt, bool isAvailableSubPu, unsigned subPuMvpPos , int mmvdList +#if JVET_M0170_MRG_SHARELIST + , bool isShared +#endif ); #else bool addMergeHMVPCand(const Slice &slice, MergeCtx& mrgCtx, bool isCandInter[MRG_MAX_NUM_CANDS], bool canFastExit, const int& mrgCandIdx, const uint32_t maxNumMergeCandMin1, int &cnt, const int prevCnt, bool isAvailableSubPu, unsigned subPuMvpPos @@ -139,11 +142,16 @@ namespace PU ); bool getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx &mrgCtx, bool& LICFlag, const int count , int mmvdList - , const int countCPR +#if !JVET_M0409_ATMVP_FIX + , const int countIBC +#endif ); bool getInterMergeSubPuRecurCand(const PredictionUnit &pu, MergeCtx &mrgCtx, const int count); bool isBiPredFromDifferentDir (const PredictionUnit &pu); void restrictBiPredMergeCands (const PredictionUnit &pu, MergeCtx& mrgCtx); +#if JVET_M0068_M0171_MMVD_CLEANUP + void restrictBiPredMergeCandsOne (PredictionUnit &pu); +#endif bool isLMCMode ( unsigned mode); bool isLMCModeEnabled (const PredictionUnit &pu, unsigned mode); @@ -155,7 +163,7 @@ namespace PU bool getTriangleWeights (const PredictionUnit &pu, MergeCtx &triangleMrgCtx, const uint8_t candIdx0, const uint8_t candIdx1); void spanTriangleMotionInfo ( 
PredictionUnit &pu, MergeCtx &triangleMrgCtx, const uint8_t mergeIdx, const bool splitDir, const uint8_t candIdx0, const uint8_t candIdx1); int32_t mappingRefPic (const PredictionUnit &pu, int32_t refPicPoc, bool targetRefPicList); - void getCprMVPsEncOnly(PredictionUnit &pu, Mv* MvPred, int& nbPred); + void getIbcMVPsEncOnly(PredictionUnit &pu, Mv* MvPred, int& nbPred); bool getDerivedBV(PredictionUnit &pu, const Mv& currentMv, Mv& derivedMv); bool isBlockVectorValid(PredictionUnit& pu, int xPos, int yPos, int width, int height, int picWidth, int picHeight, int xStartInCU, int yStartInCU, int xBv, int yBv, int ctuSize); } @@ -171,7 +179,12 @@ namespace TU bool getCbf (const TransformUnit &tu, const ComponentID &compID); bool getCbfAtDepth (const TransformUnit &tu, const ComponentID &compID, const unsigned &depth); void setCbfAtDepth ( TransformUnit &tu, const ComponentID &compID, const unsigned &depth, const bool &cbf); +#if JVET_M0464_UNI_MTS + bool isTSAllowed (const TransformUnit &tu, const ComponentID compID); + bool isMTSAllowed (const TransformUnit &tu, const ComponentID compID); +#else bool hasTransformSkipFlag (const CodingStructure& cs, const CompArea& area); +#endif uint32_t getGolombRiceStatisticsIndex (const TransformUnit &tu, const ComponentID &compID); #if HEVC_USE_MDCS uint32_t getCoefScanIdx (const TransformUnit &tu, const ComponentID &compID); diff --git a/source/Lib/CommonLib/dtrace_blockstatistics.cpp b/source/Lib/CommonLib/dtrace_blockstatistics.cpp index 6e94dd90471545effdaddf4fe37b0ca9c3191bc3..5971ab4046563e91164918e0f8c3608e9921a4fa 100644 --- a/source/Lib/CommonLib/dtrace_blockstatistics.cpp +++ b/source/Lib/CommonLib/dtrace_blockstatistics.cpp @@ -668,7 +668,7 @@ void writeAllData(const CodingStructure& cs, const UnitArea& ctuArea) DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::IMVMode), cu.imv); DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, 
GetBlockStatisticName(BlockStatistic::RootCbf), cu.rootCbf); DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::GBIIndex), cu.GBiIdx); - DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::CPRFlag), cu.cpr); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::IBCFlag), cu.ibc); } break; diff --git a/source/Lib/CommonLib/dtrace_blockstatistics.h b/source/Lib/CommonLib/dtrace_blockstatistics.h index df69c1f2eba7b73428e739a2b6ec53a6e17ecc42..34d457098ab571aa683c1cd49efbba568eee2515 100644 --- a/source/Lib/CommonLib/dtrace_blockstatistics.h +++ b/source/Lib/CommonLib/dtrace_blockstatistics.h @@ -108,7 +108,7 @@ enum class BlockStatistic { TriangleMVL0, //<< currently only uni-prediction enabled TriangleMVL1, //<< currently only uni-prediction enabled GBIIndex, - CPRFlag, + IBCFlag, // for dual tree // general Depth_Chroma, @@ -192,7 +192,7 @@ static const std::map<BlockStatistic, std::tuple<std::string, BlockStatisticType { BlockStatistic::TriangleMVL0, std::tuple<std::string, BlockStatisticType, std::string>{"TriangleMVL0", BlockStatisticType::VectorPolygon, "Scale: 4"}}, { BlockStatistic::TriangleMVL1, std::tuple<std::string, BlockStatisticType, std::string>{"TriangleMVL1", BlockStatisticType::VectorPolygon, "Scale: 4"}}, { BlockStatistic::GBIIndex, std::tuple<std::string, BlockStatisticType, std::string>{"GBIIndex", BlockStatisticType::Integer, "[0, 4]"}}, - { BlockStatistic::CPRFlag, std::tuple<std::string, BlockStatisticType, std::string>{"CPRFlag", BlockStatisticType::Flag, ""}}, + { BlockStatistic::IBCFlag, std::tuple<std::string, BlockStatisticType, std::string>{"IBCFlag", BlockStatisticType::Flag, ""}}, // for dual tree { BlockStatistic::Depth_Chroma, std::tuple<std::string, BlockStatisticType, std::string>{"Depth_Chroma", BlockStatisticType::Integer, "[0, 10]"}}, // todo: actual limits? 
{ BlockStatistic::QT_Depth_Chroma, std::tuple<std::string, BlockStatisticType, std::string>{"QT_Depth_Chroma", BlockStatisticType::Integer, "[0, 10]"}}, // todo: actual limits? diff --git a/source/Lib/CommonLib/x86/CprHashMapX86.h b/source/Lib/CommonLib/x86/IbcHashMapX86.h similarity index 92% rename from source/Lib/CommonLib/x86/CprHashMapX86.h rename to source/Lib/CommonLib/x86/IbcHashMapX86.h index 133fad437022b8728a04f820a02edc3ca87c31db..67e4c063d06741b1e4d7fe5dd7870326e6da83de 100644 --- a/source/Lib/CommonLib/x86/CprHashMapX86.h +++ b/source/Lib/CommonLib/x86/IbcHashMapX86.h @@ -33,11 +33,11 @@ /** * \file - * \brief Implementation of CprHashMap class + * \brief Implementation of IbcHashMap class */ #include "CommonDefX86.h" -#include "../CprHashMap.h" +#include "../IbcHashMap.h" #ifdef TARGET_SIMD_X86 @@ -50,12 +50,12 @@ static uint32_t simdComputeCrc32c16bit(uint32_t crc, const Pel pel) } template <X86_VEXT vext> -void CprHashMap::_initCprHashMapX86() +void IbcHashMap::_initIbcHashMapX86() { m_computeCrc32c = simdComputeCrc32c16bit<vext>; } -template void CprHashMap::_initCprHashMapX86<SIMDX86>(); +template void IbcHashMap::_initIbcHashMapX86<SIMDX86>(); #endif //#ifdef TARGET_SIMD_X86 diff --git a/source/Lib/CommonLib/x86/InitX86.cpp b/source/Lib/CommonLib/x86/InitX86.cpp index 14642e1611a5ad1b57d8d7e36dfe714c46626cf7..00f18ae12450d7db4c8c03c51cd6d3c31850b756 100644 --- a/source/Lib/CommonLib/x86/InitX86.cpp +++ b/source/Lib/CommonLib/x86/InitX86.cpp @@ -48,7 +48,7 @@ #include "CommonLib/AdaptiveLoopFilter.h" -#include "CommonLib/CprHashMap.h" +#include "CommonLib/IbcHashMap.h" #ifdef TARGET_SIMD_X86 @@ -167,8 +167,8 @@ void AdaptiveLoopFilter::initAdaptiveLoopFilterX86() } #endif -#if ENABLE_SIMD_OPT_CPR -void CprHashMap::initCprHashMapX86() +#if ENABLE_SIMD_OPT_IBC +void IbcHashMap::initIbcHashMapX86() { auto vext = read_x86_extension_flags(); switch (vext) @@ -177,7 +177,7 @@ void CprHashMap::initCprHashMapX86() case AVX2: case AVX: case SSE42: - 
_initCprHashMapX86<SSE42>(); + _initIbcHashMapX86<SSE42>(); break; case SSE41: default: diff --git a/source/Lib/CommonLib/x86/sse42/CprHashmap_sse42.cpp b/source/Lib/CommonLib/x86/sse42/CprHashmap_sse42.cpp deleted file mode 100644 index 2a26c1c63909d81c48b0f4f7f07bf36e46b76f2e..0000000000000000000000000000000000000000 --- a/source/Lib/CommonLib/x86/sse42/CprHashmap_sse42.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "../CprHashMapX86.h" diff --git a/source/Lib/CommonLib/x86/sse42/IbcHashmap_sse42.cpp b/source/Lib/CommonLib/x86/sse42/IbcHashmap_sse42.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5aca413b98ebec0d6d55418864d6dce88427736e --- /dev/null +++ b/source/Lib/CommonLib/x86/sse42/IbcHashmap_sse42.cpp @@ -0,0 +1 @@ +#include "../IbcHashMapX86.h" diff --git a/source/Lib/DecoderLib/CABACReader.cpp b/source/Lib/DecoderLib/CABACReader.cpp index 4105d63632773fd5d513262abe07b85c6110eb4a..6015b1921d223706e2740de44633a9e6e42c505d 100644 --- a/source/Lib/DecoderLib/CABACReader.cpp +++ b/source/Lib/DecoderLib/CABACReader.cpp @@ -149,7 +149,7 @@ bool CABACReader::coding_tree_unit( CodingStructure& cs, const UnitArea& area, i AlfSliceParam& alfSliceParam = cs.slice->getAlfSliceParam(); - if( cs.sps->getUseALF() && ( alfSliceParam.enabledFlag[COMPONENT_Y] || alfSliceParam.enabledFlag[COMPONENT_Cb] || alfSliceParam.enabledFlag[COMPONENT_Cr] ) ) + if( cs.sps->getALFEnabledFlag() && ( alfSliceParam.enabledFlag[COMPONENT_Y] || alfSliceParam.enabledFlag[COMPONENT_Cb] || alfSliceParam.enabledFlag[COMPONENT_Cr] ) ) { const PreCalcValues& pcv = *cs.pcv; @@ -231,7 +231,7 @@ void CABACReader::sao( CodingStructure& cs, unsigned ctuRsAddr ) { const SPS& sps = *cs.sps; - if( !sps.getUseSAO() ) + if( !sps.getSAOEnabledFlag() ) { return; } @@ -420,7 +420,18 @@ bool CABACReader::coding_tree( CodingStructure& cs, Partitioner& partitioner, CU pCuCtxChroma->isChromaQpAdjCoded = false; } } +#if JVET_M0170_MRG_SHARELIST + int startShareThisLevel = 0; +#endif + +#if 
JVET_M0421_SPLIT_SIG + const PartSplit splitMode = split_cu_mode( cs, partitioner ); + + CHECK( !partitioner.canSplit( splitMode, cs ), "Got an invalid split!" ); + if( splitMode != CU_DONT_SPLIT ) + { +#else const PartSplit implicitSplit = partitioner.getImplicitSplit( cs ); // QT @@ -440,6 +451,34 @@ bool CABACReader::coding_tree( CodingStructure& cs, Partitioner& partitioner, CU // quad-tree split if( qtSplit ) { +#endif +#if JVET_M0170_MRG_SHARELIST + const PartSplit split = splitMode; + int splitRatio = 1; + CHECK(!(split == CU_QUAD_SPLIT || split == CU_HORZ_SPLIT || split == CU_VERT_SPLIT + || split == CU_TRIH_SPLIT || split == CU_TRIV_SPLIT), "invalid split type"); + splitRatio = (split == CU_HORZ_SPLIT || split == CU_VERT_SPLIT) ? 1 : 2; + + bool isOneChildSmall = (((partitioner.currArea().lwidth())*(partitioner.currArea().lheight())) >> splitRatio) < MRG_SHARELIST_SHARSIZE; + + if ((((partitioner.currArea().lwidth())*(partitioner.currArea().lheight())) > (MRG_SHARELIST_SHARSIZE * 1))) + { + shareStateDec = NO_SHARE; + } + + if (shareStateDec == NO_SHARE)//init state + { + if (isOneChildSmall) + { + shareStateDec = SHARING;//share start state + startShareThisLevel = 1; + + shareParentPos = partitioner.currArea().lumaPos(); + shareParentSize.width = partitioner.currArea().lwidth(); + shareParentSize.height = partitioner.currArea().lheight(); + } + } +#endif if (CS::isDualITree(cs) && pPartitionerChroma != nullptr && (partitioner.currArea().lwidth() >= 64 || partitioner.currArea().lheight() >= 64)) { partitioner.splitCurrArea(CU_QUAD_SPLIT, cs); @@ -519,7 +558,11 @@ bool CABACReader::coding_tree( CodingStructure& cs, Partitioner& partitioner, CU } else { +#if JVET_M0421_SPLIT_SIG + partitioner.splitCurrArea( splitMode, cs ); +#else partitioner.splitCurrArea( CU_QUAD_SPLIT, cs ); +#endif do { if( !lastSegment && cs.area.blocks[partitioner.chType].contains( partitioner.currArea().blocks[partitioner.chType].pos() ) ) @@ -530,10 +573,17 @@ bool 
CABACReader::coding_tree( CodingStructure& cs, Partitioner& partitioner, CU partitioner.exitCurrSplit(); } +#if JVET_M0170_MRG_SHARELIST + if (startShareThisLevel == 1) + shareStateDec = NO_SHARE; +#endif return lastSegment; +#if !JVET_M0421_SPLIT_SIG } +#endif } +#if !JVET_M0421_SPLIT_SIG { // MT bool mtSplit = partitioner.canSplit( CU_MT_SPLIT, cs ); @@ -544,6 +594,33 @@ bool CABACReader::coding_tree( CodingStructure& cs, Partitioner& partitioner, CU if( splitMode != CU_DONT_SPLIT ) { +#if JVET_M0170_MRG_SHARELIST + const PartSplit split = splitMode; + int splitRatio = 1; + CHECK(!(split == CU_QUAD_SPLIT || split == CU_HORZ_SPLIT || split == CU_VERT_SPLIT + || split == CU_TRIH_SPLIT || split == CU_TRIV_SPLIT), "invalid split type"); + splitRatio = (split == CU_HORZ_SPLIT || split == CU_VERT_SPLIT) ? 1 : 2; + + bool isOneChildSmall = (((partitioner.currArea().lwidth())*(partitioner.currArea().lheight())) >> splitRatio) < MRG_SHARELIST_SHARSIZE; + + if ((((partitioner.currArea().lwidth())*(partitioner.currArea().lheight())) > (MRG_SHARELIST_SHARSIZE * 1))) + { + shareStateDec = NO_SHARE; + } + + if (shareStateDec == NO_SHARE)//init state + { + if (isOneChildSmall) + { + shareStateDec = SHARING;//share start state + startShareThisLevel = 1; + + shareParentPos = partitioner.currArea().lumaPos(); + shareParentSize.width = partitioner.currArea().lwidth(); + shareParentSize.height = partitioner.currArea().lheight(); + } + } +#endif partitioner.splitCurrArea( splitMode, cs ); do @@ -555,12 +632,16 @@ bool CABACReader::coding_tree( CodingStructure& cs, Partitioner& partitioner, CU } while( partitioner.nextPart( cs ) ); partitioner.exitCurrSplit(); +#if JVET_M0170_MRG_SHARELIST + if (startShareThisLevel == 1) + shareStateDec = NO_SHARE; +#endif return lastSegment; } } } - +#endif CodingUnit& cu = cs.addCU( CS::getArea( cs, currArea, partitioner.chType ), partitioner.chType ); partitioner.setCUData( cu ); @@ -588,13 +669,91 @@ bool CABACReader::coding_tree( CodingStructure& 
cs, Partitioner& partitioner, CU cu.chromaQpAdj = cs.chromaQpAdj; //NOTE: CU chroma QP adjustment can be changed by adjustment signaling at TU level // coding unit +#if JVET_M0170_MRG_SHARELIST + cu.shareParentPos = (shareStateDec == SHARING) ? shareParentPos : partitioner.currArea().lumaPos(); + cu.shareParentSize = (shareStateDec == SHARING) ? shareParentSize : partitioner.currArea().lumaSize(); +#endif bool isLastCtu = coding_unit( cu, partitioner, cuCtx ); DTRACE( g_trace_ctx, D_QP, "x=%d, y=%d, w=%d, h=%d, qp=%d\n", cu.Y().x, cu.Y().y, cu.Y().width, cu.Y().height, cu.qp ); +#if JVET_M0170_MRG_SHARELIST + if (startShareThisLevel == 1) + shareStateDec = NO_SHARE; +#endif return isLastCtu; } +#if JVET_M0421_SPLIT_SIG +PartSplit CABACReader::split_cu_mode( CodingStructure& cs, Partitioner &partitioner ) +{ + RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__SPLIT_FLAG, partitioner.currArea().blocks[partitioner.chType].size(), partitioner.chType ); + + PartSplit mode = CU_DONT_SPLIT; + + bool canNo, canQt, canBh, canBv, canTh, canTv; + partitioner.canSplit( cs, canNo, canQt, canBh, canBv, canTh, canTv ); + + bool canSpl[6] = { canNo, canQt, canBh, canBv, canTh, canTv }; + + unsigned ctxSplit = 0, ctxQtSplit = 0, ctxBttHV = 0, ctxBttH12 = 0, ctxBttV12; + DeriveCtx::CtxSplit( cs, partitioner, ctxSplit, ctxQtSplit, ctxBttHV, ctxBttH12, ctxBttV12, canSpl ); + + bool isSplit = canBh || canBv || canTh || canTv || canQt; + + if( canNo && isSplit ) + { + isSplit = m_BinDecoder.decodeBin( Ctx::SplitFlag( ctxSplit ) ); + } + + DTRACE( g_trace_ctx, D_SYNTAX, "split_cu_mode() ctx=%d split=%d\n", ctxSplit, isSplit ); + + if( !isSplit ) + { + return CU_DONT_SPLIT; + } + + const bool canBtt = canBh || canBv || canTh || canTv; + bool isQt = canQt; + + if( isQt && canBtt ) + { + isQt = m_BinDecoder.decodeBin( Ctx::SplitQtFlag( ctxQtSplit ) ); + } + + DTRACE( g_trace_ctx, D_SYNTAX, "split_cu_mode() ctx=%d qt=%d\n", ctxQtSplit, isQt ); + + if( isQt ) + { + 
return CU_QUAD_SPLIT; + } + + const bool canHor = canBh || canTh; + bool isVer = canBv || canTv; + + if( isVer && canHor ) + { + isVer = m_BinDecoder.decodeBin( Ctx::SplitHvFlag( ctxBttHV ) ); + } + + const bool can14 = isVer ? canTv : canTh; + bool is12 = isVer ? canBv : canBh; + + if( is12 && can14 ) + { + is12 = m_BinDecoder.decodeBin( Ctx::Split12Flag( isVer ? ctxBttV12 : ctxBttH12 ) ); + } + + if ( isVer && is12 ) mode = CU_VERT_SPLIT; + else if( isVer && !is12 ) mode = CU_TRIV_SPLIT; + else if( !isVer && is12 ) mode = CU_HORZ_SPLIT; + else mode = CU_TRIH_SPLIT; + + DTRACE( g_trace_ctx, D_SYNTAX, "split_cu_mode() ctxHv=%d ctx12=%d mode=%d\n", ctxBttHV, isVer ? ctxBttV12 : ctxBttH12, mode ); + + return mode; +} +#else PartSplit CABACReader::split_cu_mode_mt( CodingStructure& cs, Partitioner &partitioner ) { RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__SPLIT_FLAG ); @@ -606,6 +765,50 @@ PartSplit CABACReader::split_cu_mode_mt( CodingStructure& cs, Partitioner &parti unsigned width = partitioner.currArea().lumaSize().width; unsigned height = partitioner.currArea().lumaSize().height; +#if REMOVE_BIN_DECISION_TREE + unsigned btSCtxId = width == height ? 0 : ( width > height ? 
1 : 2 ); + + const bool canNo = partitioner.canSplit( CU_DONT_SPLIT, cs ); + const bool canBh = partitioner.canSplit( CU_HORZ_SPLIT, cs ); + const bool canBv = partitioner.canSplit( CU_VERT_SPLIT, cs ); + const bool canTh = partitioner.canSplit( CU_TRIH_SPLIT, cs ); + const bool canTv = partitioner.canSplit( CU_TRIV_SPLIT, cs ); + + bool isSplit = canBh || canBv || canTh || canTv; + + if( canNo && isSplit ) + { + isSplit = m_BinDecoder.decodeBin( Ctx::BTSplitFlag( ctxIdBT ) ); + } + + if( !isSplit ) + { + DTRACE( g_trace_ctx, D_SYNTAX, "split_cu_mode_mt() ctx=%d split=%d\n", ctxIdBT, mode ); + + return mode; + } + + const bool canHor = canBh || canTh; + bool isVer = canBv || canTv; + + if( isVer && canHor ) + { + isVer = m_BinDecoder.decodeBin( Ctx::BTSplitFlag( 12 + btSCtxId ) ); + } + + const bool can14 = isVer ? canTv : canTh; + bool is12 = isVer ? canBv : canBh; + + if( is12 && can14 ) + { + is12 = m_BinDecoder.decodeBin( Ctx::BTSplitFlag( 15 ) ); + } + + if ( isVer && is12 ) mode = CU_VERT_SPLIT; + else if( isVer && !is12 ) mode = CU_TRIV_SPLIT; + else if( !isVer && is12 ) mode = CU_HORZ_SPLIT; + else mode = CU_TRIH_SPLIT; +#else DecisionTree dt( g_mtSplitDTT ); dt.setAvail( DTT_SPLIT_BT_HORZ, partitioner.canSplit( CU_HORZ_SPLIT, cs ) ); @@ -626,6 +829,7 @@ PartSplit CABACReader::split_cu_mode_mt( CodingStructure& cs, Partitioner &parti mode = id == DTT_SPLIT_NO_SPLIT ? 
CU_DONT_SPLIT : PartSplit( id ); +#endif DTRACE( g_trace_ctx, D_SYNTAX, "split_cu_mode_mt() ctx=%d split=%d\n", ctxIdBT, mode ); return mode; @@ -649,6 +853,7 @@ bool CABACReader::split_cu_flag( CodingStructure& cs, Partitioner &partitioner ) return split; } +#endif //================================================================================ // clause 7.3.8.5 @@ -690,6 +895,10 @@ bool CABACReader::coding_unit( CodingUnit &cu, Partitioner &partitioner, CUCtx& { cs.addTU ( cu, partitioner.chType ); PredictionUnit& pu = cs.addPU( cu, partitioner.chType ); +#if JVET_M0170_MRG_SHARELIST + pu.shareParentPos = cu.shareParentPos; + pu.shareParentSize = cu.shareParentSize; +#endif MergeCtx mrgCtx; prediction_unit ( pu, mrgCtx ); return end_of_ctu( cu, cuCtx ); @@ -774,7 +983,7 @@ void CABACReader::imv_mode( CodingUnit& cu, MergeCtx& mrgCtx ) unsigned value = 0; unsigned ctxId = DeriveCtx::CtxIMVFlag( cu ); - if (cu.firstPU->interDir == 1 && cu.cs->slice->getRefPic(REF_PIC_LIST_0, cu.firstPU->refIdx[REF_PIC_LIST_0])->getPOC() == cu.cs->slice->getPOC()) // the first bin of IMV flag does need to be signaled in CPR block + if (cu.firstPU->interDir == 1 && cu.cs->slice->getRefPic(REF_PIC_LIST_0, cu.firstPU->refIdx[REF_PIC_LIST_0])->getPOC() == cu.cs->slice->getPOC()) // the first bin of IMV flag does need to be signaled in IBC block value = 1; else value = m_BinDecoder.decodeBin( Ctx::ImvFlag( ctxId ) ); @@ -795,7 +1004,11 @@ void CABACReader::pred_mode( CodingUnit& cu ) { RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__PRED_MODE ); +#if JVET_M0502_PRED_MODE_CTX + if( cu.cs->slice->isIntra() || m_BinDecoder.decodeBin( Ctx::PredMode(DeriveCtx::CtxPredModeFlag(cu)) ) ) +#else if( cu.cs->slice->isIntra() || m_BinDecoder.decodeBin( Ctx::PredMode() ) ) +#endif { cu.predMode = MODE_INTRA; } @@ -808,7 +1021,7 @@ void CABACReader::pred_mode( CodingUnit& cu ) void CABACReader::pcm_flag( CodingUnit& cu, Partitioner &partitioner ) { const SPS& sps = *cu.cs->sps; - 
if( !sps.getUsePCM() || partitioner.currArea().lwidth() > (1 << sps.getPCMLog2MaxSize()) || partitioner.currArea().lwidth() < (1 << sps.getPCMLog2MinSize()) + if( !sps.getPCMEnabledFlag() || partitioner.currArea().lwidth() > (1 << sps.getPCMLog2MaxSize()) || partitioner.currArea().lwidth() < (1 << sps.getPCMLog2MinSize()) || partitioner.currArea().lheight() > (1 << sps.getPCMLog2MaxSize()) || partitioner.currArea().lheight() < (1 << sps.getPCMLog2MinSize()) ) { cu.ipcm = false; @@ -829,13 +1042,17 @@ void CABACReader::cu_pred_data( CodingUnit &cu ) if (!cu.Y().valid()) // dual tree chroma CU { cu.predMode = MODE_INTER; - cu.cpr = true; + cu.ibc = true; return; } MergeCtx mrgCtx; for( auto &pu : CU::traversePUs( cu ) ) { +#if JVET_M0170_MRG_SHARELIST + pu.shareParentPos = cu.shareParentPos; + pu.shareParentSize = cu.shareParentSize; +#endif prediction_unit( pu, mrgCtx ); } @@ -1206,6 +1423,9 @@ void CABACReader::prediction_unit( PredictionUnit& pu, MergeCtx& mrgCtx ) { inter_pred_idc( pu ); affine_flag ( *pu.cu ); +#if JVET_M0444_SMVD + smvd_mode( pu ); +#endif if( pu.interDir != 2 /* PRED_L1 */ ) { @@ -1228,6 +1448,10 @@ void CABACReader::prediction_unit( PredictionUnit& pu, MergeCtx& mrgCtx ) if( pu.interDir != 1 /* PRED_L0 */ ) { +#if JVET_M0444_SMVD + if ( pu.cu->smvdMode != 1 ) + { +#endif ref_idx ( pu, REF_PIC_LIST_1 ); if( pu.cu->cs->slice->getMvdL1ZeroFlag() && pu.interDir == 3 /* PRED_BI */ ) { @@ -1249,6 +1473,9 @@ void CABACReader::prediction_unit( PredictionUnit& pu, MergeCtx& mrgCtx ) { mvd_coding( pu.mvd[REF_PIC_LIST_1] ); } +#if JVET_M0444_SMVD + } +#endif mvp_flag ( pu, REF_PIC_LIST_1 ); } } @@ -1260,9 +1487,40 @@ void CABACReader::prediction_unit( PredictionUnit& pu, MergeCtx& mrgCtx ) pu.cu->GBiIdx = GBI_DEFAULT; } +#if JVET_M0444_SMVD + if ( pu.cu->smvdMode ) + { + RefPicList eCurRefList = (RefPicList)(pu.cu->smvdMode - 1); + pu.mvd[1 - eCurRefList].set( -pu.mvd[eCurRefList].hor, -pu.mvd[eCurRefList].ver ); + pu.refIdx[1 - eCurRefList] = 
pu.cs->slice->getSymRefIdx( 1 - eCurRefList ); + } +#endif + PU::spanMotionInfo( pu, mrgCtx ); } +#if JVET_M0444_SMVD +void CABACReader::smvd_mode( PredictionUnit& pu ) +{ + pu.cu->smvdMode = 0; + if ( pu.interDir != 3 || pu.cu->affine ) + { + return; + } + + if ( pu.cs->slice->getBiDirPred() == false ) + { + return; + } + + RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__SYMMVD_FLAG ); + + pu.cu->smvdMode = m_BinDecoder.decodeBin( Ctx::SmvdFlag() ) ? 1 : 0; + + DTRACE( g_trace_ctx, D_SYNTAX, "symmvd_flag() symmvd=%d pos=(%d,%d) size=%dx%d\n", pu.cu->smvdMode ? 1 : 0, pu.lumaPos().x, pu.lumaPos().y, pu.lumaSize().width, pu.lumaSize().height ); +} +#endif + void CABACReader::subblock_merge_flag( CodingUnit& cu ) { if ( cu.firstPU->mergeFlag && (cu.firstPU->mmvdMergeFlag || cu.mmvdSkip) ) @@ -1270,7 +1528,7 @@ void CABACReader::subblock_merge_flag( CodingUnit& cu ) return; } - if ( !cu.cs->slice->isIntra() && (cu.cs->sps->getSpsNext().getUseAffine() || cu.cs->sps->getSpsNext().getUseSubPuMvp()) && cu.lumaSize().width >= 8 && cu.lumaSize().height >= 8 ) + if ( !cu.cs->slice->isIntra() && (cu.cs->sps->getSpsNext().getUseAffine() || cu.cs->sps->getSBTMVPEnabledFlag()) && cu.lumaSize().width >= 8 && cu.lumaSize().height >= 8 ) { RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__AFFINE_FLAG ); @@ -1342,7 +1600,7 @@ void CABACReader::merge_idx( PredictionUnit& pu ) { if ( m_BinDecoder.decodeBin( Ctx::AffMergeIdx() ) ) { - bool useExtCtx = pu.cs->sps->getSpsNext().getUseSubPuMvp(); + bool useExtCtx = pu.cs->sps->getSBTMVPEnabledFlag(); pu.mergeIdx++; for ( ; pu.mergeIdx < numCandminus1; pu.mergeIdx++ ) { @@ -1508,6 +1766,14 @@ void CABACReader::ref_idx( PredictionUnit &pu, RefPicList eRefList ) { RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__REF_FRM_IDX ); +#if JVET_M0444_SMVD + if ( pu.cu->smvdMode ) + { + pu.refIdx[eRefList] = pu.cs->slice->getSymRefIdx( eRefList ); + return; + } +#endif + int numRef = 
pu.cs->slice->getNumRefIdx(eRefList); if( numRef <= 1 || !m_BinDecoder.decodeBin( Ctx::RefPic() ) ) { @@ -1685,6 +1951,18 @@ void CABACReader::triangle_mode( CodingUnit& cu ) return; } +#if JVET_M0118_M0185_TRIANGLE_FLAG_FIX + if ( cu.firstPU->mmvdMergeFlag || cu.mmvdSkip ) + { + return; + } + + if ( cu.firstPU->mhIntraFlag ) + { + return; + } +#endif + unsigned flag_idx = DeriveCtx::CtxTriangleFlag( cu ); cu.triangle = m_BinDecoder.decodeBin( Ctx::TriangleFlag(flag_idx) ); @@ -1773,7 +2051,9 @@ void CABACReader::transform_tree( CodingStructure &cs, Partitioner &partitioner, if( split ) { { +#if !JVET_M0464_UNI_MTS if( trDepth == 0 ) emt_cu_flag( cu ); +#endif if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) ) { @@ -1857,7 +2137,9 @@ void CABACReader::transform_tree( CodingStructure &cs, Partitioner &partitioner, TU::setCbfAtDepth( tu, COMPONENT_Cr, trDepth, ( chromaCbfs.Cr ? 1 : 0 ) ); } +#if !JVET_M0464_UNI_MTS if( trDepth == 0 && TU::getCbfAtDepth( tu, COMPONENT_Y, 0 ) ) emt_cu_flag( cu ); +#endif transform_unit( tu, cuCtx, chromaCbfs ); } @@ -2054,14 +2336,22 @@ void CABACReader::residual_coding( TransformUnit& tu, ComponentID compID ) DTRACE( g_trace_ctx, D_SYNTAX, "residual_coding() etype=%d pos=(%d,%d) size=%dx%d predMode=%d\n", tu.blocks[compID].compID, tu.blocks[compID].x, tu.blocks[compID].y, tu.blocks[compID].width, tu.blocks[compID].height, cu.predMode ); // parse transform skip and explicit rdpcm mode +#if JVET_M0464_UNI_MTS + mts_coding ( tu, compID ); +#else transform_skip_flag( tu, compID ); +#endif explicit_rdpcm_mode( tu, compID ); #if HEVC_USE_SIGN_HIDING // determine sign hiding bool signHiding = ( cu.cs->slice->getSignDataHidingEnabledFlag() && !cu.transQuantBypass && tu.rdpcm[compID] == RDPCM_OFF ); +#if JVET_M0464_UNI_MTS + if( signHiding && CU::isIntra(cu) && CU::isRDPCMEnabled(cu) && tu.mtsIdx==1 ) +#else if( signHiding && CU::isIntra(cu) && CU::isRDPCMEnabled(cu) && tu.transformSkip[compID] ) +#endif { const ChannelType chType = 
toChannelType( compID ); const unsigned intraMode = PU::getFinalIntraMode( *cu.cs->getPU( tu.blocks[compID].pos(), chType ), chType ); @@ -2079,7 +2369,9 @@ void CABACReader::residual_coding( TransformUnit& tu, ComponentID compID ) CoeffCodingContext cctx ( tu, compID ); #endif TCoeff* coeff = tu.getCoeffs( compID ).buf; +#if !JVET_M0464_UNI_MTS unsigned numSig = 0; +#endif // parse last coeff position cctx.setScanPosLast( last_sig_coeff( cctx ) ); @@ -2088,21 +2380,25 @@ void CABACReader::residual_coding( TransformUnit& tu, ComponentID compID ) const int stateTransTab = ( tu.cs->slice->getDepQuantEnabledFlag() ? 32040 : 0 ); int state = 0; +#if !JVET_M0464_UNI_MTS bool useEmt = ( cu.cs->sps->getSpsNext().getUseIntraEMT() && cu.predMode == MODE_INTRA ) || ( cu.cs->sps->getSpsNext().getUseInterEMT() && cu.predMode != MODE_INTRA ); useEmt = useEmt && isLuma(compID); +#endif for( int subSetId = ( cctx.scanPosLast() >> cctx.log2CGSize() ); subSetId >= 0; subSetId--) { cctx.initSubblock ( subSetId ); residual_coding_subblock( cctx, coeff, stateTransTab, state ); +#if !JVET_M0464_UNI_MTS if (useEmt) { numSig += cctx.emtNumSigCoeff(); cctx.setEmtNumSigCoeff( 0 ); } +#endif } - +#if !JVET_M0464_UNI_MTS if( useEmt && !tu.transformSkip[compID] && compID == COMPONENT_Y && tu.cu->emtFlag ) { if( CU::isIntra( *tu.cu ) ) @@ -2114,9 +2410,55 @@ void CABACReader::residual_coding( TransformUnit& tu, ComponentID compID ) emt_tu_index( tu ); } } +#endif } +#if JVET_M0464_UNI_MTS +void CABACReader::mts_coding( TransformUnit& tu, ComponentID compID ) +{ + const CodingUnit &cu = *tu.cu; + const bool tsAllowed = TU::isTSAllowed ( tu, compID ); + const bool mtsAllowed = TU::isMTSAllowed( tu, compID ); + + if( !mtsAllowed && !tsAllowed ) return; + + int symbol = 0; + int ctxIdx = 0; + + if( tsAllowed ) + { + ctxIdx = 6; + symbol = m_BinDecoder.decodeBin( Ctx::MTSIndex( ctxIdx ) ); + tu.mtsIdx = 1-symbol; // 1 = TS + } + + if( tu.mtsIdx != 1 ) + { + if( mtsAllowed ) + { + ctxIdx = std::min( 
(int)cu.qtDepth, 5 ); + symbol = m_BinDecoder.decodeBin( Ctx::MTSIndex( ctxIdx ) ); + + if( symbol ) + { + ctxIdx = 7; + tu.mtsIdx = 2; // mtsIdx = 2 -- 4 + for( int i = 0; i < 3; i++, ctxIdx++ ) + { + symbol = m_BinDecoder.decodeBin( Ctx::MTSIndex( ctxIdx ) ); + tu.mtsIdx += symbol; + if( !symbol ) + { + break; + } + } + } + } + } + DTRACE( g_trace_ctx, D_SYNTAX, "mts_coding() etype=%d pos=(%d,%d) mtsIdx=%d\n", COMPONENT_Y, cu.lx(), cu.ly(), tu.mtsIdx ); +} +#else void CABACReader::transform_skip_flag( TransformUnit& tu, ComponentID compID ) { @@ -2195,7 +2537,7 @@ void CABACReader::emt_cu_flag( CodingUnit& cu ) DTRACE( g_trace_ctx, D_SYNTAX, "emt_cu_flag() etype=%d pos=(%d,%d) emtCuFlag=%d\n", COMPONENT_Y, cu.lx(), cu.ly(), ( int ) cu.emtFlag ); } } - +#endif void CABACReader::explicit_rdpcm_mode( TransformUnit& tu, ComponentID compID ) { @@ -2203,7 +2545,11 @@ void CABACReader::explicit_rdpcm_mode( TransformUnit& tu, ComponentID compID ) tu.rdpcm[compID] = RDPCM_OFF; +#if JVET_M0464_UNI_MTS + if( !CU::isIntra(cu) && CU::isRDPCMEnabled(cu) && ( tu.mtsIdx==1 || cu.transQuantBypass ) ) +#else if( !CU::isIntra(cu) && CU::isRDPCMEnabled(cu) && ( tu.transformSkip[compID] || cu.transQuantBypass ) ) +#endif { RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE( STATS__EXPLICIT_RDPCM_BITS, tu.blocks[tu.chType].lumaSize() ); @@ -2332,13 +2678,23 @@ void CABACReader::residual_coding_subblock( CoeffCodingContext& cctx, TCoeff* co #endif int numNonZero = 0; bool is2x2subblock = ( cctx.log2CGSize() == 2 ); +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS + int remRegBins = ( is2x2subblock ? MAX_NUM_REG_BINS_2x2SUBBLOCK : MAX_NUM_REG_BINS_4x4SUBBLOCK ); +#else int remGt2Bins = ( is2x2subblock ? MAX_NUM_GT2_BINS_2x2SUBBLOCK : MAX_NUM_GT2_BINS_4x4SUBBLOCK ); int remRegBins = ( is2x2subblock ? 
MAX_NUM_REG_BINS_2x2SUBBLOCK : MAX_NUM_REG_BINS_4x4SUBBLOCK ) - remGt2Bins; +#endif int firstPosMode2 = minSubPos - 1; +#if !JVET_M0173_MOVE_GT2_TO_FIRST_PASS int firstPosMode1 = minSubPos - 1; +#endif int sigBlkPos[ 1 << MLS_CG_SIZE ]; +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS + for( ; nextSigPos >= minSubPos && remRegBins >= 4; nextSigPos-- ) +#else for( ; nextSigPos >= minSubPos && remRegBins >= 3; nextSigPos-- ) +#endif { int blkPos = cctx.blockPos( nextSigPos ); unsigned sigFlag = ( !numNonZero && nextSigPos == inferSigPos ); @@ -2367,6 +2723,9 @@ void CABACReader::residual_coding_subblock( CoeffCodingContext& cctx, TCoeff* co remRegBins--; unsigned parFlag = 0; +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS + unsigned gt2Flag = 0; +#endif if( gt1Flag ) { RExt__DECODER_DEBUG_BIT_STATISTICS_SET( ctype_par ); @@ -2374,19 +2733,33 @@ void CABACReader::residual_coding_subblock( CoeffCodingContext& cctx, TCoeff* co DTRACE( g_trace_ctx, D_SYNTAX_RESI, "par_flag() bin=%d ctx=%d\n", parFlag, cctx.parityCtxIdAbs( ctxOff ) ); remRegBins--; +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS + RExt__DECODER_DEBUG_BIT_STATISTICS_SET(ctype_gt2); + gt2Flag = m_BinDecoder.decodeBin( cctx.greater2CtxIdAbs( ctxOff ) ); + DTRACE( g_trace_ctx, D_SYNTAX_RESI, "gt2_flag() bin=%d ctx=%d\n", gt2Flag, cctx.greater2CtxIdAbs( ctxOff ) ); + remRegBins--; +#else if( remGt2Bins && !--remGt2Bins ) { firstPosMode1 = nextSigPos - 1; } +#endif } +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS + coeff[ blkPos ] += 1 + parFlag + gt1Flag + (gt2Flag << 1); +#else coeff[ blkPos ] += 1 + parFlag + gt1Flag; +#endif } state = ( stateTransTable >> ((state<<2)+((coeff[blkPos]&1)<<1)) ) & 3; } firstPosMode2 = nextSigPos; +#if !JVET_M0173_MOVE_GT2_TO_FIRST_PASS firstPosMode1 = ( firstPosMode1 > firstPosMode2 ? 
firstPosMode1 : firstPosMode2 ); +#endif +#if !JVET_M0173_MOVE_GT2_TO_FIRST_PASS //===== 2nd PASS: gt2 ===== for( int scanPos = firstSigPos; scanPos > firstPosMode1; scanPos-- ) { @@ -2400,10 +2773,17 @@ void CABACReader::residual_coding_subblock( CoeffCodingContext& cctx, TCoeff* co tcoeff += (gt2Flag<<1); } } +#endif +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS + //===== 2nd PASS: Go-rice codes ===== + unsigned ricePar = 0; + for( int scanPos = firstSigPos; scanPos > firstPosMode2; scanPos-- ) +#else //===== 3rd PASS: Go-rice codes ===== unsigned ricePar = 0; for( int scanPos = firstSigPos; scanPos > firstPosMode1; scanPos-- ) +#endif { TCoeff& tcoeff = coeff[ cctx.blockPos( scanPos ) ]; if( tcoeff >= 4 ) @@ -2418,6 +2798,7 @@ void CABACReader::residual_coding_subblock( CoeffCodingContext& cctx, TCoeff* co } } } +#if !JVET_M0173_MOVE_GT2_TO_FIRST_PASS for( int scanPos = firstPosMode1; scanPos > firstPosMode2; scanPos-- ) { TCoeff& tcoeff = coeff[ cctx.blockPos( scanPos ) ]; @@ -2433,6 +2814,7 @@ void CABACReader::residual_coding_subblock( CoeffCodingContext& cctx, TCoeff* co } } } +#endif //===== coeff bypass ==== for( int scanPos = firstPosMode2; scanPos >= minSubPos; scanPos-- ) @@ -2486,7 +2868,9 @@ void CABACReader::residual_coding_subblock( CoeffCodingContext& cctx, TCoeff* co coeff[ sigBlkPos[k] ] = ( sumAbs & 1 ? 
-AbsCoeff : AbsCoeff ); } #endif +#if !JVET_M0464_UNI_MTS cctx.setEmtNumSigCoeff( numNonZero ); +#endif } @@ -2577,7 +2961,7 @@ unsigned CABACReader::exp_golomb_eqprob( unsigned count ) return symbol; } - +#if !REMOVE_BIN_DECISION_TREE unsigned CABACReader::decode_sparse_dt( DecisionTree& dt ) { dt.reduce(); @@ -2626,3 +3010,4 @@ unsigned CABACReader::decode_sparse_dt( DecisionTree& dt ) return dt.dtt.ids[offset]; } +#endif diff --git a/source/Lib/DecoderLib/CABACReader.h b/source/Lib/DecoderLib/CABACReader.h index c1e2f3e75b73c95943a748ba3c4a7dee17bfab1e..8e5586e5007f95e5575812bfd5fbe899116e6aca 100644 --- a/source/Lib/DecoderLib/CABACReader.h +++ b/source/Lib/DecoderLib/CABACReader.h @@ -48,7 +48,11 @@ class CABACReader { public: +#if JVET_M0170_MRG_SHARELIST + CABACReader(BinDecoderBase& binDecoder) : shareStateDec(0), m_BinDecoder(binDecoder), m_Bitstream(0) {} +#else CABACReader( BinDecoderBase& binDecoder ) : m_BinDecoder( binDecoder ), m_Bitstream( 0 ) {} +#endif virtual ~CABACReader() {} public: @@ -70,8 +74,12 @@ public: // coding (quad)tree (clause 7.3.8.4) bool coding_tree ( CodingStructure& cs, Partitioner& pm, CUCtx& cuCtx, Partitioner* pPartitionerChroma = nullptr, CUCtx* pCuCtxChroma = nullptr); +#if JVET_M0421_SPLIT_SIG + PartSplit split_cu_mode ( CodingStructure& cs, Partitioner& pm ); +#else bool split_cu_flag ( CodingStructure& cs, Partitioner& pm ); PartSplit split_cu_mode_mt ( CodingStructure& cs, Partitioner& pm ); +#endif // coding unit (clause 7.3.8.5) bool coding_unit ( CodingUnit& cu, Partitioner& pm, CUCtx& cuCtx ); @@ -105,6 +113,9 @@ public: void MHIntra_flag ( PredictionUnit& pu ); void MHIntra_luma_pred_modes ( CodingUnit& cu ); void triangle_mode ( CodingUnit& cu ); + #if JVET_M0444_SMVD + void smvd_mode ( PredictionUnit& pu ); +#endif // pcm samples (clause 7.3.8.7) void pcm_samples ( TransformUnit& tu ); @@ -123,9 +134,13 @@ public: // residual coding (clause 7.3.8.11) void residual_coding ( TransformUnit& tu, ComponentID compID ); 
+#if JVET_M0464_UNI_MTS + void mts_coding ( TransformUnit& tu, ComponentID compID ); +#else void transform_skip_flag ( TransformUnit& tu, ComponentID compID ); void emt_tu_index ( TransformUnit& tu ); void emt_cu_flag ( CodingUnit& cu ); +#endif void explicit_rdpcm_mode ( TransformUnit& tu, ComponentID compID ); int last_sig_coeff ( CoeffCodingContext& cctx ); void residual_coding_subblock ( CoeffCodingContext& cctx, TCoeff* coeff, const int stateTransTable, int& state ); @@ -137,11 +152,18 @@ private: unsigned unary_max_symbol ( unsigned ctxId0, unsigned ctxIdN, unsigned maxSymbol ); unsigned unary_max_eqprob ( unsigned maxSymbol ); unsigned exp_golomb_eqprob ( unsigned count ); +#if !REMOVE_BIN_DECISION_TREE unsigned decode_sparse_dt ( DecisionTree& dt ); +#endif unsigned get_num_bits_read () { return m_BinDecoder.getNumBitsRead(); } void xReadTruncBinCode(uint32_t& symbol, uint32_t maxSymbol); - +#if JVET_M0170_MRG_SHARELIST +public: + int shareStateDec; + Position shareParentPos; + Size shareParentSize; +#endif private: BinDecoderBase& m_BinDecoder; InputBitstream* m_Bitstream; diff --git a/source/Lib/DecoderLib/DecCu.cpp b/source/Lib/DecoderLib/DecCu.cpp index c18a57b9b27fb195067add00b4e428163a1e2fa9..6913f67f690f184b7f47a0e3664118494d989372 100644 --- a/source/Lib/DecoderLib/DecCu.cpp +++ b/source/Lib/DecoderLib/DecCu.cpp @@ -81,14 +81,42 @@ void DecCu::init( TrQuant* pcTrQuant, IntraPrediction* pcIntra, InterPrediction* void DecCu::decompressCtu( CodingStructure& cs, const UnitArea& ctuArea ) { + const int maxNumChannelType = cs.pcv->chrFormat != CHROMA_400 && CS::isDualITree( cs ) ? 
2 : 1; +#if JVET_M0170_MRG_SHARELIST + if (!cs.pcv->isEncoder) + { + m_shareStateDec = NO_SHARE; + } + bool sharePrepareCondition = ((!cs.pcv->isEncoder) && (!(cs.slice->isIntra()))); +#endif for( int ch = 0; ch < maxNumChannelType; ch++ ) { const ChannelType chType = ChannelType( ch ); +#if JVET_M0170_MRG_SHARELIST + Position prevTmpPos; + prevTmpPos.x = -1; prevTmpPos.y = -1; +#endif for( auto &currCU : cs.traverseCUs( CS::getArea( cs, ctuArea, chType ), chType ) ) { +#if JVET_M0170_MRG_SHARELIST + if(sharePrepareCondition) + { + if ((currCU.shareParentPos.x >= 0) && (!(currCU.shareParentPos.x == prevTmpPos.x && currCU.shareParentPos.y == prevTmpPos.y))) + { + m_shareStateDec = GEN_ON_SHARED_BOUND; + cs.slice->copyMotionLUTs(cs.slice->getMotionLUTs(), cs.slice->m_MotionCandLuTsBkup); + } + + if (currCU.shareParentPos.x < 0) + { + m_shareStateDec = 0; + } + prevTmpPos = currCU.shareParentPos; + } +#endif cs.chType = chType; if (currCU.predMode != MODE_INTRA && currCU.Y().valid()) { @@ -336,10 +364,10 @@ void DecCu::xReconInter(CodingUnit &cu) m_pcIntraPred->geneIntrainterPred(cu); // inter prediction - CHECK(cu.cpr && cu.firstPU->mhIntraFlag, "CPR and MHIntra cannot be used together"); - CHECK(cu.cpr && cu.affine, "CPR and Affine cannot be used together"); - CHECK(cu.cpr && cu.triangle, "CPR and triangle cannot be used together"); - CHECK(cu.cpr && cu.firstPU->mmvdMergeFlag, "CPR and MMVD cannot be used together"); + CHECK(cu.ibc && cu.firstPU->mhIntraFlag, "IBC and MHIntra cannot be used together"); + CHECK(cu.ibc && cu.affine, "IBC and Affine cannot be used together"); + CHECK(cu.ibc && cu.triangle, "IBC and triangle cannot be used together"); + CHECK(cu.ibc && cu.firstPU->mmvdMergeFlag, "IBC and MMVD cannot be used together"); const bool luma = cu.Y().valid(); const bool chroma = cu.Cb().valid(); if (luma && chroma) @@ -458,15 +486,21 @@ void DecCu::xDeriveCUMV( CodingUnit &cu ) if (pu.mmvdMergeFlag || pu.cu->mmvdSkip) { CHECK(pu.mhIntraFlag == true, "invalid 
MHIntra"); - if (pu.cs->sps->getSpsNext().getUseSubPuMvp()) + if (pu.cs->sps->getSBTMVPEnabledFlag()) { Size bufSize = g_miScaling.scale(pu.lumaSize()); mrgCtx.subPuMvpMiBuf = MotionBuf(m_SubPuMiBuf, bufSize); } int fPosBaseIdx = pu.mmvdMergeIdx / MMVD_MAX_REFINE_NUM; +#if JVET_M0170_MRG_SHARELIST + pu.shareParentPos = cu.shareParentPos; + pu.shareParentSize = cu.shareParentSize; +#endif PU::getInterMergeCandidates(pu, mrgCtx, 1, fPosBaseIdx + 1); +#if !JVET_M0068_M0171_MMVD_CLEANUP PU::restrictBiPredMergeCands(pu, mrgCtx); +#endif PU::getInterMMVDMergeCandidates(pu, mrgCtx, pu.mmvdMergeIdx ); @@ -486,7 +520,7 @@ void DecCu::xDeriveCUMV( CodingUnit &cu ) if( pu.cu->affine ) { AffineMergeCtx affineMergeCtx; - if ( pu.cs->sps->getSpsNext().getUseSubPuMvp() ) + if ( pu.cs->sps->getSBTMVPEnabledFlag() ) { Size bufSize = g_miScaling.scale( pu.lumaSize() ); mrgCtx.subPuMvpMiBuf = MotionBuf( m_SubPuMiBuf, bufSize ); @@ -520,10 +554,14 @@ void DecCu::xDeriveCUMV( CodingUnit &cu ) } else { - +#if JVET_M0170_MRG_SHARELIST + pu.shareParentPos = cu.shareParentPos; + pu.shareParentSize = cu.shareParentSize; +#endif PU::getInterMergeCandidates(pu, mrgCtx, 0, pu.mergeIdx); +#if !JVET_M0068_M0171_MMVD_CLEANUP PU::restrictBiPredMergeCands(pu, mrgCtx); - +#endif mrgCtx.setMergeInfo( pu, pu.mergeIdx ); PU::spanMotionInfo( pu, mrgCtx ); @@ -591,7 +629,7 @@ void DecCu::xDeriveCUMV( CodingUnit &cu ) Mv mvd = pu.mvd[eRefList]; if (eRefList == REF_PIC_LIST_0 && pu.cs->slice->getRefPic(eRefList, pu.refIdx[eRefList])->getPOC() == pu.cs->slice->getPOC()) { - pu.cu->cpr = true; + pu.cu->ibc = true; #if REUSE_CU_RESULTS if (!cu.cs->pcv->isEncoder) #endif diff --git a/source/Lib/DecoderLib/DecCu.h b/source/Lib/DecoderLib/DecCu.h index d5598b0dfa25662cf25b829d39a4645fb18527c1..67e5dc4fbc2a15faa777b7c1c34b35042e1d6fa0 100644 --- a/source/Lib/DecoderLib/DecCu.h +++ b/source/Lib/DecoderLib/DecCu.h @@ -69,6 +69,9 @@ public: /// destroy internal buffers void decompressCtu ( CodingStructure& cs, const 
UnitArea& ctuArea ); +#if JVET_M0170_MRG_SHARELIST + void setShareStateDec (int shareStateDecIn) { m_shareStateDec = shareStateDecIn; } +#endif /// reconstruct Ctu information protected: void xIntraRecQT ( CodingUnit& cu, const ChannelType chType ); @@ -90,6 +93,9 @@ private: IntraPrediction* m_pcIntraPred; InterPrediction* m_pcInterPred; +#if JVET_M0170_MRG_SHARELIST + int m_shareStateDec; +#endif MotionInfo m_SubPuMiBuf[(MAX_CU_SIZE * MAX_CU_SIZE) >> (MIN_CU_LOG2 << 1)]; diff --git a/source/Lib/DecoderLib/DecLib.cpp b/source/Lib/DecoderLib/DecLib.cpp index 0a1d88c69aa5b8584f0735d0204077a97892d3aa..09b493dd5c22f0168a000b5768cd947043a1d35d 100644 --- a/source/Lib/DecoderLib/DecLib.cpp +++ b/source/Lib/DecoderLib/DecLib.cpp @@ -168,12 +168,12 @@ bool tryDecodePicture( Picture* pcEncPic, const int expectedPoc, const std::stri pcEncPic->cs->slice = pcEncPic->slices.back(); - if ( pic->cs->sps->getUseSAO() ) + if ( pic->cs->sps->getSAOEnabledFlag() ) { pcEncPic->copySAO( *pic, 0 ); } - if( pic->cs->sps->getUseALF() ) + if( pic->cs->sps->getALFEnabledFlag() ) { for( int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++ ) { @@ -187,7 +187,7 @@ bool tryDecodePicture( Picture* pcEncPic, const int expectedPoc, const std::stri } pcDecLib->executeLoopFilters(); - if ( pic->cs->sps->getUseSAO() ) + if ( pic->cs->sps->getSAOEnabledFlag() ) { pcEncPic->copySAO( *pic, 1 ); } @@ -511,12 +511,12 @@ void DecLib::executeLoopFilters() // deblocking filter m_cLoopFilter.loopFilterPic( cs ); - if( cs.sps->getUseSAO() ) + if( cs.sps->getSAOEnabledFlag() ) { m_cSAO.SAOProcess( cs, cs.picture->getSAO() ); } - if( cs.sps->getUseALF() ) + if( cs.sps->getALFEnabledFlag() ) { m_cALF.ALFProcess( cs, cs.slice->getAlfSliceParam() ); } @@ -772,7 +772,7 @@ void DecLib::xActivateParameterSets() m_cSliceDecoder.create(); - if( sps->getUseALF() ) + if( sps->getALFEnabledFlag() ) { m_cALF.create( sps->getPicWidthInLumaSamples(), sps->getPicHeightInLumaSamples(), sps->getChromaFormatIdc(), 
sps->getMaxCUWidth(), sps->getMaxCUHeight(), sps->getMaxCodingDepth(), sps->getBitDepths().recon ); } @@ -1116,6 +1116,84 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl pcSlice->setCheckLDC(bLowDelay); } +#if JVET_M0444_SMVD + if ( pcSlice->getCheckLDC() == false && pcSlice->getMvdL1ZeroFlag() == false ) + { + int currPOC = pcSlice->getPOC(); + + int forwardPOC = currPOC; + int backwardPOC = currPOC; + int ref = 0; + int refIdx0 = -1; + int refIdx1 = -1; + + // search nearest forward POC in List 0 + for ( ref = 0; ref < pcSlice->getNumRefIdx( REF_PIC_LIST_0 ); ref++ ) + { + int poc = pcSlice->getRefPic( REF_PIC_LIST_0, ref )->getPOC(); + if ( poc < currPOC && (poc > forwardPOC || refIdx0 == -1) ) + { + forwardPOC = poc; + refIdx0 = ref; + } + } + + // search nearest backward POC in List 1 + for ( ref = 0; ref < pcSlice->getNumRefIdx( REF_PIC_LIST_1 ); ref++ ) + { + int poc = pcSlice->getRefPic( REF_PIC_LIST_1, ref )->getPOC(); + if ( poc > currPOC && (poc < backwardPOC || refIdx1 == -1) ) + { + backwardPOC = poc; + refIdx1 = ref; + } + } + + if ( !(forwardPOC < currPOC && backwardPOC > currPOC) ) + { + forwardPOC = currPOC; + backwardPOC = currPOC; + refIdx0 = -1; + refIdx1 = -1; + + // search nearest backward POC in List 0 + for ( ref = 0; ref < pcSlice->getNumRefIdx( REF_PIC_LIST_0 ); ref++ ) + { + int poc = pcSlice->getRefPic( REF_PIC_LIST_0, ref )->getPOC(); + if ( poc > currPOC && (poc < backwardPOC || refIdx0 == -1) ) + { + backwardPOC = poc; + refIdx0 = ref; + } + } + + // search nearest forward POC in List 1 + for ( ref = 0; ref < pcSlice->getNumRefIdx( REF_PIC_LIST_1 ); ref++ ) + { + int poc = pcSlice->getRefPic( REF_PIC_LIST_1, ref )->getPOC(); + if ( poc < currPOC && (poc > forwardPOC || refIdx1 == -1) ) + { + forwardPOC = poc; + refIdx1 = ref; + } + } + } + + if ( forwardPOC < currPOC && backwardPOC > currPOC ) + { + pcSlice->setBiDirPred( true, refIdx0, refIdx1 ); + } + else + { + pcSlice->setBiDirPred( false, -1, 
-1 ); + } + } + else + { + pcSlice->setBiDirPred( false, -1, -1 ); + } +#endif + //--------------- pcSlice->setRefPOCList(); @@ -1150,7 +1228,7 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl } #endif - if (pcSlice->getSPS()->getSpsNext().getCPRMode() && pcSlice->getEnableTMVPFlag()) + if (pcSlice->getSPS()->getSpsNext().getIBCMode() && pcSlice->getEnableTMVPFlag()) { CHECK(pcSlice->getRefPic(RefPicList(pcSlice->isInterB() ? 1 - pcSlice->getColFromL0Flag() : 0), pcSlice->getColRefIdx())->getPOC() == pcSlice->getPOC(), "curr ref picture cannot be collocated picture"); } @@ -1160,7 +1238,7 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl m_cSliceDecoder.decompressSlice( pcSlice, &(nalu.getBitstream()) ); m_bFirstSliceInPicture = false; - if (pcSlice->getSPS()->getSpsNext().getCPRMode()) + if (pcSlice->getSPS()->getSpsNext().getIBCMode()) { pcSlice->getPic()->longTerm = false; } diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp index e6813e513f4fe3a4eb1dea8065235b1051c7b69c..8e1725797c1a84d4eb0812d62a0b0356e20d60f7 100644 --- a/source/Lib/DecoderLib/VLCReader.cpp +++ b/source/Lib/DecoderLib/VLCReader.cpp @@ -789,20 +789,29 @@ void HLSyntaxReader::parseSPSNext( SPSNext& spsNext, const bool usePCM ) // tool enabling flags READ_FLAG( symbol, "large_ctu_flag" ); spsNext.setUseLargeCTU ( symbol != 0 ); - READ_FLAG( symbol, "subpu_tmvp_flag" ); spsNext.setSubPuMvpMode (symbol); READ_FLAG( symbol, "imv_enable_flag" ); spsNext.setUseIMV ( symbol != 0 ); - READ_FLAG( symbol, "bio_enable_flag" ); spsNext.setUseBIO ( symbol != 0 ); READ_FLAG( symbol, "disable_motion_compression_flag" ); spsNext.setDisableMotCompress ( symbol != 0 ); READ_FLAG( symbol, "lm_chroma_enabled_flag" ); spsNext.setUseLMChroma ( symbol != 0 ); +#if JVET_M0142_CCLM_COLLOCATED_CHROMA + if ( spsNext.getUseLMChroma() && spsNext.getSPS().getChromaFormatIdc() == CHROMA_420 ) + { + READ_FLAG( symbol, 
"sps_cclm_collocated_chroma_flag" ); spsNext.setCclmCollocatedChromaFlag( symbol != 0 ); + } +#endif +#if JVET_M0464_UNI_MTS + READ_FLAG( symbol, "mts_intra_enabled_flag" ); spsNext.setUseIntraMTS ( symbol != 0 ); + READ_FLAG( symbol, "mts_inter_enabled_flag" ); spsNext.setUseInterMTS ( symbol != 0 ); +#else READ_FLAG( symbol, "emt_intra_enabled_flag" ); spsNext.setUseIntraEMT ( symbol != 0 ); READ_FLAG( symbol, "emt_inter_enabled_flag" ); spsNext.setUseInterEMT ( symbol != 0 ); +#endif READ_FLAG( symbol, "affine_flag" ); spsNext.setUseAffine ( symbol != 0 ); if ( spsNext.getUseAffine() ) { READ_FLAG( symbol, "affine_type_flag" ); spsNext.setUseAffineType ( symbol != 0 ); } READ_FLAG( symbol, "gbi_flag" ); spsNext.setUseGBi ( symbol != 0 ); - READ_FLAG( symbol, "cpr_flag"); spsNext.setCPRMode ( symbol != 0 ); + READ_FLAG( symbol, "ibc_flag"); spsNext.setIBCMode ( symbol != 0 ); for( int k = 0; k < SPSNext::NumReservedFlags; k++ ) { READ_FLAG( symbol, "reserved_flag" ); if( symbol != 0 ) EXIT("Incompatible version: SPSNext reserved flag not equal to zero (bitstream was probably created with newer software version)" ); @@ -816,14 +825,6 @@ void HLSyntaxReader::parseSPSNext( SPSNext& spsNext, const bool usePCM ) READ_FLAG( symbol, "reserved_flag" ); CHECK( symbol, "reserved flag not 0!" 
); #endif - // additional parameters - if( spsNext.getUseSubPuMvp() ) - { - int subPuMode = 1; - spsNext.setSubPuMvpMode( subPuMode ); - } - - if( spsNext.getUseIMV() ) { READ_UVLC( symbol, "imv_mode_minus1" ); spsNext.setImvMode( ImvMode( symbol + 1 ) ); @@ -1014,8 +1015,39 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS) READ_UVLC( uiCode, "log2_diff_max_min_luma_transform_block_size" ); pcSPS->setQuadtreeTULog2MaxSize( uiCode + pcSPS->getQuadtreeTULog2MinSize() ); pcSPS->setMaxTrSize( 1<<(uiCode + pcSPS->getQuadtreeTULog2MinSize()) ); - READ_FLAG( uiCode, "sps_alf_enable_flag" ); pcSPS->setUseALF( uiCode ); + READ_FLAG( uiCode, "sps_sao_enabled_flag" ); pcSPS->setSAOEnabledFlag ( uiCode ? true : false ); + READ_FLAG( uiCode, "sps_alf_enabled_flag" ); pcSPS->setALFEnabledFlag ( uiCode ? true : false ); + + READ_FLAG( uiCode, "pcm_enabled_flag" ); pcSPS->setPCMEnabledFlag( uiCode ? true : false ); + if( pcSPS->getPCMEnabledFlag() ) + { + READ_CODE( 4, uiCode, "pcm_sample_bit_depth_luma_minus1" ); pcSPS->setPCMBitDepth ( CHANNEL_TYPE_LUMA, 1 + uiCode ); + READ_CODE( 4, uiCode, "pcm_sample_bit_depth_chroma_minus1" ); pcSPS->setPCMBitDepth ( CHANNEL_TYPE_CHROMA, 1 + uiCode ); + READ_UVLC( uiCode, "log2_min_pcm_luma_coding_block_size_minus3" ); pcSPS->setPCMLog2MinSize ( uiCode+3 ); + READ_UVLC( uiCode, "log2_diff_max_min_pcm_luma_coding_block_size" ); pcSPS->setPCMLog2MaxSize ( uiCode+pcSPS->getPCMLog2MinSize() ); + READ_FLAG( uiCode, "pcm_loop_filter_disable_flag" ); pcSPS->setPCMFilterDisableFlag ( uiCode ? true : false ); + } + + READ_FLAG(uiCode, "sps_ref_wraparound_enabled_flag"); pcSPS->setWrapAroundEnabledFlag( uiCode ? 
true : false ); + if (pcSPS->getWrapAroundEnabledFlag()) + { + READ_UVLC(uiCode, "sps_ref_wraparound_offset"); pcSPS->setWrapAroundOffset( uiCode ); + } + + READ_FLAG( uiCode, "sps_temporal_mvp_enabled_flag" ); pcSPS->setSPSTemporalMVPEnabledFlag(uiCode); + + if ( pcSPS->getSPSTemporalMVPEnabledFlag() ) + { + READ_FLAG( uiCode, "sps_sbtmvp_enabled_flag" ); pcSPS->setSBTMVPEnabledFlag ( uiCode != 0 ); + } + else + { + pcSPS->setSBTMVPEnabledFlag(false); + } + + READ_FLAG( uiCode, "sps_bdof_enable_flag" ); pcSPS->setBDOFEnabledFlag ( uiCode != 0 ); + #if HEVC_USE_SCALING_LISTS READ_FLAG( uiCode, "scaling_list_enabled_flag" ); pcSPS->setScalingListFlag ( uiCode ); if(pcSPS->getScalingListFlag()) @@ -1027,24 +1059,6 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS) } } #endif - READ_FLAG( uiCode, "amp_enabled_flag" ); pcSPS->setUseAMP( uiCode ); - READ_FLAG( uiCode, "sample_adaptive_offset_enabled_flag" ); pcSPS->setUseSAO ( uiCode ? true : false ); - - READ_FLAG( uiCode, "pcm_enabled_flag" ); pcSPS->setUsePCM( uiCode ? true : false ); - if( pcSPS->getUsePCM() ) - { - READ_CODE( 4, uiCode, "pcm_sample_bit_depth_luma_minus1" ); pcSPS->setPCMBitDepth ( CHANNEL_TYPE_LUMA, 1 + uiCode ); - READ_CODE( 4, uiCode, "pcm_sample_bit_depth_chroma_minus1" ); pcSPS->setPCMBitDepth ( CHANNEL_TYPE_CHROMA, 1 + uiCode ); - READ_UVLC( uiCode, "log2_min_pcm_luma_coding_block_size_minus3" ); pcSPS->setPCMLog2MinSize (uiCode+3); - READ_UVLC( uiCode, "log2_diff_max_min_pcm_luma_coding_block_size" ); pcSPS->setPCMLog2MaxSize ( uiCode+pcSPS->getPCMLog2MinSize() ); - READ_FLAG( uiCode, "pcm_loop_filter_disable_flag" ); pcSPS->setPCMFilterDisableFlag ( uiCode ? true : false ); - } - - READ_FLAG(uiCode, "ref_wraparound_enabled_flag"); pcSPS->setUseWrapAround( uiCode ? 
true : false ); - if (pcSPS->getUseWrapAround()) - { - READ_UVLC(uiCode, "ref_wraparound_offset"); pcSPS->setWrapAroundOffset( uiCode ); - } READ_UVLC( uiCode, "num_short_term_ref_pic_sets" ); CHECK(uiCode > 64, "Invalid code"); @@ -1071,8 +1085,6 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS) pcSPS->setUsedByCurrPicLtSPSFlag(k, uiCode?1:0); } } - READ_FLAG( uiCode, "sps_temporal_mvp_enabled_flag" ); pcSPS->setSPSTemporalMVPEnabledFlag(uiCode); - #if HEVC_USE_INTRA_SMOOTHING_T32 || HEVC_USE_INTRA_SMOOTHING_T64 READ_FLAG( uiCode, "strong_intra_smoothing_enable_flag" ); pcSPS->setUseStrongIntraSmoothing(uiCode); @@ -1137,7 +1149,7 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS) case SPS_EXT__NEXT: { CHECK( !pcSPS->getSpsNext().nextToolsEnabled(), "Got SPS Next extension in non NEXT profile" ); - parseSPSNext( pcSPS->getSpsNext(), pcSPS->getUsePCM() ); + parseSPSNext( pcSPS->getSpsNext(), pcSPS->getPCMEnabledFlag() ); break; } default: @@ -1512,7 +1524,7 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para pcSlice->setEnableTMVPFlag(false); } } - if(sps->getUseSAO()) + if(sps->getSAOEnabledFlag()) { READ_FLAG(uiCode, "slice_sao_luma_flag"); pcSlice->setSaoEnabledFlag(CHANNEL_TYPE_LUMA, (bool)uiCode); @@ -1522,7 +1534,7 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para } } - if( sps->getUseALF() ) + if( sps->getALFEnabledFlag() ) { alf( pcSlice->getAlfSliceParam() ); } @@ -1741,11 +1753,11 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para READ_UVLC(uiCode, "six_minus_max_num_merge_cand"); pcSlice->setMaxNumMergeCand(MRG_MAX_NUM_CANDS - uiCode); - if ( sps->getSpsNext().getUseSubPuMvp() && !sps->getSpsNext().getUseAffine() ) // ATMVP only + if ( sps->getSBTMVPEnabledFlag() && !sps->getSpsNext().getUseAffine() ) // ATMVP only { pcSlice->setMaxNumAffineMergeCand( 1 ); } - else if ( !sps->getSpsNext().getUseSubPuMvp() && !sps->getSpsNext().getUseAffine() ) // both off + else if ( 
!sps->getSBTMVPEnabledFlag() && !sps->getSpsNext().getUseAffine() ) // both off { pcSlice->setMaxNumAffineMergeCand( 0 ); } @@ -1833,7 +1845,7 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para pcSlice->setDeblockingFilterTcOffsetDiv2 ( 0 ); } - bool isSAOEnabled = sps->getUseSAO() && (pcSlice->getSaoEnabledFlag(CHANNEL_TYPE_LUMA) || (bChroma && pcSlice->getSaoEnabledFlag(CHANNEL_TYPE_CHROMA))); + bool isSAOEnabled = sps->getSAOEnabledFlag() && (pcSlice->getSaoEnabledFlag(CHANNEL_TYPE_LUMA) || (bChroma && pcSlice->getSaoEnabledFlag(CHANNEL_TYPE_CHROMA))); bool isDBFEnabled = (!pcSlice->getDeblockingFilterDisable()); if(pps->getLoopFilterAcrossSlicesEnabledFlag() && ( isSAOEnabled || isDBFEnabled )) diff --git a/source/Lib/EncoderLib/Analyze.h b/source/Lib/EncoderLib/Analyze.h index 69aaad7cef931082bd60885cb237805c93118d90..086c834bf6c9bcd3a4306d526c5e40c2a5c904fe 100644 --- a/source/Lib/EncoderLib/Analyze.h +++ b/source/Lib/EncoderLib/Analyze.h @@ -60,10 +60,6 @@ // Class definition // ==================================================================================================================== -#if ENABLE_QPA - #define FRAME_WEIGHTING 0 // WPSNR temporal weighting according to hierarchical coding structure; only for GOP size 16 -#endif - /// encoder analyzer class class Analyze { @@ -73,10 +69,6 @@ private: uint32_t m_uiNumPic; double m_dFrmRate; //--CFG_KDY double m_MSEyuvframe[MAX_NUM_COMPONENT]; // sum of MSEs -#if ENABLE_QPA && FRAME_WEIGHTING - double m_sumWSSD[MAX_NUM_COMPONENT]; // weighted SSDs - double m_sumW; -#endif #if EXTENSION_360_VIDEO TExt360EncAnalyze m_ext360; #endif @@ -101,13 +93,7 @@ public: m_uiNumPic++; } #if ENABLE_QPA - #if FRAME_WEIGHTING - void addWeightedSSD(const double dWeightedSSD, const ComponentID compID) { m_sumWSSD[compID] += dWeightedSSD; } - void addWeight (const double dWeight) { m_sumW += dWeight; } - double getWPSNR (const ComponentID compID) const { return (m_sumWSSD[compID] > 0.0 ? 
10.0 * log10(m_sumW / m_sumWSSD[compID]) : 999.99); } - #else double getWPSNR (const ComponentID compID) const { return m_dPSNRSum[compID] / (double)m_uiNumPic; } - #endif #endif double getPsnr(ComponentID compID) const { return m_dPSNRSum[compID]; } double getBits() const { return m_dAddBits; } @@ -125,13 +111,7 @@ public: { m_dPSNRSum[i] = 0; m_MSEyuvframe[i] = 0; -#if ENABLE_QPA && FRAME_WEIGHTING - m_sumWSSD[i] = 0; -#endif } -#if ENABLE_QPA && FRAME_WEIGHTING - m_sumW = 0; -#endif m_uiNumPic = 0; #if EXTENSION_360_VIDEO m_ext360.clear(); diff --git a/source/Lib/EncoderLib/CABACWriter.cpp b/source/Lib/EncoderLib/CABACWriter.cpp index d0d937ad47e37c4e6611d65d094bfbc8605cf2e2..a9795b6dbf2e120a85cfced81944161f270c5930 100644 --- a/source/Lib/EncoderLib/CABACWriter.cpp +++ b/source/Lib/EncoderLib/CABACWriter.cpp @@ -215,7 +215,7 @@ void CABACWriter::coding_tree_unit( CodingStructure& cs, const UnitArea& area, i void CABACWriter::sao( const Slice& slice, unsigned ctuRsAddr ) { const SPS& sps = *slice.getSPS(); - if( !sps.getUseSAO() ) + if( !sps.getSAOEnabledFlag() ) { return; } @@ -396,6 +396,16 @@ void CABACWriter::coding_tree(const CodingStructure& cs, Partitioner& partitione } } +#if JVET_M0421_SPLIT_SIG + const PartSplit splitMode = CU::getSplitAtDepth( cu, partitioner.currDepth ); + + split_cu_mode( splitMode, cs, partitioner ); + + CHECK( !partitioner.canSplit( splitMode, cs ), "The chosen split mode is invalid!" 
); + + if( splitMode != CU_DONT_SPLIT ) + { +#else const PartSplit implicitSplit = partitioner.getImplicitSplit( cs ); // QT @@ -415,6 +425,7 @@ void CABACWriter::coding_tree(const CodingStructure& cs, Partitioner& partitione // quad-tree split if( qtSplit ) { +#endif if (CS::isDualITree(cs) && pPartitionerChroma != nullptr && (partitioner.currArea().lwidth() >= 64 || partitioner.currArea().lheight() >= 64)) { partitioner.splitCurrArea(CU_QUAD_SPLIT, cs); @@ -459,7 +470,11 @@ void CABACWriter::coding_tree(const CodingStructure& cs, Partitioner& partitione } else { +#if JVET_M0421_SPLIT_SIG + partitioner.splitCurrArea( splitMode, cs ); +#else partitioner.splitCurrArea( CU_QUAD_SPLIT, cs ); +#endif do { @@ -472,9 +487,12 @@ void CABACWriter::coding_tree(const CodingStructure& cs, Partitioner& partitione partitioner.exitCurrSplit(); } return; +#if !JVET_M0421_SPLIT_SIG } +#endif } +#if !JVET_M0421_SPLIT_SIG { bool mtSplit = partitioner.canSplit( CU_MT_SPLIT, cs ); @@ -501,6 +519,7 @@ void CABACWriter::coding_tree(const CodingStructure& cs, Partitioner& partitione } } +#endif // Predict QP on start of quantization group if( pps.getUseDQP() && !cuCtx.isDQPCoded && CU::isQGStart( cu, partitioner ) ) { @@ -515,6 +534,68 @@ void CABACWriter::coding_tree(const CodingStructure& cs, Partitioner& partitione DTRACE_BLOCK_REC_COND( ( !isEncoding() ), cs.picture->getRecoBuf( cu ), cu, cu.predMode ); } +#if JVET_M0421_SPLIT_SIG +void CABACWriter::split_cu_mode( const PartSplit split, const CodingStructure& cs, Partitioner& partitioner ) +{ + bool canNo, canQt, canBh, canBv, canTh, canTv; + partitioner.canSplit( cs, canNo, canQt, canBh, canBv, canTh, canTv ); + + bool canSpl[6] = { canNo, canQt, canBh, canBv, canTh, canTv }; + + unsigned ctxSplit = 0, ctxQtSplit = 0, ctxBttHV = 0, ctxBttH12 = 0, ctxBttV12; + DeriveCtx::CtxSplit( cs, partitioner, ctxSplit, ctxQtSplit, ctxBttHV, ctxBttH12, ctxBttV12, canSpl ); + + const bool canSplit = canBh || canBv || canTh || canTv || canQt; + 
const bool isNo = split == CU_DONT_SPLIT; + + if( canNo && canSplit ) + { + m_BinEncoder.encodeBin( !isNo, Ctx::SplitFlag( ctxSplit ) ); + } + + DTRACE( g_trace_ctx, D_SYNTAX, "split_cu_mode() ctx=%d split=%d\n", ctxSplit, !isNo ); + + if( isNo ) + { + return; + } + + const bool canBtt = canBh || canBv || canTh || canTv; + const bool isQt = split == CU_QUAD_SPLIT; + + if( canQt && canBtt ) + { + m_BinEncoder.encodeBin( isQt, Ctx::SplitQtFlag( ctxQtSplit ) ); + } + + DTRACE( g_trace_ctx, D_SYNTAX, "split_cu_mode() ctx=%d qt=%d\n", ctxQtSplit, isQt ); + + if( isQt ) + { + return; + } + + const bool canHor = canBh || canTh; + const bool canVer = canBv || canTv; + const bool isVer = split == CU_VERT_SPLIT || split == CU_TRIV_SPLIT; + + if( canVer && canHor ) + { + m_BinEncoder.encodeBin( isVer, Ctx::SplitHvFlag( ctxBttHV ) ); + } + + const bool can14 = isVer ? canTv : canTh; + const bool can12 = isVer ? canBv : canBh; + const bool is12 = isVer ? ( split == CU_VERT_SPLIT ) : ( split == CU_HORZ_SPLIT ); + + if( can12 && can14 ) + { + m_BinEncoder.encodeBin( is12, Ctx::Split12Flag( isVer ? ctxBttV12 : ctxBttH12 ) ); + } + + DTRACE( g_trace_ctx, D_SYNTAX, "split_cu_mode() ctxHv=%d ctx12=%d mode=%d\n", ctxBttHV, isVer ? ctxBttV12 : ctxBttH12, split ); +} +#else void CABACWriter::split_cu_flag( bool split, const CodingStructure& cs, Partitioner& partitioner ) { unsigned maxQTDepth = ( g_aucLog2[cs.sps->getCTUSize()] - g_aucLog2[cs.sps->getMinQTSize(cs.slice->getSliceType(), partitioner.chType)] ); @@ -535,6 +616,49 @@ void CABACWriter::split_cu_mode_mt(const PartSplit split, const CodingStructure& unsigned width = partitioner.currArea().lumaSize().width; unsigned height = partitioner.currArea().lumaSize().height; +#if REMOVE_BIN_DECISION_TREE + unsigned btSCtxId = width == height ? 0 : ( width > height ? 
1 : 2 ); + + const bool canNo = partitioner.canSplit( CU_DONT_SPLIT, cs ); + const bool canBh = partitioner.canSplit( CU_HORZ_SPLIT, cs ); + const bool canBv = partitioner.canSplit( CU_VERT_SPLIT, cs ); + const bool canTh = partitioner.canSplit( CU_TRIH_SPLIT, cs ); + const bool canTv = partitioner.canSplit( CU_TRIV_SPLIT, cs ); + + const bool canSplit = canBh || canBv || canTh || canTv; + const bool isNo = split == CU_DONT_SPLIT; + + if( canNo && canSplit ) + { + m_BinEncoder.encodeBin( !isNo, Ctx::BTSplitFlag( ctxIdBT ) ); + } + + if( isNo ) + { + DTRACE( g_trace_ctx, D_SYNTAX, "split_cu_mode_mt() ctx=%d split=%d\n", ctxIdBT, split ); + + return; + } + + const bool canHor = canBh || canTh; + const bool canVer = canBv || canTv; + const bool isVer = split == CU_VERT_SPLIT || split == CU_TRIV_SPLIT; + + if( canVer && canHor ) + { + m_BinEncoder.encodeBin( isVer, Ctx::BTSplitFlag( 12 + btSCtxId ) ); + } + + const bool can14 = isVer ? canTv : canTh; + const bool can12 = isVer ? canBv : canBh; + const bool is12 = isVer ? ( split == CU_VERT_SPLIT ) : ( split == CU_HORZ_SPLIT ); + + + if( can12 && can14 ) + { + m_BinEncoder.encodeBin( is12, Ctx::BTSplitFlag( 15 ) ); + } +#else DecisionTree dt( g_mtSplitDTT ); dt.setAvail( DTT_SPLIT_BT_HORZ, partitioner.canSplit( CU_HORZ_SPLIT, cs ) ); @@ -553,10 +677,11 @@ void CABACWriter::split_cu_mode_mt(const PartSplit split, const CodingStructure& encode_sparse_dt( dt, split == CU_DONT_SPLIT ? 
( unsigned ) DTT_SPLIT_NO_SPLIT : ( unsigned ) split ); +#endif DTRACE(g_trace_ctx, D_SYNTAX, "split_cu_mode_mt() ctx=%d split=%d\n", ctxIdBT, split); } - +#endif //================================================================================ // clause 7.3.8.5 @@ -658,7 +783,11 @@ void CABACWriter::pred_mode( const CodingUnit& cu ) { return; } +#if JVET_M0502_PRED_MODE_CTX + m_BinEncoder.encodeBin( ( CU::isIntra( cu ) ), Ctx::PredMode( DeriveCtx::CtxPredModeFlag( cu ) ) ); +#else m_BinEncoder.encodeBin( ( CU::isIntra( cu ) ), Ctx::PredMode() ); +#endif } void CABACWriter::pcm_data( const CodingUnit& cu, Partitioner& partitioner ) @@ -674,7 +803,7 @@ void CABACWriter::pcm_data( const CodingUnit& cu, Partitioner& partitioner ) void CABACWriter::pcm_flag( const CodingUnit& cu, Partitioner& partitioner ) { const SPS& sps = *cu.cs->sps; - if( !sps.getUsePCM() || partitioner.currArea().lwidth() > (1 << sps.getPCMLog2MaxSize()) || partitioner.currArea().lwidth() < (1 << sps.getPCMLog2MinSize()) + if( !sps.getPCMEnabledFlag() || partitioner.currArea().lwidth() > (1 << sps.getPCMLog2MaxSize()) || partitioner.currArea().lwidth() < (1 << sps.getPCMLog2MinSize()) || partitioner.currArea().lheight() > (1 << sps.getPCMLog2MaxSize()) || partitioner.currArea().lheight() < (1 << sps.getPCMLog2MinSize()) ) { return; @@ -1199,6 +1328,9 @@ void CABACWriter::prediction_unit( const PredictionUnit& pu ) { inter_pred_idc( pu ); affine_flag ( *pu.cu ); +#if JVET_M0444_SMVD + smvd_mode( pu ); +#endif if( pu.interDir != 2 /* PRED_L1 */ ) { ref_idx ( pu, REF_PIC_LIST_0 ); @@ -1219,6 +1351,10 @@ void CABACWriter::prediction_unit( const PredictionUnit& pu ) } if( pu.interDir != 1 /* PRED_L0 */ ) { +#if JVET_M0444_SMVD + if ( pu.cu->smvdMode != 1 ) + { +#endif ref_idx ( pu, REF_PIC_LIST_1 ); if( !pu.cs->slice->getMvdL1ZeroFlag() || pu.interDir != 3 /* PRED_BI */ ) { @@ -1236,11 +1372,33 @@ void CABACWriter::prediction_unit( const PredictionUnit& pu ) mvd_coding( pu.mvd[REF_PIC_LIST_1], 
pu.cu->imv ); } } +#if JVET_M0444_SMVD + } +#endif mvp_flag ( pu, REF_PIC_LIST_1 ); } } } +#if JVET_M0444_SMVD +void CABACWriter::smvd_mode( const PredictionUnit& pu ) +{ + if ( pu.interDir != 3 || pu.cu->affine ) + { + return; + } + + if ( pu.cs->slice->getBiDirPred() == false ) + { + return; + } + + m_BinEncoder.encodeBin( pu.cu->smvdMode ? 1 : 0, Ctx::SmvdFlag() ); + + DTRACE( g_trace_ctx, D_SYNTAX, "symmvd_flag() symmvd=%d pos=(%d,%d) size=%dx%d\n", pu.cu->smvdMode ? 1 : 0, pu.lumaPos().x, pu.lumaPos().y, pu.lumaSize().width, pu.lumaSize().height ); +} +#endif + void CABACWriter::subblock_merge_flag( const CodingUnit& cu ) { if ( cu.firstPU->mergeFlag && (cu.firstPU->mmvdMergeFlag || cu.mmvdSkip) ) @@ -1248,7 +1406,7 @@ void CABACWriter::subblock_merge_flag( const CodingUnit& cu ) return; } - if ( !cu.cs->slice->isIntra() && (cu.cs->sps->getSpsNext().getUseAffine() || cu.cs->sps->getSpsNext().getUseATMVP()) && cu.lumaSize().width >= 8 && cu.lumaSize().height >= 8 ) + if ( !cu.cs->slice->isIntra() && (cu.cs->sps->getSpsNext().getUseAffine() || cu.cs->sps->getSBTMVPEnabledFlag()) && cu.lumaSize().width >= 8 && cu.lumaSize().height >= 8 ) { unsigned ctxId = DeriveCtx::CtxAffineFlag( cu ); m_BinEncoder.encodeBin( cu.affine, Ctx::AffineFlag( ctxId ) ); @@ -1301,7 +1459,7 @@ void CABACWriter::imv_mode( const CodingUnit& cu ) } unsigned ctxId = DeriveCtx::CtxIMVFlag( cu ); - if (!(cu.firstPU->interDir == 1 && cu.cs->slice->getRefPic(REF_PIC_LIST_0, cu.firstPU->refIdx[REF_PIC_LIST_0])->getPOC() == cu.cs->slice->getPOC())) // the first bin of IMV flag does need to be signaled in CPR block + if (!(cu.firstPU->interDir == 1 && cu.cs->slice->getRefPic(REF_PIC_LIST_0, cu.firstPU->refIdx[REF_PIC_LIST_0])->getPOC() == cu.cs->slice->getPOC())) // the first bin of IMV flag does need to be signaled in IBC block m_BinEncoder.encodeBin( ( cu.imv > 0 ), Ctx::ImvFlag( ctxId ) ); DTRACE( g_trace_ctx, D_SYNTAX, "imv_mode() value=%d ctx=%d\n", (cu.imv > 0), ctxId ); @@ -1330,7 +1488,7 
@@ void CABACWriter::merge_idx( const PredictionUnit& pu ) } else { - bool useExtCtx = pu.cs->sps->getSpsNext().getUseSubPuMvp(); + bool useExtCtx = pu.cs->sps->getSBTMVPEnabledFlag(); m_BinEncoder.encodeBin( 1, Ctx::AffMergeIdx() ); for ( unsigned idx = 1; idx < numCandminus1; idx++ ) { @@ -1478,6 +1636,14 @@ void CABACWriter::inter_pred_idc( const PredictionUnit& pu ) void CABACWriter::ref_idx( const PredictionUnit& pu, RefPicList eRefList ) { +#if JVET_M0444_SMVD + if ( pu.cu->smvdMode ) + { + CHECK( pu.refIdx[eRefList] != pu.cs->slice->getSymRefIdx( eRefList ), "Invalid reference index!\n" ); + return; + } +#endif + int numRef = pu.cs->slice->getNumRefIdx(eRefList); if( numRef <= 1 ) { @@ -1616,6 +1782,18 @@ void CABACWriter::triangle_mode( const CodingUnit& cu ) return; } +#if JVET_M0118_M0185_TRIANGLE_FLAG_FIX + if ( cu.firstPU->mmvdMergeFlag || cu.mmvdSkip ) + { + return; + } + + if ( cu.firstPU->mhIntraFlag ) + { + return; + } +#endif + unsigned flag_idx = DeriveCtx::CtxTriangleFlag( cu ); m_BinEncoder.encodeBin( cu.triangle, Ctx::TriangleFlag(flag_idx) ); @@ -1718,7 +1896,9 @@ void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partit chromaCbfs.Cb = TU::getCbfAtDepth( tu, COMPONENT_Cb, trDepth ); chromaCbfs.Cr = TU::getCbfAtDepth( tu, COMPONENT_Cr, trDepth ); } +#if !JVET_M0464_UNI_MTS if( trDepth == 0 ) emt_cu_flag( cu ); +#endif if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) ) { @@ -1756,7 +1936,9 @@ void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partit } } +#if !JVET_M0464_UNI_MTS if( trDepth == 0 && TU::getCbfAtDepth( tu, COMPONENT_Y, 0 ) ) emt_cu_flag( cu ); +#endif transform_unit( tu, cuCtx, chromaCbfs ); } @@ -1961,13 +2143,21 @@ void CABACWriter::residual_coding( const TransformUnit& tu, ComponentID compID ) DTRACE( g_trace_ctx, D_SYNTAX, "residual_coding() etype=%d pos=(%d,%d) size=%dx%d predMode=%d\n", tu.blocks[compID].compID, tu.blocks[compID].x, tu.blocks[compID].y, tu.blocks[compID].width, 
tu.blocks[compID].height, cu.predMode ); // code transform skip and explicit rdpcm mode +#if JVET_M0464_UNI_MTS + mts_coding ( tu, compID ); +#else transform_skip_flag( tu, compID ); +#endif explicit_rdpcm_mode( tu, compID ); #if HEVC_USE_SIGN_HIDING // determine sign hiding bool signHiding = ( cu.cs->slice->getSignDataHidingEnabledFlag() && !cu.transQuantBypass && tu.rdpcm[compID] == RDPCM_OFF ); +#if JVET_M0464_UNI_MTS + if( signHiding && CU::isIntra(cu) && CU::isRDPCMEnabled(cu) && tu.mtsIdx==1 ) +#else if( signHiding && CU::isIntra(cu) && CU::isRDPCMEnabled(cu) && tu.transformSkip[compID] ) +#endif { const ChannelType chType = toChannelType( compID ); const unsigned intraMode = PU::getFinalIntraMode( *cu.cs->getPU( tu.blocks[compID].pos(), chType ), chType ); @@ -1985,7 +2175,9 @@ void CABACWriter::residual_coding( const TransformUnit& tu, ComponentID compID ) CoeffCodingContext cctx ( tu, compID ); #endif const TCoeff* coeff = tu.getCoeffs( compID ).buf; +#if !JVET_M0464_UNI_MTS unsigned numSig = 0; +#endif // determine and set last coeff position and sig group flags int scanPosLast = -1; @@ -2008,22 +2200,27 @@ void CABACWriter::residual_coding( const TransformUnit& tu, ComponentID compID ) // code subblocks const int stateTab = ( tu.cs->slice->getDepQuantEnabledFlag() ? 
32040 : 0 ); int state = 0; +#if !JVET_M0464_UNI_MTS bool useEmt = ( cu.cs->sps->getSpsNext().getUseIntraEMT() && cu.predMode == MODE_INTRA ) || ( cu.cs->sps->getSpsNext().getUseInterEMT() && cu.predMode != MODE_INTRA ); useEmt = useEmt && isLuma(compID); +#endif for( int subSetId = ( cctx.scanPosLast() >> cctx.log2CGSize() ); subSetId >= 0; subSetId--) { cctx.initSubblock ( subSetId, sigGroupFlags[subSetId] ); residual_coding_subblock( cctx, coeff, stateTab, state ); +#if !JVET_M0464_UNI_MTS if (useEmt) { numSig += cctx.emtNumSigCoeff(); cctx.setEmtNumSigCoeff( 0 ); } +#endif } +#if !JVET_M0464_UNI_MTS if( useEmt && !tu.transformSkip[compID] && compID == COMPONENT_Y && tu.cu->emtFlag ) { if( CU::isIntra( *tu.cu ) ) @@ -2035,9 +2232,56 @@ void CABACWriter::residual_coding( const TransformUnit& tu, ComponentID compID ) emt_tu_index( tu ); } } +#endif } +#if JVET_M0464_UNI_MTS +void CABACWriter::mts_coding( const TransformUnit& tu, ComponentID compID ) +{ + const CodingUnit &cu = *tu.cu; + const bool tsAllowed = TU::isTSAllowed ( tu, compID ); + const bool mtsAllowed = TU::isMTSAllowed( tu, compID ); + + if( !mtsAllowed && !tsAllowed ) return; + + int symbol = 0; + int ctxIdx = 0; + + if( tsAllowed ) + { + symbol = 1 - ( tu.mtsIdx == 1 ? 1 : 0 ); + ctxIdx = 6; + m_BinEncoder.encodeBin( symbol, Ctx::MTSIndex( ctxIdx ) ); + } + + if( tu.mtsIdx != 1 ) + { + if( mtsAllowed ) + { + symbol = tu.mtsIdx != 0 ? 1 : 0; + ctxIdx = std::min( (int)cu.qtDepth, 5 ); + m_BinEncoder.encodeBin( symbol, Ctx::MTSIndex( ctxIdx ) ); + if( symbol ) + { + ctxIdx = 7; + for( int i = 0; i < 3; i++, ctxIdx++ ) + { + symbol = tu.mtsIdx > i + 2 ? 
1 : 0; + m_BinEncoder.encodeBin( symbol, Ctx::MTSIndex( ctxIdx ) ); + + if( !symbol ) + { + break; + } + } + } + } + } + + DTRACE( g_trace_ctx, D_SYNTAX, "mts_coding() etype=%d pos=(%d,%d) mtsIdx=%d\n", COMPONENT_Y, cu.lx(), cu.ly(), tu.mtsIdx ); +} +#else void CABACWriter::transform_skip_flag( const TransformUnit& tu, ComponentID compID ) { if( !tu.cu->cs->pps->getUseTransformSkip() || tu.cu->transQuantBypass || !TU::hasTransformSkipFlag( *tu.cs, tu.blocks[compID] ) || ( isLuma( compID ) && tu.cu->emtFlag ) ) @@ -2099,13 +2343,17 @@ void CABACWriter::emt_cu_flag( const CodingUnit& cu ) DTRACE( g_trace_ctx, D_SYNTAX, "emt_cu_flag() etype=%d pos=(%d,%d) emtCuFlag=%d\n", COMPONENT_Y, cu.lx(), cu.ly(), ( int ) cu.emtFlag ); } } - +#endif void CABACWriter::explicit_rdpcm_mode( const TransformUnit& tu, ComponentID compID ) { const CodingUnit& cu = *tu.cu; +#if JVET_M0464_UNI_MTS + if( !CU::isIntra(cu) && CU::isRDPCMEnabled(cu) && ( tu.mtsIdx==1 || cu.transQuantBypass ) ) +#else if( !CU::isIntra(cu) && CU::isRDPCMEnabled(cu) && ( tu.transformSkip[compID] || cu.transQuantBypass ) ) +#endif { ChannelType chType = toChannelType( compID ); switch( tu.rdpcm[compID] ) @@ -2216,12 +2464,22 @@ void CABACWriter::residual_coding_subblock( CoeffCodingContext& cctx, const TCoe int numNonZero = 0; unsigned signPattern = 0; bool is2x2subblock = ( cctx.log2CGSize() == 2 ); +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS + int remRegBins = ( is2x2subblock ? MAX_NUM_REG_BINS_2x2SUBBLOCK : MAX_NUM_REG_BINS_4x4SUBBLOCK ); +#else int remGt2Bins = ( is2x2subblock ? MAX_NUM_GT2_BINS_2x2SUBBLOCK : MAX_NUM_GT2_BINS_4x4SUBBLOCK ); int remRegBins = ( is2x2subblock ? 
MAX_NUM_REG_BINS_2x2SUBBLOCK : MAX_NUM_REG_BINS_4x4SUBBLOCK ) - remGt2Bins; +#endif int firstPosMode2 = minSubPos - 1; +#if !JVET_M0173_MOVE_GT2_TO_FIRST_PASS int firstPosMode1 = minSubPos - 1; +#endif +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS + for( ; nextSigPos >= minSubPos && remRegBins >= 4; nextSigPos-- ) +#else for( ; nextSigPos >= minSubPos && remRegBins >= 3; nextSigPos-- ) +#endif { TCoeff Coeff = coeff[ cctx.blockPos( nextSigPos ) ]; unsigned sigFlag = ( Coeff != 0 ); @@ -2260,19 +2518,29 @@ void CABACWriter::residual_coding_subblock( CoeffCodingContext& cctx, const TCoe remAbsLevel >>= 1; remRegBins--; +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS + unsigned gt2 = !!remAbsLevel; + m_BinEncoder.encodeBin(gt2, cctx.greater2CtxIdAbs(ctxOff)); + DTRACE(g_trace_ctx, D_SYNTAX_RESI, "gt2_flag() bin=%d ctx=%d\n", gt2, cctx.greater2CtxIdAbs(ctxOff)); + remRegBins--; +#else if( remGt2Bins && !--remGt2Bins ) { firstPosMode1 = nextSigPos - 1; } +#endif } } state = ( stateTransTable >> ((state<<2)+((Coeff&1)<<1)) ) & 3; } firstPosMode2 = nextSigPos; +#if !JVET_M0173_MOVE_GT2_TO_FIRST_PASS firstPosMode1 = ( firstPosMode1 > firstPosMode2 ? 
firstPosMode1 : firstPosMode2 ); +#endif +#if !JVET_M0173_MOVE_GT2_TO_FIRST_PASS //===== 2nd PASS: gt2 ===== for( int scanPos = firstSigPos; scanPos > firstPosMode1; scanPos-- ) { @@ -2285,10 +2553,17 @@ void CABACWriter::residual_coding_subblock( CoeffCodingContext& cctx, const TCoe DTRACE( g_trace_ctx, D_SYNTAX_RESI, "gt2_flag() bin=%d ctx=%d\n", gt2, cctx.greater2CtxIdAbs(ctxOff) ); } } +#endif +#if JVET_M0173_MOVE_GT2_TO_FIRST_PASS + //===== 2nd PASS: Go-rice codes ===== + unsigned ricePar = 0; + for( int scanPos = firstSigPos; scanPos > firstPosMode2; scanPos-- ) +#else //===== 3rd PASS: Go-rice codes ===== unsigned ricePar = 0; for( int scanPos = firstSigPos; scanPos > firstPosMode1; scanPos-- ) +#endif { unsigned absLevel = abs( coeff[ cctx.blockPos( scanPos ) ] ); if( absLevel >= 4 ) @@ -2302,6 +2577,7 @@ void CABACWriter::residual_coding_subblock( CoeffCodingContext& cctx, const TCoe } } } +#if !JVET_M0173_MOVE_GT2_TO_FIRST_PASS for( int scanPos = firstPosMode1; scanPos > firstPosMode2; scanPos-- ) { unsigned absLevel = abs( coeff[ cctx.blockPos( scanPos ) ] ); @@ -2316,6 +2592,7 @@ void CABACWriter::residual_coding_subblock( CoeffCodingContext& cctx, const TCoe } } } +#endif //===== coeff bypass ==== for( int scanPos = firstPosMode2; scanPos >= minSubPos; scanPos-- ) @@ -2352,7 +2629,9 @@ void CABACWriter::residual_coding_subblock( CoeffCodingContext& cctx, const TCoe #else m_BinEncoder.encodeBinsEP( signPattern, numNonZero ); #endif +#if !JVET_M0464_UNI_MTS cctx.setEmtNumSigCoeff(numNonZero); +#endif } @@ -2468,6 +2747,7 @@ void CABACWriter::exp_golomb_eqprob( unsigned symbol, unsigned count ) m_BinEncoder.encodeBinsEP( bins, numBins ); } +#if !REMOVE_BIN_DECISION_TREE void CABACWriter::encode_sparse_dt( DecisionTree& dt, unsigned toCodeId ) { // propagate the sparsity information from end-nodes to intermediate nodes @@ -2517,6 +2797,7 @@ void CABACWriter::encode_sparse_dt( DecisionTree& dt, unsigned toCodeId ) return; } +#endif void 
CABACWriter::codeAlfCtuEnableFlags( CodingStructure& cs, ChannelType channel, AlfSliceParam* alfParam) { if( isLuma( channel ) ) @@ -2546,7 +2827,7 @@ void CABACWriter::codeAlfCtuEnableFlag( CodingStructure& cs, uint32_t ctuRsAddr, { AlfSliceParam& alfSliceParam = alfParam ? (*alfParam) : cs.slice->getAlfSliceParam(); - if( cs.sps->getUseALF() && alfSliceParam.enabledFlag[compIdx] ) + if( cs.sps->getALFEnabledFlag() && alfSliceParam.enabledFlag[compIdx] ) { const PreCalcValues& pcv = *cs.pcv; int frame_width_in_ctus = pcv.widthInCtus; diff --git a/source/Lib/EncoderLib/CABACWriter.h b/source/Lib/EncoderLib/CABACWriter.h index 6cc2e708e3109415ffed9f7f55a30015cb3b8191..10c97e26f7796ea71d546d39521533b9f4fa2c19 100644 --- a/source/Lib/EncoderLib/CABACWriter.h +++ b/source/Lib/EncoderLib/CABACWriter.h @@ -82,8 +82,12 @@ public: void sao_offset_pars ( const SAOOffset& ctbPars, ComponentID compID, bool sliceEnabled, int bitDepth ); // coding (quad)tree (clause 7.3.8.4) void coding_tree ( const CodingStructure& cs, Partitioner& pm, CUCtx& cuCtx, Partitioner* pPartitionerChroma = nullptr, CUCtx* pCuCtxChroma = nullptr); +#if JVET_M0421_SPLIT_SIG + void split_cu_mode ( const PartSplit split, const CodingStructure& cs, Partitioner& pm ); +#else void split_cu_flag ( bool split, const CodingStructure& cs, Partitioner& pm ); void split_cu_mode_mt ( const PartSplit split, const CodingStructure& cs, Partitioner& pm ); +#endif // coding unit (clause 7.3.8.5) void coding_unit ( const CodingUnit& cu, Partitioner& pm, CUCtx& cuCtx ); @@ -120,6 +124,9 @@ public: void MHIntra_flag ( const PredictionUnit& pu ); void MHIntra_luma_pred_modes ( const CodingUnit& cu ); void triangle_mode ( const CodingUnit& cu ); +#if JVET_M0444_SMVD + void smvd_mode ( const PredictionUnit& pu ); +#endif // pcm samples (clause 7.3.8.7) void pcm_samples ( const TransformUnit& tu ); @@ -138,9 +145,13 @@ public: // residual coding (clause 7.3.8.11) void residual_coding ( const TransformUnit& tu, ComponentID 
compID ); +#if JVET_M0464_UNI_MTS + void mts_coding ( const TransformUnit& tu, ComponentID compID ); +#else void transform_skip_flag ( const TransformUnit& tu, ComponentID compID ); void emt_tu_index ( const TransformUnit& tu ); void emt_cu_flag ( const CodingUnit& cu ); +#endif void explicit_rdpcm_mode ( const TransformUnit& tu, ComponentID compID ); void last_sig_coeff ( CoeffCodingContext& cctx ); void residual_coding_subblock ( CoeffCodingContext& cctx, const TCoeff* coeff, const int stateTransTable, int& state ); @@ -156,7 +167,9 @@ private: void unary_max_symbol ( unsigned symbol, unsigned ctxId0, unsigned ctxIdN, unsigned maxSymbol ); void unary_max_eqprob ( unsigned symbol, unsigned maxSymbol ); void exp_golomb_eqprob ( unsigned symbol, unsigned count ); +#if !REMOVE_BIN_DECISION_TREE void encode_sparse_dt ( DecisionTree& dt, unsigned toCodeId ); +#endif // statistic unsigned get_num_written_bits() { return m_BinEncoder.getNumWrittenBits(); } diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h index c60a7ad50613f0b9ac6c1ce0e3701f528b26a25d..0fdf461193b496844a0114d1dc5da9f382ac02bf 100644 --- a/source/Lib/EncoderLib/EncCfg.h +++ b/source/Lib/EncoderLib/EncCfg.h @@ -188,7 +188,6 @@ protected: // TODO: We need to have a common sliding mechanism used by both the encoder and decoder int m_maxTempLayer; ///< Max temporal layer - bool m_useAMP; unsigned m_CTUSize; bool m_useSplitConsOverride; unsigned m_uiMinQT[3]; //0: I slice; 1: P/B slice, 2: I slice chroma @@ -202,10 +201,20 @@ protected: unsigned m_log2DiffMaxMinCodingBlockSize; int m_LMChroma; +#if JVET_M0142_CCLM_COLLOCATED_CHROMA + bool m_cclmCollocatedChromaFlag; +#endif +#if JVET_M0464_UNI_MTS + int m_IntraMTS; + int m_InterMTS; + int m_IntraMTSMaxCand; + int m_InterMTSMaxCand; +#else int m_IntraEMT; int m_InterEMT; int m_FastIntraEMT; int m_FastInterEMT; +#endif bool m_LargeCTU; int m_SubPuMvpMode; bool m_Affine; @@ -230,13 +239,13 @@ protected: bool m_MHIntra; bool m_Triangle; 
- unsigned m_CPRMode; - unsigned m_CPRLocalSearchRangeX; - unsigned m_CPRLocalSearchRangeY; - unsigned m_CPRHashSearch; - unsigned m_CPRHashSearchMaxCand; - unsigned m_CPRHashSearchRange4SmallBlk; - unsigned m_CPRFastMethod; + unsigned m_IBCMode; + unsigned m_IBCLocalSearchRangeX; + unsigned m_IBCLocalSearchRangeY; + unsigned m_IBCHashSearch; + unsigned m_IBCHashSearchMaxCand; + unsigned m_IBCHashSearchRange4SmallBlk; + unsigned m_IBCFastMethod; bool m_wrapAround; unsigned m_wrapAroundOffset; @@ -674,6 +683,10 @@ public: void setUseLMChroma ( int n ) { m_LMChroma = n; } int getUseLMChroma() const { return m_LMChroma; } +#if JVET_M0142_CCLM_COLLOCATED_CHROMA + void setCclmCollocatedChromaFlag ( bool b ) { m_cclmCollocatedChromaFlag = b; } + bool getCclmCollocatedChromaFlag () const { return m_cclmCollocatedChromaFlag; } +#endif void setSubPuMvpMode(int n) { m_SubPuMvpMode = n; } bool getSubPuMvpMode() const { return m_SubPuMvpMode; } @@ -695,6 +708,16 @@ public: bool getUseAltDQPCoding () const { return m_AltDQPCoding; } #endif +#if JVET_M0464_UNI_MTS + void setIntraMTSMaxCand ( unsigned u ) { m_IntraMTSMaxCand = u; } + unsigned getIntraMTSMaxCand () const { return m_IntraMTSMaxCand; } + void setInterMTSMaxCand ( unsigned u ) { m_InterMTSMaxCand = u; } + unsigned getInterMTSMaxCand () const { return m_InterMTSMaxCand; } + void setIntraMTS ( bool b ) { m_IntraMTS = b; } + bool getIntraMTS () const { return m_IntraMTS; } + void setInterMTS ( bool b ) { m_InterMTS = b; } + bool getInterMTS () const { return m_InterMTS; } +#else void setFastIntraEMT ( bool b ) { m_FastIntraEMT = b; } bool getFastIntraEMT () const { return m_FastIntraEMT; } void setFastInterEMT ( bool b ) { m_FastInterEMT = b; } @@ -703,6 +726,7 @@ public: bool getIntraEMT () const { return m_IntraEMT; } void setInterEMT ( bool b ) { m_InterEMT = b; } bool getInterEMT () const { return m_InterEMT; } +#endif @@ -732,20 +756,20 @@ public: bool getUseTriangle () const { return m_Triangle; } - void 
setCPRMode (unsigned n) { m_CPRMode = n; } - unsigned getCPRMode () const { return m_CPRMode; } - void setCPRLocalSearchRangeX (unsigned n) { m_CPRLocalSearchRangeX = n; } - unsigned getCPRLocalSearchRangeX () const { return m_CPRLocalSearchRangeX; } - void setCPRLocalSearchRangeY (unsigned n) { m_CPRLocalSearchRangeY = n; } - unsigned getCPRLocalSearchRangeY () const { return m_CPRLocalSearchRangeY; } - void setCPRHashSearch (unsigned n) { m_CPRHashSearch = n; } - unsigned getCPRHashSearch () const { return m_CPRHashSearch; } - void setCPRHashSearchMaxCand (unsigned n) { m_CPRHashSearchMaxCand = n; } - unsigned getCPRHashSearchMaxCand () const { return m_CPRHashSearchMaxCand; } - void setCPRHashSearchRange4SmallBlk (unsigned n) { m_CPRHashSearchRange4SmallBlk = n; } - unsigned getCPRHashSearchRange4SmallBlk () const { return m_CPRHashSearchRange4SmallBlk; } - void setCPRFastMethod (unsigned n) { m_CPRFastMethod = n; } - unsigned getCPRFastMethod () const { return m_CPRFastMethod; } + void setIBCMode (unsigned n) { m_IBCMode = n; } + unsigned getIBCMode () const { return m_IBCMode; } + void setIBCLocalSearchRangeX (unsigned n) { m_IBCLocalSearchRangeX = n; } + unsigned getIBCLocalSearchRangeX () const { return m_IBCLocalSearchRangeX; } + void setIBCLocalSearchRangeY (unsigned n) { m_IBCLocalSearchRangeY = n; } + unsigned getIBCLocalSearchRangeY () const { return m_IBCLocalSearchRangeY; } + void setIBCHashSearch (unsigned n) { m_IBCHashSearch = n; } + unsigned getIBCHashSearch () const { return m_IBCHashSearch; } + void setIBCHashSearchMaxCand (unsigned n) { m_IBCHashSearchMaxCand = n; } + unsigned getIBCHashSearchMaxCand () const { return m_IBCHashSearchMaxCand; } + void setIBCHashSearchRange4SmallBlk (unsigned n) { m_IBCHashSearchRange4SmallBlk = n; } + unsigned getIBCHashSearchRange4SmallBlk () const { return m_IBCHashSearchRange4SmallBlk; } + void setIBCFastMethod (unsigned n) { m_IBCFastMethod = n; } + unsigned getIBCFastMethod () const { return 
m_IBCFastMethod; } void setUseWrapAround ( bool b ) { m_wrapAround = b; } bool getUseWrapAround () const { return m_wrapAround; } @@ -783,8 +807,6 @@ public: void setQuadtreeTUMaxDepthInter ( uint32_t u ) { m_uiQuadtreeTUMaxDepthInter = u; } void setQuadtreeTUMaxDepthIntra ( uint32_t u ) { m_uiQuadtreeTUMaxDepthIntra = u; } - void setUseAMP( bool b ) { m_useAMP = b; } - //====== Loop/Deblock Filter ======== void setLoopFilterDisable ( bool b ) { m_bLoopFilterDisable = b; } void setLoopFilterOffsetInPPS ( bool b ) { m_loopFilterOffsetInPPS = b; } @@ -878,13 +900,8 @@ public: #if X0038_LAMBDA_FROM_QP_CAPABILITY int getIntraQPOffset () const { return m_intraQPOffset; } int getLambdaFromQPEnable () const { return m_lambdaFromQPEnable; } -#if ENABLE_QPA | JVET_L0646_GBI public: -#else -protected: -#endif int getBaseQP () const { return m_iQP; } // public should use getQPForPicture. -public: int getQPForPicture (const uint32_t gopIndex, const Slice *pSlice) const; // Function actually defined in EncLib.cpp #else int getBaseQP () { return m_iQP; } diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp index f74add71740e260dd27ea94d3353de02ddd5e820..2b41fcb861b2322aa96914164e4b886e52868414 100644 --- a/source/Lib/EncoderLib/EncCu.cpp +++ b/source/Lib/EncoderLib/EncCu.cpp @@ -268,6 +268,18 @@ void EncCu::init( EncLib* pcEncLib, const SPS& sps PARL_PARAM( const int tId ) ) m_pcEncLib = pcEncLib; m_dataId = tId; #endif +#if JVET_M0170_MRG_SHARELIST + m_shareState = NO_SHARE; + m_pcInterSearch->setShareState(0); + setShareStateDec(0); +#endif + +#if JVET_M0170_MRG_SHARELIST + m_shareBndPosX = -1; + m_shareBndPosY = -1; + m_shareBndSizeW = 0; + m_shareBndSizeH = 0; +#endif #if REUSE_CU_RESULTS DecCu::init( m_pcTrQuant, m_pcIntraSearch, m_pcInterSearch ); @@ -280,9 +292,9 @@ void EncCu::init( EncLib* pcEncLib, const SPS& sps PARL_PARAM( const int tId ) ) ::memset(m_subMergeBlkNum, 0, sizeof(m_subMergeBlkNum)); m_prevPOC = MAX_UINT; - if 
(m_pcEncCfg->getCPRHashSearch() && m_pcEncCfg->getCPRMode()) + if (m_pcEncCfg->getIBCHashSearch() && m_pcEncCfg->getIBCMode()) { - m_cprHashMap.init(m_pcEncCfg->getSourceWidth(), m_pcEncCfg->getSourceHeight()); + m_ibcHashMap.init(m_pcEncCfg->getSourceWidth(), m_pcEncCfg->getSourceHeight()); } } @@ -292,9 +304,9 @@ void EncCu::init( EncLib* pcEncLib, const SPS& sps PARL_PARAM( const int tId ) ) void EncCu::compressCtu( CodingStructure& cs, const UnitArea& area, const unsigned ctuRsAddr, const int prevQP[], const int currQP[] ) { - if (m_pcEncCfg->getCPRHashSearch() && ctuRsAddr == 0 && cs.slice->getSPS()->getSpsNext().getCPRMode()) + if (m_pcEncCfg->getIBCHashSearch() && ctuRsAddr == 0 && cs.slice->getSPS()->getSpsNext().getIBCMode()) { - m_cprHashMap.rebuildPicHashMap(cs.picture->getOrigBuf()); + m_ibcHashMap.rebuildPicHashMap(cs.picture->getOrigBuf()); } m_modeCtrl->initCTUEncoding( *cs.slice ); @@ -317,24 +329,24 @@ void EncCu::compressCtu( CodingStructure& cs, const UnitArea& area, const unsign // init the partitioning manager Partitioner *partitioner = PartitionerFactory::get( *cs.slice ); partitioner->initCtu( area, CH_L, *cs.slice ); - if (m_pcEncCfg->getCPRMode()) + if (m_pcEncCfg->getIBCMode()) { m_pcInterSearch->resetCtuRecord(); - m_ctuCprSearchRangeX = m_pcEncCfg->getCPRLocalSearchRangeX(); - m_ctuCprSearchRangeY = m_pcEncCfg->getCPRLocalSearchRangeY(); + m_ctuIbcSearchRangeX = m_pcEncCfg->getIBCLocalSearchRangeX(); + m_ctuIbcSearchRangeY = m_pcEncCfg->getIBCLocalSearchRangeY(); } - if (m_pcEncCfg->getCPRMode() && m_pcEncCfg->getCPRHashSearch() && (m_pcEncCfg->getCPRFastMethod() & CPR_FAST_METHOD_ADAPTIVE_SEARCHRANGE)) + if (m_pcEncCfg->getIBCMode() && m_pcEncCfg->getIBCHashSearch() && (m_pcEncCfg->getIBCFastMethod() & IBC_FAST_METHOD_ADAPTIVE_SEARCHRANGE)) { - const int hashHitRatio = m_cprHashMap.getHashHitRatio(area.Y()); // in percent + const int hashHitRatio = m_ibcHashMap.getHashHitRatio(area.Y()); // in percent if (hashHitRatio < 5) // 5% { - 
m_ctuCprSearchRangeX >>= 1; - m_ctuCprSearchRangeY >>= 1; + m_ctuIbcSearchRangeX >>= 1; + m_ctuIbcSearchRangeY >>= 1; } if (cs.slice->getNumRefIdx(REF_PIC_LIST_0) > 1) { - m_ctuCprSearchRangeX >>= 1; - m_ctuCprSearchRangeY >>= 1; + m_ctuIbcSearchRangeX >>= 1; + m_ctuIbcSearchRangeY >>= 1; } } // init current context pointer @@ -573,6 +585,17 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par , LutMotionCand *&bestMotCandLUTs ) { +#if JVET_M0170_MRG_SHARELIST + if (m_shareState == NO_SHARE) + { + tempCS->sharedBndPos = tempCS->area.Y().lumaPos(); + tempCS->sharedBndSize.width = tempCS->area.lwidth(); + tempCS->sharedBndSize.height = tempCS->area.lheight(); + bestCS->sharedBndPos = bestCS->area.Y().lumaPos(); + bestCS->sharedBndSize.width = bestCS->area.lwidth(); + bestCS->sharedBndSize.height = bestCS->area.lheight(); + } +#endif #if ENABLE_SPLIT_PARALLELISM CHECK( m_dataId != tempCS->picture->scheduler.getDataId(), "Working in the wrong dataId!" ); @@ -635,6 +658,11 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par DTRACE_UPDATE( g_trace_ctx, std::make_pair( "cuh", tempCS->area.lheight() ) ); DTRACE( g_trace_ctx, D_COMMON, "@(%4d,%4d) [%2dx%2d]\n", tempCS->area.lx(), tempCS->area.ly(), tempCS->area.lwidth(), tempCS->area.lheight() ); + +#if JVET_M0170_MRG_SHARELIST + int startShareThisLevel = 0; +#endif + do { EncTestMode currTestMode = m_modeCtrl->currTestMode(); @@ -716,13 +744,13 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par { xCheckIntraPCM( tempCS, bestCS, partitioner, currTestMode ); } - else if (currTestMode.type == ETM_CPR) + else if (currTestMode.type == ETM_IBC) { - xCheckRDCostCPRMode(tempCS, bestCS, partitioner, currTestMode); + xCheckRDCostIBCMode(tempCS, bestCS, partitioner, currTestMode); } - else if (currTestMode.type == ETM_CPR_MERGE) + else if (currTestMode.type == ETM_IBC_MERGE) { - xCheckRDCostCPRModeMerge2Nx2N(tempCS, bestCS, partitioner, 
currTestMode); + xCheckRDCostIBCModeMerge2Nx2N(tempCS, bestCS, partitioner, currTestMode); } else if( isModeSplit( currTestMode ) ) { @@ -739,6 +767,15 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par } } while( m_modeCtrl->nextMode( *tempCS, partitioner ) ); +#if JVET_M0170_MRG_SHARELIST + if(startShareThisLevel == 1) + { + m_shareState = NO_SHARE; + m_pcInterSearch->setShareState(m_shareState); + setShareStateDec(m_shareState); + } +#endif + ////////////////////////////////////////////////////////////////////////// // Finishing CU #if ENABLE_SPLIT_PARALLELISM @@ -1043,10 +1080,19 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, m_CABACEstimator->getCtx() = m_CurrCtx->start; const TempCtx ctxStartSP( m_CtxCache, SubCtx( Ctx::SplitFlag, m_CABACEstimator->getCtx() ) ); +#if JVET_M0421_SPLIT_SIG + const TempCtx ctxStartQt( m_CtxCache, SubCtx( Ctx::SplitQtFlag, m_CABACEstimator->getCtx() ) ); + const TempCtx ctxStartHv( m_CtxCache, SubCtx( Ctx::SplitHvFlag, m_CABACEstimator->getCtx() ) ); + const TempCtx ctxStart12( m_CtxCache, SubCtx( Ctx::Split12Flag, m_CABACEstimator->getCtx() ) ); +#else const TempCtx ctxStartBT( m_CtxCache, SubCtx( Ctx::BTSplitFlag, m_CABACEstimator->getCtx() ) ); +#endif m_CABACEstimator->resetBits(); +#if JVET_M0421_SPLIT_SIG + m_CABACEstimator->split_cu_mode( split, *tempCS, partitioner ); +#else if( partitioner.getImplicitSplit( *tempCS ) != CU_QUAD_SPLIT ) { if( partitioner.canSplit( CU_QUAD_SPLIT, *tempCS ) ) @@ -1058,12 +1104,19 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, m_CABACEstimator->split_cu_mode_mt( split, *tempCS, partitioner ); } } +#endif const double factor = ( tempCS->currQP[partitioner.chType] > 30 ? 
1.1 : 1.075 ); const double cost = m_pcRdCost->calcRdCost( uint64_t( m_CABACEstimator->getEstFracBits() + ( ( bestCS->fracBits ) / factor ) ), Distortion( bestCS->dist / factor ) ); m_CABACEstimator->getCtx() = SubCtx( Ctx::SplitFlag, ctxStartSP ); +#if JVET_M0421_SPLIT_SIG + m_CABACEstimator->getCtx() = SubCtx( Ctx::SplitQtFlag, ctxStartQt ); + m_CABACEstimator->getCtx() = SubCtx( Ctx::SplitHvFlag, ctxStartHv ); + m_CABACEstimator->getCtx() = SubCtx( Ctx::Split12Flag, ctxStart12 ); +#else m_CABACEstimator->getCtx() = SubCtx( Ctx::BTSplitFlag, ctxStartBT ); +#endif if( cost > bestCS->cost #if ENABLE_QPA_SUB_CTU @@ -1076,6 +1129,55 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, return; } +#if JVET_M0170_MRG_SHARELIST + if (!slice.isIntra() + && tempCS->chType == CHANNEL_TYPE_LUMA + ) + { + tempCS->slice->copyMotionLUTs(tempMotCandLUTs, tempCS->slice->getMotionLUTs()); + } + + int startShareThisLevel = 0; + const uint32_t uiLPelX = tempCS->area.Y().lumaPos().x; + const uint32_t uiTPelY = tempCS->area.Y().lumaPos().y; + + int splitRatio = 1; + CHECK(!(split == CU_QUAD_SPLIT || split == CU_HORZ_SPLIT || split == CU_VERT_SPLIT + || split == CU_TRIH_SPLIT || split == CU_TRIV_SPLIT), "invalid split type"); + splitRatio = (split == CU_HORZ_SPLIT || split == CU_VERT_SPLIT) ? 
1 : 2; + + bool isOneChildSmall = ((tempCS->area.lwidth())*(tempCS->area.lheight()) >> splitRatio) < MRG_SHARELIST_SHARSIZE; + + if ((((tempCS->area.lwidth())*(tempCS->area.lheight())) > (MRG_SHARELIST_SHARSIZE * 1))) + { + m_shareState = NO_SHARE; + } + + if (m_shareState == NO_SHARE)//init state + { + if (isOneChildSmall) + { + m_shareState = GEN_ON_SHARED_BOUND;//share start state + startShareThisLevel = 1; + } + } + if ((m_shareState == GEN_ON_SHARED_BOUND) && (!slice.isIntra())) + { +#if JVET_M0170_MRG_SHARELIST + tempCS->slice->copyMotionLUTs(tempCS->slice->getMotionLUTs(), tempCS->slice->m_MotionCandLuTsBkup); + m_shareBndPosX = uiLPelX; + m_shareBndPosY = uiTPelY; + m_shareBndSizeW = tempCS->area.lwidth(); + m_shareBndSizeH = tempCS->area.lheight(); + m_shareState = SHARING; +#endif + } + + + m_pcInterSearch->setShareState(m_shareState); + setShareStateDec(m_shareState); +#endif + partitioner.splitCurrArea( split, *tempCS ); m_CurrCtx++; @@ -1106,7 +1208,16 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, tempCS->slice->copyMotionLUTs(tempMotCandLUTs, tempSubMotCandLUTs); tempCS->slice->copyMotionLUTs(tempMotCandLUTs, bestSubMotCandLUTs); } - +#if JVET_M0170_MRG_SHARELIST + tempSubCS->sharedBndPos.x = (m_shareState == SHARING) ? m_shareBndPosX : tempSubCS->area.Y().lumaPos().x; + tempSubCS->sharedBndPos.y = (m_shareState == SHARING) ? m_shareBndPosY : tempSubCS->area.Y().lumaPos().y; + tempSubCS->sharedBndSize.width = (m_shareState == SHARING) ? m_shareBndSizeW : tempSubCS->area.lwidth(); + tempSubCS->sharedBndSize.height = (m_shareState == SHARING) ? m_shareBndSizeH : tempSubCS->area.lheight(); + bestSubCS->sharedBndPos.x = (m_shareState == SHARING) ? m_shareBndPosX : tempSubCS->area.Y().lumaPos().x; + bestSubCS->sharedBndPos.y = (m_shareState == SHARING) ? m_shareBndPosY : tempSubCS->area.Y().lumaPos().y; + bestSubCS->sharedBndSize.width = (m_shareState == SHARING) ? 
m_shareBndSizeW : tempSubCS->area.lwidth(); + bestSubCS->sharedBndSize.height = (m_shareState == SHARING) ? m_shareBndSizeH : tempSubCS->area.lheight(); +#endif xCompressCU( tempSubCS, bestSubCS, partitioner , tempSubMotCandLUTs , bestSubMotCandLUTs @@ -1146,6 +1257,15 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, partitioner.exitCurrSplit(); +#if JVET_M0170_MRG_SHARELIST + if (startShareThisLevel == 1) + { + m_shareState = NO_SHARE; + m_pcInterSearch->setShareState(m_shareState); + setShareStateDec(m_shareState); + } +#endif + m_CurrCtx--; // Finally, generate split-signaling bits for RD-cost check @@ -1178,6 +1298,9 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, { m_CABACEstimator->resetBits(); +#if JVET_M0421_SPLIT_SIG + m_CABACEstimator->split_cu_mode( split, *tempCS, partitioner ); +#else if( partitioner.canSplit( CU_QUAD_SPLIT, *tempCS ) ) { m_CABACEstimator->split_cu_flag( split == CU_QUAD_SPLIT, *tempCS, partitioner ); @@ -1186,6 +1309,7 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, { m_CABACEstimator->split_cu_mode_mt( split, *tempCS, partitioner ); } +#endif tempCS->fracBits += m_CABACEstimator->getEstFracBits(); // split bits } @@ -1261,19 +1385,25 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, void EncCu::xCheckRDCostIntra( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode ) { +#if !JVET_M0464_UNI_MTS double bestInterCost = m_modeCtrl->getBestInterCost(); double costSize2Nx2NemtFirstPass = m_modeCtrl->getEmtSize2Nx2NFirstPassCost(); bool skipSecondEmtPass = m_modeCtrl->getSkipSecondEMTPass(); const SPS &sps = *tempCS->sps; - const PPS &pps = *tempCS->pps; +#endif + const PPS &pps = *tempCS->pps; +#if !JVET_M0464_UNI_MTS const CodingUnit *bestCU = bestCS->getCU( partitioner.chType ); const int maxSizeEMT = EMT_INTRA_MAX_CU_WITH_QTBT; uint8_t 
considerEmtSecondPass = ( sps.getSpsNext().getUseIntraEMT() && isLuma( partitioner.chType ) && partitioner.currArea().lwidth() <= maxSizeEMT && partitioner.currArea().lheight() <= maxSizeEMT ) ? 1 : 0; +#endif Distortion interHad = m_modeCtrl->getInterHad(); - +#if JVET_M0464_UNI_MTS + { +#else for( uint8_t emtCuFlag = 0; emtCuFlag <= considerEmtSecondPass; emtCuFlag++ ) { //Possible early EMT tests interruptions @@ -1287,6 +1417,7 @@ void EncCu::xCheckRDCostIntra( CodingStructure *&tempCS, CodingStructure *&bestC { continue; } +#endif tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); @@ -1304,7 +1435,9 @@ void EncCu::xCheckRDCostIntra( CodingStructure *&tempCS, CodingStructure *&bestC cu.chromaQpAdj = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1; cu.qp = encTestMode.qp; //cu.ipcm = false; +#if !JVET_M0464_UNI_MTS cu.emtFlag = emtCuFlag; +#endif CU::addPUs( cu ); @@ -1320,7 +1453,11 @@ void EncCu::xCheckRDCostIntra( CodingStructure *&tempCS, CodingStructure *&bestC interHad = 0; // JEM assumes only perfect reconstructions can from now on beat the inter mode m_modeCtrl->enforceInterHad( 0 ); +#if JVET_M0464_UNI_MTS + return; +#else continue; +#endif } if( !CS::isDualITree( *tempCS ) ) @@ -1373,9 +1510,10 @@ void EncCu::xCheckRDCostIntra( CodingStructure *&tempCS, CodingStructure *&bestC xCheckDQP( *tempCS, partitioner ); - +#if !JVET_M0464_UNI_MTS // we save the cost of the modes for the first EMT pass if( !emtCuFlag ) static_cast< double& >( costSize2Nx2NemtFirstPass ) = tempCS->cost; +#endif #if WCG_EXT DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) ); @@ -1384,7 +1522,7 @@ void EncCu::xCheckRDCostIntra( CodingStructure *&tempCS, CodingStructure *&bestC #endif xCheckBestMode( tempCS, bestCS, partitioner, encTestMode ); - +#if !JVET_M0464_UNI_MTS //now we check whether the second pass of SIZE_2Nx2N and the whole Intra SIZE_NxN should be skipped or not if( !emtCuFlag && !tempCS->slice->isIntra() && bestCU && bestCU->predMode != 
MODE_INTRA && m_pcEncCfg->getFastInterEMT() ) { @@ -1396,7 +1534,7 @@ void EncCu::xCheckRDCostIntra( CodingStructure *&tempCS, CodingStructure *&bestC break; } } - +#endif } //for emtCuFlag } @@ -1567,7 +1705,7 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& MergeCtx mergeCtx; const SPS &sps = *tempCS->sps; - if( sps.getSpsNext().getUseSubPuMvp() ) + if( sps.getSBTMVPEnabledFlag() ) { Size bufSize = g_miScaling.scale( tempCS->area.lumaSize() ); mergeCtx.subPuMvpMiBuf = MotionBuf( m_SubPuMiBuf, bufSize ); @@ -1588,10 +1726,16 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& PredictionUnit pu( tempCS->area ); pu.cu = &cu; pu.cs = tempCS; +#if JVET_M0170_MRG_SHARELIST + pu.shareParentPos = tempCS->sharedBndPos; + pu.shareParentSize = tempCS->sharedBndSize; +#endif PU::getInterMergeCandidates(pu, mergeCtx , 0 ); +#if !JVET_M0068_M0171_MMVD_CLEANUP PU::restrictBiPredMergeCands(pu, mergeCtx); +#endif PU::getInterMMVDMergeCandidates(pu, mergeCtx); } bool candHasNoResidual[MRG_MAX_NUM_CANDS + MMVD_ADD_NUM]; @@ -1654,7 +1798,7 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& if( auto blkCache = dynamic_cast< CacheBlkInfoCtrl* >( m_modeCtrl ) ) { - if (slice.getSPS()->getSpsNext().getCPRMode()) + if (slice.getSPS()->getSpsNext().getIBCMode()) { ComprCUCtx cuECtx = m_modeCtrl->getComprCUCtx(); bestIsSkip = blkCache->isSkip(tempCS->area) && cuECtx.bestCU; @@ -1704,13 +1848,13 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& m_pcRdCost->setDistParam (distParam, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y(), sps.getBitDepth (CHANNEL_TYPE_LUMA), COMPONENT_Y, bUseHadamard); const UnitArea localUnitArea( tempCS->area.chromaFormat, Area( 0, 0, tempCS->area.Y().width, tempCS->area.Y().height) ); - uint32_t cprCand = 0; + uint32_t ibcCand = 0; uint32_t numValidMv = mergeCtx.numValidMergeCand; for( uint32_t uiMergeCand = 0; uiMergeCand < 
mergeCtx.numValidMergeCand; uiMergeCand++ ) { if ((mergeCtx.interDirNeighbours[uiMergeCand] == 1 || mergeCtx.interDirNeighbours[uiMergeCand] == 3) && tempCS->slice->getRefPic(REF_PIC_LIST_0, mergeCtx.mvFieldNeighbours[uiMergeCand << 1].refIdx)->getPOC() == tempCS->slice->getPOC()) { - cprCand++; + ibcCand++; numValidMv--; continue; } @@ -1752,7 +1896,7 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& swap(singleMergeTempBuffer, acMergeTempBuffer[insertPos]); } } - CHECK(std::min(uiMergeCand + 1 - cprCand, uiNumMrgSATDCand) != RdModeList.size(), ""); + CHECK(std::min(uiMergeCand + 1 - ibcCand, uiNumMrgSATDCand) != RdModeList.size(), ""); } if (numValidMv < uiNumMrgSATDCand) uiNumMrgSATDCand = numValidMv; @@ -2073,10 +2217,14 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& isTestSkipMerge[uiMergeCand] = true; } +#if JVET_M0464_UNI_MTS + xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, NULL, uiNoResidualPass == 0 ? &candHasNoResidual[uiMrgHADIdx] : NULL ); +#else xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass , NULL , 1 , uiNoResidualPass == 0 ? &candHasNoResidual[uiMrgHADIdx] : NULL); +#endif if( m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip && !pu.mhIntraFlag) { @@ -2333,7 +2481,11 @@ void EncCu::xCheckRDCostMergeTriangle2Nx2N( CodingStructure *&tempCS, CodingStru m_pcInterSearch->weightedTriangleBlk( pu, PU::getTriangleWeights(pu, triangleMrgCtx, candIdx0, candIdx1), splitDir, MAX_NUM_CHANNEL_TYPE, predBuf, triangleBuffer[candIdx0], triangleBuffer[candIdx1] ); } +#if JVET_M0464_UNI_MTS + xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, noResidualPass, NULL, ( noResidualPass == 0 ? &trianglecandHasNoResidual[mergeCand] : NULL ) ); +#else xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, noResidualPass, NULL, true, ( (noResidualPass == 0 ) ? 
&trianglecandHasNoResidual[mergeCand] : NULL ) ); +#endif if (m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip) { @@ -2367,7 +2519,7 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct const SPS &sps = *tempCS->sps; MergeCtx mrgCtx; - if ( sps.getSpsNext().getUseSubPuMvp() ) + if ( sps.getSBTMVPEnabledFlag() ) { Size bufSize = g_miScaling.scale( tempCS->area.lumaSize() ); mrgCtx.subPuMvpMiBuf = MotionBuf( m_SubPuMiBuf, bufSize ); @@ -2580,7 +2732,11 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct m_pcInterSearch->motionCompensation( pu ); } +#if JVET_M0464_UNI_MTS + xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, NULL, ( uiNoResidualPass == 0 ? &candHasNoResidual[uiMergeCand] : NULL ) ); +#else xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, NULL, true, ((uiNoResidualPass == 0) ? &candHasNoResidual[uiMergeCand] : NULL) ); +#endif if ( m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip ) { @@ -2622,12 +2778,12 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct } } ////////////////////////////////////////////////////////////////////////////////////////////// -// cpr merge/skip mode check -void EncCu::xCheckRDCostCPRModeMerge2Nx2N(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode) +// ibc merge/skip mode check +void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode) { - assert(tempCS->chType != CHANNEL_TYPE_CHROMA); // chroma CPR is derived + assert(tempCS->chType != CHANNEL_TYPE_CHROMA); // chroma IBC is derived - if (tempCS->area.lwidth() > CPR_MAX_CAND_SIZE || tempCS->area.lheight() > CPR_MAX_CAND_SIZE) // currently only check 32x32 and below block for cpr merge/skip + if (tempCS->area.lwidth() > IBC_MAX_CAND_SIZE || 
tempCS->area.lheight() > IBC_MAX_CAND_SIZE) // currently only check 32x32 and below block for ibc merge/skip { return; } @@ -2637,7 +2793,7 @@ void EncCu::xCheckRDCostCPRModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct MergeCtx mergeCtx; - if (sps.getSpsNext().getUseSubPuMvp()) + if (sps.getSBTMVPEnabledFlag()) { Size bufSize = g_miScaling.scale(tempCS->area.lumaSize()); mergeCtx.subPuMvpMiBuf = MotionBuf(m_SubPuMiBuf, bufSize); @@ -2648,7 +2804,7 @@ void EncCu::xCheckRDCostCPRModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct CodingUnit cu(tempCS->area); cu.cs = tempCS; cu.predMode = MODE_INTER; - cu.cpr = true; + cu.ibc = true; cu.slice = tempCS->slice; #if HEVC_TILES_WPP cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap(tempCS->area.lumaPos()); @@ -2659,6 +2815,10 @@ void EncCu::xCheckRDCostCPRModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct cu.mmvdSkip = false; pu.mmvdMergeFlag = false; cu.triangle = false; +#if JVET_M0170_MRG_SHARELIST + pu.shareParentPos = tempCS->sharedBndPos; + pu.shareParentSize = tempCS->sharedBndSize; +#endif PU::getInterMergeCandidates(pu, mergeCtx , 0 ); @@ -2693,7 +2853,7 @@ void EncCu::xCheckRDCostCPRModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct #endif cu.skip = false; cu.predMode = MODE_INTER; - cu.cpr = true; + cu.ibc = true; cu.transQuantBypass = encTestMode.lossless; cu.chromaQpAdj = cu.transQuantBypass ? 
0 : m_cuChromaQpOffsetIdxPlus1; cu.qp = encTestMode.qp; @@ -2803,6 +2963,9 @@ void EncCu::xCheckRDCostCPRModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct { if (!(bestIsSkip && (numResidualPass == 0))) { +#if JVET_M0464_UNI_MTS + { +#else unsigned char considerEmtSecondPass = 0; bool skipSecondEmtPass = true; bool hasResidual[2] = { false, false }; @@ -2815,6 +2978,7 @@ void EncCu::xCheckRDCostCPRModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct { continue; } +#endif // first get merge candidates CodingUnit &cu = tempCS->addCU(CS::getArea(*tempCS, tempCS->area, (const ChannelType)partitioner.chType), (const ChannelType)partitioner.chType); @@ -2826,23 +2990,24 @@ void EncCu::xCheckRDCostCPRModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct #endif cu.skip = false; cu.predMode = MODE_INTER; - cu.cpr = true; + cu.ibc = true; cu.transQuantBypass = encTestMode.lossless; cu.chromaQpAdj = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1; cu.qp = encTestMode.qp; - +#if !JVET_M0464_UNI_MTS cu.emtFlag = false; +#endif PredictionUnit &pu = tempCS->addPU(cu, partitioner.chType);// tempCS->addPU(cu); - pu.intraDir[0] = DC_IDX; // set intra pred for cpr block - pu.intraDir[1] = PLANAR_IDX; // set intra pred for cpr block + pu.intraDir[0] = DC_IDX; // set intra pred for ibc block + pu.intraDir[1] = PLANAR_IDX; // set intra pred for ibc block cu.mmvdSkip = false; pu.mmvdMergeFlag = false; cu.triangle = false; mergeCtx.setMergeInfo(pu, mergeCand); PU::spanMotionInfo(pu, mergeCtx); - assert(mergeCtx.mrgTypeNeighbours[mergeCand] == MRG_TYPE_CPR); // should be CPR candidate at this round + assert(mergeCtx.mrgTypeNeighbours[mergeCand] == MRG_TYPE_IBC); // should be IBC candidate at this round const bool chroma = !(CS::isDualITree(*tempCS)); // MC @@ -2862,20 +3027,24 @@ void EncCu::xCheckRDCostCPRModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct } #endif +#if !JVET_M0464_UNI_MTS hasResidual[emtCuFlag] = cu.rootCbf; emtCost[emtCuFlag] = tempCS->cost; +#endif 
DTRACE_MODE_COST(*tempCS, m_pcRdCost->getLambda()); xCheckBestMode(tempCS, bestCS, partitioner, encTestMode); tempCS->initStructData(encTestMode.qp, encTestMode.lossless); } +#if !JVET_M0464_UNI_MTS if (numResidualPass == 0 && (emtCost[0] <= emtCost[1] ? !hasResidual[0] : !hasResidual[1])) { // If no residual when allowing for one, then set mark to not try case where residual is forced to 0 candHasNoResidual[mergeCand] = 1; } +#endif if (m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip) { @@ -2891,7 +3060,7 @@ void EncCu::xCheckRDCostCPRModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct } -void EncCu::xCheckRDCostCPRMode(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode) +void EncCu::xCheckRDCostIBCMode(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode) { tempCS->initStructData(encTestMode.qp, encTestMode.lossless); @@ -2907,7 +3076,7 @@ void EncCu::xCheckRDCostCPRMode(CodingStructure *&tempCS, CodingStructure *&best cu.transQuantBypass = encTestMode.lossless; cu.chromaQpAdj = cu.transQuantBypass ? 
0 : m_cuChromaQpOffsetIdxPlus1; cu.qp = encTestMode.qp; - cu.cpr = true; + cu.ibc = true; cu.imv = 0; CU::addPUs(cu); @@ -2916,16 +3085,16 @@ void EncCu::xCheckRDCostCPRMode(CodingStructure *&tempCS, CodingStructure *&best cu.mmvdSkip = false; pu.mmvdMergeFlag = false; - pu.intraDir[0] = DC_IDX; // set intra pred for cpr block - pu.intraDir[1] = PLANAR_IDX; // set intra pred for cpr block + pu.intraDir[0] = DC_IDX; // set intra pred for ibc block + pu.intraDir[1] = PLANAR_IDX; // set intra pred for ibc block - pu.interDir = 1; // use list 0 for CPR mode + pu.interDir = 1; // use list 0 for IBC mode pu.refIdx[REF_PIC_LIST_0] = pu.cs->slice->getNumRefIdx(REF_PIC_LIST_0) - 1; // last idx in the list if (partitioner.chType == CHANNEL_TYPE_LUMA) { - bool bValid = m_pcInterSearch->predCPRSearch(cu, partitioner, m_ctuCprSearchRangeX, m_ctuCprSearchRangeY, m_cprHashMap); + bool bValid = m_pcInterSearch->predIBCSearch(cu, partitioner, m_ctuIbcSearchRangeX, m_ctuIbcSearchRangeY, m_ibcHashMap); if (bValid) { @@ -2934,6 +3103,9 @@ void EncCu::xCheckRDCostCPRMode(CodingStructure *&tempCS, CodingStructure *&best // MC m_pcInterSearch->motionCompensation(pu, REF_PIC_LIST_0, true, chroma); +#if JVET_M0464_UNI_MTS + { +#else double bestCost = bestCS->cost; unsigned char considerEmtSecondPass = 0; bool skipSecondEmtPass = true; @@ -2949,13 +3121,16 @@ void EncCu::xCheckRDCostCPRMode(CodingStructure *&tempCS, CodingStructure *&best } tempCS->getCU(tempCS->chType)->emtFlag = emtCuFlag; +#endif m_pcInterSearch->encodeResAndCalcRdInterCU(*tempCS, partitioner, false, true, chroma); +#if !JVET_M0464_UNI_MTS if (m_pcEncCfg->getFastInterEMT()) { emtFirstPassCost = (!emtCuFlag) ? 
tempCS->cost : emtFirstPassCost; } +#endif xEncodeDontSplit(*tempCS, partitioner); #if ENABLE_QPA_SUB_CTU @@ -2971,6 +3146,7 @@ void EncCu::xCheckRDCostCPRMode(CodingStructure *&tempCS, CodingStructure *&best DTRACE_MODE_COST(*tempCS, m_pcRdCost->getLambda()); xCheckBestMode(tempCS, bestCS, partitioner, encTestMode); +#if !JVET_M0464_UNI_MTS //now we check whether the second pass should be skipped or not if (!emtCuFlag && considerEmtSecondPass) { @@ -2999,6 +3175,7 @@ void EncCu::xCheckRDCostCPRMode(CodingStructure *&tempCS, CodingStructure *&best tempCS->cost = MAX_DOUBLE; } } +#endif } } // bValid @@ -3009,13 +3186,13 @@ void EncCu::xCheckRDCostCPRMode(CodingStructure *&tempCS, CodingStructure *&best tempCS->cost = MAX_DOUBLE; } } - // chroma CU cpr comp + // chroma CU ibc comp else { bool success = true; // chroma tree, reuse luma bv at minimal block level // enabled search only when each chroma sub-block has a BV from its luma sub-block - assert(tempCS->getCprLumaCoverage(pu.Cb()) == CPR_LUMA_COVERAGE_FULL); + assert(tempCS->getIbcLumaCoverage(pu.Cb()) == IBC_LUMA_COVERAGE_FULL); // check if each BV for the chroma sub-block is valid //static const UInt unitArea = MIN_PU_SIZE * MIN_PU_SIZE; const CompArea lumaArea = CompArea(COMPONENT_Y, pu.chromaFormat, pu.Cb().lumaPos(), recalcSize(pu.chromaFormat, CHANNEL_TYPE_CHROMA, CHANNEL_TYPE_LUMA, pu.Cb().size())); @@ -3050,7 +3227,7 @@ void EncCu::xCheckRDCostCPRMode(CodingStructure *&tempCS, CodingStructure *&best if (success) { - //pu.mergeType = MRG_TYPE_CPR; + //pu.mergeType = MRG_TYPE_IBC; m_pcInterSearch->motionCompensation(pu, REF_PIC_LIST_0, false, true); // luma=0, chroma=1 m_pcInterSearch->encodeResAndCalcRdInterCU(*tempCS, partitioner, false, false, true); @@ -3070,7 +3247,7 @@ void EncCu::xCheckRDCostCPRMode(CodingStructure *&tempCS, CodingStructure *&best } } } - // check cpr mode in encoder RD + // check ibc mode in encoder RD 
////////////////////////////////////////////////////////////////////////////////////////////// void EncCu::xCheckRDCostInter( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode ) @@ -3164,11 +3341,18 @@ void EncCu::xCheckRDCostInter( CodingStructure *&tempCS, CodingStructure *&bestC } } +#if JVET_M0464_UNI_MTS + xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, 0 + , m_pImvTempCS ? m_pImvTempCS[wIdx] : NULL + , 0 + , &equGBiCost +#else xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, 0 , m_pImvTempCS ? m_pImvTempCS[wIdx] : NULL , 1 , 0 , &equGBiCost +#endif ); if( g_GbiSearchOrder[gbiLoopIdx] == GBI_DEFAULT ) @@ -3291,7 +3475,9 @@ bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&be } cu.imv = iIMV > 1 ? 2 : 1; +#if !JVET_M0464_UNI_MTS cu.emtFlag = false; +#endif bool testGbi; uint8_t gbiIdx; @@ -3358,11 +3544,18 @@ bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&be return false; } +#if JVET_M0464_UNI_MTS + xEncodeInterResidual( tempCS, bestCS, partitioner, encTestModeBase, 0 + , NULL + , 0 + , &equGBiCost +#else xEncodeInterResidual( tempCS, bestCS, partitioner, encTestModeBase, 0 , NULL , true , 0 , &equGBiCost +#endif ); tempCS->initStructData(encTestMode.qp, encTestMode.lossless); @@ -3390,11 +3583,22 @@ bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&be return true; } +#if JVET_M0464_UNI_MTS +void EncCu::xEncodeInterResidual( CodingStructure *&tempCS + , CodingStructure *&bestCS + , Partitioner &partitioner + , const EncTestMode& encTestMode + , int residualPass + , CodingStructure* imvCS + , bool* bestHasNonResi + , double* equGBiCost +#else void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, int residualPass , CodingStructure* imvCS , int emtMode , bool* bestHasNonResi , double* equGBiCost 
+#endif ) { if( residualPass == 1 && encTestMode.lossless ) @@ -3405,14 +3609,18 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be CodingUnit* cu = tempCS->getCU( partitioner.chType ); double bestCostInternal = MAX_DOUBLE; double bestCost = bestCS->cost; +#if !JVET_M0464_UNI_MTS const SPS& sps = *tempCS->sps; const int maxSizeEMT = EMT_INTER_MAX_CU_WITH_QTBT; +#endif bool swapped = false; // avoid unwanted data copy bool reloadCU = false; +#if !JVET_M0464_UNI_MTS const bool considerEmtSecondPass = emtMode && sps.getSpsNext().getUseInterEMT() && partitioner.currArea().lwidth() <= maxSizeEMT && partitioner.currArea().lheight() <= maxSizeEMT; int minEMTMode = 0; int maxEMTMode = (considerEmtSecondPass?1:0); +#endif // Not allow very big |MVd| to avoid CABAC crash caused by too large MVd. Normally no impact on coding performance. const int maxMvd = 1 << 15; @@ -3442,12 +3650,14 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be } } +#if !JVET_M0464_UNI_MTS if( emtMode == 2 ) { minEMTMode = maxEMTMode = (cu->emtFlag?1:0); } for( int curEmtMode = minEMTMode; curEmtMode <= maxEMTMode; curEmtMode++ ) +#endif { if( reloadCU ) { @@ -3479,7 +3689,9 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be reloadCU = true; // enable cu reloading cu->skip = false; +#if !JVET_M0464_UNI_MTS cu->emtFlag = curEmtMode; +#endif const bool skipResidual = residualPass == 1; m_pcInterSearch->encodeResAndCalcRdInterCU( *tempCS, partitioner, skipResidual ); @@ -3518,7 +3730,9 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be } } +#if !JVET_M0464_UNI_MTS double emtFirstPassCost = tempCS->cost; +#endif if( imvCS && (tempCS->cost < imvCS->cost) ) { if( imvCS->cost != MAX_DOUBLE ) @@ -3550,6 +3764,7 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be #endif xCheckBestMode( tempCS, bestCS, partitioner, encTestMode ); +#if 
!JVET_M0464_UNI_MTS //now we check whether the second pass should be skipped or not if( !curEmtMode && maxEMTMode ) { @@ -3563,6 +3778,7 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be maxEMTMode = 0; // do not test EMT } } +#endif } //end emt loop } @@ -3571,6 +3787,9 @@ void EncCu::xEncodeDontSplit( CodingStructure &cs, Partitioner &partitioner ) { m_CABACEstimator->resetBits(); +#if JVET_M0421_SPLIT_SIG + m_CABACEstimator->split_cu_mode( CU_DONT_SPLIT, cs, partitioner ); +#else { if( partitioner.canSplit( CU_QUAD_SPLIT, cs ) ) { @@ -3581,6 +3800,7 @@ void EncCu::xEncodeDontSplit( CodingStructure &cs, Partitioner &partitioner ) m_CABACEstimator->split_cu_mode_mt( CU_DONT_SPLIT, cs, partitioner ); } } +#endif cs.fracBits += m_CABACEstimator->getEstFracBits(); // split bits cs.cost = m_pcRdCost->calcRdCost( cs.fracBits, cs.dist ); @@ -3599,6 +3819,10 @@ void EncCu::xReuseCachedResult( CodingStructure *&tempCS, CodingStructure *&best if( bestEncCache->setCsFrom( *tempCS, cachedMode, partitioner ) ) { CodingUnit& cu = *tempCS->cus.front(); +#if JVET_M0170_MRG_SHARELIST + cu.shareParentPos = tempCS->sharedBndPos; + cu.shareParentSize = tempCS->sharedBndSize; +#endif partitioner.setCUData( cu ); if( CU::isIntra( cu ) ) diff --git a/source/Lib/EncoderLib/EncCu.h b/source/Lib/EncoderLib/EncCu.h index 22e50e7a8aaa90999ca4bfb34be4e85c34369aaa..b239ed8baf3e9351f980d0f2c13265898150f27e 100644 --- a/source/Lib/EncoderLib/EncCu.h +++ b/source/Lib/EncoderLib/EncCu.h @@ -45,7 +45,7 @@ #include "CommonLib/TrQuant.h" #include "CommonLib/Unit.h" #include "CommonLib/UnitPartitioner.h" -#include "CommonLib/CprHashMap.h" +#include "CommonLib/IbcHashMap.h" #if REUSE_CU_RESULTS #include "DecoderLib/DecCu.h" @@ -109,9 +109,17 @@ private: CABACWriter* m_CABACEstimator; RateCtrl* m_pcRateCtrl; - CprHashMap m_cprHashMap; + IbcHashMap m_ibcHashMap; CodingStructure **m_pImvTempCS; EncModeCtrl *m_modeCtrl; +#if JVET_M0170_MRG_SHARELIST + int m_shareState; + 
uint32_t m_shareBndPosX; + uint32_t m_shareBndPosY; + SizeType m_shareBndSizeW; + SizeType m_shareBndSizeH; +#endif + PelStorage m_acMergeBuffer[MMVD_MRG_MAX_RD_BUF_NUM]; PelStorage m_acRealMergeBuffer[MRG_MAX_NUM_CANDS]; PelStorage m_acTriangleWeightedBuffer[TRIANGLE_MAX_NUM_CANDS]; // to store weighted prediction pixles @@ -120,8 +128,8 @@ private: unsigned int m_subMergeBlkSize[10]; unsigned int m_subMergeBlkNum[10]; unsigned int m_prevPOC; - int m_ctuCprSearchRangeX; - int m_ctuCprSearchRangeY; + int m_ctuIbcSearchRangeX; + int m_ctuIbcSearchRangeY; #if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM EncLib* m_pcEncLib; #endif @@ -152,6 +160,10 @@ public: void setMergeBestSATDCost(double cost) { m_mergeBestSATDCost = cost; } double getMergeBestSATDCost() { return m_mergeBestSATDCost; } +#if JVET_M0170_MRG_SHARELIST + Position shareParentPos; + Size shareParentSize; +#endif ~EncCu(); protected: @@ -190,12 +202,24 @@ protected: void xCheckRDCostMergeTriangle2Nx2N( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode ); +#if JVET_M0464_UNI_MTS + void xEncodeInterResidual( CodingStructure *&tempCS + , CodingStructure *&bestCS + , Partitioner &partitioner + , const EncTestMode& encTestMode + , int residualPass = 0 + , CodingStructure* imvCS = NULL + , bool* bestHasNonResi = NULL + , double* equGBiCost = NULL + ); +#else void xEncodeInterResidual ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, int residualPass = 0 , CodingStructure* imvCS = NULL , int emtMode = 1 , bool* bestHasNonResi = NULL , double* equGBiCost = NULL ); +#endif #if REUSE_CU_RESULTS void xReuseCachedResult ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &Partitioner ); #endif @@ -210,8 +234,8 @@ protected: && (abs(cu.slice->getPOC() - cu.slice->getRefPOC(REF_PIC_LIST_0, cu.refIdxBi[0])) == 1 || abs(cu.slice->getPOC() - cu.slice->getRefPOC(REF_PIC_LIST_1, 
cu.refIdxBi[1])) == 1)))); } - void xCheckRDCostCPRMode ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode ); - void xCheckRDCostCPRModeMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode ); + void xCheckRDCostIBCMode ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode ); + void xCheckRDCostIBCModeMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode ); }; //! \} diff --git a/source/Lib/EncoderLib/EncGOP.cpp b/source/Lib/EncoderLib/EncGOP.cpp index 56da75e88d36164dc175e84f936577ccd3bdad2b..0872980a6a75910484f743ffbbd9bd40ef719c8f 100644 --- a/source/Lib/EncoderLib/EncGOP.cpp +++ b/source/Lib/EncoderLib/EncGOP.cpp @@ -1456,7 +1456,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, { pcSlice->setSliceType(I_SLICE); } - if (pcSlice->getSliceType() == I_SLICE && pcSlice->getSPS()->getSpsNext().getCPRMode()) + if (pcSlice->getSliceType() == I_SLICE && pcSlice->getSPS()->getSpsNext().getIBCMode()) { pcSlice->setSliceType(P_SLICE); } @@ -1630,7 +1630,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, if (pcPic->cs->sps->getSpsNext().getUseCompositeRef() && getPrepareLTRef()) { arrangeCompositeReference(pcSlice, rcListPic, pocCurr); } - if (pcSlice->getSPS()->getSpsNext().getCPRMode()) + if (pcSlice->getSPS()->getSpsNext().getIBCMode()) { if (m_pcCfg->getIntraPeriod() > 0 && pcSlice->getPOC() % m_pcCfg->getIntraPeriod() == 0) { @@ -1696,7 +1696,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, { pcSlice->setSliceType ( P_SLICE ); } - if (pcSlice->getSPS()->getSpsNext().getCPRMode() && pcSlice->getNumRefIdx(REF_PIC_LIST_0) == 1) + if (pcSlice->getSPS()->getSpsNext().getIBCMode() && pcSlice->getNumRefIdx(REF_PIC_LIST_0) == 1) { 
m_pcSliceEncoder->setEncCABACTableIdx(P_SLICE); } @@ -1786,7 +1786,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, } // disable TMVP when current picture is the only ref picture - if (pcSlice->isIRAP() && pcSlice->getSPS()->getSpsNext().getCPRMode()) + if (pcSlice->isIRAP() && pcSlice->getSPS()->getSpsNext().getIBCMode()) { pcSlice->setEnableTMVPFlag(0); } @@ -1800,7 +1800,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, bool bGPBcheck=false; if ( pcSlice->getSliceType() == B_SLICE) { - if (pcSlice->getSPS()->getSpsNext().getCPRMode()) + if (pcSlice->getSPS()->getSpsNext().getIBCMode()) { if (pcSlice->getNumRefIdx(RefPicList(0)) - 1 == pcSlice->getNumRefIdx(RefPicList(1))) { @@ -1842,6 +1842,81 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, pcPic->slices[pcSlice->getSliceSegmentIdx()]->setMvdL1ZeroFlag(pcSlice->getMvdL1ZeroFlag()); #endif +#if JVET_M0444_SMVD + if ( pcSlice->getCheckLDC() == false && pcSlice->getMvdL1ZeroFlag() == false ) + { + int currPOC = pcSlice->getPOC(); + + int forwardPOC = currPOC; + int backwardPOC = currPOC; + int ref = 0, refIdx0 = -1, refIdx1 = -1; + + // search nearest forward POC in List 0 + for ( ref = 0; ref < pcSlice->getNumRefIdx( REF_PIC_LIST_0 ); ref++ ) + { + int poc = pcSlice->getRefPic( REF_PIC_LIST_0, ref )->getPOC(); + if ( poc < currPOC && (poc > forwardPOC || refIdx0 == -1) ) + { + forwardPOC = poc; + refIdx0 = ref; + } + } + + // search nearest backward POC in List 1 + for ( ref = 0; ref < pcSlice->getNumRefIdx( REF_PIC_LIST_1 ); ref++ ) + { + int poc = pcSlice->getRefPic( REF_PIC_LIST_1, ref )->getPOC(); + if ( poc > currPOC && (poc < backwardPOC || refIdx1 == -1) ) + { + backwardPOC = poc; + refIdx1 = ref; + } + } + + if ( !(forwardPOC < currPOC && backwardPOC > currPOC) ) + { + forwardPOC = currPOC; + backwardPOC = currPOC; + refIdx0 = -1; + refIdx1 = -1; + + // search nearest backward POC in List 0 + for ( ref = 0; ref 
< pcSlice->getNumRefIdx( REF_PIC_LIST_0 ); ref++ ) + { + int poc = pcSlice->getRefPic( REF_PIC_LIST_0, ref )->getPOC(); + if ( poc > currPOC && (poc < backwardPOC || refIdx0 == -1) ) + { + backwardPOC = poc; + refIdx0 = ref; + } + } + + // search nearest forward POC in List 1 + for ( ref = 0; ref < pcSlice->getNumRefIdx( REF_PIC_LIST_1 ); ref++ ) + { + int poc = pcSlice->getRefPic( REF_PIC_LIST_1, ref )->getPOC(); + if ( poc < currPOC && (poc > forwardPOC || refIdx1 == -1) ) + { + forwardPOC = poc; + refIdx1 = ref; + } + } + } + + if ( forwardPOC < currPOC && backwardPOC > currPOC ) + { + pcSlice->setBiDirPred( true, refIdx0, refIdx1 ); + } + else + { + pcSlice->setBiDirPred( false, -1, -1 ); + } + } + else + { + pcSlice->setBiDirPred( false, -1, -1 ); + } +#endif double lambda = 0.0; int actualHeadBits = 0; @@ -1991,14 +2066,14 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, } #endif #endif - if (pcSlice->getSPS()->getUseSAO()) + if (pcSlice->getSPS()->getSAOEnabledFlag()) { pcPic->resizeSAO( numberOfCtusInFrame, 0 ); pcPic->resizeSAO( numberOfCtusInFrame, 1 ); } // it is used for signalling during CTU mode decision, i.e. 
before ALF processing - if( pcSlice->getSPS()->getUseALF() ) + if( pcSlice->getSPS()->getALFEnabledFlag() ) { pcPic->resizeAlfCtuEnableFlag( numberOfCtusInFrame ); std::memset( pcSlice->getAlfSliceParam().enabledFlag, false, sizeof( pcSlice->getAlfSliceParam().enabledFlag ) ); @@ -2095,7 +2170,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, pcSlice = pcPic->slices[0]; // SAO parameter estimation using non-deblocked pixels for CTU bottom and right boundary areas - if( pcSlice->getSPS()->getUseSAO() && m_pcCfg->getSaoCtuBoundary() ) + if( pcSlice->getSPS()->getSAOEnabledFlag() && m_pcCfg->getSaoCtuBoundary() ) { m_pcSAO->getPreDBFStatistics( cs ); } @@ -2121,7 +2196,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, DTRACE_UPDATE( g_trace_ctx, ( std::make_pair( "final", 1 ) ) ); - if( pcSlice->getSPS()->getUseSAO() ) + if( pcSlice->getSPS()->getSAOEnabledFlag() ) { bool sliceEnabled[MAX_NUM_COMPONENT]; m_pcSAO->initCABACEstimator( m_pcEncLib->getCABACEncoder(), m_pcEncLib->getCtxCache(), pcSlice ); @@ -2144,7 +2219,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, } } - if( pcSlice->getSPS()->getUseALF() ) + if( pcSlice->getSPS()->getALFEnabledFlag() ) { AlfSliceParam alfSliceParam; m_pcALF->initCABACEstimator( m_pcEncLib->getCABACEncoder(), m_pcEncLib->getCtxCache(), pcSlice ); @@ -2180,7 +2255,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, } } #endif - if( pcSlice->getSPS()->getUseSAO() ) + if( pcSlice->getSPS()->getSAOEnabledFlag() ) { m_pcSAO->disabledRate( *pcPic->cs, pcPic->getSAO(1), m_pcCfg->getSaoEncodingRate(), m_pcCfg->getSaoEncodingRateChroma()); } @@ -2646,10 +2721,6 @@ void EncGOP::xGetBuffer( PicList& rcListPic, #ifndef BETA #define BETA 0.5 // value between 0.0 and 1; use 0.0 to obtain traditional PSNR #endif -#define GLOBAL_AVERAGING 1 // "global" averaging of a_k across a set instead of one picture -#if FRAME_WEIGHTING 
-static const uint32_t DQP[16] = { 4, 12, 11, 12, 9, 12, 11, 12, 6, 12, 11, 12, 9, 12, 11, 12 }; -#endif static inline double calcWeightedSquaredError(const CPelBuf& org, const CPelBuf& rec, double &sumAct, const uint32_t bitDepth, @@ -2753,9 +2824,7 @@ uint64_t EncGOP::xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1, } double wmse = 0.0, sumAct = 0.0; // compute activity normalized SNR value -#if !GLOBAL_AVERAGING - double numAct = 0.0; -#endif + for (y = 0; y < H; y += B) { for (x = 0; x < W; x += B) @@ -2765,29 +2834,13 @@ uint64_t EncGOP::xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1, W, H, x, y, B, B); -#if !GLOBAL_AVERAGING - numAct += 1.0; -#endif } } // integer weighted distortion -#if GLOBAL_AVERAGING - sumAct = 32.0 * double(1 << BD); - - if ((W << chromaShift) > 2048 && (H << chromaShift) > 1280) // for UHD/4K - { - sumAct *= 0.5; - } - else if ((W << chromaShift) <= 1024 || (H << chromaShift) <= 640) // 480p - { - sumAct *= 2.0; - } + sumAct = 16.0 * sqrt ((3840.0 * 2160.0) / double((W << chromaShift) * (H << chromaShift))) * double(1 << BD); return (wmse <= 0.0) ? 0 : uint64_t(wmse * pow(sumAct, BETA) + 0.5); -#else - return (wmse <= 0.0 || numAct <= 0.0) ? 0 : uint64_t(wmse * pow(sumAct / numAct, BETA) + 0.5); -#endif } #endif // ENABLE_QPA uiTotalDiff = 0; @@ -2992,12 +3045,7 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni const bool bPicIsField = pcPic->fieldPic; const Slice* pcSlice = pcPic->slices[0]; -#if ENABLE_QPA && FRAME_WEIGHTING - const uint32_t currDQP = (pcSlice->getPOC() % m_pcEncLib->getIntraPeriod()) == 0 ? 
0 : DQP[pcSlice->getPOC() % m_pcEncLib->getGOPSize()]; - const double frameWeight = pow(2.0, (double)currDQP / -3.0); - if (useWPSNR) m_gcAnalyzeAll.addWeight(frameWeight); -#endif for (int comp = 0; comp < ::getNumberValidComponents(formatD); comp++) { const ComponentID compID = ComponentID(comp); @@ -3016,11 +3064,10 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni const uint32_t bitDepth = sps.getBitDepth(toChannelType(compID)); #if ENABLE_QPA const uint64_t uiSSDtemp = xFindDistortionPlane(recPB, orgPB, useWPSNR ? bitDepth : 0, ::getComponentScaleX(compID, format)); - const uint32_t maxval = /*useWPSNR ? (1 << bitDepth) - 1 :*/ 255 << (bitDepth - 8); // fix with WPSNR: 1023 (4095) instead of 1020 (4080) for bit-depth 10 (12) #else const uint64_t uiSSDtemp = xFindDistortionPlane(recPB, orgPB, 0); - const uint32_t maxval = 255 << (bitDepth - 8); #endif + const uint32_t maxval = 255 << (bitDepth - 8); const uint32_t size = width * height; const double fRefValue = (double)maxval * maxval * size; dPSNR[comp] = uiSSDtemp ? 10.0 * log10(fRefValue / (double)uiSSDtemp) : 999.99; @@ -3033,10 +3080,6 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni MSEyuvframeWeighted[comp] = (double)uiSSDtempWeighted / size; } #endif - -#if ENABLE_QPA && FRAME_WEIGHTING - if (useWPSNR) m_gcAnalyzeAll.addWeightedSSD(frameWeight * (double)uiSSDtemp / fRefValue, compID); -#endif } #if EXTENSION_360_VIDEO @@ -3224,13 +3267,7 @@ void EncGOP::xCalculateInterlacedAddPSNR( Picture* pcPicOrgFirstField, Picture* CHECK(!(acPicRecFields[0].chromaFormat==acPicRecFields[1].chromaFormat), "Unspecified error"); const uint32_t numValidComponents = ::getNumberValidComponents( acPicRecFields[0].chromaFormat ); -#if ENABLE_QPA && FRAME_WEIGHTING - const Slice* pcSlice = pcPicOrgFirstField->slices[0]; - const uint32_t currDQP = (pcSlice->getPOC() % m_pcEncLib->getIntraPeriod()) == 0 ? 
0 : DQP[pcSlice->getPOC() % m_pcEncLib->getGOPSize()]; - const double frameWeight = pow(2.0, (double)currDQP / -3.0); - if (useWPSNR) m_gcAnalyzeAll_in.addWeight(frameWeight); -#endif for (int chan = 0; chan < numValidComponents; chan++) { const ComponentID ch=ComponentID(chan); @@ -3251,18 +3288,11 @@ void EncGOP::xCalculateInterlacedAddPSNR( Picture* pcPicOrgFirstField, Picture* uiSSDtemp += xFindDistortionPlane( acPicRecFields[fieldNum].get(ch), apcPicOrgFields[fieldNum]->getOrigBuf().get(ch), 0 ); #endif } -#if ENABLE_QPA - const uint32_t maxval = /*useWPSNR ? (1 << bitDepth) - 1 :*/ 255 << (bitDepth - 8); // fix with WPSNR: 1023 (4095) instead of 1020 (4080) for bit-depth 10 (12) -#else const uint32_t maxval = 255 << (bitDepth - 8); -#endif const uint32_t size = width * height * 2; const double fRefValue = (double)maxval * maxval * size; dPSNR[ch] = uiSSDtemp ? 10.0 * log10(fRefValue / (double)uiSSDtemp) : 999.99; MSEyuvframe[ch] = (double)uiSSDtemp / size; -#if ENABLE_QPA && FRAME_WEIGHTING - if (useWPSNR) m_gcAnalyzeAll_in.addWeightedSSD(frameWeight * (double)uiSSDtemp / fRefValue, ch); -#endif } uint32_t uibits = 0; // the number of bits for the pair is not calculated here - instead the overall total is used elsewhere. diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp index 68fdd2caf1ed82d37dcca64fd22a19d19fb1b3a1..56f49fc8620590b9f275d78e319747df63f7f826 100644 --- a/source/Lib/EncoderLib/EncLib.cpp +++ b/source/Lib/EncoderLib/EncLib.cpp @@ -797,7 +797,11 @@ void EncLib::xInitSPS(SPS &sps) sps.setNoSbtmvpConstraintFlag(m_SubPuMvpMode ? false : true); sps.setNoAmvrConstraintFlag(!m_bNoAmvrConstraintFlag); sps.setNoAffineMotionConstraintFlag(!m_Affine); +#if JVET_M0464_UNI_MTS + sps.setNoMtsConstraintFlag((m_IntraMTS || m_InterMTS) ? false : true); +#else sps.setNoMtsConstraintFlag((m_IntraEMT || m_InterEMT) ? 
false : true); +#endif sps.setNoLadfConstraintFlag(!m_LadfEnabled); sps.setNoDepQuantConstraintFlag(!m_DepQuantEnabledFlag); sps.setNoSignDataHidingConstraintFlag(!m_SignDataHidingEnabledFlag); @@ -847,20 +851,28 @@ void EncLib::xInitSPS(SPS &sps) sps.getSpsNext().setUseLargeCTU ( m_LargeCTU ); sps.setMaxBTDepth ( m_uiMaxBTDepth, m_uiMaxBTDepthI, m_uiMaxBTDepthIChroma ); sps.setUseDualITree ( m_dualITree ); - sps.getSpsNext().setSubPuMvpMode ( m_SubPuMvpMode ); + sps.setSBTMVPEnabledFlag ( m_SubPuMvpMode ); sps.getSpsNext().setImvMode ( ImvMode(m_ImvMode) ); sps.getSpsNext().setUseIMV ( m_ImvMode != IMV_OFF ); - sps.getSpsNext().setUseBIO ( m_BIO ); + sps.setBDOFEnabledFlag ( m_BIO ); sps.getSpsNext().setUseAffine ( m_Affine ); sps.getSpsNext().setUseAffineType ( m_AffineType ); sps.getSpsNext().setDisableMotCompress ( m_DisableMotionCompression ); sps.getSpsNext().setMTTMode ( m_MTTMode ); sps.getSpsNext().setUseLMChroma ( m_LMChroma ? true : false ); +#if JVET_M0142_CCLM_COLLOCATED_CHROMA + sps.getSpsNext().setCclmCollocatedChromaFlag( m_cclmCollocatedChromaFlag ); +#endif #if ENABLE_WPP_PARALLELISM sps.getSpsNext().setUseNextDQP ( m_AltDQPCoding ); #endif +#if JVET_M0464_UNI_MTS + sps.getSpsNext().setUseIntraMTS ( m_IntraMTS ); + sps.getSpsNext().setUseInterMTS ( m_InterMTS ); +#else sps.getSpsNext().setUseIntraEMT ( m_IntraEMT ); sps.getSpsNext().setUseInterEMT ( m_InterEMT ); +#endif sps.getSpsNext().setUseCompositeRef ( m_compositeRefEnabled ); sps.getSpsNext().setUseGBi ( m_GBi ); #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET @@ -880,9 +892,9 @@ void EncLib::xInitSPS(SPS &sps) sps.getSpsNext().setUseMHIntra ( m_MHIntra ); sps.getSpsNext().setUseTriangle ( m_Triangle ); - sps.getSpsNext().setCPRMode ( m_CPRMode ); + sps.getSpsNext().setIBCMode ( m_IBCMode ); - sps.setUseWrapAround ( m_wrapAround ); + sps.setWrapAroundEnabledFlag ( m_wrapAround ); sps.setWrapAroundOffset ( m_wrapAroundOffset ); // ADD_NEW_TOOL : (encoder lib) set tool enabling flags and 
associated parameters here @@ -897,7 +909,7 @@ void EncLib::xInitSPS(SPS &sps) sps.setLog2MinCodingBlockSize(log2MinCUSize); sps.setPCMLog2MinSize (m_uiPCMLog2MinSize); - sps.setUsePCM ( m_usePCM ); + sps.setPCMEnabledFlag ( m_usePCM ); sps.setPCMLog2MaxSize( m_pcmLog2MaxSize ); sps.setQuadtreeTULog2MaxSize( m_uiQuadtreeTULog2MaxSize ); @@ -909,8 +921,6 @@ void EncLib::xInitSPS(SPS &sps) sps.setMaxTrSize ( 1 << m_uiQuadtreeTULog2MaxSize ); - sps.setUseAMP ( m_useAMP ); - for (uint32_t channelType = 0; channelType < MAX_NUM_CHANNEL_TYPE; channelType++) { sps.setBitDepth (ChannelType(channelType), m_bitDepth[channelType] ); @@ -918,7 +928,7 @@ void EncLib::xInitSPS(SPS &sps) sps.setPCMBitDepth (ChannelType(channelType), m_PCMBitDepth[channelType] ); } - sps.setUseSAO( m_bUseSAO ); + sps.setSAOEnabledFlag( m_bUseSAO ); sps.setMaxTLayers( m_maxTempLayer ); sps.setTemporalIdNestingFlag( ( m_maxTempLayer == 1 ) ? true : false ); @@ -936,7 +946,7 @@ void EncLib::xInitSPS(SPS &sps) #if HEVC_USE_INTRA_SMOOTHING_T32 || HEVC_USE_INTRA_SMOOTHING_T64 sps.setUseStrongIntraSmoothing( m_useStrongIntraSmoothing ); #endif - sps.setUseALF( m_alf ); + sps.setALFEnabledFlag( m_alf ); sps.setVuiParametersPresentFlag(getVuiParametersPresentFlag()); if (sps.getVuiParametersPresentFlag()) @@ -1385,7 +1395,7 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps) } } CHECK(!(bestPos <= 15), "Unspecified error"); - if (sps.getSpsNext().getCPRMode()) + if (sps.getSpsNext().getIBCMode()) { pps.setNumRefIdxL0DefaultActive(bestPos + 1); } diff --git a/source/Lib/EncoderLib/EncModeCtrl.cpp b/source/Lib/EncoderLib/EncModeCtrl.cpp index f5032831e5bdf4c74c8e47dabfe943fe56c5e7e4..c9a6863301a9e2c439e7fafdb209a4f9ce99ee2d 100644 --- a/source/Lib/EncoderLib/EncModeCtrl.cpp +++ b/source/Lib/EncoderLib/EncModeCtrl.cpp @@ -528,7 +528,11 @@ uint8_t CacheBlkInfoCtrl::getGbiIdx(const UnitArea& area) } #if REUSE_CU_RESULTS -static bool isTheSameNbHood( const CodingUnit &cu, const CodingStructure& cs, const 
Partitioner &partitioner ) +static bool isTheSameNbHood( const CodingUnit &cu, const CodingStructure& cs, const Partitioner &partitioner +#if JVET_M0170_MRG_SHARELIST + , const PredictionUnit &pu, int picW, int picH +#endif + ) { if( cu.chType != partitioner.chType ) { @@ -549,6 +553,32 @@ static bool isTheSameNbHood( const CodingUnit &cu, const CodingStructure& cs, co const UnitArea &cmnAnc = ps[i - 1].parts[ps[i - 1].idx]; const UnitArea cuArea = CS::getArea( cs, cu, partitioner.chType ); +#if JVET_M0170_MRG_SHARELIST + bool sharedListReuseMode = true; + if( + pu.mergeFlag == true && + cu.affine == false && + cu.predMode == MODE_INTER + ) + { + sharedListReuseMode = false; + + if ((cu.lumaSize().width*cu.lumaSize().height) >= MRG_SHARELIST_SHARSIZE) + { + sharedListReuseMode = true; + } + + if (((cmnAnc.lumaSize().width)*(cmnAnc.lumaSize().height) <= MRG_SHARELIST_SHARSIZE)) + { + sharedListReuseMode = true; + } + } + else + { + sharedListReuseMode = true; + } +//#endif +#endif for( int i = 0; i < cmnAnc.blocks.size(); i++ ) { @@ -557,6 +587,13 @@ static bool isTheSameNbHood( const CodingUnit &cu, const CodingStructure& cs, co return false; } } +#if JVET_M0170_MRG_SHARELIST + if(!sharedListReuseMode) + { + return false; + } +#endif + return true; } @@ -754,8 +791,12 @@ bool BestEncInfoCache::isValid( const CodingStructure& cs, const Partitioner& pa if( encInfo.cu.qp != qp ) return false; - if( cs.picture->poc != encInfo.poc || CS::getArea( cs, cs.area, partitioner.chType ) != CS::getArea( cs, encInfo.cu, partitioner.chType ) || !isTheSameNbHood( encInfo.cu, cs, partitioner ) - || encInfo.cu.cpr + if( cs.picture->poc != encInfo.poc || CS::getArea( cs, cs.area, partitioner.chType ) != CS::getArea( cs, encInfo.cu, partitioner.chType ) || !isTheSameNbHood( encInfo.cu, cs, partitioner +#if JVET_M0170_MRG_SHARELIST + , encInfo.pu, (cs.picture->Y().width), (cs.picture->Y().height) +#endif +) + || encInfo.cu.ibc ) { return false; @@ -773,7 +814,11 @@ bool 
BestEncInfoCache::setCsFrom( CodingStructure& cs, EncTestMode& testMode, co BestEncodingInfo& encInfo = *m_bestEncInfo[idx1][idx2][idx3][idx4]; - if( cs.picture->poc != encInfo.poc || CS::getArea( cs, cs.area, partitioner.chType ) != CS::getArea( cs, encInfo.cu, partitioner.chType ) || !isTheSameNbHood( encInfo.cu, cs, partitioner ) ) + if( cs.picture->poc != encInfo.poc || CS::getArea( cs, cs.area, partitioner.chType ) != CS::getArea( cs, encInfo.cu, partitioner.chType ) || !isTheSameNbHood( encInfo.cu, cs, partitioner +#if JVET_M0170_MRG_SHARELIST + , encInfo.pu, (cs.picture->Y().width), (cs.picture->Y().height) +#endif +) ) { return false; } @@ -938,7 +983,7 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru { if (m_pcEncCfg->getUseAdaptiveQP()) { - baseQP = Clip3 (-cs.sps->getQpBDOffset (CHANNEL_TYPE_LUMA), MAX_QP, baseQP + xComputeDQP (cs, partitioner)); + baseQP = Clip3(-cs.sps->getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, baseQP + xComputeDQP (cs, partitioner)); } #if ENABLE_QPA_SUB_CTU else if (m_pcEncCfg->getUsePerceptQPA() && !m_pcEncCfg->getUseRateCtrl() && cs.pps->getUseDQP() && cs.pps->getMaxCuDQPDepth() > 0) @@ -968,25 +1013,25 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru int maxQP = baseQP; xGetMinMaxQP( minQP, maxQP, cs, partitioner, baseQP, *cs.sps, *cs.pps, true ); - bool checkCpr = true; + bool checkIbc = true; if (cs.chType == CHANNEL_TYPE_CHROMA) { - CprLumaCoverage cprLumaCoverage = cs.getCprLumaCoverage(cs.area.Cb()); - switch (cprLumaCoverage) + IbcLumaCoverage ibcLumaCoverage = cs.getIbcLumaCoverage(cs.area.Cb()); + switch (ibcLumaCoverage) { - case CPR_LUMA_COVERAGE_FULL: - // check CPR + case IBC_LUMA_COVERAGE_FULL: + // check IBC break; - case CPR_LUMA_COVERAGE_PARTIAL: - // do not check CPR - checkCpr = false; + case IBC_LUMA_COVERAGE_PARTIAL: + // do not check IBC + checkIbc = false; break; - case CPR_LUMA_COVERAGE_NONE: - // do not check CPR - checkCpr = false; + 
case IBC_LUMA_COVERAGE_NONE: + // do not check IBC + checkIbc = false; break; default: - THROW("Unknown CPR luma coverage type"); + THROW("Unknown IBC luma coverage type"); } } // Add coding modes here @@ -1093,13 +1138,13 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru // add intra modes m_ComprCUCtxList.back().testModes.push_back( { ETM_IPCM, ETO_STANDARD, qp, lossless } ); m_ComprCUCtxList.back().testModes.push_back( { ETM_INTRA, ETO_STANDARD, qp, lossless } ); - // add cpr mode to intra path - if (cs.sps->getSpsNext().getCPRMode() && checkCpr ) + // add ibc mode to intra path + if (cs.sps->getSpsNext().getIBCMode() && checkIbc ) { - m_ComprCUCtxList.back().testModes.push_back({ ETM_CPR, ETO_STANDARD, qp, lossless }); + m_ComprCUCtxList.back().testModes.push_back({ ETM_IBC, ETO_STANDARD, qp, lossless }); if (cs.chType == CHANNEL_TYPE_LUMA) { - m_ComprCUCtxList.back().testModes.push_back({ ETM_CPR_MERGE, ETO_STANDARD, qp, lossless }); + m_ComprCUCtxList.back().testModes.push_back({ ETM_IBC_MERGE, ETO_STANDARD, qp, lossless }); } } } @@ -1129,7 +1174,7 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru m_ComprCUCtxList.back().testModes.push_back( { ETM_MERGE_TRIANGLE, ETO_STANDARD, qp, lossless } ); } m_ComprCUCtxList.back().testModes.push_back( { ETM_MERGE_SKIP, ETO_STANDARD, qp, lossless } ); - if ( cs.sps->getSpsNext().getUseAffine() || cs.sps->getSpsNext().getUseSubPuMvp() ) + if ( cs.sps->getSpsNext().getUseAffine() || cs.sps->getSBTMVPEnabledFlag() ) { m_ComprCUCtxList.back().testModes.push_back( { ETM_AFFINE, ETO_STANDARD, qp, lossless } ); } @@ -1143,7 +1188,7 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru m_ComprCUCtxList.back().testModes.push_back( { ETM_MERGE_TRIANGLE, ETO_STANDARD, qp, lossless } ); } m_ComprCUCtxList.back().testModes.push_back( { ETM_MERGE_SKIP, ETO_STANDARD, qp, lossless } ); - if ( cs.sps->getSpsNext().getUseAffine() || 
cs.sps->getSpsNext().getUseSubPuMvp() ) + if ( cs.sps->getSpsNext().getUseAffine() || cs.sps->getSBTMVPEnabledFlag() ) { m_ComprCUCtxList.back().testModes.push_back( { ETM_AFFINE, ETO_STANDARD, qp, lossless } ); } @@ -1258,7 +1303,7 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt } // INTRA MODES - if (cs.sps->getSpsNext().getCPRMode() && !cuECtx.bestTU) + if (cs.sps->getSpsNext().getIBCMode() && !cuECtx.bestTU) return true; CHECK( !slice.isIntra() && !cuECtx.bestTU, "No possible non-intra encoding for a P- or B-slice found" ); @@ -1271,8 +1316,8 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt { return false; } - if ((m_pcEncCfg->getCPRFastMethod() & CPR_FAST_METHOD_NOINTRA_CPRCBF0) - && (bestMode.type == ETM_CPR || bestMode.type == ETM_CPR_MERGE) + if ((m_pcEncCfg->getIBCFastMethod() & IBC_FAST_METHOD_NOINTRA_IBCCBF0) + && (bestMode.type == ETM_IBC || bestMode.type == ETM_IBC_MERGE) && (!cuECtx.bestCU->Y().valid() || cuECtx.bestTU->cbf[0] == 0) && (!cuECtx.bestCU->Cb().valid() || cuECtx.bestTU->cbf[1] == 0) && (!cuECtx.bestCU->Cr().valid() || cuECtx.bestTU->cbf[2] == 0)) @@ -1311,12 +1356,12 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt } // PCM MODES - return sps.getUsePCM() && width <= ( 1 << sps.getPCMLog2MaxSize() ) && width >= ( 1 << sps.getPCMLog2MinSize() ); + return sps.getPCMEnabledFlag() && width <= ( 1 << sps.getPCMLog2MaxSize() ) && width >= ( 1 << sps.getPCMLog2MinSize() ); } - else if (encTestmode.type == ETM_CPR || encTestmode.type == ETM_CPR_MERGE) + else if (encTestmode.type == ETM_IBC || encTestmode.type == ETM_IBC_MERGE) { - // CPR MODES - return sps.getSpsNext().getCPRMode() && width <= CPR_MAX_CAND_SIZE && partitioner.currArea().lumaSize().height <= CPR_MAX_CAND_SIZE; + // IBC MODES + return sps.getSpsNext().getIBCMode() && width <= IBC_MAX_CAND_SIZE && partitioner.currArea().lumaSize().height <= IBC_MAX_CAND_SIZE; } else if( 
isModeInter( encTestmode ) ) { @@ -1623,6 +1668,7 @@ bool EncModeCtrlMTnoRQT::useModeResult( const EncTestMode& encTestmode, CodingSt { cuECtx.set( BEST_TRIV_SPLIT_COST, tempCS->cost ); } +#if !JVET_M0464_UNI_MTS else if( encTestmode.type == ETM_INTRA ) { const CodingUnit cu = *tempCS->getCU( partitioner.chType ); @@ -1632,6 +1678,7 @@ bool EncModeCtrlMTnoRQT::useModeResult( const EncTestMode& encTestmode, CodingSt cuECtx.bestEmtSize2Nx2N1stPass = tempCS->cost; } } +#endif if( m_pcEncCfg->getIMV4PelFast() && m_pcEncCfg->getIMV() && encTestmode.type == ETM_INTER_ME ) { diff --git a/source/Lib/EncoderLib/EncModeCtrl.h b/source/Lib/EncoderLib/EncModeCtrl.h index b320d6e036a179418c9edd12ca62ee28e30d78a7..422e4c80840c1bd6b3960c1b1abe75129e6c0249 100644 --- a/source/Lib/EncoderLib/EncModeCtrl.h +++ b/source/Lib/EncoderLib/EncModeCtrl.h @@ -70,8 +70,8 @@ enum EncTestModeType ETM_RECO_CACHED, #endif ETM_TRIGGER_IMV_LIST, - ETM_CPR, // cpr mode - ETM_CPR_MERGE, // cpr merge mode + ETM_IBC, // ibc mode + ETM_IBC_MERGE, // ibc merge mode ETM_INVALID }; @@ -184,10 +184,12 @@ struct ComprCUCtx , extraFeatures ( ) , extraFeaturesd( ) , bestInterCost ( MAX_DOUBLE ) +#if !JVET_M0464_UNI_MTS , bestEmtSize2Nx2N1stPass ( MAX_DOUBLE ) , skipSecondEMTPass ( false ) +#endif , interHad (std::numeric_limits<Distortion>::max()) #if ENABLE_SPLIT_PARALLELISM , isLevelSplitParallel @@ -216,8 +218,10 @@ struct ComprCUCtx static_vector<int64_t, 30> extraFeatures; static_vector<double, 30> extraFeaturesd; double bestInterCost; +#if !JVET_M0464_UNI_MTS double bestEmtSize2Nx2N1stPass; bool skipSecondEMTPass; +#endif Distortion interHad; #if ENABLE_SPLIT_PARALLELISM bool isLevelSplitParallel; @@ -298,9 +302,11 @@ public: double getBestInterCost () const { return m_ComprCUCtxList.back().bestInterCost; } Distortion getInterHad () const { return m_ComprCUCtxList.back().interHad; } void enforceInterHad ( Distortion had ) { m_ComprCUCtxList.back().interHad = had; } +#if !JVET_M0464_UNI_MTS double 
getEmtSize2Nx2NFirstPassCost () const { return m_ComprCUCtxList.back().bestEmtSize2Nx2N1stPass; } bool getSkipSecondEMTPass () const { return m_ComprCUCtxList.back().skipSecondEMTPass; } void setSkipSecondEMTPass ( bool b ) { m_ComprCUCtxList.back().skipSecondEMTPass = b; } +#endif protected: void xExtractFeatures ( const EncTestMode encTestmode, CodingStructure& cs ); diff --git a/source/Lib/EncoderLib/EncSlice.cpp b/source/Lib/EncoderLib/EncSlice.cpp index 1fa2f008aaaf0a19e12e575e3be2ea5909869a9e..2843c2696bea129ad063b6216fcce17a90eed295 100644 --- a/source/Lib/EncoderLib/EncSlice.cpp +++ b/source/Lib/EncoderLib/EncSlice.cpp @@ -193,18 +193,9 @@ static void filterAndCalculateAverageEnergies (const Pel* pSrc, const int iSrcS #if GLOBAL_AVERAGING static double getAveragePictureEnergy (const CPelBuf picOrig, const uint32_t uBitDepth) { - double hpEnerPic = 5.65625 * double(1 << (uBitDepth >> 1)); // square-root of a_pic value + const double hpEnerPic = 16.0 * sqrt ((3840.0 * 2160.0) / double(picOrig.width * picOrig.height)) * double(1 << uBitDepth); - if (picOrig.width > 2048 && picOrig.height > 1280) // for UHD/4K - { - hpEnerPic *= (4.0 / 5.65625); - } - else if (picOrig.width <= 1024 || picOrig.height <= 640) // 480p - { - hpEnerPic *= (8.0 / 5.65625); - } - - return hpEnerPic; + return sqrt (hpEnerPic); // square-root of a_pic value } #endif diff --git a/source/Lib/EncoderLib/InterSearch.cpp b/source/Lib/EncoderLib/InterSearch.cpp index 1a30af140b175a9f84bc6b4f6db7eed7976b3e51..cad55ecf97b431ed446a4f85cea1a22aa389b9a3 100644 --- a/source/Lib/EncoderLib/InterSearch.cpp +++ b/source/Lib/EncoderLib/InterSearch.cpp @@ -695,9 +695,9 @@ Distortion InterSearch::xGetInterPredictionError( PredictionUnit& pu, PelUnitBuf return (Distortion)cDistParam.distFunc( cDistParam ); } -/// add cpr search functions here +/// add ibc search functions here -void InterSearch::xCPRSearchMVCandUpdate(Distortion sad, int x, int y, Distortion* sadBestCand, Mv* cMVCand) +void 
InterSearch::xIBCSearchMVCandUpdate(Distortion sad, int x, int y, Distortion* sadBestCand, Mv* cMVCand) { int j = CHROMA_REFINEMENT_CANDIDATES - 1; @@ -720,7 +720,7 @@ void InterSearch::xCPRSearchMVCandUpdate(Distortion sad, int x, int y, Distorti } } -int InterSearch::xCPRSearchMVChromaRefine(PredictionUnit& pu, +int InterSearch::xIBCSearchMVChromaRefine(PredictionUnit& pu, int roiWidth, int roiHeight, int cuPelX, @@ -814,7 +814,7 @@ int InterSearch::xCPRSearchMVChromaRefine(PredictionUnit& pu, static unsigned int xMergeCandLists(Mv *dst, unsigned int dn, Mv *src, unsigned int sn) { - for (unsigned int cand = 0; cand < sn && dn<CPR_NUM_CANDIDATES; cand++) + for (unsigned int cand = 0; cand < sn && dn<IBC_NUM_CANDIDATES; cand++) { bool found = false; for (int j = 0; j<dn; j++) @@ -897,7 +897,7 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct& cS Mv cMvPredEncOnly[16]; int nbPreds = 0; - PU::getCprMVPsEncOnly(pu, cMvPredEncOnly, nbPreds); + PU::getIbcMVPsEncOnly(pu, cMvPredEncOnly, nbPreds); m_numBVs = xMergeCandLists(m_acBVs, m_numBVs, cMvPredEncOnly, nbPreds); for (unsigned int cand = 0; cand < m_numBVs; cand++) @@ -917,7 +917,7 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct& cS m_cDistParam.cur.buf = piRefSrch + cStruct.iRefStride * yPred + xPred; sad += m_cDistParam.distFunc(m_cDistParam); - xCPRSearchMVCandUpdate(sad, xPred, yPred, sadBestCand, cMVCand); + xIBCSearchMVCandUpdate(sad, xPred, yPred, sadBestCand, cMVCand); } } } @@ -939,7 +939,7 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct& cS m_cDistParam.cur.buf = piRefSrch + cStruct.iRefStride * y; sad += m_cDistParam.distFunc(m_cDistParam); - xCPRSearchMVCandUpdate(sad, 0, y, sadBestCand, cMVCand); + xIBCSearchMVCandUpdate(sad, 0, y, sadBestCand, cMVCand); tempSadBest = sadBestCand[0]; if (sadBestCand[0] <= 3) { @@ -965,7 +965,7 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct& cS sad 
+= m_cDistParam.distFunc(m_cDistParam); - xCPRSearchMVCandUpdate(sad, x, 0, sadBestCand, cMVCand); + xIBCSearchMVCandUpdate(sad, x, 0, sadBestCand, cMVCand); tempSadBest = sadBestCand[0]; if (sadBestCand[0] <= 3) { @@ -984,7 +984,7 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct& cS if ((!bestX && !bestY) || (sadBest - m_pcRdCost->getBvCostMultiplePreds(bestX, bestY, pu.cs->sps->getSpsNext().getImvMode() == IMV_4PEL) <= 32)) { //chroma refine - bestCandIdx = xCPRSearchMVChromaRefine(pu, roiWidth, roiHeight, cuPelX, cuPelY, sadBestCand, cMVCand); + bestCandIdx = xIBCSearchMVChromaRefine(pu, roiWidth, roiHeight, cuPelX, cuPelY, sadBestCand, cMVCand); bestX = cMVCand[bestCandIdx].getHor(); bestY = cMVCand[bestCandIdx].getVer(); sadBest = sadBestCand[bestCandIdx]; @@ -1015,7 +1015,7 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct& cS m_cDistParam.cur.buf = piRefSrch + cStruct.iRefStride * y + x; sad += m_cDistParam.distFunc(m_cDistParam); - xCPRSearchMVCandUpdate(sad, x, y, sadBestCand, cMVCand); + xIBCSearchMVCandUpdate(sad, x, y, sadBestCand, cMVCand); } } @@ -1025,7 +1025,7 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct& cS if (sadBest - m_pcRdCost->getBvCostMultiplePreds(bestX, bestY, pu.cs->sps->getSpsNext().getImvMode() == IMV_4PEL) <= 16) { //chroma refine - bestCandIdx = xCPRSearchMVChromaRefine(pu, roiWidth, roiHeight, cuPelX, cuPelY, sadBestCand, cMVCand); + bestCandIdx = xIBCSearchMVChromaRefine(pu, roiWidth, roiHeight, cuPelX, cuPelY, sadBestCand, cMVCand); bestX = cMVCand[bestCandIdx].getHor(); bestY = cMVCand[bestCandIdx].getVer(); @@ -1056,11 +1056,11 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct& cS sad += m_cDistParam.distFunc(m_cDistParam); - xCPRSearchMVCandUpdate(sad, x, y, sadBestCand, cMVCand); + xIBCSearchMVCandUpdate(sad, x, y, sadBestCand, cMVCand); if (sadBestCand[0] <= 5) { //chroma refine & return - bestCandIdx = 
xCPRSearchMVChromaRefine(pu, roiWidth, roiHeight, cuPelX, cuPelY, sadBestCand, cMVCand); + bestCandIdx = xIBCSearchMVChromaRefine(pu, roiWidth, roiHeight, cuPelX, cuPelY, sadBestCand, cMVCand); bestX = cMVCand[bestCandIdx].getHor(); bestY = cMVCand[bestCandIdx].getVer(); sadBest = sadBestCand[bestCandIdx]; @@ -1078,7 +1078,7 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct& cS if ((sadBest >= tempSadBest) || ((sadBest - m_pcRdCost->getBvCostMultiplePreds(bestX, bestY, pu.cs->sps->getSpsNext().getImvMode() == IMV_4PEL)) <= 32)) { //chroma refine - bestCandIdx = xCPRSearchMVChromaRefine(pu, roiWidth, roiHeight, cuPelX, cuPelY, sadBestCand, cMVCand); + bestCandIdx = xIBCSearchMVChromaRefine(pu, roiWidth, roiHeight, cuPelX, cuPelY, sadBestCand, cMVCand); bestX = cMVCand[bestCandIdx].getHor(); bestY = cMVCand[bestCandIdx].getVer(); sadBest = sadBestCand[bestCandIdx]; @@ -1113,11 +1113,11 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct& cS sad += m_cDistParam.distFunc(m_cDistParam); - xCPRSearchMVCandUpdate(sad, x, y, sadBestCand, cMVCand); + xIBCSearchMVCandUpdate(sad, x, y, sadBestCand, cMVCand); if (sadBestCand[0] <= 5) { //chroma refine & return - bestCandIdx = xCPRSearchMVChromaRefine(pu, roiWidth, roiHeight, cuPelX, cuPelY, sadBestCand, cMVCand); + bestCandIdx = xIBCSearchMVChromaRefine(pu, roiWidth, roiHeight, cuPelX, cuPelY, sadBestCand, cMVCand); bestX = cMVCand[bestCandIdx].getHor(); bestY = cMVCand[bestCandIdx].getVer(); sadBest = sadBestCand[bestCandIdx]; @@ -1130,7 +1130,7 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct& cS } } - bestCandIdx = xCPRSearchMVChromaRefine(pu, roiWidth, roiHeight, cuPelX, cuPelY, sadBestCand, cMVCand); + bestCandIdx = xIBCSearchMVChromaRefine(pu, roiWidth, roiHeight, cuPelX, cuPelY, sadBestCand, cMVCand); bestX = cMVCand[bestCandIdx].getHor(); bestY = cMVCand[bestCandIdx].getVer(); @@ -1155,14 +1155,14 @@ end: // based on xMotionEstimation 
-void InterSearch::xCPREstimation(PredictionUnit& pu, PelUnitBuf& origBuf, +void InterSearch::xIBCEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Mv *pcMvPred, Mv &rcMv, Distortion &ruiCost, const int localSearchRangeX, const int localSearchRangeY ) { bool buffered = false; - if (m_pcEncCfg->getCPRFastMethod() & CPR_FAST_METHOD_BUFFERBV) + if (m_pcEncCfg->getIBCFastMethod() & IBC_FAST_METHOD_BUFFERBV) { ruiCost = MAX_UINT; const int iPicWidth = pu.cs->slice->getSPS()->getPicWidthInLumaSamples(); @@ -1280,10 +1280,10 @@ void InterSearch::xSetIntraSearchRange(PredictionUnit& pu, int iRoiWidth, int iR rcMvSrchRngRB >>= 2; } -bool InterSearch::predCPRSearch(CodingUnit& cu, Partitioner& partitioner, const int localSearchRangeX, const int localSearchRangeY, CprHashMap& cprHashMap) +bool InterSearch::predIBCSearch(CodingUnit& cu, Partitioner& partitioner, const int localSearchRangeX, const int localSearchRangeY, IbcHashMap& ibcHashMap) { - // check only no greater than CPR_MAX_CAND_SIZE - if (cu.Y().width > CPR_MAX_CAND_SIZE || cu.Y().height > CPR_MAX_CAND_SIZE) + // check only no greater than IBC_MAX_CAND_SIZE + if (cu.Y().width > IBC_MAX_CAND_SIZE || cu.Y().height > IBC_MAX_CAND_SIZE) return false; Mv cMvSrchRngLT; Mv cMvSrchRngRB; @@ -1297,7 +1297,7 @@ bool InterSearch::predCPRSearch(CodingUnit& cu, Partitioner& partitioner, const CHECK(pu.cu != &cu, "PU is contained in another CU"); ////////////////////////////////////////////////////////// - /// cpr search + /// ibc search pu.cu->imv = 2; AMVPInfo amvpInfo4Pel; PU::fillMvpCand(pu, REF_PIC_LIST_0, pu.refIdx[REF_PIC_LIST_0], amvpInfo4Pel); @@ -1315,23 +1315,23 @@ bool InterSearch::predCPRSearch(CodingUnit& cu, Partitioner& partitioner, const cMv.setZero(); Distortion cost = 0; - if (m_pcEncCfg->getCPRHashSearch()) + if (m_pcEncCfg->getIBCHashSearch()) { - xxCPRHashSearch(pu, cMvPred, iBvpNum, cMv, bvpIdxBest, cprHashMap); + xxIBCHashSearch(pu, cMvPred, iBvpNum, cMv, bvpIdxBest, ibcHashMap); } if (cMv.getHor() == 0 
&& cMv.getVer() == 0) { // if hash search does not work or is not enabled PelUnitBuf origBuf = pu.cs->getOrgBuf(pu); - xCPREstimation(pu, origBuf, cMvPred, cMv, cost, localSearchRangeX, localSearchRangeY); + xIBCEstimation(pu, origBuf, cMvPred, cMv, cost, localSearchRangeX, localSearchRangeY); } if (cMv.getHor() == 0 && cMv.getVer() == 0) { return false; } - /// cpr search + /// ibc search ///////////////////////////////////////////////////////// unsigned int bitsBVPBest, bitsBVPTemp; bitsBVPBest = MAX_INT; @@ -1412,13 +1412,13 @@ bool InterSearch::predCPRSearch(CodingUnit& cu, Partitioner& partitioner, const return true; } -void InterSearch::xxCPRHashSearch(PredictionUnit& pu, Mv* mvPred, int numMvPred, Mv &mv, int& idxMvPred, CprHashMap& cprHashMap) +void InterSearch::xxIBCHashSearch(PredictionUnit& pu, Mv* mvPred, int numMvPred, Mv &mv, int& idxMvPred, IbcHashMap& ibcHashMap) { mv.setZero(); m_pcRdCost->setCostScale(0); std::vector<Position> candPos; - if (cprHashMap.cprHashMatch(pu.Y(), candPos, *pu.cs, m_pcEncCfg->getCPRHashSearchMaxCand(), m_pcEncCfg->getCPRHashSearchRange4SmallBlk())) + if (ibcHashMap.ibcHashMatch(pu.Y(), candPos, *pu.cs, m_pcEncCfg->getIBCHashSearchMaxCand(), m_pcEncCfg->getIBCHashSearchRange4SmallBlk())) { unsigned int minCost = MAX_UINT; @@ -1523,6 +1523,10 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) uint32_t uiLastModeTemp = 0; int iRefStart, iRefEnd; +#if JVET_M0444_SMVD + int symMode = 0; +#endif + int bestBiPRefIdxL1 = 0; int bestBiPMvpL1 = 0; Distortion biPDistTemp = std::numeric_limits<Distortion>::max(); @@ -1543,7 +1547,7 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) CHECK(pu.cu != &cu, "PU is contained in another CU"); - if (cu.cs->sps->getSpsNext().getUseSubPuMvp()) + if (cu.cs->sps->getSBTMVPEnabledFlag()) { Size bufSize = g_miScaling.scale(pu.lumaSize()); mergeCtx.subPuMvpMiBuf = MotionBuf(m_SubPuMiBuf, bufSize); @@ -1584,7 +1588,7 @@ void 
InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) { RefPicList eRefPicList = ( iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 ); int refPicNumber = cs.slice->getNumRefIdx(eRefPicList); - if (cs.slice->getSPS()->getSpsNext().getCPRMode() && eRefPicList == REF_PIC_LIST_0) + if (cs.slice->getSPS()->getSpsNext().getIBCMode() && eRefPicList == REF_PIC_LIST_0) { refPicNumber--; } @@ -1790,7 +1794,7 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) iRefStart = 0; iRefEnd = cs.slice->getNumRefIdx(eRefPicList)-1; - if (cs.slice->getSPS()->getSpsNext().getCPRMode() && eRefPicList == REF_PIC_LIST_0) + if (cs.slice->getSPS()->getSpsNext().getIBCMode() && eRefPicList == REF_PIC_LIST_0) { iRefEnd--; } @@ -1813,7 +1817,12 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) } } uiBitsTemp += m_auiMVPIdxCost[aaiMvpIdxBi[iRefList][iRefIdxTemp]][AMVP_MAX_NUM_CANDS]; - +#if JVET_M0444_SMVD + if ( cs.slice->getBiDirPred() ) + { + uiBitsTemp += 1; // add one bit for symmetrical MVD mode + } +#endif // call ME xCopyAMVPInfo(&aacAMVPInfo[iRefList][iRefIdxTemp], &amvp[eRefPicList] ); xMotionEstimation ( pu, origBuf, eRefPicList, cMvPredBi[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], aaiMvpIdxBi[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp, amvp[eRefPicList], true ); @@ -1860,6 +1869,113 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) } // for loop-iter cu.refIdxBi[0] = iRefIdxBi[0]; cu.refIdxBi[1] = iRefIdxBi[1]; + +#if JVET_M0444_SMVD + if ( cs.slice->getBiDirPred() ) + { + Distortion symCost; + Mv cMvPredSym[2]; + int mvpIdxSym[2]; + + int curRefList = REF_PIC_LIST_0; + int tarRefList = 1 - curRefList; + RefPicList eCurRefList = (curRefList ? 
REF_PIC_LIST_1 : REF_PIC_LIST_0); + int refIdxCur = cs.slice->getSymRefIdx( curRefList ); + int refIdxTar = cs.slice->getSymRefIdx( tarRefList ); + + MvField cCurMvField, cTarMvField; + Distortion costStart = std::numeric_limits<Distortion>::max(); + for ( int i = 0; i < aacAMVPInfo[curRefList][refIdxCur].numCand; i++ ) + { + for ( int j = 0; j < aacAMVPInfo[tarRefList][refIdxTar].numCand; j++ ) + { + cCurMvField.setMvField( aacAMVPInfo[curRefList][refIdxCur].mvCand[i], refIdxCur ); + cTarMvField.setMvField( aacAMVPInfo[tarRefList][refIdxTar].mvCand[j], refIdxTar ); + Distortion cost = xGetSymmetricCost( pu, origBuf, eCurRefList, cCurMvField, cTarMvField, gbiIdx ); + if ( cost < costStart ) + { + costStart = cost; + cMvPredSym[curRefList] = aacAMVPInfo[curRefList][refIdxCur].mvCand[i]; + cMvPredSym[tarRefList] = aacAMVPInfo[tarRefList][refIdxTar].mvCand[j]; + mvpIdxSym[curRefList] = i; + mvpIdxSym[tarRefList] = j; + } + } + } + cCurMvField.mv = cMvPredSym[curRefList]; + cTarMvField.mv = cMvPredSym[tarRefList]; + + m_pcRdCost->setCostScale(0); + m_pcRdCost->setPredictor(cMvPredSym[curRefList]); + uint32_t bits = m_pcRdCost->getBitsOfVectorWithPredictor(cCurMvField.mv.hor, cCurMvField.mv.ver, (pu.cu->imv << 1)); + bits += m_auiMVPIdxCost[mvpIdxSym[curRefList]][AMVP_MAX_NUM_CANDS]; + bits += m_auiMVPIdxCost[mvpIdxSym[tarRefList]][AMVP_MAX_NUM_CANDS]; + costStart += m_pcRdCost->getCost(bits); + + std::vector<Mv> symmvdCands; + symmvdCands.push_back(cMvTemp[curRefList][refIdxCur]); + if (iRefIdxBi[curRefList] == refIdxCur && cMvBi[curRefList] != cMvTemp[curRefList][refIdxCur]) + { + symmvdCands.push_back(cMvBi[curRefList]); + } + + for (auto mvStart : symmvdCands) + { + bool checked = false; //if it has been checkin in the mvPred. 
+ for (int i = 0; i < aacAMVPInfo[curRefList][refIdxCur].numCand && !checked; i++) + { + checked |= (mvStart == aacAMVPInfo[curRefList][refIdxCur].mvCand[i]); + } + if (checked) + break; + + Distortion bestCost = costStart; + symmvdCheckBestMvp(pu, origBuf, mvStart, (RefPicList)curRefList, aacAMVPInfo, gbiIdx, cMvPredSym, mvpIdxSym, costStart); + if (costStart < bestCost) + { + cCurMvField.setMvField(mvStart, refIdxCur); + cTarMvField.setMvField(mvStart.getSymmvdMv(cMvPredSym[curRefList], cMvPredSym[tarRefList]), refIdxTar); + } + } + Mv startPtMv = cCurMvField.mv; + + Distortion mvpCost = m_pcRdCost->getCost(m_auiMVPIdxCost[mvpIdxSym[curRefList]][AMVP_MAX_NUM_CANDS] + m_auiMVPIdxCost[mvpIdxSym[tarRefList]][AMVP_MAX_NUM_CANDS]); + symCost = costStart - mvpCost; + + // ME + xSymmetricMotionEstimation( pu, origBuf, cMvPredSym[curRefList], cMvPredSym[tarRefList], eCurRefList, cCurMvField, cTarMvField, symCost, gbiIdx ); + + symCost += mvpCost; + + if (startPtMv != cCurMvField.mv) + { // if ME change MV, run a final check for best MVP. + symmvdCheckBestMvp(pu, origBuf, cCurMvField.mv, (RefPicList)curRefList, aacAMVPInfo, gbiIdx, cMvPredSym, mvpIdxSym, symCost, true); + } + + bits = uiMbBits[2]; + bits += 1; // add one bit for symmetrical MVD mode + bits += ((cs.slice->getSPS()->getSpsNext().getUseGBi() == true) ? 
getWeightIdxBits(gbiIdx) : 0); + symCost += m_pcRdCost->getCost(bits); + cTarMvField.setMvField(cCurMvField.mv.getSymmvdMv(cMvPredSym[curRefList], cMvPredSym[tarRefList]), refIdxTar); + + // save results + if ( symCost < uiCostBi ) + { + uiCostBi = symCost; + symMode = 1 + curRefList; + + cMvBi[curRefList] = cCurMvField.mv; + iRefIdxBi[curRefList] = cCurMvField.refIdx; + aaiMvpIdxBi[curRefList][cCurMvField.refIdx] = mvpIdxSym[curRefList]; + cMvPredBi[curRefList][iRefIdxBi[curRefList]] = cMvPredSym[curRefList]; + + cMvBi[tarRefList] = cTarMvField.mv; + iRefIdxBi[tarRefList] = cTarMvField.refIdx; + aaiMvpIdxBi[tarRefList][cTarMvField.refIdx] = mvpIdxSym[tarRefList]; + cMvPredBi[tarRefList][iRefIdxBi[tarRefList]] = cMvPredSym[tarRefList]; + } + } +#endif } // if (B_SLICE) @@ -1906,6 +2022,10 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) pu.mvpNum[REF_PIC_LIST_0] = aaiMvpNum[0][iRefIdxBi[0]]; pu.mvpNum[REF_PIC_LIST_1] = aaiMvpNum[1][iRefIdxBi[1]]; pu.interDir = 3; + +#if JVET_M0444_SMVD + pu.cu->smvdMode = symMode; +#endif } else if ( uiCost[0] <= uiCost[1] ) { @@ -1945,6 +2065,9 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) uint32_t uiMRGIndex = pu.mergeIdx; bool bMergeFlag = pu.mergeFlag; uint32_t uiInterDir = pu.interDir; +#if JVET_M0444_SMVD + int iSymMode = cu.smvdMode; +#endif Mv cMvd[2]; uint32_t uiMvpIdx[2], uiMvpNum[2]; @@ -2042,6 +2165,9 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) pu.mergeFlag = bMergeFlag; pu.mergeIdx = uiMRGIndex; pu.interDir = uiInterDir; +#if JVET_M0444_SMVD + cu.smvdMode = iSymMode; +#endif pu.mv [REF_PIC_LIST_0] = cHevcMvField[0].mv; pu.refIdx[REF_PIC_LIST_0] = cHevcMvField[0].refIdx; pu.mv [REF_PIC_LIST_1] = cHevcMvField[1].mv; @@ -2055,6 +2181,9 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) } else { +#if JVET_M0444_SMVD + cu.smvdMode = 0; +#endif CHECK( !cu.affine, "Wrong." 
); uiLastMode = uiLastModeTemp; } @@ -3133,6 +3262,153 @@ void InterSearch::xPatternSearchFracDIF( ruiCost = xPatternRefinement( cStruct.pcPatternKey, baseRefMv, 1, rcMvQter, !bIsLosslessCoded ); } +#if JVET_M0444_SMVD +Distortion InterSearch::xGetSymmetricCost( PredictionUnit& pu, PelUnitBuf& origBuf, RefPicList eCurRefPicList, const MvField& cCurMvField, MvField& cTarMvField, int gbiIdx ) +{ + Distortion cost = std::numeric_limits<Distortion>::max(); + RefPicList eTarRefPicList = (RefPicList)(1 - (int)eCurRefPicList); + + // get prediction of eCurRefPicList + PelUnitBuf predBufA = m_tmpPredStorage[eCurRefPicList].getBuf( UnitAreaRelative( *pu.cu, pu ) ); + const Picture* picRefA = pu.cu->slice->getRefPic( eCurRefPicList, cCurMvField.refIdx ); + Mv mvA = cCurMvField.mv; + mvA.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); + clipMv( mvA, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps ); + xPredInterBlk( COMPONENT_Y, pu, picRefA, mvA, predBufA, true, pu.cu->slice->clpRng( COMPONENT_Y ), false, false ); + + // get prediction of eTarRefPicList + PelUnitBuf predBufB = m_tmpPredStorage[eTarRefPicList].getBuf( UnitAreaRelative( *pu.cu, pu ) ); + const Picture* picRefB = pu.cu->slice->getRefPic( eTarRefPicList, cTarMvField.refIdx ); + Mv mvB = cTarMvField.mv; + mvB.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); + clipMv( mvB, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps ); + xPredInterBlk( COMPONENT_Y, pu, picRefB, mvB, predBufB, true, pu.cu->slice->clpRng( COMPONENT_Y ), false, false ); + + PelUnitBuf bufTmp = m_tmpStorageLCU.getBuf( UnitAreaRelative( *pu.cu, pu ) ); + if (gbiIdx != GBI_DEFAULT) + bufTmp.Y().addWeightedAvg(predBufA.Y(), predBufB.Y(), pu.cu->slice->clpRng(COMPONENT_Y), gbiIdx); + else + bufTmp.Y().addAvg( predBufA.Y(), predBufB.Y(), pu.cu->slice->clpRng( COMPONENT_Y ) ); + + // calc distortion + cost = m_pcRdCost->getDistPart(bufTmp.Y(), origBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_HAD); + + 
return(cost); +} + +Distortion InterSearch::xSymmeticRefineMvSearch( PredictionUnit &pu, PelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred + , RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion uiMinCost, int SearchPattern, int nSearchStepShift, uint32_t uiMaxSearchRounds, int gbiIdx ) +{ + const Mv mvSearchOffsetCross[4] = { Mv( 0 , 1 ) , Mv( 1 , 0 ) , Mv( 0 , -1 ) , Mv( -1 , 0 ) }; + const Mv mvSearchOffsetSquare[8] = { Mv( -1 , 1 ) , Mv( 0 , 1 ) , Mv( 1 , 1 ) , Mv( 1 , 0 ) , Mv( 1 , -1 ) , Mv( 0 , -1 ) , Mv( -1 , -1 ) , Mv( -1 , 0 ) }; + const Mv mvSearchOffsetDiamond[8] = { Mv( 0 , 2 ) , Mv( 1 , 1 ) , Mv( 2 , 0 ) , Mv( 1 , -1 ) , Mv( 0 , -2 ) , Mv( -1 , -1 ) , Mv( -2 , 0 ) , Mv( -1 , 1 ) }; + const Mv mvSearchOffsetHexagon[6] = { Mv( 2 , 0 ) , Mv( 1 , 2 ) , Mv( -1 , 2 ) , Mv( -2 , 0 ) , Mv( -1 , -2 ) , Mv( 1 , -2 ) }; + + int nDirectStart = 0, nDirectEnd = 0, nDirectRounding = 0, nDirectMask = 0; + const Mv * pSearchOffset; + if ( SearchPattern == 0 ) + { + nDirectEnd = 3; + nDirectRounding = 4; + nDirectMask = 0x03; + pSearchOffset = mvSearchOffsetCross; + } + else if ( SearchPattern == 1 ) + { + nDirectEnd = 7; + nDirectRounding = 8; + nDirectMask = 0x07; + pSearchOffset = mvSearchOffsetSquare; + } + else if ( SearchPattern == 2 ) + { + nDirectEnd = 7; + nDirectRounding = 8; + nDirectMask = 0x07; + pSearchOffset = mvSearchOffsetDiamond; + } + else if ( SearchPattern == 3 ) + { + nDirectEnd = 5; + pSearchOffset = mvSearchOffsetHexagon; + } + else + { + THROW( "Invalid search pattern" ); + } + + int nBestDirect; + for ( uint32_t uiRound = 0; uiRound < uiMaxSearchRounds; uiRound++ ) + { + nBestDirect = -1; + MvField mvCurCenter = rCurMvField; + for ( int nIdx = nDirectStart; nIdx <= nDirectEnd; nIdx++ ) + { + int nDirect; + if ( SearchPattern == 3 ) + { + nDirect = nIdx < 0 ? nIdx + 6 : nIdx >= 6 ? 
nIdx - 6 : nIdx; + } + else + { + nDirect = (nIdx + nDirectRounding) & nDirectMask; + } + + Mv mvOffset = pSearchOffset[nDirect]; + mvOffset <<= nSearchStepShift; + MvField mvCand = mvCurCenter, mvPair; + mvCand.mv += mvOffset; + + // get MVD cost + m_pcRdCost->setPredictor( rcMvCurPred ); + m_pcRdCost->setCostScale( 0 ); + uint32_t uiMvBits = m_pcRdCost->getBitsOfVectorWithPredictor( mvCand.mv.getHor(), mvCand.mv.getVer(), (pu.cu->imv << 1) ); + Distortion uiCost = m_pcRdCost->getCost( uiMvBits ); + + // get MVD pair and set target MV + mvPair.refIdx = rTarMvField.refIdx; + mvPair.mv.set( rcMvTarPred.hor - (mvCand.mv.hor - rcMvCurPred.hor), rcMvTarPred.ver - (mvCand.mv.ver - rcMvCurPred.ver) ); + uiCost += xGetSymmetricCost( pu, origBuf, eRefPicList, mvCand, mvPair, gbiIdx ); + if ( uiCost < uiMinCost ) + { + uiMinCost = uiCost; + rCurMvField = mvCand; + rTarMvField = mvPair; + nBestDirect = nDirect; + } + } + + if ( nBestDirect == -1 ) + { + break; + } + int nStep = 1; + if ( SearchPattern == 1 || SearchPattern == 2 ) + { + nStep = 2 - (nBestDirect & 0x01); + } + nDirectStart = nBestDirect - nStep; + nDirectEnd = nBestDirect + nStep; + } + + return(uiMinCost); +} + + +void InterSearch::xSymmetricMotionEstimation( PredictionUnit& pu, PelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred, RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion& ruiCost, int gbiIdx ) +{ + // Refine Search + int nSearchStepShift = 0; + int nDiamondRound = 8; + int nCrossRound = 1; + + nSearchStepShift += (pu.cu->imv << 1); + nDiamondRound >>= pu.cu->imv; + + ruiCost = xSymmeticRefineMvSearch( pu, origBuf, rcMvCurPred, rcMvTarPred, eRefPicList, rCurMvField, rTarMvField, ruiCost, 2, nSearchStepShift, nDiamondRound, gbiIdx ); + ruiCost = xSymmeticRefineMvSearch( pu, origBuf, rcMvCurPred, rcMvTarPred, eRefPicList, rCurMvField, rTarMvField, ruiCost, 0, nSearchStepShift, nCrossRound, gbiIdx ); +} +#endif // JVET_M0444_SMVD void InterSearch::xPredAffineInterSearch( 
PredictionUnit& pu, PelUnitBuf& origBuf, @@ -3218,7 +3494,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, { RefPicList eRefPicList = ( iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 ); int refPicNumber = slice.getNumRefIdx(eRefPicList); - if (slice.getSPS()->getSpsNext().getCPRMode() && eRefPicList == REF_PIC_LIST_0) + if (slice.getSPS()->getSpsNext().getIBCMode() && eRefPicList == REF_PIC_LIST_0) { refPicNumber--; } @@ -3591,7 +3867,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, iRefStart = 0; iRefEnd = slice.getNumRefIdx(eRefPicList) - 1; - if (slice.getSPS()->getSpsNext().getCPRMode() && eRefPicList == REF_PIC_LIST_0) + if (slice.getSPS()->getSpsNext().getIBCMode() && eRefPicList == REF_PIC_LIST_0) { iRefEnd--; } @@ -4160,8 +4436,13 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu, for ( int i = 0; i < mvNum; i++ ) { acMvTemp[i] += acDeltaMv[i]; +#if JVET_M0479_18BITS_MV_CLIP + acMvTemp[i].hor = Clip3( -131072, 131071, acMvTemp[i].hor ); + acMvTemp[i].ver = Clip3( -131072, 131071, acMvTemp[i].ver ); +#else acMvTemp[i].hor = Clip3( -32768, 32767, acMvTemp[i].hor ); acMvTemp[i].ver = Clip3( -32768, 32767, acMvTemp[i].ver ); +#endif acMvTemp[i].roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); clipMv(acMvTemp[i], pu.cu->lumaPos(), pu.cu->lumaSize(), @@ -4721,7 +5002,9 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par { const UnitArea& currArea = partitioner.currArea(); const SPS &sps = *cs.sps; +#if !JVET_M0464_UNI_MTS const PPS &pps = *cs.pps; +#endif const uint32_t numValidComp = getNumberValidComponents( sps.getChromaFormatIdc() ); const uint32_t numTBlocks = getNumberValidTBlocks ( *cs.pcv ); const CodingUnit &cu = *cs.getCU(partitioner.chType); @@ -4753,10 +5036,16 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par { TransformUnit &tu = csFull->addTU(CS::isDualITree(cs) ? 
cu : currArea, partitioner.chType); tu.depth = currDepth; +#if JVET_M0464_UNI_MTS + tu.mtsIdx = 0; +#else tu.emtIdx = 0; +#endif double minCost [MAX_NUM_TBLOCKS]; +#if !JVET_M0464_UNI_MTS bool checkTransformSkip [MAX_NUM_TBLOCKS]; +#endif m_CABACEstimator->resetBits(); @@ -4783,19 +5072,22 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par continue; const CompArea& compArea = tu.blocks[compID]; const int channelBitDepth = sps.getBitDepth(toChannelType(compID)); - +#if !JVET_M0464_UNI_MTS checkTransformSkip[compID] = false; +#endif if( !tu.blocks[compID].valid() ) { continue; } +#if !JVET_M0464_UNI_MTS checkTransformSkip[compID] = pps.getUseTransformSkip() && TU::hasTransformSkipFlag( *tu.cs, tu.blocks[compID] ) && !cs.isLossless; if( isLuma(compID) ) { checkTransformSkip[compID] &= !tu.cu->emtFlag; } +#endif const bool isCrossCPredictionAvailable = TU::hasCrossCompPredInfo( tu, compID ); @@ -4808,10 +5100,32 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par preCalcAlpha = xCalcCrossComponentPredictionAlpha( tu, compID, m_pcEncCfg->getUseReconBasedCrossCPredictionEstimate() ); } +#if JVET_M0464_UNI_MTS + const bool tsAllowed = TU::isTSAllowed ( tu, compID ); + const bool mtsAllowed = TU::isMTSAllowed( tu, compID ); + uint8_t nNumTransformCands = 1 + ( tsAllowed ? 1 : 0 ) + ( mtsAllowed ? 4 : 0 ); // DCT + TS + 4 MTS = 6 tests + std::vector<TrMode> trModes; + trModes.push_back( TrMode( 0, true ) ); //DCT2 + if( tsAllowed ) + { + trModes.push_back( TrMode( 1, true ) ); + } + if( mtsAllowed ) + { + for( int i = 2; i < 6; i++ ) + { + trModes.push_back( TrMode( i, true ) ); + } + } +#endif const int crossCPredictionModesToTest = preCalcAlpha != 0 ? 2 : 1; +#if JVET_M0464_UNI_MTS + const int numTransformCandidates = nNumTransformCands; +#else const int numEmtTransformCandidates = isLuma(compID) && tu.cu->emtFlag && sps.getSpsNext().getUseInterEMT() ? 
4 : 1; const int numTransformCandidates = checkTransformSkip[compID] ? ( numEmtTransformCandidates + 1 ) : numEmtTransformCandidates; int lastTransformModeIndex = numTransformCandidates - 1; //lastTransformModeIndex is the mode for transformSkip (if transformSkip is active) +#endif const bool isOneMode = crossCPredictionModesToTest == 1 && numTransformCandidates == 1; bool isLastBest = isOneMode; @@ -4829,8 +5143,23 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par m_CABACEstimator->getCtx() = ctxStart; m_CABACEstimator->resetBits(); +#if JVET_M0464_UNI_MTS + if( isLuma( compID ) ) + { + if( bestTU.mtsIdx == 1 && m_pcEncCfg->getUseTransformSkipFast() ) + { + continue; + } + if( !trModes[transformMode].second ) + { + continue; + } + tu.mtsIdx = trModes[transformMode].first; + } +#else if( isLuma( compID ) ) tu.emtIdx = transformMode; tu.transformSkip[compID] = checkTransformSkip[compID] && transformMode == lastTransformModeIndex; +#endif tu.compAlpha[compID] = bUseCrossCPrediction ? preCalcAlpha : 0; const QpParam cQP(tu, compID); // note: uses tu.transformSkip[compID] @@ -4853,7 +5182,23 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par crossComponentPrediction( tu, compID, lumaResi, resiBuf, resiBuf, false ); } +#if JVET_M0464_UNI_MTS + if( nNumTransformCands > 1 ) + { + if( transformMode == 0 ) + { + m_pcTrQuant->transformNxN( tu, compID, cQP, &trModes, CU::isIntra( *tu.cu ) ? 
m_pcEncCfg->getIntraMTSMaxCand() : m_pcEncCfg->getInterMTSMaxCand() ); + tu.mtsIdx = trModes[0].first; + } + m_pcTrQuant->transformNxN( tu, compID, cQP, currAbsSum, m_CABACEstimator->getCtx(), true ); + } + else + { + m_pcTrQuant->transformNxN( tu, compID, cQP, currAbsSum, m_CABACEstimator->getCtx() ); + } +#else m_pcTrQuant->transformNxN(tu, compID, cQP, currAbsSum, m_CABACEstimator->getCtx()); +#endif if (isFirstMode || (currAbsSum == 0)) { @@ -4937,7 +5282,11 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par nonCoeffCost = MAX_DOUBLE; } } +#if JVET_M0464_UNI_MTS + else if( transformMode > 0 && !bUseCrossCPrediction ) +#else else if( ( transformMode == lastTransformModeIndex ) && checkTransformSkip[compID] && !bUseCrossCPrediction ) +#endif { currCompCost = MAX_DOUBLE; } @@ -4951,7 +5300,11 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par } // evaluate +#if JVET_M0464_UNI_MTS + if( ( currCompCost < minCost[compID] ) || ( transformMode == 1 && currCompCost == minCost[compID] ) ) +#else if( ( currCompCost < minCost[compID] ) || ( transformMode == lastTransformModeIndex && checkTransformSkip[compID] && currCompCost == minCost[compID] ) ) +#endif { // copy component if (isFirstMode && ((nonCoeffCost < currCompCost) || (currAbsSum == 0))) // check for forced null @@ -5073,10 +5426,12 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par ); csSplit->cost = m_pcRdCost->calcRdCost( csSplit->fracBits, csSplit->dist ); +#if !JVET_M0464_UNI_MTS if( csFull && csSplit->cost >= csFull->cost && m_pcEncCfg->getFastInterEMT() ) { break; } +#endif } while( partitioner.nextPart( *csSplit ) ); partitioner.exitCurrSplit(); @@ -5084,8 +5439,12 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par unsigned anyCbfSet = 0; unsigned compCbf[3] = { 0, 0, 0 }; +#if JVET_M0464_UNI_MTS + if( !bCheckFull ) +#else bool isSplit = bCheckFull ? 
false : true; if( !bCheckFull || ( csSplit->cost < csFull->cost && m_pcEncCfg->getFastInterEMT() ) || !m_pcEncCfg->getFastInterEMT() ) +#endif { for( auto &currTU : csSplit->traverseTUs( currArea, partitioner.chType ) ) { @@ -5137,10 +5496,13 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par { cs.useSubStructure( *csSplit, partitioner.chType, currArea, false, false, false, true ); cs.cost = csSplit->cost; +#if !JVET_M0464_UNI_MTS isSplit = true; +#endif } } +#if !JVET_M0464_UNI_MTS if( ( !isSplit && m_pcEncCfg->getFastInterEMT() ) || ( !m_pcEncCfg->getFastInterEMT() && !( !bCheckFull || ( anyCbfSet && csSplit->cost < csFull->cost ) ) ) ) { CHECK( !bCheckFull, "Error!" ); @@ -5148,6 +5510,7 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par cs.cost = csFull->cost; m_CABACEstimator->getCtx() = ctxBest; } +#endif if( csSplit && csFull ) { @@ -5475,3 +5838,83 @@ void InterSearch::initWeightIdxBits() } } +#if JVET_M0444_SMVD +void InterSearch::symmvdCheckBestMvp( + PredictionUnit& pu, + PelUnitBuf& origBuf, + Mv curMv, + RefPicList curRefList, + AMVPInfo amvpInfo[2][33], + int32_t gbiIdx, + Mv cMvPredSym[2], + int32_t mvpIdxSym[2], + Distortion& bestCost, + bool skip +) +{ + RefPicList tarRefList = (RefPicList)(1 - curRefList); + int32_t refIdxCur = pu.cu->slice->getSymRefIdx(curRefList); + int32_t refIdxTar = pu.cu->slice->getSymRefIdx(tarRefList); + + MvField cCurMvField, cTarMvField; + cCurMvField.setMvField(curMv, refIdxCur); + AMVPInfo& amvpCur = amvpInfo[curRefList][refIdxCur]; + AMVPInfo& amvpTar = amvpInfo[tarRefList][refIdxTar]; + m_pcRdCost->setCostScale(0); + + + // get prediction of eCurRefPicList + PelUnitBuf predBufA = m_tmpPredStorage[curRefList].getBuf(UnitAreaRelative(*pu.cu, pu)); + const Picture* picRefA = pu.cu->slice->getRefPic(curRefList, cCurMvField.refIdx); + Mv mvA = cCurMvField.mv; + mvA.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); + clipMv(mvA, 
pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps); + xPredInterBlk(COMPONENT_Y, pu, picRefA, mvA, predBufA, true, pu.cu->slice->clpRng(COMPONENT_Y), false, false); + + int32_t skipMvpIdx[2]; + skipMvpIdx[0] = skip ? mvpIdxSym[0] : -1; + skipMvpIdx[1] = skip ? mvpIdxSym[1] : -1; + + for (int i = 0; i < amvpCur.numCand; i++) + { + for (int j = 0; j < amvpTar.numCand; j++) + { + if (skipMvpIdx[curRefList] == i && skipMvpIdx[tarRefList] == j) + continue; + + cTarMvField.setMvField(curMv.getSymmvdMv(amvpCur.mvCand[i], amvpTar.mvCand[j]), refIdxTar); + + // get prediction of eTarRefPicList + PelUnitBuf predBufB = m_tmpPredStorage[tarRefList].getBuf(UnitAreaRelative(*pu.cu, pu)); + const Picture* picRefB = pu.cu->slice->getRefPic(tarRefList, cTarMvField.refIdx); + Mv mvB = cTarMvField.mv; + mvB.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); + clipMv(mvB, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps); + xPredInterBlk(COMPONENT_Y, pu, picRefB, mvB, predBufB, true, pu.cu->slice->clpRng(COMPONENT_Y), false, false); + + PelUnitBuf bufTmp = m_tmpStorageLCU.getBuf(UnitAreaRelative(*pu.cu, pu)); + if (gbiIdx != GBI_DEFAULT) + bufTmp.Y().addWeightedAvg(predBufA.Y(), predBufB.Y(), pu.cu->slice->clpRng(COMPONENT_Y), gbiIdx); + else + bufTmp.Y().addAvg(predBufA.Y(), predBufB.Y(), pu.cu->slice->clpRng(COMPONENT_Y)); + + // calc distortion + Distortion cost = m_pcRdCost->getDistPart(bufTmp.Y(), origBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_HAD); + + m_pcRdCost->setPredictor(amvpCur.mvCand[i]); + uint32_t bits = m_pcRdCost->getBitsOfVectorWithPredictor(curMv.hor, curMv.ver, (pu.cu->imv << 1)); + bits += m_auiMVPIdxCost[i][AMVP_MAX_NUM_CANDS]; + bits += m_auiMVPIdxCost[j][AMVP_MAX_NUM_CANDS]; + cost += m_pcRdCost->getCost(bits); + if (cost < bestCost) + { + bestCost = cost; + cMvPredSym[curRefList] = amvpCur.mvCand[i]; + cMvPredSym[tarRefList] = amvpTar.mvCand[j]; + mvpIdxSym[curRefList] = i; + mvpIdxSym[tarRefList] = j; + } + } + } +} 
+#endif \ No newline at end of file diff --git a/source/Lib/EncoderLib/InterSearch.h b/source/Lib/EncoderLib/InterSearch.h index 05dcc7fa8aa389cf925e95727e6e4d0f7ba774a8..f9edc9438352622bd17b04cddf91a8c1a235074a 100644 --- a/source/Lib/EncoderLib/InterSearch.h +++ b/source/Lib/EncoderLib/InterSearch.h @@ -51,7 +51,7 @@ #include "CommonLib/RdCost.h" #include "CommonLib/AffineGradientSearch.h" -#include "CommonLib/CprHashMap.h" +#include "CommonLib/IbcHashMap.h" #include <unordered_map> #include <vector> //! \ingroup EncoderLib @@ -132,7 +132,7 @@ protected: bool m_isInitialized; unsigned int m_numBVs, m_numBV16s; - Mv m_acBVs[CPR_NUM_CANDIDATES]; + Mv m_acBVs[IBC_NUM_CANDIDATES]; public: InterSearch(); virtual ~InterSearch(); @@ -238,12 +238,12 @@ public: /// set ME search range void setAdaptiveSearchRange ( int iDir, int iRefIdx, int iSearchRange) { CHECK(iDir >= MAX_NUM_REF_LIST_ADAPT_SR || iRefIdx>=int(MAX_IDX_ADAPT_SR), "Invalid index"); m_aaiAdaptSR[iDir][iRefIdx] = iSearchRange; } - bool predCPRSearch ( CodingUnit& cu, Partitioner& partitioner, const int localSearchRangeX, const int localSearchRangeY, CprHashMap& cprHashMap); + bool predIBCSearch ( CodingUnit& cu, Partitioner& partitioner, const int localSearchRangeX, const int localSearchRangeY, IbcHashMap& ibcHashMap); void xIntraPatternSearch ( PredictionUnit& pu, IntTZSearchStruct& cStruct, Mv& rcMv, Distortion& ruiCost, Mv* cMvSrchRngLT, Mv* cMvSrchRngRB, Mv* pcMvPred); void xSetIntraSearchRange ( PredictionUnit& pu, int iRoiWidth, int iRoiHeight, const int localSearchRangeX, const int localSearchRangeY, Mv& rcMvSrchRngLT, Mv& rcMvSrchRngRB); - void xCPREstimation ( PredictionUnit& pu, PelUnitBuf& origBuf, Mv *pcMvPred, Mv &rcMv, Distortion &ruiCost, const int localSearchRangeX, const int localSearchRangeY); - void xCPRSearchMVCandUpdate ( Distortion uiSad, int x, int y, Distortion* uiSadBestCand, Mv* cMVCand); - int xCPRSearchMVChromaRefine( PredictionUnit& pu, int iRoiWidth, int iRoiHeight, int cuPelX, 
int cuPelY, Distortion* uiSadBestCand, Mv* cMVCand); + void xIBCEstimation ( PredictionUnit& pu, PelUnitBuf& origBuf, Mv *pcMvPred, Mv &rcMv, Distortion &ruiCost, const int localSearchRangeX, const int localSearchRangeY); + void xIBCSearchMVCandUpdate ( Distortion uiSad, int x, int y, Distortion* uiSadBestCand, Mv* cMVCand); + int xIBCSearchMVChromaRefine( PredictionUnit& pu, int iRoiWidth, int iRoiHeight, int cuPelX, int cuPelY, Distortion* uiSadBestCand, Mv* cMVCand); protected: // ------------------------------------------------------------------------------------------------------------------- @@ -399,6 +399,15 @@ protected: void xCopyAffineAMVPInfo ( AffineAMVPInfo& src, AffineAMVPInfo& dst ); void xCheckBestAffineMVP ( PredictionUnit &pu, AffineAMVPInfo &affineAMVPInfo, RefPicList eRefPicList, Mv acMv[3], Mv acMvPred[3], int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost ); +#if JVET_M0444_SMVD + Distortion xGetSymmetricCost( PredictionUnit& pu, PelUnitBuf& origBuf, RefPicList eCurRefPicList, const MvField& cCurMvField, MvField& cTarMvField , int gbiIdx ); + + Distortion xSymmeticRefineMvSearch( PredictionUnit& pu, PelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred + , RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion uiMinCost, int searchPattern, int nSearchStepShift, uint32_t uiMaxSearchRounds , int gbiIdx ); + + void xSymmetricMotionEstimation( PredictionUnit& pu, PelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred, RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion& ruiCost, int gbiIdx ); +#endif + bool xReadBufferedAffineUniMv ( PredictionUnit& pu, RefPicList eRefPicList, int32_t iRefIdx, Mv acMvPred[3], Mv acMv[3], uint32_t& ruiBits, Distortion& ruiCost); double xGetMEDistortionWeight ( uint8_t gbiIdx, RefPicList eRefPicList); bool xReadBufferedUniMv ( PredictionUnit& pu, RefPicList eRefPicList, int32_t iRefIdx, Mv& pcMvPred, Mv& rcMv, uint32_t& ruiBits, Distortion& ruiCost); @@ -406,6 
+415,20 @@ public: void resetBufferedUniMotions () { m_uniMotions.reset(); } uint32_t getWeightIdxBits ( uint8_t gbiIdx ) { return m_estWeightIdxBits[gbiIdx]; } void initWeightIdxBits (); +#if JVET_M0444_SMVD + void symmvdCheckBestMvp( + PredictionUnit& pu, + PelUnitBuf& origBuf, + Mv curMv, + RefPicList curRefList, + AMVPInfo amvpInfo[2][33], + int32_t gbiIdx, + Mv cMvPredSym[2], + int32_t mvpIdxSym[2], + Distortion& bestCost, + bool skip = false + ); +#endif protected: void xExtDIFUpSamplingH ( CPelBuf* pcPattern ); @@ -417,7 +440,7 @@ protected: void setWpScalingDistParam ( int iRefIdx, RefPicList eRefPicListCur, Slice *slice ); private: - void xxCPRHashSearch(PredictionUnit& pu, Mv* mvPred, int numMvPred, Mv &mv, int& idxMvPred, CprHashMap& cprHashMap); + void xxIBCHashSearch(PredictionUnit& pu, Mv* mvPred, int numMvPred, Mv &mv, int& idxMvPred, IbcHashMap& ibcHashMap); public: void encodeResAndCalcRdInterCU (CodingStructure &cs, Partitioner &partitioner, const bool &skipResidual diff --git a/source/Lib/EncoderLib/IntraSearch.cpp b/source/Lib/EncoderLib/IntraSearch.cpp index d8ebcd96c05cd8dd54bd97ac443144e0f9d48752..ac23b0a313c1459a5601ecc27059bbc6c00ab1d0 100644 --- a/source/Lib/EncoderLib/IntraSearch.cpp +++ b/source/Lib/EncoderLib/IntraSearch.cpp @@ -285,7 +285,7 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner ) uint32_t extraModes = 0; // add two extra modes, which would be used after uiMode <= DC_IDX is removed for cu.nsstIdx == 3 - +#if !JVET_M0464_UNI_MTS const int width = partitioner.currArea().lwidth(); const int height = partitioner.currArea().lheight(); @@ -306,6 +306,7 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner ) { emtUsageFlag = 0; //this forces the recalculation of the candidates list. Why is this necessary? 
(to be checked) } +#endif static_vector<uint32_t, FAST_UDI_MAX_RDMODE_NUM> uiHadModeList; static_vector<double, FAST_UDI_MAX_RDMODE_NUM> CandCostList; @@ -315,7 +316,9 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner ) static_vector<int, FAST_UDI_MAX_RDMODE_NUM>* nullList = NULL; auto &pu = *cu.firstPU; +#if !JVET_M0464_UNI_MTS int puIndex = 0; +#endif { CandHadList.clear(); CandCostList.clear(); @@ -335,8 +338,9 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner ) numModesForFullRD = numModesAvailable; #endif - +#if !JVET_M0464_UNI_MTS if( emtUsageFlag != 2 ) +#endif { // this should always be true CHECK( !pu.Y().valid(), "PU is not valid" ); @@ -547,6 +551,7 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner ) uiRdModeList.push_back( i ); } } +#if !JVET_M0464_UNI_MTS if( emtUsageFlag == 1 ) { // Store the modes to be checked with RD @@ -554,7 +559,9 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner ) std::copy_n( uiRdModeList.begin(), numModesForFullRD, m_savedRdModeList[puIndex] ); std::copy_n(extendRefList.begin(), numModesForFullRD, m_savedExtendRefList[puIndex]); } +#endif } +#if !JVET_M0464_UNI_MTS else //emtUsage = 2 (here we potentially reduce the number of modes that will be full-RD checked) { if( isAllIntra && m_pcEncCfg->getFastIntraEMT() ) @@ -585,6 +592,7 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner ) std::copy_n(m_savedExtendRefList[puIndex], m_savedNumRdModes[puIndex], extendRefList.begin()); } } +#endif CHECK( numModesForFullRD != uiRdModeList.size(), "Inconsistent state!" 
); @@ -592,7 +600,11 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner ) // after this point, don't use numModesForFullRD // PBINTRA fast +#if JVET_M0464_UNI_MTS + if( m_pcEncCfg->getUsePbIntraFast() && !cs.slice->isIntra() && uiRdModeList.size() < numModesAvailable ) +#else if( m_pcEncCfg->getUsePbIntraFast() && !cs.slice->isIntra() && uiRdModeList.size() < numModesAvailable && emtUsageFlag != 2 ) +#endif { if( CandHadList.size() < 3 || CandHadList[2] > cs.interHad * PBINTRA_RATIO ) { @@ -649,11 +661,12 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner ) xRecurIntraCodingLumaQT( *csTemp, partitioner ); +#if !JVET_M0464_UNI_MTS if( emtUsageFlag == 1 && m_pcEncCfg->getFastIntraEMT() ) { m_modeCostStore[puIndex][uiMode] = csTemp->cost; //cs.cost; } - +#endif DTRACE( g_trace_ctx, D_INTRA_COST, "IntraCost T %f (%d) \n", csTemp->cost, uiOrgMode ); @@ -664,11 +677,12 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner ) uiBestPUMode = uiOrgMode; bestExtendRef = multiRefIdx; - +#if !JVET_M0464_UNI_MTS if( ( emtUsageFlag == 1 ) && m_pcEncCfg->getFastIntraEMT() ) { m_bestModeCostStore[puIndex] = csBest->cost; //cs.cost; } +#endif } csTemp->releaseIntermediateData(); @@ -1048,7 +1062,9 @@ void IntraSearch::xEncSubdivCbfQT(CodingStructure &cs, Partitioner &partitioner, { const UnitArea &currArea = partitioner.currArea(); TransformUnit &currTU = *cs.getTU( currArea.blocks[partitioner.chType], partitioner.chType ); +#if !JVET_M0464_UNI_MTS CodingUnit &currCU = *currTU.cu; +#endif uint32_t currDepth = partitioner.currTrDepth; const bool subdiv = currTU.depth > currDepth; @@ -1081,7 +1097,9 @@ void IntraSearch::xEncSubdivCbfQT(CodingStructure &cs, Partitioner &partitioner, if (subdiv) { +#if !JVET_M0464_UNI_MTS if( currDepth == 0 && bLuma ) m_CABACEstimator->emt_cu_flag( currCU ); +#endif if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) ) { @@ -1099,8 +1117,9 @@ void 
IntraSearch::xEncSubdivCbfQT(CodingStructure &cs, Partitioner &partitioner, } else { +#if !JVET_M0464_UNI_MTS if( currDepth == 0 && bLuma && TU::getCbfAtDepth( currTU, COMPONENT_Y, 0 ) ) m_CABACEstimator->emt_cu_flag( currCU ); - +#endif //===== Cbfs ===== if (bLuma) { @@ -1185,7 +1204,11 @@ uint64_t IntraSearch::xGetIntraFracBitsQTChroma(TransformUnit& currTU, const Com return fracBits; } +#if JVET_M0464_UNI_MTS +void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID &compID, const bool &checkCrossCPrediction, Distortion& ruiDist, const int &default0Save1Load2, uint32_t* numSig, std::vector<TrMode>* trModes, const bool loadTr) +#else void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID &compID, const bool &checkCrossCPrediction, Distortion& ruiDist, const int &default0Save1Load2, uint32_t* numSig ) +#endif { if (!tu.blocks[compID].valid()) { @@ -1280,8 +1303,16 @@ void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID &comp m_pcTrQuant->selectLambda(compID); #endif - +#if JVET_M0464_UNI_MTS + if( trModes ) + { + m_pcTrQuant->transformNxN( tu, compID, cQP, trModes, CU::isIntra( *tu.cu ) ? 
m_pcEncCfg->getIntraMTSMaxCand() : m_pcEncCfg->getInterMTSMaxCand() ); + tu.mtsIdx = trModes->at(0).first; + } + m_pcTrQuant->transformNxN(tu, compID, cQP, uiAbsSum, m_CABACEstimator->getCtx(), loadTr); +#else m_pcTrQuant->transformNxN(tu, compID, cQP, uiAbsSum, m_CABACEstimator->getCtx()); +#endif DTRACE( g_trace_ctx, D_TU_ABS_SUM, "%d: comp=%d, abssum=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_TU_ABS_SUM ), compID, uiAbsSum ); @@ -1322,8 +1353,10 @@ void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID &comp void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &partitioner ) { const UnitArea &currArea = partitioner.currArea(); +#if !JVET_M0464_UNI_MTS const CodingUnit &cu = *cs.getCU(currArea.lumaPos(), partitioner.chType); - uint32_t currDepth = partitioner.currTrDepth; +#endif + uint32_t currDepth = partitioner.currTrDepth; const PPS &pps = *cs.pps; const bool keepResi = pps.getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() || KEEP_PRED_AND_RESI_SIGNALS; bool bCheckFull = true; @@ -1333,6 +1366,12 @@ void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par uint32_t numSig = 0; +#if JVET_M0464_UNI_MTS + double dSingleCost = MAX_DOUBLE; + Distortion uiSingleDistLuma = 0; + uint64_t singleFracBits = 0; + int bestModeId[MAX_NUM_COMPONENT] = { 0, 0, 0 }; +#else bool checkInitTrDepth = false, checkInitTrDepthTransformSkipWinner = false; double dSingleCost = MAX_DOUBLE; @@ -1344,6 +1383,7 @@ void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par bool isAllIntra = m_pcEncCfg->getIntraPeriod() == 1; uint8_t numTransformIndexCands = nNumTransformCands; +#endif const TempCtx ctxStart ( m_CtxCache, m_CABACEstimator->getCtx() ); TempCtx ctxBest ( m_CtxCache ); @@ -1367,6 +1407,26 @@ void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par TransformUnit &tu = csFull->addTU( CS::getArea( *csFull, currArea, partitioner.chType ), partitioner.chType 
); tu.depth = currDepth; +#if JVET_M0464_UNI_MTS + const bool tsAllowed = TU::isTSAllowed ( tu, COMPONENT_Y ); + const bool mtsAllowed = TU::isMTSAllowed( tu, COMPONENT_Y ); + uint8_t nNumTransformCands = 1 + ( tsAllowed ? 1 : 0 ) + ( mtsAllowed ? 4 : 0 ); // DCT + TS + 4 MTS = 6 tests + std::vector<TrMode> trModes; + trModes.push_back( TrMode( 0, true ) ); //DCT2 + if( tsAllowed ) + { + trModes.push_back( TrMode( 1, true ) ); + } + if( mtsAllowed ) + { + for( int i = 2; i < 6; i++ ) + { + trModes.push_back( TrMode( i, true) ); + } + } + + CHECK( !tu.Y().valid(), "Invalid TU" ); +#else checkTransformSkip &= TU::hasTransformSkipFlag( *tu.cs, tu.Y() ); checkTransformSkip &= !cu.transQuantBypass; checkTransformSkip &= !cu.emtFlag; @@ -1378,6 +1438,7 @@ void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par CHECK( checkInitTrDepthTransformSkipWinner && !checkTransformSkip, "Transform Skip must be enabled if it was the winner in the previous call of xRecurIntraCodingLumaQT!" ); +#endif CodingStructure &saveCS = *m_pSaveCS[0]; @@ -1388,9 +1449,14 @@ void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par double singleCostTmp = 0; int firstCheckId = 0; +#if JVET_M0464_UNI_MTS + int lastCheckId = trModes[nNumTransformCands-1].first; + bool isNotOnlyOneMode = nNumTransformCands != 1; +#else //we add the EMT candidates to the loop. 
TransformSkip will still be the last one to be checked (when modeId == lastCheckId) as long as checkTransformSkip is true int lastCheckId = numTransformIndexCands - ( firstCheckId + 1 ) + ( int ) checkTransformSkip; bool isNotOnlyOneMode = lastCheckId != firstCheckId && !checkInitTrDepthTransformSkipWinner; +#endif if( isNotOnlyOneMode ) { @@ -1401,9 +1467,25 @@ void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par tmpTU = &saveCS.addTU(currArea, partitioner.chType); } +#if JVET_M0464_UNI_MTS + bool cbfDCT2 = true; +#else bool cbfBestMode = false; +#endif - +#if JVET_M0464_UNI_MTS + for( int modeId = firstCheckId; modeId < nNumTransformCands; modeId++ ) + { + if( !cbfDCT2 || ( m_pcEncCfg->getUseTransformSkipFast() && bestModeId[COMPONENT_Y] == 1 ) ) + { + break; + } + if( !trModes[modeId].second ) + { + continue; + } + tu.mtsIdx = trModes[modeId].first; +#else for( int modeId = firstCheckId; modeId <= lastCheckId; modeId++ ) { if( checkInitTrDepthTransformSkipWinner ) @@ -1426,6 +1508,7 @@ void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par continue; } } +#endif if ((modeId != firstCheckId) && isNotOnlyOneMode) { @@ -1435,7 +1518,11 @@ void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par int default0Save1Load2 = 0; singleDistTmpLuma = 0; +#if JVET_M0464_UNI_MTS + if( modeId == firstCheckId && nNumTransformCands > 1 ) +#else if (modeId == firstCheckId && modeId != lastCheckId && !checkInitTrDepthTransformSkipWinner ) +#endif { default0Save1Load2 = 1; } @@ -1443,7 +1530,26 @@ void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par { default0Save1Load2 = 2; } - +#if JVET_M0464_UNI_MTS + if( nNumTransformCands > 1 ) + { + xIntraCodingTUBlock( tu, COMPONENT_Y, false, singleDistTmpLuma, default0Save1Load2, &numSig, modeId == 0 ? 
&trModes : nullptr, true ); + if( modeId == 0 ) + { + for( int i = 0; i < nNumTransformCands; i++ ) + { + if( trModes[i].second ) + { + lastCheckId = trModes[i].first; + } + } + } + } + else + { + xIntraCodingTUBlock( tu, COMPONENT_Y, false, singleDistTmpLuma, default0Save1Load2, &numSig ); + } +#else if (cu.emtFlag) { tu.emtIdx = transformIndex; @@ -1458,10 +1564,15 @@ void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par } xIntraCodingTUBlock( tu, COMPONENT_Y, false, singleDistTmpLuma, default0Save1Load2, &numSig ); +#endif //----- determine rate and r-d cost ----- +#if JVET_M0464_UNI_MTS + if( ( trModes[modeId].first != 0 && !TU::getCbfAtDepth( tu, COMPONENT_Y, currDepth ) ) ) +#else //the condition (transformIndex != DCT2_EMT) seems to be irrelevant, since DCT2_EMT=7 and the highest value of transformIndex is 4 if( ( modeId == lastCheckId && checkTransformSkip && !TU::getCbfAtDepth( tu, COMPONENT_Y, currDepth ) ) ) +#endif { //In order not to code TS flag when cbf is zero, the case for TS with cbf being zero is forbidden. singleCostTmp = MAX_DOUBLE; @@ -1478,9 +1589,16 @@ void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par uiSingleDistLuma = singleDistTmpLuma; singleFracBits = singleTmpFracBits; +#if JVET_M0464_UNI_MTS + bestModeId[COMPONENT_Y] = trModes[modeId].first; + if( trModes[modeId].first == 0 ) + { + cbfDCT2 = TU::getCbfAtDepth( tu, COMPONENT_Y, currDepth ); + } +#else bestModeId[COMPONENT_Y] = modeId; cbfBestMode = TU::getCbfAtDepth( tu, COMPONENT_Y, currDepth ); - +#endif if( bestModeId[COMPONENT_Y] != lastCheckId ) { @@ -1604,7 +1722,9 @@ ChromaCbfs IntraSearch::xRecurIntraChromaCodingQT(CodingStructure &cs, Partition TransformUnit &currTU = *cs.getTU( currArea.chromaPos(), CHANNEL_TYPE_CHROMA ); const PredictionUnit &pu = *cs.getPU( currArea.chromaPos(), CHANNEL_TYPE_CHROMA ); +#if !JVET_M0464_UNI_MTS const TransformUnit &currTULuma = CS::isDualITree( cs ) ? 
*cs.picture->cs->getTU( currArea.lumaPos(), CHANNEL_TYPE_LUMA ) : currTU; +#endif uint32_t currDepth = partitioner.currTrDepth; const PPS &pps = *cs.pps; @@ -1617,6 +1737,7 @@ ChromaCbfs IntraSearch::xRecurIntraChromaCodingQT(CodingStructure &cs, Partition return cbfs; } +#if !JVET_M0464_UNI_MTS bool checkTransformSkip = pps.getUseTransformSkip(); checkTransformSkip &= TU::hasTransformSkipFlag( *currTU.cs, partitioner.currArea().Cb() ); @@ -1638,6 +1759,7 @@ ChromaCbfs IntraSearch::xRecurIntraChromaCodingQT(CodingStructure &cs, Partition checkTransformSkip &= ( nbLumaSkip > 0 ); } } +#endif CodingStructure &saveCS = *m_pSaveCS[1]; saveCS.pcv = cs.pcv; @@ -1666,8 +1788,12 @@ ChromaCbfs IntraSearch::xRecurIntraChromaCodingQT(CodingStructure &cs, Partition const bool checkCrossComponentPrediction = PU::isChromaIntraModeCrossCheckMode( pu ) && pps.getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() && TU::getCbf( currTU, COMPONENT_Y ); const int crossCPredictionModesToTest = checkCrossComponentPrediction ? 2 : 1; +#if JVET_M0464_UNI_MTS + const int totalModesToTest = crossCPredictionModesToTest; +#else const int transformSkipModesToTest = checkTransformSkip ? 
2 : 1; const int totalModesToTest = crossCPredictionModesToTest * transformSkipModesToTest; +#endif const bool isOneMode = (totalModesToTest == 1); int currModeId = 0; @@ -1681,12 +1807,16 @@ ChromaCbfs IntraSearch::xRecurIntraChromaCodingQT(CodingStructure &cs, Partition ctxStart = m_CABACEstimator->getCtx(); } +#if !JVET_M0464_UNI_MTS for (int transformSkipModeId = 0; transformSkipModeId < transformSkipModesToTest; transformSkipModeId++) +#endif { for (int crossCPredictionModeId = 0; crossCPredictionModeId < crossCPredictionModesToTest; crossCPredictionModeId++) { currTU.compAlpha [compID] = 0; +#if !JVET_M0464_UNI_MTS currTU.transformSkip[compID] = transformSkipModeId; +#endif currModeId++; @@ -1697,7 +1827,11 @@ ChromaCbfs IntraSearch::xRecurIntraChromaCodingQT(CodingStructure &cs, Partition { default0Save1Load2 = 0; } +#if JVET_M0464_UNI_MTS + else if (!isOneMode && (crossCPredictionModeId == 0)) +#else else if (!isOneMode && (transformSkipModeId == 0) && (crossCPredictionModeId == 0)) +#endif { default0Save1Load2 = 1; //save prediction on first mode } @@ -1715,7 +1849,11 @@ ChromaCbfs IntraSearch::xRecurIntraChromaCodingQT(CodingStructure &cs, Partition xIntraCodingTUBlock( currTU, compID, crossCPredictionModeId != 0, singleDistCTmp, default0Save1Load2 ); +#if JVET_M0464_UNI_MTS + if( ( ( crossCPredictionModeId == 1 ) && ( currTU.compAlpha[compID] == 0 ) ) ) //In order not to code TS flag when cbf is zero, the case for TS with cbf being zero is forbidden. +#else if( ( ( crossCPredictionModeId == 1 ) && ( currTU.compAlpha[compID] == 0 ) ) || ( ( transformSkipModeId == 1 ) && !TU::getCbf( currTU, compID ) ) ) //In order not to code TS flag when cbf is zero, the case for TS with cbf being zero is forbidden. 
+#endif { singleCostTmp = MAX_DOUBLE; } diff --git a/source/Lib/EncoderLib/IntraSearch.h b/source/Lib/EncoderLib/IntraSearch.h index 92448ab87457a0f0ef76183f100679f7adef3e60..3c1ce2b8137592b7a868bf67ed1d2ee70c9b9003 100644 --- a/source/Lib/EncoderLib/IntraSearch.h +++ b/source/Lib/EncoderLib/IntraSearch.h @@ -75,10 +75,12 @@ private: CodingStructure **m_pSaveCS; //cost variables for the EMT algorithm and new modes list +#if !JVET_M0464_UNI_MTS double m_bestModeCostStore[4]; // RD cost of the best mode for each PU using DCT2 double m_modeCostStore [4][NUM_LUMA_MODE]; // RD cost of each mode for each PU using DCT2 uint32_t m_savedRdModeList [4][NUM_LUMA_MODE], m_savedNumRdModes[4]; int m_savedExtendRefList[4][NUM_LUMA_MODE]; +#endif protected: // interface to option @@ -142,7 +144,11 @@ protected: void xEncCoeffQT (CodingStructure &cs, Partitioner& pm, const ComponentID &compID); +#if JVET_M0464_UNI_MTS + void xIntraCodingTUBlock (TransformUnit &tu, const ComponentID &compID, const bool &checkCrossCPrediction, Distortion& ruiDist, const int &default0Save1Load2 = 0, uint32_t* numSig = nullptr, std::vector<TrMode>* trModes=nullptr, const bool loadTr=false ); +#else void xIntraCodingTUBlock (TransformUnit &tu, const ComponentID &compID, const bool &checkCrossCPrediction, Distortion& ruiDist, const int &default0Save1Load2 = 0, uint32_t* numSig = nullptr ); +#endif ChromaCbfs xRecurIntraChromaCodingQT (CodingStructure &cs, Partitioner& pm); diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp index cbb7bbdc2b005623228053b7ad74defad7fa7256..34a8b308db02f6ede5d36002a8614f3214b78a6f 100644 --- a/source/Lib/EncoderLib/VLCWriter.cpp +++ b/source/Lib/EncoderLib/VLCWriter.cpp @@ -529,20 +529,29 @@ void HLSWriter::codeSPSNext( const SPSNext& spsNext, const bool usePCM ) { // tool enabling flags WRITE_FLAG( spsNext.getUseLargeCTU() ? 1 : 0, "large_ctu_flag" ); - WRITE_FLAG(spsNext.getUseSubPuMvp() ? 
1 : 0, "subpu_tmvp_flag"); WRITE_FLAG( spsNext.getUseIMV() ? 1 : 0, "imv_enable_flag" ); - WRITE_FLAG( spsNext.getUseBIO() ? 1 : 0, "bio_enable_flag" ); WRITE_FLAG( spsNext.getDisableMotCompress() ? 1 : 0, "disable_motion_compression_flag" ); WRITE_FLAG( spsNext.getUseLMChroma() ? 1 : 0, "lm_chroma_enabled_flag" ); +#if JVET_M0142_CCLM_COLLOCATED_CHROMA + if ( spsNext.getUseLMChroma() && spsNext.getSPS().getChromaFormatIdc() == CHROMA_420 ) + { + WRITE_FLAG( spsNext.getCclmCollocatedChromaFlag() ? 1 : 0, "sps_cclm_collocated_chroma_flag" ); + } +#endif +#if JVET_M0464_UNI_MTS + WRITE_FLAG( spsNext.getUseIntraMTS() ? 1 : 0, "mts_intra_enabled_flag" ); + WRITE_FLAG( spsNext.getUseInterMTS() ? 1 : 0, "mts_inter_enabled_flag" ); +#else WRITE_FLAG( spsNext.getUseIntraEMT() ? 1 : 0, "emt_intra_enabled_flag" ); WRITE_FLAG( spsNext.getUseInterEMT() ? 1 : 0, "emt_inter_enabled_flag" ); +#endif WRITE_FLAG( spsNext.getUseAffine() ? 1 : 0, "affine_flag" ); if ( spsNext.getUseAffine() ) { WRITE_FLAG( spsNext.getUseAffineType() ? 1 : 0, "affine_type_flag" ); } WRITE_FLAG( spsNext.getUseGBi() ? 1 : 0, "gbi_flag" ); - WRITE_FLAG(spsNext.getCPRMode() ? 1 : 0, "cpr_flag" ); + WRITE_FLAG(spsNext.getIBCMode() ? 1 : 0, "ibc_flag" ); for( int k = 0; k < SPSNext::NumReservedFlags; k++ ) { WRITE_FLAG( 0, "reserved_flag" ); @@ -684,23 +693,12 @@ void HLSWriter::codeSPS( const SPS* pcSPS ) } WRITE_UVLC( pcSPS->getQuadtreeTULog2MinSize() - 2, "log2_min_luma_transform_block_size_minus2" ); WRITE_UVLC( pcSPS->getQuadtreeTULog2MaxSize() - pcSPS->getQuadtreeTULog2MinSize(), "log2_diff_max_min_luma_transform_block_size" ); - WRITE_FLAG( pcSPS->getUseALF(), "sps_alf_enable_flag" ); -#if HEVC_USE_SCALING_LISTS - WRITE_FLAG( pcSPS->getScalingListFlag() ? 1 : 0, "scaling_list_enabled_flag" ); - if(pcSPS->getScalingListFlag()) - { - WRITE_FLAG( pcSPS->getScalingListPresentFlag() ? 
1 : 0, "sps_scaling_list_data_present_flag" ); - if(pcSPS->getScalingListPresentFlag()) - { - codeScalingList( pcSPS->getScalingList() ); - } - } -#endif - WRITE_FLAG( pcSPS->getUseAMP() ? 1 : 0, "amp_enabled_flag" ); - WRITE_FLAG( pcSPS->getUseSAO() ? 1 : 0, "sample_adaptive_offset_enabled_flag"); - WRITE_FLAG( pcSPS->getUsePCM() ? 1 : 0, "pcm_enabled_flag"); - if( pcSPS->getUsePCM() ) + WRITE_FLAG( pcSPS->getSAOEnabledFlag(), "sps_sao_enabled_flag"); + WRITE_FLAG( pcSPS->getALFEnabledFlag(), "sps_alf_enabled_flag" ); + + WRITE_FLAG( pcSPS->getPCMEnabledFlag() ? 1 : 0, "pcm_enabled_flag"); + if( pcSPS->getPCMEnabledFlag() ) { WRITE_CODE( pcSPS->getPCMBitDepth(CHANNEL_TYPE_LUMA) - 1, 4, "pcm_sample_bit_depth_luma_minus1" ); WRITE_CODE( chromaEnabled ? (pcSPS->getPCMBitDepth(CHANNEL_TYPE_CHROMA) - 1) : 0, 4, "pcm_sample_bit_depth_chroma_minus1" ); @@ -709,12 +707,33 @@ void HLSWriter::codeSPS( const SPS* pcSPS ) WRITE_FLAG( pcSPS->getPCMFilterDisableFlag()?1 : 0, "pcm_loop_filter_disable_flag"); } - WRITE_FLAG( pcSPS->getUseWrapAround() ? 1 : 0, "ref_wraparound_enabled_flag" ); - if( pcSPS->getUseWrapAround() ) + WRITE_FLAG( pcSPS->getWrapAroundEnabledFlag() ? 1 : 0, "sps_ref_wraparound_enabled_flag" ); + if( pcSPS->getWrapAroundEnabledFlag() ) { - WRITE_UVLC( pcSPS->getWrapAroundOffset(), "ref_wraparound_offset" ); + WRITE_UVLC( pcSPS->getWrapAroundOffset(), "sps_ref_wraparound_offset" ); } + WRITE_FLAG( pcSPS->getSPSTemporalMVPEnabledFlag() ? 1 : 0, "sps_temporal_mvp_enabled_flag" ); + + if ( pcSPS->getSPSTemporalMVPEnabledFlag() ) + { + WRITE_FLAG( pcSPS->getSBTMVPEnabledFlag() ? 1 : 0, "sps_sbtmvp_enabled_flag"); + } + + WRITE_FLAG( pcSPS->getBDOFEnabledFlag() ? 1 : 0, "sps_bdof_enabled_flag" ); + +#if HEVC_USE_SCALING_LISTS + WRITE_FLAG( pcSPS->getScalingListFlag() ? 1 : 0, "scaling_list_enabled_flag" ); + if(pcSPS->getScalingListFlag()) + { + WRITE_FLAG( pcSPS->getScalingListPresentFlag() ? 
1 : 0, "sps_scaling_list_data_present_flag" ); + if(pcSPS->getScalingListPresentFlag()) + { + codeScalingList( pcSPS->getScalingList() ); + } + } +#endif + CHECK( pcSPS->getMaxTLayers() == 0, "Maximum number of T-layers is '0'" ); const RPSList* rpsList = pcSPS->getRPSList(); @@ -735,7 +754,6 @@ void HLSWriter::codeSPS( const SPS* pcSPS ) WRITE_FLAG( pcSPS->getUsedByCurrPicLtSPSFlag(k), "used_by_curr_pic_lt_sps_flag[i]"); } } - WRITE_FLAG( pcSPS->getSPSTemporalMVPEnabledFlag() ? 1 : 0, "sps_temporal_mvp_enabled_flag" ); #if HEVC_USE_INTRA_SMOOTHING_T32 || HEVC_USE_INTRA_SMOOTHING_T64 WRITE_FLAG( pcSPS->getUseStrongIntraSmoothing(), "strong_intra_smoothing_enable_flag" ); @@ -805,7 +823,7 @@ void HLSWriter::codeSPS( const SPS* pcSPS ) } case SPS_EXT__NEXT: { - codeSPSNext( pcSPS->getSpsNext(), pcSPS->getUsePCM() ); + codeSPSNext( pcSPS->getSpsNext(), pcSPS->getPCMEnabledFlag() ); break; } default: @@ -1083,7 +1101,7 @@ void HLSWriter::codeSliceHeader ( Slice* pcSlice ) WRITE_FLAG( pcSlice->getEnableTMVPFlag() ? 
1 : 0, "slice_temporal_mvp_enabled_flag" ); } } - if( pcSlice->getSPS()->getUseSAO() ) + if( pcSlice->getSPS()->getSAOEnabledFlag() ) { WRITE_FLAG( pcSlice->getSaoEnabledFlag( CHANNEL_TYPE_LUMA ), "slice_sao_luma_flag" ); if( chromaEnabled ) @@ -1092,7 +1110,7 @@ void HLSWriter::codeSliceHeader ( Slice* pcSlice ) } } - if( pcSlice->getSPS()->getUseALF() ) + if( pcSlice->getSPS()->getALFEnabledFlag() ) { alf( pcSlice->getAlfSliceParam() ); } @@ -1252,12 +1270,12 @@ void HLSWriter::codeSliceHeader ( Slice* pcSlice ) CHECK( pcSlice->getMaxNumMergeCand() > MRG_MAX_NUM_CANDS, "More merge candidates signalled than supported" ); WRITE_UVLC( MRG_MAX_NUM_CANDS - pcSlice->getMaxNumMergeCand(), "six_minus_max_num_merge_cand" ); - if ( pcSlice->getSPS()->getSpsNext().getUseSubPuMvp() && !pcSlice->getSPS()->getSpsNext().getUseAffine() ) // ATMVP only + if ( pcSlice->getSPS()->getSBTMVPEnabledFlag() && !pcSlice->getSPS()->getSpsNext().getUseAffine() ) // ATMVP only { CHECK( pcSlice->getMaxNumAffineMergeCand() != 1, "Sub-block merge can number should be 1" ); } else - if ( !pcSlice->getSPS()->getSpsNext().getUseSubPuMvp() && !pcSlice->getSPS()->getSpsNext().getUseAffine() ) // both off + if ( !pcSlice->getSPS()->getSBTMVPEnabledFlag() && !pcSlice->getSPS()->getSpsNext().getUseAffine() ) // both off { CHECK( pcSlice->getMaxNumAffineMergeCand() != 0, "Sub-block merge can number should be 0" ); } @@ -1305,7 +1323,7 @@ void HLSWriter::codeSliceHeader ( Slice* pcSlice ) } } - bool isSAOEnabled = pcSlice->getSPS()->getUseSAO() && (pcSlice->getSaoEnabledFlag(CHANNEL_TYPE_LUMA) || (chromaEnabled && pcSlice->getSaoEnabledFlag(CHANNEL_TYPE_CHROMA))); + bool isSAOEnabled = pcSlice->getSPS()->getSAOEnabledFlag() && (pcSlice->getSaoEnabledFlag(CHANNEL_TYPE_LUMA) || (chromaEnabled && pcSlice->getSaoEnabledFlag(CHANNEL_TYPE_CHROMA))); bool isDBFEnabled = (!pcSlice->getDeblockingFilterDisable()); if(pcSlice->getPPS()->getLoopFilterAcrossSlicesEnabledFlag() && ( isSAOEnabled || isDBFEnabled ))