From c704d7a961c36c18c2eab0e0c3a945917909c59d Mon Sep 17 00:00:00 2001 From: Frank Bossen <fbossen@gmail.com> Date: Thu, 13 Jan 2022 12:32:48 -0500 Subject: [PATCH] JVET-Y0155: Fixes and clean up for temporal prefilter --- cfg/encoder_lowdelay_P_vtm.cfg | 4 +- cfg/encoder_lowdelay_vtm.cfg | 4 +- cfg/encoder_randomaccess_vtm.cfg | 4 +- cfg/encoder_randomaccess_vtm_gop16.cfg | 4 +- cfg/per-class/classF.cfg | 3 +- cfg/per-sequence/BQMall.cfg | 1 + cfg/per-sequence/BQSquare.cfg | 1 + cfg/per-sequence/BQTerrace.cfg | 1 + cfg/per-sequence/BasketballDrill.cfg | 1 + cfg/per-sequence/BasketballDrillText.cfg | 1 + cfg/per-sequence/BasketballDrive.cfg | 1 + cfg/per-sequence/BasketballPass.cfg | 1 + cfg/per-sequence/BlowingBubbles.cfg | 1 + cfg/per-sequence/NebutaFestival_10bit.cfg | 1 + cfg/per-sequence/PartyScene.cfg | 1 + cfg/per-sequence/SocialNetworkMap_444.cfg | 1 + cfg/per-sequence/SocialNetworkMap_RGB.cfg | 1 + .../SteamLocomotiveTrain_10bit.cfg | 1 + doc/software-manual.tex | 28 +++-- source/App/EncoderApp/EncApp.cpp | 22 ++-- source/App/EncoderApp/EncAppCfg.cpp | 21 +++- source/App/EncoderApp/EncAppCfg.h | 7 +- source/Lib/EncoderLib/EncCfg.h | 27 +++- source/Lib/EncoderLib/EncLib.cpp | 23 ++-- source/Lib/EncoderLib/EncTemporalFilter.cpp | 118 ++++++++---------- source/Lib/EncoderLib/EncTemporalFilter.h | 29 ++--- 26 files changed, 176 insertions(+), 131 deletions(-) diff --git a/cfg/encoder_lowdelay_P_vtm.cfg b/cfg/encoder_lowdelay_P_vtm.cfg index 2fb32e0a1..b259b35ca 100644 --- a/cfg/encoder_lowdelay_P_vtm.cfg +++ b/cfg/encoder_lowdelay_P_vtm.cfg @@ -64,8 +64,8 @@ TransformSkipLog2MaxSize : 5 SAOLcuBoundary : 0 # SAOLcuBoundary using non-deblocked pixels (0: OFF, 1: ON) #=========== TemporalFilter ================= -TemporalFilter : 1 # Enable/disable GOP Based Temporal Filter -TemporalFilterFutureReference : 0 # Enable/disable reading future frames +TemporalFilterPastRefs : 4 # Number of past references for temporal prefilter +TemporalFilterFutureRefs : 0 # Number of future references for temporal prefilter TemporalFilterStrengthFrame8 : 0.2 # Enable filter at every 8th frame with strength #============ Rate Control ====================== diff --git a/cfg/encoder_lowdelay_vtm.cfg b/cfg/encoder_lowdelay_vtm.cfg index c8198e089..66f5e5905 100644 --- a/cfg/encoder_lowdelay_vtm.cfg +++ b/cfg/encoder_lowdelay_vtm.cfg @@ -64,8 +64,8 @@ TransformSkipLog2MaxSize : 5 SAOLcuBoundary : 0 # SAOLcuBoundary using non-deblocked pixels (0: OFF, 1: ON) #=========== TemporalFilter ================= -TemporalFilter : 1 # Enable/disable GOP Based Temporal Filter -TemporalFilterFutureReference : 0 # Enable/disable reading future frames +TemporalFilterPastRefs : 4 # Number of past references for temporal prefilter +TemporalFilterFutureRefs : 0 # Number of future references for temporal prefilter TemporalFilterStrengthFrame8 : 0.2 # Enable filter at every 8th frame with strength #============ Rate Control ====================== diff --git a/cfg/encoder_randomaccess_vtm.cfg b/cfg/encoder_randomaccess_vtm.cfg index 82064508d..deb048e02 100644 --- a/cfg/encoder_randomaccess_vtm.cfg +++ b/cfg/encoder_randomaccess_vtm.cfg @@ -170,8 +170,8 @@ ALFAllowPredefinedFilters : 1 ALFStrengthTargetLuma : 1.0 ALFStrengthTargetChroma : 1.0 CCALFStrengthTarget : 1.0 -TemporalFilter : 1 # Enable/disable GOP Based Temporal Filter -TemporalFilterFutureReference : 1 # Enable/disable reading future frames +TemporalFilterPastRefs : 4 # Number of past references for temporal prefilter +TemporalFilterFutureRefs : 4 # Number of future references for temporal prefilter TemporalFilterStrengthFrame8 : 0.95 # Enable filter at every 8th frame with given strength TemporalFilterStrengthFrame16 : 1.5 # Enable filter at every 16th frame with given strength, longer intervals has higher priority ### DO NOT ADD ANYTHING BELOW THIS LINE ### diff --git a/cfg/encoder_randomaccess_vtm_gop16.cfg b/cfg/encoder_randomaccess_vtm_gop16.cfg index 48f3b2d00..cfb254258 100644 --- a/cfg/encoder_randomaccess_vtm_gop16.cfg +++ b/cfg/encoder_randomaccess_vtm_gop16.cfg @@ -154,8 +154,8 @@ ALFAllowPredefinedFilters : 1 ALFStrengthTargetLuma : 1.0 ALFStrengthTargetChroma : 1.0 CCALFStrengthTarget : 1.0 -TemporalFilter : 1 # Enable/disable GOP Based Temporal Filter -TemporalFilterFutureReference : 1 # Enable/disable reading future frames +TemporalFilterPastRefs : 4 # Number of past references for temporal prefilter +TemporalFilterFutureRefs : 4 # Number of future references for temporal prefilter TemporalFilterStrengthFrame8 : 0.95 # Enable filter at every 8th frame with given strength TemporalFilterStrengthFrame16 : 1.5 # Enable filter at every 16th frame with given strength, longer intervals has higher priority ### DO NOT ADD ANYTHING BELOW THIS LINE ### diff --git a/cfg/per-class/classF.cfg b/cfg/per-class/classF.cfg index 3c91f11c0..973528a14 100644 --- a/cfg/per-class/classF.cfg +++ b/cfg/per-class/classF.cfg @@ -1,4 +1,5 @@ IBC : 1 HashME : 1 BDPCM: 1 -TemporalFilter : 0 +TemporalFilterPastRefs : 0 # Number of past references for temporal prefilter +TemporalFilterFutureRefs : 0 # Number of future references for temporal prefilter diff --git a/cfg/per-sequence/BQMall.cfg b/cfg/per-sequence/BQMall.cfg index 03317919d..0d019abca 100644 --- a/cfg/per-sequence/BQMall.cfg +++ b/cfg/per-sequence/BQMall.cfg @@ -7,5 +7,6 @@ FrameSkip : 0 # Number of frames to be skipped in SourceWidth : 832 # Input frame width SourceHeight : 480 # Input frame height FramesToBeEncoded : 600 # Number of frames to be coded +LastValidFrame : 599 Level : 3.1 diff --git a/cfg/per-sequence/BQSquare.cfg b/cfg/per-sequence/BQSquare.cfg index eebd2d4e9..a769a7971 100644 --- a/cfg/per-sequence/BQSquare.cfg +++ b/cfg/per-sequence/BQSquare.cfg @@ -7,5 +7,6 @@ FrameSkip : 0 # Number of frames to be skipped in SourceWidth : 416 # Input frame width SourceHeight : 240 # Input frame height FramesToBeEncoded : 600 # Number of frames to be coded +LastValidFrame : 599 Level : 2.1 diff --git a/cfg/per-sequence/BQTerrace.cfg b/cfg/per-sequence/BQTerrace.cfg index b5d4c76d4..e7cace5fa 100644 --- a/cfg/per-sequence/BQTerrace.cfg +++ b/cfg/per-sequence/BQTerrace.cfg @@ -7,5 +7,6 @@ FrameSkip : 0 # Number of frames to be skipped in SourceWidth : 1920 # Input frame width SourceHeight : 1080 # Input frame height FramesToBeEncoded : 600 # Number of frames to be coded +LastValidFrame : 599 Level : 4.1 diff --git a/cfg/per-sequence/BasketballDrill.cfg b/cfg/per-sequence/BasketballDrill.cfg index 6bfce02e7..87b55b88e 100644 --- a/cfg/per-sequence/BasketballDrill.cfg +++ b/cfg/per-sequence/BasketballDrill.cfg @@ -7,5 +7,6 @@ FrameSkip : 0 # Number of frames to be skipped in SourceWidth : 832 # Input frame width SourceHeight : 480 # Input frame height FramesToBeEncoded : 500 # Number of frames to be coded +LastValidFrame : 499 Level : 3.1 diff --git a/cfg/per-sequence/BasketballDrillText.cfg b/cfg/per-sequence/BasketballDrillText.cfg index 04e614ffd..be30cfa58 100644 --- a/cfg/per-sequence/BasketballDrillText.cfg +++ b/cfg/per-sequence/BasketballDrillText.cfg @@ -7,5 +7,6 @@ FrameSkip : 0 # Number of frames to be skipped in SourceWidth : 832 # Input frame width SourceHeight : 480 # Input frame height FramesToBeEncoded : 500 # Number of frames to be coded +LastValidFrame : 499 Level : 3.1 diff --git a/cfg/per-sequence/BasketballDrive.cfg b/cfg/per-sequence/BasketballDrive.cfg index ec2eb7631..4354a67bd 100644 --- a/cfg/per-sequence/BasketballDrive.cfg +++ b/cfg/per-sequence/BasketballDrive.cfg @@ -7,5 +7,6 @@ FrameSkip : 0 # Number of frames to be skipped in SourceWidth : 1920 # Input frame width SourceHeight : 1080 # Input frame height FramesToBeEncoded : 500 # Number of frames to be coded +LastValidFrame : 499 Level : 4.1 diff --git a/cfg/per-sequence/BasketballPass.cfg b/cfg/per-sequence/BasketballPass.cfg index c6b756c94..99fb95505 100644 --- a/cfg/per-sequence/BasketballPass.cfg +++ b/cfg/per-sequence/BasketballPass.cfg @@ -7,5 +7,6 @@ FrameSkip : 0 # Number of frames to be skipped in SourceWidth : 416 # Input frame width SourceHeight : 240 # Input frame height FramesToBeEncoded : 500 # Number of frames to be coded +LastValidFrame : 499 Level : 2.1 diff --git a/cfg/per-sequence/BlowingBubbles.cfg b/cfg/per-sequence/BlowingBubbles.cfg index 61a08aa46..da6d9a60c 100644 --- a/cfg/per-sequence/BlowingBubbles.cfg +++ b/cfg/per-sequence/BlowingBubbles.cfg @@ -7,5 +7,6 @@ FrameSkip : 0 # Number of frames to be skipped in SourceWidth : 416 # Input frame width SourceHeight : 240 # Input frame height FramesToBeEncoded : 500 # Number of frames to be coded +LastValidFrame : 499 Level : 2.1 diff --git a/cfg/per-sequence/NebutaFestival_10bit.cfg b/cfg/per-sequence/NebutaFestival_10bit.cfg index 3daf33595..5e18b3a11 100644 --- a/cfg/per-sequence/NebutaFestival_10bit.cfg +++ b/cfg/per-sequence/NebutaFestival_10bit.cfg @@ -7,5 +7,6 @@ FrameSkip : 0 # Number of frames to be skipped in SourceWidth : 2560 # Input frame width SourceHeight : 1600 # Input frame height FramesToBeEncoded : 300 # Number of frames to be coded +LastValidFrame : 299 Level : 5 diff --git a/cfg/per-sequence/PartyScene.cfg b/cfg/per-sequence/PartyScene.cfg index caff00737..4bd7066e2 100644 --- a/cfg/per-sequence/PartyScene.cfg +++ b/cfg/per-sequence/PartyScene.cfg @@ -7,5 +7,6 @@ FrameSkip : 0 # Number of frames to be skipped in SourceWidth : 832 # Input frame width SourceHeight : 480 # Input frame height FramesToBeEncoded : 500 # Number of frames to be coded +LastValidFrame : 499 Level : 3.1 diff --git a/cfg/per-sequence/SocialNetworkMap_444.cfg b/cfg/per-sequence/SocialNetworkMap_444.cfg index 8f0916ec3..b2da2f194 100644 --- a/cfg/per-sequence/SocialNetworkMap_444.cfg +++ b/cfg/per-sequence/SocialNetworkMap_444.cfg @@ -7,5 +7,6 @@ FrameSkip : 0 # Number of frames to be skipped in SourceWidth : 1920 # Input frame width SourceHeight : 1080 # Input frame height FramesToBeEncoded : 600 # Number of frames to be coded +LastValidFrame : 599 Level : 6.2 diff --git a/cfg/per-sequence/SocialNetworkMap_RGB.cfg b/cfg/per-sequence/SocialNetworkMap_RGB.cfg index 02dde0d07..db2beb6be 100644 --- a/cfg/per-sequence/SocialNetworkMap_RGB.cfg +++ b/cfg/per-sequence/SocialNetworkMap_RGB.cfg @@ -7,6 +7,7 @@ FrameSkip : 0 # Number of frames to be skipped in SourceWidth : 1920 # Input frame width SourceHeight : 1080 # Input frame height FramesToBeEncoded : 600 # Number of frames to be coded +LastValidFrame : 599 InputColourSpaceConvert : RGBtoGBR # Non-normative colour space conversion to apply to input video SNRInternalColourSpace : 1 # Evaluate SNRs in GBR order OutputInternalColourSpace : 0 # Convert recon output back to RGB order. Use --OutputColourSpaceConvert GBRtoRGB on decoder to produce a matching output file. diff --git a/cfg/per-sequence/SteamLocomotiveTrain_10bit.cfg b/cfg/per-sequence/SteamLocomotiveTrain_10bit.cfg index 712ff44f5..d668cfb7a 100644 --- a/cfg/per-sequence/SteamLocomotiveTrain_10bit.cfg +++ b/cfg/per-sequence/SteamLocomotiveTrain_10bit.cfg @@ -7,5 +7,6 @@ FrameSkip : 0 # Number of frames to be skipped in SourceWidth : 2560 # Input frame width SourceHeight : 1600 # Input frame height FramesToBeEncoded : 300 # Number of frames to be coded +LastValidFrame : 299 Level : 5 diff --git a/doc/software-manual.tex b/doc/software-manual.tex index 9597cda7c..b217b1431 100644 --- a/doc/software-manual.tex +++ b/doc/software-manual.tex @@ -935,18 +935,30 @@ Picture output options: output upscaled (2), decoded but in full resolution buff %% GOP based temporal filter parameters %% -\begin{OptionTableNoShorthand}{GOP based temporal filter paramters}{tab:gop-based-temporal-filter} +\begin{OptionTableNoShorthand}{GOP based temporal filter parameters}{tab:gop-based-temporal-filter} -\Option{TemporalFilter} & +\Option{TemporalFilterPastRefs} & %\ShortOption{\None} & -\Default{false} & -Enables or disables GOP based temporal filter. +\Default{0} & +Number of past frames used by the temporal filter. If TemporalFilterPastRefs and TemporalFilterFutureRefs are 0, the filter is +disabled. \\ -\Option{TemporalFilterFutureReference} & +\Option{TemporalFilterFutureRefs} & %\ShortOption{\None} & -\Default{true} & -Enables or disable referencing future frames in the GOP based temporal filter. Can be used to disable future referencing for -low delay configurations. +\Default{0} & +Number of future frames used by the temporal filter. +\\ +\Option{FirstValidFrame} & +%\ShortOption{\None} & +\Default{0} & +Index of first frame in video sequence that may be used by the temporal filter. If a negative value is given, the index defaults to the value +of FrameSkip. +\\ +\Option{LastValidFrame} & +%\ShortOption{\None} & +\Default{MAX_INT} & +Index of last frame in video sequence that may be used by the temporal filter. If a negative value is given, the index defaults to the value +of FrameSkip + FramesToBeEncoded - 1. \\ \Option{TemporalFilterStrengthFrame*} & %\ShortOption{\None} & diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp index cb9ed720a..e9b749f92 100644 --- a/source/App/EncoderApp/EncApp.cpp +++ b/source/App/EncoderApp/EncApp.cpp @@ -267,7 +267,7 @@ void EncApp::xInitLibCfg() m_cEncLib.setSwitchPocPeriod ( m_switchPocPeriod ); m_cEncLib.setUpscaledOutput ( m_upscaledOutput ); m_cEncLib.setFramesToBeEncoded ( m_framesToBeEncoded ); - + m_cEncLib.setValidFrames(m_firstValidFrame, m_lastValidFrame); m_cEncLib.setAvoidIntraInDepLayer ( m_avoidIntraInDepLayer ); //====== SPS constraint flags ======= @@ -1234,7 +1234,8 @@ void EncApp::xInitLibCfg() m_cEncLib.setTargetOlsIdx (m_targetOlsIdx); } } - m_cEncLib.setGopBasedTemporalFilterEnabled(m_gopBasedTemporalFilterEnabled); + + m_cEncLib.setGopBasedTemporalFilterRefs(m_gopBasedTemporalFilterPastRefs, m_gopBasedTemporalFilterFutureRefs); m_cEncLib.setNumRefLayers ( m_numRefLayers ); m_cEncLib.setVPSParameters(m_cfgVPSParameters); @@ -1317,7 +1318,7 @@ void EncApp::createLib( const int layerIdx ) m_trueOrgPic = new PelStorage; m_orgPic->create( unitArea ); m_trueOrgPic->create( unitArea ); - if(m_gopBasedTemporalFilterEnabled) + if (m_gopBasedTemporalFilterPastRefs != 0 || m_gopBasedTemporalFilterFutureRefs != 0) { m_filteredOrgPic = new PelStorage; m_filteredOrgPic->create( unitArea ); @@ -1349,12 +1350,13 @@ void EncApp::createLib( const int layerIdx ) m_ext360 = new TExt360AppEncTop( *this, m_cEncLib.getGOPEncoder()->getExt360Data(), *( m_cEncLib.getGOPEncoder() ), *m_orgPic ); #endif - if( m_gopBasedTemporalFilterEnabled ) + if (m_gopBasedTemporalFilterPastRefs != 0 || m_gopBasedTemporalFilterFutureRefs != 0) { - m_temporalFilter.init( m_FrameSkip, m_inputBitDepth, m_MSBExtendedBitDepth, m_internalBitDepth, m_sourceWidth, sourceHeight, - m_sourcePadding, m_bClipInputVideoToRec709Range, m_inputFileName, m_chromaFormatIDC, - m_inputColourSpaceConvert, m_iQP, m_gopBasedTemporalFilterStrengths, - m_gopBasedTemporalFilterFutureReference ); + m_temporalFilter.init(m_FrameSkip, m_inputBitDepth, m_MSBExtendedBitDepth, m_internalBitDepth, m_sourceWidth, + sourceHeight, m_sourcePadding, m_bClipInputVideoToRec709Range, m_inputFileName, + m_chromaFormatIDC, m_inputColourSpaceConvert, m_iQP, m_gopBasedTemporalFilterStrengths, + m_gopBasedTemporalFilterPastRefs, m_gopBasedTemporalFilterFutureRefs, m_firstValidFrame, + m_lastValidFrame); } if ( m_fgcSEIAnalysisEnabled ) { @@ -1401,7 +1403,7 @@ void EncApp::destroyLib() m_trueOrgPic->destroy(); delete m_trueOrgPic; delete m_orgPic; - if(m_gopBasedTemporalFilterEnabled) + if (m_gopBasedTemporalFilterPastRefs != 0 || m_gopBasedTemporalFilterFutureRefs != 0) { m_filteredOrgPic->destroy(); delete m_filteredOrgPic; @@ -1443,7 +1445,7 @@ bool EncApp::encodePrep( bool& eos ) { m_filteredOrgPicForFG->copyFrom(*m_orgPic); } - if (m_gopBasedTemporalFilterEnabled) + if (m_gopBasedTemporalFilterPastRefs != 0 || m_gopBasedTemporalFilterFutureRefs != 0) { m_temporalFilter.filter(m_orgPic, m_iFrameRcvd); m_filteredOrgPic->copyFrom(*m_orgPic); diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp index af7176059..e911c79ca 100644 --- a/source/App/EncoderApp/EncAppCfg.cpp +++ b/source/App/EncoderApp/EncAppCfg.cpp @@ -1608,9 +1608,11 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) ; opts.addOptions() - ("TemporalFilter", m_gopBasedTemporalFilterEnabled, false, "Enable GOP based temporal filter. Disabled per default") - ("TemporalFilterFutureReference", m_gopBasedTemporalFilterFutureReference, true, "Enable referencing of future frames in the GOP based temporal filter. This is typically disabled for Low Delay configurations.") - ("TemporalFilterStrengthFrame*", m_gopBasedTemporalFilterStrengths, std::map<int, double>(), "Strength for every * frame in GOP based temporal filter, where * is an integer." + ("TemporalFilterPastRefs", m_gopBasedTemporalFilterPastRefs, 0, "Number of past references for temporal prefilter") + ("TemporalFilterFutureRefs", m_gopBasedTemporalFilterFutureRefs, 0, "Number of future references for temporal prefilter") + ("FirstValidFrame", m_firstValidFrame, 0, "First valid frame") + ("LastValidFrame", m_lastValidFrame, MAX_INT, "Last valid frame") + ("TemporalFilterStrengthFrame*", m_gopBasedTemporalFilterStrengths, std::map<int, double>(), "Strength for every * frame in GOP based temporal filter, where * is an integer." " E.g. --TemporalFilterStrengthFrame8 0.95 will enable GOP based temporal filter at every 8th frame with strength 0.95"); // clang-format on @@ -1864,6 +1866,15 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) } m_inputFileName = inputPathPrefix + m_inputFileName; + if (m_firstValidFrame < 0) + { + m_firstValidFrame = m_FrameSkip; + } + if (m_lastValidFrame < 0) + { + m_lastValidFrame = m_firstValidFrame + m_framesToBeEncoded - 1; + } + if( m_temporalSubsampleRatio < 1) { EXIT ( "Error: TemporalSubsampleRatio must be greater than 0" ); @@ -4355,7 +4366,7 @@ bool EncAppCfg::xCheckParameter() xConfirmPara( m_decodeBitstreams[0] == m_bitstreamFileName, "Debug bitstream and the output bitstream cannot be equal.\n" ); xConfirmPara( m_decodeBitstreams[1] == m_bitstreamFileName, "Decode2 bitstream and the output bitstream cannot be equal.\n" ); xConfirmPara(unsigned(m_LMChroma) > 1, "LMMode exceeds range (0 to 1)"); - if (m_gopBasedTemporalFilterEnabled) + if (m_gopBasedTemporalFilterPastRefs != 0 || m_gopBasedTemporalFilterFutureRefs != 0) { xConfirmPara(m_temporalSubsampleRatio != 1, "GOP Based Temporal Filter only support Temporal sub-sample ratio 1"); } @@ -4718,7 +4729,7 @@ void EncAppCfg::xPrintParameter() { msg( VERBOSE, "RPR:%d ", 0 ); } - msg(VERBOSE, "TemporalFilter:%d ", m_gopBasedTemporalFilterEnabled); + msg(VERBOSE, "TemporalFilter:%d/%d ", m_gopBasedTemporalFilterPastRefs, m_gopBasedTemporalFilterFutureRefs); msg(VERBOSE, "SEI CTI:%d ", m_ctiSEIEnabled); #if EXTENSION_360_VIDEO m_ext360.outputConfigurationSummary(); diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h index 843d16282..ff81d1543 100644 --- a/source/App/EncoderApp/EncAppCfg.h +++ b/source/App/EncoderApp/EncAppCfg.h @@ -114,9 +114,12 @@ protected: int m_confWinTop; int m_confWinBottom; int m_sourcePadding[2]; ///< number of padded pixels for width and height + int m_firstValidFrame; + int m_lastValidFrame; int m_framesToBeEncoded; ///< number of encoded frames bool m_AccessUnitDelimiter; ///< add Access Unit Delimiter NAL units bool m_enablePictureHeaderInSliceHeader; ///< Enable Picture Header in Slice Header + InputColourSpaceConversion m_inputColourSpaceConvert; ///< colour space conversion to apply to input video bool m_snrInternalColourSpace; ///< if true, then no colour space conversion is applied for snr calculation, otherwise inverse of input is applied. bool m_outputInternalColourSpace; ///< if true, then no colour space conversion is applied for reconstructed video, otherwise inverse of input is applied. @@ -837,8 +840,8 @@ protected: bool m_rprRASLtoolSwitch; bool m_avoidIntraInDepLayer; - bool m_gopBasedTemporalFilterEnabled; ///< GOP-based Temporal Filter enable/disable - bool m_gopBasedTemporalFilterFutureReference; ///< Enable/disable future frame references in the GOP-based Temporal Filter + int m_gopBasedTemporalFilterPastRefs; + int m_gopBasedTemporalFilterFutureRefs; std::map<int, double> m_gopBasedTemporalFilterStrengths; ///< Filter strength per frame for the GOP-based Temporal Filter int m_maxLayers; diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h index 1909bc669..c78e1350d 100644 --- a/source/Lib/EncoderLib/EncCfg.h +++ b/source/Lib/EncoderLib/EncCfg.h @@ -160,12 +160,15 @@ protected: //==== File I/O ======== int m_iFrameRate; int m_FrameSkip; - uint32_t m_temporalSubsampleRatio; + uint32_t m_temporalSubsampleRatio; int m_sourceWidth; int m_sourceHeight; Window m_conformanceWindow; int m_sourcePadding[2]; int m_framesToBeEncoded; + int m_firstValidFrame; + int m_lastValidFrame; + double m_adLambdaModifier[ MAX_TLAYER ]; std::vector<double> m_adIntraLambdaModifier; double m_dIntraQpFactor; ///< Intra Q Factor. If negative, use a default equation: 0.57*(1.0 - Clip3( 0.0, 0.5, 0.05*(double)(isField ? (GopSize-1)/2 : GopSize-1) )) @@ -524,9 +527,11 @@ protected: bool m_bFastUDIUseMPMEnabled; bool m_bFastMEForGenBLowDelayEnabled; bool m_bUseBLambdaForNonKeyLowDelayPictures; - bool m_gopBasedTemporalFilterEnabled; + int m_gopBasedTemporalFilterPastRefs; + int m_gopBasedTemporalFilterFutureRefs; bool m_noPicPartitionFlag; ///< no picture partitioning flag (single tile, single slice) bool m_mixedLossyLossless; ///< enable mixed lossy/lossless coding + std::vector<uint16_t> m_sliceLosslessArray; ///< Slice lossless array std::vector<uint32_t> m_tileColumnWidth; ///< tile column widths in units of CTUs (last column width will be repeated uniformly to cover any remaining picture width) std::vector<uint32_t> m_tileRowHeight; ///< tile row heights in units of CTUs (last row height will be repeated uniformly to cover any remaining picture height) @@ -1049,6 +1054,12 @@ public: void setFramesToBeEncoded ( int i ) { m_framesToBeEncoded = i; } + void setValidFrames(const int first, const int last) + { + m_firstValidFrame = first; + m_lastValidFrame = last; + } + bool getPrintMSEBasedSequencePSNR () const { return m_printMSEBasedSequencePSNR; } void setPrintMSEBasedSequencePSNR (bool value) { m_printMSEBasedSequencePSNR = value; } @@ -1575,8 +1586,16 @@ public: bool getFastUDIUseMPMEnabled () { return m_bFastUDIUseMPMEnabled; } bool getFastMEForGenBLowDelayEnabled () { return m_bFastMEForGenBLowDelayEnabled; } bool getUseBLambdaForNonKeyLowDelayPictures () { return m_bUseBLambdaForNonKeyLowDelayPictures; } - void setGopBasedTemporalFilterEnabled(bool flag) { m_gopBasedTemporalFilterEnabled = flag; } - bool getGopBasedTemporalFilterEnabled() { return m_gopBasedTemporalFilterEnabled; } + + void setGopBasedTemporalFilterRefs(const int pastRefs, const int futureRefs) + { + m_gopBasedTemporalFilterPastRefs = pastRefs; + m_gopBasedTemporalFilterFutureRefs = futureRefs; + } + bool getGopBasedTemporalFilterEnabled() const + { + return m_gopBasedTemporalFilterPastRefs != 0 || m_gopBasedTemporalFilterFutureRefs != 0; + } bool getUseReconBasedCrossCPredictionEstimate () const { return m_reconBasedCrossCPredictionEstimate; } void setUseReconBasedCrossCPredictionEstimate (const bool value) { m_reconBasedCrossCPredictionEstimate = value; } diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp index e887671ef..34115ba9c 100644 --- a/source/Lib/EncoderLib/EncLib.cpp +++ b/source/Lib/EncoderLib/EncLib.cpp @@ -364,7 +364,9 @@ void EncLib::init(AUWriterIf *auWriterIf) if (getUseCompositeRef()) { Picture *picBg = new Picture; - picBg->create( sps0.getChromaFormatIdc(), Size( pps0.getPicWidthInLumaSamples(), pps0.getPicHeightInLumaSamples() ), sps0.getMaxCUWidth(), sps0.getMaxCUWidth() + 16, false, m_layerId, m_gopBasedTemporalFilterEnabled ); + picBg->create(sps0.getChromaFormatIdc(), Size(pps0.getPicWidthInLumaSamples(), pps0.getPicHeightInLumaSamples()), + sps0.getMaxCUWidth(), sps0.getMaxCUWidth() + 16, false, m_layerId, + getGopBasedTemporalFilterEnabled()); picBg->getRecoBuf().fill(0); #if GDR_ENABLED PicHeader *picHeader = new PicHeader(); @@ -377,7 +379,9 @@ void EncLib::init(AUWriterIf *auWriterIf) picBg->createSpliceIdx(pps0.pcv->sizeInCtus); m_cGOPEncoder.setPicBg(picBg); Picture *picOrig = new Picture; - picOrig->create( sps0.getChromaFormatIdc(), Size( pps0.getPicWidthInLumaSamples(), pps0.getPicHeightInLumaSamples() ), sps0.getMaxCUWidth(), sps0.getMaxCUWidth() + 16, false, m_layerId, m_gopBasedTemporalFilterEnabled ); + picOrig->create(sps0.getChromaFormatIdc(), Size(pps0.getPicWidthInLumaSamples(), pps0.getPicHeightInLumaSamples()), + sps0.getMaxCUWidth(), sps0.getMaxCUWidth() + 16, false, m_layerId, + getGopBasedTemporalFilterEnabled()); picOrig->getOrigBuf().fill(0); m_cGOPEncoder.setPicOrig(picOrig); } @@ -567,7 +571,7 @@ bool EncLib::encodePrep( bool flush, PelStorage* pcPicYuvOrg, PelStorage* cPicYu pcPicCurr->M_BUFS( 0, PIC_TRUE_ORIGINAL_INPUT ).getBuf( COMPONENT_Cb ).copyFrom( cPicYuvTrueOrg->getBuf( COMPONENT_Cb ) ); pcPicCurr->M_BUFS( 0, PIC_TRUE_ORIGINAL_INPUT ).getBuf( COMPONENT_Cr ).copyFrom( cPicYuvTrueOrg->getBuf( COMPONENT_Cr ) ); - if(m_gopBasedTemporalFilterEnabled) + if (getGopBasedTemporalFilterEnabled()) { pcPicCurr->M_BUFS( 0, PIC_FILTERED_ORIGINAL_INPUT ).getBuf( COMPONENT_Y ).copyFrom( pcPicYuvFilteredOrg->getBuf( COMPONENT_Y ) ); pcPicCurr->M_BUFS( 0, PIC_FILTERED_ORIGINAL_INPUT ).getBuf( COMPONENT_Cb ).copyFrom( pcPicYuvFilteredOrg->getBuf( COMPONENT_Cb ) ); @@ -593,7 +597,7 @@ bool EncLib::encodePrep( bool flush, PelStorage* pcPicYuvOrg, PelStorage* cPicYu pSPS->getHorCollocatedChromaFlag(), pSPS->getVerCollocatedChromaFlag() ); Picture::rescalePicture( scalingRatio, *cPicYuvTrueOrg, refPPS->getScalingWindow(), pcPicCurr->getTrueOrigBuf(), pPPS->getScalingWindow(), chromaFormatIDC, pSPS->getBitDepths(), true, true, pSPS->getHorCollocatedChromaFlag(), pSPS->getVerCollocatedChromaFlag() ); - if(m_gopBasedTemporalFilterEnabled) + if (getGopBasedTemporalFilterEnabled()) { Picture::rescalePicture( scalingRatio, *pcPicYuvFilteredOrg, refPPS->getScalingWindow(), pcPicCurr->getFilteredOrigBuf(), pPPS->getScalingWindow(), chromaFormatIDC, pSPS->getBitDepths(), true, true, pSPS->getHorCollocatedChromaFlag(), pSPS->getVerCollocatedChromaFlag() ); @@ -603,7 +607,7 @@ bool EncLib::encodePrep( bool flush, PelStorage* pcPicYuvOrg, PelStorage* cPicYu { pcPicCurr->M_BUFS( 0, PIC_ORIGINAL ).swap( *pcPicYuvOrg ); pcPicCurr->M_BUFS( 0, PIC_TRUE_ORIGINAL ).swap( *cPicYuvTrueOrg ); - if(m_gopBasedTemporalFilterEnabled) + if (getGopBasedTemporalFilterEnabled()) { pcPicCurr->M_BUFS( 0, PIC_FILTERED_ORIGINAL ).swap( *pcPicYuvFilteredOrg ); } @@ -746,7 +750,7 @@ bool EncLib::encodePrep( bool flush, PelStorage* pcPicYuvOrg, PelStorage* pcPicY compBuf.width, compBuf.height, isTopField); - if(m_gopBasedTemporalFilterEnabled) + if (getGopBasedTemporalFilterEnabled()) { compBuf = pcPicYuvFilteredOrg->get( compID ); separateFields( compBuf.buf, @@ -892,15 +896,14 @@ void EncLib::xGetNewPicBuffer ( std::list<PelUnitBuf*>& rcListPicYuvRecOut, Pict if (rpcPic==0) { rpcPic = new Picture; - rpcPic->create( sps.getChromaFormatIdc(), Size( pps.getPicWidthInLumaSamples(), pps.getPicHeightInLumaSamples() ), sps.getMaxCUWidth(), sps.getMaxCUWidth() + 16, false, m_layerId, m_gopBasedTemporalFilterEnabled - , m_fgcSEIAnalysisEnabled - ); + rpcPic->create(sps.getChromaFormatIdc(), Size(pps.getPicWidthInLumaSamples(), pps.getPicHeightInLumaSamples()), + sps.getMaxCUWidth(), sps.getMaxCUWidth() + 16, false, m_layerId, getGopBasedTemporalFilterEnabled(), m_fgcSEIAnalysisEnabled); if (m_resChangeInClvsEnabled) { const PPS &pps0 = *m_ppsMap.getPS(0); rpcPic->M_BUFS(0, PIC_ORIGINAL_INPUT).create(sps.getChromaFormatIdc(), Area(Position(), Size(pps0.getPicWidthInLumaSamples(), pps0.getPicHeightInLumaSamples()))); rpcPic->M_BUFS(0, PIC_TRUE_ORIGINAL_INPUT).create(sps.getChromaFormatIdc(), Area(Position(), Size(pps0.getPicWidthInLumaSamples(), pps0.getPicHeightInLumaSamples()))); - if(m_gopBasedTemporalFilterEnabled) + if (getGopBasedTemporalFilterEnabled()) { rpcPic->M_BUFS(0, PIC_FILTERED_ORIGINAL_INPUT).create(sps.getChromaFormatIdc(), Area(Position(), Size(pps0.getPicWidthInLumaSamples(), pps0.getPicHeightInLumaSamples()))); } diff --git a/source/Lib/EncoderLib/EncTemporalFilter.cpp b/source/Lib/EncoderLib/EncTemporalFilter.cpp index 867effa41..073a6230c 100644 --- a/source/Lib/EncoderLib/EncTemporalFilter.cpp +++ b/source/Lib/EncoderLib/EncTemporalFilter.cpp @@ -43,7 +43,6 @@ // Constructor / destructor / initialization / destroy // ==================================================================================================================== -const int EncTemporalFilter::m_range = 4; const double EncTemporalFilter::m_chromaFactor = 0.55; const double EncTemporalFilter::m_sigmaMultiplier = 9.0; const double EncTemporalFilter::m_sigmaZeroPoint = 10.0; @@ -69,12 +68,11 @@ const int EncTemporalFilter::m_interpolationFilter[16][8] = { 0, 0, -2, 4, 64, -3, 1, 0 } //15-->--> }; -const double EncTemporalFilter::m_refStrengths[3][4] = -{ // abs(POC offset) +const double EncTemporalFilter::m_refStrengths[2][4] = { + // abs(POC offset) // 1, 2 3 4 - {0.85, 0.57, 0.41, 0.33}, // m_range * 2 - {1.13, 0.97, 0.81, 0.57}, // m_range - {0.30, 0.30, 0.30, 0.30} // otherwise + { 0.85, 0.57, 0.41, 0.33 }, // random access + { 1.13, 0.97, 0.81, 0.57 }, // low delay }; EncTemporalFilter::EncTemporalFilter() : @@ -87,20 +85,13 @@ EncTemporalFilter::EncTemporalFilter() : m_inputColourSpaceConvert(NUMBER_INPUT_COLOUR_SPACE_CONVERSIONS) {} -void EncTemporalFilter::init(const int frameSkip, - const int inputBitDepth[MAX_NUM_CHANNEL_TYPE], - const int msbExtendedBitDepth[MAX_NUM_CHANNEL_TYPE], - const int internalBitDepth[MAX_NUM_CHANNEL_TYPE], - const int width, - const int height, - const int *pad, - const bool rec709, - const std::string &filename, - const ChromaFormat inputChromaFormatIDC, - const InputColourSpaceConversion colorSpaceConv, - const int qp, - const std::map<int, double> &temporalFilterStrengths, - const bool gopBasedTemporalFilterFutureReference) +void EncTemporalFilter::init(const int frameSkip, const int inputBitDepth[MAX_NUM_CHANNEL_TYPE], + const int msbExtendedBitDepth[MAX_NUM_CHANNEL_TYPE], + const int internalBitDepth[MAX_NUM_CHANNEL_TYPE], const int width, const int height, + const int *pad, const bool rec709, const std::string &filename, + const ChromaFormat inputChromaFormatIDC, const InputColourSpaceConversion colorSpaceConv, + const int qp, const std::map<int, double> &temporalFilterStrengths, const int pastRefs, + const int futureRefs, const int firstValidFrame, const int lastValidFrame) { m_FrameSkip = frameSkip; for (int i = 0; i < MAX_NUM_CHANNEL_TYPE; i++) @@ -123,7 +114,11 @@ void EncTemporalFilter::init(const int frameSkip, m_area = Area(0, 0, width, height); m_QP = qp; m_temporalFilterStrengths = temporalFilterStrengths; - m_gopBasedTemporalFilterFutureReference = gopBasedTemporalFilterFutureReference; + + m_pastRefs = pastRefs; + m_futureRefs = futureRefs; + m_firstValidFrame = firstValidFrame; + m_lastValidFrame = lastValidFrame; } // ==================================================================================================================== @@ -148,21 +143,15 @@ bool EncTemporalFilter::filter(PelStorage *orgPic, int receivedPoc) if (isFilterThisFrame) { - int offset = m_FrameSkip; + const int currentFilePoc = receivedPoc + m_FrameSkip; + const int firstFrame = std::max(currentFilePoc - m_pastRefs, m_firstValidFrame); + const int lastFrame = std::min(currentFilePoc + m_futureRefs, m_lastValidFrame); VideoIOYuv yuvFrames; yuvFrames.open(m_inputFileName, false, m_inputBitDepth, m_MSBExtendedBitDepth, m_internalBitDepth); - yuvFrames.skipFrames(std::max(offset + receivedPoc - m_range, 0), m_sourceWidth - m_pad[0], m_sourceHeight - m_pad[1], m_chromaFormatIDC); + yuvFrames.skipFrames(firstFrame, m_sourceWidth - m_pad[0], m_sourceHeight - m_pad[1], m_chromaFormatIDC); std::deque<TemporalFilterSourcePicInfo> srcFrameInfo; - int firstFrame = receivedPoc + offset - m_range; - int lastFrame = receivedPoc + offset + m_range; - if (!m_gopBasedTemporalFilterFutureReference) - { - lastFrame = receivedPoc + offset - 1; - } - int origOffset = -m_range; - // subsample original picture so it only needs to be done once PelStorage origPadded; @@ -179,15 +168,9 @@ bool EncTemporalFilter::filter(PelStorage *orgPic, int receivedPoc) // determine motion vectors for (int poc = firstFrame; poc <= lastFrame; poc++) { - if (poc < 0) - { - origOffset++; - continue; // frame not available - } - else if (poc == offset + receivedPoc) + if (poc == currentFilePoc) { // hop over frame that will be filtered yuvFrames.skipFrames(1, m_sourceWidth - m_pad[0], m_sourceHeight - m_pad[1], m_chromaFormatIDC); - origOffset++; continue; } srcFrameInfo.push_back(TemporalFilterSourcePicInfo()); @@ -198,14 +181,16 @@ bool EncTemporalFilter::filter(PelStorage *orgPic, int receivedPoc) dummyPicBufferTO.create(m_chromaFormatIDC, m_area, 0, m_padding); if (!yuvFrames.read(srcPic.picBuffer, dummyPicBufferTO, m_inputColourSpaceConvert, m_pad, m_chromaFormatIDC, m_clipInputVideoToRec709Range)) { - return false; // eof or read fail + // eof or read fail + srcPic.picBuffer.destroy(); + srcFrameInfo.pop_back(); + break; } srcPic.picBuffer.extendBorderPel(m_padding, m_padding); srcPic.mvs.allocate(m_sourceWidth / 4, m_sourceHeight / 4); motionEstimation(srcPic.mvs, origPadded, srcPic.picBuffer, origSubsampled2, origSubsampled4); - srcPic.origOffset = origOffset; - origOffset++; + srcPic.origOffset = poc - currentFilePoc; } // filter @@ -608,15 +593,7 @@ void EncTemporalFilter::bilateralFilter(const PelStorage &orgPic, applyMotion(srcFrameInfo[i].mvs, srcFrameInfo[i].picBuffer, correctedPics[i]); } - int refStrengthRow = 2; - if (numRefs == m_range * 2) - { - refStrengthRow = 0; - } - else if (numRefs == m_range) - { - refStrengthRow = 1; - } + const int refStrengthRow = m_futureRefs > 0 ? 0 : 1; const double lumaSigmaSq = (m_QP - m_sigmaZeroPoint) * (m_QP - m_sigmaZeroPoint) * m_sigmaMultiplier; const double chromaSigmaSq = 30 * 30; @@ -654,27 +631,36 @@ void EncTemporalFilter::bilateralFilter(const PelStorage &orgPic, for (int i = 0; i < numRefs; i++) { double variance = 0, diffsum = 0; - for (int y1 = 0; y1 < blockSizeY - 1; y1++) + const ptrdiff_t refStride = correctedPics[i].bufs[c].stride; + const Pel * refPel = correctedPics[i].bufs[c].buf + y * refStride + x; + for (int y1 = 0; y1 < blockSizeY; y1++) { - for (int x1 = 0; x1 < blockSizeX - 1; x1++) + for (int x1 = 0; x1 < blockSizeX; x1++) { - int pix = *(srcPel + x1); - int pixR = *(srcPel + x1 + 1); - int pixD = *(srcPel + x1 + srcStride); - int ref = *(correctedPics[i].bufs[c].buf + ((y + y1) * correctedPics[i].bufs[c].stride + x + x1)); - int refR = *(correctedPics[i].bufs[c].buf + ((y + y1) * correctedPics[i].bufs[c].stride + x + x1 + 1)); - int refD = *(correctedPics[i].bufs[c].buf + ((y + y1 + 1) * correctedPics[i].bufs[c].stride + x + x1)); - - int diff = pix - ref; - int diffR = pixR - refR; - int diffD = pixD - refD; - + const Pel pix = *(srcPel + srcStride * y1 + x1); + const Pel ref = *(refPel + refStride * y1 + x1); + const int diff = pix - ref; variance += diff * diff; - diffsum += (diffR - diff) * (diffR - diff); - diffsum += (diffD - diff) * (diffD - diff); + if (x1 != blockSizeX - 1) + { + const Pel pixR = *(srcPel + srcStride * y1 + x1 + 1); + const Pel refR = *(refPel + refStride * y1 + x1 + 1); + const int diffR = pixR - refR; + diffsum += (diffR - diff) * (diffR - diff); + } + if (y1 != blockSizeY - 1) + { + const Pel pixD = *(srcPel + srcStride * y1 + x1 + srcStride); + const Pel refD = *(refPel + refStride * y1 + x1 + refStride); + const int diffD = pixD - refD; + diffsum += (diffD - diff) * (diffD - diff); + } } } - srcFrameInfo[i].mvs.get(x / blockSizeX, y / blockSizeY).noise = (int) round((300 * variance + 50) / (10 * diffsum + 50)); + const int cntV = blockSizeX * blockSizeY; + const int cntD = 2 * cntV - blockSizeX - blockSizeY; + srcFrameInfo[i].mvs.get(x / blockSizeX, y / blockSizeY).noise = + (int) round((15.0 * cntD / cntV * variance + 5.0) / (diffsum + 5.0)); } } double minError = 9999999; diff --git a/source/Lib/EncoderLib/EncTemporalFilter.h b/source/Lib/EncoderLib/EncTemporalFilter.h index 42f2f88c8..fe095e604 100644 --- a/source/Lib/EncoderLib/EncTemporalFilter.h +++ b/source/Lib/EncoderLib/EncTemporalFilter.h @@ -104,33 +104,24 @@ public: EncTemporalFilter(); ~EncTemporalFilter() {} - void init(const int frameSkip, - const int inputBitDepth[MAX_NUM_CHANNEL_TYPE], - const int msbExtendedBitDepth[MAX_NUM_CHANNEL_TYPE], - const int internalBitDepth[MAX_NUM_CHANNEL_TYPE], - const int width, - const int height, - const int *pad, - const bool rec709, - const std::string &filename, - const ChromaFormat inputChroma, - const InputColourSpaceConversion colorSpaceConv, - const int qp, - const std::map<int, double> &temporalFilterStrengths, - const bool gopBasedTemporalFilterFutureReference); + void init(const int frameSkip, const int inputBitDepth[MAX_NUM_CHANNEL_TYPE], + const int msbExtendedBitDepth[MAX_NUM_CHANNEL_TYPE], const int internalBitDepth[MAX_NUM_CHANNEL_TYPE], + const int width, const int height, const int *pad, const bool rec709, const std::string &filename, + const ChromaFormat inputChroma, const InputColourSpaceConversion colorSpaceConv, const int qp, + const std::map<int, double> &temporalFilterStrengths, const int pastRefs, const int futureRefs, + const int firstValidFrame, const int lastValidFrame); bool filter(PelStorage *orgPic, int frame); private: // Private static member variables - static const int m_range; static const double m_chromaFactor; static const double m_sigmaMultiplier; static const double m_sigmaZeroPoint; static const int m_motionVectorFactor; static const int m_padding; static const int m_interpolationFilter[16][8]; - static const double m_refStrengths[3][4]; + static const double m_refStrengths[2][4]; // Private member variables int m_FrameSkip; @@ -147,7 +138,11 @@ private: bool m_clipInputVideoToRec709Range; InputColourSpaceConversion m_inputColourSpaceConvert; Area m_area; - bool m_gopBasedTemporalFilterFutureReference; + + int m_pastRefs; + int m_futureRefs; + int m_firstValidFrame; + int m_lastValidFrame; // Private functions void subsampleLuma(const PelStorage &input, PelStorage &output, const int factor = 2) const; -- GitLab