diff --git a/cfg/encoder_lowdelay_P_vtm.cfg b/cfg/encoder_lowdelay_P_vtm.cfg index d2001e4a5691fac2b250977f20b840615d27b24d..9ec4a5d13cf00e9d8e1da1bfc47971a6f37dc64c 100644 --- a/cfg/encoder_lowdelay_P_vtm.cfg +++ b/cfg/encoder_lowdelay_P_vtm.cfg @@ -56,6 +56,11 @@ TransformSkipFast : 1 # Fast Transform skipping (0: OFF, 1 TransformSkipLog2MaxSize : 5 SAOLcuBoundary : 0 # SAOLcuBoundary using non-deblocked pixels (0: OFF, 1: ON) +#=========== TemporalFilter ================= +TemporalFilter : 0 # Enable/disable GOP Based Temporal Filter +TemporalFilterFutureReference : 0 # Enable/disable reading future frames +TemporalFilterStrengthFrame4 : 0.4 # Enable filter at every 4th frame with given strength + #============ Slices ================ SliceMode : 0 # 0: Disable all slice options. # 1: Enforce maximum number of LCU in an slice, diff --git a/cfg/encoder_lowdelay_vtm.cfg b/cfg/encoder_lowdelay_vtm.cfg index 0125529b61d6ecedf749e5189460431c2fa1f420..c8899e2e74339001db4bce7515fd2c23997dd3a0 100644 --- a/cfg/encoder_lowdelay_vtm.cfg +++ b/cfg/encoder_lowdelay_vtm.cfg @@ -56,6 +56,11 @@ TransformSkipFast : 1 # Fast Transform skipping (0: OFF, 1 TransformSkipLog2MaxSize : 5 SAOLcuBoundary : 0 # SAOLcuBoundary using non-deblocked pixels (0: OFF, 1: ON) +#=========== TemporalFilter ================= +TemporalFilter : 0 # Enable/disable GOP Based Temporal Filter +TemporalFilterFutureReference : 0 # Enable/disable reading future frames +TemporalFilterStrengthFrame4 : 0.4 # Enable filter at every 4th frame with given strength + #============ Slices ================ SliceMode : 0 # 0: Disable all slice options. 
# 1: Enforce maximum number of LCU in an slice, diff --git a/cfg/encoder_randomaccess_vtm.cfg b/cfg/encoder_randomaccess_vtm.cfg index 631c6cb006f179d2d8da722839c7c170019cc0b9..e11813ba3f1e9f7352d753f5ff48ca0db0e1bfe4 100644 --- a/cfg/encoder_randomaccess_vtm.cfg +++ b/cfg/encoder_randomaccess_vtm.cfg @@ -70,6 +70,12 @@ TransformSkipFast : 1 # Fast Transform skipping (0: OFF, 1 TransformSkipLog2MaxSize : 5 SAOLcuBoundary : 0 # SAOLcuBoundary using non-deblocked pixels (0: OFF, 1: ON) +#=========== TemporalFilter ================= +TemporalFilter : 0 # Enable/disable GOP Based Temporal Filter +TemporalFilterFutureReference : 1 # Enable/disable reading future frames +TemporalFilterStrengthFrame8 : 0.95 # Enable filter at every 8th frame with given strength +TemporalFilterStrengthFrame16 : 1.5 # Enable filter at every 16th frame with given strength, longer intervals have higher priority + #============ Slices ================ SliceMode : 0 # 0: Disable all slice options. # 1: Enforce maximum number of LCU in an slice, diff --git a/doc/software-manual.tex b/doc/software-manual.tex old mode 100755 new mode 100644 index 820df1b14e1df0497b45e0d89078b692af8037b6..db557a0b809463dd14c99ac25c6c777c8e84ae11 --- a/doc/software-manual.tex +++ b/doc/software-manual.tex @@ -890,6 +890,32 @@ Picture output options: output upscaled (2), decoded but in full resolution buff \end{OptionTableNoShorthand} +%% +%% GOP based temporal filter parameters +%% + +\begin{OptionTableNoShorthand}{GOP based temporal filter parameters}{tab:gop-based-temporal-filter} + +\Option{TemporalFilter} & +%\ShortOption{\None} & +\Default{false} & +Enables or disables GOP based temporal filter. +\\ +\Option{TemporalFilterFutureReference} & +%\ShortOption{\None} & +\Default{true} & +Enables or disables referencing future frames in the GOP based temporal filter. Can be used to disable future referencing for +low delay configurations. 
+\\ +\Option{TemporalFilterStrengthFrame*} & +%\ShortOption{\None} & +\Default{} & +Strength for every * frame in GOP based temporal filter, where * is an integer. E.g. --TemporalFilterStrengthFrame8 0.95 will +enable GOP based temporal filter at every 8th frame with strength 0.95. Longer intervals override shorter ones when there are +multiple matches. +\\ +\end{OptionTableNoShorthand} + %% %% profile, level and conformance options %% diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp index c60e719034f6d90c2bfedabaf4f87ffb24578f76..9bef109b99561eb948a10028cfded011392901cd 100644 --- a/source/App/EncoderApp/EncApp.cpp +++ b/source/App/EncoderApp/EncApp.cpp @@ -48,6 +48,10 @@ #include "AppEncHelper360/TExt360AppEncTop.h" #endif +#if JVET_O0549_ENCODER_ONLY_FILTER +#include "EncoderLib/EncTemporalFilter.h" +#endif + using namespace std; //! \ingroup EncoderApp @@ -644,6 +648,9 @@ void EncApp::xInitLibCfg() m_cEncLib.setCropOffsetBottom (m_cropOffsetBottom); m_cEncLib.setCalculateHdrMetrics (m_calculateHdrMetrics); #endif +#if JVET_O0549_ENCODER_ONLY_FILTER + m_cEncLib.setGopBasedTemporalFilterEnabled(m_gopBasedTemporalFilterEnabled); +#endif } void EncApp::xCreateLib( std::list<PelUnitBuf*>& recBufList @@ -744,6 +751,17 @@ void EncApp::encode() TExt360AppEncTop ext360(*this, m_cEncLib.getGOPEncoder()->getExt360Data(), *(m_cEncLib.getGOPEncoder()), orgPic); #endif +#if JVET_O0549_ENCODER_ONLY_FILTER + EncTemporalFilter temporalFilter; + if (m_gopBasedTemporalFilterEnabled) + { + temporalFilter.init(m_FrameSkip, m_inputBitDepth, m_MSBExtendedBitDepth, m_internalBitDepth, m_iSourceWidth, m_iSourceHeight, + m_aiPad, m_bClipInputVideoToRec709Range, m_inputFileName, m_chromaFormatIDC, + m_inputColourSpaceConvert, m_iQP, m_gopBasedTemporalFilterStrengths, + m_gopBasedTemporalFilterFutureReference); + } +#endif + while ( !bEos ) { // read input YUV file @@ -760,6 +778,13 @@ void EncApp::encode() m_cVideoIOYuvInputFile.read( orgPic, trueOrgPic, 
ipCSC, m_aiPad, m_InputChromaFormatIDC, m_bClipInputVideoToRec709Range ); #endif +#if JVET_O0549_ENCODER_ONLY_FILTER + if (m_gopBasedTemporalFilterEnabled) + { + temporalFilter.filter(&orgPic, m_iFrameRcvd); + } +#endif + // increase number of received frames m_iFrameRcvd++; diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp index 386380bb28dcdb8e11d2fe3b109660915ffa4850..0b07ec200515a68fb181fbab8f0324f161bc9cf1 100644 --- a/source/App/EncoderApp/EncAppCfg.cpp +++ b/source/App/EncoderApp/EncAppCfg.cpp @@ -608,6 +608,27 @@ static inline istream& operator >> (std::istream &in, EncAppCfg::OptionalValue<T } #endif +#if JVET_O0549_ENCODER_ONLY_FILTER +template <class T1, class T2> +static inline istream& operator >> (std::istream& in, std::map<T1, T2>& map) +{ + T1 key; + T2 value; + try + { + in >> key; + in >> value; + } + catch (...) + { + in.setstate(ios::failbit); + } + + map[key] = value; + return in; +} +#endif + static void automaticallySelectRExtProfile(const bool bUsingGeneralRExtTools, const bool bUsingChromaQPAdjustment, @@ -1374,6 +1395,14 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) ( "UpscaledOutput", m_upscaledOutput, 0, "Output upscaled (2), decoded but in full resolution buffer (1) or decoded cropped (0, default) picture for RPR" ) ; +#if JVET_O0549_ENCODER_ONLY_FILTER + opts.addOptions() + ("TemporalFilter", m_gopBasedTemporalFilterEnabled, false, "Enable GOP based temporal filter. Disabled per default") + ("TemporalFilterFutureReference", m_gopBasedTemporalFilterFutureReference, true, "Enable referencing of future frames in the GOP based temporal filter. This is typically disabled for Low Delay configurations.") + ("TemporalFilterStrengthFrame*", m_gopBasedTemporalFilterStrengths, std::map<int, double>(), "Strength for every * frame in GOP based temporal filter, where * is an integer." + " E.g. 
--TemporalFilterStrengthFrame8 0.95 will enable GOP based temporal filter at every 8th frame with strength 0.95"); +#endif + #if EXTENSION_360_VIDEO TExt360AppEncCfg::TExt360AppEncCfgContext ext360CfgContext; m_ext360.addOptions(opts, ext360CfgContext); @@ -3368,6 +3397,12 @@ bool EncAppCfg::xCheckParameter() xConfirmPara( m_decodeBitstreams[0] == m_bitstreamFileName, "Debug bitstream and the output bitstream cannot be equal.\n" ); xConfirmPara( m_decodeBitstreams[1] == m_bitstreamFileName, "Decode2 bitstream and the output bitstream cannot be equal.\n" ); xConfirmPara(unsigned(m_LMChroma) > 1, "LMMode exceeds range (0 to 1)"); +#if JVET_O0549_ENCODER_ONLY_FILTER + if (m_gopBasedTemporalFilterEnabled) + { + xConfirmPara(m_temporalSubsampleRatio != 1, "GOP Based Temporal Filter only support Temporal sub-sample ratio 1"); + } +#endif #if EXTENSION_360_VIDEO check_failed |= m_ext360.verifyParameters(); #endif @@ -3690,7 +3725,9 @@ void EncAppCfg::xPrintParameter() { msg( VERBOSE, "RPR:%d", 0 ); } - +#if JVET_O0549_ENCODER_ONLY_FILTER + msg(VERBOSE, "TemporalFilter:%d ", m_gopBasedTemporalFilterEnabled); +#endif #if EXTENSION_360_VIDEO m_ext360.outputConfigurationSummary(); #endif diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h index c993b9ddab39ac0b284a62e5ec921f57f735165c..1269ebd1f1589a45a7e70ff5d275ba80b17e8054 100644 --- a/source/App/EncoderApp/EncAppCfg.h +++ b/source/App/EncoderApp/EncAppCfg.h @@ -39,6 +39,14 @@ #define __ENCAPPCFG__ #include "CommonLib/CommonDef.h" +#if JVET_O0549_ENCODER_ONLY_FILTER + +#include <map> +template <class T1, class T2> +static inline std::istream& operator >> (std::istream &in, std::map<T1, T2> &map); + +#include "Utilities/program_options_lite.h" +#endif #include "EncoderLib/EncCfg.h" #if EXTENSION_360_VIDEO @@ -48,6 +56,9 @@ #if JVET_O0756_CALCULATE_HDRMETRICS #include "HDRLib/inc/DistortionMetric.H" #endif +#if JVET_O0549_ENCODER_ONLY_FILTER +namespace po = df::program_options_lite; +#endif 
#include <sstream> #include <vector> @@ -605,6 +616,12 @@ protected: int m_switchPocPeriod; int m_upscaledOutput; ////< Output upscaled (2), decoded cropped but in full resolution buffer (1) or decoded cropped (0, default) picture for RPR. +#if JVET_O0549_ENCODER_ONLY_FILTER + bool m_gopBasedTemporalFilterEnabled; ///< GOP-based Temporal Filter enable/disable + bool m_gopBasedTemporalFilterFutureReference; ///< Enable/disable future frame references in the GOP-based Temporal Filter + std::map<int, double> m_gopBasedTemporalFilterStrengths; ///< Filter strength per frame for the GOP-based Temporal Filter +#endif + #if EXTENSION_360_VIDEO TExt360AppEncCfg m_ext360; friend class TExt360AppEncCfg; diff --git a/source/Lib/CommonLib/AdaptiveLoopFilter.cpp b/source/Lib/CommonLib/AdaptiveLoopFilter.cpp index 425a0929b31b56a884affffb2780d1f3af1e5453..9c46592574f294c6d1f6d0ae418906e93ad102fa 100644 --- a/source/Lib/CommonLib/AdaptiveLoopFilter.cpp +++ b/source/Lib/CommonLib/AdaptiveLoopFilter.cpp @@ -665,17 +665,24 @@ void AdaptiveLoopFilter::create( const int picWidth, const int picHeight, const static_assert( AlfNumClippingValues[CHANNEL_TYPE_LUMA] > 0, "AlfNumClippingValues[CHANNEL_TYPE_LUMA] must be at least one" ); for( int i = 0; i < AlfNumClippingValues[CHANNEL_TYPE_LUMA]; ++i ) { +# if JVET_P0505_ALF_CLIP_VALUE + m_alfClippingValues[CHANNEL_TYPE_LUMA][i] = (Pel)std::round( std::pow(2., double(m_inputBitDepth[CHANNEL_TYPE_LUMA] - 2.35*i)) ); +#else m_alfClippingValues[CHANNEL_TYPE_LUMA][i] = (Pel) std::round( std::pow( 2., double( m_inputBitDepth[CHANNEL_TYPE_LUMA] * ( AlfNumClippingValues[CHANNEL_TYPE_LUMA] - i ) ) / AlfNumClippingValues[CHANNEL_TYPE_LUMA] ) ); +#endif } static_assert( AlfNumClippingValues[CHANNEL_TYPE_CHROMA] > 0, "AlfNumClippingValues[CHANNEL_TYPE_CHROMA] must be at least one" ); m_alfClippingValues[CHANNEL_TYPE_CHROMA][0] = 1 << m_inputBitDepth[CHANNEL_TYPE_CHROMA]; for( int i = 1; i < AlfNumClippingValues[CHANNEL_TYPE_CHROMA]; ++i ) { +# if 
JVET_P0505_ALF_CLIP_VALUE + m_alfClippingValues[CHANNEL_TYPE_CHROMA][i] = (Pel)std::round( std::pow(2., double(m_inputBitDepth[CHANNEL_TYPE_CHROMA] - 2.35*i)) ); +#else m_alfClippingValues[CHANNEL_TYPE_CHROMA][i] = (Pel) std::round( std::pow( @@ -683,6 +690,7 @@ void AdaptiveLoopFilter::create( const int picWidth, const int picHeight, const m_inputBitDepth[CHANNEL_TYPE_CHROMA] - 8 + 8. * ( AlfNumClippingValues[CHANNEL_TYPE_CHROMA] - i - 1 ) / ( AlfNumClippingValues[CHANNEL_TYPE_CHROMA] - 1 ) ) ); +#endif } if (m_created) diff --git a/source/Lib/CommonLib/Buffer.cpp b/source/Lib/CommonLib/Buffer.cpp index a06167006a134f2ad5d51725cf7a2ff76f0fbfda..7c1bb9f1d7e080fe27d9de64c9f4e60af4f49830 100644 --- a/source/Lib/CommonLib/Buffer.cpp +++ b/source/Lib/CommonLib/Buffer.cpp @@ -167,19 +167,27 @@ void addBIOAvgCore(const Pel* src0, int src0Stride, const Pel* src1, int src1Str for (int x = 0; x < width; x += 4) { b = tmpx * (gradX0[x] - gradX1[x]) + tmpy * (gradY0[x] - gradY1[x]); +#if !JVET_P0091_REMOVE_BDOF_OFFSET_SHIFT b = ((b + 1) >> 1); +#endif dst[x] = ClipPel((int16_t)rightShift((src0[x] + src1[x] + b + offset), shift), clpRng); b = tmpx * (gradX0[x + 1] - gradX1[x + 1]) + tmpy * (gradY0[x + 1] - gradY1[x + 1]); +#if !JVET_P0091_REMOVE_BDOF_OFFSET_SHIFT b = ((b + 1) >> 1); +#endif dst[x + 1] = ClipPel((int16_t)rightShift((src0[x + 1] + src1[x + 1] + b + offset), shift), clpRng); b = tmpx * (gradX0[x + 2] - gradX1[x + 2]) + tmpy * (gradY0[x + 2] - gradY1[x + 2]); +#if !JVET_P0091_REMOVE_BDOF_OFFSET_SHIFT b = ((b + 1) >> 1); +#endif dst[x + 2] = ClipPel((int16_t)rightShift((src0[x + 2] + src1[x + 2] + b + offset), shift), clpRng); b = tmpx * (gradX0[x + 3] - gradX1[x + 3]) + tmpy * (gradY0[x + 3] - gradY1[x + 3]); +#if !JVET_P0091_REMOVE_BDOF_OFFSET_SHIFT b = ((b + 1) >> 1); +#endif dst[x + 3] = ClipPel((int16_t)rightShift((src0[x + 3] + src1[x + 3] + b + offset), shift), clpRng); } dst += dstStride; src0 += src0Stride; src1 += src1Stride; diff --git 
a/source/Lib/CommonLib/Buffer.h b/source/Lib/CommonLib/Buffer.h index e714d2869888ebeb6cf2a80c2cb0288acdac23a8..f4ea3ad6c5d7af238669cd71de3e037635788db0 100644 --- a/source/Lib/CommonLib/Buffer.h +++ b/source/Lib/CommonLib/Buffer.h @@ -118,6 +118,9 @@ struct AreaBuf : public Size void subtract ( const AreaBuf<const T> &other ); void extendSingleBorderPel(); void extendBorderPel ( unsigned margin ); +#if JVET_O0549_ENCODER_ONLY_FILTER + void extendBorderPel(unsigned marginX, unsigned marginY); +#endif void addWeightedAvg ( const AreaBuf<const T> &other1, const AreaBuf<const T> &other2, const ClpRng& clpRng, const int8_t gbiIdx); void removeWeightHighFreq ( const AreaBuf<T>& other, const bool bClip, const ClpRng& clpRng, const int8_t iGbiWeight); void addAvg ( const AreaBuf<const T> &other1, const AreaBuf<const T> &other2, const ClpRng& clpRng ); @@ -526,6 +529,46 @@ void AreaBuf<T>::updateHistogram( std::vector<int32_t>& hist ) const } } +#if JVET_O0549_ENCODER_ONLY_FILTER +template<typename T> +void AreaBuf<T>::extendBorderPel(unsigned marginX, unsigned marginY) +{ + T* p = buf; + int h = height; + int w = width; + int s = stride; + + CHECK((w + 2 * marginX) > s, "Size of buffer too small to extend"); + // do left and right margins + for (int y = 0; y < h; y++) + { + for (int x = 0; x < marginX; x++) + { + *(p - marginX + x) = p[0]; + p[w + x] = p[w - 1]; + } + p += s; + } + + // p is now the (0,height) (bottom left of image within bigger picture + p -= (s + marginX); + // p is now the (-margin, height-1) + for (int y = 0; y < marginY; y++) + { + ::memcpy(p + (y + 1) * s, p, sizeof(T) * (w + (marginX << 1))); + } + + // p is still (-marginX, height-1) + p -= ((h - 1) * s); + // p is now (-marginX, 0) + for (int y = 0; y < marginY; y++) + { + ::memcpy(p - (y + 1) * s, p, sizeof(T) * (w + (marginX << 1))); + } +} +#endif + + template<typename T> void AreaBuf<T>::extendBorderPel( unsigned margin ) { @@ -693,6 +736,9 @@ struct UnitBuf void addWeightedAvg ( const 
UnitBuf<const T> &other1, const UnitBuf<const T> &other2, const ClpRngs& clpRngs, const uint8_t gbiIdx = GBI_DEFAULT, const bool chromaOnly = false, const bool lumaOnly = false); void addAvg ( const UnitBuf<const T> &other1, const UnitBuf<const T> &other2, const ClpRngs& clpRngs, const bool chromaOnly = false, const bool lumaOnly = false); void extendSingleBorderPel(); +#if JVET_O0549_ENCODER_ONLY_FILTER + void extendBorderPel(unsigned marginX, unsigned marginY); +#endif void extendBorderPel ( unsigned margin ); void removeHighFreq ( const UnitBuf<T>& other, const bool bClip, const ClpRngs& clpRngs , const int8_t gbiWeight = g_GbiWeights[GBI_DEFAULT] @@ -802,6 +848,17 @@ void UnitBuf<T>::extendSingleBorderPel() } } +#if JVET_O0549_ENCODER_ONLY_FILTER +template<typename T> +void UnitBuf<T>::extendBorderPel(unsigned marginX, unsigned marginY) +{ + for (unsigned i = 0; i < bufs.size(); i++) + { + bufs[i].extendBorderPel(marginX >> getComponentScaleX(ComponentID(i), chromaFormat), marginY >> getComponentScaleY(ComponentID(i), chromaFormat)); + } +} +#endif + template<typename T> void UnitBuf<T>::extendBorderPel( unsigned margin ) { diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h index f491e1bebbde8e385f7444236f43364ed204d580..b3894ed01e7ced2bcda3a0fef84685463d411724 100644 --- a/source/Lib/CommonLib/CommonDef.h +++ b/source/Lib/CommonLib/CommonDef.h @@ -458,7 +458,7 @@ static const int MVD_MIN = -(1 << 17); static const int PIC_ANALYZE_CW_BINS = 32; static const int PIC_CODE_CW_BINS = 16; -static const int LMCS_SEG_SIZE = (PIC_CODE_CW_BINS << 1); +static const int LMCS_SEG_NUM = 32; static const int FP_PREC = 11; static const int CSCALE_FP_PREC = 11; static const int NEIG_NUM_LOG = 6; diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp index 9f705c3a407b6154decd0329956465ba6dfae8f2..e2bc7dc8dadb9a169766b4a8723eae8e557b866f 100644 --- a/source/Lib/CommonLib/InterPrediction.cpp +++ 
b/source/Lib/CommonLib/InterPrediction.cpp @@ -301,6 +301,9 @@ void InterPrediction::xSubPuMC( PredictionUnit& pu, PelUnitBuf& predBuf, const R int fstStep = (!verMC ? puHeight : puWidth); int secStep = (!verMC ? puWidth : puHeight); + pu.refIdx[0] = 0; pu.refIdx[1] = pu.cs->slice->getSliceType() == B_SLICE ? 0 : -1; + bool scaled = !PU::isRefPicSameSize( pu ); + m_subPuMC = true; for (int fstDim = fstStart; fstDim < fstEnd; fstDim += fstStep) @@ -317,7 +320,7 @@ void InterPrediction::xSubPuMC( PredictionUnit& pu, PelUnitBuf& predBuf, const R while (later < secEnd) { const MotionInfo &laterMi = !verMC ? pu.getMotionInfo(Position{ later, fstDim }) : pu.getMotionInfo(Position{ fstDim, later }); - if (laterMi == curMi) + if (!scaled && laterMi == curMi) { length += secStep; } @@ -423,7 +426,10 @@ void InterPrediction::xPredInterUni(const PredictionUnit& pu, const RefPicList& if( !pu.cu->affine ) { - clipMv( mv[0], pu.cu->lumaPos(), pu.cu->lumaSize(), sps, *pu.cs->pps ); + if( pu.cu->slice->getScalingRatio( eRefPicList, iRefIdx ) == SCALE_1X ) + { + clipMv( mv[0], pu.cu->lumaPos(), pu.cu->lumaSize(), sps, *pu.cs->pps ); + } } for( uint32_t comp = COMPONENT_Y; comp < pcYuvPred.bufs.size() && comp <= m_maxCompIDToPred; comp++ ) @@ -1002,8 +1008,11 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio { wrapRef = false; m_storedMv[h / AFFINE_MIN_BLOCK_SIZE * MVBUFFER_SIZE + w / AFFINE_MIN_BLOCK_SIZE].set(iMvScaleTmpHor, iMvScaleTmpVer); - iMvScaleTmpHor = std::min<int>(iHorMax, std::max<int>(iHorMin, iMvScaleTmpHor)); - iMvScaleTmpVer = std::min<int>(iVerMax, std::max<int>(iVerMin, iMvScaleTmpVer)); + if( scalingRatio == SCALE_1X ) + { + iMvScaleTmpHor = std::min<int>(iHorMax, std::max<int>(iHorMin, iMvScaleTmpHor)); + iMvScaleTmpVer = std::min<int>(iVerMax, std::max<int>(iVerMin, iMvScaleTmpVer)); + } } } else @@ -1018,8 +1027,11 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio else { wrapRef = false; - 
curMv.hor = std::min<int>(iHorMax, std::max<int>(iHorMin, curMv.hor)); - curMv.ver = std::min<int>(iVerMax, std::max<int>(iVerMin, curMv.ver)); + if( scalingRatio == SCALE_1X ) + { + curMv.hor = std::min<int>(iHorMax, std::max<int>(iHorMin, curMv.hor)); + curMv.ver = std::min<int>(iVerMax, std::max<int>(iVerMin, curMv.ver)); + } } iMvScaleTmpHor = curMv.hor; iMvScaleTmpVer = curMv.ver; @@ -1190,7 +1202,11 @@ void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf const int offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; #if JVET_P0653_BDOF_PROF_PARA_DEV #if JVET_P0491_BDOFPROF_MVD_RANGE +#if JVET_P0091_REMOVE_BDOF_OFFSET_SHIFT + const int limit = ( 1 << 4 ) - 1; +#else const int limit = ( 1 << 5 ) - 1; +#endif #else const int limit = (1 << 5); #endif @@ -1221,7 +1237,11 @@ void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf const Pel* SrcY0Tmp = srcY0 + (xu << 2) + (yu << 2) * src0Stride; g_pelBufOP.calcBIOSums(SrcY0Tmp, SrcY1Tmp, pGradX0Tmp, pGradX1Tmp, pGradY0Tmp, pGradY1Tmp, xu, yu, src0Stride, src1Stride, widthG, bitDepth, &sumAbsGX, &sumAbsGY, &sumDIX, &sumDIY, &sumSignGY_GX); +#if JVET_P0091_REMOVE_BDOF_OFFSET_SHIFT + tmpx = (sumAbsGX == 0 ? 0 : rightShiftMSB(sumDIX << 2, sumAbsGX)); +#else tmpx = (sumAbsGX == 0 ? 0 : rightShiftMSB(sumDIX << 3, sumAbsGX)); +#endif #if JVET_P0057_BDOF_PROF_HARMONIZATION && !JVET_P0491_BDOFPROF_MVD_RANGE tmpx = Clip3(-limit, limit - 1, tmpx); #else @@ -1232,7 +1252,11 @@ void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf int secsGxGy = sumSignGY_GX & ((1 << 12) - 1); int tmpData = tmpx * mainsGxGy; tmpData = ((tmpData << 12) + tmpx*secsGxGy) >> 1; +#if JVET_P0091_REMOVE_BDOF_OFFSET_SHIFT + tmpy = (sumAbsGY == 0 ? 0 : rightShiftMSB(((sumDIY << 2) - tmpData), sumAbsGY)); +#else tmpy = (sumAbsGY == 0 ? 
0 : rightShiftMSB(((sumDIY << 3) - tmpData), sumAbsGY)); +#endif #if JVET_P0057_BDOF_PROF_HARMONIZATION && !JVET_P0491_BDOFPROF_MVD_RANGE tmpy = Clip3(-limit, limit - 1, tmpy); #else @@ -2228,10 +2252,11 @@ void InterPrediction::xFillIBCBuffer(CodingUnit &cu) continue; const unsigned int lcuWidth = cu.cs->slice->getSPS()->getMaxCUWidth(); - const int shiftSample = ::getComponentScaleX(area.compID, cu.chromaFormat); - const int ctuSizeLog2 = floorLog2(lcuWidth) - shiftSample; - const int pux = area.x & ((m_IBCBufferWidth >> shiftSample) - 1); - const int puy = area.y & (( 1 << ctuSizeLog2 ) - 1); + const int shiftSampleHor = ::getComponentScaleX(area.compID, cu.chromaFormat); + const int shiftSampleVer = ::getComponentScaleY(area.compID, cu.chromaFormat); + const int ctuSizeLog2Ver = floorLog2(lcuWidth) - shiftSampleVer; + const int pux = area.x & ((m_IBCBufferWidth >> shiftSampleHor) - 1); + const int puy = area.y & (( 1 << ctuSizeLog2Ver ) - 1); const CompArea dstArea = CompArea(area.compID, cu.chromaFormat, Position(pux, puy), Size(area.width, area.height)); CPelBuf srcBuf = cu.cs->getRecoBuf(area); PelBuf dstBuf = m_IBCBuffer.getBuf(dstArea); @@ -2244,8 +2269,9 @@ void InterPrediction::xFillIBCBuffer(CodingUnit &cu) void InterPrediction::xIntraBlockCopy(PredictionUnit &pu, PelUnitBuf &predBuf, const ComponentID compID) { const unsigned int lcuWidth = pu.cs->slice->getSPS()->getMaxCUWidth(); - int shiftSample = ::getComponentScaleX(compID, pu.chromaFormat); - const int ctuSizeLog2 = floorLog2(lcuWidth) - shiftSample; + const int shiftSampleHor = ::getComponentScaleX(compID, pu.chromaFormat); + const int shiftSampleVer = ::getComponentScaleY(compID, pu.chromaFormat); + const int ctuSizeLog2Ver = floorLog2(lcuWidth) - shiftSampleVer; pu.bv = pu.mv[REF_PIC_LIST_0]; pu.bv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT); int refx, refy; @@ -2256,13 +2282,13 @@ void InterPrediction::xIntraBlockCopy(PredictionUnit &pu, PelUnitBuf &predBuf, c } else {//Cb or Cr 
- refx = pu.Cb().x + (pu.bv.hor >> shiftSample); - refy = pu.Cb().y + (pu.bv.ver >> shiftSample); + refx = pu.Cb().x + (pu.bv.hor >> shiftSampleHor); + refy = pu.Cb().y + (pu.bv.ver >> shiftSampleVer); } - refx &= ((m_IBCBufferWidth >> shiftSample) - 1); - refy &= ((1 << ctuSizeLog2) - 1); + refx &= ((m_IBCBufferWidth >> shiftSampleHor) - 1); + refy &= ((1 << ctuSizeLog2Ver) - 1); - if (refx + predBuf.bufs[compID].width <= (m_IBCBufferWidth >> shiftSample)) + if (refx + predBuf.bufs[compID].width <= (m_IBCBufferWidth >> shiftSampleHor)) { const CompArea srcArea = CompArea(compID, pu.chromaFormat, Position(refx, refy), Size(predBuf.bufs[compID].width, predBuf.bufs[compID].height)); const CPelBuf refBuf = m_IBCBuffer.getBuf(srcArea); @@ -2270,16 +2296,16 @@ void InterPrediction::xIntraBlockCopy(PredictionUnit &pu, PelUnitBuf &predBuf, c } else {//wrap around - int width = (m_IBCBufferWidth >> shiftSample) - refx; + int width = (m_IBCBufferWidth >> shiftSampleHor) - refx; CompArea srcArea = CompArea(compID, pu.chromaFormat, Position(refx, refy), Size(width, predBuf.bufs[compID].height)); CPelBuf srcBuf = m_IBCBuffer.getBuf(srcArea); PelBuf dstBuf = PelBuf(predBuf.bufs[compID].bufAt(Position(0, 0)), predBuf.bufs[compID].stride, Size(width, predBuf.bufs[compID].height)); dstBuf.copyFrom(srcBuf); - width = refx + predBuf.bufs[compID].width - (m_IBCBufferWidth >> shiftSample); + width = refx + predBuf.bufs[compID].width - (m_IBCBufferWidth >> shiftSampleHor); srcArea = CompArea(compID, pu.chromaFormat, Position(0, refy), Size(width, predBuf.bufs[compID].height)); srcBuf = m_IBCBuffer.getBuf(srcArea); - dstBuf = PelBuf(predBuf.bufs[compID].bufAt(Position((m_IBCBufferWidth >> shiftSample) - refx, 0)), predBuf.bufs[compID].stride, Size(width, predBuf.bufs[compID].height)); + dstBuf = PelBuf(predBuf.bufs[compID].bufAt(Position((m_IBCBufferWidth >> shiftSampleHor) - refx, 0)), predBuf.bufs[compID].stride, Size(width, predBuf.bufs[compID].height)); dstBuf.copyFrom(srcBuf); } } 
@@ -2339,6 +2365,33 @@ bool InterPrediction::xPredInterBlkRPR( const std::pair<int, int>& scalingRatio, int refPicWidth = refPic->cs->pps->getPicWidthInLumaSamples(); int refPicHeight = refPic->cs->pps->getPicHeightInLumaSamples(); +#if JVET_P0088_P0353_RPR_FILTERS + int xFilter = filterIndex; + int yFilter = filterIndex; + const int rprThreshold1 = ( 1 << SCALE_RATIO_BITS ) * 5 / 4; + const int rprThreshold2 = ( 1 << SCALE_RATIO_BITS ) * 7 / 4; + if( filterIndex == 0 ) + { + if( scalingRatio.first > rprThreshold2 ) + { + xFilter = 4; + } + else if( scalingRatio.first > rprThreshold1 ) + { + xFilter = 3; + } + + if( scalingRatio.second > rprThreshold2 ) + { + yFilter = 4; + } + else if( scalingRatio.second > rprThreshold1 ) + { + yFilter = 3; + } + } +#endif + const int posShift = SCALE_RATIO_BITS - 4; int stepX = ( scalingRatio.first + 8 ) >> 4; int stepY = ( scalingRatio.second + 8 ) >> 4; @@ -2347,17 +2400,24 @@ bool InterPrediction::xPredInterBlkRPR( const std::pair<int, int>& scalingRatio, int offX = 1 << ( posShift - shiftHor - 1 ); int offY = 1 << ( posShift - shiftVer - 1 ); - x0Int = ( ( blk.pos().x << ( 4 + ::getComponentScaleX( compID, chFmt ) ) ) + mv.getHor() )* scalingRatio.first; + x0Int = ( ( blk.pos().x << ( 4 + ::getComponentScaleX( compID, chFmt ) ) ) + mv.getHor() )* (int64_t)scalingRatio.first; x0Int = SIGN( x0Int ) * ( ( llabs( x0Int ) + ( (long long)1 << ( 7 + ::getComponentScaleX( compID, chFmt ) ) ) ) >> ( 8 + ::getComponentScaleX( compID, chFmt ) ) ); - y0Int = ( ( blk.pos().y << ( 4 + ::getComponentScaleY( compID, chFmt ) ) ) + mv.getVer() )* scalingRatio.second; + y0Int = ( ( blk.pos().y << ( 4 + ::getComponentScaleY( compID, chFmt ) ) ) + mv.getVer() )* (int64_t)scalingRatio.second; y0Int = SIGN( y0Int ) * ( ( llabs( y0Int ) + ( (long long)1 << ( 7 + ::getComponentScaleY( compID, chFmt ) ) ) ) >> ( 8 + ::getComponentScaleY( compID, chFmt ) ) ); const int extSize = isLuma( compID ) ? 1 : 2; int vFilterSize = isLuma( compID ) ? 
NTAPS_LUMA : NTAPS_CHROMA; - int refHeight = height * scalingRatio.second >> SCALE_RATIO_BITS; + int yInt0 = ( (int32_t)y0Int + offY ) >> posShift; + yInt0 = std::min( std::max( -4, yInt0 ), ( refPicHeight >> ::getComponentScaleY( compID, chFmt ) ) + 4 ); + + int xInt0 = ( (int32_t)x0Int + offX ) >> posShift; + xInt0 = std::min( std::max( -4, xInt0 ), ( refPicWidth >> ::getComponentScaleX( compID, chFmt ) ) + 4 ); + + int refHeight = ((((int32_t)y0Int + (height-1) * stepY) + offY ) >> posShift) - ((((int32_t)y0Int + 0 * stepY) + offY ) >> posShift) + 1; + refHeight = std::max<int>( 1, refHeight ); CHECK( MAX_CU_SIZE * MAX_SCALING_RATIO < refHeight + vFilterSize - 1 + extSize, "Buffer size is not enough, increase MAX_SCALING_RATIO" ); @@ -2366,12 +2426,6 @@ bool InterPrediction::xPredInterBlkRPR( const std::pair<int, int>& scalingRatio, int tmpStride = width; - int yInt0 = ( (int32_t)y0Int + offY ) >> posShift; - yInt0 = std::min( std::max( 0, yInt0 ), ( refPicHeight >> ::getComponentScaleY( compID, chFmt ) ) ); - - int xInt0 = ( (int32_t)x0Int + offX ) >> posShift; - xInt0 = std::min( std::max( 0, xInt0 ), ( refPicWidth >> ::getComponentScaleX( compID, chFmt ) ) ); - int xInt = 0, yInt = 0; for( col = 0; col < width; col++ ) @@ -2387,7 +2441,11 @@ bool InterPrediction::xPredInterBlkRPR( const std::pair<int, int>& scalingRatio, refBuf = refPic->getRecoBuf( CompArea( compID, chFmt, offset, Size( 1, refHeight ) ), wrapRef ); Pel* tempBuf = buffer + col; +#if JVET_P0088_P0353_RPR_FILTERS + m_if.filterHor( compID, (Pel*)refBuf.buf - ( ( vFilterSize >> 1 ) - 1 ) * refBuf.stride, refBuf.stride, tempBuf, tmpStride, 1, refHeight + vFilterSize - 1 + extSize, xFrac, false, chFmt, clpRng, xFilter, false, useAltHpelIf ); +#else m_if.filterHor( compID, (Pel*)refBuf.buf - ( ( vFilterSize >> 1 ) - 1 ) * refBuf.stride, refBuf.stride, tempBuf, tmpStride, 1, refHeight + vFilterSize - 1 + extSize, xFrac, false, chFmt, clpRng, filterIndex, false, useAltHpelIf ); +#endif } for( row = 0; 
row < height; row++ ) @@ -2402,7 +2460,11 @@ bool InterPrediction::xPredInterBlkRPR( const std::pair<int, int>& scalingRatio, Pel* tempBuf = buffer + ( yInt - yInt0 ) * tmpStride; JVET_J0090_SET_CACHE_ENABLE( false ); +#if JVET_P0088_P0353_RPR_FILTERS + m_if.filterVer( compID, tempBuf + ( ( vFilterSize >> 1 ) - 1 ) * tmpStride, tmpStride, dst + row * dstStride, dstStride, width, 1, yFrac, false, rndRes, chFmt, clpRng, yFilter, false, useAltHpelIf ); +#else m_if.filterVer( compID, tempBuf + ( ( vFilterSize >> 1 ) - 1 ) * tmpStride, tmpStride, dst + row * dstStride, dstStride, width, 1, yFrac, false, rndRes, chFmt, clpRng, filterIndex, false, useAltHpelIf ); +#endif JVET_J0090_SET_CACHE_ENABLE( true ); } diff --git a/source/Lib/CommonLib/InterpolationFilter.cpp b/source/Lib/CommonLib/InterpolationFilter.cpp index 1b0a09738af87c7ae5ee3df93342434857f1d6fe..f7115296e953bb600ed0f780c7b885361b15d30e 100644 --- a/source/Lib/CommonLib/InterpolationFilter.cpp +++ b/source/Lib/CommonLib/InterpolationFilter.cpp @@ -94,6 +94,50 @@ const TFilterCoeff InterpolationFilter::m_lumaFilter[LUMA_INTERPOLATION_FILTER_S { 0, 1, -2, 4, 63, -3, 1, 0 } }; +#if JVET_P0088_P0353_RPR_FILTERS +// 1.5x +const TFilterCoeff InterpolationFilter::m_lumaFilterRPR1[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA] = +{ + { -1, -5, 17, 42, 17, -5, -1, 0 }, + { 0, -5, 15, 41, 19, -5, -1, 0 }, + { 0, -5, 13, 40, 21, -4, -1, 0 }, + { 0, -5, 11, 39, 24, -4, -2, 1 }, + { 0, -5, 9, 38, 26, -3, -2, 1 }, + { 0, -5, 7, 38, 28, -2, -3, 1 }, + { 1, -5, 5, 36, 30, -1, -3, 1 }, + { 1, -4, 3, 35, 32, 0, -4, 1 }, + { 1, -4, 2, 33, 33, 2, -4, 1 }, + { 1, -4, 0, 32, 35, 3, -4, 1 }, + { 1, -3, -1, 30, 36, 5, -5, 1 }, + { 1, -3, -2, 28, 38, 7, -5, 0 }, + { 1, -2, -3, 26, 38, 9, -5, 0 }, + { 1, -2, -4, 24, 39, 11, -5, 0 }, + { 0, -1, -4, 21, 40, 13, -5, 0 }, + { 0, -1, -5, 19, 41, 15, -5, 0 } +}; + +// 2x +const TFilterCoeff 
InterpolationFilter::m_lumaFilterRPR2[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA] = +{ + { -4, 2, 20, 28, 20, 2, -4, 0 }, + { -4, 0, 19, 29, 21, 5, -4, -2 }, + { -4, -1, 18, 29, 22, 6, -4, -2 }, + { -4, -1, 16, 29, 23, 7, -4, -2 }, + { -4, -1, 16, 28, 24, 7, -4, -2 }, + { -4, -1, 14, 28, 25, 8, -4, -2 }, + { -3, -3, 14, 27, 26, 9, -3, -3 }, + { -3, -1, 12, 28, 25, 10, -4, -3 }, + { -3, -3, 11, 27, 27, 11, -3, -3 }, + { -3, -4, 10, 25, 28, 12, -1, -3 }, + { -3, -3, 9, 26, 27, 14, -3, -3 }, + { -2, -4, 8, 25, 28, 14, -1, -4 }, + { -2, -4, 7, 24, 28, 16, -1, -4 }, + { -2, -4, 7, 23, 29, 16, -1, -4 }, + { -2, -4, 6, 22, 29, 18, -1, -4 }, + { -2, -4, 5, 21, 29, 19, 0, -4 } +}; +#endif + const TFilterCoeff InterpolationFilter::m_lumaAltHpelIFilter[NTAPS_LUMA] = { 0, 3, 9, 20, 20, 9, 3, 0 }; const TFilterCoeff InterpolationFilter::m_chromaFilter[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_CHROMA] = { @@ -131,6 +175,82 @@ const TFilterCoeff InterpolationFilter::m_chromaFilter[CHROMA_INTERPOLATION_FILT { 0, 2, 63, -1 }, }; +#if JVET_P0088_P0353_RPR_FILTERS +//1.5x +const TFilterCoeff InterpolationFilter::m_chromaFilterRPR1[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_CHROMA] = +{ + { 12, 40, 12, 0 }, + { 11, 40, 13, 0 }, + { 10, 40, 15, -1 }, + { 9, 40, 16, -1 }, + { 8, 40, 17, -1 }, + { 8, 39, 18, -1 }, + { 7, 39, 19, -1 }, + { 6, 38, 21, -1 }, + { 5, 38, 22, -1 }, + { 4, 38, 23, -1 }, + { 4, 37, 24, -1 }, + { 3, 36, 25, 0 }, + { 3, 35, 26, 0 }, + { 2, 34, 28, 0 }, + { 2, 33, 29, 0 }, + { 1, 33, 30, 0 }, + { 1, 31, 31, 1 }, + { 0, 30, 33, 1 }, + { 0, 29, 33, 2 }, + { 0, 28, 34, 2 }, + { 0, 26, 35, 3 }, + { 0, 25, 36, 3 }, + { -1, 24, 37, 4 }, + { -1, 23, 38, 4 }, + { -1, 22, 38, 5 }, + { -1, 21, 38, 6 }, + { -1, 19, 39, 7 }, + { -1, 18, 39, 8 }, + { -1, 17, 40, 8 }, + { -1, 16, 40, 9 }, + { -1, 15, 40, 10 }, + { 0, 13, 40, 11 }, +}; + +//2x +const TFilterCoeff 
InterpolationFilter::m_chromaFilterRPR2[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_CHROMA] = +{ + { 17, 30, 17, 0 }, + { 17, 30, 18, -1 }, + { 16, 30, 18, 0 }, + { 16, 30, 18, 0 }, + { 15, 30, 18, 1 }, + { 14, 30, 18, 2 }, + { 13, 29, 19, 3 }, + { 13, 29, 19, 3 }, + { 12, 29, 20, 3 }, + { 11, 28, 21, 4 }, + { 10, 28, 22, 4 }, + { 10, 27, 22, 5 }, + { 9, 27, 23, 5 }, + { 9, 26, 24, 5 }, + { 8, 26, 24, 6 }, + { 7, 26, 25, 6 }, + { 7, 25, 25, 7 }, + { 6, 25, 26, 7 }, + { 6, 24, 26, 8 }, + { 5, 24, 26, 9 }, + { 5, 23, 27, 9 }, + { 5, 22, 27, 10 }, + { 4, 22, 28, 10 }, + { 4, 21, 28, 11 }, + { 3, 20, 29, 12 }, + { 3, 19, 29, 13 }, + { 3, 19, 29, 13 }, + { 2, 18, 30, 14 }, + { 1, 18, 30, 15 }, + { 0, 18, 30, 16 }, + { 0, 18, 30, 16 }, + { -1, 18, 30, 17 } +}; +#endif + const TFilterCoeff InterpolationFilter::m_bilinearFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_BILINEAR] = { { 64, 0, }, @@ -578,45 +698,69 @@ void InterpolationFilter::filterVer(const ClpRng& clpRng, Pel const *src, int sr */ void InterpolationFilter::filterHor(const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx, bool biMCForDMVR, bool useAltHpelIf) { +#if JVET_P0088_P0353_RPR_FILTERS + if( frac == 0 && nFilterIdx < 2 ) +#else if( frac == 0 ) +#endif { - m_filterCopy[true][isLast](clpRng, src, srcStride, dst, dstStride, width, height, biMCForDMVR); + m_filterCopy[true][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, biMCForDMVR ); } else if( isLuma( compID ) ) { CHECK( frac < 0 || frac >= LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction" ); if( nFilterIdx == 1 ) { - filterHor<NTAPS_BILINEAR>(clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_bilinearFilterPrec4[frac], biMCForDMVR); + filterHor<NTAPS_BILINEAR>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, 
m_bilinearFilterPrec4[frac], biMCForDMVR ); } else if( nFilterIdx == 2 ) { filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilter4x4[frac], biMCForDMVR ); } +#if JVET_P0088_P0353_RPR_FILTERS + else if( nFilterIdx == 3 ) + { + filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilterRPR1[frac], biMCForDMVR ); + } + else if( nFilterIdx == 4 ) + { + filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilterRPR2[frac], biMCForDMVR ); + } +#endif + else if( frac == 8 && useAltHpelIf ) + { + filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaAltHpelIFilter, biMCForDMVR ); + } + else if( ( width == 4 && height == 4 ) || ( width == 4 && height == ( 4 + NTAPS_LUMA - 1 ) ) ) + { + filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilter4x4[frac], biMCForDMVR ); + } else { - if (frac == 8 && useAltHpelIf) - { - filterHor<NTAPS_LUMA>(clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaAltHpelIFilter, biMCForDMVR); - } - else - { - if ((width == 4 && height == 4) || (width == 4 && height == (4 + NTAPS_LUMA - 1))) - { - filterHor<NTAPS_LUMA>(clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilter4x4[frac], biMCForDMVR); - } - else - { - filterHor<NTAPS_LUMA>(clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilter[frac], biMCForDMVR); - } + filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilter[frac], biMCForDMVR ); + } } - } else { const uint32_t csx = getComponentScaleX( compID, fmt ); CHECK( frac < 0 || csx >= 2 || ( frac << ( 1 - csx ) ) >= CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction" ); - filterHor<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_chromaFilter[frac << ( 1 - csx )], biMCForDMVR); +#if 
JVET_P0088_P0353_RPR_FILTERS + if( nFilterIdx == 3 ) + { + filterHor<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_chromaFilterRPR1[frac << ( 1 - csx )], biMCForDMVR ); + } + else if( nFilterIdx == 4 ) + { + filterHor<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_chromaFilterRPR2[frac << ( 1 - csx )], biMCForDMVR ); + } + else + { + filterHor<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_chromaFilter[frac << ( 1 - csx )], biMCForDMVR ); + } +#else + filterHor<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_chromaFilter[frac << ( 1 - csx )], biMCForDMVR ); +#endif } } @@ -639,45 +783,68 @@ void InterpolationFilter::filterHor(const ComponentID compID, Pel const *src, in */ void InterpolationFilter::filterVer(const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx, bool biMCForDMVR, bool useAltHpelIf) { +#if JVET_P0088_P0353_RPR_FILTERS + if( frac == 0 && nFilterIdx < 2 ) +#else if( frac == 0 ) +#endif { - m_filterCopy[isFirst][isLast](clpRng, src, srcStride, dst, dstStride, width, height, biMCForDMVR); + m_filterCopy[isFirst][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, biMCForDMVR ); } else if( isLuma( compID ) ) { CHECK( frac < 0 || frac >= LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction" ); - if (nFilterIdx == 1) + if( nFilterIdx == 1 ) { - filterVer<NTAPS_BILINEAR>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_bilinearFilterPrec4[frac], biMCForDMVR); + filterVer<NTAPS_BILINEAR>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_bilinearFilterPrec4[frac], biMCForDMVR ); } else if( nFilterIdx == 2 ) { filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, 
m_lumaFilter4x4[frac], biMCForDMVR ); } +#if JVET_P0088_P0353_RPR_FILTERS + else if( nFilterIdx == 3 ) + { + filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilterRPR1[frac], biMCForDMVR ); + } + else if( nFilterIdx == 4 ) + { + filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilterRPR2[frac], biMCForDMVR ); + } +#endif + else if( frac == 8 && useAltHpelIf ) + { + filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaAltHpelIFilter, biMCForDMVR ); + } + else if( width == 4 && height == 4 ) + { + filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilter4x4[frac], biMCForDMVR ); + } else { - if (frac == 8 && useAltHpelIf) - { - filterVer<NTAPS_LUMA>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaAltHpelIFilter, biMCForDMVR); - } - else - { - if (width == 4 && height == 4) - { - filterVer<NTAPS_LUMA>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilter4x4[frac], biMCForDMVR); - } - else - { - filterVer<NTAPS_LUMA>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilter[frac], biMCForDMVR); - } + filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilter[frac], biMCForDMVR ); } } - } else { const uint32_t csy = getComponentScaleY( compID, fmt ); CHECK( frac < 0 || csy >= 2 || ( frac << ( 1 - csy ) ) >= CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction" ); - filterVer<NTAPS_CHROMA>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_chromaFilter[frac << (1 - csy)], biMCForDMVR); +#if JVET_P0088_P0353_RPR_FILTERS + if( nFilterIdx == 3 ) + { + filterVer<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_chromaFilterRPR1[frac << ( 1 - csy )], biMCForDMVR ); + 
} + else if( nFilterIdx == 4 ) + { + filterVer<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_chromaFilterRPR2[frac << ( 1 - csy )], biMCForDMVR ); + } + else + { + filterVer<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_chromaFilter[frac << ( 1 - csy )], biMCForDMVR ); + } +#else + filterVer<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_chromaFilter[frac << ( 1 - csy )], biMCForDMVR ); +#endif } } diff --git a/source/Lib/CommonLib/InterpolationFilter.h b/source/Lib/CommonLib/InterpolationFilter.h index 14f49ec4d994f536459216ec3aa0c974fa35941f..f58d198ed78adcc652d38fe8e0fccacb4aaa4cd9 100644 --- a/source/Lib/CommonLib/InterpolationFilter.h +++ b/source/Lib/CommonLib/InterpolationFilter.h @@ -59,6 +59,12 @@ class InterpolationFilter public: static const TFilterCoeff m_lumaFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA]; ///< Luma filter taps static const TFilterCoeff m_chromaFilter[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_CHROMA]; ///< Chroma filter taps +#if JVET_P0088_P0353_RPR_FILTERS + static const TFilterCoeff m_lumaFilterRPR1[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA]; ///< Luma filter taps 1.5x + static const TFilterCoeff m_lumaFilterRPR2[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA]; ///< Luma filter taps 2x + static const TFilterCoeff m_chromaFilterRPR1[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_CHROMA]; ///< Chroma filter taps 1.5x + static const TFilterCoeff m_chromaFilterRPR2[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_CHROMA]; ///< Chroma filter taps 2x +#endif private: static const TFilterCoeff m_lumaAltHpelIFilter[NTAPS_LUMA]; ///< Luma filter taps static const TFilterCoeff m_bilinearFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_BILINEAR]; ///< bilinear filter taps diff --git a/source/Lib/CommonLib/LoopFilter.cpp 
b/source/Lib/CommonLib/LoopFilter.cpp index 66817bcb11e998151450660902355c0ca3592c24..c7b4d9fd2e6c5b084efdfb96327abd1824ac776a 100644 --- a/source/Lib/CommonLib/LoopFilter.cpp +++ b/source/Lib/CommonLib/LoopFilter.cpp @@ -1217,6 +1217,9 @@ void LoopFilter::xEdgeFilterChroma(const CodingUnit& cu, const DeblockEdgeDir ed const int maxFilterLengthP = m_maxFilterLengthP[COMPONENT_Cb][(pos.x-m_ctuXLumaSamples)>>m_shiftHor][(pos.y-m_ctuYLumaSamples)>>m_shiftVer]; const int maxFilterLengthQ = m_maxFilterLengthQ[COMPONENT_Cb][(pos.x-m_ctuXLumaSamples)>>m_shiftHor][(pos.y-m_ctuYLumaSamples)>>m_shiftVer]; bool largeBoundary = false; +#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX + bool isChromaHorCTBBoundary = false; +#endif if ( maxFilterLengthP >= 3 && maxFilterLengthQ >= 3 ) { largeBoundary = true; @@ -1224,7 +1227,11 @@ void LoopFilter::xEdgeFilterChroma(const CodingUnit& cu, const DeblockEdgeDir ed if (edgeDir == EDGE_HOR && pos.y % cuP.slice->getSPS()->getCTUSize() == 0) { +#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX + isChromaHorCTBBoundary = true; +#else largeBoundary = false; +#endif } for( int chromaIdx = 0; chromaIdx < 2; chromaIdx++ ) @@ -1266,10 +1273,18 @@ void LoopFilter::xEdgeFilterChroma(const CodingUnit& cu, const DeblockEdgeDir ed const int indexB = Clip3<int>(0, MAX_QP, iQP + (betaOffsetDiv2 << 1)); const int beta = sm_betaTable[indexB] * iBitdepthScale; +#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX + const int dp0 = xCalcDP(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 0), iOffset, isChromaHorCTBBoundary); +#else const int dp0 = xCalcDP(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 0), iOffset); +#endif const int dq0 = xCalcDQ(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 0), iOffset); const int subSamplingShift = ( edgeDir == EDGE_VER ) ? m_shiftVer : m_shiftHor; - const int dp3 = ( subSamplingShift == 1 ) ? 
xCalcDP(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 1), iOffset) : xCalcDP(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 3), iOffset); +#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX + const int dp3 = (subSamplingShift == 1) ? xCalcDP(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 1), iOffset, isChromaHorCTBBoundary) : xCalcDP(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 3), iOffset, isChromaHorCTBBoundary); +#else + const int dp3 = (subSamplingShift == 1) ? xCalcDP(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 1), iOffset) : xCalcDP(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 3), iOffset); +#endif const int dq3 = ( subSamplingShift == 1 ) ? xCalcDQ(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 1), iOffset) : xCalcDQ(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 3), iOffset); const int d0 = dp0 + dq0; @@ -1279,12 +1294,24 @@ void LoopFilter::xEdgeFilterChroma(const CodingUnit& cu, const DeblockEdgeDir ed if (d < beta) { useLongFilter = true; +#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX + const bool sw = xUseStrongFiltering(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 0), iOffset, 2 * d0, beta, iTc, false, false, 7, 7, isChromaHorCTBBoundary) +#else const bool sw = xUseStrongFiltering(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 0), iOffset, 2 * d0, beta, iTc) - && xUseStrongFiltering(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + ( ( subSamplingShift == 1 ) ? 1 : 3 ) ), iOffset, 2 * d3, beta, iTc); +#endif +#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX + && xUseStrongFiltering(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + ((subSamplingShift == 1) ? 1 : 3)), iOffset, 2 * d3, beta, iTc, false, false, 7, 7, isChromaHorCTBBoundary); +#else + && xUseStrongFiltering(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + ((subSamplingShift == 1) ? 
1 : 3)), iOffset, 2 * d3, beta, iTc); +#endif for (unsigned step = 0; step < uiLoopLength; step++) { +#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX + xPelFilterChroma(piTmpSrcChroma + iSrcStep*(step + iIdx*uiLoopLength), iOffset, iTc, sw, bPartPNoFilter, bPartQNoFilter, clpRng, largeBoundary, isChromaHorCTBBoundary); +#else xPelFilterChroma(piTmpSrcChroma + iSrcStep*(step + iIdx*uiLoopLength), iOffset, iTc, sw, bPartPNoFilter, bPartQNoFilter, clpRng, largeBoundary); +#endif } } } @@ -1292,7 +1319,11 @@ void LoopFilter::xEdgeFilterChroma(const CodingUnit& cu, const DeblockEdgeDir ed { for (unsigned step = 0; step < uiLoopLength; step++) { +#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX + xPelFilterChroma(piTmpSrcChroma + iSrcStep*(step + iIdx*uiLoopLength), iOffset, iTc, false, bPartPNoFilter, bPartQNoFilter, clpRng, largeBoundary, isChromaHorCTBBoundary); +#else xPelFilterChroma(piTmpSrcChroma + iSrcStep*(step + iIdx*uiLoopLength), iOffset, iTc, false, bPartPNoFilter, bPartQNoFilter, clpRng, largeBoundary); +#endif } } } @@ -1512,7 +1543,11 @@ inline void LoopFilter::xPelFilterLuma(Pel* piSrc, const int iOffset, const int \param bPartQNoFilter indicator to disable filtering on partQ \param bitDepthChroma chroma bit depth */ -inline void LoopFilter::xPelFilterChroma( Pel* piSrc, const int iOffset, const int tc, const bool sw, const bool bPartPNoFilter, const bool bPartQNoFilter, const ClpRng& clpRng, const bool largeBoundary ) const +#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX +inline void LoopFilter::xPelFilterChroma(Pel* piSrc, const int iOffset, const int tc, const bool sw, const bool bPartPNoFilter, const bool bPartQNoFilter, const ClpRng& clpRng, const bool largeBoundary, const bool isChromaHorCTBBoundary) const +#else +inline void LoopFilter::xPelFilterChroma(Pel* piSrc, const int iOffset, const int tc, const bool sw, const bool bPartPNoFilter, const bool bPartQNoFilter, const ClpRng& clpRng, const bool largeBoundary) const +#endif { int delta; @@ -1527,12 +1562,31 @@ 
inline void LoopFilter::xPelFilterChroma( Pel* piSrc, const int iOffset, const i if (sw) { +#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX + if (isChromaHorCTBBoundary) + { + piSrc[-iOffset * 1] = Clip3(m3 - tc, m3 + tc, ((3 * m2 + 2 * m3 + m4 + m5 + m6 + 4) >> 3)); // p0 + piSrc[0] = Clip3(m4 - tc, m4 + tc, ((2 * m2 + m3 + 2 * m4 + m5 + m6 + m7 + 4) >> 3)); // q0 + piSrc[iOffset * 1] = Clip3(m5 - tc, m5 + tc, ((m2 + m3 + m4 + 2 * m5 + m6 + 2 * m7 + 4) >> 3)); // q1 + piSrc[iOffset * 2] = Clip3(m6 - tc, m6 + tc, ((m3 + m4 + m5 + 2 * m6 + 3 * m7 + 4) >> 3)); // q2 + } + else + { piSrc[-iOffset * 3] = Clip3(m1 - tc, m1 + tc, ((3 * m0 + 2 * m1 + m2 + m3 + m4 + 4) >> 3)); // p2 piSrc[-iOffset * 2] = Clip3(m2 - tc, m2 + tc, ((2 * m0 + m1 + 2 * m2 + m3 + m4 + m5 + 4) >> 3)); // p1 piSrc[-iOffset * 1] = Clip3(m3 - tc, m3 + tc, ((m0 + m1 + m2 + 2 * m3 + m4 + m5 + m6 + 4) >> 3)); // p0 - piSrc[0] = Clip3(m4 - tc, m4 + tc, ((m1 + m2 + m3 + 2 * m4 + m5 + m6 + m7 + 4) >> 3)); // q0 - piSrc[iOffset * 1] = Clip3(m5 - tc, m5 + tc, ((m2 + m3 + m4 + 2 * m5 + m6 + 2 * m7 + 4) >> 3)); // q1 - piSrc[iOffset * 2] = Clip3(m6 - tc, m6 + tc, ((m3 + m4 + m5 + 2 * m6 + 3 * m7 + 4) >> 3)); // q2 + piSrc[0] = Clip3(m4 - tc, m4 + tc, ((m1 + m2 + m3 + 2 * m4 + m5 + m6 + m7 + 4) >> 3)); // q0 + piSrc[iOffset * 1] = Clip3(m5 - tc, m5 + tc, ((m2 + m3 + m4 + 2 * m5 + m6 + 2 * m7 + 4) >> 3)); // q1 + piSrc[iOffset * 2] = Clip3(m6 - tc, m6 + tc, ((m3 + m4 + m5 + 2 * m6 + 3 * m7 + 4) >> 3)); // q2 + } +#else + piSrc[-iOffset * 3] = Clip3(m1 - tc, m1 + tc, ((3 * m0 + 2 * m1 + m2 + m3 + m4 + 4) >> 3)); // p2 + piSrc[-iOffset * 2] = Clip3(m2 - tc, m2 + tc, ((2 * m0 + m1 + 2 * m2 + m3 + m4 + m5 + 4) >> 3)); // p1 + piSrc[-iOffset * 1] = Clip3(m3 - tc, m3 + tc, ((m0 + m1 + m2 + 2 * m3 + m4 + m5 + m6 + 4) >> 3)); // p0 + piSrc[0] = Clip3(m4 - tc, m4 + tc, ((m1 + m2 + m3 + 2 * m4 + m5 + m6 + m7 + 4) >> 3)); // q0 + piSrc[iOffset * 1] = Clip3(m5 - tc, m5 + tc, ((m2 + m3 + m4 + 2 * m5 + m6 + 2 * m7 + 4) >> 3)); // 
q1 + piSrc[iOffset * 2] = Clip3(m6 - tc, m6 + tc, ((m3 + m4 + m5 + 2 * m6 + 3 * m7 + 4) >> 3)); // q2 +#endif } else { @@ -1571,13 +1625,26 @@ inline void LoopFilter::xPelFilterChroma( Pel* piSrc, const int iOffset, const i \param tc tc value \param piSrc pointer to picture data */ -inline bool LoopFilter::xUseStrongFiltering( Pel* piSrc, const int iOffset, const int d, const int beta, const int tc, bool sidePisLarge, bool sideQisLarge, int maxFilterLengthP, int maxFilterLengthQ ) const +#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX +inline bool LoopFilter::xUseStrongFiltering(Pel* piSrc, const int iOffset, const int d, const int beta, const int tc, bool sidePisLarge, bool sideQisLarge, int maxFilterLengthP, int maxFilterLengthQ, bool isChromaHorCTBBoundary) const +#else +inline bool LoopFilter::xUseStrongFiltering(Pel* piSrc, const int iOffset, const int d, const int beta, const int tc, bool sidePisLarge, bool sideQisLarge, int maxFilterLengthP, int maxFilterLengthQ) const +#endif { const Pel m4 = piSrc[ 0 ]; const Pel m3 = piSrc[-iOffset ]; const Pel m7 = piSrc[ iOffset * 3]; const Pel m0 = piSrc[-iOffset * 4]; - int sp3 = abs(m0 - m3); +#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX + const Pel m2 = piSrc[-iOffset * 2]; + int sp3 = abs(m0 - m3); + if (isChromaHorCTBBoundary) + { + sp3 = abs(m2 - m3); + } +#else + int sp3 = abs(m0 - m3); +#endif int sq3 = abs(m7 - m4); const int d_strong = sp3 + sq3; @@ -1616,9 +1683,24 @@ inline bool LoopFilter::xUseStrongFiltering( Pel* piSrc, const int iOffset, cons return ( ( d_strong < ( beta >> 3 ) ) && ( d < ( beta >> 2 ) ) && ( abs( m3 - m4 ) < ( ( tc * 5 + 1 ) >> 1 ) ) ); } -inline int LoopFilter::xCalcDP( Pel* piSrc, const int iOffset ) const +#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX +inline int LoopFilter::xCalcDP(Pel* piSrc, const int iOffset, const bool isChromaHorCTBBoundary) const +#else +inline int LoopFilter::xCalcDP(Pel* piSrc, const int iOffset) const +#endif { - return abs( piSrc[-iOffset * 3] - 2 * piSrc[-iOffset * 
2] + piSrc[-iOffset] ); +#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX + if (isChromaHorCTBBoundary) + { + return abs(piSrc[-iOffset * 2] - 2 * piSrc[-iOffset * 2] + piSrc[-iOffset]); + } + else + { + return abs(piSrc[-iOffset * 3] - 2 * piSrc[-iOffset * 2] + piSrc[-iOffset]); + } +#else + return abs(piSrc[-iOffset * 3] - 2 * piSrc[-iOffset * 2] + piSrc[-iOffset]); +#endif } inline int LoopFilter::xCalcDQ( Pel* piSrc, const int iOffset ) const diff --git a/source/Lib/CommonLib/LoopFilter.h b/source/Lib/CommonLib/LoopFilter.h index 0b2e05f6a67d1a2a7363038171846e081dd84abb..4191d7146fd9d3f4367949802eff14eb66ff592d 100644 --- a/source/Lib/CommonLib/LoopFilter.h +++ b/source/Lib/CommonLib/LoopFilter.h @@ -91,15 +91,27 @@ private: inline void xBilinearFilter ( Pel* srcP, Pel* srcQ, int offset, int refMiddle, int refP, int refQ, int numberPSide, int numberQSide, const int* dbCoeffsP, const int* dbCoeffsQ, int tc ) const; inline void xFilteringPandQ ( Pel* src, int offset, int numberPSide, int numberQSide, int tc ) const; inline void xPelFilterLuma ( Pel* piSrc, const int iOffset, const int tc, const bool sw, const bool bPartPNoFilter, const bool bPartQNoFilter, const int iThrCut, const bool bFilterSecondP, const bool bFilterSecondQ, const ClpRng& clpRng, bool sidePisLarge = false, bool sideQisLarge = false, int maxFilterLengthP = 7, int maxFilterLengthQ = 7 ) const; - inline void xPelFilterChroma ( Pel* piSrc, const int iOffset, const int tc, const bool sw, const bool bPartPNoFilter, const bool bPartQNoFilter, const ClpRng& clpRng, const bool largeBoundary ) const; - inline bool xUseStrongFiltering ( Pel* piSrc, const int iOffset, const int d, const int beta, const int tc, bool sidePisLarge = false, bool sideQisLarge = false, int maxFilterLengthP = 7, int maxFilterLengthQ = 7 ) const;//move the computation outside the function +#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX + inline void xPelFilterChroma(Pel* piSrc, const int iOffset, const int tc, const bool sw, const bool 
bPartPNoFilter, const bool bPartQNoFilter, const ClpRng& clpRng, const bool largeBoundary, const bool isChromaHorCTBBoundary) const; +#else + inline void xPelFilterChroma(Pel* piSrc, const int iOffset, const int tc, const bool sw, const bool bPartPNoFilter, const bool bPartQNoFilter, const ClpRng& clpRng, const bool largeBoundary) const; +#endif +#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX + inline bool xUseStrongFiltering(Pel* piSrc, const int iOffset, const int d, const int beta, const int tc, bool sidePisLarge = false, bool sideQisLarge = false, int maxFilterLengthP = 7, int maxFilterLengthQ = 7, bool isChromaHorCTBBoundary = false) const;//move the computation outside the function +#else + inline bool xUseStrongFiltering(Pel* piSrc, const int iOffset, const int d, const int beta, const int tc, bool sidePisLarge = false, bool sideQisLarge = false, int maxFilterLengthP = 7, int maxFilterLengthQ = 7) const;//move the computation outside the function +#endif inline unsigned BsSet(unsigned val, const ComponentID compIdx) const; inline unsigned BsGet(unsigned val, const ComponentID compIdx) const; inline bool isCrossedByVirtualBoundaries ( const int xPos, const int yPos, const int width, const int height, int& numHorVirBndry, int& numVerVirBndry, int horVirBndryPos[], int verVirBndryPos[], const PPS* pps ); inline void xDeriveEdgefilterParam ( const int xPos, const int yPos, const int numVerVirBndry, const int numHorVirBndry, const int verVirBndryPos[], const int horVirBndryPos[], bool &verEdgeFilter, bool &horEdgeFilter ); - inline int xCalcDP ( Pel* piSrc, const int iOffset ) const; +#if JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX + inline int xCalcDP(Pel* piSrc, const int iOffset, const bool isChromaHorCTBBoundary = false) const; +#else + inline int xCalcDP(Pel* piSrc, const int iOffset) const; +#endif inline int xCalcDQ ( Pel* piSrc, const int iOffset ) const; static const uint16_t sm_tcTable[MAX_QP + 3]; static const uint8_t sm_betaTable[MAX_QP + 1]; diff --git 
a/source/Lib/CommonLib/QuantRDOQ.cpp b/source/Lib/CommonLib/QuantRDOQ.cpp index 212f46d1b246444895d703ab0d6b52f9526ac4f4..e72c83a866e6d92351b9580d06fac4d1a3877894 100644 --- a/source/Lib/CommonLib/QuantRDOQ.cpp +++ b/source/Lib/CommonLib/QuantRDOQ.cpp @@ -1749,7 +1749,15 @@ inline uint32_t QuantRDOQ::xGetCodedLevelTSPred(double& rd64CodedCost double dErr = 0.0; dErr = double(levelDouble - (Intermediate_Int(absLevel) << qBits)); coeffLevelError[errorInd] = dErr * dErr * errorScale; +#if JVET_P0298_DISABLE_LEVELMAPPING_IN_BYPASS + int modAbsLevel = absLevel; + if (cctx.numCtxBins() >= 4) + { + modAbsLevel = cctx.deriveModCoeff(rightPixel, belowPixel, absLevel, m_bdpcm); + } +#else int modAbsLevel = cctx.deriveModCoeff(rightPixel, belowPixel, absLevel, m_bdpcm); +#endif #if JVET_P0072_SIMPLIFIED_TSRC int numCtxBins = 0; double dCurrCost = coeffLevelError[errorInd] + xGetICost(xGetICRateTS(modAbsLevel, fracBitsPar, cctx, fracBitsAccess, fracBitsSign, fracBitsGt1, numCtxBins, sign, ricePar, useLimitedPrefixLength, maxLog2TrDynamicRange)); diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index baa0221479fc19880525067166914eb039e97d98..863883a45704cecc4de5c04e6558777daa208533 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -50,7 +50,15 @@ #include <assert.h> #include <cassert> -#define JVET_P1018_IBC_NO_WRAPAROUND 1 // JVET-P1018: Disable reference sample wrapping around +#define JVET_P0298_DISABLE_LEVELMAPPING_IN_BYPASS 1 // JVET-P0298: Disable level mapping in bypass mode + +#define JVET_P0325_CHANGE_MERGE_CANDIDATE_ORDER 1 // JVET-P0325: reorder the spatial merge candidates + +#define JVET_P1018_IBC_NO_WRAPAROUND 1 // JVET-P1018: Disable reference sample wrapping around + +#define JVET_P0578_MINIMUM_CU_SIZE_CONSTRAINT 1 // JVET-P0578: minimum CU size constraint + +#define JVET_P0091_REMOVE_BDOF_OFFSET_SHIFT 1 // JVET-P0091: Align sample offset calculation of BDOF and PROF #define 
JVET_P0512_SIMD_HIGH_BITDEPTH 1 // JVET-P0512: MC SIMD support for high internal bit-depthf @@ -76,6 +84,8 @@ #define JVET_P0436_CQP_OFFSET_SIGNALLING 1 // JVET_P0436: CU chroma QP offset signalling consistent with VPDU and bugfix +#define JVET_P0505_ALF_CLIP_VALUE 1 // JVET-P0505: Modified non-linear ALF clipping value derivations + #define JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING 1 // JVET-P0154/P0094/P0172/P0413/P0518/P0281: Clip the PROF sample offset to 14-bit #define JVET_P1023_DMVR_BDOF_RP_CONDITION 1 // JVET_P1023: Reference picture conditions in DMVR and BDOF @@ -86,6 +96,8 @@ #define JVET_P0164_ALF_SYNTAX_SIMP 1 // JVET-p0164: simplify alf syntax with method2 +#define JVET_O0549_ENCODER_ONLY_FILTER 1 // JVET-O0549: Encoder-only temporal filter, no decoder changes + #define JVET_P0042_FIX_INTER_DIR_CTX 1 // JVET-P0042: Fix overlap in context between the bi-pred flag for 8x8 CUs and the L0/L1 flag for all size CUs #define JVET_P0111_CHROMA_422_FIX 1 // JVET-P0422: Bug fix of chroma 422 intra mode mapping @@ -94,6 +106,8 @@ #define JVET_P0329_PLANAR_SIMPLIFICATION 1 // JVET-P0329: simplify planar prediction by comparison removal +#define JVET_P0081_CHROMA_LONG_DEBLOCKING_FIX 1 // JVET-P0081: Apply asymmetric long tap deblocking (1 + 3) filter at horizontal CTB boundaries for Chroma + #define JVET_P0516_PLT_BINARIZATION 1 // JVET-P0516: PLT is always signaled when pred mode is euqal to 1 (intra mode) #define JVET_P0562_TS_RESIDUAL_CODING_SIMP 1 // JVET-P0562: Fix the Rice parameter equal to 1 for the remainder of TS residual coding @@ -106,6 +120,8 @@ #define JVET_O0145_ENTRYPOINT_SIGNALLING 0 // JVET-O0145: Not signalling num_entry_point_offsets but derive it at decoder +#define JVET_P0088_P0353_RPR_FILTERS 1 // JVET-P0088 and JVET-P0353 Filters to use for downsampling in RPR + #define JVET_O0625_ALF_PADDING 1 // JVET-O0625/O0654/O0662: Unified padding method in ALF #if !JVET_P0400_REMOVE_SHARED_MERGE_LIST diff --git a/source/Lib/CommonLib/UnitTools.cpp 
b/source/Lib/CommonLib/UnitTools.cpp index 5c0e1c37fcb93ea5206d6e9b01604923f2d779b4..467d56ecbe11baabb1f5ca2b7a8317be905b69dc 100644 --- a/source/Lib/CommonLib/UnitTools.cpp +++ b/source/Lib/CommonLib/UnitTools.cpp @@ -893,6 +893,72 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, const Position posLB = pu.Y().bottomLeft(); MotionInfo miAbove, miLeft, miAboveLeft, miAboveRight, miBelowLeft; +#if JVET_P0325_CHANGE_MERGE_CANDIDATE_ORDER + // above + const PredictionUnit *puAbove = cs.getPURestricted(posRT.offset(0, -1), pu, pu.chType); + + bool isAvailableB1 = puAbove && isDiffMER(pu, *puAbove) && pu.cu != puAbove->cu && CU::isInter(*puAbove->cu); + + if (isAvailableB1) + { + miAbove = puAbove->getMotionInfo(posRT.offset(0, -1)); + + // get Inter Dir + mrgCtx.interDirNeighbours[cnt] = miAbove.interDir; + mrgCtx.useAltHpelIf[cnt] = miAbove.useAltHpelIf; + // get Mv from Above + mrgCtx.GBiIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? puAbove->cu->GBiIdx : GBI_DEFAULT; + mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miAbove.mv[0], miAbove.refIdx[0]); + + if (slice.isInterB()) + { + mrgCtx.mvFieldNeighbours[(cnt << 1) + 1].setMvField(miAbove.mv[1], miAbove.refIdx[1]); + } + if (mrgCandIdx == cnt && canFastExit) + { + return; + } + + cnt++; + } + + // early termination + if (cnt == maxNumMergeCand) + { + return; + } + + //left + const PredictionUnit* puLeft = cs.getPURestricted(posLB.offset(-1, 0), pu, pu.chType); + + const bool isAvailableA1 = puLeft && isDiffMER(pu, *puLeft) && pu.cu != puLeft->cu && CU::isInter(*puLeft->cu); + + if (isAvailableA1) + { + miLeft = puLeft->getMotionInfo(posLB.offset(-1, 0)); + + if (!isAvailableB1 || (miAbove != miLeft)) + { + // get Inter Dir + mrgCtx.interDirNeighbours[cnt] = miLeft.interDir; + mrgCtx.useAltHpelIf[cnt] = miLeft.useAltHpelIf; + mrgCtx.GBiIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? 
puLeft->cu->GBiIdx : GBI_DEFAULT; + // get Mv from Left + mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miLeft.mv[0], miLeft.refIdx[0]); + + if (slice.isInterB()) + { + mrgCtx.mvFieldNeighbours[(cnt << 1) + 1].setMvField(miLeft.mv[1], miLeft.refIdx[1]); + } + if (mrgCandIdx == cnt && canFastExit) + { + return; + } + + cnt++; + } + } +#else //left const PredictionUnit* puLeft = cs.getPURestricted( posLB.offset( -1, 0 ), pu, pu.chType ); @@ -960,6 +1026,7 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, cnt++; } } +#endif // early termination if( cnt == maxNumMergeCand ) diff --git a/source/Lib/CommonLib/x86/BufferX86.h b/source/Lib/CommonLib/x86/BufferX86.h index dfce5c6d391b0420236137a085e225ce815358fc..b91ef72f2fada8d561d6b9582587ba08e4048d3c 100644 --- a/source/Lib/CommonLib/x86/BufferX86.h +++ b/source/Lib/CommonLib/x86/BufferX86.h @@ -248,9 +248,14 @@ void addBIOAvg4_SSE(const Pel* src0, int src0Stride, const Pel* src1, int src1St a = _mm_unpacklo_epi16(_mm_loadl_epi64((const __m128i *) (src0 + x)), _mm_loadl_epi64((const __m128i *) (src1 + x))); +#if JVET_P0091_REMOVE_BDOF_OFFSET_SHIFT + sum = _mm_add_epi32(sum, _mm_set1_epi32(2 * offset)); + sum = _mm_sra_epi32(sum, _mm_cvtsi32_si128(shift)); +#else sum = _mm_add_epi32(sum, _mm_madd_epi16(a, _mm_set1_epi16(2))); sum = _mm_add_epi32(sum, _mm_set1_epi32(2 * offset + 1)); sum = _mm_sra_epi32(sum, _mm_cvtsi32_si128(shift + 1)); +#endif sum = _mm_packs_epi32(sum, sum); sum = _mm_max_epi16(sum, vibdimin); sum = _mm_min_epi16(sum, vibdimax); diff --git a/source/Lib/DecoderLib/CABACReader.cpp b/source/Lib/DecoderLib/CABACReader.cpp index b5804f31d5d26d4495372608995ca6f576031d0f..260c5751ff7efb6e827f63545d370e7a20bbb8e7 100644 --- a/source/Lib/DecoderLib/CABACReader.cpp +++ b/source/Lib/DecoderLib/CABACReader.cpp @@ -3590,7 +3590,11 @@ void CABACReader::residual_coding_subblockTS( CoeffCodingContext& cctx, TCoeff* tcoeff += ( rem << 1 ); #endif } +#if 
JVET_P0298_DISABLE_LEVELMAPPING_IN_BYPASS + if (!cctx.bdpcm() && cutoffVal) +#else if (!cctx.bdpcm()) +#endif { if (tcoeff > 0) { diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp index d52a2f480a05db04310582b935852dace4a23e9e..09c625f2662a33307d7fd68813e92234e789587f 100644 --- a/source/Lib/DecoderLib/VLCReader.cpp +++ b/source/Lib/DecoderLib/VLCReader.cpp @@ -1171,7 +1171,9 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS) pcSPS->setMinQpPrimeTsMinus4(CHANNEL_TYPE_LUMA, uiCode); #if JVET_P0460_PLT_TS_MIN_QP pcSPS->setMinQpPrimeTsMinus4(CHANNEL_TYPE_CHROMA, uiCode); -#endif +#endif + READ_FLAG( uiCode, "sps_weighted_pred_flag" ); pcSPS->setUseWP( uiCode ? true : false ); + READ_FLAG( uiCode, "sps_weighted_bipred_flag" ); pcSPS->setUseWPBiPred( uiCode ? true : false ); READ_UVLC( uiCode, "log2_max_pic_order_cnt_lsb_minus4" ); pcSPS->setBitsForPOC( 4 + uiCode ); CHECK(uiCode > 12, "Invalid code"); @@ -1266,6 +1268,9 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS) int log2MinCUSize = uiCode + 2; pcSPS->setLog2MinCodingBlockSize(log2MinCUSize); +#if JVET_P0578_MINIMUM_CU_SIZE_CONSTRAINT + CHECK(log2MinCUSize > std::min(6, (int)(ctbLog2SizeY)), "log2_min_luma_coding_block_size_minus2 shall be in the range of 0 to min (4, log2_ctu_size - 2)"); +#endif CHECK( ( pcSPS->getMaxPicWidthInLumaSamples() % ( std::max( 8, int( pcSPS->getMaxCUWidth() >> ( pcSPS->getMaxCodingDepth() - 1 ) ) ) ) ) != 0, "Coded frame width must be a multiple of Max(8, the minimum unit size)" ); CHECK( ( pcSPS->getMaxPicHeightInLumaSamples() % ( std::max( 8, int( pcSPS->getMaxCUHeight() >> ( pcSPS->getMaxCodingDepth() - 1 ) ) ) ) ) != 0, "Coded frame height must be a multiple of Max(8, the minimum unit size)" ); @@ -1336,13 +1341,9 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS) pcSPS->derivedChromaQPMappingTables(); } - READ_FLAG( uiCode, "sps_weighted_pred_flag" ); pcSPS->setUseWP( uiCode ? 
true : false ); - READ_FLAG( uiCode, "sps_weighted_bipred_flag" ); pcSPS->setUseWPBiPred( uiCode ? true : false ); - READ_FLAG( uiCode, "sps_sao_enabled_flag" ); pcSPS->setSAOEnabledFlag ( uiCode ? true : false ); READ_FLAG( uiCode, "sps_alf_enabled_flag" ); pcSPS->setALFEnabledFlag ( uiCode ? true : false ); - READ_FLAG(uiCode, "sps_transform_skip_enabled_flag"); pcSPS->setTransformSkipEnabledFlag(uiCode ? true : false); if (pcSPS->getTransformSkipEnabledFlag()) { diff --git a/source/Lib/EncoderLib/CABACWriter.cpp b/source/Lib/EncoderLib/CABACWriter.cpp index b4ec13d5107cf96f9af1a3404903b21d69475aae..6d9ab1d4a2f624f10b31b4f0cc32f8e4f9c40544 100644 --- a/source/Lib/EncoderLib/CABACWriter.cpp +++ b/source/Lib/EncoderLib/CABACWriter.cpp @@ -3290,10 +3290,16 @@ void CABACWriter::residual_coding_subblockTS( CoeffCodingContext& cctx, const TC { unsigned absLevel; cctx.neighTS(rightPixel, belowPixel, scanPos, coeff); +#if JVET_P0298_DISABLE_LEVELMAPPING_IN_BYPASS + cutoffVal = (scanPos <= lastScanPosPass2 ? 10 : (scanPos <= lastScanPosPass1 ? 2 : 0)); + absLevel = cctx.deriveModCoeff(rightPixel, belowPixel, abs(coeff[cctx.blockPos(scanPos)]), cctx.bdpcm()||!cutoffVal); +#else absLevel = cctx.deriveModCoeff(rightPixel, belowPixel, abs(coeff[cctx.blockPos(scanPos)]), cctx.bdpcm()); #if JVET_P0072_SIMPLIFIED_TSRC cutoffVal = (scanPos <= lastScanPosPass2 ? 10 : (scanPos <= lastScanPosPass1 ? 
2 : 0)); #endif +#endif + if( absLevel >= cutoffVal ) { int rice = cctx.templateAbsSumTS( scanPos, coeff ); diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h index 043a6356775d9bc59363972e0a8a1e08ec9e58b6..ce58afcab59f5507930dc3f297b212389748fc80 100644 --- a/source/Lib/EncoderLib/EncCfg.h +++ b/source/Lib/EncoderLib/EncCfg.h @@ -450,6 +450,9 @@ protected: bool m_bFastUDIUseMPMEnabled; bool m_bFastMEForGenBLowDelayEnabled; bool m_bUseBLambdaForNonKeyLowDelayPictures; +#if JVET_O0549_ENCODER_ONLY_FILTER + bool m_gopBasedTemporalFilterEnabled; +#endif //====== Slice ======== SliceConstraint m_sliceMode; int m_sliceArgument; @@ -1192,6 +1195,10 @@ public: bool getFastUDIUseMPMEnabled () { return m_bFastUDIUseMPMEnabled; } bool getFastMEForGenBLowDelayEnabled () { return m_bFastMEForGenBLowDelayEnabled; } bool getUseBLambdaForNonKeyLowDelayPictures () { return m_bUseBLambdaForNonKeyLowDelayPictures; } +#if JVET_O0549_ENCODER_ONLY_FILTER + void setGopBasedTemporalFilterEnabled(bool flag) { m_gopBasedTemporalFilterEnabled = flag; } + bool getGopBasedTemporalFilterEnabled() { return m_gopBasedTemporalFilterEnabled; } +#endif bool getCrossComponentPredictionEnabledFlag () const { return m_crossComponentPredictionEnabledFlag; } void setCrossComponentPredictionEnabledFlag (const bool value) { m_crossComponentPredictionEnabledFlag = value; } diff --git a/source/Lib/EncoderLib/EncGOP.cpp b/source/Lib/EncoderLib/EncGOP.cpp index 88fc69c47a25ad044b010df85ae48478da75af79..c3b588dbee0506d52da2fcbf9f7277778ab19f12 100644 --- a/source/Lib/EncoderLib/EncGOP.cpp +++ b/source/Lib/EncoderLib/EncGOP.cpp @@ -3645,7 +3645,11 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni const CPelUnitBuf& pic = cPicD; CHECK(!(conversion == IPCOLOURSPACE_UNCHANGED), "Unspecified error"); // const CPelUnitBuf& org = (conversion != IPCOLOURSPACE_UNCHANGED) ? 
pcPic->getPicYuvTrueOrg()->getBuf() : pcPic->getPicYuvOrg()->getBuf(); +#if JVET_O0549_ENCODER_ONLY_FILTER + const CPelUnitBuf& org = (sps.getUseReshaper() || m_pcCfg->getGopBasedTemporalFilterEnabled()) ? pcPic->getTrueOrigBuf() : pcPic->getOrigBuf(); +#else const CPelUnitBuf& org = sps.getUseReshaper() ? pcPic->getTrueOrigBuf() : pcPic->getOrigBuf(); +#endif #if ENABLE_QPA const bool useWPSNR = m_pcEncLib->getUseWPSNR(); #endif diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp index b1188a23bc1a7eb23115a88dfe4c54fe710e944a..cfd0e5e35a1ecd6ce2d4a70da830dd78329e5280 100644 --- a/source/Lib/EncoderLib/EncLib.cpp +++ b/source/Lib/EncoderLib/EncLib.cpp @@ -1025,7 +1025,9 @@ void EncLib::xInitSPS(SPS &sps) } sps.setLog2MinCodingBlockSize(log2MinCUSize); - +#if JVET_P0578_MINIMUM_CU_SIZE_CONSTRAINT + CHECK(log2MinCUSize > std::min(6, floorLog2(sps.getMaxCUWidth())), "log2_min_luma_coding_block_size_minus2 shall be in the range of 0 to min (4, log2_ctu_size - 2)"); +#endif sps.setTransformSkipEnabledFlag(m_useTransformSkip); sps.setBDPCMEnabledFlag(m_useBDPCM); diff --git a/source/Lib/EncoderLib/EncReshape.cpp b/source/Lib/EncoderLib/EncReshape.cpp index 247c4cb3c650e438e81c6b9ae0f038791b0cfc52..b92b0cdb46b4da91a6835d9046c3a23045cae6d3 100644 --- a/source/Lib/EncoderLib/EncReshape.cpp +++ b/source/Lib/EncoderLib/EncReshape.cpp @@ -1016,8 +1016,6 @@ void EncReshape::constructReshaperLMCS() } } - adjustLmcsPivot(); - if (bdShift != 0) { for (int i = 0; i < PIC_ANALYZE_CW_BINS; i++) @@ -1026,6 +1024,8 @@ void EncReshape::constructReshaperLMCS() } } + adjustLmcsPivot(); + int maxAbsDeltaCW = 0, absDeltaCW = 0, deltaCW = 0; for (int i = m_sliceReshapeInfo.reshaperModelMinBinIdx; i <= m_sliceReshapeInfo.reshaperModelMaxBinIdx; i++) { @@ -1080,7 +1080,7 @@ void EncReshape::adjustLmcsPivot() int bdShift = m_lumaBD - 10; int totCW = bdShift != 0 ? (bdShift > 0 ? 
m_reshapeLUTSize / (1 << bdShift) : m_reshapeLUTSize * (1 << (-bdShift))) : m_reshapeLUTSize; int orgCW = totCW / PIC_CODE_CW_BINS; - int log2SegSize = floorLog2(LMCS_SEG_SIZE); + int log2SegSize = m_lumaBD - floorLog2(LMCS_SEG_NUM); m_reshapePivot[0] = 0; for (int i = 0; i < PIC_CODE_CW_BINS; i++) diff --git a/source/Lib/EncoderLib/EncSlice.cpp b/source/Lib/EncoderLib/EncSlice.cpp index b3a604e5c576c6023a3244a7f0bb8e95b74ccab7..79620960b9412aa2be0dee2d218eed2df76b2196 100644 --- a/source/Lib/EncoderLib/EncSlice.cpp +++ b/source/Lib/EncoderLib/EncSlice.cpp @@ -1345,6 +1345,7 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c #endif m_pcInterSearch->resetAffineMVList(); m_pcInterSearch->resetUniMvList(); + ::memset(g_isReusedUniMVsFilled, 0, sizeof(g_isReusedUniMVsFilled)); encodeCtus( pcPic, bCompressEntireSlice, bFastDeltaQP, startCtuTsAddr, boundingCtuTsAddr, m_pcLib ); if (checkPLTRatio) m_pcLib->checkPltStats( pcPic ); } diff --git a/source/Lib/EncoderLib/EncTemporalFilter.cpp b/source/Lib/EncoderLib/EncTemporalFilter.cpp new file mode 100644 index 0000000000000000000000000000000000000000..147d0cbb9c1e2dea44c279a821ee66dea2024d11 --- /dev/null +++ b/source/Lib/EncoderLib/EncTemporalFilter.cpp @@ -0,0 +1,628 @@ +/* The copyright in this software is being made available under the BSD +* License, included below. This software may be subject to other third party +* and contributor rights, including patent rights, and no such rights are +* granted under this license. +* +* Copyright (c) 2010-2019, ITU/ISO/IEC +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, +* this list of conditions and the following disclaimer. 
+* * Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may +* be used to endorse or promote products derived from this software without +* specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +* THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +/** \file EncTemporalFilter.cpp +\brief EncTemporalFilter class +*/ + +#include "EncTemporalFilter.h" +#include <math.h> + +#if JVET_O0549_ENCODER_ONLY_FILTER + +// ==================================================================================================================== +// Constructor / destructor / initialization / destroy +// ==================================================================================================================== + +const int EncTemporalFilter::m_range = 2; +const double EncTemporalFilter::m_chromaFactor = 0.55; +const double EncTemporalFilter::m_sigmaMultiplier = 9.0; +const double EncTemporalFilter::m_sigmaZeroPoint = 10.0; +const int EncTemporalFilter::m_motionVectorFactor = 16; +const int EncTemporalFilter::m_padding = 128; +const int EncTemporalFilter::m_interpolationFilter[16][8] = +{ + { 0, 0, 0, 64, 0, 0, 0, 0 }, //0 + { 0, 1, -3, 64, 4, -2, 0, 0 }, //1 -->--> + { 0, 1, -6, 62, 9, -3, 1, 0 }, //2 --> + { 0, 2, -8, 60, 14, -5, 1, 0 }, //3 -->--> + { 0, 2, -9, 57, 19, -7, 2, 0 }, //4 + { 0, 3, -10, 53, 24, -8, 2, 0 }, //5 -->--> + { 0, 3, -11, 50, 29, -9, 2, 0 }, //6 --> + { 0, 3, -11, 44, 35, -10, 3, 0 }, //7 -->--> + { 0, 1, -7, 38, 38, -7, 1, 0 }, //8 + { 0, 3, -10, 35, 44, -11, 3, 0 }, //9 -->--> + { 0, 2, -9, 29, 50, -11, 3, 0 }, //10--> + { 0, 2, -8, 24, 53, -10, 3, 0 }, //11-->--> + { 0, 2, -7, 19, 57, -9, 2, 0 }, //12 + { 0, 1, -5, 14, 60, -8, 2, 0 }, //13-->--> + { 0, 1, -3, 9, 62, -6, 1, 0 }, //14--> + { 0, 0, -2, 4, 64, -3, 1, 0 } //15-->--> +}; + +const double EncTemporalFilter::m_refStrengths[3][2] = +{ // abs(POC offset) + // 1, 2 + {0.85, 0.60}, // m_range * 2 + {1.20, 1.00}, // m_range + {0.30, 0.30} // otherwise +}; + +EncTemporalFilter::EncTemporalFilter() : + m_FrameSkip(0), + m_chromaFormatIDC(NUM_CHROMA_FORMAT), + m_sourceWidth(0), + m_sourceHeight(0), + m_QP(0), + m_clipInputVideoToRec709Range(false), + m_inputColourSpaceConvert(NUMBER_INPUT_COLOUR_SPACE_CONVERSIONS) +{} + +void 
EncTemporalFilter::init(const int frameSkip, + const int inputBitDepth[MAX_NUM_CHANNEL_TYPE], + const int msbExtendedBitDepth[MAX_NUM_CHANNEL_TYPE], + const int internalBitDepth[MAX_NUM_CHANNEL_TYPE], + const int width, + const int height, + const int *pad, + const bool rec709, + const std::string &filename, + const ChromaFormat inputChromaFormatIDC, + const InputColourSpaceConversion colorSpaceConv, + const int qp, + const std::map<int, double> &temporalFilterStrengths, + const bool gopBasedTemporalFilterFutureReference) +{ + m_FrameSkip = frameSkip; + for (int i = 0; i < MAX_NUM_CHANNEL_TYPE; i++) + { + m_inputBitDepth[i] = inputBitDepth[i]; + m_MSBExtendedBitDepth[i] = msbExtendedBitDepth[i]; + m_internalBitDepth[i] = internalBitDepth[i]; + } + + m_sourceWidth = width; + m_sourceHeight = height; + for (int i = 0; i < 2; i++) + { + m_pad[i] = pad[i]; + } + m_clipInputVideoToRec709Range = rec709; + m_inputFileName = filename; + m_chromaFormatIDC = inputChromaFormatIDC; + m_inputColourSpaceConvert = colorSpaceConv; + m_area = Area(0, 0, width, height); + m_QP = qp; + m_temporalFilterStrengths = temporalFilterStrengths; + m_gopBasedTemporalFilterFutureReference = gopBasedTemporalFilterFutureReference; +} + +// ==================================================================================================================== +// Public member functions +// ==================================================================================================================== + +bool EncTemporalFilter::filter(PelStorage *orgPic, int receivedPoc) +{ + bool isFilterThisFrame = false; + if (m_QP >= 17) // disable filter for QP < 17 + { + for (map<int, double>::iterator it = m_temporalFilterStrengths.begin(); it != m_temporalFilterStrengths.end(); ++it) + { + int filteredFrame = it->first; + if (receivedPoc % filteredFrame == 0) + { + isFilterThisFrame = true; + break; + } + } + } + + if (isFilterThisFrame) + { + int offset = m_FrameSkip; + VideoIOYuv yuvFrames; + 
yuvFrames.open(m_inputFileName, false, m_inputBitDepth, m_MSBExtendedBitDepth, m_internalBitDepth); + yuvFrames.skipFrames(std::max(offset + receivedPoc - m_range, 0), m_sourceWidth - m_pad[0], m_sourceHeight - m_pad[1], m_chromaFormatIDC); + + + std::deque<TemporalFilterSourcePicInfo> srcFrameInfo; + + int firstFrame = receivedPoc + offset - m_range; + int lastFrame = receivedPoc + offset + m_range; + if (!m_gopBasedTemporalFilterFutureReference) + { + lastFrame = receivedPoc + offset - 1; + } + int origOffset = -m_range; + + // subsample original picture so it only needs to be done once + PelStorage origPadded; + + origPadded.create(m_chromaFormatIDC, m_area, 0, m_padding); + origPadded.copyFrom(*orgPic); + origPadded.extendBorderPel(m_padding, m_padding); + + PelStorage origSubsampled2; + PelStorage origSubsampled4; + + subsampleLuma(origPadded, origSubsampled2); + subsampleLuma(origSubsampled2, origSubsampled4); + + // determine motion vectors + for (int poc = firstFrame; poc <= lastFrame; poc++) + { + if (poc < 0) + { + origOffset++; + continue; // frame not available + } + else if (poc == offset + receivedPoc) + { // hop over frame that will be filtered + yuvFrames.skipFrames(1, m_sourceWidth - m_pad[0], m_sourceHeight - m_pad[1], m_chromaFormatIDC); + origOffset++; + continue; + } + srcFrameInfo.push_back(TemporalFilterSourcePicInfo()); + TemporalFilterSourcePicInfo &srcPic=srcFrameInfo.back(); + + PelStorage dummyPicBufferTO; // Only used temporary in yuvFrames.read + srcPic.picBuffer.create(m_chromaFormatIDC, m_area, 0, m_padding); + dummyPicBufferTO.create(m_chromaFormatIDC, m_area, 0, m_padding); + if (!yuvFrames.read(srcPic.picBuffer, dummyPicBufferTO, m_inputColourSpaceConvert, m_pad, m_chromaFormatIDC, m_clipInputVideoToRec709Range)) + { + return false; // eof or read fail + } + srcPic.picBuffer.extendBorderPel(m_padding, m_padding); + srcPic.mvs.allocate(m_sourceWidth / 4, m_sourceHeight / 4); + + motionEstimation(srcPic.mvs, origPadded, 
srcPic.picBuffer, origSubsampled2, origSubsampled4); + srcPic.origOffset = origOffset; + origOffset++; + } + + // filter + PelStorage newOrgPic; + newOrgPic.create(m_chromaFormatIDC, m_area, 0, m_padding); + double overallStrength = -1.0; + for (map<int, double>::iterator it = m_temporalFilterStrengths.begin(); it != m_temporalFilterStrengths.end(); ++it) + { + int frame = it->first; + double strength = it->second; + if (receivedPoc % frame == 0) + { + overallStrength = strength; + } + } + + bilateralFilter(origPadded, srcFrameInfo, newOrgPic, overallStrength); + + // move filtered to orgPic + orgPic->copyFrom(newOrgPic); + + yuvFrames.close(); + return true; + } + return false; +} + +// ==================================================================================================================== +// Private member functions +// ==================================================================================================================== + +void EncTemporalFilter::subsampleLuma(const PelStorage &input, PelStorage &output, const int factor) const +{ + const int newWidth = input.Y().width / factor; + const int newHeight = input.Y().height / factor; + output.create(m_chromaFormatIDC, Area(0, 0, newWidth, newHeight), 0, m_padding); + + const Pel* srcRow = input.Y().buf; + const int srcStride = input.Y().stride; + Pel *dstRow = output.Y().buf; + const int dstStride = output.Y().stride; + + for (int y = 0; y < newHeight; y++, srcRow+=factor*srcStride, dstRow+=dstStride) + { + const Pel *inRow = srcRow; + const Pel *inRowBelow = srcRow+srcStride; + Pel *target = dstRow; + + for (int x = 0; x < newWidth; x++) + { + target[x] = (inRow[0] + inRowBelow[0] + inRow[1] + inRowBelow[1] + 2) >> 2; + inRow += 2; + inRowBelow += 2; + } + } + output.extendBorderPel(m_padding, m_padding); +} + +int EncTemporalFilter::motionErrorLuma(const PelStorage &orig, + const PelStorage &buffer, + const int x, + const int y, + int dx, + int dy, + const int bs, + const int besterror = 
8 * 8 * 1024 * 1024) const +{ + const Pel* origOrigin = orig.Y().buf; + const int origStride = orig.Y().stride; + const Pel *buffOrigin = buffer.Y().buf; + const int buffStride = buffer.Y().stride; + + int error = 0;// dx * 10 + dy * 10; + if (((dx | dy) & 0xF) == 0) + { + dx /= m_motionVectorFactor; + dy /= m_motionVectorFactor; + for (int y1 = 0; y1 < bs; y1++) + { + const Pel* origRowStart = origOrigin + (y+y1)*origStride + x; + const Pel* bufferRowStart = buffOrigin + (y+y1+dy)*buffStride + (x+dx); + for (int x1 = 0; x1 < bs; x1 += 2) + { + int diff = origRowStart[x1] - bufferRowStart[x1]; + error += diff * diff; + diff = origRowStart[x1 + 1] - bufferRowStart[x1 + 1]; + error += diff * diff; + } + if (error > besterror) + { + return error; + } + } + } + else + { + const int *xFilter = m_interpolationFilter[dx & 0xF]; + const int *yFilter = m_interpolationFilter[dy & 0xF]; + int tempArray[64 + 8][64]; + + int sum, base; + for (int y1 = 1; y1 < bs + 7; y1++) + { + const int yOffset = y + y1 + (dy >> 4) - 3; + const Pel *sourceRow = buffOrigin + (yOffset)*buffStride + 0; + for (int x1 = 0; x1 < bs; x1++) + { + sum = 0; + base = x + x1 + (dx >> 4) - 3; + const Pel *rowStart = sourceRow + base; + + sum += xFilter[1] * rowStart[1]; + sum += xFilter[2] * rowStart[2]; + sum += xFilter[3] * rowStart[3]; + sum += xFilter[4] * rowStart[4]; + sum += xFilter[5] * rowStart[5]; + sum += xFilter[6] * rowStart[6]; + + tempArray[y1][x1] = sum; + } + } + + const Pel maxSampleValue = (1<<m_internalBitDepth[CHANNEL_TYPE_LUMA])-1; + for (int y1 = 0; y1 < bs; y1++) + { + const Pel *origRow = origOrigin + (y+y1)*origStride + 0; + for (int x1 = 0; x1 < bs; x1++) + { + sum = 0; + sum += yFilter[1] * tempArray[y1 + 1][x1]; + sum += yFilter[2] * tempArray[y1 + 2][x1]; + sum += yFilter[3] * tempArray[y1 + 3][x1]; + sum += yFilter[4] * tempArray[y1 + 4][x1]; + sum += yFilter[5] * tempArray[y1 + 5][x1]; + sum += yFilter[6] * tempArray[y1 + 6][x1]; + + sum = (sum + (1 << 11)) >> 12; + sum = 
sum < 0 ? 0 : (sum > maxSampleValue ? maxSampleValue : sum); + + error += (sum - origRow[x + x1]) * (sum - origRow[x + x1]); + } + if (error > besterror) + { + return error; + } + } + } + return error; +} + +void EncTemporalFilter::motionEstimationLuma(Array2D<MotionVector> &mvs, const PelStorage &orig, const PelStorage &buffer, const int blockSize, + const Array2D<MotionVector> *previous, const int factor, const bool doubleRes) const +{ + int range = 5; + const int stepSize = blockSize; + + const int origWidth = orig.Y().width; + const int origHeight = orig.Y().height; + + for (int blockY = 0; blockY + blockSize <= origHeight; blockY += stepSize) /* '<=': a block ending exactly on the picture edge must be searched too */ + { + for (int blockX = 0; blockX + blockSize <= origWidth; blockX += stepSize) + { + MotionVector best; + + if (previous == NULL) + { + range = 8; + } + else + { + for (int py = -2; py <= 2; py++) + { + int testy = blockY / (2 * blockSize) + py; + for (int px = -2; px <= 2; px++) + { + int testx = blockX / (2 * blockSize) + px; + if ((testx >= 0) && (testx < origWidth / (2 * blockSize)) && (testy >= 0) && (testy < origHeight / (2 * blockSize))) + { + MotionVector old = previous->get(testx, testy); + int error = motionErrorLuma(orig, buffer, blockX, blockY, old.x * factor, old.y * factor, blockSize, best.error); + if (error < best.error) + { + best.set(old.x * factor, old.y * factor, error); + } + } + } + } + } + MotionVector prevBest = best; + for (int y2 = prevBest.y / m_motionVectorFactor - range; y2 <= prevBest.y / m_motionVectorFactor + range; y2++) + { + for (int x2 = prevBest.x / m_motionVectorFactor - range; x2 <= prevBest.x / m_motionVectorFactor + range; x2++) + { + int error = motionErrorLuma(orig, buffer, blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, best.error); + if (error < best.error) + { + best.set(x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, error); + } + } + } + if (doubleRes) + { // merge into one loop, probably with precision array (here [12, 3] or maybe [4,
1]) with settable number of iterations + prevBest = best; + int doubleRange = 3 * 4; + for (int y2 = prevBest.y - doubleRange; y2 <= prevBest.y + doubleRange; y2 += 4) + { + for (int x2 = prevBest.x - doubleRange; x2 <= prevBest.x + doubleRange; x2 += 4) + { + int error = motionErrorLuma(orig, buffer, blockX, blockY, x2, y2, blockSize, best.error); + if (error < best.error) + { + best.set(x2, y2, error); + } + + } + } + + prevBest = best; + doubleRange = 3; + for (int y2 = prevBest.y - doubleRange; y2 <= prevBest.y + doubleRange; y2++) + { + for (int x2 = prevBest.x - doubleRange; x2 <= prevBest.x + doubleRange; x2++) + { + int error = motionErrorLuma(orig, buffer, blockX, blockY, x2, y2, blockSize, best.error); + if (error < best.error) + { + best.set(x2, y2, error); + } + + } + } + + } + mvs.get(blockX / stepSize, blockY / stepSize) = best; + } + } +} + +void EncTemporalFilter::motionEstimation(Array2D<MotionVector> &mv, const PelStorage &orgPic, const PelStorage &buffer, const PelStorage &origSubsampled2, const PelStorage &origSubsampled4) const +{ + const int width = m_sourceWidth; + const int height = m_sourceHeight; + Array2D<MotionVector> mv_0(width / 16, height / 16); + Array2D<MotionVector> mv_1(width / 16, height / 16); + Array2D<MotionVector> mv_2(width / 16, height / 16); + + PelStorage bufferSub2; + PelStorage bufferSub4; + + subsampleLuma(buffer, bufferSub2); + subsampleLuma(bufferSub2, bufferSub4); + + motionEstimationLuma(mv_0, origSubsampled4, bufferSub4, 16); + motionEstimationLuma(mv_1, origSubsampled2, bufferSub2, 16, &mv_0, 2); + motionEstimationLuma(mv_2, orgPic, buffer, 16, &mv_1, 2); + + motionEstimationLuma(mv, orgPic, buffer, 8, &mv_2, 1, true); +} + +void EncTemporalFilter::applyMotion(const Array2D<MotionVector> &mvs, const PelStorage &input, PelStorage &output) const +{ + static const int lumaBlockSize=8; + + for(int c=0; c< getNumberValidComponents(m_chromaFormatIDC); c++) + { + const ComponentID compID=(ComponentID)c; + const int
csx=getComponentScaleX(compID, m_chromaFormatIDC); + const int csy=getComponentScaleY(compID, m_chromaFormatIDC); + const int blockSizeX = lumaBlockSize>>csx; + const int blockSizeY = lumaBlockSize>>csy; + const int height = input.bufs[c].height; + const int width = input.bufs[c].width; + + const Pel maxValue = (1<<m_internalBitDepth[toChannelType(compID)])-1; + + const Pel *srcImage = input.bufs[c].buf; + const int srcStride = input.bufs[c].stride; + + Pel *dstImage = output.bufs[c].buf; + int dstStride = output.bufs[c].stride; + + for (int y = 0, blockNumY = 0; y + blockSizeY <= height; y += blockSizeY, blockNumY++) + { + for (int x = 0, blockNumX = 0; x + blockSizeX <= width; x += blockSizeX, blockNumX++) + { + const MotionVector &mv = mvs.get(blockNumX,blockNumY); + const int dx = mv.x >> csx ; + const int dy = mv.y >> csy ; + const int xInt = mv.x >> (4+csx) ; + const int yInt = mv.y >> (4+csy) ; + + const int *xFilter = m_interpolationFilter[dx & 0xf]; + const int *yFilter = m_interpolationFilter[dy & 0xf]; // will add 6 bit. 
+ const int numFilterTaps=7; + const int centreTapOffset=3; + + int tempArray[lumaBlockSize + numFilterTaps][lumaBlockSize]; + + for (int by = 1; by < blockSizeY + numFilterTaps; by++) + { + const int yOffset = y + by + yInt - centreTapOffset; + const Pel *sourceRow = srcImage+yOffset*srcStride; + for (int bx = 0; bx < blockSizeX; bx++) + { + int base = x + bx + xInt - centreTapOffset; + const Pel *rowStart = sourceRow + base; + + int sum = 0; + sum += xFilter[1] * rowStart[1]; + sum += xFilter[2] * rowStart[2]; + sum += xFilter[3] * rowStart[3]; + sum += xFilter[4] * rowStart[4]; + sum += xFilter[5] * rowStart[5]; + sum += xFilter[6] * rowStart[6]; + + tempArray[by][bx] = sum; + } + } + + Pel *dstRow = dstImage+y*dstStride; + for (int by = 0; by < blockSizeY; by++, dstRow+=dstStride) + { + Pel *dstPel=dstRow+x; + for (int bx = 0; bx < blockSizeX; bx++, dstPel++) + { + int sum = 0; + + sum += yFilter[1] * tempArray[by + 1][bx]; + sum += yFilter[2] * tempArray[by + 2][bx]; + sum += yFilter[3] * tempArray[by + 3][bx]; + sum += yFilter[4] * tempArray[by + 4][bx]; + sum += yFilter[5] * tempArray[by + 5][bx]; + sum += yFilter[6] * tempArray[by + 6][bx]; + + sum = (sum + (1 << 11)) >> 12; + sum = sum < 0 ? 0 : (sum > maxValue ? 
maxValue : sum); + *dstPel = sum; + } + } + } + } + } +} + +void EncTemporalFilter::bilateralFilter(const PelStorage &orgPic, + const std::deque<TemporalFilterSourcePicInfo> &srcFrameInfo, + PelStorage &newOrgPic, + double overallStrength) const +{ + const int numRefs = int(srcFrameInfo.size()); + std::vector<PelStorage> correctedPics(numRefs); + for (int i = 0; i < numRefs; i++) + { + correctedPics[i].create(m_chromaFormatIDC, m_area, 0, m_padding); + applyMotion(srcFrameInfo[i].mvs, srcFrameInfo[i].picBuffer, correctedPics[i]); + } + + int refStrengthRow = 2; + if (numRefs == m_range*2) + { + refStrengthRow = 0; + } + else if (numRefs == m_range) + { + refStrengthRow = 1; + } + + const double lumaSigmaSq = (m_QP - m_sigmaZeroPoint) * (m_QP - m_sigmaZeroPoint) * m_sigmaMultiplier; + const double chromaSigmaSq = 30 * 30; + + for(int c=0; c< getNumberValidComponents(m_chromaFormatIDC); c++) + { + const ComponentID compID=(ComponentID)c; + const int height = orgPic.bufs[c].height; + const int width = orgPic.bufs[c].width; + const Pel *srcPelRow = orgPic.bufs[c].buf; + const int srcStride = orgPic.bufs[c].stride; + Pel *dstPelRow = newOrgPic.bufs[c].buf; + const int dstStride = newOrgPic.bufs[c].stride; + const double sigmaSq = isChroma(compID)? chromaSigmaSq : lumaSigmaSq; + const double weightScaling = overallStrength * (isChroma(compID) ? 
m_chromaFactor : 0.4); + const Pel maxSampleValue = (1<<m_internalBitDepth[toChannelType(compID)])-1; + const double bitDepthDiffWeighting=1024.0 / (maxSampleValue+1); + + for (int y = 0; y < height; y++, srcPelRow+=srcStride, dstPelRow+=dstStride) + { + const Pel *srcPel=srcPelRow; + Pel *dstPel=dstPelRow; + for (int x = 0; x < width; x++, srcPel++, dstPel++) + { + const int orgVal = (int) *srcPel; + double temporalWeightSum = 1.0; + double newVal = (double) orgVal; + for (int i = 0; i < numRefs; i++) + { + const Pel *pCorrectedPelPtr=correctedPics[i].bufs[c].buf+(y*correctedPics[i].bufs[c].stride+x); + const int refVal = (int) *pCorrectedPelPtr; + double diff = (double)(refVal - orgVal); + diff *= bitDepthDiffWeighting; + double diffSq = diff * diff; + const int index = std::min(1, std::abs(srcFrameInfo[i].origOffset) - 1); + const double weight = weightScaling * m_refStrengths[refStrengthRow][index] * exp(-diffSq / (2 * sigmaSq)); + newVal += weight * refVal; + temporalWeightSum += weight; + } + newVal /= temporalWeightSum; + Pel sampleVal = (Pel)round(newVal); + sampleVal=(sampleVal<0?0 : (sampleVal>maxSampleValue ? maxSampleValue : sampleVal)); + *dstPel = sampleVal; + } + } + } +} + +//! \} + +#endif diff --git a/source/Lib/EncoderLib/EncTemporalFilter.h b/source/Lib/EncoderLib/EncTemporalFilter.h new file mode 100644 index 0000000000000000000000000000000000000000..e9dbe86d711cd1ee8f6e6c4ef14e0942746bbe6b --- /dev/null +++ b/source/Lib/EncoderLib/EncTemporalFilter.h @@ -0,0 +1,167 @@ +/* The copyright in this software is being made available under the BSD +* License, included below. This software may be subject to other third party +* and contributor rights, including patent rights, and no such rights are +* granted under this license. +* +* Copyright (c) 2010-2019, ITU/ISO/IEC +* All rights reserved. 
+* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, +* this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may +* be used to endorse or promote products derived from this software without +* specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +* THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/** \file EncTemporalFilter.h +\brief EncTemporalFilter class (header) +*/ + +#ifndef __TEMPORAL_FILTER__ +#define __TEMPORAL_FILTER__ +#include "EncLib.h" +#include "CommonLib/Buffer.h" +#include <sstream> +#include <map> +#include <deque> + +#if JVET_O0549_ENCODER_ONLY_FILTER + +//! \ingroup EncoderLib +//! 
\{ + +struct MotionVector +{ + int x, y; + int error; + MotionVector() : x(0), y(0), error(INT_LEAST32_MAX) {} + void set(int vectorX, int vectorY, int errorValue) { x = vectorX; y = vectorY; error = errorValue; } +}; + +template <class T> +struct Array2D +{ +private: + int m_width, m_height; + std::vector< T > v; +public: + Array2D() : m_width(0), m_height(0), v() { } + Array2D(int width, int height, const T& value=T()) : m_width(0), m_height(0), v() { allocate(width, height, value); } + + void allocate(int width, int height, const T& value=T()) + { + m_width=width; + m_height=height; + v.resize(std::size_t(m_width*m_height), value); + } + + T& get(int x, int y) + { + assert(x>=0 && x<m_width && y>=0 && y<m_height); /* negative indices are out of range as well */ + return v[y*m_width+x]; + } + + const T& get(int x, int y) const + { + assert(x>=0 && x<m_width && y>=0 && y<m_height); /* negative indices are out of range as well */ + return v[y*m_width+x]; + } +}; + +struct TemporalFilterSourcePicInfo +{ + TemporalFilterSourcePicInfo() : picBuffer(), mvs(), origOffset(0) { } + PelStorage picBuffer; + Array2D<MotionVector> mvs; + int origOffset; +}; + +// ==================================================================================================================== +// Class definition +// ==================================================================================================================== + +class EncTemporalFilter +{ +public: + EncTemporalFilter(); + ~EncTemporalFilter() {} + + void init(const int frameSkip, + const int inputBitDepth[MAX_NUM_CHANNEL_TYPE], + const int msbExtendedBitDepth[MAX_NUM_CHANNEL_TYPE], + const int internalBitDepth[MAX_NUM_CHANNEL_TYPE], + const int width, + const int height, + const int *pad, + const bool rec709, + const std::string &filename, + const ChromaFormat inputChroma, + const InputColourSpaceConversion colorSpaceConv, + const int qp, + const std::map<int, double> &temporalFilterStrengths, + const bool gopBasedTemporalFilterFutureReference); + + bool filter(PelStorage *orgPic, int frame); + +private: + // Private static member variables +
static const int m_range; + static const double m_chromaFactor; + static const double m_sigmaMultiplier; + static const double m_sigmaZeroPoint; + static const int m_motionVectorFactor; + static const int m_padding; + static const int m_interpolationFilter[16][8]; + static const double m_refStrengths[3][2]; + + // Private member variables + int m_FrameSkip; + std::string m_inputFileName; + int m_inputBitDepth[MAX_NUM_CHANNEL_TYPE]; + int m_MSBExtendedBitDepth[MAX_NUM_CHANNEL_TYPE]; + int m_internalBitDepth[MAX_NUM_CHANNEL_TYPE]; + ChromaFormat m_chromaFormatIDC; + int m_sourceWidth; + int m_sourceHeight; + int m_QP; + std::map<int, double> m_temporalFilterStrengths; + int m_pad[2]; + bool m_clipInputVideoToRec709Range; + InputColourSpaceConversion m_inputColourSpaceConvert; + Area m_area; + bool m_gopBasedTemporalFilterFutureReference; + + // Private functions + void subsampleLuma(const PelStorage &input, PelStorage &output, const int factor = 2) const; + int motionErrorLuma(const PelStorage &orig, const PelStorage &buffer, const int x, const int y, int dx, int dy, const int bs, const int besterror) const; + void motionEstimationLuma(Array2D<MotionVector> &mvs, const PelStorage &orig, const PelStorage &buffer, const int bs, + const Array2D<MotionVector> *previous=0, const int factor = 1, const bool doubleRes = false) const; + void motionEstimation(Array2D<MotionVector> &mvs, const PelStorage &orgPic, const PelStorage &buffer, const PelStorage &origSubsampled2, const PelStorage &origSubsampled4) const; + + void bilateralFilter(const PelStorage &orgPic, const std::deque<TemporalFilterSourcePicInfo> &srcFrameInfo, PelStorage &newOrgPic, double overallStrength) const; + void applyMotion(const Array2D<MotionVector> &mvs, const PelStorage &input, PelStorage &output) const; +}; // END CLASS DEFINITION EncTemporalFilter + + //! 
\} + +#endif + +#endif // __TEMPORAL_FILTER__ diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp index d0346b12ef593a2ba61664681780e4b7407a98aa..280c6a9e6a770402cec0d44644e205af0166b815 100644 --- a/source/Lib/EncoderLib/VLCWriter.cpp +++ b/source/Lib/EncoderLib/VLCWriter.cpp @@ -707,6 +707,9 @@ void HLSWriter::codeSPS( const SPS* pcSPS ) WRITE_UVLC( chromaEnabled ? (pcSPS->getBitDepth(CHANNEL_TYPE_CHROMA) - 8):0, "bit_depth_chroma_minus8" ); WRITE_UVLC( pcSPS->getMinQpPrimeTsMinus4(CHANNEL_TYPE_LUMA), "min_qp_prime_ts_minus4" ); + + WRITE_FLAG( pcSPS->getUseWP() ? 1 : 0, "sps_weighted_pred_flag" ); // Use of Weighting Prediction (P_SLICE) + WRITE_FLAG( pcSPS->getUseWPBiPred() ? 1 : 0, "sps_weighted_bipred_flag" ); // Use of Weighting Bi-Prediction (B_SLICE) WRITE_UVLC( pcSPS->getBitsForPOC()-4, "log2_max_pic_order_cnt_lsb_minus4" ); WRITE_FLAG( pcSPS->getIDRRefParamListPresent(), "sps_idr_rpl_present_flag" ); @@ -800,13 +803,9 @@ void HLSWriter::codeSPS( const SPS* pcSPS ) } } - WRITE_FLAG( pcSPS->getUseWP() ? 1 : 0, "sps_weighted_pred_flag" ); // Use of Weighting Prediction (P_SLICE) - WRITE_FLAG( pcSPS->getUseWPBiPred() ? 1 : 0, "sps_weighted_bipred_flag" ); // Use of Weighting Bi-Prediction (B_SLICE) - WRITE_FLAG( pcSPS->getSAOEnabledFlag(), "sps_sao_enabled_flag"); WRITE_FLAG( pcSPS->getALFEnabledFlag(), "sps_alf_enabled_flag" ); - WRITE_FLAG(pcSPS->getTransformSkipEnabledFlag() ? 
1 : 0, "sps_transform_skip_enabled_flag"); if (pcSPS->getTransformSkipEnabledFlag()) { @@ -1331,6 +1330,12 @@ void HLSWriter::codeSliceHeader ( Slice* pcSlice ) WRITE_UVLC( pcSlice->getColRefIdx(), "collocated_ref_idx" ); } } + + if( ( pcSlice->getPPS()->getUseWP() && pcSlice->getSliceType() == P_SLICE ) || ( pcSlice->getPPS()->getWPBiPred() && pcSlice->getSliceType() == B_SLICE ) ) + { + xCodePredWeightTable( pcSlice ); + } + if (!cs.slice->isIntra()) { CHECK(pcSlice->getMaxNumMergeCand() > MRG_MAX_NUM_CANDS, "More merge candidates signalled than supported"); @@ -1414,10 +1419,6 @@ void HLSWriter::codeSliceHeader ( Slice* pcSlice ) WRITE_FLAG(pcSlice->getUseChromaQpAdj(), "cu_chroma_qp_offset_enabled_flag"); } - if( ( pcSlice->getPPS()->getUseWP() && pcSlice->getSliceType() == P_SLICE ) || ( pcSlice->getPPS()->getWPBiPred() && pcSlice->getSliceType() == B_SLICE ) ) - { - xCodePredWeightTable( pcSlice ); - } if( pcSlice->getSPS()->getSAOEnabledFlag() ) { WRITE_FLAG( pcSlice->getSaoEnabledFlag( CHANNEL_TYPE_LUMA ), "slice_sao_luma_flag" ); diff --git a/source/Lib/Utilities/program_options_lite.cpp b/source/Lib/Utilities/program_options_lite.cpp index 0c4bba0502cc08c2caa01b4cd61f66554dfe30ab..4a380e04cc1b64761d11aed3852252fc47addca4 100644 --- a/source/Lib/Utilities/program_options_lite.cpp +++ b/source/Lib/Utilities/program_options_lite.cpp @@ -96,8 +96,22 @@ namespace df } else { +#if JVET_O0549_ENCODER_ONLY_FILTER_POL + if (opt_name.size() > 0 && opt_name.back() == '*') + { + string prefix_name = opt_name.substr(0, opt_name.size() - 1); + names->opt_prefix.push_back(prefix_name); + opt_prefix_map[prefix_name].push_back(names); + } + else + { + names->opt_long.push_back(opt_name); + opt_long_map[opt_name].push_back(names); + } +#else names->opt_long.push_back(opt_name); opt_long_map[opt_name].push_back(names); +#endif } opt_start += opt_end + 1; } @@ -150,6 +164,12 @@ namespace df { out << "--" << entry.opt_long.front(); } +#if JVET_O0549_ENCODER_ONLY_FILTER_POL + 
else if (!entry.opt_prefix.empty()) + { + out << "--" << entry.opt_prefix.front() << "*"; + } +#endif } /* format the help text */ @@ -271,6 +291,9 @@ namespace df bool OptionWriter::storePair(bool allow_long, bool allow_short, const string& name, const string& value) { bool found = false; +#if JVET_O0549_ENCODER_ONLY_FILTER_POL + std::string val = value; +#endif Options::NamesMap::iterator opt_it; if (allow_long) { @@ -290,15 +313,34 @@ namespace df found = true; } } - +#if JVET_O0549_ENCODER_ONLY_FILTER_POL + bool allow_prefix = allow_long; + if (allow_prefix && !found) + { + for (opt_it = opts.opt_prefix_map.begin(); opt_it != opts.opt_prefix_map.end(); opt_it++) + { + std::string name_prefix = name.substr(0, opt_it->first.size()); + if (name_prefix == opt_it->first) + { + // prepend value matching * + val = name.substr(name_prefix.size()) + std::string(" ") + val; + found = true; + break; + } + } + } +#endif if (!found) { error_reporter.error(where()) << "Unknown option `" << name << "' (value:`" << value << "')\n"; return false; } - +#if JVET_O0549_ENCODER_ONLY_FILTER_POL + setOptions((*opt_it).second, val, error_reporter); +#else setOptions((*opt_it).second, value, error_reporter); +#endif return true; } diff --git a/source/Lib/Utilities/program_options_lite.h b/source/Lib/Utilities/program_options_lite.h index 2ce2bd26ed80c6066ec93401034513b2b4b71b4a..dfd082cb73f9ca67f58d04a6ce1a2fd97b51910c 100644 --- a/source/Lib/Utilities/program_options_lite.h +++ b/source/Lib/Utilities/program_options_lite.h @@ -36,6 +36,8 @@ #include <list> #include <map> +#define JVET_O0549_ENCODER_ONLY_FILTER_POL 1 // JVET-O0549: Encoder-only GOP-based temporal filter. Program Options Lite related changes. 
+ #ifndef __PROGRAM_OPTIONS_LITE__ #define __PROGRAM_OPTIONS_LITE__ @@ -196,6 +198,9 @@ namespace df } std::list<std::string> opt_long; std::list<std::string> opt_short; +#if JVET_O0549_ENCODER_ONLY_FILTER_POL + std::list<std::string> opt_prefix; +#endif OptionBase* opt; }; @@ -207,6 +212,9 @@ namespace df typedef std::map<std::string, NamesPtrList> NamesMap; NamesMap opt_long_map; NamesMap opt_short_map; +#if JVET_O0549_ENCODER_ONLY_FILTER_POL + NamesMap opt_prefix_map; +#endif }; /* Class with templated overloaded operator(), for use by Options::addOptions() */