diff --git a/cfg/encoder_lowdelay_vtm.cfg b/cfg/encoder_lowdelay_vtm.cfg index e61a59988798c265db5a7693d30cf0e3e5c47215..9737a2854824f560bfef606585e012ef738f0307 100644 --- a/cfg/encoder_lowdelay_vtm.cfg +++ b/cfg/encoder_lowdelay_vtm.cfg @@ -127,6 +127,8 @@ LMChroma : 1 # use CCLM only DepQuant : 1 IMV : 2 ALF : 1 +GBi : 1 +GBiFast : 1 # Fast tools PBIntraFast : 1 diff --git a/cfg/encoder_randomaccess_vtm.cfg b/cfg/encoder_randomaccess_vtm.cfg index b2d2eddee705a14bf256652b90652740fa696edf..63ff58df9358f54e7567d150a6e8993a48ca04ea 100644 --- a/cfg/encoder_randomaccess_vtm.cfg +++ b/cfg/encoder_randomaccess_vtm.cfg @@ -141,6 +141,8 @@ LMChroma : 1 # use CCLM only DepQuant : 1 IMV : 2 ALF : 1 +GBi : 1 +GBiFast : 1 # Fast tools PBIntraFast : 1 diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp index 93bf0194c23c886cb794efd1a729e6265099ec41..3c6efc53764d3d8d710daab9abefc93c43b8926d 100644 --- a/source/App/EncoderApp/EncApp.cpp +++ b/source/App/EncoderApp/EncApp.cpp @@ -239,6 +239,10 @@ void EncApp::xInitLibCfg() m_cEncLib.setInterEMT ( ( m_EMT >> 1 ) & 1 ); m_cEncLib.setFastInterEMT ( ( m_FastEMT >> 1 ) & ( m_EMT >> 1 ) & 1 ); m_cEncLib.setUseCompositeRef ( m_compositeRefEnabled ); +#if JVET_L0646_GBI + m_cEncLib.setUseGBi ( m_GBi ); + m_cEncLib.setUseGBiFast ( m_GBiFast ); +#endif // ADD_NEW_TOOL : (encoder app) add setting of tool enabling flags and associated parameters here m_cEncLib.setMaxCUWidth ( m_QTBT ? 
m_uiCTUSize : m_uiMaxCUWidth ); diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp index 52be0b4aebf2d94a4cc514722f788aaa756f302f..0b7da7327be4a55d7219ebc19efa06771ef3f98a 100644 --- a/source/App/EncoderApp/EncAppCfg.cpp +++ b/source/App/EncoderApp/EncAppCfg.cpp @@ -844,6 +844,10 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) "\t2: Enable fast methods only for Inter EMT\n" "\t3: Enable fast methods for both Intra & Inter EMT\n") ("CompositeLTReference", m_compositeRefEnabled, false, "Enable Composite Long Term Reference Frame") +#if JVET_L0646_GBI + ("GBi", m_GBi, false, "Enable Generalized Bi-prediction(GBi)") + ("GBiFast", m_GBiFast, false, "Fast methods for Generalized Bi-prediction(GBi)\n") +#endif // ADD_NEW_TOOL : (encoder app) add parsing parameters here ("LCTUFast", m_useFastLCTU, false, "Fast methods for large CTU") @@ -1916,7 +1920,11 @@ bool EncAppCfg::xCheckParameter() xConfirmPara( m_useFastLCTU, "Fast large CTU can only be applied when encoding with NEXT profile" ); xConfirmPara( m_EMT, "EMT only allowed with NEXT profile" ); xConfirmPara( m_FastEMT, "EMT only allowed with NEXT profile" ); - xConfirmPara(m_compositeRefEnabled, "Composite Reference Frame is only allowed with NEXT profile"); + xConfirmPara( m_compositeRefEnabled, "Composite Reference Frame is only allowed with NEXT profile" ); +#if JVET_L0646_GBI + xConfirmPara( m_GBi, "GBi is only allowed with NEXT profile" ); + xConfirmPara( m_GBiFast, "GBiFast is only allowed with NEXT profile" ); +#endif // ADD_NEW_TOOL : (parameter check) add a check for next tools here } else @@ -3111,6 +3119,10 @@ void EncAppCfg::xPrintParameter() msg( VERBOSE, "LMChroma:%d ", m_LMChroma ); msg( VERBOSE, "EMT: %1d(intra) %1d(inter) ", m_EMT & 1, ( m_EMT >> 1 ) & 1 ); msg(VERBOSE, "CompositeLTReference:%d ", m_compositeRefEnabled); +#if JVET_L0646_GBI + msg( VERBOSE, "GBi:%d ", m_GBi ); + msg( VERBOSE, "GBiFast:%d ", m_GBiFast ); +#endif } // ADD_NEW_TOOL (add some output 
indicating the usage of tools) diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h index c30670b9872dad3e6d8897be13b525a73cc39825..a9eb00f22b16e1ccb7a754209c6b9110af5a4229 100644 --- a/source/App/EncoderApp/EncAppCfg.h +++ b/source/App/EncoderApp/EncAppCfg.h @@ -221,6 +221,10 @@ protected: int m_FastEMT; ///< XZ: Fast Methods of Enhanced Multiple Transform bool m_compositeRefEnabled; +#if JVET_L0646_GBI + bool m_GBi; + bool m_GBiFast; +#endif // ADD_NEW_TOOL : (encoder app) add tool enabling flags and associated parameters here unsigned m_uiMaxCUWidth; ///< max. CU width in pixel diff --git a/source/Lib/CommonLib/Buffer.cpp b/source/Lib/CommonLib/Buffer.cpp index 4ad657d48214846ee476e4d8bcc4479e3fe61cf6..f31a22044ec3f8d59704b0ccf47387f1f513f70d 100644 --- a/source/Lib/CommonLib/Buffer.cpp +++ b/source/Lib/CommonLib/Buffer.cpp @@ -62,6 +62,40 @@ void addAvgCore( const T* src1, int src1Stride, const T* src2, int src2Stride, T #undef ADD_AVG_CORE_INC } +#if ENABLE_SIMD_OPT_GBI && JVET_L0646_GBI +void removeWeightHighFreq(int16_t* dst, int dstStride, const int16_t* src, int srcStride, int width, int height, int shift, int gbiWeight) +{ + int normalizer = ((1 << 16) + (gbiWeight > 0 ? 
(gbiWeight >> 1) : -(gbiWeight >> 1))) / gbiWeight; + int weight0 = normalizer << g_GbiLog2WeightBase; + int weight1 = (g_GbiWeightBase - gbiWeight)*normalizer; +#define REM_HF_INC \ + src += srcStride; \ + dst += dstStride; \ + +#define REM_HF_OP( ADDR ) dst[ADDR] = (dst[ADDR]*weight0 - src[ADDR]*weight1 + (1<<15))>>16 + + SIZE_AWARE_PER_EL_OP(REM_HF_OP, REM_HF_INC); + +#undef REM_HF_INC +#undef REM_HF_OP +#undef REM_HF_OP_CLIP +} + +void removeHighFreq(int16_t* dst, int dstStride, const int16_t* src, int srcStride, int width, int height) +{ +#define REM_HF_INC \ + src += srcStride; \ + dst += dstStride; \ + +#define REM_HF_OP( ADDR ) dst[ADDR] = 2 * dst[ADDR] - src[ADDR] + + SIZE_AWARE_PER_EL_OP(REM_HF_OP, REM_HF_INC); + +#undef REM_HF_INC +#undef REM_HF_OP +#undef REM_HF_OP_CLIP +} +#endif template<typename T> void reconstructCore( const T* src1, int src1Stride, const T* src2, int src2Stride, T* dest, int dstStride, int width, int height, const ClpRng& clpRng ) @@ -103,6 +137,14 @@ PelBufferOps::PelBufferOps() linTf4 = linTfCore<Pel>; linTf8 = linTfCore<Pel>; + +#if ENABLE_SIMD_OPT_GBI + removeWeightHighFreq8 = removeWeightHighFreq; + removeWeightHighFreq4 = removeWeightHighFreq; + removeHighFreq8 = removeHighFreq; + removeHighFreq4 = removeHighFreq; +#endif + } PelBufferOps g_pelBufOP = PelBufferOps(); @@ -110,6 +152,37 @@ PelBufferOps g_pelBufOP = PelBufferOps(); #endif #endif +#if JVET_L0646_GBI +template<> +void AreaBuf<Pel>::addWeightedAvg(const AreaBuf<const Pel> &other1, const AreaBuf<const Pel> &other2, const ClpRng& clpRng, const int8_t gbiIdx) +{ + const int8_t w0 = getGbiWeight(gbiIdx, REF_PIC_LIST_0); + const int8_t w1 = getGbiWeight(gbiIdx, REF_PIC_LIST_1); + const int8_t log2WeightBase = g_GbiLog2WeightBase; + + const Pel* src0 = other1.buf; + const Pel* src2 = other2.buf; + Pel* dest = buf; + + const unsigned src1Stride = other1.stride; + const unsigned src2Stride = other2.stride; + const unsigned destStride = stride; + const int clipbd = 
clpRng.bd; + const int shiftNum = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + log2WeightBase; + const int offset = (1 << (shiftNum - 1)) + (IF_INTERNAL_OFFS << log2WeightBase); + +#define ADD_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShift( ( src0[ADDR]*w0 + src2[ADDR]*w1 + offset ), shiftNum ), clpRng ) +#define ADD_AVG_INC \ + src0 += src1Stride; \ + src2 += src2Stride; \ + dest += destStride; \ + + SIZE_AWARE_PER_EL_OP(ADD_AVG_OP, ADD_AVG_INC); + +#undef ADD_AVG_OP +#undef ADD_AVG_INC +} +#endif template<> void AreaBuf<Pel>::addAvg( const AreaBuf<const Pel> &other1, const AreaBuf<const Pel> &other2, const ClpRng& clpRng) diff --git a/source/Lib/CommonLib/Buffer.h b/source/Lib/CommonLib/Buffer.h index 1763242e3dc0af2fd30165b09045a5028c460863..fdf3b962f774c18efc1c9433d496415ffcc74cf2 100644 --- a/source/Lib/CommonLib/Buffer.h +++ b/source/Lib/CommonLib/Buffer.h @@ -68,6 +68,12 @@ struct PelBufferOps void ( *reco8 ) ( const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, int width, int height, const ClpRng& clpRng ); void ( *linTf4 ) ( const Pel* src0, int src0Stride, Pel *dst, int dstStride, int width, int height, int scale, int shift, int offset, const ClpRng& clpRng, bool bClip ); void ( *linTf8 ) ( const Pel* src0, int src0Stride, Pel *dst, int dstStride, int width, int height, int scale, int shift, int offset, const ClpRng& clpRng, bool bClip ); +#if ENABLE_SIMD_OPT_GBI + void ( *removeWeightHighFreq8) ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height, int shift, int gbiWeight); + void ( *removeWeightHighFreq4) ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height, int shift, int gbiWeight); + void ( *removeHighFreq8) ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height); + void ( *removeHighFreq4) ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height); +#endif }; extern PelBufferOps g_pelBufOP; 
@@ -102,6 +108,10 @@ struct AreaBuf : public Size void subtract ( const AreaBuf<const T> &other ); void extendSingleBorderPel(); void extendBorderPel ( unsigned margin ); +#if JVET_L0646_GBI + void addWeightedAvg ( const AreaBuf<const T> &other1, const AreaBuf<const T> &other2, const ClpRng& clpRng, const int8_t gbiIdx); + void removeWeightHighFreq ( const AreaBuf<T>& other, const bool bClip, const ClpRng& clpRng, const int8_t iGbiWeight); +#endif void addAvg ( const AreaBuf<const T> &other1, const AreaBuf<const T> &other2, const ClpRng& clpRng ); void removeHighFreq ( const AreaBuf<T>& other, const bool bClip, const ClpRng& clpRng); void updateHistogram ( std::vector<int32_t>& hist ) const; @@ -384,6 +394,59 @@ void AreaBuf<T>::toLast( const ClpRng& clpRng ) template<> void AreaBuf<Pel>::toLast( const ClpRng& clpRng ); +#if JVET_L0646_GBI +template<typename T> +void AreaBuf<T>::removeWeightHighFreq(const AreaBuf<T>& other, const bool bClip, const ClpRng& clpRng, const int8_t gbiWeight) +{ + const int8_t gbiWeightOther = g_GbiWeightBase - gbiWeight; + const int8_t log2WeightBase = g_GbiLog2WeightBase; + + const Pel* src = other.buf; + const int srcStride = other.stride; + + Pel* dst = buf; + const int dstStride = stride; + +#if ENABLE_SIMD_OPT_GBI + if(!bClip) + { + if(!(width & 7)) + g_pelBufOP.removeWeightHighFreq8(dst, dstStride, src, srcStride, width, height, 16, gbiWeight); + else if(!(width & 3)) + g_pelBufOP.removeWeightHighFreq4(dst, dstStride, src, srcStride, width, height, 16, gbiWeight); + else + CHECK(true, "Not supported"); + } + else + { +#endif + int normalizer = ((1 << 16) + (gbiWeight > 0 ? 
(gbiWeight >> 1) : -(gbiWeight >> 1))) / gbiWeight; + int weight0 = normalizer << log2WeightBase; + int weight1 = gbiWeightOther * normalizer; +#define REM_HF_INC \ + src += srcStride; \ + dst += dstStride; \ + +#define REM_HF_OP_CLIP( ADDR ) dst[ADDR] = ClipPel<T>( (dst[ADDR]*weight0 - src[ADDR]*weight1 + (1<<15))>>16, clpRng ) +#define REM_HF_OP( ADDR ) dst[ADDR] = (dst[ADDR]*weight0 - src[ADDR]*weight1 + (1<<15))>>16 + + if(bClip) + { + SIZE_AWARE_PER_EL_OP(REM_HF_OP_CLIP, REM_HF_INC); + } + else + { + SIZE_AWARE_PER_EL_OP(REM_HF_OP, REM_HF_INC); + } + +#undef REM_HF_INC +#undef REM_HF_OP +#undef REM_HF_OP_CLIP +#if ENABLE_SIMD_OPT_GBI + } +#endif +} +#endif template<typename T> void AreaBuf<T>::removeHighFreq( const AreaBuf<T>& other, const bool bClip, const ClpRng& clpRng ) @@ -394,6 +457,20 @@ void AreaBuf<T>::removeHighFreq( const AreaBuf<T>& other, const bool bClip, cons T* dst = buf; const int dstStride = stride; +#if ENABLE_SIMD_OPT_GBI && JVET_L0646_GBI + if (!bClip) + { + if(!(width & 7)) + g_pelBufOP.removeHighFreq8(dst, dstStride, src, srcStride, width, height); + else if (!(width & 3)) + g_pelBufOP.removeHighFreq4(dst, dstStride, src, srcStride, width, height); + else + CHECK(true, "Not supported"); + } + else + { +#endif + #define REM_HF_INC \ src += srcStride; \ dst += dstStride; \ @@ -413,6 +490,10 @@ void AreaBuf<T>::removeHighFreq( const AreaBuf<T>& other, const bool bClip, cons #undef REM_HF_INC #undef REM_HF_OP #undef REM_HF_OP_CLIP + +#if ENABLE_SIMD_OPT_GBI && JVET_L0646_GBI + } +#endif } @@ -576,10 +657,16 @@ struct UnitBuf void reconstruct ( const UnitBuf<const T> &pred, const UnitBuf<const T> &resi, const ClpRngs& clpRngs ); void copyClip ( const UnitBuf<const T> &src, const ClpRngs& clpRngs ); void subtract ( const UnitBuf<const T> &other ); +#if JVET_L0646_GBI + void addWeightedAvg ( const UnitBuf<const T> &other1, const UnitBuf<const T> &other2, const ClpRngs& clpRngs, const uint8_t gbiIdx = GBI_DEFAULT, const bool chromaOnly = false, 
const bool lumaOnly = false); +#endif void addAvg ( const UnitBuf<const T> &other1, const UnitBuf<const T> &other2, const ClpRngs& clpRngs, const bool chromaOnly = false, const bool lumaOnly = false); void extendSingleBorderPel(); void extendBorderPel ( unsigned margin ); void removeHighFreq ( const UnitBuf<T>& other, const bool bClip, const ClpRngs& clpRngs +#if JVET_L0646_GBI + , const int8_t gbiWeight = g_GbiWeights[GBI_DEFAULT] +#endif ); UnitBuf< T> subBuf (const UnitArea& subArea); @@ -649,6 +736,21 @@ void UnitBuf<T>::reconstruct(const UnitBuf<const T> &pred, const UnitBuf<const T } } +#if JVET_L0646_GBI +template<typename T> +void UnitBuf<T>::addWeightedAvg(const UnitBuf<const T> &other1, const UnitBuf<const T> &other2, const ClpRngs& clpRngs, const uint8_t gbiIdx /* = GBI_DEFAULT */, const bool chromaOnly /* = false */, const bool lumaOnly /* = false */) +{ + const size_t istart = chromaOnly ? 1 : 0; + const size_t iend = lumaOnly ? 1 : bufs.size(); + + CHECK(lumaOnly && chromaOnly, "should not happen"); + + for(size_t i = istart; i < iend; i++) + { + bufs[i].addWeightedAvg(other1.bufs[i], other2.bufs[i], clpRngs.comp[i], gbiIdx); + } +} +#endif template<typename T> void UnitBuf<T>::addAvg(const UnitBuf<const T> &other1, const UnitBuf<const T> &other2, const ClpRngs& clpRngs, const bool chromaOnly /* = false */, const bool lumaOnly /* = false */) @@ -684,12 +786,25 @@ void UnitBuf<T>::extendBorderPel( unsigned margin ) template<typename T> void UnitBuf<T>::removeHighFreq( const UnitBuf<T>& other, const bool bClip, const ClpRngs& clpRngs +#if JVET_L0646_GBI + , const int8_t gbiWeight +#endif ) { - for( unsigned i = 0; i < bufs.size(); i++ ) +#if JVET_L0646_GBI + if(gbiWeight != g_GbiWeights[GBI_DEFAULT]) { - bufs[i].removeHighFreq(other.bufs[i], bClip, clpRngs.comp[i] ); + bufs[0].removeWeightHighFreq(other.bufs[0], bClip, clpRngs.comp[0], gbiWeight); + return; } + bufs[0].removeHighFreq(other.bufs[0], bClip, clpRngs.comp[0]); +#else + for (unsigned i = 0; 
i <bufs.size(); i++) + { + bufs[i].removeHighFreq(other.bufs[i], bClip, clpRngs.comp[i]); + } +#endif + } template<typename T> diff --git a/source/Lib/CommonLib/CodingStatistics.h b/source/Lib/CommonLib/CodingStatistics.h index 57a3263fee2d321728311da4a35a347a03484ffc..be99dbbe7dc88237915100a14a634d2dcc445f33 100644 --- a/source/Lib/CommonLib/CodingStatistics.h +++ b/source/Lib/CommonLib/CodingStatistics.h @@ -100,6 +100,9 @@ enum CodingStatisticsType STATS__CABAC_BITS__PAR_FLAG, STATS__CABAC_BITS__ALF, STATS__CABAC_BITS__IMV_FLAG, +#if JVET_L0646_GBI + STATS__CABAC_BITS__GBI_IDX, +#endif STATS__CABAC_BITS__EMT_CU_FLAG, STATS__CABAC_BITS__EMT_TU_INDEX, STATS__TOOL_EMT, @@ -170,6 +173,9 @@ static inline const char* getName(CodingStatisticsType name) "CABAC_BITS__ALIGNED_SIGN_BIT", "CABAC_BITS__ALIGNED_ESCAPE_BITS", "CABAC_BITS__IMV_FLAG", +#if JVET_L0646_GBI + "CABAC_BITS__GBI_IDX", +#endif "CABAC_BITS__EMT_CU_FLAG", "CABAC_BITS__EMT_TU_INDX", "CABAC_BITS__OTHER", diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h index 55c0bd395a0544eafd1901fd1c9ff71e1f4311a4..14a5671cebd5d82bea22b83fc756ff5de6153f75 100644 --- a/source/Lib/CommonLib/CommonDef.h +++ b/source/Lib/CommonLib/CommonDef.h @@ -291,6 +291,11 @@ static const int AFFINE_MAX_NUM_V2 = 2; ///< max static const int AFFINE_MAX_NUM_COMB = 12; ///< max number of combined motion candidates static const int AFFINE_MIN_BLOCK_SIZE = 4; ///< Minimum affine MC block size +#if JVET_L0646_GBI +static const int GBI_NUM = 5; ///< the number of weight options +static const int GBI_DEFAULT = ((uint8_t)(GBI_NUM >> 1)); ///< Default weighting index representing for w=0.5 +static const int GBI_SIZE_CONSTRAINT = 256; ///< disabling GBi if cu size is smaller than 256 +#endif #if W0038_DB_OPT static const int MAX_ENCODER_DEBLOCKING_QUALITY_LAYERS = 8 ; diff --git a/source/Lib/CommonLib/ContextModelling.cpp b/source/Lib/CommonLib/ContextModelling.cpp index 
cfd5e8b1adf6e7ff1ec4aa399ac229f548fa6bed..4f2296d89d8ddfc86a40b055842c345f539a6bce 100644 --- a/source/Lib/CommonLib/ContextModelling.cpp +++ b/source/Lib/CommonLib/ContextModelling.cpp @@ -347,5 +347,8 @@ void MergeCtx::setMergeInfo( PredictionUnit& pu, int candIdx ) pu.mvpNum [REF_PIC_LIST_0] = NOT_VALID; pu.mvpNum [REF_PIC_LIST_1] = NOT_VALID; - +#if JVET_L0646_GBI + pu.cu->GBiIdx = ( interDirNeighbours[candIdx] == 3 ) ? GBiIdx[candIdx] : GBI_DEFAULT; +#endif + } diff --git a/source/Lib/CommonLib/ContextModelling.h b/source/Lib/CommonLib/ContextModelling.h index 681631922ed8df536bce7a8d1c95aaa84935ebb5..1a353fd6579319f32648e630c59674f13da1527e 100644 --- a/source/Lib/CommonLib/ContextModelling.h +++ b/source/Lib/CommonLib/ContextModelling.h @@ -265,6 +265,9 @@ public: ~MergeCtx() {} public: MvField mvFieldNeighbours [ MRG_MAX_NUM_CANDS << 1 ]; // double length for mv of both lists +#if JVET_L0646_GBI + uint8_t GBiIdx [ MRG_MAX_NUM_CANDS ]; +#endif unsigned char interDirNeighbours[ MRG_MAX_NUM_CANDS ]; MergeType mrgTypeNeighbours [ MRG_MAX_NUM_CANDS ]; int numValidMergeCand; diff --git a/source/Lib/CommonLib/Contexts.cpp b/source/Lib/CommonLib/Contexts.cpp index c735ab2d932ef6917a7797c16786d6b45400e7c4..334ab7a792e461d545d1343bff92d887f2ee96b8 100644 --- a/source/Lib/CommonLib/Contexts.cpp +++ b/source/Lib/CommonLib/Contexts.cpp @@ -367,6 +367,15 @@ const CtxSet ContextSetCfg::AffineType = ContextSetCfg::addCtxSet { CNU, }, }); +#if JVET_L0646_GBI +const CtxSet ContextSetCfg::GBiIdx = ContextSetCfg::addCtxSet +({ + // 4 ctx for 1st bin; 1 ctx for each of rest bins + { 95, 79, 63, 31, 31, 31, 31, }, + { 95, 79, 63, 31, 31, 31, 31, }, + { CNU, CNU, CNU, CNU, CNU, CNU, CNU, }, + }); +#endif const CtxSet ContextSetCfg::Mvd = ContextSetCfg::addCtxSet ({ diff --git a/source/Lib/CommonLib/Contexts.h b/source/Lib/CommonLib/Contexts.h index 8f18ae58e7bf974738cddbe5edfb4ebe96373c92..3e4679c06bd64c00f0f7611e8ca52e739902a135 100644 --- a/source/Lib/CommonLib/Contexts.h +++ 
b/source/Lib/CommonLib/Contexts.h @@ -191,6 +191,9 @@ public: static const CtxSet ChromaQpAdjFlag; static const CtxSet ChromaQpAdjIdc; static const CtxSet ImvFlag; +#if JVET_L0646_GBI + static const CtxSet GBiIdx; +#endif static const CtxSet ctbAlfFlag; static const unsigned NumberOfContexts; diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp index 8030555ab0e6e24a5f1525fcb53b65195d828a42..c6ea5e914dc34da5d0886b1afb10108142b98bf4 100644 --- a/source/Lib/CommonLib/InterPrediction.cpp +++ b/source/Lib/CommonLib/InterPrediction.cpp @@ -620,6 +620,13 @@ void InterPrediction::xWeightedAverage( const PredictionUnit& pu, const CPelUnit if( iRefIdx0 >= 0 && iRefIdx1 >= 0 ) { +#if JVET_L0646_GBI + if( pu.cu->GBiIdx != GBI_DEFAULT ) + { + pcYuvDst.addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, pu.cu->GBiIdx); + return; + } +#endif pcYuvDst.addAvg( pcYuvSrc0, pcYuvSrc1, clpRngs ); } else if( iRefIdx0 >= 0 && iRefIdx1 < 0 ) diff --git a/source/Lib/CommonLib/MotionInfo.h b/source/Lib/CommonLib/MotionInfo.h index d41a9371209cdb9661377de1858e540b6f8ef7c0..26fcb5a712797ddf09b3b2b262617542e8c8269d 100644 --- a/source/Lib/CommonLib/MotionInfo.h +++ b/source/Lib/CommonLib/MotionInfo.h @@ -140,5 +140,73 @@ struct MotionInfo } }; +#if JVET_L0646_GBI +class GBiMotionParam +{ + bool m_readOnly[2][33]; // 2 RefLists, 33 RefFrames + Mv m_mv[2][33]; + Distortion m_dist[2][33]; + + bool m_readOnlyAffine[2][2][33]; + Mv m_mvAffine[2][2][33][3]; + Distortion m_distAffine[2][2][33]; + +public: + + void reset() + { + Mv* pMv = &(m_mv[0][0]); + for (int ui = 0; ui < 1 * 2 * 33; ++ui, ++pMv) + { + pMv->set(std::numeric_limits<int16_t>::max(), std::numeric_limits<int16_t>::max()); + } + + Mv* pAffineMv = &(m_mvAffine[0][0][0][0]); + for (int ui = 0; ui < 2 * 2 * 33 * 3; ++ui, ++pAffineMv) /* step pAffineMv (not pMv) so every affine MV is zeroed */ + { + pAffineMv->set(0, 0); + } + + memset(m_readOnly, false, 2 * 33 * sizeof(bool)); + memset(m_dist, -1, 2 * 33 * sizeof(Distortion)); + memset(m_readOnlyAffine, false, 2 * 
2 * 33 * sizeof(bool)); + memset(m_distAffine, -1, 2 * 2 * 33 * sizeof(Distortion)); + } + + void setReadMode(bool b, uint32_t uiRefList, uint32_t uiRefIdx) { m_readOnly[uiRefList][uiRefIdx] = b; } + bool isReadMode(uint32_t uiRefList, uint32_t uiRefIdx) { return m_readOnly[uiRefList][uiRefIdx]; } + + void setReadModeAffine(bool b, uint32_t uiRefList, uint32_t uiRefIdx, int bP4) { m_readOnlyAffine[bP4][uiRefList][uiRefIdx] = b; } + bool isReadModeAffine(uint32_t uiRefList, uint32_t uiRefIdx, int bP4) { return m_readOnlyAffine[bP4][uiRefList][uiRefIdx]; } + + Mv& getMv(uint32_t uiRefList, uint32_t uiRefIdx) { return m_mv[uiRefList][uiRefIdx]; } + + void copyFrom(Mv& rcMv, Distortion uiDist, uint32_t uiRefList, uint32_t uiRefIdx) + { + m_mv[uiRefList][uiRefIdx] = rcMv; + m_dist[uiRefList][uiRefIdx] = uiDist; + } + + void copyTo(Mv& rcMv, Distortion& ruiDist, uint32_t uiRefList, uint32_t uiRefIdx) + { + rcMv = m_mv[uiRefList][uiRefIdx]; + ruiDist = m_dist[uiRefList][uiRefIdx]; + } + + Mv& getAffineMv(uint32_t uiRefList, uint32_t uiRefIdx, uint32_t uiAffineMvIdx, int bP4) { return m_mvAffine[bP4][uiRefList][uiRefIdx][uiAffineMvIdx]; } + + void copyAffineMvFrom(Mv(&racAffineMvs)[3], Distortion uiDist, uint32_t uiRefList, uint32_t uiRefIdx, int bP4) + { + memcpy(m_mvAffine[bP4][uiRefList][uiRefIdx], racAffineMvs, 3 * sizeof(Mv)); + m_distAffine[bP4][uiRefList][uiRefIdx] = uiDist; + } + + void copyAffineMvTo(Mv acAffineMvs[3], Distortion& ruiDist, uint32_t uiRefList, uint32_t uiRefIdx, int bP4) + { + memcpy(acAffineMvs, m_mvAffine[bP4][uiRefList][uiRefIdx], 3 * sizeof(Mv)); + ruiDist = m_distAffine[bP4][uiRefList][uiRefIdx]; + } +}; +#endif #endif // __MOTIONINFO__ diff --git a/source/Lib/CommonLib/Rom.cpp b/source/Lib/CommonLib/Rom.cpp index e2f8ccaca4af12c2862205ae42f2be8c6b255c1f..55061897d875aa7670c99123b2b5bfab943448b3 100644 --- a/source/Lib/CommonLib/Rom.cpp +++ b/source/Lib/CommonLib/Rom.cpp @@ -183,7 +183,71 @@ public: const int g_aiNonLMPosThrs[] = { 3, 1, 0 }; 
+#if JVET_L0646_GBI +const int8_t g_GbiLog2WeightBase = 3; +const int8_t g_GbiWeightBase = (1 << g_GbiLog2WeightBase); +const int8_t g_GbiWeights[GBI_NUM] = { -2, 3, 4, 5, 10 }; +const int8_t g_GbiSearchOrder[GBI_NUM] = { GBI_DEFAULT, GBI_DEFAULT - 2, GBI_DEFAULT + 2, GBI_DEFAULT - 1, GBI_DEFAULT + 1 }; +int8_t g_GbiCodingOrder[GBI_NUM]; +int8_t g_GbiParsingOrder[GBI_NUM]; + +int8_t getGbiWeight(uint8_t gbiIdx, uint8_t uhRefFrmList) +{ + // Weights for the model: P0 + w * (P1 - P0) = (1-w) * P0 + w * P1 + // Returning 1-w for P0 or w for P1 + return (uhRefFrmList == REF_PIC_LIST_0 ? g_GbiWeightBase - g_GbiWeights[gbiIdx] : g_GbiWeights[gbiIdx]); +} + +void resetGbiCodingOrder(bool bRunDecoding, const CodingStructure &cs) +{ + // Form parsing order: { GBI_DEFAULT, GBI_DEFAULT+1, GBI_DEFAULT-1, GBI_DEFAULT+2, GBI_DEFAULT-2, ... } + g_GbiParsingOrder[0] = GBI_DEFAULT; + for (int i = 1; i <= (GBI_NUM >> 1); ++i) + { + g_GbiParsingOrder[2 * i - 1] = GBI_DEFAULT + (int8_t)i; + g_GbiParsingOrder[2 * i] = GBI_DEFAULT - (int8_t)i; + } + + // Form encoding order + if (!bRunDecoding) + { + for (int i = 0; i < GBI_NUM; ++i) + { + g_GbiCodingOrder[(uint32_t)g_GbiParsingOrder[i]] = i; + } + } +} + +uint32_t deriveWeightIdxBits(uint8_t gbiIdx) // Note: align this with TEncSbac::codeGbiIdx and TDecSbac::parseGbiIdx +{ + uint32_t numBits = 1; + uint8_t gbiCodingIdx = (uint8_t)g_GbiCodingOrder[gbiIdx]; + + if (GBI_NUM > 2 && gbiCodingIdx != 0) + { + uint32_t prefixNumBits = GBI_NUM - 2; + uint32_t step = 1; + uint8_t prefixSymbol = gbiCodingIdx; + // Truncated unary code + uint8_t idx = 1; + for (int ui = 0; ui < prefixNumBits; ++ui) + { + if (prefixSymbol == idx) + { + ++numBits; + break; + } + else + { + ++numBits; + idx += step; + } + } + } + return numBits; +} +#endif // initialize ROM variables void initROM() diff --git a/source/Lib/CommonLib/Rom.h b/source/Lib/CommonLib/Rom.h index e8c0c5acbd49b56815db5051c3782eaf898e49ea..e60c5127eab38cf836243de3a4e66220c365c514 100644 --- 
a/source/Lib/CommonLib/Rom.h +++ b/source/Lib/CommonLib/Rom.h @@ -212,6 +212,20 @@ extern MsgLevel g_verbosity; extern const int g_aiNonLMPosThrs[]; +#if JVET_L0646_GBI +extern const int8_t g_GbiLog2WeightBase; +extern const int8_t g_GbiWeightBase; +extern const int8_t g_GbiWeights[GBI_NUM]; +extern const int8_t g_GbiSearchOrder[GBI_NUM]; +extern int8_t g_GbiCodingOrder[GBI_NUM]; +extern int8_t g_GbiParsingOrder[GBI_NUM]; + +class CodingStructure; +int8_t getGbiWeight(uint8_t gbiIdx, uint8_t uhRefFrmList); +void resetGbiCodingOrder(bool bRunDecoding, const CodingStructure &cs); +uint32_t deriveWeightIdxBits(uint8_t gbiIdx); +#endif + constexpr uint8_t g_tbMax[257] = { 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h index 57acda967f0835978a34a19b2612f9a557d29927..d6ef610c7304e8361981d2524201642290e29fd9 100644 --- a/source/Lib/CommonLib/Slice.h +++ b/source/Lib/CommonLib/Slice.h @@ -810,6 +810,9 @@ private: bool m_InterEMT; // 19 bool m_Affine; bool m_AffineType; +#if JVET_L0646_GBI + bool m_GBi; // +#endif bool m_MTTEnabled; // #if ENABLE_WPP_PARALLELISM bool m_NextDQP; @@ -876,6 +879,10 @@ public: bool getUseIntraEMT () const { return m_IntraEMT; } void setUseInterEMT ( bool b ) { m_InterEMT = b; } bool getUseInterEMT () const { return m_InterEMT; } +#if JVET_L0646_GBI + void setUseGBi ( bool b ) { m_GBi = b; } + bool getUseGBi () const { return m_GBi; } +#endif //===== additional parameters ===== // qtbt void setCTUSize ( unsigned ctuSize ) { m_CTUSize = ctuSize; } diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 3ef736ae4442374b74280f6aa6d676a46b62d62c..d4a439ae7b4fdb1ef9bc0fd70367f5be1ce99a27 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h 
@@ -50,6 +50,8 @@ #include <assert.h> #include <cassert> +#define JVET_L0646_GBI 1 // Generalized bi-prediction (GBi) + #define REUSE_CU_RESULTS 1 #define REMOVE_MV_ADAPT_PREC 1 // remove the high precision flag in the MV class @@ -213,6 +215,10 @@ #define ENABLE_SIMD_OPT_DIST ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for the distortion calculations(SAD,SSE,HADAMARD), no impact on RD performance #define ENABLE_SIMD_OPT_AFFINE_ME ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for affine ME, no impact on RD performance #define ENABLE_SIMD_OPT_ALF ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for ALF +#if ENABLE_SIMD_OPT_BUFFER +#define ENABLE_SIMD_OPT_GBI 1 ///< SIMD optimization for GBi +#endif + // End of SIMD optimizations diff --git a/source/Lib/CommonLib/Unit.cpp b/source/Lib/CommonLib/Unit.cpp index 7483af675d2db2af484d9ed6579645714d50ee37..6152c93f57b58f37865eb29ad338f314b8b6babd 100644 --- a/source/Lib/CommonLib/Unit.cpp +++ b/source/Lib/CommonLib/Unit.cpp @@ -266,6 +266,9 @@ CodingUnit& CodingUnit::operator=( const CodingUnit& other ) #endif imv = other.imv; imvNumCand = other.imvNumCand; +#if JVET_L0646_GBI + GBiIdx = other.GBiIdx; +#endif return *this; } @@ -292,6 +295,9 @@ void CodingUnit::initData() #endif imv = 0; imvNumCand = 0; +#if JVET_L0646_GBI + GBiIdx = GBI_DEFAULT; +#endif } diff --git a/source/Lib/CommonLib/Unit.h b/source/Lib/CommonLib/Unit.h index 9085d6e2285e5c72da206142e043edce2c2a6fab..93924a20065a649ddfe4e80adaf1b4553766616c 100644 --- a/source/Lib/CommonLib/Unit.h +++ b/source/Lib/CommonLib/Unit.h @@ -305,6 +305,10 @@ struct CodingUnit : public UnitArea uint32_t tileIdx; #endif uint8_t emtFlag; +#if JVET_L0646_GBI + uint8_t GBiIdx; + int refIdxBi[2]; +#endif // needed for fast imv mode decisions int8_t imvNumCand; diff --git a/source/Lib/CommonLib/UnitTools.cpp b/source/Lib/CommonLib/UnitTools.cpp index feb106da69a7e27d1bf3a27f4015b3240c911d77..6d0c6f999c0565746035886fd95d87b84e3aca24 100644 --- 
a/source/Lib/CommonLib/UnitTools.cpp +++ b/source/Lib/CommonLib/UnitTools.cpp @@ -507,6 +507,9 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co for (uint32_t ui = 0; ui < maxNumMergeCand; ++ui) { isCandInter[ui] = false; +#if JVET_L0646_GBI + mrgCtx.GBiIdx[ui] = GBI_DEFAULT; +#endif mrgCtx.interDirNeighbours[ui] = 0; mrgCtx.mrgTypeNeighbours [ui] = MRG_TYPE_DEFAULT_N; mrgCtx.mvFieldNeighbours[(ui << 1) ].refIdx = NOT_VALID; @@ -536,6 +539,9 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co // get Inter Dir mrgCtx.interDirNeighbours[cnt] = miLeft.interDir; +#if JVET_L0646_GBI + mrgCtx.GBiIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? puLeft->cu->GBiIdx : GBI_DEFAULT; +#endif // get Mv from Left mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miLeft.mv[0], miLeft.refIdx[0]); @@ -575,6 +581,9 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co // get Inter Dir mrgCtx.interDirNeighbours[cnt] = miAbove.interDir; // get Mv from Above +#if JVET_L0646_GBI + mrgCtx.GBiIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? puAbove->cu->GBiIdx : GBI_DEFAULT; +#endif mrgCtx.mvFieldNeighbours[cnt << 1].setMvField( miAbove.mv[0], miAbove.refIdx[0] ); if( slice.isInterB() ) @@ -617,6 +626,9 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co // get Inter Dir mrgCtx.interDirNeighbours[cnt] = miAboveRight.interDir; // get Mv from Above-right +#if JVET_L0646_GBI + mrgCtx.GBiIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? puAboveRight->cu->GBiIdx : GBI_DEFAULT; +#endif mrgCtx.mvFieldNeighbours[cnt << 1].setMvField( miAboveRight.mv[0], miAboveRight.refIdx[0] ); if( slice.isInterB() ) @@ -657,6 +669,9 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co // get Inter Dir mrgCtx.interDirNeighbours[cnt] = miBelowLeft.interDir; +#if JVET_L0646_GBI + mrgCtx.GBiIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? 
puLeftBottom->cu->GBiIdx : GBI_DEFAULT; +#endif // get Mv from Bottom-Left mrgCtx.mvFieldNeighbours[cnt << 1].setMvField( miBelowLeft.mv[0], miBelowLeft.refIdx[0] ); @@ -740,6 +755,9 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co // get Inter Dir mrgCtx.interDirNeighbours[cnt] = miAboveLeft.interDir; +#if JVET_L0646_GBI + mrgCtx.GBiIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? puAboveLeft->cu->GBiIdx : GBI_DEFAULT; +#endif // get Mv from Above-Left mrgCtx.mvFieldNeighbours[cnt << 1].setMvField( miAboveLeft.mv[0], miAboveLeft.refIdx[0] ); @@ -867,6 +885,9 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co if( addTMvp ) { mrgCtx.interDirNeighbours[uiArrayAddr] = dir; +#if JVET_L0646_GBI + mrgCtx.GBiIdx [uiArrayAddr] = GBI_DEFAULT; +#endif isCandInter [uiArrayAddr] = true; if( mrgCandIdx == cnt && canFastExit ) @@ -903,6 +924,9 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co { isCandInter[uiArrayAddr] = true; mrgCtx.interDirNeighbours[uiArrayAddr] = 3; +#if JVET_L0646_GBI + mrgCtx.GBiIdx[uiArrayAddr] = ((mrgCtx.interDirNeighbours[uiArrayAddr] == 3)) ? 
CU::deriveGbiIdx(mrgCtx.GBiIdx[i], mrgCtx.GBiIdx[j]) : GBI_DEFAULT; +#endif // get Mv from cand[i] and cand[j] mrgCtx.mvFieldNeighbours[ uiArrayAddr << 1 ].setMvField(mrgCtx.mvFieldNeighbours[ i << 1 ].mv, mrgCtx.mvFieldNeighbours[ i << 1 ].refIdx); @@ -937,6 +961,9 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co { isCandInter [uiArrayAddr ] = true; mrgCtx.interDirNeighbours [uiArrayAddr ] = 1; +#if JVET_L0646_GBI + mrgCtx.GBiIdx [uiArrayAddr ] = GBI_DEFAULT; +#endif mrgCtx.mvFieldNeighbours [uiArrayAddr << 1].setMvField(Mv(0, 0), r); if (slice.isInterB()) @@ -1867,7 +1894,11 @@ bool PU::isAffineMrgFlagCoded( const PredictionUnit &pu ) } return getFirstAvailableAffineNeighbour( pu ) != nullptr; } +#if JVET_L0646_GBI +void PU::getAffineMergeCand( const PredictionUnit &pu, MvField(*mvFieldNeighbours)[3], unsigned char &interDirNeighbours, unsigned char &gbiIdx, int &numValidMergeCand ) +#else void PU::getAffineMergeCand( const PredictionUnit &pu, MvField (*mvFieldNeighbours)[3], unsigned char &interDirNeighbours, int &numValidMergeCand ) +#endif { for ( int mvNum = 0; mvNum < 3; mvNum++ ) { @@ -1879,6 +1910,9 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, MvField (*mvFieldNeighbou if( puFirstNeighbour == nullptr ) { numValidMergeCand = -1; +#if JVET_L0646_GBI + gbiIdx = GBI_DEFAULT; +#endif return; } else @@ -1913,6 +1947,9 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, MvField (*mvFieldNeighbou } } } +#if JVET_L0646_GBI + gbiIdx = puFirstNeighbour->cu->GBiIdx; +#endif } void PU::setAllAffineMvField( PredictionUnit &pu, MvField *mvField, RefPicList eRefList ) @@ -2211,6 +2248,9 @@ bool PU::getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx& mrgCtx, b mrgCtx.mvFieldNeighbours[(count << 1) + currRefListId].setMvField(cColMv, 0); mrgCtx.interDirNeighbours[count] |= (1 << currRefListId); LICFlag = tempLICFlag; +#if JVET_L0646_GBI + mrgCtx.GBiIdx[count] = GBI_DEFAULT; +#endif found = true; } else @@ 
-2456,6 +2496,9 @@ void PU::restrictBiPredMergeCands( const PredictionUnit &pu, MergeCtx& mergeCtx { mergeCtx.interDirNeighbours[ mergeCand ] = 1; mergeCtx.mvFieldNeighbours[( mergeCand << 1 ) + 1].setMvField( Mv( 0, 0 ), -1 ); +#if JVET_L0646_GBI + mergeCtx.GBiIdx[mergeCand] = GBI_DEFAULT; +#endif } } } @@ -2572,9 +2615,108 @@ int CU::getMaxNeighboriMVCandNum( const CodingStructure& cs, const Position& pos return maxImvNumCand; } +#if JVET_L0646_GBI +bool CU::isGBiIdxCoded( const CodingUnit &cu ) +{ + if( cu.cs->sps->getSpsNext().getUseGBi() == false ) + { + CHECK(cu.GBiIdx != GBI_DEFAULT, "Error: cu.GBiIdx != GBI_DEFAULT"); + return false; + } + + if( cu.predMode == MODE_INTRA || cu.cs->slice->isInterP() ) + { + return false; + } + if( cu.lwidth() * cu.lheight() < GBI_SIZE_CONSTRAINT ) + { + return false; + } + if( cu.firstPU->interDir == 3 && !cu.firstPU->mergeFlag ) + { + return true; + } + return false; +} + +uint8_t CU::getValidGbiIdx( const CodingUnit &cu ) +{ + if( cu.firstPU->interDir == 3 && !cu.firstPU->mergeFlag ) + { + return cu.GBiIdx; + } + else if( cu.firstPU->interDir == 3 && cu.firstPU->mergeFlag && cu.firstPU->mergeType == MRG_TYPE_DEFAULT_N ) + { + // This is intended to do nothing here. + } + else if( cu.firstPU->mergeFlag && cu.firstPU->mergeType == MRG_TYPE_SUBPU_ATMVP ) + { + CHECK(cu.GBiIdx != GBI_DEFAULT, " cu.GBiIdx != GBI_DEFAULT "); + } + else + { + CHECK(cu.GBiIdx != GBI_DEFAULT, " cu.GBiIdx != GBI_DEFAULT "); + } + + return GBI_DEFAULT; +} + +void CU::setGbiIdx( CodingUnit &cu, uint8_t uh ) +{ + int8_t uhCnt = 0; + + if( cu.firstPU->interDir == 3 && !cu.firstPU->mergeFlag ) + { + cu.GBiIdx = uh; + ++uhCnt; + } + else if( cu.firstPU->interDir == 3 && cu.firstPU->mergeFlag && cu.firstPU->mergeType == MRG_TYPE_DEFAULT_N ) + { + // This is intended to do nothing here. 
+ } + else if( cu.firstPU->mergeFlag && cu.firstPU->mergeType == MRG_TYPE_SUBPU_ATMVP ) + { + cu.GBiIdx = GBI_DEFAULT; + } + else + { + cu.GBiIdx = GBI_DEFAULT; + } + + CHECK(uhCnt <= 0, " uhCnt <= 0 "); +} + +uint8_t CU::deriveGbiIdx( uint8_t gbiLO, uint8_t gbiL1 ) +{ + if( gbiLO == gbiL1 ) + { + return gbiLO; + } + const int8_t w0 = getGbiWeight(gbiLO, REF_PIC_LIST_0); + const int8_t w1 = getGbiWeight(gbiL1, REF_PIC_LIST_1); + const int8_t th = g_GbiWeightBase >> 1; + const int8_t off = 1; + + if( w0 == w1 || (w0 < (th - off) && w1 < (th - off)) || (w0 >(th + off) && w1 >(th + off)) ) + { + return GBI_DEFAULT; + } + else + { + if( w0 > w1 ) + { + return ( w0 >= th ? gbiLO : gbiL1 ); + } + else + { + return ( w1 >= th ? gbiL1 : gbiLO ); + } + } +} +#endif // TU tools diff --git a/source/Lib/CommonLib/UnitTools.h b/source/Lib/CommonLib/UnitTools.h index 909810f89df86612343aa66f3b77dd680251744f..3247170c6f357a977fdc39d16be213adee1a28ee 100644 --- a/source/Lib/CommonLib/UnitTools.h +++ b/source/Lib/CommonLib/UnitTools.h @@ -84,6 +84,12 @@ namespace CU bool hasNonTsCodedBlock (const CodingUnit& cu); uint32_t getNumNonZeroCoeffNonTs (const CodingUnit& cu); +#if JVET_L0646_GBI + bool isGBiIdxCoded (const CodingUnit& cu); + uint8_t getValidGbiIdx (const CodingUnit& cu); + void setGbiIdx (CodingUnit& cu, uint8_t uh); + uint8_t deriveGbiIdx (uint8_t gbiLO, uint8_t gbiL1); +#endif PUTraverser traversePUs ( CodingUnit& cu); TUTraverser traverseTUs ( CodingUnit& cu); @@ -115,7 +121,11 @@ namespace PU bool isBipredRestriction (const PredictionUnit &pu); void spanMotionInfo ( PredictionUnit &pu, const MergeCtx &mrgCtx = MergeCtx() ); void applyImv ( PredictionUnit &pu, MergeCtx &mrgCtx, InterPrediction *interPred = NULL ); +#if JVET_L0646_GBI + void getAffineMergeCand (const PredictionUnit &pu, MvField(*mvFieldNeighbours)[3], unsigned char &interDirNeighbours, unsigned char &gbiIdx, int &numValidMergeCand); +#else void getAffineMergeCand (const PredictionUnit &pu, MvField 
(*mvFieldNeighbours)[3], unsigned char &interDirNeighbours, int &numValidMergeCand ); +#endif bool isAffineMrgFlagCoded (const PredictionUnit &pu ); void getAffineMergeCand (const PredictionUnit &pu, MvField (*mvFieldNeighbours)[3], unsigned char &interDirNeighbours, int &numValidMergeCand ); void setAllAffineMvField ( PredictionUnit &pu, MvField *mvField, RefPicList eRefList ); diff --git a/source/Lib/CommonLib/x86/BufferX86.h b/source/Lib/CommonLib/x86/BufferX86.h index d9693eedff4621d12ad655e76dbf118d991b4a52..ba4024fe55ad2b0412830bf5b57a1d303c96b12d 100644 --- a/source/Lib/CommonLib/x86/BufferX86.h +++ b/source/Lib/CommonLib/x86/BufferX86.h @@ -211,6 +211,171 @@ void reco_SSE( const int16_t* src0, int src0Stride, const int16_t* src1, int src } } +#if ENABLE_SIMD_OPT_GBI +template< X86_VEXT vext, int W > +void removeWeightHighFreq_SSE(int16_t* src0, int src0Stride, const int16_t* src1, int src1Stride, int width, int height, int shift, int gbiWeight) +{ + int normalizer = ((1 << 16) + (gbiWeight>0 ? 
(gbiWeight >> 1) : -(gbiWeight >> 1))) / gbiWeight; + int weight0 = normalizer << g_GbiLog2WeightBase; + int weight1 = (g_GbiWeightBase - gbiWeight)*normalizer; + int offset = 1 << (shift - 1); + if (W == 8) + { +#if 0//USE_AVX2 + if (vext >= AVX2) + { + __m256i vzero = _mm256_setzero_si256(); + __m256i voffset = _mm256_set1_epi32(offset); + __m256i vw0 = _mm256_set1_epi32(weight0); + __m256i vw1 = _mm256_set1_epi32(weight1); + + for (int row = 0; row < height; row++) + { + for (int col = 0; col < width; col += 8) + { + __m256i vsrc0, vsrc1; + __m128i a = _mm_load_si128((const __m128i *)&src0[col]); + __m128i b = _mm_load_si128((const __m128i *)&src1[col]); + + vsrc0 = _mm256_cvtepi16_epi32(a); + vsrc1 = _mm256_cvtepi16_epi32(b); + vsrc0 = _mm256_mullo_epi32(vsrc0, vw0); + vsrc1 = _mm256_mullo_epi32(vsrc1, vw1); + vsrc0 = _mm256_add_epi32(_mm256_sub_epi32(vsrc0, vsrc1), voffset); + vsrc0 = _mm256_srai_epi32(vsrc0, shift); + + vsrc0 = _mm256_packs_epi32(vsrc0, vzero); + + _mm_store_si128((__m128i *)&src0[col], _mm256_castsi256_si128(vsrc0)); + } + + src0 += src0Stride; + src1 += src1Stride; + } + } + else +#endif + { + __m128i vzero = _mm_setzero_si128(); + __m128i voffset = _mm_set1_epi32(offset); + __m128i vw0 = _mm_set1_epi32(weight0); + __m128i vw1 = _mm_set1_epi32(weight1); + + for (int row = 0; row < height; row++) + { + for (int col = 0; col < width; col += 8) + { + __m128i vsrc0 = _mm_load_si128((const __m128i *)&src0[col]); + __m128i vsrc1 = _mm_load_si128((const __m128i *)&src1[col]); + + __m128i vtmp, vdst, vsrc; + vdst = _mm_cvtepi16_epi32(vsrc0); + vsrc = _mm_cvtepi16_epi32(vsrc1); + vdst = _mm_mullo_epi32(vdst, vw0); + vsrc = _mm_mullo_epi32(vsrc, vw1); + vtmp = _mm_add_epi32(_mm_sub_epi32(vdst, vsrc), voffset); + vtmp = _mm_srai_epi32(vtmp, shift); + + vsrc0 = _mm_unpackhi_epi64(vsrc0, vzero); + vsrc1 = _mm_unpackhi_epi64(vsrc1, vzero); + vdst = _mm_cvtepi16_epi32(vsrc0); + vsrc = _mm_cvtepi16_epi32(vsrc1); + vdst = _mm_mullo_epi32(vdst, vw0); + vsrc 
= _mm_mullo_epi32(vsrc, vw1); + vdst = _mm_add_epi32(_mm_sub_epi32(vdst, vsrc), voffset); + vdst = _mm_srai_epi32(vdst, shift); + vdst = _mm_packs_epi32(vtmp, vdst); + + _mm_store_si128((__m128i *)&src0[col], vdst); + } + + src0 += src0Stride; + src1 += src1Stride; + } + } + } + else if (W == 4) + { + __m128i vzero = _mm_setzero_si128(); + __m128i voffset = _mm_set1_epi32(offset); + __m128i vw0 = _mm_set1_epi32(weight0); + __m128i vw1 = _mm_set1_epi32(weight1); + + for (int row = 0; row < height; row++) + { + __m128i vsum = _mm_loadl_epi64((const __m128i *)src0); + __m128i vdst = _mm_loadl_epi64((const __m128i *)src1); + + vsum = _mm_cvtepi16_epi32(vsum); + vdst = _mm_cvtepi16_epi32(vdst); + vsum = _mm_mullo_epi32(vsum, vw0); + vdst = _mm_mullo_epi32(vdst, vw1); + vsum = _mm_add_epi32(_mm_sub_epi32(vsum, vdst), voffset); + vsum = _mm_srai_epi32(vsum, shift); + vsum = _mm_packs_epi32(vsum, vzero); + + _mm_storel_epi64((__m128i *)src0, vsum); + + src0 += src0Stride; + src1 += src1Stride; + } + } + else + { + THROW("Unsupported size"); + } +} + +template< X86_VEXT vext, int W > +void removeHighFreq_SSE(int16_t* src0, int src0Stride, const int16_t* src1, int src1Stride, int width, int height) +{ + if (W == 8) + { + // TODO: AVX2 impl + { + for (int row = 0; row < height; row++) + { + for (int col = 0; col < width; col += 8) + { + __m128i vsrc0 = _mm_load_si128((const __m128i *)&src0[col]); + __m128i vsrc1 = _mm_load_si128((const __m128i *)&src1[col]); + + vsrc0 = _mm_sub_epi16(_mm_slli_epi16(vsrc0, 1), vsrc1); + _mm_store_si128((__m128i *)&src0[col], vsrc0); + } + + src0 += src0Stride; + src1 += src1Stride; + } + } + } + else if (W == 4) + { + for (int row = 0; row < height; row += 2) + { + __m128i vsrc0 = _mm_loadl_epi64((const __m128i *)src0); + __m128i vsrc1 = _mm_loadl_epi64((const __m128i *)src1); + __m128i vsrc0_2 = _mm_loadl_epi64((const __m128i *)(src0 + src0Stride)); + __m128i vsrc1_2 = _mm_loadl_epi64((const __m128i *)(src1 + src1Stride)); + + vsrc0 = 
_mm_unpacklo_epi64(vsrc0, vsrc0_2); + vsrc1 = _mm_unpacklo_epi64(vsrc1, vsrc1_2); + + vsrc0 = _mm_sub_epi16(_mm_slli_epi16(vsrc0, 1), vsrc1); + _mm_storel_epi64((__m128i *)src0, vsrc0); + _mm_storel_epi64((__m128i *)(src0 + src0Stride), _mm_unpackhi_epi64(vsrc0, vsrc0)); + + src0 += (src0Stride << 1); + src1 += (src1Stride << 1); + } + } + else + { + THROW("Unsupported size"); + } +} +#endif + template<bool doShift, bool shiftR, typename T> static inline void do_shift( T &vreg, int num ); #if USE_AVX2 template<> inline void do_shift<true, true , __m256i>( __m256i &vreg, int num ) { vreg = _mm256_srai_epi32( vreg, num ); } @@ -373,6 +538,12 @@ void PelBufferOps::_initPelBufOpsX86() linTf8 = linTf_SSE_entry<vext, 8>; linTf4 = linTf_SSE_entry<vext, 4>; +#if ENABLE_SIMD_OPT_GBI + removeWeightHighFreq8 = removeWeightHighFreq_SSE<vext, 8>; + removeWeightHighFreq4 = removeWeightHighFreq_SSE<vext, 4>; + removeHighFreq8 = removeHighFreq_SSE<vext, 8>; + removeHighFreq4 = removeHighFreq_SSE<vext, 4>; +#endif } template void PelBufferOps::_initPelBufOpsX86<SIMDX86>(); diff --git a/source/Lib/DecoderLib/CABACReader.cpp b/source/Lib/DecoderLib/CABACReader.cpp index 8d7ebb8d0b6104ca610c040e3f2709e9bbadc0d0..dc8bf0308500a1099a1f541513f1e976eff6a337 100644 --- a/source/Lib/DecoderLib/CABACReader.cpp +++ b/source/Lib/DecoderLib/CABACReader.cpp @@ -834,10 +834,62 @@ void CABACReader::cu_pred_data( CodingUnit &cu ) imv_mode ( cu, mrgCtx ); +#if JVET_L0646_GBI + cu_gbi_flag( cu ); +#endif + } +#if JVET_L0646_GBI +void CABACReader::cu_gbi_flag(CodingUnit& cu) +{ + if(!CU::isGBiIdxCoded(cu)) + { + return; + } + + uint8_t gbiIdx = GBI_DEFAULT; + + CHECK(!(GBI_NUM > 1 && (GBI_NUM == 2 || (GBI_NUM & 0x01) == 1)), " !( GBI_NUM > 1 && ( GBI_NUM == 2 || ( GBI_NUM & 0x01 ) == 1 ) ) "); + + RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET(STATS__CABAC_BITS__GBI_IDX); + + int ctxId = 0; + + uint32_t idx = 0; + uint32_t symbol; + + symbol = (m_BinDecoder.decodeBin(Ctx::GBiIdx(ctxId))); + + int32_t 
numGBi = (cu.slice->getCheckLDC()) ? 5 : 3; + if(symbol == 0) + { + uint32_t prefixNumBits = numGBi - 2; + uint32_t step = 1; + + unsigned ctxIdGBi = 4; + idx = 1; + for(int ui = 0; ui < prefixNumBits; ++ui) + { + symbol = (m_BinDecoder.decodeBin(Ctx::GBiIdx(ctxIdGBi))); + + if (symbol == 1) + { + break; + } + ctxIdGBi += step; + idx += step; + } + } + + gbiIdx = (uint8_t)g_GbiParsingOrder[idx]; + CU::setGbiIdx(cu, gbiIdx); + + DTRACE(g_trace_ctx, D_SYNTAX, "cu_gbi_flag() gbi_idx=%d\n", cu.GBiIdx ? 1 : 0); +} +#endif void CABACReader::intra_luma_pred_modes( CodingUnit &cu ) { @@ -1153,6 +1205,9 @@ void CABACReader::prediction_unit( PredictionUnit& pu, MergeCtx& mrgCtx ) pu.mv [REF_PIC_LIST_1] = Mv(0, 0); pu.refIdx[REF_PIC_LIST_1] = -1; pu.interDir = 1; +#if JVET_L0646_GBI + pu.cu->GBiIdx = GBI_DEFAULT; +#endif } PU::spanMotionInfo( pu, mrgCtx ); diff --git a/source/Lib/DecoderLib/CABACReader.h b/source/Lib/DecoderLib/CABACReader.h index 611b1a7b5332cf03c86c5037153b969d612397ad..fd194650c2275ac547e9cff453b7c7f04d167b64 100644 --- a/source/Lib/DecoderLib/CABACReader.h +++ b/source/Lib/DecoderLib/CABACReader.h @@ -80,6 +80,9 @@ public: void pred_mode ( CodingUnit& cu ); void pcm_flag ( CodingUnit& cu ); void cu_pred_data ( CodingUnit& cu ); +#if JVET_L0646_GBI + void cu_gbi_flag ( CodingUnit& cu ); +#endif void intra_luma_pred_modes ( CodingUnit& cu ); void intra_chroma_pred_modes ( CodingUnit& cu ); bool intra_chroma_lmc_mode ( PredictionUnit& pu ); diff --git a/source/Lib/DecoderLib/DecCu.cpp b/source/Lib/DecoderLib/DecCu.cpp index 458230ba81d26716989eb8dacd7fe25925349d64..8fb1c75ba89f278c1a621440ce9e8fd16f45bade 100644 --- a/source/Lib/DecoderLib/DecCu.cpp +++ b/source/Lib/DecoderLib/DecCu.cpp @@ -407,6 +407,11 @@ void DecCu::xDeriveCUMV( CodingUnit &cu ) CodingStatistics::IncrementStatisticTool( CodingStatisticsClassType{ STATS__TOOL_AFF, pu.Y().width, pu.Y().height } ); } #endif + +#if JVET_L0646_GBI + uint8_t gbiIdx = GBI_DEFAULT; +#endif + if( pu.mergeFlag ) { 
{ @@ -416,7 +421,11 @@ void DecCu::xDeriveCUMV( CodingUnit &cu ) MvField affineMvField[2][3]; unsigned char interDirNeighbours; int numValidMergeCand; +#if JVET_L0646_GBI + PU::getAffineMergeCand( pu, affineMvField, interDirNeighbours, gbiIdx, numValidMergeCand); +#else PU::getAffineMergeCand( pu, affineMvField, interDirNeighbours, numValidMergeCand ); +#endif pu.interDir = interDirNeighbours; for( int i = 0; i < 2; ++i ) { @@ -428,6 +437,9 @@ void DecCu::xDeriveCUMV( CodingUnit &cu ) pu.mvpNum[i] = 0; pu.mvd[i] = Mv(); PU::setAllAffineMvField( pu, mvField, RefPicList( i ) ); +#if JVET_L0646_GBI + pu.cu->GBiIdx = gbiIdx; +#endif } } PU::spanMotionInfo( pu, mrgCtx ); @@ -466,6 +478,9 @@ void DecCu::xDeriveCUMV( CodingUnit &cu ) pu.mv [REF_PIC_LIST_1] = Mv(0, 0); pu.refIdx[REF_PIC_LIST_1] = -1; pu.interDir = 1; +#if JVET_L0646_GBI + pu.cu->GBiIdx = GBI_DEFAULT; +#endif } PU::spanMotionInfo( pu, mrgCtx ); diff --git a/source/Lib/DecoderLib/DecSlice.cpp b/source/Lib/DecoderLib/DecSlice.cpp index 806a506bc1b4f62659d92f4e085486ad07c7cb74..3ad162dd6c0d53af6c3f6ef882f28afc768c69fc 100644 --- a/source/Lib/DecoderLib/DecSlice.cpp +++ b/source/Lib/DecoderLib/DecSlice.cpp @@ -222,7 +222,13 @@ void DecSlice::decompressSlice( Slice* slice, InputBitstream* bitstream ) } #endif - +#if JVET_L0646_GBI + bool updateGbiCodingOrder = cs.slice->getSliceType() == B_SLICE && ctuTsAddr == startCtuTsAddr; + if(updateGbiCodingOrder) + { + resetGbiCodingOrder(true, cs); + } +#endif isLastCtuOfSliceSegment = cabacReader.coding_tree_unit( cs, ctuArea, pic->m_prevQP, ctuRsAddr ); diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp index d6fb31df22c0cc30ce33710a9052b897ee778e5c..2a5117e4604f87bbb6bc884da33fbd54d5556ed6 100644 --- a/source/Lib/DecoderLib/VLCReader.cpp +++ b/source/Lib/DecoderLib/VLCReader.cpp @@ -804,6 +804,9 @@ void HLSyntaxReader::parseSPSNext( SPSNext& spsNext, const bool usePCM ) { READ_FLAG( symbol, "affine_type_flag" ); 
spsNext.setUseAffineType ( symbol != 0 ); } +#if JVET_L0646_GBI + READ_FLAG( symbol, "gbi_flag" ); spsNext.setUseGBi ( symbol != 0 ); +#endif for( int k = 0; k < SPSNext::NumReservedFlags; k++ ) { READ_FLAG( symbol, "reserved_flag" ); if( symbol != 0 ) EXIT("Incompatible version: SPSNext reserved flag not equal to zero (bitstream was probably created with newer software version)" ); diff --git a/source/Lib/EncoderLib/CABACWriter.cpp b/source/Lib/EncoderLib/CABACWriter.cpp index c46fc745e2604d26a310858acd373faa5256f81e..0b1bc55580cd8ef6258887272f67bfbea4cc70e9 100644 --- a/source/Lib/EncoderLib/CABACWriter.cpp +++ b/source/Lib/EncoderLib/CABACWriter.cpp @@ -708,9 +708,56 @@ void CABACWriter::cu_pred_data( const CodingUnit& cu ) imv_mode ( cu ); +#if JVET_L0646_GBI + cu_gbi_flag( cu ); +#endif + } +#if JVET_L0646_GBI +void CABACWriter::cu_gbi_flag(const CodingUnit& cu) +{ + if(!CU::isGBiIdxCoded(cu)) + { + return; + } + + CHECK(!(GBI_NUM > 1 && (GBI_NUM == 2 || (GBI_NUM & 0x01) == 1)), " !( GBI_NUM > 1 && ( GBI_NUM == 2 || ( GBI_NUM & 0x01 ) == 1 ) ) "); + const uint8_t gbiCodingIdx = (uint8_t)g_GbiCodingOrder[CU::getValidGbiIdx(cu)]; + + int ctxId = 0; + + int32_t numGBi = (cu.slice->getCheckLDC()) ? 5 : 3; + + m_BinEncoder.encodeBin((gbiCodingIdx == 0 ? 1 : 0), Ctx::GBiIdx(ctxId)); + + if(numGBi > 2 && gbiCodingIdx != 0) + { + uint32_t prefixNumBits = numGBi - 2; + uint32_t step = 1; + uint8_t prefixSymbol = gbiCodingIdx; + + int ctxIdGBi = 4; + uint8_t idx = 1; + for(int ui = 0; ui < prefixNumBits; ++ui) + { + if (prefixSymbol == idx) + { + m_BinEncoder.encodeBin(1, Ctx::GBiIdx(ctxIdGBi)); + break; + } + else + { + m_BinEncoder.encodeBin(0, Ctx::GBiIdx(ctxIdGBi)); + ctxIdGBi += step; + idx += step; + } + } + } + DTRACE(g_trace_ctx, D_SYNTAX, "cu_gbi_flag() gbi_idx=%d\n", cu.GBiIdx ? 
1 : 0); +} +#endif void CABACWriter::intra_luma_pred_modes( const CodingUnit& cu ) { diff --git a/source/Lib/EncoderLib/CABACWriter.h b/source/Lib/EncoderLib/CABACWriter.h index ce9b4423c4dc77901215dc9afa80a9cc61d69c0d..b17bfadeef0a8c6034921de77b5838e451e51257 100644 --- a/source/Lib/EncoderLib/CABACWriter.h +++ b/source/Lib/EncoderLib/CABACWriter.h @@ -93,6 +93,9 @@ public: void pcm_data ( const CodingUnit& cu ); void pcm_flag ( const CodingUnit& cu ); void cu_pred_data ( const CodingUnit& cu ); +#if JVET_L0646_GBI + void cu_gbi_flag ( const CodingUnit& cu ); +#endif void intra_luma_pred_modes ( const CodingUnit& cu ); void intra_luma_pred_mode ( const PredictionUnit& pu ); void intra_chroma_pred_modes ( const CodingUnit& cu ); diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h index 871fca519e72a4a6a1eb1d704544cb7a097f89bf..71f50a481222dc17cdb80a41932e80d642a51ef8 100644 --- a/source/Lib/EncoderLib/EncCfg.h +++ b/source/Lib/EncoderLib/EncCfg.h @@ -204,6 +204,10 @@ protected: bool m_AltDQPCoding; #endif bool m_compositeRefEnabled; //composite reference +#if JVET_L0646_GBI + bool m_GBi; + bool m_GBiFast; +#endif // ADD_NEW_TOOL : (encoder lib) add tool enabling flags and associated parameters here bool m_useFastLCTU; @@ -641,7 +645,12 @@ public: void setUseCompositeRef (bool b) { m_compositeRefEnabled = b; } bool getUseCompositeRef () const { return m_compositeRefEnabled; } - +#if JVET_L0646_GBI + void setUseGBi ( bool b ) { m_GBi = b; } + bool getUseGBi () const { return m_GBi; } + void setUseGBiFast ( uint32_t b ) { m_GBiFast = b; } + bool getUseGBiFast () const { return m_GBiFast; } +#endif // ADD_NEW_TOOL : (encoder lib) add access functions here void setMaxCUWidth ( uint32_t u ) { m_maxCUWidth = u; } @@ -767,7 +776,7 @@ public: #if X0038_LAMBDA_FROM_QP_CAPABILITY int getIntraQPOffset () const { return m_intraQPOffset; } int getLambdaFromQPEnable () const { return m_lambdaFromQPEnable; } -#if ENABLE_QPA +#if ENABLE_QPA | JVET_L0646_GBI 
public: #else protected: diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp index a78001f3f5777a2bcd780fec484fce72f5387497..926841c1057fb2740c447752eab355c7ab3f2bc3 100644 --- a/source/Lib/EncoderLib/EncCu.cpp +++ b/source/Lib/EncoderLib/EncCu.cpp @@ -1641,6 +1641,9 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct unsigned char interDirNeighbours; int numValidMergeCand; bool hasNoResidual = false; +#if JVET_L0646_GBI + uint8_t gbiIdx = GBI_DEFAULT; +#endif tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); @@ -1664,7 +1667,11 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct cu.firstPU->mergeFlag = true; cu.firstPU->mergeIdx = 0; +#if JVET_L0646_GBI + PU::getAffineMergeCand( *cu.firstPU, affineMvField, interDirNeighbours, gbiIdx, numValidMergeCand ); +#else PU::getAffineMergeCand( *cu.firstPU, affineMvField, interDirNeighbours, numValidMergeCand ); +#endif if( numValidMergeCand == -1 ) { return; @@ -1673,6 +1680,9 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct cu.firstPU->interDir = interDirNeighbours; PU::setAllAffineMvField( *cu.firstPU, affineMvField[REF_PIC_LIST_0], REF_PIC_LIST_0 ); PU::setAllAffineMvField( *cu.firstPU, affineMvField[REF_PIC_LIST_1], REF_PIC_LIST_1 ); +#if JVET_L0646_GBI + cu.GBiIdx = gbiIdx; +#endif PU::spanMotionInfo( *cu.firstPU ); @@ -1699,6 +1709,53 @@ void EncCu::xCheckRDCostInter( CodingStructure *&tempCS, CodingStructure *&bestC { tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); +#if JVET_L0646_GBI + + m_pcInterSearch->setAffineModeSelected(false); + + if( tempCS->slice->getCheckLDC() ) + { + m_bestGbiCost[0] = m_bestGbiCost[1] = std::numeric_limits<double>::max(); + m_bestGbiIdx[0] = m_bestGbiIdx[1] = -1; + } + + m_pcInterSearch->resetBufferedUniMotions(); + int gbiLoopNum = (tempCS->slice->isInterB() ? GBI_NUM : 1); + gbiLoopNum = (tempCS->sps->getSpsNext().getUseGBi() ? 
gbiLoopNum : 1); + + if( tempCS->area.lwidth() * tempCS->area.lheight() < GBI_SIZE_CONSTRAINT ) + { + gbiLoopNum = 1; + } + + double curBestCost = bestCS->cost; + double equGBiCost = MAX_DOUBLE; + + for( int gbiLoopIdx = 0; gbiLoopIdx < gbiLoopNum; gbiLoopIdx++ ) + { + if( m_pcEncCfg->getUseGBiFast() ) + { + auto blkCache = dynamic_cast< CacheBlkInfoCtrl* >(m_modeCtrl); + + if( blkCache ) + { + bool isBestInter = blkCache->getInter(bestCS->area); + uint8_t bestGBiIdx = blkCache->getGbiIdx(bestCS->area); + + if( isBestInter && g_GbiSearchOrder[gbiLoopIdx] != GBI_DEFAULT && g_GbiSearchOrder[gbiLoopIdx] != bestGBiIdx ) + { + continue; + } + } + } + if( !tempCS->slice->getCheckLDC() ) + { + if( gbiLoopIdx != 0 && gbiLoopIdx != 3 && gbiLoopIdx != 4 ) + { + continue; + } + } +#endif CodingUnit &cu = tempCS->addCU( tempCS->area, partitioner.chType ); @@ -1716,18 +1773,70 @@ void EncCu::xCheckRDCostInter( CodingStructure *&tempCS, CodingStructure *&bestC cu.qp = encTestMode.qp; CU::addPUs( cu ); +#if JVET_L0646_GBI + cu.GBiIdx = g_GbiSearchOrder[gbiLoopIdx]; + uint8_t gbiIdx = cu.GBiIdx; + bool testGbi = (gbiIdx != GBI_DEFAULT); +#endif m_pcInterSearch->predInterSearch( cu, partitioner ); const unsigned wIdx = gp_sizeIdxInfo->idxFrom( tempCS->area.lwidth () ); +#if JVET_L0646_GBI + gbiIdx = CU::getValidGbiIdx(cu); + if( testGbi && gbiIdx == GBI_DEFAULT ) // Enabled GBi but the search results is uni. + { + tempCS->initStructData(encTestMode.qp, encTestMode.lossless); + continue; + } + CHECK(!(testGbi || (!testGbi && gbiIdx == GBI_DEFAULT)), " !( bTestGbi || (!bTestGbi && gbiIdx == GBI_DEFAULT ) )"); + + bool isEqualUni = false; + if( m_pcEncCfg->getUseGBiFast() ) + { + if( cu.firstPU->interDir != 3 && testGbi == 0 ) + { + isEqualUni = true; + } + } +#endif xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, 0 , m_pImvTempCS ? 
m_pImvTempCS[wIdx][encTestMode.partSize] : NULL , 1 , 0 +#if JVET_L0646_GBI + , &equGBiCost +#endif ); +#if JVET_L0646_GBI + if( g_GbiSearchOrder[gbiLoopIdx] == GBI_DEFAULT ) + m_pcInterSearch->setAffineModeSelected((bestCS->cus.front()->affine && !(bestCS->cus.front()->firstPU->mergeFlag))); + + tempCS->initStructData(encTestMode.qp, encTestMode.lossless); + + double skipTH = MAX_DOUBLE; + skipTH = (m_pcEncCfg->getUseGBiFast() ? 1.05 : MAX_DOUBLE); + if( equGBiCost > curBestCost * skipTH ) + { + break; + } + + if( m_pcEncCfg->getUseGBiFast() ) + { + if( isEqualUni == true && m_pcEncCfg->getIntraPeriod() == -1 ) + { + break; + } + } + if( g_GbiSearchOrder[gbiLoopIdx] == GBI_DEFAULT && xIsGBiSkip(cu) && m_pcEncCfg->getUseGBiFast() ) + { + break; + } + } // for( UChar gbiLoopIdx = 0; gbiLoopIdx < gbiLoopNum; gbiLoopIdx++ ) +#endif } @@ -1737,6 +1846,9 @@ void EncCu::xCheckRDCostInter( CodingStructure *&tempCS, CodingStructure *&bestC bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode ) { int iIMV = int( ( encTestMode.opts & ETO_IMV ) >> ETO_IMV_SHIFT ); +#if JVET_L0646_GBI + m_pcInterSearch->setAffineModeSelected(false); +#endif // Only int-Pel, 4-Pel and fast 4-Pel allowed CHECK( iIMV != 1 && iIMV != 2 && iIMV != 3, "Unsupported IMV Mode" ); // Fast 4-Pel Mode @@ -1762,6 +1874,53 @@ bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&be } } +#if JVET_L0646_GBI + m_pcInterSearch->resetBufferedUniMotions(); + int gbiLoopNum = (tempCS->slice->isInterB() ? GBI_NUM : 1); + gbiLoopNum = (pcCUInfo2Reuse != NULL ? 1 : gbiLoopNum); + gbiLoopNum = (tempCS->slice->getSPS()->getSpsNext().getUseGBi() ? 
gbiLoopNum : 1); + + if( tempCS->area.lwidth() * tempCS->area.lheight() < GBI_SIZE_CONSTRAINT ) + { + gbiLoopNum = 1; + } + + double curBestCost = bestCS->cost; + double equGBiCost = MAX_DOUBLE; + + for( int gbiLoopIdx = 0; gbiLoopIdx < gbiLoopNum; gbiLoopIdx++ ) + { + if( m_pcEncCfg->getUseGBiFast() ) + { + auto blkCache = dynamic_cast< CacheBlkInfoCtrl* >(m_modeCtrl); + + if( blkCache ) + { + bool isBestInter = blkCache->getInter(bestCS->area); + uint8_t bestGBiIdx = blkCache->getGbiIdx(bestCS->area); + + if( isBestInter && g_GbiSearchOrder[gbiLoopIdx] != GBI_DEFAULT && g_GbiSearchOrder[gbiLoopIdx] != bestGBiIdx ) + { + continue; + } + } + } + + if( !tempCS->slice->getCheckLDC() ) + { + if( gbiLoopIdx != 0 && gbiLoopIdx != 3 && gbiLoopIdx != 4 ) + { + continue; + } + } + + if( m_pcEncCfg->getUseGBiFast() && tempCS->slice->getCheckLDC() && g_GbiSearchOrder[gbiLoopIdx] != GBI_DEFAULT + && (m_bestGbiIdx[0] >= 0 && g_GbiSearchOrder[gbiLoopIdx] != m_bestGbiIdx[0]) + && (m_bestGbiIdx[1] >= 0 && g_GbiSearchOrder[gbiLoopIdx] != m_bestGbiIdx[1])) + { + continue; + } +#endif CodingUnit &cu = ( pcCUInfo2Reuse != nullptr ) ? *tempCS->getCU( partitioner.chType ) : tempCS->addCU( tempCS->area, partitioner.chType ); @@ -1795,12 +1954,21 @@ bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&be cu.imv = iIMV > 1 ? 
2 : 1; cu.emtFlag = false; +#if JVET_L0646_GBI + bool testGbi; + uint8_t gbiIdx; +#endif if( pcCUInfo2Reuse != nullptr ) { // reuse the motion info from pcCUInfo2Reuse CU::resetMVDandMV2Int( cu, m_pcInterSearch ); +#if JVET_L0646_GBI + CHECK(cu.GBiIdx < 0 || cu.GBiIdx >= GBI_NUM, "cu.GBiIdx < 0 || cu.GBiIdx >= GBI_NUM"); + gbiIdx = CU::getValidGbiIdx(cu); + testGbi = (gbiIdx != GBI_DEFAULT); +#endif if( !CU::hasSubCUNonZeroMVd( cu ) ) { @@ -1814,11 +1982,36 @@ bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&be } else { +#if JVET_L0646_GBI + cu.GBiIdx = g_GbiSearchOrder[gbiLoopIdx]; + gbiIdx = cu.GBiIdx; + testGbi = (gbiIdx != GBI_DEFAULT); +#endif m_pcInterSearch->predInterSearch( cu, partitioner ); +#if JVET_L0646_GBI + gbiIdx = CU::getValidGbiIdx(cu); +#endif } +#if JVET_L0646_GBI + if( testGbi && gbiIdx == GBI_DEFAULT ) // Enabled GBi but the search results is uni. + { + tempCS->initStructData(encTestMode.qp, encTestMode.lossless); + continue; + } + CHECK(!(testGbi || (!testGbi && gbiIdx == GBI_DEFAULT)), " !( bTestGbi || (!bTestGbi && gbiIdx == GBI_DEFAULT ) )"); + + bool isEqualUni = false; + if( m_pcEncCfg->getUseGBiFast() ) + { + if( cu.firstPU->interDir != 3 && testGbi == 0 ) + { + isEqualUni = true; + } + } +#endif if( !CU::hasSubCUNonZeroMVd( cu ) ) { @@ -1830,8 +2023,35 @@ bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&be , NULL , true , 0 +#if JVET_L0646_GBI + , &equGBiCost +#endif ); +#if JVET_L0646_GBI + tempCS->initStructData(encTestMode.qp, encTestMode.lossless); + + double skipTH = MAX_DOUBLE; + skipTH = (m_pcEncCfg->getUseGBiFast() ? 
1.05 : MAX_DOUBLE); + if( equGBiCost > curBestCost * skipTH ) + { + break; + } + + if( m_pcEncCfg->getUseGBiFast() ) + { + if( isEqualUni == true && m_pcEncCfg->getIntraPeriod() == -1 ) + { + break; + } + } + if( g_GbiSearchOrder[gbiLoopIdx] == GBI_DEFAULT && xIsGBiSkip(cu) && m_pcEncCfg->getUseGBiFast() ) + { + break; + } + } // for( UChar gbiLoopIdx = 0; gbiLoopIdx < gbiLoopNum; gbiLoopIdx++ ) +#endif + return true; } @@ -1839,6 +2059,9 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be , CodingStructure* imvCS , int emtMode , bool* bestHasNonResi +#if JVET_L0646_GBI + , double* equGBiCost +#endif ) { if( residualPass == 1 && encTestMode.lossless ) @@ -1904,6 +2127,37 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be xCheckDQP( *tempCS, partitioner ); +#if JVET_L0646_GBI + if( ETM_INTER_ME == encTestMode.type ) + { + if( equGBiCost != NULL ) + { + if( tempCS->cost < (*equGBiCost) && cu->GBiIdx == GBI_DEFAULT ) + { + (*equGBiCost) = tempCS->cost; + } + } + else + { + CHECK(equGBiCost == NULL, "equGBiCost == NULL"); + } + if( tempCS->slice->getCheckLDC() && !cu->imv && cu->GBiIdx != GBI_DEFAULT && tempCS->cost < m_bestGbiCost[1] ) + { + if( tempCS->cost < m_bestGbiCost[0] ) + { + m_bestGbiCost[1] = m_bestGbiCost[0]; + m_bestGbiCost[0] = tempCS->cost; + m_bestGbiIdx[1] = m_bestGbiIdx[0]; + m_bestGbiIdx[0] = cu->GBiIdx; + } + else + { + m_bestGbiCost[1] = tempCS->cost; + m_bestGbiIdx[1] = cu->GBiIdx; + } + } + } +#endif double emtFirstPassCost = tempCS->cost; if( imvCS && (tempCS->cost < imvCS->cost) ) @@ -2038,4 +2292,5 @@ void EncCu::xReuseCachedResult( CodingStructure *&tempCS, CodingStructure *&best #endif + //! 
\} diff --git a/source/Lib/EncoderLib/EncCu.h b/source/Lib/EncoderLib/EncCu.h index da5ca4d9be280affd153c924895ac9dc33009ce0..d8131f4532a7f5b5e278e8284af1d8cf9275b42c 100644 --- a/source/Lib/EncoderLib/EncCu.h +++ b/source/Lib/EncoderLib/EncCu.h @@ -118,7 +118,10 @@ private: #if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM EncLib* m_pcEncLib; #endif - +#if JVET_L0646_GBI + int m_bestGbiIdx[2]; + double m_bestGbiCost[2]; +#endif #if SHARP_LUMA_DELTA_QP void updateLambda ( Slice* slice, double dQP ); #endif @@ -192,10 +195,23 @@ protected: , CodingStructure* imvCS = NULL , int emtMode = 1 , bool* bestHasNonResi = NULL +#if JVET_L0646_GBI + , double* equGBiCost = NULL +#endif ); #if REUSE_CU_RESULTS void xReuseCachedResult ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &Partitioner ); #endif + +#if JVET_L0646_GBI + bool xIsGBiSkip(const CodingUnit& cu) + { + return((m_pcEncCfg->getBaseQP() > 32) && ((cu.slice->getTLayer() >= 4) + || ((cu.refIdxBi[0] >= 0 && cu.refIdxBi[1] >= 0) + && (abs(cu.slice->getPOC() - cu.slice->getRefPOC(REF_PIC_LIST_0, cu.refIdxBi[0])) == 1 + || abs(cu.slice->getPOC() - cu.slice->getRefPOC(REF_PIC_LIST_1, cu.refIdxBi[1])) == 1)))); + } +#endif }; //! \} diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp index 3460e0c9f90b54afe65158eaddff8021b9bd8ced..38661820cc96a0e04fc02b0db5244357eca8fc3c 100644 --- a/source/Lib/EncoderLib/EncLib.cpp +++ b/source/Lib/EncoderLib/EncLib.cpp @@ -852,7 +852,9 @@ void EncLib::xInitSPS(SPS &sps) sps.getSpsNext().setUseIntraEMT ( m_IntraEMT ); sps.getSpsNext().setUseInterEMT ( m_InterEMT ); sps.getSpsNext().setUseCompositeRef ( m_compositeRefEnabled ); - +#if JVET_L0646_GBI + sps.getSpsNext().setUseGBi ( m_GBi ); +#endif // ADD_NEW_TOOL : (encoder lib) set tool enabling flags and associated parameters here int minCUSize = ( /*sps.getSpsNext().getUseQTBT() ? 
1 << MIN_CU_LOG2 :*/ sps.getMaxCUWidth() >> sps.getLog2DiffMaxMinCodingBlockSize() ); diff --git a/source/Lib/EncoderLib/EncModeCtrl.cpp b/source/Lib/EncoderLib/EncModeCtrl.cpp index 2871f8c41a76a533b22ed10e07a4fc7bb9c6f006..db0ef5bf6b7d7b56b9aaafd1edfb1007e80262f6 100644 --- a/source/Lib/EncoderLib/EncModeCtrl.cpp +++ b/source/Lib/EncoderLib/EncModeCtrl.cpp @@ -503,6 +503,29 @@ bool CacheBlkInfoCtrl::getMv( const UnitArea& area, const RefPicList refPicList, return m_codedCUInfo[idx1][idx2][idx3][idx4]->validMv[refPicList][iRefIdx]; } +#if JVET_L0646_GBI +bool CacheBlkInfoCtrl::getInter(const UnitArea& area) +{ + unsigned idx1, idx2, idx3, idx4; + getAreaIdx(area.Y(), *m_slice_chblk->getPPS()->pcv, idx1, idx2, idx3, idx4); + + return m_codedCUInfo[idx1][idx2][idx3][idx4]->isInter; +} +void CacheBlkInfoCtrl::setGbiIdx(const UnitArea& area, uint8_t gBiIdx) +{ + unsigned idx1, idx2, idx3, idx4; + getAreaIdx(area.Y(), *m_slice_chblk->getPPS()->pcv, idx1, idx2, idx3, idx4); + + m_codedCUInfo[idx1][idx2][idx3][idx4]->GBiIdx = gBiIdx; +} +uint8_t CacheBlkInfoCtrl::getGbiIdx(const UnitArea& area) +{ + unsigned idx1, idx2, idx3, idx4; + getAreaIdx(area.Y(), *m_slice_chblk->getPPS()->pcv, idx1, idx2, idx3, idx4); + + return m_codedCUInfo[idx1][idx2][idx3][idx4]->GBiIdx; +} +#endif #if REUSE_CU_RESULTS static bool isTheSameNbHood( const CodingUnit &cu, const Partitioner &partitioner ) @@ -1482,6 +1505,9 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt relatedCU.isSkip |= bestCU->skip; #else relatedCU.isSkip = bestCU->skip; +#endif +#if JVET_L0646_GBI + relatedCU.GBiIdx = bestCU->GBiIdx; #endif } else if( CU::isIntra( *bestCU ) ) diff --git a/source/Lib/EncoderLib/EncModeCtrl.h b/source/Lib/EncoderLib/EncModeCtrl.h index 7209db6e34492a8aa9c97f5987d7785dc8f00344..987c6a6750dc5ff2fb66ab023c83c529c5324bdd 100644 --- a/source/Lib/EncoderLib/EncModeCtrl.h +++ b/source/Lib/EncoderLib/EncModeCtrl.h @@ -321,6 +321,9 @@ struct CodedCUInfo bool 
validMv[NUM_REF_PIC_LIST_01][MAX_STORED_CU_INFO_REFS]; Mv saveMv [NUM_REF_PIC_LIST_01][MAX_STORED_CU_INFO_REFS]; +#if JVET_L0646_GBI + uint8_t GBiIdx; +#endif #if ENABLE_SPLIT_PARALLELISM @@ -369,6 +372,11 @@ public: bool getMv ( const UnitArea& area, const RefPicList refPicList, const int iRefIdx, Mv& rMv ) const; void setMv ( const UnitArea& area, const RefPicList refPicList, const int iRefIdx, const Mv& rMv ); +#if JVET_L0646_GBI + bool getInter( const UnitArea& area ); + void setGbiIdx( const UnitArea& area, uint8_t gBiIdx ); + uint8_t getGbiIdx( const UnitArea& area ); +#endif }; #if REUSE_CU_RESULTS diff --git a/source/Lib/EncoderLib/EncSlice.cpp b/source/Lib/EncoderLib/EncSlice.cpp index dd02a24b0caba44506d5d2c488319e37f9e9e3ff..c9791afa2acfb7ad0632ce30a6cf5a2896ec3257 100644 --- a/source/Lib/EncoderLib/EncSlice.cpp +++ b/source/Lib/EncoderLib/EncSlice.cpp @@ -1615,7 +1615,14 @@ void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, cons } #endif - +#if JVET_L0646_GBI + bool updateGbiCodingOrder = cs.slice->getSliceType() == B_SLICE && ctuTsAddr == startCtuTsAddr; + if( updateGbiCodingOrder ) + { + resetGbiCodingOrder(false, cs); + m_pcInterSearch->initWeightIdxBits(); + } +#endif #if ENABLE_WPP_PARALLELISM pEncLib->getCuEncoder( dataId )->compressCtu( cs, ctuArea, ctuRsAddr, prevQP, currQP ); diff --git a/source/Lib/EncoderLib/InterSearch.cpp b/source/Lib/EncoderLib/InterSearch.cpp index bfd32f0cb2b4dbf7671eebb0b8c4bdc13801aed8..2bff5100b06f4d8e3edc7999211bed1c84235e5a 100644 --- a/source/Lib/EncoderLib/InterSearch.cpp +++ b/source/Lib/EncoderLib/InterSearch.cpp @@ -727,6 +727,12 @@ void InterSearch::xMergeEstimation( PredictionUnit& pu, PelUnitBuf& origBuf, int uiMergeIdx = uiMergeCand; } } +#if JVET_L0646_GBI + if( pu.cu->GBiIdx != GBI_DEFAULT ) + { + pu.cu->GBiIdx = GBI_DEFAULT; // Reset to default for the rest modes. 
+ } +#endif } @@ -772,7 +778,10 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) int bestBiPMvpL1 = 0; Distortion biPDistTemp = std::numeric_limits<Distortion>::max(); - +#if JVET_L0646_GBI + uint8_t gbiIdx = (cu.cs->slice->isInterB() ? cu.GBiIdx : GBI_DEFAULT); + bool enforceGBiPred = false; +#endif MergeCtx mergeCtx; // Loop over Prediction Units @@ -875,6 +884,14 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) { xMotionEstimation( pu, origBuf, eRefPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp, amvp[eRefPicList] ); } +#if JVET_L0646_GBI + if( cu.cs->sps->getSpsNext().getUseGBi() && cu.GBiIdx == GBI_DEFAULT && cu.cs->slice->isInterB() ) + { + const bool checkIdentical = true; + m_cUniMotions.setReadMode(checkIdentical, (uint32_t)iRefList, (uint32_t)iRefIdxTemp); + m_cUniMotions.copyFrom(cMvTemp[iRefList][iRefIdxTemp], uiCostTemp - m_pcRdCost->getCost(uiBitsTemp), (uint32_t)iRefList, (uint32_t)iRefIdxTemp); + } +#endif xCopyAMVPInfo( &amvp[eRefPicList], &aacAMVPInfo[iRefList][iRefIdxTemp]); // must always be done ( also when AMVP_MODE = AM_NONE ) xCheckBestMVP( eRefPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], amvp[eRefPicList], uiBitsTemp, uiCostTemp, pu.cu->imv ); @@ -916,7 +933,11 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) ::memcpy( cMvHevcTemp, cMvTemp, sizeof( cMvTemp ) ); } // Bi-predictive Motion estimation - if( ( cs.slice->isInterB() ) && ( PU::isBipredRestriction( pu ) == false ) ) + if( ( cs.slice->isInterB() ) && ( PU::isBipredRestriction( pu ) == false ) +#if JVET_L0646_GBI + && (cu.slice->getCheckLDC() || gbiIdx == GBI_DEFAULT || !m_affineModeSelected || !m_pcEncCfg->getUseGBiFast()) +#endif + ) { cMvBi[0] = cMv[0]; cMvBi[1] = cMv[1]; @@ -981,6 +1002,9 @@ void InterSearch::predInterSearch(CodingUnit& 
cu, Partitioner& partitioner) iNumIter = 1; } +#if JVET_L0646_GBI + enforceGBiPred = (gbiIdx != GBI_DEFAULT); +#endif for ( int iIter = 0; iIter < iNumIter; iIter++ ) { int iRefList = iIter % 2; @@ -995,6 +1019,12 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) { iRefList = 0; } +#if JVET_L0646_GBI + if( gbiIdx != GBI_DEFAULT ) + { + iRefList = ( abs( getGbiWeight(gbiIdx, REF_PIC_LIST_0 ) ) > abs( getGbiWeight(gbiIdx, REF_PIC_LIST_1 ) ) ? 1 : 0 ); + } +#endif } else if ( iIter == 0 ) { @@ -1025,9 +1055,20 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) iRefStart = 0; iRefEnd = cs.slice->getNumRefIdx(eRefPicList)-1; - for ( int iRefIdxTemp = iRefStart; iRefIdxTemp <= iRefEnd; iRefIdxTemp++ ) + for( int iRefIdxTemp = iRefStart; iRefIdxTemp <= iRefEnd; iRefIdxTemp++ ) { +#if JVET_L0646_GBI + if( m_pcEncCfg->getUseGBiFast() && (gbiIdx != GBI_DEFAULT) + && (pu.cu->slice->getRefPic(eRefPicList, iRefIdxTemp)->getPOC() == pu.cu->slice->getRefPic(RefPicList(1 - iRefList), pu.refIdx[1 - iRefList])->getPOC()) + && (!pu.cu->imv && pu.cu->slice->getTLayer()>1)) + { + continue; + } +#endif uiBitsTemp = uiMbBits[2] + uiMotBits[1-iRefList]; +#if JVET_L0646_GBI + uiBitsTemp += ((cs.slice->getSPS()->getSpsNext().getUseGBi() == true) ? getWeightIdxBits(gbiIdx) : 0); +#endif if ( cs.slice->getNumRefIdx(eRefPicList) > 1 ) { uiBitsTemp += iRefIdxTemp+1; @@ -1051,6 +1092,9 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) uiCostBi = uiCostTemp; uiMotBits[iRefList] = uiBitsTemp - uiMbBits[2] - uiMotBits[1-iRefList]; +#if JVET_L0646_GBI + uiMotBits[iRefList] -= ((cs.slice->getSPS()->getSpsNext().getUseGBi() == true) ? 
getWeightIdxBits(gbiIdx) : 0); +#endif uiBits[2] = uiBitsTemp; if(iNumIter!=1) @@ -1071,7 +1115,11 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) if ( !bChanged ) { +#if JVET_L0646_GBI + if ((uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) || enforceGBiPred) +#else if ( uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1] ) +#endif { xCopyAMVPInfo(&aacAMVPInfo[0][iRefIdxBi[0]], &amvp[REF_PIC_LIST_0]); xCheckBestMVP( REF_PIC_LIST_0, cMvBi[0], cMvPredBi[0][iRefIdxBi[0]], aaiMvpIdxBi[0][iRefIdxBi[0]], amvp[eRefPicList], uiBits[2], uiCostBi, pu.cu->imv); @@ -1084,6 +1132,10 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) break; } } // for loop-iter +#if JVET_L0646_GBI + cu.refIdxBi[0] = iRefIdxBi[0]; + cu.refIdxBi[1] = iRefIdxBi[1]; +#endif } // if (B_SLICE) @@ -1110,6 +1162,12 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) uiBits [1] = bitsValidList1; uiCost [1] = costValidList1; +#if JVET_L0646_GBI + if( enforceGBiPred ) + { + uiCost[0] = uiCost[1] = MAX_UINT; + } +#endif uiLastModeTemp = uiLastMode; if ( uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) @@ -1168,6 +1226,12 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) uiMEBits = uiBits[1]; } +#if JVET_L0646_GBI + if( gbiIdx != GBI_DEFAULT ) + { + cu.GBiIdx = GBI_DEFAULT; // Reset to default for the Non-NormalMC modes. + } +#endif if ( cu.partSize != SIZE_2Nx2N ) { @@ -1201,7 +1265,11 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) uiHevcCost = ( uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1] ) ? uiCostBi : ( ( uiCost[0] <= uiCost[1] ) ? uiCost[0] : uiCost[1] ); } CHECK( !( !cu.cs->pcv->only2Nx2N || cu.partSize == SIZE_2Nx2N ), "Unexpected part size for QTBT." 
); - if (cu.Y().width > 8 && cu.Y().height > 8 && cu.partSize == SIZE_2Nx2N && cu.slice->getSPS()->getSpsNext().getUseAffine() && cu.imv == 0) + if (cu.Y().width > 8 && cu.Y().height > 8 && cu.partSize == SIZE_2Nx2N && cu.slice->getSPS()->getSpsNext().getUseAffine() && cu.imv == 0 +#if JVET_L0646_GBI + && (gbiIdx == GBI_DEFAULT || m_affineModeSelected || !m_pcEncCfg->getUseGBiFast()) +#endif + ) { // save normal hevc result uint32_t uiMRGIndex = pu.mergeIdx; @@ -1226,7 +1294,12 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) Mv acMvAffine4Para[2][33][3]; int refIdx4Para[2] = { -1, -1 }; +#if JVET_L0646_GBI + xPredAffineInterSearch(pu, origBuf, puIdx, uiLastModeTemp, uiAffineCost, cMvHevcTemp, acMvAffine4Para, refIdx4Para, gbiIdx, enforceGBiPred, + ((cu.slice->getSPS()->getSpsNext().getUseGBi() == true) ? getWeightIdxBits(gbiIdx) : 0)); +#else xPredAffineInterSearch(pu, origBuf, puIdx, uiLastModeTemp, uiAffineCost, cMvHevcTemp, acMvAffine4Para, refIdx4Para); +#endif if ( cu.slice->getSPS()->getSpsNext().getUseAffineType() ) { if ( uiAffineCost < uiHevcCost * 1.05 ) ///< condition for 6 parameter affine ME @@ -1261,7 +1334,12 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) Distortion uiAffine6Cost = std::numeric_limits<Distortion>::max(); cu.affineType = AFFINEMODEL_6PARAM; +#if JVET_L0646_GBI + xPredAffineInterSearch(pu, origBuf, puIdx, uiLastModeTemp, uiAffine6Cost, cMvHevcTemp, acMvAffine4Para, refIdx4Para, gbiIdx, enforceGBiPred, + ((cu.slice->getSPS()->getSpsNext().getUseGBi() == true) ? 
getWeightIdxBits(gbiIdx) : 0)); +#else xPredAffineInterSearch(pu, origBuf, puIdx, uiLastModeTemp, uiAffine6Cost, cMvHevcTemp, acMvAffine4Para, refIdx4Para); +#endif // reset to 4 parameter affine inter mode if ( uiAffineCost <= uiAffine6Cost ) @@ -1326,7 +1404,15 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) } } - +#if JVET_L0646_GBI + if( cu.firstPU->interDir == 3 && !cu.firstPU->mergeFlag ) + { + if (gbiIdx != GBI_DEFAULT) + { + cu.GBiIdx = gbiIdx; + } + } +#endif m_maxCompIDToPred = MAX_NUM_COMPONENT; { @@ -1573,6 +1659,12 @@ Distortion InterSearch::xGetAffineTemplateCost( PredictionUnit& pu, PelUnitBuf& void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, RefPicList eRefPicList, Mv& rcMvPred, int iRefIdxPred, Mv& rcMv, int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost, const AMVPInfo& amvpInfo, bool bBi) { +#if JVET_L0646_GBI + if( pu.cu->cs->sps->getSpsNext().getUseGBi() && pu.cu->GBiIdx != GBI_DEFAULT && !bBi && xReadBufferedUniMv(pu, eRefPicList, iRefIdxPred, rcMvPred, rcMv, ruiBits, ruiCost) ) + { + return; + } +#endif Mv cMvHalf, cMvQter; @@ -1591,10 +1683,17 @@ void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Ref PelUnitBuf otherBuf = m_tmpPredStorage[1 - (int)eRefPicList].getBuf( UnitAreaRelative(*pu.cu, pu )); origBufTmp.copyFrom(origBuf); origBufTmp.removeHighFreq( otherBuf, m_pcEncCfg->getClipForBiPredMeEnabled(), pu.cu->slice->clpRngs() +#if JVET_L0646_GBI + ,getGbiWeight( pu.cu->GBiIdx, eRefPicList ) +#endif ); pBuf = &origBufTmp; +#if JVET_L0646_GBI + fWeight = xGetMEDistortionWeight( pu.cu->GBiIdx, eRefPicList ); +#else fWeight = 0.5; +#endif } m_cDistParam.isBiPred = bBi; @@ -2457,6 +2556,11 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, Mv hevcMv[2][33] , Mv mvAffine4Para[2][33][3] , int refIdx4Para[2] +#if JVET_L0646_GBI + , uint8_t gbiIdx + , bool enforceGBiPred + , uint32_t gbiIdxBits +#endif ) { const Slice &slice = *pu.cu->slice; @@ 
-2522,6 +2626,12 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, pu.cu->affine = true; pu.mergeFlag = false; +#if JVET_L0646_GBI + if( gbiIdx != GBI_DEFAULT ) + { + pu.cu->GBiIdx = gbiIdx; + } +#endif // Uni-directional prediction for ( int iRefList = 0; iRefList < iNumPredDir; iRefList++ ) @@ -2665,6 +2775,14 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, { xAffineMotionEstimation( pu, origBuf, eRefPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp ); } +#if JVET_L0646_GBI + if(pu.cu->cs->sps->getSpsNext().getUseGBi() && pu.cu->GBiIdx == GBI_DEFAULT && pu.cu->slice->isInterB()) + { + m_cUniMotions.setReadModeAffine(true, (uint8_t)iRefList, (uint8_t)iRefIdxTemp, pu.cu->affineType); + m_cUniMotions.copyAffineMvFrom(cMvTemp[iRefList][iRefIdxTemp], uiCostTemp - m_pcRdCost->getCost(uiBitsTemp), (uint8_t)iRefList, (uint8_t)iRefIdxTemp, pu.cu->affineType + ); + } +#endif // Set best AMVP Index xCopyAffineAMVPInfo( affiAMVPInfoTemp[eRefPicList], aacAffineAMVPInfo[iRefList][iRefIdxTemp] ); xCheckBestAffineMVP( pu, affiAMVPInfoTemp[eRefPicList], eRefPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp ); @@ -2785,6 +2903,12 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, { iRefList = 0; } +#if JVET_L0646_GBI + if( gbiIdx != GBI_DEFAULT ) + { + iRefList = ( abs( getGbiWeight( gbiIdx, REF_PIC_LIST_0 ) ) > abs( getGbiWeight( gbiIdx, REF_PIC_LIST_1 ) ) ? 
1 : 0 ); + } +#endif } else if ( iIter == 0 ) { @@ -2823,9 +2947,19 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, { continue; } - +#if JVET_L0646_GBI + if(m_pcEncCfg->getUseGBiFast() && (gbiIdx != GBI_DEFAULT) + && (pu.cu->slice->getRefPic(eRefPicList, iRefIdxTemp)->getPOC() == pu.cu->slice->getRefPic(RefPicList(1 - iRefList), pu.refIdx[1 - iRefList])->getPOC()) + && (pu.cu->affineType == AFFINEMODEL_4PARAM && pu.cu->slice->getTLayer()>1)) + { + continue; + } +#endif // update bits uiBitsTemp = uiMbBits[2] + uiMotBits[1-iRefList]; +#if JVET_L0646_GBI + uiBitsTemp += ((pu.cu->slice->getSPS()->getSpsNext().getUseGBi() == true) ? gbiIdxBits : 0); +#endif if( slice.getNumRefIdx(eRefPicList) > 1 ) { uiBitsTemp += iRefIdxTemp+1; @@ -2849,6 +2983,9 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, uiCostBi = uiCostTemp; uiMotBits[iRefList] = uiBitsTemp - uiMbBits[2] - uiMotBits[1-iRefList]; +#if JVET_L0646_GBI + uiMotBits[iRefList] -= ((pu.cu->slice->getSPS()->getSpsNext().getUseGBi() == true) ? 
gbiIdxBits : 0); +#endif uiBits[2] = uiBitsTemp; if ( iNumIter != 1 ) // MC for next iter @@ -2868,7 +3005,11 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, if ( !bChanged ) { +#if JVET_L0646_GBI + if ((uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) || enforceGBiPred) +#else if ( uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1] ) +#endif { xCopyAffineAMVPInfo( aacAffineAMVPInfo[0][iRefIdxBi[0]], affiAMVPInfoTemp[REF_PIC_LIST_0] ); xCheckBestAffineMVP( pu, affiAMVPInfoTemp[REF_PIC_LIST_0], REF_PIC_LIST_0, cMvBi[0], cMvPredBi[0][iRefIdxBi[0]], aaiMvpIdxBi[0][iRefIdxBi[0]], uiBits[2], uiCostBi ); @@ -2907,6 +3048,12 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, uiBits[1] = bitsValidList1; uiCost[1] = costValidList1; +#if JVET_L0646_GBI + if( enforceGBiPred ) + { + uiCost[0] = uiCost[1] = MAX_UINT; + } +#endif // Affine ME result set if ( uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1] ) // Bi @@ -2995,6 +3142,12 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, pu.mvpIdx[REF_PIC_LIST_1] = aaiMvpIdx[1][iRefIdx[1]]; pu.mvpNum[REF_PIC_LIST_1] = aaiMvpNum[1][iRefIdx[1]]; } +#if JVET_L0646_GBI + if( gbiIdx != GBI_DEFAULT ) + { + pu.cu->GBiIdx = GBI_DEFAULT; + } +#endif } void solveEqual( double** dEqualCoeff, int iOrder, double* dAffinePara ) @@ -3161,6 +3314,12 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu, Distortion& ruiCost, bool bBi ) { +#if JVET_L0646_GBI + if( pu.cu->cs->sps->getSpsNext().getUseGBi() && pu.cu->GBiIdx != GBI_DEFAULT && !bBi && xReadBufferedAffineUniMv(pu, eRefPicList, iRefIdxPred, acMvPred, acMv, ruiBits, ruiCost) ) + { + return; + } +#endif const int width = pu.Y().width; const int height = pu.Y().height; @@ -3180,10 +3339,17 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu, PelUnitBuf otherBuf = m_tmpPredStorage[1 - (int)eRefPicList].getBuf( UnitAreaRelative( *pu.cu, pu ) ); origBufTmp.copyFrom(origBuf); origBufTmp.removeHighFreq(otherBuf, 
m_pcEncCfg->getClipForBiPredMeEnabled(), pu.cu->slice->clpRngs() +#if JVET_L0646_GBI + ,getGbiWeight(pu.cu->GBiIdx, eRefPicList) +#endif ); pBuf = &origBufTmp; +#if JVET_L0646_GBI + fWeight = xGetMEDistortionWeight( pu.cu->GBiIdx, eRefPicList ); +#else fWeight = 0.5; +#endif } // pred YUV @@ -4547,3 +4713,69 @@ uint64_t InterSearch::xGetSymbolFracBitsInter(CodingStructure &cs, Partitioner & return fracBits; } +#if JVET_L0646_GBI +double InterSearch::xGetMEDistortionWeight(uint8_t gbiIdx, RefPicList eRefPicList) +{ + if( gbiIdx != GBI_DEFAULT ) + { + return fabs((double)getGbiWeight(gbiIdx, eRefPicList) / (double)g_GbiWeightBase); + } + else + { + return 0.5; + } +} +bool InterSearch::xReadBufferedUniMv(PredictionUnit& pu, RefPicList eRefPicList, int32_t iRefIdx, Mv& pcMvPred, Mv& rcMv, uint32_t& ruiBits, Distortion& ruiCost) +{ + if (m_cUniMotions.isReadMode((uint32_t)eRefPicList, (uint32_t)iRefIdx)) + { + m_cUniMotions.copyTo(rcMv, ruiCost, (uint32_t)eRefPicList, (uint32_t)iRefIdx); + + m_pcRdCost->setPredictor(pcMvPred); + m_pcRdCost->setCostScale(0); + + unsigned imvShift = pu.cu->imv << 1; + uint32_t uiMvBits = m_pcRdCost->getBitsOfVectorWithPredictor(rcMv.getHor(), rcMv.getVer(), imvShift); + + ruiBits += uiMvBits; + ruiCost += m_pcRdCost->getCost(ruiBits); + return true; + } + return false; +} +bool InterSearch::xReadBufferedAffineUniMv(PredictionUnit& pu, RefPicList eRefPicList, int32_t iRefIdx, Mv acMvPred[3], Mv acMv[3], uint32_t& ruiBits, Distortion& ruiCost) +{ + if (m_cUniMotions.isReadModeAffine((uint32_t)eRefPicList, (uint32_t)iRefIdx, pu.cu->affineType)) + { + m_cUniMotions.copyAffineMvTo(acMv, ruiCost, (uint32_t)eRefPicList, (uint32_t)iRefIdx, pu.cu->affineType); + m_pcRdCost->setCostScale(0); + + uint32_t uiMvBits = 0; + for (int iVerIdx = 0; iVerIdx<(pu.cu->affineType ? 
3 : 2); iVerIdx++) + { + if (iVerIdx) + { + m_pcRdCost->setPredictor(acMvPred[iVerIdx] + acMv[0] - acMvPred[0]); + } + else + { + m_pcRdCost->setPredictor(acMvPred[iVerIdx]); + } + const int shift = VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE; + uiMvBits += m_pcRdCost->getBitsOfVectorWithPredictor(acMv[iVerIdx].getHor() >> shift, acMv[iVerIdx].getVer() >> shift, 0); + } + ruiBits += uiMvBits; + ruiCost += m_pcRdCost->getCost(ruiBits); + return true; + } + return false; +} +void InterSearch::initWeightIdxBits() +{ + for (int n = 0; n < GBI_NUM; ++n) + { + m_auiEstWeightIdxBits[n] = deriveWeightIdxBits(n); + } +} +#endif + diff --git a/source/Lib/EncoderLib/InterSearch.h b/source/Lib/EncoderLib/InterSearch.h index bcb4c92613cf90b6f40310669717e379f5510e8a..c4689cca5f16cec59d86c53a2985a2174cd6d6cc 100644 --- a/source/Lib/EncoderLib/InterSearch.h +++ b/source/Lib/EncoderLib/InterSearch.h @@ -81,6 +81,11 @@ private: CodingStructure **m_pSaveCS; ClpRng m_lumaClpRng; +#if JVET_L0646_GBI + uint32_t m_auiEstWeightIdxBits[GBI_NUM]; + GBiMotionParam m_cUniMotions; + bool m_affineModeSelected; +#endif protected: // interface to option @@ -133,7 +138,9 @@ public: #if ENABLE_SPLIT_PARALLELISM void copyState ( const InterSearch& other ); #endif - +#if JVET_L0646_GBI + void setAffineModeSelected ( bool flag) { m_affineModeSelected = flag; } +#endif protected: /// sub-function for motion vector refinement used in fractional-pel accuracy @@ -316,6 +323,11 @@ protected: Mv hevcMv[2][33] , Mv mvAffine4Para[2][33][3] , int refIdx4Para[2] +#if JVET_L0646_GBI + , uint8_t gbiIdx = GBI_DEFAULT + , bool enforceGBiPred = false + , uint32_t gbiIdxBits = 0 +#endif ); void xAffineMotionEstimation ( PredictionUnit& pu, @@ -343,7 +355,16 @@ protected: void xCopyAffineAMVPInfo ( AffineAMVPInfo& src, AffineAMVPInfo& dst ); void xCheckBestAffineMVP ( PredictionUnit &pu, AffineAMVPInfo &affineAMVPInfo, RefPicList eRefPicList, Mv acMv[3], Mv acMvPred[3], int& riMVPIdx, uint32_t& ruiBits, Distortion& 
ruiCost ); - +#if JVET_L0646_GBI + bool xReadBufferedAffineUniMv ( PredictionUnit& pu, RefPicList eRefPicList, int32_t iRefIdx, Mv acMvPred[3], Mv acMv[3], uint32_t& ruiBits, Distortion& ruiCost); + double xGetMEDistortionWeight ( uint8_t gbiIdx, RefPicList eRefPicList); + bool xReadBufferedUniMv ( PredictionUnit& pu, RefPicList eRefPicList, int32_t iRefIdx, Mv& pcMvPred, Mv& rcMv, uint32_t& ruiBits, Distortion& ruiCost); +public: + void resetBufferedUniMotions () { m_cUniMotions.reset(); } + uint32_t getWeightIdxBits ( uint8_t gbiIdx ) { return m_auiEstWeightIdxBits[gbiIdx]; } + void initWeightIdxBits (); +protected: +#endif void xExtDIFUpSamplingH ( CPelBuf* pcPattern ); void xExtDIFUpSamplingQ ( CPelBuf* pcPatternKey, Mv halfPelRef ); diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp index 31d205d12e0b73173c23ec2217a0abcb0d107b7c..5d4a3cee0c213fc2d9754746002536a9a2bde442 100644 --- a/source/Lib/EncoderLib/VLCWriter.cpp +++ b/source/Lib/EncoderLib/VLCWriter.cpp @@ -544,7 +544,9 @@ void HLSWriter::codeSPSNext( const SPSNext& spsNext, const bool usePCM ) { WRITE_FLAG( spsNext.getUseAffineType() ? 1 : 0, "affine_type_flag" ); } - +#if JVET_L0646_GBI + WRITE_FLAG( spsNext.getUseGBi() ? 1 : 0, "gbi_flag" ); +#endif for( int k = 0; k < SPSNext::NumReservedFlags; k++ ) { WRITE_FLAG( 0, "reserved_flag" );