diff --git a/doc/software-manual.tex b/doc/software-manual.tex index b98ab091cd0955c1333b1086d09cddee49ef2c64..70f66e6a8a846ba9aa1aec933a73e8e72ebf1ea0 100644 --- a/doc/software-manual.tex +++ b/doc/software-manual.tex @@ -1968,9 +1968,7 @@ Enables or disables the Intra Sub-Partitions coding mode. \Option{ISPFast} & %\ShortOption{\None} & \Default{false} & -Enables or disables reduced testing of non-DCT-II transforms if ISP is likely to become the best mode for a given CU. -\par -This option has no effect if either ISP or MTS are disabled. +Enables or disables fast encoder methods for ISP. \\ \Option{JointCbCr} & diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp index 10ef069fe19f734a6e24fd73f91294a76af8a5f4..4d2d7d0097b3f9198366c167aed491f1279823bf 100644 --- a/source/App/EncoderApp/EncApp.cpp +++ b/source/App/EncoderApp/EncApp.cpp @@ -559,7 +559,9 @@ void EncApp::xInitLibCfg() m_cEncLib.setPPSDepQuantEnabledIdc ( m_PPSDepQuantEnabledIdc ); m_cEncLib.setPPSRefPicListSPSIdc0 ( m_PPSRefPicListSPSIdc0 ); m_cEncLib.setPPSRefPicListSPSIdc1 ( m_PPSRefPicListSPSIdc1 ); +#if !JVET_P0206_TMVP_flags m_cEncLib.setPPSTemporalMVPEnabledIdc ( m_PPSTemporalMVPEnabledIdc ); +#endif m_cEncLib.setPPSMvdL1ZeroIdc ( m_PPSMvdL1ZeroIdc ); m_cEncLib.setPPSCollocatedFromL0Idc ( m_PPSCollocatedFromL0Idc ); m_cEncLib.setPPSSixMinusMaxNumMergeCandPlus1 ( m_PPSSixMinusMaxNumMergeCandPlus1 ); diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp index 99f747a44378c7a576be08a249909210f37472d2..e80890500c26ee326ec201be3ebfcb7dc53f43b5 100644 --- a/source/App/EncoderApp/EncAppCfg.cpp +++ b/source/App/EncoderApp/EncAppCfg.cpp @@ -3265,7 +3265,9 @@ bool EncAppCfg::xCheckParameter() m_PPSDepQuantEnabledIdc = 0; m_PPSRefPicListSPSIdc0 = 0; m_PPSRefPicListSPSIdc1 = 0; +#if !JVET_P0206_TMVP_flags m_PPSTemporalMVPEnabledIdc = 0; +#endif m_PPSMvdL1ZeroIdc = 0; m_PPSCollocatedFromL0Idc = 0; m_PPSSixMinusMaxNumMergeCandPlus1 = 0; @@ -3277,7 +3279,9 @@ bool EncAppCfg::xCheckParameter() m_PPSDepQuantEnabledIdc = (m_depQuantEnabledFlag ? 1 : 0) + 1; m_PPSRefPicListSPSIdc0 = 0; m_PPSRefPicListSPSIdc1 = 0; +#if !JVET_P0206_TMVP_flags m_PPSTemporalMVPEnabledIdc = 0; +#endif m_PPSMvdL1ZeroIdc = 0; m_PPSCollocatedFromL0Idc = 0; m_PPSSixMinusMaxNumMergeCandPlus1 = 6 - m_maxNumMergeCand + 1; @@ -3289,7 +3293,9 @@ bool EncAppCfg::xCheckParameter() m_PPSDepQuantEnabledIdc = (m_depQuantEnabledFlag ? 1 : 0) + 1; m_PPSRefPicListSPSIdc0 = 2; m_PPSRefPicListSPSIdc1 = 2; - m_PPSTemporalMVPEnabledIdc = m_TMVPModeId == 2 ? 0: ( int(m_TMVPModeId == 1 ? 1: 0) + 1); +#if !JVET_P0206_TMVP_flags + m_PPSTemporalMVPEnabledIdc = m_TMVPModeId == 2 ? 0: ( int(m_TMVPModeId == 1 ? 1: 0) + 1); +#endif m_PPSMvdL1ZeroIdc = 2; m_PPSCollocatedFromL0Idc = 1; m_PPSSixMinusMaxNumMergeCandPlus1 = 6 - m_maxNumMergeCand + 1; @@ -3301,7 +3307,9 @@ bool EncAppCfg::xCheckParameter() m_PPSDepQuantEnabledIdc = (m_depQuantEnabledFlag ? 1 : 0) + 1; m_PPSRefPicListSPSIdc0 = 2; m_PPSRefPicListSPSIdc1 = 2; +#if !JVET_P0206_TMVP_flags m_PPSTemporalMVPEnabledIdc = m_TMVPModeId == 2 ? 0: ( int(m_TMVPModeId == 1 ? 1: 0) + 1); +#endif m_PPSMvdL1ZeroIdc = 0; m_PPSCollocatedFromL0Idc = 0; m_PPSSixMinusMaxNumMergeCandPlus1 = 6 - m_maxNumMergeCand + 1; diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h index 23803e0a433231707707d42259d4051af6a5ce21..10b3380b7aa6ff9a03b8097cc7559790a51129b6 100644 --- a/source/App/EncoderApp/EncAppCfg.h +++ b/source/App/EncoderApp/EncAppCfg.h @@ -545,7 +545,9 @@ protected: int m_PPSDepQuantEnabledIdc; int m_PPSRefPicListSPSIdc0; int m_PPSRefPicListSPSIdc1; +#if !JVET_P0206_TMVP_flags int m_PPSTemporalMVPEnabledIdc; +#endif int m_PPSMvdL1ZeroIdc; int m_PPSCollocatedFromL0Idc; uint32_t m_PPSSixMinusMaxNumMergeCandPlus1; diff --git a/source/Lib/CommonLib/Buffer.cpp b/source/Lib/CommonLib/Buffer.cpp index 73023d91e6187e24248f624e03d18979d1ac6180..0803b217cdd4cbdc877952b4202c845fdf0a297f 100644 --- a/source/Lib/CommonLib/Buffer.cpp +++ b/source/Lib/CommonLib/Buffer.cpp @@ -42,7 +42,11 @@ #include "Buffer.h" #include "InterpolationFilter.h" +#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING +void applyPROFCore(Pel* dst, int dstStride, const Pel* src, int srcStride, int width, int height, const Pel* gradX, const Pel* gradY, int gradStride, const int* dMvX, const int* dMvY, int dMvStride, const bool& bi, int shiftNum, Pel offset, const ClpRng& clpRng) +#else void applyPROFCore(Pel* dst, int dstStride, const Pel* src, int srcStride, int width, int height, const Pel* gradX, const Pel* gradY, int gradStride, const int* dMvX, const int* dMvY, int dMvStride, int shiftNum, Pel offset, const ClpRng& clpRng) +#endif { int idx = 0; #if !JVET_P0057_BDOF_PROF_HARMONIZATION @@ -63,10 +67,16 @@ void applyPROFCore(Pel* dst, int dstStride, const Pel* src, int srcStride, int w #endif #if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING dI = Clip3(-dILimit, dILimit - 1, dI); -#endif - + dst[w] = src[w] + dI; + if (!bi) + { + dst[w] = (dst[w] + offset) >> shiftNum; + dst[w] = ClipPel(dst[w], clpRng); + } +#else dI = (src[w] + dI + offset) >> shiftNum; dst[w] = (Pel)ClipPel(dI, clpRng); +#endif idx++; } @@ -77,6 +87,7 @@ void applyPROFCore(Pel* dst, int dstStride, const Pel* src, int srcStride, int w } } +#if !JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING template<bool l1PROFEnabled = true> void applyBiPROFCore (Pel* dst, int dstStride, const Pel* src0, const Pel* src1, int srcStride, int width, int height, const Pel* gradX0, const Pel* gradY0, const Pel* gradX1, const Pel* gradY1, int gradStride, const int* dMvX0, const int* dMvY0, const int* dMvX1, const int* dMvY1, int dMvStride, const int8_t w0, const ClpRng& clpRng) { @@ -142,6 +153,7 @@ void applyBiPROFCore (Pel* dst, int dstStride, const Pel* src0, const Pel* src1, src1 += srcStride; } } +#endif template< typename T > void addAvgCore( const T* src1, int src1Stride, const T* src2, int src2Stride, T* dest, int dstStride, int width, int height, int rshift, int offset, const ClpRng& clpRng ) @@ -433,8 +445,10 @@ PelBufferOps::PelBufferOps() profGradFilter = gradFilterCore <false>; applyPROF = applyPROFCore; +#if !JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING applyBiPROF[1] = applyBiPROFCore; applyBiPROF[0] = applyBiPROFCore <false>; +#endif roundIntVector = nullptr; } diff --git a/source/Lib/CommonLib/Buffer.h b/source/Lib/CommonLib/Buffer.h index 81be539d873628c94751f740b50ad68860fb9f47..4f09f9def0548aab8a811176ef04317409730494 100644 --- a/source/Lib/CommonLib/Buffer.h +++ b/source/Lib/CommonLib/Buffer.h @@ -81,8 +81,12 @@ struct PelBufferOps void ( *removeHighFreq4) ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height); #endif void (*profGradFilter) (Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, const int bitDepth); +#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING + void (*applyPROF) (Pel* dst, int dstStride, const Pel* src, int srcStride, int width, int height, const Pel* gradX, const Pel* gradY, int gradStride, const int* dMvX, const int* dMvY, int dMvStride, const bool& bi, int shiftNum, Pel offset, const ClpRng& clpRng); +#else void (*applyPROF) (Pel* dst, int dstStride, const Pel* src, int srcStride, int width, int height, const Pel* gradX, const Pel* gradY, int gradStride, const int* dMvX, const int* dMvY, int dMvStride, int shiftNum, Pel offset, const ClpRng& clpRng); void (*applyBiPROF[2]) (Pel* dst, int dstStride, const Pel* src0, const Pel* src1, int srcStride, int width, int height, const Pel* gradX0, const Pel* gradY0, const Pel* gradX1, const Pel* gradY1, int gradStride, const int* dMvX0, const int* dMvY0, const int* dMvX1, const int* dMvY1, int dMvStride, const int8_t gbiWeightL0, const ClpRng& clpRng); +#endif void (*roundIntVector) (int* v, int size, unsigned int nShift, const int dmvLimit); }; diff --git a/source/Lib/CommonLib/ContextModelling.h b/source/Lib/CommonLib/ContextModelling.h index 150796090b4fc175cbfa33351287b8fa75118be4..575b2aab4bde637496fb734b73e849ce80036b47 100644 --- a/source/Lib/CommonLib/ContextModelling.h +++ b/source/Lib/CommonLib/ContextModelling.h @@ -448,7 +448,10 @@ public: { violatesLfnstConstrained[CHANNEL_TYPE_LUMA ] = false; violatesLfnstConstrained[CHANNEL_TYPE_CHROMA] = false; - lfnstLastScanPos = false; + lfnstLastScanPos = false; +#if JVET_P1026_MTS_SIGNALLING + violatesMtsCoeffConstraint = false; +#endif } CUCtx(int _qp) : isDQPCoded(false), isChromaQpAdjCoded(false), qgStart(false), @@ -456,7 +459,10 @@ public: { violatesLfnstConstrained[CHANNEL_TYPE_LUMA ] = false; violatesLfnstConstrained[CHANNEL_TYPE_CHROMA] = false; - lfnstLastScanPos = false; + lfnstLastScanPos = false; +#if JVET_P1026_MTS_SIGNALLING + violatesMtsCoeffConstraint = false; +#endif } ~CUCtx() {} public: @@ -466,6 +472,9 @@ public: bool lfnstLastScanPos; int8_t qp; // used as a previous(last) QP and for QP prediction bool violatesLfnstConstrained[MAX_NUM_CHANNEL_TYPE]; +#if JVET_P1026_MTS_SIGNALLING + bool violatesMtsCoeffConstraint; +#endif }; class MergeCtx diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp index 6acfedc03e93fe26989016a68a11e8c55e3e5a2f..6537c4f89c8896183159b2405baff48de129e3a2 100644 --- a/source/Lib/CommonLib/InterPrediction.cpp +++ b/source/Lib/CommonLib/InterPrediction.cpp @@ -872,19 +872,28 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio enablePROF &= !m_encOnly || pu.cu->slice->getCheckLDC() || iDMvHorX > profThres || iDMvHorY > profThres || iDMvVerX > profThres || iDMvVerY > profThres || iDMvHorX < -profThres || iDMvHorY < -profThres || iDMvVerX < -profThres || iDMvVerY < -profThres; enablePROF &= pu.cs->pps->getPicWidthInLumaSamples() == refPic->getPicWidthInLumaSamples() && pu.cs->pps->getPicHeightInLumaSamples() == refPic->getPicHeightInLumaSamples(); +#if !JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING if (compID == COMPONENT_Y) { m_applyPROF[m_iRefListIdx] = enablePROF; } +#endif bool isLast = enablePROF ? false : !bi; +#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING + const int cuExtW = AFFINE_MIN_BLOCK_SIZE + PROF_BORDER_EXT_W * 2; + const int cuExtH = AFFINE_MIN_BLOCK_SIZE + PROF_BORDER_EXT_H * 2; + + PelBuf gradXExt(m_gradBuf[0], cuExtW, cuExtH); + PelBuf gradYExt(m_gradBuf[1], cuExtW, cuExtH); +#else const int cuExtW = pu.blocks[compID].width + PROF_BORDER_EXT_W * 2; const int cuExtH = pu.blocks[compID].height + PROF_BORDER_EXT_H * 2; PelBuf gradXExt(m_gradBuf[m_iRefListIdx][0], cuExtW, cuExtH); PelBuf gradYExt(m_gradBuf[m_iRefListIdx][1], cuExtW, cuExtH); - +#endif const int MAX_FILTER_SIZE = std::max<int>(NTAPS_LUMA, NTAPS_CHROMA); const int dstExtW = ((blockWidth + PROF_BORDER_EXT_W * 2 + 7) >> 3) << 3; const int dstExtH = blockHeight + PROF_BORDER_EXT_H * 2; @@ -898,7 +907,11 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio int *dMvScaleHor = m_dMvBuf[m_iRefListIdx]; int *dMvScaleVer = m_dMvBuf[m_iRefListIdx] + 16; +#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING + if (enablePROF) +#else if (enablePROF && !bi) +#endif { int* dMvH = dMvScaleHor; int* dMvV = dMvScaleVer; @@ -1130,8 +1143,13 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio dstPel[blockWidth] = leftShift_round(refPel[blockWidth], shift) - (Pel)IF_INTERNAL_OFFS; } +#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING + PelBuf gradXBuf = gradXExt.subBuf(0, 0, blockWidth + 2, blockHeight + 2); + PelBuf gradYBuf = gradYExt.subBuf(0, 0, blockWidth + 2, blockHeight + 2); +#else PelBuf gradXBuf = gradXExt.subBuf(w, h, blockWidth + 2, blockHeight + 2); PelBuf gradYBuf = gradYExt.subBuf(w, h, blockWidth + 2, blockHeight + 2); +#endif g_pelBufOP.profGradFilter(dstExtBuf.buf, dstExtBuf.stride, blockWidth + 2, blockHeight + 2, gradXBuf.stride, gradXBuf.buf, gradYBuf.buf, clpRng.bd); const int shiftNum = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd)); @@ -1142,6 +1160,9 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio Pel * dstY = dstBuf.bufAt(w, h); +#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING + g_pelBufOP.applyPROF(dstY, dstBuf.stride, src, dstExtBuf.stride, blockWidth, blockHeight, gX, gY, gradXBuf.stride, dMvScaleHor, dMvScaleVer, blockWidth, bi, shiftNum, offset, clpRng); +#else if (!bi) { g_pelBufOP.applyPROF(dstY, dstBuf.stride, src, dstExtBuf.stride, blockWidth, blockHeight, gX, gY, gradXBuf.stride, dMvScaleHor, dMvScaleVer, blockWidth, shiftNum, offset, clpRng); @@ -1152,6 +1173,7 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio PelBuf destBuf(dstY, dstBuf.stride, Size(blockWidth, blockHeight)); destBuf.copyFrom(srcExtBuf); } +#endif } } } @@ -1310,6 +1332,7 @@ void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitB if( iRefIdx0 >= 0 && iRefIdx1 >= 0 ) { +#if !JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING if (pu.cu->affine && (m_applyPROF[0] || m_applyPROF[1])) { xApplyBiPROF(pu, pcYuvSrc0.bufs[COMPONENT_Y], pcYuvSrc1.bufs[COMPONENT_Y], pcYuvDst.bufs[COMPONENT_Y], clpRngs.comp[COMPONENT_Y]); @@ -1317,6 +1340,7 @@ void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitB CHECK(yuvDstTmp, "yuvDstTmp is disallowed with PROF"); return; } +#endif if( pu.cu->GBiIdx != GBI_DEFAULT && (yuvDstTmp || !pu.mhIntraFlag) ) { CHECK(bioApplied, "GBi is disallowed with BIO"); @@ -1399,6 +1423,7 @@ void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitB } } +#if !JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING void InterPrediction::xApplyBiPROF(const PredictionUnit &pu, const CPelBuf& pcYuvSrc0, const CPelBuf& pcYuvSrc1, PelBuf& pcYuvDst, const ClpRng& clpRng) { int blockWidth = AFFINE_MIN_BLOCK_SIZE; @@ -1542,6 +1567,7 @@ void InterPrediction::xApplyBiPROF(const PredictionUnit &pu, const CPelBuf& pcYu else g_pelBufOP.applyBiPROF[0](dstY, pcYuvDst.stride, srcY1, srcY0, pcYuvSrc0.stride, width, height, gX1, gY1, gX0, gY0, gradXExt0.stride, dMvX1, dMvY1, dMvX0, dMvY0, blockWidth, getGbiWeight(pu.cu->GBiIdx, REF_PIC_LIST_1), clpRng); } +#endif void InterPrediction::motionCompensation( PredictionUnit &pu, PelUnitBuf &predBuf, const RefPicList &eRefPicList , const bool luma, const bool chroma @@ -1787,11 +1813,12 @@ void InterPrediction::xPrefetch(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicLis width = pcPad.bufs[compID].width; height = pcPad.bufs[compID].height; offset = (DMVR_NUM_ITERATION) * (pcPad.bufs[compID].stride + 1); - int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat); + int mvshiftTempHor = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat); + int mvshiftTempVer = mvShift + getComponentScaleY((ComponentID)compID, pu.chromaFormat); width += (filtersize - 1); height += (filtersize - 1); - cMv += Mv(-(((filtersize >> 1) - 1) << mvshiftTemp), - -(((filtersize >> 1) - 1) << mvshiftTemp)); + cMv += Mv(-(((filtersize >> 1) - 1) << mvshiftTempHor), + -(((filtersize >> 1) - 1) << mvshiftTempVer)); bool wrapRef = false; if( pu.cs->sps->getWrapAroundEnabledFlag() ) { @@ -1804,7 +1831,7 @@ void InterPrediction::xPrefetch(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicLis /* Pre-fetch similar to HEVC*/ { CPelBuf refBuf; - Position Rec_offset = pu.blocks[compID].pos().offset(cMv.getHor() >> mvshiftTemp, cMv.getVer() >> mvshiftTemp); + Position Rec_offset = pu.blocks[compID].pos().offset(cMv.getHor() >> mvshiftTempHor, cMv.getVer() >> mvshiftTempVer); refBuf = refPic->getRecoBuf(CompArea((ComponentID)compID, pu.chromaFormat, Rec_offset, pu.blocks[compID].size()), wrapRef); PelBuf &dstBuf = pcPad.bufs[compID]; g_pelBufOP.copyBuffer((Pel *)refBuf.buf, refBuf.stride, ((Pel *)dstBuf.buf) + offset, dstBuf.stride, width, height); @@ -1822,7 +1849,8 @@ void InterPrediction::xPad(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList ref width = pcPad.bufs[compID].width; height = pcPad.bufs[compID].height; offset = (DMVR_NUM_ITERATION) * (pcPad.bufs[compID].stride + 1); - padsize = (DMVR_NUM_ITERATION) >> getComponentScaleX((ComponentID)compID, pu.chromaFormat); + /*using the larger padsize for 422*/ + padsize = (DMVR_NUM_ITERATION) >> getComponentScaleY((ComponentID)compID, pu.chromaFormat); width += (filtersize - 1); height += (filtersize - 1); /*padding on all side of size DMVR_PAD_LENGTH*/ @@ -1982,7 +2010,8 @@ void InterPrediction::xFinalPaddedMCForDMVR(PredictionUnit& pu, PelUnitBuf &pcYu if (blockMoved || (compID == 0)) { pcPadstride = pcPadTemp.bufs[compID].stride; - int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat); + int mvshiftTempHor = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat); + int mvshiftTempVer = mvShift + getComponentScaleY((ComponentID)compID, pu.chromaFormat); int leftPixelExtra; if (compID == COMPONENT_Y) { @@ -1993,10 +2022,10 @@ void InterPrediction::xFinalPaddedMCForDMVR(PredictionUnit& pu, PelUnitBuf &pcYu leftPixelExtra = (NTAPS_CHROMA >> 1) - 1; } PelBuf &srcBuf = pcPadTemp.bufs[compID]; - deltaIntMvX = (cMv.getHor() >> mvshiftTemp) - - (startMv.getHor() >> mvshiftTemp); - deltaIntMvY = (cMv.getVer() >> mvshiftTemp) - - (startMv.getVer() >> mvshiftTemp); + deltaIntMvX = (cMv.getHor() >> mvshiftTempHor) - + (startMv.getHor() >> mvshiftTempHor); + deltaIntMvY = (cMv.getVer() >> mvshiftTempVer) - + (startMv.getVer() >> mvshiftTempVer); CHECK((abs(deltaIntMvX) > DMVR_NUM_ITERATION) || (abs(deltaIntMvY) > DMVR_NUM_ITERATION), "not expected DMVR movement"); diff --git a/source/Lib/CommonLib/InterPrediction.h b/source/Lib/CommonLib/InterPrediction.h index 85f008395f12a20cba23344c30c09e9eab01da5b..452c2c84429605471f267fd0930e599a0170c6f8 100644 --- a/source/Lib/CommonLib/InterPrediction.h +++ b/source/Lib/CommonLib/InterPrediction.h @@ -102,9 +102,15 @@ protected: Mv(-2, 2), Mv(-1, 2), Mv(0, 2), Mv(1, 2), Mv(2, 2) }; uint64_t m_SADsArray[((2 * DMVR_NUM_ITERATION) + 1) * ((2 * DMVR_NUM_ITERATION) + 1)]; +#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING + Pel m_gradBuf[2][(AFFINE_MIN_BLOCK_SIZE + 2) * (AFFINE_MIN_BLOCK_SIZE + 2)]; +#else Pel m_gradBuf[2][2][(MAX_CU_SIZE + 2) * (MAX_CU_SIZE + 2)]; +#endif int m_dMvBuf[2][16 * 2]; +#if !JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING bool m_applyPROF[2]; +#endif bool m_skipPROF; bool m_encOnly; bool m_isBi; @@ -141,7 +147,9 @@ protected: void xCalcBIOPar (const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, int bitDepth); void xCalcBlkGradient (int sx, int sy, int *arraysGx2, int *arraysGxGy, int *arraysGxdI, int *arraysGy2, int *arraysGydI, int &sGx2, int &sGy2, int &sGxGy, int &sGxdI, int &sGydI, int width, int height, int unitSize); void xWeightedAverage ( const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied, PelUnitBuf* yuvDstTmp = NULL ); +#if !JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING void xApplyBiPROF (const PredictionUnit& pu, const CPelBuf& pcYuvSrc0, const CPelBuf& pcYuvSrc1, PelBuf& pcYuvDst, const ClpRng& clpRng); +#endif void xPredAffineBlk ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv* _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng, const std::pair<int, int> scalingRatio = SCALE_1X ); void xWeightedTriangleBlk ( const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const bool splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 ); diff --git a/source/Lib/CommonLib/IntraPrediction.cpp b/source/Lib/CommonLib/IntraPrediction.cpp index 56ba627b164ad57d15ab32d1b0c7160b88f3f863..01295fa4e31ca594aec6b52a1800ac349a7e5c75 100644 --- a/source/Lib/CommonLib/IntraPrediction.cpp +++ b/source/Lib/CommonLib/IntraPrediction.cpp @@ -67,6 +67,7 @@ const uint8_t IntraPrediction::m_aucIntraFilter[MAX_INTRA_FILTER_DEPTHS] = 0 // 128xn }; +#if !JVET_P0599_INTRA_SMOOTHING_INTERP_FILT const TFilterCoeff g_intraGaussFilter[32][4] = { { 16, 32, 16, 0 }, { 15, 29, 17, 3 }, @@ -101,6 +102,7 @@ const TFilterCoeff g_intraGaussFilter[32][4] = { { 3, 17, 29, 15 }, { 3, 17, 29, 15 } }; +#endif //!JVET_P0599_INTRA_SMOOTHING_INTERP_FILT // ==================================================================================================================== // Constructor / destructor / initialize @@ -556,7 +558,7 @@ void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const Ch // Extend main reference to right using replication const int log2Ratio = floorLog2(width) - floorLog2(height); const int s = std::max<int>(0, bIsModeVer ? log2Ratio : -log2Ratio); - const int maxIndex = (multiRefIdx << s) + 2; + const int maxIndex = (multiRefIdx << s) + 1; const int refLength = bIsModeVer ? m_topRefLength : m_leftRefLength; const Pel val = refMain[refLength + multiRefIdx]; for (int z = 1; z <= maxIndex; z++) @@ -618,8 +620,13 @@ void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const Ch { const bool useCubicFilter = !m_ipaParam.interpolationFlag; +#if JVET_P0599_INTRA_SMOOTHING_INTERP_FILT + const TFilterCoeff intraSmoothingFilter[4] = {TFilterCoeff(16 - (deltaFract >> 1)), TFilterCoeff(32 - (deltaFract >> 1)), TFilterCoeff(16 + (deltaFract >> 1)), TFilterCoeff(deltaFract >> 1)}; + const TFilterCoeff* const f = (useCubicFilter) ? InterpolationFilter::getChromaFilterTable(deltaFract) : intraSmoothingFilter; +#else //!JVET_P0599_INTRA_SMOOTHING_INTERP_FILT const TFilterCoeff *const f = (useCubicFilter) ? InterpolationFilter::getChromaFilterTable(deltaFract) : g_intraGaussFilter[deltaFract]; +#endif //JVET_P0599_INTRA_SMOOTHING_INTERP_FILT for (int x = 0; x < width; x++) { @@ -1665,7 +1672,7 @@ void IntraPrediction::xGetLMParameters(const PredictionUnit &pu, const Component const int baseUnitSize = 1 << MIN_CU_LOG2; const int unitWidth = baseUnitSize >> getComponentScaleX(chromaArea.compID, nChromaFormat); - const int unitHeight = baseUnitSize >> getComponentScaleX(chromaArea.compID, nChromaFormat); + const int unitHeight = baseUnitSize >> getComponentScaleY(chromaArea.compID, nChromaFormat); const int tuWidthInUnits = tuWidth / unitWidth; const int tuHeightInUnits = tuHeight / unitHeight; diff --git a/source/Lib/CommonLib/LoopFilter.cpp b/source/Lib/CommonLib/LoopFilter.cpp index c7b4d9fd2e6c5b084efdfb96327abd1824ac776a..2200bfd2b91494dce95bdc2c09e4ca4a78d9ca01 100644 --- a/source/Lib/CommonLib/LoopFilter.cpp +++ b/source/Lib/CommonLib/LoopFilter.cpp @@ -689,15 +689,23 @@ unsigned LoopFilter::xGetBoundaryStrengthSingle ( const CodingUnit& cu, const De const CodingUnit& cuQ = cu; const CodingUnit& cuP = *cu.cs->getCU( posP, cu.chType ); +#if !JVET_P0571_FIX_BS_BDPCM_CHROMA if( ( MODE_INTRA == cuP.predMode && cuP.bdpcmMode ) && ( MODE_INTRA == cuQ.predMode && cuQ.bdpcmMode ) ) { return 0; } +#endif //-- Set BS for Intra MB : BS = 4 or 3 if( ( MODE_INTRA == cuP.predMode ) || ( MODE_INTRA == cuQ.predMode ) ) { +#if JVET_P0571_FIX_BS_BDPCM_CHROMA + int bsY = (MODE_INTRA == cuP.predMode && cuP.bdpcmMode) && (MODE_INTRA == cuQ.predMode && cuQ.bdpcmMode) ? 0 : 2; + int bsC = 2; + return (BsSet(bsY, COMPONENT_Y) + BsSet(bsC, COMPONENT_Cb) + BsSet(bsC, COMPONENT_Cr)); +#else return (BsSet(2, COMPONENT_Y) + BsSet(2, COMPONENT_Cb) + BsSet(2, COMPONENT_Cr)); +#endif } const TransformUnit& tuQ = *cuQ.cs->getTU(posQ, cuQ.chType); diff --git a/source/Lib/CommonLib/Quant.cpp b/source/Lib/CommonLib/Quant.cpp index 78f49d973be2c2b117314b6a2be0d5bd41ed233c..480fffc30715a321d32764e544c3e592d157a671 100644 --- a/source/Lib/CommonLib/Quant.cpp +++ b/source/Lib/CommonLib/Quant.cpp @@ -405,7 +405,11 @@ void Quant::dequant(const TransformUnit &tu, const int QP_per = cQP.per(isTransformSkip); const int QP_rem = cQP.rem(isTransformSkip); +#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE + const int rightShift = (IQUANT_SHIFT - ((isTransformSkip ? 0 : iTransformShift) + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0); +#else const int rightShift = (IQUANT_SHIFT - (iTransformShift + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0); +#endif if(enableScalingLists) { @@ -977,8 +981,11 @@ void Quant::quant(TransformUnit &tu, const ComponentID &compID, const CCoeffBuf iTransformShift = std::max<int>(0, iTransformShift); } - +#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE + const int iQBits = QUANT_SHIFT + cQP.per(useTransformSkip) + (useTransformSkip ? 0 : iTransformShift); +#else const int iQBits = QUANT_SHIFT + cQP.per(useTransformSkip) + iTransformShift; +#endif // QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset const int64_t iAdd = int64_t(tu.cs->slice->isIRAP() ? 171 : 85) << int64_t(iQBits - 9); @@ -1109,8 +1116,11 @@ void Quant::transformSkipQuantOneSample(TransformUnit &tu, const ComponentID &co * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller) * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result */ - +#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE + const int iQBits = QUANT_SHIFT + cQP.per(useTransformSkip) + (useTransformSkip ? 0 : iTransformShift); +#else const int iQBits = QUANT_SHIFT + cQP.per(useTransformSkip) + iTransformShift; +#endif // QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset const int iAdd = int64_t(bUseHalfRoundingPoint ? 256 : (tu.cs->slice->isIRAP() ? 171 : 85)) << int64_t(iQBits - 9); TCoeff transformedCoefficient; @@ -1162,7 +1172,16 @@ void Quant::invTrSkipDeQuantOneSample(TransformUnit &tu, const ComponentID &comp CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list"); +#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE +#if JVET_P0058_CHROMA_TS + const bool isTransformSkip = (tu.mtsIdx[compID] == MTS_SKIP); +#else + const bool isTransformSkip = (tu.mtsIdx == MTS_SKIP && isLuma(compID)); +#endif + const int rightShift = (IQUANT_SHIFT - ((isTransformSkip ? 0 : iTransformShift) + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0); +#else const int rightShift = (IQUANT_SHIFT - (iTransformShift + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0); +#endif const TCoeff transformMinimum = -(1 << maxLog2TrDynamicRange); const TCoeff transformMaximum = (1 << maxLog2TrDynamicRange) - 1; @@ -1228,7 +1247,9 @@ void Quant::invTrSkipDeQuantOneSample(TransformUnit &tu, const ComponentID &comp } // Inverse transform-skip - +#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE + reconSample = Pel(dequantisedSample); +#else if (iTransformShift >= 0) { const TCoeff offset = iTransformShift == 0 ? 0 : (1 << (iTransformShift - 1)); @@ -1239,6 +1260,7 @@ void Quant::invTrSkipDeQuantOneSample(TransformUnit &tu, const ComponentID &comp const int iTrShiftNeg = -iTransformShift; reconSample = Pel(dequantisedSample << iTrShiftNeg); } +#endif } #if ADAPTIVE_COLOR_TRANSFORM diff --git a/source/Lib/CommonLib/QuantRDOQ.cpp b/source/Lib/CommonLib/QuantRDOQ.cpp index ff150c610b2aa4ba2c3a4b61c00dc9701a9f51c1..7cc8a7891be50e4e028284e1d9800a43b7b82ea2 100644 --- a/source/Lib/CommonLib/QuantRDOQ.cpp +++ b/source/Lib/CommonLib/QuantRDOQ.cpp @@ -372,10 +372,17 @@ void QuantRDOQ::setScalingList(ScalingList *scalingList, const int maxLog2TrDyna } - +#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE +double QuantRDOQ::xGetErrScaleCoeff(const bool needsSqrt2, SizeType width, SizeType height, int qp, const int maxLog2TrDynamicRange, const int channelBitDepth, bool bTransformSkip=false) +#else double QuantRDOQ::xGetErrScaleCoeff(const bool needsSqrt2, SizeType width, SizeType height, int qp, const int maxLog2TrDynamicRange, const int channelBitDepth) +#endif { +#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE + const int iTransformShift = bTransformSkip ? 0 : getTransformShift(channelBitDepth, Size(width, height), maxLog2TrDynamicRange); +#else const int iTransformShift = getTransformShift(channelBitDepth, Size(width, height), maxLog2TrDynamicRange); +#endif double dErrScale = (double)(1 << SCALE_BITS); // Compensate for scaling of bitcount in Lagrange cost function double dTransShift = (double)iTransformShift + (needsSqrt2 ? -0.5 : 0.0); dErrScale = dErrScale * pow(2.0, (-2.0*dTransShift)); // Compensate for scaling through forward transform @@ -1241,9 +1248,17 @@ void QuantRDOQ::xRateDistOptQuantTS( TransformUnit &tu, const ComponentID &compI #else const bool isTransformSkip = tu.mtsIdx==MTS_SKIP && isLuma(compID); #endif +#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE + const int qBits = QUANT_SHIFT + qp.per(isTransformSkip) + (isTransformSkip ? 0 : transformShift) + (needsSqrt2Scale ? -1 : 0); // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits +#else const int qBits = QUANT_SHIFT + qp.per(isTransformSkip) + transformShift + ( needsSqrt2Scale ? -1 : 0 ); // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits +#endif const int quantisationCoefficient = g_quantScales[needsSqrt2Scale?1:0][qp.rem(isTransformSkip)]; +#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE + const double errorScale = xGetErrScaleCoeff( TU::needsSqrt2Scale(tu, compID), width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth, isTransformSkip); +#else const double errorScale = xGetErrScaleCoeff( TU::needsSqrt2Scale( tu, compID ), width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth ); +#endif const TCoeff entropyCodingMaximum = ( 1 << maxLog2TrDynamicRange ) - 1; @@ -1481,12 +1496,23 @@ void QuantRDOQ::forwardRDPCM( TransformUnit &tu, const ComponentID &compID, cons #else const bool isTransformSkip = tu.mtsIdx==MTS_SKIP && isLuma(compID); #endif +#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE + const int qBits = QUANT_SHIFT + qp.per(isTransformSkip) + (isTransformSkip? 0 : transformShift) + ( needsSqrt2Scale ? -1 : 0); // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits +#else const int qBits = QUANT_SHIFT + qp.per(isTransformSkip) + transformShift + ( needsSqrt2Scale ? -1 : 0 ); // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits +#endif const int quantisationCoefficient = g_quantScales[needsSqrt2Scale ? 1 : 0][qp.rem(isTransformSkip)]; +#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE + const double errorScale = xGetErrScaleCoeff(TU::needsSqrt2Scale(tu, compID), width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth, isTransformSkip); +#else const double errorScale = xGetErrScaleCoeff(TU::needsSqrt2Scale(tu, compID), width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth); - +#endif TrQuantParams trQuantParams; +#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE + trQuantParams.rightShift = (IQUANT_SHIFT - ((isTransformSkip ? 0 : transformShift) + qp.per(isTransformSkip))); +#else trQuantParams.rightShift = (IQUANT_SHIFT - (transformShift + qp.per(isTransformSkip))); +#endif trQuantParams.qScale = g_invQuantScales[needsSqrt2Scale ? 1 : 0][qp.rem(isTransformSkip)]; const TCoeff entropyCodingMaximum = (1 << maxLog2TrDynamicRange) - 1; diff --git a/source/Lib/CommonLib/QuantRDOQ.h b/source/Lib/CommonLib/QuantRDOQ.h index d733613435cae903f4aa274e784eb2f4abd4e756..301e5a0a6bc7748b0cf456dd55cd2547bd2998c5 100644 --- a/source/Lib/CommonLib/QuantRDOQ.h +++ b/source/Lib/CommonLib/QuantRDOQ.h @@ -69,7 +69,11 @@ public: private: double* xGetErrScaleCoeffSL ( uint32_t list, uint32_t sizeX, uint32_t sizeY, int qp ) { return m_errScale[sizeX][sizeY][list][qp]; }; //!< get Error Scale Coefficent +#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE + double xGetErrScaleCoeff ( const bool needsSqrt2, SizeType width, SizeType height, int qp, const int maxLog2TrDynamicRange, const int channelBitDepth, bool bTransformSkip); +#else double xGetErrScaleCoeff ( const bool needsSqrt2, SizeType width, SizeType height, int qp, const int maxLog2TrDynamicRange, const int channelBitDepth); +#endif double& xGetErrScaleCoeffNoScalingList ( uint32_t list, uint32_t sizeX, uint32_t sizeY, int qp ) { return m_errScaleNoScalingList[sizeX][sizeY][list][qp]; }; //!< get Error Scale Coefficent void xInitScalingList ( const QuantRDOQ* other ); void xDestroyScalingList (); diff --git a/source/Lib/CommonLib/Slice.cpp b/source/Lib/CommonLib/Slice.cpp index ffd2e912747c15878df5633d44b9fe763a4d190c..0b607527abf8862924a2b6eefb7dddbe8c5d2f8d 100644 --- a/source/Lib/CommonLib/Slice.cpp +++ b/source/Lib/CommonLib/Slice.cpp @@ -1816,7 +1816,9 @@ PPS::PPS() , m_PPSDepQuantEnabledIdc (0) , m_PPSRefPicListSPSIdc0 (0) , m_PPSRefPicListSPSIdc1 (0) +#if !JVET_P0206_TMVP_flags , m_PPSTemporalMVPEnabledIdc (0) +#endif , m_PPSMvdL1ZeroIdc (0) , m_PPSCollocatedFromL0Idc (0) , m_PPSSixMinusMaxNumMergeCandPlus1 (0) @@ -2506,6 +2508,10 @@ void Slice::scaleRefPicList( Picture *scaledRefPic[ ], APS** apss, APS* lmcsAps, const SPS* sps = getSPS(); const PPS* pps = getPPS(); +#if JVET_P0206_TMVP_flags + bool refPicIsSameRes = false; +#endif + // this is needed for IBC m_pcPic->unscaledPic = m_pcPic; @@ -2532,6 +2538,13 @@ void Slice::scaleRefPicList( Picture *scaledRefPic[ ], APS** apss, APS* lmcsAps, CU::getRprScaling( sps, pps, m_apcRefPicList[refList][rIdx], xScale, yScale ); m_scalingRatio[refList][rIdx] = std::pair<int, int>( xScale, yScale ); +#if JVET_P0206_TMVP_flags + if( m_scalingRatio[refList][rIdx] == SCALE_1X ) + { + refPicIsSameRes = true; + } +#endif + if( m_scalingRatio[refList][rIdx] == SCALE_1X || isDecoder ) { m_scaledRefPicList[refList][rIdx] = m_apcRefPicList[refList][rIdx]; @@ -2617,6 +2630,14 @@ void Slice::scaleRefPicList( Picture *scaledRefPic[ ], APS** apss, APS* lmcsAps, m_apcRefPicList[refList][rIdx]->unscaledPic = m_savedRefPicList[refList][rIdx]; } } + +#if JVET_P0206_TMVP_flags + //Make sure that TMVP is disabled when there are no reference pictures with the same resolution + if(!refPicIsSameRes) + { + CHECK(m_enableTMVPFlag != 0, "TMVP cannot be enabled in slices that have no reference pictures with the same resolution") + } +#endif } void Slice::freeScaledRefPicList( Picture *scaledRefPic[] ) diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h index 122023d00681c72d68484701f4a39016c619f714..513b31aecb316dc29644ca3e32b09613bf9c670e 100644 --- a/source/Lib/CommonLib/Slice.h +++ b/source/Lib/CommonLib/Slice.h @@ -1246,7 +1246,9 @@ private: int m_PPSDepQuantEnabledIdc; int m_PPSRefPicListSPSIdc0; int m_PPSRefPicListSPSIdc1; +#if !JVET_P0206_TMVP_flags int m_PPSTemporalMVPEnabledIdc; +#endif int m_PPSMvdL1ZeroIdc; int m_PPSCollocatedFromL0Idc; uint32_t m_PPSSixMinusMaxNumMergeCandPlus1; @@ -1446,8 +1448,10 @@ public: void setPPSRefPicListSPSIdc0(int u) { m_PPSRefPicListSPSIdc0 = u; } int getPPSRefPicListSPSIdc1() const { return m_PPSRefPicListSPSIdc1; } void setPPSRefPicListSPSIdc1(int u) { m_PPSRefPicListSPSIdc1 = u; } +#if !JVET_P0206_TMVP_flags int getPPSTemporalMVPEnabledIdc() const { return m_PPSTemporalMVPEnabledIdc; } void setPPSTemporalMVPEnabledIdc(int u) { m_PPSTemporalMVPEnabledIdc = u; } +#endif int getPPSMvdL1ZeroIdc() const { return m_PPSMvdL1ZeroIdc; } void setPPSMvdL1ZeroIdc(int u) { m_PPSMvdL1ZeroIdc = u; } int getPPSCollocatedFromL0Idc() const { return m_PPSCollocatedFromL0Idc; } diff --git a/source/Lib/CommonLib/TrQuant.cpp b/source/Lib/CommonLib/TrQuant.cpp index 51ba7016fdafa50ffca741cd3bb9e74922721e1c..c5524917d20379aacbf1adb84ada39280b12fb32 100644 --- a/source/Lib/CommonLib/TrQuant.cpp +++ b/source/Lib/CommonLib/TrQuant.cpp @@ -940,6 +940,16 @@ void TrQuant::xITransformSkip(const CCoeffBuf &pCoeff, const CompArea &area = tu.blocks[compID]; const int width = area.width; const int height = area.height; + +#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + pResidual.at(x, y) = Pel(pCoeff.at(x, y)); + } + } +#else const int maxLog2TrDynamicRange = tu.cs->sps->getMaxLog2TrDynamicRange(toChannelType(compID)); const int channelBitDepth = tu.cs->sps->getBitDepth(toChannelType(compID)); @@ -981,6 +991,7 @@ void TrQuant::xITransformSkip(const CCoeffBuf &pCoeff, } } } +#endif } void TrQuant::xQuant(TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &pSrc, TCoeff &uiAbsSum, const QpParam &cQP, const Ctx& ctx) @@ -1051,6 +1062,18 @@ void TrQuant::transformNxN( TransformUnit& tu, const ComponentID& compID, const { scaleSAD=1.0/1.414213562; // compensate for not scaling transform skip coefficients by 1/sqrt(2) } +#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE +#if JVET_P0058_CHROMA_TS + if (tu.mtsIdx[compID] == MTS_SKIP) +#else + if (isLuma(compID) && tu.mtsIdx == MTS_SKIP) +#endif + { + int trShift = getTransformShift(tu.cu->slice->getSPS()->getBitDepth(toChannelType(compID)), rect.size(), tu.cu->slice->getSPS()->getMaxLog2TrDynamicRange(toChannelType(compID))); + scaleSAD *= pow(2, trShift); + } +#endif + trCosts.push_back( TrCost( int(sumAbs*scaleSAD), pos++ ) ); it++; } @@ -1306,11 +1329,24 @@ void TrQuant::rdpcmNxN(TransformUnit &tu, const ComponentID &compID, const QpPar void TrQuant::xTransformSkip(const TransformUnit &tu, const ComponentID &compID, const CPelBuf &resi, TCoeff* psCoeff) { - const SPS &sps = *tu.cs->sps; - const CompArea &rect = tu.blocks[compID]; - const uint32_t width = rect.width; - const uint32_t height = rect.height; - const ChannelType chType = toChannelType(compID); +#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE + const CompArea &rect = tu.blocks[compID]; + const uint32_t width = rect.width; + const uint32_t height = rect.height; + + for (uint32_t y = 0, coefficientIndex = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++, coefficientIndex++) + { + psCoeff[ coefficientIndex ] = TCoeff(resi.at(x, y)); + } + } +#else + const SPS &sps = *tu.cs->sps; + const CompArea &rect = tu.blocks[compID]; + const uint32_t width = rect.width; + const uint32_t height = rect.height; + const ChannelType chType = toChannelType(compID); const int channelBitDepth = sps.getBitDepth(chType); const int maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(chType); int iTransformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange); @@ -1348,6 +1384,7 @@ void TrQuant::xTransformSkip(const TransformUnit &tu, const ComponentID &compID, } } } +#endif } //! \} diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 4451e6203c1493288d6466fe71b7183c7667d987..6589a2e84d87da337c5e307e3da8f1965e432bab 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -49,6 +49,15 @@ #include <cstring> #include <assert.h> #include <cassert> + +#define JVET_P0206_TMVP_flags 1 // JVET-P0206: Signalling TMVP usage (remove pps TMVP idc and constraint when RPR is used) + +#define JVET_P0599_INTRA_SMOOTHING_INTERP_FILT 1 // JVET-P0599: Cleanup of interpolation filtering for intra prediction + +#define JVET_P1026_MTS_SIGNALLING 1 // JVET-P1026: CU level MTS signalling + +#define JVET_P0571_FIX_BS_BDPCM_CHROMA 1 // JVET-P0571: align boundary strength for Chroma BDPCM + #define JVET_P0983_REMOVE_SPS_SBT_MAX_SIZE_FLAG 1 // JVET-P0983/JVET-P0391: Remove sps_sbt_max_size_64_flag #define JVET_P0530_TPM_WEIGHT_ALIGN 1 // JVET-P0530: align chroma weights with luma weights for TPM blending @@ -63,7 +72,7 @@ #define DELTA_QP_FOR_Co -3 #endif -#define JVET_P0298_DISABLE_LEVELMAPPING_IN_BYPASS 1 // JVET-P0298: Disable level mapping in bypass mode +#define JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE 1 // JVET-P1000: Remove Transformshift in TS mode#define JVET_P0298_DISABLE_LEVELMAPPING_IN_BYPASS 1 // JVET-P0298: Disable level mapping in bypass mode #define JVET_P0347_MAX_MTT_DEPTH_CONSTRAINT 1 // JVET-P0347: Max MTT Depth constraint #define JVET_P0325_CHANGE_MERGE_CANDIDATE_ORDER 1 // JVET-P0325: reorder the spatial merge candidates diff --git a/source/Lib/CommonLib/Unit.cpp b/source/Lib/CommonLib/Unit.cpp index 5c1d2f782ba4e3258d0a99e555b064cef1a15d7f..b696d5e253c449036972ae25d6a4f7f9601e7762 100644 --- a/source/Lib/CommonLib/Unit.cpp +++ b/source/Lib/CommonLib/Unit.cpp @@ -865,10 +865,15 @@ int TransformUnit::getTbAreaAfterCoefZeroOut(ComponentID compID) const int tbArea = blocks[compID].width * blocks[compID].height; int tbZeroOutWidth = blocks[compID].width; int tbZeroOutHeight = blocks[compID].height; + +#if JVET_P1026_MTS_SIGNALLING + if ( cs->sps->getUseMTS() && cu->sbtInfo != 0 && blocks[compID].width <= 32 && blocks[compID].height <= 32 && !cu->transQuantBypass && compID == COMPONENT_Y ) +#else #if JVET_P0058_CHROMA_TS if ((mtsIdx[compID] > MTS_SKIP || (cs->sps->getUseMTS() && cu->sbtInfo != 0 && blocks[compID].width <= 32 && blocks[compID].height <= 32)) && !cu->transQuantBypass && compID == COMPONENT_Y) #else if ((mtsIdx > MTS_SKIP || (cs->sps->getUseMTS() && cu->sbtInfo != 0 && blocks[compID].width <= 32 && blocks[compID].height <= 32)) && !cu->transQuantBypass && compID == COMPONENT_Y) +#endif #endif { tbZeroOutWidth = (blocks[compID].width == 32) ? 16 : tbZeroOutWidth; diff --git a/source/Lib/CommonLib/UnitTools.cpp b/source/Lib/CommonLib/UnitTools.cpp index dd49aed52a395140fc5d90784cec7693ebfb7cc8..95141fe5bfba2f4255b414d800c0391218af6fe4 100644 --- a/source/Lib/CommonLib/UnitTools.cpp +++ b/source/Lib/CommonLib/UnitTools.cpp @@ -3765,6 +3765,25 @@ bool CU::bdpcmAllowed( const CodingUnit& cu, const ComponentID compID ) return bdpcmAllowed; } + +#if JVET_P1026_MTS_SIGNALLING +bool CU::isMTSAllowed(const CodingUnit &cu, const ComponentID compID) +{ + SizeType tsMaxSize = 1 << cu.cs->pps->getLog2MaxTransformSkipBlockSize(); + const int maxSize = CU::isIntra( cu ) ? MTS_INTRA_MAX_CU_SIZE : MTS_INTER_MAX_CU_SIZE; + const int cuWidth = cu.blocks[0].lumaSize().width; + const int cuHeight = cu.blocks[0].lumaSize().height; + bool mtsAllowed = cu.chType == CHANNEL_TYPE_LUMA && compID == COMPONENT_Y; + + mtsAllowed &= CU::isIntra( cu ) ? cu.cs->sps->getUseIntraMTS() : cu.cs->sps->getUseInterMTS() && CU::isInter( cu ); + mtsAllowed &= cuWidth <= maxSize && cuHeight <= maxSize; + mtsAllowed &= !cu.ispMode; + mtsAllowed &= !cu.sbtInfo; + mtsAllowed &= !(cu.bdpcmMode && cuWidth <= tsMaxSize && cuHeight <= tsMaxSize); + return mtsAllowed; +} +#endif + // TU tools bool TU::isNonTransformedResidualRotated(const TransformUnit &tu, const ComponentID &compID) @@ -3822,6 +3841,7 @@ bool TU::isTSAllowed(const TransformUnit &tu, const ComponentID compID) return tsAllowed; } +#if !JVET_P1026_MTS_SIGNALLING bool TU::isMTSAllowed(const TransformUnit &tu, const ComponentID compID) { bool mtsAllowed = compID == COMPONENT_Y; @@ -3835,6 +3855,7 @@ bool TU::isMTSAllowed(const TransformUnit &tu, const ComponentID compID) mtsAllowed &= !( tu.cu->bdpcmMode && tu.lwidth() <= transformSkipMaxSize && tu.lheight() <= transformSkipMaxSize); return mtsAllowed; } +#endif int TU::getICTMode( const TransformUnit& tu, int jointCbCr ) { diff --git a/source/Lib/CommonLib/UnitTools.h b/source/Lib/CommonLib/UnitTools.h index a1c766ff382a35bb7b2a6ef16892523d363d5a5d..497679860c42c1483a8f7307c98f2ac1d29d9d3c 100644 --- a/source/Lib/CommonLib/UnitTools.h +++ b/source/Lib/CommonLib/UnitTools.h @@ -90,6 +90,9 @@ namespace CU void setGbiIdx (CodingUnit& cu, uint8_t uh); uint8_t deriveGbiIdx (uint8_t gbiLO, uint8_t gbiL1); bool bdpcmAllowed (const CodingUnit& cu, const ComponentID compID); +#if JVET_P1026_MTS_SIGNALLING + bool isMTSAllowed (const CodingUnit& cu, const ComponentID compID); +#endif bool divideTuInRows ( const CodingUnit &cu ); @@ -199,7 +202,9 @@ namespace TU bool getCbfAtDepth (const TransformUnit &tu, const ComponentID &compID, const unsigned &depth); void setCbfAtDepth ( TransformUnit &tu, const ComponentID &compID, const unsigned &depth, const bool &cbf); bool isTSAllowed (const TransformUnit &tu, const ComponentID compID); +#if !JVET_P1026_MTS_SIGNALLING bool isMTSAllowed (const TransformUnit &tu, const ComponentID compID); +#endif bool hasCrossCompPredInfo (const TransformUnit &tu, const ComponentID &compID); diff --git a/source/Lib/CommonLib/x86/BufferX86.h b/source/Lib/CommonLib/x86/BufferX86.h index b91ef72f2fada8d561d6b9582587ba08e4048d3c..ae44540199073b88010b619e5d22261730164e0e 100644 --- a/source/Lib/CommonLib/x86/BufferX86.h +++ b/source/Lib/CommonLib/x86/BufferX86.h @@ -249,7 +249,8 @@ void addBIOAvg4_SSE(const Pel* src0, int src0Stride, const Pel* src1, int src1St a = _mm_unpacklo_epi16(_mm_loadl_epi64((const __m128i *) (src0 + x)), _mm_loadl_epi64((const __m128i *) (src1 + x))); #if JVET_P0091_REMOVE_BDOF_OFFSET_SHIFT - sum = _mm_add_epi32(sum, _mm_set1_epi32(2 * offset)); + sum = _mm_add_epi32(sum, _mm_madd_epi16(a, _mm_set1_epi16(1))); + sum = _mm_add_epi32(sum, _mm_set1_epi32(offset)); sum = _mm_sra_epi32(sum, _mm_cvtsi32_si128(shift)); #else sum = _mm_add_epi32(sum, _mm_madd_epi16(a, _mm_set1_epi16(2))); @@ -350,10 +351,45 @@ void calcBIOSums_SSE(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, Pel* } template< X86_VEXT vext > +#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING +void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride, int width, int height, const Pel* gradX, const Pel* gradY, int gradStride, const int* dMvX, const int* dMvY, int dMvStride, const bool& bi, int shiftNum, Pel offset, const ClpRng& clpRng) +#else void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride, int width, int height, const Pel* gradX, const Pel* gradY, int gradStride, const int* dMvX, const int* dMvY, int dMvStride, int shiftNum, Pel offset, const ClpRng& clpRng) +#endif { CHECKD((width & 3), "block width error!"); +#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING + const int dILimit = 1 << std::max<int>(clpRng.bd + 1, 13); + +#ifdef USE_AVX2 + __m256i mm_dmvx, mm_dmvy, mm_gradx, mm_grady, mm_dI, mm_dI0, mm_src; +#if !JVET_P0057_BDOF_PROF_HARMONIZATION + __m256i mm_dIoffset = _mm256_set1_epi32(1); +#endif + __m256i mm_offset = _mm256_set1_epi16(offset); + __m256i vibdimin = _mm256_set1_epi16(clpRng.min); + __m256i vibdimax = _mm256_set1_epi16(clpRng.max); + __m256i mm_dimin = _mm256_set1_epi32(-dILimit); + __m256i mm_dimax = _mm256_set1_epi32(dILimit - 1); +#else + __m128i mm_dmvx, mm_dmvy, mm_gradx, mm_grady, mm_dI, mm_dI0; +#if !JVET_P0057_BDOF_PROF_HARMONIZATION + __m128i mm_dIoffset = _mm_set1_epi32(1); +#endif + __m128i mm_offset = _mm_set1_epi16(offset); + __m128i vibdimin = _mm_set1_epi16(clpRng.min); + __m128i vibdimax = _mm_set1_epi16(clpRng.max); + __m128i mm_dimin = _mm_set1_epi32(-dILimit); + __m128i mm_dimax = _mm_set1_epi32(dILimit - 1); +#endif + +#if USE_AVX2 + for (int h = 0; h < height; h += 4) +#else + for (int h = 0; h < height; h += 2) +#endif +#else __m128i mm_dmvx, mm_dmvy, mm_gradx, mm_grady, mm_dI, mm_src; #if !JVET_P0057_BDOF_PROF_HARMONIZATION __m128i mm_dIoffset = _mm_set1_epi32(1); @@ -363,13 +399,8 @@ void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride, __m128i vibdimax = _mm_set1_epi32(clpRng.max); __m128i vzero = _mm_setzero_si128(); -#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING - const int dILimit = 1 << std::max<int>(clpRng.bd + 1, 13); - __m128i vdImin = _mm_set1_epi32(-dILimit); - __m128i vdImax = _mm_set1_epi32(dILimit - 1); -#endif - for (int h = 0; h < height; h++) +#endif { const int* vX = dMvX; const int* vY = dMvY; @@ -380,6 +411,100 @@ void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride, for (int w = 0; w < width; w += 4) { +#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING +#if USE_AVX2 + const int *vX0 = vX, *vY0 = vY; + const Pel *gX0 = gX, *gY0 = gY; + + // first two rows + mm_dmvx = _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)vX0)), _mm_loadu_si128((const __m128i *)(vX0 + dMvStride)), 1); + mm_dmvy = _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)vY0)), _mm_loadu_si128((const __m128i *)(vY0 + dMvStride)), 1); + mm_gradx = _mm256_inserti128_si256( + _mm256_castsi128_si256(_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gX0))), + _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gX0 + gradStride))), 1); + mm_grady = _mm256_inserti128_si256( + _mm256_castsi128_si256(_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gY0))), + _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gY0 + gradStride))), 1); + mm_dI0 = _mm256_add_epi32(_mm256_mullo_epi32(mm_dmvx, mm_gradx), _mm256_mullo_epi32(mm_dmvy, mm_grady)); +#if !JVET_P0057_BDOF_PROF_HARMONIZATION + mm_dI0 = _mm256_srai_epi32(_mm256_add_epi32(mm_dI0, mm_dIoffset), 1); +#endif + mm_dI0 = _mm256_min_epi32(mm_dimax, _mm256_max_epi32(mm_dimin, mm_dI0)); + + // next two rows + vX0 += (dMvStride << 1); vY0 += (dMvStride << 1); gX0 += (gradStride << 1); gY0 += (gradStride << 1); + mm_dmvx = _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)vX0)), _mm_loadu_si128((const __m128i *)(vX0 + dMvStride)), 1); + mm_dmvy = _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)vY0)), _mm_loadu_si128((const __m128i *)(vY0 + dMvStride)), 1); + mm_gradx = _mm256_inserti128_si256( + _mm256_castsi128_si256(_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gX0))), + _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gX0 + gradStride))), 1); + mm_grady = _mm256_inserti128_si256( + _mm256_castsi128_si256(_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gY0))), + _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gY0 + gradStride))), 1); + mm_dI = _mm256_add_epi32(_mm256_mullo_epi32(mm_dmvx, mm_gradx), _mm256_mullo_epi32(mm_dmvy, mm_grady)); +#if !JVET_P0057_BDOF_PROF_HARMONIZATION + mm_dI = _mm256_srai_epi32(_mm256_add_epi32(mm_dI, mm_dIoffset), 1); +#endif + mm_dI = _mm256_min_epi32(mm_dimax, _mm256_max_epi32(mm_dimin, mm_dI)); + + // combine four rows + mm_dI = _mm256_packs_epi32(mm_dI0, mm_dI); + const Pel* src0 = src + srcStride; + mm_src = _mm256_inserti128_si256( + _mm256_castsi128_si256(_mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)src), _mm_loadl_epi64((const __m128i *)(src + (srcStride << 1))))), + _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)src0), _mm_loadl_epi64((const __m128i *)(src0 + (srcStride << 1)))), + 1 + ); + mm_dI = _mm256_add_epi16(mm_dI, mm_src); + if (!bi) + { + mm_dI = _mm256_srai_epi16(_mm256_add_epi16(mm_dI, mm_offset), shiftNum); + mm_dI = _mm256_min_epi16(vibdimax, _mm256_max_epi16(vibdimin, mm_dI)); + } + + // store final results + __m128i dITmp = _mm256_extractf128_si256(mm_dI, 1); + Pel* dst0 = dst; + _mm_storel_epi64((__m128i *)dst0, _mm256_castsi256_si128(mm_dI)); + dst0 += dstStride; _mm_storel_epi64((__m128i *)dst0, dITmp); + dst0 += dstStride; _mm_storel_epi64((__m128i *)dst0, _mm_unpackhi_epi64(_mm256_castsi256_si128(mm_dI), _mm256_castsi256_si128(mm_dI))); + dst0 += dstStride; _mm_storel_epi64((__m128i *)dst0, _mm_unpackhi_epi64(dITmp, dITmp)); +#else + // first row + mm_dmvx = _mm_loadu_si128((const __m128i *)vX); + mm_dmvy = _mm_loadu_si128((const __m128i *)vY); + mm_gradx = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gX)); + mm_grady = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gY)); + mm_dI0 = _mm_add_epi32(_mm_mullo_epi32(mm_dmvx, mm_gradx), _mm_mullo_epi32(mm_dmvy, mm_grady)); +#if !JVET_P0057_BDOF_PROF_HARMONIZATION + mm_dI0 = _mm_srai_epi32(_mm_add_epi32(mm_dI0, mm_dIoffset), 1); +#endif + mm_dI0 = _mm_min_epi32(mm_dimax, _mm_max_epi32(mm_dimin, mm_dI0)); + + // second row + mm_dmvx = _mm_loadu_si128((const __m128i *)(vX + dMvStride)); + mm_dmvy = _mm_loadu_si128((const __m128i *)(vY + dMvStride)); + mm_gradx = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gX + gradStride))); + mm_grady = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gY + gradStride))); + mm_dI = _mm_add_epi32(_mm_mullo_epi32(mm_dmvx, mm_gradx), _mm_mullo_epi32(mm_dmvy, mm_grady)); +#if !JVET_P0057_BDOF_PROF_HARMONIZATION + mm_dI = _mm_srai_epi32(_mm_add_epi32(mm_dI, mm_dIoffset), 1); +#endif + mm_dI = _mm_min_epi32(mm_dimax, _mm_max_epi32(mm_dimin, mm_dI)); + + // combine both rows + mm_dI = _mm_packs_epi32(mm_dI0, mm_dI); + mm_dI = _mm_add_epi16(_mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)src), _mm_loadl_epi64((const __m128i *)(src + srcStride))), mm_dI); + if (!bi) + { + mm_dI = _mm_srai_epi16(_mm_add_epi16(mm_dI, mm_offset), shiftNum); + mm_dI = _mm_min_epi16(vibdimax, _mm_max_epi16(vibdimin, mm_dI)); + } + + _mm_storel_epi64((__m128i *)dst, mm_dI); + _mm_storel_epi64((__m128i *)(dst + dstStride), _mm_unpackhi_epi64(mm_dI, mm_dI)); +#endif +#else mm_dmvx = _mm_loadu_si128((const __m128i *)vX); mm_dmvy = _mm_loadu_si128((const __m128i *)vY); mm_gradx = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gX)); @@ -390,24 +515,43 @@ void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride, #if !JVET_P0057_BDOF_PROF_HARMONIZATION mm_dI = _mm_srai_epi32(_mm_add_epi32(mm_dI, mm_dIoffset), 1); #endif -#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING - mm_dI = _mm_min_epi32(vdImax, _mm_max_epi32(vdImin, mm_dI)); -#endif + mm_dI = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(mm_dI, mm_src), mm_offset), shiftNum); mm_dI = _mm_packs_epi32(_mm_min_epi32(vibdimax, _mm_max_epi32(vibdimin, mm_dI)), vzero); _mm_storel_epi64((__m128i *)dst, mm_dI); +#endif vX += 4; vY += 4; gX += 4; gY += 4; src += 4; dst += 4; } + +#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING +#if USE_AVX2 + dMvX += (dMvStride << 2); + dMvY += (dMvStride << 2); + gradX += (gradStride << 2); + gradY += (gradStride << 2); + srcPel += (srcStride << 2); + dstPel += (dstStride << 2); +#else + dMvX += (dMvStride << 1); + dMvY += (dMvStride << 1); + gradX += (gradStride << 1); + gradY += (gradStride << 1); + srcPel += (srcStride << 1); + dstPel += (dstStride << 1); +#endif +#else dMvX += dMvStride; dMvY += dMvStride; gradX += gradStride; gradY += gradStride; srcPel += srcStride; dstPel += dstStride; +#endif } } +#if !JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING template< X86_VEXT vext, bool l1PROFEnabled = true> void applyBiPROF_SSE(Pel* dst, int dstStride, const Pel* src0, const Pel* src1, int srcStride, int width, int height, const Pel* gradX0, const Pel* gradY0, const Pel* gradX1, const Pel* gradY1, int gradStride, const int* dMvX0, const int* dMvY0, const int* dMvX1, const int* dMvY1, int dMvStride, const int8_t w0, const ClpRng& clpRng) { @@ -525,6 +669,7 @@ void applyBiPROF_SSE(Pel* dst, int dstStride, const Pel* src0, const Pel* src1, dst += dstStride; } } +#endif template< X86_VEXT vext > void roundIntVector_SIMD(int* v, int size, unsigned int nShift, const int dmvLimit) @@ -1294,8 +1439,10 @@ void PelBufferOps::_initPelBufOpsX86() #endif profGradFilter = gradFilter_SSE<vext, false>; applyPROF = applyPROF_SSE<vext>; +#if !JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING applyBiPROF[1] = applyBiPROF_SSE<vext>; applyBiPROF[0] = applyBiPROF_SSE<vext, false>; +#endif roundIntVector = roundIntVector_SIMD<vext>; } diff --git a/source/Lib/DecoderLib/CABACReader.cpp b/source/Lib/DecoderLib/CABACReader.cpp index da84544f19fcab1063cf7cbfd7f184b535ea4d89..10d9e6d589914e3953fa2c6d5b07cbb8f2333bd2 100644 --- a/source/Lib/DecoderLib/CABACReader.cpp +++ b/source/Lib/DecoderLib/CABACReader.cpp @@ -1552,7 +1552,10 @@ void CABACReader::cu_residual( CodingUnit& cu, Partitioner &partitioner, CUCtx& cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_LUMA] = false; cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_CHROMA] = false; - cuCtx.lfnstLastScanPos = false; + cuCtx.lfnstLastScanPos = false; +#if JVET_P1026_MTS_SIGNALLING + cuCtx.violatesMtsCoeffConstraint = false; +#endif ChromaCbfs chromaCbfs; if( cu.ispMode && isLuma( partitioner.chType ) ) @@ -1564,7 +1567,11 @@ void CABACReader::cu_residual( CodingUnit& cu, Partitioner &partitioner, CUCtx& { transform_tree( *cu.cs, partitioner, cuCtx ); } + residual_lfnst_mode( cu, cuCtx ); +#if JVET_P1026_MTS_SIGNALLING + mts_idx ( cu, cuCtx ); +#endif } void CABACReader::rqt_root_cbf( CodingUnit& cu ) @@ -2992,7 +2999,11 @@ void CABACReader::residual_coding( TransformUnit& tu, ComponentID compID, CUCtx& return; // parse transform skip and explicit rdpcm mode +#if JVET_P1026_MTS_SIGNALLING + ts_flag ( tu, compID ); +#else mts_coding ( tu, compID ); +#endif explicit_rdpcm_mode( tu, compID ); #if JVET_P0058_CHROMA_TS @@ -3045,6 +3056,13 @@ void CABACReader::residual_coding( TransformUnit& tu, ComponentID compID, CUCtx& const int lfnstLastScanPosTh = isLuma( compID ) ? LFNST_LAST_SIG_LUMA : LFNST_LAST_SIG_CHROMA; cuCtx.lfnstLastScanPos |= cctx.scanPosLast() >= lfnstLastScanPosTh; } +#if JVET_P1026_MTS_SIGNALLING + if( isLuma(compID) && ( cctx.posX(cctx.scanPosLast()) >= 16 || cctx.posY(cctx.scanPosLast()) >= 16 ) ) + { + cuCtx.violatesMtsCoeffConstraint = true; + } +#endif + // parse subblocks const int stateTransTab = ( tu.cs->slice->getDepQuantEnabledFlag() ? 32040 : 0 ); int state = 0; @@ -3055,10 +3073,15 @@ void CABACReader::residual_coding( TransformUnit& tu, ComponentID compID, CUCtx& for( int subSetId = ( cctx.scanPosLast() >> cctx.log2CGSize() ); subSetId >= 0; subSetId--) { cctx.initSubblock ( subSetId ); + +#if JVET_P1026_MTS_SIGNALLING + if( tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[ compID ].height <= 32 && tu.blocks[ compID ].width <= 32 && !tu.cu->transQuantBypass && compID == COMPONENT_Y ) +#else #if JVET_P0058_CHROMA_TS if( ( tu.mtsIdx[compID] > MTS_SKIP || (tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[compID].height <= 32 && tu.blocks[compID].width <= 32)) && !tu.cu->transQuantBypass && compID == COMPONENT_Y) #else if( ( tu.mtsIdx > MTS_SKIP || ( tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[ compID ].height <= 32 && tu.blocks[ compID ].width <= 32 ) ) && !tu.cu->transQuantBypass && compID == COMPONENT_Y ) +#endif #endif { if( ( tu.blocks[ compID ].height == 32 && cctx.cgPosY() >= ( 16 >> cctx.log2CGHeight() ) ) || ( tu.blocks[ compID ].width == 32 && cctx.cgPosX() >= ( 16 >> cctx.log2CGWidth() ) ) ) @@ -3071,6 +3094,74 @@ void CABACReader::residual_coding( TransformUnit& tu, ComponentID compID, CUCtx& } +#if JVET_P1026_MTS_SIGNALLING +void CABACReader::ts_flag( TransformUnit& tu, ComponentID compID ) +{ +#if JVET_P0058_CHROMA_TS + int tsFlag = tu.cu->bdpcmMode && isLuma(compID) ? 1 : tu.mtsIdx[compID] == MTS_SKIP ? 1 : 0; + int ctxIdx = isLuma(compID) ? 6 : 11; +#else + int tsFlag = tu.cu->bdpcmMode ? 1 : tu.mtsIdx == MTS_SKIP ? 1 : 0; + int ctxIdx = 6; +#endif + + if( TU::isTSAllowed ( tu, compID ) ) + { + RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__MTS_FLAGS, tu.blocks[compID], compID ); + tsFlag = m_BinDecoder.decodeBin( Ctx::MTSIndex( ctxIdx ) ); + } + +#if JVET_P0058_CHROMA_TS + tu.mtsIdx[compID] = tsFlag ? MTS_SKIP : MTS_DCT2_DCT2; +#else + tu.mtsIdx = tsFlag ? MTS_SKIP : MTS_DCT2_DCT2; +#endif + + DTRACE(g_trace_ctx, D_SYNTAX, "ts_flag() etype=%d pos=(%d,%d) mtsIdx=%d\n", COMPONENT_Y, tu.cu->lx(), tu.cu->ly(), tsFlag); +} + +void CABACReader::mts_idx( CodingUnit& cu, CUCtx& cuCtx ) +{ + TransformUnit &tu = *cu.firstTU; +#if JVET_P0058_CHROMA_TS + int mtsIdx = tu.mtsIdx[COMPONENT_Y]; // Transform skip flag has already been decoded +#else + int mtsIdx = tu.mtsIdx; // Transform skip flag has already been decoded +#endif + + if( CU::isMTSAllowed( cu, COMPONENT_Y ) && !cuCtx.violatesMtsCoeffConstraint && + cu.lfnstIdx == 0 && mtsIdx != MTS_SKIP && TU::getCbf(tu, COMPONENT_Y) ) + { + RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__MTS_FLAGS, tu.blocks[COMPONENT_Y], COMPONENT_Y ); + int ctxIdx = 0; + int symbol = m_BinDecoder.decodeBin( Ctx::MTSIndex( ctxIdx ) ); + + if( symbol ) + { + ctxIdx = 7; + mtsIdx = MTS_DST7_DST7; // mtsIdx = 2 -- 4 + for( int i = 0; i < 3; i++, ctxIdx++ ) + { + symbol = m_BinDecoder.decodeBin( Ctx::MTSIndex( ctxIdx ) ); + mtsIdx += symbol; + + if( !symbol ) + { + break; + } + } + } + } + +#if JVET_P0058_CHROMA_TS + tu.mtsIdx[COMPONENT_Y] = mtsIdx; +#else + tu.mtsIdx = mtsIdx; +#endif + + DTRACE(g_trace_ctx, D_SYNTAX, "mts_idx() etype=%d pos=(%d,%d) mtsIdx=%d\n", COMPONENT_Y, tu.cu->lx(), tu.cu->ly(), mtsIdx); +} +#else void CABACReader::mts_coding( TransformUnit& tu, ComponentID compID ) { const bool tsAllowed = TU::isTSAllowed ( tu, compID ); @@ -3144,7 +3235,8 @@ void CABACReader::mts_coding( TransformUnit& tu, ComponentID compID ) DTRACE(g_trace_ctx, D_SYNTAX, "mts_coding() etype=%d pos=(%d,%d) mtsIdx=%d\n", COMPONENT_Y, tu.cu->lx(), tu.cu->ly(), tu.mtsIdx); #endif } - +#endif + void CABACReader::isp_mode( CodingUnit& cu ) { if( !CU::isIntra( cu ) || !isLuma( cu.chType ) || cu.firstPU->multiRefIdx || !cu.cs->sps->getUseISP() || cu.bdpcmMode || !CU::canUseISP( cu, getFirstComponentOfChannel( cu.chType ) ) ) @@ -3213,12 +3305,21 @@ void CABACReader::residual_lfnst_mode( CodingUnit& cu, CUCtx& cuCtx ) const bool lumaFlag = cu.isSepTree() ? ( isLuma( cu.chType ) ? true : false ) : true; const bool chromaFlag = cu.isSepTree() ? ( isChroma( cu.chType ) ? true : false ) : true; bool nonZeroCoeffNonTsCorner8x8 = ( lumaFlag && cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_LUMA] ) || (chromaFlag && cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_CHROMA] ); +#if JVET_P1026_MTS_SIGNALLING +#if JVET_P0058_CHROMA_TS + const bool isTrSkip = TU::getCbf(*cu.firstTU, COMPONENT_Y) && cu.firstTU->mtsIdx[COMPONENT_Y] == MTS_SKIP; +#else + const bool isTrSkip = TU::getCbf(*cu.firstTU, COMPONENT_Y) && cu.firstTU->mtsIdx == MTS_SKIP; +#endif + if( !cuCtx.lfnstLastScanPos || nonZeroCoeffNonTsCorner8x8 || isTrSkip ) +#else #if JVET_P0058_CHROMA_TS const bool isNonDCT2 = (TU::getCbf(*cu.firstTU, ComponentID(COMPONENT_Y)) && cu.firstTU->mtsIdx[COMPONENT_Y] != MTS_DCT2_DCT2); #else const bool isNonDCT2 = (TU::getCbf(*cu.firstTU, ComponentID(COMPONENT_Y)) && cu.firstTU->mtsIdx != MTS_DCT2_DCT2); #endif if( !cuCtx.lfnstLastScanPos || nonZeroCoeffNonTsCorner8x8 || isNonDCT2 ) +#endif { cu.lfnstIdx = 0; return; @@ -3252,10 +3353,14 @@ int CABACReader::last_sig_coeff( CoeffCodingContext& cctx, TransformUnit& tu, Co unsigned maxLastPosX = cctx.maxLastPosX(); unsigned maxLastPosY = cctx.maxLastPosY(); +#if JVET_P1026_MTS_SIGNALLING + if( tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[ compID ].width <= 32 && tu.blocks[ compID ].height <= 32 && !tu.cu->transQuantBypass && compID == COMPONENT_Y ) +#else #if JVET_P0058_CHROMA_TS if( ( tu.mtsIdx[compID] > MTS_SKIP || (tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[compID].width <= 32 && tu.blocks[compID].height <= 32)) && !tu.cu->transQuantBypass && compID == COMPONENT_Y) #else if( ( tu.mtsIdx > MTS_SKIP || ( tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[ compID ].width <= 32 && tu.blocks[ compID ].height <= 32 ) ) && !tu.cu->transQuantBypass && compID == COMPONENT_Y ) +#endif #endif { maxLastPosX = ( tu.blocks[ compID ].width == 32 ) ? g_uiGroupIdx[ 15 ] : maxLastPosX; diff --git a/source/Lib/DecoderLib/CABACReader.h b/source/Lib/DecoderLib/CABACReader.h index 6425ce8149ae370719618aef9b333f40f5c9b307..7406715f55a77b9fe5c6a65041d8b08f4ea4ace4 100644 --- a/source/Lib/DecoderLib/CABACReader.h +++ b/source/Lib/DecoderLib/CABACReader.h @@ -135,7 +135,12 @@ public: // residual coding (clause 7.3.8.11) void residual_coding ( TransformUnit& tu, ComponentID compID, CUCtx& cuCtx ); +#if JVET_P1026_MTS_SIGNALLING + void ts_flag ( TransformUnit& tu, ComponentID compID ); + void mts_idx ( CodingUnit& cu, CUCtx& cuCtx ); +#else void mts_coding ( TransformUnit& tu, ComponentID compID ); +#endif void residual_lfnst_mode ( CodingUnit& cu, CUCtx& cuCtx ); void isp_mode ( CodingUnit& cu ); void explicit_rdpcm_mode ( TransformUnit& tu, ComponentID compID ); diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp index a9c149fc599dd61de9b0dcb4103cb1818cd021cf..322689c4604b43ba6515e58f27b74d0cd6c52d06 100644 --- a/source/Lib/DecoderLib/VLCReader.cpp +++ b/source/Lib/DecoderLib/VLCReader.cpp @@ -405,7 +405,9 @@ void HLSyntaxReader::parsePPS( PPS* pcPPS, ParameterSetManager *parameterSetMana READ_CODE( 2, uiCode, "pps_dep_quant_enabled_idc"); pcPPS->setPPSDepQuantEnabledIdc(uiCode); READ_CODE( 2, uiCode, "pps_ref_pic_list_sps_idc[0]"); pcPPS->setPPSRefPicListSPSIdc0(uiCode); READ_CODE( 2, uiCode, "pps_ref_pic_list_sps_idc[1]"); pcPPS->setPPSRefPicListSPSIdc1(uiCode); +#if !JVET_P0206_TMVP_flags READ_CODE( 2, uiCode, "pps_temporal_mvp_enabled_idc"); pcPPS->setPPSTemporalMVPEnabledIdc(uiCode); +#endif READ_CODE( 2, uiCode, "pps_mvd_l1_zero_idc"); pcPPS->setPPSMvdL1ZeroIdc(uiCode); READ_CODE( 2, uiCode, "pps_collocated_from_l0_idc"); pcPPS->setPPSCollocatedFromL0Idc(uiCode); READ_UVLC( uiCode, "pps_six_minus_max_num_merge_cand_plus1"); pcPPS->setPPSSixMinusMaxNumMergeCandPlus1(uiCode); @@ -417,7 +419,9 @@ void HLSyntaxReader::parsePPS( PPS* pcPPS, ParameterSetManager *parameterSetMana pcPPS->setPPSDepQuantEnabledIdc(0); pcPPS->setPPSRefPicListSPSIdc0(0); pcPPS->setPPSRefPicListSPSIdc1(0); +#if !JVET_P0206_TMVP_flags pcPPS->setPPSTemporalMVPEnabledIdc(0); +#endif pcPPS->setPPSMvdL1ZeroIdc(0); pcPPS->setPPSCollocatedFromL0Idc(0); pcPPS->setPPSSixMinusMaxNumMergeCandPlus1(0); @@ -2090,13 +2094,20 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para if(!pcSlice->isIntra()) { +#if JVET_P0206_TMVP_flags + if (sps->getSPSTemporalMVPEnabledFlag()) +#else if (sps->getSPSTemporalMVPEnabledFlag() && !pps->getPPSTemporalMVPEnabledIdc()) +#endif { READ_FLAG( uiCode, "slice_temporal_mvp_enabled_flag" ); pcSlice->setEnableTMVPFlag( uiCode == 1 ? true : false ); } else { +#if JVET_P0206_TMVP_flags + pcSlice->setEnableTMVPFlag(false); +#else if (!sps->getSPSTemporalMVPEnabledFlag()) { pcSlice->setEnableTMVPFlag(false); @@ -2105,6 +2116,7 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para { pcSlice->setEnableTMVPFlag((pps->getPPSTemporalMVPEnabledIdc() - 1) == 1 ? true: false); } +#endif } } diff --git a/source/Lib/EncoderLib/CABACWriter.cpp b/source/Lib/EncoderLib/CABACWriter.cpp index 38a373a251eb8b8b70562d761159928a6b993197..edf3050c903c4dc6f19fdc5435e500ec4f0e6b1c 100644 --- a/source/Lib/EncoderLib/CABACWriter.cpp +++ b/source/Lib/EncoderLib/CABACWriter.cpp @@ -1325,7 +1325,10 @@ void CABACWriter::cu_residual( const CodingUnit& cu, Partitioner& partitioner, C cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_LUMA] = false; cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_CHROMA] = false; - cuCtx.lfnstLastScanPos = false; + cuCtx.lfnstLastScanPos = false; +#if JVET_P1026_MTS_SIGNALLING + cuCtx.violatesMtsCoeffConstraint = false; +#endif if( cu.ispMode && isLuma( partitioner.chType ) ) { @@ -1338,6 +1341,9 @@ void CABACWriter::cu_residual( const CodingUnit& cu, Partitioner& partitioner, C } residual_lfnst_mode( cu, cuCtx ); +#if JVET_P1026_MTS_SIGNALLING + mts_idx ( cu, cuCtx ); +#endif } void CABACWriter::rqt_root_cbf( const CodingUnit& cu ) @@ -2766,7 +2772,11 @@ void CABACWriter::residual_coding( const TransformUnit& tu, ComponentID compID, return; // code transform skip and explicit rdpcm mode +#if JVET_P1026_MTS_SIGNALLING + ts_flag ( tu, compID ); +#else mts_coding ( tu, compID ); +#endif explicit_rdpcm_mode( tu, compID ); #if JVET_P0058_CHROMA_TS @@ -2832,6 +2842,13 @@ void CABACWriter::residual_coding( const TransformUnit& tu, ComponentID compID, const int lfnstLastScanPosTh = isLuma( compID ) ? LFNST_LAST_SIG_LUMA : LFNST_LAST_SIG_CHROMA; cuCtx->lfnstLastScanPos |= cctx.scanPosLast() >= lfnstLastScanPosTh; } +#if JVET_P1026_MTS_SIGNALLING + if( cuCtx && isLuma(compID) && ( cctx.posX(cctx.scanPosLast()) >= 16 || cctx.posY(cctx.scanPosLast()) >= 16 ) ) + { + cuCtx->violatesMtsCoeffConstraint = true; + } +#endif + // code last coeff position last_sig_coeff( cctx, tu, compID ); @@ -2845,11 +2862,16 @@ void CABACWriter::residual_coding( const TransformUnit& tu, ComponentID compID, for( int subSetId = ( cctx.scanPosLast() >> cctx.log2CGSize() ); subSetId >= 0; subSetId--) { cctx.initSubblock ( subSetId, sigGroupFlags[subSetId] ); + +#if JVET_P1026_MTS_SIGNALLING + if( tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[ compID ].height <= 32 && tu.blocks[ compID ].width <= 32 && !tu.cu->transQuantBypass && compID == COMPONENT_Y ) +#else #if JVET_P0058_CHROMA_TS if( ( tu.mtsIdx[compID] > MTS_SKIP || (tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[compID].height <= 32 && tu.blocks[compID].width <= 32)) && !tu.cu->transQuantBypass && compID == COMPONENT_Y) #else if( ( tu.mtsIdx > MTS_SKIP || ( tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[ compID ].height <= 32 && tu.blocks[ compID ].width <= 32 ) ) && !tu.cu->transQuantBypass && compID == COMPONENT_Y ) -#endif +#endif +#endif { if( ( tu.blocks[ compID ].height == 32 && cctx.cgPosY() >= ( 16 >> cctx.log2CGHeight() ) ) || ( tu.blocks[ compID ].width == 32 && cctx.cgPosX() >= ( 16 >> cctx.log2CGWidth() ) ) ) @@ -2861,6 +2883,59 @@ void CABACWriter::residual_coding( const TransformUnit& tu, ComponentID compID, } } +#if JVET_P1026_MTS_SIGNALLING +void CABACWriter::ts_flag( const TransformUnit& tu, ComponentID compID ) +{ +#if JVET_P0058_CHROMA_TS + int tsFlag = tu.mtsIdx[compID] == MTS_SKIP ? 1 : 0; + int ctxIdx = isLuma(compID) ? 6 : 11; +#else + int tsFlag = tu.mtsIdx == MTS_SKIP ? 1 : 0; + int ctxIdx = 6; +#endif + + if( TU::isTSAllowed ( tu, compID ) ) + { + m_BinEncoder.encodeBin( tsFlag, Ctx::MTSIndex( ctxIdx ) ); + } + DTRACE( g_trace_ctx, D_SYNTAX, "ts_flag() etype=%d pos=(%d,%d) mtsIdx=%d\n", COMPONENT_Y, tu.cu->lx(), tu.cu->ly(), tsFlag ); +} + +void CABACWriter::mts_idx( const CodingUnit& cu, CUCtx& cuCtx ) +{ + TransformUnit &tu = *cu.firstTU; +#if JVET_P0058_CHROMA_TS + int mtsIdx = tu.mtsIdx[COMPONENT_Y]; +#else + int mtsIdx = tu.mtsIdx; +#endif + + if( CU::isMTSAllowed( cu, COMPONENT_Y ) && !cuCtx.violatesMtsCoeffConstraint && + cu.lfnstIdx == 0 && mtsIdx != MTS_SKIP && TU::getCbf(tu, COMPONENT_Y) ) + { + int symbol = mtsIdx != MTS_DCT2_DCT2 ? 1 : 0; + int ctxIdx = 0; + + m_BinEncoder.encodeBin( symbol, Ctx::MTSIndex( ctxIdx ) ); + + if( symbol ) + { + ctxIdx = 7; + for( int i = 0; i < 3; i++, ctxIdx++ ) + { + symbol = mtsIdx > i + MTS_DST7_DST7 ? 1 : 0; + m_BinEncoder.encodeBin( symbol, Ctx::MTSIndex( ctxIdx ) ); + + if( !symbol ) + { + break; + } + } + } + } + DTRACE( g_trace_ctx, D_SYNTAX, "mts_idx() etype=%d pos=(%d,%d) mtsIdx=%d\n", COMPONENT_Y, tu.cu->lx(), tu.cu->ly(), mtsIdx); +} +#else void CABACWriter::mts_coding( const TransformUnit& tu, ComponentID compID ) { const bool tsAllowed = TU::isTSAllowed ( tu, compID ); @@ -2929,6 +3004,7 @@ void CABACWriter::mts_coding( const TransformUnit& tu, ComponentID compID ) DTRACE( g_trace_ctx, D_SYNTAX, "mts_coding() etype=%d pos=(%d,%d) mtsIdx=%d\n", COMPONENT_Y, tu.cu->lx(), tu.cu->ly(), tu.mtsIdx); #endif } +#endif void CABACWriter::isp_mode( const CodingUnit& cu ) { @@ -2992,12 +3068,22 @@ void CABACWriter::residual_lfnst_mode( const CodingUnit& cu, CUCtx& cuCtx ) const bool lumaFlag = cu.isSepTree() ? ( isLuma( cu.chType ) ? true : false ) : true; const bool chromaFlag = cu.isSepTree() ? ( isChroma( cu.chType ) ? true : false ) : true; bool nonZeroCoeffNonTsCorner8x8 = ( lumaFlag && cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_LUMA] ) || (chromaFlag && cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_CHROMA] ); + +#if JVET_P1026_MTS_SIGNALLING +#if JVET_P0058_CHROMA_TS + const bool isTrSkip = TU::getCbf(*cu.firstTU, COMPONENT_Y) && cu.firstTU->mtsIdx[COMPONENT_Y] == MTS_SKIP; +#else + const bool isTrSkip = TU::getCbf(*cu.firstTU, COMPONENT_Y) && cu.firstTU->mtsIdx == MTS_SKIP; +#endif + if( !cuCtx.lfnstLastScanPos || nonZeroCoeffNonTsCorner8x8 || isTrSkip ) +#else #if JVET_P0058_CHROMA_TS const bool isNonDCT2 = (TU::getCbf(*cu.firstTU, ComponentID(COMPONENT_Y)) && cu.firstTU->mtsIdx[COMPONENT_Y] != MTS_DCT2_DCT2); #else const bool isNonDCT2 = (TU::getCbf(*cu.firstTU, ComponentID(COMPONENT_Y)) && cu.firstTU->mtsIdx != MTS_DCT2_DCT2); #endif if( !cuCtx.lfnstLastScanPos || nonZeroCoeffNonTsCorner8x8 || isNonDCT2 ) +#endif { return; } @@ -3039,10 +3125,14 @@ void CABACWriter::last_sig_coeff( CoeffCodingContext& cctx, const TransformUnit& unsigned maxLastPosX = cctx.maxLastPosX(); unsigned maxLastPosY = cctx.maxLastPosY(); +#if JVET_P1026_MTS_SIGNALLING + if( tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[ compID ].width <= 32 && tu.blocks[ compID ].height <= 32 && !tu.cu->transQuantBypass && compID == COMPONENT_Y ) +#else #if JVET_P0058_CHROMA_TS if ((tu.mtsIdx[compID] > MTS_SKIP || (tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[compID].width <= 32 && tu.blocks[compID].height <= 32)) && !tu.cu->transQuantBypass && compID == COMPONENT_Y) #else if( ( tu.mtsIdx > MTS_SKIP || ( tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[ compID ].width <= 32 && tu.blocks[ compID ].height <= 32 ) ) && !tu.cu->transQuantBypass && compID == COMPONENT_Y ) +#endif #endif { maxLastPosX = ( tu.blocks[compID].width == 32 ) ? g_uiGroupIdx[ 15 ] : maxLastPosX; diff --git a/source/Lib/EncoderLib/CABACWriter.h b/source/Lib/EncoderLib/CABACWriter.h index 72c63a2a66663d55bfdb37750c63dacaff95a747..7ffde4fb19a451bbaca0b2d9507f1b19b1d2a6f9 100644 --- a/source/Lib/EncoderLib/CABACWriter.h +++ b/source/Lib/EncoderLib/CABACWriter.h @@ -146,7 +146,12 @@ public: // residual coding (clause 7.3.8.11) void residual_coding ( const TransformUnit& tu, ComponentID compID, CUCtx* cuCtx = nullptr ); +#if JVET_P1026_MTS_SIGNALLING + void ts_flag ( const TransformUnit& tu, ComponentID compID ); + void mts_idx ( const CodingUnit& cu, CUCtx& cuCtx ); +#else void mts_coding ( const TransformUnit& tu, ComponentID compID ); +#endif void residual_lfnst_mode ( const CodingUnit& cu, CUCtx& cuCtx ); void isp_mode ( const CodingUnit& cu ); void explicit_rdpcm_mode ( const TransformUnit& tu, ComponentID compID ); diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h index 364fb348946225003f4ef069dec4917fedf3dec9..db791a5c84d7f6c5a4d522c65014020f7ecafdbc 100644 --- a/source/Lib/EncoderLib/EncCfg.h +++ b/source/Lib/EncoderLib/EncCfg.h @@ -591,7 +591,9 @@ protected: int m_PPSDepQuantEnabledIdc; int m_PPSRefPicListSPSIdc0; int m_PPSRefPicListSPSIdc1; +#if !JVET_P0206_TMVP_flags int m_PPSTemporalMVPEnabledIdc; +#endif int m_PPSMvdL1ZeroIdc; int m_PPSCollocatedFromL0Idc; uint32_t m_PPSSixMinusMaxNumMergeCandPlus1; @@ -1513,8 +1515,10 @@ public: int getPPSRefPicListSPSIdc0 () { return m_PPSRefPicListSPSIdc0; } void setPPSRefPicListSPSIdc1 ( int u ) { m_PPSRefPicListSPSIdc1 = u; } int getPPSRefPicListSPSIdc1 () { return m_PPSRefPicListSPSIdc1; } +#if !JVET_P0206_TMVP_flags void setPPSTemporalMVPEnabledIdc ( int u ) { m_PPSTemporalMVPEnabledIdc = u; } int getPPSTemporalMVPEnabledIdc () { return m_PPSTemporalMVPEnabledIdc; } +#endif void setPPSMvdL1ZeroIdc ( int u ) { m_PPSMvdL1ZeroIdc = u; } int getPPSMvdL1ZeroIdc () { return m_PPSMvdL1ZeroIdc; } void setPPSCollocatedFromL0Idc ( int u ) { m_PPSCollocatedFromL0Idc = u; } diff --git a/source/Lib/EncoderLib/EncGOP.cpp b/source/Lib/EncoderLib/EncGOP.cpp index c3b588dbee0506d52da2fcbf9f7277778ab19f12..eb349662a354055268937183908290d59b040c32 100644 --- a/source/Lib/EncoderLib/EncGOP.cpp +++ b/source/Lib/EncoderLib/EncGOP.cpp @@ -2319,7 +2319,9 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, if (m_pcEncLib->getTMVPModeId() == 2) { +#if !JVET_P0206_TMVP_flags assert (m_pcEncLib->getPPSTemporalMVPEnabledIdc() == 0); +#endif if (iGOPid == 0) // first picture in SOP (i.e. forward B) { pcSlice->setEnableTMVPFlag(0); @@ -2330,7 +2332,11 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, pcSlice->setEnableTMVPFlag(1); } } +#if JVET_P0206_TMVP_flags + else if (m_pcEncLib->getTMVPModeId() == 1) +#else else if (m_pcEncLib->getTMVPModeId() == 1 && m_pcEncLib->getPPSTemporalMVPEnabledIdc() != 1) +#endif { pcSlice->setEnableTMVPFlag(1); } @@ -3419,7 +3425,7 @@ static inline double calcWeightedSquaredError(const CPelBuf& org, const C uint64_t EncGOP::xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1, const uint32_t rshift #if ENABLE_QPA - , const uint32_t chromaShift /*= 0*/ + , const uint32_t chromaShiftHor /*= 0*/, const uint32_t chromaShiftVer /*= 0*/ #endif ) { @@ -3439,7 +3445,7 @@ uint64_t EncGOP::xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1, const uint32_t W = pic0.width; // image width const uint32_t H = pic0.height; // image height const double R = double(W * H) / (1920.0 * 1080.0); - const uint32_t B = Clip3<uint32_t>(0, 128 >> chromaShift, 4 * uint32_t(16.0 * sqrt(R) + 0.5)); // WPSNR block size in integer multiple of 4 (for SIMD, = 64 at full-HD) + const uint32_t B = Clip3<uint32_t>(0, 128 >> chromaShiftVer, 4 * uint32_t(16.0 * sqrt(R) + 0.5)); // WPSNR block size in integer multiple of 4 (for SIMD, = 64 at full-HD) uint32_t x, y; @@ -3474,7 +3480,7 @@ uint64_t EncGOP::xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1, } // integer weighted distortion - sumAct = 16.0 * sqrt ((3840.0 * 2160.0) / double((W << chromaShift) * (H << chromaShift))) * double(1 << BD); + sumAct = 16.0 * sqrt ((3840.0 * 2160.0) / double((W << chromaShiftHor) * (H << chromaShiftVer))) * double(1 << BD); return (wmse <= 0.0) ? 0 : uint64_t(wmse * pow(sumAct, BETA) + 0.5); } @@ -3725,7 +3731,7 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni const CPelBuf orgPB(o.bufAt(0, 0), o.stride, width, height); const uint32_t bitDepth = sps.getBitDepth(toChannelType(compID)); #if ENABLE_QPA - const uint64_t uiSSDtemp = xFindDistortionPlane(recPB, orgPB, useWPSNR ? bitDepth : 0, ::getComponentScaleX(compID, format)); + const uint64_t uiSSDtemp = xFindDistortionPlane(recPB, orgPB, useWPSNR ? bitDepth : 0, ::getComponentScaleX(compID, format), ::getComponentScaleY(compID, format)); #else const uint64_t uiSSDtemp = xFindDistortionPlane(recPB, orgPB, 0); #endif @@ -3748,7 +3754,7 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni const CPelBuf& upscaledOrg = sps.getUseReshaper() ? pcPic->M_BUFS( 0, PIC_TRUE_ORIGINAL_INPUT ).get( compID ) : pcPic->M_BUFS( 0, PIC_ORIGINAL_INPUT ).get( compID ); #if ENABLE_QPA - const uint64_t upscaledSSD = xFindDistortionPlane( upscaledRec.get( compID ), upscaledOrg, useWPSNR ? bitDepth : 0, ::getComponentScaleX( compID, format ) ); + const uint64_t upscaledSSD = xFindDistortionPlane( upscaledRec.get( compID ), upscaledOrg, useWPSNR ? bitDepth : 0, ::getComponentScaleX( compID, format ), ::getComponentScaleY( compID, format ) ); #else const uint64_t scaledSSD = xFindDistortionPlane( upscaledRec.get( compID ), upscaledOrg, 0 ); #endif @@ -4158,7 +4164,7 @@ void EncGOP::xCalculateInterlacedAddPSNR( Picture* pcPicOrgFirstField, Picture* { CHECK(!(conversion == IPCOLOURSPACE_UNCHANGED), "Unspecified error"); #if ENABLE_QPA - uiSSDtemp += xFindDistortionPlane( acPicRecFields[fieldNum].get(ch), apcPicOrgFields[fieldNum]->getOrigBuf().get(ch), useWPSNR ? bitDepth : 0, ::getComponentScaleX(ch, format) ); + uiSSDtemp += xFindDistortionPlane( acPicRecFields[fieldNum].get(ch), apcPicOrgFields[fieldNum]->getOrigBuf().get(ch), useWPSNR ? bitDepth : 0, ::getComponentScaleX(ch, format), ::getComponentScaleY(ch, format) ); #else uiSSDtemp += xFindDistortionPlane( acPicRecFields[fieldNum].get(ch), apcPicOrgFields[fieldNum]->getOrigBuf().get(ch), 0 ); #endif diff --git a/source/Lib/EncoderLib/EncGOP.h b/source/Lib/EncoderLib/EncGOP.h index e9667c5d0f0afacbd2dfcd95d8f3aa60a6d2610e..1db3355bf687c1630876348228d6beda5b7bec64 100644 --- a/source/Lib/EncoderLib/EncGOP.h +++ b/source/Lib/EncoderLib/EncGOP.h @@ -287,7 +287,7 @@ protected: uint64_t xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1, const uint32_t rshift #if ENABLE_QPA - , const uint32_t chromaShift = 0 + , const uint32_t chromaShiftHor = 0, const uint32_t chromaShiftVer = 0 #endif ); #if WCG_WPSNR diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp index c195a983db58cd5103eafb08aecad9034b33e26f..9ee126d417ba8954d4d0cde259506f3f985e0856 100644 --- a/source/Lib/EncoderLib/EncLib.cpp +++ b/source/Lib/EncoderLib/EncLib.cpp @@ -1168,7 +1168,9 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps) pps.setPPSDepQuantEnabledIdc(getPPSDepQuantEnabledIdc()); pps.setPPSRefPicListSPSIdc0(getPPSRefPicListSPSIdc0()); pps.setPPSRefPicListSPSIdc1(getPPSRefPicListSPSIdc1()); +#if !JVET_P0206_TMVP_flags pps.setPPSTemporalMVPEnabledIdc(getPPSTemporalMVPEnabledIdc()); +#endif pps.setPPSMvdL1ZeroIdc(getPPSMvdL1ZeroIdc()); pps.setPPSCollocatedFromL0Idc(getPPSCollocatedFromL0Idc()); pps.setPPSSixMinusMaxNumMergeCandPlus1(getPPSSixMinusMaxNumMergeCandPlus1()); diff --git a/source/Lib/EncoderLib/InterSearch.cpp b/source/Lib/EncoderLib/InterSearch.cpp index a3f34e285a551a3d088addf5b9455007c7a22665..9c47b307ecae5bb40652896fa017a9166aebd045 100644 --- a/source/Lib/EncoderLib/InterSearch.cpp +++ b/source/Lib/EncoderLib/InterSearch.cpp @@ -6583,7 +6583,12 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par #else const bool tsAllowed = TU::isTSAllowed ( tu, compID ); #endif +#if JVET_P1026_MTS_SIGNALLING + const bool mtsAllowed = CU::isMTSAllowed( *tu.cu, compID ); +#else const bool mtsAllowed = TU::isMTSAllowed( tu, compID ); +#endif + uint8_t nNumTransformCands = 1 + ( tsAllowed ? 1 : 0 ) + ( mtsAllowed ? 4 : 0 ); // DCT + TS + 4 MTS = 6 tests std::vector<TrMode> trModes; trModes.push_back( TrMode( 0, true ) ); //DCT2 diff --git a/source/Lib/EncoderLib/IntraSearch.cpp b/source/Lib/EncoderLib/IntraSearch.cpp index 062ecbafcec255080b22a7d993d7e23938aee605..d2abffdab0c46afb2d6792fb32d7aca9f808f9d4 100644 --- a/source/Lib/EncoderLib/IntraSearch.cpp +++ b/source/Lib/EncoderLib/IntraSearch.cpp @@ -362,10 +362,9 @@ bool IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner, m_ispCandListHor.clear(); m_ispCandListVer.clear(); m_regIntraRDListWithCosts.clear(); - m_ispTestedModes.clear(); - //save the number of subpartitions - m_ispTestedModes.numTotalParts[0] = (int)height >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_HORZ_SPLIT)); - m_ispTestedModes.numTotalParts[1] = (int)width >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_VERT_SPLIT)); + int numTotalPartsHor = (int)width >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_VERT_SPLIT)); + int numTotalPartsVer = (int)height >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_HORZ_SPLIT)); + m_ispTestedModes.init(numTotalPartsHor, numTotalPartsVer); } #if ADAPTIVE_COLOR_TRANSFORM @@ -2328,6 +2327,15 @@ void IntraSearch::xEncCoeffQT( CodingStructure &cs, Partitioner &partitioner, co } if( TU::getCbf( currTU, compID ) ) { +#if JVET_P1026_MTS_SIGNALLING + if( isLuma(compID) ) + { + CUCtx cuCtx; + m_CABACEstimator->residual_coding( currTU, compID, &cuCtx ); + m_CABACEstimator->mts_idx( *currTU.cu, cuCtx ); + } + else +#endif m_CABACEstimator->residual_coding( currTU, compID ); } } @@ -3103,7 +3111,11 @@ bool IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par tu.depth = currDepth; const bool tsAllowed = TU::isTSAllowed( tu, COMPONENT_Y ); +#if JVET_P1026_MTS_SIGNALLING + const bool mtsAllowed = CU::isMTSAllowed( cu, COMPONENT_Y ); +#else const bool mtsAllowed = TU::isMTSAllowed( tu, COMPONENT_Y ); +#endif std::vector<TrMode> trModes; if( sps.getUseLFNST() ) @@ -4874,15 +4886,18 @@ void IntraSearch::xGetNextISPMode(ModeInfo& modeInfo, const ModeInfo* lastMode, static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM>* rdModeLists[2] = { &m_ispCandListHor, &m_ispCandListVer }; ISPType nextISPcandSplitType; - if (!m_ispTestedModes.stopTestingHorSplit && !m_ispTestedModes.stopTestingVerSplit) + auto& ispTestedModes = m_ispTestedModes; + const bool horSplitIsTerminated = ispTestedModes.splitIsFinished[HOR_INTRA_SUBPARTITIONS - 1]; + const bool verSplitIsTerminated = ispTestedModes.splitIsFinished[VER_INTRA_SUBPARTITIONS - 1]; + if (!horSplitIsTerminated && !verSplitIsTerminated) { nextISPcandSplitType = !lastMode ? HOR_INTRA_SUBPARTITIONS : lastMode->ispMod == HOR_INTRA_SUBPARTITIONS ? VER_INTRA_SUBPARTITIONS : HOR_INTRA_SUBPARTITIONS; } - else if (!m_ispTestedModes.stopTestingHorSplit && m_ispTestedModes.stopTestingVerSplit) + else if (!horSplitIsTerminated && verSplitIsTerminated) { nextISPcandSplitType = HOR_INTRA_SUBPARTITIONS; } - else if (m_ispTestedModes.stopTestingHorSplit && !m_ispTestedModes.stopTestingVerSplit) + else if (horSplitIsTerminated && !verSplitIsTerminated) { nextISPcandSplitType = VER_INTRA_SUBPARTITIONS; } @@ -4891,70 +4906,70 @@ void IntraSearch::xGetNextISPMode(ModeInfo& modeInfo, const ModeInfo* lastMode, return; // no more modes will be tested } - int maxNumSubPartitions = m_ispTestedModes.numTotalParts[nextISPcandSplitType - 1]; + int maxNumSubPartitions = ispTestedModes.numTotalParts[nextISPcandSplitType - 1]; - if (m_ispTestedModes.numTestedModes[nextISPcandSplitType - 1] >= 2) + if (ispTestedModes.numTestedModes[nextISPcandSplitType - 1] >= 2) { // Split stop criteria after checking the performance of previously tested intra modes const int thresholdSplit1 = maxNumSubPartitions; + bool stopThisSplit = false; - int mode1 = m_ispTestedModes.getTestedIntraMode((ISPType)nextISPcandSplitType, 0); + int mode1 = ispTestedModes.getTestedIntraMode((ISPType)nextISPcandSplitType, 0); mode1 = mode1 == DC_IDX ? -1 : mode1; - int numSubPartsBestMode1 = mode1 != -1 ? m_ispTestedModes.getNumCompletedSubParts((ISPType)nextISPcandSplitType, mode1) : -1; - int mode2 = m_ispTestedModes.getTestedIntraMode((ISPType)nextISPcandSplitType, 1); + int numSubPartsBestMode1 = mode1 != -1 ? ispTestedModes.getNumCompletedSubParts((ISPType)nextISPcandSplitType, mode1) : -1; + int mode2 = ispTestedModes.getTestedIntraMode((ISPType)nextISPcandSplitType, 1); mode2 = mode2 == DC_IDX ? -1 : mode2; - int numSubPartsBestMode2 = mode2 != -1 ? m_ispTestedModes.getNumCompletedSubParts((ISPType)nextISPcandSplitType, mode2) : -1; + int numSubPartsBestMode2 = mode2 != -1 ? ispTestedModes.getNumCompletedSubParts((ISPType)nextISPcandSplitType, mode2) : -1; // 1) The 2 most promising modes do not reach a certain number of sub-partitions if (numSubPartsBestMode1 != -1 && numSubPartsBestMode2 != -1) { if (numSubPartsBestMode1 < thresholdSplit1 && numSubPartsBestMode2 < thresholdSplit1) { - m_ispTestedModes.stopTestingVerSplit = nextISPcandSplitType == VER_INTRA_SUBPARTITIONS ? true : m_ispTestedModes.stopTestingVerSplit; - m_ispTestedModes.stopTestingHorSplit = nextISPcandSplitType == HOR_INTRA_SUBPARTITIONS ? true : m_ispTestedModes.stopTestingHorSplit; - return; + stopThisSplit = true; } } - // 2) One split is better than the other after PLANAR and one angle have been tested - ISPType otherSplit = nextISPcandSplitType == HOR_INTRA_SUBPARTITIONS ? VER_INTRA_SUBPARTITIONS : HOR_INTRA_SUBPARTITIONS; - int numSubPartsBestAngleOtherSplit = mode2 != -1 ? m_ispTestedModes.getNumCompletedSubParts(otherSplit, mode2) : -1; - bool stopThisSplit = false; - if (numSubPartsBestAngleOtherSplit != -1 && numSubPartsBestMode2 != -1) + if (!stopThisSplit) { - if (numSubPartsBestAngleOtherSplit > numSubPartsBestMode2) + // 2) One split type may be discarded by comparing the number of sub-partitions of the best angle modes of both splits + ISPType otherSplit = nextISPcandSplitType == HOR_INTRA_SUBPARTITIONS ? VER_INTRA_SUBPARTITIONS : HOR_INTRA_SUBPARTITIONS; + int numSubPartsBestMode2OtherSplit = mode2 != -1 ? ispTestedModes.getNumCompletedSubParts(otherSplit, mode2) : -1; + if (numSubPartsBestMode2OtherSplit != -1 && numSubPartsBestMode2 != -1) { - stopThisSplit = true; - } - else if (numSubPartsBestAngleOtherSplit == numSubPartsBestMode2 && numSubPartsBestAngleOtherSplit == maxNumSubPartitions) - { - double rdCostBestAngleThisSplit = m_ispTestedModes.getRDCost(nextISPcandSplitType, mode2, maxNumSubPartitions); - double rdCostBestAngleOtherSplit = m_ispTestedModes.getRDCost(otherSplit, mode2, maxNumSubPartitions); - - if (rdCostBestAngleThisSplit == MAX_DOUBLE || rdCostBestAngleOtherSplit < rdCostBestAngleThisSplit * 1.3) + if (numSubPartsBestMode2OtherSplit > numSubPartsBestMode2) { stopThisSplit = true; } + else if (numSubPartsBestMode2OtherSplit == numSubPartsBestMode2 && numSubPartsBestMode2OtherSplit == maxNumSubPartitions) + { + double rdCostBestMode2ThisSplit = ispTestedModes.getRDCost(nextISPcandSplitType, mode2); + double rdCostBestMode2OtherSplit = ispTestedModes.getRDCost(otherSplit, mode2); + double threshold = 1.3; + if (rdCostBestMode2ThisSplit == MAX_DOUBLE || rdCostBestMode2OtherSplit < rdCostBestMode2ThisSplit * threshold) + { + stopThisSplit = true; + } + } } } if (stopThisSplit) { - m_ispTestedModes.stopTestingVerSplit = nextISPcandSplitType == VER_INTRA_SUBPARTITIONS ? true : m_ispTestedModes.stopTestingVerSplit; - m_ispTestedModes.stopTestingHorSplit = nextISPcandSplitType == HOR_INTRA_SUBPARTITIONS ? true : m_ispTestedModes.stopTestingHorSplit; + ispTestedModes.splitIsFinished[nextISPcandSplitType - 1] = true; return; } } // Now a new mode is retrieved from the list and it has to be decided whether it should be tested or not - if (m_ispTestedModes.candIndexInList[nextISPcandSplitType - 1] < rdModeLists[nextISPcandSplitType - 1]->size()) + if (ispTestedModes.candIndexInList[nextISPcandSplitType - 1] < rdModeLists[nextISPcandSplitType - 1]->size()) { - ModeInfo candidate = rdModeLists[nextISPcandSplitType - 1]->at(m_ispTestedModes.candIndexInList[nextISPcandSplitType - 1]); - m_ispTestedModes.candIndexInList[nextISPcandSplitType - 1]++; + ModeInfo candidate = rdModeLists[nextISPcandSplitType - 1]->at(ispTestedModes.candIndexInList[nextISPcandSplitType - 1]); + ispTestedModes.candIndexInList[nextISPcandSplitType - 1]++; // extra modes are only tested if ISP has won so far - if (m_ispTestedModes.candIndexInList[nextISPcandSplitType - 1] > m_ispTestedModes.numOrigModesToTest) + if (ispTestedModes.candIndexInList[nextISPcandSplitType - 1] > ispTestedModes.numOrigModesToTest) { - if (m_ispTestedModes.bestSplitSoFar != candidate.ispMod || m_ispTestedModes.bestModeSoFar == PLANAR_IDX) + if (ispTestedModes.bestSplitSoFar != candidate.ispMod || ispTestedModes.bestModeSoFar == PLANAR_IDX) { return; } @@ -4963,7 +4978,7 @@ void IntraSearch::xGetNextISPMode(ModeInfo& modeInfo, const ModeInfo* lastMode, bool testCandidate = true; // we look for a reference mode that has already been tested within the window and decide to test the new one according to the reference mode costs - if (candidate.modeId >= DC_IDX && maxNumSubPartitions > 2 && m_ispTestedModes.numTestedModes[nextISPcandSplitType - 1] >= 2) + if (candidate.modeId >= DC_IDX && maxNumSubPartitions > 2 && ispTestedModes.numTestedModes[nextISPcandSplitType - 1] >= 2) { const int angWindowSize = 5; int numSubPartsLeftMode, numSubPartsRightMode, numSubPartsRefMode, leftIntraMode = -1, rightIntraMode = -1; @@ -4973,8 +4988,8 @@ void IntraSearch::xGetNextISPMode(ModeInfo& modeInfo, const ModeInfo* lastMode, xFindAlreadyTestedNearbyIntraModes((int)candidate.modeId, &leftIntraMode, &rightIntraMode, (ISPType)candidate.ispMod, windowSize); - numSubPartsLeftMode = leftIntraMode != -1 ? m_ispTestedModes.getNumCompletedSubParts((ISPType)candidate.ispMod, leftIntraMode) : -1; - numSubPartsRightMode = rightIntraMode != -1 ? m_ispTestedModes.getNumCompletedSubParts((ISPType)candidate.ispMod, rightIntraMode) : -1; + numSubPartsLeftMode = leftIntraMode != -1 ? ispTestedModes.getNumCompletedSubParts((ISPType)candidate.ispMod, leftIntraMode) : -1; + numSubPartsRightMode = rightIntraMode != -1 ? ispTestedModes.getNumCompletedSubParts((ISPType)candidate.ispMod, rightIntraMode) : -1; numSubPartsRefMode = std::max(numSubPartsLeftMode, numSubPartsRightMode); @@ -5023,8 +5038,10 @@ void IntraSearch::xSortISPCandList(double bestCostSoFar, double bestNonISPCost) double thSkipISP = 1.4; if (bestNonISPCost > bestCostSoFar * thSkipISP) { - m_ispTestedModes.stopTestingHorSplit = true; - m_ispTestedModes.stopTestingVerSplit = true; + for (int splitIdx = 0; splitIdx < NUM_INTRA_SUBPARTITIONS_MODES - 1; splitIdx++) + { + m_ispTestedModes.splitIsFinished[splitIdx] = true; + } return; } } diff --git a/source/Lib/EncoderLib/IntraSearch.h b/source/Lib/EncoderLib/IntraSearch.h index 544db3f1d9bcc3615373693cc91f414501317edc..12517db466cd7daec959b16925fbca13556db3d0 100644 --- a/source/Lib/EncoderLib/IntraSearch.h +++ b/source/Lib/EncoderLib/IntraSearch.h @@ -233,8 +233,7 @@ private: double bestCost[2]; int numTestedModes[2]; int candIndexInList[2]; - bool stopTestingHorSplit; - bool stopTestingVerSplit; + bool splitIsFinished[2]; int numOrigModesToTest; // set a tested mode results @@ -267,13 +266,11 @@ private: return modeHasBeenTested[iModeIdx][st] ? intraMode[iModeIdx][st].numCompSubParts : -1; } - double getRDCost(ISPType splitType, int iModeIdx, int maxNumSubParts) + double getRDCost(ISPType splitType, int iModeIdx) { const unsigned st = splitType - 1; CHECKD(st > 1, "The split type is invalid!"); - return modeHasBeenTested[iModeIdx][st] && intraMode[iModeIdx][st].numCompSubParts == maxNumSubParts - ? intraMode[iModeIdx][st].rdCost - : -1; + return modeHasBeenTested[iModeIdx][st] ? intraMode[iModeIdx][st].rdCost : MAX_DOUBLE; } // get a tested intra mode index @@ -287,16 +284,16 @@ private: // set everything to default values void clear() { - numTestedModes[0] = numTestedModes[1] = 0; - candIndexInList[0] = candIndexInList[1] = 0; - stopTestingHorSplit = false; - stopTestingVerSplit = false; - testedModes[0].clear(); - testedModes[1].clear(); - bestCost[0] = MAX_DOUBLE; - bestCost[1] = MAX_DOUBLE; - bestMode[0] = -1; - bestMode[1] = -1; + for (int splitIdx = 0; splitIdx < NUM_INTRA_SUBPARTITIONS_MODES - 1; splitIdx++) + { + numTestedModes [splitIdx] = 0; + candIndexInList[splitIdx] = 0; + numTotalParts [splitIdx] = 0; + splitIsFinished[splitIdx] = false; + testedModes [splitIdx].clear(); + bestCost [splitIdx] = MAX_DOUBLE; + bestMode [splitIdx] = -1; + } bestModeSoFar = -1; bestSplitSoFar = NOT_INTRA_SUBPARTITIONS; numOrigModesToTest = -1; @@ -307,6 +304,15 @@ private: intraMode[idx][0].clear(); intraMode[idx][1].clear(); } + void init(const int numTotalPartsHor, const int numTotalPartsVer) + { + clear(); + const int horSplit = HOR_INTRA_SUBPARTITIONS - 1, verSplit = VER_INTRA_SUBPARTITIONS - 1; + numTotalParts [horSplit] = numTotalPartsHor; + numTotalParts [verSplit] = numTotalPartsVer; + splitIsFinished[horSplit] = (numTotalParts[horSplit] == 0); + splitIsFinished[verSplit] = (numTotalParts[verSplit] == 0); + } }; static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> m_ispCandListHor, m_ispCandListVer; diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp index adb300ef0742c29b37e2add19ed6d7ca56d1193f..39b5b733dcd4f656db2b2cc696ba959caf47cc51 100644 --- a/source/Lib/EncoderLib/VLCWriter.cpp +++ b/source/Lib/EncoderLib/VLCWriter.cpp @@ -236,7 +236,9 @@ void HLSWriter::codePPS( const PPS* pcPPS, const SPS* pcSPS ) WRITE_CODE( pcPPS->getPPSDepQuantEnabledIdc(), 2, "pps_dep_quant_enabled_idc"); WRITE_CODE( pcPPS->getPPSRefPicListSPSIdc0(), 2, "pps_ref_pic_list_sps_idc[0]"); WRITE_CODE( pcPPS->getPPSRefPicListSPSIdc1(), 2, "pps_ref_pic_list_sps_idc[1]"); +#if !JVET_P0206_TMVP_flags WRITE_CODE( pcPPS->getPPSTemporalMVPEnabledIdc(), 2, "pps_temporal_mvp_enabled_idc"); +#endif WRITE_CODE( pcPPS->getPPSMvdL1ZeroIdc(), 2, "pps_mvd_l1_zero_idc"); WRITE_CODE( pcPPS->getPPSCollocatedFromL0Idc(), 2, "pps_collocated_from_l0_idc"); WRITE_UVLC( pcPPS->getPPSSixMinusMaxNumMergeCandPlus1(), "pps_six_minus_max_num_merge_cand_plus1"); @@ -1324,7 +1326,11 @@ void HLSWriter::codeSliceHeader ( Slice* pcSlice ) if(!pcSlice->isIntra()) { +#if JVET_P0206_TMVP_flags + if( pcSlice->getSPS()->getSPSTemporalMVPEnabledFlag()) +#else if( pcSlice->getSPS()->getSPSTemporalMVPEnabledFlag() && !pcSlice->getPPS()->getPPSTemporalMVPEnabledIdc() ) +#endif { WRITE_FLAG( pcSlice->getEnableTMVPFlag() ? 1 : 0, "slice_temporal_mvp_enabled_flag" ); }