From 831167838f60dc77dd786395474448183fa1bbe3 Mon Sep 17 00:00:00 2001 From: Adam Wieckowski <adam.wieckowski@hhi.fraunhofer.de> Date: Thu, 21 Mar 2019 16:45:20 +0100 Subject: [PATCH] work in progress on ENABLE_SPLIT_PARALLELISM --- source/Lib/CommonLib/CodingStructure.cpp | 8 ++- source/Lib/CommonLib/InterPrediction.h | 3 + source/Lib/CommonLib/TypeDef.h | 2 +- source/Lib/DecoderLib/DecCu.h | 3 + source/Lib/EncoderLib/EncCu.cpp | 82 ++++++------------------ source/Lib/EncoderLib/EncCu.h | 11 +--- source/Lib/EncoderLib/EncModeCtrl.cpp | 10 ++- source/Lib/EncoderLib/InterSearch.cpp | 2 + 8 files changed, 44 insertions(+), 77 deletions(-) diff --git a/source/Lib/CommonLib/CodingStructure.cpp b/source/Lib/CommonLib/CodingStructure.cpp index 8778f3567..e79e3c8ec 100644 --- a/source/Lib/CommonLib/CodingStructure.cpp +++ b/source/Lib/CommonLib/CodingStructure.cpp @@ -867,9 +867,9 @@ void CodingStructure::useSubStructure( const CodingStructure& subStruct, const C if( cpyReco ) picture->getRecoBuf( clippedArea ).copyFrom( subRecoBuf ); #if JVET_M0483_IBC - if (!subStruct.m_isTuEnc && ((!slice->isIntra() || slice->getSPS()->getIBCFlag()) && subStruct.chType != CHANNEL_TYPE_CHROMA)) + if (!subStruct.m_isTuEnc && ((!slice->isIntra() || slice->getSPS()->getIBCFlag()) && chType != CHANNEL_TYPE_CHROMA)) #else - if (!subStruct.m_isTuEnc && (!slice->isIntra() && subStruct.chType != CHANNEL_TYPE_CHROMA)) + if (!subStruct.m_isTuEnc && (!slice->isIntra() && chType != CHANNEL_TYPE_CHROMA)) #endif { // copy motion buffer @@ -1149,6 +1149,10 @@ void CodingStructure::initStructData( const int &QP, const bool &_isLosses, cons { getMotionBuf() .memset( 0 ); } + if( parent ) + { + motionLut = parent->motionLut; + } fracBits = 0; dist = 0; diff --git a/source/Lib/CommonLib/InterPrediction.h b/source/Lib/CommonLib/InterPrediction.h index e329cc291..824351800 100644 --- a/source/Lib/CommonLib/InterPrediction.h +++ b/source/Lib/CommonLib/InterPrediction.h @@ -214,6 +214,9 @@ public: #endif #if JVET_M0170_MRG_SHARELIST void setShareState(int shareStateIn) {m_shareState = shareStateIn;} +#if ENABLE_SPLIT_PARALLELISM + int getShareState() const { return m_shareState; } +#endif #endif }; diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 04de41c87..b09ad5b63 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -221,7 +221,7 @@ typedef std::pair<int, int> TrCost; #endif #ifndef ENABLE_SPLIT_PARALLELISM -#define ENABLE_SPLIT_PARALLELISM 0 +#define ENABLE_SPLIT_PARALLELISM 1 #endif #if ENABLE_SPLIT_PARALLELISM #define PARL_SPLIT_MAX_NUM_JOBS 6 // number of parallel jobs that can be defined and need memory allocated diff --git a/source/Lib/DecoderLib/DecCu.h b/source/Lib/DecoderLib/DecCu.h index 40114ce68..8990dfec0 100644 --- a/source/Lib/DecoderLib/DecCu.h +++ b/source/Lib/DecoderLib/DecCu.h @@ -79,6 +79,9 @@ public: #if JVET_M0170_MRG_SHARELIST void setShareStateDec (int shareStateDecIn) { m_shareStateDec = shareStateDecIn; } +#if ENABLE_SPLIT_PARALLELISM + int getShareStateDec () const { return m_shareStateDec; } +#endif #endif /// reconstruct Ctu information protected: diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp index 39c6a5e43..b9ba49d91 100644 --- a/source/Lib/EncoderLib/EncCu.cpp +++ b/source/Lib/EncoderLib/EncCu.cpp @@ -118,9 +118,6 @@ void EncCu::create( EncCfg* encCfg ) } } - // WIA: only the weight==height case is relevant without QTBT - m_pImvTempCS = nullptr; - m_cuChromaQpOffsetIdxPlus1 = 0; unsigned maxDepth = numWidths + numHeights; @@ -199,22 +196,6 @@ void EncCu::destroy() delete m_modeCtrl; m_modeCtrl = nullptr; - // WIA: only the weight==height case is relevant without QTBT - if( m_pImvTempCS ) - { - for( unsigned w = 0; w < numWidths; w++ ) - { - if( m_pImvTempCS[w] ) - { - m_pImvTempCS[w]->destroy(); - delete[] m_pImvTempCS[w]; - } - } - - delete[] m_pImvTempCS; - m_pImvTempCS = nullptr; - } - for (unsigned ui = 0; ui < MMVD_MRG_MAX_RD_BUF_NUM; ui++) { m_acMergeBuffer[ui].destroy(); @@ -281,9 +262,6 @@ void EncCu::init( EncLib* pcEncLib, const SPS& sps PARL_PARAM( const int tId ) ) #if JVET_M0102_INTRA_SUBPARTITIONS m_pcIntraSearch->setModeCtrl( m_modeCtrl ); #endif - ::memset(m_subMergeBlkSize, 0, sizeof(m_subMergeBlkSize)); - ::memset(m_subMergeBlkNum, 0, sizeof(m_subMergeBlkNum)); - m_prevPOC = MAX_UINT; #if JVET_M0255_FRACMMVD_SWITCH if ( ( m_pcEncCfg->getIBCHashSearch() && m_pcEncCfg->getIBCMode() ) || m_pcEncCfg->getAllowDisFracMMVD() ) @@ -653,14 +631,6 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par const unsigned wIdx = gp_sizeIdxInfo->idxFrom( partitioner.currArea().lwidth() ); const UnitArea currCsArea = clipArea( CS::getArea( *bestCS, bestCS->area, partitioner.chType ), *tempCS->picture ); -#if JVET_M0483_IBC - if (m_pImvTempCS && (!slice.isIntra() || slice.getSPS()->getIBCFlag())) -#else - if( m_pImvTempCS && !slice.isIntra() ) -#endif - { - tempCS->initSubStructure( *m_pImvTempCS[wIdx], partitioner.chType, partitioner.currArea(), false ); - } tempCS->chType = partitioner.chType; bestCS->chType = partitioner.chType; @@ -680,6 +650,7 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par #endif slsSbt->resetSaveloadSbt( maxSLSize ); #if ENABLE_SPLIT_PARALLELISM + CHECK( tempCS->picture->scheduler.getSplitJobId() != 0, "The SBT search reset need to happen in sequential region." ); if (m_pcEncCfg->getNumSplitThreads() > 1) { for (int jId = 1; jId < NUM_RESERVERD_SPLIT_JOBS; jId++) @@ -1163,8 +1134,13 @@ void EncCu::copyState( EncCu* other, Partitioner& partitioner, const UnitArea& c CodingStructure* dst = m_pBestCS[wIdx][hIdx]; const CodingStructure* src = other->m_pBestCS[wIdx][hIdx]; bool keepResi = KEEP_PRED_AND_RESI_SIGNALS; +#if JVET_M0427_INLOOP_RESHAPER + bool keepPred = true; +#else + bool keepPred = KEEP_PRED_AND_RESI_SIGNALS; +#endif - dst->useSubStructure( *src, partitioner.chType, currArea, KEEP_PRED_AND_RESI_SIGNALS, true, keepResi, keepResi ); + dst->useSubStructure( *src, partitioner.chType, currArea, keepPred, true, keepResi, keepResi ); dst->cost = src->cost; dst->dist = src->dist; @@ -1189,7 +1165,16 @@ void EncCu::copyState( EncCu* other, Partitioner& partitioner, const UnitArea& c encReshapeThis->copyState( *encReshapeOther ); } #endif +#if JVET_M0170_MRG_SHARELIST + m_shareState = other->m_shareState; + m_shareBndPosX = other->m_shareBndPosX; + m_shareBndPosY = other->m_shareBndPosY; + m_shareBndSizeW = other->m_shareBndSizeW; + m_shareBndSizeH = other->m_shareBndSizeH; + setShareStateDec( other->getShareStateDec() ); + m_pcInterSearch->setShareState( other->m_pcInterSearch->getShareState() ); +#endif m_CABACEstimator->getCtx() = other->m_CABACEstimator->getCtx(); } #endif @@ -1993,12 +1978,10 @@ void EncCu::xCheckRDCostHashInter( CodingStructure *&tempCS, CodingStructure *&b #if JVET_M0464_UNI_MTS xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0 - , m_pImvTempCS ? m_pImvTempCS[wIdx] : NULL , 0 , &equGBiCost #else xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0 - , m_pImvTempCS ? m_pImvTempCS[wIdx] : NULL , 1 , 0 , &equGBiCost @@ -2701,10 +2684,9 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& } #if JVET_M0464_UNI_MTS - xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, NULL, uiNoResidualPass == 0 ? &candHasNoResidual[uiMrgHADIdx] : NULL ); + xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, uiNoResidualPass == 0 ? &candHasNoResidual[uiMrgHADIdx] : NULL ); #else xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass - , NULL , 1 , uiNoResidualPass == 0 ? &candHasNoResidual[uiMrgHADIdx] : NULL); #endif @@ -3063,9 +3045,9 @@ void EncCu::xCheckRDCostMergeTriangle2Nx2N( CodingStructure *&tempCS, CodingStru } #if JVET_M0464_UNI_MTS - xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, noResidualPass, NULL, ( noResidualPass == 0 ? &trianglecandHasNoResidual[mergeCand] : NULL ) ); + xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, noResidualPass, ( noResidualPass == 0 ? &trianglecandHasNoResidual[mergeCand] : NULL ) ); #else - xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, noResidualPass, NULL, true, ( (noResidualPass == 0 ) ? &trianglecandHasNoResidual[mergeCand] : NULL ) ); + xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, noResidualPass, true, ( (noResidualPass == 0 ) ? &trianglecandHasNoResidual[mergeCand] : NULL ) ); #endif if (m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip) @@ -3345,9 +3327,9 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct } #if JVET_M0464_UNI_MTS - xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, NULL, ( uiNoResidualPass == 0 ? &candHasNoResidual[uiMergeCand] : NULL ) ); + xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, ( uiNoResidualPass == 0 ? &candHasNoResidual[uiMergeCand] : NULL ) ); #else - xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, NULL, true, ((uiNoResidualPass == 0) ? &candHasNoResidual[uiMergeCand] : NULL) ); + xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, true, ((uiNoResidualPass == 0) ? &candHasNoResidual[uiMergeCand] : NULL) ); #endif if ( m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip ) @@ -4067,12 +4049,10 @@ void EncCu::xCheckRDCostInter( CodingStructure *&tempCS, CodingStructure *&bestC #if JVET_M0464_UNI_MTS xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, 0 - , m_pImvTempCS ? m_pImvTempCS[wIdx] : NULL , 0 , &equGBiCost #else xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, 0 - , m_pImvTempCS ? m_pImvTempCS[wIdx] : NULL , 1 , 0 , &equGBiCost @@ -4341,12 +4321,10 @@ bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&be #if JVET_M0464_UNI_MTS xEncodeInterResidual( tempCS, bestCS, partitioner, encTestModeBase, 0 - , NULL , 0 , &equGBiCost #else xEncodeInterResidual( tempCS, bestCS, partitioner, encTestModeBase, 0 - , NULL , true , 0 , &equGBiCost @@ -4607,7 +4585,6 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS , Partitioner &partitioner , const EncTestMode& encTestMode , int residualPass - , CodingStructure* imvCS , bool* bestHasNonResi , double* equGBiCost #else @@ -4823,14 +4800,6 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be #if !JVET_M0464_UNI_MTS double emtFirstPassCost = tempCS->cost; #endif - if( imvCS && (tempCS->cost < imvCS->cost) ) - { - if( imvCS->cost != MAX_DOUBLE ) - { - imvCS->initStructData( encTestMode.qp, encTestMode.lossless ); - } - imvCS->copyStructure( *tempCS, partitioner.chType ); - } if( NULL != bestHasNonResi && (bestCostInternal > tempCS->cost) ) { bestCostInternal = tempCS->cost; @@ -5035,15 +5004,6 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be xCheckDQP( *tempCS, partitioner ); - if( imvCS && ( tempCS->cost < imvCS->cost ) ) - { - if( imvCS->cost != MAX_DOUBLE ) - { - imvCS->initStructData( encTestMode.qp, encTestMode.lossless ); - } - imvCS->copyStructure( *tempCS, partitioner.chType ); - } - if( NULL != bestHasNonResi && ( bestCostInternal > tempCS->cost ) ) { bestCostInternal = tempCS->cost; diff --git a/source/Lib/EncoderLib/EncCu.h b/source/Lib/EncoderLib/EncCu.h index 9a8259056..a1938e7b4 100644 --- a/source/Lib/EncoderLib/EncCu.h +++ b/source/Lib/EncoderLib/EncCu.h @@ -125,7 +125,6 @@ private: CABACWriter* m_CABACEstimator; RateCtrl* m_pcRateCtrl; IbcHashMap m_ibcHashMap; - CodingStructure **m_pImvTempCS; EncModeCtrl *m_modeCtrl; #if JVET_M0170_MRG_SHARELIST int m_shareState; @@ -140,9 +139,7 @@ private: PelStorage m_acTriangleWeightedBuffer[TRIANGLE_MAX_NUM_CANDS]; // to store weighted prediction pixles double m_mergeBestSATDCost; MotionInfo m_SubPuMiBuf [( MAX_CU_SIZE * MAX_CU_SIZE ) >> ( MIN_CU_LOG2 << 1 )]; - unsigned int m_subMergeBlkSize[10]; - unsigned int m_subMergeBlkNum[10]; - unsigned int m_prevPOC; + int m_ctuIbcSearchRangeX; int m_ctuIbcSearchRangeY; #if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM @@ -188,10 +185,6 @@ public: EncCfg* getEncCfg() const { return m_pcEncCfg; } #endif -#if JVET_M0170_MRG_SHARELIST - Position shareParentPos; - Size shareParentSize; -#endif ~EncCu(); protected: @@ -237,13 +230,11 @@ protected: , Partitioner &partitioner , const EncTestMode& encTestMode , int residualPass = 0 - , CodingStructure* imvCS = NULL , bool* bestHasNonResi = NULL , double* equGBiCost = NULL ); #else void xEncodeInterResidual ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, int residualPass = 0 - , CodingStructure* imvCS = NULL , int emtMode = 1 , bool* bestHasNonResi = NULL , double* equGBiCost = NULL diff --git a/source/Lib/EncoderLib/EncModeCtrl.cpp b/source/Lib/EncoderLib/EncModeCtrl.cpp index c056b9e29..4d360e9ff 100644 --- a/source/Lib/EncoderLib/EncModeCtrl.cpp +++ b/source/Lib/EncoderLib/EncModeCtrl.cpp @@ -1326,7 +1326,7 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru xGetMinMaxQP( minQP, maxQP, cs, partitioner, baseQP, *cs.sps, *cs.pps, true ); #endif bool checkIbc = true; - if (cs.chType == CHANNEL_TYPE_CHROMA) + if (cs.chType == CHANNEL_TYPE_CHROMA) { IbcLumaCoverage ibcLumaCoverage = cs.getIbcLumaCoverage(cs.area.Cb()); switch (ibcLumaCoverage) @@ -2235,13 +2235,17 @@ int EncModeCtrlMTnoRQT::getNumParallelJobs( const CodingStructure &cs, Partition bool EncModeCtrlMTnoRQT::isParallelSplit( const CodingStructure &cs, Partitioner& partitioner ) const { if( partitioner.getImplicitSplit( cs ) != CU_DONT_SPLIT || cs.picture->scheduler.getSplitJobId() != 0 ) return false; - if( partitioner.currDepth <= cs.pps->getMaxCuDQPDepth() ) return false; +#if JVET_M0113_M0188_QG_SIZE + if( cs.pps->getUseDQP() && partitioner.currQgEnable() ) return false; +#else + if( cs.pps->getUseDQP() && partitioner.currDepth <= cs.pps->getMaxCuDQPDepth() ) return false; +#endif const int numJobs = getNumParallelJobs( cs, partitioner ); const int numPxl = partitioner.currArea().Y().area(); const int parlAt = m_pcEncCfg->getNumSplitThreads() <= 3 ? 1024 : 256; if( cs.slice->isIntra() && numJobs > 2 && ( numPxl == parlAt || !partitioner.canSplit( CU_QUAD_SPLIT, cs ) ) ) return true; if( !cs.slice->isIntra() && numJobs > 1 && ( numPxl == parlAt || !partitioner.canSplit( CU_QUAD_SPLIT, cs ) ) ) return true; - return false; + return false; } bool EncModeCtrlMTnoRQT::parallelJobSelector( const EncTestMode& encTestmode, const CodingStructure &cs, Partitioner& partitioner ) const diff --git a/source/Lib/EncoderLib/InterSearch.cpp b/source/Lib/EncoderLib/InterSearch.cpp index 5126a4637..0d8e96948 100644 --- a/source/Lib/EncoderLib/InterSearch.cpp +++ b/source/Lib/EncoderLib/InterSearch.cpp @@ -176,6 +176,8 @@ void InterSearch::setTempBuffers( CodingStructure ****pSplitCS, CodingStructure void InterSearch::copyState( const InterSearch& other ) { memcpy( m_aaiAdaptSR, other.m_aaiAdaptSR, sizeof( m_aaiAdaptSR ) ); + m_affineMotion = other.m_affineMotion; + m_uniMotions = other.m_uniMotions; } #endif -- GitLab