/* The copyright in this software is being made available under the BSD
 * License, included below. This software may be subject to other third party
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
 * Copyright (c) 2010-2018, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
 *    be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/** \file     EncCu.cpp
    \brief    Coding Unit (CU) encoder class
*/

#include "EncCu.h"

#include "EncLib.h"
#include "Analyze.h"
#include "AQp.h"

#include "CommonLib/dtrace_codingstruct.h"
#include "CommonLib/Picture.h"
#include "CommonLib/UnitTools.h"
#include "CommonLib/dtrace_buffer.h"

#include <stdio.h>
#include <cmath>
#include <algorithm>
#if ENABLE_WPP_PARALLELISM
#include <mutex>
extern std::recursive_mutex g_cache_mutex;
#endif

//! \ingroup EncoderLib
//! \{

// ====================================================================================================================
// Constructor / destructor / create / destroy
// ====================================================================================================================

void EncCu::create( EncCfg* encCfg )
{
  unsigned      uiMaxWidth   = encCfg->getMaxCUWidth();
  unsigned      uiMaxHeight  = encCfg->getMaxCUHeight();
  ChromaFormat  chromaFormat = encCfg->getChromaFormatIdc();
  bool          BTnoRQT      = encCfg->getQTBT();

  unsigned      numWidths    = gp_sizeIdxInfo->numWidths();
  unsigned      numHeights   = gp_sizeIdxInfo->numHeights();
#if JVET_K0357_AMVR
  unsigned      maxMEPart    = BTnoRQT ?
                               1 : NUMBER_OF_PART_SIZES;
#endif

  // allocate one temp and one best CodingStructure per admissible block size
  m_pTempCS = new CodingStructure** [numWidths];
  m_pBestCS = new CodingStructure** [numWidths];

  for( unsigned w = 0; w < numWidths; w++ )
  {
    m_pTempCS[w] = new CodingStructure* [numHeights];
    m_pBestCS[w] = new CodingStructure* [numHeights];

    for( unsigned h = 0; h < numHeights; h++ )
    {
      unsigned width  = gp_sizeIdxInfo->sizeFrom( w );
      unsigned height = gp_sizeIdxInfo->sizeFrom( h );

      if( ( BTnoRQT || w == h ) && gp_sizeIdxInfo->isCuSize( width ) && gp_sizeIdxInfo->isCuSize( height ) )
      {
        m_pTempCS[w][h] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache );
        m_pBestCS[w][h] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache );

        m_pTempCS[w][h]->create( chromaFormat, Area( 0, 0, width, height ), false );
        m_pBestCS[w][h]->create( chromaFormat, Area( 0, 0, width, height ), false );
      }
      else
      {
        m_pTempCS[w][h] = nullptr;
        m_pBestCS[w][h] = nullptr;
      }
    }
  }

#if JVET_K0357_AMVR
  // WIA: only the width==height case is relevant without QTBT
  m_pImvTempCS = nullptr;

  if( IMV_OFF != encCfg->getIMV() && !BTnoRQT )
  {
    m_pImvTempCS = new CodingStructure**[numWidths];

    for( unsigned w = 0; w < numWidths; w++ )
    {
      unsigned width  = gp_sizeIdxInfo->sizeFrom( w );
      unsigned height = gp_sizeIdxInfo->sizeFrom( w );

      m_pImvTempCS[w] = new CodingStructure*[maxMEPart];

      for( unsigned p = 0; p < maxMEPart; p++ )
      {
        if( gp_sizeIdxInfo->isCuSize( width ) )
        {
          m_pImvTempCS[w][p] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache );
          m_pImvTempCS[w][p]->create( chromaFormat, Area( 0, 0, width, height ), false );
        }
        else
        {
          m_pImvTempCS[w][p] = nullptr;
        }
      }
    }
  }
#endif

  m_cuChromaQpOffsetIdxPlus1 = 0;

  unsigned maxDepth = numWidths + numHeights;

  if( encCfg->getQTBT() )
  {
    m_modeCtrl = new EncModeCtrlMTnoRQT();
  }
  else
  {
    THROW( "Unknown partitioner!" );
  }

#if REUSE_CU_RESULTS
  m_modeCtrl->create( *encCfg );
#endif

  for( unsigned ui = 0; ui < MRG_MAX_NUM_CANDS; ui++ )
  {
    m_acMergeBuffer[ui].create( chromaFormat, Area( 0, 0, uiMaxWidth, uiMaxHeight ) );
  }

  m_CtxBuffer.resize( maxDepth );
  m_CurrCtx = 0;
}

void EncCu::destroy()
{
  bool          BTnoRQT   = m_pcEncCfg->getQTBT();
#if JVET_K0357_AMVR
  unsigned      maxMEPart = BTnoRQT ?
                               1 : NUMBER_OF_PART_SIZES;
#endif
  unsigned      numWidths  = gp_sizeIdxInfo->numWidths();
  unsigned      numHeights = gp_sizeIdxInfo->numHeights();

  for( unsigned w = 0; w < numWidths; w++ )
  {
    for( unsigned h = 0; h < numHeights; h++ )
    {
      if( BTnoRQT || w == h )
      {
        if( m_pBestCS[w][h] ) m_pBestCS[w][h]->destroy();
        if( m_pTempCS[w][h] ) m_pTempCS[w][h]->destroy();

        delete m_pBestCS[w][h];
        delete m_pTempCS[w][h];
      }
    }

    delete[] m_pTempCS[w];
    delete[] m_pBestCS[w];
  }

  delete[] m_pBestCS; m_pBestCS = nullptr;
  delete[] m_pTempCS; m_pTempCS = nullptr;

#if REUSE_CU_RESULTS
  m_modeCtrl->destroy();
#endif
  delete m_modeCtrl;
  m_modeCtrl = nullptr;

#if JVET_K0357_AMVR
  // WIA: only the width==height case is relevant without QTBT
  if( m_pImvTempCS )
  {
    for( unsigned w = 0; w < numWidths; w++ )
    {
      for( unsigned p = 0; p < maxMEPart; p++ )
      {
        if( m_pImvTempCS[w][p] ) m_pImvTempCS[w][p]->destroy();
        delete m_pImvTempCS[w][p];
      }
      delete[] m_pImvTempCS[w];
    }
    delete[] m_pImvTempCS;
    m_pImvTempCS = nullptr;
  }
#endif

  for( unsigned ui = 0; ui < MRG_MAX_NUM_CANDS; ui++ )
  {
    m_acMergeBuffer[ui].destroy();
  }
}

EncCu::~EncCu()
{
}

/** \param pcEncLib pointer to encoder class
 */
void EncCu::init( EncLib* pcEncLib, const SPS& sps PARL_PARAM( const int tId ) )
{
  m_pcEncCfg       = pcEncLib;
  m_pcIntraSearch  = pcEncLib->getIntraSearch( PARL_PARAM0( tId ) );
  m_pcInterSearch  = pcEncLib->getInterSearch( PARL_PARAM0( tId ) );
  m_pcTrQuant      = pcEncLib->getTrQuant( PARL_PARAM0( tId ) );
  m_pcRdCost       = pcEncLib->getRdCost ( PARL_PARAM0( tId ) );
  m_CABACEstimator = pcEncLib->getCABACEncoder( PARL_PARAM0( tId ) )->getCABACEstimator( &sps );
#if JVET_K0346
  m_CABACEstimator->setEncCu(this);
#endif
  m_CtxCache       = pcEncLib->getCtxCache( PARL_PARAM0( tId ) );
  m_pcRateCtrl     = pcEncLib->getRateCtrl();
  m_pcSliceEncoder = pcEncLib->getSliceEncoder();
#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
  m_pcEncLib       = pcEncLib;
  m_dataId         = tId;
#endif

#if REUSE_CU_RESULTS
  DecCu::init( m_pcTrQuant, m_pcIntraSearch, m_pcInterSearch );
#endif

  m_modeCtrl->init( m_pcEncCfg, m_pcRateCtrl, m_pcRdCost );

  m_pcInterSearch->setModeCtrl( m_modeCtrl );
#if !JVET_K0220_ENC_CTRL
  m_pcIntraSearch->setModeCtrl( m_modeCtrl );
#endif

#if JVET_K0346
  ::memset(m_subMergeBlkSize, 0, sizeof(m_subMergeBlkSize));
  ::memset(m_subMergeBlkNum, 0, sizeof(m_subMergeBlkNum));
  m_prevPOC = MAX_UINT;
  m_clearSubMergeStatic = false;
#endif
}

// ====================================================================================================================
// Public member functions
// ====================================================================================================================

void EncCu::compressCtu( CodingStructure& cs, const UnitArea& area, const unsigned ctuRsAddr, const int prevQP[], const int currQP[] )
{
  m_modeCtrl->initCTUEncoding( *cs.slice );

#if ENABLE_SPLIT_PARALLELISM
  if( m_pcEncCfg->getNumSplitThreads() > 1 )
  {
    for( int jId = 1; jId < NUM_RESERVERD_SPLIT_JOBS; jId++ )
    {
      EncCu*            jobEncCu  = m_pcEncLib->getCuEncoder( cs.picture->scheduler.getSplitDataId( jId ) );
      CacheBlkInfoCtrl* cacheCtrl = dynamic_cast< CacheBlkInfoCtrl* >( jobEncCu->m_modeCtrl );
      if( cacheCtrl )
      {
        cacheCtrl->init( *cs.slice );
      }
    }
  }

  if( auto* cacheCtrl = dynamic_cast<CacheBlkInfoCtrl*>( m_modeCtrl ) ) { cacheCtrl->tick(); }
#endif

  // init the partitioning manager
  Partitioner *partitioner = PartitionerFactory::get( *cs.slice );
  partitioner->initCtu( area, CH_L, *cs.slice );

  // init current context pointer
  m_CurrCtx = m_CtxBuffer.data();

  CodingStructure *tempCS = m_pTempCS[gp_sizeIdxInfo->idxFrom(
area.lumaSize().width )][gp_sizeIdxInfo->idxFrom( area.lumaSize().height )]; CodingStructure *bestCS = m_pBestCS[gp_sizeIdxInfo->idxFrom( area.lumaSize().width )][gp_sizeIdxInfo->idxFrom( area.lumaSize().height )]; cs.initSubStructure( *tempCS, partitioner->chType, partitioner->currArea(), false ); cs.initSubStructure( *bestCS, partitioner->chType, partitioner->currArea(), false ); tempCS->currQP[CH_L] = bestCS->currQP[CH_L] = tempCS->baseQP = bestCS->baseQP = currQP[CH_L]; tempCS->prevQP[CH_L] = bestCS->prevQP[CH_L] = prevQP[CH_L]; xCompressCU( tempCS, bestCS, *partitioner ); // all signals were already copied during compression if the CTU was split - at this point only the structures are copied to the top level CS const bool copyUnsplitCTUSignals = bestCS->cus.size() == 1 && KEEP_PRED_AND_RESI_SIGNALS; cs.useSubStructure( *bestCS, partitioner->chType, CS::getArea( *bestCS, area, partitioner->chType ), copyUnsplitCTUSignals, false, false, copyUnsplitCTUSignals ); if( !cs.pcv->ISingleTree && cs.slice->isIntra() && cs.pcv->chrFormat != CHROMA_400 ) { m_CABACEstimator->getCtx() = m_CurrCtx->start; partitioner->initCtu( area, CH_C, *cs.slice ); cs.initSubStructure( *tempCS, partitioner->chType, partitioner->currArea(), false ); cs.initSubStructure( *bestCS, partitioner->chType, partitioner->currArea(), false ); tempCS->currQP[CH_C] = bestCS->currQP[CH_C] = tempCS->baseQP = bestCS->baseQP = currQP[CH_C]; tempCS->prevQP[CH_C] = bestCS->prevQP[CH_C] = prevQP[CH_C]; xCompressCU( tempCS, bestCS, *partitioner ); const bool copyUnsplitCTUSignals = bestCS->cus.size() == 1 && KEEP_PRED_AND_RESI_SIGNALS; cs.useSubStructure( *bestCS, partitioner->chType, CS::getArea( *bestCS, area, partitioner->chType ), copyUnsplitCTUSignals, false, false, copyUnsplitCTUSignals ); } // reset context states and uninit context pointer m_CABACEstimator->getCtx() = m_CurrCtx->start; m_CurrCtx = 0; delete partitioner; #if ENABLE_SPLIT_PARALLELISM && ENABLE_WPP_PARALLELISM if( m_pcEncCfg->getNumSplitThreads() > 1 && m_pcEncCfg->getNumWppThreads() > 1 ) { cs.picture->finishCtuPart( area ); } #endif // Ensure that a coding was found // Selected mode's RD-cost must be not MAX_DOUBLE. 
CHECK( bestCS->cus.empty() , "No possible encoding found" ); CHECK( bestCS->cus[0]->partSize == NUMBER_OF_PART_SIZES , "No possible encoding found" ); CHECK( bestCS->cus[0]->predMode == NUMBER_OF_PREDICTION_MODES, "No possible encoding found" ); CHECK( bestCS->cost == MAX_DOUBLE , "No possible encoding found" ); } // ==================================================================================================================== // Protected member functions // ==================================================================================================================== static int xCalcHADs8x8_ISlice(const Pel *piOrg, const int iStrideOrg) { int k, i, j, jj; int diff[64], m1[8][8], m2[8][8], m3[8][8], iSumHad = 0; for (k = 0; k < 64; k += 8) { diff[k + 0] = piOrg[0]; diff[k + 1] = piOrg[1]; diff[k + 2] = piOrg[2]; diff[k + 3] = piOrg[3]; diff[k + 4] = piOrg[4]; diff[k + 5] = piOrg[5]; diff[k + 6] = piOrg[6]; diff[k + 7] = piOrg[7]; piOrg += iStrideOrg; } //horizontal for (j = 0; j < 8; j++) { jj = j << 3; m2[j][0] = diff[jj ] + diff[jj + 4]; m2[j][1] = diff[jj + 1] + diff[jj + 5]; m2[j][2] = diff[jj + 2] + diff[jj + 6]; m2[j][3] = diff[jj + 3] + diff[jj + 7]; m2[j][4] = diff[jj ] - diff[jj + 4]; m2[j][5] = diff[jj + 1] - diff[jj + 5]; m2[j][6] = diff[jj + 2] - diff[jj + 6]; m2[j][7] = diff[jj + 3] - diff[jj + 7]; m1[j][0] = m2[j][0] + m2[j][2]; m1[j][1] = m2[j][1] + m2[j][3]; m1[j][2] = m2[j][0] - m2[j][2]; m1[j][3] = m2[j][1] - m2[j][3]; m1[j][4] = m2[j][4] + m2[j][6]; m1[j][5] = m2[j][5] + m2[j][7]; m1[j][6] = m2[j][4] - m2[j][6]; m1[j][7] = m2[j][5] - m2[j][7]; m2[j][0] = m1[j][0] + m1[j][1]; m2[j][1] = m1[j][0] - m1[j][1]; m2[j][2] = m1[j][2] + m1[j][3]; m2[j][3] = m1[j][2] - m1[j][3]; m2[j][4] = m1[j][4] + m1[j][5]; m2[j][5] = m1[j][4] - m1[j][5]; m2[j][6] = m1[j][6] + m1[j][7]; m2[j][7] = m1[j][6] - m1[j][7]; } //vertical for (i = 0; i < 8; i++) { m3[0][i] = m2[0][i] + m2[4][i]; m3[1][i] = m2[1][i] + m2[5][i]; m3[2][i] = m2[2][i] + m2[6][i]; m3[3][i] = m2[3][i] + m2[7][i]; m3[4][i] = m2[0][i] - m2[4][i]; m3[5][i] = m2[1][i] - m2[5][i]; m3[6][i] = m2[2][i] - m2[6][i]; m3[7][i] = m2[3][i] - m2[7][i]; m1[0][i] = m3[0][i] + m3[2][i]; m1[1][i] = m3[1][i] + m3[3][i]; m1[2][i] = m3[0][i] - m3[2][i]; m1[3][i] = m3[1][i] - m3[3][i]; m1[4][i] = m3[4][i] + m3[6][i]; m1[5][i] = m3[5][i] + m3[7][i]; m1[6][i] = m3[4][i] - m3[6][i]; m1[7][i] = m3[5][i] - m3[7][i]; m2[0][i] = m1[0][i] + m1[1][i]; m2[1][i] = m1[0][i] - m1[1][i]; m2[2][i] = m1[2][i] + m1[3][i]; m2[3][i] = m1[2][i] - m1[3][i]; m2[4][i] = m1[4][i] + m1[5][i]; m2[5][i] = m1[4][i] - m1[5][i]; m2[6][i] = m1[6][i] + m1[7][i]; m2[7][i] = m1[6][i] - m1[7][i]; } for (i = 0; i < 8; i++) { for (j = 0; j < 8; j++) { iSumHad += abs(m2[i][j]); } } iSumHad -= abs(m2[0][0]); iSumHad = (iSumHad + 2) >> 2; return(iSumHad); } int EncCu::updateCtuDataISlice(const CPelBuf buf) { int xBl, yBl; const int iBlkSize = 8; const Pel* pOrgInit = buf.buf; int iStrideOrig = buf.stride; int iSumHad = 0; for( yBl = 0; ( yBl + iBlkSize ) <= buf.height; yBl += iBlkSize ) { for( xBl = 0; ( xBl + iBlkSize ) <= buf.width; xBl += iBlkSize ) { const Pel* pOrg = pOrgInit + iStrideOrig*yBl + xBl; iSumHad += xCalcHADs8x8_ISlice( pOrg, iStrideOrig ); } } return( iSumHad ); } void EncCu::xCheckBestMode( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode ) { if( !tempCS->cus.empty() ) { if( tempCS->cus.size() == 1 ) { const CodingUnit& cu = *tempCS->cus.front(); CHECK( cu.skip && !cu.firstPU->mergeFlag, "Skip flag 
without a merge flag is not allowed!" ); } #if WCG_EXT DTRACE_BEST_MODE( tempCS, bestCS, m_pcRdCost->getLambda( true ) ); #else DTRACE_BEST_MODE( tempCS, bestCS, m_pcRdCost->getLambda() ); #endif if( m_modeCtrl->useModeResult( encTestMode, tempCS, partitioner ) ) { if( tempCS->cus.size() == 1 ) { // if tempCS is not a split-mode CodingUnit &cu = *tempCS->cus.front(); if( CU::isLosslessCoded( cu ) && !cu.ipcm ) { xFillPCMBuffer( cu ); } } std::swap( tempCS, bestCS ); // store temp best CI for next CU coding m_CurrCtx->best = m_CABACEstimator->getCtx(); } } // reset context states m_CABACEstimator->getCtx() = m_CurrCtx->start; } void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner ) { #if ENABLE_SPLIT_PARALLELISM CHECK( m_dataId != tempCS->picture->scheduler.getDataId(), "Working in the wrong dataId!" ); if( m_pcEncCfg->getNumSplitThreads() != 1 && tempCS->picture->scheduler.getSplitJobId() == 0 ) { if( m_modeCtrl->isParallelSplit( *tempCS, partitioner ) ) { m_modeCtrl->setParallelSplit( true ); xCompressCUParallel( tempCS, bestCS, partitioner ); return; } } #endif Slice& slice = *tempCS->slice; const PPS &pps = *tempCS->pps; const SPS &sps = *tempCS->sps; const uint32_t uiLPelX = tempCS->area.Y().lumaPos().x; const uint32_t uiTPelY = tempCS->area.Y().lumaPos().y; #if JVET_K0357_AMVR const unsigned wIdx = gp_sizeIdxInfo->idxFrom( partitioner.currArea().lwidth() ); #endif const UnitArea currCsArea = clipArea( CS::getArea( *bestCS, bestCS->area, partitioner.chType ), *tempCS->picture ); #if JVET_K0357_AMVR if( m_pImvTempCS && !slice.isIntra() ) { const unsigned maxMEPart = tempCS->pcv->only2Nx2N ? 1 : NUMBER_OF_PART_SIZES; for( unsigned p = 0; p < maxMEPart; p++ ) { tempCS->initSubStructure( *m_pImvTempCS[wIdx][p], partitioner.chType, partitioner.currArea(), false ); } } #endif m_modeCtrl->initCULevel( partitioner, *tempCS ); m_CurrCtx->start = m_CABACEstimator->getCtx(); m_cuChromaQpOffsetIdxPlus1 = 0; if( slice.getUseChromaQpAdj() ) { int lgMinCuSize = sps.getLog2MinCodingBlockSize() + std::max<int>( 0, sps.getLog2DiffMaxMinCodingBlockSize() - int( pps.getPpsRangeExtension().getDiffCuChromaQpOffsetDepth() ) ); m_cuChromaQpOffsetIdxPlus1 = ( ( uiLPelX >> lgMinCuSize ) + ( uiTPelY >> lgMinCuSize ) ) % ( pps.getPpsRangeExtension().getChromaQpOffsetListLen() + 1 ); } if( !m_modeCtrl->anyMode() ) { m_modeCtrl->finishCULevel( partitioner ); return; } DTRACE_UPDATE( g_trace_ctx, std::make_pair( "cux", uiLPelX ) ); DTRACE_UPDATE( g_trace_ctx, std::make_pair( "cuy", uiTPelY ) ); DTRACE_UPDATE( g_trace_ctx, std::make_pair( "cuw", tempCS->area.lwidth() ) ); DTRACE_UPDATE( g_trace_ctx, std::make_pair( "cuh", tempCS->area.lheight() ) ); DTRACE( g_trace_ctx, D_COMMON, "@(%4d,%4d) [%2dx%2d]\n", tempCS->area.lx(), tempCS->area.ly(), tempCS->area.lwidth(), tempCS->area.lheight() ); do { const EncTestMode currTestMode = m_modeCtrl->currTestMode(); #if SHARP_LUMA_DELTA_QP if( m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() && partitioner.currDepth <= pps.getMaxCuDQPDepth() ) { #if ENABLE_SPLIT_PARALLELISM CHECK( tempCS->picture->scheduler.getSplitJobId() > 0, "Changing lambda is only allowed in the master thread!" 
); #endif if (currTestMode.qp >= 0) { updateLambda(&slice, currTestMode.qp); } } #endif if( currTestMode.type == ETM_INTER_ME ) { #if JVET_K0357_AMVR if( ( currTestMode.opts & ETO_IMV ) != 0 ) { xCheckRDCostInterIMV(tempCS, bestCS, partitioner, currTestMode); } else #endif { xCheckRDCostInter( tempCS, bestCS, partitioner, currTestMode ); } } #if JVET_K_AFFINE else if( currTestMode.type == ETM_AFFINE ) { xCheckRDCostAffineMerge2Nx2N( tempCS, bestCS, partitioner, currTestMode ); } #endif #if REUSE_CU_RESULTS else if( currTestMode.type == ETM_RECO_CACHED ) { xReuseCachedResult( tempCS, bestCS, partitioner ); } #endif else if( currTestMode.type == ETM_MERGE_SKIP ) { xCheckRDCostMerge2Nx2N( tempCS, bestCS, partitioner, currTestMode ); } else if( currTestMode.type == ETM_INTRA ) { xCheckRDCostIntra( tempCS, bestCS, partitioner, currTestMode ); } else if( currTestMode.type == ETM_IPCM ) { xCheckIntraPCM( tempCS, bestCS, partitioner, currTestMode ); } else if( isModeSplit( currTestMode ) ) { xCheckModeSplit( tempCS, bestCS, partitioner, currTestMode ); } else { THROW( "Don't know how to handle mode: type = " << currTestMode.type << ", size = " << currTestMode.partSize << ", options = " << currTestMode.opts ); } } while( m_modeCtrl->nextMode( *tempCS, partitioner ) ); ////////////////////////////////////////////////////////////////////////// // Finishing CU #if ENABLE_SPLIT_PARALLELISM if( bestCS->cus.empty() ) { CHECK( bestCS->cost != MAX_DOUBLE, "Cost should be maximal if no encoding found" ); CHECK( bestCS->picture->scheduler.getSplitJobId() == 0, "Should always get a result in serial case" ); m_modeCtrl->finishCULevel( partitioner ); return; } #endif // set context states m_CABACEstimator->getCtx() = m_CurrCtx->best; // QP from last processed CU for further processing bestCS->prevQP[partitioner.chType] = bestCS->cus.back()->qp; bestCS->picture->getRecoBuf( currCsArea ).copyFrom( bestCS->getRecoBuf( currCsArea ) ); m_modeCtrl->finishCULevel( partitioner ); #if ENABLE_SPLIT_PARALLELISM if( tempCS->picture->scheduler.getSplitJobId() == 0 && m_pcEncCfg->getNumSplitThreads() != 1 ) { tempCS->picture->finishParallelPart( currCsArea ); } #endif // Assert if Best prediction mode is NONE // Selected mode's RD-cost must be not MAX_DOUBLE. CHECK( bestCS->cus.empty() , "No possible encoding found" ); CHECK( bestCS->cus[0]->partSize == NUMBER_OF_PART_SIZES , "No possible encoding found" ); CHECK( bestCS->cus[0]->predMode == NUMBER_OF_PREDICTION_MODES, "No possible encoding found" ); CHECK( bestCS->cost == MAX_DOUBLE , "No possible encoding found" ); } #if SHARP_LUMA_DELTA_QP void EncCu::updateLambda( Slice* slice, double dQP ) { #if WCG_EXT int NumberBFrames = ( m_pcEncCfg->getGOPSize() - 1 ); int SHIFT_QP = 12; double dLambda_scale = 1.0 - Clip3( 0.0, 0.5, 0.05*(double)(slice->getPic()->fieldPic ? 
NumberBFrames/2 : NumberBFrames) ); #if DISTORTION_LAMBDA_BUGFIX int bitdepth_luma_qp_scale = 6 * (slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA) - 8 - DISTORTION_PRECISION_ADJUSTMENT(slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA))); #else #if FULL_NBIT int bitdepth_luma_qp_scale = 6 * (slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA) - 8); #else int bitdepth_luma_qp_scale = 0; #endif #endif double qp_temp = (double) dQP + bitdepth_luma_qp_scale - SHIFT_QP; double dQPFactor = m_pcEncCfg->getGOPEntry( m_pcSliceEncoder->getGopId() ).m_QPFactor; if( slice->getSliceType() == I_SLICE ) { if( m_pcEncCfg->getIntraQpFactor() >= 0.0 /*&& m_pcEncCfg->getGOPEntry( m_pcSliceEncoder->getGopId() ).m_sliceType != I_SLICE*/ ) { dQPFactor = m_pcEncCfg->getIntraQpFactor(); } else { if( m_pcEncCfg->getLambdaFromQPEnable() ) { dQPFactor = 0.57; } else { dQPFactor = 0.57*dLambda_scale; } } } else if( m_pcEncCfg->getLambdaFromQPEnable() ) { dQPFactor = 0.57*dQPFactor; } double dLambda = dQPFactor*pow( 2.0, qp_temp/3.0 ); int depth = slice->getDepth(); if( !m_pcEncCfg->getLambdaFromQPEnable() && depth>0 ) { int qp_temp_slice = slice->getSliceQp() + bitdepth_luma_qp_scale - SHIFT_QP; // avoid lambda over adjustment, use slice_qp here dLambda *= Clip3( 2.00, 4.00, (qp_temp_slice / 6.0) ); // (j == B_SLICE && p_cur_frm->layer != 0 ) } if( !m_pcEncCfg->getUseHADME() && slice->getSliceType( ) != I_SLICE ) { dLambda *= 0.95; } const int temporalId = m_pcEncCfg->getGOPEntry( m_pcSliceEncoder->getGopId() ).m_temporalId; const std::vector<double> &intraLambdaModifiers = m_pcEncCfg->getIntraLambdaModifier(); double lambdaModifier; if( slice->getSliceType( ) != I_SLICE || intraLambdaModifiers.empty()) { lambdaModifier = m_pcEncCfg->getLambdaModifier(temporalId); } else { lambdaModifier = intraLambdaModifiers[(temporalId < intraLambdaModifiers.size()) ? 
temporalId : (intraLambdaModifiers.size() - 1)]; } dLambda *= lambdaModifier; int qpBDoffset = slice->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA); int iQP = max( -qpBDoffset, min( MAX_QP, (int) floor( dQP + 0.5 ) ) ); m_pcSliceEncoder->setUpLambda(slice, dLambda, iQP); #else int iQP = (int)dQP; const double oldQP = (double)slice->getSliceQpBase(); const double oldLambda = m_pcSliceEncoder->calculateLambda (slice, m_pcSliceEncoder->getGopId(), slice->getDepth(), oldQP, oldQP, iQP); const double newLambda = oldLambda * pow (2.0, (dQP - oldQP) / 3.0); #if RDOQ_CHROMA_LAMBDA const double chromaLambda = newLambda / m_pcRdCost->getChromaWeight(); const double lambdaArray[MAX_NUM_COMPONENT] = {newLambda, chromaLambda, chromaLambda}; m_pcTrQuant->setLambdas (lambdaArray); #else m_pcTrQuant->setLambda (newLambda); #endif m_pcRdCost->setLambda( newLambda, slice->getSPS()->getBitDepths() ); #endif } #endif #if ENABLE_SPLIT_PARALLELISM //#undef DEBUG_PARALLEL_TIMINGS //#define DEBUG_PARALLEL_TIMINGS 1 void EncCu::xCompressCUParallel( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner ) { const unsigned wIdx = gp_sizeIdxInfo->idxFrom( partitioner.currArea().lwidth() ); const unsigned hIdx = gp_sizeIdxInfo->idxFrom( partitioner.currArea().lheight() ); Picture* picture = tempCS->picture; int numJobs = m_modeCtrl->getNumParallelJobs( *bestCS, partitioner ); bool jobUsed [NUM_RESERVERD_SPLIT_JOBS]; std::fill( jobUsed, jobUsed + NUM_RESERVERD_SPLIT_JOBS, false ); const UnitArea currArea = CS::getArea( *tempCS, partitioner.currArea(), partitioner.chType ); #if ENABLE_WPP_PARALLELISM const int wppTId = picture->scheduler.getWppThreadId(); #endif const bool doParallel = !m_pcEncCfg->getForceSingleSplitThread(); #if _MSC_VER && ENABLE_WPP_PARALLELISM #pragma omp parallel for schedule(dynamic,1) num_threads(NUM_SPLIT_THREADS_IF_MSVC) if(doParallel) #else omp_set_num_threads( m_pcEncCfg->getNumSplitThreads() ); #pragma omp parallel for schedule(dynamic,1) if(doParallel) #endif for( int jId = 1; jId <= numJobs; jId++ ) { // thread start #if ENABLE_WPP_PARALLELISM picture->scheduler.setWppThreadId( wppTId ); #endif picture->scheduler.setSplitThreadId(); picture->scheduler.setSplitJobId( jId ); Partitioner* jobPartitioner = PartitionerFactory::get( *tempCS->slice ); EncCu* jobCuEnc = m_pcEncLib->getCuEncoder( picture->scheduler.getSplitDataId( jId ) ); auto* jobBlkCache = dynamic_cast<CacheBlkInfoCtrl*>( jobCuEnc->m_modeCtrl ); jobPartitioner->copyState( partitioner ); jobCuEnc ->copyState( this, *jobPartitioner, currArea, true ); if( jobBlkCache ) { jobBlkCache->tick(); } CodingStructure *&jobBest = jobCuEnc->m_pBestCS[wIdx][hIdx]; CodingStructure *&jobTemp = jobCuEnc->m_pTempCS[wIdx][hIdx]; jobUsed[jId] = true; jobCuEnc->xCompressCU( jobTemp, jobBest, *jobPartitioner ); delete jobPartitioner; picture->scheduler.setSplitJobId( 0 ); // thread stop } picture->scheduler.setSplitThreadId( 0 ); int bestJId = 0; double bestCost = bestCS->cost; for( int jId = 1; jId <= numJobs; jId++ ) { EncCu* jobCuEnc = m_pcEncLib->getCuEncoder( picture->scheduler.getSplitDataId( jId ) ); if( jobUsed[jId] && jobCuEnc->m_pBestCS[wIdx][hIdx]->cost < bestCost ) { bestCost = jobCuEnc->m_pBestCS[wIdx][hIdx]->cost; bestJId = jId; } } if( bestJId > 0 ) { copyState( m_pcEncLib->getCuEncoder( picture->scheduler.getSplitDataId( bestJId ) ), partitioner, currArea, false ); m_CurrCtx->best = m_CABACEstimator->getCtx(); tempCS = m_pTempCS[wIdx][hIdx]; bestCS = m_pBestCS[wIdx][hIdx]; } const int bitDepthY = 
tempCS->sps->getBitDepth( CH_L ); const UnitArea clipdArea = clipArea( currArea, *picture ); CHECK( calcCheckSum( picture->getRecoBuf( clipdArea.Y() ), bitDepthY ) != calcCheckSum( bestCS->getRecoBuf( clipdArea.Y() ), bitDepthY ), "Data copied incorrectly!" ); picture->finishParallelPart( currArea ); if( auto *blkCache = dynamic_cast<CacheBlkInfoCtrl*>( m_modeCtrl ) ) { for( int jId = 1; jId <= numJobs; jId++ ) { if( !jobUsed[jId] || jId == bestJId ) continue; auto *jobBlkCache = dynamic_cast<CacheBlkInfoCtrl*>( m_pcEncLib->getCuEncoder( picture->scheduler.getSplitDataId( jId ) )->m_modeCtrl ); CHECK( !jobBlkCache, "If own mode controller has blk info cache capability so should all other mode controllers!" ); blkCache->CacheBlkInfoCtrl::copyState( *jobBlkCache, partitioner.currArea() ); } blkCache->tick(); } } void EncCu::copyState( EncCu* other, Partitioner& partitioner, const UnitArea& currArea, const bool isDist ) { const unsigned wIdx = gp_sizeIdxInfo->idxFrom( partitioner.currArea().lwidth () ); const unsigned hIdx = gp_sizeIdxInfo->idxFrom( partitioner.currArea().lheight() ); if( isDist ) { other->m_pBestCS[wIdx][hIdx]->initSubStructure( *m_pBestCS[wIdx][hIdx], partitioner.chType, partitioner.currArea(), false ); other->m_pTempCS[wIdx][hIdx]->initSubStructure( *m_pTempCS[wIdx][hIdx], partitioner.chType, partitioner.currArea(), false ); } else { CodingStructure* dst = m_pBestCS[wIdx][hIdx]; const CodingStructure *src = other->m_pBestCS[wIdx][hIdx]; bool keepResi = KEEP_PRED_AND_RESI_SIGNALS; dst->useSubStructure( *src, partitioner.chType, currArea, KEEP_PRED_AND_RESI_SIGNALS, true, keepResi, keepResi ); dst->cost = src->cost; dst->dist = src->dist; dst->fracBits = src->fracBits; dst->features = src->features; } if( isDist ) { m_CurrCtx = m_CtxBuffer.data(); } m_pcInterSearch->copyState( *other->m_pcInterSearch ); m_modeCtrl ->copyState( *other->m_modeCtrl, partitioner.currArea() ); m_pcRdCost ->copyState( *other->m_pcRdCost ); m_pcTrQuant ->copyState( *other->m_pcTrQuant ); m_CABACEstimator->getCtx() = other->m_CABACEstimator->getCtx(); } #endif void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode) { const int qp = encTestMode.qp; const PPS &pps = *tempCS->pps; const Slice &slice = *tempCS->slice; const bool bIsLosslessMode = false; // False at this level. Next level down may set it to true. const int oldPrevQp = tempCS->prevQP[partitioner.chType]; const uint32_t currDepth = partitioner.currDepth; const PartSplit split = getPartSplit( encTestMode ); CHECK( split == CU_DONT_SPLIT, "No proper split provided!" ); tempCS->initStructData( qp, bIsLosslessMode ); #if !JVET_K0220_ENC_CTRL partitioner.splitCurrArea( split, *tempCS ); #endif m_CABACEstimator->getCtx() = m_CurrCtx->start; #if JVET_K0220_ENC_CTRL if( tempCS->sps->getSpsNext().getUseQTBT() ) { const TempCtx ctxStartSP( m_CtxCache, SubCtx( Ctx::SplitFlag, m_CABACEstimator->getCtx() ) ); const TempCtx ctxStartBT( m_CtxCache, SubCtx( Ctx::BTSplitFlag, m_CABACEstimator->getCtx() ) ); m_CABACEstimator->resetBits(); if( partitioner.getImplicitSplit( *tempCS ) != CU_QUAD_SPLIT ) { if( partitioner.canSplit( CU_QUAD_SPLIT, *tempCS ) ) { m_CABACEstimator->split_cu_flag( split == CU_QUAD_SPLIT, *tempCS, partitioner ); } if( split != CU_QUAD_SPLIT ) { m_CABACEstimator->split_cu_mode_mt( split, *tempCS, partitioner ); } } const double factor = ( tempCS->currQP[partitioner.chType] > 30 ? 
1.1 : 1.075 ); const double cost = m_pcRdCost->calcRdCost( uint64_t( m_CABACEstimator->getEstFracBits() + ( ( bestCS->fracBits ) / factor ) ), Distortion( bestCS->dist / factor ) ); m_CABACEstimator->getCtx() = SubCtx( Ctx::SplitFlag, ctxStartSP ); m_CABACEstimator->getCtx() = SubCtx( Ctx::BTSplitFlag, ctxStartBT ); if( cost > bestCS->cost ) { xCheckBestMode( tempCS, bestCS, partitioner, encTestMode ); return; } } partitioner.splitCurrArea( split, *tempCS ); #endif m_CurrCtx++; tempCS->getRecoBuf().fill( 0 ); do { const auto &subCUArea = partitioner.currArea(); if( tempCS->picture->Y().contains( subCUArea.lumaPos() ) ) { const unsigned wIdx = gp_sizeIdxInfo->idxFrom( subCUArea.lwidth () ); const unsigned hIdx = gp_sizeIdxInfo->idxFrom( subCUArea.lheight() ); CodingStructure *tempSubCS = m_pTempCS[wIdx][hIdx]; CodingStructure *bestSubCS = m_pBestCS[wIdx][hIdx]; tempCS->initSubStructure( *tempSubCS, partitioner.chType, subCUArea, false ); tempCS->initSubStructure( *bestSubCS, partitioner.chType, subCUArea, false ); xCompressCU( tempSubCS, bestSubCS, partitioner ); if( bestSubCS->cost == MAX_DOUBLE ) { CHECK( split == CU_QUAD_SPLIT, "Split decision reusing cannot skip quad split" ); tempCS->cost = MAX_DOUBLE; m_CurrCtx--; partitioner.exitCurrSplit(); xCheckBestMode( tempCS, bestCS, partitioner, encTestMode ); return; } bool keepResi = KEEP_PRED_AND_RESI_SIGNALS; tempCS->useSubStructure( *bestSubCS, partitioner.chType, CS::getArea( *tempCS, subCUArea, partitioner.chType ), KEEP_PRED_AND_RESI_SIGNALS, true, keepResi, keepResi ); if(currDepth < pps.getMaxCuDQPDepth()) { tempCS->prevQP[partitioner.chType] = bestSubCS->prevQP[partitioner.chType]; } tempSubCS->releaseIntermediateData(); bestSubCS->releaseIntermediateData(); } } while( partitioner.nextPart( *tempCS ) ); partitioner.exitCurrSplit(); m_CurrCtx--; // Finally, generate split-signaling bits for RD-cost check const PartSplit implicitSplit = partitioner.getImplicitSplit( *tempCS ); { bool enforceQT = implicitSplit == CU_QUAD_SPLIT; #if HM_QTBT_REPRODUCE_FAST_LCTU_BUG // LARGE CTU bug if( m_pcEncCfg->getQTBT() && m_pcEncCfg->getUseFastLCTU() ) { unsigned minDepth = 0; unsigned maxDepth = g_aucLog2[tempCS->sps->getSpsNext().getCTUSize()] - g_aucLog2[tempCS->sps->getSpsNext().getMinQTSize( slice.getSliceType(), partitioner.chType )]; if( auto ad = dynamic_cast<AdaptiveDepthPartitioner*>( &partitioner ) ) { ad->setMaxMinDepth( minDepth, maxDepth, *tempCS ); } if( minDepth > partitioner.currQtDepth ) { // enforce QT enforceQT = true; } } #endif if( !enforceQT ) { m_CABACEstimator->resetBits(); if( partitioner.canSplit( CU_QUAD_SPLIT, *tempCS ) ) { m_CABACEstimator->split_cu_flag( split == CU_QUAD_SPLIT, *tempCS, partitioner ); } #if JVET_K0554 if( split != CU_QUAD_SPLIT ) #else if( split != CU_QUAD_SPLIT && implicitSplit == CU_DONT_SPLIT ) #endif { m_CABACEstimator->split_cu_mode_mt( split, *tempCS, partitioner ); } tempCS->fracBits += m_CABACEstimator->getEstFracBits(); // split bits } } tempCS->cost = m_pcRdCost->calcRdCost( tempCS->fracBits, tempCS->dist ); // Check Delta QP bits for splitted structure xCheckDQP( *tempCS, partitioner, true ); // If the configuration being tested exceeds the maximum number of bytes for a slice / slice-segment, then // a proper RD evaluation cannot be performed. Therefore, termination of the // slice/slice-segment must be made prior to this CTU. // This can be achieved by forcing the decision to be that of the rpcTempCU. // The exception is each slice / slice-segment must have at least one CTU. 
if (bestCS->cost != MAX_DOUBLE) { #if HEVC_TILES_WPP const TileMap& tileMap = *tempCS->picture->tileMap; #endif #if HEVC_TILES_WPP || HEVC_DEPENDENT_SLICES const uint32_t CtuAddr = CU::getCtuAddr( *bestCS->getCU( partitioner.chType ) ); #endif const bool isEndOfSlice = slice.getSliceMode() == FIXED_NUMBER_OF_BYTES && ((slice.getSliceBits() + CS::getEstBits(*bestCS)) > slice.getSliceArgument() << 3) #if HEVC_TILES_WPP && CtuAddr != tileMap.getCtuTsToRsAddrMap(slice.getSliceCurStartCtuTsAddr()) #endif #if HEVC_DEPENDENT_SLICES && CtuAddr != tileMap.getCtuTsToRsAddrMap(slice.getSliceSegmentCurStartCtuTsAddr()); #else ; #endif #if HEVC_DEPENDENT_SLICES const bool isEndOfSliceSegment = slice.getSliceSegmentMode() == FIXED_NUMBER_OF_BYTES && ((slice.getSliceSegmentBits() + CS::getEstBits(*bestCS)) > slice.getSliceSegmentArgument() << 3) && CtuAddr != tileMap.getCtuTsToRsAddrMap(slice.getSliceSegmentCurStartCtuTsAddr()); // Do not need to check slice condition for slice-segment since a slice-segment is a subset of a slice. if (isEndOfSlice || isEndOfSliceSegment) #else if(isEndOfSlice) #endif { bestCS->cost = MAX_DOUBLE; } } // RD check for sub partitioned coding structure. xCheckBestMode( tempCS, bestCS, partitioner, encTestMode ); tempCS->releaseIntermediateData(); tempCS->prevQP[partitioner.chType] = oldPrevQp; } void EncCu::xCheckRDCostIntra( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode ) { #if JVET_K1000_SIMPLIFIED_EMT double bestInterCost = m_modeCtrl->getBestInterCost(); double costSize2Nx2NemtFirstPass = m_modeCtrl->getEmtSize2Nx2NFirstPassCost(); double costSizeNxNemtFirstPass = MAX_DOUBLE; bool skipSecondEmtPass = m_modeCtrl->getSkipSecondEMTPass(); const SPS &sps = *tempCS->sps; #endif const PPS &pps = *tempCS->pps; const CodingUnit *bestCU = bestCS->getCU( partitioner.chType ); #if JVET_K1000_SIMPLIFIED_EMT const int maxSizeEMT = pps.pcv->noRQT ? EMT_INTRA_MAX_CU_WITH_QTBT : EMT_INTRA_MAX_CU; #endif #if JVET_K1000_SIMPLIFIED_EMT #if HM_EMT_NSST_AS_IN_JEM uint8_t considerEmtSecondPass = ( sps.getSpsNext().getUseIntraEMT() && isLuma( partitioner.chType ) && partitioner.currArea().lwidth() <= maxSizeEMT && partitioner.currArea().lheight() <= maxSizeEMT ) ? 1 : 0; #else uint8_t considerEmtSecondPass = ( sps.getSpsNext().getUseIntraEMT() && isLuma( partitioner.chType ) && partitioner.currArea().lwidth() <= maxSizeEMT && partitioner.currArea().lheight() <= maxSizeEMT && nsstIdx == 0 ) ? 1 : 0; #endif #endif Distortion interHad = m_modeCtrl->getInterHad(); #if JVET_K1000_SIMPLIFIED_EMT for( uint8_t emtCuFlag = 0; emtCuFlag <= considerEmtSecondPass; emtCuFlag++ ) #else for( uint8_t numPasses = 0; numPasses < 1; numPasses++ ) #endif { #if JVET_K1000_SIMPLIFIED_EMT //Possible early EMT tests interruptions #if !JVET_K0220_ENC_CTRL //1) saveLoadTag code for EMT if( sps.getSpsNext().getUseQTBT() && slsCtrl && m_pcEncCfg->getUseSaveLoadEncInfo() ) { if( m_pcEncCfg->getIntraEMT() && LOAD_ENC_INFO == slsCtrl->getSaveLoadTag( tempCS->area ) && ( emtCuFlag > 0 ) != slsCtrl->getSaveLoadEmtCuFlag( tempCS->area ) ) { continue; } } #endif //2) Second EMT pass. This "if clause" is necessary because of the NSST and PDPC "for loops". 
if( emtCuFlag && skipSecondEmtPass ) { continue; } #endif //3) if interHad is 0, only try further modes if some intra mode was already better than inter if( m_pcEncCfg->getUsePbIntraFast() && !tempCS->slice->isIntra() && bestCU && CU::isInter( *bestCS->getCU( partitioner.chType ) ) && interHad == 0 ) { continue; } tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); CodingUnit &cu = tempCS->addCU( CS::getArea( *tempCS, tempCS->area, partitioner.chType ), partitioner.chType ); partitioner.setCUData( cu ); cu.slice = tempCS->slice; #if HEVC_TILES_WPP cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() ); #endif cu.skip = false; cu.partSize = encTestMode.partSize; cu.predMode = MODE_INTRA; cu.transQuantBypass = encTestMode.lossless; cu.chromaQpAdj = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1; cu.qp = encTestMode.qp; //cu.ipcm = false; #if JVET_K1000_SIMPLIFIED_EMT cu.emtFlag = emtCuFlag; #endif CU::addPUs( cu ); tempCS->interHad = interHad; if( isLuma( partitioner.chType ) ) { m_pcIntraSearch->estIntraPredLumaQT( cu, partitioner ); #if DISTORTION_TYPE_BUGFIX if (m_pcEncCfg->getUsePbIntraFast() && tempCS->dist == std::numeric_limits<Distortion>::max() && tempCS->interHad == 0) #else if (m_pcEncCfg->getUsePbIntraFast() && tempCS->dist == MAX_UINT && tempCS->interHad == 0) #endif { interHad = 0; // JEM assumes only perfect reconstructions can from now on beat the inter mode m_modeCtrl->enforceInterHad( 0 ); continue; } if( !CS::isDualITree( *tempCS ) ) { cu.cs->picture->getRecoBuf( cu.Y() ).copyFrom( cu.cs->getRecoBuf( COMPONENT_Y ) ); } } if( tempCS->area.chromaFormat != CHROMA_400 && ( partitioner.chType == CHANNEL_TYPE_CHROMA || !CS::isDualITree( *tempCS ) ) ) { m_pcIntraSearch->estIntraPredChromaQT( cu, partitioner ); } cu.rootCbf = false; for( uint32_t t = 0; t < getNumberValidTBlocks( *cu.cs->pcv ); t++ ) { cu.rootCbf |= cu.firstTU->cbf[t] != 0; } // Get total bits for current mode: encode CU m_CABACEstimator->resetBits(); if( pps.getTransquantBypassEnabledFlag() ) { m_CABACEstimator->cu_transquant_bypass_flag( cu ); } if( !cu.cs->slice->isIntra() ) { m_CABACEstimator->cu_skip_flag ( cu ); } m_CABACEstimator->pred_mode ( cu ); m_CABACEstimator->cu_pred_data ( cu ); m_CABACEstimator->pcm_data ( cu ); // Encode Coefficients CUCtx cuCtx; cuCtx.isDQPCoded = true; cuCtx.isChromaQpAdjCoded = true; m_CABACEstimator->cu_residual( cu, partitioner, cuCtx ); tempCS->fracBits = m_CABACEstimator->getEstFracBits(); tempCS->cost = m_pcRdCost->calcRdCost(tempCS->fracBits, tempCS->dist); xEncodeDontSplit( *tempCS, partitioner ); xCheckDQP( *tempCS, partitioner ); #if JVET_K1000_SIMPLIFIED_EMT // we save the cost of the modes for the first EMT pass if( !emtCuFlag ) static_cast< double& >( cu.partSize == SIZE_2Nx2N ? 
costSize2Nx2NemtFirstPass : costSizeNxNemtFirstPass ) = tempCS->cost; #endif #if WCG_EXT DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) ); #else DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda() ); #endif xCheckBestMode( tempCS, bestCS, partitioner, encTestMode ); #if JVET_K1000_SIMPLIFIED_EMT //now we check whether the second pass of SIZE_2Nx2N and the whole Intra SIZE_NxN should be skipped or not #if JVET_K0220_ENC_CTRL if( !emtCuFlag && !tempCS->slice->isIntra() && bestCU && bestCU->predMode != MODE_INTRA && cu.partSize == SIZE_2Nx2N && m_pcEncCfg->getFastInterEMT() ) #else if( !emtCuFlag && !tempCS->slice->isIntra() && bestCU && bestCU->predMode != MODE_INTRA && cu.partSize == SIZE_2Nx2N && m_pcEncCfg->getFastInterEMT() && ( m_pcEncCfg->getUseSaveLoadEncInfo() ? ( bestInterCost < MAX_DOUBLE ) : true ) ) #endif { const double thEmtInterFastSkipIntra = 1.4; // Skip checking Intra if "2Nx2N using DCT2" is worse than best Inter mode if( costSize2Nx2NemtFirstPass > thEmtInterFastSkipIntra * bestInterCost ) { skipSecondEmtPass = true; m_modeCtrl->setSkipSecondEMTPass( true ); break; } } #endif } //for emtCuFlag } void EncCu::xCheckIntraPCM(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode ) { tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); CodingUnit &cu = tempCS->addCU( tempCS->area, partitioner.chType ); partitioner.setCUData( cu ); cu.slice = tempCS->slice; #if HEVC_TILES_WPP cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() ); #endif cu.skip = false; cu.partSize = SIZE_2Nx2N; cu.predMode = MODE_INTRA; cu.transQuantBypass = encTestMode.lossless; cu.chromaQpAdj = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1; cu.qp = encTestMode.qp; cu.ipcm = true; tempCS->addPU(tempCS->area, partitioner.chType); tempCS->addTU( tempCS->area, partitioner.chType ); m_pcIntraSearch->IPCMSearch(*tempCS, partitioner); m_CABACEstimator->getCtx() = m_CurrCtx->start; m_CABACEstimator->resetBits(); if( tempCS->pps->getTransquantBypassEnabledFlag() ) { m_CABACEstimator->cu_transquant_bypass_flag( cu ); } if( !cu.cs->slice->isIntra() ) { m_CABACEstimator->cu_skip_flag ( cu ); } m_CABACEstimator->pred_mode ( cu ); m_CABACEstimator->pcm_data ( cu ); tempCS->fracBits = m_CABACEstimator->getEstFracBits(); tempCS->cost = m_pcRdCost->calcRdCost(tempCS->fracBits, tempCS->dist); xEncodeDontSplit( *tempCS, partitioner ); xCheckDQP( *tempCS, partitioner ); #if WCG_EXT DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) ); #else DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda() ); #endif xCheckBestMode( tempCS, bestCS, partitioner, encTestMode ); } void EncCu::xCheckDQP( CodingStructure& cs, Partitioner& partitioner, bool bKeepCtx ) { CHECK( bKeepCtx && cs.cus.size() <= 1 && partitioner.getImplicitSplit( cs ) == CU_DONT_SPLIT, "bKeepCtx should only be set in split case" ); CHECK( !bKeepCtx && cs.cus.size() > 1, "bKeepCtx should never be set for non-split case" ); if( !cs.pps->getUseDQP() ) { return; } if( bKeepCtx && partitioner.currDepth != cs.pps->getMaxCuDQPDepth() ) { return; } if( !bKeepCtx && partitioner.currDepth > cs.pps->getMaxCuDQPDepth() ) { return; } CodingUnit* cuFirst = cs.getCU( partitioner.chType ); CHECK( !cuFirst, "No CU available" ); bool hasResidual = false; for( const auto &cu : cs.cus ) { if( cu->rootCbf ) { hasResidual = true; break; } } int predQP = CU::predictQP( *cuFirst, cs.prevQP[partitioner.chType] ); if( hasResidual ) { TempCtx ctxTemp( m_CtxCache ); if( !bKeepCtx ) ctxTemp 
= SubCtx( Ctx::DeltaQP, m_CABACEstimator->getCtx() ); m_CABACEstimator->resetBits(); m_CABACEstimator->cu_qp_delta( *cuFirst, predQP, cuFirst->qp ); cs.fracBits += m_CABACEstimator->getEstFracBits(); // dQP bits cs.cost = m_pcRdCost->calcRdCost(cs.fracBits, cs.dist); if( !bKeepCtx ) m_CABACEstimator->getCtx() = SubCtx( Ctx::DeltaQP, ctxTemp ); // NOTE: reset QPs for CUs without residuals up to first coded CU for( const auto &cu : cs.cus ) { if( cu->rootCbf ) { break; } cu->qp = predQP; } } else { // No residuals: reset CU QP to predicted value for( const auto &cu : cs.cus ) { cu->qp = predQP; } } } void EncCu::xFillPCMBuffer( CodingUnit &cu ) { const ChromaFormat format = cu.chromaFormat; const uint32_t numberValidComponents = getNumberValidComponents(format); for( auto &tu : CU::traverseTUs( cu ) ) { for( uint32_t ch = 0; ch < numberValidComponents; ch++ ) { const ComponentID compID = ComponentID( ch ); const CompArea &compArea = tu.blocks[ compID ]; const CPelBuf source = tu.cs->getOrgBuf( compArea ); PelBuf destination = tu.getPcmbuf( compID ); destination.copyFrom( source ); } } } void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode ) { const Slice &slice = *tempCS->slice; CHECK( slice.getSliceType() == I_SLICE, "Merge modes not available for I-slices" ); tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); MergeCtx mergeCtx; const SPS &sps = *tempCS->sps; #if JVET_K0346 if( sps.getSpsNext().getUseSubPuMvp() ) { Size bufSize = g_miScaling.scale( tempCS->area.lumaSize() ); mergeCtx.subPuMvpMiBuf = MotionBuf( m_SubPuMiBuf, bufSize ); } #endif { // first get merge candidates CodingUnit cu( tempCS->area ); cu.cs = tempCS; cu.partSize = SIZE_2Nx2N; cu.predMode = MODE_INTER; cu.slice = tempCS->slice; #if HEVC_TILES_WPP cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap(tempCS->area.lumaPos()); #endif PredictionUnit pu( tempCS->area ); pu.cu = &cu; pu.cs = tempCS; PU::getInterMergeCandidates(pu, mergeCtx); } bool candHasNoResidual[MRG_MAX_NUM_CANDS]; for (uint32_t ui = 0; ui < mergeCtx.numValidMergeCand; ui++) { candHasNoResidual[ui] = false; } bool bestIsSkip = false; unsigned uiNumMrgSATDCand = mergeCtx.numValidMergeCand; PelUnitBuf acMergeBuffer [ MRG_MAX_NUM_CANDS ]; static_vector<unsigned, MRG_MAX_NUM_CANDS> RdModeList; bool mrgTempBufSet = false; for( unsigned i = 0; i < MRG_MAX_NUM_CANDS; i++ ) { RdModeList.push_back( i ); } if( m_pcEncCfg->getUseFastMerge() ) { uiNumMrgSATDCand = NUM_MRG_SATD_CAND; bestIsSkip = false; if( auto blkCache = dynamic_cast< CacheBlkInfoCtrl* >( m_modeCtrl ) ) { bestIsSkip = blkCache->isSkip( tempCS->area ); } static_vector<double, MRG_MAX_NUM_CANDS> candCostList; // 1. Pass: get SATD-cost for selected candidates and reduce their count if( !bestIsSkip ) { RdModeList.clear(); mrgTempBufSet = true; const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda( encTestMode.lossless ); CodingUnit &cu = tempCS->addCU( tempCS->area, partitioner.chType ); partitioner.setCUData( cu ); cu.slice = tempCS->slice; #if HEVC_TILES_WPP cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() ); #endif cu.skip = false; cu.partSize = SIZE_2Nx2N; //cu.affine cu.predMode = MODE_INTER; //cu.LICFlag cu.transQuantBypass = encTestMode.lossless; cu.chromaQpAdj = cu.transQuantBypass ? 
0 : m_cuChromaQpOffsetIdxPlus1; cu.qp = encTestMode.qp; //cu.emtFlag is set below PredictionUnit &pu = tempCS->addPU( cu, partitioner.chType ); DistParam distParam; const bool bUseHadamard= !encTestMode.lossless; m_pcRdCost->setDistParam (distParam, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y(), sps.getBitDepth (CHANNEL_TYPE_LUMA), COMPONENT_Y, bUseHadamard); const UnitArea localUnitArea( tempCS->area.chromaFormat, Area( 0, 0, tempCS->area.Y().width, tempCS->area.Y().height) ); for( uint32_t uiMergeCand = 0; uiMergeCand < mergeCtx.numValidMergeCand; uiMergeCand++ ) { acMergeBuffer[uiMergeCand] = m_acMergeBuffer[uiMergeCand].getBuf( localUnitArea ); mergeCtx.setMergeInfo( pu, uiMergeCand ); PU::spanMotionInfo( pu, mergeCtx ); distParam.cur = acMergeBuffer[uiMergeCand].Y(); m_pcInterSearch->motionCompensation( pu, acMergeBuffer[uiMergeCand] ); if( mergeCtx.interDirNeighbours[uiMergeCand] == 3 && mergeCtx.mrgTypeNeighbours[uiMergeCand] == MRG_TYPE_DEFAULT_N ) { mergeCtx.mvFieldNeighbours[2*uiMergeCand].mv = pu.mv[0]; mergeCtx.mvFieldNeighbours[2*uiMergeCand+1].mv = pu.mv[1]; } #if DISTORTION_TYPE_BUGFIX Distortion uiSad = distParam.distFunc(distParam); #else uint32_t uiSad = distParam.distFunc(distParam); #endif uint32_t uiBitsCand = uiMergeCand + 1; if( uiMergeCand == tempCS->slice->getMaxNumMergeCand() - 1 ) { uiBitsCand--; } double cost = (double)uiSad + (double)uiBitsCand * sqrtLambdaForFirstPass; updateCandList( uiMergeCand, cost, RdModeList, candCostList, uiNumMrgSATDCand ); CHECK( std::min( uiMergeCand + 1, uiNumMrgSATDCand ) != RdModeList.size(), "" ); } // Try to limit number of candidates using SATD-costs for( uint32_t i = 1; i < uiNumMrgSATDCand; i++ ) { if( candCostList[i] > MRG_FAST_RATIO * candCostList[0] ) { uiNumMrgSATDCand = i; break; } } tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); } #if JVET_K0352_MERGE_ENCOPT else { uiNumMrgSATDCand = mergeCtx.numValidMergeCand; } #endif } const uint32_t iteration = encTestMode.lossless ? 1 : 2; // 2. Pass: check candidates using full RD test for( uint32_t uiNoResidualPass = 0; uiNoResidualPass < iteration; uiNoResidualPass++ ) { for( uint32_t uiMrgHADIdx = 0; uiMrgHADIdx < uiNumMrgSATDCand; uiMrgHADIdx++ ) { uint32_t uiMergeCand = RdModeList[uiMrgHADIdx]; if( ( (uiNoResidualPass != 0) && candHasNoResidual[uiMergeCand] ) || ( (uiNoResidualPass == 0) && bestIsSkip ) ) { continue; } // first get merge candidates CodingUnit &cu = tempCS->addCU( tempCS->area, partitioner.chType ); partitioner.setCUData( cu ); cu.slice = tempCS->slice; #if HEVC_TILES_WPP cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() ); #endif cu.skip = false; cu.partSize = SIZE_2Nx2N; //cu.affine cu.predMode = MODE_INTER; //cu.LICFlag cu.transQuantBypass = encTestMode.lossless; cu.chromaQpAdj = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1; cu.qp = encTestMode.qp; PredictionUnit &pu = tempCS->addPU( cu, partitioner.chType ); mergeCtx.setMergeInfo( pu, uiMergeCand ); PU::spanMotionInfo( pu, mergeCtx ); if( mrgTempBufSet ) { tempCS->getPredBuf().copyFrom( acMergeBuffer[ uiMergeCand ]); } else { m_pcInterSearch->motionCompensation( pu ); } #if JVET_K0357_AMVR xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, NULL, true, ( ( uiNoResidualPass == 0 ) ? &candHasNoResidual[uiMergeCand] : NULL ) ); #else xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, ( ( uiNoResidualPass == 0 ) ? 
&candHasNoResidual[uiMergeCand] : NULL ) ); #endif if( m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip ) { bestIsSkip = bestCS->getCU( partitioner.chType )->rootCbf == 0; } tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); }// end loop uiMrgHADIdx if( uiNoResidualPass == 0 && m_pcEncCfg->getUseEarlySkipDetection() ) { const CodingUnit &bestCU = *bestCS->getCU( partitioner.chType ); const PredictionUnit &bestPU = *bestCS->getPU( partitioner.chType ); if( bestCU.rootCbf == 0 ) { if( bestPU.mergeFlag ) { m_modeCtrl->setEarlySkipDetected(); } else if( m_pcEncCfg->getMotionEstimationSearchMethod() != MESEARCH_SELECTIVE ) { int absolute_MV = 0; for( uint32_t uiRefListIdx = 0; uiRefListIdx < 2; uiRefListIdx++ ) { if( slice.getNumRefIdx( RefPicList( uiRefListIdx ) ) > 0 ) { absolute_MV += bestPU.mvd[uiRefListIdx].getAbsHor() + bestPU.mvd[uiRefListIdx].getAbsVer(); } } if( absolute_MV == 0 ) { m_modeCtrl->setEarlySkipDetected(); } } } } } } #if JVET_K_AFFINE void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode ) { if( m_modeCtrl->getFastDeltaQp() ) { return; } #if JVET_K_AFFINE_BUG_FIXES if ( bestCS->area.lumaSize().width < 8 || bestCS->area.lumaSize().height < 8 ) #else if( tempCS->pcv->rectCUs && bestCS->area.lumaSize().area() < 64 ) #endif { return; } MvField affineMvField[2][3]; unsigned char interDirNeighbours; int numValidMergeCand; bool hasNoResidual = false; tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); CodingUnit &cu = tempCS->addCU( tempCS->area, partitioner.chType ); partitioner.setCUData( cu ); cu.slice = tempCS->slice; #if HEVC_TILES_WPP cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() ); #endif cu.skip = false; cu.partSize = encTestMode.partSize; cu.affine = true; cu.predMode = MODE_INTER; cu.transQuantBypass = encTestMode.lossless; cu.chromaQpAdj = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1; cu.qp = encTestMode.qp; CU::addPUs( cu ); cu.firstPU->mergeFlag = true; cu.firstPU->mergeIdx = 0; PU::getAffineMergeCand( *cu.firstPU, affineMvField, interDirNeighbours, numValidMergeCand ); if( numValidMergeCand == -1 ) { return; } cu.firstPU->interDir = interDirNeighbours; PU::setAllAffineMvField( *cu.firstPU, affineMvField[REF_PIC_LIST_0], REF_PIC_LIST_0 ); PU::setAllAffineMvField( *cu.firstPU, affineMvField[REF_PIC_LIST_1], REF_PIC_LIST_1 ); PU::spanMotionInfo( *cu.firstPU ); m_pcInterSearch->motionCompensation( cu ); #if JVET_K0357_AMVR xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0, NULL, true, &hasNoResidual); #else xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0, &hasNoResidual); #endif if( ! 
(encTestMode.lossless || hasNoResidual) ) { tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); tempCS->copyStructure( *bestCS, partitioner.chType ); tempCS->getPredBuf().copyFrom( bestCS->getPredBuf() ); #if JVET_K0357_AMVR xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 1, NULL, true, &hasNoResidual); #else xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 1, &hasNoResidual); #endif } } #endif void EncCu::xCheckRDCostInter( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode ) { tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); CodingUnit &cu = tempCS->addCU( tempCS->area, partitioner.chType ); partitioner.setCUData( cu ); cu.slice = tempCS->slice; #if HEVC_TILES_WPP cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() ); #endif cu.skip = false; cu.partSize = encTestMode.partSize; //cu.affine cu.predMode = MODE_INTER; cu.transQuantBypass = encTestMode.lossless; cu.chromaQpAdj = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1; cu.qp = encTestMode.qp; CU::addPUs( cu ); m_pcInterSearch->predInterSearch( cu, partitioner ); #if JVET_K0357_AMVR const unsigned wIdx = gp_sizeIdxInfo->idxFrom( tempCS->area.lwidth () ); #endif #if JVET_K0357_AMVR xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0, (m_pImvTempCS ? m_pImvTempCS[wIdx][encTestMode.partSize] : NULL)); #else xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0, NULL); #endif } #if JVET_K0357_AMVR bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode ) { int iIMV = int( ( encTestMode.opts & ETO_IMV ) >> ETO_IMV_SHIFT ); // Only int-Pel, 4-Pel and fast 4-Pel allowed CHECK( iIMV != 1 && iIMV != 2 && iIMV != 3, "Unsupported IMV Mode" ); // Fast 4-Pel Mode EncTestMode encTestModeBase = encTestMode; // copy for clearing non-IMV options encTestModeBase.opts = EncTestModeOpts( encTestModeBase.opts & ETO_IMV ); // clear non-IMV options (is that intended?) tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); CodingStructure* pcCUInfo2Reuse = nullptr; if( m_pImvTempCS && encTestMode.partSize != SIZE_2Nx2N && ( ( encTestMode.opts & ETO_FORCE_MERGE ) == 0 ) ) { pcCUInfo2Reuse = m_pImvTempCS[gp_sizeIdxInfo->idxFrom( tempCS->area.lwidth() )][encTestMode.partSize]; if( pcCUInfo2Reuse != nullptr ) { CHECK( tempCS->area != pcCUInfo2Reuse->area, " mismatch" ); tempCS->copyStructure( *pcCUInfo2Reuse, partitioner.chType ); tempCS->fracBits = 0; tempCS->dist = 0; tempCS->cost = MAX_DOUBLE; } } CodingUnit &cu = ( pcCUInfo2Reuse != nullptr ) ? *tempCS->getCU( partitioner.chType ) : tempCS->addCU( tempCS->area, partitioner.chType ); if( pcCUInfo2Reuse == nullptr ) { partitioner.setCUData( cu ); cu.slice = tempCS->slice; #if HEVC_TILES_WPP cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() ); #endif cu.skip = false; cu.partSize = encTestMode.partSize; //cu.affine cu.predMode = MODE_INTER; cu.transQuantBypass = encTestMode.lossless; cu.chromaQpAdj = cu.transQuantBypass ? 
0 : m_cuChromaQpOffsetIdxPlus1; cu.qp = encTestMode.qp; CU::addPUs( cu ); } else { CHECK( cu.partSize != encTestMode.partSize, "Mismatch" ); CHECK( cu.skip, "Mismatch" ); CHECK( cu.qtDepth != partitioner.currQtDepth, "Mismatch" ); CHECK( cu.btDepth != partitioner.currBtDepth, "Mismatch" ); CHECK( cu.mtDepth != partitioner.currMtDepth, "Mismatch" ); CHECK( cu.depth != partitioner.currDepth, "Mismatch" ); } cu.imv = iIMV > 1 ? 2 : 1; #if JVET_K1000_SIMPLIFIED_EMT cu.emtFlag = false; #endif if( pcCUInfo2Reuse != nullptr ) { // reuse the motion info from pcCUInfo2Reuse CU::resetMVDandMV2Int( cu, m_pcInterSearch ); if( !CU::hasSubCUNonZeroMVd( cu ) ) { m_modeCtrl->useModeResult( encTestModeBase, tempCS, partitioner ); return false; } else { m_pcInterSearch->motionCompensation( cu ); } } else { m_pcInterSearch->predInterSearch( cu, partitioner ); } if( !CU::hasSubCUNonZeroMVd( cu ) ) { m_modeCtrl->useModeResult( encTestModeBase, tempCS, partitioner ); return false; } xEncodeInterResidual(tempCS, bestCS, partitioner, encTestModeBase, 0, NULL); return true; } #endif #if JVET_K0357_AMVR void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, int residualPass, CodingStructure* imvCS, int emtMode, bool* bestHasNonResi ) #else void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, int residualPass, bool* bestHasNonResi ) #endif { if( residualPass == 1 && encTestMode.lossless ) { return; } CodingUnit* cu = tempCS->getCU( partitioner.chType ); double bestCostInternal = MAX_DOUBLE; { cu->skip = false; const bool skipResidual = residualPass == 1; m_pcInterSearch->encodeResAndCalcRdInterCU( *tempCS, partitioner, skipResidual ); xEncodeDontSplit( *tempCS, partitioner ); xCheckDQP( *tempCS, partitioner ); #if JVET_K0357_AMVR if( imvCS && (tempCS->cost < imvCS->cost) ) { if( imvCS->cost != MAX_DOUBLE ) { imvCS->initStructData( encTestMode.qp, encTestMode.lossless ); } imvCS->copyStructure( *tempCS, partitioner.chType ); } #endif if( NULL != bestHasNonResi && (bestCostInternal > tempCS->cost) ) { bestCostInternal = tempCS->cost; *bestHasNonResi = !cu->rootCbf; } #if WCG_EXT DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) ); #else DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda() ); #endif xCheckBestMode( tempCS, bestCS, partitioner, encTestMode ); }//end emt loop } void EncCu::xEncodeDontSplit( CodingStructure &cs, Partitioner &partitioner ) { m_CABACEstimator->resetBits(); { if( partitioner.canSplit( CU_QUAD_SPLIT, cs ) ) { m_CABACEstimator->split_cu_flag( false, cs, partitioner ); } if( partitioner.canSplit( CU_MT_SPLIT, cs ) ) { m_CABACEstimator->split_cu_mode_mt( CU_DONT_SPLIT, cs, partitioner ); } } cs.fracBits += m_CABACEstimator->getEstFracBits(); // split bits cs.cost = m_pcRdCost->calcRdCost( cs.fracBits, cs.dist ); } #if REUSE_CU_RESULTS void EncCu::xReuseCachedResult( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner ) { const SPS &sps = *tempCS->sps; BestEncInfoCache* bestEncCache = dynamic_cast<BestEncInfoCache*>( m_modeCtrl ); CHECK( !bestEncCache, "If this mode is chosen, mode controller has to implement the mode caching capabilities" ); EncTestMode cachedMode; if( bestEncCache->setCsFrom( *tempCS, cachedMode, partitioner ) ) { CodingUnit& cu = *tempCS->cus.front(); partitioner.setCUData( cu ); if( CU::isIntra( cu ) ) { xReconIntraQT( cu ); } else { xDeriveCUMV( cu ); xReconInter( cu 
); } Distortion finalDistortion = 0; const int numValidComponents = getNumberValidComponents( tempCS->area.chromaFormat ); for( int comp = 0; comp < numValidComponents; comp++ ) { const ComponentID compID = ComponentID( comp ); if( CS::isDualITree( *tempCS ) && toChannelType( compID ) != partitioner.chType ) { continue; } CPelBuf reco = tempCS->getRecoBuf( compID ); CPelBuf org = tempCS->getOrgBuf ( compID ); #if WCG_EXT if( m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() ) { #if WCG_EXT_BUGFIX const CPelBuf orgLuma = tempCS->getOrgBuf(tempCS->area.blocks[COMPONENT_Y]); #else const CPelBuf orgLuma = tempCS->getOrgBuf(cs.area.blocks[COMPONENT_Y]); #endif finalDistortion += m_pcRdCost->getDistPart( org, reco, sps.getBitDepth( toChannelType( compID ) ), compID, DF_SSE_WTD, &orgLuma ); } else #endif finalDistortion += m_pcRdCost->getDistPart( org, reco, sps.getBitDepth( toChannelType( compID ) ), compID, DF_SSE ); } m_CABACEstimator->getCtx() = m_CurrCtx->start; m_CABACEstimator->resetBits(); CUCtx cuCtx; cuCtx.isDQPCoded = true; cuCtx.isChromaQpAdjCoded = true; m_CABACEstimator->coding_unit( cu, partitioner, cuCtx ); tempCS->dist = finalDistortion; tempCS->fracBits = m_CABACEstimator->getEstFracBits(); tempCS->cost = m_pcRdCost->calcRdCost( tempCS->fracBits, tempCS->dist ); xEncodeDontSplit( *tempCS, partitioner ); xCheckDQP ( *tempCS, partitioner ); xCheckBestMode ( tempCS, bestCS, partitioner, cachedMode ); } else { THROW( "Should never happen!" ); } } #endif //! \}