Skip to content
Snippets Groups Projects
IntraSearch.cpp 66.3 KiB
Newer Older
  • Learn to ignore specific revisions
  • /* The copyright in this software is being made available under the BSD
     * License, included below. This software may be subject to other third party
     * and contributor rights, including patent rights, and no such rights are
     * granted under this license.
     *
     * Copyright (c) 2010-2018, ITU/ISO/IEC
     * All rights reserved.
     *
     * Redistribution and use in source and binary forms, with or without
     * modification, are permitted provided that the following conditions are met:
     *
     *  * Redistributions of source code must retain the above copyright notice,
     *    this list of conditions and the following disclaimer.
     *  * Redistributions in binary form must reproduce the above copyright notice,
     *    this list of conditions and the following disclaimer in the documentation
     *    and/or other materials provided with the distribution.
     *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
     *    be used to endorse or promote products derived from this software without
     *    specific prior written permission.
     *
     * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
     * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
     * THE POSSIBILITY OF SUCH DAMAGE.
     */
    
    /** \file     IntraSearch.cpp
     *  \brief    encoder intra search class
     */
    
    #include "IntraSearch.h"
    
    #include "EncModeCtrl.h"
    
    #include "CommonLib/CommonDef.h"
    #include "CommonLib/Rom.h"
    #include "CommonLib/Picture.h"
    #include "CommonLib/UnitTools.h"
    
    #include "CommonLib/dtrace_next.h"
    #include "CommonLib/dtrace_buffer.h"
    
    #include <math.h>
    #include <limits>
    
     //! \ingroup EncoderLib
     //! \{
    
    /**
     * Default constructor.
     *
     * Puts the object into the "not initialized" state: the listed pointer members
     * are nulled and every shared transform-skip prediction buffer slot is cleared.
     * The buffers themselves are allocated later by init().
     *
     * NOTE(review): m_pTempCS and m_pSaveCS are not part of the initializer list;
     * in this file they are first assigned in init() and reset in destroy().
     */
    IntraSearch::IntraSearch()
      : m_pSplitCS( nullptr )
      , m_pFullCS( nullptr )
      , m_pBestCS( nullptr )
      , m_pcEncCfg( nullptr )
      , m_pcTrQuant( nullptr )
      , m_pcRdCost( nullptr )
      , m_CABACEstimator( nullptr )
      , m_CtxCache( nullptr )
      , m_isInitialized( false )
    {
      // Clear the buffer slots one by one; nothing is owned yet.
      for( uint32_t blkIdx = 0; blkIdx < MAX_NUM_TBLOCKS; ++blkIdx )
      {
        m_pSharedPredTransformSkip[blkIdx] = nullptr;
      }
    }
    
    
    /**
     * Frees every coding structure and scratch buffer that init() allocated.
     *
     * Calling this on an object that is not initialized trips the CHECK below.
     * On return all owned pointers are null and the object is flagged as
     * not initialized.
     */
    void IntraSearch::destroy()
    {
      CHECK( !m_isInitialized, "Not initialized" );

      if( m_pcEncCfg )
      {
        const bool qtbtEnabled = m_pcEncCfg->getQTBT();

        // Layer counts are recomputed from the configuration with the same expressions init() uses.
        const uint32_t splitLayerCount = qtbtEnabled ? 1 : m_pcEncCfg->getQuadtreeTULog2MaxSize() - m_pcEncCfg->getQuadtreeTULog2MinSize() + 1;
        const uint32_t fullLayerCount  = qtbtEnabled ? 1 : m_pcEncCfg->getQuadtreeTULog2MaxSize() - m_pcEncCfg->getQuadtreeTULog2MinSize() + 1;
        const int      saveLayerCount  = 2;

        // Save structures first.
        for( uint32_t saveIdx = 0; saveIdx < saveLayerCount; ++saveIdx )
        {
          m_pSaveCS[saveIdx]->destroy();
          delete m_pSaveCS[saveIdx];
        }

        const uint32_t widthCount  = gp_sizeIdxInfo->numWidths();
        const uint32_t heightCount = gp_sizeIdxInfo->numHeights();

        for( uint32_t wIdx = 0; wIdx < widthCount; ++wIdx )
        {
          for( uint32_t hIdx = 0; hIdx < heightCount; ++hIdx )
          {
            // Cells that init() set to nullptr hold nothing to release.
            if( !( qtbtEnabled || wIdx == hIdx ) || !gp_sizeIdxInfo->isCuSize( gp_sizeIdxInfo->sizeFrom( wIdx ) ) || !gp_sizeIdxInfo->isCuSize( gp_sizeIdxInfo->sizeFrom( hIdx ) ) )
            {
              continue;
            }

            for( uint32_t splitIdx = 0; splitIdx < splitLayerCount; ++splitIdx )
            {
              m_pSplitCS[wIdx][hIdx][splitIdx]->destroy();
              delete m_pSplitCS[wIdx][hIdx][splitIdx];
            }

            for( uint32_t fullIdx = 0; fullIdx < fullLayerCount; ++fullIdx )
            {
              m_pFullCS[wIdx][hIdx][fullIdx]->destroy();
              delete m_pFullCS[wIdx][hIdx][fullIdx];
            }

            // Per-cell layer arrays.
            delete[] m_pSplitCS[wIdx][hIdx];
            delete[] m_pFullCS [wIdx][hIdx];

            m_pBestCS[wIdx][hIdx]->destroy();
            m_pTempCS[wIdx][hIdx]->destroy();

            delete m_pTempCS[wIdx][hIdx];
            delete m_pBestCS[wIdx][hIdx];
          }

          // Per-width rows.
          delete[] m_pSplitCS[wIdx];
          delete[] m_pFullCS [wIdx];

          delete[] m_pTempCS[wIdx];
          delete[] m_pBestCS[wIdx];
        }

        // Top-level tables.
        delete[] m_pSplitCS;
        delete[] m_pFullCS;

        delete[] m_pBestCS;
        delete[] m_pTempCS;

        delete[] m_pSaveCS;
      }

      m_pFullCS  = nullptr;
      m_pSplitCS = nullptr;

      m_pTempCS = nullptr;
      m_pBestCS = nullptr;

      m_pSaveCS = nullptr;

      // Shared transform-skip prediction buffers.
      for( uint32_t blkIdx = 0; blkIdx < MAX_NUM_TBLOCKS; ++blkIdx )
      {
        delete[] m_pSharedPredTransformSkip[blkIdx];
        m_pSharedPredTransformSkip[blkIdx] = nullptr;
      }

      m_isInitialized = false;
    }
    
    /**
     * Destructor. Releases the resources through destroy(), but only while the
     * object is still flagged as initialized.
     */
    IntraSearch::~IntraSearch()
    {
      if( !m_isInitialized )
      {
        return;
      }

      destroy();
    }
    
    /**
     * Binds the encoder components to this object and allocates its working storage.
     *
     * Allocated here: one transform-skip prediction buffer per transform block slot,
     * the best/temp/full/split coding-structure tables indexed by width and height
     * index, and two "save" coding structures covering the maximum CU area.
     * Table cells whose size combination is not used are set to nullptr.
     *
     * \param pcEncCfg         encoder configuration (stored; also queried for chroma format,
     *                         luma bit depth, the QTBT flag and the TU size range)
     * \param pcTrQuant        transform/quantisation object (stored)
     * \param pcRdCost         RD cost object (stored)
     * \param CABACEstimator   CABAC writer used for bit estimation (stored)
     * \param ctxCache         context cache (stored)
     * \param maxCUWidth       width of the area given to each save coding structure
     * \param maxCUHeight      height of the area given to each save coding structure
     * \param maxTotalCUDepth  not referenced in this function
     */
    void IntraSearch::init( EncCfg*            pcEncCfg,
                            TrQuant*           pcTrQuant,
                            RdCost*            pcRdCost,
                            CABACWriter*       CABACEstimator,
                            CtxCache*          ctxCache,
                            const uint32_t     maxCUWidth,
                            const uint32_t     maxCUHeight,
                            const uint32_t     maxTotalCUDepth
    )
    {
      CHECK(m_isInitialized, "Already initialized");

      m_pcEncCfg       = pcEncCfg;
      m_pcTrQuant      = pcTrQuant;
      m_pcRdCost       = pcRdCost;
      m_CABACEstimator = CABACEstimator;
      m_CtxCache       = ctxCache;

      const ChromaFormat chromaFmt = pcEncCfg->getChromaFormatIdc();

      IntraPrediction::init( chromaFmt, pcEncCfg->getBitDepth( CHANNEL_TYPE_LUMA ) );

      for( uint32_t blkIdx = 0; blkIdx < MAX_NUM_TBLOCKS; ++blkIdx )
      {
        m_pSharedPredTransformSkip[blkIdx] = new Pel[MAX_CU_SIZE * MAX_CU_SIZE];
      }

      const uint32_t widthCount  = gp_sizeIdxInfo->numWidths();
      const uint32_t heightCount = gp_sizeIdxInfo->numHeights();

      const bool qtbtEnabled = m_pcEncCfg->getQTBT();

      // With QTBT a single layer is allocated; otherwise one layer per TU size step.
      const uint32_t splitLayerCount = qtbtEnabled ? 1 : pcEncCfg->getQuadtreeTULog2MaxSize() - pcEncCfg->getQuadtreeTULog2MinSize() + 1;
      const uint32_t fullLayerCount  = qtbtEnabled ? 1 : pcEncCfg->getQuadtreeTULog2MaxSize() - pcEncCfg->getQuadtreeTULog2MinSize() + 1;

      // Top-level tables, indexed by width index.
      m_pBestCS = new CodingStructure**[widthCount];
      m_pTempCS = new CodingStructure**[widthCount];

      m_pFullCS  = new CodingStructure***[widthCount];
      m_pSplitCS = new CodingStructure***[widthCount];

      for( uint32_t wIdx = 0; wIdx < widthCount; ++wIdx )
      {
        // Rows, indexed by height index.
        m_pBestCS[wIdx] = new CodingStructure*[heightCount];
        m_pTempCS[wIdx] = new CodingStructure*[heightCount];

        m_pFullCS [wIdx] = new CodingStructure**[heightCount];
        m_pSplitCS[wIdx] = new CodingStructure**[heightCount];

        for( uint32_t hIdx = 0; hIdx < heightCount; ++hIdx )
        {
          // Unused size combinations only get null entries.
          if( !( qtbtEnabled || wIdx == hIdx ) || !gp_sizeIdxInfo->isCuSize( gp_sizeIdxInfo->sizeFrom( wIdx ) ) || !gp_sizeIdxInfo->isCuSize( gp_sizeIdxInfo->sizeFrom( hIdx ) ) )
          {
            m_pBestCS[wIdx][hIdx] = nullptr;
            m_pTempCS[wIdx][hIdx] = nullptr;

            m_pFullCS [wIdx][hIdx] = nullptr;
            m_pSplitCS[wIdx][hIdx] = nullptr;
            continue;
          }

          m_pBestCS[wIdx][hIdx] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache );
          m_pTempCS[wIdx][hIdx] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache );

          m_pBestCS[wIdx][hIdx]->create( m_pcEncCfg->getChromaFormatIdc(), Area( 0, 0, gp_sizeIdxInfo->sizeFrom( wIdx ), gp_sizeIdxInfo->sizeFrom( hIdx ) ), false );
          m_pTempCS[wIdx][hIdx]->create( m_pcEncCfg->getChromaFormatIdc(), Area( 0, 0, gp_sizeIdxInfo->sizeFrom( wIdx ), gp_sizeIdxInfo->sizeFrom( hIdx ) ), false );

          m_pFullCS [wIdx][hIdx] = new CodingStructure*[fullLayerCount];
          m_pSplitCS[wIdx][hIdx] = new CodingStructure*[splitLayerCount];

          for( uint32_t fullIdx = 0; fullIdx < fullLayerCount; ++fullIdx )
          {
            m_pFullCS[wIdx][hIdx][fullIdx] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache );
            m_pFullCS[wIdx][hIdx][fullIdx]->create( m_pcEncCfg->getChromaFormatIdc(), Area( 0, 0, gp_sizeIdxInfo->sizeFrom( wIdx ), gp_sizeIdxInfo->sizeFrom( hIdx ) ), false );
          }

          for( uint32_t splitIdx = 0; splitIdx < splitLayerCount; ++splitIdx )
          {
            m_pSplitCS[wIdx][hIdx][splitIdx] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache );
            m_pSplitCS[wIdx][hIdx][splitIdx]->create( m_pcEncCfg->getChromaFormatIdc(), Area( 0, 0, gp_sizeIdxInfo->sizeFrom( wIdx ), gp_sizeIdxInfo->sizeFrom( hIdx ) ), false );
          }
        }
      }

      // Two save structures, each spanning the maximum CU area.
      const int saveLayerCount = 2;

      m_pSaveCS = new CodingStructure*[saveLayerCount];

      for( uint32_t saveIdx = 0; saveIdx < saveLayerCount; ++saveIdx )
      {
        m_pSaveCS[saveIdx] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache );
        m_pSaveCS[saveIdx]->create( UnitArea( chromaFmt, Area( 0, 0, maxCUWidth, maxCUHeight ) ), false );
      }

      m_isInitialized = true;
    }
    
    
    //////////////////////////////////////////////////////////////////////////
    // INTRA PREDICTION
    //////////////////////////////////////////////////////////////////////////
    
    /**
     * Selects the luma intra prediction mode for the first PU of \p cu.
     *
     * As visible in this function, the search has two stages:
     *  - a candidate list (uiRdModeList) is built, either from a fast first pass that
     *    ranks modes by distortion plus estimated mode bits, or (when emtUsageFlag == 2)
     *    from the list saved in m_savedRdModeList during an earlier call;
     *  - every remaining candidate is coded with xRecurIntraCodingLumaQT() and the
     *    candidate with the lowest cost is copied back into the CU's coding structure.
     *
     * On the normal exit path the CABAC estimator context is restored to its state on
     * entry. The PBINTRA early exit only restores the luma IPredMode sub-context.
     *
     * NOTE(review): the text of this function as shown has unbalanced braces and an
     * unterminated preprocessor conditional (see the notes further down). Lines appear
     * to be missing; verify against the upstream file before changing any code here.
     *
     * \param cu           coding unit to search; cu.firstPU must be non-null (checked)
     * \param partitioner  supplies the current area and the channel type
     */
    void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner )
    {
      CodingStructure       &cs            = *cu.cs;
      const SPS             &sps           = *cs.sps;
      const uint32_t             uiWidthBit    = cs.pcv->rectCUs ? g_aucLog2[partitioner.currArea().lwidth() ] : CU::getIntraSizeIdx(cu);
      const uint32_t             uiHeightBit   =                   g_aucLog2[partitioner.currArea().lheight()];
    
      // Lambda calculation at equivalent Qp of 4 is recommended because at that Qp, the quantization divisor is 1.
      const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda(cu.transQuantBypass) / double(1 << SCALE_BITS);
    
    
      //===== loop over partitions =====
    
      // Snapshots of the estimator contexts; they are copied back before each trial below.
      const TempCtx ctxStart          ( m_CtxCache, m_CABACEstimator->getCtx() );
      const TempCtx ctxStartIntraMode ( m_CtxCache, SubCtx( Ctx::IPredMode[CHANNEL_TYPE_LUMA],        m_CABACEstimator->getCtx() ) );
    
      CHECK( !cu.firstPU, "CU has no PUs" );
      const bool keepResi   = cs.pps->getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() || KEEP_PRED_AND_RESI_SIGNALS;
    
    
      uint32_t extraModes = 0; // add two extra modes, which would be used after uiMode <= DC_IDX is removed for cu.nsstIdx == 3
    
    
      const int width   = partitioner.currArea().lwidth();
      const int height  = partitioner.currArea().lheight();
    
      // Marking EMT usage for faster EMT
      // 0: EMT is either not applicable for current CU (cuWidth > EMT_INTRA_MAX_CU or cuHeight > EMT_INTRA_MAX_CU), not active in the config file or the fast decision algorithm is not used in this case
      // 1: EMT fast algorithm can be applied for the current CU, and the DCT2 is being checked
      // 2: EMT is being checked for current CU. Stored results of DCT2 can be utilized for speedup
      uint8_t emtUsageFlag = 0;
      const int maxSizeEMT = cs.pcv->noRQT ? EMT_INTRA_MAX_CU_WITH_QTBT : EMT_INTRA_MAX_CU;
      if( width <= maxSizeEMT && height <= maxSizeEMT && sps.getSpsNext().getUseIntraEMT() )
      {
        emtUsageFlag = cu.emtFlag == 1 ? 2 : 1;
      }
    
      // All-intra is detected here as an intra period of exactly 1.
      bool isAllIntra = m_pcEncCfg->getIntraPeriod() == 1;
    
      if( cs.pcv->rectCUs )
      {
        if( width * height < 64 && !isAllIntra )
        {
          emtUsageFlag = 0; //this forces the recalculation of the candidates list. Why is this necessary? (to be checked)
        }
      }
    
      // Candidate bookkeeping filled through updateCandList(): modes ranked by
      // distortion only (uiHadModeList/CandHadList) and the costs that accompany
      // uiRdModeList (CandCostList).
      static_vector<uint32_t,   FAST_UDI_MAX_RDMODE_NUM> uiHadModeList;
      static_vector<double, FAST_UDI_MAX_RDMODE_NUM> CandCostList;
      static_vector<double, FAST_UDI_MAX_RDMODE_NUM> CandHadList;
    
    
    #if JVET_L0283_MULTI_REF_LINE
      // Reference line index stored alongside each entry of uiRdModeList.
      static_vector<int, FAST_UDI_MAX_RDMODE_NUM> extendRefList;
      // NOTE(review): this null pointer is dereferenced (*nullList) when it is passed to
      // updateCandList() below; presumably the callee ignores it for those calls - confirm.
      static_vector<int, FAST_UDI_MAX_RDMODE_NUM>* nullList = NULL;
    #endif
    
    
      auto &pu = *cu.firstPU;
      int puIndex = 0;
      {
        CandHadList.clear();
        CandCostList.clear();
        uiHadModeList.clear();
    
    #if JVET_L0283_MULTI_REF_LINE
        extendRefList.clear();
    #endif
    
    
        CHECK(pu.cu != &cu, "PU is not contained in the CU");
    
        //===== determine set of modes to be tested (using prediction signal only) =====
        int numModesAvailable = NUM_LUMA_MODE; // total number of Intra modes
        static_vector< uint32_t, FAST_UDI_MAX_RDMODE_NUM > uiRdModeList;
    
        // Number of candidates that go to the full RD check, looked up per block size.
        int numModesForFullRD = 3;
        if( cs.pcv->rectCUs )
        {
          numModesForFullRD = g_aucIntraModeNumFast_UseMPM_2D[uiWidthBit - MIN_CU_LOG2][uiHeightBit - MIN_CU_LOG2];
        }
        else
        {
          numModesForFullRD = m_pcEncCfg->getFastUDIUseMPMEnabled() ? g_aucIntraModeNumFast_UseMPM[uiWidthBit] : g_aucIntraModeNumFast_NotUseMPM[uiWidthBit];
          numModesForFullRD -= 1;
        }
    
    #if INTRA_FULL_SEARCH
        numModesForFullRD = numModesAvailable;
    #endif
    
    
        // emtUsageFlag != 2: build the candidate list here.
        // emtUsageFlag == 2: reuse the list saved by an earlier call (else branch further down).
        if( emtUsageFlag != 2 )
        {
          // this should always be true
          CHECK( !pu.Y().valid(), "PU is not valid" );
    
    #if JVET_L0283_MULTI_REF_LINE
          // When the PU starts on the first row of its CTU only one pass is made,
          // otherwise MRL_NUM_REF_LINES passes.
          bool isFirstLineOfCtu = (((pu.block(COMPONENT_Y).y)&((pu.cs->sps)->getMaxCUWidth() - 1)) == 0);
          int numOfPassesExtendRef = (isFirstLineOfCtu ? 1 : MRL_NUM_REF_LINES);
          pu.multiRefIdx = 0;
    #endif
    
    
          //===== init pattern for luma prediction =====
          initIntraPatternChType( cu, pu.Y(), IntraPrediction::useFilteredIntraRefSamples( COMPONENT_Y, pu, false, pu ) );
          if( numModesForFullRD != numModesAvailable )
          {
            CHECK( numModesForFullRD >= numModesAvailable, "Too many modes for full RD search" );
    
            const CompArea &area = pu.Y();
    
            PelBuf piOrg         = cs.getOrgBuf(area);
            PelBuf piPred        = cs.getPredBuf(area);
    
            DistParam distParam;
    
            // The Hadamard flag is switched off for transquant-bypass CUs.
            const bool bUseHadamard = cu.transQuantBypass == 0;
    
            m_pcRdCost->setDistParam(distParam, piOrg, piPred, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, bUseHadamard);
    
            distParam.applyWeight = false;
    
            // Tracks which modes already had their distortion evaluated in the first round.
            bool bSatdChecked[NUM_INTRA_MODE];
            memset( bSatdChecked, 0, sizeof( bSatdChecked ) );
    
            {
              // First round: every mode up to DC_IDX plus the even-numbered modes above it.
              for( int modeIdx = 0; modeIdx < numModesAvailable; modeIdx++ )
              {
                uint32_t       uiMode = modeIdx;
                Distortion uiSad  = 0;
    
                // Skip checking extended Angular modes in the first round of SATD
                if( uiMode > DC_IDX && ( uiMode & 1 ) )
                {
                  continue;
                }
    
                bSatdChecked[uiMode] = true;
    
                pu.intraDir[0] = modeIdx;
    
                if( useDPCMForFirstPassIntraEstimation( pu, uiMode ) )
                {
                  encPredIntraDPCM( COMPONENT_Y, piOrg, piPred, uiMode );
                }
                else
                {
                  predIntraAng( COMPONENT_Y, piPred, pu, IntraPrediction::useFilteredIntraRefSamples( COMPONENT_Y, pu, true, pu ) );
                }
                // use Hadamard transform here
                uiSad += distParam.distFunc(distParam);
    
                // NB xFracModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated.
                m_CABACEstimator->getCtx() = SubCtx( Ctx::IPredMode[CHANNEL_TYPE_LUMA], ctxStartIntraMode );
    
                uint64_t fracModeBits = xFracModeBitsIntra(pu, uiMode, CHANNEL_TYPE_LUMA);
    
                // First-pass cost: distortion plus mode bits scaled by the first-pass lambda.
                double cost = ( double ) uiSad + ( double ) fracModeBits * sqrtLambdaForFirstPass;
    
                DTRACE( g_trace_ctx, D_INTRA_COST, "IntraHAD: %u, %llu, %f (%d)\n", uiSad, fracModeBits, cost, uiMode );
    
    
                // The RD list is ranked by cost, the HAD list by distortion alone.
                updateCandList( uiMode, cost,  uiRdModeList, CandCostList
    #if JVET_L0283_MULTI_REF_LINE
                  , extendRefList, 0
    #endif              
                  , numModesForFullRD + extraModes );
                updateCandList(uiMode, (double) uiSad, uiHadModeList, CandHadList
    #if JVET_L0283_MULTI_REF_LINE
                  , *nullList, -1
    #endif              
                  , 3 + extraModes);
    
              }
            } // NSSTFlag
    
            // forget the extra modes
            uiRdModeList.resize( numModesForFullRD );
    
    #if JVET_L0283_MULTI_REF_LINE
            CandCostList.resize(numModesForFullRD);
            extendRefList.resize(numModesForFullRD);
    #endif
    
            // Copy of the first-round winners; uiRdModeList itself is updated while iterating.
            static_vector<unsigned, FAST_UDI_MAX_RDMODE_NUM> parentCandList(FAST_UDI_MAX_RDMODE_NUM);
            std::copy_n(uiRdModeList.begin(), numModesForFullRD, parentCandList.begin());
    
            // Second round of SATD for extended Angular modes
            for (int modeIdx = 0; modeIdx < numModesForFullRD; modeIdx++)
            {
              unsigned parentMode = parentCandList[modeIdx];
              if (parentMode > (DC_IDX + 1) && parentMode < (NUM_LUMA_MODE - 1))
              {
                // Test the two direct neighbours (parentMode - 1 and parentMode + 1).
                for (int subModeIdx = -1; subModeIdx <= 1; subModeIdx += 2)
                {
                  unsigned mode = parentMode + subModeIdx;
    
    
                  if (!bSatdChecked[mode])
                  {
                    pu.intraDir[0] = mode;
    
                    if (useDPCMForFirstPassIntraEstimation(pu, mode))
                    {
                      encPredIntraDPCM(COMPONENT_Y, piOrg, piPred, mode);
                    }
                    else
                    {
                      predIntraAng(COMPONENT_Y, piPred, pu,
                                   IntraPrediction::useFilteredIntraRefSamples(COMPONENT_Y, pu, true, pu));
                    }
                    // use Hadamard transform here
                    Distortion sad = distParam.distFunc(distParam);
    
                    // NB xFracModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated.
                    m_CABACEstimator->getCtx() = SubCtx(Ctx::IPredMode[CHANNEL_TYPE_LUMA], ctxStartIntraMode);
    
                    uint64_t fracModeBits = xFracModeBitsIntra(pu, mode, CHANNEL_TYPE_LUMA);
    
                    double cost = (double) sad + (double) fracModeBits * sqrtLambdaForFirstPass;
    
    
                    updateCandList(mode, cost, uiRdModeList, CandCostList
    #if JVET_L0283_MULTI_REF_LINE
                      , extendRefList, 0
    #endif
                      , numModesForFullRD);
                    updateCandList(mode, (double)sad, uiHadModeList, CandHadList
    #if JVET_L0283_MULTI_REF_LINE
                      , *nullList, -1
    #endif                 
                      , 3);
    
        // NOTE(review): the scopes opened by the second-round loops above are not closed
        // before this point in the text as shown; lines appear to be missing here.
        // Verify against the upstream file before editing.
    #if JVET_L0283_MULTI_REF_LINE
            // Additional passes over the MPM modes using the extended reference lines.
            pu.multiRefIdx = 1;
    
            const int  numMPMs = NUM_MOST_PROBABLE_MODES;
            unsigned  multiRefMPM [numMPMs];
    
            PU::getIntraMPMs(pu, multiRefMPM);
            for (int mRefNum = 1; mRefNum < numOfPassesExtendRef; mRefNum++)
            {
              int multiRefIdx = MULTI_REF_LINE_IDX[mRefNum];
    
              pu.multiRefIdx = multiRefIdx;
              {
                // Re-initialise the reference pattern after changing pu.multiRefIdx.
                initIntraPatternChType(cu, pu.Y(), IntraPrediction::useFilteredIntraRefSamples(COMPONENT_Y, pu, false, pu));
              }
              for (int x = 0; x < numMPMs; x++)
              {
                uint32_t mode = multiRefMPM[x];
                {
                  pu.intraDir[0] = mode;
    
                  if (useDPCMForFirstPassIntraEstimation(pu, mode))
                  {
                    encPredIntraDPCM(COMPONENT_Y, piOrg, piPred, mode);
                  }
                  else
                  {
                    predIntraAng(COMPONENT_Y, piPred, pu, IntraPrediction::useFilteredIntraRefSamples(COMPONENT_Y, pu, true, pu));
                  }
    
                  // use Hadamard transform here
                  Distortion sad = distParam.distFunc(distParam);
    
                  // NB xFracModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated.
                  m_CABACEstimator->getCtx() = SubCtx(Ctx::IPredMode[CHANNEL_TYPE_LUMA], ctxStartIntraMode);
    
                  uint64_t fracModeBits = xFracModeBitsIntra(pu, mode, CHANNEL_TYPE_LUMA);
    
                  double cost = (double)sad + (double)fracModeBits * sqrtLambdaForFirstPass;
                  updateCandList(mode, cost, uiRdModeList, CandCostList, extendRefList, multiRefIdx, numModesForFullRD);
                }
              }
            }
            CandCostList.resize(numModesForFullRD);
            extendRefList.resize(numModesForFullRD);
    #endif
    
              const int numMPMs = NUM_MOST_PROBABLE_MODES;
              unsigned  uiPreds[numMPMs];
    
    #if JVET_L0283_MULTI_REF_LINE
              pu.multiRefIdx = 0;
    #endif
    
    
              // Fetch the most probable modes and look for each of them in the RD list.
              const int numCand = PU::getIntraMPMs( pu, uiPreds );
    
              for( int j = 0; j < numCand; j++ )
              {
                bool mostProbableModeIncluded = false;
                int  mostProbableMode         = uiPreds[j];
    
    
                for( int i = 0; i < numModesForFullRD; i++ )
                {
    
        // NOTE(review): the conditional starting here has an #else branch and a nested
        // #if, but only one #endif follows in the text as shown, and the push_back
        // statements below sit inside the search loop. Lines appear to be missing;
        // verify against the upstream file.
    #if JVET_L0283_MULTI_REF_LINE
                  mostProbableModeIncluded |= (mostProbableMode == uiRdModeList[i] && extendRefList[i] == 0);
    #else
    
                  mostProbableModeIncluded |= ( mostProbableMode == uiRdModeList[i] );
    
    #if JVET_L0283_MULTI_REF_LINE
                  extendRefList.push_back(0);
    #endif
    
                  numModesForFullRD++;
                  uiRdModeList.push_back( mostProbableMode );
                }
              }
            }
          }
          else
          {
            // Fill the RD list with the mode indices 0 .. numModesForFullRD - 1.
            for( int i = 0; i < numModesForFullRD; i++ )
            {
              uiRdModeList.push_back( i );
            }
          }
          if( emtUsageFlag == 1 )
          {
            // Store the modes to be checked with RD
            m_savedNumRdModes[puIndex] = numModesForFullRD;
            std::copy_n( uiRdModeList.begin(), numModesForFullRD, m_savedRdModeList[puIndex] );
    
    #if JVET_L0283_MULTI_REF_LINE
            std::copy_n(extendRefList.begin(), numModesForFullRD, m_savedExtendRefList[puIndex]);
    #endif
    
          }
        }
        else //emtUsage = 2 (here we potentially reduce the number of modes that will be full-RD checked)
        {
          if( isAllIntra && m_pcEncCfg->getFastIntraEMT() )
          {
            // Ratio to the stored best cost above which a saved mode is dropped.
            double thresholdSkipMode;
            if( cs.pcv->noRQT )
            {
              thresholdSkipMode = 1.0 + 1.4 / sqrt( ( double ) ( width*height ) );
            }
            else
            {
              switch( width )
              {
              case  4: thresholdSkipMode = 1.47; break; // Skip checking   4x4 Intra modes using the R-D cost in the DCT2-pass
              case  8: thresholdSkipMode = 1.28; break; // Skip checking   8x8 Intra modes using the R-D cost in the DCT2-pass
              case 16: thresholdSkipMode = 1.12; break; // Skip checking 16x16 Intra modes using the R-D cost in the DCT2-pass
              case 32: thresholdSkipMode = 1.06; break; // Skip checking 32x32 Intra modes using the R-D cost in the DCT2-pass
              default: thresholdSkipMode = 1.06; break; // Skip checking 32x32 Intra modes using the R-D cost in the DCT2-pass
              }
            }
    
            numModesForFullRD = 0;
    
            // Skip checking the modes with much larger R-D cost than the best mode
            for( int i = 0; i < m_savedNumRdModes[puIndex]; i++ )
            {
              if( m_modeCostStore[puIndex][i] <= thresholdSkipMode * m_bestModeCostStore[puIndex] )
              {
                uiRdModeList.push_back( m_savedRdModeList[puIndex][i] );
    
    #if JVET_L0283_MULTI_REF_LINE
                extendRefList.push_back(m_savedExtendRefList[puIndex][i]);
    #endif
    
                numModesForFullRD++;
              }
            }
          }
          else //this is necessary because we skip the candidates list calculation, since it was already obtained for the DCT-II. Now we load it
          {
            // Restore the modes to be checked with RD
            numModesForFullRD = m_savedNumRdModes[puIndex];
            uiRdModeList.resize( numModesForFullRD );
            std::copy_n( m_savedRdModeList[puIndex], m_savedNumRdModes[puIndex], uiRdModeList.begin() );
    
    #if JVET_L0283_MULTI_REF_LINE
            CandCostList.resize(numModesForFullRD);
            extendRefList.resize(numModesForFullRD);
            std::copy_n(m_savedExtendRefList[puIndex], m_savedNumRdModes[puIndex], extendRefList.begin());
    #endif
    
          }
        }
    
    
        CHECK( numModesForFullRD != uiRdModeList.size(), "Inconsistent state!" );
    
        // after this point, don't use numModesForFullRD
    
        // PBINTRA fast
        // In non-intra slices the list is shortened, or the search abandoned, when the
        // stored distortion candidates exceed cs.interHad * PBINTRA_RATIO.
        if( m_pcEncCfg->getUsePbIntraFast() && !cs.slice->isIntra() && cu.partSize == SIZE_2Nx2N && uiRdModeList.size() < numModesAvailable && emtUsageFlag != 2 )
        {
          if( CandHadList.size() < 3 || CandHadList[2] > cs.interHad * PBINTRA_RATIO )
          {
            uiRdModeList.resize( std::min<size_t>( uiRdModeList.size(), 2 ) );
          }
          if( CandHadList.size() < 2 || CandHadList[1] > cs.interHad * PBINTRA_RATIO )
          {
            uiRdModeList.resize( std::min<size_t>( uiRdModeList.size(), 1 ) );
          }
          if( CandHadList.size() < 1 || CandHadList[0] > cs.interHad * PBINTRA_RATIO )
          {
            // Give up on intra for this CU: report the maximum distortion and leave early.
            cs.dist = std::numeric_limits<Distortion>::max();
            cs.interHad = 0;
    
            //===== reset context models =====
            m_CABACEstimator->getCtx() = SubCtx( Ctx::IPredMode       [CHANNEL_TYPE_LUMA], ctxStartIntraMode );
    
            return;
          }
        }
    
        //===== check modes (using r-d costs) =====
        uint32_t       uiBestPUMode  = 0;
    
    #if JVET_L0283_MULTI_REF_LINE
        int            bestExtendRef = 0;
    #endif
    
    
        // Scratch structures selected by the CU width and height indices.
        CodingStructure *csTemp = m_pTempCS[gp_sizeIdxInfo->idxFrom( cu.lwidth() )][gp_sizeIdxInfo->idxFrom( cu.lheight() )];
        CodingStructure *csBest = m_pBestCS[gp_sizeIdxInfo->idxFrom( cu.lwidth() )][gp_sizeIdxInfo->idxFrom( cu.lheight() )];
    
        csTemp->slice = cs.slice;
        csBest->slice = cs.slice;
        csTemp->initStructData();
        csBest->initStructData();
    
        // just to be sure
        numModesForFullRD = ( int ) uiRdModeList.size();
        for (uint32_t uiMode = 0; uiMode < numModesForFullRD; uiMode++)
        {
          // set luma prediction mode
          uint32_t uiOrgMode = uiRdModeList[uiMode];
    
          pu.intraDir[0] = uiOrgMode;
    
    #if JVET_L0283_MULTI_REF_LINE
          int multiRefIdx = extendRefList[uiMode];
          pu.multiRefIdx  = multiRefIdx;
          // A non-zero reference line index must not be combined with DC or planar.
          CHECK(pu.multiRefIdx && (pu.intraDir[0] == DC_IDX || pu.intraDir[0] == PLANAR_IDX), "ERL");
    #endif
    
    
    
          // set context models
          m_CABACEstimator->getCtx() = ctxStart;
    
          // determine residual for partition
          cs.initSubStructure( *csTemp, partitioner.chType, cs.area, true );
    
          xRecurIntraCodingLumaQT( *csTemp, partitioner );
    
          // Remember the cost of each mode for the later emtUsageFlag == 2 call.
          if( emtUsageFlag == 1 && m_pcEncCfg->getFastIntraEMT() )
          {
            m_modeCostStore[puIndex][uiMode] = csTemp->cost; //cs.cost;
          }
    
    
          DTRACE( g_trace_ctx, D_INTRA_COST, "IntraCost T %f (%d) \n", csTemp->cost, uiOrgMode );
    
          // check r-d cost
          if( csTemp->cost < csBest->cost )
          {
            // Swapping the pointers makes the current trial the new best.
            std::swap( csTemp, csBest );
    
            uiBestPUMode  = uiOrgMode;
    
    #if JVET_L0283_MULTI_REF_LINE
            bestExtendRef = multiRefIdx;
    #endif
    
    
            if( ( emtUsageFlag == 1 ) && m_pcEncCfg->getFastIntraEMT() )
            {
              m_bestModeCostStore[puIndex] = csBest->cost; //cs.cost;
            }
          }
    
          csTemp->releaseIntermediateData();
        } // Mode loop
    
        // Copy the winning trial back into the CU's coding structure.
        cs.useSubStructure( *csBest, partitioner.chType, pu.singleChan( CHANNEL_TYPE_LUMA ), KEEP_PRED_AND_RESI_SIGNALS, true, keepResi, keepResi );
    
        csBest->releaseIntermediateData();
        //=== update PU data ====
        pu.intraDir[0] = uiBestPUMode;
    
    #if JVET_L0283_MULTI_REF_LINE
        pu.multiRefIdx = bestExtendRef;
    #endif
    
      }
    
      //===== reset context models =====
      m_CABACEstimator->getCtx() = ctxStart;
    }
    
    // Chooses the chroma intra prediction mode for the first PU of 'cu'.
    //
    // Every candidate returned by PU::getIntraChromaCandModes() is coded with
    // xRecurIntraChromaCodingQT(), its bits are estimated with xGetIntraFracBitsQT(),
    // and the candidate with the lowest m_pcRdCost->calcRdCost() value wins.
    // When JVET_L0338_MDLM is enabled, a Hadamard-based pre-pass first ranks the
    // candidates and excludes the two most expensive ones from the full search.
    //
    // \param cu           coding unit being searched; its coding structure (cu.cs) and first PU are modified
    // \param partitioner  partitioner describing the current area / channel type; used to create TUs
    //                     (dual-tree case) and passed on to the coding and bit-estimation helpers
    //
    // On return:
    //  - pu.intraDir[1] holds the selected chroma mode and cs.dist the distortion recorded for it,
    //  - the reco buffers of cs and cs.picture (and the pred/resi buffers when
    //    KEEP_PRED_AND_RESI_SIGNALS is set) hold the data of the selected mode,
    //  - the chroma components of the original TUs are restored from the saved best result,
    //  - the CABAC estimator context is reset to the state it had on entry.
    void IntraSearch::estIntraPredChromaQT(CodingUnit &cu, Partitioner &partitioner)
    {
      const ChromaFormat format   = cu.chromaFormat;
      const uint32_t    numberValidComponents = getNumberValidComponents(format);
      CodingStructure &cs = *cu.cs;
      // snapshot of the CABAC estimator context on entry; restored before every mode and at exit
      const TempCtx ctxStart  ( m_CtxCache, m_CABACEstimator->getCtx() );
    
      cs.setDecomp( cs.area.Cb(), false );
    
      // only the first PU of the CU is examined
      auto &pu = *cu.firstPU;
    
      {
        // running best result of the RD search below
        uint32_t       uiBestMode = 0;
        Distortion uiBestDist = 0;
        double     dBestCost = MAX_DOUBLE;
    
        //----- init mode list ----
        {
          // candidate indices searched are [uiMinMode, uiMaxMode)
          uint32_t  uiMinMode = 0;
          uint32_t  uiMaxMode = NUM_CHROMA_MODE;
    
          //----- check chroma modes -----
          uint32_t chromaCandModes[ NUM_CHROMA_MODE ];
          PU::getIntraChromaCandModes( pu, chromaCandModes );
    
          // create a temporary CS
          // (m_pSaveCS[0] is reused as scratch storage for the best result found so far;
          //  it shares pcv/picture with cs and is repositioned onto the same area)
          CodingStructure &saveCS = *m_pSaveCS[0];
          saveCS.pcv      = cs.pcv;
          saveCS.picture  = cs.picture;
          saveCS.area.repositionTo( cs.area );
          saveCS.clearTUs();
    
          // In the dual-tree case the TUs are added to cs here: one TU per partition
          // (tagged with the partitioner's current transform depth) if a TU_MAX_TR_SPLIT
          // split is possible, otherwise a single TU covering the current area.
          if( CS::isDualITree( cs ) )
          {
            if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
            {
              partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
    
              do
              {
                cs.addTU( CS::getArea( cs, partitioner.currArea(), partitioner.chType ), partitioner.chType ).depth = partitioner.currTrDepth;
              } while( partitioner.nextPart( cs ) );
    
              partitioner.exitCurrSplit();
            }
            else
            cs.addTU( CS::getArea( cs, partitioner.currArea(), partitioner.chType ), partitioner.chType );
          }
    
          // originals of the TUs mirrored into saveCS; orgTUs[j] is copied to/from saveCS.tus[j]
          // in the loops below (NOTE(review): relies on saveCS.tus keeping insertion order - confirm)
          std::vector<TransformUnit*> orgTUs;
    
    
          // create a store for the TUs
          for( const auto &ptu : cs.tus )
          {
            // for split TUs in HEVC, add the TUs without Chroma parts for correct setting of Cbfs
            if( pu.contains( *ptu, CHANNEL_TYPE_CHROMA ) || ( !cs.pcv->noRQT && !ptu->Cb().valid() && !ptu->Cr().valid() ) )
            {
              saveCS.addTU( *ptu, partitioner.chType );
              orgTUs.push_back( ptu );
            }
          }
    
    #if JVET_L0338_MDLM
          // SATD pre-selecting.
          // satdModeList[k] / satdSortedCost[k] are parallel arrays: a mode and its measured cost.
          int satdModeList[NUM_CHROMA_MODE];
          int64_t satdSortedCost[NUM_CHROMA_MODE];
          for (int i = 0; i < NUM_CHROMA_MODE; i++)
          {
            satdSortedCost[i] = 0; // for the mode not pre-select by SATD, do RDO by default, so set the initial value 0.
            satdModeList[i] = 0;
          }
          // indexed by intra mode value (not by candidate index)
          bool modeIsEnable[NUM_INTRA_MODE + 1]; // use intra mode idx to check whether enable
          for (int i = 0; i < NUM_INTRA_MODE + 1; i++)
          {
            modeIsEnable[i] = 1;
          }
    
          DistParam distParam;
          const bool useHadamard = true;
          pu.intraDir[1] = MDLM_L_IDX; // temporary assigned, just to indicate this is a MDLM mode. for luma down-sampling operation.
    
          // NOTE(review): presumably prepares the intra reference data for Cb/Cr and the
          // luma samples used by the LM-type modes predicted below - confirm against the callees
          initIntraPatternChType(cu, pu.Cb());
          initIntraPatternChType(cu, pu.Cr());
          xGetLumaRecPixels(pu, pu.Cb());
    
          for (int idx = uiMinMode; idx <= uiMaxMode - 1; idx++)
          {
            int mode = chromaCandModes[idx];
            // recorded before any 'continue' so every candidate appears in satdModeList
            satdModeList[idx] = mode;
            // skipped modes keep cost 0 and therefore sort to the front (never pruned)
            if (PU::isLMCMode(mode) && !PU::isLMCModeEnabled(pu, mode))
            {
              continue;
            }
    
            if ((mode == LM_CHROMA_IDX) || (mode == PLANAR_IDX) || (mode == DM_CHROMA_IDX)) // only pre-check regular modes and MDLM modes, not including DM ,Planar, and LM
    
            {
              continue;
            }
            pu.intraDir[1] = mode; // temporary assigned, for SATD checking.
    
            // accumulated Cb + Cr distortion of this mode
            int64_t sad = 0;
            // NOTE: this local 'cs' shadows the function-level 'cs'
            // (NOTE(review): presumably pu.cs and cu.cs refer to the same structure - confirm)
            CodingStructure& cs = *(pu.cs);
    
            //--- Cb: predict into the pred buffer and measure against the original ---
            CompArea areaCb = pu.Cb();
            PelBuf orgCb = cs.getOrgBuf(areaCb);
            PelBuf predCb = cs.getPredBuf(areaCb);
    
            m_pcRdCost->setDistParam(distParam, orgCb, predCb, pu.cs->sps->getBitDepth(CHANNEL_TYPE_CHROMA), COMPONENT_Cb, useHadamard);
            distParam.applyWeight = false;
    
            if (PU::isLMCMode(mode))
            {
              predIntraChromaLM(COMPONENT_Cb, predCb, pu, areaCb, mode);
            }
            else
            {
              predIntraAng(COMPONENT_Cb, predCb, pu, false);
            }
    
            sad += distParam.distFunc(distParam);
    
            //--- Cr: same procedure as for Cb ---
            CompArea areaCr = pu.Cr();
            PelBuf orgCr = cs.getOrgBuf(areaCr);
            PelBuf predCr = cs.getPredBuf(areaCr);
    
            m_pcRdCost->setDistParam(distParam, orgCr, predCr, pu.cs->sps->getBitDepth(CHANNEL_TYPE_CHROMA), COMPONENT_Cr, useHadamard);
            distParam.applyWeight = false;
    
            if (PU::isLMCMode(mode))
            {
              predIntraChromaLM(COMPONENT_Cr, predCr, pu, areaCr, mode);
            }
            else
            {
              predIntraAng(COMPONENT_Cr, predCr, pu, false);
            }
            sad += distParam.distFunc(distParam);
            satdSortedCost[idx] = sad;
          }
          // sort the mode based on the cost from small to large.
          // (in-place exchange sort; satdModeList is swapped in step with satdSortedCost)
          int tempIdx = 0;
          int64_t tempCost = 0;
          for (int i = uiMinMode; i <= uiMaxMode - 1; i++)
          {
            for (int j = i + 1; j <= uiMaxMode - 1; j++)
            {
              if (satdSortedCost[j] < satdSortedCost[i])
              {
                tempIdx = satdModeList[i];
                satdModeList[i] = satdModeList[j];
                satdModeList[j] = tempIdx;
    
                tempCost = satdSortedCost[i];
                satdSortedCost[i] = satdSortedCost[j];
                satdSortedCost[j] = tempCost;
    
              }
            }
          }
          // the modes at the tail of the sorted list (highest cost) are flagged as disabled
          int reducedModeNumber = 2; // reduce the number of chroma modes
          for (int i = 0; i < reducedModeNumber; i++)
          {
            modeIsEnable[satdModeList[uiMaxMode - 1 - i]] = 0; // disable the last reducedModeNumber modes
          }
    #endif
    
    
          // save the dist
          // (distortion accumulated in cs before chroma; each mode restarts from this value
          //  and its RD cost uses only the increase over it)
          Distortion baseDist = cs.dist;
    
          //----- full RD search over the candidate modes -----
          for (uint32_t uiMode = uiMinMode; uiMode < uiMaxMode; uiMode++)
          {
            const int chromaIntraMode = chromaCandModes[uiMode];
            // LM-type modes that are not enabled for this PU are not tested
            if( PU::isLMCMode( chromaIntraMode ) && ! PU::isLMCModeEnabled( pu, chromaIntraMode ) )
            {
              continue;
            }
    
    #if JVET_L0338_MDLM
            // skip modes pruned by the SATD pre-pass; the pruning is honoured only when
            // PU::isLMCModeEnabled() returns true for this pu/mode
            if (!modeIsEnable[chromaIntraMode] && PU::isLMCModeEnabled(pu, chromaIntraMode)) // when CCLM is disable, then MDLM is disable. not use satd checking
            {
              continue;
            }
    #endif
    
            // start every mode from the same state: decomposition flag, distortion and CABAC context
            cs.setDecomp( pu.Cb(), false );
            cs.dist = baseDist;
            //----- restore context models -----
            m_CABACEstimator->getCtx() = ctxStart;
    
            //----- chroma coding -----
            pu.intraDir[1] = chromaIntraMode;
    
            xRecurIntraChromaCodingQT( cs, partitioner );
    
            // NOTE(review): with transform skip enabled in the PPS the context is reset again
            // before counting bits - presumably because the coding recursion leaves it modified; confirm
            if (cs.pps->getUseTransformSkip())
            {
              m_CABACEstimator->getCtx() = ctxStart;
            }
    
            uint64_t fracBits   = xGetIntraFracBitsQT( cs, partitioner, false, true );
            Distortion uiDist = cs.dist;
            // cost is computed from the distortion added by this mode only (uiDist - baseDist)
            double    dCost   = m_pcRdCost->calcRdCost( fracBits, uiDist - baseDist );
    
            //----- compare -----
            if( dCost < dBestCost )
            {
              // new best: stash buffers and TU data of all chroma components in saveCS
              for( uint32_t i = getFirstComponentOfChannel( CHANNEL_TYPE_CHROMA ); i < numberValidComponents; i++ )
              {
                const CompArea &area = pu.blocks[i];
    
                saveCS.getRecoBuf     ( area ).copyFrom( cs.getRecoBuf   ( area ) );
    #if KEEP_PRED_AND_RESI_SIGNALS
                saveCS.getPredBuf     ( area ).copyFrom( cs.getPredBuf   ( area ) );
                saveCS.getResiBuf     ( area ).copyFrom( cs.getResiBuf   ( area ) );
    #endif
                // the picture-level reco buffer is updated as well
                cs.picture->getRecoBuf( area ).copyFrom( cs.getRecoBuf( area ) );
    
                // copy this component of every tracked TU into its counterpart in saveCS
                for( uint32_t j = 0; j < saveCS.tus.size(); j++ )
                {
                  saveCS.tus[j]->copyComponentFrom( *orgTUs[j], area.compID );
                }
              }
    
              dBestCost  = dCost;
              uiBestDist = uiDist;
              uiBestMode = chromaIntraMode;
            }
          }
    
          //----- copy the saved best result back into cs, the picture and the original TUs -----
          for( uint32_t i = getFirstComponentOfChannel( CHANNEL_TYPE_CHROMA ); i < numberValidComponents; i++ )
          {
            const CompArea &area = pu.blocks[i];
    
            cs.getRecoBuf         ( area ).copyFrom( saveCS.getRecoBuf( area ) );
    #if KEEP_PRED_AND_RESI_SIGNALS
            cs.getPredBuf         ( area ).copyFrom( saveCS.getPredBuf( area ) );
            cs.getResiBuf         ( area ).copyFrom( saveCS.getResiBuf( area ) );
    #endif
            cs.picture->getRecoBuf( area ).copyFrom( cs.    getRecoBuf( area ) );
    
            for( uint32_t j = 0; j < saveCS.tus.size(); j++ )
            {
              orgTUs[ j ]->copyComponentFrom( *saveCS.tus[ j ], area.compID );
            }
          }
        }
    
        // publish the decision: selected chroma mode and the distortion recorded for it
        pu.intraDir[1] = uiBestMode;
        cs.dist        = uiBestDist;
      }
    
      //----- restore context models -----
      m_CABACEstimator->getCtx() = ctxStart;
    }