/* The copyright in this software is being made available under the BSD
 * License, included below. This software may be subject to other third party
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
 * Copyright (c) 2010-2022, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
 *    be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/** \file     EncCu.cpp
    \brief    Coding Unit (CU) encoder class
*/

#include "EncCu.h"

#include "EncLib.h"
#include "Analyze.h"
#include "AQp.h"

#include "CommonLib/dtrace_codingstruct.h"
#include "CommonLib/Picture.h"
#include "CommonLib/UnitTools.h"
#include "MCTS.h"


#include "CommonLib/dtrace_buffer.h"

#include <stdio.h>
#include <cmath>
#include <algorithm>



//! \ingroup EncoderLib
//! \{

// ====================================================================================================================
// Constructor: sets up the GPM candidate-pair table (m_GeoModeTest) and, when
// JVET_W0097_GPM_MMVD_TM is enabled, default values for the GPM-MMVD search controls.
//
// - Without JVET_Y0065_GPM_INTRA, m_GeoModeTest is brace-initialised with 30
//   GeoMotionInfo pairs over the indices 0..5.
// - With JVET_Y0065_GPM_INTRA there is no member-initialiser list.
// NOTE(review): with JVET_Y0065_GPM_INTRA on and NON_ADJACENT_MRG_CAND off, nothing in
// this constructor writes m_GeoModeTest - confirm that combination is not built.
#if JVET_Y0065_GPM_INTRA
EncCu::EncCu()
#else
EncCu::EncCu() : m_GeoModeTest
{
  GeoMotionInfo(0, 1), GeoMotionInfo(1, 0),GeoMotionInfo(0, 2), GeoMotionInfo(1, 2), GeoMotionInfo(2, 0),
  GeoMotionInfo(2, 1), GeoMotionInfo(0, 3),GeoMotionInfo(1, 3), GeoMotionInfo(2, 3), GeoMotionInfo(3, 0),
  GeoMotionInfo(3, 1), GeoMotionInfo(3, 2),GeoMotionInfo(0, 4), GeoMotionInfo(1, 4), GeoMotionInfo(2, 4),
  GeoMotionInfo(3, 4), GeoMotionInfo(4, 0),GeoMotionInfo(4, 1), GeoMotionInfo(4, 2), GeoMotionInfo(4, 3),
  GeoMotionInfo(0, 5), GeoMotionInfo(1, 5),GeoMotionInfo(2, 5), GeoMotionInfo(3, 5), GeoMotionInfo(4, 5),
  GeoMotionInfo(5, 0), GeoMotionInfo(5, 1),GeoMotionInfo(5, 2), GeoMotionInfo(5, 3), GeoMotionInfo(5, 4)
}
#endif
{
#if NON_ADJACENT_MRG_CAND
  // Fill the pair table at run time. For each i (starting at 1) the entries are
  // (0,i) .. (i-1,i) followed by (i,0) .. (i,i-1); for i = 1..5 this is the same
  // order as the static list above. With an exclusive upper bound N on i the loops
  // write N*(N-1) entries.
  // NOTE(review): m_GeoModeTest must be declared with at least that many elements - confirm in the header.
  int numGeoModeTest = 0;
#if JVET_Y0065_GPM_INTRA
  for (int i = 1; i < GEO_MAX_NUM_UNI_CANDS+GEO_MAX_NUM_INTRA_CANDS; i++)
#else
  for (int i = 1; i < GEO_MAX_NUM_UNI_CANDS; i++)
#endif
  {
    // pairs whose second index is i
    for (int j = 0; j < i; j++)
    {
      m_GeoModeTest[numGeoModeTest] = GeoMotionInfo(j, i);
      numGeoModeTest++;
    }
    // pairs whose first index is i
    for (int j = 0; j < i; j++)
    {
      m_GeoModeTest[numGeoModeTest] = GeoMotionInfo(i, j);
      numGeoModeTest++;
    }
  }
#endif
#if JVET_W0097_GPM_MMVD_TM
  // Defaults only; create() recomputes all five values from the encoder configuration.
  m_fastGpmMmvdSearch = false;
  m_fastGpmMmvdRelatedCU = false;
  m_includeMoreMMVDCandFirstPass = false;
  m_maxNumGPMDirFirstPass = 64;
  m_numCandPerPar = 5;
#endif
}

/** Allocates the working storage used by the CU-level search.
 *
 *  - Builds the [width index][height index] tables of temporary/best CodingStructure
 *    objects (and the "2" variants unless INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS is set).
 *    Entries whose width or height is not a CU size are set to nullptr.
 *  - When OBMC is enabled in the configuration, builds the matching tables of
 *    CodingStructure / prediction buffers used "without OBMC".
 *  - Creates the mode controller and the merge / GPM / CIIP buffers, all sized to the
 *    maximum CU width and height.
 *  - Derives the GPM-MMVD fast-search controls from intra period, picture size and IBC mode.
 *  - Sizes the context buffer to numWidths + numHeights entries.
 *
 *  Everything allocated here is released by destroy().
 *
 *  \param encCfg encoder configuration; read for CU size, chroma format, PLT/OBMC/GDR/IBC
 *                settings, intra period and source dimensions
 */
void EncCu::create( EncCfg* encCfg )
{
  unsigned      uiMaxWidth    = encCfg->getMaxCUWidth();
  unsigned      uiMaxHeight   = encCfg->getMaxCUHeight();
  ChromaFormat  chromaFormat  = encCfg->getChromaFormatIdc();

  unsigned      numWidths     = gp_sizeIdxInfo->numWidths();
  unsigned      numHeights    = gp_sizeIdxInfo->numHeights();
  m_pTempCS = new CodingStructure**  [numWidths];
  m_pBestCS = new CodingStructure**  [numWidths];
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
  m_pTempCS2 = new CodingStructure** [numWidths];
  m_pBestCS2 = new CodingStructure** [numWidths];
#endif
#if ENABLE_OBMC
  m_tempWoOBMCBuffer.create(UnitArea(chromaFormat, Area(0, 0, MAX_CU_SIZE, MAX_CU_SIZE)));
#endif
  for( unsigned w = 0; w < numWidths; w++ )
  {
    m_pTempCS[w] = new CodingStructure*  [numHeights];
    m_pBestCS[w] = new CodingStructure*  [numHeights];
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
    m_pTempCS2[w] = new CodingStructure* [numHeights];
    m_pBestCS2[w] = new CodingStructure* [numHeights];
#endif
    for( unsigned h = 0; h < numHeights; h++ )
    {
      unsigned width  = gp_sizeIdxInfo->sizeFrom( w );
      unsigned height = gp_sizeIdxInfo->sizeFrom( h );

      if( gp_sizeIdxInfo->isCuSize( width ) && gp_sizeIdxInfo->isCuSize( height ) )
      {
        m_pTempCS[w][h] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache );
        m_pBestCS[w][h] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache );

        m_pTempCS[w][h]->create(chromaFormat, Area(0, 0, width, height), false, (bool)encCfg->getPLTMode());
        m_pBestCS[w][h]->create(chromaFormat, Area(0, 0, width, height), false, (bool)encCfg->getPLTMode());
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
        m_pTempCS2[w][h] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache );
        m_pBestCS2[w][h] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache );

        m_pTempCS2[w][h]->create(chromaFormat, Area(0, 0, width, height), false, (bool)encCfg->getPLTMode());
        m_pBestCS2[w][h]->create(chromaFormat, Area(0, 0, width, height), false, (bool)encCfg->getPLTMode());
#endif
      }
      else
      {
        // not a valid CU size: keep the slot empty so destroy() can test it
        m_pTempCS[w][h] = nullptr;
        m_pBestCS[w][h] = nullptr;
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
        m_pTempCS2[w][h] = nullptr;
        m_pBestCS2[w][h] = nullptr;
#endif
      }
    }
  }
#if ENABLE_OBMC
  // These stay null when OBMC is disabled; both xCompressCU and destroy() test m_pTempCUWoOBMC.
  m_pTempCUWoOBMC = nullptr;
  m_pPredBufWoOBMC = nullptr;

  if (encCfg->getUseOBMC())
  {
    m_pTempCUWoOBMC = new CodingStructure**[numWidths];
    m_pPredBufWoOBMC = new PelStorage*[numWidths];

    for (unsigned w = 0; w < numWidths; w++)
    {
      m_pTempCUWoOBMC[w] = new CodingStructure*[numHeights];
      m_pPredBufWoOBMC[w] = new PelStorage[numHeights];

      for (unsigned h = 0; h < numHeights; h++)
      {
        uint32_t width = gp_sizeIdxInfo->sizeFrom(w);
        uint32_t height = gp_sizeIdxInfo->sizeFrom(h);

        if (gp_sizeIdxInfo->isCuSize(width) && gp_sizeIdxInfo->isCuSize(height))
        {
          m_pTempCUWoOBMC[w][h] = new CodingStructure(m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache);
#if JVET_Z0118_GDR
          m_pTempCUWoOBMC[w][h]->create(chromaFormat, Area(0, 0, width, height), false, false, encCfg->getGdrEnabled());
#else
          m_pTempCUWoOBMC[w][h]->create(chromaFormat, Area(0, 0, width, height), false, false);
#endif
          m_pPredBufWoOBMC[w][h].create(UnitArea(chromaFormat, Area(0, 0, width, height)));
        }
        else
        {
          // new CodingStructure*[n] leaves the elements indeterminate; null the unused
          // slots explicitly, as is done for m_pTempCS / m_pBestCS above.
          m_pTempCUWoOBMC[w][h] = nullptr;
        }
      }
    }
  }
#endif
  m_cuChromaQpOffsetIdxPlus1 = 0;

  unsigned maxDepth = numWidths + numHeights;

  m_modeCtrl = new EncModeCtrlMTnoRQT();

  m_modeCtrl->create( *encCfg );

  // Merge candidate prediction buffers, one CTU-sized buffer per slot.
#if JVET_Y0065_GPM_INTRA
  for (unsigned ui = 0; ui < GEO_NUM_RDO_BUFFER; ui++)
#else
  for (unsigned ui = 0; ui < MMVD_MRG_MAX_RD_BUF_NUM; ui++)
#endif
  {
    m_acMergeBuffer[ui].create( chromaFormat, Area( 0, 0, uiMaxWidth, uiMaxHeight ) );
  }
  for (unsigned ui = 0; ui < MRG_MAX_NUM_CANDS; ui++)
  {
    m_acRealMergeBuffer[ui].create(chromaFormat, Area(0, 0, uiMaxWidth, uiMaxHeight));
#if INTER_LIC || MULTI_HYP_PRED
    // second half of m_acRealMergeBuffer (indices MRG_MAX_NUM_CANDS .. 2*MRG_MAX_NUM_CANDS-1)
    m_acRealMergeBuffer[ui+MRG_MAX_NUM_CANDS].create(chromaFormat, Area(0, 0, uiMaxWidth, uiMaxHeight));
#endif
    m_acMergeTmpBuffer[ui].create(chromaFormat, Area(0, 0, uiMaxWidth, uiMaxHeight));
#if JVET_X0141_CIIP_TIMD_TM && TM_MRG
    m_acTmMergeTmpBuffer[ui].create(chromaFormat, Area(0, 0, uiMaxWidth, uiMaxHeight));
#endif
  }

  // GPM weighted-prediction buffers; the count depends on the enabled tools and must
  // match the loop bound used in destroy().
#if JVET_Y0065_GPM_INTRA
#if JVET_AA0058_GPM_ADP_BLD
  for (unsigned ui = 0; ui < GEO_MAX_TRY_WEIGHTED_SAD*GEO_NUM_BLD+1; ui++)
#else
  for( unsigned ui = 0; ui < GEO_MAX_TRY_WEIGHTED_SAD+1; ui++ )
#endif
#else
  for( unsigned ui = 0; ui < GEO_MAX_TRY_WEIGHTED_SAD; ui++ )
#endif
  {
    m_acGeoWeightedBuffer[ui].create( chromaFormat, Area( 0, 0, uiMaxWidth, uiMaxHeight ) );
  }
#if JVET_W0097_GPM_MMVD_TM
  for (unsigned ui = 0; ui < MRG_MAX_NUM_CANDS; ui++)
  {
    for (unsigned vi = 0; vi < GPM_EXT_MMVD_MAX_REFINE_NUM; vi++)
    {
      m_acGeoMMVDBuffer[ui][vi].create(chromaFormat, Area(0, 0, uiMaxWidth, uiMaxHeight));
      m_acGeoMMVDTmpBuffer[ui][vi].create(chromaFormat, Area(0, 0, uiMaxWidth, uiMaxHeight));
    }
  }
  int sourceWidth = encCfg->getSourceWidth();
  int sourceHeight = encCfg->getSourceHeight();
  // Fast GPM-MMVD search: on for (intra period > 0 and picture <= 1920x1080) or
  // (intra period < 0 and picture >= 1280x720), and never together with IBC.
  m_fastGpmMmvdSearch = (((encCfg->getIntraPeriod() > 0) && ((sourceWidth * sourceHeight) <= (1920 * 1080))) || ((encCfg->getIntraPeriod() < 0) && ((sourceWidth * sourceHeight) >= (1280 * 720)))) && !encCfg->getIBCMode();
  // Related-CU shortcut: only for intra period < 0; the picture-size threshold depends on TM_MRG.
#if TM_MRG
  m_fastGpmMmvdRelatedCU = ((encCfg->getIntraPeriod() < 0) && ((sourceWidth * sourceHeight) >= (1920 * 1080))) && !encCfg->getIBCMode();
#else
  m_fastGpmMmvdRelatedCU = ((encCfg->getIntraPeriod() < 0) && ((sourceWidth * sourceHeight) >= (1280 * 720))) && !encCfg->getIBCMode();
#endif

  m_includeMoreMMVDCandFirstPass = ((encCfg->getIntraPeriod() > 0) || ((encCfg->getIntraPeriod() < 0) && m_fastGpmMmvdSearch));
  m_maxNumGPMDirFirstPass = ((encCfg->getIntraPeriod() < 0) ? 50 : (m_fastGpmMmvdSearch ? 36 : 64));
  m_numCandPerPar = (m_fastGpmMmvdSearch ? 4 : 5);
#if TM_MRG
  for (uint16_t ui = 0; ui < GEO_TM_MAX_NUM_CANDS; ui++)
  {
    m_acGeoMergeTmpBuffer[ui].create(chromaFormat, Area(0, 0, uiMaxWidth, uiMaxHeight));
    m_acGeoSADTmpBuffer[ui].create(chromaFormat, Area(0, 0, uiMaxWidth, uiMaxHeight));
  }
#endif
#endif
  m_ciipBuffer[0].create(chromaFormat, Area(0, 0, uiMaxWidth, uiMaxHeight));
  m_ciipBuffer[1].create(chromaFormat, Area(0, 0, uiMaxWidth, uiMaxHeight));

  m_CtxBuffer.resize( maxDepth );
  // the current-context pointer is only valid while compressCtu() is running
  m_CurrCtx = 0;
}


void EncCu::destroy()
{
  unsigned numWidths  = gp_sizeIdxInfo->numWidths();
  unsigned numHeights = gp_sizeIdxInfo->numHeights();

#if JVET_V0094_BILATERAL_FILTER || JVET_X0071_CHROMA_BILATERAL_FILTER
  delete m_bilateralFilter;
#endif

#if ENABLE_OBMC
  m_tempWoOBMCBuffer.destroy();
#endif
  for( unsigned w = 0; w < numWidths; w++ )
  {
    for( unsigned h = 0; h < numHeights; h++ )
    {
      if( m_pBestCS[w][h] ) m_pBestCS[w][h]->destroy();
      if( m_pTempCS[w][h] ) m_pTempCS[w][h]->destroy();

      delete m_pBestCS[w][h];
      delete m_pTempCS[w][h];
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
      if( m_pBestCS2[w][h] ) m_pBestCS2[w][h]->destroy();
      if( m_pTempCS2[w][h] ) m_pTempCS2[w][h]->destroy();

      delete m_pBestCS2[w][h];
      delete m_pTempCS2[w][h];
#endif
    }

    delete[] m_pTempCS[w];
    delete[] m_pBestCS[w];
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
    delete[] m_pTempCS2[w];
    delete[] m_pBestCS2[w];
#endif
  }

  delete[] m_pBestCS; m_pBestCS = nullptr;
  delete[] m_pTempCS; m_pTempCS = nullptr;
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
  delete[] m_pBestCS2; m_pBestCS2 = nullptr;
  delete[] m_pTempCS2; m_pTempCS2 = nullptr;
#endif
#if REUSE_CU_RESULTS
  if (m_tmpStorageLCU)
  {
    m_tmpStorageLCU->destroy();
    delete m_tmpStorageLCU;  m_tmpStorageLCU = nullptr;
  }
#endif

#if REUSE_CU_RESULTS
  m_modeCtrl->destroy();

#endif
  delete m_modeCtrl;
  m_modeCtrl = nullptr;
#if ENABLE_OBMC
  if (m_pTempCUWoOBMC)
  {
    for (unsigned w = 0; w < numWidths; w++)
    {
      for (unsigned h = 0; h < numHeights; h++)
      {
        if( gp_sizeIdxInfo->isCuSize( gp_sizeIdxInfo->sizeFrom( w ) ) && gp_sizeIdxInfo->isCuSize( gp_sizeIdxInfo->sizeFrom( h ) ) )
        {
          m_pTempCUWoOBMC[w][h]->destroy();
          delete m_pTempCUWoOBMC[w][h];

          m_pPredBufWoOBMC[w][h].destroy();
        }
      }
      delete[] m_pTempCUWoOBMC[w];
      delete[] m_pPredBufWoOBMC[w];
    }

    delete[] m_pTempCUWoOBMC;
    m_pTempCUWoOBMC = nullptr;

    delete[] m_pPredBufWoOBMC;
    m_pPredBufWoOBMC = nullptr;
  }

#endif
#if JVET_Y0065_GPM_INTRA
  for (unsigned ui = 0; ui < GEO_NUM_RDO_BUFFER; ui++)
#else
  for (unsigned ui = 0; ui < MMVD_MRG_MAX_RD_BUF_NUM; ui++)
#endif
  {
    m_acMergeBuffer[ui].destroy();
  }
  for (unsigned ui = 0; ui < MRG_MAX_NUM_CANDS; ui++)
  {
    m_acRealMergeBuffer[ui].destroy();
#if INTER_LIC || MULTI_HYP_PRED
    m_acRealMergeBuffer[ui+MRG_MAX_NUM_CANDS].destroy();
#endif
    m_acMergeTmpBuffer[ui].destroy();
#if JVET_X0141_CIIP_TIMD_TM && TM_MRG
    m_acTmMergeTmpBuffer[ui].destroy();
#endif
  }

#if JVET_Y0065_GPM_INTRA
#if JVET_AA0058_GPM_ADP_BLD
  for (unsigned ui = 0; ui < GEO_MAX_TRY_WEIGHTED_SAD*GEO_NUM_BLD+1; ui++)
#else
  for (unsigned ui = 0; ui < GEO_MAX_TRY_WEIGHTED_SAD+1; ui++)
#endif
#else
  for (unsigned ui = 0; ui < GEO_MAX_TRY_WEIGHTED_SAD; ui++)
#endif
  {
    m_acGeoWeightedBuffer[ui].destroy();
  }
#if JVET_W0097_GPM_MMVD_TM
  for (unsigned ui = 0; ui < MRG_MAX_NUM_CANDS; ui++)
  {
    for (unsigned vi = 0; vi < GPM_EXT_MMVD_MAX_REFINE_NUM; vi++)
    {
      m_acGeoMMVDBuffer[ui][vi].destroy();
      m_acGeoMMVDTmpBuffer[ui][vi].destroy();
    }
  }
#if TM_MRG
  for (uint16_t ui = 0; ui < GEO_TM_MAX_NUM_CANDS; ui++)
  {
    m_acGeoMergeTmpBuffer[ui].destroy();
    m_acGeoSADTmpBuffer[ui].destroy();
  }
#endif
#endif
  m_ciipBuffer[0].destroy();
  m_ciipBuffer[1].destroy();
}



// The destructor itself releases nothing; storage allocated in create() is freed by destroy().
EncCu::~EncCu() = default;



/** Binds this CU encoder to the tools owned by the encoder library.
 *
 *  \param pcEncLib  encoder library; also stored as the configuration object
 *  \param sps       sequence parameter set handed to the CABAC estimator lookup
 *  (the optional tId argument selects the per-thread tool instances)
 */
void EncCu::init( EncLib* pcEncLib, const SPS& sps PARL_PARAM( const int tId ) )
{
  // --- tool handles fetched from the library -------------------------------------------
  m_pcEncCfg = pcEncLib;
  m_pcIntraSearch = pcEncLib->getIntraSearch( PARL_PARAM0( tId ) );
  m_pcInterSearch = pcEncLib->getInterSearch( PARL_PARAM0( tId ) );
  m_pcTrQuant = pcEncLib->getTrQuant( PARL_PARAM0( tId ) );
  m_pcRdCost = pcEncLib->getRdCost ( PARL_PARAM0( tId ) );

  // the estimator keeps a back-pointer to this encoder
  m_CABACEstimator = pcEncLib->getCABACEncoder( PARL_PARAM0( tId ) )->getCABACEstimator( &sps );
  m_CABACEstimator->setEncCu(this);

  m_CtxCache = pcEncLib->getCtxCache( PARL_PARAM0( tId ) );
  m_pcRateCtrl = pcEncLib->getRateCtrl();
  m_pcSliceEncoder = pcEncLib->getSliceEncoder();
#if ENABLE_SPLIT_PARALLELISM
  m_pcEncLib = pcEncLib;
  m_dataId = tId;
#endif
  m_pcLoopFilter = pcEncLib->getLoopFilter();

  // --- search state ----------------------------------------------------------------------
  m_GeoCostList.init(GEO_NUM_PARTITION_MODE, m_pcEncCfg->getMaxNumGeoCand());
  m_AFFBestSATDCost = MAX_DOUBLE;

  // --- base class and mode controller ----------------------------------------------------
  DecCu::init( m_pcTrQuant, m_pcIntraSearch, m_pcInterSearch );

  m_modeCtrl->init( m_pcEncCfg, m_pcRateCtrl, m_pcRdCost );
#if JVET_Y0240_BIM
  m_modeCtrl->setBIMQPMap( m_pcEncCfg->getAdaptQPmap() );
#endif

  // cross-link the mode controller with the inter and intra searches
  m_pcInterSearch->setModeCtrl( m_modeCtrl );
  m_modeCtrl->setInterSearch(m_pcInterSearch);
  m_pcIntraSearch->setModeCtrl( m_modeCtrl );
}

// ====================================================================================================================
// Public member functions
// ====================================================================================================================

/** Runs the CU-level search for one CTU and copies the selected result into \p cs.
 *
 *  The search (xCompressCU) is run once starting from the luma channel type and, when
 *  \p cs uses a dual tree and chroma is enabled, a second time for the chroma channel type.
 *
 *  \param cs         picture-level coding structure that receives the result
 *  \param area       area of the CTU to encode
 *  \param ctuRsAddr  CTU address; used here only to index the rate-control LCU data
 *  \param prevQP     previous QP per channel type (indexed with CH_L / CH_C)
 *  \param currQP     current QP per channel type (indexed with CH_L / CH_C)
 */
void EncCu::compressCtu( CodingStructure& cs, const UnitArea& area, const unsigned ctuRsAddr, const int prevQP[], const int currQP[] )
{
  m_modeCtrl->initCTUEncoding( *cs.slice );
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
  cs.treeType = TREE_D;
#endif
  // the palette cost maps are cleared before and after the search
  cs.slice->m_mapPltCost[0].clear();
  cs.slice->m_mapPltCost[1].clear();
#if ENABLE_SPLIT_PARALLELISM
  if( m_pcEncCfg->getNumSplitThreads() > 1 )
  {
    // initialise whichever cache interfaces the mode controllers of the split-job encoders implement
    for( int jId = 1; jId < NUM_RESERVERD_SPLIT_JOBS; jId++ )
    {
      EncCu*            jobEncCu  = m_pcEncLib->getCuEncoder( cs.picture->scheduler.getSplitDataId( jId ) );
      CacheBlkInfoCtrl* cacheCtrl = dynamic_cast< CacheBlkInfoCtrl* >( jobEncCu->m_modeCtrl );
#if REUSE_CU_RESULTS
      BestEncInfoCache* bestCache = dynamic_cast< BestEncInfoCache* >( jobEncCu->m_modeCtrl );
#endif
      SaveLoadEncInfoSbt *sbtCache = dynamic_cast< SaveLoadEncInfoSbt* >( jobEncCu->m_modeCtrl );
      if( cacheCtrl )
      {
        cacheCtrl->init( *cs.slice );
      }
#if REUSE_CU_RESULTS
      if (bestCache)
      {
        bestCache->init(*cs.slice);
      }
#endif
      if (sbtCache)
      {
        sbtCache->init(*cs.slice);
      }
    }
  }

#if REUSE_CU_RESULTS
  if( auto* cacheCtrl = dynamic_cast<BestEncInfoCache*>( m_modeCtrl ) ) { cacheCtrl->tick(); }
#endif
  if( auto* cacheCtrl = dynamic_cast<CacheBlkInfoCtrl*>( m_modeCtrl ) ) { cacheCtrl->tick(); }
#endif
  // init the partitioning manager
  QTBTPartitioner partitioner;
  partitioner.initCtu(area, CH_L, *cs.slice);
  if (m_pcEncCfg->getIBCMode())
  {
    // the IBC search state is reset at the CTU whose luma origin is (0,0); the per-CTU record is reset for every CTU
    if (area.lx() == 0 && area.ly() == 0)
    {
      m_pcInterSearch->resetIbcSearch();
    }
    m_pcInterSearch->resetCtuRecord();
    m_ctuIbcSearchRangeX = m_pcEncCfg->getIBCLocalSearchRangeX();
    m_ctuIbcSearchRangeY = m_pcEncCfg->getIBCLocalSearchRangeY();
  }
  // Adaptive IBC search range: each of the two conditions below halves the range,
  // so the range can end up at a quarter of the configured value.
  if (m_pcEncCfg->getIBCMode() && m_pcEncCfg->getIBCHashSearch() && (m_pcEncCfg->getIBCFastMethod() & IBC_FAST_METHOD_ADAPTIVE_SEARCHRANGE))
  {
    const int hashHitRatio = m_ibcHashMap.getHashHitRatio(area.Y()); // in percent
    if (hashHitRatio < 5) // 5%
    {
      m_ctuIbcSearchRangeX >>= 1;
      m_ctuIbcSearchRangeY >>= 1;
    }
    // the slice has at least one list-0 reference picture
    if (cs.slice->getNumRefIdx(REF_PIC_LIST_0) > 0)
    {
      m_ctuIbcSearchRangeX >>= 1;
      m_ctuIbcSearchRangeY >>= 1;
    }
  }
  // init current context pointer
  m_CurrCtx = m_CtxBuffer.data();

  // pick the pre-allocated temp/best structures matching the CTU's luma size
  CodingStructure *tempCS = m_pTempCS[gp_sizeIdxInfo->idxFrom( area.lumaSize().width )][gp_sizeIdxInfo->idxFrom( area.lumaSize().height )];
  CodingStructure *bestCS = m_pBestCS[gp_sizeIdxInfo->idxFrom( area.lumaSize().width )][gp_sizeIdxInfo->idxFrom( area.lumaSize().height )];

  cs.initSubStructure(*tempCS, partitioner.chType, partitioner.currArea(), false);
  cs.initSubStructure(*bestCS, partitioner.chType, partitioner.currArea(), false);
  tempCS->currQP[CH_L] = bestCS->currQP[CH_L] =
  tempCS->baseQP       = bestCS->baseQP       = currQP[CH_L];
  tempCS->prevQP[CH_L] = bestCS->prevQP[CH_L] = prevQP[CH_L];

  // tempCS / bestCS are passed as references to pointers and may be exchanged by the search
  xCompressCU(tempCS, bestCS, partitioner);
  cs.slice->m_mapPltCost[0].clear();
  cs.slice->m_mapPltCost[1].clear();
  // all signals were already copied during compression if the CTU was split - at this point only the structures are copied to the top level CS
  const bool copyUnsplitCTUSignals = bestCS->cus.size() == 1;
  cs.useSubStructure(*bestCS, partitioner.chType, CS::getArea(*bestCS, area, partitioner.chType), copyUnsplitCTUSignals,
                     false, false, copyUnsplitCTUSignals, true);

  // dual tree with chroma: repeat the search for the chroma channel type
  if (CS::isDualITree (cs) && isChromaEnabled (cs.pcv->chrFormat))
  {
    // restore the context state saved in m_CurrCtx->start before the chroma pass
    m_CABACEstimator->getCtx() = m_CurrCtx->start;

    partitioner.initCtu(area, CH_C, *cs.slice);

    cs.initSubStructure(*tempCS, partitioner.chType, partitioner.currArea(), false);
    cs.initSubStructure(*bestCS, partitioner.chType, partitioner.currArea(), false);
    tempCS->currQP[CH_C] = bestCS->currQP[CH_C] =
    tempCS->baseQP       = bestCS->baseQP       = currQP[CH_C];
    tempCS->prevQP[CH_C] = bestCS->prevQP[CH_C] = prevQP[CH_C];

    xCompressCU(tempCS, bestCS, partitioner);

    const bool copyUnsplitCTUSignals = bestCS->cus.size() == 1;
    cs.useSubStructure(*bestCS, partitioner.chType, CS::getArea(*bestCS, area, partitioner.chType),
                       copyUnsplitCTUSignals, false, false, copyUnsplitCTUSignals, true);
  }

  if (m_pcEncCfg->getUseRateCtrl())
  {
    // per-pixel distortion of the best structure, stored for the rate controller
    (m_pcRateCtrl->getRCPic()->getLCU(ctuRsAddr)).m_actualMSE = (double)bestCS->dist / (double)m_pcRateCtrl->getRCPic()->getLCU(ctuRsAddr).m_numberOfPixel;
  }
  // reset context states and uninit context pointer
  m_CABACEstimator->getCtx() = m_CurrCtx->start;
  m_CurrCtx                  = 0;


  // Ensure that a coding was found
  // Selected mode's RD-cost must be not MAX_DOUBLE.
  CHECK( bestCS->cus.empty()                                   , "No possible encoding found" );
  CHECK( bestCS->cus[0]->predMode == NUMBER_OF_PREDICTION_MODES, "No possible encoding found" );
  CHECK( bestCS->cost             == MAX_DOUBLE                , "No possible encoding found" );
}

// ====================================================================================================================
// Protected member functions
// ====================================================================================================================

/** Sum of absolute 8x8 Hadamard coefficients of the source samples themselves
 *  (no prediction is subtracted), with the DC coefficient excluded.
 *
 *  \param piOrg       top-left sample of the 8x8 block
 *  \param iStrideOrg  distance, in samples, between vertically adjacent samples
 *  \return            (sum of |AC coefficients| + 2) >> 2
 */
static int xCalcHADs8x8_ISlice(const Pel *piOrg, const int iStrideOrg)
{
  // In-place 8-point butterfly over elements v[0], v[step], ..., v[7*step];
  // three passes pairing elements 4, 2 and 1 positions apart.
  const auto transform8 = [](int *v, const int step)
  {
    for (int half = 4; half >= 1; half >>= 1)
    {
      for (int base = 0; base < 8; base += 2 * half)
      {
        for (int k = base; k < base + half; k++)
        {
          const int a = v[k * step];
          const int b = v[(k + half) * step];
          v[k * step]          = a + b;
          v[(k + half) * step] = a - b;
        }
      }
    }
  };

  // copy the block into a dense row-major working array
  int coeff[64];
  for (int y = 0; y < 8; y++)
  {
    for (int x = 0; x < 8; x++)
    {
      coeff[(y << 3) + x] = piOrg[y * iStrideOrg + x];
    }
  }

  // horizontal pass: one transform per row
  for (int y = 0; y < 8; y++)
  {
    transform8(coeff + (y << 3), 1);
  }

  // vertical pass: one transform per column
  for (int x = 0; x < 8; x++)
  {
    transform8(coeff + x, 8);
  }

  int sumAbs = 0;
  for (int idx = 0; idx < 64; idx++)
  {
    sumAbs += abs(coeff[idx]);
  }
  sumAbs -= abs(coeff[0]);   // coeff[0] is the DC term
  return (sumAbs + 2) >> 2;
}

int  EncCu::updateCtuDataISlice(const CPelBuf buf)
{
  int  xBl, yBl;
  const int iBlkSize = 8;
  const Pel* pOrgInit = buf.buf;
  int  iStrideOrig = buf.stride;

  int iSumHad = 0;
  for( yBl = 0; ( yBl + iBlkSize ) <= buf.height; yBl += iBlkSize )
  {
    for( xBl = 0; ( xBl + iBlkSize ) <= buf.width; xBl += iBlkSize )
    {
      const Pel* pOrg = pOrgInit + iStrideOrig*yBl + xBl;
      iSumHad += xCalcHADs8x8_ISlice( pOrg, iStrideOrig );
    }
  }
  return( iSumHad );
}

/** Compares the just-tested candidate with the current best and keeps the better one.
 *
 *  When the mode controller accepts the candidate, the two pointers are exchanged so that
 *  \p bestCS refers to the accepted result, the current CABAC context is stored as the
 *  level's best context and m_bestModeUpdated is set. In every case the CABAC estimator
 *  is restored to the context saved at the start of the level.
 *
 *  \param tempCS       candidate structure (in/out, may be swapped with bestCS)
 *  \param bestCS       best structure so far (in/out, may be swapped with tempCS)
 *  \param partitioner  current partitioner state, forwarded to the mode controller
 *  \param encTestMode  the mode that produced tempCS
 *  \return             true if the candidate replaced the previous best
 */
bool EncCu::xCheckBestMode( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
{
  bool candidateAccepted = false;

  const bool hasCandidate = !tempCS->cus.empty();
  if( hasCandidate )
  {
    // sanity check for an unsplit candidate
    if( tempCS->cus.size() == 1 )
    {
      const CodingUnit& cu = *tempCS->cus.front();
      CHECK( cu.skip && !cu.firstPU->mergeFlag, "Skip flag without a merge flag is not allowed!" );
    }

#if WCG_EXT
    DTRACE_BEST_MODE( tempCS, bestCS, m_pcRdCost->getLambda( true ) );
#else
    DTRACE_BEST_MODE( tempCS, bestCS, m_pcRdCost->getLambda() );
#endif

#if MULTI_HYP_PRED
    const bool collectMeResults = tempCS->sps->getUseInterMultiHyp() && tempCS->slice->isInterB();
    if( collectMeResults )
    {
      // move the candidate's ME results into the shared list, then empty the source
      // so that they are not appended twice
      m_baseResultsForMH.insert(m_baseResultsForMH.end(), tempCS->m_meResults.begin(), tempCS->m_meResults.end());
      tempCS->m_meResults.clear();
    }
#endif

    if( m_modeCtrl->useModeResult( encTestMode, tempCS, partitioner ) )
    {
      // the candidate becomes the new best; the old best is recycled as scratch
      CodingStructure* const previousBest = bestCS;
      bestCS = tempCS;
      tempCS = previousBest;

      // remember the context reached after coding the new best
      m_CurrCtx->best   = m_CABACEstimator->getCtx();
      m_bestModeUpdated = true;
      candidateAccepted = true;
    }
  }

  // rewind the estimator to the context saved at the start of this level
  m_CABACEstimator->getCtx() = m_CurrCtx->start;
  return candidateAccepted;
}

void EncCu::xCompressCU( CodingStructure*& tempCS, CodingStructure*& bestCS, Partitioner& partitioner, double maxCostAllowed )
{
  CHECK(maxCostAllowed < 0, "Wrong value of maxCostAllowed!");
#if JVET_AA0133_INTER_MTS_OPT
  m_pcInterSearch->setBestCost(maxCostAllowed);
#endif
#if ENABLE_SPLIT_PARALLELISM
  CHECK( m_dataId != tempCS->picture->scheduler.getDataId(), "Working in the wrong dataId!" );

  if( m_pcEncCfg->getNumSplitThreads() != 1 && tempCS->picture->scheduler.getSplitJobId() == 0 )
  {
    if( m_modeCtrl->isParallelSplit( *tempCS, partitioner ) )
    {
      m_modeCtrl->setParallelSplit( true );
      xCompressCUParallel( tempCS, bestCS, partitioner );
      return;
    }
  }

#endif
  uint32_t compBegin;
  uint32_t numComp;
  bool jointPLT = false;
#if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
  if (CS::isDualITree(*bestCS))
#else
  if (partitioner.isSepTree( *tempCS ))
#endif
  {
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
    if( !CS::isDualITree(*tempCS) && partitioner.treeType != TREE_D )
    {
      compBegin = COMPONENT_Y;
      numComp = (tempCS->area.chromaFormat != CHROMA_400)?3: 1;
      jointPLT = true;
    }
    else
#endif
    {
    if (isLuma(partitioner.chType))
    {
      compBegin = COMPONENT_Y;
      numComp = 1;
    }
    else
    {
      compBegin = COMPONENT_Cb;
      numComp = 2;
    }
    }
  }
  else
  {
    compBegin = COMPONENT_Y;
    numComp = (tempCS->area.chromaFormat != CHROMA_400) ? 3 : 1;
    jointPLT = true;
  }
  SplitSeries splitmode = -1;
  uint8_t   bestLastPLTSize[MAX_NUM_CHANNEL_TYPE];
  Pel       bestLastPLT[MAX_NUM_COMPONENT][MAXPLTPREDSIZE]; // store LastPLT for
  uint8_t   curLastPLTSize[MAX_NUM_CHANNEL_TYPE];
  Pel       curLastPLT[MAX_NUM_COMPONENT][MAXPLTPREDSIZE]; // store LastPLT if no partition
  for (int i = compBegin; i < (compBegin + numComp); i++)
  {
    ComponentID comID = jointPLT ? (ComponentID)compBegin : ((i > 0) ? COMPONENT_Cb : COMPONENT_Y);
    bestLastPLTSize[comID] = 0;
    curLastPLTSize[comID] = tempCS->prevPLT.curPLTSize[comID];
    memcpy(curLastPLT[i], tempCS->prevPLT.curPLT[i], tempCS->prevPLT.curPLTSize[comID] * sizeof(Pel));
  }

  Slice&   slice      = *tempCS->slice;
  const PPS &pps      = *tempCS->pps;
  const SPS &sps      = *tempCS->sps;
  const uint32_t uiLPelX  = tempCS->area.Y().lumaPos().x;
  const uint32_t uiTPelY  = tempCS->area.Y().lumaPos().y;
#if ENABLE_OBMC
  const unsigned wIdx = gp_sizeIdxInfo->idxFrom(partitioner.currArea().lwidth());
  const unsigned hIdx = gp_sizeIdxInfo->idxFrom(partitioner.currArea().lheight());
#endif
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
  const ModeType modeTypeParent  = partitioner.modeType;
  const TreeType treeTypeParent  = partitioner.treeType;
  const ChannelType chTypeParent = partitioner.chType;
#endif
  const UnitArea currCsArea = clipArea( CS::getArea( *bestCS, bestCS->area, partitioner.chType ), *tempCS->picture );
#if MULTI_HYP_PRED
  m_baseResultsForMH.clear();
#endif
#if ENABLE_OBMC
  if (m_pTempCUWoOBMC && !slice.isIntra())
  {
    tempCS->initSubStructure(*m_pTempCUWoOBMC[wIdx][hIdx], partitioner.chType, partitioner.currArea(), false);
  }
#endif
#if JVET_Y0152_TT_ENC_SPEEDUP
  tempCS->splitRdCostBest = NULL;
#endif
  m_modeCtrl->initCULevel( partitioner, *tempCS );
#if JVET_Z0054_BLK_REF_PIC_REORDER
  m_pcInterSearch->setFillCurTplAboveARMC(false);
  m_pcInterSearch->setFillCurTplLeftARMC(false);
#endif

#if JVET_Z0118_GDR
  if (m_pcEncCfg->getGdrEnabled())
  {
    bool isCuInCleanArea = false;
    bool isCuInRefreshArea = false;

    bool isInGdrInterval = slice.getPicHeader()->getInGdrInterval();
    bool isRecoveryPocPic = slice.getPicHeader()->getIsGdrRecoveryPocPic();
    
    // 1.0 pic in GDR interval
    if (isInGdrInterval || isRecoveryPocPic)
    {
      // 1.1 set intra/inter area     
      int gdrBegX = tempCS->picHeader->getGdrBegX();
      int gdrEndX = tempCS->picHeader->getGdrEndX();

      isCuInCleanArea   = tempCS->isClean(tempCS->area.Y(), CHANNEL_TYPE_LUMA);     
      isCuInRefreshArea = tempCS->withinRefresh(gdrBegX, gdrEndX);

      // 1.2 switch recon based on clean/dirty current area
      tempCS->setReconBuf((isCuInCleanArea) ? PIC_RECONSTRUCTION_1 : PIC_RECONSTRUCTION_0);
      tempCS->picture->setCleanDirty(isCuInCleanArea);

      for (int rlist = REF_PIC_LIST_0; rlist < NUM_REF_PIC_LIST_01; rlist++)
      {
        int n = slice.getNumRefIdx((RefPicList)rlist);
        for (int idx = 0; idx < n; idx++)
        {
          Picture *refPic = slice.getReferencePicture((RefPicList)rlist, idx);
          if (refPic)
          {
            refPic->setCleanDirty(isCuInCleanArea);
          }
        }
      }

      // Need to keep the begining of intra refresh area when HashME is enabled
      bool splitCondition = tempCS->isCuCrossVB(gdrEndX);
      bool forceRefreshIRA = false;
      if (m_pcEncCfg->getUseHashME())
      {
        splitCondition |= tempCS->isCuCrossIRA(gdrBegX);
        forceRefreshIRA = true;
      }


      if (isCuInRefreshArea)
      {
        if (forceRefreshIRA)
        {
          m_modeCtrl->forceIntraMode();
        }
      }

      if (splitCondition)
      {
        // remove every prediction mode (remain split only)
        m_modeCtrl->forceRemovePredMode();

        const unsigned minQtSize = tempCS->pcv->getMinQtSize(*tempCS->slice, CHANNEL_TYPE_LUMA); // 8

        if (tempCS->area.lheight() <= minQtSize)
        {
          m_modeCtrl->forceRemoveTTH();
        }

        if (tempCS->area.lheight() < minQtSize)
        {
          m_modeCtrl->forceRemoveBTH();
        }

        if (tempCS->area.lwidth() > minQtSize * 2)
        {
          m_modeCtrl->forceRemoveBTV();
          m_modeCtrl->forceRemoveTTV();
        }

        if (tempCS->area.lwidth() < minQtSize || tempCS->area.lheight() < minQtSize)
        {
          m_modeCtrl->forceRemoveQT();
        }

        if (tempCS->area.lwidth() != tempCS->area.lheight())
        {
          m_modeCtrl->forceRemoveQT();
        }

        if (!m_modeCtrl->anyPredModeLeft())
        {
          m_modeCtrl->forceRemoveDontSplit();
        }
      }
    }
    // 2. pic in non-GDR interval
    else
    {
      tempCS->setReconBuf(PIC_RECONSTRUCTION_0);
      tempCS->picture->setCleanDirty(false);
            
      {
        // 2.1 setup reference picture for non-GDR
        for (int rlist = REF_PIC_LIST_0; rlist < NUM_REF_PIC_LIST_01; rlist++)
        {
          int n = slice.getNumRefIdx((RefPicList)rlist);
          for (int idx = 0; idx < n; idx++)
          {
            Picture *refPic = slice.getReferencePicture((RefPicList)rlist, idx);

            if (refPic)
            {
              bool isRefInGdrInterval  = refPic->cs->picHeader->getInGdrInterval();
              bool isRefRecoveryPocPic = refPic->cs->picHeader->getIsGdrRecoveryPocPic();

              if (isRefInGdrInterval || isRefRecoveryPocPic)
              {
                refPic->setCleanDirty(true);
              }
              else
              {
                refPic->setCleanDirty(false);
              }
            }
          }
        }
      }
    }
  }
#endif


  if( partitioner.currQtDepth == 0 && partitioner.currMtDepth == 0 && !tempCS->slice->isIntra() && ( sps.getUseSBT() || sps.getUseInterMTS() ) )
  {
    auto slsSbt = dynamic_cast<SaveLoadEncInfoSbt*>( m_modeCtrl );
    int maxSLSize = sps.getUseSBT() ? tempCS->slice->getSPS()->getMaxTbSize() : MTS_INTER_MAX_CU_SIZE;
    slsSbt->resetSaveloadSbt( maxSLSize );
#if ENABLE_SPLIT_PARALLELISM
    CHECK( tempCS->picture->scheduler.getSplitJobId() != 0, "The SBT search reset need to happen in sequential region." );
    if (m_pcEncCfg->getNumSplitThreads() > 1)
    {
      for (int jId = 1; jId < NUM_RESERVERD_SPLIT_JOBS; jId++)
      {
        auto slsSbt = dynamic_cast<SaveLoadEncInfoSbt *>(m_pcEncLib->getCuEncoder(jId)->m_modeCtrl);
        slsSbt->resetSaveloadSbt(maxSLSize);
      }
    }
#endif
  }
  m_sbtCostSave[0] = m_sbtCostSave[1] = MAX_DOUBLE;
#if JVET_AA0133_INTER_MTS_OPT
  m_mtsCostSave = MAX_DOUBLE;
#endif
  m_CurrCtx->start = m_CABACEstimator->getCtx();

  m_cuChromaQpOffsetIdxPlus1 = 0;

  if( slice.getUseChromaQpAdj() )
  {
    // TODO M0133 : double check encoder decisions with respect to chroma QG detection and actual encode
    int lgMinCuSize = sps.getLog2MinCodingBlockSize() +
      std::max<int>(0, floorLog2(sps.getCTUSize()) - sps.getLog2MinCodingBlockSize() - int(slice.getCuChromaQpOffsetSubdiv() / 2));
    if( partitioner.currQgChromaEnable() )
    {
      m_cuChromaQpOffsetIdxPlus1 = ( ( uiLPelX >> lgMinCuSize ) + ( uiTPelY >> lgMinCuSize ) ) % ( pps.getChromaQpOffsetListLen() + 1 );
    }
  }

  if( !m_modeCtrl->anyMode() )
  {
    m_modeCtrl->finishCULevel( partitioner );
    return;
  }

  DTRACE_UPDATE( g_trace_ctx, std::make_pair( "cux", uiLPelX ) );
  DTRACE_UPDATE( g_trace_ctx, std::make_pair( "cuy", uiTPelY ) );
  DTRACE_UPDATE( g_trace_ctx, std::make_pair( "cuw", tempCS->area.lwidth() ) );
  DTRACE_UPDATE( g_trace_ctx, std::make_pair( "cuh", tempCS->area.lheight() ) );
  DTRACE( g_trace_ctx, D_COMMON, "@(%4d,%4d) [%2dx%2d]\n", tempCS->area.lx(), tempCS->area.ly(), tempCS->area.lwidth(), tempCS->area.lheight() );


  m_pcInterSearch->resetSavedAffineMotion();
#if TM_AMVP
  if (!slice.isIntra())
  {
    m_pcInterSearch->clearTplAmvpBuffer();
  }
#endif
#if INTER_LIC
  m_pcInterSearch->m_fastLicCtrl.init();
#endif

#if MULTI_HYP_PRED
  const UnitArea localUnitArea(tempCS->area.chromaFormat, Area(0, 0, tempCS->area.Y().width, tempCS->area.Y().height));
  m_pcInterSearch->initMHPTmpBuffer(m_acRealMergeBuffer + MRG_MAX_NUM_CANDS, GEO_MAX_NUM_UNI_CANDS,
    m_acGeoWeightedBuffer, GEO_MAX_TRY_WEIGHTED_SAD,
    localUnitArea);
#endif
#if JVET_W0097_GPM_MMVD_TM
  m_mergeCandAvail = false;
#endif
  double bestIntPelCost = MAX_DOUBLE;

  if (tempCS->slice->getSPS()->getUseColorTrans())
  {
    tempCS->tmpColorSpaceCost = MAX_DOUBLE;
    bestCS->tmpColorSpaceCost = MAX_DOUBLE;
    tempCS->firstColorSpaceSelected = true;
    bestCS->firstColorSpaceSelected = true;
  }

  if (tempCS->slice->getSPS()->getUseColorTrans() && !CS::isDualITree(*tempCS))
  {
    tempCS->firstColorSpaceTestOnly = false;
    bestCS->firstColorSpaceTestOnly = false;
    tempCS->tmpColorSpaceIntraCost[0] = MAX_DOUBLE;
    tempCS->tmpColorSpaceIntraCost[1] = MAX_DOUBLE;
    bestCS->tmpColorSpaceIntraCost[0] = MAX_DOUBLE;
    bestCS->tmpColorSpaceIntraCost[1] = MAX_DOUBLE;

    if (tempCS->bestParent && tempCS->bestParent->firstColorSpaceTestOnly)
    {
      tempCS->firstColorSpaceTestOnly = bestCS->firstColorSpaceTestOnly = true;
    }
  }

#if JVET_Y0152_TT_ENC_SPEEDUP
  double splitRdCostBest[NUM_PART_SPLIT];
  std::fill(std::begin(splitRdCostBest), std::end(splitRdCostBest), MAX_DOUBLE);
#endif
  if( tempCS->slice->getCheckLDC() )
  {
    m_bestBcwCost[0] = m_bestBcwCost[1] = std::numeric_limits<double>::max();
    m_bestBcwIdx[0] = m_bestBcwIdx[1] = -1;
  }
  do
  {
    for (int i = compBegin; i < (compBegin + numComp); i++)
    {
      ComponentID comID = jointPLT ? (ComponentID)compBegin : ((i > 0) ? COMPONENT_Cb : COMPONENT_Y);
      tempCS->prevPLT.curPLTSize[comID] = curLastPLTSize[comID];
      memcpy(tempCS->prevPLT.curPLT[i], curLastPLT[i], curLastPLTSize[comID] * sizeof(Pel));
    }
    EncTestMode currTestMode = m_modeCtrl->currTestMode();
    currTestMode.maxCostAllowed = maxCostAllowed;
#if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
    if (pps.getUseDQP() && CS::isDualITree(*tempCS) && isChroma(partitioner.chType))
#else
    if (pps.getUseDQP() && partitioner.isSepTree(*tempCS) && isChroma( partitioner.chType ))
#endif
    {
      const Position chromaCentral(tempCS->area.Cb().chromaPos().offset(tempCS->area.Cb().chromaSize().width >> 1, tempCS->area.Cb().chromaSize().height >> 1));
      const Position lumaRefPos(chromaCentral.x << getComponentScaleX(COMPONENT_Cb, tempCS->area.chromaFormat), chromaCentral.y << getComponentScaleY(COMPONENT_Cb, tempCS->area.chromaFormat));
      const CodingStructure* baseCS = bestCS->picture->cs;
      const CodingUnit* colLumaCu = baseCS->getCU(lumaRefPos, CHANNEL_TYPE_LUMA);

      if (colLumaCu)
      {
        currTestMode.qp = colLumaCu->qp;
      }
    }

#if SHARP_LUMA_DELTA_QP || ENABLE_QPA_SUB_CTU
    if (partitioner.currQgEnable() && (
#if JVET_Y0240_BIM
        (m_pcEncCfg->getBIM()) ||
#endif
#if SHARP_LUMA_DELTA_QP
        (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()) ||
#endif
#if ENABLE_QPA_SUB_CTU
        (m_pcEncCfg->getUsePerceptQPA() && !m_pcEncCfg->getUseRateCtrl() && pps.getUseDQP())
#else
        false
#endif
      ))
    {
#if ENABLE_SPLIT_PARALLELISM
      CHECK( tempCS->picture->scheduler.getSplitJobId() > 0, "Changing lambda is only allowed in the master thread!" );
#endif
      if (currTestMode.qp >= 0)
      {
        updateLambda (&slice, currTestMode.qp,
 #if WCG_EXT && ER_CHROMA_QP_WCG_PPS
                      m_pcEncCfg->getWCGChromaQPControl().isEnabled(),
 #endif
                      CS::isDualITree (*tempCS) || (partitioner.currDepth == 0));
      }
    }
#endif

    if( currTestMode.type == ETM_INTER_ME )
    {
#if ENABLE_OBMC
      bool tryObmc = true;
#if JVET_AA0129_INTERHASH_OBMCOFF_RD
      if (m_pcEncCfg->getUseHashME())
      {
        tryObmc = false;
      }
#endif
#endif

      if( ( currTestMode.opts & ETO_IMV ) != 0 )
      {
        const bool skipAltHpelIF = ( int( ( currTestMode.opts & ETO_IMV ) >> ETO_IMV_SHIFT ) == 4 ) && ( bestIntPelCost > 1.25 * bestCS->cost );
        if (!skipAltHpelIF)
        {
          tempCS->bestCS = bestCS;
#if ENABLE_OBMC
          tryObmc = xCheckRDCostInterIMV(tempCS, bestCS, partitioner, currTestMode, bestIntPelCost);
#else
          xCheckRDCostInterIMV(tempCS, bestCS, partitioner, currTestMode, bestIntPelCost);
#endif
          tempCS->bestCS = nullptr;
#if JVET_Y0152_TT_ENC_SPEEDUP
          splitRdCostBest[CTU_LEVEL] = bestCS->cost;
          tempCS->splitRdCostBest = splitRdCostBest;
#endif
        }
      }
      else
      {
        tempCS->bestCS = bestCS;
#if ENABLE_OBMC && JVET_AA0129_INTERHASH_OBMCOFF_RD
        tryObmc = xCheckRDCostInter(tempCS, bestCS, partitioner, currTestMode);
#else
        xCheckRDCostInter( tempCS, bestCS, partitioner, currTestMode );
#endif
        tempCS->bestCS = nullptr;
#if JVET_Y0152_TT_ENC_SPEEDUP
        splitRdCostBest[CTU_LEVEL] = bestCS->cost;
        tempCS->splitRdCostBest = splitRdCostBest;
#endif
      }
#if ENABLE_OBMC
#if JVET_AA0129_INTERHASH_OBMCOFF_RD
      if ((!m_pcEncCfg->getUseHashME() && tryObmc && tempCS->cus.size() != 0) || (m_pcEncCfg->getUseHashME() && tryObmc))//todo 
#else
      if (tryObmc && tempCS->cus.size() != 0)//todo
#endif
      {
        xCheckRDCostInterWoOBMC(tempCS, bestCS, partitioner, currTestMode);
      }
#endif
    }
    else if (currTestMode.type == ETM_HASH_INTER)
    {
#if ENABLE_OBMC && JVET_AA0129_INTERHASH_OBMCOFF_RD
      bool tryObmc = xCheckRDCostHashInter( tempCS, bestCS, partitioner, currTestMode );
#else
      xCheckRDCostHashInter( tempCS, bestCS, partitioner, currTestMode );
#endif
#if JVET_Y0152_TT_ENC_SPEEDUP
      splitRdCostBest[CTU_LEVEL] = bestCS->cost;
      tempCS->splitRdCostBest = splitRdCostBest;
#endif
#if ENABLE_OBMC && JVET_AA0129_INTERHASH_OBMCOFF_RD
      if (tryObmc)
      {
        xCheckRDCostInterWoOBMC(  tempCS, bestCS, partitioner, currTestMode );
      }
#endif
    }
#if !MERGE_ENC_OPT
    else if( currTestMode.type == ETM_AFFINE )
    {
      xCheckRDCostAffineMerge2Nx2N( tempCS, bestCS, partitioner, currTestMode );
#if JVET_Y0152_TT_ENC_SPEEDUP
      splitRdCostBest[CTU_LEVEL] = bestCS->cost;
      tempCS->splitRdCostBest = splitRdCostBest;
#endif
    }
#endif
#if AFFINE_MMVD && !MERGE_ENC_OPT
    else if (currTestMode.type == ETM_AF_MMVD)
    {
      xCheckRDCostAffineMmvd2Nx2N(tempCS, bestCS, partitioner, currTestMode);
#if JVET_Y0152_TT_ENC_SPEEDUP
      splitRdCostBest[CTU_LEVEL] = bestCS->cost;
      tempCS->splitRdCostBest = splitRdCostBest;
#endif
    }
#endif
#if TM_MRG && !MERGE_ENC_OPT
    else if (currTestMode.type == ETM_MERGE_TM)
    {
      xCheckRDCostTMMerge2Nx2N(tempCS, bestCS, partitioner, currTestMode);
#if JVET_Y0152_TT_ENC_SPEEDUP
      splitRdCostBest[CTU_LEVEL] = bestCS->cost;
      tempCS->splitRdCostBest = splitRdCostBest;
#endif
    }
#endif
#if REUSE_CU_RESULTS
    else if( currTestMode.type == ETM_RECO_CACHED )
    {
      xReuseCachedResult( tempCS, bestCS, partitioner );
#if JVET_Y0152_TT_ENC_SPEEDUP
      splitRdCostBest[CTU_LEVEL] = bestCS->cost;
      tempCS->splitRdCostBest = splitRdCostBest;
#endif
    }
#endif
    else if( currTestMode.type == ETM_MERGE_SKIP )
    {
      xCheckRDCostMerge2Nx2N( tempCS, bestCS, partitioner, currTestMode );

      CodingUnit* cu = bestCS->getCU(partitioner.chType);

      if (cu)
      cu->mmvdSkip = cu->skip == false ? false : cu->mmvdSkip;
#if JVET_Y0152_TT_ENC_SPEEDUP
      splitRdCostBest[CTU_LEVEL] = bestCS->cost;
      tempCS->splitRdCostBest = splitRdCostBest;
#endif
    }
    else if( currTestMode.type == ETM_MERGE_GEO )
    {
#if JVET_W0097_GPM_MMVD_TM
      CodedCUInfo    &relatedCU = ((EncModeCtrlMTnoRQT *)m_modeCtrl)->getBlkInfo(partitioner.currArea());
      if (!relatedCU.isGPMTested)
      {
        xCheckRDCostMergeGeoComb2Nx2N(tempCS, bestCS, partitioner, currTestMode);
        relatedCU.isGPMTested = 1;
      }
      else
      {
        xCheckRDCostMergeGeoComb2Nx2N(tempCS, bestCS, partitioner, currTestMode, true);
      }
#else
      xCheckRDCostMergeGeo2Nx2N( tempCS, bestCS, partitioner, currTestMode );
#endif
#if JVET_Y0152_TT_ENC_SPEEDUP
      splitRdCostBest[CTU_LEVEL] = bestCS->cost;
      tempCS->splitRdCostBest = splitRdCostBest;
#endif
    }
#if MULTI_HYP_PRED
    else if (currTestMode.type == ETM_INTER_MULTIHYP)
    {
      xCheckRDCostInterMultiHyp2Nx2N(tempCS, bestCS, partitioner, currTestMode);
#if JVET_Y0152_TT_ENC_SPEEDUP
      splitRdCostBest[CTU_LEVEL] = bestCS->cost;
      tempCS->splitRdCostBest = splitRdCostBest;
#endif
    }
#endif
    else if( currTestMode.type == ETM_INTRA )
    {
      if (slice.getSPS()->getUseColorTrans() && !CS::isDualITree(*tempCS))
      {
        bool skipSecColorSpace = false;
        skipSecColorSpace = xCheckRDCostIntra(tempCS, bestCS, partitioner, currTestMode, (m_pcEncCfg->getRGBFormatFlag() ? true : false));
        if ((m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && slice.isLossless()) && !m_pcEncCfg->getRGBFormatFlag())
        {
          skipSecColorSpace = true;
        }
        if (!skipSecColorSpace && !tempCS->firstColorSpaceTestOnly)
        {
          xCheckRDCostIntra(tempCS, bestCS, partitioner, currTestMode, (m_pcEncCfg->getRGBFormatFlag() ? false : true));
        }

        if (!tempCS->firstColorSpaceTestOnly)
        {
          if (tempCS->tmpColorSpaceIntraCost[0] != MAX_DOUBLE && tempCS->tmpColorSpaceIntraCost[1] != MAX_DOUBLE)
          {
            double skipCostRatio = m_pcEncCfg->getRGBFormatFlag() ? 1.1 : 1.0;
            if (tempCS->tmpColorSpaceIntraCost[1] > (skipCostRatio*tempCS->tmpColorSpaceIntraCost[0]))
            {
              tempCS->firstColorSpaceTestOnly = bestCS->firstColorSpaceTestOnly = true;
            }
          }
        }
        else
        {
          CHECK(tempCS->tmpColorSpaceIntraCost[1] != MAX_DOUBLE, "the RD test of the second color space should be skipped");
        }
      }
      else
      {
        xCheckRDCostIntra(tempCS, bestCS, partitioner, currTestMode, false);
      }
#if JVET_Y0152_TT_ENC_SPEEDUP
      splitRdCostBest[CTU_LEVEL] = bestCS->cost;
      tempCS->splitRdCostBest = splitRdCostBest;
#endif
    }
    else if (currTestMode.type == ETM_PALETTE)
    {
      xCheckPLT( tempCS, bestCS, partitioner, currTestMode );
#if JVET_Y0152_TT_ENC_SPEEDUP
      splitRdCostBest[CTU_LEVEL] = bestCS->cost;
      tempCS->splitRdCostBest = splitRdCostBest;
#endif
    }
    else if (currTestMode.type == ETM_IBC)
    {
      xCheckRDCostIBCMode(tempCS, bestCS, partitioner, currTestMode);
#if JVET_Y0152_TT_ENC_SPEEDUP
      splitRdCostBest[CTU_LEVEL] = bestCS->cost;
      tempCS->splitRdCostBest = splitRdCostBest;
#endif
    }
    else if (currTestMode.type == ETM_IBC_MERGE)
    {
      xCheckRDCostIBCModeMerge2Nx2N(tempCS, bestCS, partitioner, currTestMode);
#if JVET_Y0152_TT_ENC_SPEEDUP
      splitRdCostBest[CTU_LEVEL] = bestCS->cost;
      tempCS->splitRdCostBest = splitRdCostBest;
#endif
    }
    else if( isModeSplit( currTestMode ) )
    {
      if (bestCS->cus.size() != 0)
      {
        splitmode = bestCS->cus[0]->splitSeries;
      }
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
      assert( partitioner.modeType == tempCS->modeType );
      int signalModeConsVal = tempCS->signalModeCons( getPartSplit( currTestMode ), partitioner, modeTypeParent );
      int numRoundRdo = signalModeConsVal == LDT_MODE_TYPE_SIGNAL ? 2 : 1;
      bool skipInterPass = false;
      for( int i = 0; i < numRoundRdo; i++ )
      {
        //change cons modes
        if( signalModeConsVal == LDT_MODE_TYPE_SIGNAL )
        {
          CHECK( numRoundRdo != 2, "numRoundRdo shall be 2 - [LDT_MODE_TYPE_SIGNAL]" );
          tempCS->modeType = partitioner.modeType = (i == 0) ? MODE_TYPE_INTER : MODE_TYPE_INTRA;
        }
        else if( signalModeConsVal == LDT_MODE_TYPE_INFER )
        {
          CHECK( numRoundRdo != 1, "numRoundRdo shall be 1 - [LDT_MODE_TYPE_INFER]" );
          tempCS->modeType = partitioner.modeType = MODE_TYPE_INTRA;
        }
        else if( signalModeConsVal == LDT_MODE_TYPE_INHERIT )
        {
          CHECK( numRoundRdo != 1, "numRoundRdo shall be 1 - [LDT_MODE_TYPE_INHERIT]" );
          tempCS->modeType = partitioner.modeType = modeTypeParent;
        }

        //for lite intra encoding fast algorithm, set the status to save inter coding info
        if( modeTypeParent == MODE_TYPE_ALL && tempCS->modeType == MODE_TYPE_INTER )
        {
          m_pcIntraSearch->setSaveCuCostInSCIPU( true );
          m_pcIntraSearch->setNumCuInSCIPU( 0 );
        }
        else if( modeTypeParent == MODE_TYPE_ALL && tempCS->modeType != MODE_TYPE_INTER )
        {
          m_pcIntraSearch->setSaveCuCostInSCIPU( false );
          if( tempCS->modeType == MODE_TYPE_ALL )
          {
            m_pcIntraSearch->setNumCuInSCIPU( 0 );
          }
        }

#if JVET_Y0152_TT_ENC_SPEEDUP
        xCheckModeSplit(tempCS, bestCS, partitioner, currTestMode, modeTypeParent, skipInterPass, splitRdCostBest);
        tempCS->splitRdCostBest = splitRdCostBest;
#else
        xCheckModeSplit( tempCS, bestCS, partitioner, currTestMode, modeTypeParent, skipInterPass );
#endif
#else
#if JVET_Y0152_TT_ENC_SPEEDUP
      xCheckModeSplit(tempCS, bestCS, partitioner, currTestMode, splitRdCostBest);
      tempCS->splitRdCostBest = splitRdCostBest;
#else
      xCheckModeSplit(tempCS, bestCS, partitioner, currTestMode);
#endif
#endif
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
        //recover cons modes
        tempCS->modeType = partitioner.modeType = modeTypeParent;
        tempCS->treeType = partitioner.treeType = treeTypeParent;
        partitioner.chType = chTypeParent;
        if( modeTypeParent == MODE_TYPE_ALL )
        {
          m_pcIntraSearch->setSaveCuCostInSCIPU( false );
          if( numRoundRdo == 2 && tempCS->modeType == MODE_TYPE_INTRA )
          {
            m_pcIntraSearch->initCuAreaCostInSCIPU();
          }
        }
        if( skipInterPass )
        {
          break;
        }
      }
#endif
#if JVET_Z0118_GDR
    if (bestCS->cus.size() > 0 && splitmode != bestCS->cus[0]->splitSeries)
#else
      if (splitmode != bestCS->cus[0]->splitSeries)
#endif
      {
        splitmode = bestCS->cus[0]->splitSeries;
        const CodingUnit&     cu = *bestCS->cus.front();
        cu.cs->prevPLT = bestCS->prevPLT;
        for (int i = compBegin; i < (compBegin + numComp); i++)
        {
          ComponentID comID = jointPLT ? (ComponentID)compBegin : ((i > 0) ? COMPONENT_Cb : COMPONENT_Y);
          bestLastPLTSize[comID] = bestCS->cus[0]->cs->prevPLT.curPLTSize[comID];
          memcpy(bestLastPLT[i], bestCS->cus[0]->cs->prevPLT.curPLT[i], bestCS->cus[0]->cs->prevPLT.curPLTSize[comID] * sizeof(Pel));
        }
      }
    }
    else
    {
      THROW( "Don't know how to handle mode: type = " << currTestMode.type << ", options = " << currTestMode.opts );
    }
  } while( m_modeCtrl->nextMode( *tempCS, partitioner ) );


  //////////////////////////////////////////////////////////////////////////
  // Finishing CU
#if ENABLE_SPLIT_PARALLELISM
  if( bestCS->cus.empty() )
  {
    CHECK( bestCS->cost != MAX_DOUBLE, "Cost should be maximal if no encoding found" );
    CHECK( bestCS->picture->scheduler.getSplitJobId() == 0, "Should always get a result in serial case" );

    m_modeCtrl->finishCULevel( partitioner );
    return;
  }

#endif
  if( tempCS->cost == MAX_DOUBLE && bestCS->cost == MAX_DOUBLE )
  {
    //although some coding modes were planned to be tried in RDO, no coding mode actually finished encoding due to early termination
    //thus tempCS->cost and bestCS->cost are both MAX_DOUBLE; in this case, skip the following process for normal case
    m_modeCtrl->finishCULevel( partitioner );
    return;
  }

  // set context states
  m_CABACEstimator->getCtx() = m_CurrCtx->best;

  // QP from last processed CU for further processing
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
  //copy the qp of the last non-chroma CU
  int numCUInThisNode = (int)bestCS->cus.size();
  if( numCUInThisNode > 1 && bestCS->cus.back()->chType == CHANNEL_TYPE_CHROMA && !CS::isDualITree( *bestCS ) )
  {
    CHECK( bestCS->cus[numCUInThisNode-2]->chType != CHANNEL_TYPE_LUMA, "wrong chType" );
    bestCS->prevQP[partitioner.chType] = bestCS->cus[numCUInThisNode-2]->qp;
  }
  else
  {
#endif
  bestCS->prevQP[partitioner.chType] = bestCS->cus.back()->qp;
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
  }
#endif
  if ((!slice.isIntra() || slice.getSPS()->getIBCFlag())
    && partitioner.chType == CHANNEL_TYPE_LUMA
    && bestCS->cus.size() == 1 && (bestCS->cus.back()->predMode == MODE_INTER || bestCS->cus.back()->predMode == MODE_IBC)
    && bestCS->area.Y() == (*bestCS->cus.back()).Y()
    )
  {
    const CodingUnit&     cu = *bestCS->cus.front();

    bool isIbcSmallBlk = CU::isIBC(cu) && (cu.lwidth() * cu.lheight() <= 16);
    CU::saveMotionInHMVP( cu, isIbcSmallBlk );
  }
  bestCS->picture->getPredBuf(currCsArea).copyFrom(bestCS->getPredBuf(currCsArea));
#if JVET_Z0118_GDR
  bestCS->updateReconMotIPM(currCsArea); // xcomrpessCU - need 
#else
  bestCS->picture->getRecoBuf(currCsArea).copyFrom(bestCS->getRecoBuf(currCsArea));
#endif  
  
  m_modeCtrl->finishCULevel( partitioner );
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
  if( m_pcIntraSearch->getSaveCuCostInSCIPU() && bestCS->cus.size() == 1 )
  {
    m_pcIntraSearch->saveCuAreaCostInSCIPU( Area( partitioner.currArea().lumaPos(), partitioner.currArea().lumaSize() ), bestCS->cost );
  }
#endif
#if ENABLE_SPLIT_PARALLELISM
  if( tempCS->picture->scheduler.getSplitJobId() == 0 && m_pcEncCfg->getNumSplitThreads() != 1 )
  {
    tempCS->picture->finishParallelPart( currCsArea );
  }

#endif
  if (bestCS->cus.size() == 1) // no partition
  {
    CHECK(bestCS->cus[0]->tileIdx != bestCS->pps->getTileIdx(bestCS->area.lumaPos()), "Wrong tile index!");
    if (bestCS->cus[0]->predMode == MODE_PLT)
    {
      for (int i = compBegin; i < (compBegin + numComp); i++)
      {
        ComponentID comID = jointPLT ? (ComponentID)compBegin : ((i > 0) ? COMPONENT_Cb : COMPONENT_Y);
        bestCS->prevPLT.curPLTSize[comID] = curLastPLTSize[comID];
        memcpy(bestCS->prevPLT.curPLT[i], curLastPLT[i], curLastPLTSize[comID] * sizeof(Pel));
      }
      bestCS->reorderPrevPLT(bestCS->prevPLT, bestCS->cus[0]->curPLTSize, bestCS->cus[0]->curPLT, bestCS->cus[0]->reuseflag, compBegin, numComp, jointPLT);
    }
    else
    {
      for (int i = compBegin; i<(compBegin + numComp); i++)
      {
        ComponentID comID = jointPLT ? (ComponentID)compBegin : ((i > 0) ? COMPONENT_Cb : COMPONENT_Y);
        bestCS->prevPLT.curPLTSize[comID] = curLastPLTSize[comID];
        memcpy(bestCS->prevPLT.curPLT[i], curLastPLT[i], bestCS->prevPLT.curPLTSize[comID] * sizeof(Pel));
      }
    }
  }
  else
  {
    for (int i = compBegin; i<(compBegin + numComp); i++)
    {
      ComponentID comID = jointPLT ? (ComponentID)compBegin : ((i > 0) ? COMPONENT_Cb : COMPONENT_Y);
      bestCS->prevPLT.curPLTSize[comID] = bestLastPLTSize[comID];
      memcpy(bestCS->prevPLT.curPLT[i], bestLastPLT[i], bestCS->prevPLT.curPLTSize[comID] * sizeof(Pel));
    }
  }
  const CodingUnit&     cu = *bestCS->cus.front();
  cu.cs->prevPLT = bestCS->prevPLT;
  // Assert if Best prediction mode is NONE
  // Selected mode's RD-cost must be not MAX_DOUBLE.
  CHECK( bestCS->cus.empty()                                   , "No possible encoding found" );
  CHECK( bestCS->cus[0]->predMode == NUMBER_OF_PREDICTION_MODES, "No possible encoding found" );
  CHECK( bestCS->cost             == MAX_DOUBLE                , "No possible encoding found" );
}

#if SHARP_LUMA_DELTA_QP || ENABLE_QPA_SUB_CTU
/** Re-derive the lambda values for a given QP and install them in the quantizer and, optionally, the RD cost.
 *
 *  \param slice               slice providing the base QP, SPS/PPS access and the stored lambdas
 *  \param dQP                 QP for which the new lambda is derived
 *  \param useWCGChromaControl (only when WCG_EXT && ER_CHROMA_QP_WCG_PPS) if set, the update is handed over to
 *                             the slice encoder (initializeLambda() + setUpLambda()) and the function returns
 *  \param updateRdCostLambda  if set, m_pcRdCost receives the new lambda as well; otherwise only m_pcTrQuant
 *                             is updated
 */
void EncCu::updateLambda (Slice* slice, const int dQP,
 #if WCG_EXT && ER_CHROMA_QP_WCG_PPS
                          const bool useWCGChromaControl,
 #endif
                          const bool updateRdCostLambda)
{
#if WCG_EXT && ER_CHROMA_QP_WCG_PPS
  if (useWCGChromaControl)
  {
    // lambda comes from the slice encoder; the QP passed along is clipped to [-QpBDOffset(luma), MAX_QP]
    const double wcgLambda = m_pcSliceEncoder->initializeLambda (slice, m_pcSliceEncoder->getGopId(), slice->getSliceQp(), (double)dQP);
    const int    wcgQp     = Clip3 (-slice->getSPS()->getQpBDOffset (CHANNEL_TYPE_LUMA), MAX_QP, dQP);

    m_pcSliceEncoder->setUpLambda (slice, wcgLambda, wcgQp);
    return;
  }
#endif
  const double baseQp  = (double)slice->getSliceQpBase();
  int          qpInOut = dQP;   // passed to calculateLambda(); its value is not read again in this function
  double       baseLambda;
#if ENABLE_QPA_SUB_CTU
  if (m_pcEncCfg->getUsePerceptQPA() && !m_pcEncCfg->getUseRateCtrl() && slice->getPPS()->getUseDQP())
  {
    // perceptual QPA with delta QP and without rate control: start from the slice's first stored lambda
    baseLambda = slice->getLambdas()[0];
  }
  else
#endif
  {
    baseLambda = m_pcSliceEncoder->calculateLambda (slice, m_pcSliceEncoder->getGopId(), baseQp, baseQp, qpInOut);
  }

  // scale the reference lambda by 2^((dQP - baseQp) / 3)
  const double qpDelta      = (double)dQP - baseQp;
  const double scaledLambda = baseLambda * pow (2.0, qpDelta / 3.0);

#if RDOQ_CHROMA_LAMBDA
  // one lambda per component, each divided by that component's distortion weight
  const double perCompLambda[MAX_NUM_COMPONENT] = {scaledLambda / m_pcRdCost->getDistortionWeight (COMPONENT_Y),
                                                   scaledLambda / m_pcRdCost->getDistortionWeight (COMPONENT_Cb),
                                                   scaledLambda / m_pcRdCost->getDistortionWeight (COMPONENT_Cr)};
  m_pcTrQuant->setLambdas (perCompLambda);
#else
  m_pcTrQuant->setLambda (scaledLambda);
#endif

  if (!updateRdCostLambda)
  {
    return;
  }

  m_pcRdCost->setLambda (scaledLambda, slice->getSPS()->getBitDepths());
#if WCG_EXT
  if (!m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled())
  {
    m_pcRdCost->saveUnadjustedLambda();
  }
#endif
}
#endif // SHARP_LUMA_DELTA_QP || ENABLE_QPA_SUB_CTU

#if ENABLE_SPLIT_PARALLELISM
//#undef DEBUG_PARALLEL_TIMINGS
//#define DEBUG_PARALLEL_TIMINGS 1
/** Run xCompressCU() for the current partitioner area as several split jobs and adopt the cheapest result.
 *
 *  The number of jobs is obtained from the mode controller. Every job uses its own QTBTPartitioner copy and
 *  its own EncCu instance (fetched from m_pcEncLib via the picture scheduler's split data id). After the loop,
 *  the job whose best coding structure has a cost below the incoming bestCS cost (and the lowest such cost)
 *  is copied back into this encoder; the block-info caches of the remaining used jobs are merged afterwards.
 *
 *  \param tempCS       temporary coding structure; re-pointed to m_pTempCS[wIdx][hIdx] if a job result is adopted
 *  \param bestCS       best coding structure so far; re-pointed to m_pBestCS[wIdx][hIdx] if a job result is adopted
 *  \param partitioner  partitioner describing the current area; its state is copied into each job
 */
void EncCu::xCompressCUParallel( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner )
{
  // indices into the per-block-size CS tables (m_pBestCS / m_pTempCS)
  const unsigned wIdx = gp_sizeIdxInfo->idxFrom( partitioner.currArea().lwidth() );
  const unsigned hIdx = gp_sizeIdxInfo->idxFrom( partitioner.currArea().lheight() );

  Picture* picture = tempCS->picture;

  int numJobs = m_modeCtrl->getNumParallelJobs( *bestCS, partitioner );

  // jobUsed[jId] records which job ids actually ran; it is indexed by job id, which starts at 1
  // NOTE(review): jId goes up to numJobs inclusive, so this requires numJobs < NUM_RESERVERD_SPLIT_JOBS - confirm
  //               that getNumParallelJobs() guarantees this
  bool    jobUsed                            [NUM_RESERVERD_SPLIT_JOBS];
  std::fill( jobUsed, jobUsed + NUM_RESERVERD_SPLIT_JOBS, false );

  const UnitArea currArea = CS::getArea( *tempCS, partitioner.currArea(), partitioner.chType );
  const bool doParallel   = !m_pcEncCfg->getForceSingleSplitThread();
  omp_set_num_threads( m_pcEncCfg->getNumSplitThreads() );

  // the loop is only parallelized when single-thread execution is not forced (OpenMP if-clause)
#pragma omp parallel for schedule(dynamic,1) if(doParallel)
  for( int jId = 1; jId <= numJobs; jId++ )
  {
    // thread start
    picture->scheduler.setSplitThreadId();
    picture->scheduler.setSplitJobId( jId );

    QTBTPartitioner jobPartitioner;
    EncCu*       jobCuEnc       = m_pcEncLib->getCuEncoder( picture->scheduler.getSplitDataId( jId ) );
    auto*        jobBlkCache    = dynamic_cast<CacheBlkInfoCtrl*>( jobCuEnc->m_modeCtrl );
#if REUSE_CU_RESULTS
    auto*        jobBestCache   = dynamic_cast<BestEncInfoCache*>( jobCuEnc->m_modeCtrl );
#endif

    // hand the current state over to the job: partitioner first, then the encoder (isDist = true)
    jobPartitioner.copyState( partitioner );
    jobCuEnc      ->copyState( this, jobPartitioner, currArea, true );

    // the caches are optional capabilities of the mode controller, hence the null checks
    if( jobBlkCache  ) { jobBlkCache ->tick(); }
#if REUSE_CU_RESULTS
    if( jobBestCache ) { jobBestCache->tick(); }

#endif
    CodingStructure *&jobBest = jobCuEnc->m_pBestCS[wIdx][hIdx];
    CodingStructure *&jobTemp = jobCuEnc->m_pTempCS[wIdx][hIdx];

    jobUsed[jId] = true;

    jobCuEnc->xCompressCU( jobTemp, jobBest, jobPartitioner );

    picture->scheduler.setSplitJobId( 0 );
    // thread stop
  }
  picture->scheduler.setSplitThreadId( 0 );

  // select the used job with the lowest cost that is strictly below the incoming bestCS cost;
  // bestJId stays 0 when no job improves on it
  int    bestJId  = 0;
  double bestCost = bestCS->cost;
  for( int jId = 1; jId <= numJobs; jId++ )
  {
    EncCu* jobCuEnc = m_pcEncLib->getCuEncoder( picture->scheduler.getSplitDataId( jId ) );

    if( jobUsed[jId] && jobCuEnc->m_pBestCS[wIdx][hIdx]->cost < bestCost )
    {
      bestCost = jobCuEnc->m_pBestCS[wIdx][hIdx]->cost;
      bestJId  = jId;
    }
  }

  if( bestJId > 0 )
  {
    // pull the winning job's state back into this encoder (isDist = false) and take its contexts as the best ones
    copyState( m_pcEncLib->getCuEncoder( picture->scheduler.getSplitDataId( bestJId ) ), partitioner, currArea, false );
    m_CurrCtx->best = m_CABACEstimator->getCtx();

    tempCS = m_pTempCS[wIdx][hIdx];
    bestCS = m_pBestCS[wIdx][hIdx];
  }

  const int      bitDepthY = tempCS->sps->getBitDepth( CH_L );
  const UnitArea clipdArea = clipArea( currArea, *picture );

  // sanity check: the picture's reconstruction of the Y component must match the one held by bestCS
  CHECK( calcCheckSum( picture->getRecoBuf( clipdArea.Y() ), bitDepthY ) != calcCheckSum( bestCS->getRecoBuf( clipdArea.Y() ), bitDepthY ), "Data copied incorrectly!" );

  picture->finishParallelPart( currArea );

  // merge the block-info caches of all used jobs except the winner into the own cache
  if( auto *blkCache = dynamic_cast<CacheBlkInfoCtrl*>( m_modeCtrl ) )
  {
    for( int jId = 1; jId <= numJobs; jId++ )
    {
      if( !jobUsed[jId] || jId == bestJId ) continue;

      auto *jobBlkCache = dynamic_cast<CacheBlkInfoCtrl*>( m_pcEncLib->getCuEncoder( picture->scheduler.getSplitDataId( jId ) )->m_modeCtrl );
      CHECK( !jobBlkCache, "If own mode controller has blk info cache capability so should all other mode controllers!" );
      blkCache->CacheBlkInfoCtrl::copyState( *jobBlkCache, partitioner.currArea() );
    }

    blkCache->tick();
  }
#if REUSE_CU_RESULTS

  // same merge for the best-encoding-info caches
  if( auto *blkCache = dynamic_cast<BestEncInfoCache*>( m_modeCtrl ) )
  {
    for( int jId = 1; jId <= numJobs; jId++ )
    {
      if( !jobUsed[jId] || jId == bestJId ) continue;

      auto *jobBlkCache = dynamic_cast<BestEncInfoCache*>( m_pcEncLib->getCuEncoder( picture->scheduler.getSplitDataId( jId ) )->m_modeCtrl );
      CHECK( !jobBlkCache, "If own mode controller has blk info cache capability so should all other mode controllers!" );
      blkCache->BestEncInfoCache::copyState( *jobBlkCache, partitioner.currArea() );
    }

    blkCache->tick();
  }
#endif
}

/** Exchange encoder state between this CU encoder and \p other (used by the split-parallel search).
 *
 *  \param other        encoder whose search/mode-control/RD-cost/quantizer/context state is copied into this one
 *  \param partitioner  provides the current area (block-size table indices, mode-control copy) and channel type
 *  \param currArea     area handed to useSubStructure() when a result is taken over
 *  \param isDist       true : the best/temp coding structures of \p other for this block size call
 *                             initSubStructure() on the corresponding structures of this encoder, and
 *                             m_CurrCtx is set to the start of m_CtxBuffer
 *                      false: this encoder's best coding structure takes over the one of \p other through
 *                             useSubStructure(), including cost, dist, fracBits and features
 */
void EncCu::copyState( EncCu* other, Partitioner& partitioner, const UnitArea& currArea, const bool isDist )
{
  const unsigned sizeW = gp_sizeIdxInfo->idxFrom( partitioner.currArea().lwidth () );
  const unsigned sizeH = gp_sizeIdxInfo->idxFrom( partitioner.currArea().lheight() );

  if( !isDist )
  {
    // take over the result held by the other encoder
    CodingStructure*       target = m_pBestCS[sizeW][sizeH];
    const CodingStructure* winner = other->m_pBestCS[sizeW][sizeH];

    const bool retainPred = true;
    const bool retainResi = KEEP_PRED_AND_RESI_SIGNALS;

    target->useSubStructure( *winner, partitioner.chType, currArea, retainPred, true, retainResi, retainResi, true );

    target->cost     = winner->cost;
    target->dist     = winner->dist;
    target->fracBits = winner->fracBits;
    target->features = winner->features;
  }
  else
  {
    // set up the coding structures for the current block size, then restart the context stack
    other->m_pBestCS[sizeW][sizeH]->initSubStructure( *m_pBestCS[sizeW][sizeH], partitioner.chType, partitioner.currArea(), false );
    other->m_pTempCS[sizeW][sizeH]->initSubStructure( *m_pTempCS[sizeW][sizeH], partitioner.chType, partitioner.currArea(), false );

    m_CurrCtx = m_CtxBuffer.data();
  }

  // component state is always copied from the other encoder into this one
  m_pcInterSearch->copyState( *other->m_pcInterSearch );
  m_modeCtrl->copyState( *other->m_modeCtrl, partitioner.currArea() );
  m_pcRdCost->copyState( *other->m_pcRdCost );
  m_pcTrQuant->copyState( *other->m_pcTrQuant );

  if( m_pcEncCfg->getLmcs() )
  {
    // with LMCS enabled both reshapers are accessed as EncReshape
    EncReshape* ownReshaper = dynamic_cast<EncReshape*>( m_pcReshape );
    EncReshape* srcReshaper = dynamic_cast<EncReshape*>( other->m_pcReshape );
    ownReshaper->copyState( *srcReshaper );
  }

  m_CABACEstimator->getCtx() = other->m_CABACEstimator->getCtx();
}
#endif
#if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
#if JVET_Y0152_TT_ENC_SPEEDUP
void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, double *splitRdCostBest)
#else
void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
#endif
#else
#if JVET_Y0152_TT_ENC_SPEEDUP
void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, const ModeType modeTypeParent, bool &skipInterPass, double *splitRdCostBest)
#else
void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, const ModeType modeTypeParent, bool &skipInterPass )
#endif
#endif
{
  // Evaluates one split candidate for the current partitioner area:
  //   1. estimates the split-signalling bits and bails out early if a scaled-down
  //      version of the best cost plus those bits already exceeds the best cost,
  //   2. applies the split and compresses every sub-area recursively (xCompressCU),
  //      merging each best sub-result into tempCS,
  //   3. adds the split-signalling bits, computes the RD cost and hands the result
  //      to xCheckBestMode.
  //
  // tempCS / bestCS   working and best-so-far coding structures for this area
  // partitioner       partitioning state; the split applied here is undone with
  //                   exitCurrSplit() on every path that applied it
  // encTestMode       supplies the QP, the split type and maxCostAllowed
  // modeTypeParent    (only without INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS) mode type of
  //                   the parent node; compared with partitioner.modeType of this node
  // skipInterPass     (only without INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS) set to true at
  //                   the end when residual-free CUs cover at least half of the area
  // splitRdCostBest   (only with JVET_Y0152_TT_ENC_SPEEDUP) receives the RD cost of this
  //                   split, indexed by split type; written on the normal path only

  // Values captured on entry; prevQP, motionLut and prevPLT are written back to
  // tempCS at the end of the function.
  const int qp                = encTestMode.qp;
  const Slice &slice          = *tempCS->slice;
  const int oldPrevQp         = tempCS->prevQP[partitioner.chType];
  const auto oldMotionLut     = tempCS->motionLut;
#if ENABLE_QPA_SUB_CTU
  const PPS &pps              = *tempCS->pps;
  const uint32_t currDepth    = partitioner.currDepth;
#endif
  const auto oldPLT           = tempCS->prevPLT;

  const PartSplit split = getPartSplit( encTestMode );
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
  const ModeType modeTypeChild = partitioner.modeType;
#endif
  CHECK( split == CU_DONT_SPLIT, "No proper split provided!" );

  tempCS->initStructData( qp );

  m_CABACEstimator->getCtx() = m_CurrCtx->start;

  // Copy the split-related sub-contexts so that the bit estimate below can be
  // undone: they are written back right after the estimate has been taken.
  const TempCtx ctxStartSP( m_CtxCache, SubCtx( Ctx::SplitFlag,   m_CABACEstimator->getCtx() ) );
  const TempCtx ctxStartQt( m_CtxCache, SubCtx( Ctx::SplitQtFlag, m_CABACEstimator->getCtx() ) );
  const TempCtx ctxStartHv( m_CtxCache, SubCtx( Ctx::SplitHvFlag, m_CABACEstimator->getCtx() ) );
  const TempCtx ctxStart12( m_CtxCache, SubCtx( Ctx::Split12Flag, m_CABACEstimator->getCtx() ) );
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
  const TempCtx ctxStartMC( m_CtxCache, SubCtx( Ctx::ModeConsFlag, m_CABACEstimator->getCtx() ) );
#endif
  m_CABACEstimator->resetBits();

  m_CABACEstimator->split_cu_mode( split, *tempCS, partitioner );
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
  m_CABACEstimator->mode_constraint( split, *tempCS, partitioner, modeTypeChild );
#endif
  // Heuristic cost for the early-termination test: the estimated split bits plus
  // bestCS's bits, distortion and deblocking offset, each divided by `factor`
  // (1.1 when the current QP is above 30, otherwise 1.075).
  const double factor = ( tempCS->currQP[partitioner.chType] > 30 ? 1.1 : 1.075 );
  tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt();
  if (!tempCS->useDbCost)
    CHECK(bestCS->costDbOffset != 0, "error");
  const double cost   = m_pcRdCost->calcRdCost( uint64_t( m_CABACEstimator->getEstFracBits() + ( ( bestCS->fracBits ) / factor ) ), Distortion( bestCS->dist / factor ) ) + bestCS->costDbOffset / factor;

  // Write the saved split-related sub-contexts back into the estimator.
  m_CABACEstimator->getCtx() = SubCtx( Ctx::SplitFlag,   ctxStartSP );
  m_CABACEstimator->getCtx() = SubCtx( Ctx::SplitQtFlag, ctxStartQt );
  m_CABACEstimator->getCtx() = SubCtx( Ctx::SplitHvFlag, ctxStartHv );
  m_CABACEstimator->getCtx() = SubCtx( Ctx::Split12Flag, ctxStart12 );
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
  m_CABACEstimator->getCtx() = SubCtx( Ctx::ModeConsFlag, ctxStartMC );
#endif
  // Early termination: the split is not tried at all when the heuristic cost is
  // already above the best cost (including its deblocking offset), or - with
  // sub-CTU QPA - when a horizontal/vertical binary split is requested at depth 0.
  // Nothing has been split yet at this point, so only xCheckBestMode is called.
  if (cost > bestCS->cost + bestCS->costDbOffset
#if ENABLE_QPA_SUB_CTU
    || (m_pcEncCfg->getUsePerceptQPA() && !m_pcEncCfg->getUseRateCtrl() && pps.getUseDQP() && (slice.getCuQpDeltaSubdiv() > 0) && (split == CU_HORZ_SPLIT || split == CU_VERT_SPLIT) &&
        (currDepth == 0)) // force quad-split or no split at CTU level
#endif
    )
  {
    xCheckBestMode( tempCS, bestCS, partitioner, encTestMode );
    return;
  }
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
  // chromaNotSplit: this node switches from MODE_TYPE_ALL (parent) to MODE_TYPE_INTRA.
  // In that case only luma is split in the loop below (tree type TREE_L) and chroma is
  // compressed as one unit afterwards (see the chromaNotSplit block after the loop).
  const bool chromaNotSplit = modeTypeParent == MODE_TYPE_ALL && modeTypeChild == MODE_TYPE_INTRA ? true : false;
  if( partitioner.treeType != TREE_D )
  {
    tempCS->treeType = TREE_L;
  }
  else
  {
    if( chromaNotSplit )
    {
      CHECK( partitioner.chType != CHANNEL_TYPE_LUMA, "chType must be luma" );
      tempCS->treeType = partitioner.treeType = TREE_L;
    }
    else
    {
      tempCS->treeType = partitioner.treeType = TREE_D;
    }
  }
#endif

  partitioner.splitCurrArea( split, *tempCS );
  bool qgEnableChildren = partitioner.currQgEnable(); // QG possible at children level

  // Paired with an m_CurrCtx-- on every exit path below.
  m_CurrCtx++;

  tempCS->getRecoBuf().fill( 0 );

  tempCS->getPredBuf().fill(0);
  // Save the previous affine / uni-directional MV info; it is re-added at the end
  // of the function once the split has been evaluated.
  // NOTE(review): the early-return paths inside the loop below do not re-add this
  // info - confirm that this is intended.
  AffineMVInfo tmpMVInfo;
  bool isAffMVInfoSaved;
  m_pcInterSearch->savePrevAffMVInfo(0, tmpMVInfo, isAffMVInfoSaved);
  BlkUniMvInfo tmpUniMvInfo;
  bool         isUniMvInfoSaved = false;
  if (!tempCS->slice->isIntra())
  {
    m_pcInterSearch->savePrevUniMvInfo(tempCS->area.Y(), tmpUniMvInfo, isUniMvInfoSaved);
  }
#if INTER_LIC
  // Same save for the LIC uni-MV data; the buffer is swapped in for the call and
  // swapped back immediately afterwards.
  BlkUniMvInfo tmpUniMvInfoLIC;
  bool         isUniMvInfoSavedLIC = false;
  if (tempCS->slice->getUseLIC() && !tempCS->slice->isIntra())
  {
    m_pcInterSearch->swapUniMvBuffer();
    m_pcInterSearch->savePrevUniMvInfo(tempCS->area.Y(), tmpUniMvInfoLIC, isUniMvInfoSavedLIC);
    m_pcInterSearch->swapUniMvBuffer();
  }
#endif

  // Visit every sub-area produced by the split.
  do
  {
    const auto &subCUArea  = partitioner.currArea();

    // Sub-areas whose luma position is not contained in the picture are skipped.
    if( tempCS->picture->Y().contains( subCUArea.lumaPos() ) )
    {
      const unsigned wIdx    = gp_sizeIdxInfo->idxFrom( subCUArea.lwidth () );
      const unsigned hIdx    = gp_sizeIdxInfo->idxFrom( subCUArea.lheight() );

      // Temp/best structures for this block size, set up as sub-structures of tempCS.
      CodingStructure *tempSubCS = m_pTempCS[wIdx][hIdx];
      CodingStructure *bestSubCS = m_pBestCS[wIdx][hIdx];

      tempCS->initSubStructure( *tempSubCS, partitioner.chType, subCUArea, false );
      tempCS->initSubStructure( *bestSubCS, partitioner.chType, subCUArea, false );
      tempSubCS->bestParent = bestSubCS->bestParent = bestCS;
      // Cost budget passed to the child: for luma, the smaller of the caller's budget
      // and (best cost - cost accumulated in tempCS so far), clamped to >= 0;
      // for chroma no limit is applied.
      double newMaxCostAllowed = isLuma(partitioner.chType) ? std::min(encTestMode.maxCostAllowed, bestCS->cost - m_pcRdCost->calcRdCost(tempCS->fracBits, tempCS->dist)) : MAX_DOUBLE;
      newMaxCostAllowed = std::max(0.0, newMaxCostAllowed);
      xCompressCU(tempSubCS, bestSubCS, partitioner, newMaxCostAllowed);
      tempSubCS->bestParent = bestSubCS->bestParent = nullptr;

      // The child reports MAX_DOUBLE cost: abandon this split. tempCS is marked with
      // MAX_DOUBLE cost, the split and the context level are undone, and the function
      // returns after xCheckBestMode.
      if( bestSubCS->cost == MAX_DOUBLE )
      {
        CHECK( split == CU_QUAD_SPLIT, "Split decision reusing cannot skip quad split" );
        tempCS->cost = MAX_DOUBLE;
        tempCS->costDbOffset = 0;
        tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt();
        m_CurrCtx--;
        partitioner.exitCurrSplit();
        xCheckBestMode( tempCS, bestCS, partitioner, encTestMode );

#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
        if( partitioner.chType == CHANNEL_TYPE_LUMA )
        {
          tempCS->motionLut = oldMotionLut;
        }
#endif

#if JVET_Z0118_GDR      
        // With GDR the same state as at the regular end of the function is restored.
        tempCS->motionLut = oldMotionLut;
        tempCS->prevPLT = oldPLT;
        tempCS->releaseIntermediateData();
        tempCS->prevQP[partitioner.chType] = oldPrevQp;
#endif        

        return;
      }

      // Merge the best result of this child into tempCS.
      bool keepResi = KEEP_PRED_AND_RESI_SIGNALS;
      tempCS->useSubStructure( *bestSubCS, partitioner.chType, CS::getArea( *tempCS, subCUArea, partitioner.chType ), KEEP_PRED_AND_RESI_SIGNALS, true, keepResi, keepResi, true );

      // Carry the child's previous QP forward when a QG is enabled at this level.
      if( partitioner.currQgEnable() )
      {
        tempCS->prevQP[partitioner.chType] = bestSubCS->prevQP[partitioner.chType];
      }
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
      // Sanity checks: the child's CUs must respect the inter/intra mode constraint.
      if( partitioner.isConsInter() )
      {
        for( int i = 0; i < bestSubCS->cus.size(); i++ )
        {
          CHECK( bestSubCS->cus[i]->predMode != MODE_INTER, "all CUs must be inter mode in an Inter coding region (SCIPU)" );
        }
      }
      else if( partitioner.isConsIntra() )
      {
        for( int i = 0; i < bestSubCS->cus.size(); i++ )
        {
          CHECK( bestSubCS->cus[i]->predMode == MODE_INTER, "all CUs must not be inter mode in an Intra coding region (SCIPU)" );
        }
      }
#endif
      tempSubCS->releaseIntermediateData();
      bestSubCS->releaseIntermediateData();
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
      // Intra-constrained region in a non-intra slice: stop as soon as the cost
      // accumulated so far exceeds the best cost. This path undoes the split and
      // returns without calling xCheckBestMode.
      if( !tempCS->slice->isIntra() && partitioner.isConsIntra() )
      {
        tempCS->cost = m_pcRdCost->calcRdCost( tempCS->fracBits, tempCS->dist );
        if( tempCS->cost > bestCS->cost )
        {
          tempCS->cost = MAX_DOUBLE;
          tempCS->costDbOffset = 0;
          tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt();
          m_CurrCtx--;
          partitioner.exitCurrSplit();
          if( partitioner.chType == CHANNEL_TYPE_LUMA )
          {
            tempCS->motionLut = oldMotionLut;
          }
          return;
        }
      }
#endif
    }
  } while( partitioner.nextPart( *tempCS ) );

  partitioner.exitCurrSplit();


  m_CurrCtx--;
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
  if( chromaNotSplit )
  {
    // Note: in a local dual-tree region, the chroma CU refers to the QP of the central luma CU.
    // If the luma CU QP has to be predQP (no residual in it or before it in the QG), it must be set to predQP before encoding the chroma CU.
    // Otherwise the chroma CU is encoded with predQP+deltaQP but decoded with predQP, causing an encoder/decoder mismatch on the chroma QP.
    if( tempCS->pps->getUseDQP() )
    {
      // find the parent CS that includes all CUs coded in the QG before this node
      CodingStructure* qgCS = tempCS;
      bool deltaQpCodedBeforeThisNode = false;
      if( partitioner.currArea().lumaPos() != partitioner.currQgPos )
      {
        int numParentNodeToQgCS = 0;
        while( qgCS->area.lumaPos() != partitioner.currQgPos )
        {
          CHECK( qgCS->parent == nullptr, "parent of qgCS shall exsit" );
          qgCS = qgCS->parent;
          numParentNodeToQgCS++;
        }

        // check whether a deltaQP has been coded (in a luma CU or luma&chroma CU) before this node
        CodingStructure* parentCS = tempCS->parent;
        for( int i = 0; i < numParentNodeToQgCS; i++ )
        {
          // check each parent; note that the break below only leaves the inner CU loop
          CHECK( parentCS == nullptr, "parentCS shall exsit" );
          for( const auto &cu : parentCS->cus )
          {
            if( cu->rootCbf && !isChroma( cu->chType ) )
            {
              deltaQpCodedBeforeThisNode = true;
              break;
            }
          }
          parentCS = parentCS->parent;
        }
      }

      // set the QP of all luma CUs preceding the first CU with residual in the SCIPU to predQP
      if( !deltaQpCodedBeforeThisNode )
      {
        // get the predicted QP of the QG
        const CodingUnit* cuFirst = qgCS->getCU( CHANNEL_TYPE_LUMA );
        CHECK( cuFirst->lumaPos() != partitioner.currQgPos, "First cu of the Qg is wrong" );
        int predQp = CU::predictQP( *cuFirst, qgCS->prevQP[CHANNEL_TYPE_LUMA] );

        // index of the first CU with residual (or cus.size() if there is none)
        int firstCuHasResidual = (int)tempCS->cus.size();
        for( int i = 0; i < tempCS->cus.size(); i++ )
        {
          if( tempCS->cus[i]->rootCbf )
          {
            firstCuHasResidual = i;
            break;
          }
        }

        // all CUs before that index get predQP
        for( int i = 0; i < firstCuHasResidual; i++ )
        {
          tempCS->cus[i]->qp = predQp;
        }
      }
    }
    assert( tempCS->treeType == TREE_L );
    // Record the current CU/PU/TU counts of the picture-level CS, then copy the luma
    // result of tempCS into it. The recorded counts are handed to clearCuPuTuIdxMap
    // after the chroma pass (presumably to remove these temporary entries again).
    uint32_t numCuPuTu[6];
    tempCS->picture->cs->getNumCuPuTuOffset( numCuPuTu );
    tempCS->picture->cs->useSubStructure( *tempCS, partitioner.chType, CS::getArea( *tempCS, partitioner.currArea(), partitioner.chType ), false, true, false, false, false );

    // Compress chroma for the whole (unsplit) area as a separate TREE_C pass.
    if (isChromaEnabled(tempCS->pcv->chrFormat))
    {
    partitioner.chType = CHANNEL_TYPE_CHROMA;
    tempCS->treeType = partitioner.treeType = TREE_C;

    m_CurrCtx++;

    const unsigned wIdx = gp_sizeIdxInfo->idxFrom( partitioner.currArea().lwidth() );
    const unsigned hIdx = gp_sizeIdxInfo->idxFrom( partitioner.currArea().lheight() );
    CodingStructure *tempCSChroma = m_pTempCS2[wIdx][hIdx];
    CodingStructure *bestCSChroma = m_pBestCS2[wIdx][hIdx];
    tempCS->initSubStructure( *tempCSChroma, partitioner.chType, partitioner.currArea(), false );
    tempCS->initSubStructure( *bestCSChroma, partitioner.chType, partitioner.currArea(), false );
    tempCS->treeType = TREE_D;
    xCompressCU( tempCSChroma, bestCSChroma, partitioner );

    //attach chromaCS to luma CS and update cost
    bool keepResi = KEEP_PRED_AND_RESI_SIGNALS;
    //bestCSChroma->treeType = tempCSChroma->treeType = TREE_C;
    CHECK( bestCSChroma->treeType != TREE_C || tempCSChroma->treeType != TREE_C, "wrong treeType for chroma CS" );
    tempCS->useSubStructure( *bestCSChroma, partitioner.chType, CS::getArea( *bestCSChroma, partitioner.currArea(), partitioner.chType ), KEEP_PRED_AND_RESI_SIGNALS, true, keepResi, true, true );

    //release tmp resource
    tempCSChroma->releaseIntermediateData();
    bestCSChroma->releaseIntermediateData();
    //tempCS->picture->cs->releaseIntermediateData();
      m_CurrCtx--;
    }
    tempCS->picture->cs->clearCuPuTuIdxMap( partitioner.currArea(), numCuPuTu[0], numCuPuTu[1], numCuPuTu[2], numCuPuTu + 3 );


    //recover luma tree status
    partitioner.chType = CHANNEL_TYPE_LUMA;
    partitioner.treeType = TREE_D;
    partitioner.modeType = MODE_TYPE_ALL;
  }
#endif
  // Finally, generate split-signaling bits for RD-cost check
  const PartSplit implicitSplit = partitioner.getImplicitSplit( *tempCS );

  {
    // No split bits are added when a quad split is enforced: either the implicit
    // split is a quad split, or (fast LCTU) the minimum depth is above the current
    // QT depth.
    bool enforceQT = implicitSplit == CU_QUAD_SPLIT;

    // LARGE CTU bug
    if( m_pcEncCfg->getUseFastLCTU() )
    {
      unsigned minDepth = 0;
      unsigned maxDepth = floorLog2(tempCS->sps->getCTUSize()) - floorLog2(tempCS->sps->getMinQTSize(slice.getSliceType(), partitioner.chType));

      // minDepth stays 0 unless setMaxMinDepth changes it (presumably it takes the
      // depths by reference - confirm against AdaptiveDepthPartitioner).
      if( auto ad = dynamic_cast<AdaptiveDepthPartitioner*>( &partitioner ) )
      {
        ad->setMaxMinDepth( minDepth, maxDepth, *tempCS );
      }

      if( minDepth > partitioner.currQtDepth )
      {
        // enforce QT
        enforceQT = true;
      }
    }

    if( !enforceQT )
    {
      m_CABACEstimator->resetBits();

      m_CABACEstimator->split_cu_mode( split, *tempCS, partitioner );
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
      // The mode constraint is signalled relative to the parent's mode type.
      partitioner.modeType = modeTypeParent;
      m_CABACEstimator->mode_constraint( split, *tempCS, partitioner, modeTypeChild );
#endif
      tempCS->fracBits += m_CABACEstimator->getEstFracBits(); // split bits
    }
  }

  tempCS->cost = m_pcRdCost->calcRdCost( tempCS->fracBits, tempCS->dist );

  // Check delta QP bits for the split structure
  if( !qgEnableChildren ) // check at deepest QG level only
  xCheckDQP( *tempCS, partitioner, true );

  // If the configuration being tested exceeds the maximum number of bytes for a slice / slice-segment, then
  // a proper RD evaluation cannot be performed. Therefore, termination of the
  // slice/slice-segment must be made prior to this CTU.
  // This can be achieved by forcing the decision to be that of the rpcTempCU.
  // The exception is each slice / slice-segment must have at least one CTU.
  // NOTE(review): the first branch below is empty, so the logic described above is
  // not present here; the only effect is resetting bestCS->costDbOffset when bestCS
  // has MAX_DOUBLE cost.
  if (bestCS->cost != MAX_DOUBLE)
  {
  }
  else
  {
    bestCS->costDbOffset = 0;
  }
  tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt();
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
  // Node switches from MODE_TYPE_ALL to MODE_TYPE_INTER: request skipping the inter
  // pass when CUs without residual cover at least half of the luma area.
  if( tempCS->cus.size() > 0 && modeTypeParent == MODE_TYPE_ALL && modeTypeChild == MODE_TYPE_INTER )
  {
    int areaSizeNoResiCu = 0;
    for( int k = 0; k < tempCS->cus.size(); k++ )
    {
      areaSizeNoResiCu += (tempCS->cus[k]->rootCbf == false) ? tempCS->cus[k]->lumaSize().area() : 0;
    }
    if( areaSizeNoResiCu >= (tempCS->area.lumaSize().area() >> 1) )
    {
      skipInterPass = true;
    }
  }
#endif
#if JVET_Y0152_TT_ENC_SPEEDUP
  // Report the RD cost of this split type to the caller.
  splitRdCostBest[getPartSplit(encTestMode)] = tempCS->cost;
#endif
  // RD check for sub partitioned coding structure.
  xCheckBestMode( tempCS, bestCS, partitioner, encTestMode );

  // Re-add the MV info saved before the sub-areas were compressed.
  if (isAffMVInfoSaved)
    m_pcInterSearch->addAffMVInfo(tmpMVInfo);
  if (!tempCS->slice->isIntra() && isUniMvInfoSaved)
  {
    m_pcInterSearch->addUniMvInfo(tmpUniMvInfo);
  }
#if INTER_LIC
  if (!tempCS->slice->isIntra() && isUniMvInfoSavedLIC)
  {
    m_pcInterSearch->swapUniMvBuffer();
    m_pcInterSearch->addUniMvInfo(tmpUniMvInfoLIC);
    m_pcInterSearch->swapUniMvBuffer();
  }
#endif

  // Restore the state captured on entry.
  tempCS->motionLut = oldMotionLut;

  tempCS->prevPLT   = oldPLT;

  tempCS->releaseIntermediateData();

  tempCS->prevQP[partitioner.chType] = oldPrevQp;
}

bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, bool adaptiveColorTrans)
{
  double          bestInterCost             = m_modeCtrl->getBestInterCost();
  double          costSize2Nx2NmtsFirstPass = m_modeCtrl->getMtsSize2Nx2NFirstPassCost();
  bool            skipSecondMtsPass         = m_modeCtrl->getSkipSecondMTSPass();
  const SPS&      sps                       = *tempCS->sps;
  const int       maxSizeMTS                = MTS_INTRA_MAX_CU_SIZE;
  uint8_t         considerMtsSecondPass     = ( sps.getUseIntraMTS() && isLuma( partitioner.chType ) && partitioner.currArea().lwidth() <= maxSizeMTS && partitioner.currArea().lheight() <= maxSizeMTS ) ? 1 : 0;

  bool   useIntraSubPartitions   = false;
  double maxCostAllowedForChroma = MAX_DOUBLE;
  const  CodingUnit *bestCU      = bestCS->getCU( partitioner.chType );
  Distortion interHad = m_modeCtrl->getInterHad();
#if JVET_W0123_TIMD_FUSION
  int timdMode = 0;
  int timdModeSecondary = 0;
  bool timdIsBlended = false;
  int  timdFusionWeight[2] = { 0 };
#endif


  double dct2Cost                =   MAX_DOUBLE;
  double bestNonDCT2Cost         = MAX_DOUBLE;
  double trGrpBestCost     [ 4 ] = { MAX_DOUBLE, MAX_DOUBLE, MAX_DOUBLE, MAX_DOUBLE };
  double globalBestCost          =   MAX_DOUBLE;
  bool   bestSelFlag       [ 4 ] = { false, false, false, false };
  bool   trGrpCheck        [ 4 ] = { true, true, true, true };
  int    startMTSIdx       [ 4 ] = { 0, 1, 2, 3 };
  int    endMTSIdx         [ 4 ] = { 0, 1, 2, 3 };
#if JVET_W0103_INTRA_MTS
#if JVET_Y0142_ADAPT_INTRA_MTS
  endMTSIdx[0] = 5; //put all MTS candidates in "Grp 0"
#else
  endMTSIdx[0] = 3; //put all MTS candidates in "Grp 0"
#endif
#endif
  double trGrpStopThreshold[ 3 ] = { 1.001, 1.001, 1.001 };
  int    bestMtsFlag             =   0;
  int    bestLfnstIdx            =   0;

#if EXTENDED_LFNST || JVET_W0119_LFNST_EXTENSION
#if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
  const int  maxLfnstIdx         = (CS::isDualITree(*tempCS) && partitioner.chType == CHANNEL_TYPE_CHROMA && (partitioner.currArea().lwidth() < 8 || partitioner.currArea().lheight() < 8))
                                   || (partitioner.currArea().lwidth() > sps.getMaxTbSize() || partitioner.currArea().lheight() > sps.getMaxTbSize()) ? 0 : 3;
#else
  const int  maxLfnstIdx         = ( partitioner.isSepTree( *tempCS ) && partitioner.chType == CHANNEL_TYPE_CHROMA && ( partitioner.currArea().lwidth() < 8 || partitioner.currArea().lheight() < 8 ) )
                                   || ( partitioner.currArea().lwidth() > sps.getMaxTbSize() || partitioner.currArea().lheight() > sps.getMaxTbSize() ) ? 0 : 3;
#endif
#else
#if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
  const int  maxLfnstIdx         = (CS::isDualITree(*tempCS) && partitioner.chType == CHANNEL_TYPE_CHROMA && (partitioner.currArea().lwidth() < 8 || partitioner.currArea().lheight() < 8))
                                   || (partitioner.currArea().lwidth() > sps.getMaxTbSize() || partitioner.currArea().lheight() > sps.getMaxTbSize()) ? 0 : 2;
#else
  const int  maxLfnstIdx         = ( partitioner.isSepTree( *tempCS ) && partitioner.chType == CHANNEL_TYPE_CHROMA && ( partitioner.currArea().lwidth() < 8 || partitioner.currArea().lheight() < 8 ) )
                                   || ( partitioner.currArea().lwidth() > sps.getMaxTbSize() || partitioner.currArea().lheight() > sps.getMaxTbSize() ) ? 0 : 2;
#endif
#endif

  bool       skipOtherLfnst      = false;
  int        startLfnstIdx       = 0;
  int        endLfnstIdx         = sps.getUseLFNST() ? maxLfnstIdx : 0;
#if INTRA_TRANS_ENC_OPT
  if (m_pcEncCfg->getIntraPeriod() == 1)
  {
    CodedCUInfo    &relatedCU = ((EncModeCtrlMTnoRQT *)m_modeCtrl)->getBlkInfo(partitioner.currArea());
    if (isLuma(partitioner.chType) && relatedCU.skipLfnstTest)
    {
      endLfnstIdx = startLfnstIdx;
    }
  }
#endif
#if JVET_W0103_INTRA_MTS
  int grpNumMax = 1;
#else
  int grpNumMax = sps.getUseLFNST() ? m_pcEncCfg->getMTSIntraMaxCand() : 1;
#endif
  m_modeCtrl->setISPWasTested(false);
  m_pcIntraSearch->invalidateBestModeCost();
  if (sps.getUseColorTrans() && !CS::isDualITree(*tempCS))
  {
    if ((m_pcEncCfg->getRGBFormatFlag() && adaptiveColorTrans) || (!m_pcEncCfg->getRGBFormatFlag() && !adaptiveColorTrans))
    {
      m_pcIntraSearch->invalidateBestRdModeFirstColorSpace();
    }
  }

  bool foundZeroRootCbf = false;
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
  if (sps.getUseColorTrans())
  {
    CHECK(tempCS->treeType != TREE_D || partitioner.treeType != TREE_D, "localtree should not be applied when adaptive color transform is enabled");
    CHECK(tempCS->modeType != MODE_TYPE_ALL || partitioner.modeType != MODE_TYPE_ALL, "localtree should not be applied when adaptive color transform is enabled");
    CHECK(adaptiveColorTrans && (CS::isDualITree(*tempCS) || partitioner.chType != CHANNEL_TYPE_LUMA), "adaptive color transform cannot be applied to dual-tree");
  }
#endif
#if ENABLE_DIMD
  bool dimdBlending = false;
  int  dimdMode = 0;
  int  dimdBlendMode[2] = { 0 };
  int  dimdRelWeight[3] = { 0 };
  bool dimdDerived = false;

  if (isLuma(partitioner.chType))
  {
    CodingUnit cu(tempCS->area);
    cu.cs = tempCS;
    cu.slice = tempCS->slice;
    cu.tileIdx = tempCS->pps->getTileIdx(tempCS->area.lumaPos());
    PredictionUnit pu(tempCS->area);
    pu.cu = &cu;
    pu.cs = tempCS;

    if( cu.slice->getSPS()->getUseDimd() )
    {
      const CompArea &area = cu.Y();
      IntraPrediction::deriveDimdMode(bestCS->picture->getRecoBuf(area), area, cu);

      dimdDerived = true;
      dimdBlending = cu.dimdBlending;
      dimdMode = cu.dimdMode;
      dimdBlendMode[0] = cu.dimdBlendMode[0];
      dimdBlendMode[1] = cu.dimdBlendMode[1];
      dimdRelWeight[0] = cu.dimdRelWeight[0];
      dimdRelWeight[1] = cu.dimdRelWeight[1];
      dimdRelWeight[2] = cu.dimdRelWeight[2];
    }

#if SECONDARY_MPM
    m_pcIntraSearch->getMpmListSize() = PU::getIntraMPMs(pu, m_pcIntraSearch->getMPMList(), m_pcIntraSearch->getNonMPMList());
#endif
  }
#elif SECONDARY_MPM
  if( isLuma( partitioner.chType ) )
  {
    CodingUnit cu( tempCS->area );
    cu.cs = tempCS;
    cu.slice = tempCS->slice;
    cu.tileIdx = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
    PredictionUnit pu( tempCS->area );
    pu.cu = &cu;
    pu.cs = tempCS;

    m_pcIntraSearch->getMpmListSize() = PU::getIntraMPMs( pu, m_pcIntraSearch->getMPMList(), m_pcIntraSearch->getNonMPMList() );
  }
#endif

#if JVET_W0123_TIMD_FUSION
  bool timdDerived = false;
#endif
#if INTRA_TRANS_ENC_OPT
  m_pcIntraSearch->m_skipTimdLfnstMtsPass = false;
  m_modeCtrl->resetLfnstCost();
#endif
  for( int trGrpIdx = 0; trGrpIdx < grpNumMax; trGrpIdx++ )
  {
    const uint8_t startMtsFlag = trGrpIdx > 0;
    const uint8_t endMtsFlag   = sps.getUseLFNST() ? considerMtsSecondPass : 0;

    if( ( trGrpIdx == 0 || ( !skipSecondMtsPass && considerMtsSecondPass ) ) && trGrpCheck[ trGrpIdx ] )
    {
      for( int lfnstIdx = startLfnstIdx; lfnstIdx <= endLfnstIdx; lfnstIdx++ )
      {
        for( uint8_t mtsFlag = startMtsFlag; mtsFlag <= endMtsFlag; mtsFlag++ )
        {
          if (sps.getUseColorTrans() && !CS::isDualITree(*tempCS))
          {
            m_pcIntraSearch->setSavedRdModeIdx(trGrpIdx*(NUM_LFNST_NUM_PER_SET * 2) + lfnstIdx * 2 + mtsFlag);
          }
          if (mtsFlag > 0 && lfnstIdx > 0)
          {
            continue;
          }
          //3) if interHad is 0, only try further modes if some intra mode was already better than inter
          if( sps.getUseLFNST() && m_pcEncCfg->getUsePbIntraFast() && !tempCS->slice->isIntra() && bestCU && CU::isInter( *bestCS->getCU( partitioner.chType ) ) && interHad == 0 )
          {
            continue;
          }

          tempCS->initStructData( encTestMode.qp );

          CodingUnit &cu      = tempCS->addCU( CS::getArea( *tempCS, tempCS->area, partitioner.chType ), partitioner.chType );

          partitioner.setCUData( cu );
          cu.slice            = tempCS->slice;
          cu.tileIdx          = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
          cu.skip             = false;
          cu.mmvdSkip = false;
          cu.predMode         = MODE_INTRA;
          cu.chromaQpAdj      = m_cuChromaQpOffsetIdxPlus1;
          cu.qp               = encTestMode.qp;
#if ENABLE_DIMD
          cu.dimd = false;
          if( dimdDerived )
          {
            cu.dimdBlending = dimdBlending;
            cu.dimdMode = dimdMode;
            cu.dimdBlendMode[0] = dimdBlendMode[0];
            cu.dimdBlendMode[1] = dimdBlendMode[1];
            cu.dimdRelWeight[0] = dimdRelWeight[0];
            cu.dimdRelWeight[1] = dimdRelWeight[1];
            cu.dimdRelWeight[2] = dimdRelWeight[2];
          }
#endif
          cu.lfnstIdx         = lfnstIdx;
          cu.mtsFlag          = mtsFlag;
          cu.ispMode          = NOT_INTRA_SUBPARTITIONS;
          cu.colorTransform = adaptiveColorTrans;

          CU::addPUs( cu );
#if JVET_W0123_TIMD_FUSION
          cu.timd = false;
          if (isLuma(partitioner.chType) && cu.slice->getSPS()->getUseTimd())
          {
            if (cu.lwidth() * cu.lheight() > 1024 && cu.slice->getSliceType() == I_SLICE)
            {
              timdDerived = true;
            }
            if (!timdDerived)
            {
              const CompArea &area = cu.Y();
              cu.timdMode = m_pcIntraSearch->deriveTimdMode(bestCS->picture->getRecoBuf(area), area, cu);
              timdMode = cu.timdMode;
              timdDerived = true;
              timdModeSecondary = cu.timdModeSecondary;
              timdIsBlended     = cu.timdIsBlended;
              timdFusionWeight[0] = cu.timdFusionWeight[0];
              timdFusionWeight[1] = cu.timdFusionWeight[1];
            }
            else
            {
              cu.timdMode = timdMode;
              cu.timdModeSecondary = timdModeSecondary;
              cu.timdIsBlended     = timdIsBlended;
              cu.timdFusionWeight[0] = timdFusionWeight[0];
              cu.timdFusionWeight[1] = timdFusionWeight[1];
            }
          }
#endif

          tempCS->interHad    = interHad;

          m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;

          bool validCandRet = false;
          if( isLuma( partitioner.chType ) )
          {
            //ISP uses the value of the best cost so far (luma if it is the fast version) to avoid test non-necessary subpartitions
#if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
            double bestCostSoFar = CS::isDualITree(*tempCS) ? m_modeCtrl->getBestCostWithoutSplitFlags() : bestCU && bestCU->predMode == MODE_INTRA ? bestCS->lumaCost : bestCS->cost;
            if (CS::isDualITree(*tempCS) && encTestMode.maxCostAllowed < bestCostSoFar)
#else
            double bestCostSoFar = partitioner.isSepTree(*tempCS) ? m_modeCtrl->getBestCostWithoutSplitFlags() : bestCU && bestCU->predMode == MODE_INTRA ? bestCS->lumaCost : bestCS->cost;
            if (partitioner.isSepTree(*tempCS) && encTestMode.maxCostAllowed < bestCostSoFar)
#endif
            {
              bestCostSoFar = encTestMode.maxCostAllowed;
            }
            validCandRet = m_pcIntraSearch->estIntraPredLumaQT(cu, partitioner, bestCostSoFar, mtsFlag, startMTSIdx[trGrpIdx], endMTSIdx[trGrpIdx], (trGrpIdx > 0), !cu.colorTransform ? bestCS : nullptr);
            if ((!validCandRet || (cu.ispMode && cu.firstTU->cbf[COMPONENT_Y] == 0)))
            {
              continue;
            }
#if JVET_W0123_TIMD_FUSION
            PU::spanIpmInfoIntra(*cu.firstPU);
#endif
#if JVET_W0123_TIMD_FUSION
            if (m_pcEncCfg->getUseFastISP() && validCandRet && !mtsFlag && !lfnstIdx && !cu.colorTransform && !cu.timd)
#else
            if (m_pcEncCfg->getUseFastISP() && validCandRet && !mtsFlag && !lfnstIdx && !cu.colorTransform)
#endif
            {
              m_modeCtrl->setISPMode(cu.ispMode);
              m_modeCtrl->setISPLfnstIdx(cu.lfnstIdx);
              m_modeCtrl->setMIPFlagISPPass(cu.mipFlag);
#if JVET_V0130_INTRA_TMP
			        m_modeCtrl->setTPMFlagISPPass(cu.tmpFlag);
#endif
              m_modeCtrl->setBestISPIntraModeRelCU(cu.ispMode ? PU::getFinalIntraMode(*cu.firstPU, CHANNEL_TYPE_LUMA) : UINT8_MAX);
              m_modeCtrl->setBestDCT2NonISPCostRelCU(m_modeCtrl->getMtsFirstPassNoIspCost());
            }

            if (sps.getUseColorTrans() && m_pcEncCfg->getRGBFormatFlag() && !CS::isDualITree(*tempCS) && !cu.colorTransform)
            {
              double curLumaCost = m_pcRdCost->calcRdCost(tempCS->fracBits, tempCS->dist);
              if (curLumaCost > bestCS->cost)
              {
                continue;
              }
            }

            useIntraSubPartitions = cu.ispMode != NOT_INTRA_SUBPARTITIONS;
#if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
            if (!CS::isDualITree(*tempCS))
#else
            if( !partitioner.isSepTree( *tempCS ) )
#endif
            {
              tempCS->lumaCost = m_pcRdCost->calcRdCost( tempCS->fracBits, tempCS->dist );
              if( useIntraSubPartitions )
              {
                //the difference between the best cost so far and the current luma cost is stored to avoid testing the Cr component if the cost of luma + Cb is larger than the best cost
                maxCostAllowedForChroma = bestCS->cost < MAX_DOUBLE ? bestCS->cost - tempCS->lumaCost : MAX_DOUBLE;
              }
            }

            if (m_pcEncCfg->getUsePbIntraFast() && tempCS->dist == std::numeric_limits<Distortion>::max()
                && tempCS->interHad == 0)
            {
              interHad = 0;
              // JEM assumes only perfect reconstructions can from now on beat the inter mode
              m_modeCtrl->enforceInterHad( 0 );
              continue;
            }
#if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
            if (!CS::isDualITree(*tempCS))
#else
            if( !partitioner.isSepTree( *tempCS ) )
#endif
            {
              if (!cu.colorTransform)
              {
#if JVET_Z0118_GDR
                cu.cs->updateReconMotIPM(cu.Y()); // xcomrpessCU - need 
#else
                cu.cs->picture->getRecoBuf(cu.Y()).copyFrom(cu.cs->getRecoBuf(COMPONENT_Y));
#endif
                cu.cs->picture->getPredBuf(cu.Y()).copyFrom(cu.cs->getPredBuf(COMPONENT_Y));
              }
              else
              {
#if JVET_Z0118_GDR
                cu.cs->updateReconMotIPM(cu); // xcomrpessCU - need 
#else
                cu.cs->picture->getRecoBuf(cu).copyFrom(cu.cs->getRecoBuf(cu));
#endif
                cu.cs->picture->getPredBuf(cu).copyFrom(cu.cs->getPredBuf(cu));
              }
            }
          }
#if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
          if (tempCS->area.chromaFormat != CHROMA_400 && (partitioner.chType == CHANNEL_TYPE_CHROMA || !CS::isDualITree(*tempCS)) && !cu.colorTransform)
#else
          if( tempCS->area.chromaFormat != CHROMA_400 && ( partitioner.chType == CHANNEL_TYPE_CHROMA || !cu.isSepTree() ) && !cu.colorTransform )
#endif
          {
            TUIntraSubPartitioner subTuPartitioner( partitioner );
#if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
            m_pcIntraSearch->estIntraPredChromaQT(cu, (!useIntraSubPartitions || (CS::isDualITree(*tempCS) && !isLuma(CHANNEL_TYPE_CHROMA))) ? partitioner : subTuPartitioner, maxCostAllowedForChroma);
#else
            m_pcIntraSearch->estIntraPredChromaQT( cu, ( !useIntraSubPartitions || ( cu.isSepTree() && !isLuma( CHANNEL_TYPE_CHROMA ) ) ) ? partitioner : subTuPartitioner, maxCostAllowedForChroma );
#endif
            if( useIntraSubPartitions && !cu.ispMode )
            {
              //At this point the temp cost is larger than the best cost. Therefore, we can already skip the remaining calculations
              continue;
            }
          }

          cu.rootCbf = false;

          for( uint32_t t = 0; t < getNumberValidTBlocks( *cu.cs->pcv ); t++ )
          {
            cu.rootCbf |= cu.firstTU->cbf[t] != 0;
          }

          if (!cu.rootCbf)
          {
            cu.colorTransform = false;
            foundZeroRootCbf = true;
          }

          // Get total bits for current mode: encode CU
          m_CABACEstimator->resetBits();

          if ((!cu.cs->slice->isIntra() || cu.cs->slice->getSPS()->getIBCFlag())
            && cu.Y().valid()
            )
          {
            m_CABACEstimator->cu_skip_flag ( cu );
          }
          m_CABACEstimator->pred_mode      ( cu );
#if ENABLE_DIMD
          m_CABACEstimator->cu_dimd_flag   ( cu );
#endif
          m_CABACEstimator->adaptive_color_transform(cu);
          m_CABACEstimator->cu_pred_data   ( cu );

          // Encode Coefficients
          CUCtx cuCtx;
          cuCtx.isDQPCoded = true;
          cuCtx.isChromaQpAdjCoded = true;
          m_CABACEstimator->cu_residual( cu, partitioner, cuCtx );

          tempCS->fracBits = m_CABACEstimator->getEstFracBits();
          tempCS->cost     = m_pcRdCost->calcRdCost(tempCS->fracBits, tempCS->dist);


          double tmpCostWithoutSplitFlags = tempCS->cost;
          xEncodeDontSplit( *tempCS, partitioner );

          xCheckDQP( *tempCS, partitioner );
          xCheckChromaQPOffset( *tempCS, partitioner );

          // Check if low frequency non-separable transform (LFNST) is too expensive
          if( lfnstIdx && !cuCtx.lfnstLastScanPos && !cu.ispMode )
          {
#if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
            bool cbfAtZeroDepth = CS::isDualITree(*tempCS) ?
#else
            bool cbfAtZeroDepth = cu.isSepTree() ?
#endif
                                       cu.rootCbf
                                     : (tempCS->area.chromaFormat != CHROMA_400 && std::min( cu.firstTU->blocks[ 1 ].width, cu.firstTU->blocks[ 1 ].height ) < 4) ?
                                            TU::getCbfAtDepth( *cu.firstTU, COMPONENT_Y, 0 )
                                          : cu.rootCbf;
#if INTRA_TRANS_ENC_OPT
            if (CS::isDualITree(*tempCS) && (partitioner.chType == CHANNEL_TYPE_LUMA))
            {
              CHECK(cbfAtZeroDepth, "such case should be wrapped out during the RD!");
            }
#endif
            if( cbfAtZeroDepth )
            {
              tempCS->cost = MAX_DOUBLE;
              tmpCostWithoutSplitFlags = MAX_DOUBLE;
            }
          }

          if (isLuma(partitioner.chType) && cu.firstTU->mtsIdx[COMPONENT_Y] > MTS_SKIP)
          {
            CHECK(!cuCtx.mtsLastScanPos, "MTS is disallowed to only contain DC coefficient");
          }

          if( mtsFlag == 0 && lfnstIdx == 0 )
          {
            dct2Cost = tempCS->cost;
          }
          else if (tmpCostWithoutSplitFlags < bestNonDCT2Cost)
          {
            bestNonDCT2Cost = tmpCostWithoutSplitFlags;
          }

          if( tempCS->cost < bestCS->cost )
          {
            m_modeCtrl->setBestCostWithoutSplitFlags( tmpCostWithoutSplitFlags );
          }

          if( !mtsFlag ) static_cast< double& >( costSize2Nx2NmtsFirstPass ) = tempCS->cost;

          if( sps.getUseLFNST() && !tempCS->cus.empty() )
          {
            skipOtherLfnst = m_modeCtrl->checkSkipOtherLfnst( encTestMode, tempCS, partitioner );
          }

          xCalDebCost( *tempCS, partitioner );
          tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt();

#if WCG_EXT
          DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) );
#else
          DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda() );
#endif
          if (sps.getUseColorTrans() && !CS::isDualITree(*tempCS))
          {
            int colorSpaceIdx = ((m_pcEncCfg->getRGBFormatFlag() && adaptiveColorTrans) || (!m_pcEncCfg->getRGBFormatFlag() && !adaptiveColorTrans)) ? 0 : 1;
            if (tempCS->cost < tempCS->tmpColorSpaceIntraCost[colorSpaceIdx])
            {
              tempCS->tmpColorSpaceIntraCost[colorSpaceIdx] = tempCS->cost;
              bestCS->tmpColorSpaceIntraCost[colorSpaceIdx] = tempCS->cost;
            }
          }
         
          if( !sps.getUseLFNST() )
          {
            xCheckBestMode( tempCS, bestCS, partitioner, encTestMode );
          }
          else
          {
            if( xCheckBestMode( tempCS, bestCS, partitioner, encTestMode ) )
            {
              trGrpBestCost[ trGrpIdx ] = globalBestCost = bestCS->cost;
              bestSelFlag  [ trGrpIdx ] = true;
              bestMtsFlag               = mtsFlag;
              bestLfnstIdx              = lfnstIdx;

              if( bestCS->cus.size() == 1 )
              {
                CodingUnit &cu = *bestCS->cus.front();
                if (cu.firstTU->mtsIdx[COMPONENT_Y] == MTS_SKIP)
                {
                  if( ( floorLog2( cu.firstTU->blocks[ COMPONENT_Y ].width ) + floorLog2( cu.firstTU->blocks[ COMPONENT_Y ].height ) ) >= 6 )
                  {
                    endLfnstIdx = 0;
                  }
                }
              }
            }
            
            //we decide to skip the non-DCT-II transforms and LFNST according to the ISP results
            if ((endMtsFlag > 0 || endLfnstIdx > 0) && (cu.ispMode || (bestCS && bestCS->cus[0]->ispMode)) && tempCS->slice->isIntra() && m_pcEncCfg->getUseFastISP())
            {
              double bestCostDct2NoIsp = m_modeCtrl->getMtsFirstPassNoIspCost();
              double bestIspCost       = m_modeCtrl->getIspCost();
              CHECKD( bestCostDct2NoIsp <= bestIspCost, "wrong cost!" );
              double threshold = 1.4;
              
              double lfnstThreshold = 1.01 * threshold;
              if( m_modeCtrl->getStopNonDCT2Transforms() || bestCostDct2NoIsp > bestIspCost*lfnstThreshold )
              {
                endLfnstIdx = lfnstIdx;
              }

              if ( m_modeCtrl->getStopNonDCT2Transforms() || bestCostDct2NoIsp > bestIspCost*threshold )
              {
                skipSecondMtsPass = true;
                m_modeCtrl->setSkipSecondMTSPass( true );
                break;
              }
            }
            //now we check whether the second pass of SIZE_2Nx2N and the whole Intra SIZE_NxN should be skipped or not
            if( !mtsFlag && !tempCS->slice->isIntra() && bestCU && bestCU->predMode != MODE_INTRA )
            {
              const double thEmtInterFastSkipIntra = 1.4; // Skip checking Intra if "2Nx2N using DCT2" is worse than best Inter mode
              if( costSize2Nx2NmtsFirstPass > thEmtInterFastSkipIntra * bestInterCost )
              {
                skipSecondMtsPass = true;
                m_modeCtrl->setSkipSecondMTSPass( true );
                break;
              }
            }
#if JVET_W0103_INTRA_MTS
            if (lfnstIdx && m_modeCtrl->getMtsFirstPassNoIspCost() != MAX_DOUBLE && isLuma(partitioner.chType))
            {
              double threshold = 1.5;
              if (m_modeCtrl->getMtsFirstPassNoIspCost() > threshold * bestCS->cost)
              {
                endLfnstIdx = lfnstIdx;
              }
            }
#endif
          }

        } //for emtCuFlag
        if( skipOtherLfnst )
        {
          startLfnstIdx = lfnstIdx;
          endLfnstIdx   = lfnstIdx;
          break;
        }
      } //for lfnstIdx
    } //if (!skipSecondMtsPass && considerMtsSecondPass && trGrpCheck[iGrpIdx])

    if( sps.getUseLFNST() && trGrpIdx < 3 )
    {
      trGrpCheck[ trGrpIdx + 1 ] = false;

      if( bestSelFlag[ trGrpIdx ] && considerMtsSecondPass )
      {
        double dCostRatio = dct2Cost / trGrpBestCost[ trGrpIdx ];
        trGrpCheck[ trGrpIdx + 1 ] = ( bestMtsFlag != 0 || bestLfnstIdx != 0 ) && dCostRatio < trGrpStopThreshold[ trGrpIdx ];
      }
    }
  } //trGrpIdx

  if(!adaptiveColorTrans)
  m_modeCtrl->setBestNonDCT2Cost(bestNonDCT2Cost);
  return foundZeroRootCbf;
}


void EncCu::xCheckPLT(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode)
{
  // Tests palette mode (MODE_PLT) for the current area: runs the palette search, estimates the
  // signalling bits, derives the RD cost and finally offers the candidate to xCheckBestMode().

  // Size/tree restrictions: palette is not tested for luma passes on areas of at most 16 luma
  // samples, nor for the chroma-pass cases listed below.
  const auto &testArea = partitioner.currArea();
  const bool  lumaPass = isLuma(partitioner.chType);

  if ((testArea.lumaSize().width * testArea.lumaSize().height <= 16) && lumaPass)
  {
    return;
  }
#if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
  if ((testArea.chromaSize().width * testArea.chromaSize().height <= 16) && !lumaPass && CS::isDualITree(*tempCS))
  {
    return;
  }
#else
  if ((testArea.chromaSize().width * testArea.chromaSize().height <= 16) && !lumaPass && partitioner.isSepTree(*tempCS))
  {
    return;
  }
  if (partitioner.isLocalSepTree(*tempCS) && !lumaPass)
  {
    return;
  }
#endif

  // Build a single palette CU (with one PU and one TU) covering the area under test.
  tempCS->initStructData(encTestMode.qp);
  CodingUnit &cu = tempCS->addCU(CS::getArea(*tempCS, tempCS->area, partitioner.chType), partitioner.chType);
  partitioner.setCUData(cu);
  cu.slice       = tempCS->slice;
  cu.tileIdx     = tempCS->pps->getTileIdx(tempCS->area.lumaPos());
  cu.predMode    = MODE_PLT;
  cu.skip        = false;
  cu.mmvdSkip    = false;
  cu.bdpcmMode   = 0;
  cu.qp          = encTestMode.qp;
  cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1;

  tempCS->addPU(CS::getArea(*tempCS, tempCS->area, partitioner.chType), partitioner.chType);
  tempCS->addTU(CS::getArea(*tempCS, tempCS->area, partitioner.chType), partitioner.chType);

  // Tells whether luma and chroma are handled by separate passes for this CU.
  const auto codedInSeparateTree = [&]() -> bool
  {
#if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
    return CS::isDualITree(*tempCS);
#else
    return cu.isSepTree();
#endif
  };

  // Palette search
  tempCS->dist = 0;
  if (!codedInSeparateTree())
  {
    // Joint tree: search starts at luma and spans 3 components, or 1 when there is no chroma.
    m_pcIntraSearch->PLTSearch(*tempCS, partitioner, COMPONENT_Y, (cu.chromaFormat != CHROMA_400) ? 3 : 1);
  }
  else
  {
    if (isLuma(partitioner.chType))
    {
      m_pcIntraSearch->PLTSearch(*tempCS, partitioner, COMPONENT_Y, 1);
    }
    if ((partitioner.chType == CHANNEL_TYPE_CHROMA) && tempCS->area.chromaFormat != CHROMA_400)
    {
      m_pcIntraSearch->PLTSearch(*tempCS, partitioner, COMPONENT_Cb, 2);
    }
  }

  // Bit estimation, starting from the contexts stored in m_CurrCtx->start.
  m_CABACEstimator->getCtx() = m_CurrCtx->start;
  m_CABACEstimator->resetBits();

  const bool skipFlagSignalled = (!cu.cs->slice->isIntra() || cu.cs->slice->getSPS()->getIBCFlag()) && cu.Y().valid();
  if (skipFlagSignalled)
  {
    m_CABACEstimator->cu_skip_flag(cu);
  }
  m_CABACEstimator->pred_mode(cu);

  // Palette signalling; the component ranges mirror the ones used for the search above.
  CUCtx cuCtx;
  cuCtx.isDQPCoded         = true;
  cuCtx.isChromaQpAdjCoded = true;
  if (!codedInSeparateTree())
  {
    m_CABACEstimator->cu_palette_info(cu, COMPONENT_Y, (cu.chromaFormat != CHROMA_400) ? 3 : 1, cuCtx);
  }
  else
  {
    if (isLuma(partitioner.chType))
    {
      m_CABACEstimator->cu_palette_info(cu, COMPONENT_Y, 1, cuCtx);
    }
    if ((partitioner.chType == CHANNEL_TYPE_CHROMA) && tempCS->area.chromaFormat != CHROMA_400)
    {
      m_CABACEstimator->cu_palette_info(cu, COMPONENT_Cb, 2, cuCtx);
    }
  }

  // RD cost from the estimated bits and the distortion accumulated in tempCS->dist.
  tempCS->fracBits = m_CABACEstimator->getEstFracBits();
  tempCS->cost     = m_pcRdCost->calcRdCost(tempCS->fracBits, tempCS->dist);

  xEncodeDontSplit(*tempCS, partitioner);
  xCheckDQP(*tempCS, partitioner);
  xCheckChromaQPOffset( *tempCS, partitioner );
  xCalDebCost(*tempCS, partitioner);
  tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt();

  // Record the palette cost in the slice map, keyed by channel type, block position and block size.
  const Area currCuArea = cu.block(getFirstComponentOfChannel(partitioner.chType));
  cu.slice->m_mapPltCost[isChroma(partitioner.chType)][currCuArea.pos()][currCuArea.size()] = tempCS->cost;

#if WCG_EXT
  DTRACE_MODE_COST(*tempCS, m_pcRdCost->getLambda(true));
#else
  DTRACE_MODE_COST(*tempCS, m_pcRdCost->getLambda());
#endif
  xCheckBestMode(tempCS, bestCS, partitioner, encTestMode);
}

void EncCu::xCheckDQP( CodingStructure& cs, Partitioner& partitioner, bool bKeepCtx )
{
  // Adds the estimated delta-QP bits to the cost of 'cs' when a delta QP is coded, and sets the
  // QP of the CUs preceding the first coded CU (or of all CUs if none is coded) to the predicted QP.
  // When bKeepCtx is false, the DeltaQP contexts of the estimator are saved and restored around
  // the estimation.
  CHECK( bKeepCtx && cs.cus.size() <= 1 && partitioner.getImplicitSplit( cs ) == CU_DONT_SPLIT, "bKeepCtx should only be set in split case" );
  CHECK( !bKeepCtx && cs.cus.size() > 1, "bKeepCtx should never be set for non-split case" );

  // nothing to do when delta QP is disabled in the PPS
  if( !cs.pps->getUseDQP() )
  {
    return;
  }

  // nothing to do for the chroma pass of a separate tree
#if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
  if (CS::isDualITree(cs) && isChroma(partitioner.chType))
#else
  if (partitioner.isSepTree(cs) && isChroma(partitioner.chType))
#endif
  {
    return;
  }

  // do not consider split or leaf/not leaf QG condition (checked by caller)
  if( !partitioner.currQgEnable() )
  {
    return;
  }

  CodingUnit* cuFirst = cs.getCU( partitioner.chType );

  CHECK( !cuFirst, "No CU available" );

  // Tells whether a CU counts as "coded" for the purpose of delta-QP signalling.
  const auto countsAsCoded = []( const CodingUnit* candidate ) -> bool
  {
#if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
    return candidate->rootCbf;
#else
    //not include the chroma CU because chroma CU is decided based on corresponding luma QP and deltaQP is not signaled at chroma CU
    return candidate->rootCbf && !isChroma( candidate->chType );
#endif
  };

  bool anyCoded = false;
  for( const auto &cu : cs.cus )
  {
    if( countsAsCoded( cu ) )
    {
      anyCoded = true;
      break;
    }
  }

  int predQP = CU::predictQP( *cuFirst, cs.prevQP[partitioner.chType] );

  if( !anyCoded )
  {
    // No residuals: reset CU QP to predicted value
    for( const auto &cu : cs.cus )
    {
      cu->qp = predQP;
    }
    return;
  }

  TempCtx ctxTemp( m_CtxCache );
  if( !bKeepCtx )
  {
    ctxTemp = SubCtx( Ctx::DeltaQP, m_CABACEstimator->getCtx() );
  }

  m_CABACEstimator->resetBits();
  m_CABACEstimator->cu_qp_delta( *cuFirst, predQP, cuFirst->qp );

  cs.fracBits += m_CABACEstimator->getEstFracBits(); // dQP bits
  cs.cost      = m_pcRdCost->calcRdCost(cs.fracBits, cs.dist);

  if( !bKeepCtx )
  {
    m_CABACEstimator->getCtx() = SubCtx( Ctx::DeltaQP, ctxTemp );
  }

  // NOTE: reset QPs for CUs without residuals up to first coded CU
  for( const auto &cu : cs.cus )
  {
    if( countsAsCoded( cu ) )
    {
      break;
    }
    cu->qp = predQP;
  }
}

void EncCu::xCheckChromaQPOffset( CodingStructure& cs, Partitioner& partitioner )
{
  // Adds the estimated cu_chroma_qp_offset bits to the cost of 'cs' when chroma residual is
  // coded; otherwise clears the chroma QP adjustment of the CU.

  // doesn't apply if CU chroma QP offset is disabled
  if( !cs.slice->getUseChromaQpAdj() )
  {
    return;
  }

  // doesn't apply to luma CUs
#if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
  if (CS::isDualITree(cs) && isLuma(partitioner.chType))
#else
  if( partitioner.isSepTree(cs) && isLuma(partitioner.chType) )
#endif
  {
    return;
  }

  // not needed after the first coded TU in the chroma QG
  if( !partitioner.currQgChromaEnable() )
  {
    return;
  }

  CodingUnit& cu = *cs.getCU( partitioner.chType );

  // look for the first TU with a coded Cb or Cr block
  bool chromaCoded = false;
  for( const TransformUnit &tu : CU::traverseTUs(cu) )
  {
    chromaCoded = tu.cbf[COMPONENT_Cb] || tu.cbf[COMPONENT_Cr];
    if( chromaCoded )
    {
      break;
    }
  }

  if( !chromaCoded )
  {
    // reset chroma QP offset to 0 if it will not be coded
    cu.chromaQpAdj = 0;
    return;
  }

  // estimate cost for coding cu_chroma_qp_offset; the two affected context sets are
  // saved beforehand and written back after the estimation
  TempCtx savedAdjFlagCtx( m_CtxCache );
  TempCtx savedAdjIdcCtx( m_CtxCache );
  savedAdjFlagCtx = SubCtx( Ctx::ChromaQpAdjFlag, m_CABACEstimator->getCtx() );
  savedAdjIdcCtx  = SubCtx( Ctx::ChromaQpAdjIdc,  m_CABACEstimator->getCtx() );

  m_CABACEstimator->resetBits();
  m_CABACEstimator->cu_chroma_qp_offset( cu );

  cs.fracBits += m_CABACEstimator->getEstFracBits();
  cs.cost      = m_pcRdCost->calcRdCost(cs.fracBits, cs.dist);

  m_CABACEstimator->getCtx() = SubCtx( Ctx::ChromaQpAdjFlag, savedAdjFlagCtx );
  m_CABACEstimator->getCtx() = SubCtx( Ctx::ChromaQpAdjIdc,  savedAdjIdcCtx  );
}

#if!REMOVE_PCM
void EncCu::xFillPCMBuffer( CodingUnit &cu )
{
  // Fills the PCM buffer of every TU of the CU, component by component, from the original samples.
  const uint32_t compCount = getNumberValidComponents( cu.chromaFormat );

  for( auto &tu : CU::traverseTUs( cu ) )
  {
    for( uint32_t compIdx = 0; compIdx < compCount; ++compIdx )
    {
      const ComponentID compID  = ComponentID( compIdx );
      const CompArea   &srcArea = tu.blocks[ compID ];

      // Luma is passed through the forward LUT when LMCS is enabled for the slice
      // and the reshaper CTU flag is set; everything else is a plain copy.
      const bool useFwdLut = tu.cs->slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag() && compID == COMPONENT_Y;

      if( !useFwdLut )
      {
        tu.getPcmbuf( compID ).copyFrom( tu.cs->getOrgBuf( srcArea ) );
        continue;
      }

      tu.getPcmbuf( compID ).rspSignal( tu.cs->getOrgBuf( srcArea ), m_pcReshape->getFwdLUT() );
    }
  }
}
#endif

// Tests hash-based inter prediction for the current area.
//
// A single inter CU covering tempCS->area is set up and handed to
// InterSearch::predInterHashSearch(). When the search succeeds, the residual is
// coded through xEncodeInterResidual() (which may update bestCS) and, with OBMC
// enabled, a copy of the resulting structure and of the prediction without OBMC
// is stored in m_pTempCUWoOBMC / m_pPredBufWoOBMC. Finally tempCS is reset and
// the "perfect match" outcome is reported to the mode controller; it is only
// reported as true for blocks whose smaller luma dimension is at least 64.
//
// With ENABLE_OBMC && JVET_AA0129_INTERHASH_OBMCOFF_RD the function returns
// whether such a copy without OBMC was stored; otherwise it returns nothing.
#if ENABLE_OBMC && JVET_AA0129_INTERHASH_OBMCOFF_RD
bool EncCu::xCheckRDCostHashInter( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
#else
void EncCu::xCheckRDCostHashInter( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
#endif
{
#if ENABLE_OBMC
  double bestOBMCCost = MAX_DOUBLE;
#if ENABLE_OBMC && JVET_AA0129_INTERHASH_OBMCOFF_RD
  bool   validMode = false;
#endif
#endif
  bool isPerfectMatch = false;

  tempCS->initStructData(encTestMode.qp);
  m_pcInterSearch->resetBufferedUniMotions();
  m_pcInterSearch->setAffineModeSelected(false);
  CodingUnit &cu = tempCS->addCU(tempCS->area, partitioner.chType);

  partitioner.setCUData(cu);
  cu.slice = tempCS->slice;
  cu.tileIdx = tempCS->pps->getTileIdx(tempCS->area.lumaPos());
  cu.skip = false;
  cu.predMode = MODE_INTER;
  cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1;
  cu.qp = encTestMode.qp;
#if INTER_LIC
  cu.LICFlag = false;
#endif
  CU::addPUs(cu);
  cu.mmvdSkip = false;
  cu.firstPU->mmvdMergeFlag = false;

  // Capture the block size while 'cu' is known to be live. It is needed after the final
  // tempCS->initStructData() below; by then tempCS/bestCS may have been exchanged and the
  // structure owning 'cu' may have been reset.
  // NOTE(review): assumes initStructData() releases the CUs of the structure - confirm.
  const int minSize = min(cu.lwidth(), cu.lheight());

  if (m_pcInterSearch->predInterHashSearch(cu, partitioner, isPerfectMatch))
  {
    double equBcwCost = MAX_DOUBLE;

    m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;
#if ENABLE_OBMC //normal inter
    const unsigned wIdx = gp_sizeIdxInfo->idxFrom(partitioner.currArea().lwidth());
    CodingStructure *prevCS = tempCS;
    // keep the prediction as it is before OBMC is applied
    PelUnitBuf tempWoOBMCBuf = m_tempWoOBMCBuffer.subBuf(UnitAreaRelative(cu, cu));
    tempWoOBMCBuf.copyFrom(tempCS->getPredBuf(cu));
    cu.isobmcMC = true;
    cu.obmcFlag = true;
    m_pcInterSearch->subBlockOBMC(*cu.firstPU);
    cu.isobmcMC = false;
#endif
    xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0
      , 0
      , &equBcwCost
    );
#if ENABLE_OBMC // xCheckRDCostInter
    // if tempCS no longer points to the structure coded above, its result is read from bestCS
    double tempCost = (prevCS == tempCS) ? tempCS->cost : bestCS->cost;
    if (m_pTempCUWoOBMC && tempCost < bestOBMCCost)
    {
      const unsigned hIdx = gp_sizeIdxInfo->idxFrom(prevCS->area.lheight());
      m_pTempCUWoOBMC[wIdx][hIdx]->clearCUs();
      m_pTempCUWoOBMC[wIdx][hIdx]->clearPUs();
      m_pTempCUWoOBMC[wIdx][hIdx]->clearTUs();
      m_pTempCUWoOBMC[wIdx][hIdx]->copyStructure(*prevCS, partitioner.chType);

      m_pPredBufWoOBMC[wIdx][hIdx].copyFrom(tempWoOBMCBuf);
      m_pTempCUWoOBMC[wIdx][hIdx]->getPredBuf(cu).copyFrom(prevCS->getPredBuf(cu));

      bestOBMCCost = tempCost;
#if ENABLE_OBMC && JVET_AA0129_INTERHASH_OBMCOFF_RD
      validMode = true;
#endif
    }
#endif
    if ( m_bestModeUpdated && bestCS->cost != MAX_DOUBLE )
    {
      xCalDebCost( *bestCS, partitioner );
    }
  }
  tempCS->initStructData(encTestMode.qp);
  // a perfect match is only reported for blocks whose smaller dimension is at least 64
  if (minSize < 64)
  {
    isPerfectMatch = false;
  }
  m_modeCtrl->setIsHashPerfectMatch(isPerfectMatch);
#if ENABLE_OBMC && JVET_AA0129_INTERHASH_OBMCOFF_RD
  return validMode;
#endif
}

void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
{
  const Slice &slice = *tempCS->slice;

  CHECK( slice.getSliceType() == I_SLICE, "Merge modes not available for I-slices" );

  tempCS->initStructData( encTestMode.qp );

  MergeCtx mergeCtx;
#if JVET_W0090_ARMC_TM
  MergeCtx mergeCtxtmp;
#endif
#if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED
  uint32_t               mmvdLUT[MMVD_ADD_NUM];
#endif
#if JVET_AA0093_ENHANCED_MMVD_EXTENSION
  uint8_t numBaseAffine = AF_MMVD_BASE_NUM;
  unsigned ctxId = 0;
  {
    CodingUnit cu( tempCS->area );
    cu.cs       = tempCS;
    cu.predMode = MODE_INTER;
    cu.slice    = tempCS->slice;
    cu.tileIdx  = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
    const CodingUnit*  cuLeft  = tempCS->getCURestricted(cu.lumaPos().offset(-1, 0), cu, partitioner.chType);
    ctxId = (cuLeft && cuLeft->affine) ? 1 : 0;
    const CodingUnit*  cuAbove = tempCS->getCURestricted(cu.lumaPos().offset( 0,-1), cu, partitioner.chType);
    ctxId += (cuAbove && cuAbove->affine) ? 1 : 0;
  }
  numBaseAffine = (ctxId == 0) ? 1 : ((ctxId == 1) ? 2 : AF_MMVD_BASE_NUM);
#endif
#if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED
  uint32_t               affMmvdLUT[AF_MMVD_NUM];
#endif
  const SPS &sps = *tempCS->sps;

#if MERGE_ENC_OPT
  const bool affineMrgAvail = (sps.getUseAffine() || sps.getSbTMVPEnabledFlag()) && slice.getPicHeader()->getMaxNumAffineMergeCand()
    && !(bestCS->area.lumaSize().width < 8 || bestCS->area.lumaSize().height < 8);

  AffineMergeCtx affineMergeCtx;
#if JVET_W0090_ARMC_TM
  AffineMergeCtx affineMergeCtxTmp;
#endif
  MergeCtx mrgCtx;
#if TM_MRG
  MergeCtx tmMrgCtx;
#if JVET_X0141_CIIP_TIMD_TM
  MergeCtx ciipTmMrgCtx;
#endif
#endif


#if JVET_X0049_ADAPT_DMVR
  MergeCtx bmMrgCtx;
  bool checkBmMrg = false;
#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
  MergeCtx bmMrgCtxDir2;
#endif
#endif

  if (sps.getSbTMVPEnabledFlag())
  {
    Size bufSize = g_miScaling.scale(tempCS->area.lumaSize());
    mergeCtx.subPuMvpMiBuf = MotionBuf( m_SubPuMiBuf, bufSize );
    mrgCtx.subPuMvpMiBuf = MotionBuf(m_SubPuMiBuf, bufSize);
    affineMergeCtx.mrgCtx = &mrgCtx;
#if TM_MRG
    tmMrgCtx.subPuMvpMiBuf = MotionBuf(m_SubPuMiBuf, bufSize);
#if JVET_X0141_CIIP_TIMD_TM
    ciipTmMrgCtx.subPuMvpMiBuf = MotionBuf(m_SubPuMiBuf, bufSize);
#endif
#endif
  }
#endif

#if MULTI_PASS_DMVR
  bool applyBDMVR[MRG_MAX_NUM_CANDS] = { false };
#if TM_MRG && MERGE_ENC_OPT
#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
  bool admvrRefinedMotion = false;
  bool applyBDMVR4TM[TM_MRG_MAX_NUM_INIT_CANDS] = { false };
#else
  bool applyBDMVR4TM[TM_MRG_MAX_NUM_CANDS] = { false };
#endif
#endif
#if JVET_X0049_ADAPT_DMVR
#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
  bool applyBDMVR4BM[(BM_MRG_MAX_NUM_INIT_CANDS << 1)<<1] = { false };
#else
  bool applyBDMVR4BM[(BM_MRG_MAX_NUM_CANDS << 1)<<1] = { false };
#endif
#endif
#endif
#if !MULTI_PASS_DMVR
  Mv   refinedMvdL0[MAX_NUM_PARTS_IN_CTU][MRG_MAX_NUM_CANDS];
#endif
  setMergeBestSATDCost( MAX_DOUBLE );

  {
    // first get merge candidates
    CodingUnit cu( tempCS->area );
    cu.cs       = tempCS;
    cu.predMode = MODE_INTER;
    cu.slice    = tempCS->slice;
    cu.tileIdx  = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
#if INTER_LIC
    cu.LICFlag  = false;
#endif

    PredictionUnit pu( tempCS->area );
    pu.cu = &cu;
    pu.cs = tempCS;
#if TM_MRG || (JVET_Z0084_IBC_TM && IBC_TM_MRG)
    pu.tmMergeFlag = false;
#endif
#if JVET_AA0093_REFINED_MOTION_FOR_ARMC && !JVET_Y0134_TMVP_NAMVP_CAND_REORDERING
    int nWidth = pu.lumaSize().width;
    int nHeight = pu.lumaSize().height;
    bool tplAvail = m_pcInterSearch->xAMLGetCurBlkTemplate(pu, nWidth, nHeight);
#endif
#if JVET_Y0134_TMVP_NAMVP_CAND_REORDERING && JVET_W0090_ARMC_TM
    int nWidth = pu.lumaSize().width;
    int nHeight = pu.lumaSize().height;
    bool tplAvail = m_pcInterSearch->xAMLGetCurBlkTemplate(pu, nWidth, nHeight);

    MergeCtx tmvpMergeCandCtx;
#if JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC
    if (sps.getUseAML() && tplAvail)
    {
      PU::getTmvpMergeCand(pu, tmvpMergeCandCtx);
    }
#else
    if (sps.getUseAML())
    {
      PU::getTmvpMergeCand(pu, tmvpMergeCandCtx);
      if (tplAvail)
      {
        m_pcInterSearch->adjustMergeCandidatesInOneCandidateGroup(pu, tmvpMergeCandCtx, 1);
      }
      else
      {
        tmvpMergeCandCtx.numValidMergeCand = std::min(1, tmvpMergeCandCtx.numValidMergeCand);
      }
    }
#endif
    MergeCtx namvpMergeCandCtx;
#if JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC
    if (sps.getUseAML() && tplAvail)
    {
      PU::getNonAdjacentMergeCand(pu, namvpMergeCandCtx);
    }
#else    
    if (sps.getUseAML())
    {
      PU::getNonAdjacentMergeCand(pu, namvpMergeCandCtx);
      if (tplAvail)
      {
        m_pcInterSearch->adjustMergeCandidatesInOneCandidateGroup(pu, namvpMergeCandCtx, 9);
      }
      else
      {
        namvpMergeCandCtx.numValidMergeCand = std::min(9, namvpMergeCandCtx.numValidMergeCand);
      }
    }
#endif
#if JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC
    if (!tplAvail)
    {
      PU::getInterMergeCandidates(pu, mergeCtx, 0, -1);
      tmMrgCtx.numValidMergeCand = pu.cs->sps->getMaxNumMergeCand();
    }
    else
#endif
#endif

    PU::getInterMergeCandidates(pu, mergeCtx
      , 0
#if JVET_Y0134_TMVP_NAMVP_CAND_REORDERING && JVET_W0090_ARMC_TM
      , -1
      , (sps.getUseAML()) ? &tmvpMergeCandCtx : NULL
      , (sps.getUseAML()) ? &namvpMergeCandCtx : NULL
#endif
    );
#if JVET_Y0134_TMVP_NAMVP_CAND_REORDERING && JVET_W0090_ARMC_TM
    if (sps.getUseAML())
    {
      if (tplAvail)
      {
#if JVET_Z0102_NO_ARMC_FOR_ZERO_CAND 
        m_pcInterSearch->adjustMergeCandidates(pu, mergeCtx, pu.cs->sps->getMaxNumMergeCand());
#else
        m_pcInterSearch->adjustMergeCandidatesInOneCandidateGroup(pu, mergeCtx, pu.cs->sps->getMaxNumMergeCand());
#endif
      }
    }
#if JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC
    if (mergeCtx.numValidMergeCand != pu.cs->sps->getMaxNumMergeCand())
    {
      mergeCtx.numValidMergeCand = pu.cs->sps->getMaxNumMergeCand();
    }
    for (uint32_t ui = mergeCtx.numValidMergeCand; ui < NUM_MERGE_CANDS; ++ui)
    {
      mergeCtx.BcwIdx[ui] = BCW_DEFAULT;
#if INTER_LIC
      mergeCtx.LICFlags[ui] = false;
#endif
      mergeCtx.interDirNeighbours[ui]                  = 0;
      mergeCtx.mvFieldNeighbours[(ui << 1)].refIdx     = NOT_VALID;
      mergeCtx.mvFieldNeighbours[(ui << 1) + 1].refIdx = NOT_VALID;
      mergeCtx.useAltHpelIf[ui]                        = false;
#if MULTI_HYP_PRED
      mergeCtx.addHypNeighbours[ui].clear();
#endif
      mergeCtx.candCost[ui] = MAX_UINT64;
    }
#endif
    PU::getInterMergeCandidates(pu, mergeCtxtmp, 0);
#endif

#if JVET_X0141_CIIP_TIMD_TM && TM_MRG
    if (cu.cs->sps->getUseCiipTmMrg())
    {
      pu.tmMergeFlag = true;
      pu.ciipFlag = true;
#if JVET_Y0134_TMVP_NAMVP_CAND_REORDERING && JVET_W0090_ARMC_TM
      ciipTmMrgCtx = mergeCtxtmp;
      ciipTmMrgCtx.numValidMergeCand = int(pu.cs->sps->getMaxNumCiipTMMergeCand());
      memcpy(ciipTmMrgCtx.BcwIdx, mergeCtxtmp.BcwIdx, CIIP_TM_MRG_MAX_NUM_CANDS * sizeof(uint8_t));
      memcpy(ciipTmMrgCtx.interDirNeighbours, mergeCtxtmp.interDirNeighbours, CIIP_TM_MRG_MAX_NUM_CANDS * sizeof(unsigned char));
      memcpy(ciipTmMrgCtx.mvFieldNeighbours, mergeCtxtmp.mvFieldNeighbours, (CIIP_TM_MRG_MAX_NUM_CANDS << 1) * sizeof(MvField));
      memcpy(ciipTmMrgCtx.useAltHpelIf, mergeCtxtmp.useAltHpelIf, CIIP_TM_MRG_MAX_NUM_CANDS * sizeof(bool));
#if INTER_LIC
      memcpy(ciipTmMrgCtx.LICFlags, mergeCtxtmp.LICFlags, CIIP_TM_MRG_MAX_NUM_CANDS * sizeof(bool));
#endif
#if MULTI_HYP_PRED
      memcpy(ciipTmMrgCtx.addHypNeighbours, mergeCtxtmp.addHypNeighbours, CIIP_TM_MRG_MAX_NUM_CANDS * sizeof(MultiHypVec));
#endif
#else
      ciipTmMrgCtx = mergeCtx;
      ciipTmMrgCtx.numValidMergeCand = int(pu.cs->sps->getMaxNumCiipTMMergeCand());
      memcpy(ciipTmMrgCtx.BcwIdx, mergeCtx.BcwIdx, CIIP_TM_MRG_MAX_NUM_CANDS * sizeof(uint8_t));
      memcpy(ciipTmMrgCtx.interDirNeighbours, mergeCtx.interDirNeighbours, CIIP_TM_MRG_MAX_NUM_CANDS * sizeof(unsigned char));
      memcpy(ciipTmMrgCtx.mvFieldNeighbours, mergeCtx.mvFieldNeighbours, (CIIP_TM_MRG_MAX_NUM_CANDS << 1) * sizeof(MvField));
      memcpy(ciipTmMrgCtx.useAltHpelIf, mergeCtx.useAltHpelIf, CIIP_TM_MRG_MAX_NUM_CANDS * sizeof(bool));
#if INTER_LIC
      memcpy(ciipTmMrgCtx.LICFlags, mergeCtx.LICFlags, CIIP_TM_MRG_MAX_NUM_CANDS * sizeof(bool));
#endif
#if MULTI_HYP_PRED
      memcpy(ciipTmMrgCtx.addHypNeighbours, mergeCtx.addHypNeighbours, CIIP_TM_MRG_MAX_NUM_CANDS * sizeof(MultiHypVec));
#endif
#endif

      for (uint32_t uiMergeCand = 0; uiMergeCand < ciipTmMrgCtx.numValidMergeCand; uiMergeCand++)
      {
        ciipTmMrgCtx.setMergeInfo(pu, uiMergeCand);
        m_pcInterSearch->deriveTMMv(pu);

        // Store refined motion back to ciipTmMrgCtx
        ciipTmMrgCtx.interDirNeighbours[uiMergeCand] = pu.interDir;
        ciipTmMrgCtx.BcwIdx[uiMergeCand] = pu.cu->BcwIdx;  // Bcw may change, because bi may be reduced to uni by deriveTMMv(pu)
        ciipTmMrgCtx.mvFieldNeighbours[2 * uiMergeCand].setMvField(pu.mv[0], pu.refIdx[0]);
        ciipTmMrgCtx.mvFieldNeighbours[2 * uiMergeCand + 1].setMvField(pu.mv[1], pu.refIdx[1]);
        if (pu.interDir == 1)
        {
          ciipTmMrgCtx.mvFieldNeighbours[2 * uiMergeCand + 1].setMvField(Mv(), NOT_VALID);
        }
        if (pu.interDir == 2)
        {
          ciipTmMrgCtx.mvFieldNeighbours[2 * uiMergeCand].setMvField(Mv(), NOT_VALID);
        }
      }
#if JVET_W0090_ARMC_TM
      if (sps.getUseAML())
      {
        m_pcInterSearch->adjustInterMergeCandidates(pu, ciipTmMrgCtx);
      }
#endif
      pu.tmMergeFlag = false;
      pu.ciipFlag = false;
    }
#endif
#if JVET_W0097_GPM_MMVD_TM
#if JVET_Y0134_TMVP_NAMVP_CAND_REORDERING && JVET_W0090_ARMC_TM
    m_mergeCand.copyMergeCtx(mergeCtxtmp);
#else
    m_mergeCand.copyMergeCtx(mergeCtx);
#endif
    m_mergeCandAvail = true;
#endif
#if JVET_Y0134_TMVP_NAMVP_CAND_REORDERING && JVET_W0090_ARMC_TM
    PU::getInterMMVDMergeCandidates(pu, mergeCtxtmp);
#else
    PU::getInterMMVDMergeCandidates(pu, mergeCtx);
#if JVET_W0090_ARMC_TM
    mergeCtxtmp = mergeCtx;
    if (sps.getUseAML())
    {
      m_pcInterSearch->adjustInterMergeCandidates(pu, mergeCtx);
    }
#endif
#endif
#if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED
    bool flag = pu.mmvdMergeFlag;
    pu.mmvdMergeFlag = true;
    m_pcInterSearch->sortInterMergeMMVDCandidates(pu, mergeCtxtmp, mmvdLUT);
    pu.mmvdMergeFlag = flag;
#endif
#if TM_MRG && MERGE_ENC_OPT
    if (cu.cs->sps->getUseDMVDMode())
    {
      cu.firstPU = &pu;
#if JVET_Y0134_TMVP_NAMVP_CAND_REORDERING && JVET_W0090_ARMC_TM
#if JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC || JVET_AA0093_REFINED_MOTION_FOR_ARMC
      pu.tmMergeFlag = true;
#endif
      MergeCtx tmvpTmMergeCandCtx;
#if JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC
      if (sps.getUseAML() && tplAvail)
      {
        PU::getTmvpMergeCand(pu, tmvpTmMergeCandCtx);
      }
#else
      if (sps.getUseAML())
      {
        PU::getTmvpMergeCand(pu, tmvpTmMergeCandCtx);
        if (tplAvail)
        {
          m_pcInterSearch->adjustMergeCandidatesInOneCandidateGroup(pu, tmvpTmMergeCandCtx, 1);
        }
        else
        {
          tmvpTmMergeCandCtx.numValidMergeCand = std::min(1, tmvpTmMergeCandCtx.numValidMergeCand);
        }
      }
#endif
      MergeCtx namvpTmMergeCandCtx;
#if JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC
      if (sps.getUseAML() && tplAvail)
      {
        PU::getNonAdjacentMergeCand(pu, namvpTmMergeCandCtx);
      }
#else
      if (sps.getUseAML())
      {
        PU::getNonAdjacentMergeCand(pu, namvpTmMergeCandCtx);
        if (tplAvail)
        {
          m_pcInterSearch->adjustMergeCandidatesInOneCandidateGroup(pu, namvpTmMergeCandCtx, 9);
        }
        else
        {
          namvpTmMergeCandCtx.numValidMergeCand = std::min(9, namvpTmMergeCandCtx.numValidMergeCand);
        }
      }
#endif
#endif
      pu.tmMergeFlag = true;
#if JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC && JVET_Y0134_TMVP_NAMVP_CAND_REORDERING
      if (!tplAvail)
      {
        PU::getInterMergeCandidates(pu, tmMrgCtx, 0, -1);
        tmMrgCtx.numValidMergeCand = pu.cs->sps->getMaxNumTMMergeCand();
      }
      else
#endif
      PU::getInterMergeCandidates(pu, tmMrgCtx, 0
#if JVET_Y0134_TMVP_NAMVP_CAND_REORDERING && JVET_W0090_ARMC_TM
        , -1
        , (sps.getUseAML()) ? &tmvpTmMergeCandCtx : NULL
        , (sps.getUseAML()) ? &namvpTmMergeCandCtx : NULL
#endif
      );
#if JVET_W0090_ARMC_TM
#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
      bool tmMergeRefinedMotion = PU::isArmcRefinedMotionEnabled(pu, 2);
      tmMergeRefinedMotion &= tplAvail;
#endif
      if (sps.getUseAML())
      {
#if JVET_Y0134_TMVP_NAMVP_CAND_REORDERING
        if (tplAvail)
        {
#if JVET_Z0102_NO_ARMC_FOR_ZERO_CAND 
          m_pcInterSearch->adjustMergeCandidates(pu, tmMrgCtx, pu.cs->sps->getMaxNumTMMergeCand());
#else
          m_pcInterSearch->adjustMergeCandidatesInOneCandidateGroup(pu, tmMrgCtx, pu.cs->sps->getMaxNumTMMergeCand());
#endif
        }
        if (tmMrgCtx.numValidMergeCand > pu.cs->sps->getMaxNumTMMergeCand())
        {
          tmMrgCtx.numValidMergeCand = pu.cs->sps->getMaxNumTMMergeCand();
        }
#else
#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
        if (!tmMergeRefinedMotion)
#endif
        m_pcInterSearch->adjustInterMergeCandidates(pu, tmMrgCtx);
#endif
#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
        if (tmMrgCtx.numCandToTestEnc > tmMrgCtx.numValidMergeCand)
        {
          tmMrgCtx.numCandToTestEnc = tmMrgCtx.numValidMergeCand;
        }
        for (uint32_t ui = tmMrgCtx.numValidMergeCand; ui < NUM_MERGE_CANDS; ++ui)
        {
          tmMrgCtx.BcwIdx[ui] = BCW_DEFAULT;
#if INTER_LIC
          tmMrgCtx.LICFlags[ui] = false;
#endif
          tmMrgCtx.interDirNeighbours[ui] = 0;
          tmMrgCtx.mvFieldNeighbours[(ui << 1)].refIdx = NOT_VALID;
          tmMrgCtx.mvFieldNeighbours[(ui << 1) + 1].refIdx = NOT_VALID;
          tmMrgCtx.useAltHpelIf[ui] = false;
#if MULTI_HYP_PRED
          tmMrgCtx.addHypNeighbours[ui].clear();
#endif
          tmMrgCtx.candCost[ui] = MAX_UINT64;
        }
#endif
      }
#endif

#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
      if (tmMergeRefinedMotion)
      {
        pu.reduceTplSize = true;
      }
      Distortion tempCost[1];
#endif

      for( uint32_t uiMergeCand = 0; uiMergeCand < tmMrgCtx.numValidMergeCand; uiMergeCand++ )
      {
        tmMrgCtx.setMergeInfo( pu, uiMergeCand );
#if MULTI_PASS_DMVR
        applyBDMVR4TM[uiMergeCand] = PU::checkBDMVRCondition(pu);
        if (applyBDMVR4TM[uiMergeCand])
        {
          pu.bdmvrRefine = true;
          m_pcInterSearch->setBdmvrSubPuMvBuf(m_mvBufBDMVR4TM[uiMergeCand << 1], m_mvBufBDMVR4TM[(uiMergeCand << 1) + 1]);
#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
          if (tmMergeRefinedMotion)
          {
            applyBDMVR4TM[uiMergeCand] =  m_pcInterSearch->processBDMVR(pu, 1, tempCost);
          }
          else
#endif
          applyBDMVR4TM[uiMergeCand] =  m_pcInterSearch->processBDMVR(pu);
        }
        else
        {
#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
          m_pcInterSearch->deriveTMMv(pu, tempCost);
#else
          m_pcInterSearch->deriveTMMv(pu);
#endif
        }
#else
        m_pcInterSearch->deriveTMMv( pu );
#endif

#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
        tmMrgCtx.candCost[uiMergeCand] = tempCost[0];
#endif
        // Store refined motion back to tmMrgCtx
        tmMrgCtx.interDirNeighbours[uiMergeCand] = pu.interDir;
        tmMrgCtx.BcwIdx[uiMergeCand] = pu.cu->BcwIdx;  // Bcw may change, because bi may be reduced to uni by deriveTMMv(pu)
        tmMrgCtx.mvFieldNeighbours[2 * uiMergeCand    ].setMvField( pu.mv[0], pu.refIdx[0] );
        tmMrgCtx.mvFieldNeighbours[2 * uiMergeCand + 1].setMvField( pu.mv[1], pu.refIdx[1] );
        if( pu.interDir == 1 )
        {
          tmMrgCtx.mvFieldNeighbours[2 * uiMergeCand + 1].setMvField( Mv(), NOT_VALID );
        }
        if( pu.interDir == 2 )
        {
          tmMrgCtx.mvFieldNeighbours[2 * uiMergeCand    ].setMvField( Mv(), NOT_VALID );
        }
      }
#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
      pu.reduceTplSize = false;
#endif

#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
      if (tmMergeRefinedMotion)
      {
        m_pcInterSearch->adjustMergeCandidatesInOneCandidateGroup(pu, tmMrgCtx, applyBDMVR4TM, NULL, NULL, pu.cs->sps->getMaxNumTMMergeCand());
        pu.tmMergeFlag = true;
        for( uint32_t uiMergeCand = 0; uiMergeCand < tmMrgCtx.numValidMergeCand; uiMergeCand++ )
        {
          tmMrgCtx.setMergeInfo( pu, uiMergeCand );
#if MULTI_PASS_DMVR
          applyBDMVR4TM[uiMergeCand] = PU::checkBDMVRCondition(pu);
          if (applyBDMVR4TM[uiMergeCand])
          {
            pu.bdmvrRefine = true;
            m_pcInterSearch->setBdmvrSubPuMvBuf(m_mvBufBDMVR4TM[uiMergeCand << 1], m_mvBufBDMVR4TM[(uiMergeCand << 1) + 1]);
            applyBDMVR4TM[uiMergeCand] =  m_pcInterSearch->processBDMVR(pu);
          }
          else
          {
            m_pcInterSearch->deriveTMMv(pu);
          }

          tmMrgCtx.interDirNeighbours[uiMergeCand] = pu.interDir;
          tmMrgCtx.BcwIdx[uiMergeCand] = pu.cu->BcwIdx;  // Bcw may change, because bi may be reduced to uni by deriveTMMv(pu)
          tmMrgCtx.mvFieldNeighbours[2 * uiMergeCand    ].setMvField( pu.mv[0], pu.refIdx[0] );
          tmMrgCtx.mvFieldNeighbours[2 * uiMergeCand + 1].setMvField( pu.mv[1], pu.refIdx[1] );
          if( pu.interDir == 1 )
          {
            tmMrgCtx.mvFieldNeighbours[2 * uiMergeCand + 1].setMvField( Mv(), NOT_VALID );
          }
          if( pu.interDir == 2 )
          {
            tmMrgCtx.mvFieldNeighbours[2 * uiMergeCand    ].setMvField( Mv(), NOT_VALID );
          }
#endif
        }
      }
#endif
      pu.tmMergeFlag = false;
#if MULTI_PASS_DMVR
      pu.bdmvrRefine = false;
#endif
    }
#endif
#if MERGE_ENC_OPT
    if (affineMrgAvail)
    {
      pu.regularMergeFlag = false;
      cu.affine = true;
      PU::getAffineMergeCand(pu, affineMergeCtx
#if JVET_AA0107_RMVF_AFFINE_MERGE_DERIVATION && JVET_W0090_ARMC_TM
        , m_pcInterSearch
#endif
      );
#if JVET_W0090_ARMC_TM
      affineMergeCtxTmp = affineMergeCtx;
#if JVET_AA0107_RMVF_AFFINE_MERGE_DERIVATION
      affineMergeCtxTmp.numValidMergeCand = slice.getPicHeader()->getMaxNumAffineMergeCand();
      affineMergeCtxTmp.maxNumMergeCand = slice.getPicHeader()->getMaxNumAffineMergeCand();
#endif
#if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED
#if JVET_AA0093_ENHANCED_MMVD_EXTENSION
      m_pcInterSearch->sortAffineMergeCandidates(pu, affineMergeCtxTmp, affMmvdLUT, (numBaseAffine - 1 ) * AF_MMVD_MAX_REFINE_NUM, true);
#else
      m_pcInterSearch->sortAffineMergeCandidates(pu, affineMergeCtxTmp, affMmvdLUT);
#endif
#endif
      if (sps.getUseAML())
      {
        m_pcInterSearch->adjustAffineMergeCandidates(pu, affineMergeCtx);
#if JVET_AA0107_RMVF_AFFINE_MERGE_DERIVATION
        affineMergeCtx.numValidMergeCand = slice.getPicHeader()->getMaxNumAffineMergeCand();
        affineMergeCtx.maxNumMergeCand = slice.getPicHeader()->getMaxNumAffineMergeCand();
#endif
      }
#endif
      cu.affine = false;
    }
#endif
    pu.regularMergeFlag = true;

#if MULTI_PASS_DMVR
    if (cu.cs->sps->getUseDMVDMode())
    {
      cu.firstPU = &pu;
      for (uint32_t uiMergeCand = 0; uiMergeCand < mergeCtx.numValidMergeCand; uiMergeCand++)
      {
        if( mergeCtx.interDirNeighbours[uiMergeCand] == 3 )
        {
          mergeCtx.setMergeInfo( pu, uiMergeCand );
          applyBDMVR[uiMergeCand] = PU::checkBDMVRCondition(pu);

          if (applyBDMVR[uiMergeCand])
          {
            pu.bdmvrRefine = true;
            m_pcInterSearch->setBdmvrSubPuMvBuf(m_mvBufBDMVR[uiMergeCand << 1], m_mvBufBDMVR[(uiMergeCand << 1) + 1]);

            if (mergeCtx.xCheckSimilarMotion(pu.mergeIdx, PU::getBDMVRMvdThreshold(pu)))
            {
              // span motion to subPU
              for (int subPuIdx = 0; subPuIdx < MAX_NUM_SUBCU_DMVR; subPuIdx++)
              {
                m_mvBufBDMVR[uiMergeCand << 1][subPuIdx] = pu.mv[0];
                m_mvBufBDMVR[(uiMergeCand << 1) + 1][subPuIdx] = pu.mv[1];
              }
            }
            else
            {
              m_pcInterSearch->processBDMVR(pu);
            }
          }
        }
      }

#if JVET_X0049_ADAPT_DMVR
      checkBmMrg = PU::isBMMergeFlagCoded(pu);
      if (checkBmMrg)
      {
#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
        admvrRefinedMotion = PU::isArmcRefinedMotionEnabled(pu, 1);
        admvrRefinedMotion &= tplAvail;
#endif
        pu.bmMergeFlag = true;
#if JVET_Y0134_TMVP_NAMVP_CAND_REORDERING && JVET_W0090_ARMC_TM
        MergeCtx tmvpMergeCandCtx2;
#if JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC
        if (sps.getUseAML() && tplAvail)
        {
          PU::getTmvpBMCand(pu, tmvpMergeCandCtx2);
        }
#else
        if (sps.getUseAML())
        {
          PU::getTmvpBMCand(pu, tmvpMergeCandCtx2);
          pu.bmDir = 0;
          if (tplAvail)
          {
            m_pcInterSearch->adjustMergeCandidatesInOneCandidateGroup(pu, tmvpMergeCandCtx2, 1);
          }
          else
          {
            tmvpMergeCandCtx2.numValidMergeCand = std::min(1, tmvpMergeCandCtx2.numValidMergeCand);
          }
        }
#endif
        MergeCtx namvpMergeCandCtx2;
#if JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC
        if (sps.getUseAML() && tplAvail)
        {
          PU::getNonAdjacentBMCand(pu, namvpMergeCandCtx2);
        }
#else        
        if (sps.getUseAML())
        {
          PU::getNonAdjacentBMCand(pu, namvpMergeCandCtx2);
          pu.bmDir = 0;
          if (tplAvail)
          {
            m_pcInterSearch->adjustMergeCandidatesInOneCandidateGroup(pu, namvpMergeCandCtx2, 3);
          }
          else
          {
            namvpMergeCandCtx2.numValidMergeCand = std::min(3, namvpMergeCandCtx2.numValidMergeCand);
          }
        }
#endif
#if JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC
        if (!tplAvail)
        {
          PU::getInterBMCandidates(pu, bmMrgCtx
#if JVET_Y0134_TMVP_NAMVP_CAND_REORDERING && JVET_W0090_ARMC_TM
            , -1
            , NULL
            , NULL
#endif
          );
        }
        else
#endif
#endif

        PU::getInterBMCandidates(pu, bmMrgCtx
#if JVET_Y0134_TMVP_NAMVP_CAND_REORDERING && JVET_W0090_ARMC_TM
          , -1
          , (sps.getUseAML()) ? &tmvpMergeCandCtx2 : NULL
          , (sps.getUseAML()) ? &namvpMergeCandCtx2 : NULL
#endif
        );
#if JVET_W0090_ARMC_TM
        if (pu.cs->sps->getUseAML())
        {
          pu.bmDir = 0;
#if JVET_Y0134_TMVP_NAMVP_CAND_REORDERING
          if (tplAvail)
          {
#if JVET_Z0102_NO_ARMC_FOR_ZERO_CAND 
            m_pcInterSearch->adjustMergeCandidates(pu, bmMrgCtx, pu.cs->sps->getMaxNumBMMergeCand());
#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
            if (bmMrgCtx.numCandToTestEnc > bmMrgCtx.numValidMergeCand)
            {
              bmMrgCtx.numCandToTestEnc = bmMrgCtx.numValidMergeCand;
            }
#endif
#else
            m_pcInterSearch->adjustMergeCandidatesInOneCandidateGroup(pu, bmMrgCtx, pu.cs->sps->getMaxNumBMMergeCand());
#endif
          }
          if (bmMrgCtx.numValidMergeCand > pu.cs->sps->getMaxNumBMMergeCand())
          {
            bmMrgCtx.numValidMergeCand = pu.cs->sps->getMaxNumBMMergeCand();
          }
#else
#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
          if (!admvrRefinedMotion)
#endif
          m_pcInterSearch->adjustInterMergeCandidates(pu, bmMrgCtx);
#endif
        }
#endif
        if (bmMrgCtx.numValidMergeCand == 0)
        {
          checkBmMrg = false;
        }
#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
        bmMrgCtxDir2 = bmMrgCtx;
#endif

        pu.bmMergeFlag = false;
        pu.bdmvrRefine = false;
      }
#endif
    }
#endif
  }
#if AFFINE_MMVD && MERGE_ENC_OPT
#if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED
  int  afMmvdBaseIdxToMergeIdxOffset = (int)PU::getMergeIdxFromAfMmvdBaseIdx(affineMergeCtxTmp, 0);
#else
  int  afMmvdBaseIdxToMergeIdxOffset = (int)PU::getMergeIdxFromAfMmvdBaseIdx(affineMergeCtx, 0);
#endif
  int  afMmvdBaseCount = std::min<int>((int)AF_MMVD_BASE_NUM, affineMergeCtx.numValidMergeCand - afMmvdBaseIdxToMergeIdxOffset);
  bool affineMmvdAvail = affineMrgAvail && afMmvdBaseCount >= 1 && sps.getUseAffineMmvdMode();
#endif
  bool candHasNoResidual[MRG_MAX_NUM_CANDS + MMVD_ADD_NUM] = { false };
  bool                                        bestIsSkip = false;
  bool                                        bestIsMMVDSkip = true;
#if !MERGE_ENC_OPT
  PelUnitBuf                                  acMergeBuffer[MRG_MAX_NUM_CANDS];
#endif
  PelUnitBuf                                  acMergeTmpBuffer[MRG_MAX_NUM_CANDS];
#if JVET_X0141_CIIP_TIMD_TM && TM_MRG
  PelUnitBuf                                  acTmMergeTmpBuffer[MRG_MAX_NUM_CANDS];
#endif
  PelUnitBuf                                  acMergeRealBuffer[MMVD_MRG_MAX_RD_BUF_NUM];
  PelUnitBuf *                                acMergeTempBuffer[MMVD_MRG_MAX_RD_NUM];
  PelUnitBuf *                                singleMergeTempBuffer;
#if !MERGE_ENC_OPT
  int                                         insertPos;
#endif
  unsigned                                    uiNumMrgSATDCand = mergeCtx.numValidMergeCand + MMVD_ADD_NUM;

#if !MERGE_ENC_OPT
  // Entry type of RdModeList for the build where MERGE_ENC_OPT is disabled:
  // a candidate index plus flags telling which merge tool the entry belongs to.
  struct ModeInfo
  {
    uint32_t mergeCand;        // candidate index (regular merge index or MMVD candidate index)
    bool     isRegularMerge;   // entry was added as a regular merge candidate
    bool     isMMVD;           // entry was added as an MMVD candidate
    bool     isCIIP;           // entry was added as a CIIP candidate
#if CIIP_PDPC
    bool     isCiipPDPC;       // CIIP entry tested with pu.ciipPDPC set
#endif

#if CIIP_PDPC
    // Default entry: index 0 with every flag cleared.
    ModeInfo() : ModeInfo(0, false, false, false, false) {}

    // Fully specified entry (variant carrying the CIIP-PDPC flag).
    ModeInfo(const uint32_t cand, const bool regular, const bool mmvd, const bool ciip, const bool ciipPdpc)
      : mergeCand(cand)
      , isRegularMerge(regular)
      , isMMVD(mmvd)
      , isCIIP(ciip)
      , isCiipPDPC(ciipPdpc)
    {
    }
#else
    // Default entry: index 0 with every flag cleared.
    ModeInfo() : ModeInfo(0, false, false, false) {}

    // Fully specified entry.
    ModeInfo(const uint32_t cand, const bool regular, const bool mmvd, const bool ciip)
      : mergeCand(cand)
      , isRegularMerge(regular)
      , isMMVD(mmvd)
      , isCIIP(ciip)
    {
    }
#endif
  };
#endif
  static_vector<ModeInfo, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM>  RdModeList;
  bool                                        mrgTempBufSet = false;
  const int candNum = mergeCtx.numValidMergeCand + (tempCS->sps->getUseMMVD() ? std::min<int>(MMVD_BASE_MV_NUM, mergeCtx.numValidMergeCand) * MMVD_MAX_REFINE_NUM : 0);

  for (int i = 0; i < candNum; i++)
  {
    if (i < mergeCtx.numValidMergeCand)
    {
#if CIIP_PDPC
#if MERGE_ENC_OPT
#if JVET_X0141_CIIP_TIMD_TM && JVET_W0123_TIMD_FUSION
      RdModeList.push_back(ModeInfo(i, true, false, false, false, 0, false));
#else
      RdModeList.push_back(ModeInfo(i, true, false, false, false, false));
#endif
#else
      RdModeList.push_back(ModeInfo(i, true, false, false, false));
#endif
#else
#if MERGE_ENC_OPT
#if JVET_X0141_CIIP_TIMD_TM && JVET_W0123_TIMD_FUSION
      RdModeList.push_back(ModeInfo(i, true, false, false, 0, false));
#else
      RdModeList.push_back(ModeInfo(i, true, false, false, false));
#endif
#else
      RdModeList.push_back(ModeInfo(i, true, false, false));
#endif
#endif
    }
    else
    {
#if MERGE_ENC_OPT
#if CIIP_PDPC
#if JVET_X0141_CIIP_TIMD_TM && JVET_W0123_TIMD_FUSION
      RdModeList.push_back(ModeInfo(std::min(MMVD_ADD_NUM, i - mergeCtx.numValidMergeCand), false, true, false, false, 0, false));
#else
      RdModeList.push_back(ModeInfo(std::min(MMVD_ADD_NUM, i - mergeCtx.numValidMergeCand), false, true, false, false, false));
#endif
#else
#if JVET_X0141_CIIP_TIMD_TM && JVET_W0123_TIMD_FUSION
      RdModeList.push_back(ModeInfo(std::min(MMVD_ADD_NUM, i - mergeCtx.numValidMergeCand), false, true, false, 0, false));
#else
      RdModeList.push_back(ModeInfo(std::min(MMVD_ADD_NUM, i - mergeCtx.numValidMergeCand), false, true, false, false));
#endif
#endif
#else
#if CIIP_PDPC
      RdModeList.push_back(ModeInfo(std::min(MMVD_ADD_NUM, i - mergeCtx.numValidMergeCand), false, true, false, false));
#else
      RdModeList.push_back(ModeInfo(std::min(MMVD_ADD_NUM, i - mergeCtx.numValidMergeCand), false, true, false));
#endif
#endif
    }
  }

  const UnitArea localUnitArea(tempCS->area.chromaFormat, Area(0, 0, tempCS->area.Y().width, tempCS->area.Y().height));
  for (unsigned i = 0; i < MMVD_MRG_MAX_RD_BUF_NUM; i++)
  {
    acMergeRealBuffer[i] = m_acMergeBuffer[i].getBuf(localUnitArea);
    if (i < MMVD_MRG_MAX_RD_NUM)
    {
      acMergeTempBuffer[i] = acMergeRealBuffer + i;
    }
    else
    {
      singleMergeTempBuffer = acMergeRealBuffer + i;
    }
  }

  bool isIntrainterEnabled = sps.getUseCiip();
#if CIIP_RM_BLOCK_SIZE_CONSTRAINTS
#if CTU_256
  const int maxSize = std::min<int>( MAX_TB_SIZEY, MAX_INTRA_SIZE );

  if( bestCS->area.lwidth() * bestCS->area.lheight() < 32 || bestCS->area.lwidth() > maxSize || bestCS->area.lheight() > maxSize )
#else
  if (bestCS->area.lwidth() * bestCS->area.lheight() < 32)
#endif
#else
  if (bestCS->area.lwidth() * bestCS->area.lheight() < 64 || bestCS->area.lwidth() >= MAX_CU_SIZE || bestCS->area.lheight() >= MAX_CU_SIZE)
#endif
  {
    isIntrainterEnabled = false;
  }
  bool isTestSkipMerge[MRG_MAX_NUM_CANDS] = { false };
#if MERGE_ENC_OPT
  if (m_pcEncCfg->getUseFastMerge() || isIntrainterEnabled || affineMrgAvail
    )
#else
  if( m_pcEncCfg->getUseFastMerge() || isIntrainterEnabled)
#endif
  {
#if MERGE_ENC_OPT
    uiNumMrgSATDCand = m_pcEncCfg->getNumFullRDMerge();
#else
    uiNumMrgSATDCand = NUM_MRG_SATD_CAND;
#endif
    if (isIntrainterEnabled)
    {
      uiNumMrgSATDCand += 1;
    }
    bestIsSkip       = false;

    if( auto blkCache = dynamic_cast< CacheBlkInfoCtrl* >( m_modeCtrl ) )
    {
      if (slice.getSPS()->getIBCFlag())
      {
        ComprCUCtx cuECtx = m_modeCtrl->getComprCUCtx();
        bestIsSkip = blkCache->isSkip(tempCS->area) && cuECtx.bestCU;
      }
      else
      {
        bestIsSkip = blkCache->isSkip( tempCS->area );
      }
      bestIsMMVDSkip = blkCache->isMMVDSkip(tempCS->area);
    }

    if (isIntrainterEnabled) // always perform low complexity check
    {
      bestIsSkip = false;
    }
#if MERGE_ENC_OPT
    if (affineMrgAvail)
    {
      bestIsSkip = false;
      uiNumMrgSATDCand += NUM_AFF_MRG_SATD_CAND;
    }

#if TM_MRG
    if (tempCS->sps->getUseDMVDMode())
    {
      bestIsSkip = false;
      uiNumMrgSATDCand += TM_MAX_NUM_SATD_CAND;
    }
#endif
#endif

    static_vector<double, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM> candCostList;

    // 1. Pass: get SATD-cost for selected candidates and reduce their count
    if( !bestIsSkip )
    {
      RdModeList.clear();
      mrgTempBufSet       = true;
      const TempCtx ctxStart(m_CtxCache, m_CABACEstimator->getCtx());

      CodingUnit &cu      = tempCS->addCU( tempCS->area, partitioner.chType );
#if !MERGE_ENC_OPT
      const double sqrtLambdaForFirstPassIntra = m_pcRdCost->getMotionLambda( ) * FRAC_BITS_SCALE;
#endif
      partitioner.setCUData( cu );
      cu.slice            = tempCS->slice;
      cu.tileIdx          = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
#if INTER_LIC
      cu.LICFlag          = false;
#endif
      cu.skip             = false;
      cu.mmvdSkip = false;
      cu.geoFlag          = false;
    //cu.affine
      cu.predMode         = MODE_INTER;
    //cu.LICFlag
      cu.chromaQpAdj      = m_cuChromaQpOffsetIdxPlus1;
      cu.qp               = encTestMode.qp;
    //cu.emtFlag  is set below

      PredictionUnit &pu  = tempCS->addPU( cu, partitioner.chType );
#if MERGE_ENC_OPT
      cu.affine = false;
      pu.ciipFlag = false;
#if CIIP_PDPC
      pu.ciipPDPC = false;
#endif
      pu.mmvdMergeFlag = false;
#if AFFINE_MMVD
      pu.afMmvdFlag = false;
#endif
#if TM_MRG || (JVET_Z0084_IBC_TM && IBC_TM_MRG)
      pu.tmMergeFlag = false;
#endif
#if JVET_X0049_ADAPT_DMVR
      pu.bmMergeFlag = false;
#endif
#if MULTI_PASS_DMVR
      pu.bdmvrRefine = false;
#endif
#endif

      DistParam distParam;
      const bool bUseHadamard = !tempCS->slice->getDisableSATDForRD();
      m_pcRdCost->setDistParam (distParam, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y(), sps.getBitDepth (CHANNEL_TYPE_LUMA), COMPONENT_Y, bUseHadamard);

#if MERGE_ENC_OPT
      xCheckSATDCostRegularMerge(tempCS, cu, pu, mergeCtx, acMergeTempBuffer, singleMergeTempBuffer, acMergeTmpBuffer
#if !MULTI_PASS_DMVR
                               , refinedMvdL0
#endif
                               , uiNumMrgSATDCand, RdModeList, candCostList, distParam, ctxStart
#if MULTI_PASS_DMVR
                               , applyBDMVR
#endif
      );
#else
      const UnitArea localUnitArea( tempCS->area.chromaFormat, Area( 0, 0, tempCS->area.Y().width, tempCS->area.Y().height) );
#if MULTI_HYP_PRED
      const bool testMHP = tempCS->sps->getUseInterMultiHyp()
        && (tempCS->area.lumaSize().area() > MULTI_HYP_PRED_RESTRICT_BLOCK_SIZE 
        && std::min(tempCS->area.lwidth(), tempCS->area.lheight()) >= MULTI_HYP_PRED_RESTRICT_MIN_WH);
#endif
      for( uint32_t uiMergeCand = 0; uiMergeCand < mergeCtx.numValidMergeCand; uiMergeCand++ )
      {
        mergeCtx.setMergeInfo( pu, uiMergeCand );

        PU::spanMotionInfo( pu, mergeCtx );
#if !MULTI_PASS_DMVR
        pu.mvRefine = true;
#endif
        distParam.cur = singleMergeTempBuffer->Y();
        acMergeTmpBuffer[uiMergeCand] = m_acMergeTmpBuffer[uiMergeCand].getBuf(localUnitArea);
#if INTER_LIC
        m_pcInterSearch->m_storeBeforeLIC = mergeCtx.interDirNeighbours[uiMergeCand] == 3 ? false : true;
        if (m_pcInterSearch->m_storeBeforeLIC)
        {
          m_pcInterSearch->m_predictionBeforeLIC = acMergeTmpBuffer[uiMergeCand];
          m_pcInterSearch->motionCompensation(pu, *singleMergeTempBuffer, REF_PIC_LIST_X, true, true);
        }
        else
#endif
#if MULTI_PASS_DMVR
        if (applyBDMVR[uiMergeCand])
        {
          if (isIntrainterEnabled)
          {
#if MULTI_HYP_PRED
            pu.addHypData.clear();
            pu.numMergedAddHyps = 0;
#endif
            pu.mvRefine = false;
            pu.ciipFlag = true;
            m_pcInterSearch->motionCompensation(pu, acMergeTmpBuffer[uiMergeCand]);
            pu.ciipFlag = false;
#if MULTI_HYP_PRED
            mergeCtx.setMergeInfo(pu, uiMergeCand);
#endif
          }
          pu.bdmvrRefine = true;
          m_pcInterSearch->setBdmvrSubPuMvBuf(m_mvBufBDMVR[uiMergeCand << 1], m_mvBufBDMVR[(uiMergeCand << 1) + 1]);
          pu.mvRefine = true;

          m_pcInterSearch->motionCompensation(pu, *singleMergeTempBuffer);

          if( pu.bdmvrRefine )
          {
            ::memcpy( m_mvBufEncBDOF[uiMergeCand], m_pcInterSearch->getBdofSubPuMvOffset(), sizeof( Mv ) * BDOF_SUBPU_MAX_NUM );
          }

          pu.mvRefine = false;
        }
        else
        {
#endif
        m_pcInterSearch->motionCompensation(pu, *singleMergeTempBuffer, REF_PIC_LIST_X, true, true, &(acMergeTmpBuffer[uiMergeCand]));
#if MULTI_PASS_DMVR
        }
#endif
#if INTER_LIC
        m_pcInterSearch->m_storeBeforeLIC = false;
#endif
        acMergeBuffer[uiMergeCand] = m_acRealMergeBuffer[uiMergeCand].getBuf(localUnitArea);
        acMergeBuffer[uiMergeCand].copyFrom(*singleMergeTempBuffer);
#if !MULTI_PASS_DMVR
        pu.mvRefine = false;
        if( mergeCtx.interDirNeighbours[uiMergeCand] == 3 )
        {
          mergeCtx.mvFieldNeighbours[2*uiMergeCand].mv   = pu.mv[0];
          mergeCtx.mvFieldNeighbours[2*uiMergeCand+1].mv = pu.mv[1];
          {
            int dx, dy, i, j, num = 0;
            dy = std::min<int>(pu.lumaSize().height, DMVR_SUBCU_HEIGHT);
            dx = std::min<int>(pu.lumaSize().width, DMVR_SUBCU_WIDTH);
            if (PU::checkDMVRCondition(pu))
            {
              for (i = 0; i < (pu.lumaSize().height); i += dy)
              {
                for (j = 0; j < (pu.lumaSize().width); j += dx)
                {
                  refinedMvdL0[num][uiMergeCand] = pu.mvdL0SubPu[num];
                  num++;
                }
              }
            }
          }
        }
#endif

        Distortion uiSad = distParam.distFunc(distParam);
        m_CABACEstimator->getCtx() = ctxStart;
        uint64_t fracBits = m_pcInterSearch->xCalcPuMeBits(pu);
        double cost = (double)uiSad + (double)fracBits * sqrtLambdaForFirstPassIntra;
#if MULTI_HYP_PRED
        if (testMHP && pu.addHypData.size() < tempCS->sps->getMaxNumAddHyps())
        {
          uint32_t uiBitsCand = uiMergeCand + 1 + 1 + 1; // one bit for merge flag,  one bit for subblock_merge_flag, and one bit for regualr_merge_flag
          MEResult mergeResult;
          mergeResult.cu = cu;
          mergeResult.pu = pu;
          mergeResult.bits = uiBitsCand;
          mergeResult.cost = uiSad + m_pcRdCost->getCost(uiBitsCand);
          m_baseResultsForMH.push_back(mergeResult);
        }
#endif
        insertPos = -1;
#if CIIP_PDPC
        updateCandList(ModeInfo(uiMergeCand, true, false, false, false), cost, RdModeList, candCostList, uiNumMrgSATDCand, &insertPos);
#else
        updateCandList(ModeInfo(uiMergeCand, true, false, false), cost, RdModeList, candCostList, uiNumMrgSATDCand, &insertPos);
#endif
        if (insertPos != -1)
        {
          if (insertPos == RdModeList.size() - 1)
          {
            swap(singleMergeTempBuffer, acMergeTempBuffer[insertPos]);
          }
          else
          {
            for (uint32_t i = uint32_t(RdModeList.size()) - 1; i > insertPos; i--)
            {
              swap(acMergeTempBuffer[i - 1], acMergeTempBuffer[i]);
            }
            swap(singleMergeTempBuffer, acMergeTempBuffer[insertPos]);
          }
        }
        CHECK(std::min(uiMergeCand + 1, uiNumMrgSATDCand) != RdModeList.size(), "");
#if MULTI_PASS_DMVR
        pu.bdmvrRefine = false;
#endif
      }
#endif
      if (isIntrainterEnabled)
      {
#if MERGE_ENC_OPT
        xCheckSATDCostCiipMerge(tempCS, cu, pu, mergeCtx, acMergeTempBuffer, singleMergeTempBuffer, acMergeTmpBuffer, uiNumMrgSATDCand, RdModeList, candCostList, distParam, ctxStart);
#if JVET_X0141_CIIP_TIMD_TM && TM_MRG
        if (sps.getUseCiipTmMrg())
        {
            xCheckSATDCostCiipTmMerge(tempCS, cu, pu, ciipTmMrgCtx, acMergeTempBuffer, singleMergeTempBuffer, acTmMergeTmpBuffer, uiNumMrgSATDCand, RdModeList, candCostList, distParam, ctxStart);
        }
#endif
#else
        // prepare for Intra bits calculation
        pu.ciipFlag = true;

        // save the to-be-tested merge candidates
        uint32_t CiipMergeCand[NUM_MRG_SATD_CAND];
        for (uint32_t mergeCnt = 0; mergeCnt < std::min(NUM_MRG_SATD_CAND, (const int)mergeCtx.numValidMergeCand); mergeCnt++)
        {
          CiipMergeCand[mergeCnt] = RdModeList[mergeCnt].mergeCand;
        }
#if JVET_X0141_CIIP_TIMD_TM && JVET_W0123_TIMD_FUSION
        int intraMode = PLANAR_IDX;
        if (mergeCtx.numValidMergeCand)
        {
          const CompArea &area = cu.Y();
          if (cu.slice->getSPS()->getUseTimd() && (cu.lwidth() * cu.lheight() <= CIIP_MAX_SIZE))
          {
#if SECONDARY_MPM && ENABLE_DIMD
            IntraPrediction::deriveDimdMode(cu.cs->picture->getRecoBuf(area), area, cu);
#endif
            cu.timdMode = m_pcIntraSearch->deriveTimdMode(cu.cs->picture->getRecoBuf(area), area, cu);
            intraMode = MAP131TO67(cu.timdMode);
          }
        }
#endif
        for (uint32_t mergeCnt = 0; mergeCnt < std::min(std::min(NUM_MRG_SATD_CAND, (const int)mergeCtx.numValidMergeCand), 4); mergeCnt++)
        {
          uint32_t mergeCand = CiipMergeCand[mergeCnt];
          acMergeTmpBuffer[mergeCand] = m_acMergeTmpBuffer[mergeCand].getBuf(localUnitArea);

          // estimate merge bits
          mergeCtx.setMergeInfo(pu, mergeCand);

          // first round
          pu.intraDir[0] = PLANAR_IDX;
#if CIIP_PDPC
          for (int intraCnt = 0; intraCnt < 2; intraCnt++)
          {
            pu.ciipPDPC = intraCnt == 1;
#else
          uint32_t intraCnt = 0;
#endif
#if JVET_X0141_CIIP_TIMD_TM && JVET_W0123_TIMD_FUSION
          pu.intraDir[0] = pu.ciipPDPC ? PLANAR_IDX : intraMode;
#endif
          PelBuf ciipBuff = m_ciipBuffer[intraCnt].getBuf(localUnitArea.Y());
          // generate intrainter Y prediction
          if (mergeCnt == 0)
          {
            m_pcIntraSearch->initIntraPatternChType(*pu.cu, pu.Y());
            m_pcIntraSearch->predIntraAng(COMPONENT_Y, ciipBuff, pu);
          }
//#if INTER_LIC
//          if( mergeCtx.interDirNeighbours[mergeCand] != 3 )
//          {
//            pu.cs->getPredBuf( pu ).copyFrom( m_acRealMergeBuffer[MRG_MAX_NUM_CANDS + mergeCand].getBuf( localUnitArea ) );
//          }
//          else
//#endif
            if (pu.cs->picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
            {
              m_pcIntraSearch->geneWeightedPred<true>(COMPONENT_Y, singleMergeTempBuffer->Y(), pu, acMergeTmpBuffer[mergeCand].Y(), ciipBuff, m_pcReshape->getFwdLUT().data());
            }
            else
            {
              m_pcIntraSearch->geneWeightedPred<false>(COMPONENT_Y, singleMergeTempBuffer->Y(), pu, acMergeTmpBuffer[mergeCand].Y(), ciipBuff);
            }

          // calculate cost
            if (pu.cs->picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
            {
              PelBuf tmp = m_acGeoWeightedBuffer->getBuf(localUnitArea.Y());
              tmp.rspSignal(singleMergeTempBuffer->Y(), m_pcReshape->getInvLUT());
              distParam.cur = tmp;
            }
            else
            {
              distParam.cur = singleMergeTempBuffer->Y();
            }

          //distParam.cur = pu.cs->getPredBuf(pu).Y();
          Distortion sadValue = distParam.distFunc(distParam);
          if (pu.cs->slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
          {
            pu.cs->getPredBuf(pu).Y().rspSignal(m_pcReshape->getFwdLUT());
          }
          m_CABACEstimator->getCtx() = ctxStart;
          pu.regularMergeFlag = false;
          uint64_t fracBits = m_pcInterSearch->xCalcPuMeBits(pu);
          double cost = (double)sadValue + (double)fracBits * sqrtLambdaForFirstPassIntra;
          insertPos = -1;
#if CIIP_PDPC
          updateCandList(ModeInfo(mergeCand, false, false, true, pu.ciipPDPC), cost, RdModeList, candCostList, uiNumMrgSATDCand, &insertPos);
#else
          updateCandList(ModeInfo(mergeCand, false, false, true), cost, RdModeList, candCostList, uiNumMrgSATDCand, &insertPos);
#endif
          if (insertPos != -1)
          {
            for (int i = int(RdModeList.size()) - 1; i > insertPos; i--)
            {
              swap(acMergeTempBuffer[i - 1], acMergeTempBuffer[i]);
            }
            swap(singleMergeTempBuffer, acMergeTempBuffer[insertPos]);
          }
#if CIIP_PDPC
          }
#endif
        }
        pu.ciipFlag = false;
#if CIIP_PDPC
        pu.ciipPDPC = false;
#endif
#endif
      }
      if ( pu.cs->sps->getUseMMVD() )
      {
#if MERGE_ENC_OPT
#if JVET_W0090_ARMC_TM
        xCheckSATDCostMmvdMerge(tempCS, cu, pu, mergeCtxtmp, acMergeTempBuffer, singleMergeTempBuffer, uiNumMrgSATDCand, RdModeList, candCostList, distParam, ctxStart
#if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED
                           ,     mmvdLUT
#endif
                                );
#else
        xCheckSATDCostMmvdMerge(tempCS, cu, pu, mergeCtx, acMergeTempBuffer, singleMergeTempBuffer, uiNumMrgSATDCand, RdModeList, candCostList, distParam, ctxStart
#if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED
                           ,     mmvdLUT
#endif
                                );
#endif
#else
        cu.mmvdSkip = true;
        pu.regularMergeFlag = true;
        const int tempNum = (mergeCtx.numValidMergeCand > 1) ? MMVD_ADD_NUM : MMVD_ADD_NUM >> 1;
        for (int mmvdMergeCand = 0; mmvdMergeCand < tempNum; mmvdMergeCand++)
        {
          int baseIdx = mmvdMergeCand / MMVD_MAX_REFINE_NUM;
          int refineStep = (mmvdMergeCand - (baseIdx * MMVD_MAX_REFINE_NUM)) / 4;
          if (refineStep >= m_pcEncCfg->getMmvdDisNum())
            continue;
#if JVET_W0090_ARMC_TM
          mergeCtxtmp.setMmvdMergeCandiInfo(pu, mmvdMergeCand);
#else
          mergeCtx.setMmvdMergeCandiInfo(pu, mmvdMergeCand);
#endif

          PU::spanMotionInfo(pu, mergeCtx);
          pu.mvRefine = true;
          distParam.cur = singleMergeTempBuffer->Y();
          pu.mmvdEncOptMode = (refineStep > 2 ? 2 : 1);
          CHECK(!pu.mmvdMergeFlag, "MMVD merge should be set");
          // Don't do chroma MC here
          m_pcInterSearch->motionCompensation(pu, *singleMergeTempBuffer, REF_PIC_LIST_X, true, false);
          pu.mmvdEncOptMode = 0;
          pu.mvRefine = false;
          Distortion uiSad = distParam.distFunc(distParam);

          m_CABACEstimator->getCtx() = ctxStart;
          uint64_t fracBits = m_pcInterSearch->xCalcPuMeBits(pu);
          double cost = (double)uiSad + (double)fracBits * sqrtLambdaForFirstPassIntra;
#if MULTI_HYP_PRED
          if (testMHP && pu.addHypData.size() < tempCS->sps->getMaxNumAddHyps())
          {
            uint32_t uiBitsCand = baseIdx + refineStep + 2 + 1 + 1 + 1; // one bit for merge flag,  one bit for subblock_merge_flag, and one bit for regualr_merge_flag
            MEResult mergeResult;
            mergeResult.cu = cu;
            mergeResult.pu = pu;
            mergeResult.bits = uiBitsCand;
            mergeResult.cost = uiSad + m_pcRdCost->getCost(uiBitsCand);
            m_baseResultsForMH.push_back(mergeResult);
          }
#endif
          insertPos = -1;
#if CIIP_PDPC
          updateCandList(ModeInfo(mmvdMergeCand, false, true, false, false), cost, RdModeList, candCostList, uiNumMrgSATDCand, &insertPos);
#else
          updateCandList(ModeInfo(mmvdMergeCand, false, true, false), cost, RdModeList, candCostList, uiNumMrgSATDCand, &insertPos);
#endif
          if (insertPos != -1)
          {
            for (int i = int(RdModeList.size()) - 1; i > insertPos; i--)
            {
              swap(acMergeTempBuffer[i - 1], acMergeTempBuffer[i]);
            }
            swap(singleMergeTempBuffer, acMergeTempBuffer[insertPos]);
          }
        }
#endif
      }
#if MERGE_ENC_OPT
#if TM_MRG
      if (sps.getUseDMVDMode())
      {
        xCheckSATDCostTMMerge(tempCS, cu, pu, tmMrgCtx, acMergeTempBuffer, singleMergeTempBuffer, uiNumMrgSATDCand, RdModeList, candCostList, distParam, ctxStart
#if MULTI_PASS_DMVR
          , applyBDMVR4TM
#endif
        );
      }
#endif
      if (affineMrgAvail)
      {
        xCheckSATDCostAffineMerge(tempCS, cu, pu, affineMergeCtx, mrgCtx, acMergeTempBuffer, singleMergeTempBuffer, uiNumMrgSATDCand, RdModeList, candCostList, distParam, ctxStart);
      }
#if AFFINE_MMVD
      if (affineMmvdAvail)
      {
#if JVET_W0090_ARMC_TM
        xCheckSATDCostAffineMmvdMerge(tempCS, cu, pu, affineMergeCtxTmp, mrgCtx, acMergeTempBuffer, singleMergeTempBuffer, uiNumMrgSATDCand, RdModeList, candCostList, distParam, ctxStart
#if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED
                                          , affMmvdLUT
#if JVET_AA0093_ENHANCED_MMVD_EXTENSION
  , numBaseAffine
#endif
#endif
                                      );
#else
        xCheckSATDCostAffineMmvdMerge(tempCS, cu, pu, affineMergeCtx, mrgCtx, acMergeTempBuffer, singleMergeTempBuffer, uiNumMrgSATDCand, RdModeList, candCostList, distParam, ctxStart
#if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED
                                          , affMmvdLUT
#endif
                                      );
#endif
      }
#endif
#endif
#if JVET_X0049_ADAPT_DMVR
      if (sps.getUseDMVDMode() && checkBmMrg)
      {
#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
        xCheckSATDCostBMMerge(tempCS, cu, pu, bmMrgCtx, bmMrgCtxDir2, admvrRefinedMotion, acMergeTempBuffer, singleMergeTempBuffer, uiNumMrgSATDCand, RdModeList, candCostList, distParam, ctxStart
#if MULTI_PASS_DMVR
          , applyBDMVR4BM
#endif
        );
#else
        xCheckSATDCostBMMerge(tempCS, cu, pu, bmMrgCtx, acMergeTempBuffer, singleMergeTempBuffer, uiNumMrgSATDCand, RdModeList, candCostList, distParam, ctxStart
#if MULTI_PASS_DMVR
          , applyBDMVR4BM
#endif
        );
#endif
      }
#endif
      // Try to limit number of candidates using SATD-costs
      for( uint32_t i = 1; i < uiNumMrgSATDCand; i++ )
      {
        if( candCostList[i] > MRG_FAST_RATIO * candCostList[0] )
        {
          uiNumMrgSATDCand = i;
          break;
        }
      }

      setMergeBestSATDCost( candCostList[0] );

      if (isIntrainterEnabled && isChromaEnabled(pu.cs->pcv->chrFormat))
      {
        pu.ciipFlag = true;
        bool tag[2] = { false, false };

        for (uint32_t mergeCnt = 0; mergeCnt < uiNumMrgSATDCand; mergeCnt++)
        {
          if (RdModeList[mergeCnt].isCIIP)
          {
            pu.intraDir[0] = PLANAR_IDX;
            pu.intraDir[1] = DM_CHROMA_IDX;
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
            if (pu.chromaSize().width == 2)
              continue;
#endif
#if CIIP_PDPC
            pu.ciipPDPC = RdModeList[mergeCnt].isCiipPDPC;
            uint32_t bufIdx = pu.ciipPDPC ? 1 : 0;
#else
            uint32_t bufIdx = 0;
#endif
#if JVET_X0141_CIIP_TIMD_TM && JVET_W0123_TIMD_FUSION
              pu.intraDir[0] = RdModeList[mergeCnt].intraMode;
#endif
            if (!tag[bufIdx])
            {
              m_pcIntraSearch->initIntraPatternChType(*pu.cu, pu.Cb());
              PelBuf ciipBuffCb = m_ciipBuffer[bufIdx].getBuf(localUnitArea.Cb());
              m_pcIntraSearch->predIntraAng(COMPONENT_Cb, ciipBuffCb, pu);

              m_pcIntraSearch->initIntraPatternChType(*pu.cu, pu.Cr());
              PelBuf ciipBuffCr = m_ciipBuffer[bufIdx].getBuf(localUnitArea.Cr());
              m_pcIntraSearch->predIntraAng(COMPONENT_Cr, ciipBuffCr, pu);

              tag[bufIdx] = true;
            }
          }
        }
        pu.ciipFlag = false;
#if CIIP_PDPC
        pu.ciipPDPC = false;
#endif
      }

      tempCS->initStructData( encTestMode.qp );
      m_CABACEstimator->getCtx() = ctxStart;
    }
    else
    {
      if (bestIsMMVDSkip)
      {
#if JVET_AA0093_ENHANCED_MMVD_EXTENSION
        uiNumMrgSATDCand = mergeCtx.numValidMergeCand + std::min<int>(MMVD_BASE_MV_NUM, mergeCtx.numValidMergeCand) * MMVD_MAX_REFINE_NUM;
#else
        uiNumMrgSATDCand = mergeCtx.numValidMergeCand + ((mergeCtx.numValidMergeCand > 1) ? MMVD_ADD_NUM : MMVD_ADD_NUM >> 1);
#endif
      }
      else
      {
        uiNumMrgSATDCand = mergeCtx.numValidMergeCand;
      }
    }
  }
  m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;
  uint32_t iteration;
  uint32_t iterationBegin = 0;
  iteration = 2;
  for (uint32_t uiNoResidualPass = iterationBegin; uiNoResidualPass < iteration; ++uiNoResidualPass)
  {
    for( uint32_t uiMrgHADIdx = 0; uiMrgHADIdx < uiNumMrgSATDCand; uiMrgHADIdx++ )
    {
      uint32_t uiMergeCand = RdModeList[uiMrgHADIdx].mergeCand;

      if (uiNoResidualPass != 0 && RdModeList[uiMrgHADIdx].isCIIP) // intrainter does not support skip mode
      {
        if (isTestSkipMerge[uiMergeCand])
        {
          continue;
        }
      }
#if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED
     if (RdModeList[uiMrgHADIdx].isMMVD && (uiMergeCand - (uiMergeCand / MMVD_MAX_REFINE_NUM)* MMVD_MAX_REFINE_NUM >= (MMVD_MAX_REFINE_NUM >> MMVD_SIZE_SHIFT)))
     {
	     continue;
     }
#endif

      if (((uiNoResidualPass != 0) && candHasNoResidual[uiMrgHADIdx])
       || ( (uiNoResidualPass == 0) && bestIsSkip ) )
      {
        continue;
      }
      // first get merge candidates
      CodingUnit &cu      = tempCS->addCU( tempCS->area, partitioner.chType );

      partitioner.setCUData( cu );
      cu.slice            = tempCS->slice;
      cu.tileIdx          = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
      cu.skip             = false;
      cu.mmvdSkip = false;
#if INTER_LIC
      cu.LICFlag          = false;
#endif
#if MERGE_ENC_OPT
      cu.affine = false;
#endif

      cu.geoFlag          = false;
    //cu.affine
      cu.predMode         = MODE_INTER;
    //cu.LICFlag
      cu.chromaQpAdj      = m_cuChromaQpOffsetIdxPlus1;
      cu.qp               = encTestMode.qp;
      PredictionUnit &pu  = tempCS->addPU( cu, partitioner.chType );
#if AFFINE_MMVD && MERGE_ENC_OPT
      pu.afMmvdFlag       = false;
#endif
#if TM_MRG || (JVET_Z0084_IBC_TM && IBC_TM_MRG)
      pu.tmMergeFlag      = false;
#endif
#if JVET_X0049_ADAPT_DMVR
      pu.bmMergeFlag      = false;
      pu.bmDir            = 0;
#endif
#if MULTI_PASS_DMVR
      bool isDMVR         = false;
#endif
#if ENABLE_OBMC
      pu.ciipFlag = false;
#endif
#if JVET_X0141_CIIP_TIMD_TM
      pu.intraDir[0] = PLANAR_IDX;
#endif
      if (uiNoResidualPass == 0 && RdModeList[uiMrgHADIdx].isCIIP)
      {
        cu.mmvdSkip = false;
#if JVET_X0141_CIIP_TIMD_TM && TM_MRG
        pu.tmMergeFlag = RdModeList[uiMrgHADIdx].isTMMrg;
#endif
#if MULTI_HYP_PRED
        pu.ciipFlag = true;
#if  JVET_X0141_CIIP_TIMD_TM && TM_MRG
        if (pu.tmMergeFlag)
        {
          ciipTmMrgCtx.setMergeInfo(pu, uiMergeCand);
        }
        else
#endif
        mergeCtx.setMergeInfo(pu, uiMergeCand);
#else
        mergeCtx.setMergeInfo(pu, uiMergeCand);
        pu.ciipFlag = true;
#endif
#if CIIP_PDPC
        pu.ciipPDPC = RdModeList[uiMrgHADIdx].isCiipPDPC;
#endif
        pu.regularMergeFlag = false;
#if JVET_X0141_CIIP_TIMD_TM && JVET_W0123_TIMD_FUSION
        pu.intraDir[0] = RdModeList[uiMrgHADIdx].intraMode;
#else
        pu.intraDir[0] = PLANAR_IDX;
#endif
        CHECK(pu.intraDir[0]<0 || pu.intraDir[0]>(NUM_LUMA_MODE - 1), "out of intra mode");
        pu.intraDir[1] = DM_CHROMA_IDX;
      }
      else if (RdModeList[uiMrgHADIdx].isMMVD)
      {
        cu.mmvdSkip = true;
        pu.regularMergeFlag = true;
#if JVET_W0090_ARMC_TM
#if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED
        mergeCtxtmp.setMmvdMergeCandiInfo(pu, uiMergeCand, mmvdLUT[uiMergeCand]);
#else
        mergeCtxtmp.setMmvdMergeCandiInfo(pu, uiMergeCand);
#endif
#else
#if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED
        mergeCtx.setMmvdMergeCandiInfo(pu, uiMergeCand, mmvdLUT[uiMergeCand]);
#else
        mergeCtx.setMmvdMergeCandiInfo(pu, uiMergeCand);
#endif
#endif
      }
#if MERGE_ENC_OPT
#if AFFINE_MMVD
      else if (RdModeList[uiMrgHADIdx].isAffineMmvd)
      {
#if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED
        int uiMergeCandTemp = affMmvdLUT[uiMergeCand];
        int baseIdx = (int)uiMergeCandTemp / AF_MMVD_MAX_REFINE_NUM;
        int stepIdx = (int)uiMergeCandTemp - baseIdx * AF_MMVD_MAX_REFINE_NUM;
#else
        int baseIdx = (int)uiMergeCand / AF_MMVD_MAX_REFINE_NUM;
        int stepIdx = (int)uiMergeCand - baseIdx * AF_MMVD_MAX_REFINE_NUM;
#endif
        int dirIdx  = stepIdx % AF_MMVD_OFFSET_DIR;
            stepIdx = stepIdx / AF_MMVD_OFFSET_DIR;

        cu.affine           = true;
        cu.imv              = IMV_OFF;
        cu.mmvdSkip         = false;
        pu.regularMergeFlag = false;
        pu.mmvdMergeFlag    = false;

        pu.mergeFlag      = true;
        pu.afMmvdFlag     = true;
        pu.afMmvdBaseIdx  = (uint8_t)baseIdx;
        pu.afMmvdDir      = (uint8_t)dirIdx;
        pu.afMmvdStep     = (uint8_t)stepIdx;
        pu.mergeIdx       = (uint8_t)(baseIdx + afMmvdBaseIdxToMergeIdxOffset);
#if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED
        pu.afMmvdMergeIdx = uiMergeCand;
#endif
#if JVET_W0090_ARMC_TM
        pu.mergeType = affineMergeCtxTmp.mergeType[pu.mergeIdx];
#if INTER_LIC
        pu.cu->LICFlag = affineMergeCtxTmp.LICFlags[pu.mergeIdx];
#endif
        pu.interDir = affineMergeCtxTmp.interDirNeighbours[pu.mergeIdx];
        pu.cu->affineType = affineMergeCtxTmp.affineType[pu.mergeIdx];
        pu.cu->BcwIdx = affineMergeCtxTmp.BcwIdx[pu.mergeIdx];
        pu.mmvdMergeFlag = false;
        pu.ciipFlag = false;

        CHECK(pu.mergeIdx >= affineMergeCtxTmp.numValidMergeCand, "Invalid merge index for AffineMMVD");

        MvField mvfMmvd[2][3];
        PU::getAfMmvdMvf(pu, affineMergeCtxTmp, mvfMmvd, pu.mergeIdx, pu.afMmvdStep, pu.afMmvdDir);
#else
        pu.mergeType      = affineMergeCtx.mergeType         [pu.mergeIdx];
#if INTER_LIC
        pu.cu->LICFlag    = affineMergeCtx.LICFlags          [pu.mergeIdx];
#endif
        pu.interDir       = affineMergeCtx.interDirNeighbours[pu.mergeIdx];
        pu.cu->affineType = affineMergeCtx.affineType        [pu.mergeIdx];
        pu.cu->BcwIdx     = affineMergeCtx.BcwIdx            [pu.mergeIdx];
        pu.mmvdMergeFlag  = false;
        pu.ciipFlag       = false;

        CHECK(pu.mergeIdx >= affineMergeCtx.numValidMergeCand, "Invalid merge index for AffineMMVD");

        MvField mvfMmvd[2][3];
        PU::getAfMmvdMvf(pu, affineMergeCtx, mvfMmvd, pu.mergeIdx, pu.afMmvdStep, pu.afMmvdDir);
#endif

        for (int i = 0; i < 2; i++)
        {
          pu.refIdx[i] = mvfMmvd[i][0].refIdx;
          pu.mvAffi[i][0] = mvfMmvd[i][0].mv;
          pu.mvAffi[i][1] = mvfMmvd[i][1].mv;
          pu.mvAffi[i][2] = mvfMmvd[i][2].mv;
        }

        PU::spanMotionInfo(pu);
      }
#endif
      else if (RdModeList[uiMrgHADIdx].isAffine)
      {
        CHECK(uiMergeCand >= affineMergeCtx.numValidMergeCand, "");
        cu.mmvdSkip = false;
        cu.affine = true;
        cu.imv = 0;
        pu.regularMergeFlag = false;
        pu.mergeFlag = true;
        pu.mergeIdx = uiMergeCand;
        pu.mmvdMergeFlag = false;
        pu.interDir = affineMergeCtx.interDirNeighbours[uiMergeCand];
        cu.affineType = affineMergeCtx.affineType[uiMergeCand];
        cu.BcwIdx = affineMergeCtx.BcwIdx[uiMergeCand];
#if INTER_LIC
        cu.LICFlag = affineMergeCtx.LICFlags[uiMergeCand];
#endif
        pu.mv[0].setZero();
        pu.mv[1].setZero();
        pu.mvd[REF_PIC_LIST_0] = Mv();
        pu.mvd[REF_PIC_LIST_1] = Mv();
        pu.mvpIdx[REF_PIC_LIST_0] = NOT_VALID;
        pu.mvpIdx[REF_PIC_LIST_1] = NOT_VALID;
        pu.mvpNum[REF_PIC_LIST_0] = NOT_VALID;
        pu.mvpNum[REF_PIC_LIST_1] = NOT_VALID;
        pu.mergeType = affineMergeCtx.mergeType[uiMergeCand];
        if (pu.mergeType == MRG_TYPE_SUBPU_ATMVP)
        {
          pu.refIdx[0] = affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 0][0].refIdx;
          pu.refIdx[1] = affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1][0].refIdx;
          PU::spanMotionInfo(pu, mrgCtx);
        }
        else
        {
          for (int i = 0; i < 2; i++)
          {
            pu.refIdx[i] = affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + i][0].refIdx;
            pu.mvAffi[i][0] = affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + i][0].mv;
            pu.mvAffi[i][1] = affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + i][1].mv;
            pu.mvAffi[i][2] = affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + i][2].mv;
          }

          PU::spanMotionInfo(pu);
        }
      }
#if TM_MRG && MERGE_ENC_OPT
#if JVET_X0141_CIIP_TIMD_TM
      else if (RdModeList[uiMrgHADIdx].isTMMrg && !RdModeList[uiMrgHADIdx].isCIIP)
#else
      else if (RdModeList[uiMrgHADIdx].isTMMrg)
#endif
      {
        cu.mmvdSkip         = false;
        pu.regularMergeFlag = true;
        pu.tmMergeFlag      = true;
#if JVET_X0141_CIIP_TIMD_TM
        pu.ciipFlag = false;
#endif
        tmMrgCtx.setMergeInfo(pu, uiMergeCand);
#if MULTI_PASS_DMVR
        if (applyBDMVR4TM[uiMergeCand])
        {
          isDMVR = true;
          pu.bdmvrRefine = true;
          m_pcInterSearch->setBdmvrSubPuMvBuf(m_mvBufBDMVR4TM[uiMergeCand << 1], m_mvBufBDMVR4TM[(uiMergeCand << 1) + 1]);
        }
#endif
      }
#endif
#if JVET_X0049_ADAPT_DMVR
      else if (RdModeList[uiMrgHADIdx].isBMMrg)
      {
        cu.mmvdSkip = false;
        pu.regularMergeFlag = true;
        pu.bmMergeFlag = true;
        pu.bmDir = RdModeList[uiMrgHADIdx].bmDir;
#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
        if (pu.bmDir == 1)
        {
          bmMrgCtx.setMergeInfo(pu, uiMergeCand);
        }
        else
        {
          bmMrgCtxDir2.setMergeInfo(pu, uiMergeCand);
        }
#else
        bmMrgCtx.setMergeInfo(pu, uiMergeCand);
#endif
        if (applyBDMVR4BM[uiMergeCand])
        {
          isDMVR = true;
          pu.bdmvrRefine = true;
          m_pcInterSearch->setBdmvrSubPuMvBuf(m_mvBufBDMVR4BM[uiMergeCand << 1], m_mvBufBDMVR4BM[(uiMergeCand << 1) + 1]);
        }
      }
#endif
#endif
      else
      {
        cu.mmvdSkip = false;
        pu.regularMergeFlag = true;
        mergeCtx.setMergeInfo(pu, uiMergeCand);
#if JVET_X0141_CIIP_TIMD_TM && TM_MRG
        pu.ciipFlag = false;
        pu.tmMergeFlag = false;
#endif
#if MULTI_PASS_DMVR
        if (applyBDMVR[uiMergeCand])
        {
          isDMVR = true;
          pu.bdmvrRefine = true;
          m_pcInterSearch->setBdmvrSubPuMvBuf(m_mvBufBDMVR[uiMergeCand << 1], m_mvBufBDMVR[(uiMergeCand << 1) + 1]);
        }
#endif
      }
#if MERGE_ENC_OPT
      if (!RdModeList[uiMrgHADIdx].isAffine && !RdModeList[uiMrgHADIdx].isGeo)
#endif
#if MULTI_PASS_DMVR
      if( !pu.bdmvrRefine )
      {
        PU::spanMotionInfo( pu, mergeCtx );
      }
#else
      PU::spanMotionInfo( pu, mergeCtx );
#endif

      if( m_pcEncCfg->getMCTSEncConstraint() )
      {
#if !MULTI_PASS_DMVR
        bool isDMVR = PU::checkDMVRCondition( pu );
#endif
        if( ( isDMVR && MCTSHelper::isRefBlockAtRestrictedTileBoundary( pu ) ) || ( !isDMVR && !( MCTSHelper::checkMvBufferForMCTSConstraint( pu ) ) ) )
        {
          // Do not use this mode
          tempCS->initStructData( encTestMode.qp );
          continue;
        }
      }
#if MERGE_ENC_OPT
      if (mrgTempBufSet && uiMrgHADIdx < MMVD_MRG_MAX_RD_NUM)
#else
      if( mrgTempBufSet )
#endif
      {
#if !MULTI_PASS_DMVR
        {
          int dx, dy, i, j, num = 0;
          dy = std::min<int>(pu.lumaSize().height, DMVR_SUBCU_HEIGHT);
          dx = std::min<int>(pu.lumaSize().width, DMVR_SUBCU_WIDTH);
          if (PU::checkDMVRCondition(pu))
          {
            for (i = 0; i < (pu.lumaSize().height); i += dy)
            {
              for (j = 0; j < (pu.lumaSize().width); j += dx)
              {
                pu.mvdL0SubPu[num] = refinedMvdL0[num][uiMergeCand];
                num++;
              }
            }
          }
        }
#endif
        if (pu.ciipFlag)
        {
#if CIIP_PDPC
          uint32_t bufIdx = pu.ciipPDPC ? 1 : 0;
#else
          uint32_t bufIdx = 0;
#endif
#if JVET_X0090_CIIP_FIX
          m_pcInterSearch->motionCompensation(pu);
#if ENABLE_OBMC
          cu.isobmcMC = true;
          cu.obmcFlag = true;
          m_pcInterSearch->subBlockOBMC(pu);
          cu.isobmcMC = false;
#endif
          if (cu.cs->slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
          {
            m_pcIntraSearch->geneWeightedPred<true>(COMPONENT_Y, tempCS->getPredBuf(pu).Y(), pu, tempCS->getPredBuf(pu).Y(), m_ciipBuffer[bufIdx].getBuf(localUnitArea.Y()), m_pcReshape->getFwdLUT().data());
          }
          else
          {
            m_pcIntraSearch->geneWeightedPred<false>(COMPONENT_Y, tempCS->getPredBuf(pu).Y(), pu, tempCS->getPredBuf(pu).Y(), m_ciipBuffer[bufIdx].getBuf(localUnitArea.Y()));
          }

#if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
          if (isChromaEnabled(pu.chromaFormat))
#else
          if (isChromaEnabled(pu.chromaFormat) && pu.chromaSize().width > 2)
#endif
          {
            m_pcIntraSearch->geneWeightedPred<false>(COMPONENT_Cb, tempCS->getPredBuf(pu).Cb(), pu, tempCS->getPredBuf(pu).Cb(), m_ciipBuffer[bufIdx].getBuf(localUnitArea.Cb()));
            m_pcIntraSearch->geneWeightedPred<false>(COMPONENT_Cr, tempCS->getPredBuf(pu).Cr(), pu, tempCS->getPredBuf(pu).Cr(), m_ciipBuffer[bufIdx].getBuf(localUnitArea.Cr()));
          }
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
          else if (isChromaEnabled(pu.chromaFormat))
          {
            tempCS->getPredBuf().Cb().copyFrom(tempCS->getPredBuf(pu).Cb());
            tempCS->getPredBuf().Cr().copyFrom(tempCS->getPredBuf(pu).Cr());
          }
#endif
#else
          // Luma CIIP was already done in SATD check stage and stored
          tempCS->getPredBuf().Y().copyFrom( acMergeTempBuffer[uiMrgHADIdx]->Y() );

#if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
          if (isChromaEnabled(pu.chromaFormat))
#else
          if( isChromaEnabled( pu.chromaFormat ) && pu.chromaSize().width > 2 )
#endif
          {
#if JVET_X0141_CIIP_TIMD_TM && TM_MRG
            if (pu.tmMergeFlag)
            {
              m_pcIntraSearch->geneWeightedPred<false>(COMPONENT_Cb, tempCS->getPredBuf(pu).Cb(), pu, acTmMergeTmpBuffer[uiMergeCand].Cb(), m_ciipBuffer[bufIdx].getBuf(localUnitArea.Cb()));
              m_pcIntraSearch->geneWeightedPred<false>(COMPONENT_Cr, tempCS->getPredBuf(pu).Cr(), pu, acTmMergeTmpBuffer[uiMergeCand].Cr(), m_ciipBuffer[bufIdx].getBuf(localUnitArea.Cr()));
            }
            else
            {
              m_pcIntraSearch->geneWeightedPred<false>(COMPONENT_Cb, tempCS->getPredBuf(pu).Cb(), pu, acMergeTmpBuffer[uiMergeCand].Cb(), m_ciipBuffer[bufIdx].getBuf(localUnitArea.Cb()));
              m_pcIntraSearch->geneWeightedPred<false>(COMPONENT_Cr, tempCS->getPredBuf(pu).Cr(), pu, acMergeTmpBuffer[uiMergeCand].Cr(), m_ciipBuffer[bufIdx].getBuf(localUnitArea.Cr()));
            }
#else
            m_pcIntraSearch->geneWeightedPred<false>( COMPONENT_Cb, tempCS->getPredBuf( pu ).Cb(), pu, acMergeTmpBuffer[uiMergeCand].Cb(), m_ciipBuffer[bufIdx].getBuf( localUnitArea.Cb() ) );
            m_pcIntraSearch->geneWeightedPred<false>( COMPONENT_Cr, tempCS->getPredBuf( pu ).Cr(), pu, acMergeTmpBuffer[uiMergeCand].Cr(), m_ciipBuffer[bufIdx].getBuf( localUnitArea.Cr() ) );
#endif
          }
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
          else if (isChromaEnabled(pu.chromaFormat))
          {
            tempCS->getPredBuf().Cb().copyFrom(acMergeTmpBuffer[uiMergeCand].Cb());
            tempCS->getPredBuf().Cr().copyFrom(acMergeTmpBuffer[uiMergeCand].Cr());
          }
#endif
#endif
        }
        else
        {
          if (RdModeList[uiMrgHADIdx].isMMVD
#if AFFINE_MMVD && MERGE_ENC_OPT
            || RdModeList[uiMrgHADIdx].isAffineMmvd
#endif
            )
          {
            pu.mmvdEncOptMode = 0;
            m_pcInterSearch->motionCompensation(pu);
          }
#if MERGE_ENC_OPT
          else if (uiNoResidualPass != 0 && RdModeList[uiMrgHADIdx].isCIIP)
          {
            // perform regular MC instead, i.e. test skip mode
            pu.mvRefine = true;
            m_pcInterSearch->motionCompensation(pu);
            pu.mvRefine = false;
#if MULTI_PASS_DMVR
            if (!RdModeList[uiMrgHADIdx].isAffine && !RdModeList[uiMrgHADIdx].isGeo && pu.bdmvrRefine)
            {
#if TM_MRG
#if JVET_X0141_CIIP_TIMD_TM
              if (pu.tmMergeFlag && !RdModeList[uiMrgHADIdx].isCIIP)
#else
              if ( pu.tmMergeFlag )
#endif
              {
                PU::spanMotionInfo( pu, mergeCtx, m_mvBufBDMVR4TM[uiMergeCand << 1], m_mvBufBDMVR4TM[( uiMergeCand << 1 ) + 1], m_pcInterSearch->getBdofSubPuMvOffset() );
              }
              else
#endif
#if JVET_X0049_ADAPT_DMVR
              if( pu.bmMergeFlag ) 
              {
                PU::spanMotionInfo( pu, bmMrgCtx, m_mvBufBDMVR4BM[uiMergeCand << 1], m_mvBufBDMVR4BM[( uiMergeCand << 1 ) + 1], m_pcInterSearch->getBdofSubPuMvOffset() );
              }
              else
#endif
              PU::spanMotionInfo(pu, mergeCtx, m_mvBufBDMVR[uiMergeCand << 1], m_mvBufBDMVR[(uiMergeCand << 1) + 1], m_pcInterSearch->getBdofSubPuMvOffset());
            }
#endif
          }
#else
          else if (uiNoResidualPass != 0 && RdModeList[uiMrgHADIdx].isCIIP)
          {
            tempCS->getPredBuf().copyFrom(acMergeBuffer[uiMergeCand]);
#if MULTI_PASS_DMVR
            if (pu.bdmvrRefine)
            {
#if TM_MRG
              if( pu.tmMergeFlag )
              {
                PU::spanMotionInfo( pu, mergeCtx, m_mvBufBDMVR4TM[uiMergeCand << 1], m_mvBufBDMVR4TM[( uiMergeCand << 1 ) + 1], m_mvBufEncBDOF4TM[uiMergeCand] );
              }
              else
#endif
                PU::spanMotionInfo(pu, mergeCtx, m_mvBufBDMVR[uiMergeCand << 1], m_mvBufBDMVR[(uiMergeCand << 1) + 1], m_mvBufEncBDOF[uiMergeCand]);
            }
#endif
          }
#endif
#if MERGE_ENC_OPT
          else if (RdModeList[uiMrgHADIdx].isAffine)
          {
            tempCS->getPredBuf().copyFrom(*acMergeTempBuffer[uiMrgHADIdx], true);
#if JVET_Z0136_OOB
            m_pcInterSearch->motionCompensation(pu, REF_PIC_LIST_X, true, true);
#else
            m_pcInterSearch->motionCompensation(pu, REF_PIC_LIST_X, false, true);
#endif
          }
#endif
          else
          {
            tempCS->getPredBuf().copyFrom(*acMergeTempBuffer[uiMrgHADIdx]);
#if MULTI_PASS_DMVR
#if MERGE_ENC_OPT
            if (!RdModeList[uiMrgHADIdx].isAffine && !RdModeList[uiMrgHADIdx].isGeo && pu.bdmvrRefine)
#else
            if(pu.bdmvrRefine)
#endif
            {
#if TM_MRG
              if( pu.tmMergeFlag )
              {
                PU::spanMotionInfo( pu, mergeCtx, m_mvBufBDMVR4TM[uiMergeCand << 1], m_mvBufBDMVR4TM[( uiMergeCand << 1 ) + 1], m_mvBufEncBDOF4TM[uiMergeCand] );
              }
              else
#endif
#if JVET_X0049_ADAPT_DMVR
              if( pu.bmMergeFlag )
              {
                PU::spanMotionInfo( pu, bmMrgCtx, m_mvBufBDMVR4BM[uiMergeCand << 1], m_mvBufBDMVR4BM[( uiMergeCand << 1 ) + 1], m_mvBufEncBDOF4BM[uiMergeCand] );
              }
              else
#endif
              PU::spanMotionInfo(pu, mergeCtx, m_mvBufBDMVR[uiMergeCand << 1], m_mvBufBDMVR[(uiMergeCand << 1) + 1], m_mvBufEncBDOF[uiMergeCand]);
            }
#endif
          }
        }
      }
      else
      {
        pu.mvRefine = true;
        m_pcInterSearch->motionCompensation( pu );
        pu.mvRefine = false;
#if MULTI_PASS_DMVR
        if (pu.bdmvrRefine)
        {
#if TM_MRG
          if( pu.tmMergeFlag )
          {
            PU::spanMotionInfo( pu, mergeCtx, m_mvBufBDMVR4TM[uiMergeCand << 1], m_mvBufBDMVR4TM[( uiMergeCand << 1 ) + 1], m_pcInterSearch->getBdofSubPuMvOffset() );
          }
          else
#endif
#if JVET_X0049_ADAPT_DMVR 
            if (pu.bmMergeFlag)
            {
              PU::spanMotionInfo(pu, bmMrgCtx, m_mvBufBDMVR4BM[uiMergeCand << 1], m_mvBufBDMVR4BM[(uiMergeCand << 1) + 1], m_mvBufEncBDOF4BM[uiMergeCand]);
            }
            else
#endif
            PU::spanMotionInfo(pu, mergeCtx, m_mvBufBDMVR[uiMergeCand << 1], m_mvBufBDMVR[(uiMergeCand << 1) + 1], m_pcInterSearch->getBdofSubPuMvOffset());
        }
#endif
      }
      if (!cu.mmvdSkip && !pu.ciipFlag && uiNoResidualPass != 0 && !cu.affine 
#if TM_MRG
          && !pu.tmMergeFlag
#endif
#if JVET_X0049_ADAPT_DMVR
        && !pu.bmMergeFlag
#endif
#if AFFINE_MMVD && MERGE_ENC_OPT
          && !pu.afMmvdFlag
#endif
        )
      {
        CHECK(uiMergeCand >= mergeCtx.numValidMergeCand, "out of normal merge");
        isTestSkipMerge[uiMergeCand] = true;
      }

#if ENABLE_OBMC
      cu.isobmcMC = true;
      cu.obmcFlag = true;
#if JVET_X0090_CIIP_FIX
      if (!pu.ciipFlag)
      {
        m_pcInterSearch->subBlockOBMC(pu);
      }
#else
      m_pcInterSearch->subBlockOBMC( pu );
#endif
      cu.isobmcMC = false;
#endif

      xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, uiNoResidualPass == 0 ? &candHasNoResidual[uiMrgHADIdx] : NULL );

      if( m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip && !pu.ciipFlag)
      {
        bestIsSkip = !bestCS->cus.empty() && bestCS->getCU( partitioner.chType )->rootCbf == 0;
      }
      tempCS->initStructData( encTestMode.qp );
    }// end loop uiMrgHADIdx

    if( uiNoResidualPass == 0 && m_pcEncCfg->getUseEarlySkipDetection() )
    {
      const CodingUnit     &bestCU = *bestCS->getCU( partitioner.chType );
      const PredictionUnit &bestPU = *bestCS->getPU( partitioner.chType );

      if( bestCU.rootCbf == 0 )
      {
        if( bestPU.mergeFlag )
        {
          m_modeCtrl->setEarlySkipDetected();
        }
        else if( m_pcEncCfg->getMotionEstimationSearchMethod() != MESEARCH_SELECTIVE )
        {
          int absolute_MV = 0;

          for( uint32_t uiRefListIdx = 0; uiRefListIdx < 2; uiRefListIdx++ )
          {
            if( slice.getNumRefIdx( RefPicList( uiRefListIdx ) ) > 0 )
            {
              absolute_MV += bestPU.mvd[uiRefListIdx].getAbsHor() + bestPU.mvd[uiRefListIdx].getAbsVer();
            }
          }

          if( absolute_MV == 0 )
          {
            m_modeCtrl->setEarlySkipDetected();
          }
        }
      }
    }
  }
  if ( m_bestModeUpdated && bestCS->cost != MAX_DOUBLE )
  {
    xCalDebCost( *bestCS, partitioner );
  }
}

#if JVET_W0097_GPM_MMVD_TM
void EncCu::xCheckRDCostMergeGeoComb2Nx2N(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode, bool isSecondPass)
{
  int numSATDCands = (m_fastGpmMmvdSearch && isSecondPass) ? 60 : 70;

  tempCS->initStructData(encTestMode.qp);
#if TM_MRG
  MergeCtx mergeCtx[GEO_NUM_TM_MV_CAND];
#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
  MergeCtx& mergeCtxRegular = mergeCtx[GEO_TM_OFF];
#endif
#else
  MergeCtx mergeCtx;
#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
  MergeCtx& mergeCtxRegular = mergeCtx;
#endif
#endif
  const SPS &sps = *tempCS->sps;
  CodedCUInfo& relatedCU = ((EncModeCtrlMTnoRQT *)m_modeCtrl)->getBlkInfo(pm.currArea());
  bool extMMVD = tempCS->picHeader->getGPMMMVDTableFlag();

  if (sps.getSbTMVPEnabledFlag())
  {
    Size bufSize = g_miScaling.scale(tempCS->area.lumaSize());
#if TM_MRG
    for (int i = 0; i < GEO_NUM_TM_MV_CAND; i++)
    {
      mergeCtx[i].subPuMvpMiBuf = MotionBuf(m_SubPuMiBuf, bufSize);
    }
#else
    mergeCtx.subPuMvpMiBuf = MotionBuf(m_SubPuMiBuf, bufSize);
#endif
  }

  // 1. bit estimation
  const double sqrtLambdaFracBits = m_pcRdCost->getMotionLambda() * FRAC_BITS_SCALE;
  uint8_t maxNumMergeCandidates = tempCS->sps->getMaxNumGeoCand();
  const TempCtx ctxStart(m_CtxCache, m_CABACEstimator->getCtx());

  double geoModeCost[GEO_NUM_PARTITION_MODE], geoMergeIdxCost[MRG_MAX_NUM_CANDS], geoMMVDFlagCost[2], geoMMVDIdxCost[GPM_EXT_MMVD_MAX_REFINE_NUM];
#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
  double geoSigModeCost[GEO_NUM_SIG_PARTMODE];
#endif
#if TM_MRG
  double geoTMFlagCost[2];
#endif
#if JVET_AA0058_GPM_ADP_BLD
  double geoBldFlagCost[GEO_NUM_BLD];
#endif
  for (int idx = 0; idx < GEO_NUM_PARTITION_MODE; idx++)
  {
    uint64_t fracBits = m_CABACEstimator->geo_mode_est(ctxStart, idx);
    geoModeCost[idx] = (double)fracBits * sqrtLambdaFracBits;
  }
#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
  if (sps.getUseAltGPMSplitModeCode())
  {
    for (int idx = 0; idx < GEO_NUM_SIG_PARTMODE; idx++)
    {
      uint64_t fracBits = m_CABACEstimator->geo_mode_est(ctxStart, idx, 1);
      geoSigModeCost[idx] = (double)fracBits * sqrtLambdaFracBits;
    }
  }
#endif
  for (int idx = 0; idx < maxNumMergeCandidates; idx++)
  {
    uint64_t fracBits = m_CABACEstimator->geo_mergeIdx_est(ctxStart, idx, maxNumMergeCandidates);
    geoMergeIdxCost[idx] = (double)fracBits * sqrtLambdaFracBits;
  }
  for (int idx = 0; idx < 2; idx++)
  {
    uint64_t fracBits = m_CABACEstimator->geo_mmvdFlag_est(ctxStart, idx);
    geoMMVDFlagCost[idx] = (double)fracBits * sqrtLambdaFracBits;
  }
  for (int idx = 0; idx < (extMMVD ? GPM_EXT_MMVD_MAX_REFINE_NUM : GPM_MMVD_MAX_REFINE_NUM); idx++)
  {
    uint64_t fractBits = m_CABACEstimator->geo_mmvdIdx_est(ctxStart, idx, extMMVD);
    geoMMVDIdxCost[idx] = (double)fractBits * sqrtLambdaFracBits;
  }
#if TM_MRG
  for (int idx = 0; idx < 2 && sps.getUseDMVDMode(); idx++)
  {
    uint64_t fracBits = m_CABACEstimator->geo_tmFlag_est(ctxStart, idx);
    geoTMFlagCost[idx] = (double)fracBits * sqrtLambdaFracBits;
  }
#endif
#if JVET_AA0058_GPM_ADP_BLD
  for (int idx = 0; idx < GEO_NUM_BLD; idx++)
  {
    uint64_t fracBits = m_CABACEstimator->geoBldFlagEst(ctxStart, idx);
    geoBldFlagCost[idx] = (double)fracBits * sqrtLambdaFracBits;
  }
#endif
#if JVET_Y0065_GPM_INTRA
  bool bUseOnlyOneVector = (tempCS->slice->isInterP() || tempCS->sps->getMaxNumGeoCand() == 1);
  double geoIntraFlag0Cost[2], geoIntraFlag1Cost[2][2], geoIntraIdxCost[GEO_MAX_NUM_INTRA_CANDS];
  for (int idx = 0; idx < 2; idx++)
  {
    uint64_t fracBits = m_CABACEstimator->geo_intraFlag_est(ctxStart, idx);
    geoIntraFlag0Cost[idx] = (double)fracBits * sqrtLambdaFracBits;
    geoIntraFlag1Cost[0][idx] = !bUseOnlyOneVector ? geoIntraFlag0Cost[idx] : 0;
    geoIntraFlag1Cost[1][idx] = 0;
  }
  for (int idx = 0; idx < GEO_MAX_NUM_INTRA_CANDS; idx++)
  {
    uint64_t fracBits = m_CABACEstimator->geo_intraIdx_est(ctxStart, idx);
    geoIntraIdxCost[idx] = (double)fracBits * sqrtLambdaFracBits;
  }
#endif
  m_CABACEstimator->getCtx() = ctxStart;

  // 2. get SAD for all candidates
  CodingUnit &cu = tempCS->addCU(tempCS->area, pm.chType);
  pm.setCUData(cu);
  cu.predMode = MODE_INTER;
  cu.slice = tempCS->slice;
  cu.tileIdx = tempCS->pps->getTileIdx(tempCS->area.lumaPos());
  cu.qp = encTestMode.qp;
  cu.affine = false;
  cu.mtsFlag = false;
#if INTER_LIC
  cu.LICFlag = false;
#endif
  cu.BcwIdx = BCW_DEFAULT;
  cu.geoFlag = true;
  cu.imv = 0;
  cu.mmvdSkip = false;
  cu.skip = false;
  cu.mipFlag = false;
#if JVET_V0130_INTRA_TMP
  cu.tmpFlag = false;
#endif
  cu.bdpcmMode = 0;

  PredictionUnit &pu = tempCS->addPU(cu, pm.chType);
  pu.mergeFlag = true;
  pu.regularMergeFlag = false;
#if TM_MRG || (JVET_Z0084_IBC_TM && IBC_TM_MRG)
  pu.tmMergeFlag = false;
#endif
#if JVET_X0049_ADAPT_DMVR
  pu.bmMergeFlag = false;
#endif
  CHECK(!m_mergeCandAvail, "merge candidates are not available");
#if TM_MRG
  PU::getGeoMergeCandidates(pu, mergeCtx[GEO_TM_OFF], &m_mergeCand);
  maxNumMergeCandidates = min((int)maxNumMergeCandidates, mergeCtx[GEO_TM_OFF].numValidMergeCand);
#else
  PU::getGeoMergeCandidates(pu, mergeCtx, &m_mergeCand);
  maxNumMergeCandidates = min((int)maxNumMergeCandidates, mergeCtx.numValidMergeCand);
#endif

  PelUnitBuf geoBuffer[GEO_MAX_NUM_UNI_CANDS];
  PelUnitBuf geoTempBuf[GEO_MAX_NUM_UNI_CANDS];
  PelUnitBuf geoMMVDBuf[GEO_MAX_NUM_UNI_CANDS][GPM_EXT_MMVD_MAX_REFINE_NUM];
  PelUnitBuf geoMMVDTempBuf[GEO_MAX_NUM_UNI_CANDS][GPM_EXT_MMVD_MAX_REFINE_NUM];
#if JVET_Y0065_GPM_INTRA
#if JVET_AA0058_GPM_ADP_BLD
  PelUnitBuf geoCombinations[GEO_MAX_TRY_WEIGHTED_SAD*GEO_NUM_BLD+1];
#else
  PelUnitBuf geoCombinations[GEO_MAX_TRY_WEIGHTED_SAD+1];
#endif
  PelUnitBuf geoIntraBuffer[GEO_NUM_INTRA_RDO_BUFFER];
  PelUnitBuf geoIntraTempBuf[GEO_NUM_INTRA_RDO_BUFFER];
#else
  PelUnitBuf geoCombinations[GEO_MAX_TRY_WEIGHTED_SAD];
#endif
  DistParam  distParam;

  const UnitArea localUnitArea(tempCS->area.chromaFormat, Area(0, 0, tempCS->area.Y().width, tempCS->area.Y().height));
  DistParam distParamWholeBlk;
  m_pcRdCost->setDistParam(distParamWholeBlk, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y().buf, m_acMergeBuffer[0].Y().stride, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y);
  Distortion sadWholeBlk[GEO_MAX_NUM_UNI_CANDS], sadMMVDWholeBlk[GEO_MAX_NUM_UNI_CANDS][GPM_EXT_MMVD_MAX_REFINE_NUM];

  int  pocMrg[GEO_MAX_NUM_UNI_CANDS];
  Mv   mrgMv[GEO_MAX_NUM_UNI_CANDS];
  bool mrgDuplicated[GEO_MAX_NUM_UNI_CANDS];

  double bestMrgCost = MAX_DOUBLE;
  double bestNormalMrgCost = MAX_DOUBLE;

#if JVET_Y0065_GPM_INTRA
  Distortion sadIntraWholeBlk[GEO_NUM_INTRA_RDO_BUFFER];
  uint8_t isGeoChromaAvail[GEO_MAX_NUM_UNI_CANDS];
  uint8_t isGeoMMVDChromaAvail[GEO_MAX_NUM_UNI_CANDS][GPM_EXT_MMVD_MAX_REFINE_NUM];
  uint8_t isGeoIntraChromaAvail[GEO_NUM_INTRA_RDO_BUFFER];
  memset(isGeoChromaAvail, 0, sizeof(uint8_t) * GEO_MAX_NUM_UNI_CANDS);
  memset(isGeoMMVDChromaAvail, 0, sizeof(uint8_t) * GEO_MAX_NUM_UNI_CANDS * GPM_EXT_MMVD_MAX_REFINE_NUM);
  memset(isGeoIntraChromaAvail, 0, sizeof(uint8_t) * GEO_NUM_INTRA_RDO_BUFFER);
#else
  bool isGeoChromaAvail[GEO_MAX_NUM_UNI_CANDS];
  bool isGeoMMVDChromaAvail[GEO_MAX_NUM_UNI_CANDS][GPM_EXT_MMVD_MAX_REFINE_NUM];
  memset(isGeoChromaAvail, false, sizeof(bool) * GEO_MAX_NUM_UNI_CANDS);
  memset(isGeoMMVDChromaAvail, false, sizeof(bool) * GEO_MAX_NUM_UNI_CANDS * GPM_EXT_MMVD_MAX_REFINE_NUM);
#endif
#if TM_MRG
  bool isGeoTmChromaAvail[GEO_TM_MAX_NUM_CANDS];
  memset(isGeoTmChromaAvail, false, sizeof(bool) * GEO_TM_MAX_NUM_CANDS);
#endif

  for (uint8_t mergeCand = 0; mergeCand < maxNumMergeCandidates; mergeCand++)
  {
#if TM_MRG
    int mrgList = mergeCtx[GEO_TM_OFF].mvFieldNeighbours[(mergeCand << 1) + 0].refIdx == -1 ? 1 : 0;
    int mrgRefIdx = mergeCtx[GEO_TM_OFF].mvFieldNeighbours[(mergeCand << 1) + mrgList].refIdx;
#else
    int mrgList = mergeCtx.mvFieldNeighbours[(mergeCand << 1) + 0].refIdx == -1 ? 1 : 0;
    int mrgRefIdx = mergeCtx.mvFieldNeighbours[(mergeCand << 1) + mrgList].refIdx;
#endif
    pocMrg[mergeCand] = tempCS->slice->getRefPic((RefPicList)mrgList, mrgRefIdx)->getPOC();
#if TM_MRG
    mrgMv[mergeCand] = mergeCtx[GEO_TM_OFF].mvFieldNeighbours[(mergeCand << 1) + mrgList].mv;
#else
    mrgMv[mergeCand] = mergeCtx.mvFieldNeighbours[(mergeCand << 1) + mrgList].mv;
#endif
    mrgDuplicated[mergeCand] = false;
    if (mergeCand)
    {
      for (int i = 0; i < mergeCand; i++)
      {
        if (pocMrg[mergeCand] == pocMrg[i] && mrgMv[mergeCand] == mrgMv[i])
        {
          mrgDuplicated[mergeCand] = true;
          break;
        }
      }
    }
#if !MULTI_HYP_PRED
    if (mrgDuplicated[mergeCand])
    {
      continue;
    }
#endif
    geoBuffer[mergeCand] = m_acMergeBuffer[mergeCand].getBuf(localUnitArea);
#if TM_MRG
    mergeCtx[GEO_TM_OFF].setMergeInfo(pu, mergeCand);
#else
    mergeCtx.setMergeInfo(pu, mergeCand);
#endif
    if (m_pcEncCfg->getMCTSEncConstraint() && (!(MCTSHelper::checkMvBufferForMCTSConstraint(pu))))
    {
      tempCS->initStructData(encTestMode.qp);
      return;
    }
    m_pcInterSearch->motionCompensation(pu, geoBuffer[mergeCand], REF_PIC_LIST_X, true, false);
#if MULTI_HYP_PRED
    geoTempBuf[mergeCand] = m_acRealMergeBuffer[MRG_MAX_NUM_CANDS + mergeCand].getBuf(localUnitArea);
#else
    geoTempBuf[mergeCand] = m_acMergeTmpBuffer[mergeCand].getBuf(localUnitArea);
#endif
    geoTempBuf[mergeCand].Y().copyFrom(geoBuffer[mergeCand].Y());
    geoTempBuf[mergeCand].Y().roundToOutputBitdepth(geoTempBuf[mergeCand].Y(), cu.slice->clpRng(COMPONENT_Y));
    distParamWholeBlk.cur.buf = geoTempBuf[mergeCand].Y().buf;
    distParamWholeBlk.cur.stride = geoTempBuf[mergeCand].Y().stride;
    sadWholeBlk[mergeCand] = distParamWholeBlk.distFunc(distParamWholeBlk);
    double curCost = sadWholeBlk[mergeCand] + geoMergeIdxCost[mergeCand];
#if JVET_Y0065_GPM_INTRA
    curCost += geoIntraFlag0Cost[0];
#endif
    if (curCost < bestNormalMrgCost)
    {
      bestNormalMrgCost = curCost;
    }
    curCost += geoMMVDFlagCost[0];
    if (curCost < bestMrgCost)
    {
      bestMrgCost = curCost;
    }
  }
#if MULTI_HYP_PRED
#if TM_MRG
  m_pcInterSearch->setGeoTmpBuffer(mergeCtx[GEO_TM_OFF]);
#else
  m_pcInterSearch->setGeoTmpBuffer(mergeCtx);
#endif
#endif

#if JVET_Y0065_GPM_INTRA
  uint8_t geoIntraMPMList[GEO_NUM_PARTITION_MODE][2][GEO_MAX_NUM_INTRA_CANDS];
  uint8_t intraRDOBufIdx[NUM_LUMA_MODE];
  memset(intraRDOBufIdx, -1, sizeof(uint8_t)*NUM_LUMA_MODE);
  int intraRDOBufCnt = 0;
#if ENABLE_DIMD && JVET_W0123_TIMD_FUSION
  if (sps.getUseDimd() || sps.getUseTimd())
  {
    IntraPrediction::deriveDimdMode(tempCS->picture->getRecoBuf(tempCS->area.Y()), tempCS->area.Y(), cu);
    if (sps.getUseTimd())
    {
      cu.timdMode = m_pcIntraSearch->deriveTimdMode(tempCS->picture->getRecoBuf(tempCS->area.Y()), tempCS->area.Y(), cu);
    }
  }
#elif ENABLE_DIMD
  if (sps.getUseDimd())
  {
    IntraPrediction::deriveDimdMode(tempCS->picture->getRecoBuf(tempCS->area.Y()), tempCS->area.Y(), cu);
  }
#elif JVET_W0123_TIMD_FUSION
  if (sps.getUseTimd())
  {
    cu.timdMode = m_pcIntraSearch->deriveTimdMode(tempCS->picture->getRecoBuf(tempCS->area.Y()), tempCS->area.Y(), cu);
  }
#endif
#if ENABLE_DIMD
  int8_t dimdMode = cu.dimdMode;
#endif
#if JVET_W0123_TIMD_FUSION
  int timdMode = cu.timdMode;
#endif
  for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++)
  {
    for (int partIdx = 0; partIdx < 2; partIdx++)
    {
      PU::getGeoIntraMPMs(pu, geoIntraMPMList[splitDir][partIdx], splitDir, g_geoTmShape[partIdx][g_GeoParams[splitDir][0]]
#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
                        , (splitDir == 0 && partIdx == 0)
#endif
      );
      for (int intraIdx = 0; intraIdx < GEO_MAX_NUM_INTRA_CANDS; intraIdx++)
      {
        uint8_t intraPred = geoIntraMPMList[splitDir][partIdx][intraIdx];
        if (intraRDOBufIdx[intraPred] >= GEO_NUM_INTRA_RDO_BUFFER)
        {
          uint8_t intraCand = intraRDOBufCnt++;
          CHECK(intraCand >= GEO_NUM_INTRA_RDO_BUFFER, "Geo Intra buffer overflow");
          intraRDOBufIdx[intraPred] = intraCand;
          pu.intraDir[0] = intraPred;
          geoIntraBuffer[intraCand] = m_acMergeBuffer[intraCand + GEO_MAX_NUM_UNI_CANDS].getBuf(localUnitArea);
          pu.gpmIntraFlag = true;
          m_pcIntraSearch->initIntraPatternChType(cu, pu.Y());
          m_pcIntraSearch->predIntraAng(COMPONENT_Y, geoIntraBuffer[intraCand].Y(), pu);
          if (pu.cs->slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
          {
            geoIntraTempBuf[intraCand] = m_acGeoMMVDTmpBuffer[0][intraCand].getBuf(localUnitArea);
            geoIntraTempBuf[intraCand].Y().rspSignal(geoIntraBuffer[intraCand].Y(), m_pcReshape->getInvLUT());
          }
          else
          {
            geoIntraTempBuf[intraCand] = geoIntraBuffer[intraCand];
          }
          pu.gpmIntraFlag = false;
          distParamWholeBlk.cur.buf = geoIntraTempBuf[intraCand].Y().buf;
          distParamWholeBlk.cur.stride = geoIntraTempBuf[intraCand].Y().stride;
          sadIntraWholeBlk[intraCand] = distParamWholeBlk.distFunc(distParamWholeBlk);
        }
      }
    }
  }
#endif

  int wIdx = floorLog2(cu.lwidth()) - GEO_MIN_CU_LOG2;
  int hIdx = floorLog2(cu.lheight()) - GEO_MIN_CU_LOG2;
  Distortion sadSmall = 0, sadLarge = 0;
  int maskStride = 0, maskStride2 = 0, stepX = 1;
  Pel* SADmask;
  static_vector<int, GEO_NUM_PARTITION_MODE> selGeoModeList;
  static_vector<double, GEO_NUM_PARTITION_MODE> selGeoModeRDList;
  static_vector<int, 5> mergeCandList0[GEO_NUM_PARTITION_MODE];
  static_vector<int, 5> mergeCandList1[GEO_NUM_PARTITION_MODE];
  static_vector<int, 5> mmvdCandList0[GEO_NUM_PARTITION_MODE];
  static_vector<int, 5> mmvdCandList1[GEO_NUM_PARTITION_MODE];
  static_vector<double, 5> sadCostList0[GEO_NUM_PARTITION_MODE];
  static_vector<double, 5> sadCostList1[GEO_NUM_PARTITION_MODE];
#if JVET_Y0065_GPM_INTRA
  static_vector<int, GEO_MAX_NUM_INTRA_CANDS> intraCandList0[GEO_NUM_PARTITION_MODE];
  static_vector<int, GEO_MAX_NUM_INTRA_CANDS> intraCandList1[GEO_NUM_PARTITION_MODE];
  static_vector<double, GEO_MAX_NUM_INTRA_CANDS> intraSadCostList0[GEO_NUM_PARTITION_MODE];
  static_vector<double, GEO_MAX_NUM_INTRA_CANDS> intraSadCostList1[GEO_NUM_PARTITION_MODE];
#endif

  for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++)
  {
    int16_t angle = g_GeoParams[splitDir][0];
    if (g_angle2mirror[angle] == 2)
    {
      stepX = 1;
      maskStride = -GEO_WEIGHT_MASK_SIZE;
      maskStride2 = -(int)cu.lwidth();
      SADmask = &g_globalGeoEncSADmask[g_angle2mask[g_GeoParams[splitDir][0]]][(GEO_WEIGHT_MASK_SIZE - 1 - g_weightOffset[splitDir][hIdx][wIdx][1]) * GEO_WEIGHT_MASK_SIZE + g_weightOffset[splitDir][hIdx][wIdx][0]];
    }
    else if (g_angle2mirror[angle] == 1)
    {
      stepX = -1;
      maskStride2 = cu.lwidth();
      maskStride = GEO_WEIGHT_MASK_SIZE;
      SADmask = &g_globalGeoEncSADmask[g_angle2mask[g_GeoParams[splitDir][0]]][g_weightOffset[splitDir][hIdx][wIdx][1] * GEO_WEIGHT_MASK_SIZE + (GEO_WEIGHT_MASK_SIZE - 1 - g_weightOffset[splitDir][hIdx][wIdx][0])];
    }
    else
    {
      stepX = 1;
      maskStride = GEO_WEIGHT_MASK_SIZE;
      maskStride2 = -(int)cu.lwidth();
      SADmask = &g_globalGeoEncSADmask[g_angle2mask[g_GeoParams[splitDir][0]]][g_weightOffset[splitDir][hIdx][wIdx][1] * GEO_WEIGHT_MASK_SIZE + g_weightOffset[splitDir][hIdx][wIdx][0]];
    }
#if JVET_Y0065_GPM_INTRA
    for (uint8_t mergeCand = 0; mergeCand < GEO_MAX_NUM_UNI_CANDS + GEO_MAX_NUM_INTRA_CANDS; mergeCand++)
#else
    for (uint8_t mergeCand = 0; mergeCand < maxNumMergeCandidates; mergeCand++)
#endif
    {
#if JVET_Y0065_GPM_INTRA
      if ((mergeCand < maxNumMergeCandidates && mrgDuplicated[mergeCand]) || (mergeCand >= maxNumMergeCandidates && mergeCand < GEO_MAX_NUM_UNI_CANDS))
#else
      if (mrgDuplicated[mergeCand])
#endif
      {
        continue;
      }
#if JVET_Y0065_GPM_INTRA
      double tempCost;
      if (mergeCand < GEO_MAX_NUM_UNI_CANDS)
      {
#endif
      m_pcRdCost->setDistParam(distParam, tempCS->getOrgBuf().Y(), geoTempBuf[mergeCand].Y().buf, geoTempBuf[mergeCand].Y().stride, SADmask, maskStride, stepX, maskStride2, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y);
      sadLarge = distParam.distFunc(distParam);
#if JVET_Y0065_GPM_INTRA
      tempCost = (double)sadLarge + geoMergeIdxCost[mergeCand] + geoIntraFlag0Cost[0] + geoMMVDFlagCost[0];
#else
      double tempCost = (double)sadLarge + geoMergeIdxCost[mergeCand] + geoMMVDFlagCost[0];
#endif
      m_geoMMVDCostList.insert(splitDir, 0, mergeCand, 0, tempCost);
      sortCandList(tempCost, mergeCand, 0, sadCostList0[splitDir], mergeCandList0[splitDir], mmvdCandList0[splitDir], m_numCandPerPar);
      sadSmall = sadWholeBlk[mergeCand] - sadLarge;
#if JVET_Y0065_GPM_INTRA
      tempCost = (double)sadSmall + geoMergeIdxCost[mergeCand] + geoIntraFlag0Cost[0] + geoMMVDFlagCost[0];
#else
      tempCost = (double)sadSmall + geoMergeIdxCost[mergeCand] + geoMMVDFlagCost[0];
#endif
      m_geoMMVDCostList.insert(splitDir, 1, mergeCand, 0, tempCost);
      sortCandList(tempCost, mergeCand, 0, sadCostList1[splitDir], mergeCandList1[splitDir], mmvdCandList1[splitDir], m_numCandPerPar);
#if JVET_Y0065_GPM_INTRA
      }
      else
      {
        int intraIdx = mergeCand - GEO_MAX_NUM_UNI_CANDS;
        int rdobuffer = intraRDOBufIdx[geoIntraMPMList[splitDir][0][intraIdx]];
        m_pcRdCost->setDistParam(distParam, tempCS->getOrgBuf().Y(), geoIntraTempBuf[rdobuffer].Y().buf, geoIntraTempBuf[rdobuffer].Y().stride, SADmask, maskStride, stepX, maskStride2, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y);
        sadLarge = distParam.distFunc(distParam);
        tempCost = (double)sadLarge + geoIntraIdxCost[intraIdx] + geoIntraFlag0Cost[1] + geoMMVDFlagCost[0];
        m_geoMMVDCostList.insert(splitDir, 0, mergeCand, 0, tempCost);
        sortIntraCandList(tempCost, mergeCand, intraSadCostList0[splitDir], intraCandList0[splitDir]);

        if (geoIntraMPMList[splitDir][0][intraIdx] != geoIntraMPMList[splitDir][1][intraIdx])
        {
          rdobuffer = intraRDOBufIdx[geoIntraMPMList[splitDir][1][intraIdx]];
          m_pcRdCost->setDistParam(distParam, tempCS->getOrgBuf().Y(), geoIntraTempBuf[rdobuffer].Y().buf, geoIntraTempBuf[rdobuffer].Y().stride, SADmask, maskStride, stepX, maskStride2, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y);
          sadLarge = distParam.distFunc(distParam);
        }
        sadSmall = sadIntraWholeBlk[rdobuffer] - sadLarge;
        tempCost = (double)sadSmall + geoIntraIdxCost[intraIdx] + geoIntraFlag0Cost[1] + geoMMVDFlagCost[0];
        m_geoMMVDCostList.insert(splitDir, 1, mergeCand, 0, tempCost);
        sortIntraCandList(tempCost, mergeCand, intraSadCostList1[splitDir], intraCandList1[splitDir]);
      }
#endif
    }
    updateCandList(splitDir, (sadCostList0[splitDir][0] + sadCostList1[splitDir][0]), selGeoModeList, selGeoModeRDList, GEO_NUM_PARTITION_MODE);
  }

  static_vector<int, GEO_MAX_TRY_WEIGHTED_SAD> geoSplitDirList;
  static_vector<int, GEO_MAX_TRY_WEIGHTED_SAD> geoMergeCand0;
  static_vector<int, GEO_MAX_TRY_WEIGHTED_SAD> geoMergeCand1;
  static_vector<int, GEO_MAX_TRY_WEIGHTED_SAD> geoMmvdCand0;
  static_vector<int, GEO_MAX_TRY_WEIGHTED_SAD> geoMmvdCand1;
  static_vector<double, GEO_MAX_TRY_WEIGHTED_SAD> geoSADCostList;
#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
  if (sps.getUseAltGPMSplitModeCode())
  {
    m_pcInterSearch->initGeoAngleSelection(pu
#if JVET_Y0065_GPM_INTRA
                                         , m_pcIntraSearch, geoIntraMPMList
#endif
    );
  }
#if TM_MRG
  const int tmMmvdBufIdx0 = GPM_EXT_MMVD_MAX_REFINE_NUM + 1;
  const int tmMmvdBufIdx1 = GPM_EXT_MMVD_MAX_REFINE_NUM + 1;
#endif
#endif

  for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++)
  {
#if JVET_Y0065_GPM_INTRA
    int numCandMerge0 = min(m_numCandPerPar, (int)mergeCandList0[splitDir].size());
    int numCandIntra0 = (int)intraCandList0[splitDir].size();
    int numCandPart0 = numCandMerge0 + numCandIntra0;
    for (int candIdx0 = 0; candIdx0 < numCandPart0; candIdx0++)
    {
      int mergeCand0 = candIdx0 < numCandMerge0 ? mergeCandList0[splitDir][candIdx0] : intraCandList0[splitDir][candIdx0-numCandMerge0];
      int numCandMerge1 = min(m_numCandPerPar, (int)mergeCandList1[splitDir].size());
      int numCandIntra1 = candIdx0 < numCandMerge0 ? (int)intraCandList1[splitDir].size() : 0;
      int numCandPart1 = numCandMerge1 + numCandIntra1;
      int candStart1 = (bUseOnlyOneVector && candIdx0 < numCandMerge0) ? numCandMerge1 : 0;
      for (int candIdx1 = candStart1; candIdx1 < numCandPart1; candIdx1++)
      {
        int mergeCand1 = candIdx1 < numCandMerge1 ? mergeCandList1[splitDir][candIdx1] : intraCandList1[splitDir][candIdx1-numCandMerge1];
#else
    int numCandPart0 = min(m_numCandPerPar, (int)mergeCandList0[splitDir].size());
    int numCandPart1 = min(m_numCandPerPar, (int)mergeCandList1[splitDir].size());
    for (int candIdx0 = 0; candIdx0 < numCandPart0; candIdx0++)
    {
      for (int candIdx1 = 0; candIdx1 < numCandPart1; candIdx1++)
      {
        int mergeCand0 = mergeCandList0[splitDir][candIdx0];
        int mergeCand1 = mergeCandList1[splitDir][candIdx1];
#endif

        if (mergeCand0 == mergeCand1)
        {
          continue;
        }

#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
        int geoSyntaxMode = std::numeric_limits<uint8_t>::max();
        if(sps.getUseAltGPMSplitModeCode())
        {
          m_pcInterSearch->setGeoSplitModeToSyntaxTable(pu, mergeCtxRegular, mergeCand0, mergeCtxRegular, mergeCand1
#if JVET_Y0065_GPM_INTRA
                                                      , m_pcIntraSearch
#endif
          );
          geoSyntaxMode = m_pcInterSearch->convertGeoSplitModeToSyntax(splitDir, mergeCand0, mergeCand1);
          if (geoSyntaxMode == std::numeric_limits<uint8_t>::max())
          {
            continue;
          }
        }
#endif

        double tempCost = m_geoMMVDCostList.singleDistList[0][splitDir][mergeCand0][0].cost + m_geoMMVDCostList.singleDistList[1][splitDir][mergeCand1][0].cost;
        tempCost = tempCost +
#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
                  (geoSyntaxMode == std::numeric_limits<uint8_t>::max() ? geoModeCost[splitDir] : geoSigModeCost[geoSyntaxMode]);
#else
                   geoModeCost[splitDir];
#endif
#if TM_MRG
#if JVET_Y0065_GPM_INTRA
        if (sps.getUseDMVDMode() && mergeCand0 < GEO_MAX_NUM_UNI_CANDS && mergeCand1 < GEO_MAX_NUM_UNI_CANDS)
#else
        if (sps.getUseDMVDMode())
#endif
        {
          tempCost += geoTMFlagCost[0];
        }
#endif
        updateGeoMMVDCandList(tempCost, splitDir, mergeCand0, mergeCand1, 0, 0, geoSADCostList, geoSplitDirList, geoMergeCand0, geoMergeCand1, geoMmvdCand0, geoMmvdCand1, numSATDCands);
      }
    }
  }

  static_vector<uint8_t, GEO_MAX_TRY_WEIGHTED_SAD>  geoRdModeList;
  static_vector<bool, GEO_MAX_TRY_WEIGHTED_SAD>  isNonMMVDListIdx;
  static_vector<int, GEO_MAX_TRY_WEIGHTED_SAD>  geoPartitionModeList;
  static_vector<double, GEO_MAX_TRY_WEIGHTED_SAD>  geocandCostList;
#if JVET_AA0058_GPM_ADP_BLD
  static_vector<uint8_t, GEO_MAX_TRY_WEIGHTED_SAD> geoBldList;
#endif

  DistParam distParamSAD2;
  const bool useHadamard = !tempCS->slice->getDisableSATDForRD();
  m_pcRdCost->setDistParam(distParamSAD2, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y(), sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, useHadamard);

  int numberGeoCandChecked = (int)geoSADCostList.size();
  int geoNumMrgSATDCand = min(GEO_MAX_TRY_WEIGHTED_SATD, numberGeoCandChecked);
  int numStoredCands = geoNumMrgSATDCand;
  for (uint8_t candidateIdx = 0; candidateIdx < numberGeoCandChecked; candidateIdx++)
  {
    int splitDir = geoSplitDirList[candidateIdx];
    int mergeCand0 = geoMergeCand0[candidateIdx];
    int mergeCand1 = geoMergeCand1[candidateIdx];
    bool mmvdFlag0 = false;
    bool mmvdFlag1 = false;

#if JVET_AA0058_GPM_ADP_BLD
    for (uint8_t bldIdx = 0; bldIdx < GEO_NUM_BLD; bldIdx++)
    {
      geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx] = m_acGeoWeightedBuffer[candidateIdx * GEO_NUM_BLD + bldIdx].getBuf(localUnitArea);
#if JVET_Y0065_GPM_INTRA
      int isIntra0 = (mergeCand0 >= GEO_MAX_NUM_UNI_CANDS) ? 1 : 0;
      int isIntra1 = (mergeCand1 >= GEO_MAX_NUM_UNI_CANDS) ? 1 : 0;
      int candidateSAD = candidateIdx * GEO_NUM_BLD + bldIdx;
      if (isIntra0 || isIntra1)
      {
        PelUnitBuf predSrc0, predSrc1;
        if (isIntra0)
        {
          int intraIdx0 = mergeCand0 - GEO_MAX_NUM_UNI_CANDS;
          int rdoBuffer = intraRDOBufIdx[geoIntraMPMList[splitDir][0][intraIdx0]];
          predSrc0 = geoIntraBuffer[rdoBuffer];
        }
        else
        {
          predSrc0 = geoTempBuf[mergeCand0];
        }
        if (isIntra1)
        {
          int intraIdx1 = mergeCand1 - GEO_MAX_NUM_UNI_CANDS;
          int rdoBuffer = intraRDOBufIdx[geoIntraMPMList[splitDir][1][intraIdx1]];
          predSrc1 = geoIntraBuffer[rdoBuffer];
        }
        else
        {
          predSrc1 = geoTempBuf[mergeCand1];
        }
        if (pu.cs->slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
        {
          if (!isIntra0) // Inter+Intra
          {
            geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx].Y().rspSignal(predSrc0.Y(), m_pcReshape->getFwdLUT());
            predSrc0 = geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx];
          }
          else if (!isIntra1) // Intra+Inter
          {
            geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx].Y().rspSignal(predSrc1.Y(), m_pcReshape->getFwdLUT());
            predSrc1 = geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx];
          }
          m_pcInterSearch->weightedGeoBlkRounded(pu, splitDir, bldIdx, CHANNEL_TYPE_LUMA, geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx], predSrc0, predSrc1);
          candidateSAD = GEO_MAX_TRY_WEIGHTED_SAD * GEO_NUM_BLD;
          geoCombinations[candidateSAD] = m_acGeoWeightedBuffer[candidateSAD].getBuf(localUnitArea);
          geoCombinations[candidateSAD].Y().rspSignal(geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx].Y(), m_pcReshape->getInvLUT());
        }
        else
        {
          m_pcInterSearch->weightedGeoBlkRounded(pu, splitDir, bldIdx, CHANNEL_TYPE_LUMA, geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx], predSrc0, predSrc1);
        }
      }
      else
#endif
        m_pcInterSearch->weightedGeoBlk(pu, splitDir, bldIdx, CHANNEL_TYPE_LUMA, geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx], geoBuffer[mergeCand0], geoBuffer[mergeCand1]);
#if JVET_Y0065_GPM_INTRA
      distParamSAD2.cur = geoCombinations[candidateSAD].Y();
#else
      distParamSAD2.cur = geoCombinations[candidateIdx].Y();
#endif
      Distortion sad = distParamSAD2.distFunc(distParamSAD2);

#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
      int geoSyntaxMode = std::numeric_limits<uint8_t>::max();
      if (sps.getUseAltGPMSplitModeCode())
      {
        geoSyntaxMode = m_pcInterSearch->convertGeoSplitModeToSyntax(splitDir, mergeCand0, mergeCand1);
        CHECK(geoSyntaxMode < 0 || geoSyntaxMode >= GEO_NUM_SIG_PARTMODE, "Invalid GEO split direction!");
      }
#endif
#if JVET_Y0065_GPM_INTRA
      double updateCost =
#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
                          (geoSyntaxMode == std::numeric_limits<uint8_t>::max() ? geoModeCost[splitDir] : geoSigModeCost[geoSyntaxMode])
#else
                          geoModeCost[splitDir]
#endif
                        + geoMMVDFlagCost[mmvdFlag0] + geoIntraFlag0Cost[isIntra0] + geoMMVDFlagCost[mmvdFlag1] + geoIntraFlag1Cost[isIntra0][isIntra1];
      int intraIdx0 = mergeCand0 - GEO_MAX_NUM_UNI_CANDS;
      int intraIdx1 = mergeCand1 - GEO_MAX_NUM_UNI_CANDS;
      updateCost += (isIntra0 ? geoIntraIdxCost[intraIdx0] : geoMergeIdxCost[mergeCand0]);
      updateCost += (isIntra1 ? geoIntraIdxCost[intraIdx1] : ((m_fastGpmMmvdSearch && !isIntra0) ? geoMergeIdxCost[mergeCand1 > mergeCand0 ? (mergeCand1 - 1) : mergeCand1] : geoMergeIdxCost[mergeCand1]));
#else
      double updateCost =
#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
                          (geoSyntaxMode == std::numeric_limits<uint8_t>::max() ? geoModeCost[splitDir] : geoSigModeCost[geoSyntaxMode])
#else
                          geoModeCost[splitDir]
#endif
                        + geoMergeIdxCost[mergeCand0] + (m_fastGpmMmvdSearch ? geoMergeIdxCost[mergeCand1 > mergeCand0 ? (mergeCand1 - 1) : mergeCand1] : geoMergeIdxCost[mergeCand1]) + geoMMVDFlagCost[mmvdFlag0] + geoMMVDFlagCost[mmvdFlag1];
#endif
#if TM_MRG
#if JVET_Y0065_GPM_INTRA
      if (sps.getUseDMVDMode() && !isIntra0 && !isIntra1)
#else
      if (sps.getUseDMVDMode())
#endif
      {
        updateCost += geoTMFlagCost[0];
      }
#endif
      updateCost += geoBldFlagCost[bldIdx];
      updateCost += (double)sad;
      orderCandList(candidateIdx, true, splitDir, updateCost, bldIdx, geoRdModeList, isNonMMVDListIdx, geoPartitionModeList, geocandCostList, geoBldList, numStoredCands);
    }
#else
    geoCombinations[candidateIdx] = m_acGeoWeightedBuffer[candidateIdx].getBuf(localUnitArea);
#if JVET_Y0065_GPM_INTRA
    int isIntra0 = (mergeCand0 >= GEO_MAX_NUM_UNI_CANDS) ? 1 : 0;
    int isIntra1 = (mergeCand1 >= GEO_MAX_NUM_UNI_CANDS) ? 1 : 0;
    uint8_t candidateSAD = candidateIdx;
    if (isIntra0 || isIntra1)
    {
      PelUnitBuf predSrc0, predSrc1;
      if (isIntra0)
      {
        int intraIdx0 = mergeCand0 - GEO_MAX_NUM_UNI_CANDS;
        int rdoBuffer = intraRDOBufIdx[geoIntraMPMList[splitDir][0][intraIdx0]];
        predSrc0 = geoIntraBuffer[rdoBuffer];
      }
      else
      {
        predSrc0 = geoTempBuf[mergeCand0];
      }
      if (isIntra1)
      {
        int intraIdx1 = mergeCand1 - GEO_MAX_NUM_UNI_CANDS;
        int rdoBuffer = intraRDOBufIdx[geoIntraMPMList[splitDir][1][intraIdx1]];
        predSrc1 = geoIntraBuffer[rdoBuffer];
      }
      else
      {
        predSrc1 = geoTempBuf[mergeCand1];
      }
      if (pu.cs->slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
      {
        if (!isIntra0) // Inter+Intra
        {
          geoCombinations[candidateIdx].Y().rspSignal(predSrc0.Y(), m_pcReshape->getFwdLUT());
          predSrc0 = geoCombinations[candidateIdx];
        }
        else if (!isIntra1) // Intra+Inter
        {
          geoCombinations[candidateIdx].Y().rspSignal(predSrc1.Y(), m_pcReshape->getFwdLUT());
          predSrc1 = geoCombinations[candidateIdx];
        }
        m_pcInterSearch->weightedGeoBlkRounded(pu, splitDir, CHANNEL_TYPE_LUMA, geoCombinations[candidateIdx], predSrc0, predSrc1);
        candidateSAD = GEO_MAX_TRY_WEIGHTED_SAD;
        geoCombinations[GEO_MAX_TRY_WEIGHTED_SAD] = m_acGeoWeightedBuffer[GEO_MAX_TRY_WEIGHTED_SAD].getBuf(localUnitArea);
        geoCombinations[GEO_MAX_TRY_WEIGHTED_SAD].Y().rspSignal(geoCombinations[candidateIdx].Y(), m_pcReshape->getInvLUT());
      }
      else
      {
        m_pcInterSearch->weightedGeoBlkRounded(pu, splitDir, CHANNEL_TYPE_LUMA, geoCombinations[candidateIdx], predSrc0, predSrc1);
      }
    }
    else
#endif
    m_pcInterSearch->weightedGeoBlk(pu, splitDir, CHANNEL_TYPE_LUMA, geoCombinations[candidateIdx], geoBuffer[mergeCand0], geoBuffer[mergeCand1]);
#if JVET_Y0065_GPM_INTRA
    distParamSAD2.cur = geoCombinations[candidateSAD].Y();
#else
    distParamSAD2.cur = geoCombinations[candidateIdx].Y();
#endif
    Distortion sad = distParamSAD2.distFunc(distParamSAD2);

#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
    int geoSyntaxMode = std::numeric_limits<uint8_t>::max();
    if(sps.getUseAltGPMSplitModeCode())
    {
      geoSyntaxMode = m_pcInterSearch->convertGeoSplitModeToSyntax(splitDir, mergeCand0, mergeCand1);
      CHECK(geoSyntaxMode < 0 || geoSyntaxMode >= GEO_NUM_SIG_PARTMODE, "Invalid GEO split direction!");
    }
#endif
#if JVET_Y0065_GPM_INTRA
    double updateCost =
#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
                        (geoSyntaxMode == std::numeric_limits<uint8_t>::max() ? geoModeCost[splitDir] : geoSigModeCost[geoSyntaxMode])
#else
                        geoModeCost[splitDir]
#endif
                      + geoMMVDFlagCost[mmvdFlag0] + geoIntraFlag0Cost[isIntra0] + geoMMVDFlagCost[mmvdFlag1] + geoIntraFlag1Cost[isIntra0][isIntra1];
    int intraIdx0 = mergeCand0 - GEO_MAX_NUM_UNI_CANDS;
    int intraIdx1 = mergeCand1 - GEO_MAX_NUM_UNI_CANDS;
    updateCost += (isIntra0 ? geoIntraIdxCost[intraIdx0] : geoMergeIdxCost[mergeCand0]);
    updateCost += (isIntra1 ? geoIntraIdxCost[intraIdx1] : ((m_fastGpmMmvdSearch && !isIntra0) ? geoMergeIdxCost[mergeCand1 > mergeCand0 ? (mergeCand1 - 1) : mergeCand1] : geoMergeIdxCost[mergeCand1]));
#else
    double updateCost =
#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
                        (geoSyntaxMode == std::numeric_limits<uint8_t>::max() ? geoModeCost[splitDir] : geoSigModeCost[geoSyntaxMode])
#else
                        geoModeCost[splitDir]
#endif
                      + geoMergeIdxCost[mergeCand0] + (m_fastGpmMmvdSearch ? geoMergeIdxCost[mergeCand1 > mergeCand0 ? (mergeCand1 - 1) : mergeCand1] : geoMergeIdxCost[mergeCand1]) + geoMMVDFlagCost[mmvdFlag0] + geoMMVDFlagCost[mmvdFlag1];
#endif
#if TM_MRG
#if JVET_Y0065_GPM_INTRA
    if (sps.getUseDMVDMode() && !isIntra0 && !isIntra1)
#else
    if (sps.getUseDMVDMode())
#endif
    {
      updateCost += geoTMFlagCost[0];
    }
#endif
    updateCost += (double)sad;
    orderCandList(candidateIdx, true, splitDir, updateCost, geoRdModeList, isNonMMVDListIdx, geoPartitionModeList, geocandCostList, numStoredCands);
#endif
  }

  for (uint8_t i = 1; i < geoNumMrgSATDCand; i++)
  {
#if MERGE_ENC_OPT
    if (geocandCostList[i] > MRG_FAST_RATIO * geocandCostList[0] || geocandCostList[i] > getMergeBestSATDCost())
#else
    if (geocandCostList[i] > MRG_FAST_RATIO * geocandCostList[0] || geocandCostList[i] > getMergeBestSATDCost() || geocandCostList[i] > getAFFBestSATDCost())
#endif
    {
      geoNumMrgSATDCand = i;
      break;
    }
  }

#if JVET_Y0065_GPM_INTRA
  for (uint8_t i = 0; i < geoNumMrgSATDCand; i++)
#else
  for (uint8_t i = 0; i < geoNumMrgSATDCand && isChromaEnabled(pu.chromaFormat); i++)
#endif
  {
    uint8_t candidateIdx = geoRdModeList[i];
    int splitDir = geoSplitDirList[candidateIdx];
    int mergeCand0 = geoMergeCand0[candidateIdx];
    int mergeCand1 = geoMergeCand1[candidateIdx];
#if JVET_AA0058_GPM_ADP_BLD
    uint8_t bldIdx = geoBldList[i];
#endif

#if JVET_Y0065_GPM_INTRA
    PelUnitBuf predSrc0, predSrc1;
    int isIntra0 = (mergeCand0 >= GEO_MAX_NUM_UNI_CANDS) ? 1 : 0;
    int isIntra1 = (mergeCand1 >= GEO_MAX_NUM_UNI_CANDS) ? 1 : 0;
    if (!isChromaEnabled(pu.chromaFormat) && !isIntra0 && !isIntra1)
    {
      continue;
    }
    if (isIntra0)
    {
      int intraIdx0 = mergeCand0 - GEO_MAX_NUM_UNI_CANDS;
      uint8_t intraPred = geoIntraMPMList[splitDir][0][intraIdx0];
      int rdoBuffer = intraRDOBufIdx[intraPred];
      if (isChromaEnabled(pu.chromaFormat) && !isGeoIntraChromaAvail[rdoBuffer])
      {
        pu.intraDir[1] = intraPred;
        pu.gpmIntraFlag = true;
        m_pcIntraSearch->initIntraPatternChType(*pu.cu, pu.Cb());
        m_pcIntraSearch->predIntraAng(COMPONENT_Cb, geoIntraBuffer[rdoBuffer].Cb(), pu);
        m_pcIntraSearch->initIntraPatternChType(*pu.cu, pu.Cr());
        m_pcIntraSearch->predIntraAng(COMPONENT_Cr, geoIntraBuffer[rdoBuffer].Cr(), pu);
        pu.gpmIntraFlag = false;
        isGeoIntraChromaAvail[rdoBuffer] = 2;
      }
      predSrc0 = geoIntraBuffer[rdoBuffer];
    }
    else
    {
    if (isChromaEnabled(pu.chromaFormat) && !isGeoChromaAvail[mergeCand0])
#else
    if (!isGeoChromaAvail[mergeCand0])
#endif
    {
#if TM_MRG
      mergeCtx[GEO_TM_OFF].setMergeInfo(pu, mergeCand0);
#else
      mergeCtx.setMergeInfo(pu, mergeCand0);
#endif
      m_pcInterSearch->motionCompensation(pu, geoBuffer[mergeCand0], REF_PIC_LIST_X, false, true);
#if JVET_Y0065_GPM_INTRA
      isGeoChromaAvail[mergeCand0] = 1;
#else
      isGeoChromaAvail[mergeCand0] = true;
#endif
    }

#if JVET_Y0065_GPM_INTRA
      predSrc0 = geoTempBuf[mergeCand0];
    }

    if (isIntra1)
    {
      int intraIdx1 = mergeCand1 - GEO_MAX_NUM_UNI_CANDS;
      uint8_t intraPred = geoIntraMPMList[splitDir][1][intraIdx1];
      int rdoBuffer = intraRDOBufIdx[intraPred];
      if (isChromaEnabled(pu.chromaFormat) && !isGeoIntraChromaAvail[rdoBuffer])
      {
        pu.intraDir[1] = intraPred;
        pu.gpmIntraFlag = true;
        m_pcIntraSearch->initIntraPatternChType(*pu.cu, pu.Cb());
        m_pcIntraSearch->predIntraAng(COMPONENT_Cb, geoIntraBuffer[rdoBuffer].Cb(), pu);
        m_pcIntraSearch->initIntraPatternChType(*pu.cu, pu.Cr());
        m_pcIntraSearch->predIntraAng(COMPONENT_Cr, geoIntraBuffer[rdoBuffer].Cr(), pu);
        pu.gpmIntraFlag = false;
        isGeoIntraChromaAvail[rdoBuffer] = 2;
      }
      predSrc1 = geoIntraBuffer[rdoBuffer];
    }
    else
    {
    if (isChromaEnabled(pu.chromaFormat) && !isGeoChromaAvail[mergeCand1])
#else
    if (!isGeoChromaAvail[mergeCand1])
#endif
    {
#if TM_MRG
      mergeCtx[GEO_TM_OFF].setMergeInfo(pu, mergeCand1);
#else
      mergeCtx.setMergeInfo(pu, mergeCand1);
#endif
      m_pcInterSearch->motionCompensation(pu, geoBuffer[mergeCand1], REF_PIC_LIST_X, false, true);
#if JVET_Y0065_GPM_INTRA
      isGeoChromaAvail[mergeCand1] = 1;
#else
      isGeoChromaAvail[mergeCand1] = true;
#endif
    }
#if JVET_Y0065_GPM_INTRA
      predSrc1 = geoTempBuf[mergeCand1];
    }
#endif

#if JVET_AA0058_GPM_ADP_BLD
    geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx] = m_acGeoWeightedBuffer[candidateIdx * GEO_NUM_BLD + bldIdx].getBuf(localUnitArea);
#else
    geoCombinations[candidateIdx] = m_acGeoWeightedBuffer[candidateIdx].getBuf(localUnitArea);
#endif
#if JVET_Y0065_GPM_INTRA
    if (isIntra0 || isIntra1)
    {
      if (isChromaEnabled(pu.chromaFormat))
      {
        if (!isIntra0)
        {
          if (isGeoChromaAvail[mergeCand0] < 2)
          {
            geoTempBuf[mergeCand0].Cb().roundToOutputBitdepth(geoBuffer[mergeCand0].Cb(), cu.slice->clpRng(COMPONENT_Cb));
            geoTempBuf[mergeCand0].Cr().roundToOutputBitdepth(geoBuffer[mergeCand0].Cr(), cu.slice->clpRng(COMPONENT_Cr));
            isGeoChromaAvail[mergeCand0] = 2;
          }
        }
        else if (!isIntra1)
        {
          if (isGeoChromaAvail[mergeCand1] < 2)
          {
            geoTempBuf[mergeCand1].Cb().roundToOutputBitdepth(geoBuffer[mergeCand1].Cb(), cu.slice->clpRng(COMPONENT_Cb));
            geoTempBuf[mergeCand1].Cr().roundToOutputBitdepth(geoBuffer[mergeCand1].Cr(), cu.slice->clpRng(COMPONENT_Cr));
            isGeoChromaAvail[mergeCand1] = 2;
          }
        }
      }

      int interMergeCand = isIntra0 ? mergeCand1 : mergeCand0;
#if TM_MRG
      mergeCtx[GEO_TM_OFF].setMergeInfo(pu, interMergeCand);
#else
      mergeCtx.setMergeInfo(pu, interMergeCand );
#endif
#if ENABLE_OBMC
#if JVET_W0123_TIMD_FUSION
      PU::spanMotionInfo2(pu);
#else
      PU::spanMotionInfo(pu);
#endif
#if JVET_AA0058_GPM_ADP_BLD
      if (!isIntra0)
      {
        geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx].copyFrom(predSrc0);
        predSrc0 = geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx];
      }
      else
      {
        geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx].copyFrom(predSrc1);
        predSrc1 = geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx];
      }
#else
      if (!isIntra0)
      {
        geoCombinations[candidateIdx].copyFrom(predSrc0);
        predSrc0 = geoCombinations[candidateIdx];
      }
      else
      {
        geoCombinations[candidateIdx].copyFrom(predSrc1);
        predSrc1 = geoCombinations[candidateIdx];
      }
#endif
      cu.isobmcMC = true;
      cu.obmcFlag = true;
      m_pcInterSearch->subBlockOBMC(pu, !isIntra0 ? &predSrc0 : &predSrc1);
      cu.isobmcMC = false;
#endif

      if (pu.cs->slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
      {
        if (!isIntra0) // Inter+Intra
        {
          predSrc0.Y().rspSignal(predSrc0.Y(), m_pcReshape->getFwdLUT());
        }
        else if (!isIntra1) // Intra+Inter
        {
          predSrc1.Y().rspSignal(predSrc1.Y(), m_pcReshape->getFwdLUT());
        }
      }
#if JVET_AA0058_GPM_ADP_BLD
      m_pcInterSearch->weightedGeoBlkRounded(pu, splitDir, bldIdx, CHANNEL_TYPE_LUMA, geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx], predSrc0, predSrc1);
      m_pcInterSearch->weightedGeoBlkRounded(pu, splitDir, bldIdx, CHANNEL_TYPE_CHROMA, geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx], predSrc0, predSrc1);
#else
      m_pcInterSearch->weightedGeoBlkRounded(pu, splitDir, CHANNEL_TYPE_LUMA, geoCombinations[candidateIdx], predSrc0, predSrc1);
      m_pcInterSearch->weightedGeoBlkRounded(pu, splitDir, CHANNEL_TYPE_CHROMA, geoCombinations[candidateIdx], predSrc0, predSrc1);
#endif
    }
    else
#endif
#if JVET_AA0058_GPM_ADP_BLD
    m_pcInterSearch->weightedGeoBlk(pu, splitDir, bldIdx, CHANNEL_TYPE_CHROMA, geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx], geoBuffer[mergeCand0], geoBuffer[mergeCand1]);
#else
    m_pcInterSearch->weightedGeoBlk(pu, splitDir, CHANNEL_TYPE_CHROMA, geoCombinations[candidateIdx], geoBuffer[mergeCand0], geoBuffer[mergeCand1]);
#endif
  }

  bool geocandHasNoResidual[GEO_MAX_TRY_WEIGHTED_SAD];
  bool bestIsSkip = false;
  std::memset(geocandHasNoResidual, false, GEO_MAX_TRY_WEIGHTED_SAD * sizeof(bool));

  m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;
  tempCS->initStructData(encTestMode.qp);
  uint8_t iteration = 2, iterationBegin = 0;
  for (uint8_t noResidualPass = iterationBegin; noResidualPass < iteration; ++noResidualPass)
  {
    for (uint8_t mrgHADIdx = 0; mrgHADIdx < geoNumMrgSATDCand; mrgHADIdx++)
    {
      uint8_t candidateIdx = geoRdModeList[mrgHADIdx];
      if (((noResidualPass != 0) && geocandHasNoResidual[candidateIdx])
        || ((noResidualPass == 0) && bestIsSkip))
      {
        continue;
      }
      CodingUnit &cu = tempCS->addCU(tempCS->area, pm.chType);
      pm.setCUData(cu);
      cu.predMode = MODE_INTER;
      cu.slice = tempCS->slice;
      cu.tileIdx = tempCS->pps->getTileIdx(tempCS->area.lumaPos());
      cu.qp = encTestMode.qp;
      cu.affine = false;
      cu.mtsFlag = false;
#if INTER_LIC
      cu.LICFlag = false;
#endif
      cu.BcwIdx = BCW_DEFAULT;
      cu.geoFlag = true;
      cu.imv = 0;
      cu.mmvdSkip = false;
      cu.skip = false;
      cu.mipFlag = false;
      cu.bdpcmMode = 0;
      PredictionUnit &pu = tempCS->addPU(cu, pm.chType);
      pu.mergeFlag = true;
      pu.regularMergeFlag = false;
      pu.geoSplitDir = geoSplitDirList[candidateIdx];
      pu.geoMergeIdx0 = geoMergeCand0[candidateIdx];
      pu.geoMergeIdx1 = geoMergeCand1[candidateIdx];
#if JVET_Y0065_GPM_INTRA
      pu.gpmIntraFlag = pu.geoMergeIdx0 >= GEO_MAX_NUM_UNI_CANDS || pu.geoMergeIdx1 >= GEO_MAX_NUM_UNI_CANDS;
      if (pu.geoMergeIdx0 >= GEO_MAX_NUM_UNI_CANDS)
      {
        memcpy(pu.intraMPM, geoIntraMPMList[pu.geoSplitDir][0], sizeof(uint8_t)*GEO_MAX_NUM_INTRA_CANDS);
      }
      if (pu.geoMergeIdx1 >= GEO_MAX_NUM_UNI_CANDS)
      {
        memcpy(pu.intraMPM+GEO_MAX_NUM_INTRA_CANDS, geoIntraMPMList[pu.geoSplitDir][1], sizeof(uint8_t)*GEO_MAX_NUM_INTRA_CANDS);
      }
#if ENABLE_DIMD
      cu.dimdMode = dimdMode;
#endif
#if JVET_W0123_TIMD_FUSION
      cu.timdMode = timdMode;
#endif
#endif
#if JVET_AA0058_GPM_ADP_BLD
      pu.geoBldIdx = geoBldList[mrgHADIdx];
#endif
#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
      if(sps.getUseAltGPMSplitModeCode())
      {
        int geoSyntaxMode = m_pcInterSearch->convertGeoSplitModeToSyntax(pu.geoSplitDir, pu.geoMergeIdx0, pu.geoMergeIdx1);
        CHECK(geoSyntaxMode < 0 || geoSyntaxMode >= GEO_NUM_SIG_PARTMODE, "Invalid GEO split direction!");
        pu.geoSyntaxMode = (uint8_t)geoSyntaxMode;
      }
#endif
#if TM_MRG
      pu.tmMergeFlag = false;
      pu.geoTmFlag0 = false;
      pu.geoTmFlag1 = false;
#endif
      pu.geoMMVDFlag0 = false;
      pu.geoMMVDFlag1 = false;

      pu.mmvdMergeFlag = false;
      pu.mmvdMergeIdx = MAX_UCHAR;
#if TM_MRG
      MergeCtx *mergeTmCtx0 = nullptr;
      MergeCtx *mergeTmCtx1 = nullptr;
#if JVET_AA0058_GPM_ADP_BLD
      PU::spanGeoMMVDMotionInfo(pu, mergeCtx[GEO_TM_OFF], *mergeTmCtx0, *mergeTmCtx1, pu.geoSplitDir, pu.geoMergeIdx0, pu.geoMergeIdx1, pu.geoTmFlag0, pu.geoMMVDFlag0, pu.geoMMVDIdx0, pu.geoTmFlag1, pu.geoMMVDFlag1, pu.geoMMVDIdx1, pu.geoBldIdx);
#else
      PU::spanGeoMMVDMotionInfo(pu, mergeCtx[GEO_TM_OFF], *mergeTmCtx0, *mergeTmCtx1, pu.geoSplitDir, pu.geoMergeIdx0, pu.geoMergeIdx1, pu.geoTmFlag0, pu.geoMMVDFlag0, pu.geoMMVDIdx0, pu.geoTmFlag1, pu.geoMMVDFlag1, pu.geoMMVDIdx1);
#endif
#else
#if JVET_AA0058_GPM_ADP_BLD
      PU::spanGeoMMVDMotionInfo(pu, mergeCtx, pu.geoSplitDir, pu.geoMergeIdx0, pu.geoMergeIdx1, pu.geoMMVDFlag0, pu.geoMMVDIdx0, pu.geoMMVDFlag1, pu.geoMMVDIdx1, pu.geoBldIdx);
#else
      PU::spanGeoMMVDMotionInfo(pu, mergeCtx, pu.geoSplitDir, pu.geoMergeIdx0, pu.geoMergeIdx1, pu.geoMMVDFlag0, pu.geoMMVDIdx0, pu.geoMMVDFlag1, pu.geoMMVDIdx1);
#endif
#endif
#if JVET_AA0058_GPM_ADP_BLD
      tempCS->getPredBuf().copyFrom(geoCombinations[candidateIdx * GEO_NUM_BLD + pu.geoBldIdx]);
#else
      tempCS->getPredBuf().copyFrom(geoCombinations[candidateIdx]);
#endif
#if ENABLE_OBMC
#if JVET_Y0065_GPM_INTRA
      if (!pu.gpmIntraFlag)
      {
#endif
      cu.isobmcMC = true;
      cu.obmcFlag = true;
      m_pcInterSearch->subBlockOBMC(pu);
      cu.isobmcMC = false;
#if JVET_Y0065_GPM_INTRA
      }
#endif
#endif

      xEncodeInterResidual(tempCS, bestCS, pm, encTestMode, noResidualPass, (noResidualPass == 0 ? &geocandHasNoResidual[candidateIdx] : NULL));

      if (m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip)
      {
        bestIsSkip = bestCS->getCU(pm.chType)->rootCbf == 0;
      }
      tempCS->initStructData(encTestMode.qp);
    }
  }

  CodingUnit *bestCU = bestCS->getCU(CHANNEL_TYPE_LUMA);
  bool skipGPMMMVD = false;
  if (geoNumMrgSATDCand > 0)
  {
    if (bestCU->skip && !bestCU->geoFlag && !bestCU->affine && !bestCU->mmvdSkip && !bestCU->firstPU->mmvdMergeFlag)
    {
      skipGPMMMVD = true;
    }
    else if (bestCU->affine && bestCU->skip && (bestCU->lwidth() >= 16 || bestCU->lheight() >= 16))
    {
      skipGPMMMVD = true;
    }
  }

  bool isBaseMergeCandIncluded[GEO_MAX_NUM_UNI_CANDS];
  std::memset(isBaseMergeCandIncluded, false, GEO_MAX_NUM_UNI_CANDS * sizeof(bool));
  bool isGPMModeIncludedForMMVD[GEO_NUM_PARTITION_MODE];
  std::memset(isGPMModeIncludedForMMVD, false, GEO_NUM_PARTITION_MODE * sizeof(bool));

  if (!skipGPMMMVD)
  {
    skipGPMMMVD = (selGeoModeRDList[0] > (bestNormalMrgCost * 1.1));
  }

  if (!skipGPMMMVD)
  {
    if (isSecondPass)
    {
      for (int i = 0; i < relatedCU.numGeoDirCand; i++)
      {
        isGPMModeIncludedForMMVD[relatedCU.geoDirCandList[i]] = true;
        if (m_fastGpmMmvdRelatedCU)
        {
#if JVET_Y0065_GPM_INTRA
          if (relatedCU.geoMrgIdx0List[i] < GEO_MAX_NUM_UNI_CANDS)
#endif
          isBaseMergeCandIncluded[relatedCU.geoMrgIdx0List[i]] = true;
#if JVET_Y0065_GPM_INTRA
          if (relatedCU.geoMrgIdx1List[i] < GEO_MAX_NUM_UNI_CANDS)
#endif
          isBaseMergeCandIncluded[relatedCU.geoMrgIdx1List[i]] = true;
        }
      }
      if (!m_fastGpmMmvdRelatedCU)
      {
        std::memset(isBaseMergeCandIncluded, true, GEO_MAX_NUM_UNI_CANDS * sizeof(bool));
      }
    }
    else
    {
      double dirCostThresh = (selGeoModeRDList[0] * 1.2);
      isGPMModeIncludedForMMVD[selGeoModeList[0]] = true;
      isBaseMergeCandIncluded[mergeCandList0[selGeoModeList[0]][0]] = true;
      isBaseMergeCandIncluded[mergeCandList1[selGeoModeList[0]][0]] = true;

      for (int i = 1; i < m_maxNumGPMDirFirstPass; i++)
      {
        if (selGeoModeRDList[i] > dirCostThresh)
        {
          break;
        }
        else
        {
          isGPMModeIncludedForMMVD[selGeoModeList[i]] = true;
          isBaseMergeCandIncluded[mergeCandList0[selGeoModeList[i]][0]] = true;
          isBaseMergeCandIncluded[mergeCandList1[selGeoModeList[i]][0]] = true;
        }
      }
      if (m_includeMoreMMVDCandFirstPass)
      {
        int num = 0;
        // Add more candidates taken from the best results of the weighted-blended non-MMVD combinations
        num = min((int)geocandCostList.size(), GEO_MAX_TRY_WEIGHTED_SATD);
        for (int i = 0; i < num; i++)
        {
          if (m_fastGpmMmvdSearch && (geocandCostList[i] > dirCostThresh))
          {
            break;
          }
          else
          {
            isGPMModeIncludedForMMVD[geoPartitionModeList[i]] = true;
#if JVET_Y0065_GPM_INTRA
            if (geoMergeCand0[geoRdModeList[i]] < GEO_MAX_NUM_UNI_CANDS)
#endif
            isBaseMergeCandIncluded[geoMergeCand0[geoRdModeList[i]]] = true;
#if JVET_Y0065_GPM_INTRA
            if (geoMergeCand1[geoRdModeList[i]] < GEO_MAX_NUM_UNI_CANDS)
#endif
            isBaseMergeCandIncluded[geoMergeCand1[geoRdModeList[i]]] = true;
          }
        }
      }
    }
  }

  if (!skipGPMMMVD)
  {
    CodingUnit &cu = tempCS->addCU(tempCS->area, pm.chType);
    pm.setCUData(cu);
    cu.predMode = MODE_INTER;
    cu.slice = tempCS->slice;
    cu.tileIdx = tempCS->pps->getTileIdx(tempCS->area.lumaPos());
    cu.qp = encTestMode.qp;
    cu.affine = false;
    cu.mtsFlag = false;
#if INTER_LIC
    cu.LICFlag = false;
#endif
    cu.BcwIdx = BCW_DEFAULT;
    cu.geoFlag = true;
    cu.imv = 0;
    cu.mmvdSkip = false;
    cu.skip = false;
    cu.mipFlag = false;
    cu.bdpcmMode = 0;

    PredictionUnit &pu = tempCS->addPU(cu, pm.chType);
    pu.mergeFlag = true;
    pu.regularMergeFlag = false;
#if TM_MRG || (JVET_Z0084_IBC_TM && IBC_TM_MRG)
    pu.tmMergeFlag = false;
#endif
    bool simpleGPMMMVDStep = (m_pcEncCfg->getIntraPeriod() == -1);
    double mmvdMrgCost[GEO_MAX_NUM_UNI_CANDS][GPM_EXT_MMVD_MAX_REFINE_NUM];
    for (uint8_t mergeCand = 0; mergeCand < maxNumMergeCandidates; mergeCand++)
    {
      if (mrgDuplicated[mergeCand])
      {
        continue;
      }
      if (!isBaseMergeCandIncluded[mergeCand])
      {
        continue;
      }
      for (uint8_t mmvdCand = 0; mmvdCand < (extMMVD ? GPM_EXT_MMVD_MAX_REFINE_NUM : GPM_MMVD_MAX_REFINE_NUM); mmvdCand++)
      {
        if (simpleGPMMMVDStep)
        {
          int mmvdStep = (extMMVD ? (mmvdCand >> 3) : (mmvdCand >> 2));
          if (mmvdStep >= 5 && (!m_fastGpmMmvdSearch || (m_fastGpmMmvdSearch && !isSecondPass)))
          {
            continue;
          }
        }
        geoMMVDBuf[mergeCand][mmvdCand] = m_acGeoMMVDBuffer[mergeCand][mmvdCand].getBuf(localUnitArea);
#if TM_MRG
        mergeCtx[GEO_TM_OFF].setGeoMmvdMergeInfo(pu, mergeCand, mmvdCand);
#else
        mergeCtx.setGeoMmvdMergeInfo(pu, mergeCand, mmvdCand);
#endif
        if (m_pcEncCfg->getMCTSEncConstraint() && (!(MCTSHelper::checkMvBufferForMCTSConstraint(pu))))
        {
          tempCS->initStructData(encTestMode.qp);
          return;
        }
        m_pcInterSearch->motionCompensation(pu, geoMMVDBuf[mergeCand][mmvdCand], REF_PIC_LIST_X, true, false);
        geoMMVDTempBuf[mergeCand][mmvdCand] = m_acGeoMMVDTmpBuffer[mergeCand][mmvdCand].getBuf(localUnitArea);
        geoMMVDTempBuf[mergeCand][mmvdCand].Y().copyFrom(geoMMVDBuf[mergeCand][mmvdCand].Y());
        geoMMVDTempBuf[mergeCand][mmvdCand].Y().roundToOutputBitdepth(geoMMVDTempBuf[mergeCand][mmvdCand].Y(), cu.slice->clpRng(COMPONENT_Y));
        distParamWholeBlk.cur.buf = geoMMVDTempBuf[mergeCand][mmvdCand].Y().buf;
        distParamWholeBlk.cur.stride = geoMMVDTempBuf[mergeCand][mmvdCand].Y().stride;
        sadMMVDWholeBlk[mergeCand][mmvdCand] = distParamWholeBlk.distFunc(distParamWholeBlk);
        mmvdMrgCost[mergeCand][mmvdCand] = sadMMVDWholeBlk[mergeCand][mmvdCand] + geoMergeIdxCost[mergeCand] + geoMMVDFlagCost[1] + geoMMVDIdxCost[mmvdCand];
        if (mmvdMrgCost[mergeCand][mmvdCand] < bestMrgCost)
        {
          bestMrgCost = mmvdMrgCost[mergeCand][mmvdCand];
        }
      }
    }

    double mrgCostThres = (bestMrgCost * 3.0);
    for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++)
    {
      if (!isGPMModeIncludedForMMVD[splitDir])
      {
        continue;
      }
      int16_t angle = g_GeoParams[splitDir][0];
      if (g_angle2mirror[angle] == 2)
      {
        stepX = 1;
        maskStride = -GEO_WEIGHT_MASK_SIZE;
        maskStride2 = -(int)cu.lwidth();
        SADmask = &g_globalGeoEncSADmask[g_angle2mask[g_GeoParams[splitDir][0]]][(GEO_WEIGHT_MASK_SIZE - 1 - g_weightOffset[splitDir][hIdx][wIdx][1]) * GEO_WEIGHT_MASK_SIZE + g_weightOffset[splitDir][hIdx][wIdx][0]];
      }
      else if (g_angle2mirror[angle] == 1)
      {
        stepX = -1;
        maskStride2 = cu.lwidth();
        maskStride = GEO_WEIGHT_MASK_SIZE;
        SADmask = &g_globalGeoEncSADmask[g_angle2mask[g_GeoParams[splitDir][0]]][g_weightOffset[splitDir][hIdx][wIdx][1] * GEO_WEIGHT_MASK_SIZE + (GEO_WEIGHT_MASK_SIZE - 1 - g_weightOffset[splitDir][hIdx][wIdx][0])];
      }
      else
      {
        stepX = 1;
        maskStride = GEO_WEIGHT_MASK_SIZE;
        maskStride2 = -(int)cu.lwidth();
        SADmask = &g_globalGeoEncSADmask[g_angle2mask[g_GeoParams[splitDir][0]]][g_weightOffset[splitDir][hIdx][wIdx][1] * GEO_WEIGHT_MASK_SIZE + g_weightOffset[splitDir][hIdx][wIdx][0]];
      }
      for (uint8_t mergeCand = 0; mergeCand < maxNumMergeCandidates; mergeCand++)
      {
        if (mrgDuplicated[mergeCand])
        {
          continue;
        }
        if (!isBaseMergeCandIncluded[mergeCand])
        {
          continue;
        }
        for (uint8_t mmvdCand = 0; mmvdCand < (extMMVD ? GPM_EXT_MMVD_MAX_REFINE_NUM : GPM_MMVD_MAX_REFINE_NUM); mmvdCand++)
        {
          if (simpleGPMMMVDStep)
          {
            int mmvdStep = (extMMVD ? (mmvdCand >> 3) : (mmvdCand >> 2));
            if (mmvdStep >= 5 && (!m_fastGpmMmvdSearch || (m_fastGpmMmvdSearch && !isSecondPass)))
            {
              continue;
            }
          }
          if (mmvdMrgCost[mergeCand][mmvdCand] > mrgCostThres)
          {
            continue;
          }
          m_pcRdCost->setDistParam(distParam, tempCS->getOrgBuf().Y(), geoMMVDTempBuf[mergeCand][mmvdCand].Y().buf, geoMMVDTempBuf[mergeCand][mmvdCand].Y().stride, SADmask, maskStride, stepX, maskStride2, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y);
          sadLarge = distParam.distFunc(distParam);
          double tempCost = (double)sadLarge + geoMergeIdxCost[mergeCand] + geoMMVDFlagCost[1] + geoMMVDIdxCost[mmvdCand];
          m_geoMMVDCostList.insert(splitDir, 0, mergeCand, (mmvdCand + 1), tempCost);
          sortCandList(tempCost, mergeCand, (mmvdCand + 1), sadCostList0[splitDir], mergeCandList0[splitDir], mmvdCandList0[splitDir], m_numCandPerPar);

          sadSmall = sadMMVDWholeBlk[mergeCand][mmvdCand] - sadLarge;
          tempCost = (double)sadSmall + geoMergeIdxCost[mergeCand] + geoMMVDFlagCost[1] + geoMMVDIdxCost[mmvdCand];
          m_geoMMVDCostList.insert(splitDir, 1, mergeCand, (mmvdCand + 1), tempCost);
          sortCandList(tempCost, mergeCand, (mmvdCand + 1), sadCostList1[splitDir], mergeCandList1[splitDir], mmvdCandList1[splitDir], m_numCandPerPar);
        }
      }
    }

    for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++)
    {
#if JVET_Y0065_GPM_INTRA
      int numCandMerge0 = min(m_numCandPerPar, (int)mergeCandList0[splitDir].size());
      int numCandIntra0 = (int)intraCandList0[splitDir].size();
      int numCandPart0 = numCandMerge0 + numCandIntra0;
      for (int candIdx0 = 0; candIdx0 < numCandPart0; candIdx0++)
      {
        int mergeCand0 = candIdx0 < numCandMerge0 ? mergeCandList0[splitDir][candIdx0] : intraCandList0[splitDir][candIdx0 - numCandMerge0];
        int mmvdCand0 = candIdx0 < numCandMerge0 ? mmvdCandList0[splitDir][candIdx0] : 0;
        int numCandMerge1 = min(m_numCandPerPar, (int)mergeCandList1[splitDir].size());
        int numCandIntra1 = candIdx0 < numCandMerge0 ? (int)intraCandList1[splitDir].size() : 0;
        int numCandPart1 = numCandMerge0 + numCandIntra1;
        int candStart1 = (bUseOnlyOneVector && candIdx0 < numCandMerge0) ? numCandMerge0 : 0;
        for (int candIdx1 = candStart1; candIdx1 < numCandPart1; candIdx1++)
        {
          int mergeCand1 = candIdx1 < numCandMerge1 ? mergeCandList1[splitDir][candIdx1] : intraCandList1[splitDir][candIdx1 - numCandMerge1];
          int mmvdCand1 = candIdx1 < numCandMerge1 ? mmvdCandList1[splitDir][candIdx1] : 0;
#else
      int numCandPart0 = min(m_numCandPerPar, (int)mergeCandList0[splitDir].size());
      int numCandPart1 = min(m_numCandPerPar, (int)mergeCandList1[splitDir].size());
      for (int candIdx0 = 0; candIdx0 < numCandPart0; candIdx0++)
      {
        for (int candIdx1 = 0; candIdx1 < numCandPart1; candIdx1++)
        {
          int mergeCand0 = mergeCandList0[splitDir][candIdx0];
          int mergeCand1 = mergeCandList1[splitDir][candIdx1];
          int mmvdCand0 = mmvdCandList0[splitDir][candIdx0];
          int mmvdCand1 = mmvdCandList1[splitDir][candIdx1];
#endif
#if TM_MRG
          bool geoTmFlag0 = (mmvdCand0 == (GPM_EXT_MMVD_MAX_REFINE_NUM + 1));
          bool geoTmFlag1 = (mmvdCand1 == (GPM_EXT_MMVD_MAX_REFINE_NUM + 1));
          CHECK(geoTmFlag0 || geoTmFlag1, "GPM TM has not been tested by far");
#endif
          if ((mmvdCand0 == 0) && (mmvdCand1 == 0))
          {
            continue;
          }
          if ((mmvdCand0 == mmvdCand1) && (mmvdCand0 > 0))
          {
            if (mergeCand0 == mergeCand1)
            {
              continue;
            }
          }

#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
          int geoSyntaxMode = std::numeric_limits<uint8_t>::max();
          if (sps.getUseAltGPMSplitModeCode())
          {
            m_pcInterSearch->setGeoSplitModeToSyntaxTable(pu, mergeCtxRegular, mergeCand0, mergeCtxRegular, mergeCand1
#if JVET_Y0065_GPM_INTRA
                                                        , m_pcIntraSearch
#endif
                                                        , mmvdCand0 - 1, mmvdCand1 - 1);
            geoSyntaxMode = m_pcInterSearch->convertGeoSplitModeToSyntax(splitDir, mergeCand0, mergeCand1, mmvdCand0 - 1, mmvdCand1 - 1);
            if (geoSyntaxMode == std::numeric_limits<uint8_t>::max())
            {
              continue;
            }
          }
#endif

          double tempCost = m_geoMMVDCostList.singleDistList[0][splitDir][mergeCand0][mmvdCand0].cost + m_geoMMVDCostList.singleDistList[1][splitDir][mergeCand1][mmvdCand1].cost;
          tempCost = tempCost 
#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
                   + (geoSyntaxMode == std::numeric_limits<uint8_t>::max() ? geoModeCost[splitDir] : geoSigModeCost[geoSyntaxMode]);
#else
                   + geoModeCost[splitDir];
#endif
#if TM_MRG
#if JVET_Y0065_GPM_INTRA
          if (sps.getUseDMVDMode() && mergeCand0 < GEO_MAX_NUM_UNI_CANDS && mergeCand1 < GEO_MAX_NUM_UNI_CANDS)
#else
          if (sps.getUseDMVDMode())
#endif
          {
            tempCost += geoTMFlagCost[0];
          }
#endif
          updateGeoMMVDCandList(tempCost, splitDir, mergeCand0, mergeCand1, mmvdCand0, mmvdCand1,
            geoSADCostList, geoSplitDirList, geoMergeCand0, geoMergeCand1, geoMmvdCand0, geoMmvdCand1, numSATDCands);
        }
      }
    }

#if TM_MRG
    uint8_t maxNumTmMrgCand = maxNumMergeCandidates;
    PelUnitBuf geoTmBuffer[GEO_TM_MAX_NUM_CANDS];
    PelUnitBuf geoTmTempBuf[GEO_TM_MAX_NUM_CANDS];
#if JVET_Y0065_GPM_INTRA
    if (sps.getUseDMVDMode() && !bUseOnlyOneVector)
#else
    if (sps.getUseDMVDMode())
#endif
    {
      for (int i = GEO_TM_SHAPE_AL; i < GEO_NUM_TM_MV_CAND; i++)
      {
        mergeCtx[i].numValidMergeCand = maxNumTmMrgCand;
        for (int idx = 0; idx < maxNumTmMrgCand; idx++)
        {
          if (mrgDuplicated[idx])
          {
            continue;
          }
          mergeCtx[i].BcwIdx[idx] = BCW_DEFAULT;
          mergeCtx[i].useAltHpelIf[idx] = false;
#if INTER_LIC
          mergeCtx[i].LICFlags[idx] = false;
#endif
          mergeCtx[i].interDirNeighbours[idx] = mergeCtx[GEO_TM_OFF].interDirNeighbours[idx];
          mergeCtx[i].mvFieldNeighbours[(idx << 1)].mv = mergeCtx[GEO_TM_OFF].mvFieldNeighbours[(idx << 1)].mv;
          mergeCtx[i].mvFieldNeighbours[(idx << 1) + 1].mv = mergeCtx[GEO_TM_OFF].mvFieldNeighbours[(idx << 1) + 1].mv;
          mergeCtx[i].mvFieldNeighbours[(idx << 1)].refIdx = mergeCtx[GEO_TM_OFF].mvFieldNeighbours[(idx << 1)].refIdx;
          mergeCtx[i].mvFieldNeighbours[(idx << 1) + 1].refIdx = mergeCtx[GEO_TM_OFF].mvFieldNeighbours[(idx << 1) + 1].refIdx;
        }
      }

      pu.tmMergeFlag = true;
      Distortion sadTmWholeBlk[GEO_TM_MAX_NUM_CANDS];
      for (uint8_t tmType = GEO_TM_SHAPE_AL; tmType < GEO_NUM_TM_MV_CAND; tmType++)
      {
        pu.geoTmType = tmType;
        for (uint8_t mrgIdx = 0; mrgIdx < maxNumTmMrgCand; mrgIdx++)
        {
          if (mrgDuplicated[mrgIdx])
          {
            continue;
          }
          uint8_t mergeCand = mrgIdx + (tmType - 1) * GEO_MAX_NUM_UNI_CANDS;
          mergeCtx[tmType].setMergeInfo(pu, mrgIdx);
          m_pcInterSearch->deriveTMMv(pu);
          mergeCtx[tmType].mvFieldNeighbours[(mrgIdx << 1)].mv = pu.mv[0];
          mergeCtx[tmType].mvFieldNeighbours[(mrgIdx << 1) + 1].mv = pu.mv[1];

          geoTmBuffer[mergeCand] = m_acGeoMergeTmpBuffer[mergeCand].getBuf(localUnitArea);
          m_pcInterSearch->motionCompensation(pu, geoTmBuffer[mergeCand]);

          // calculate SAD for each candidate
          geoTmTempBuf[mergeCand] = m_acGeoSADTmpBuffer[mergeCand].getBuf(localUnitArea);
          geoTmTempBuf[mergeCand].Y().copyFrom(geoTmBuffer[mergeCand].Y());
          geoTmTempBuf[mergeCand].Y().roundToOutputBitdepth(geoTmTempBuf[mergeCand].Y(), cu.slice->clpRng(COMPONENT_Y));
          distParamWholeBlk.cur.buf = geoTmTempBuf[mergeCand].Y().buf;
          distParamWholeBlk.cur.stride = geoTmTempBuf[mergeCand].Y().stride;
          sadTmWholeBlk[mergeCand] = distParamWholeBlk.distFunc(distParamWholeBlk);
        }
      }
      pu.tmMergeFlag = false;

      for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++)
      {
        int16_t angle = g_GeoParams[splitDir][0];
        if (g_angle2mirror[angle] == 2)
        {
          stepX = 1;
          maskStride = -GEO_WEIGHT_MASK_SIZE;
          maskStride2 = -(int)cu.lwidth();
          SADmask = &g_globalGeoEncSADmask[g_angle2mask[g_GeoParams[splitDir][0]]][(GEO_WEIGHT_MASK_SIZE - 1 - g_weightOffset[splitDir][hIdx][wIdx][1]) * GEO_WEIGHT_MASK_SIZE + g_weightOffset[splitDir][hIdx][wIdx][0]];
        }
        else if (g_angle2mirror[angle] == 1)
        {
          stepX = -1;
          maskStride2 = cu.lwidth();
          maskStride = GEO_WEIGHT_MASK_SIZE;
          SADmask = &g_globalGeoEncSADmask[g_angle2mask[g_GeoParams[splitDir][0]]][g_weightOffset[splitDir][hIdx][wIdx][1] * GEO_WEIGHT_MASK_SIZE + (GEO_WEIGHT_MASK_SIZE - 1 - g_weightOffset[splitDir][hIdx][wIdx][0])];
        }
        else
        {
          stepX = 1;
          maskStride = GEO_WEIGHT_MASK_SIZE;
          maskStride2 = -(int)cu.lwidth();
          SADmask = &g_globalGeoEncSADmask[g_angle2mask[g_GeoParams[splitDir][0]]][g_weightOffset[splitDir][hIdx][wIdx][1] * GEO_WEIGHT_MASK_SIZE + g_weightOffset[splitDir][hIdx][wIdx][0]];
        }
        for (uint8_t mergeCand = 0; mergeCand < maxNumTmMrgCand; mergeCand++)
        {
          if (mrgDuplicated[mergeCand])
          {
            continue;
          }

          uint8_t mergeCand0 = mergeCand + (g_geoTmShape[0][g_GeoParams[splitDir][0]] - 1) * GEO_MAX_NUM_UNI_CANDS;
          m_pcRdCost->setDistParam(distParam, tempCS->getOrgBuf().Y(), geoTmTempBuf[mergeCand0].Y().buf, geoTmTempBuf[mergeCand0].Y().stride, SADmask, maskStride, stepX, maskStride2, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y);
          sadLarge = distParam.distFunc(distParam);
          double tempCost = (double)sadLarge + geoMergeIdxCost[mergeCand] + geoMMVDFlagCost[0];
          m_geoMMVDCostList.insert(splitDir, 0, mergeCand, (GPM_EXT_MMVD_MAX_REFINE_NUM + 1), tempCost);

          uint8_t mergeCand1 = mergeCand + (g_geoTmShape[1][g_GeoParams[splitDir][0]] - 1) * GEO_MAX_NUM_UNI_CANDS;
          m_pcRdCost->setDistParam(distParam, tempCS->getOrgBuf().Y(), geoTmTempBuf[mergeCand1].Y().buf, geoTmTempBuf[mergeCand1].Y().stride, SADmask, maskStride, stepX, maskStride2, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y);
          sadSmall = sadTmWholeBlk[mergeCand1] - distParam.distFunc(distParam);
          tempCost = (double)sadSmall + geoMergeIdxCost[mergeCand] + geoMMVDFlagCost[0];
          m_geoMMVDCostList.insert(splitDir, 1, mergeCand, (GPM_EXT_MMVD_MAX_REFINE_NUM + 1), tempCost);
        }
      }

      for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++)
      {
        for (int mergeCand0 = 0; mergeCand0 < maxNumTmMrgCand; mergeCand0++)
        {
          if (mrgDuplicated[mergeCand0])
          {
            continue;
          }
          for (int mergeCand1 = 0; mergeCand1 < maxNumTmMrgCand; mergeCand1++)
          {
            if (mrgDuplicated[mergeCand1])
            {
              continue;
            }
            if (mergeCand0 == mergeCand1)
            {
              continue;
            }

#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
            int geoSyntaxMode = std::numeric_limits<uint8_t>::max();
            if(sps.getUseAltGPMSplitModeCode())
            {
              m_pcInterSearch->setGeoTMSplitModeToSyntaxTable(pu, mergeCtx, mergeCand0, mergeCand1, tmMmvdBufIdx0 - 1, tmMmvdBufIdx1 - 1);
              geoSyntaxMode = m_pcInterSearch->convertGeoSplitModeToSyntax(splitDir, mergeCand0, mergeCand1, tmMmvdBufIdx0 - 1, tmMmvdBufIdx1 - 1);
              if (geoSyntaxMode == std::numeric_limits<uint8_t>::max())
              {
                continue;
              }
            }
#endif

            double tempCost = m_geoMMVDCostList.singleDistList[0][splitDir][mergeCand0][GPM_EXT_MMVD_MAX_REFINE_NUM + 1].cost + m_geoMMVDCostList.singleDistList[1][splitDir][mergeCand1][GPM_EXT_MMVD_MAX_REFINE_NUM + 1].cost;
            tempCost = tempCost 
#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
                     + (geoSyntaxMode == std::numeric_limits<uint8_t>::max() ? geoModeCost[splitDir] : geoSigModeCost[geoSyntaxMode])
#else
                     + geoModeCost[splitDir]
#endif
                     + geoTMFlagCost[1];
            updateGeoMMVDCandList(tempCost, splitDir, mergeCand0, mergeCand1, (GPM_EXT_MMVD_MAX_REFINE_NUM + 1), (GPM_EXT_MMVD_MAX_REFINE_NUM + 1),
              geoSADCostList, geoSplitDirList, geoMergeCand0, geoMergeCand1, geoMmvdCand0, geoMmvdCand1, numSATDCands);
          }
        }
      }
    }
#endif

    int numberGeoCandChecked = (int)geoSADCostList.size();
    if (numberGeoCandChecked == 0)
    {
      return;
    }

    geoNumMrgSATDCand = min(GEO_MAX_TRY_WEIGHTED_SATD, numberGeoCandChecked);
    if (geoRdModeList.size() > geoNumMrgSATDCand)
    {
      geoRdModeList.resize(geoNumMrgSATDCand);
      isNonMMVDListIdx.resize(geoNumMrgSATDCand);
      geoPartitionModeList.resize(geoNumMrgSATDCand);
      geocandCostList.resize(geoNumMrgSATDCand);
    }
    for (uint8_t candidateIdx = 0; candidateIdx < numberGeoCandChecked; candidateIdx++)
    {
      int splitDir = geoSplitDirList[candidateIdx];
      int mergeCand0 = geoMergeCand0[candidateIdx];
      int mergeCand1 = geoMergeCand1[candidateIdx];
#if TM_MRG
      bool tmFlag0 = (geoMmvdCand0[candidateIdx] == (GPM_EXT_MMVD_MAX_REFINE_NUM + 1));
      bool tmFlag1 = (geoMmvdCand1[candidateIdx] == (GPM_EXT_MMVD_MAX_REFINE_NUM + 1));
      bool mmvdFlag0 = (geoMmvdCand0[candidateIdx] >= 1 && geoMmvdCand0[candidateIdx] <= GPM_EXT_MMVD_MAX_REFINE_NUM);
      bool mmvdFlag1 = (geoMmvdCand1[candidateIdx] >= 1 && geoMmvdCand1[candidateIdx] <= GPM_EXT_MMVD_MAX_REFINE_NUM);
      int  mmvdCand0 = (mmvdFlag0 ? (geoMmvdCand0[candidateIdx] - 1) : MAX_INT);
      int  mmvdCand1 = (mmvdFlag1 ? (geoMmvdCand1[candidateIdx] - 1) : MAX_INT);
      CHECK(tmFlag0 != tmFlag1, "TM flag cannot be enabled/disabled for two partitions separately");

      if (!tmFlag0 && !tmFlag1 && !mmvdFlag0 && !mmvdFlag1)
      {
        continue;
      }
      if (tmFlag0 && mergeCand0 == mergeCand1)
      {
        continue;
      }
#else
      int mmvdCand0 = geoMmvdCand0[candidateIdx] - 1;
      int mmvdCand1 = geoMmvdCand1[candidateIdx] - 1;
      bool mmvdFlag0 = (mmvdCand0 >= 0);
      bool mmvdFlag1 = (mmvdCand1 >= 0);

      if (!mmvdFlag0 && !mmvdFlag1)
      {
        continue;
      }
#endif
#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
      // Derive the split-mode syntax index for this candidate via
      // convertGeoSplitModeToSyntax(). geoSyntaxMode keeps the uint8_t-max
      // sentinel when sps.getUseAltGPMSplitModeCode() is false (or, with
      // JVET_Y0065_GPM_INTRA, when either merge candidate index is
      // >= GEO_MAX_NUM_UNI_CANDS); the cost computation further down tests for
      // that sentinel to choose between geoModeCost and geoSigModeCost.
      int geoSyntaxMode = std::numeric_limits<uint8_t>::max();
      if (sps.getUseAltGPMSplitModeCode()
#if JVET_Y0065_GPM_INTRA
        && mergeCand0 < GEO_MAX_NUM_UNI_CANDS && mergeCand1 < GEO_MAX_NUM_UNI_CANDS
#endif
        )
      {
#if JVET_W0097_GPM_MMVD_TM && TM_MRG
        if (tmFlag0 && tmFlag1)
        {
          // Both partitions carry the TM flag: the conversion is fed
          // tmMmvdBufIdx0/1 minus one instead of the per-candidate MMVD values.
          geoSyntaxMode = m_pcInterSearch->convertGeoSplitModeToSyntax(splitDir, mergeCand0, mergeCand1, tmMmvdBufIdx0 - 1, tmMmvdBufIdx1 - 1);
          CHECK(geoSyntaxMode < 0 || geoSyntaxMode >= GEO_NUM_SIG_PARTMODE, "Invalid GEO split direction!");
          // NOTE(review): both flags are true inside this branch, so this check can never fire.
          CHECK(tmFlag0 != tmFlag1, "tmFlag0 and tmFlag1 have to be identical to each other");
        }
        else
#endif
        {
          // Non-TM case: the stored MMVD candidate values minus one are passed
          // to the conversion.
          int  mmvdCandTmp0 = geoMmvdCand0[candidateIdx];
          int  mmvdCandTmp1 = geoMmvdCand1[candidateIdx];
          geoSyntaxMode = m_pcInterSearch->convertGeoSplitModeToSyntax(splitDir, mergeCand0, mergeCand1, mmvdCandTmp0 - 1, mmvdCandTmp1 - 1);
          CHECK(geoSyntaxMode < 0 || geoSyntaxMode >= GEO_NUM_SIG_PARTMODE, "Invalid GEO split direction!");
          CHECK(!mmvdFlag0 && !mmvdFlag1, "GPM MMVD has to be used at least for one partition");
        }
      }
#endif

#if JVET_AA0058_GPM_ADP_BLD
      for (uint8_t bldIdx = 0; bldIdx < GEO_NUM_BLD; bldIdx++)
      {
        geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx] = m_acGeoWeightedBuffer[candidateIdx * GEO_NUM_BLD + bldIdx].getBuf(localUnitArea);

#if JVET_Y0065_GPM_INTRA
        int isIntra0 = (mergeCand0 >= GEO_MAX_NUM_UNI_CANDS) ? 1 : 0;
        int isIntra1 = (mergeCand1 >= GEO_MAX_NUM_UNI_CANDS) ? 1 : 0;
        int candidateSAD = candidateIdx * GEO_NUM_BLD + bldIdx;
#endif
#if TM_MRG || JVET_Y0065_GPM_INTRA
        PelUnitBuf predSrc0, predSrc1;
#if JVET_Y0065_GPM_INTRA
        int intraIdx0 = mergeCand0 - GEO_MAX_NUM_UNI_CANDS;
        int intraIdx1 = mergeCand1 - GEO_MAX_NUM_UNI_CANDS;
        if (isIntra0 || isIntra1)
        {
          if (isIntra0)
          {
            int rdoBuffer = intraRDOBufIdx[geoIntraMPMList[splitDir][0][intraIdx0]];
            predSrc0 = geoIntraBuffer[rdoBuffer];
          }
#if TM_MRG
          else if (tmFlag0)
          {
            int mrgTmCand0 = mergeCand0 + (g_geoTmShape[0][g_GeoParams[splitDir][0]] - 1) * GEO_MAX_NUM_UNI_CANDS;
            predSrc0 = geoTmTempBuf[mrgTmCand0];
          }
#endif
          else // mmvdFlag0
          {
            predSrc0 = geoMMVDTempBuf[mergeCand0][mmvdCand0];
          }

          if (isIntra1)
          {
            int rdoBuffer = intraRDOBufIdx[geoIntraMPMList[splitDir][1][intraIdx1]];
            predSrc1 = geoIntraBuffer[rdoBuffer];
          }
#if TM_MRG
          else if (tmFlag1)
          {
            int mrgTmCand1 = mergeCand1 + (g_geoTmShape[1][g_GeoParams[splitDir][0]] - 1) * GEO_MAX_NUM_UNI_CANDS;
            predSrc1 = geoTmTempBuf[mrgTmCand1];
          }
#endif
          else // mmvdFlag1
          {
            predSrc1 = geoMMVDTempBuf[mergeCand1][mmvdCand1];
          }
          if (pu.cs->slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
          {
            if (!isIntra0) // Inter+Intra
            {
              geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx].Y().rspSignal(predSrc0.Y(), m_pcReshape->getFwdLUT());
              m_pcInterSearch->weightedGeoBlkRounded(pu, splitDir, bldIdx, CHANNEL_TYPE_LUMA, geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx], geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx], predSrc1);
            }
            else if (!isIntra1) // Intra+Inter
            {
              geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx].Y().rspSignal(predSrc1.Y(), m_pcReshape->getFwdLUT());
              m_pcInterSearch->weightedGeoBlkRounded(pu, splitDir, bldIdx, CHANNEL_TYPE_LUMA, geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx], predSrc0, geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx]);
            }
            candidateSAD = GEO_MAX_TRY_WEIGHTED_SAD * GEO_NUM_BLD;
            geoCombinations[candidateSAD] = m_acGeoWeightedBuffer[candidateSAD].getBuf(localUnitArea);
            geoCombinations[candidateSAD].Y().rspSignal(geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx].Y(), m_pcReshape->getInvLUT());
          }
          else
          {
            m_pcInterSearch->weightedGeoBlkRounded(pu, splitDir, bldIdx, CHANNEL_TYPE_LUMA, geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx], predSrc0, predSrc1);
          }
        }
        else
        {
#endif
#if TM_MRG 
          if (tmFlag0)
          {
            int mrgTmCand0 = mergeCand0 + (g_geoTmShape[0][g_GeoParams[splitDir][0]] - 1) * GEO_MAX_NUM_UNI_CANDS;
            predSrc0 = geoTmBuffer[mrgTmCand0];
          }
          else if (mmvdFlag0)
          {
            predSrc0 = geoMMVDBuf[mergeCand0][mmvdCand0];
          }
          else
          {
            predSrc0 = geoBuffer[mergeCand0];
          }

          if (tmFlag1)
          {
            int mrgTmCand1 = mergeCand1 + (g_geoTmShape[1][g_GeoParams[splitDir][0]] - 1) * GEO_MAX_NUM_UNI_CANDS;
            predSrc1 = geoTmBuffer[mrgTmCand1];
          }
          else if (mmvdFlag1)
          {
            predSrc1 = geoMMVDBuf[mergeCand1][mmvdCand1];
          }
          else
          {
            predSrc1 = geoBuffer[mergeCand1];
          }

          m_pcInterSearch->weightedGeoBlk(pu, splitDir, bldIdx, CHANNEL_TYPE_LUMA, geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx], predSrc0, predSrc1);
#else
#if JVET_AA0058_GPM_ADP_BLD
          m_pcInterSearch->weightedGeoBlk(pu, splitDir, bldIdx, CHANNEL_TYPE_LUMA, geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx], (mmvdFlag0 ? geoMMVDBuf[mergeCand0][mmvdCand0] : geoBuffer[mergeCand0]), (mmvdFlag1 ? geoMMVDBuf[mergeCand1][mmvdCand1] : geoBuffer[mergeCand1]));
#else
          m_pcInterSearch->weightedGeoBlk(pu, splitDir, CHANNEL_TYPE_LUMA, geoCombinations[candidateIdx], (mmvdFlag0 ? geoMMVDBuf[mergeCand0][mmvdCand0] : geoBuffer[mergeCand0]), (mmvdFlag1 ? geoMMVDBuf[mergeCand1][mmvdCand1] : geoBuffer[mergeCand1]));
#endif
#endif
#if JVET_Y0065_GPM_INTRA
        }
#endif
#else
        m_pcInterSearch->weightedGeoBlk(pu, splitDir, CHANNEL_TYPE_LUMA, geoCombinations[candidateIdx], (mmvdFlag0 ? geoMMVDBuf[mergeCand0][mmvdCand0] : geoBuffer[mergeCand0]), (mmvdFlag1 ? geoMMVDBuf[mergeCand1][mmvdCand1] : geoBuffer[mergeCand1]));
#endif
#if JVET_Y0065_GPM_INTRA
        distParamSAD2.cur = geoCombinations[candidateSAD].Y();
#else
        distParamSAD2.cur = geoCombinations[candidateIdx].Y();
#endif
        Distortion sad = distParamSAD2.distFunc(distParamSAD2);

#if JVET_Y0065_GPM_INTRA
        double updateCost =
#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
        (geoSyntaxMode == std::numeric_limits<uint8_t>::max() ? geoModeCost[splitDir] : geoSigModeCost[geoSyntaxMode])
#else
          geoModeCost[splitDir]
#endif
          + geoMMVDFlagCost[mmvdFlag0] + geoMMVDFlagCost[mmvdFlag1];
        updateCost += (mmvdFlag0 ? geoMMVDIdxCost[mmvdCand0] : geoIntraFlag0Cost[isIntra0]);
        updateCost += (isIntra0 ? geoIntraIdxCost[intraIdx0] : geoMergeIdxCost[mergeCand0]);
        updateCost += (mmvdFlag1 ? geoMMVDIdxCost[mmvdCand1] : geoIntraFlag1Cost[isIntra0][isIntra1]);
        updateCost += (isIntra1 ? geoIntraIdxCost[intraIdx1] : geoMergeIdxCost[mergeCand1]);
#else
        // Cost terms when JVET_Y0065_GPM_INTRA is compiled out: the split-mode
        // cost (geoSigModeCost when a syntax mode was derived, i.e. geoSyntaxMode
        // is not the uint8_t-max sentinel; geoModeCost otherwise) plus the
        // merge-index and MMVD-flag costs of both partitions.
        // Fix: removed a stray ';' inside the ternary condition that made this
        // preprocessor branch ill-formed.
        double updateCost =
#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
        (geoSyntaxMode == std::numeric_limits<uint8_t>::max() ? geoModeCost[splitDir] : geoSigModeCost[geoSyntaxMode])
#else
          geoModeCost[splitDir]
#endif
          + geoMergeIdxCost[mergeCand0] + geoMergeIdxCost[mergeCand1] + geoMMVDFlagCost[mmvdFlag0] + geoMMVDFlagCost[mmvdFlag1];
        if (mmvdFlag0)
        {
          updateCost += geoMMVDIdxCost[mmvdCand0];
        }
        if (mmvdFlag1)
        {
          updateCost += geoMMVDIdxCost[mmvdCand1];
        }
#endif
#if TM_MRG
        if (sps.getUseDMVDMode())
        {
#if JVET_Y0065_GPM_INTRA
          if (!mmvdFlag0 && !mmvdFlag1 && !isIntra0 && !isIntra1)
#else
          if (!mmvdFlag0 && !mmvdFlag1)
#endif
          {
            updateCost += geoTMFlagCost[tmFlag0];
          }
        }
#endif

        updateCost += geoBldFlagCost[bldIdx];
        updateCost += (double)sad;
        orderCandList(candidateIdx, false, splitDir, updateCost, bldIdx, geoRdModeList, isNonMMVDListIdx, geoPartitionModeList, geocandCostList, geoBldList, geoNumMrgSATDCand);
      }
#else
      geoCombinations[candidateIdx] = m_acGeoWeightedBuffer[candidateIdx].getBuf(localUnitArea);

#if JVET_Y0065_GPM_INTRA
      int isIntra0 = (mergeCand0 >= GEO_MAX_NUM_UNI_CANDS) ? 1 : 0;
      int isIntra1 = (mergeCand1 >= GEO_MAX_NUM_UNI_CANDS) ? 1 : 0;
      uint8_t candidateSAD = candidateIdx;
#endif
#if TM_MRG || JVET_Y0065_GPM_INTRA
      PelUnitBuf predSrc0, predSrc1;
#if JVET_Y0065_GPM_INTRA
      int intraIdx0 = mergeCand0 - GEO_MAX_NUM_UNI_CANDS;
      int intraIdx1 = mergeCand1 - GEO_MAX_NUM_UNI_CANDS;
      if (isIntra0 || isIntra1)
      {
        if (isIntra0)
        {
          int rdoBuffer = intraRDOBufIdx[geoIntraMPMList[splitDir][0][intraIdx0]];
          predSrc0 = geoIntraBuffer[rdoBuffer];
        }
#if TM_MRG
        else if (tmFlag0)
        {
          int mrgTmCand0 = mergeCand0 + (g_geoTmShape[0][g_GeoParams[splitDir][0]] - 1) * GEO_MAX_NUM_UNI_CANDS;
          predSrc0 = geoTmTempBuf[mrgTmCand0];
        }
#endif
        else // mmvdFlag0
        {
          predSrc0 = geoMMVDTempBuf[mergeCand0][mmvdCand0];
        }

        if (isIntra1)
        {
          int rdoBuffer = intraRDOBufIdx[geoIntraMPMList[splitDir][1][intraIdx1]];
          predSrc1 = geoIntraBuffer[rdoBuffer];
        }
#if TM_MRG
        else if (tmFlag1)
        {
          int mrgTmCand1 = mergeCand1 + (g_geoTmShape[1][g_GeoParams[splitDir][0]] - 1) * GEO_MAX_NUM_UNI_CANDS;
          predSrc1 = geoTmTempBuf[mrgTmCand1];
        }
#endif
        else // mmvdFlag1
        {
          predSrc1 = geoMMVDTempBuf[mergeCand1][mmvdCand1];
        }
        if (pu.cs->slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
        {
          if (!isIntra0) // Inter+Intra
          {
            geoCombinations[candidateIdx].Y().rspSignal(predSrc0.Y(), m_pcReshape->getFwdLUT());
            m_pcInterSearch->weightedGeoBlkRounded(pu, splitDir, CHANNEL_TYPE_LUMA, geoCombinations[candidateIdx], geoCombinations[candidateIdx], predSrc1);
          }
          else if (!isIntra1) // Intra+Inter
          {
            geoCombinations[candidateIdx].Y().rspSignal(predSrc1.Y(), m_pcReshape->getFwdLUT());
            m_pcInterSearch->weightedGeoBlkRounded(pu, splitDir, CHANNEL_TYPE_LUMA, geoCombinations[candidateIdx], predSrc0, geoCombinations[candidateIdx]);
          }
          candidateSAD = GEO_MAX_TRY_WEIGHTED_SAD;
          geoCombinations[GEO_MAX_TRY_WEIGHTED_SAD] = m_acGeoWeightedBuffer[GEO_MAX_TRY_WEIGHTED_SAD].getBuf(localUnitArea);
          geoCombinations[GEO_MAX_TRY_WEIGHTED_SAD].Y().rspSignal(geoCombinations[candidateIdx].Y(), m_pcReshape->getInvLUT());
        }
        else
        {
          m_pcInterSearch->weightedGeoBlkRounded(pu, splitDir, CHANNEL_TYPE_LUMA, geoCombinations[candidateIdx], predSrc0, predSrc1);
        }
      }
      else
      {
#endif
#if TM_MRG 
      if (tmFlag0)
      {
        int mrgTmCand0 = mergeCand0 + (g_geoTmShape[0][g_GeoParams[splitDir][0]] - 1) * GEO_MAX_NUM_UNI_CANDS;
        predSrc0 = geoTmBuffer[mrgTmCand0];
      }
      else if (mmvdFlag0)
      {
        predSrc0 = geoMMVDBuf[mergeCand0][mmvdCand0];
      }
      else
      {
        predSrc0 = geoBuffer[mergeCand0];
      }

      if (tmFlag1)
      {
        int mrgTmCand1 = mergeCand1 + (g_geoTmShape[1][g_GeoParams[splitDir][0]] - 1) * GEO_MAX_NUM_UNI_CANDS;
        predSrc1 = geoTmBuffer[mrgTmCand1];
      }
      else if (mmvdFlag1)
      {
        predSrc1 = geoMMVDBuf[mergeCand1][mmvdCand1];
      }
      else
      {
        predSrc1 = geoBuffer[mergeCand1];
      }

      m_pcInterSearch->weightedGeoBlk(pu, splitDir, CHANNEL_TYPE_LUMA, geoCombinations[candidateIdx], predSrc0, predSrc1);
#else
      m_pcInterSearch->weightedGeoBlk( pu, splitDir, CHANNEL_TYPE_LUMA, geoCombinations[candidateIdx], (mmvdFlag0 ? geoMMVDBuf[mergeCand0][mmvdCand0] : geoBuffer[mergeCand0]), (mmvdFlag1 ? geoMMVDBuf[mergeCand1][mmvdCand1] : geoBuffer[mergeCand1]) );
#endif
#if JVET_Y0065_GPM_INTRA
      }
#endif
#else
      m_pcInterSearch->weightedGeoBlk(pu, splitDir, CHANNEL_TYPE_LUMA, geoCombinations[candidateIdx], (mmvdFlag0 ? geoMMVDBuf[mergeCand0][mmvdCand0] : geoBuffer[mergeCand0]), (mmvdFlag1 ? geoMMVDBuf[mergeCand1][mmvdCand1] : geoBuffer[mergeCand1]));
#endif
#if JVET_Y0065_GPM_INTRA
      distParamSAD2.cur = geoCombinations[candidateSAD].Y();
#else
      distParamSAD2.cur = geoCombinations[candidateIdx].Y();
#endif
      Distortion sad = distParamSAD2.distFunc(distParamSAD2);

#if JVET_Y0065_GPM_INTRA
      double updateCost = 
#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
                          ( geoSyntaxMode == std::numeric_limits<uint8_t>::max() ? geoModeCost[splitDir] : geoSigModeCost[geoSyntaxMode])
#else
                          geoModeCost[splitDir] 
#endif
                        + geoMMVDFlagCost[mmvdFlag0] + geoMMVDFlagCost[mmvdFlag1];
      updateCost += (mmvdFlag0 ? geoMMVDIdxCost[mmvdCand0] : geoIntraFlag0Cost[isIntra0]);
      updateCost += (isIntra0 ? geoIntraIdxCost[intraIdx0] : geoMergeIdxCost[mergeCand0]);
      updateCost += (mmvdFlag1 ? geoMMVDIdxCost[mmvdCand1] : geoIntraFlag1Cost[isIntra0][isIntra1]);
      updateCost += (isIntra1 ? geoIntraIdxCost[intraIdx1] : geoMergeIdxCost[mergeCand1]);
#else
      // Cost terms when JVET_Y0065_GPM_INTRA is compiled out: the split-mode
      // cost (geoSigModeCost when a syntax mode was derived, i.e. geoSyntaxMode
      // is not the uint8_t-max sentinel; geoModeCost otherwise) plus the
      // merge-index and MMVD-flag costs of both partitions.
      // Fix: removed a stray ';' inside the ternary condition that made this
      // preprocessor branch ill-formed.
      double updateCost =
#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
                          ( geoSyntaxMode == std::numeric_limits<uint8_t>::max() ? geoModeCost[splitDir] : geoSigModeCost[geoSyntaxMode])
#else
                          geoModeCost[splitDir]
#endif
                        + geoMergeIdxCost[mergeCand0] + geoMergeIdxCost[mergeCand1] + geoMMVDFlagCost[mmvdFlag0] + geoMMVDFlagCost[mmvdFlag1];
      if (mmvdFlag0)
      {
        updateCost += geoMMVDIdxCost[mmvdCand0];
      }
      if (mmvdFlag1)
      {
        updateCost += geoMMVDIdxCost[mmvdCand1];
      }
#endif
#if TM_MRG
      if (sps.getUseDMVDMode())
      {
#if JVET_Y0065_GPM_INTRA
        if (!mmvdFlag0 && !mmvdFlag1 && !isIntra0 && !isIntra1)
#else
        if (!mmvdFlag0 && !mmvdFlag1)
#endif
        {
          updateCost += geoTMFlagCost[tmFlag0];
        }
      }
#endif
      updateCost += (double)sad;
      orderCandList(candidateIdx, false, splitDir, updateCost, geoRdModeList, isNonMMVDListIdx, geoPartitionModeList, geocandCostList, geoNumMrgSATDCand);
#endif
    }

    // Record the candidate list in relatedCU.
    if (!m_fastGpmMmvdRelatedCU)
    {
      // Fast mode off: store the split direction of every list entry as is.
      relatedCU.numGeoDirCand = geoNumMrgSATDCand;
      for (uint8_t listPos = 0; listPos < geoNumMrgSATDCand; ++listPos)
      {
        relatedCU.geoDirCandList[listPos] = geoPartitionModeList[listPos];
      }
    }
    else
    {
      // Fast mode on: store only the entries not marked in isNonMMVDListIdx,
      // packed contiguously, together with both of their merge indices.
      int numStored = 0;
      for (uint8_t listPos = 0; listPos < geoNumMrgSATDCand; ++listPos)
      {
        if (isNonMMVDListIdx[listPos])
        {
          continue;
        }
        relatedCU.geoDirCandList[numStored] = geoPartitionModeList[listPos];
        relatedCU.geoMrgIdx0List[numStored] = geoMergeCand0[geoRdModeList[listPos]];
        relatedCU.geoMrgIdx1List[numStored] = geoMergeCand1[geoRdModeList[listPos]];
        ++numStored;
      }
      relatedCU.numGeoDirCand = numStored;
    }

    for (uint8_t i = 1; i < geoNumMrgSATDCand; i++)
    {
#if MERGE_ENC_OPT
      if (geocandCostList[i] > MRG_FAST_RATIO * geocandCostList[0] || geocandCostList[i] > getMergeBestSATDCost())
#else
      if (geocandCostList[i] > MRG_FAST_RATIO * geocandCostList[0] || geocandCostList[i] > getMergeBestSATDCost() || geocandCostList[i] > getAFFBestSATDCost())
#endif
      {
        geoNumMrgSATDCand = i;
        break;
      }
    }
#if JVET_Y0065_GPM_INTRA
    for (uint8_t i = 0; i < geoNumMrgSATDCand; i++)
    {
      if (isNonMMVDListIdx[i])
      {
        continue;
      }
      uint8_t candidateIdx = geoRdModeList[i];
      int splitDir = geoSplitDirList[candidateIdx];
      int mergeCand0 = geoMergeCand0[candidateIdx];
      int mergeCand1 = geoMergeCand1[candidateIdx];
#if JVET_AA0058_GPM_ADP_BLD
      uint8_t bldIdx = geoBldList[i];
#endif
#if TM_MRG
      bool tmFlag0 = (geoMmvdCand0[candidateIdx] == (GPM_EXT_MMVD_MAX_REFINE_NUM + 1));
      bool tmFlag1 = (geoMmvdCand1[candidateIdx] == (GPM_EXT_MMVD_MAX_REFINE_NUM + 1));
      bool mmvdFlag0 = (geoMmvdCand0[candidateIdx] >= 1 && geoMmvdCand0[candidateIdx] <= GPM_EXT_MMVD_MAX_REFINE_NUM);
      bool mmvdFlag1 = (geoMmvdCand1[candidateIdx] >= 1 && geoMmvdCand1[candidateIdx] <= GPM_EXT_MMVD_MAX_REFINE_NUM);
      int  mmvdCand0 = (mmvdFlag0 ? (geoMmvdCand0[candidateIdx] - 1) : MAX_INT);
      int  mmvdCand1 = (mmvdFlag1 ? (geoMmvdCand1[candidateIdx] - 1) : MAX_INT);
      int mrgTmCand0 = MAX_INT, mrgTmCand1 = MAX_INT;
#else
      int mmvdCand0 = geoMmvdCand0[candidateIdx] - 1;
      int mmvdCand1 = geoMmvdCand1[candidateIdx] - 1;

      bool mmvdFlag0 = (mmvdCand0 >= 0);
      bool mmvdFlag1 = (mmvdCand1 >= 0);
#endif

      int isIntra0 = (mergeCand0 >= GEO_MAX_NUM_UNI_CANDS) ? 1 : 0;
      int isIntra1 = (mergeCand1 >= GEO_MAX_NUM_UNI_CANDS) ? 1 : 0;
      if (!isChromaEnabled(pu.chromaFormat) && !isIntra0 && !isIntra1)
      {
        continue;
      }
      int mrgIntraCand0 = MAX_INT, mrgIntraCand1 = MAX_INT;
      if (isIntra0)
      {
        int intraIdx0 = mergeCand0 - GEO_MAX_NUM_UNI_CANDS;
        uint8_t intraPred = geoIntraMPMList[splitDir][0][intraIdx0];
        mrgIntraCand0 = intraRDOBufIdx[intraPred];
        if (isChromaEnabled(pu.chromaFormat) && !isGeoIntraChromaAvail[mrgIntraCand0])
        {
          pu.intraDir[1] = intraPred;
          pu.gpmIntraFlag = true;
          m_pcIntraSearch->initIntraPatternChType(*pu.cu, pu.Cb());
          m_pcIntraSearch->predIntraAng(COMPONENT_Cb, geoIntraBuffer[mrgIntraCand0].Cb(), pu);
          m_pcIntraSearch->initIntraPatternChType(*pu.cu, pu.Cr());
          m_pcIntraSearch->predIntraAng(COMPONENT_Cr, geoIntraBuffer[mrgIntraCand0].Cr(), pu);
          pu.gpmIntraFlag = false;
          isGeoIntraChromaAvail[mrgIntraCand0] = 2;
        }
      }
#if TM_MRG
      else if (tmFlag0)
      {
        int geoTmType = g_geoTmShape[0][g_GeoParams[splitDir][0]];
        mrgTmCand0 = mergeCand0 + (geoTmType - 1) * GEO_MAX_NUM_UNI_CANDS;
        if (isChromaEnabled(pu.chromaFormat) && !isGeoTmChromaAvail[mrgTmCand0])
        {
          mergeCtx[geoTmType].setMergeInfo(pu, mergeCand0);
          m_pcInterSearch->motionCompensation(pu, geoTmBuffer[mrgTmCand0], REF_PIC_LIST_X, false, true);
          isGeoTmChromaAvail[mrgTmCand0] = true;
        }
      }
#endif
      else if( mmvdFlag0 )
      {
        if( isChromaEnabled( pu.chromaFormat ) && !isGeoMMVDChromaAvail[mergeCand0][mmvdCand0] )
        {
#if TM_MRG
          mergeCtx[GEO_TM_OFF].setGeoMmvdMergeInfo( pu, mergeCand0, mmvdCand0 );
#else
          mergeCtx.setGeoMmvdMergeInfo( pu, mergeCand0, mmvdCand0 );
#endif
          m_pcInterSearch->motionCompensation( pu, geoMMVDBuf[mergeCand0][mmvdCand0], REF_PIC_LIST_X, false, true );
          isGeoMMVDChromaAvail[mergeCand0][mmvdCand0] = 1;
        }
      }
      else
      {
        if( isChromaEnabled( pu.chromaFormat ) && !isGeoChromaAvail[mergeCand0] )
        {
#if TM_MRG
          mergeCtx[GEO_TM_OFF].setMergeInfo( pu, mergeCand0 );
#else
          mergeCtx.setMergeInfo( pu, mergeCand0 );
#endif
          m_pcInterSearch->motionCompensation( pu, geoBuffer[mergeCand0], REF_PIC_LIST_X, false, true );
          isGeoChromaAvail[mergeCand0] = 1;
        }
      }

      if (isIntra1)
      {
        int intraIdx1 = mergeCand1 - GEO_MAX_NUM_UNI_CANDS;
        uint8_t intraPred = geoIntraMPMList[splitDir][1][intraIdx1];
        mrgIntraCand1 = intraRDOBufIdx[intraPred];
        if (isChromaEnabled(pu.chromaFormat) && !isGeoIntraChromaAvail[mrgIntraCand1])
        {
          pu.intraDir[1] = intraPred;
          pu.gpmIntraFlag = true;
          m_pcIntraSearch->initIntraPatternChType(*pu.cu, pu.Cb());
          m_pcIntraSearch->predIntraAng(COMPONENT_Cb, geoIntraBuffer[mrgIntraCand1].Cb(), pu);
          m_pcIntraSearch->initIntraPatternChType(*pu.cu, pu.Cr());
          m_pcIntraSearch->predIntraAng(COMPONENT_Cr, geoIntraBuffer[mrgIntraCand1].Cr(), pu);
          pu.gpmIntraFlag = false;
          isGeoIntraChromaAvail[mrgIntraCand1] = 2;
        }
      }
#if TM_MRG
      else if (tmFlag1)
      {
        int geoTmType = g_geoTmShape[1][g_GeoParams[splitDir][0]];
        mrgTmCand1 = mergeCand1 + (geoTmType - 1) * GEO_MAX_NUM_UNI_CANDS;
        if (isChromaEnabled(pu.chromaFormat) && !isGeoTmChromaAvail[mrgTmCand1])
        {
          mergeCtx[geoTmType].setMergeInfo(pu, mergeCand1);
          m_pcInterSearch->motionCompensation(pu, geoTmBuffer[mrgTmCand1], REF_PIC_LIST_X, false, true);
          isGeoTmChromaAvail[mrgTmCand1] = true;
        }
      }
#endif
      else if( mmvdFlag1 )
      {
        if( isChromaEnabled( pu.chromaFormat ) && !isGeoMMVDChromaAvail[mergeCand1][mmvdCand1] )
        {
#if TM_MRG
          mergeCtx[GEO_TM_OFF].setGeoMmvdMergeInfo( pu, mergeCand1, mmvdCand1 );
#else
          mergeCtx.setGeoMmvdMergeInfo( pu, mergeCand1, mmvdCand1 );
#endif
          m_pcInterSearch->motionCompensation( pu, geoMMVDBuf[mergeCand1][mmvdCand1], REF_PIC_LIST_X, false, true );
          isGeoMMVDChromaAvail[mergeCand1][mmvdCand1] = 1;
        }
      }
      else
      {
        if( isChromaEnabled( pu.chromaFormat ) && !isGeoChromaAvail[mergeCand1] )
        {
#if TM_MRG
          mergeCtx[GEO_TM_OFF].setMergeInfo( pu, mergeCand1 );
#else
          mergeCtx.setMergeInfo( pu, mergeCand1 );
#endif
          m_pcInterSearch->motionCompensation( pu, geoBuffer[mergeCand1], REF_PIC_LIST_X, false, true );
          isGeoChromaAvail[mergeCand1] = 1;
        }
      }

#if JVET_AA0058_GPM_ADP_BLD
      geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx] = m_acGeoWeightedBuffer[candidateIdx * GEO_NUM_BLD + bldIdx].getBuf(localUnitArea);
#else
      geoCombinations[candidateIdx] = m_acGeoWeightedBuffer[candidateIdx].getBuf(localUnitArea);
#endif

      PelUnitBuf predSrc0, predSrc1;
      PelUnitBuf predSrcTemp0, predSrcTemp1;
      uint8_t* chromaAvailPtr0 = nullptr;
      uint8_t* chromaAvailPtr1 = nullptr;
      if (isIntra0)
      {
        predSrcTemp0 = geoIntraBuffer[mrgIntraCand0];
      }
#if TM_MRG
      else if (tmFlag0)
      {
        predSrc0 = geoTmBuffer[mrgTmCand0];
      }
#endif
      else if (mmvdFlag0)
      {
        predSrc0 = geoMMVDBuf[mergeCand0][mmvdCand0];
        chromaAvailPtr0 = &isGeoMMVDChromaAvail[mergeCand0][mmvdCand0];
        predSrcTemp0 = geoMMVDTempBuf[mergeCand0][mmvdCand0];
        if (isIntra1)
        {
#if TM_MRG
          mergeCtx[GEO_TM_OFF].setGeoMmvdMergeInfo(pu, mergeCand0, mmvdCand0);
#else
          mergeCtx.setGeoMmvdMergeInfo( pu, mergeCand0, mmvdCand0 );
#endif
        }
      }
      else
      {
        predSrc0 = geoBuffer[mergeCand0];
      }

      if (isIntra1)
      {
        predSrcTemp1 = geoIntraBuffer[mrgIntraCand1];
      }
#if TM_MRG
      else if (tmFlag1)
      {
        predSrc1 = geoTmBuffer[mrgTmCand1];
      }
#endif
      else if (mmvdFlag1)
      {
        predSrc1 = geoMMVDBuf[mergeCand1][mmvdCand1];
        chromaAvailPtr1 = &isGeoMMVDChromaAvail[mergeCand1][mmvdCand1];
        predSrcTemp1 = geoMMVDTempBuf[mergeCand1][mmvdCand1];
        if (isIntra0)
        {
#if TM_MRG
          mergeCtx[GEO_TM_OFF].setGeoMmvdMergeInfo(pu, mergeCand1, mmvdCand1);
#else
          mergeCtx.setGeoMmvdMergeInfo( pu, mergeCand1, mmvdCand1 );
#endif
        }
      }
      else
      {
        predSrc1 = geoBuffer[mergeCand1];
      }

      if (isIntra0 || isIntra1)
      {
        if (isChromaEnabled(pu.chromaFormat))
        {
          if (!isIntra0)
          {
            CHECK(!mmvdFlag0, "mmvdFlag0 must be true");
            if (*chromaAvailPtr0 < 2)
            {
              predSrcTemp0.Cb().roundToOutputBitdepth(predSrc0.Cb(), cu.slice->clpRng(COMPONENT_Cb));
              predSrcTemp0.Cr().roundToOutputBitdepth(predSrc0.Cr(), cu.slice->clpRng(COMPONENT_Cr));
              *chromaAvailPtr0 = 2;
            }
          }
          else if (!isIntra1)
          {
            CHECK(!mmvdFlag1, "mmvdFlag1 must be true");
            if (*chromaAvailPtr1 < 2)
            {
              predSrcTemp1.Cb().roundToOutputBitdepth(predSrc1.Cb(), cu.slice->clpRng(COMPONENT_Cb));
              predSrcTemp1.Cr().roundToOutputBitdepth(predSrc1.Cr(), cu.slice->clpRng(COMPONENT_Cr));
              *chromaAvailPtr1 = 2;
            }
          }
        }

#if ENABLE_OBMC
#if JVET_W0123_TIMD_FUSION
        PU::spanMotionInfo2(pu);
#else
        PU::spanMotionInfo(pu);
#endif
#if JVET_AA0058_GPM_ADP_BLD
        if (!isIntra0)
        {
          geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx].copyFrom(predSrcTemp0);
          predSrcTemp0 = geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx];
        }
        else
        {
          geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx].copyFrom(predSrcTemp1);
          predSrcTemp1 = geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx];
        }
#else
        if (!isIntra0)
        {
          geoCombinations[candidateIdx].copyFrom(predSrcTemp0);
          predSrcTemp0 = geoCombinations[candidateIdx];
        }
        else
        {
          geoCombinations[candidateIdx].copyFrom(predSrcTemp1);
          predSrcTemp1 = geoCombinations[candidateIdx];
        }
#endif
        cu.isobmcMC = true;
        cu.obmcFlag = true;
        m_pcInterSearch->subBlockOBMC(pu, !isIntra0 ? &predSrcTemp0 : &predSrcTemp1);
        cu.isobmcMC = false;
#endif

        if (pu.cs->slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
        {
          if (!isIntra0) // Inter+Intra
          {
            predSrcTemp0.Y().rspSignal(predSrcTemp0.Y(), m_pcReshape->getFwdLUT());
          }
          else if (!isIntra1) // Intra+Inter
          {
            predSrcTemp1.Y().rspSignal(predSrcTemp1.Y(), m_pcReshape->getFwdLUT());
          }
        }
#if JVET_AA0058_GPM_ADP_BLD
        m_pcInterSearch->weightedGeoBlkRounded(pu, splitDir, bldIdx, CHANNEL_TYPE_LUMA, geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx], predSrcTemp0, predSrcTemp1);
        m_pcInterSearch->weightedGeoBlkRounded(pu, splitDir, bldIdx, CHANNEL_TYPE_CHROMA, geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx], predSrcTemp0, predSrcTemp1);
      }
      else
      {
        m_pcInterSearch->weightedGeoBlk(pu, splitDir, bldIdx, CHANNEL_TYPE_CHROMA, geoCombinations[candidateIdx * GEO_NUM_BLD + bldIdx], predSrc0, predSrc1);
      }
#else
        m_pcInterSearch->weightedGeoBlkRounded(pu, splitDir, CHANNEL_TYPE_LUMA, geoCombinations[candidateIdx], predSrcTemp0, predSrcTemp1);
        m_pcInterSearch->weightedGeoBlkRounded(pu, splitDir, CHANNEL_TYPE_CHROMA, geoCombinations[candidateIdx], predSrcTemp0, predSrcTemp1);
      }
      else
      {
        m_pcInterSearch->weightedGeoBlk(pu, splitDir, CHANNEL_TYPE_CHROMA, geoCombinations[candidateIdx], predSrc0, predSrc1);
      }
#endif
    }
#else
    for (uint8_t i = 0; i < geoNumMrgSATDCand && isChromaEnabled(pu.chromaFormat); i++)
    {
      if (isNonMMVDListIdx[i])
      {
        continue;
      }
      uint8_t candidateIdx = geoRdModeList[i];
      int splitDir = geoSplitDirList[candidateIdx];
      int mergeCand0 = geoMergeCand0[candidateIdx];
      int mergeCand1 = geoMergeCand1[candidateIdx];
#if TM_MRG
      bool tmFlag0 = (geoMmvdCand0[candidateIdx] == (GPM_EXT_MMVD_MAX_REFINE_NUM + 1));
      bool tmFlag1 = (geoMmvdCand1[candidateIdx] == (GPM_EXT_MMVD_MAX_REFINE_NUM + 1));
      bool mmvdFlag0 = (geoMmvdCand0[candidateIdx] >= 1 && geoMmvdCand0[candidateIdx] <= GPM_EXT_MMVD_MAX_REFINE_NUM);
      bool mmvdFlag1 = (geoMmvdCand1[candidateIdx] >= 1 && geoMmvdCand1[candidateIdx] <= GPM_EXT_MMVD_MAX_REFINE_NUM);
      int  mmvdCand0 = (mmvdFlag0 ? (geoMmvdCand0[candidateIdx] - 1) : MAX_INT);
      int  mmvdCand1 = (mmvdFlag1 ? (geoMmvdCand1[candidateIdx] - 1) : MAX_INT);
#else
      int mmvdCand0 = geoMmvdCand0[candidateIdx] - 1;
      int mmvdCand1 = geoMmvdCand1[candidateIdx] - 1;

      bool mmvdFlag0 = (mmvdCand0 >= 0);
      bool mmvdFlag1 = (mmvdCand1 >= 0);
#endif

#if TM_MRG
      int mrgTmCand0 = MAX_INT, mrgTmCand1 = MAX_INT;
      if (tmFlag0)
      {
        int geoTmType = g_geoTmShape[0][g_GeoParams[splitDir][0]];
        mrgTmCand0 = mergeCand0 + (geoTmType - 1) * GEO_MAX_NUM_UNI_CANDS;
        if (!isGeoTmChromaAvail[mrgTmCand0])
        {
          mergeCtx[geoTmType].setMergeInfo(pu, mergeCand0);
          m_pcInterSearch->motionCompensation(pu, geoTmBuffer[mrgTmCand0], REF_PIC_LIST_X, false, true);
          isGeoTmChromaAvail[mrgTmCand0] = true;
        }
      }
      else
#endif
        if (mmvdFlag0)
        {
          if (!isGeoMMVDChromaAvail[mergeCand0][mmvdCand0])
          {
#if TM_MRG
            mergeCtx[GEO_TM_OFF].setGeoMmvdMergeInfo(pu, mergeCand0, mmvdCand0);
#else
            mergeCtx.setGeoMmvdMergeInfo(pu, mergeCand0, mmvdCand0);
#endif
            m_pcInterSearch->motionCompensation(pu, geoMMVDBuf[mergeCand0][mmvdCand0], REF_PIC_LIST_X, false, true);
            isGeoMMVDChromaAvail[mergeCand0][mmvdCand0] = true;
          }
        }
        else
        {
          if (!isGeoChromaAvail[mergeCand0])
          {
#if TM_MRG
            mergeCtx[GEO_TM_OFF].setMergeInfo(pu, mergeCand0);
#else
            mergeCtx.setMergeInfo(pu, mergeCand0);
#endif
            m_pcInterSearch->motionCompensation(pu, geoBuffer[mergeCand0], REF_PIC_LIST_X, false, true);
            isGeoChromaAvail[mergeCand0] = true;
          }
        }

#if TM_MRG
      if (tmFlag1)
      {
        int geoTmType = g_geoTmShape[1][g_GeoParams[splitDir][0]];
        mrgTmCand1 = mergeCand1 + (geoTmType - 1) * GEO_MAX_NUM_UNI_CANDS;
        if (!isGeoTmChromaAvail[mrgTmCand1])
        {
          mergeCtx[geoTmType].setMergeInfo(pu, mergeCand1);
          m_pcInterSearch->motionCompensation(pu, geoTmBuffer[mrgTmCand1], REF_PIC_LIST_X, false, true);
          isGeoTmChromaAvail[mrgTmCand1] = true;
        }
      }
      else
#endif
        if (mmvdFlag1)
        {
          if (!isGeoMMVDChromaAvail[mergeCand1][mmvdCand1])
          {
#if TM_MRG
            mergeCtx[GEO_TM_OFF].setGeoMmvdMergeInfo(pu, mergeCand1, mmvdCand1);
#else
            mergeCtx.setGeoMmvdMergeInfo(pu, mergeCand1, mmvdCand1);
#endif
            m_pcInterSearch->motionCompensation(pu, geoMMVDBuf[mergeCand1][mmvdCand1], REF_PIC_LIST_X, false, true);
            isGeoMMVDChromaAvail[mergeCand1][mmvdCand1] = true;
          }
        }
        else
        {
          if (!isGeoChromaAvail[mergeCand1])
          {
#if TM_MRG
            mergeCtx[GEO_TM_OFF].setMergeInfo(pu, mergeCand1);
#else
            mergeCtx.setMergeInfo(pu, mergeCand1);
#endif
            m_pcInterSearch->motionCompensation(pu, geoBuffer[mergeCand1], REF_PIC_LIST_X, false, true);
            isGeoChromaAvail[mergeCand1] = true;
          }
        }

      geoCombinations[candidateIdx] = m_acGeoWeightedBuffer[candidateIdx].getBuf(localUnitArea);
#if TM_MRG
      PelUnitBuf predSrc0, predSrc1;
      if (tmFlag0)
      {
        predSrc0 = geoTmBuffer[mrgTmCand0];
      }
      else if (mmvdFlag0)
      {
        predSrc0 = geoMMVDBuf[mergeCand0][mmvdCand0];
      }
      else
      {
        predSrc0 = geoBuffer[mergeCand0];
      }

      if (tmFlag1)
      {
        predSrc1 = geoTmBuffer[mrgTmCand1];
      }
      else if (mmvdFlag1)
      {
        predSrc1 = geoMMVDBuf[mergeCand1][mmvdCand1];
      }
      else
      {
        predSrc1 = geoBuffer[mergeCand1];
      }

      m_pcInterSearch->weightedGeoBlk(pu, splitDir, CHANNEL_TYPE_CHROMA, geoCombinations[candidateIdx], predSrc0, predSrc1);
#else
      m_pcInterSearch->weightedGeoBlk(pu, splitDir, CHANNEL_TYPE_CHROMA, geoCombinations[candidateIdx], (mmvdFlag0 ? geoMMVDBuf[mergeCand0][mmvdCand0] : geoBuffer[mergeCand0]), (mmvdFlag1 ? geoMMVDBuf[mergeCand1][mmvdCand1] : geoBuffer[mergeCand1]));
#endif
    }
#endif

    std::memset(geocandHasNoResidual, false, GEO_MAX_TRY_WEIGHTED_SAD * sizeof(bool));
    tempCS->initStructData(encTestMode.qp);
    uint8_t iteration = 2, iterationBegin = 0;
    for (uint8_t noResidualPass = iterationBegin; noResidualPass < iteration; ++noResidualPass)
    {
      for (uint8_t mrgHADIdx = 0; mrgHADIdx < geoNumMrgSATDCand; mrgHADIdx++)
      {
        if (isNonMMVDListIdx[mrgHADIdx])
        {
          continue;
        }
        uint8_t candidateIdx = geoRdModeList[mrgHADIdx];
        if (((noResidualPass != 0) && geocandHasNoResidual[candidateIdx])
          || ((noResidualPass == 0) && bestIsSkip))
        {
          continue;
        }
        CodingUnit &cu = tempCS->addCU(tempCS->area, pm.chType);
        pm.setCUData(cu);
        cu.predMode = MODE_INTER;
        cu.slice = tempCS->slice;
        cu.tileIdx = tempCS->pps->getTileIdx(tempCS->area.lumaPos());
        cu.qp = encTestMode.qp;
        cu.affine = false;
        cu.mtsFlag = false;
#if INTER_LIC
        cu.LICFlag = false;
#endif
        cu.BcwIdx = BCW_DEFAULT;
        cu.geoFlag = true;
        cu.imv = 0;
        cu.mmvdSkip = false;
        cu.skip = false;
        cu.mipFlag = false;
        cu.bdpcmMode = 0;
        PredictionUnit &pu = tempCS->addPU(cu, pm.chType);
        pu.mergeFlag = true;
        pu.regularMergeFlag = false;
        pu.geoSplitDir = geoSplitDirList[candidateIdx];
        pu.geoMergeIdx0 = geoMergeCand0[candidateIdx];
        pu.geoMergeIdx1 = geoMergeCand1[candidateIdx];
#if JVET_AA0058_GPM_ADP_BLD
        pu.geoBldIdx = geoBldList[mrgHADIdx];
#endif
#if JVET_Y0065_GPM_INTRA
        pu.gpmIntraFlag = pu.geoMergeIdx0 >= GEO_MAX_NUM_UNI_CANDS || pu.geoMergeIdx1 >= GEO_MAX_NUM_UNI_CANDS;
        if (pu.geoMergeIdx0 >= GEO_MAX_NUM_UNI_CANDS)
        {
          memcpy(pu.intraMPM, geoIntraMPMList[pu.geoSplitDir][0], sizeof(uint8_t)*GEO_MAX_NUM_INTRA_CANDS);
        }
        if (pu.geoMergeIdx1 >= GEO_MAX_NUM_UNI_CANDS)
        {
          memcpy(pu.intraMPM+GEO_MAX_NUM_INTRA_CANDS, geoIntraMPMList[pu.geoSplitDir][1], sizeof(uint8_t)*GEO_MAX_NUM_INTRA_CANDS);
        }
#if ENABLE_DIMD
        cu.dimdMode = dimdMode;
#endif
#if JVET_W0123_TIMD_FUSION
        cu.timdMode = timdMode;
#endif
#endif
#if TM_MRG
        pu.geoTmFlag0 = (geoMmvdCand0[candidateIdx] == (GPM_EXT_MMVD_MAX_REFINE_NUM + 1));
        pu.geoTmFlag1 = (geoMmvdCand1[candidateIdx] == (GPM_EXT_MMVD_MAX_REFINE_NUM + 1));
        pu.geoMMVDFlag0 = (geoMmvdCand0[candidateIdx] >= 1 && geoMmvdCand0[candidateIdx] <= GPM_EXT_MMVD_MAX_REFINE_NUM);
        pu.geoMMVDFlag1 = (geoMmvdCand1[candidateIdx] >= 1 && geoMmvdCand1[candidateIdx] <= GPM_EXT_MMVD_MAX_REFINE_NUM);
        CHECK(pu.geoTmFlag0 != pu.geoTmFlag1, "TM flag cannot be enabled/disabled for two partitions separately");
        pu.tmMergeFlag = pu.geoTmFlag0;
#else
        pu.geoMMVDFlag0 = (geoMmvdCand0[candidateIdx] > 0);
        pu.geoMMVDFlag1 = (geoMmvdCand1[candidateIdx] > 0);
#endif

#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
        if(sps.getUseAltGPMSplitModeCode())
        {
#if JVET_W0097_GPM_MMVD_TM && TM_MRG
          if(pu.tmMergeFlag)
          {
            int geoSyntaxMode = m_pcInterSearch->convertGeoSplitModeToSyntax(pu.geoSplitDir, pu.geoMergeIdx0, pu.geoMergeIdx1, tmMmvdBufIdx0 - 1, tmMmvdBufIdx1 - 1);
            CHECK(geoSyntaxMode < 0 || geoSyntaxMode >= GEO_NUM_SIG_PARTMODE, "Invalid GEO split direction!");
            CHECK(pu.geoMMVDFlag0 || pu.geoMMVDFlag1, "GPM MMVD should not be used in GPM-TM mode");
            pu.geoSyntaxMode = (uint8_t)geoSyntaxMode;
          }
          else
#endif
          {
            int  mmvdCandTmp0 = geoMmvdCand0[candidateIdx];
            int  mmvdCandTmp1 = geoMmvdCand1[candidateIdx];
            int geoSyntaxMode = m_pcInterSearch->convertGeoSplitModeToSyntax(pu.geoSplitDir, pu.geoMergeIdx0, pu.geoMergeIdx1, mmvdCandTmp0 - 1, mmvdCandTmp1 - 1);
            CHECK(geoSyntaxMode < 0 || geoSyntaxMode >= GEO_NUM_SIG_PARTMODE, "Invalid GEO split direction!");
            CHECK(!pu.geoMMVDFlag0 && !pu.geoMMVDFlag1, "GPM MMVD has to be used at least for one partition");
            pu.geoSyntaxMode = (uint8_t)geoSyntaxMode;
          }
        }
#endif

        if (pu.geoMMVDFlag0)
        {
          pu.geoMMVDIdx0 = (geoMmvdCand0[candidateIdx] - 1);
        }
        if (pu.geoMMVDFlag1)
        {
          pu.geoMMVDIdx1 = (geoMmvdCand1[candidateIdx] - 1);
        }
        pu.mmvdMergeFlag = false;
        pu.mmvdMergeIdx = MAX_UCHAR;
#if TM_MRG
        MergeCtx* mrgTmCtx0 = (pu.geoTmFlag0 == 0 ? nullptr : &mergeCtx[g_geoTmShape[0][g_GeoParams[pu.geoSplitDir][0]]]);
        MergeCtx* mrgTmCtx1 = (pu.geoTmFlag1 == 0 ? nullptr : &mergeCtx[g_geoTmShape[1][g_GeoParams[pu.geoSplitDir][0]]]);
#if JVET_AA0058_GPM_ADP_BLD
        PU::spanGeoMMVDMotionInfo(pu, mergeCtx[GEO_TM_OFF], *mrgTmCtx0, *mrgTmCtx1, pu.geoSplitDir, pu.geoMergeIdx0, pu.geoMergeIdx1, pu.geoTmFlag0, pu.geoMMVDFlag0, pu.geoMMVDIdx0, pu.geoTmFlag1, pu.geoMMVDFlag1, pu.geoMMVDIdx1, pu.geoBldIdx);
#else
        PU::spanGeoMMVDMotionInfo(pu, mergeCtx[GEO_TM_OFF], *mrgTmCtx0, *mrgTmCtx1, pu.geoSplitDir, pu.geoMergeIdx0, pu.geoMergeIdx1, pu.geoTmFlag0, pu.geoMMVDFlag0, pu.geoMMVDIdx0, pu.geoTmFlag1, pu.geoMMVDFlag1, pu.geoMMVDIdx1);
#endif
#else
#if JVET_AA0058_GPM_ADP_BLD
        PU::spanGeoMMVDMotionInfo(pu, mergeCtx, pu.geoSplitDir, pu.geoMergeIdx0, pu.geoMergeIdx1, pu.geoMMVDFlag0, pu.geoMMVDIdx0, pu.geoMMVDFlag1, pu.geoMMVDIdx1, pu.geoBldIdx);
#else
        PU::spanGeoMMVDMotionInfo(pu, mergeCtx, pu.geoSplitDir, pu.geoMergeIdx0, pu.geoMergeIdx1, pu.geoMMVDFlag0, pu.geoMMVDIdx0, pu.geoMMVDFlag1, pu.geoMMVDIdx1);
#endif
#endif
#if JVET_AA0058_GPM_ADP_BLD
        tempCS->getPredBuf().copyFrom(geoCombinations[candidateIdx * GEO_NUM_BLD + pu.geoBldIdx]);
#else
        tempCS->getPredBuf().copyFrom(geoCombinations[candidateIdx]);
#endif
#if ENABLE_OBMC
#if JVET_Y0065_GPM_INTRA
        if (!pu.gpmIntraFlag)
        {
#endif
        cu.isobmcMC = true;
        cu.obmcFlag = true;
        m_pcInterSearch->subBlockOBMC(pu);
        cu.isobmcMC = false;
#if JVET_Y0065_GPM_INTRA
        }
#endif
#endif

        xEncodeInterResidual(tempCS, bestCS, pm, encTestMode, noResidualPass, (noResidualPass == 0 ? &geocandHasNoResidual[candidateIdx] : NULL));

        if (m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip)
        {
          bestIsSkip = bestCS->getCU(pm.chType)->rootCbf == 0;
        }
        tempCS->initStructData(encTestMode.qp);
      }
    }
  }
  if (m_bestModeUpdated && bestCS->cost != MAX_DOUBLE)
  {
    xCalDebCost(*bestCS, pm);
  }
}
#else
/** Tests the geometric-partition (GEO) merge mode for the CU described by tempCS.
 *
 *  The search is staged:
 *   1. derive the GEO merge candidates and motion-compensate each of them once;
 *   2. for every split direction, measure the masked luma distortion of every candidate
 *      on both sides of the partition;
 *   3. combine the per-side costs into (splitDir, cand0, cand1) combinations and sort them;
 *   4. blend the best combinations and re-rank them with the distortion function of distParamSAD2;
 *   5. run xEncodeInterResidual() on the surviving combinations, in two passes.
 *
 *  \param tempCS       working coding structure; reset with initStructData() between trials
 *  \param bestCS       best coding structure so far, handed to xEncodeInterResidual()
 *  \param pm           partitioner providing the channel type and the CU data
 *  \param encTestMode  test mode; only its qp is read here
 *
 *  The function returns early (without testing anything) when the MCTS motion check fails,
 *  when every candidate but the first duplicates an earlier one, or when no combination
 *  survives the first pruning step.
 */
void EncCu::xCheckRDCostMergeGeo2Nx2N(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode)
{
  const Slice &slice = *tempCS->slice;
  CHECK(slice.getSliceType() == I_SLICE, "Merge modes not available for I-slices");

  tempCS->initStructData(encTestMode.qp);

  MergeCtx mergeCtx;
  const SPS &sps = *tempCS->sps;

  // When SbTMVP is enabled, attach the sub-PU motion buffer to the merge context.
  if (sps.getSbTMVPEnabledFlag())
  {
    Size bufSize = g_miScaling.scale(tempCS->area.lumaSize());
    mergeCtx.subPuMvpMiBuf = MotionBuf(m_SubPuMiBuf, bufSize);
  }

  // Scratch CU/PU configured as a GEO merge block; used for candidate derivation,
  // motion compensation and blending below.
  CodingUnit &cu = tempCS->addCU(tempCS->area, pm.chType);
  pm.setCUData(cu);
  cu.predMode = MODE_INTER;
  cu.slice = tempCS->slice;
  cu.tileIdx = tempCS->pps->getTileIdx(tempCS->area.lumaPos());
  cu.qp = encTestMode.qp;
  cu.affine = false;
  cu.mtsFlag = false;
#if INTER_LIC
  cu.LICFlag = false;
#endif
  cu.BcwIdx = BCW_DEFAULT;
  cu.geoFlag = true;
  cu.imv = 0;
  cu.mmvdSkip = false;
  cu.skip = false;
  cu.mipFlag = false;
#if JVET_V0130_INTRA_TMP
  cu.tmpFlag = false;
#endif
  cu.bdpcmMode = 0;

  PredictionUnit &pu = tempCS->addPU(cu, pm.chType);
  pu.mergeFlag = true;
  pu.regularMergeFlag = false;
#if TM_MRG || (JVET_Z0084_IBC_TM && IBC_TM_MRG)
  pu.tmMergeFlag = false;
#endif
  PU::getGeoMergeCandidates(pu, mergeCtx);

  GeoComboCostList comboList;
  // Bit estimate for the split mode when the alternative split-mode code is not used.
  int bitsCandTB = floorLog2(GEO_NUM_PARTITION_MODE);
#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
  // Per-syntax-mode bit estimate for the alternative split-mode code:
  // prefix value, plus one unless the prefix is the largest one, plus floorLog2(divisor).
  // NOTE(review): presumably a truncated-unary prefix followed by a fixed-length suffix - confirm against the entropy coder.
  double geoSigModeBits[GEO_NUM_SIG_PARTMODE];
  if(sps.getUseAltGPMSplitModeCode())
  {
    const int maxNumBins = (GEO_NUM_SIG_PARTMODE / GEO_SPLIT_MODE_RICE_CODE_DIVISOR) - 1;
    for (int idx = 0; idx < GEO_NUM_SIG_PARTMODE; idx++)
    {
      int geoModePrefix = idx / GEO_SPLIT_MODE_RICE_CODE_DIVISOR;
      geoSigModeBits[idx] = geoModePrefix + (geoModePrefix == maxNumBins ? 0 : 1)
                          + (GEO_SPLIT_MODE_RICE_CODE_DIVISOR > 1 ? floorLog2(GEO_SPLIT_MODE_RICE_CODE_DIVISOR): 0);
    }
  }
#endif
  // geoBuffer:       motion-compensated prediction of each candidate
  // geoTempBuf:      luma of geoBuffer rounded to the output bit depth (used for distortion)
  // geoCombinations: blended predictions of the combinations under test
  PelUnitBuf geoBuffer[GEO_MAX_NUM_UNI_CANDS];
  PelUnitBuf geoTempBuf[GEO_MAX_NUM_UNI_CANDS];
  PelUnitBuf geoCombinations[GEO_MAX_TRY_WEIGHTED_SAD];
  DistParam distParam;

  const UnitArea localUnitArea(tempCS->area.chromaFormat, Area(0, 0, tempCS->area.Y().width, tempCS->area.Y().height));
  const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda();
  uint8_t maxNumMergeCandidates = cu.cs->sps->getMaxNumGeoCand();
  DistParam distParamWholeBlk;
  m_pcRdCost->setDistParam(distParamWholeBlk, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y().buf, m_acMergeBuffer[0].Y().stride, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y);
  Distortion bestWholeBlkSad = MAX_UINT64;
  double bestWholeBlkCost = MAX_DOUBLE;

  Distortion sadWholeBlk[GEO_MAX_NUM_UNI_CANDS];
  int pocMrg[GEO_MAX_NUM_UNI_CANDS];
  Mv MrgMv[GEO_MAX_NUM_UNI_CANDS];
  bool isSkipThisCand[GEO_MAX_NUM_UNI_CANDS] = { false };

  // Stage 1: motion-compensate every candidate and measure its whole-block luma distortion.
  for (uint8_t mergeCand = 0; mergeCand < maxNumMergeCandidates; mergeCand++)
  {
    geoBuffer[mergeCand] = m_acMergeBuffer[mergeCand].getBuf(localUnitArea);
    mergeCtx.setMergeInfo(pu, mergeCand);
    // Use list 1 when the list-0 reference index is invalid (-1), list 0 otherwise.
    int MrgList = mergeCtx.mvFieldNeighbours[(mergeCand << 1) + 0].refIdx == -1 ? 1 : 0;
    RefPicList MrgeRefPicList = (MrgList ? REF_PIC_LIST_1 : REF_PIC_LIST_0);
    int MrgrefIdx = mergeCtx.mvFieldNeighbours[(mergeCand << 1) + MrgList].refIdx;
    pocMrg[mergeCand] = tempCS->slice->getRefPic(MrgeRefPicList, MrgrefIdx)->getPOC();
    MrgMv[mergeCand] = mergeCtx.mvFieldNeighbours[(mergeCand << 1) + MrgList].mv;

    // Mark the candidate as a duplicate when an earlier one has the same reference POC and MV.
    // The flag is only consumed by the all-duplicates early exit after this loop.
    for( int i = 0; i < mergeCand; i++ )
    {
      if( pocMrg[mergeCand] == pocMrg[i] && MrgMv[mergeCand] == MrgMv[i] )
      {
        isSkipThisCand[mergeCand] = true;
        break;
      }
    }

    // A single candidate failing the MCTS motion check aborts the whole GEO test.
    if (m_pcEncCfg->getMCTSEncConstraint() && (!(MCTSHelper::checkMvBufferForMCTSConstraint(pu))))
    {
      tempCS->initStructData(encTestMode.qp);
      return;
    }
    m_pcInterSearch->motionCompensation(pu, geoBuffer[mergeCand]);
#if MULTI_HYP_PRED
    geoTempBuf[mergeCand] = m_acRealMergeBuffer[MRG_MAX_NUM_CANDS + mergeCand].getBuf(localUnitArea);
#else
    geoTempBuf[mergeCand] = m_acMergeTmpBuffer[mergeCand].getBuf(localUnitArea);
#endif
    geoTempBuf[mergeCand].Y().copyFrom(geoBuffer[mergeCand].Y());
    geoTempBuf[mergeCand].Y().roundToOutputBitdepth(geoTempBuf[mergeCand].Y(), cu.slice->clpRng(COMPONENT_Y));
    distParamWholeBlk.cur.buf = geoTempBuf[mergeCand].Y().buf;
    distParamWholeBlk.cur.stride = geoTempBuf[mergeCand].Y().stride;
    sadWholeBlk[mergeCand] = distParamWholeBlk.distFunc(distParamWholeBlk);
    // Track the best whole-block cost; it is the pruning threshold for combinations in stage 3.
    if (sadWholeBlk[mergeCand] < bestWholeBlkSad)
    {
      bestWholeBlkSad = sadWholeBlk[mergeCand];
      bestWholeBlkCost = ( double ) bestWholeBlkSad + ( mergeCand + 1 ) * sqrtLambdaForFirstPass;
    }
  }
#if MULTI_HYP_PRED
  m_pcInterSearch->setGeoTmpBuffer(mergeCtx);
#endif
  // Despite its name, isGeo == true means "every candidate from index 1 on is a duplicate",
  // in which case GEO is not tested at all.
  bool isGeo = true;
  for (uint8_t mergeCand = 1; mergeCand < maxNumMergeCandidates; mergeCand++)
  {
    isGeo &= isSkipThisCand[mergeCand];
  }
  if (isGeo)
  {
    return;
  }

  // Stage 2: per split direction, masked distortion of every candidate on both partition sides.
  int wIdx = floorLog2(cu.lwidth()) - GEO_MIN_CU_LOG2;
  int hIdx = floorLog2(cu.lheight()) - GEO_MIN_CU_LOG2;
  for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++)
  {
    int maskStride = 0, maskStride2 = 0;
    int stepX = 1;
    Pel* SADmask;
    int16_t angle = g_GeoParams[splitDir][0];
    // Select the mask start address, strides and horizontal step according to the
    // mirroring type of the angle (g_angle2mirror: 2, 1, or anything else).
    if (g_angle2mirror[angle] == 2)
    {
      maskStride = -GEO_WEIGHT_MASK_SIZE;
      maskStride2 = -(int)cu.lwidth();
      SADmask = &g_globalGeoEncSADmask[g_angle2mask[g_GeoParams[splitDir][0]]][(GEO_WEIGHT_MASK_SIZE - 1 - g_weightOffset[splitDir][hIdx][wIdx][1]) * GEO_WEIGHT_MASK_SIZE + g_weightOffset[splitDir][hIdx][wIdx][0]];
    }
    else if (g_angle2mirror[angle] == 1)
    {
      stepX = -1;
      maskStride2 = cu.lwidth();
      maskStride = GEO_WEIGHT_MASK_SIZE;
      SADmask = &g_globalGeoEncSADmask[g_angle2mask[g_GeoParams[splitDir][0]]][g_weightOffset[splitDir][hIdx][wIdx][1] * GEO_WEIGHT_MASK_SIZE + (GEO_WEIGHT_MASK_SIZE - 1 - g_weightOffset[splitDir][hIdx][wIdx][0])];
    }
    else
    {
      maskStride = GEO_WEIGHT_MASK_SIZE;
      maskStride2 = -(int)cu.lwidth();
      SADmask = &g_globalGeoEncSADmask[g_angle2mask[g_GeoParams[splitDir][0]]][g_weightOffset[splitDir][hIdx][wIdx][1] * GEO_WEIGHT_MASK_SIZE + g_weightOffset[splitDir][hIdx][wIdx][0]];
    }
    Distortion sadSmall = 0, sadLarge = 0;
    for (uint8_t mergeCand = 0; mergeCand < maxNumMergeCandidates; mergeCand++)
    {
      int bitsCand = mergeCand + 1;

      m_pcRdCost->setDistParam(distParam, tempCS->getOrgBuf().Y(), geoTempBuf[mergeCand].Y().buf, geoTempBuf[mergeCand].Y().stride, SADmask, maskStride, stepX, maskStride2, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y);
      // sadLarge is the masked distortion; sadSmall is the remainder of the whole-block
      // distortion, so only one masked evaluation is needed per candidate.
      sadLarge = distParam.distFunc(distParam);
      m_GeoCostList.insert(splitDir, 0, mergeCand, (double)sadLarge + (double)bitsCand * sqrtLambdaForFirstPass);
      sadSmall = sadWholeBlk[mergeCand] - sadLarge;
      m_GeoCostList.insert(splitDir, 1, mergeCand, (double)sadSmall + (double)bitsCand * sqrtLambdaForFirstPass);
    }
  }

#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
  if (sps.getUseAltGPMSplitModeCode())
  {
    m_pcInterSearch->initGeoAngleSelection(pu
#if JVET_Y0065_GPM_INTRA
                                         , m_pcIntraSearch, geoIntraMPMList
#endif
    );
  }
#endif

  // Stage 3: build (splitDir, cand0, cand1) combinations from the per-side costs.
  for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++)
  {
    for (int GeoMotionIdx = 0; GeoMotionIdx < maxNumMergeCandidates * (maxNumMergeCandidates - 1); GeoMotionIdx++)
    {
      unsigned int mergeCand0 = m_GeoModeTest[GeoMotionIdx].m_candIdx0;
      unsigned int mergeCand1 = m_GeoModeTest[GeoMotionIdx].m_candIdx1;

#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
      int geoSyntaxMode = std::numeric_limits<uint8_t>::max();
      if (sps.getUseAltGPMSplitModeCode())
      {
        m_pcInterSearch->setGeoSplitModeToSyntaxTable(pu, mergeCtx, mergeCand0, mergeCtx, mergeCand1
#if JVET_Y0065_GPM_INTRA
                                                    , m_pcIntraSearch
#endif
        );
        geoSyntaxMode = m_pcInterSearch->convertGeoSplitModeToSyntax(splitDir, mergeCand0, mergeCand1);
        // The uint8_t maximum is treated as "no syntax mode for this split"; such combinations are dropped.
        if (geoSyntaxMode == std::numeric_limits<uint8_t>::max())
        {
          continue;
        }
      }
#endif

      double tempCost = m_GeoCostList.singleDistList[0][splitDir][mergeCand0].cost + m_GeoCostList.singleDistList[1][splitDir][mergeCand1].cost;
      // Prune combinations that are already worse than the best single whole-block candidate.
      if( tempCost > bestWholeBlkCost )
      {
        continue;
      }
#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
      tempCost = tempCost + (double)(sps.getUseAltGPMSplitModeCode() ? geoSigModeBits[geoSyntaxMode] : bitsCandTB) * sqrtLambdaForFirstPass;
#else
      tempCost = tempCost + (double)bitsCandTB * sqrtLambdaForFirstPass;
#endif
      comboList.list.push_back(GeoMergeCombo(splitDir, mergeCand0, mergeCand1, tempCost));
    }
  }
  if( comboList.list.empty() )
  {
    return;
  }

  comboList.sortByCost();

  bool geocandHasNoResidual[GEO_MAX_TRY_WEIGHTED_SAD] = { false };
  bool bestIsSkip = false;
  int geoNumCobo = (int)comboList.list.size();
  static_vector<uint8_t, GEO_MAX_TRY_WEIGHTED_SAD> geoRdModeList;
  static_vector<double, GEO_MAX_TRY_WEIGHTED_SAD> geocandCostList;

  // Stage 4: blend the luma of the leading combinations and re-rank them.
  // The distortion function is chosen by setDistParam() from the useHadamard flag.
  DistParam distParamSAD2;
  const bool useHadamard = !tempCS->slice->getDisableSATDForRD();
  m_pcRdCost->setDistParam(distParamSAD2, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y(), sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, useHadamard);
  int geoNumMrgSATDCand = min(GEO_MAX_TRY_WEIGHTED_SATD, geoNumCobo);

  for (uint8_t candidateIdx = 0; candidateIdx < min(geoNumCobo, GEO_MAX_TRY_WEIGHTED_SAD); candidateIdx++)
  {
    int splitDir = comboList.list[candidateIdx].splitDir;
    int mergeCand0 = comboList.list[candidateIdx].mergeIdx0;
    int mergeCand1 = comboList.list[candidateIdx].mergeIdx1;

    geoCombinations[candidateIdx] = m_acGeoWeightedBuffer[candidateIdx].getBuf(localUnitArea);
    m_pcInterSearch->weightedGeoBlk(pu, splitDir, CHANNEL_TYPE_LUMA, geoCombinations[candidateIdx], geoBuffer[mergeCand0], geoBuffer[mergeCand1]);
    distParamSAD2.cur = geoCombinations[candidateIdx].Y();
    Distortion sad = distParamSAD2.distFunc(distParamSAD2);
    // Bit estimate for the two merge indices: the second index is reduced by one when it is
    // not smaller than the first.
    int mvBits = 2;
    mergeCand1 -= mergeCand1 < mergeCand0 ? 0 : 1;
    mvBits += mergeCand0;
    mvBits += mergeCand1;
#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
    int geoSyntaxMode = std::numeric_limits<uint8_t>::max();
    if (sps.getUseAltGPMSplitModeCode())
    {
      // NOTE(review): mergeCand1 has already been decremented above, whereas the other calls to
      // convertGeoSplitModeToSyntax() in this function pass the unadjusted merge index - confirm
      // that the adjusted value is what the lookup expects here.
      geoSyntaxMode = m_pcInterSearch->convertGeoSplitModeToSyntax(splitDir, mergeCand0, mergeCand1);
      CHECK(geoSyntaxMode < 0 || geoSyntaxMode >= GEO_NUM_SIG_PARTMODE, "Invalid GEO split direction!");
    }
#endif
#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
    double updateCost = (double)sad + (double)((sps.getUseAltGPMSplitModeCode() ? geoSigModeBits[geoSyntaxMode] : bitsCandTB) + mvBits) * sqrtLambdaForFirstPass;
#else
    double updateCost = (double)sad + (double)(bitsCandTB + mvBits) * sqrtLambdaForFirstPass;
#endif
    comboList.list[candidateIdx].cost = updateCost;
    updateCandList(candidateIdx, updateCost, geoRdModeList, geocandCostList, geoNumMrgSATDCand);
  }
  // Cut the list at the first candidate whose cost exceeds MRG_FAST_RATIO times the best GEO cost,
  // or exceeds the value returned by getMergeBestSATDCost() or getAFFBestSATDCost().
  for (uint8_t i = 0; i < geoNumMrgSATDCand; i++)
  {
    if (geocandCostList[i] > MRG_FAST_RATIO * geocandCostList[0] || geocandCostList[i] > getMergeBestSATDCost() || geocandCostList[i] > getAFFBestSATDCost())
    {
      geoNumMrgSATDCand = i;
      break;
    }
  }
  // Chroma is blended only for the combinations that survived the pruning above.
  for (uint8_t i = 0; i < geoNumMrgSATDCand && isChromaEnabled(pu.chromaFormat); i++)
  {
    uint8_t candidateIdx = geoRdModeList[i];
    int splitDir = comboList.list[candidateIdx].splitDir;
    int mergeCand0 = comboList.list[candidateIdx].mergeIdx0;
    int mergeCand1 = comboList.list[candidateIdx].mergeIdx1;
    geoCombinations[candidateIdx] = m_acGeoWeightedBuffer[candidateIdx].getBuf(localUnitArea);
    m_pcInterSearch->weightedGeoBlk(pu, splitDir, CHANNEL_TYPE_CHROMA, geoCombinations[candidateIdx], geoBuffer[mergeCand0], geoBuffer[mergeCand1]);
  }

  // Stage 5: full residual coding of the survivors.
  m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;
  tempCS->initStructData(encTestMode.qp);
  uint8_t iteration;
  uint8_t iterationBegin = 0;
  iteration = 2;
  // Two passes (noResidualPass = 0, then 1); the pass index is forwarded to xEncodeInterResidual().
  for (uint8_t noResidualPass = iterationBegin; noResidualPass < iteration; ++noResidualPass)
  {
    for (uint8_t mrgHADIdx = 0; mrgHADIdx < geoNumMrgSATDCand; mrgHADIdx++)
    {
      uint8_t candidateIdx = geoRdModeList[mrgHADIdx];
      // Pass 1 skips candidates flagged in geocandHasNoResidual; pass 0 is skipped
      // entirely once bestIsSkip has been set.
      if (((noResidualPass != 0) && geocandHasNoResidual[candidateIdx])
        || ((noResidualPass == 0) && bestIsSkip))
      {
        continue;
      }
      // tempCS was reset, so the CU/PU have to be rebuilt for every trial.
      CodingUnit &cu = tempCS->addCU(tempCS->area, pm.chType);
      pm.setCUData(cu);
      cu.predMode = MODE_INTER;
      cu.slice = tempCS->slice;
      cu.tileIdx = tempCS->pps->getTileIdx(tempCS->area.lumaPos());
      cu.qp = encTestMode.qp;
      cu.affine = false;
      cu.mtsFlag = false;
#if INTER_LIC
      cu.LICFlag = false;
#endif
      cu.BcwIdx = BCW_DEFAULT;
      cu.geoFlag = true;
      cu.imv = 0;
      cu.mmvdSkip = false;
      cu.skip = false;
      cu.mipFlag = false;
#if JVET_V0130_INTRA_TMP
	    cu.tmpFlag = false;
#endif
      cu.bdpcmMode = 0;
      PredictionUnit &pu = tempCS->addPU(cu, pm.chType);
      pu.mergeFlag = true;
      pu.regularMergeFlag = false;
      pu.geoSplitDir = comboList.list[candidateIdx].splitDir;
      pu.geoMergeIdx0 = comboList.list[candidateIdx].mergeIdx0;
      pu.geoMergeIdx1 = comboList.list[candidateIdx].mergeIdx1;
      pu.mmvdMergeFlag = false;
      pu.mmvdMergeIdx = MAX_UCHAR;
#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
      if (sps.getUseAltGPMSplitModeCode())
      {
        int geoSyntaxMode = m_pcInterSearch->convertGeoSplitModeToSyntax(pu.geoSplitDir, pu.geoMergeIdx0, pu.geoMergeIdx1);
        CHECK(geoSyntaxMode < 0 || geoSyntaxMode >= GEO_NUM_SIG_PARTMODE, "Invalid GEO split direction!");
        pu.geoSyntaxMode = (uint8_t)geoSyntaxMode;
      }
#endif

      PU::spanGeoMotionInfo(pu, mergeCtx, pu.geoSplitDir, pu.geoMergeIdx0, pu.geoMergeIdx1);
      // Reuse the blended prediction computed in stage 4 instead of predicting again.
      tempCS->getPredBuf().copyFrom(geoCombinations[candidateIdx]);
#if ENABLE_OBMC
      cu.isobmcMC = true;
      cu.obmcFlag = true;
      m_pcInterSearch->subBlockOBMC(pu);
      cu.isobmcMC = false;
#endif
      // Only pass 0 hands over the per-candidate flag that pass 1 later reads.
      xEncodeInterResidual(tempCS, bestCS, pm, encTestMode, noResidualPass, (noResidualPass == 0 ? &geocandHasNoResidual[candidateIdx] : NULL));

      if (m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip)
      {
        bestIsSkip = bestCS->getCU(pm.chType)->rootCbf == 0;
      }
      tempCS->initStructData(encTestMode.qp);
    }
  }
  // Recompute the deblocking-aware cost of the best structure when it changed during this test.
  if (m_bestModeUpdated && bestCS->cost != MAX_DOUBLE)
  {
    xCalDebCost(*bestCS, pm);
  }
}
#endif
#if MERGE_ENC_OPT
/** First-pass cost estimation of the regular merge candidates.
 *
 *  Every valid candidate of mergeCtx is motion-compensated into *singleMergeTempBuffer,
 *  its luma distortion is measured with distParam and combined with the estimated PU bits,
 *  and the result is offered to RdModeList / candCostList through updateCandList().
 *  Whenever a candidate lands in one of the first MMVD_MRG_MAX_RD_NUM list positions, the
 *  buffer pointers in acMergeTempBuffer are rotated so that entry k holds the prediction
 *  of list position k, and singleMergeTempBuffer receives the pointer that dropped out.
 *
 *  \param tempCS                 coding structure; supplies the block area (and the SPS for MHP)
 *  \param cu, pu                 units under test; their merge-related flags are overwritten
 *  \param mergeCtx               merge candidate list (local copy)
 *  \param acMergeTempBuffer      prediction buffers ordered like RdModeList
 *  \param singleMergeTempBuffer  spare prediction buffer the current candidate is written to
 *  \param acMergeTmpBuffer       per-candidate buffers bound to m_acMergeTmpBuffer here
 *  \param uiNumMrgSATDCand       capacity of the candidate list
 *  \param RdModeList             candidate modes ordered by cost (updated)
 *  \param candCostList           costs matching RdModeList (updated)
 *  \param distParam              distortion setup (local copy); only its cur buffer is changed
 *  \param ctxStart               CABAC context state restored before every bit estimation
 */
void EncCu::xCheckSATDCostRegularMerge(CodingStructure *&tempCS, CodingUnit &cu, PredictionUnit &pu, MergeCtx mergeCtx, PelUnitBuf *acMergeTempBuffer[MMVD_MRG_MAX_RD_NUM], PelUnitBuf *&singleMergeTempBuffer, PelUnitBuf  acMergeTmpBuffer[MRG_MAX_NUM_CANDS]
#if !MULTI_PASS_DMVR
  , Mv   refinedMvdL0[MAX_NUM_PARTS_IN_CTU][MRG_MAX_NUM_CANDS]
#endif
  , unsigned& uiNumMrgSATDCand, static_vector<ModeInfo, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM>  &RdModeList, static_vector<double, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM> &candCostList, DistParam distParam, const TempCtx &ctxStart
#if MULTI_PASS_DMVR
  , bool* applyBDMVR
#endif
)
{
  // Configure the CU/PU as a plain regular-merge block.
  cu.imv      = 0;
  cu.affine   = false;
  cu.geoFlag  = false;
  cu.mmvdSkip = false;
#if INTER_LIC
  cu.LICFlag  = false;
#endif

  pu.regularMergeFlag = true;
  pu.mmvdMergeFlag    = false;
  pu.ciipFlag         = false;
#if CIIP_PDPC
  pu.ciipPDPC         = false;
#endif

  const UnitArea localUnitArea(tempCS->area.chromaFormat, Area(0, 0, tempCS->area.Y().width, tempCS->area.Y().height));
  const double   lambdaPerFracBit = m_pcRdCost->getMotionLambda() * FRAC_BITS_SCALE;
#if MULTI_HYP_PRED
  // Multi-hypothesis base results are only collected for blocks above the size restrictions.
  const bool mhpAllowed = tempCS->sps->getUseInterMultiHyp()
                       && tempCS->area.lumaSize().area() > MULTI_HYP_PRED_RESTRICT_BLOCK_SIZE
                       && std::min(tempCS->area.lwidth(), tempCS->area.lheight()) >= MULTI_HYP_PRED_RESTRICT_MIN_WH;
#endif

  for (uint32_t candIdx = 0; candIdx < mergeCtx.numValidMergeCand; candIdx++)
  {
    mergeCtx.setMergeInfo(pu, candIdx);
#if MULTI_PASS_DMVR
    pu.bdmvrRefine = false;
#endif
    pu.mvRefine   = true;
    distParam.cur = singleMergeTempBuffer->Y();
    acMergeTmpBuffer[candIdx] = m_acMergeTmpBuffer[candIdx].getBuf(localUnitArea);

    // ---- prediction of the candidate into *singleMergeTempBuffer ----
#if INTER_LIC
    // Every candidate except bi-prediction (interDir 3) keeps a copy of the prediction before LIC.
    m_pcInterSearch->m_storeBeforeLIC = (mergeCtx.interDirNeighbours[candIdx] != 3);
    if (m_pcInterSearch->m_storeBeforeLIC)
    {
      m_pcInterSearch->m_predictionBeforeLIC = acMergeTmpBuffer[candIdx];
      m_pcInterSearch->motionCompensation(pu, *singleMergeTempBuffer, REF_PIC_LIST_X, true, true);
    }
    else
#endif
#if MULTI_PASS_DMVR
    if (applyBDMVR[candIdx])
    {
      if (pu.cu->cs->sps->getUseCiip())
      {
        // Additional prediction with ciipFlag set and MV refinement off, stored in acMergeTmpBuffer.
#if MULTI_HYP_PRED
        pu.addHypData.clear();
        pu.numMergedAddHyps = 0;
#endif
        pu.mvRefine = false;
        pu.ciipFlag = true;
        m_pcInterSearch->motionCompensation(pu, acMergeTmpBuffer[candIdx]);
        pu.ciipFlag = false;
#if MULTI_HYP_PRED
        mergeCtx.setMergeInfo(pu, candIdx);
#endif
      }
      pu.bdmvrRefine = true;
      m_pcInterSearch->setBdmvrSubPuMvBuf(m_mvBufBDMVR[candIdx << 1], m_mvBufBDMVR[(candIdx << 1) + 1]);

      pu.mvRefine = true;
      m_pcInterSearch->motionCompensation(pu, *singleMergeTempBuffer);

      // Keep the BDOF sub-PU offsets of this candidate if bdmvrRefine is still set after prediction.
      if (pu.bdmvrRefine)
      {
        ::memcpy(m_mvBufEncBDOF[candIdx], m_pcInterSearch->getBdofSubPuMvOffset(), sizeof(Mv) * BDOF_SUBPU_MAX_NUM);
      }

      pu.mvRefine = false;
    }
    else
#endif
    {
      m_pcInterSearch->motionCompensation(pu, *singleMergeTempBuffer, REF_PIC_LIST_X, true, true, &(acMergeTmpBuffer[candIdx]));
    }
#if INTER_LIC
    m_pcInterSearch->m_storeBeforeLIC = false;
#endif
    pu.mvRefine = false;

#if !MULTI_PASS_DMVR
    if (mergeCtx.interDirNeighbours[candIdx] == 3)
    {
      // Write the PU motion vectors after prediction back into the merge context.
      mergeCtx.mvFieldNeighbours[2 * candIdx].mv     = pu.mv[0];
      mergeCtx.mvFieldNeighbours[2 * candIdx + 1].mv = pu.mv[1];
      if (PU::checkDMVRCondition(pu))
      {
        // Store the list-0 MVD of every DMVR sub-block for this candidate.
        const int stepY = std::min<int>(pu.lumaSize().height, DMVR_SUBCU_HEIGHT);
        const int stepX = std::min<int>(pu.lumaSize().width, DMVR_SUBCU_WIDTH);
        int subBlkIdx = 0;
        for (int y = 0; y < (pu.lumaSize().height); y += stepY)
        {
          for (int x = 0; x < (pu.lumaSize().width); x += stepX)
          {
            refinedMvdL0[subBlkIdx][candIdx] = pu.mvdL0SubPu[subBlkIdx];
            subBlkIdx++;
          }
        }
      }
    }
#endif

    // ---- cost = distortion + lambda * estimated PU bits ----
    const Distortion distortion = distParam.distFunc(distParam);
    m_CABACEstimator->getCtx() = ctxStart;
    const uint64_t puFracBits = m_pcInterSearch->xCalcPuMeBits(pu);
    const double   candCost   = static_cast<double>(distortion) + static_cast<double>(puFracBits) * lambdaPerFracBit;
#if MULTI_HYP_PRED
    if (mhpAllowed && pu.addHypData.size() < tempCS->sps->getMaxNumAddHyps())
    {
#if MULTI_PASS_DMVR
      CHECK(pu.bdmvrRefine && !applyBDMVR[candIdx], "");
#endif
      // merge index bins plus one bit each for merge flag, subblock_merge_flag and regular_merge_flag
      uint32_t baseBits = candIdx + 1 + 1 + 1;
      MEResult baseResult;
      baseResult.cu   = cu;
      baseResult.pu   = pu;
      baseResult.bits = baseBits;
      baseResult.cost = distortion + m_pcRdCost->getCost(baseBits);
      m_baseResultsForMH.push_back(baseResult);
    }
#endif

    // ---- sorted insertion, keeping the prediction buffers aligned with RdModeList ----
    int insertPos = -1;
    updateCandList(ModeInfo(cu, pu), candCost, RdModeList, candCostList, uiNumMrgSATDCand, &insertPos);
    if (insertPos != -1 && insertPos < MMVD_MRG_MAX_RD_NUM)
    {
      // Shift the buffers behind the insertion point one slot down (nothing to shift when the
      // candidate was appended at the end), then hand the fresh prediction to its slot.
      for (int slot = int(RdModeList.size()) - 1; slot > insertPos; slot--)
      {
        swap(acMergeTempBuffer[slot - 1], acMergeTempBuffer[slot]);
      }
      swap(singleMergeTempBuffer, acMergeTempBuffer[insertPos]);
    }
    CHECK(std::min(candIdx + 1, uiNumMrgSATDCand) != RdModeList.size(), "");
  }
#if MULTI_PASS_DMVR
  pu.bdmvrRefine = false;
#endif
}

/** First-pass cost estimation of the CIIP (combined inter/intra) merge candidates.
 *
 *  For every valid merge candidate the inter prediction found in acMergeTmpBuffer is blended
 *  with an intra prediction into *singleMergeTempBuffer, the luma distortion plus estimated
 *  PU bits is computed, and the mode is offered to RdModeList / candCostList.
 *  acMergeTmpBuffer is only read: this function does not (re)fill it.
 *  Unlike the regular-merge check, cu.imv is not reset here.
 *
 *  \param tempCS                 coding structure; supplies the block area
 *  \param cu, pu                 units under test; merge-related flags are overwritten,
 *                                pu.ciipFlag / pu.ciipPDPC are cleared again on exit
 *  \param mergeCtx               merge candidate list (local copy)
 *  \param acMergeTempBuffer      prediction buffers ordered like RdModeList
 *  \param singleMergeTempBuffer  spare prediction buffer the current blend is written to
 *  \param acMergeTmpBuffer       per-candidate inter predictions
 *  \param uiNumMrgSATDCand       capacity of the candidate list
 *  \param RdModeList             candidate modes ordered by cost (updated)
 *  \param candCostList           costs matching RdModeList (updated)
 *  \param distParam              distortion setup (local copy); only its cur buffer is changed
 *  \param ctxStart               CABAC context state restored before every bit estimation
 */
void EncCu::xCheckSATDCostCiipMerge(CodingStructure *&tempCS, CodingUnit &cu, PredictionUnit &pu, MergeCtx mergeCtx, PelUnitBuf *acMergeTempBuffer[MMVD_MRG_MAX_RD_NUM], PelUnitBuf *&singleMergeTempBuffer, PelUnitBuf  acMergeTmpBuffer[MRG_MAX_NUM_CANDS]
  , unsigned& uiNumMrgSATDCand, static_vector<ModeInfo, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM>  &RdModeList, static_vector<double, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM> &candCostList, DistParam distParam, const TempCtx &ctxStart)
{
  // Configure the CU/PU as a CIIP merge block.
  cu.affine   = false;
  cu.geoFlag  = false;
  cu.mmvdSkip = false;
#if INTER_LIC
  cu.LICFlag  = false;
#endif

  pu.regularMergeFlag = false;
  pu.mmvdMergeFlag    = false;
  pu.ciipFlag         = true;
#if CIIP_PDPC
  pu.ciipPDPC         = false;
#endif

  const UnitArea localUnitArea(tempCS->area.chromaFormat, Area(0, 0, tempCS->area.Y().width, tempCS->area.Y().height));
  const double   lambdaPerFracBit = m_pcRdCost->getMotionLambda() * FRAC_BITS_SCALE;

#if JVET_X0141_CIIP_TIMD_TM && JVET_W0123_TIMD_FUSION
  // Intra mode of the blend: planar, or the mapped TIMD mode when TIMD is enabled and the
  // block does not exceed CIIP_MAX_SIZE luma samples.
  int intraMode = PLANAR_IDX;
  if (mergeCtx.numValidMergeCand && cu.slice->getSPS()->getUseTimd() && (cu.lwidth() * cu.lheight() <= CIIP_MAX_SIZE))
  {
    const CompArea &lumaArea = cu.Y();
#if SECONDARY_MPM && ENABLE_DIMD
    IntraPrediction::deriveDimdMode(cu.cs->picture->getRecoBuf(lumaArea), lumaArea, cu);
#endif
    cu.timdMode = m_pcIntraSearch->deriveTimdMode(cu.cs->picture->getRecoBuf(lumaArea), lumaArea, cu);
    intraMode   = MAP131TO67(cu.timdMode);
  }
#endif

  for (uint32_t candIdx = 0; candIdx < mergeCtx.numValidMergeCand; candIdx++)
  {
    mergeCtx.setMergeInfo(pu, candIdx);
    pu.intraDir[0] = PLANAR_IDX;

    // One round per intra variant; with CIIP_PDPC the second round sets pu.ciipPDPC.
#if CIIP_PDPC
    for (int intraCnt = 0; intraCnt < 2; intraCnt++)
    {
      pu.ciipPDPC = (intraCnt == 1);
#else
    {
      uint32_t intraCnt = 0;
#endif
      PelBuf intraPred = m_ciipBuffer[intraCnt].getBuf(localUnitArea.Y());

#if JVET_X0141_CIIP_TIMD_TM && JVET_W0123_TIMD_FUSION
      pu.intraDir[0] = (intraCnt != 1) ? intraMode : PLANAR_IDX;
#endif
      // The intra prediction is generated while processing the first merge candidate only;
      // later candidates reuse the content of m_ciipBuffer.
      if (candIdx == 0)
      {
        m_pcIntraSearch->initIntraPatternChType(*pu.cu, pu.Y());
        m_pcIntraSearch->predIntraAng(COMPONENT_Y, intraPred, pu);
      }

      // Blend inter and intra luma; with LMCS active the blend uses the forward LUT and the
      // distortion is measured on a copy passed through the inverse LUT.
      const bool lmcsActive = pu.cs->picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag();
      if (lmcsActive)
      {
        m_pcIntraSearch->geneWeightedPred<true>(COMPONENT_Y, singleMergeTempBuffer->Y(), pu, acMergeTmpBuffer[candIdx].Y(), intraPred, m_pcReshape->getFwdLUT().data());

        PelBuf invReshaped = m_acGeoWeightedBuffer->getBuf(localUnitArea.Y());
        invReshaped.rspSignal(singleMergeTempBuffer->Y(), m_pcReshape->getInvLUT());
        distParam.cur = invReshaped;
      }
      else
      {
        m_pcIntraSearch->geneWeightedPred<false>(COMPONENT_Y, singleMergeTempBuffer->Y(), pu, acMergeTmpBuffer[candIdx].Y(), intraPred);
        distParam.cur = singleMergeTempBuffer->Y();
      }

      // cost = distortion + lambda * estimated PU bits
      const Distortion distortion = distParam.distFunc(distParam);
      m_CABACEstimator->getCtx() = ctxStart;
      pu.regularMergeFlag = false;
      const uint64_t puFracBits = m_pcInterSearch->xCalcPuMeBits(pu);
      const double   candCost   = static_cast<double>(distortion) + static_cast<double>(puFracBits) * lambdaPerFracBit;

      // Sorted insertion; keep the prediction buffers aligned with RdModeList.
      int insertPos = -1;
      updateCandList(ModeInfo(cu, pu), candCost, RdModeList, candCostList, uiNumMrgSATDCand, &insertPos);
      if (insertPos != -1 && insertPos < MMVD_MRG_MAX_RD_NUM)
      {
        int slot = int(RdModeList.size()) - 1;
        while (slot > insertPos)
        {
          swap(acMergeTempBuffer[slot - 1], acMergeTempBuffer[slot]);
          --slot;
        }
        swap(singleMergeTempBuffer, acMergeTempBuffer[insertPos]);
      }
    }
  }

  // Leave the PU without CIIP flags for whatever is tested next.
  pu.ciipFlag = false;
#if CIIP_PDPC
  pu.ciipPDPC = false;
#endif
}

#if JVET_X0141_CIIP_TIMD_TM && TM_MRG
/**
 * First-pass (distortion + estimated bits) cost check of CIIP candidates whose inter part comes from
 * the template-matching merge list.
 *
 * For every candidate in \p mergeCtx the inter prediction is generated, blended with an intra prediction,
 * costed with \p distParam and offered to the shared candidate list through updateCandList().
 *
 * \param tempCS                 coding structure under test (supplies area and chroma format)
 * \param cu                     coding unit whose mode flags are overwritten for CIIP-TM testing
 * \param pu                     prediction unit that receives each candidate's merge information
 * \param mergeCtx               merge candidate list (passed by value)
 * \param acMergeTempBuffer      prediction buffers kept in the same order as \p RdModeList
 * \param singleMergeTempBuffer  scratch prediction buffer; swapped into \p acMergeTempBuffer on insertion
 * \param acTmMergeTmpBuffer     per-candidate inter prediction buffers, (re)bound here to m_acTmMergeTmpBuffer
 * \param uiNumMrgSATDCand       candidate budget forwarded to updateCandList()
 * \param RdModeList             candidate modes ordered by updateCandList()
 * \param candCostList           costs matching \p RdModeList
 * \param distParam              distortion functor setup (passed by value; only the local copy's cur is changed)
 * \param ctxStart               CABAC context state restored before every bit estimation
 */
void EncCu::xCheckSATDCostCiipTmMerge(CodingStructure *&tempCS, CodingUnit &cu, PredictionUnit &pu, MergeCtx mergeCtx, PelUnitBuf *acMergeTempBuffer[MMVD_MRG_MAX_RD_NUM], PelUnitBuf *&singleMergeTempBuffer, PelUnitBuf  acTmMergeTmpBuffer[MRG_MAX_NUM_CANDS]
  , unsigned& uiNumMrgSATDCand, static_vector<ModeInfo, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM>  &RdModeList, static_vector<double, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM> &candCostList, DistParam distParam, const TempCtx &ctxStart)
{
  // Put CU and PU into the "CIIP on top of TM merge" configuration.
#if INTER_LIC
  cu.LICFlag = false;
#endif
  cu.mmvdSkip = false;
  cu.geoFlag = false;
  cu.affine = false;
  //cu.imv = 0;

  pu.ciipFlag = true;
#if CIIP_PDPC
  pu.ciipPDPC = false;
#endif
  pu.mmvdMergeFlag = false;
  pu.regularMergeFlag = false;
  pu.tmMergeFlag = true;

  // Lambda applied to the fractional bit count returned by xCalcPuMeBits().
  const double sqrtLambdaForFirstPassIntra = m_pcRdCost->getMotionLambda() * FRAC_BITS_SCALE;
  // Block-sized area anchored at (0,0), used to take views of the temporary buffers.
  const UnitArea localUnitArea(tempCS->area.chromaFormat, Area(0, 0, tempCS->area.Y().width, tempCS->area.Y().height));

  // Intra mode for the CIIP blend: planar unless a TIMD mode is derived below.
  int intraMode = PLANAR_IDX;
#if JVET_W0123_TIMD_FUSION
  if (mergeCtx.numValidMergeCand)
  {
    const CompArea &area = cu.Y();
    // TIMD derivation is only done when enabled in the SPS and the luma block area does not exceed CIIP_MAX_SIZE.
    if (cu.slice->getSPS()->getUseTimd() && (cu.lwidth() * cu.lheight() <= CIIP_MAX_SIZE))
    {
#if SECONDARY_MPM && ENABLE_DIMD
      IntraPrediction::deriveDimdMode(cu.cs->picture->getRecoBuf(area), area, cu);
#endif
      cu.timdMode = m_pcIntraSearch->deriveTimdMode(cu.cs->picture->getRecoBuf(area), area, cu);
      intraMode = MAP131TO67(cu.timdMode);
    }
  }
#endif

  int insertPos = -1;
  for (uint32_t mergeCand = 0; mergeCand < mergeCtx.numValidMergeCand; mergeCand++)
  {
    //acTmMergeTmpBuffer[mergeCand] = m_acTmMergeTmpBuffer[mergeCand].getBuf(localUnitArea);
    // load the motion of this merge candidate into the PU
    mergeCtx.setMergeInfo(pu, mergeCand);

#if MULTI_HYP_PRED
    pu.addHypData.clear();
    pu.numMergedAddHyps = 0;
#endif
    // Inter prediction of this candidate; stored per candidate in acTmMergeTmpBuffer.
    acTmMergeTmpBuffer[mergeCand] = m_acTmMergeTmpBuffer[mergeCand].getBuf(localUnitArea);
    m_pcInterSearch->motionCompensation(pu, acTmMergeTmpBuffer[mergeCand]);

    // first round
    // (this value is overwritten below before the intra prediction is generated)
    pu.intraDir[0] = PLANAR_IDX;
#if CIIP_PDPC
    // With CIIP_PDPC two variants are tested per candidate: intraCnt 0 (PDPC off) and intraCnt 1 (PDPC on).
    // The loop scope opened here is closed by the matching "#if CIIP_PDPC" brace near the end of the candidate loop.
    for (int intraCnt = 0; intraCnt < 2; intraCnt++)
    {
      pu.ciipPDPC = intraCnt == 1;
#else
    uint32_t intraCnt = 0;
#endif
    PelBuf ciipBuff = m_ciipBuffer[intraCnt].getBuf(localUnitArea.Y());

    // Variant 1 always uses planar; variant 0 uses the mode selected above.
    pu.intraDir[0] = (intraCnt == 1) ? PLANAR_IDX : intraMode;
    // generate intrainter Y prediction
    // The intra part is predicted only for the first candidate; m_ciipBuffer[intraCnt] is reused for the others.
    if (mergeCand == 0)
    {
      m_pcIntraSearch->initIntraPatternChType(*pu.cu, pu.Y());
      m_pcIntraSearch->predIntraAng(COMPONENT_Y, ciipBuff, pu);
    }

    // Blend inter and intra luma into the scratch buffer; the LMCS variant additionally receives the forward LUT.
    if (pu.cs->picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
    {
      m_pcIntraSearch->geneWeightedPred<true>(COMPONENT_Y, singleMergeTempBuffer->Y(), pu, acTmMergeTmpBuffer[mergeCand].Y(), ciipBuff, m_pcReshape->getFwdLUT().data());
    }
    else
    {
      m_pcIntraSearch->geneWeightedPred<false>(COMPONENT_Y, singleMergeTempBuffer->Y(), pu, acTmMergeTmpBuffer[mergeCand].Y(), ciipBuff);
    }

    // calculate cost
    if (pu.cs->picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
    {
      // With LMCS active the distortion is measured on a copy mapped through the inverse LUT.
      PelBuf tmp = m_acGeoWeightedBuffer->getBuf(localUnitArea.Y());
      tmp.rspSignal(singleMergeTempBuffer->Y(), m_pcReshape->getInvLUT());
      distParam.cur = tmp;
    }
    else
    {
      distParam.cur = singleMergeTempBuffer->Y();
    }

    Distortion sadValue = distParam.distFunc(distParam);
    // Bit estimation always starts from the same CABAC context state.
    m_CABACEstimator->getCtx() = ctxStart;
    pu.regularMergeFlag = false;
    uint64_t fracBits = m_pcInterSearch->xCalcPuMeBits(pu);
    double cost = (double)sadValue + (double)fracBits * sqrtLambdaForFirstPassIntra; // need to check the cost calculation again???
    insertPos = -1;
    updateCandList(ModeInfo(cu, pu), cost, RdModeList, candCostList, uiNumMrgSATDCand, &insertPos);
    // insertPos stays -1 unless updateCandList() reports a slot; only slots that have a buffer are mirrored.
    // NOTE(review): the shift loop indexes acMergeTempBuffer up to RdModeList.size() - 1 - confirm this never
    // exceeds the MMVD_MRG_MAX_RD_NUM entries of the array.
    if (insertPos != -1 && insertPos < MMVD_MRG_MAX_RD_NUM)
    {
      // Shift buffer pointers down by one from insertPos, then place the fresh prediction at insertPos.
      for (int i = int(RdModeList.size()) - 1; i > insertPos; i--)
      {
        swap(acMergeTempBuffer[i - 1], acMergeTempBuffer[i]);
      }
      swap(singleMergeTempBuffer, acMergeTempBuffer[insertPos]);
    }
#if CIIP_PDPC
    }
#endif
  }
// Leave the PU without CIIP / TM merge flags set.
pu.ciipFlag = false;
#if CIIP_PDPC
pu.ciipPDPC = false;
#endif
pu.tmMergeFlag = false;
}
#endif

/**
 * First-pass (distortion + estimated bits) cost check of MMVD merge candidates.
 *
 * Each tested MMVD index is turned into motion via MergeCtx::setMmvdMergeCandiInfo(), luma is motion
 * compensated into the scratch buffer, costed with \p distParam and offered to the shared candidate list.
 * With MULTI_HYP_PRED, eligible candidates are also recorded in m_baseResultsForMH.
 *
 * \param tempCS                 coding structure under test (SPS and block size are read from it)
 * \param cu                     coding unit whose mode flags are overwritten for MMVD testing
 * \param pu                     prediction unit that receives each candidate's motion
 * \param mergeCtx               merge candidate list providing the MMVD base candidates (passed by value)
 * \param acMergeTempBuffer      prediction buffers kept in the same order as \p RdModeList
 * \param singleMergeTempBuffer  scratch prediction buffer; swapped into \p acMergeTempBuffer on insertion
 * \param uiNumMrgSATDCand       candidate budget forwarded to updateCandList()
 * \param RdModeList             candidate modes ordered by updateCandList()
 * \param candCostList           costs matching \p RdModeList
 * \param distParam              distortion functor setup (passed by value; only the local copy's cur is changed)
 * \param ctxStart               CABAC context state restored before every bit estimation
 * \param mmvdLUT                (JVET_Y0067 builds only) maps the tested position to an MMVD index;
 *                               a null pointer selects the identity mapping
 */
void EncCu::xCheckSATDCostMmvdMerge(CodingStructure *&tempCS, CodingUnit &cu, PredictionUnit &pu, MergeCtx mergeCtx, PelUnitBuf *acMergeTempBuffer[MMVD_MRG_MAX_RD_NUM], PelUnitBuf *&singleMergeTempBuffer
  , unsigned& uiNumMrgSATDCand, static_vector<ModeInfo, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM>  &RdModeList, static_vector<double, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM> &candCostList, DistParam distParam, const TempCtx &ctxStart
#if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED
    , uint32_t * mmvdLUT
#endif
                                    )
{
  // Put CU and PU into the MMVD merge configuration.
#if INTER_LIC
  cu.LICFlag = false;
#endif
  cu.mmvdSkip = true;
  cu.geoFlag = false;
  cu.affine = false;
  cu.imv = 0;

  pu.ciipFlag = false;
#if CIIP_PDPC
  pu.ciipPDPC = false;
#endif
  pu.mmvdMergeFlag = true;
  pu.regularMergeFlag = true;

  // Lambda applied to the fractional bit count returned by xCalcPuMeBits().
  const double sqrtLambdaForFirstPassIntra = m_pcRdCost->getMotionLambda() * FRAC_BITS_SCALE;
  int insertPos = -1;
#if MULTI_HYP_PRED
  // Multi-hypothesis base results are collected only when the tool is on and the block is large enough.
  const bool testMHP = tempCS->sps->getUseInterMultiHyp()
    && (tempCS->area.lumaSize().area() > MULTI_HYP_PRED_RESTRICT_BLOCK_SIZE
      && std::min(tempCS->area.lwidth(), tempCS->area.lheight()) >= MULTI_HYP_PRED_RESTRICT_MIN_WH);
#endif

  // Number of MMVD positions to walk through.
#if JVET_AA0093_ENHANCED_MMVD_EXTENSION
  const int tempNum =  (std::min<int>(MMVD_BASE_MV_NUM, mergeCtx.numValidMergeCand) * MMVD_MAX_REFINE_NUM);
#else
  const int tempNum = (mergeCtx.numValidMergeCand > 1) ? MMVD_ADD_NUM : MMVD_ADD_NUM >> 1;
#endif
#if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED
  for (int mmvdMergeCandtemp = 0; mmvdMergeCandtemp < tempNum; mmvdMergeCandtemp++)
  {
    // Position of this entry within its base candidate's group of MMVD_MAX_REFINE_NUM entries.
    const int posInBase = mmvdMergeCandtemp % MMVD_MAX_REFINE_NUM;
    // Only the leading positions of every group are tested; the rest are skipped.
#if JVET_AA0093_ENHANCED_MMVD_EXTENSION
    if (posInBase >= ((MMVD_MAX_REFINE_NUM >> MMVD_SIZE_SHIFT) / MMVD_BI_DIR))
#else
    if (posInBase >= (MMVD_MAX_REFINE_NUM >> MMVD_SIZE_SHIFT))
#endif
    {
      continue;
    }
    // Translate the tested position into the actual MMVD index (identity when no LUT is supplied).
    int mmvdMergeCand = (mmvdLUT == nullptr) ? mmvdMergeCandtemp : mmvdLUT[mmvdMergeCandtemp];
#else
  for (int mmvdMergeCand = 0; mmvdMergeCand < tempNum; mmvdMergeCand++)
  {
#endif
    // Split the MMVD index into base candidate and refinement step.
    int baseIdx = mmvdMergeCand / MMVD_MAX_REFINE_NUM;
#if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED
    int refineStep = (mmvdMergeCand - (baseIdx * MMVD_MAX_REFINE_NUM)) / MMVD_MAX_DIR;
#else
    int refineStep = (mmvdMergeCand - (baseIdx * MMVD_MAX_REFINE_NUM)) / 4;
#endif
#if  !JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED
    // Respect the encoder-configured number of MMVD distances.
    if( refineStep >= m_pcEncCfg->getMmvdDisNum() )
    {
      continue;
    }
#endif

#if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED
    mergeCtx.setMmvdMergeCandiInfo(pu, mmvdMergeCandtemp, mmvdMergeCand);
#else
    mergeCtx.setMmvdMergeCandiInfo(pu, mmvdMergeCand);
#endif
    pu.mvRefine = true;
    // Re-read the scratch buffer every iteration: the pointer may have been swapped on a previous insertion.
    distParam.cur = singleMergeTempBuffer->Y();
    // mmvdEncOptMode and mvRefine are set only for the duration of this motion compensation call.
#if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED
    pu.mmvdEncOptMode = (refineStep > 1 ? 2 : 1);
#else
    pu.mmvdEncOptMode = (refineStep > 2 ? 2 : 1);
#endif
    CHECK(!pu.mmvdMergeFlag, "MMVD merge should be set");
    // Don't do chroma MC here
    m_pcInterSearch->motionCompensation(pu, *singleMergeTempBuffer, REF_PIC_LIST_X, true, false);
    pu.mmvdEncOptMode = 0;
    pu.mvRefine = false;
    Distortion uiSad = distParam.distFunc(distParam);

    // Bit estimation always starts from the same CABAC context state.
    m_CABACEstimator->getCtx() = ctxStart;
    uint64_t fracBits = m_pcInterSearch->xCalcPuMeBits(pu);
    double cost = (double)uiSad + (double)fracBits * sqrtLambdaForFirstPassIntra;
#if MULTI_HYP_PRED
    if (testMHP && pu.addHypData.size() < tempCS->sps->getMaxNumAddHyps())
    {
      uint32_t uiBitsCand = baseIdx + refineStep + 2 + 1 + 1 + 1; // one bit for merge flag, one bit for subblock_merge_flag, and one bit for regular_merge_flag
      MEResult mergeResult;
      mergeResult.cu = cu;
      mergeResult.pu = pu;
      mergeResult.bits = uiBitsCand;
      mergeResult.cost = uiSad + m_pcRdCost->getCost(uiBitsCand);

      m_baseResultsForMH.push_back(mergeResult);
    }
#endif
    insertPos = -1;
    updateCandList(ModeInfo(cu, pu), cost, RdModeList, candCostList, uiNumMrgSATDCand, &insertPos);
    // insertPos stays -1 unless updateCandList() reports a slot; only slots that have a buffer are mirrored.
    if (insertPos != -1 && insertPos < MMVD_MRG_MAX_RD_NUM)
    {
      // Shift buffer pointers down by one from insertPos, then place the fresh prediction at insertPos.
      for (int i = int(RdModeList.size()) - 1; i > insertPos; i--)
      {
        swap(acMergeTempBuffer[i - 1], acMergeTempBuffer[i]);
      }
      swap(singleMergeTempBuffer, acMergeTempBuffer[insertPos]);
    }
  }
}

/**
 * First-pass (distortion + estimated bits) cost check of the affine / subblock merge candidates.
 *
 * Every candidate of \p affineMergeCtx is loaded into \p pu, luma is motion compensated into the scratch
 * buffer, costed with \p distParam and offered to the shared candidate list through updateCandList().
 * With MULTI_HYP_PRED, eligible candidates are also recorded in m_baseResultsForMH.
 *
 * \param tempCS                 coding structure under test (SPS, picture header and block size are read)
 * \param cu                     coding unit whose mode flags are overwritten for affine merge testing
 * \param pu                     prediction unit that receives each candidate's motion
 * \param affineMergeCtx         affine merge candidate list (passed by value)
 * \param mrgCtx                 merge context handed to PU::spanMotionInfo() for SbTMVP candidates
 * \param acMergeTempBuffer      prediction buffers kept in the same order as \p RdModeList
 * \param singleMergeTempBuffer  scratch prediction buffer; swapped into \p acMergeTempBuffer on insertion
 * \param uiNumMrgSATDCand       candidate budget forwarded to updateCandList()
 * \param RdModeList             candidate modes ordered by updateCandList()
 * \param candCostList           costs matching \p RdModeList
 * \param distParam              distortion functor setup (passed by value; only the local copy's cur is changed)
 * \param ctxStart               CABAC context state restored before every bit estimation
 */
void EncCu::xCheckSATDCostAffineMerge(CodingStructure *&tempCS, CodingUnit &cu, PredictionUnit &pu, AffineMergeCtx affineMergeCtx, MergeCtx& mrgCtx, PelUnitBuf *acMergeTempBuffer[MMVD_MRG_MAX_RD_NUM], PelUnitBuf *&singleMergeTempBuffer
  , unsigned& uiNumMrgSATDCand, static_vector<ModeInfo, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM>  &RdModeList, static_vector<double, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM> &candCostList, DistParam distParam, const TempCtx &ctxStart)
{
  // CU-level configuration: affine on, every competing merge tool off.
  cu.affine   = true;
  cu.geoFlag  = false;
  cu.mmvdSkip = false;
#if INTER_LIC
  cu.LICFlag  = false;
#endif

  // PU-level configuration.
  pu.mergeFlag        = true;
  pu.regularMergeFlag = false;
  pu.mmvdMergeFlag    = false;
  pu.ciipFlag         = false;
#if CIIP_PDPC
  pu.ciipPDPC         = false;
#endif
#if MULTI_HYP_PRED
  pu.numMergedAddHyps = 0;
  pu.addHypData.clear();
  // Multi-hypothesis base results are collected only when the tool is on and the block is large enough.
  const bool testMHP = tempCS->sps->getUseInterMultiHyp()
    && (tempCS->area.lumaSize().area() > MULTI_HYP_PRED_RESTRICT_BLOCK_SIZE
      && std::min(tempCS->area.lwidth(), tempCS->area.lheight()) >= MULTI_HYP_PRED_RESTRICT_MIN_WH);
#endif

  // Lambda applied to the fractional bit count returned by xCalcPuMeBits().
  const double bitLambda = m_pcRdCost->getMotionLambda() * FRAC_BITS_SCALE;

  for (uint32_t candIdx = 0; candIdx < affineMergeCtx.numValidMergeCand; candIdx++)
  {
    // Each candidate owns two consecutive entries of mvFieldNeighbours: one per reference list.
    const uint32_t slotL0 = (candIdx << 1) + 0;
    const uint32_t slotL1 = (candIdx << 1) + 1;

    // Copy the candidate's parameters into CU / PU.
    pu.mergeIdx   = candIdx;
    pu.interDir   = affineMergeCtx.interDirNeighbours[candIdx];
    pu.mergeType  = affineMergeCtx.mergeType[candIdx];
    cu.affineType = affineMergeCtx.affineType[candIdx];
    cu.BcwIdx     = affineMergeCtx.BcwIdx[candIdx];
#if INTER_LIC
    cu.LICFlag    = affineMergeCtx.LICFlags[candIdx];
#endif
    cu.imv        = 0;
    pu.mv[0].setZero();
    pu.mv[1].setZero();

    if (pu.mergeType != MRG_TYPE_SUBPU_ATMVP)
    {
      PU::setAllAffineMvField(pu, affineMergeCtx.mvFieldNeighbours[slotL0], REF_PIC_LIST_0);
      PU::setAllAffineMvField(pu, affineMergeCtx.mvFieldNeighbours[slotL1], REF_PIC_LIST_1);
    }
    else
    {
      pu.refIdx[0] = affineMergeCtx.mvFieldNeighbours[slotL0][0].refIdx;
      pu.refIdx[1] = affineMergeCtx.mvFieldNeighbours[slotL1][0].refIdx;
      // SbTMVP goes through xSubPuMC, which reads the sub-block MVs from the motion buffer
      PU::spanMotionInfo(pu, mrgCtx);
    }

    // The scratch pointer may have been swapped on a previous insertion, so bind it again.
    distParam.cur = singleMergeTempBuffer->Y();

    m_pcInterSearch->motionCompensation(pu, *singleMergeTempBuffer, REF_PIC_LIST_X, true, false);
    Distortion uiSad = distParam.distFunc(distParam);

    // Bit estimation always starts from the same CABAC context state.
    m_CABACEstimator->getCtx() = ctxStart;
    const uint64_t fracBits = m_pcInterSearch->xCalcPuMeBits(pu);
    const double   cost     = (double)uiSad + (double)fracBits * bitLambda;
#if MULTI_HYP_PRED
    if (testMHP && pu.addHypData.size() < tempCS->sps->getMaxNumAddHyps())
    {
      // Index bits: candIdx + 1, except for the last possible index which needs one bit less;
      // plus one bit for merge flag and one bit for subblock_merge_flag.
      const bool isLastIdx  = (candIdx == tempCS->picHeader->getMaxNumAffineMergeCand() - 1);
      uint32_t   uiBitsCand = candIdx + (isLastIdx ? 0 : 1);
      uiBitsCand += 2;

      MEResult mergeResult;
      mergeResult.cu   = cu;
      mergeResult.pu   = pu;
      mergeResult.bits = uiBitsCand;
      mergeResult.cost = uiSad + m_pcRdCost->getCost(uiBitsCand);
      m_baseResultsForMH.push_back(mergeResult);
    }
#endif
    int rankPos = -1;
    updateCandList(ModeInfo(cu, pu), cost, RdModeList, candCostList, uiNumMrgSATDCand, &rankPos);
#if MERGE_ENC_OPT
    const bool keepPrediction = (rankPos != -1) && (rankPos < MMVD_MRG_MAX_RD_NUM);
#else
    const bool keepPrediction = (rankPos != -1);
#endif
    if (keepPrediction)
    {
      // Shift buffer pointers down by one from rankPos, then place the fresh prediction at rankPos.
      int slot = int(RdModeList.size()) - 1;
      while (slot > rankPos)
      {
        swap(acMergeTempBuffer[slot - 1], acMergeTempBuffer[slot]);
        --slot;
      }
      swap(singleMergeTempBuffer, acMergeTempBuffer[rankPos]);
    }
  }

  // Leave CU / PU in the regular (non-affine) merge configuration.
  cu.affine = false;
  pu.regularMergeFlag = true;
}

#if TM_MRG && MERGE_ENC_OPT
/**
 * First-pass (distortion + estimated bits) cost check of template-matching (TM) merge candidates.
 *
 * Every candidate of \p mrgCtx is loaded into \p pu, motion compensated into the scratch buffer,
 * costed with \p distParam and offered to the shared candidate list through updateCandList().
 *
 * \param tempCS                 coding structure under test (not read in this function)
 * \param cu                     coding unit whose mode flags are overwritten for TM merge testing
 * \param pu                     prediction unit that receives each candidate's motion
 * \param mrgCtx                 TM merge candidate list
 * \param acMergeTempBuffer      prediction buffers kept in the same order as \p RdModeList
 * \param singleMergeTempBuffer  scratch prediction buffer; swapped into \p acMergeTempBuffer on insertion
 * \param uiNumMrgSATDCand       candidate budget forwarded to updateCandList()
 * \param RdModeList             candidate modes ordered by updateCandList()
 * \param candCostList           costs matching \p RdModeList
 * \param distParam              distortion functor setup (passed by value; only the local copy's cur is changed)
 * \param ctxStart               CABAC context state restored before every bit estimation
 * \param applyBDMVR             (MULTI_PASS_DMVR builds only) per-candidate flags, indexed by merge index,
 *                               selecting BDMVR refinement; must not be null
 */
void EncCu::xCheckSATDCostTMMerge(       CodingStructure*& tempCS,
                                         CodingUnit&       cu,
                                         PredictionUnit&   pu,
                                         MergeCtx&         mrgCtx,
                                         PelUnitBuf*       acMergeTempBuffer[MMVD_MRG_MAX_RD_NUM],
                                         PelUnitBuf*&      singleMergeTempBuffer,
                                         unsigned&         uiNumMrgSATDCand,
                                         static_vector<ModeInfo, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM>  &RdModeList,
                                         static_vector<double, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM>    &candCostList,
                                         DistParam         distParam,
                                   const TempCtx&          ctxStart
#if MULTI_PASS_DMVR
                                       , bool*             applyBDMVR
#endif
)
{
#if MULTI_PASS_DMVR
  CHECK(applyBDMVR == nullptr, "Unexpected error");
#endif
  // Put CU and PU into the TM merge configuration.
  pu.mergeFlag        = true;
  cu.mmvdSkip         = false;
  cu.geoFlag          = false;
  cu.affine           = false;
  cu.imv              = IMV_OFF;
  pu.ciipFlag         = false;
#if CIIP_PDPC
  pu.ciipPDPC         = false;
#endif
  pu.mmvdMergeFlag    = false;
  pu.regularMergeFlag = false;
  pu.tmMergeFlag      = true;

  // Lambda applied to the fractional bit count returned by xCalcPuMeBits().
  const double sqrtLambdaForFirstPassIntra = m_pcRdCost->getMotionLambda() * FRAC_BITS_SCALE;
  int insertPos = -1;
  for (uint32_t uiMergeCand = 0; uiMergeCand < mrgCtx.numValidMergeCand; uiMergeCand++)
  {
    mrgCtx.setMergeInfo( pu, uiMergeCand );
    // The preprocessor assembles one of four variants here:
    //  - MULTI_PASS_DMVR && !BDOF_RM_CONSTRAINTS: BDMVR setup if flagged, otherwise spanMotionInfo()
    //  - MULTI_PASS_DMVR &&  BDOF_RM_CONSTRAINTS: BDMVR setup if flagged, nothing otherwise
    //  - !MULTI_PASS_DMVR && !BDOF_RM_CONSTRAINTS: spanMotionInfo() unconditionally
    //  - !MULTI_PASS_DMVR &&  BDOF_RM_CONSTRAINTS: nothing
#if MULTI_PASS_DMVR
    if (applyBDMVR[uiMergeCand])
    {
      // Each candidate owns two consecutive entries of m_mvBufBDMVR4TM.
      pu.bdmvrRefine = true;
      m_pcInterSearch->setBdmvrSubPuMvBuf(m_mvBufBDMVR4TM[uiMergeCand << 1], m_mvBufBDMVR4TM[(uiMergeCand << 1) + 1]);
    }
#if !BDOF_RM_CONSTRAINTS
    else
#endif
#endif
#if !BDOF_RM_CONSTRAINTS
    {
      PU::spanMotionInfo(pu, mrgCtx);
    }
#endif

    pu.mvRefine = false;
#if INTER_LIC
    m_pcInterSearch->m_storeBeforeLIC = false;
#endif
    m_pcInterSearch->motionCompensation(pu, *singleMergeTempBuffer);
#if MULTI_PASS_DMVR
    // Keep the BDOF sub-PU MV offsets of this candidate in m_mvBufEncBDOF4TM.
    // NOTE(review): bdmvrRefine is only ever set to true inside this loop; presumably setMergeInfo() clears it
    // for candidates without BDMVR - confirm.
    if( pu.bdmvrRefine )
    {
      ::memcpy( m_mvBufEncBDOF4TM[uiMergeCand], m_pcInterSearch->getBdofSubPuMvOffset(), sizeof( Mv ) * BDOF_SUBPU_MAX_NUM );
    }
#endif
    // Bind after motion compensation; the scratch pointer may have been swapped on a previous insertion.
    distParam.cur = singleMergeTempBuffer->Y();
    Distortion uiSad = distParam.distFunc(distParam);

    // Bit estimation always starts from the same CABAC context state.
    m_CABACEstimator->getCtx() = ctxStart;
    uint64_t fracBits = m_pcInterSearch->xCalcPuMeBits(pu);
    double cost = (double)uiSad + (double)fracBits * sqrtLambdaForFirstPassIntra;
    insertPos = -1;
    updateCandList(ModeInfo(cu, pu), cost, RdModeList, candCostList, uiNumMrgSATDCand, &insertPos);

    // insertPos stays -1 unless updateCandList() reports a slot; only slots that have a buffer are mirrored.
    if (insertPos != -1 && insertPos < MMVD_MRG_MAX_RD_NUM)
    {
      // Shift buffer pointers down by one from insertPos, then place the fresh prediction at insertPos.
      for (int i = int(RdModeList.size()) - 1; i > insertPos; i--)
      {
        swap(acMergeTempBuffer[i - 1], acMergeTempBuffer[i]);
      }
      swap(singleMergeTempBuffer, acMergeTempBuffer[insertPos]);
    }
  }
  // Leave CU / PU in the regular merge configuration.
  pu.regularMergeFlag = true;
  cu.affine           = false;
#if AFFINE_MMVD
  pu.afMmvdFlag       = false;
#endif
  pu.tmMergeFlag      = false;
#if MULTI_PASS_DMVR
  pu.bdmvrRefine      = false;
#endif
}
#endif

#if AFFINE_MMVD && MERGE_ENC_OPT
/**
 * First-pass (distortion + estimated bits) cost check of affine MMVD candidates.
 *
 * Each tested index is split into base candidate, offset direction and offset step; the resulting affine
 * motion is generated with PU::getAfMmvdMvf(), luma is motion compensated into the scratch buffer, costed
 * with \p distParam and offered to the shared candidate list through updateCandList().
 * With MULTI_HYP_PRED, eligible candidates are also recorded in m_baseResultsForMH.
 *
 * \param tempCS                 coding structure under test (SPS and block size are read with MULTI_HYP_PRED)
 * \param cu                     coding unit whose mode flags are overwritten for affine MMVD testing
 * \param pu                     prediction unit that receives each candidate's motion
 * \param affineMergeCtx         affine merge candidate list providing the base candidates (passed by value)
 * \param mrgCtx                 merge context (not used in this function)
 * \param acMergeTempBuffer      prediction buffers kept in the same order as \p RdModeList
 * \param singleMergeTempBuffer  scratch prediction buffer; swapped into \p acMergeTempBuffer on insertion
 * \param uiNumMrgSATDCand       candidate budget forwarded to updateCandList()
 * \param RdModeList             candidate modes ordered by updateCandList()
 * \param candCostList           costs matching \p RdModeList
 * \param distParam              distortion functor setup (passed by value; only the local copy's cur is changed)
 * \param ctxStart               CABAC context state restored before every bit estimation
 * \param affMmvdLUT             (JVET_Y0067 builds only) maps the tested position to an affine MMVD index;
 *                               dereferenced without a null check
 * \param numBaseAffine          (JVET_AA0093 builds only) upper bound on the number of base candidates tested
 */
void EncCu::xCheckSATDCostAffineMmvdMerge(       CodingStructure*& tempCS,
                                                 CodingUnit&       cu,
                                                 PredictionUnit&   pu,
                                                 AffineMergeCtx    affineMergeCtx,
                                                 MergeCtx&         mrgCtx,
                                                 PelUnitBuf*       acMergeTempBuffer[MMVD_MRG_MAX_RD_NUM],
                                                 PelUnitBuf*&      singleMergeTempBuffer,
                                                 unsigned&         uiNumMrgSATDCand,
                                                 static_vector<ModeInfo, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM>  &RdModeList,
                                                 static_vector<double, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM>    &candCostList,
                                                 DistParam         distParam,
                                           const TempCtx&          ctxStart
#if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED
                                          , uint32_t * affMmvdLUT
#if JVET_AA0093_ENHANCED_MMVD_EXTENSION
  , uint8_t numBaseAffine
#endif
#endif
)
{
  // Put CU and PU into the affine merge configuration.
  cu.mmvdSkip         = false;
  cu.geoFlag          = false;
  cu.affine           = true;
  cu.imv              = IMV_OFF;
#if INTER_LIC
  cu.LICFlag = false;
#endif

  pu.mergeFlag        = true;
  pu.ciipFlag         = false;
#if CIIP_PDPC
  pu.ciipPDPC         = false;
#endif
  pu.mmvdMergeFlag    = false;
  pu.regularMergeFlag = false;
#if MULTI_HYP_PRED
  pu.addHypData.clear();
  pu.numMergedAddHyps = 0;
#endif

  // Merge index that corresponds to affine MMVD base 0; later bases are addressed relative to it.
  int baseIdxToMergeIdxOffset = (int)PU::getMergeIdxFromAfMmvdBaseIdx(affineMergeCtx, 0);
  // Number of usable bases: limited by AF_MMVD_BASE_NUM and by the candidates remaining after the offset.
  int baseCount               = std::min<int>((int)AF_MMVD_BASE_NUM, affineMergeCtx.numValidMergeCand - baseIdxToMergeIdxOffset);
#if JVET_AA0093_ENHANCED_MMVD_EXTENSION
  baseCount = std::min<int>(baseCount, numBaseAffine);
#endif
  int afMmvdCandCount         = baseCount * AF_MMVD_MAX_REFINE_NUM;
  // NOTE(review): this early exit skips the flag restoration done at the end of the function
  // (cu.affine stays true, pu.regularMergeFlag stays false) - confirm callers do not depend on it.
  if (baseCount < 1)
  {
    return;
  }

#if MULTI_HYP_PRED
  // Multi-hypothesis base results are collected only when the tool is on and the block is large enough.
  const bool testMHP = tempCS->sps->getUseInterMultiHyp()
    && (tempCS->area.lumaSize().area() > MULTI_HYP_PRED_RESTRICT_BLOCK_SIZE
      && std::min(tempCS->area.lwidth(), tempCS->area.lheight()) >= MULTI_HYP_PRED_RESTRICT_MIN_WH);
#endif
  // Lambda applied to the fractional bit count returned by xCalcPuMeBits().
  const double sqrtLambdaForFirstPassIntra = m_pcRdCost->getMotionLambda() * FRAC_BITS_SCALE;
  int insertPos = -1;
#if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED
  for (uint32_t uiMergeCandTemp = 0; uiMergeCandTemp < afMmvdCandCount; uiMergeCandTemp++)
  {
    // Translate the tested position into the actual affine MMVD index.
    uint32_t uiMergeCand = affMmvdLUT[uiMergeCandTemp];
    // The left-hand side is the position within the base's group of AF_MMVD_MAX_REFINE_NUM entries;
    // only the leading positions of every group are tested.
#if JVET_AA0093_ENHANCED_MMVD_EXTENSION
    if(uiMergeCandTemp - (uiMergeCandTemp/AF_MMVD_MAX_REFINE_NUM )* AF_MMVD_MAX_REFINE_NUM  >= ((AF_MMVD_MAX_REFINE_NUM >> AFFINE_MMVD_SIZE_SHIFT ) / AFFINE_BI_DIR))
#else
    if(uiMergeCandTemp - (uiMergeCandTemp/AF_MMVD_MAX_REFINE_NUM )* AF_MMVD_MAX_REFINE_NUM  >= (AF_MMVD_MAX_REFINE_NUM >> AFFINE_MMVD_SIZE_SHIFT))
#endif
    {
      continue;
    }
#else
  for (uint32_t uiMergeCand = 0; uiMergeCand < afMmvdCandCount; uiMergeCand++)
  {
#endif
    // Decompose the index: base candidate, then (step, direction) within that base.
    int baseIdx = (int)uiMergeCand / AF_MMVD_MAX_REFINE_NUM;
    int stepIdx = (int)uiMergeCand - baseIdx * AF_MMVD_MAX_REFINE_NUM;
    int dirIdx  = stepIdx % AF_MMVD_OFFSET_DIR;
        stepIdx = stepIdx / AF_MMVD_OFFSET_DIR;

    // Pass Affine MMVD parameters from candidate to PU
    {
      pu.afMmvdFlag     = true;
      pu.afMmvdBaseIdx  = (uint8_t)baseIdx;
      pu.afMmvdDir      = (uint8_t)dirIdx;
      pu.afMmvdStep     = (uint8_t)stepIdx;
      pu.mergeIdx       = (uint8_t)(baseIdx + baseIdxToMergeIdxOffset);
#if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED
      // The tested position (not the LUT-mapped index) is stored in the PU.
#if JVET_AA0093_ENHANCED_MMVD_EXTENSION
      pu.afMmvdMergeIdx = (uint16_t)uiMergeCandTemp;
#else
      pu.afMmvdMergeIdx = (uint8_t)uiMergeCandTemp;
#endif
#endif
      pu.mergeType      = affineMergeCtx.mergeType         [pu.mergeIdx];
      pu.interDir       = affineMergeCtx.interDirNeighbours[pu.mergeIdx];
      pu.cu->affineType = affineMergeCtx.affineType        [pu.mergeIdx];
#if INTER_LIC
      pu.cu->LICFlag    = affineMergeCtx.LICFlags          [pu.mergeIdx];
#endif
      pu.cu->BcwIdx     = affineMergeCtx.BcwIdx            [pu.mergeIdx];
      CHECK(pu.afMmvdDir >= AF_MMVD_OFFSET_DIR || pu.afMmvdStep >= AF_MMVD_STEP_NUM, "Affine MMVD dir or Affine MMVD step is out of range ");
      CHECK(pu.mergeType != MRG_TYPE_DEFAULT_N, "Affine MMVD must have non-SbTMVP base!");
    }

    // Build the offset affine motion for both reference lists and write it into the PU.
    MvField mvfMmvd[2][3];
    PU::getAfMmvdMvf(pu, affineMergeCtx, mvfMmvd, pu.mergeIdx, pu.afMmvdStep, pu.afMmvdDir);
    PU::setAllAffineMvField(pu, mvfMmvd[0], REF_PIC_LIST_0);
    PU::setAllAffineMvField(pu, mvfMmvd[1], REF_PIC_LIST_1);
    // Re-read the scratch buffer every iteration: the pointer may have been swapped on a previous insertion.
    distParam.cur = singleMergeTempBuffer->Y();
    // mmvdEncOptMode is set only for the duration of this motion compensation call.
    pu.mmvdEncOptMode = (stepIdx > 2 ? 3 : 0);
    m_pcInterSearch->motionCompensation(pu, *singleMergeTempBuffer, REF_PIC_LIST_X, true, false);
    pu.mmvdEncOptMode = 0;
    Distortion uiSad = distParam.distFunc(distParam);

    // Bit estimation always starts from the same CABAC context state.
    m_CABACEstimator->getCtx() = ctxStart;
    uint64_t fracBits = m_pcInterSearch->xCalcPuMeBits(pu);
    double cost = (double)uiSad + (double)fracBits * sqrtLambdaForFirstPassIntra;
#if MULTI_HYP_PRED
    if (testMHP && pu.addHypData.size() < tempCS->sps->getMaxNumAddHyps())
    {
      uint32_t uiBitsCand = baseIdx + stepIdx + 2 + 1 + 1 + 1; // one bit for merge flag,  one bit for subblock_merge_flag, and one bit for afMmvdFlag
      MEResult mergeResult;
      mergeResult.cu = cu;
      mergeResult.pu = pu;
      mergeResult.bits = uiBitsCand;
      mergeResult.cost = uiSad + m_pcRdCost->getCost(uiBitsCand);

      m_baseResultsForMH.push_back(mergeResult);
    }
#endif
    insertPos = -1;
    updateCandList(ModeInfo(cu, pu), cost, RdModeList, candCostList, uiNumMrgSATDCand, &insertPos);

    // insertPos stays -1 unless updateCandList() reports a slot; only slots that have a buffer are mirrored.
    if (insertPos != -1 && insertPos < MMVD_MRG_MAX_RD_NUM)
    {
      // Shift buffer pointers down by one from insertPos, then place the fresh prediction at insertPos.
      for (int i = int(RdModeList.size()) - 1; i > insertPos; i--)
      {
        swap(acMergeTempBuffer[i - 1], acMergeTempBuffer[i]);
      }
      swap(singleMergeTempBuffer, acMergeTempBuffer[insertPos]);
    }
  }
  // Leave CU / PU in the regular (non-affine) merge configuration.
  pu.regularMergeFlag = true;
  cu.affine = false;
  pu.afMmvdFlag = false;
}
#endif
#if !JVET_W0097_GPM_MMVD_TM && !JVET_Z0056_GPM_SPLIT_MODE_REORDERING
/**
 * First-pass cost check of geometric partitioning (GEO) merge candidates.
 *
 * Stages, in order:
 *  1. motion compensate every uni-prediction candidate and measure its whole-block distortion;
 *  2. for every split direction and candidate, measure the masked distortion of one partition and derive
 *     the other partition's distortion by subtraction from the whole-block value;
 *  3. combine the per-partition costs for every candidate pair listed in m_GeoModeTest, dropping pairs
 *     that cannot beat the best whole-block cost;
 *  4. for the cheapest combinations, build the blended luma prediction, cost it with \p distParam and
 *     offer it to the shared candidate list through updateCandList().
 *
 * \param tempCS                 coding structure under test (original signal, SPS, slice, block size)
 * \param cu                     coding unit whose mode flags are overwritten for GEO testing
 * \param pu                     prediction unit that receives each candidate's motion / GEO parameters
 * \param geoMergeCtx            GEO uni-prediction merge candidates (passed by value)
 * \param acMergeTempBuffer      prediction buffers kept in the same order as \p RdModeList
 * \param singleMergeTempBuffer  scratch prediction buffer; swapped into \p acMergeTempBuffer on insertion
 * \param uiNumMrgSATDCand       candidate budget; forwarded to updateCandList() and adjusted at the end
 * \param RdModeList             candidate modes ordered by updateCandList()
 * \param candCostList           costs matching \p RdModeList; entry 0 is used as the pruning threshold
 * \param distParam              distortion functor setup (passed by value; only the local copy's cur is changed)
 * \param ctxStart               CABAC context state restored before every bit estimation
 */
void EncCu::xCheckSATDCostGeoMerge(CodingStructure *&tempCS, CodingUnit &cu, PredictionUnit &pu, MergeCtx geoMergeCtx, PelUnitBuf *acMergeTempBuffer[MMVD_MRG_MAX_RD_NUM], PelUnitBuf *&singleMergeTempBuffer
  , unsigned& uiNumMrgSATDCand, static_vector<ModeInfo, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM>  &RdModeList, static_vector<double, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM> &candCostList, DistParam distParam, const TempCtx &ctxStart)
{
  const SPS &sps = *tempCS->sps;
  // Number of GEO combinations that were actually offered to the candidate list.
  int numGeoChecked = 0;
  GeoComboCostList comboList;
  // Bit estimate added to every combination for the partition mode.
  int bitsCandTB = floorLog2(GEO_NUM_PARTITION_MODE);
  // geoBuffer: motion-compensated predictions; geoTempBuf: luma copies rounded to output bit depth.
  PelUnitBuf geoBuffer[GEO_MAX_NUM_UNI_CANDS];
  PelUnitBuf geoTempBuf[GEO_MAX_NUM_UNI_CANDS];

  uint8_t maxNumMergeCandidates = cu.cs->sps->getMaxNumGeoCand();
  // Distortion of a single candidate over the whole block (no mask).
  DistParam distParamWholeBlk;
  m_pcRdCost->setDistParam(distParamWholeBlk, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y().buf, m_acMergeBuffer[0].Y().stride, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y);
  Distortion bestWholeBlkSad = MAX_UINT64;
  double bestWholeBlkCost = MAX_DOUBLE;

  Distortion sadWholeBlk[GEO_MAX_NUM_UNI_CANDS];
  // Reference POC and MV of each candidate, used only to detect duplicated motion.
  int pocMrg[GEO_MAX_NUM_UNI_CANDS];
  Mv MrgMv[GEO_MAX_NUM_UNI_CANDS];
  bool isSkipThisCand[GEO_MAX_NUM_UNI_CANDS] = { false };

  // Put CU and PU into the GEO merge configuration.
  cu.affine = false;
  cu.mtsFlag = false;
#if INTER_LIC
  cu.LICFlag = false;
#endif
  cu.BcwIdx = BCW_DEFAULT;
  cu.geoFlag = true;
  cu.imv = 0;
  cu.mmvdSkip = false;
  cu.skip = false;
  cu.mipFlag = false;
#if JVET_V0130_INTRA_TMP
  cu.tmpFlag = false;
#endif
  cu.bdpcmMode = 0;
  pu.mergeFlag = true;
  pu.regularMergeFlag = false;
  pu.mmvdMergeFlag = false;
  pu.mmvdMergeIdx = MAX_UCHAR;

  // Block-sized area anchored at (0,0), used to take views of the temporary buffers.
  const UnitArea localUnitArea(tempCS->area.chromaFormat, Area(0, 0, tempCS->area.Y().width, tempCS->area.Y().height));
  // Two lambdas: one for integer bit estimates, one for the fractional bits returned by xCalcPuMeBits().
  const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda();
  const double sqrtLambdaForFirstPassIntra = m_pcRdCost->getMotionLambda() * FRAC_BITS_SCALE;
  int insertPos = -1;

  // Stage 1: per-candidate motion compensation and whole-block distortion.
  for (uint8_t mergeCand = 0; mergeCand < maxNumMergeCandidates; mergeCand++)
  {
    geoBuffer[mergeCand] = m_acRealMergeBuffer[mergeCand].getBuf(localUnitArea);
    geoMergeCtx.setMergeInfo(pu, mergeCand);
    // Pick the reference list that holds the candidate's motion (list 1 when list 0 has no reference).
    int MrgList = geoMergeCtx.mvFieldNeighbours[(mergeCand << 1) + 0].refIdx == -1 ? 1 : 0;
    RefPicList MrgeRefPicList = (MrgList ? REF_PIC_LIST_1 : REF_PIC_LIST_0);
    int MrgrefIdx = geoMergeCtx.mvFieldNeighbours[(mergeCand << 1) + MrgList].refIdx;
    pocMrg[mergeCand] = tempCS->slice->getRefPic(MrgeRefPicList, MrgrefIdx)->getPOC();
    MrgMv[mergeCand] = geoMergeCtx.mvFieldNeighbours[(mergeCand << 1) + MrgList].mv;

    // Flag candidates whose reference POC and MV equal those of an earlier candidate.
    for( int i = 0; i < mergeCand; i++ )
    {
      if( pocMrg[mergeCand] == pocMrg[i] && MrgMv[mergeCand] == MrgMv[i] )
      {
        isSkipThisCand[mergeCand] = true;
        break;
      }
    }

    m_pcInterSearch->motionCompensation(pu, geoBuffer[mergeCand]);
    // The rounded luma copy lives in the second half of m_acRealMergeBuffer (offset MRG_MAX_NUM_CANDS).
    geoTempBuf[mergeCand] = m_acRealMergeBuffer[MRG_MAX_NUM_CANDS + mergeCand].getBuf(localUnitArea);
    geoTempBuf[mergeCand].Y().copyFrom(geoBuffer[mergeCand].Y());
    geoTempBuf[mergeCand].Y().roundToOutputBitdepth(geoTempBuf[mergeCand].Y(), cu.slice->clpRng(COMPONENT_Y));
    distParamWholeBlk.cur.buf = geoTempBuf[mergeCand].Y().buf;
    distParamWholeBlk.cur.stride = geoTempBuf[mergeCand].Y().stride;
    sadWholeBlk[mergeCand] = distParamWholeBlk.distFunc(distParamWholeBlk);
    // Track the best whole-block distortion; its cost is the pruning threshold used in stage 3.
    if (sadWholeBlk[mergeCand] < bestWholeBlkSad)
    {
      bestWholeBlkSad = sadWholeBlk[mergeCand];
      bestWholeBlkCost = (double)bestWholeBlkSad + (double)( mergeCand + 1 ) * sqrtLambdaForFirstPass;
    }
  }

  // GEO is skipped entirely when every candidate after the first duplicates an earlier one.
  bool skipGeo = true;
  for (uint8_t mergeCand = 1; mergeCand < maxNumMergeCandidates; mergeCand++)
  {
    if( !isSkipThisCand[mergeCand] )
    {
      skipGeo = false;
      break;
    }
  }

  if( !skipGeo )
  {
    DistParam distParamGeo;
    // Block-size indices into g_weightOffset.
    int wIdx = floorLog2(cu.lwidth()) - GEO_MIN_CU_LOG2;
    int hIdx = floorLog2(cu.lheight()) - GEO_MIN_CU_LOG2;
    // Stage 2: masked distortion per split direction and candidate.
    for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++)
    {
      int maskStride = 0, maskStride2 = 0;
      int stepX = 1;
      Pel* SADmask;
      int16_t angle = g_GeoParams[splitDir][0];
      // Select start address, stride and horizontal step of the mask according to the mirroring type of the angle.
      if (g_angle2mirror[angle] == 2)
      {
        maskStride = -GEO_WEIGHT_MASK_SIZE;
        maskStride2 = -(int)cu.lwidth();
        SADmask = &g_globalGeoEncSADmask[g_angle2mask[g_GeoParams[splitDir][0]]][(GEO_WEIGHT_MASK_SIZE - 1 - g_weightOffset[splitDir][hIdx][wIdx][1]) * GEO_WEIGHT_MASK_SIZE + g_weightOffset[splitDir][hIdx][wIdx][0]];
      }
      else if (g_angle2mirror[angle] == 1)
      {
        stepX = -1;
        maskStride2 = cu.lwidth();
        maskStride = GEO_WEIGHT_MASK_SIZE;
        SADmask = &g_globalGeoEncSADmask[g_angle2mask[g_GeoParams[splitDir][0]]][g_weightOffset[splitDir][hIdx][wIdx][1] * GEO_WEIGHT_MASK_SIZE + (GEO_WEIGHT_MASK_SIZE - 1 - g_weightOffset[splitDir][hIdx][wIdx][0])];
      }
      else
      {
        maskStride = GEO_WEIGHT_MASK_SIZE;
        maskStride2 = -(int)cu.lwidth();
        SADmask = &g_globalGeoEncSADmask[g_angle2mask[g_GeoParams[splitDir][0]]][g_weightOffset[splitDir][hIdx][wIdx][1] * GEO_WEIGHT_MASK_SIZE + g_weightOffset[splitDir][hIdx][wIdx][0]];
      }
      Distortion sadSmall = 0, sadLarge = 0;
      for (uint8_t mergeCand = 0; mergeCand < maxNumMergeCandidates; mergeCand++)
      {
        int bitsCand = mergeCand + 1;

        m_pcRdCost->setDistParam(distParamGeo, tempCS->getOrgBuf().Y(), geoTempBuf[mergeCand].Y().buf, geoTempBuf[mergeCand].Y().stride, SADmask, maskStride, stepX, maskStride2, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y);
        // Masked distortion is stored for partition 0; partition 1 gets the remainder of the whole-block value.
        sadLarge = distParamGeo.distFunc(distParamGeo);
        m_GeoCostList.insert(splitDir, 0, mergeCand, (double)sadLarge + (double)bitsCand * sqrtLambdaForFirstPass);
        sadSmall = sadWholeBlk[mergeCand] - sadLarge;
        m_GeoCostList.insert(splitDir, 1, mergeCand, (double)sadSmall + (double)bitsCand * sqrtLambdaForFirstPass);
      }
    }

    // Stage 3: estimate the cost of every (split direction, candidate pair) combination.
    for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++)
    {
      for (int GeoMotionIdx = 0; GeoMotionIdx < maxNumMergeCandidates * (maxNumMergeCandidates - 1); GeoMotionIdx++)
      {
        unsigned int mergeCand0 = m_GeoModeTest[GeoMotionIdx].m_candIdx0;
        unsigned int mergeCand1 = m_GeoModeTest[GeoMotionIdx].m_candIdx1;
        double tempCost = m_GeoCostList.singleDistList[0][splitDir][mergeCand0].cost + m_GeoCostList.singleDistList[1][splitDir][mergeCand1].cost;
        // Prune combinations that are already worse than the best single candidate over the whole block.
        if( tempCost > bestWholeBlkCost )
        {
          continue;
        }
        tempCost = tempCost + (double)bitsCandTB * sqrtLambdaForFirstPass;
        comboList.list.push_back(GeoMergeCombo(splitDir, mergeCand0, mergeCand1, tempCost));
      }
    }
    // Stage 4: evaluate the cheapest combinations with the caller's distortion measure.
    if (!comboList.list.empty())
    {
      comboList.sortByCost();
      int geoNumCobo = (int)comboList.list.size();
      const int numGeoSATD = min(geoNumCobo, GEO_MAX_TRY_WEIGHTED_SAD);
      // Best cost already in the list on entry; GEO candidates worse than this are dropped.
      double bestPrevCost = candCostList.size() > 0 ? candCostList[0] : MAX_DOUBLE;
      for (uint8_t candidateIdx = 0; candidateIdx < numGeoSATD; candidateIdx++)
      {
        int splitDir = comboList.list[candidateIdx].splitDir;
        int mergeCand0 = comboList.list[candidateIdx].mergeIdx0;
        int mergeCand1 = comboList.list[candidateIdx].mergeIdx1;

        pu.geoSplitDir = comboList.list[candidateIdx].splitDir;
        pu.geoMergeIdx0 = comboList.list[candidateIdx].mergeIdx0;
        pu.geoMergeIdx1 = comboList.list[candidateIdx].mergeIdx1;

        // Only luma is blended here; chroma is blended later, and only for candidates that enter the list.
        m_pcInterSearch->weightedGeoBlk(pu, splitDir, CHANNEL_TYPE_LUMA, *singleMergeTempBuffer, geoBuffer[mergeCand0], geoBuffer[mergeCand1]);

        distParam.cur = singleMergeTempBuffer->Y();
        Distortion uiSad = distParam.distFunc(distParam);

        // Bit estimation always starts from the same CABAC context state.
        m_CABACEstimator->getCtx() = ctxStart;
        uint64_t fracBits = m_pcInterSearch->xCalcPuMeBits(pu);
        double cost = (double)uiSad + (double)fracBits * sqrtLambdaForFirstPassIntra;

        if (cost > bestPrevCost) // skip GEO candidate if cost larger than the best from regular and affine as in original design
        {
          continue;
        }
        numGeoChecked++;
        insertPos = -1;
        updateCandList(ModeInfo(cu, pu), cost, RdModeList, candCostList, uiNumMrgSATDCand, &insertPos);
#if MERGE_ENC_OPT
        if (insertPos != -1 && insertPos < MMVD_MRG_MAX_RD_NUM)
#else
        if (insertPos != -1)
#endif
        {
          m_pcInterSearch->weightedGeoBlk(pu, splitDir, CHANNEL_TYPE_CHROMA, *singleMergeTempBuffer, geoBuffer[pu.geoMergeIdx0], geoBuffer[pu.geoMergeIdx1]); // have to use pu.geoMergeIdx1 since mergeCand1 may have been changed
          // Shift buffer pointers down by one from insertPos, then place the fresh prediction at insertPos.
          for (int i = int(RdModeList.size()) - 1; i > insertPos; i--)
          {
            swap(acMergeTempBuffer[i - 1], acMergeTempBuffer[i]);
          }
          swap(singleMergeTempBuffer, acMergeTempBuffer[insertPos]);
        }
      }
    }
  }

  cu.geoFlag = false;
  // NOTE(review): unsigned arithmetic - this wraps around if uiNumMrgSATDCand + numGeoChecked is smaller than
  // GEO_MAX_TRY_WEIGHTED_SATD, in which case only the clamp below bounds the result; presumably the caller
  // reserved GEO_MAX_TRY_WEIGHTED_SATD slots beforehand - confirm.
  if( numGeoChecked < GEO_MAX_TRY_WEIGHTED_SATD ) // try to match the original number of full RD
  {
    uiNumMrgSATDCand = uiNumMrgSATDCand - GEO_MAX_TRY_WEIGHTED_SATD + numGeoChecked;
  }

  if (uiNumMrgSATDCand > RdModeList.size()) // never ask for more candidates than the list holds
  {
    uiNumMrgSATDCand = (unsigned int)RdModeList.size();
  }
}
#endif
#else
void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
{
  if( m_modeCtrl->getFastDeltaQp() )
  {
    return;
  }

  if ( bestCS->area.lumaSize().width < 8 || bestCS->area.lumaSize().height < 8 )
  {
    return;
  }
  m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;
  const Slice &slice = *tempCS->slice;

  CHECK( slice.getSliceType() == I_SLICE, "Affine Merge modes not available for I-slices" );

  tempCS->initStructData( encTestMode.qp );

  AffineMergeCtx affineMergeCtx;
  const SPS &sps = *tempCS->sps;
  if (sps.getMaxNumAffineMergeCand() == 0)
  {
    return;
  }

  setAFFBestSATDCost(MAX_DOUBLE);

  MergeCtx mrgCtx;
  if (sps.getSbTMVPEnabledFlag())
  {
    Size bufSize = g_miScaling.scale( tempCS->area.lumaSize() );
    mrgCtx.subPuMvpMiBuf = MotionBuf( m_SubPuMiBuf, bufSize );
    affineMergeCtx.mrgCtx = &mrgCtx;
  }

  {
    // first get merge candidates
    CodingUnit cu( tempCS->area );
    cu.cs = tempCS;
    cu.predMode = MODE_INTER;
    cu.slice = tempCS->slice;
    cu.tileIdx          = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
    cu.mmvdSkip = false;
#if INTER_LIC
    cu.LICFlag = false;
#endif

    PredictionUnit pu( tempCS->area );
    pu.cu = &cu;
    pu.cs = tempCS;
    pu.regularMergeFlag = false;
    PU::getAffineMergeCand( pu, affineMergeCtx 
#if JVET_AA0107_RMVF_AFFINE_MERGE_DERIVATION && JVET_W0090_ARMC_TM
      ,m_pcInterPred
#endif
    );
#if JVET_W0090_ARMC_TM
    if (sps.getUseAML())
    {
      m_pcInterSearch->adjustAffineMergeCandidates(pu, affineMergeCtx);
    }
#endif

    if ( affineMergeCtx.numValidMergeCand <= 0 )
    {
      return;
    }
  }

  bool candHasNoResidual[AFFINE_MRG_MAX_NUM_CANDS];
  for ( uint32_t ui = 0; ui < affineMergeCtx.numValidMergeCand; ui++ )
  {
    candHasNoResidual[ui] = false;
  }

  bool                                        bestIsSkip = false;
  uint32_t                                    uiNumMrgSATDCand = affineMergeCtx.numValidMergeCand;
  PelUnitBuf                                  acMergeBuffer[AFFINE_MRG_MAX_NUM_CANDS];
  static_vector<uint32_t, AFFINE_MRG_MAX_NUM_CANDS>  RdModeList;
  bool                                        mrgTempBufSet = false;

  for ( uint32_t i = 0; i < AFFINE_MRG_MAX_NUM_CANDS; i++ )
  {
    RdModeList.push_back( i );
  }

  if ( m_pcEncCfg->getUseFastMerge() )
  {
    uiNumMrgSATDCand = std::min( NUM_AFF_MRG_SATD_CAND, affineMergeCtx.numValidMergeCand );
    bestIsSkip = false;

    if ( auto blkCache = dynamic_cast<CacheBlkInfoCtrl*>(m_modeCtrl) )
    {
      bestIsSkip = blkCache->isSkip( tempCS->area );
    }

    static_vector<double, AFFINE_MRG_MAX_NUM_CANDS> candCostList;

    // 1. Pass: get SATD-cost for selected candidates and reduce their count
    if ( !bestIsSkip )
    {
      RdModeList.clear();
      mrgTempBufSet = true;
#if JVET_W0097_GPM_MMVD_TM
      const double sqrtLambdaForFirstPassIntra = m_pcRdCost->getMotionLambda() * FRAC_BITS_SCALE;
      const TempCtx ctxStart(m_CtxCache, m_CABACEstimator->getCtx());
#else
      const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda( );
#endif

      CodingUnit &cu = tempCS->addCU( tempCS->area, partitioner.chType );

      partitioner.setCUData( cu );
      cu.slice = tempCS->slice;
      cu.tileIdx          = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
      cu.skip = false;
      cu.affine = true;
#if INTER_LIC
      cu.LICFlag = false;
#endif
      cu.predMode = MODE_INTER;
      cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1;
      cu.qp = encTestMode.qp;

      PredictionUnit &pu = tempCS->addPU( cu, partitioner.chType );

      DistParam distParam;
      const bool bUseHadamard = !tempCS->slice->getDisableSATDForRD();
      m_pcRdCost->setDistParam( distParam, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y(), sps.getBitDepth( CHANNEL_TYPE_LUMA ), COMPONENT_Y, bUseHadamard );

      const UnitArea localUnitArea( tempCS->area.chromaFormat, Area( 0, 0, tempCS->area.Y().width, tempCS->area.Y().height ) );
#if MULTI_HYP_PRED
      const bool testMHP = tempCS->sps->getUseInterMultiHyp()
        && (tempCS->area.lumaSize().area() > MULTI_HYP_PRED_RESTRICT_BLOCK_SIZE 
        && std::min(tempCS->area.lwidth(), tempCS->area.lheight()) >= MULTI_HYP_PRED_RESTRICT_MIN_WH);
#endif

      for ( uint32_t uiMergeCand = 0; uiMergeCand < affineMergeCtx.numValidMergeCand; uiMergeCand++ )
      {
        acMergeBuffer[uiMergeCand] = m_acMergeBuffer[uiMergeCand].getBuf( localUnitArea );

        // set merge information
        pu.interDir = affineMergeCtx.interDirNeighbours[uiMergeCand];
        pu.mergeFlag = true;
        pu.regularMergeFlag = false;
        pu.mergeIdx = uiMergeCand;
        cu.affineType = affineMergeCtx.affineType[uiMergeCand];
#if AFFINE_MMVD
        pu.afMmvdFlag = false;
#endif
        cu.BcwIdx = affineMergeCtx.BcwIdx[uiMergeCand];
#if INTER_LIC
        cu.LICFlag = affineMergeCtx.LICFlags[uiMergeCand];
#endif

        pu.mergeType = affineMergeCtx.mergeType[uiMergeCand];
        if ( pu.mergeType == MRG_TYPE_SUBPU_ATMVP )
        {
          pu.refIdx[0] = affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 0][0].refIdx;
          pu.refIdx[1] = affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1][0].refIdx;
          PU::spanMotionInfo( pu, mrgCtx );
        }
        else
        {
          PU::setAllAffineMvField( pu, affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 0], REF_PIC_LIST_0 );
          PU::setAllAffineMvField( pu, affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1], REF_PIC_LIST_1 );

          PU::spanMotionInfo( pu );
        }

        distParam.cur = acMergeBuffer[uiMergeCand].Y();

        m_pcInterSearch->motionCompensation( pu, acMergeBuffer[uiMergeCand], REF_PIC_LIST_X, true, false );

        Distortion uiSad = distParam.distFunc( distParam );
#if JVET_W0097_GPM_MMVD_TM
        m_CABACEstimator->getCtx() = ctxStart;
        uint64_t fracBits = m_pcInterSearch->xCalcPuMeBits(pu);
        double cost = (double)uiSad + (double)fracBits * sqrtLambdaForFirstPassIntra;
#else
        uint32_t   uiBitsCand = uiMergeCand + 1;
        if ( uiMergeCand == tempCS->picHeader->getMaxNumAffineMergeCand() - 1 )
        {
          uiBitsCand--;
        }
        double cost = (double)uiSad + (double)uiBitsCand * sqrtLambdaForFirstPass;
#endif
#if MULTI_HYP_PRED
        if (testMHP && pu.addHypData.size() < tempCS->sps->getMaxNumAddHyps())
        {
          uiBitsCand = uiBitsCand + 1 + 1; // one bit for merge flag, and one bit for subblock_merge_flag
          MEResult mergeResult;
          mergeResult.cu = cu;
          mergeResult.pu = pu;
          mergeResult.bits = uiBitsCand;
          mergeResult.cost = uiSad + m_pcRdCost->getCost(uiBitsCand);
          m_baseResultsForMH.push_back(mergeResult);
        }
#endif
        updateCandList( uiMergeCand, cost, RdModeList, candCostList
          , uiNumMrgSATDCand );

        CHECK( std::min( uiMergeCand + 1, uiNumMrgSATDCand ) != RdModeList.size(), "" );
      }

      // Try to limit number of candidates using SATD-costs
      for ( uint32_t i = 1; i < uiNumMrgSATDCand; i++ )
      {
        if ( candCostList[i] > MRG_FAST_RATIO * candCostList[0] )
        {
          uiNumMrgSATDCand = i;
          break;
        }
      }

      tempCS->initStructData( encTestMode.qp );
      setAFFBestSATDCost(candCostList[0]);
#if JVET_W0097_GPM_MMVD_TM
      m_CABACEstimator->getCtx() = ctxStart;
#endif
    }
    else
    {
      uiNumMrgSATDCand = affineMergeCtx.numValidMergeCand;
    }
  }

  uint32_t iteration;
  uint32_t iterationBegin = 0;
  iteration = 2;
  for (uint32_t uiNoResidualPass = iterationBegin; uiNoResidualPass < iteration; ++uiNoResidualPass)
  {
    for ( uint32_t uiMrgHADIdx = 0; uiMrgHADIdx < uiNumMrgSATDCand; uiMrgHADIdx++ )
    {
      uint32_t uiMergeCand = RdModeList[uiMrgHADIdx];

      if ( ((uiNoResidualPass != 0) && candHasNoResidual[uiMergeCand])
        || ((uiNoResidualPass == 0) && bestIsSkip) )
      {
        continue;
      }

      // first get merge candidates
      CodingUnit &cu = tempCS->addCU( tempCS->area, partitioner.chType );

      partitioner.setCUData( cu );
      cu.slice = tempCS->slice;
      cu.tileIdx          = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
#if INTER_LIC
      cu.LICFlag = false;
#endif
      cu.skip = false;
      cu.affine = true;
      cu.predMode = MODE_INTER;
      cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1;
      cu.qp = encTestMode.qp;
      PredictionUnit &pu = tempCS->addPU( cu, partitioner.chType );

      // set merge information
      pu.mergeFlag = true;
      pu.mergeIdx = uiMergeCand;
      pu.interDir = affineMergeCtx.interDirNeighbours[uiMergeCand];
      cu.affineType = affineMergeCtx.affineType[uiMergeCand];
#if AFFINE_MMVD
      pu.afMmvdFlag = false;
#endif
      cu.BcwIdx = affineMergeCtx.BcwIdx[uiMergeCand];
#if INTER_LIC
      cu.LICFlag = affineMergeCtx.LICFlags[uiMergeCand];
#endif

      pu.mergeType = affineMergeCtx.mergeType[uiMergeCand];
      if ( pu.mergeType == MRG_TYPE_SUBPU_ATMVP )
      {
        pu.refIdx[0] = affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 0][0].refIdx;
        pu.refIdx[1] = affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1][0].refIdx;
        PU::spanMotionInfo( pu, mrgCtx );
      }
      else
      {
        PU::setAllAffineMvField( pu, affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 0], REF_PIC_LIST_0 );
        PU::setAllAffineMvField( pu, affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1], REF_PIC_LIST_1 );

        PU::spanMotionInfo( pu );
      }

      if( m_pcEncCfg->getMCTSEncConstraint() && ( !( MCTSHelper::checkMvBufferForMCTSConstraint( *cu.firstPU ) ) ) )
      {
        // Do not use this mode
        tempCS->initStructData( encTestMode.qp );
        return;
      }
      if ( mrgTempBufSet )
      {
        tempCS->getPredBuf().copyFrom(acMergeBuffer[uiMergeCand], true, false);   // Copy Luma Only
        m_pcInterSearch->motionCompensation(pu, REF_PIC_LIST_X, false, true);
      }
      else
      {
        m_pcInterSearch->motionCompensation( pu );
      }
#if ENABLE_OBMC
      cu.isobmcMC = true;
      m_pcInterSearch->subBlockOBMC(*cu.firstPU);
      cu.isobmcMC = false;
#endif
      xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, ( uiNoResidualPass == 0 ? &candHasNoResidual[uiMergeCand] : NULL ) );

      if ( m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip )
      {
        bestIsSkip = bestCS->getCU( partitioner.chType )->rootCbf == 0;
      }
      tempCS->initStructData( encTestMode.qp );
    }// end loop uiMrgHADIdx

    if ( uiNoResidualPass == 0 && m_pcEncCfg->getUseEarlySkipDetection() )
    {
      const CodingUnit     &bestCU = *bestCS->getCU( partitioner.chType );
      const PredictionUnit &bestPU = *bestCS->getPU( partitioner.chType );

      if ( bestCU.rootCbf == 0 )
      {
        if ( bestPU.mergeFlag )
        {
          m_modeCtrl->setEarlySkipDetected();
        }
        else if ( m_pcEncCfg->getMotionEstimationSearchMethod() != MESEARCH_SELECTIVE )
        {
          int absolute_MV = 0;

          for ( uint32_t uiRefListIdx = 0; uiRefListIdx < 2; uiRefListIdx++ )
          {
            if ( slice.getNumRefIdx( RefPicList( uiRefListIdx ) ) > 0 )
            {
              absolute_MV += bestPU.mvd[uiRefListIdx].getAbsHor() + bestPU.mvd[uiRefListIdx].getAbsVer();
            }
          }

          if ( absolute_MV == 0 )
          {
            m_modeCtrl->setEarlySkipDetected();
          }
        }
      }
    }
  }
  if ( m_bestModeUpdated && bestCS->cost != MAX_DOUBLE )
  {
    xCalDebCost( *bestCS, partitioner );
  }
}
#endif

#if AFFINE_MMVD && !MERGE_ENC_OPT
/** Rate-distortion test of affine MMVD merge candidates for the current block.
 *
 *  Flow:
 *   - bail out early for fast-delta-QP mode or blocks narrower/shorter than 8 luma samples,
 *   - derive the affine merge list and the number of usable affine MMVD base candidates,
 *   - (fast merge only) 1st pass: rank all base/step/direction combinations by
 *     distortion + estimated bits and keep the best few in RdModeList,
 *   - 2nd pass: full RD check of the kept candidates, first with residual
 *     (uiNoResidualPass == 0) and then forced without residual (uiNoResidualPass == 1).
 *
 *  \param tempCS       scratch coding structure; re-initialised after every tested candidate
 *  \param bestCS       best coding structure so far; updated through xEncodeInterResidual
 *  \param partitioner  provides the channel type and the CU data for the current block
 *  \param encTestMode  tested mode; only its qp is read here
 */
void EncCu::xCheckRDCostAffineMmvd2Nx2N(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode)
{
  if (m_modeCtrl->getFastDeltaQp())
  {
    return;
  }
  // Affine MMVD is not tested for blocks smaller than 8 in either luma dimension
  if (bestCS->area.lumaSize().width < 8 || bestCS->area.lumaSize().height < 8)
  {
    return;
  }
  m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;
  const Slice &slice = *tempCS->slice;

  CHECK(slice.getSliceType() == I_SLICE, "Affine MMVD modes not available for I-slices");

  tempCS->initStructData(encTestMode.qp);

  AffineMergeCtx affineMergeCtx;
  const SPS &sps = *tempCS->sps;
  MergeCtx mrgCtx;
  if (sps.getSbTMVPEnabledFlag())
  {
    // Attach the sub-PU motion buffer so that the affine merge list derivation can use it
    Size bufSize = g_miScaling.scale(tempCS->area.lumaSize());
    mrgCtx.subPuMvpMiBuf = MotionBuf(m_SubPuMiBuf, bufSize);
    affineMergeCtx.mrgCtx = &mrgCtx;
  }

  {
    // first get merge candidates (using a temporary CU/PU that is not added to tempCS)
    CodingUnit cu(tempCS->area);
    cu.cs       = tempCS;
    cu.predMode = MODE_INTER;
    cu.slice    = tempCS->slice;
    cu.tileIdx  = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
    cu.mmvdSkip = false;
    cu.geoFlag  = false;
#if INTER_LIC
    cu.LICFlag  = false;
#endif

    PredictionUnit pu(tempCS->area);
    pu.cu = &cu;
    pu.cs = tempCS;

    PU::getAffineMergeCand(pu, affineMergeCtx);
  }
  // Merge index of base candidate 0; base index b maps to merge index b + baseIdxToMergeIdxOffset
  int baseIdxToMergeIdxOffset = (int)PU::getMergeIdxFromAfMmvdBaseIdx(affineMergeCtx, 0);
  int baseCount = std::min<int>((int)AF_MMVD_BASE_NUM, affineMergeCtx.numValidMergeCand - baseIdxToMergeIdxOffset);
  if (baseCount < 1)
  {
    return;
  }

  // Per candidate (indexed by candidate number, not by RD-list slot): set by the with-residual
  // pass so that the no-residual pass can skip candidates flagged there
  bool candHasNoResidual[AF_MMVD_NUM];
  for (uint32_t ui = 0; ui < AF_MMVD_NUM; ui++)
  {
    candHasNoResidual[ui] = false;
  }

  const UnitArea localUnitArea(tempCS->area.chromaFormat, Area(0, 0, tempCS->area.Y().width, tempCS->area.Y().height));

  bool                                    bestIsSkip       = false;
  int                                     afMmvdCandCount  = baseCount * AF_MMVD_MAX_REFINE_NUM;
  uint32_t                                uiNumMrgSATDCand = std::min( AF_MMVD_NUM, afMmvdCandCount );
  static_vector<uint32_t, AF_MMVD_NUM>    RdModeList;

  // Default order: identity. Replaced by the cost-sorted order when the 1st pass runs.
  for (uint32_t i = 0; i < AF_MMVD_NUM; i++)
  {
    RdModeList.push_back(i);
  }

  if (m_pcEncCfg->getUseFastMerge())
  {
    uiNumMrgSATDCand = std::min((uint32_t)NUM_AF_MMVD_SATD_CAND, uiNumMrgSATDCand);
    bestIsSkip = false;
    if (auto blkCache = dynamic_cast<CacheBlkInfoCtrl*>(m_modeCtrl))
    {
      bestIsSkip = blkCache->isSkip(tempCS->area);
    }

    static_vector<double, AF_MMVD_NUM> candCostList;

    // 1. Pass: get SATD-cost for selected candidates and reduce their count
    if (!bestIsSkip)
    {
      RdModeList.clear();
      const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda();

      CodingUnit &cu = tempCS->addCU(tempCS->area, partitioner.chType);

      partitioner.setCUData(cu);
      cu.slice       = tempCS->slice;
      cu.tileIdx     = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
      cu.skip        = false;
      cu.affine      = true;
#if INTER_LIC
      cu.LICFlag     = false;
#endif
      cu.predMode    = MODE_INTER;
      cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1;
      cu.qp          = encTestMode.qp;

      PredictionUnit &pu = tempCS->addPU(cu, partitioner.chType);

#if MULTI_HYP_PRED
      const bool testMHP = tempCS->sps->getUseInterMultiHyp()
        && (tempCS->area.lumaSize().area() > MULTI_HYP_PRED_RESTRICT_BLOCK_SIZE
          && std::min(tempCS->area.lwidth(), tempCS->area.lheight()) >= MULTI_HYP_PRED_RESTRICT_MIN_WH);
#endif
      // Luma-only distortion against the original; Hadamard unless SATD is disabled for the slice
      DistParam distParam;
      const bool bUseHadamard = !tempCS->slice->getDisableSATDForRD();
      m_pcRdCost->setDistParam(distParam, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y(), sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, bUseHadamard);

      {
        for (uint32_t uiMergeCand = 0; uiMergeCand < afMmvdCandCount; uiMergeCand++)
        {
          // Decompose the flat candidate number into (base, step, direction)
          int baseIdx = (int)uiMergeCand / AF_MMVD_MAX_REFINE_NUM;
          int stepIdx = (int)uiMergeCand - baseIdx * AF_MMVD_MAX_REFINE_NUM;
          int dirIdx  = stepIdx % AF_MMVD_OFFSET_DIR;
              stepIdx = stepIdx / AF_MMVD_OFFSET_DIR;

          // All candidates of this pass are predicted into the same buffer (slot 0),
          // so no per-candidate prediction survives the 1st pass.
          PelUnitBuf predTempBuf = m_acMergeBuffer[0].getBuf(localUnitArea);

          // Pass Affine MMVD parameters from candidate to PU
          {
            pu.mergeFlag      = true;
            pu.afMmvdFlag     = true;
            pu.afMmvdBaseIdx  = (uint8_t)baseIdx;
            pu.afMmvdDir      = (uint8_t)dirIdx;
            pu.afMmvdStep     = (uint8_t)stepIdx;
            pu.mergeIdx       = (uint8_t)(baseIdx + baseIdxToMergeIdxOffset);
            pu.mergeType      = affineMergeCtx.mergeType         [pu.mergeIdx];
            pu.interDir       = affineMergeCtx.interDirNeighbours[pu.mergeIdx];
            pu.cu->affineType = affineMergeCtx.affineType        [pu.mergeIdx];
#if INTER_LIC
            pu.cu->LICFlag    = affineMergeCtx.LICFlags          [pu.mergeIdx];
#endif
            pu.cu->BcwIdx     = affineMergeCtx.BcwIdx            [pu.mergeIdx];
            pu.mmvdMergeFlag  = false;
            pu.ciipFlag       = false;

            CHECK(pu.afMmvdDir >= AF_MMVD_OFFSET_DIR || pu.afMmvdStep >= AF_MMVD_STEP_NUM, "Affine MMVD dir or Affine MMVD step is out of range ");
            CHECK(pu.mergeType != MRG_TYPE_DEFAULT_N, "Affine MMVD must have non-SbTMVP base!");
          }

          MvField mvfMmvd[2][3];
          PU::getAfMmvdMvf(pu, affineMergeCtx, mvfMmvd, pu.mergeIdx, pu.afMmvdStep, pu.afMmvdDir);
          PU::setAllAffineMvField(pu, mvfMmvd[0], REF_PIC_LIST_0);
          PU::setAllAffineMvField(pu, mvfMmvd[1], REF_PIC_LIST_1);
          PU::spanMotionInfo(pu);
          // mmvdEncOptMode is set only for this ranking MC (3 for steps above 2) and reset right after
          pu.mmvdEncOptMode = (stepIdx > 2 ? 3 : 0);
          m_pcInterSearch->motionCompensation(pu, predTempBuf, REF_PIC_LIST_X, true, false);
          pu.mmvdEncOptMode = 0;

          distParam.cur = predTempBuf.Y();
          Distortion uiSad = distParam.distFunc(distParam);
          uint32_t   uiBitsCand = PU::getAfMmvdEstBits(pu);
          double cost = (double)uiSad + (double)uiBitsCand * sqrtLambdaForFirstPass;
#if MULTI_HYP_PRED
          if (testMHP && pu.addHypData.size() < tempCS->sps->getMaxNumAddHyps())
          {
            // Record this candidate as a base result for the multi-hypothesis search
            uint32_t uiBitsCand = baseIdx + stepIdx + 2 + 1 + 1 + 1; // one bit for merge flag,  one bit for subblock_merge_flag, and one bit for afMmvdFlag
            uiBitsCand++; // for mmvd_flag
            MEResult mergeResult;
            mergeResult.cu = cu;
            mergeResult.pu = pu;
            mergeResult.bits = uiBitsCand;
            mergeResult.cost = uiSad + m_pcRdCost->getCost(uiBitsCand);

            m_baseResultsForMH.push_back(mergeResult);
          }
#endif
          updateCandList(uiMergeCand, cost, RdModeList, candCostList, uiNumMrgSATDCand);

          CHECK(std::min(uiMergeCand + 1, uiNumMrgSATDCand) != RdModeList.size(), "");
        }
      }

      // Try to limit number of candidates using SATD-costs
      for (uint32_t i = 1; i < uiNumMrgSATDCand; i++)
      {
        if (candCostList[i] > MRG_FAST_RATIO * candCostList[0])
        {
          uiNumMrgSATDCand = i;
          break;
        }
      }

      tempCS->initStructData(encTestMode.qp);
#if !MERGE_ENC_OPT
      setAFFBestSATDCost(std::min<double>(getAFFBestSATDCost(), candCostList[0]));
#endif
    }
    else
    {
      // Cached decision says "skip": no ranking, test every candidate (no-residual pass only)
      uiNumMrgSATDCand = afMmvdCandCount;
    }
  }

  // With few enough candidates, the prediction of each RD-list slot is kept in
  // m_acMergeBuffer[slot] and re-used by the no-residual pass instead of being recomputed.
  bool bBufferMcFromNoResidualPass = (uiNumMrgSATDCand <= NUM_AF_MMVD_SATD_CAND);

  // Per RD-list slot: true once m_acMergeBuffer[slot] holds that slot's prediction.
  // A slot can be skipped in the with-residual pass (bestIsSkip turning true mid-loop), in which
  // case the no-residual pass is the first one to see it and must run the motion compensation
  // itself rather than copy a buffer that was never written for this candidate.
  bool predBufReady[AF_MMVD_NUM] = { false };

  // 2. Pass: check candidates using full RD test
  for (uint32_t uiNoResidualPass = 0; uiNoResidualPass < 2; ++uiNoResidualPass)
  {
    for (uint32_t uiMrgHADIdx = 0; uiMrgHADIdx < uiNumMrgSATDCand; uiMrgHADIdx++)
    {
      uint32_t uiMergeCand = RdModeList[uiMrgHADIdx];

      // No-residual pass: skip candidates flagged in candHasNoResidual by the with-residual pass.
      // With-residual pass: skip everything once the best mode is known to be skip.
      if (((uiNoResidualPass != 0) && candHasNoResidual[uiMergeCand])
        || ((uiNoResidualPass == 0) && bestIsSkip))
      {
        continue;
      }
      // first get merge candidates
      CodingUnit &cu = tempCS->addCU(tempCS->area, partitioner.chType);

      partitioner.setCUData(cu);
      cu.slice       = tempCS->slice;
      cu.tileIdx     = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
      cu.skip        = false;
      cu.affine      = true;
#if INTER_LIC
      cu.LICFlag     = false;
#endif
      cu.predMode    = MODE_INTER;
      cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1;
      cu.qp          = encTestMode.qp;
      PredictionUnit &pu = tempCS->addPU(cu, partitioner.chType);

      // Pass Affine MMVD parameters from candidate to PU
      int baseIdx = (int)uiMergeCand / AF_MMVD_MAX_REFINE_NUM;
      int stepIdx = (int)uiMergeCand - baseIdx * AF_MMVD_MAX_REFINE_NUM;
      int dirIdx  = stepIdx % AF_MMVD_OFFSET_DIR;
          stepIdx = stepIdx / AF_MMVD_OFFSET_DIR;
      {
        // set merge information
        pu.mergeFlag      = true;
        pu.afMmvdFlag     = true;
        pu.afMmvdBaseIdx  = (uint8_t)baseIdx;
        pu.afMmvdDir      = (uint8_t)dirIdx;
        pu.afMmvdStep     = (uint8_t)stepIdx;
        pu.mergeIdx       = (uint8_t)(baseIdx + baseIdxToMergeIdxOffset);
        pu.mergeType      = affineMergeCtx.mergeType         [pu.mergeIdx];
#if INTER_LIC
        pu.cu->LICFlag    = affineMergeCtx.LICFlags          [pu.mergeIdx];
#endif
        pu.interDir       = affineMergeCtx.interDirNeighbours[pu.mergeIdx];
        pu.cu->affineType = affineMergeCtx.affineType        [pu.mergeIdx];
        pu.cu->BcwIdx     = affineMergeCtx.BcwIdx            [pu.mergeIdx];
        pu.mmvdMergeFlag  = false;
        pu.ciipFlag       = false;
      }

      MvField mvfMmvd[2][3];
      PU::getAfMmvdMvf(pu, affineMergeCtx, mvfMmvd, pu.mergeIdx, pu.afMmvdStep, pu.afMmvdDir);
      PU::setAllAffineMvField(pu, mvfMmvd[0], REF_PIC_LIST_0);
      PU::setAllAffineMvField(pu, mvfMmvd[1], REF_PIC_LIST_1);
      PU::spanMotionInfo(pu);
      if (m_pcEncCfg->getMCTSEncConstraint() && (!(MCTSHelper::checkMvBufferForMCTSConstraint(*cu.firstPU))))
      {
        // Do not use this mode
        tempCS->initStructData(encTestMode.qp);
        return;
      }

      pu.mmvdEncOptMode = 0;
      if (bBufferMcFromNoResidualPass)
      {
        PelUnitBuf predTempBuf = m_acMergeBuffer[uiMrgHADIdx].getBuf(localUnitArea);
        if (!predBufReady[uiMrgHADIdx])
        {
          // First visit of this slot (normally in the with-residual pass): compute and cache
          m_pcInterSearch->motionCompensation(pu, predTempBuf, REF_PIC_LIST_X);
          predBufReady[uiMrgHADIdx] = true;
        }
        pu.cs->getPredBuf(pu).copyFrom(predTempBuf);
      }
      else
      {
        m_pcInterSearch->motionCompensation(pu);
      }

      xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, ( uiNoResidualPass == 0 ? &candHasNoResidual[uiMergeCand] : NULL ) );

      // Fast decision: once the best CU has no coded residual, stop testing with residual
      if (m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip)
      {
        bestIsSkip = bestCS->getCU(partitioner.chType)->rootCbf == 0;
      }
      tempCS->initStructData(encTestMode.qp);
    }// end loop uiMrgHADIdx

    // Early skip detection, evaluated once after the with-residual pass
    if (uiNoResidualPass == 0 && m_pcEncCfg->getUseEarlySkipDetection())
    {
      const CodingUnit     &bestCU = *bestCS->getCU(partitioner.chType);
      const PredictionUnit &bestPU = *bestCS->getPU(partitioner.chType);

      if (bestCU.rootCbf == 0)
      {
        if (bestPU.mergeFlag)
        {
          m_modeCtrl->setEarlySkipDetected();
        }
        else if (m_pcEncCfg->getMotionEstimationSearchMethod() != MESEARCH_SELECTIVE)
        {
          // Non-merge best PU: treat as skip only if all MV differences are zero
          int absolute_MV = 0;

          for (uint32_t uiRefListIdx = 0; uiRefListIdx < 2; uiRefListIdx++)
          {
            if (slice.getNumRefIdx(RefPicList(uiRefListIdx)) > 0)
            {
              absolute_MV += bestPU.mvd[uiRefListIdx].getAbsHor() + bestPU.mvd[uiRefListIdx].getAbsVer();
            }
          }

          if (absolute_MV == 0)
          {
            m_modeCtrl->setEarlySkipDetected();
          }
        }
      }
    }
  }
}
#endif

#if TM_MRG && !MERGE_ENC_OPT
/** Rate-distortion test of template-matching (TM) merge candidates for the current block.
 *
 *  Flow:
 *   - derive the TM merge list and refine each candidate (BDMVR when its condition holds,
 *     otherwise deriveTMMv); the refined motion is written back into mergeCtx,
 *   - (fast merge only) 1st pass: predict every candidate into its own buffer, rank by
 *     distortion + estimated bits and keep the best few in RdModeList,
 *   - 2nd pass: full RD check of the kept candidates, first with residual
 *     (uiNoResidualPass == 0) and then forced without residual (uiNoResidualPass == 1).
 *
 *  \param tempCS       scratch coding structure; re-initialised after every tested candidate
 *  \param bestCS       best coding structure so far; updated through xEncodeInterResidual
 *  \param partitioner  provides the channel type and the CU data for the current block
 *  \param encTestMode  tested mode; only its qp is read here
 */
void EncCu::xCheckRDCostTMMerge2Nx2N(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode)
{
  const Slice &slice = *tempCS->slice;
  CHECK(slice.getSliceType() == I_SLICE, "Merge modes not available for I-slices");
  tempCS->initStructData(encTestMode.qp);

  MergeCtx mergeCtx;
  const SPS &sps = *tempCS->sps;
  if (sps.getSbTMVPEnabledFlag())
  {
    Size bufSize = g_miScaling.scale(tempCS->area.lumaSize());
    mergeCtx.subPuMvpMiBuf = MotionBuf(m_SubPuMiBuf, bufSize);
  }
#if MULTI_PASS_DMVR
  // Per candidate: true when BDMVR was applied; the refined sub-PU MVs then live in m_mvBufBDMVR4TM
  bool applyBDMVR4TM[TM_MRG_MAX_NUM_CANDS] = { false };
#endif

  // Number of candidates actually tested. Every per-candidate array in this function is sized
  // by TM_MRG_MAX_NUM_CANDS, so the merge list length is clamped to it.
  int32_t candNum = 0;

  {
    // first get merge candidates (using a temporary CU/PU that is not added to tempCS)
    CodingUnit cu( tempCS->area );
    cu.cs       = tempCS;
    cu.predMode = MODE_INTER;
    cu.slice    = tempCS->slice;
    cu.tileIdx  = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
#if INTER_LIC
    cu.LICFlag  = false;
#endif

    PredictionUnit pu( tempCS->area );
    pu.cu = &cu;
    pu.cs = tempCS;
    cu.firstPU = &pu;

    pu.tmMergeFlag = true;
    PU::getInterMergeCandidates(pu, mergeCtx, 0);
#if JVET_W0090_ARMC_TM
    if (sps.getUseAML())
    {
      m_pcInterSearch->adjustInterMergeCandidates(pu, mergeCtx);
    }
#endif

    candNum = std::min(TM_MRG_MAX_NUM_CANDS, mergeCtx.numValidMergeCand);

    // Refine only the candidates that are tested below; going up to numValidMergeCand would
    // index applyBDMVR4TM (and the per-candidate MV buffers) past TM_MRG_MAX_NUM_CANDS if the
    // list were ever longer than that.
    for( uint32_t uiMergeCand = 0; uiMergeCand < candNum; uiMergeCand++ )
    {
      mergeCtx.setMergeInfo( pu, uiMergeCand );

#if MULTI_PASS_DMVR
      applyBDMVR4TM[uiMergeCand] = PU::checkBDMVRCondition(pu);
      if (applyBDMVR4TM[uiMergeCand])
      {
        pu.bdmvrRefine = true;
        m_pcInterSearch->setBdmvrSubPuMvBuf(m_mvBufBDMVR4TM[uiMergeCand << 1], m_mvBufBDMVR4TM[(uiMergeCand << 1) + 1]);
        // processBDMVR's return value decides whether BDMVR stays enabled for this candidate
        applyBDMVR4TM[uiMergeCand] = m_pcInterSearch->processBDMVR(pu);
      }
      else
      {
        m_pcInterSearch->deriveTMMv(pu);
      }
#else
      m_pcInterSearch->deriveTMMv( pu );
#endif

      // Store refined motion back to mergeCtx
      mergeCtx.interDirNeighbours[uiMergeCand] = pu.interDir;
      mergeCtx.BcwIdx[uiMergeCand] = pu.cu->BcwIdx;  // Bcw may change, because bi may be reduced to uni by deriveTMMv(pu)
      mergeCtx.mvFieldNeighbours[2 * uiMergeCand].setMvField( pu.mv[0], pu.refIdx[0] );
      mergeCtx.mvFieldNeighbours[2 * uiMergeCand + 1].setMvField( pu.mv[1], pu.refIdx[1] );
      // Invalidate the list that the (possibly reduced) inter direction no longer uses
      if( pu.interDir == 1 )
      {
        mergeCtx.mvFieldNeighbours[2 * uiMergeCand + 1].setMvField( Mv(), NOT_VALID );
      }
      if( pu.interDir == 2 )
      {
        mergeCtx.mvFieldNeighbours[2 * uiMergeCand].setMvField( Mv(), NOT_VALID );
      }
    }
    pu.regularMergeFlag = true;
  }

  // Per RD-list slot (indexed by uiMrgHADIdx): set by the with-residual pass so that the
  // no-residual pass can skip slots flagged there
  bool candHasNoResidual[TM_MRG_MAX_NUM_CANDS];
  for (uint32_t ui = 0; ui < TM_MRG_MAX_NUM_CANDS; ui++)
  {
    candHasNoResidual[ui] = false;
  }

  bool    bestIsSkip     = false;
  int32_t numMrgSATDCand = candNum;
  bool    mrgTempBufSet = false;   // true once acMergeRealBuffer holds the candidates' predictions

  // Default order: identity. Replaced by the cost-sorted order when the 1st pass runs.
  static_vector<uint32_t, TM_MRG_MAX_NUM_CANDS> RdModeList;
  for (uint32_t i = 0; i < TM_MRG_MAX_NUM_CANDS; i++)
  {
    RdModeList.push_back(i);
  }

  const UnitArea localUnitArea(tempCS->area.chromaFormat, Area(0, 0, tempCS->area.Y().width, tempCS->area.Y().height));
  PelUnitBuf  acMergeRealBuffer[TM_MRG_MAX_NUM_CANDS];
  CHECK(TM_MRG_MAX_NUM_CANDS > MMVD_MRG_MAX_RD_BUF_NUM, "TM cannot buffer a larger number of merge candidates than that of regular merge");
  for (unsigned i = 0; i < TM_MRG_MAX_NUM_CANDS; i++)
  {
    acMergeRealBuffer[i] = m_acMergeBuffer[i].getBuf(localUnitArea);
  }

  if (m_pcEncCfg->getUseFastMerge())
  {
    numMrgSATDCand = std::min(candNum, TM_MAX_NUM_SATD_CAND);
    bestIsSkip     = false;
    if (auto blkCache = dynamic_cast<CacheBlkInfoCtrl*>(m_modeCtrl))
    {
      bestIsSkip = blkCache->isSkip(tempCS->area);
      if (slice.getSPS()->getIBCFlag())
      {
        // With IBC enabled, the cached skip decision is only trusted when a best CU is recorded
        const ComprCUCtx &cuECtx = m_modeCtrl->getComprCUCtx();
        bestIsSkip = bestIsSkip && cuECtx.bestCU;
      }
    }

    static_vector<double, TM_MRG_MAX_NUM_CANDS> candCostList;

    // 1. Pass: get SATD-cost for selected candidates and reduce their count
    if( !bestIsSkip )
    {
      RdModeList.clear();
      mrgTempBufSet = true;
      // Snapshot of the CABAC contexts; restored before each bit estimate and after the pass
      const TempCtx ctxStart(m_CtxCache, m_CABACEstimator->getCtx());

      CodingUnit &cu      = tempCS->addCU( tempCS->area, partitioner.chType );
      const double sqrtLambdaForFirstPassIntra = m_pcRdCost->getMotionLambda( ) * FRAC_BITS_SCALE;
      partitioner.setCUData( cu );
      cu.slice            = tempCS->slice;
      cu.tileIdx          = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
#if INTER_LIC
      cu.LICFlag          = false;
#endif
      cu.skip             = false;
      cu.mmvdSkip         = false;
      cu.geoFlag          = false;
      cu.affine           = false;
      cu.predMode         = MODE_INTER;
      cu.chromaQpAdj      = m_cuChromaQpOffsetIdxPlus1;
      cu.qp               = encTestMode.qp;
    //cu.emtFlag  is set below

      PredictionUnit &pu  = tempCS->addPU( cu, partitioner.chType );
#if AFFINE_MMVD
      pu.afMmvdFlag       = false;
#endif
      pu.tmMergeFlag      = true;

      // Luma-only distortion against the original; Hadamard unless SATD is disabled for the slice
      DistParam distParam;
      const bool bUseHadamard = !tempCS->slice->getDisableSATDForRD();
      m_pcRdCost->setDistParam (distParam, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y(), sps.getBitDepth (CHANNEL_TYPE_LUMA), COMPONENT_Y, bUseHadamard);

      for( uint32_t uiMergeCand = 0; uiMergeCand < candNum; uiMergeCand++ )
      {
        mergeCtx.setMergeInfo( pu, uiMergeCand );
#if MULTI_PASS_DMVR
        if (applyBDMVR4TM[uiMergeCand])
        {
          pu.bdmvrRefine = true;
          m_pcInterSearch->setBdmvrSubPuMvBuf(m_mvBufBDMVR4TM[uiMergeCand << 1], m_mvBufBDMVR4TM[(uiMergeCand << 1) + 1]);
        }
        else
        {
          PU::spanMotionInfo(pu, mergeCtx);
        }
#else
        PU::spanMotionInfo(pu, mergeCtx);
#endif

        pu.mvRefine = false;
#if INTER_LIC
        m_pcInterSearch->m_storeBeforeLIC = false;
#endif

        // Full prediction into this candidate's own buffer; re-used by the 2nd pass
        m_pcInterSearch->motionCompensation(pu, acMergeRealBuffer[uiMergeCand], REF_PIC_LIST_X, true, true);

#if MULTI_PASS_DMVR
        if( pu.bdmvrRefine )
        {
          // Keep the BDOF sub-PU MV offsets of this candidate and span the refined motion
          ::memcpy( m_mvBufEncBDOF4TM[uiMergeCand], m_pcInterSearch->getBdofSubPuMvOffset(), sizeof( Mv ) * BDOF_SUBPU_MAX_NUM );
          PU::spanMotionInfo( pu, mergeCtx, m_mvBufBDMVR4TM[uiMergeCand << 1], m_mvBufBDMVR4TM[( uiMergeCand << 1 ) + 1], m_mvBufEncBDOF4TM[uiMergeCand] );
        }
#endif
        distParam.cur = acMergeRealBuffer[uiMergeCand].Y();
        Distortion uiSad = distParam.distFunc(distParam);
        m_CABACEstimator->getCtx() = ctxStart;
        uint64_t fracBits = m_pcInterSearch->xCalcPuMeBits(pu);
        double cost = (double)uiSad + (double)fracBits * sqrtLambdaForFirstPassIntra;

        updateCandList(uiMergeCand, cost, RdModeList, candCostList, numMrgSATDCand);

        CHECK(std::min(uiMergeCand + 1, (uint32_t)numMrgSATDCand) != RdModeList.size(), "");
      }

      // Try to limit number of candidates using SATD-costs
      for( uint32_t i = 1; i < numMrgSATDCand; i++ )
      {
        if( candCostList[i] > MRG_FAST_RATIO * candCostList[0] )
        {
          numMrgSATDCand = i;
          break;
        }
      }

      tempCS->initStructData( encTestMode.qp );
      m_CABACEstimator->getCtx() = ctxStart;
    }
    else
    {
      // Cached decision says "skip": no ranking, test every candidate (no-residual pass only)
      numMrgSATDCand = candNum;
    }
  }

  // 2. Pass: check candidates using full RD test
  m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;

  for (uint32_t uiNoResidualPass = 0; uiNoResidualPass < 2; ++uiNoResidualPass)
  {
    for( uint32_t uiMrgHADIdx = 0; uiMrgHADIdx < numMrgSATDCand; uiMrgHADIdx++ )
    {
      uint32_t uiMergeCand = RdModeList[uiMrgHADIdx];

      // No-residual pass: skip slots flagged in candHasNoResidual by the with-residual pass.
      // With-residual pass: skip everything once the best mode is known to be skip.
      if (((uiNoResidualPass != 0) && candHasNoResidual[uiMrgHADIdx])
       || ( (uiNoResidualPass == 0) && bestIsSkip ) )
      {
        continue;
      }

      // first get merge candidates
      CodingUnit &cu      = tempCS->addCU( tempCS->area, partitioner.chType );

      partitioner.setCUData( cu );
      cu.slice            = tempCS->slice;
      cu.tileIdx          = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
      cu.skip             = false;
      cu.mmvdSkip         = false;
#if INTER_LIC
      cu.LICFlag          = false;
#endif
      cu.affine           = false;
      cu.geoFlag          = false;
      cu.predMode         = MODE_INTER;
      cu.chromaQpAdj      = m_cuChromaQpOffsetIdxPlus1;
      cu.qp               = encTestMode.qp;

      PredictionUnit &pu  = tempCS->addPU( cu, partitioner.chType );
#if AFFINE_MMVD
      pu.afMmvdFlag       = false;
#endif
      pu.tmMergeFlag      = true;

      {
        mergeCtx.setMergeInfo(pu, uiMergeCand);
#if MULTI_PASS_DMVR
        if (applyBDMVR4TM[uiMergeCand])
        {
          pu.bdmvrRefine = true;
          m_pcInterSearch->setBdmvrSubPuMvBuf(m_mvBufBDMVR4TM[uiMergeCand << 1], m_mvBufBDMVR4TM[(uiMergeCand << 1) + 1]);
        }
#endif
      }
#if MULTI_PASS_DMVR
      if (!pu.bdmvrRefine)
      {
        PU::spanMotionInfo(pu, mergeCtx);
      }
#else
      PU::spanMotionInfo(pu, mergeCtx);
#endif

      if( mrgTempBufSet )
      {
        // Prediction was already computed by the 1st pass: copy it instead of recomputing
        tempCS->getPredBuf().copyFrom(acMergeRealBuffer[uiMergeCand]);
#if MULTI_PASS_DMVR
        if( pu.bdmvrRefine )
        {
          PU::spanMotionInfo( pu, mergeCtx, m_mvBufBDMVR4TM[uiMergeCand << 1], m_mvBufBDMVR4TM[( uiMergeCand << 1 ) + 1], m_mvBufEncBDOF4TM[uiMergeCand] );
        }
#endif
      }
      else
      {
        pu.mvRefine = false;
#if INTER_LIC
        m_pcInterSearch->m_storeBeforeLIC = false;
#endif
        m_pcInterSearch->motionCompensation( pu );
#if MULTI_PASS_DMVR
        if( pu.bdmvrRefine )
        {
          ::memcpy( m_mvBufEncBDOF4TM[uiMergeCand], m_pcInterSearch->getBdofSubPuMvOffset(), sizeof( Mv ) * BDOF_SUBPU_MAX_NUM );
          PU::spanMotionInfo( pu, mergeCtx, m_mvBufBDMVR4TM[uiMergeCand << 1], m_mvBufBDMVR4TM[( uiMergeCand << 1 ) + 1], m_mvBufEncBDOF4TM[uiMergeCand] );
        }
#endif
      }

      xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, uiNoResidualPass == 0 ? &candHasNoResidual[uiMrgHADIdx] : NULL );

      // Fast decision: once the best CU has no coded residual, stop testing with residual
      if( m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip )
      {
        bestIsSkip = !bestCS->cus.empty() && bestCS->getCU( partitioner.chType )->rootCbf == 0;
      }
      tempCS->initStructData( encTestMode.qp );
    }// end loop uiMrgHADIdx

    // Early skip detection, evaluated once after the with-residual pass
    if( uiNoResidualPass == 0 && m_pcEncCfg->getUseEarlySkipDetection() )
    {
      const CodingUnit     &bestCU = *bestCS->getCU( partitioner.chType );
      const PredictionUnit &bestPU = *bestCS->getPU( partitioner.chType );

      if( bestCU.rootCbf == 0 )
      {
        if( bestPU.mergeFlag )
        {
          m_modeCtrl->setEarlySkipDetected();
        }
        else if( m_pcEncCfg->getMotionEstimationSearchMethod() != MESEARCH_SELECTIVE )
        {
          // Non-merge best PU: treat as skip only if all MV differences are zero
          int absolute_MV = 0;

          for( uint32_t uiRefListIdx = 0; uiRefListIdx < 2; uiRefListIdx++ )
          {
            if( slice.getNumRefIdx( RefPicList( uiRefListIdx ) ) > 0 )
            {
              absolute_MV += bestPU.mvd[uiRefListIdx].getAbsHor() + bestPU.mvd[uiRefListIdx].getAbsVer();
            }
          }

          if( absolute_MV == 0 )
          {
            m_modeCtrl->setEarlySkipDetected();
          }
        }
      }
    }
  }
  // Deblocking-aware cost of the winner, only if this function changed the best mode
  if ( m_bestModeUpdated && bestCS->cost != MAX_DOUBLE )
  {
    xCalDebCost( *bestCS, partitioner );
  }
}
#endif

//////////////////////////////////////////////////////////////////////////////////////////////
// ibc merge/skip mode check
/** Tests the IBC merge/skip candidates of the current block and offers each tested result to xCheckBestMode().
 *
 *  Outline of what the code below does:
 *   - returns immediately for blocks whose luma width or height reaches 128;
 *   - builds the IBC merge candidate list with PU::getIBCMergeCandidates() (plus a second,
 *     template-matching list when JVET_Z0084_IBC_TM && IBC_TM_MRG is compiled in);
 *   - pass 1: computes, per candidate, a distortion against the reconstructed picture plus an
 *     index-bit term weighted by the motion lambda, and keeps the candidates sorted by that cost;
 *   - cuts the list at the first candidate whose cost exceeds MRG_FAST_RATIO times the best one;
 *   - pass 2: for each kept candidate runs motion compensation and encodeResAndCalcRdInterCU(),
 *     once per value of numResidualPass (0 and 1), and calls xCheckBestMode().
 *
 *  \param tempCS       working coding structure; re-initialised with initStructData() between tests
 *  \param bestCS       best coding structure so far; passed by reference to xCheckBestMode()
 *  \param partitioner  current partitioning state (chType, CU data)
 *  \param encTestMode  mode under test; only its qp is read directly in this function
 */
void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode)
{
  assert(partitioner.chType != CHANNEL_TYPE_CHROMA); // chroma IBC is derived
#if CTU_256
  if( tempCS->area.lwidth() >= 128 || tempCS->area.lheight() >= 128 ) // disable IBC mode larger than 64x64
#else
  if (tempCS->area.lwidth() == 128 || tempCS->area.lheight() == 128) // disable IBC mode larger than 64x64
#endif
  {
    return;
  }
  const SPS &sps = *tempCS->sps;

  tempCS->initStructData(encTestMode.qp);
  MergeCtx mergeCtx;
#if JVET_Z0084_IBC_TM && IBC_TM_MRG
  // Second candidate list, filled with tmMergeFlag set (template-matching variant).
  MergeCtx mergeCtxTm;
#endif

  // Give the merge context a sub-PU motion buffer when SbTMVP is enabled in the SPS.
  if (sps.getSbTMVPEnabledFlag())
  {
    Size bufSize = g_miScaling.scale(tempCS->area.lumaSize());
    mergeCtx.subPuMvpMiBuf = MotionBuf(m_SubPuMiBuf, bufSize);
  }

  {
    // first get merge candidates
    // The CU/PU below are stack temporaries used only for list derivation; they are not added to tempCS.
    CodingUnit cu(tempCS->area);
    cu.cs = tempCS;
    cu.predMode = MODE_IBC;
    cu.slice = tempCS->slice;
    cu.tileIdx          = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
    PredictionUnit pu(tempCS->area);
    pu.cu = &cu;
    pu.cs = tempCS;
    cu.mmvdSkip = false;
    pu.mmvdMergeFlag = false;
    pu.regularMergeFlag = false;
#if INTER_LIC
    cu.LICFlag = false;
#endif

    cu.geoFlag = false;
    PU::getIBCMergeCandidates(pu, mergeCtx);
#if JVET_Y0058_IBC_LIST_MODIFY && JVET_W0090_ARMC_TM
    // Candidate list adjustment is applied only when the SPS enables AML.
    if(pu.cs->sps->getUseAML())
    {
#if JVET_Z0075_IBC_HMVP_ENLARGE
      m_pcInterSearch->adjustIBCMergeCandidates(pu, mergeCtx, 0, IBC_MRG_MAX_NUM_CANDS_MEM);
#else
      m_pcInterSearch->adjustIBCMergeCandidates(pu, mergeCtx);
#endif
    }
#endif

#if JVET_Z0084_IBC_TM && IBC_TM_MRG
    // Build the TM list the same way with tmMergeFlag temporarily set; leave it empty when DMVD is off.
    if (pu.cs->sps->getUseDMVDMode() == true)
    {
    pu.tmMergeFlag = true;
    PU::getIBCMergeCandidates(pu, mergeCtxTm);
#if JVET_Y0058_IBC_LIST_MODIFY && JVET_W0090_ARMC_TM
    if (pu.cs->sps->getUseAML())
    {
#if JVET_Z0075_IBC_HMVP_ENLARGE
      m_pcInterSearch->adjustIBCMergeCandidates(pu, mergeCtxTm, 0, IBC_MRG_MAX_NUM_CANDS_MEM);
#else
      m_pcInterSearch->adjustIBCMergeCandidates(pu, mergeCtxTm);
#endif
    }
#endif
    pu.tmMergeFlag = false;
    }
    else
    {
      mergeCtxTm.numValidMergeCand = 0;
    }
#endif
  }

#if JVET_Z0084_IBC_TM && IBC_TM_MRG
  // All per-candidate arrays are sized for both lists (regular + TM), hence the "<< 1".
  // NOTE(review): candHasNoResidual is only ever written with 0 in this function, so the
  // "candHasNoResidual[mergeCand] == 1" test in pass 2 never filters anything here.
  int candHasNoResidual[IBC_MRG_MAX_NUM_CANDS<<1];
  for (unsigned int ui = 0; ui < IBC_MRG_MAX_NUM_CANDS<<1; ui++)
  {
    candHasNoResidual[ui] = 0;
  }

  bool                                                 bestIsSkip = false;
  unsigned                                             numMrgSATDCand = mergeCtx.numValidMergeCand + mergeCtxTm.numValidMergeCand;
  // RdModeList starts as the identity ordering and is re-ordered by updateCandList() in pass 1.
  static_vector<unsigned, (IBC_MRG_MAX_NUM_CANDS<<1)>  RdModeList(IBC_MRG_MAX_NUM_CANDS<<1);
  for (unsigned i = 0; i < IBC_MRG_MAX_NUM_CANDS<<1; i++)
  {
    RdModeList[i] = i;
  }

  static_vector<double, (IBC_MRG_MAX_NUM_CANDS<<1)>  candCostList(IBC_MRG_MAX_NUM_CANDS<<1, MAX_DOUBLE);
#else
  // NOTE(review): candHasNoResidual is only ever written with 0 in this function, so the
  // "candHasNoResidual[mergeCand] == 1" test in pass 2 never filters anything here.
  int candHasNoResidual[MRG_MAX_NUM_CANDS];
  for (unsigned int ui = 0; ui < mergeCtx.numValidMergeCand; ui++)
  {
    candHasNoResidual[ui] = 0;
  }

  bool                                        bestIsSkip = false;
  unsigned                                    numMrgSATDCand = mergeCtx.numValidMergeCand;
  // RdModeList starts as the identity ordering and is re-ordered by updateCandList() in pass 1.
  static_vector<unsigned, MRG_MAX_NUM_CANDS>  RdModeList(MRG_MAX_NUM_CANDS);
  for (unsigned i = 0; i < MRG_MAX_NUM_CANDS; i++)
  {
    RdModeList[i] = i;
  }

  //{
    static_vector<double, MRG_MAX_NUM_CANDS>  candCostList(MRG_MAX_NUM_CANDS, MAX_DOUBLE);
#endif
    // 1. Pass: get SATD-cost for selected candidates and reduce their count
    {
      const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda( );

      // A real CU/PU is added to tempCS for the cost estimation; tempCS is re-initialised at the end of the pass.
      CodingUnit &cu = tempCS->addCU(CS::getArea(*tempCS, tempCS->area, (const ChannelType)partitioner.chType), (const ChannelType)partitioner.chType);

      partitioner.setCUData(cu);
      cu.slice = tempCS->slice;
      cu.tileIdx = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
      cu.skip = false;
      cu.predMode = MODE_IBC;
      cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1;
      cu.qp = encTestMode.qp;
      cu.mmvdSkip = false;
#if INTER_LIC
      cu.LICFlag = false;
#endif
      cu.geoFlag = false;

      PredictionUnit &pu = tempCS->addPU(cu, partitioner.chType); //tempCS->addPU(cu);
      pu.mmvdMergeFlag = false;
      pu.regularMergeFlag = false;

      // The distortion is measured between the original luma block and the reconstructed
      // luma of the current picture (IBC references the picture being coded).
      DistParam distParam;
      const bool bUseHadamard = !cu.slice->getDisableSATDForRD();
      Picture* refPic = pu.cu->slice->getPic();
      const CPelBuf refBuf = refPic->getRecoBuf(pu.blocks[COMPONENT_Y]);
      const Pel*        piRefSrch = refBuf.buf;
      // With LMCS active for this CTU the original luma is first mapped through the forward LUT
      // into m_tmpStorageLCU, and that mapped copy is used as the distortion source.
      if (tempCS->slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
      {
        const CompArea &area = cu.blocks[COMPONENT_Y];
        CompArea    tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
        PelBuf tmpLuma = m_tmpStorageLCU->getBuf(tmpArea);
        tmpLuma.rspSignal( tempCS->getOrgBuf().Y(), m_pcReshape->getFwdLUT() );
        m_pcRdCost->setDistParam(distParam, tmpLuma, refBuf, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, bUseHadamard);
      }
      else
        m_pcRdCost->setDistParam(distParam, tempCS->getOrgBuf().Y(), refBuf, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, bUseHadamard);

      int refStride = refBuf.stride;
#if !JVET_Y0058_IBC_LIST_MODIFY
      // Geometry needed by searchBv() to validate each block vector.
      const UnitArea localUnitArea(tempCS->area.chromaFormat, Area(0, 0, tempCS->area.Y().width, tempCS->area.Y().height));
      const int cuPelX = pu.Y().x;
      const int cuPelY = pu.Y().y;
      const int roiWidth  = pu.lwidth();
      const int roiHeight = pu.lheight();
      const int picWidth  = pu.cs->slice->getPPS()->getPicWidthInLumaSamples();
      const int picHeight = pu.cs->slice->getPPS()->getPicHeightInLumaSamples();
      const unsigned int lcuWidth = pu.cs->slice->getSPS()->getMaxCUWidth();
#endif

      // numValidBv starts at the total candidate count and is decremented for every rejected candidate.
#if JVET_Z0084_IBC_TM && IBC_TM_MRG
      int numValidBv = mergeCtx.numValidMergeCand + mergeCtxTm.numValidMergeCand;
#else
      int numValidBv = mergeCtx.numValidMergeCand;
#endif
      for (unsigned int mergeCand = 0; mergeCand < mergeCtx.numValidMergeCand; mergeCand++)
      {
        mergeCtx.setMergeInfo(pu, mergeCand); // set bv info in merge mode

        int xPred = pu.bv.getHor();
        int yPred = pu.bv.getVer();
        // Reject the candidate: either searchBv() fails, or (list-modify build) the BV is (0,0).
#if !JVET_Y0058_IBC_LIST_MODIFY  //should have already been checked at merge list construction
#if JVET_Z0084_IBC_TM
        if (!PU::searchBv(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, xPred, yPred, lcuWidth)) // not valid bv derived
#else
        if (!m_pcInterSearch->searchBv(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, xPred, yPred, lcuWidth)) // not valid bv derived
#endif
#else
        if (pu.bv == Mv(0, 0))
#endif
        {
          numValidBv--;
          continue;
        }
        PU::spanMotionInfo(pu, mergeCtx);

        // Point the "cur" buffer at the reconstructed samples displaced by the block vector.
        distParam.cur.buf = piRefSrch + refStride * yPred + xPred;

        Distortion sad = distParam.distFunc(distParam);
        // Index cost: mergeCand + 1 bits, one less for the last index of the list.
        unsigned int bitsCand = mergeCand + 1;
#if JVET_Z0084_IBC_TM
        if (mergeCand == tempCS->sps->getMaxNumIBCMergeCand() - 1)
#else
        if (mergeCand == tempCS->sps->getMaxNumMergeCand() - 1)
#endif
        {
          bitsCand--;
        }
        double cost = (double)sad + (double)bitsCand * sqrtLambdaForFirstPass;

        updateCandList(mergeCand, cost, RdModeList, candCostList
         , numMrgSATDCand);
      }

#if JVET_Z0084_IBC_TM && IBC_TM_MRG
    // Add TM refined candidates
    for (unsigned int mergeCand = 0; mergeCand < mergeCtxTm.numValidMergeCand; mergeCand++)
    {
      mergeCtxTm.setMergeInfo(pu, mergeCand); // set bv info in merge mode

      // Run deriveTMMv() with tmMergeFlag set, then read the result back from pu.mv[0].
      Mv tempBv = pu.bv;
      pu.tmMergeFlag = true;
      m_pcInterSearch->deriveTMMv(pu);
      pu.tmMergeFlag = false;

      pu.bv = pu.mv[0];
      pu.bv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT);
      // Check if mv has been refined
      // An unchanged BV is dropped from the TM list.
      if (pu.bv == tempBv)
      {
        numValidBv--;
        continue;
      }

      //Store refined result for RDO loop
      mergeCtxTm.mvFieldNeighbours[mergeCand << 1].mv = pu.mv[0];

      int xPred = pu.bv.getHor();
      int yPred = pu.bv.getVer();
#if !JVET_Y0058_IBC_LIST_MODIFY  //should have already been checked at merge list construction and during refinement
      if (!PU::searchBv(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, xPred, yPred, lcuWidth)) // not valid bv derived
#else
      if (pu.bv == Mv(0, 0))
#endif
      {
        numValidBv--;
        continue;
      }
      PU::spanMotionInfo(pu, mergeCtxTm);

      distParam.cur.buf = piRefSrch + refStride * yPred + xPred;

      Distortion sad = distParam.distFunc(distParam);
      unsigned int bitsCand = mergeCand + 1;
      if (mergeCand == tempCS->sps->getMaxNumIBCMergeCand() - 1)
      {
        bitsCand--;
      }
      double cost = (double)sad + (double)bitsCand * sqrtLambdaForFirstPass;

      // TM candidates are stored with their index offset by the regular list size;
      // pass 2 uses "mergeCand >= mergeCtx.numValidMergeCand" to recognise them.
      updateCandList(mergeCand+mergeCtx.numValidMergeCand, cost, RdModeList, candCostList, numMrgSATDCand);
    }
#endif

      // Try to limit number of candidates using SATD-costs
      // The list is cut at the first entry whose cost exceeds MRG_FAST_RATIO times the best cost.
      if (numValidBv)
      {
        numMrgSATDCand = numValidBv;
        for (unsigned int i = 1; i < numValidBv; i++)
        {
          if (candCostList[i] > MRG_FAST_RATIO*candCostList[0])
          {
            numMrgSATDCand = i;
            break;
          }
        }
      }
      else
      {
        // No usable block vector at all: leave tempCS reset and give up on this mode.
        tempCS->dist = 0;
        tempCS->fracBits = 0;
        tempCS->cost = MAX_DOUBLE;
        tempCS->costDbOffset = 0;
        tempCS->initStructData(encTestMode.qp);
        return;
      }

      tempCS->initStructData(encTestMode.qp);
    }
  //}


  const unsigned int iteration = 2;
  m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;
  // 2. Pass: check candidates using full RD test
  // numResidualPass is forwarded as "(numResidualPass != 0)" to encodeResAndCalcRdInterCU();
  // presumably pass 1 is the no-residual (skip) test - confirm against InterSearch.
  for (unsigned int numResidualPass = 0; numResidualPass < iteration; numResidualPass++)
  {
    for (unsigned int mrgHADIdx = 0; mrgHADIdx < numMrgSATDCand; mrgHADIdx++)
    {
      unsigned int mergeCand = RdModeList[mrgHADIdx];
      if (!(numResidualPass == 1 && candHasNoResidual[mergeCand] == 1))
      {
        // Once the best mode is a skip (rootCbf == 0), the remaining pass-0 tests are omitted.
        if (!(bestIsSkip && (numResidualPass == 0)))
        {
          {

            // set up a fresh CU/PU in tempCS for this candidate
            CodingUnit &cu = tempCS->addCU(CS::getArea(*tempCS, tempCS->area, (const ChannelType)partitioner.chType), (const ChannelType)partitioner.chType);

            partitioner.setCUData(cu);
            cu.slice = tempCS->slice;
            cu.tileIdx = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
            cu.skip = false;
            cu.predMode = MODE_IBC;
            cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1;
            cu.qp = encTestMode.qp;
            cu.sbtInfo = 0;
#if INTER_LIC
            cu.LICFlag = false;
#endif

            PredictionUnit &pu = tempCS->addPU(cu, partitioner.chType);// tempCS->addPU(cu);
            pu.intraDir[0] = DC_IDX; // set intra pred for ibc block
            pu.intraDir[1] = PLANAR_IDX; // set intra pred for ibc block
            cu.mmvdSkip = false;
            pu.mmvdMergeFlag = false;
            pu.regularMergeFlag = false;
            cu.geoFlag = false;
#if JVET_Z0084_IBC_TM && IBC_TM_MRG
            // Indices at or beyond the regular list size refer to the TM list (see pass 1).
            pu.tmMergeFlag      = false;
            if (mergeCand >= mergeCtx.numValidMergeCand)
            {
              pu.tmMergeFlag    = true;
              mergeCand        -= mergeCtx.numValidMergeCand;
              mergeCtxTm.setMergeInfo(pu, mergeCand);
              PU::spanMotionInfo(pu, mergeCtxTm);
            }
            else
#endif
            {
              mergeCtx.setMergeInfo(pu, mergeCand);
              PU::spanMotionInfo(pu, mergeCtx);
            }
            // Chroma is processed together with luma unless a separate (dual) tree is in use.
#if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
            const bool chroma = !(CS::isDualITree(*tempCS));
#else
            const bool chroma = !pu.cu->isSepTree();
#endif
            //  MC
            m_pcInterSearch->motionCompensation(pu,REF_PIC_LIST_0, true, chroma);
            // Restart the CABAC estimator from the context state saved at the start of this CU.
            m_CABACEstimator->getCtx() = m_CurrCtx->start;

            m_pcInterSearch->encodeResAndCalcRdInterCU(*tempCS, partitioner, (numResidualPass != 0), true, chroma);
            // Carry the colour-transform bookkeeping over to bestCS when ACT is enabled in the SPS.
            if (tempCS->slice->getSPS()->getUseColorTrans())
            {
              bestCS->tmpColorSpaceCost = tempCS->tmpColorSpaceCost;
              bestCS->firstColorSpaceSelected = tempCS->firstColorSpaceSelected;
            }
            xEncodeDontSplit(*tempCS, partitioner);

#if ENABLE_QPA_SUB_CTU
            xCheckDQP (*tempCS, partitioner);
#else
            // this if-check is redundant
            if (tempCS->pps->getUseDQP() && partitioner.currQgEnable())
            {
              xCheckDQP(*tempCS, partitioner);
            }
#endif
            xCheckChromaQPOffset( *tempCS, partitioner );


            DTRACE_MODE_COST(*tempCS, m_pcRdCost->getLambda());
            xCheckBestMode(tempCS, bestCS, partitioner, encTestMode);

            tempCS->initStructData(encTestMode.qp);
          }

            // Fast merge decision: remember whether the current best has no coded residual.
            if (m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip)
            {
              if (bestCS->getCU(partitioner.chType) == NULL)
                bestIsSkip = 0;
              else
              bestIsSkip = bestCS->getCU(partitioner.chType)->rootCbf == 0;
            }
        }
      }
    }
  }
  // Refresh the deblocking-aware cost of the winner if this function changed the best mode.
  if ( m_bestModeUpdated && bestCS->cost != MAX_DOUBLE )
  {
    xCalDebCost( *bestCS, partitioner );
  }
}

/** Evaluates the searched (non-merge) IBC mode for the current block.
 *
 *  One MODE_IBC coding unit is added to tempCS and predIBCSearch() is asked for a block
 *  vector. When the search reports success, the block is motion compensated, its residual
 *  is coded and the result is handed to xCheckBestMode(). Otherwise tempCS is left with
 *  cost MAX_DOUBLE and zero distortion/bits.
 *
 *  \param tempCS       working coding structure; passed by reference to xCheckBestMode()
 *  \param bestCS       best coding structure found so far
 *  \param partitioner  current partitioning state
 *  \param encTestMode  mode under test; only its qp is read directly here
 */
void EncCu::xCheckRDCostIBCMode(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode)
{
  // IBC is not tried once the luma width or height reaches 128 samples.
#if CTU_256
  const bool sizeExcluded = tempCS->area.lwidth() >= 128 || tempCS->area.lheight() >= 128;
#else
  const bool sizeExcluded = tempCS->area.lwidth() == 128 || tempCS->area.lheight() == 128;
#endif
  if (sizeExcluded)
  {
    return;
  }

  tempCS->initStructData(encTestMode.qp);

  m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;

  // Create the candidate CU and fill in the fields that identify it as an IBC block.
  CodingUnit &ibcCu = tempCS->addCU(CS::getArea(*tempCS, tempCS->area, partitioner.chType), partitioner.chType);

  partitioner.setCUData(ibcCu);
  ibcCu.slice       = tempCS->slice;
  ibcCu.tileIdx     = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
  ibcCu.skip        = false;
  ibcCu.predMode    = MODE_IBC;
  ibcCu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1;
  ibcCu.qp          = encTestMode.qp;
  ibcCu.imv         = 0;
  ibcCu.sbtInfo     = 0;

  CU::addPUs(ibcCu);

  // Same reset as above, repeated after the CU/PU set-up (kept as in the original sequence).
  m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;

  PredictionUnit &ibcPu = *ibcCu.firstPU;
  ibcCu.mmvdSkip         = false;
  ibcPu.mmvdMergeFlag    = false;
  ibcPu.regularMergeFlag = false;
#if INTER_LIC
  ibcCu.LICFlag = false;
#endif

  // Intra directions assigned to the IBC block.
  ibcPu.intraDir[0] = DC_IDX;
  ibcPu.intraDir[1] = PLANAR_IDX;

  ibcPu.interDir = 1;                          // use list 0 for IBC mode
  ibcPu.refIdx[REF_PIC_LIST_0] = MAX_NUM_REF;  // last idx in the list

  const bool bvFound = m_pcInterSearch->predIBCSearch(ibcCu, partitioner, m_ctuIbcSearchRangeX, m_ctuIbcSearchRangeY, m_ibcHashMap);

  if (!bvFound)
  {
    // Search failed: mark tempCS as not usable and stop.
    tempCS->dist         = 0;
    tempCS->fracBits     = 0;
    tempCS->cost         = MAX_DOUBLE;
    tempCS->costDbOffset = 0;
    return;
  }

  PU::spanMotionInfo(ibcPu);

  // Chroma is handled together with luma unless a separate (dual) tree is in use.
#if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
  const bool withChroma = !(CS::isDualITree(*tempCS));
#else
  const bool withChroma = !ibcPu.cu->isSepTree();
#endif

  // Prediction followed by residual coding and RD cost computation.
  m_pcInterSearch->motionCompensation(ibcPu, REF_PIC_LIST_0, true, withChroma);
  m_pcInterSearch->encodeResAndCalcRdInterCU(*tempCS, partitioner, false, true, withChroma);

  if (tempCS->slice->getSPS()->getUseColorTrans())
  {
    bestCS->tmpColorSpaceCost       = tempCS->tmpColorSpaceCost;
    bestCS->firstColorSpaceSelected = tempCS->firstColorSpaceSelected;
  }

  xEncodeDontSplit(*tempCS, partitioner);

#if ENABLE_QPA_SUB_CTU
  xCheckDQP (*tempCS, partitioner);
#else
  // this if-check is redundant
  if (tempCS->pps->getUseDQP() && partitioner.currQgEnable())
  {
    xCheckDQP(*tempCS, partitioner);
  }
#endif
  xCheckChromaQPOffset( *tempCS, partitioner );

  // Deblocking-aware cost is enabled from the encoder configuration for this candidate.
  tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt();
  if ( m_bestModeUpdated )
  {
    xCalDebCost( *tempCS, partitioner );
  }

  DTRACE_MODE_COST(*tempCS, m_pcRdCost->getLambda());
  xCheckBestMode(tempCS, bestCS, partitioner, encTestMode);
}
  // check ibc mode in encoder RD
  //////////////////////////////////////////////////////////////////////////////////////////////

/** Tests regular inter prediction (predInterSearch) for the current block, once per BCW index.
 *
 *  With JVET_X0083_BM_AMVP_MERGE_MODE the whole test is wrapped in an outer loop
 *  (bdmvrAmSearchLoop) that additionally tries amvpMergeModeFlag set for list 0 and for list 1.
 *  Every tested candidate goes through xEncodeInterResidual(), which receives tempCS/bestCS
 *  by reference.
 *
 *  \param tempCS       working coding structure; re-initialised with initStructData() between tests
 *  \param bestCS       best coding structure so far
 *  \param partitioner  current partitioning state
 *  \param encTestMode  mode under test; qp and the ETO_LIC option bit are read here
 *  \return only when ENABLE_OBMC && JVET_AA0129_INTERHASH_OBMCOFF_RD: validMode when hash ME is
 *          enabled in the configuration, otherwise true. validMode is set when a tested
 *          candidate improved bestOBMCCost and the without-OBMC backup was stored.
 */
#if ENABLE_OBMC && JVET_AA0129_INTERHASH_OBMCOFF_RD
bool EncCu::xCheckRDCostInter( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
#else
void EncCu::xCheckRDCostInter( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
#endif
{
#if ENABLE_OBMC && JVET_AA0129_INTERHASH_OBMCOFF_RD
  bool validMode = false;
#endif
#if INTER_LIC
  // Early out when the fast-LIC control decides this LIC test is not worth running.
  if (m_pcInterSearch->m_fastLicCtrl.skipRDCheckForLIC((encTestMode.opts & ETO_LIC) > 0, IMV_OFF, bestCS->cost, tempCS->area.Y().area()))
  {
#if ENABLE_OBMC && JVET_AA0129_INTERHASH_OBMCOFF_RD
    return (m_pcEncCfg->getUseHashME() ? validMode : true);
#else
    return;
#endif
  }
#endif
#if ENABLE_OBMC
  // Lowest cost seen so far among the candidates tested in this call (used for the OBMC backup).
  double bestOBMCCost = MAX_DOUBLE;
#endif
#if JVET_X0083_BM_AMVP_MERGE_MODE
  // Three outer iterations: 0 = plain AMVP, 1 = amvpMergeModeFlag on list 0, 2 = on list 1.
  // Reduced to one for non-B slices, when the mode is disabled, or when LIC is being tested.
  int maxBdmvrAmSearchLoop = 3;
  m_pcInterSearch->m_amvpOnlyCost = std::numeric_limits<Distortion>::max();
#if JVET_Y0128_NON_CTC
  if (!tempCS->slice->isInterB() || (tempCS->slice->getUseAmvpMergeMode() == false)
#else
  if (!tempCS->slice->isInterB() || (tempCS->picHeader->getMvdL1ZeroFlag() == true)
#endif
#if INTER_LIC
      || (tempCS->slice->getUseLIC() && (encTestMode.opts & ETO_LIC))
#endif
      )
  {
    maxBdmvrAmSearchLoop = 1;
  }
  for (int bdmvrAmSearchLoop = 0; bdmvrAmSearchLoop < maxBdmvrAmSearchLoop; bdmvrAmSearchLoop++)
  {
  bool bdmvrAmMergeNotValid = false;
#endif
  tempCS->initStructData( encTestMode.qp );
  m_pcInterSearch->setAffineModeSelected(false);

  m_pcInterSearch->resetBufferedUniMotions();
  // Number of BCW indices to try: BCW_NUM for B slices with BCW enabled in the SPS, else 1.
  // Forced to 1 for LIC, for blocks smaller than BCW_SIZE_CONSTRAINT and for the AMVP-merge iterations.
  int bcwLoopNum = (tempCS->slice->isInterB() ? BCW_NUM : 1);
  bcwLoopNum = (tempCS->sps->getUseBcw() ? bcwLoopNum : 1);
#if INTER_LIC
  bool lic = encTestMode.opts & ETO_LIC;
  bcwLoopNum = lic ? 1 : bcwLoopNum;
#endif

  if( tempCS->area.lwidth() * tempCS->area.lheight() < BCW_SIZE_CONSTRAINT )
  {
    bcwLoopNum = 1;
  }
#if JVET_X0083_BM_AMVP_MERGE_MODE
  bcwLoopNum = (bdmvrAmSearchLoop > 0) ? 1 : bcwLoopNum;
#endif

  // Best cost on entry; compared against equBcwCost for the early termination below.
  double curBestCost = bestCS->cost;
  double equBcwCost = MAX_DOUBLE;

  m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;

  for( int bcwLoopIdx = 0; bcwLoopIdx < bcwLoopNum; bcwLoopIdx++ )
  {
    // Fast BCW: if the block cache reports a best inter result for this area, only the
    // default weight and the cached best BCW index are tested.
    if( m_pcEncCfg->getUseBcwFast() )
    {
      auto blkCache = dynamic_cast< CacheBlkInfoCtrl* >(m_modeCtrl);

      if( blkCache )
      {
        bool isBestInter = blkCache->getInter(bestCS->area);
        uint8_t bestBcwIdx = blkCache->getBcwIdx(bestCS->area);

        if( isBestInter && g_BcwSearchOrder[bcwLoopIdx] != BCW_DEFAULT && g_BcwSearchOrder[bcwLoopIdx] != bestBcwIdx )
        {
          continue;
        }
      }
    }
    // When getCheckLDC() is false only loop positions 0, 3 and 4 of the search order are tested.
    if( !tempCS->slice->getCheckLDC() )
    {
      if( bcwLoopIdx != 0 && bcwLoopIdx != 3 && bcwLoopIdx != 4 )
      {
        continue;
      }
    }

  CodingUnit &cu      = tempCS->addCU( tempCS->area, partitioner.chType );

  partitioner.setCUData( cu );
  cu.slice            = tempCS->slice;
  cu.tileIdx          = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
  cu.skip             = false;
  cu.mmvdSkip = false;
//cu.affine
  cu.predMode         = MODE_INTER;
#if INTER_LIC
  cu.LICFlag          = lic;
#endif
  cu.chromaQpAdj      = m_cuChromaQpOffsetIdxPlus1;
  cu.qp               = encTestMode.qp;
  CU::addPUs( cu );

  cu.BcwIdx = g_BcwSearchOrder[bcwLoopIdx];
  uint8_t bcwIdx = cu.BcwIdx;
  bool  testBcw = (bcwIdx != BCW_DEFAULT);

#if INTER_LIC
  // The uni-MV buffer is swapped around the search when LIC is tested, and swapped back afterwards.
  if (cu.slice->getUseLIC() && lic) { m_pcInterSearch->swapUniMvBuffer(); }
#endif
#if JVET_X0083_BM_AMVP_MERGE_MODE
  // Select which list (if any) uses the AMVP-merge mode in this outer iteration.
  if (bdmvrAmSearchLoop == 0)
  {
    cu.firstPU->amvpMergeModeFlag[REF_PIC_LIST_0] = false;
    cu.firstPU->amvpMergeModeFlag[REF_PIC_LIST_1] = false;
  }
  else if (bdmvrAmSearchLoop == 1)
  {
    cu.firstPU->amvpMergeModeFlag[REF_PIC_LIST_0] = true;
    cu.firstPU->amvpMergeModeFlag[REF_PIC_LIST_1] = false;
  }
  else
  {
    cu.firstPU->amvpMergeModeFlag[REF_PIC_LIST_0] = false;
    cu.firstPU->amvpMergeModeFlag[REF_PIC_LIST_1] = true;
  }
  if (cu.firstPU->amvpMergeModeFlag[0] || cu.firstPU->amvpMergeModeFlag[1])
  {
    m_pcInterSearch->predInterSearch( cu, partitioner, bdmvrAmMergeNotValid,
        m_mvFieldAmListEnc, m_mvBufEncAmBDMVR[0], m_mvBufEncAmBDMVR[1] );
  }
  else
  m_pcInterSearch->predInterSearch( cu, partitioner, bdmvrAmMergeNotValid );
  // Discard the candidate when neither list got a reference index or the search flagged it invalid.
  if ((cu.firstPU->refIdx[REF_PIC_LIST_0] < 0 && cu.firstPU->refIdx[REF_PIC_LIST_1] < 0) || bdmvrAmMergeNotValid)
  {
    tempCS->initStructData(encTestMode.qp);
    continue;
  }
#else
  m_pcInterSearch->predInterSearch( cu, partitioner );
#endif
#if INTER_LIC
  if (cu.slice->getUseLIC() && lic) { m_pcInterSearch->swapUniMvBuffer(); }
#endif

  bcwIdx = CU::getValidBcwIdx(cu);
  if( testBcw && bcwIdx == BCW_DEFAULT ) // Enabled Bcw but the search results is uni.
  {
    tempCS->initStructData(encTestMode.qp);
    continue;
  }
  CHECK(!(testBcw || (!testBcw && bcwIdx == BCW_DEFAULT)), " !( bTestBcw || (!bTestBcw && bcwIdx == BCW_DEFAULT ) )");

  // isEqualUni: default-weight test whose search result is not bi-prediction (interDir != 3).
  bool isEqualUni = false;
  if( m_pcEncCfg->getUseBcwFast() )
  {
    if( cu.firstPU->interDir != 3 && testBcw == 0 )
    {
      isEqualUni = true;
    }
  }
#if JVET_Y0128_NON_CTC && INTER_LIC
  if (cu.LICFlag)
  {
    if (!PU::checkRprLicCondition(*cu.firstPU)) // To check whether LIC actually performs in MC
    {
      cu.LICFlag = false;
      PU::spanLICFlags(*cu.firstPU, false);
    }
  }
#endif
#if JVET_Z0054_BLK_REF_PIC_REORDER
  PredictionUnit& pu = *cu.firstPU;
  if (PU::useRefCombList(pu))
  {
    m_pcInterSearch->setUniRefIdxLC(pu);
  }
  else if (PU::useRefPairList(pu))
  {
    m_pcInterSearch->setBiRefPairIdx(pu);
  }
#endif
#if ENABLE_OBMC //normal inter
  // Keep a copy of the prediction before OBMC is applied, then run sub-block OBMC on the PU.
  // prevCS remembers which structure holds this candidate, since xEncodeInterResidual()
  // receives tempCS/bestCS by reference.
  const unsigned wIdx = gp_sizeIdxInfo->idxFrom(partitioner.currArea().lwidth());
  CodingStructure *prevCS = tempCS;
  PelUnitBuf tempWoOBMCBuf = m_tempWoOBMCBuffer.subBuf(UnitAreaRelative(cu, cu));
  tempWoOBMCBuf.copyFrom(tempCS->getPredBuf(cu));
  cu.isobmcMC = true;
  cu.obmcFlag = true;
  m_pcInterSearch->subBlockOBMC(*cu.firstPU);
  cu.isobmcMC = false;
#endif
  xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, 0
                        , 0
                        , &equBcwCost
  );
#if ENABLE_OBMC // xCheckRDCostInter
  // If tempCS still points at prevCS the candidate's cost is in tempCS, otherwise it is read from bestCS
  // (presumably because the two pointers were exchanged - confirm against xEncodeInterResidual).
  double tempCost = (prevCS == tempCS) ? tempCS->cost : bestCS->cost;
  if (m_pTempCUWoOBMC && tempCost < bestOBMCCost)
  {
    // Store the structure and both predictions (without and with OBMC) of the cheapest candidate so far.
    const unsigned hIdx = gp_sizeIdxInfo->idxFrom(prevCS->area.lheight());
    m_pTempCUWoOBMC[wIdx][hIdx]->clearCUs();
    m_pTempCUWoOBMC[wIdx][hIdx]->clearPUs();
    m_pTempCUWoOBMC[wIdx][hIdx]->clearTUs();
    m_pTempCUWoOBMC[wIdx][hIdx]->copyStructure(*prevCS, partitioner.chType);

    m_pPredBufWoOBMC[wIdx][hIdx].copyFrom(tempWoOBMCBuf);
    m_pTempCUWoOBMC[wIdx][hIdx]->getPredBuf(cu).copyFrom(prevCS->getPredBuf(cu));

    bestOBMCCost = tempCost;
#if JVET_AA0129_INTERHASH_OBMCOFF_RD
    validMode = true;
#endif
  }
#endif
  // NOTE(review): bestCS->cus.front() is dereferenced without checking that bestCS->cus is
  // non-empty - confirm bestCS always holds a CU at this point.
  if( g_BcwSearchOrder[bcwLoopIdx] == BCW_DEFAULT )
    m_pcInterSearch->setAffineModeSelected((bestCS->cus.front()->affine && !(bestCS->cus.front()->firstPU->mergeFlag)));

  tempCS->initStructData(encTestMode.qp);

  // Early termination of the BCW loop: with fast BCW, stop when equBcwCost is more than 5%
  // above the best cost on entry (threshold is MAX_DOUBLE otherwise).
  double skipTH = MAX_DOUBLE;
  skipTH = (m_pcEncCfg->getUseBcwFast() ? 1.05 : MAX_DOUBLE);
  if( equBcwCost > curBestCost * skipTH )
  {
    break;
  }

  if( m_pcEncCfg->getUseBcwFast() )
  {
    if( isEqualUni == true && m_pcEncCfg->getIntraPeriod() == -1 )
    {
      break;
    }
  }
  if( g_BcwSearchOrder[bcwLoopIdx] == BCW_DEFAULT && xIsBcwSkip(cu) && m_pcEncCfg->getUseBcwFast() )
  {
    break;
  }
 }  // for( UChar bcwLoopIdx = 0; bcwLoopIdx < bcwLoopNum; bcwLoopIdx++ )
  // Refresh the deblocking-aware cost of the winner if the best mode changed.
  if ( m_bestModeUpdated && bestCS->cost != MAX_DOUBLE )
  {
    xCalDebCost( *bestCS, partitioner );
  }
#if JVET_X0083_BM_AMVP_MERGE_MODE
  }
#endif
#if ENABLE_OBMC && JVET_AA0129_INTERHASH_OBMCOFF_RD
  return (m_pcEncCfg->getUseHashME() ? validMode : true);
#endif
}




/** Tests regular inter prediction with a reduced MV resolution (IMV/AMVR), once per BCW index.
 *
 *  The IMV mode is taken from the ETO_IMV bits of encTestMode.opts and must be 1..4:
 *  1 selects IMV_FPEL, 4 selects IMV_HPEL (alternative half-pel filter test), the remaining
 *  values select IMV_4PEL.
 *
 *  \param tempCS          working coding structure; re-initialised with initStructData() between tests
 *  \param bestCS          best coding structure so far
 *  \param partitioner     current partitioning state
 *  \param encTestMode     mode under test; qp and the ETO_IMV / ETO_LIC option bits are read here
 *  \param bestIntPelCost  in/out: lowered to tempCS->cost after an IMV_FPEL test when that cost is smaller
 *  \return false on the early exits (fast-LIC skip, search left interDir above 3, or all-zero MVD
 *          with affine AMVR disabled). Otherwise: availMode when ENABLE_OBMC &&
 *          JVET_AA0129_INTERHASH_OBMCOFF_RD is compiled in and hash ME is enabled; else validMode
 *          when the SPS enables affine AMVR, and true when it does not.
 */
bool EncCu::xCheckRDCostInterIMV(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, double &bestIntPelCost)
{
#if JVET_X0083_BM_AMVP_MERGE_MODE
  bool bdmvrAmMergeNotValid = false;
#endif
#if ENABLE_OBMC
  // Lowest cost seen so far among the candidates tested in this call (used for the OBMC backup).
  double bestOBMCCost = MAX_DOUBLE;
#endif
  int iIMV = int( ( encTestMode.opts & ETO_IMV ) >> ETO_IMV_SHIFT );
  m_pcInterSearch->setAffineModeSelected(false);
  // Only Half-Pel, int-Pel, 4-Pel and fast 4-Pel allowed
  CHECK(iIMV < 1 || iIMV > 4, "Unsupported IMV Mode");
  const bool testAltHpelFilter = iIMV == 4;
  // Fast 4-Pel Mode

#if INTER_LIC
  // Early out when the fast-LIC control decides this LIC test is not worth running.
  // iIMV values above 2 are shifted down by one before being passed on.
  if (m_pcInterSearch->m_fastLicCtrl.skipRDCheckForLIC((encTestMode.opts & ETO_LIC) > 0, (iIMV <= 2 ? iIMV : iIMV - 1), bestCS->cost, tempCS->area.Y().area()))
  {
    return false;
  }
#endif

  m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;

  EncTestMode encTestModeBase = encTestMode;                                        // copy for clearing non-IMV options
  encTestModeBase.opts        = EncTestModeOpts( encTestModeBase.opts & ETO_IMV );  // clear non-IMV options (is that intended?)

  tempCS->initStructData( encTestMode.qp );

#if INTER_LIC
  bool lic = encTestMode.opts & ETO_LIC;
#endif

  m_pcInterSearch->resetBufferedUniMotions();
  // Number of BCW indices to try: BCW_NUM for B slices with BCW enabled in the SPS, else 1.
  // Forced to 1 for LIC and for blocks smaller than BCW_SIZE_CONSTRAINT.
  int bcwLoopNum = (tempCS->slice->isInterB() ? BCW_NUM : 1);
  bcwLoopNum = (tempCS->slice->getSPS()->getUseBcw() ? bcwLoopNum : 1);
#if INTER_LIC
  bcwLoopNum = lic ? 1 : bcwLoopNum;
#endif

  if( tempCS->area.lwidth() * tempCS->area.lheight() < BCW_SIZE_CONSTRAINT )
  {
    bcwLoopNum = 1;
  }

  // validMode becomes true only when a loop iteration runs to its very end (no continue/break/return).
  bool validMode = false;
#if ENABLE_OBMC && JVET_AA0129_INTERHASH_OBMCOFF_RD
  // availMode becomes true when a tested candidate improved bestOBMCCost and its backup was stored.
  bool availMode = false;
#endif
  // Best cost on entry; compared against equBcwCost for the early termination below.
  double curBestCost = bestCS->cost;
  double equBcwCost = MAX_DOUBLE;

  for( int bcwLoopIdx = 0; bcwLoopIdx < bcwLoopNum; bcwLoopIdx++ )
  {
    // Fast BCW: if the block cache reports a best inter result for this area, only the
    // default weight and the cached best BCW index are tested.
    if( m_pcEncCfg->getUseBcwFast() )
    {
      auto blkCache = dynamic_cast< CacheBlkInfoCtrl* >(m_modeCtrl);

      if( blkCache )
      {
        bool isBestInter = blkCache->getInter(bestCS->area);
        uint8_t bestBcwIdx = blkCache->getBcwIdx(bestCS->area);

        if( isBestInter && g_BcwSearchOrder[bcwLoopIdx] != BCW_DEFAULT && g_BcwSearchOrder[bcwLoopIdx] != bestBcwIdx )
        {
          continue;
        }
      }
    }

    // When getCheckLDC() is false only loop positions 0, 3 and 4 of the search order are tested.
    if( !tempCS->slice->getCheckLDC() )
    {
      if( bcwLoopIdx != 0 && bcwLoopIdx != 3 && bcwLoopIdx != 4 )
      {
        continue;
      }
    }

    // Fast BCW with getCheckLDC() true: skip a non-default weight when both m_bestBcwIdx entries
    // are set (>= 0) and the weight matches neither of them.
    if( m_pcEncCfg->getUseBcwFast() && tempCS->slice->getCheckLDC() && g_BcwSearchOrder[bcwLoopIdx] != BCW_DEFAULT
      && (m_bestBcwIdx[0] >= 0 && g_BcwSearchOrder[bcwLoopIdx] != m_bestBcwIdx[0])
      && (m_bestBcwIdx[1] >= 0 && g_BcwSearchOrder[bcwLoopIdx] != m_bestBcwIdx[1]))
    {
      continue;
    }

  CodingUnit &cu = tempCS->addCU( tempCS->area, partitioner.chType );

  partitioner.setCUData( cu );
  cu.slice            = tempCS->slice;
  cu.tileIdx          = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
  cu.skip             = false;
  cu.mmvdSkip = false;
//cu.affine
  cu.predMode         = MODE_INTER;
#if INTER_LIC
  cu.LICFlag          = lic;
#endif
  cu.chromaQpAdj      = m_cuChromaQpOffsetIdxPlus1;
  cu.qp               = encTestMode.qp;

  CU::addPUs( cu );

  // Map the requested IMV mode onto the CU's imv field.
  if (testAltHpelFilter)
  {
    cu.imv = IMV_HPEL;
  }
  else
  {
    cu.imv = iIMV == 1 ? IMV_FPEL : IMV_4PEL;
  }

  bool testBcw;
  uint8_t bcwIdx;
  bool affineAmvrEanbledFlag = !testAltHpelFilter && cu.slice->getSPS()->getAffineAmvrEnabledFlag();

  cu.BcwIdx = g_BcwSearchOrder[bcwLoopIdx];
  bcwIdx = cu.BcwIdx;
  testBcw = (bcwIdx != BCW_DEFAULT);

  // Sentinel value: interDir is checked after the search, and a value still above 3 aborts the test.
  cu.firstPU->interDir = 10;

#if INTER_LIC
  // The uni-MV buffer is swapped around the search when LIC is tested, and swapped back afterwards.
  if (cu.slice->getUseLIC() && lic) { m_pcInterSearch->swapUniMvBuffer(); }
#endif
#if JVET_X0083_BM_AMVP_MERGE_MODE
  m_pcInterSearch->predInterSearch( cu, partitioner, bdmvrAmMergeNotValid );
#else
  m_pcInterSearch->predInterSearch( cu, partitioner );
#endif
#if INTER_LIC
  if (cu.slice->getUseLIC() && lic) { m_pcInterSearch->swapUniMvBuffer(); }
#endif

  if ( cu.firstPU->interDir <= 3 )
  {
    bcwIdx = CU::getValidBcwIdx(cu);
  }
  else
  {
    return false;
  }

  if( m_pcEncCfg->getMCTSEncConstraint() && ( ( cu.firstPU->refIdx[L0] < 0 && cu.firstPU->refIdx[L1] < 0 ) || ( !( MCTSHelper::checkMvBufferForMCTSConstraint( *cu.firstPU ) ) ) ) )
  {
    // Do not use this mode
    tempCS->initStructData( encTestMode.qp );
    continue;
  }
  if( testBcw && bcwIdx == BCW_DEFAULT ) // Enabled Bcw but the search results is uni.
  {
    tempCS->initStructData(encTestMode.qp);
    continue;
  }
  CHECK(!(testBcw || (!testBcw && bcwIdx == BCW_DEFAULT)), " !( bTestBcw || (!bTestBcw && bcwIdx == BCW_DEFAULT ) )");

  // isEqualUni: default-weight test whose search result is not bi-prediction (interDir != 3).
  bool isEqualUni = false;
  if( m_pcEncCfg->getUseBcwFast() )
  {
    if( cu.firstPU->interDir != 3 && testBcw == 0 )
    {
      isEqualUni = true;
    }
  }
#if JVET_Y0128_NON_CTC && INTER_LIC
  if (cu.LICFlag)
  {
    if (!PU::checkRprLicCondition(*cu.firstPU)) // To check whether LIC actually performs in MC
    {
      cu.LICFlag = false;
      PU::spanLICFlags(*cu.firstPU, false);
    }
  }
#endif

  // No non-zero MVD (regular or affine): the residual is not coded for this candidate. The mode
  // controller is consulted with the IMV-only options and may take tempCS as the new best;
  // then either move on to the next BCW index (affine AMVR enabled) or stop altogether.
  if ( !CU::hasSubCUNonZeroMVd( cu ) && !CU::hasSubCUNonZeroAffineMVd( cu ) )
  {
    if (m_modeCtrl->useModeResult(encTestModeBase, tempCS, partitioner))
    {
      std::swap(tempCS, bestCS);
      // store temp best CI for next CU coding
      m_CurrCtx->best = m_CABACEstimator->getCtx();
    }
    if ( affineAmvrEanbledFlag )
    {
      tempCS->initStructData( encTestMode.qp );
      continue;
    }
    else
    {
      return false;
    }
  }
#if JVET_Z0054_BLK_REF_PIC_REORDER
  PredictionUnit& pu = *cu.firstPU;
  if (PU::useRefCombList(pu))
  {
    m_pcInterSearch->setUniRefIdxLC(pu);
  }
  else if (PU::useRefPairList(pu))
  {
    m_pcInterSearch->setBiRefPairIdx(pu);
  }
#endif
#if ENABLE_OBMC //normal inter IMV
  // Keep a copy of the prediction before OBMC is applied, then run sub-block OBMC on the PU.
  // prevCS remembers which structure holds this candidate, since xEncodeInterResidual()
  // receives tempCS/bestCS by reference.
  CodingStructure *prevCS = tempCS;
  PelUnitBuf tempWoOBMCBuf = m_tempWoOBMCBuffer.subBuf(UnitAreaRelative(cu, cu));
  tempWoOBMCBuf.copyFrom(tempCS->getPredBuf(cu));
  cu.isobmcMC = true;
  cu.obmcFlag = true;
  m_pcInterSearch->subBlockOBMC(*cu.firstPU);
  cu.isobmcMC = false;
#endif
  xEncodeInterResidual( tempCS, bestCS, partitioner, encTestModeBase, 0
                        , 0
                        , &equBcwCost
  );
#if ENABLE_OBMC
  // If tempCS still points at prevCS the candidate's cost is in tempCS, otherwise it is read from bestCS
  // (presumably because the two pointers were exchanged - confirm against xEncodeInterResidual).
  double tempCost = (prevCS == tempCS) ? tempCS->cost : bestCS->cost;
  if (m_pTempCUWoOBMC && tempCost < bestOBMCCost)
  {
    // Store the structure and both predictions (without and with OBMC) of the cheapest candidate so far.
    const unsigned wIdx = gp_sizeIdxInfo->idxFrom(tempCS->area.lwidth());
    const unsigned hIdx = gp_sizeIdxInfo->idxFrom(tempCS->area.lheight());

    m_pTempCUWoOBMC[wIdx][hIdx]->clearCUs();
    m_pTempCUWoOBMC[wIdx][hIdx]->clearPUs();
    m_pTempCUWoOBMC[wIdx][hIdx]->clearTUs();
    m_pTempCUWoOBMC[wIdx][hIdx]->copyStructure(*prevCS, partitioner.chType);

    m_pPredBufWoOBMC[wIdx][hIdx].copyFrom(tempWoOBMCBuf);
    m_pTempCUWoOBMC[wIdx][hIdx]->getPredBuf(cu).copyFrom(prevCS->getPredBuf(cu));

    bestOBMCCost = tempCost;
#if JVET_AA0129_INTERHASH_OBMCOFF_RD
    availMode = true;
#endif
  }
#endif
  // Report the integer-pel cost back to the caller when it improved.
  if( cu.imv == IMV_FPEL && tempCS->cost < bestIntPelCost )
  {
    bestIntPelCost = tempCS->cost;
  }
  tempCS->initStructData(encTestMode.qp);

  // Early termination of the BCW loop: with fast BCW, stop when equBcwCost is more than 5%
  // above the best cost on entry (threshold is MAX_DOUBLE otherwise).
  // Note that every break below leaves the loop before validMode is set for this iteration.
  double skipTH = MAX_DOUBLE;
  skipTH = (m_pcEncCfg->getUseBcwFast() ? 1.05 : MAX_DOUBLE);
  if( equBcwCost > curBestCost * skipTH )
  {
    break;
  }

  if( m_pcEncCfg->getUseBcwFast() )
  {
    if( isEqualUni == true && m_pcEncCfg->getIntraPeriod() == -1 )
    {
      break;
    }
  }
  if( g_BcwSearchOrder[bcwLoopIdx] == BCW_DEFAULT && xIsBcwSkip(cu) && m_pcEncCfg->getUseBcwFast() )
  {
    break;
  }
  validMode = true;
 } // for( UChar bcwLoopIdx = 0; bcwLoopIdx < bcwLoopNum; bcwLoopIdx++ )

  // Refresh the deblocking-aware cost of the winner if the best mode changed.
  if ( m_bestModeUpdated && bestCS->cost != MAX_DOUBLE )
  {
    xCalDebCost( *bestCS, partitioner );
  }

#if ENABLE_OBMC && JVET_AA0129_INTERHASH_OBMCOFF_RD
  if (m_pcEncCfg->getUseHashME())
  {
    return availMode;
  }
  else
#endif
  return tempCS->slice->getSPS()->getAffineAmvrEnabledFlag() ? validMode : true;
}

/** Estimates the effect of deblocking on the RD cost of the CU held in \p cs.
 *
 *  When \p calDist is set, cs.dist is recomputed from the current reconstruction. When encoder deblocking
 *  optimisation is enabled and the CU has a usable top or left edge, the CU and its neighbouring strips are
 *  copied into the loop filter's encoder-side picture buffer, filtered there, and the resulting distortion
 *  change is converted into cs.costDbOffset.
 *
 *  \param cs           coding structure under evaluation; dist and costDbOffset may be overwritten
 *  \param partitioner  supplies the channel type used to pick the CU and the component range
 *  \param calDist      if true, cs.dist is refreshed before the deblocking offset is derived
 */
void EncCu::xCalDebCost( CodingStructure &cs, Partitioner &partitioner, bool calDist )
{
  // A structure that carries no valid cost gets a neutral deblocking offset.
  if( cs.cost == MAX_DOUBLE )
  {
    cs.costDbOffset = 0;
  }

  // Nothing to do if the slice disables deblocking, or if neither the encoder-side deblocking
  // optimisation nor a distortion refresh has been asked for.
  const bool sliceDbOff = cs.slice->getDeblockingFilterDisable();
  if( sliceDbOff || ( !m_pcEncCfg->getUseEncDbOpt() && !calDist ) )
  {
    return;
  }

  m_pcLoopFilter->setEnc( true );

  const ChromaFormat chFmt = cs.area.chromaFormat;
  CodingUnit*        cu    = cs.getCU( partitioner.chType );

  // Luma position of the CU; derived from the CU's own channel when the luma block is not valid.
  const Position lumaPos = cu->Y().valid() ? cu->Y().pos() : recalcPosition( chFmt, cu->chType, CHANNEL_TYPE_LUMA, cu->blocks[cu->chType].pos() );

  // An edge is considered only when the coordinate is non-zero and a multiple of four.
  const bool hasTopEdge  = ( lumaPos.y > 0 ) && ( lumaPos.y % 4 == 0 );
  const bool hasLeftEdge = ( lumaPos.x > 0 ) && ( lumaPos.x % 4 == 0 );

  cs.costDbOffset = 0;

  // Component range processed by this call: chroma only / luma only for a separate tree, otherwise
  // all components available in the chroma format.
#if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
  const bool separateTree = CS::isDualITree( cs );
#else
  const bool separateTree = cu->isSepTree();
#endif
  const bool        lumaTree  = isLuma( partitioner.chType );
  const ComponentID firstComp = ( separateTree && !lumaTree ) ? COMPONENT_Cb : COMPONENT_Y;
  const ComponentID lastComp  = ( ( separateTree && lumaTree ) || chFmt == CHROMA_400 ) ? COMPONENT_Y : COMPONENT_Cr;

  if( calDist )
  {
    // Re-measure the distortion of the (not yet deblocked) reconstruction.
    Distortion refreshedDist = 0;
    for( int compIdx = firstComp; compIdx <= lastComp; compIdx++ )
    {
      const ComponentID compId = ComponentID( compIdx );
      CPelBuf orgBuf  = cs.getOrgBuf( compId );
      CPelBuf recoBuf = cs.getRecoBuf( compId );
      refreshedDist += getDistortionDb( cs, orgBuf, recoBuf, compId, cs.area.block( COMPONENT_Y ), false );
    }
    cs.dist = refreshedDist;
  }

  if( ( hasTopEdge || hasLeftEdge ) && m_pcEncCfg->getUseEncDbOpt() )
  {
    const UnitArea clippedCsArea = clipArea( cs.area, *cs.picture );
    PelStorage&    picDbBuf      = m_pcLoopFilter->getDbEncPicYuvBuffer();

    // Neighbouring strips above and to the left of the CU (8 samples deep, or 4 close to the origin).
    const Size lumaSize = cu->Y().valid() ? cu->Y().size() : recalcSize( chFmt, cu->chType, CHANNEL_TYPE_LUMA, cu->blocks[cu->chType].size() );
    const int  verOffset = lumaPos.y > 7 ? 8 : 4;
    const int  horOffset = lumaPos.x > 7 ? 8 : 4;
    const UnitArea areaTop(  chFmt, Area( lumaPos.x, lumaPos.y - verOffset, lumaSize.width, verOffset  ) );
    const UnitArea areaLeft( chFmt, Area( lumaPos.x - horOffset, lumaPos.y, horOffset, lumaSize.height ) );

    // Writes src into the deblocking picture buffer at 'area'; luma goes through the inverse LMCS LUT
    // when LMCS is enabled for the slice and the slice reshaper is in use.
    auto storeInDbBuf = [&]( const ComponentID compId, const CompArea& area, const CPelBuf& src )
    {
      if( cs.slice->getLmcsEnabledFlag() && m_pcReshape->getSliceReshaperInfo().getUseSliceReshaper() && isLuma( compId ) )
      {
        picDbBuf.getBuf( area ).rspSignal( src, m_pcReshape->getInvLUT() );
      }
      else
      {
        picDbBuf.getBuf( area ).copyFrom( src );
      }
    };

    for( int compIdx = firstComp; compIdx <= lastComp; compIdx++ )
    {
      const ComponentID compId = ComponentID( compIdx );

      // current CU: taken from the coding structure's reconstruction
      const CompArea& curCompArea = clippedCsArea.block( compId );
      storeInDbBuf( compId, curCompArea, cs.getRecoBuf( curCompArea ) );

      // left neighbour: taken from the picture reconstruction
      if( hasLeftEdge )
      {
        const CompArea& leftCompArea = areaLeft.block( compId );
        storeInDbBuf( compId, leftCompArea, cs.picture->getRecoBuf( leftCompArea ) );
      }

      // top neighbour: taken from the picture reconstruction
      if( hasTopEdge )
      {
        const CompArea& topCompArea = areaTop.block( compId );
        storeInDbBuf( compId, topCompArea, cs.picture->getRecoBuf( topCompArea ) );
      }
    }

#if JVET_V0094_BILATERAL_FILTER
    // Luma bilateral filter, applied in place on the deblocking picture buffer now that the CU and
    // its top/left strips have been copied there.
    if( cs.pps->getUseBIF() && ( !CS::isDualITree( cs ) || isLuma( partitioner.chType ) ) )
    {
      const bool isInter = cu->predMode == MODE_INTER;
      for( auto &currTU : CU::traverseTUs( *cu ) )
      {
        const auto tuWidth  = currTU.lumaSize().width;
        const auto tuHeight = currTU.lumaSize().height;

        // Inter TUs need a coded luma residual; all TUs need qp > 17 and a longer side below 128.
        const bool codedOrNotInter = TU::getCbf( currTU, COMPONENT_Y ) || !isInter;
        if( !codedOrNotInter || !( currTU.cu->qp > 17 ) || !( 128 > std::max( tuWidth, tuHeight ) ) )
        {
          continue;
        }
        // Inter TUs are additionally limited to a shorter side below 32.
        if( isInter && !( 32 > std::min( tuWidth, tuHeight ) ) )
        {
          continue;
        }

        CompArea &compArea = currTU.block( COMPONENT_Y );
        PelBuf    recBuf      = picDbBuf.getBuf( compArea );
        PelBuf    recIPredBuf = recBuf;
        std::vector<Pel> invLUT;
        m_bilateralFilter->bilateralFilterRDOdiamond5x5( recBuf, recBuf, recBuf, currTU.cu->qp, recIPredBuf, cs.slice->clpRng( COMPONENT_Y ), currTU, true, false, invLUT );
      }
    }
#endif
#if JVET_X0071_CHROMA_BILATERAL_FILTER
    // Chroma bilateral filter, applied in place on the deblocking picture buffer.
    if( cs.pps->getUseChromaBIF() )
    {
      const bool dualTree    = CS::isDualITree( cs );
      const bool cuHasChroma = cu->Cb().valid() && cu->Cr().valid();
      for( auto &currTU : CU::traverseTUs( *cu ) )
      {
        const bool isInter = cu->predMode == MODE_INTER;
        for( int compIdx = COMPONENT_Cb; compIdx < MAX_NUM_COMPONENT; compIdx++ )
        {
          const bool        isCb   = ( compIdx == COMPONENT_Cb );
          const ComponentID compID = isCb ? COMPONENT_Cb : COMPONENT_Cr;

          bool filterThisComp = false;
          if( cuHasChroma )
          {
            // Single tree: the TU block must be valid and the CBF is only queried for a valid block.
            // Dual tree: the CBF is queried unconditionally.
            const bool blockUsable = dualTree || currTU.blocks[compIdx].valid();
            const bool codedResi   = blockUsable && TU::getCbf( currTU, compID );
            filterThisComp = blockUsable && ( codedResi || !isInter ) && ( currTU.cu->qp > 17 );
          }

          if( filterThisComp )
          {
            CompArea &compArea = currTU.block( compID );
            PelBuf    recBuf      = picDbBuf.getBuf( compArea );
            PelBuf    recIPredBuf = recBuf;
            m_bilateralFilter->bilateralFilterRDOdiamond5x5Chroma( recBuf, recBuf, recBuf, currTU.cu->qp, recIPredBuf, cs.slice->clpRng( compID ), currTU, true, isCb );
          }
        }
      }
    }
#endif

    // Deblock the CU inside the encoder-side buffer: vertical edges first, then horizontal ones.
    if( hasLeftEdge )
    {
      m_pcLoopFilter->resetFilterLengths();
      m_pcLoopFilter->xDeblockCU( *cu, EDGE_VER );
    }
    if( hasTopEdge )
    {
      m_pcLoopFilter->resetFilterLengths();
      m_pcLoopFilter->xDeblockCU( *cu, EDGE_HOR );
    }

    // Distortion of the current CU after filtering.
    Distortion distCur = 0;
    for( int compIdx = firstComp; compIdx <= lastComp; compIdx++ )
    {
      const ComponentID compId = ComponentID( compIdx );
      CPelBuf filteredReco = picDbBuf.getBuf( clippedCsArea.block( compId ) );
      CPelBuf orgBuf       = cs.getOrgBuf( compId );
      distCur += getDistortionDb( cs, orgBuf, filteredReco, compId, clippedCsArea.block( COMPONENT_Y ), true );
    }

    // Distortion of the neighbouring strips before and after filtering.
    Distortion distBeforeDb = 0;
    Distortion distAfterDb  = 0;
    auto accumulateNeighbour = [&]( const UnitArea& nbArea, const ComponentID compId )
    {
      const CompArea& nbCompArea = nbArea.block( compId );
      CPelBuf orgBuf    = cs.picture->getOrigBuf( nbCompArea );
      CPelBuf recoBuf   = cs.picture->getRecoBuf( nbCompArea );
      CPelBuf recoDbBuf = picDbBuf.getBuf( nbCompArea );
      distBeforeDb += getDistortionDb( cs, orgBuf, recoBuf,   compId, nbArea.block( COMPONENT_Y ), false );
      distAfterDb  += getDistortionDb( cs, orgBuf, recoDbBuf, compId, nbArea.block( COMPONENT_Y ), true );
    };
    for( int compIdx = firstComp; compIdx <= lastComp; compIdx++ )
    {
      const ComponentID compId = ComponentID( compIdx );
      if( hasLeftEdge )
      {
        accumulateNeighbour( areaLeft, compId );
      }
      if( hasTopEdge )
      {
        accumulateNeighbour( areaTop, compId );
      }
    }

    // Convert the signed distortion change into a signed cost offset; the RD cost routine is fed the magnitude.
    const int64_t distDelta     = distCur - cs.dist + distAfterDb - distBeforeDb;
    const int     deltaSign     = distDelta < 0 ? -1 : 1;
    const int64_t distMagnitude = distDelta < 0 ? -distDelta : distDelta;
    cs.costDbOffset = deltaSign * m_pcRdCost->calcRdCost( 0, distMagnitude );
  }

  m_pcLoopFilter->setEnc( false );
}

/** Measures the distortion between \p org and \p reco for one component, for use by xCalDebCost().
 *
 *  With WCG_EXT the metric and the signal domain depend on the luma-level dQP mapping and LMCS state:
 *  weighted SSE (DF_SSE_WTD, guided by the original luma of \p compArea) or plain SSE (DF_SSE), with
 *  luma optionally mapped through the inverse or forward LMCS LUT first. Without WCG_EXT it is plain SSE.
 *
 *  \param cs        coding structure providing SPS, slice and picture
 *  \param org       original samples
 *  \param reco      reconstructed samples to compare against \p org
 *  \param compID    component being measured
 *  \param compArea  area used to fetch the original luma and to size the temporary luma buffer
 *  \param afterDb   true when \p reco comes from the deblocking buffer
 *  \return          the distortion value
 */
Distortion EncCu::getDistortionDb( CodingStructure &cs, CPelBuf org, CPelBuf reco, ComponentID compID, const CompArea& compArea, bool afterDb )
{
  const int bitDepth = cs.sps->getBitDepth( toChannelType( compID ) );

#if WCG_EXT
  m_pcRdCost->setChromaFormat( cs.sps->getChromaFormatIdc() );
  CPelBuf orgLuma = cs.picture->getOrigBuf( compArea );

  const bool lumaDqpMapping = m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled();

  // Case 1: luma-level dQP mapping, or LMCS active for this slice and CTU -> weighted SSE.
  if( lumaDqpMapping || ( m_pcEncCfg->getLmcs() && ( cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag() ) ) )
  {
    const bool needInvReshape = ( compID == COMPONENT_Y ) && !afterDb && !lumaDqpMapping;
    if( !needInvReshape )
    {
      return m_pcRdCost->getDistPart( org, reco, bitDepth, compID, DF_SSE_WTD, &orgLuma );
    }

    // Luma before deblocking is passed through the inverse LUT into a scratch buffer first.
    CompArea scratchArea( COMPONENT_Y, cs.area.chromaFormat, Position( 0, 0 ), compArea.size() );
    PelBuf   scratchLuma = m_tmpStorageLCU->getBuf( scratchArea );
    scratchLuma.rspSignal( reco, m_pcReshape->getInvLUT() );
    return m_pcRdCost->getDistPart( org, scratchLuma, bitDepth, compID, DF_SSE_WTD, &orgLuma );
  }

  // Case 2: LMCS enabled on an intra slice.
  if( m_pcEncCfg->getLmcs() && cs.slice->getLmcsEnabledFlag() && cs.slice->isIntra() )
  {
    if( compID == COMPONENT_Y && afterDb )
    {
      // Luma from the deblocking buffer is passed through the forward LUT before a plain SSE.
      CompArea scratchArea( COMPONENT_Y, cs.area.chromaFormat, Position( 0, 0 ), compArea.size() );
      PelBuf   scratchLuma = m_tmpStorageLCU->getBuf( scratchArea );
      scratchLuma.rspSignal( reco, m_pcReshape->getFwdLUT() );
      return m_pcRdCost->getDistPart( org, scratchLuma, bitDepth, compID, DF_SSE );
    }

    if( isChroma( compID ) && m_pcEncCfg->getReshapeIntraCMD() )
    {
      return m_pcRdCost->getDistPart( org, reco, bitDepth, compID, DF_SSE_WTD, &orgLuma );
    }

    return m_pcRdCost->getDistPart( org, reco, bitDepth, compID, DF_SSE );
  }
#endif

  // Default: plain SSE on the samples as given.
  return m_pcRdCost->getDistPart( org, reco, bitDepth, compID, DF_SSE );
}

/** Runs the residual-coding RD search for the inter CU held in \p tempCS.
 *
 *  Candidates whose signalled MVDs fall outside [MVD_MIN, MVD_MAX], or whose non-merge motion vectors reach
 *  2^17 in magnitude, are rejected up front: the function returns before modifying either coding structure.
 *  Otherwise it tries, in this order, (1) residual coding with SBT off, (2) up to numSbtRdo SBT modes and,
 *  when JVET_AA0133_INTER_MTS_OPT is enabled, (3) one further pass with cu->mtsFlag set. Every attempt is
 *  handed to xCheckBestMode(); \p tempCS and \p bestCS are pointer references, so that call can presumably
 *  exchange them (the 'swapped' bookkeeping below relies on this).
 *
 *  \param tempCS          working coding structure; on normal exit its cost is overwritten with the lowest
 *                         cost produced by this call
 *  \param bestCS          best coding structure found so far
 *  \param partitioner     supplies the channel type and current area
 *  \param encTestMode     mode under test; its qp re-initialises tempCS, and type ETM_INTER_ME enables the
 *                         BCW cost bookkeeping at the end
 *  \param residualPass    1 requests coding with the residual skipped; SBT/MTS pre-analysis is only done for 0
 *  \param bestHasNonResi  optional output: set to !rootCbf of the cheapest attempt, unless the PU uses CIIP
 *  \param equBcwCost      optional in/out (ETM_INTER_ME only): lowered to the resulting cost when the CU uses
 *                         BCW_DEFAULT
 */
void EncCu::xEncodeInterResidual(   CodingStructure *&tempCS
                                  , CodingStructure *&bestCS
                                  , Partitioner &partitioner
                                  , const EncTestMode& encTestMode
                                  , int residualPass
                                  , bool* bestHasNonResi
                                  , double* equBcwCost
  )
{

  CodingUnit*            cu        = tempCS->getCU( partitioner.chType );
  double   bestCostInternal        = MAX_DOUBLE;
  double           bestCost        = bestCS->cost;
  double           bestCostBegin   = bestCS->cost;
  CodingUnit*      prevBestCU      = bestCS->getCU( partitioner.chType );
  uint8_t          prevBestSbt     = ( prevBestCU == nullptr ) ? 0 : prevBestCU->sbtInfo;
#if JVET_AA0133_INTER_MTS_OPT
  bool             prevBestMts = (prevBestCU == nullptr) ? 0 : (prevBestCU->firstTU->mtsIdx[COMPONENT_Y] > MTS_SKIP)? true : false ;
#endif
  bool              swapped        = false; // avoid unwanted data copy

  const PredictionUnit& pu = *cu->firstPU;

  // clang-format off
  const int affineShiftTab[3] =
  {
    MV_PRECISION_INTERNAL - MV_PRECISION_QUARTER,
    MV_PRECISION_INTERNAL - MV_PRECISION_SIXTEENTH,
    MV_PRECISION_INTERNAL - MV_PRECISION_INT
  };

  const int normalShiftTab[NUM_IMV_MODES] =
  {
    MV_PRECISION_INTERNAL - MV_PRECISION_QUARTER,
    MV_PRECISION_INTERNAL - MV_PRECISION_INT,
    MV_PRECISION_INTERNAL - MV_PRECISION_4PEL,
    MV_PRECISION_INTERNAL - MV_PRECISION_HALF,
  };
  // clang-format on

  // Reject the candidate when any MVD, scaled down by the shift selected through cu->imv, is outside
  // [MVD_MIN, MVD_MAX].
  for (int refList = 0; refList < NUM_REF_PIC_LIST_01; refList++)
  {
    if (pu.refIdx[refList] >= 0)
    {
      if (!cu->affine)
      {
        const int mvShift = normalShiftTab[cu->imv];
        Mv signaledmvd(pu.mvd[refList].getHor() >> mvShift, pu.mvd[refList].getVer() >> mvShift);
        if (!((signaledmvd.getHor() >= MVD_MIN) && (signaledmvd.getHor() <= MVD_MAX)) || !((signaledmvd.getVer() >= MVD_MIN) && (signaledmvd.getVer() <= MVD_MAX)))
        {
          return;
        }
      }
      else
      {
        // 2 control points for the 4-parameter model, 3 for the 6-parameter model
        for (int ctrlP = 1 + (cu->affineType == AFFINEMODEL_6PARAM); ctrlP >= 0; ctrlP--)
        {
          const int mvShift = affineShiftTab[cu->imv];
          Mv signaledmvd(pu.mvdAffi[refList][ctrlP].getHor() >> mvShift, pu.mvdAffi[refList][ctrlP].getVer() >> mvShift);
          if (!((signaledmvd.getHor() >= MVD_MIN) && (signaledmvd.getHor() <= MVD_MAX)) || !((signaledmvd.getVer() >= MVD_MIN) && (signaledmvd.getVer() <= MVD_MAX)))
          {
            return;
          }
        }
      }
    }
  }
  // avoid MV exceeding 18-bit dynamic range
  const int maxMv = 1 << 17;
  if (!cu->affine && !pu.mergeFlag)
  {
    if ( (pu.refIdx[0] >= 0 && (pu.mv[0].getAbsHor() >= maxMv || pu.mv[0].getAbsVer() >= maxMv))
      || (pu.refIdx[1] >= 0 && (pu.mv[1].getAbsHor() >= maxMv || pu.mv[1].getAbsVer() >= maxMv)))
    {
      return;
    }
  }
  if (cu->affine && !pu.mergeFlag)
  {
    for (int refList = 0; refList < NUM_REF_PIC_LIST_01; refList++)
    {
      if (pu.refIdx[refList] >= 0)
      {
        for (int ctrlP = 1 + (cu->affineType == AFFINEMODEL_6PARAM); ctrlP >= 0; ctrlP--)
        {
          if (pu.mvAffi[refList][ctrlP].getAbsHor() >= maxMv || pu.mvAffi[refList][ctrlP].getAbsVer() >= maxMv)
          {
            return;
          }
        }
      }
    }
  }
#if JVET_AA0133_INTER_MTS_OPT
  m_pcInterSearch->setBestCost(bestCS->cost);
  cu->mtsFlag = false;
  const bool mtsAllowed = tempCS->sps->getUseInterMTS() && CU::isInter(*cu) && partitioner.currArea().lwidth() <= tempCS->sps->getInterMTSMaxSize() && partitioner.currArea().lheight() <= tempCS->sps->getInterMTSMaxSize();
#else
  const bool mtsAllowed = tempCS->sps->getUseInterMTS() && CU::isInter( *cu ) && partitioner.currArea().lwidth() <= MTS_INTER_MAX_CU_SIZE && partitioner.currArea().lheight() <= MTS_INTER_MAX_CU_SIZE;
#endif
  uint8_t sbtAllowed = cu->checkAllowedSbt();
  //SBT resolution-dependent fast algorithm: not try size-64 SBT in RDO for low-resolution sequences (now resolution below HD)
  if( tempCS->pps->getPicWidthInLumaSamples() < (uint32_t)m_pcEncCfg->getSBTFast64WidthTh() )
  {
    sbtAllowed = ((cu->lwidth() > 32 || cu->lheight() > 32)) ? 0 : sbtAllowed;
  }
  uint8_t numRDOTried = 0;
  Distortion sbtOffDist = 0;
  bool    sbtOffRootCbf = 0;
  double  sbtOffCost      = MAX_DOUBLE;
  double  currBestCost = MAX_DOUBLE;
  bool    doPreAnalyzeResi = ( sbtAllowed || mtsAllowed ) && residualPass == 0;
#if JVET_AA0133_INTER_MTS_OPT
  double  mtsOffCost = MAX_DOUBLE;
#endif
  m_pcInterSearch->initTuAnalyzer();
  if( doPreAnalyzeResi )
  {
    m_pcInterSearch->calcMinDistSbt( *tempCS, *cu, sbtAllowed );
  }

  // Save/load interface of the mode controller; only dereferenced on the paths guarded by CHECK below.
  auto    slsSbt = dynamic_cast<SaveLoadEncInfoSbt*>( m_modeCtrl );
  int     slShift = 4 + std::min( (int)gp_sizeIdxInfo->idxFrom( cu->lwidth() ) + (int)gp_sizeIdxInfo->idxFrom( cu->lheight() ), 9 );
  Distortion curPuSse = m_pcInterSearch->getEstDistSbt( NUMBER_SBT_MODE );
  uint8_t currBestSbt = 0;
  uint8_t currBestTrs = MAX_UCHAR;
  uint8_t histBestSbt = MAX_UCHAR;
  uint8_t histBestTrs = MAX_UCHAR;
  m_pcInterSearch->setHistBestTrs( MAX_UCHAR, MAX_UCHAR );
  if( doPreAnalyzeResi )
  {
    if( m_pcInterSearch->getSkipSbtAll() && !mtsAllowed ) //emt is off
    {
      histBestSbt = 0; //try DCT2
      m_pcInterSearch->setHistBestTrs( histBestSbt, histBestTrs );
    }
    else
    {
      assert( curPuSse != std::numeric_limits<uint64_t>::max() );
      // Fail loudly instead of dereferencing a null pointer if the controller lacks the SBT save/load interface.
      CHECK( slsSbt == nullptr, "mode controller has to implement the SBT save/load capabilities" );
      uint16_t compositeSbtTrs = slsSbt->findBestSbt( cu->cs->area, (uint32_t)( curPuSse >> slShift ) );
      histBestSbt = ( compositeSbtTrs >> 0 ) & 0xff;
      histBestTrs = ( compositeSbtTrs >> 8 ) & 0xff;
      if( m_pcInterSearch->getSkipSbtAll() && CU::isSbtMode( histBestSbt ) ) //special case, skip SBT when loading SBT
      {
        histBestSbt = 0; //try DCT2
      }
      m_pcInterSearch->setHistBestTrs( histBestSbt, histBestTrs );
    }
  }

  // Prepares tempCS for another residual-coding attempt and restarts its distortion, bit count and cost.
  // - bestCS unchanged since the last check: the previous attempt did not win, only its TUs are dropped.
  // - bestCS changed for the first time: tempCS is re-initialised and rebuilt as a copy of bestCS.
  // - bestCS changed again: TUs are dropped and the CU pointer is refreshed.
  auto restartTempCS = [&]()
  {
    if( bestCost == bestCS->cost )
    {
      tempCS->clearTUs();
    }
    else if( false == swapped )
    {
      tempCS->initStructData( encTestMode.qp );
      tempCS->copyStructure( *bestCS, partitioner.chType );
      tempCS->getPredBuf().copyFrom( bestCS->getPredBuf() );
      bestCost = bestCS->cost;
      cu       = tempCS->getCU( partitioner.chType );
      swapped  = true;
    }
    else
    {
      tempCS->clearTUs();
      bestCost = bestCS->cost;
      cu       = tempCS->getCU( partitioner.chType );
    }

    tempCS->dist     = 0;
    tempCS->fracBits = 0;
    tempCS->cost     = MAX_DOUBLE;
  };

  cu->skip    = false;
  cu->sbtInfo = 0;

  // ---- pass 1: SBT off (skipped when the stored history says an SBT mode was best) ----
  const bool skipResidual = residualPass == 1;
  if( skipResidual || histBestSbt == MAX_UCHAR || !CU::isSbtMode( histBestSbt ) )
  {
    m_pcInterSearch->encodeResAndCalcRdInterCU( *tempCS, partitioner, skipResidual );
    if (tempCS->slice->getSPS()->getUseColorTrans())
    {
      bestCS->tmpColorSpaceCost = tempCS->tmpColorSpaceCost;
      bestCS->firstColorSpaceSelected = tempCS->firstColorSpaceSelected;
    }
#if JVET_AA0133_INTER_MTS_OPT
    numRDOTried += 1;
#else
    numRDOTried += mtsAllowed ? 2 : 1;
#endif
    xEncodeDontSplit( *tempCS, partitioner );

    xCheckDQP( *tempCS, partitioner );
    xCheckChromaQPOffset( *tempCS, partitioner );


    if( NULL != bestHasNonResi && (bestCostInternal > tempCS->cost) )
    {
      bestCostInternal = tempCS->cost;
      if (!(tempCS->getPU(partitioner.chType)->ciipFlag))
      {
        *bestHasNonResi  = !cu->rootCbf;
      }
    }

    // A CIIP candidate without any coded residual is discarded.
    if (cu->rootCbf == false)
    {
      if (tempCS->getPU(partitioner.chType)->ciipFlag)
      {
        tempCS->cost = MAX_DOUBLE;
        tempCS->costDbOffset = 0;
        return;
      }
    }
    currBestCost = tempCS->cost;
    sbtOffCost = tempCS->cost;
    sbtOffDist = tempCS->dist;
    sbtOffRootCbf = cu->rootCbf;
    currBestSbt = CU::getSbtInfo(cu->firstTU->mtsIdx[COMPONENT_Y] > MTS_SKIP ? SBT_OFF_MTS : SBT_OFF_DCT, 0);
    currBestTrs = cu->firstTU->mtsIdx[COMPONENT_Y];

#if WCG_EXT
    DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) );
#else
    DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda() );
#endif
    xCheckBestMode( tempCS, bestCS, partitioner, encTestMode );

  }

  // ---- pass 2: SBT modes ----
  uint8_t numSbtRdo = CU::numSbtModeRdo( sbtAllowed );
  //early termination if all SBT modes are not allowed
  //normative
  if( !sbtAllowed || skipResidual )
  {
    numSbtRdo = 0;
  }
  //fast algorithm
  if( ( histBestSbt != MAX_UCHAR && !CU::isSbtMode( histBestSbt ) ) || m_pcInterSearch->getSkipSbtAll() )
  {
    numSbtRdo = 0;
  }
  // Skip SBT when the SBT-off cost is already clearly above the best cost; the threshold is relaxed by
  // the saved SBT-off / best cost ratio when the previous best CU used SBT.
  if( bestCost != MAX_DOUBLE && sbtOffCost != MAX_DOUBLE )
  {
    double th = 1.07;
    if( !( prevBestSbt == 0 || m_sbtCostSave[0] == MAX_DOUBLE ) )
    {
      assert( m_sbtCostSave[1] <= m_sbtCostSave[0] );
      th *= ( m_sbtCostSave[0] / m_sbtCostSave[1] );
    }
    if( sbtOffCost > bestCost * th )
    {
      numSbtRdo = 0;
    }
  }
  // Skip SBT when the SBT-off attempt has no residual and is already cheap relative to a QP-dependent
  // fraction of the cost of (width * height) bits.
  if( !sbtOffRootCbf && sbtOffCost != MAX_DOUBLE )
  {
    double th = Clip3( 0.05, 0.55, ( 27 - cu->qp ) * 0.02 + 0.35 );
    if( sbtOffCost < m_pcRdCost->calcRdCost( ( cu->lwidth() * cu->lheight() ) << SCALE_BITS, 0 ) * th )
    {
      numSbtRdo = 0;
    }
  }

  // With a stored history entry only that single SBT mode is tried.
  if( histBestSbt != MAX_UCHAR && numSbtRdo != 0 )
  {
    numSbtRdo = 1;
    m_pcInterSearch->initSbtRdoOrder( CU::getSbtMode( CU::getSbtIdx( histBestSbt ), CU::getSbtPos( histBestSbt ) ) );
  }

  for( int sbtModeIdx = 0; sbtModeIdx < numSbtRdo; sbtModeIdx++ )
  {
    uint8_t sbtMode = m_pcInterSearch->getSbtRdoOrder( sbtModeIdx );
    uint8_t sbtIdx = CU::getSbtIdxFromSbtMode( sbtMode );
    uint8_t sbtPos = CU::getSbtPosFromSbtMode( sbtMode );

    //fast algorithm (early skip, save & load)
    if( histBestSbt == MAX_UCHAR )
    {
      uint8_t skipCode = m_pcInterSearch->skipSbtByRDCost( cu->lwidth(), cu->lheight(), cu->mtDepth, sbtIdx, sbtPos, bestCS->cost, sbtOffDist, sbtOffCost, sbtOffRootCbf );
      if( skipCode != MAX_UCHAR )
      {
        continue;
      }

      if( sbtModeIdx > 0 )
      {
        uint8_t prevSbtMode = m_pcInterSearch->getSbtRdoOrder( sbtModeIdx - 1 );
        //make sure the prevSbtMode is the same size as the current SBT mode (otherwise the estimated dist may not be comparable)
        if( CU::isSameSbtSize( prevSbtMode, sbtMode ) )
        {
          Distortion currEstDist = m_pcInterSearch->getEstDistSbt( sbtMode );
          Distortion prevEstDist = m_pcInterSearch->getEstDistSbt( prevSbtMode );
          if( currEstDist > prevEstDist * 1.15 )
          {
            continue;
          }
        }
      }
    }

    //init tempCS and TU
    restartTempCS();
    cu->skip = false;

    //set SBT info
    cu->setSbtIdx( sbtIdx );
    cu->setSbtPos( sbtPos );

    //try residual coding
    m_pcInterSearch->encodeResAndCalcRdInterCU( *tempCS, partitioner, skipResidual );
    if (tempCS->slice->getSPS()->getUseColorTrans())
    {
      bestCS->tmpColorSpaceCost = tempCS->tmpColorSpaceCost;
      bestCS->firstColorSpaceSelected = tempCS->firstColorSpaceSelected;
    }
    numRDOTried++;

    xEncodeDontSplit( *tempCS, partitioner );

    xCheckDQP( *tempCS, partitioner );
    xCheckChromaQPOffset( *tempCS, partitioner );

    if( NULL != bestHasNonResi && ( bestCostInternal > tempCS->cost ) )
    {
      bestCostInternal = tempCS->cost;
      if( !( tempCS->getPU( partitioner.chType )->ciipFlag ) )
      {
        *bestHasNonResi = !cu->rootCbf;
      }
    }

    if( tempCS->cost < currBestCost )
    {
      currBestSbt = cu->sbtInfo;
      currBestTrs = tempCS->tus[cu->sbtInfo ? cu->getSbtPos() : 0]->mtsIdx[COMPONENT_Y];
      assert( currBestTrs == 0 || currBestTrs == 1 );
      currBestCost = tempCS->cost;
    }

#if WCG_EXT
    DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) );
#else
    DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda() );
#endif
    xCheckBestMode( tempCS, bestCS, partitioner, encTestMode );
  }
#if JVET_AA0133_INTER_MTS_OPT
  // ---- pass 3: SBT off with cu->mtsFlag set ----
  if (!skipResidual && mtsAllowed)
  {
    restartTempCS();
    tempCS->costDbOffset = 0;
    cu->skip = false;
    cu->sbtInfo = 0;
    cu->mtsFlag = true;
    m_pcInterSearch->setBestCost(bestCS->cost);
    mtsOffCost = currBestCost;
    bool testMts = true;
    // Same style of early termination as for SBT, based on the best cost obtained without MTS.
    if (bestCost != MAX_DOUBLE && mtsOffCost != MAX_DOUBLE)
    {
      double th = 1.07;
      if (!(prevBestMts == 0 || m_mtsCostSave == MAX_DOUBLE))
      {
        assert(m_sbtCostSave[1] <= m_mtsCostSave);
        th *= (m_mtsCostSave / m_sbtCostSave[1]);
      }
      if (mtsOffCost > bestCost * th)
      {
        testMts = false;
      }
    }
    if(testMts)
    {
      //try residual coding
      bool isValid = m_pcInterSearch->encodeResAndCalcRdInterCU(*tempCS, partitioner, skipResidual);
      if (isValid)
      {
        if (tempCS->slice->getSPS()->getUseColorTrans())
        {
          bestCS->tmpColorSpaceCost = tempCS->tmpColorSpaceCost;
          bestCS->firstColorSpaceSelected = tempCS->firstColorSpaceSelected;
        }
        numRDOTried++;

        xEncodeDontSplit(*tempCS, partitioner);

        xCheckDQP(*tempCS, partitioner);
        xCheckChromaQPOffset(*tempCS, partitioner);

        if (NULL != bestHasNonResi && (bestCostInternal > tempCS->cost))
        {
          bestCostInternal = tempCS->cost;
          if (!(tempCS->getPU(partitioner.chType)->ciipFlag))
          {
            *bestHasNonResi = !cu->rootCbf;
          }
        }

        // A CIIP candidate without any coded residual is discarded.
        if (cu->rootCbf == false)
        {
          if (tempCS->getPU(partitioner.chType)->ciipFlag)
          {
            tempCS->cost = MAX_DOUBLE;
            tempCS->costDbOffset = 0;
            return;
          }
        }
        if (tempCS->cost < currBestCost)
        {
          currBestCost = tempCS->cost;
          sbtOffCost = tempCS->cost;
          sbtOffDist = tempCS->dist;
          sbtOffRootCbf = cu->rootCbf;
          currBestSbt = CU::getSbtInfo(cu->firstTU->mtsIdx[COMPONENT_Y] > MTS_SKIP ? SBT_OFF_MTS : SBT_OFF_DCT, 0);
          currBestTrs = cu->firstTU->mtsIdx[COMPONENT_Y];
        }

#if WCG_EXT
        DTRACE_MODE_COST(*tempCS, m_pcRdCost->getLambda(true));
#else
        DTRACE_MODE_COST(*tempCS, m_pcRdCost->getLambda());
#endif
        xCheckBestMode(tempCS, bestCS, partitioner, encTestMode);
      }
    }
  }
#endif
  // Remember the costs for the thresholds of later calls, but only if this call improved bestCS.
  if( bestCostBegin != bestCS->cost )
  {
    m_sbtCostSave[0] = sbtOffCost;
    m_sbtCostSave[1] = currBestCost;
#if JVET_AA0133_INTER_MTS_OPT
    m_mtsCostSave = mtsOffCost;
#endif
  }

  // Store the outcome in the SBT history when no entry was loaded and more than one option was tried.
  if( histBestSbt == MAX_UCHAR && doPreAnalyzeResi && numRDOTried > 1 )
  {
    CHECK( slsSbt == nullptr, "mode controller has to implement the SBT save/load capabilities" );
    slsSbt->saveBestSbt( cu->cs->area, (uint32_t)( curPuSse >> slShift ), currBestSbt, currBestTrs );
  }
  tempCS->cost = currBestCost;
  if( ETM_INTER_ME == encTestMode.type )
  {
    if( equBcwCost != NULL )
    {
      if( tempCS->cost < ( *equBcwCost ) && cu->BcwIdx == BCW_DEFAULT )
      {
        ( *equBcwCost ) = tempCS->cost;
      }
    }
    else
    {
      // equBcwCost is NULL on this path, so the CHECK below always fires when it is reached
      // (with ENABLE_OBMC only for CUs that have obmcFlag set).
#if ENABLE_OBMC
      if (cu->obmcFlag)
#endif
      CHECK( equBcwCost == NULL, "equBcwCost == NULL" );
    }
    // Keep the two lowest costs (and their BCW indices) seen for non-default BCW weights.
    if( tempCS->slice->getCheckLDC() && !cu->imv && cu->BcwIdx != BCW_DEFAULT && tempCS->cost < m_bestBcwCost[1] )
    {
      if( tempCS->cost < m_bestBcwCost[0] )
      {
        m_bestBcwCost[1] = m_bestBcwCost[0];
        m_bestBcwCost[0] = tempCS->cost;
        m_bestBcwIdx[1] = m_bestBcwIdx[0];
        m_bestBcwIdx[0] = cu->BcwIdx;
      }
      else
      {
        m_bestBcwCost[1] = tempCS->cost;
        m_bestBcwIdx[1] = cu->BcwIdx;
      }
    }
#if INTER_LIC
    m_pcInterSearch->m_fastLicCtrl.setBestAmvpRDBeforeLIC(*cu, currBestCost);
#endif
  }
}


/** Adds the estimated bits for signalling CU_DONT_SPLIT to \p cs and recomputes its RD cost.
 *
 *  \param cs           coding structure whose fracBits and cost are updated
 *  \param partitioner  partitioner state passed to the split-mode bit estimation
 */
void EncCu::xEncodeDontSplit( CodingStructure &cs, Partitioner &partitioner )
{
  // Estimate the split signalling from a clean bit counter.
  m_CABACEstimator->resetBits();
  m_CABACEstimator->split_cu_mode( CU_DONT_SPLIT, cs, partitioner );

  const auto splitFracBits = m_CABACEstimator->getEstFracBits();
#if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
  // In the chroma tree the split decision is expected to cost nothing.
  if( partitioner.treeType == TREE_C )
  {
    CHECK( splitFracBits != 0, "must be 0 bit" );
  }
#endif

  cs.fracBits += splitFracBits; // split bits
  cs.cost      = m_pcRdCost->calcRdCost( cs.fracBits, cs.dist );
}

#if REUSE_CU_RESULTS
/** Re-evaluates a CU whose coding decision is taken from the mode controller's cache.
 *
 *  The mode controller (which has to be a BestEncInfoCache) fills \p tempCS through
 *  setCsFrom(). The first CU of \p tempCS is then reconstructed (intra/PLT or inter path),
 *  its distortion is recomputed, its bits are re-estimated with the CABAC estimator starting
 *  from m_CurrCtx->start, and the result is handed to xCheckBestMode() together with the
 *  cached test mode.
 *
 *  \param tempCS      coding structure that receives the cached CU and the recomputed dist/fracBits/cost
 *  \param bestCS      current best coding structure, forwarded to xCheckBestMode()
 *  \param partitioner partitioner describing the current area; receives the CU data via setCUData()
 *
 *  Raises THROW( "Should never happen!" ) when setCsFrom() returns false.
 */
void EncCu::xReuseCachedResult( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner )
{
  m_pcRdCost->setChromaFormat(tempCS->sps->getChromaFormatIdc());
  // The cached result comes from the mode controller, which therefore must also be a BestEncInfoCache.
  BestEncInfoCache* bestEncCache = dynamic_cast<BestEncInfoCache*>( m_modeCtrl );
  CHECK( !bestEncCache, "If this mode is chosen, mode controller has to implement the mode caching capabilities" );
  EncTestMode cachedMode;

  if( bestEncCache->setCsFrom( *tempCS, cachedMode, partitioner ) )
  {
    CodingUnit& cu = *tempCS->cus.front();
    partitioner.setCUData( cu );
#if MULTI_PASS_DMVR
    // Clear the BDMVR refinement flag on every PU before the CU is reconstructed.
    for( auto &pu : CU::traversePUs( cu ) )
    {
      pu.bdmvrRefine = false;
    }
#endif
    // Rebuild the reconstruction: intra and palette CUs use the intra path, all others the inter path.
    if( CU::isIntra( cu )
    || CU::isPLT(cu)
    )
    {
      xReconIntraQT( cu );
    }
    else
    {
      xDeriveCUMV( cu );
      xReconInter( cu );
    }

#if JVET_Z0118_GDR
    bestCS->updateReconMotIPM(cu); // cache
#endif

    Distortion finalDistortion = 0;
    tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt();
    if ( m_pcEncCfg->getUseEncDbOpt() )
    {
      // With the encoder deblocking option the distortion is the one xCalDebCost() leaves in tempCS->dist.
      xCalDebCost( *tempCS, partitioner, true );
      finalDistortion = tempCS->dist;
    }
    else
    {
    // Otherwise accumulate the distortion component by component.
    const SPS &sps = *tempCS->sps;
    const int  numValidComponents = getNumberValidComponents( tempCS->area.chromaFormat );

    for( int comp = 0; comp < numValidComponents; comp++ )
    {
      const ComponentID compID = ComponentID( comp );
      // With separate/dual trees only the components of the partitioner's channel type are processed.
#if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
      if( CS::isDualITree(*tempCS) && toChannelType(compID) != partitioner.chType )
#else
      if( partitioner.isSepTree( *tempCS ) && toChannelType( compID ) != partitioner.chType )
#endif
      {
        continue;
      }

      CPelBuf reco = tempCS->getRecoBuf( compID );
      CPelBuf org  = tempCS->getOrgBuf ( compID );


#if JVET_V0094_BILATERAL_FILTER
        // Scratch luma buffer of CU size taken from m_tmpStorageLCU; it is obtained for every component
        // but only written to when the current component is luma.
        const CompArea &area = cu.blocks[COMPONENT_Y];
        CompArea    tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
        PelBuf tmpRecLuma = m_tmpStorageLCU->getBuf(tmpArea);
        if(isLuma(compID))
        {
          tmpRecLuma.copyFrom(reco);

          // Same condition as the WCG_EXT distortion branch below: map the copy through the reshaper's inverse LUT.
          if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (
            m_pcEncCfg->getLmcs() && (tempCS->slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())))
          {
            tmpRecLuma.rspSignal(m_pcReshape->getInvLUT());
          }
        }
        // Luma bilateral filter, applied TU by TU on the scratch copy.
        if(tempCS->pps->getUseBIF() && isLuma(compID) && (cu.qp > 17))
        {
          for (auto &currTU : CU::traverseTUs(cu))
          {
            // View of the scratch buffer that corresponds to this TU (position relative to the CU).
            Position tuPosInCu = currTU.lumaPos() - cu.lumaPos();
            PelBuf tmpSubBuf = tmpRecLuma.subBuf(tuPosInCu, currTU.lumaSize());

            // Here every prediction mode other than MODE_INTRA counts as "inter".
            bool isInter = (cu.predMode == MODE_INTRA) ? false : true;

            // Filter only when: (luma CBF set or non-inter) and QP > 17 and max(w,h) < 128
            // and (non-inter or min(w,h) < 32).
            if ((TU::getCbf(currTU, COMPONENT_Y) || isInter == false) && (currTU.cu->qp > 17) && (128 > std::max(currTU.lumaSize().width, currTU.lumaSize().height)) && ((isInter == false) || (32 > std::min(currTU.lumaSize().width, currTU.lumaSize().height))))
            {
              CompArea compArea = currTU.blocks[compID];
              PelBuf recIPredBuf = tempCS->slice->getPic()->getRecoBuf(compArea);
              // Do we need to use clipArea?

              // Only reshape surrounding samples if reshaping is on
               if(m_pcEncCfg->getLmcs() && (tempCS->slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag() ) && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()))
               {
                 m_bilateralFilter->bilateralFilterRDOdiamond5x5(tmpSubBuf, tmpSubBuf, tmpSubBuf, currTU.cu->qp, recIPredBuf, tempCS->slice->clpRng(compID), currTU, true, true, m_pcReshape->getInvLUT());
               }
               else
               {
                 // No reshaping of the surrounding samples: pass an empty LUT.
                 std::vector<Pel> invLUT;
                 m_bilateralFilter->bilateralFilterRDOdiamond5x5(tmpSubBuf, tmpSubBuf, tmpSubBuf, currTU.cu->qp, recIPredBuf, tempCS->slice->clpRng(compID), currTU, true, false, invLUT);
               }
            }
          }
        }
#if JVET_X0071_CHROMA_BILATERAL_FILTER
        // Scratch chroma buffer; only fetched and filled when the current component is chroma.
        const CompArea &areaChroma = cu.blocks[compID];
        CompArea    tmpAreaChroma(compID, areaChroma.chromaFormat, Position(0, 0), areaChroma.size());
        PelBuf tmpRecChroma;
        if(isChroma(compID))
        {
          tmpRecChroma = m_tmpStorageLCU->getBuf(tmpAreaChroma);
          tmpRecChroma.copyFrom(reco);
        }

        // Chroma bilateral filter, applied TU by TU on the scratch copy.
        if(tempCS->pps->getUseChromaBIF() && isChroma(compID) && (cu.qp > 17))
        {
          bool tuValid = false;
          bool tuCBF = false;
          bool isDualTree = CS::isDualITree(*tempCS);
          bool chromaValid = cu.Cb().valid() && cu.Cr().valid();
          bool applyChromaBIF = false;
          for (auto &currTU : CU::traverseTUs(cu))
          {
            Position tuPosInCu = currTU.chromaPos() - cu.chromaPos();
            PelBuf tmpSubBuf = tmpRecChroma.subBuf(tuPosInCu, currTU.chromaSize());
            // Unlike the luma path above, "inter" here means exactly MODE_INTER.
            bool isInter = (cu.predMode == MODE_INTER) ? true : false;
            bool isCb = compID == COMPONENT_Cb ? true : false;
            applyChromaBIF = false;
            if(!isDualTree && chromaValid)
            {
              tuValid = currTU.blocks[compID].valid();
              tuCBF = false;//if CHROMA TU is not valid, CBF must be zero
              if(tuValid)
              {
                tuCBF = TU::getCbf(currTU, compID);
              }
              applyChromaBIF = (( tuCBF || isInter == false) && (currTU.cu->qp > 17) && (tuValid));
            }

            if(isDualTree && chromaValid)
            {
              // Dual tree: the CBF is read without checking the validity of the TU block first.
              applyChromaBIF = ((TU::getCbf(currTU, compID) || isInter == false) && (currTU.cu->qp > 17));
            }

            if(applyChromaBIF)
            {
              CompArea compArea = currTU.blocks[compID];
              PelBuf recIPredBuf = tempCS->slice->getPic()->getRecoBuf(compArea);
              m_bilateralFilter->bilateralFilterRDOdiamond5x5Chroma(tmpSubBuf, tmpSubBuf, tmpSubBuf, currTU.cu->qp, recIPredBuf, tempCS->slice->clpRng(compID), currTU, true, isCb);
            }
          }
        }
#endif

#if WCG_EXT
        // Weighted SSE (DF_SSE_WTD, with the original luma as extra input) when luma-level delta QP mapping
        // is enabled or LMCS is active for this slice/CTU; plain SSE otherwise.
        if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (
          m_pcEncCfg->getLmcs() && (tempCS->slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())))
        {
          const CPelBuf orgLuma = tempCS->getOrgBuf(tempCS->area.blocks[COMPONENT_Y]);
          if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()))
          {
            // Luma under LMCS: measure against the scratch copy prepared above.
            finalDistortion += m_pcRdCost->getDistPart(org, tmpRecLuma, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
          }
          else
          {
#if JVET_X0071_CHROMA_BILATERAL_FILTER
            if(isChroma(compID) && tempCS->pps->getUseChromaBIF())
            {
              // Chroma with chroma BIF enabled: measure against the scratch chroma copy.
              finalDistortion += m_pcRdCost->getDistPart(org, tmpRecChroma, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
            }
            else
            {
              finalDistortion += m_pcRdCost->getDistPart( org, reco, sps.getBitDepth( toChannelType( compID ) ), compID, DF_SSE_WTD, &orgLuma );
            }
#else
            finalDistortion += m_pcRdCost->getDistPart( org, reco, sps.getBitDepth( toChannelType( compID ) ), compID, DF_SSE_WTD, &orgLuma );
#endif
          }
        }
        else
#endif
        {
          // NOTE(review): this path measures against the unfiltered reco even when the scratch copies
          // above were bilateral-filtered - confirm that this is intended.
          finalDistortion += m_pcRdCost->getDistPart( org, reco, sps.getBitDepth( toChannelType( compID ) ), compID, DF_SSE );
        }
#else
#if JVET_X0071_CHROMA_BILATERAL_FILTER
        // Build without the luma bilateral filter: only the chroma scratch copy and chroma filter remain.
        const CompArea &areaChroma = cu.blocks[compID];
        CompArea    tmpAreaChroma(compID, areaChroma.chromaFormat, Position(0, 0), areaChroma.size());
        PelBuf tmpRecChroma;
        if(isChroma(compID))
        {
          tmpRecChroma = m_tmpStorageLCU->getBuf(tmpAreaChroma);
          tmpRecChroma.copyFrom(reco);
        }

        if(tempCS->pps->getUseChromaBIF() && isChroma(compID) && (cu.qp > 17))
        {
          bool tuValid = false;
          bool tuCBF = false;
          bool isDualTree = CS::isDualITree(*tempCS);
          bool chromaValid = cu.Cb().valid() && cu.Cr().valid();
          bool applyChromaBIF = false;

          for (auto &currTU : CU::traverseTUs(cu))
          {
            Position tuPosInCu = currTU.chromaPos() - cu.chromaPos();
            PelBuf tmpSubBuf = tmpRecChroma.subBuf(tuPosInCu, currTU.chromaSize());

            bool isInter = (cu.predMode == MODE_INTER) ? true : false;
            bool isCb = compID == COMPONENT_Cb ? true : false;
            applyChromaBIF = false;
            if(!isDualTree && chromaValid)
            {
              tuValid = currTU.blocks[compID].valid();
              tuCBF = false;//if CHROMA TU is not valid, CBF must be zero
              if(tuValid)
              {
                tuCBF = TU::getCbf(currTU, compID);
              }
              applyChromaBIF = ((tuCBF || isInter == false) && (currTU.cu->qp > 17) && (tuValid));
            }
            if(isDualTree && chromaValid)
            {
              applyChromaBIF = ((TU::getCbf(currTU, compID) || isInter == false) && (currTU.cu->qp > 17));
            }
            if(applyChromaBIF)
            {
              CompArea compArea = currTU.blocks[compID];
              PelBuf recIPredBuf = tempCS->slice->getPic()->getRecoBuf(compArea);
              m_bilateralFilter->bilateralFilterRDOdiamond5x5Chroma(tmpSubBuf, tmpSubBuf, tmpSubBuf, currTU.cu->qp, recIPredBuf, tempCS->slice->clpRng(compID), currTU, true, isCb);
            }
          }
        }
#endif
#if WCG_EXT
      // Same weighted/plain SSE selection as in the bilateral-filter build above.
      if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcEncCfg->getLmcs() && (tempCS->slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())))
      {
        const CPelBuf orgLuma = tempCS->getOrgBuf(tempCS->area.blocks[COMPONENT_Y]);
        if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()))
        {
          // Luma under LMCS: map reco through the reshaper's inverse LUT into a scratch buffer first.
          const CompArea &area = cu.blocks[COMPONENT_Y];
          CompArea    tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
          PelBuf tmpRecLuma = m_tmpStorageLCU->getBuf(tmpArea);
          tmpRecLuma.rspSignal( reco, m_pcReshape->getInvLUT() );
          finalDistortion += m_pcRdCost->getDistPart(org, tmpRecLuma, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
        }
        else
        {
#if JVET_X0071_CHROMA_BILATERAL_FILTER
          if(isChroma(compID) && tempCS->pps->getUseChromaBIF())
          {
            finalDistortion += m_pcRdCost->getDistPart(org, tmpRecChroma, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
          }
          else
          {
            finalDistortion += m_pcRdCost->getDistPart( org, reco, sps.getBitDepth( toChannelType( compID ) ), compID, DF_SSE_WTD, &orgLuma );
          }
#else
          finalDistortion += m_pcRdCost->getDistPart( org, reco, sps.getBitDepth( toChannelType( compID ) ), compID, DF_SSE_WTD, &orgLuma );
#endif
        }
      }
      else
#endif
      {
        finalDistortion += m_pcRdCost->getDistPart( org, reco, sps.getBitDepth( toChannelType( compID ) ), compID, DF_SSE );
      }
#endif
    }
    }

    // Re-estimate the bits of the CU, starting from the context state stored in m_CurrCtx->start.
    m_CABACEstimator->getCtx() = m_CurrCtx->start;
    m_CABACEstimator->resetBits();

    CUCtx cuCtx;
    // Both flags are preset to true here; delta QP and chroma QP offset are handled by
    // xCheckDQP() / xCheckChromaQPOffset() below.
    cuCtx.isDQPCoded = true;
    cuCtx.isChromaQpAdjCoded = true;
    m_CABACEstimator->coding_unit( cu, partitioner, cuCtx );


    tempCS->dist     = finalDistortion;
    tempCS->fracBits = m_CABACEstimator->getEstFracBits();
    tempCS->cost     = m_pcRdCost->calcRdCost( tempCS->fracBits, tempCS->dist );

    // Finalise the candidate and compare it against the current best under the cached test mode.
    xEncodeDontSplit( *tempCS,         partitioner );
    xCheckDQP       ( *tempCS,         partitioner );
    xCheckChromaQPOffset( *tempCS,     partitioner );
    xCheckBestMode  (  tempCS, bestCS, partitioner, cachedMode );
  }
  else
  {
    // The caller selected the cached mode, so failing to load it is treated as a fatal error.
    THROW( "Should never happen!" );
  }
}
#endif

#if MULTI_HYP_PRED
/** Runs the additional-hypothesis search once for every base result.
 *
 *  Each entry of \p in is loaded into \p pu (and the CU it points to), the motion data that the
 *  search relies on is prepared according to the kind of candidate, and
 *  InterSearch::predInterSearchAdditionalHypothesis() is called with \p out as its result vector.
 *
 *  \param in      base results to extend
 *  \param out     result vector handed to the search for every base result
 *  \param pu      working prediction unit; overwritten (together with *pu.cu) for every base result
 *  \param mrgCtx  merge context used when spanning the motion info of SbTMVP candidates
 */
void EncCu::predInterSearchAdditionalHypothesisMulti(const MEResultVec& in, MEResultVec& out, PredictionUnit& pu, const MergeCtx &mrgCtx)
{
  for (const auto &baseResult : in)
  {
    // Load the base candidate into the working CU/PU pair.
    *pu.cu = baseResult.cu;
    pu     = baseResult.pu;

    const bool isSbTmvp = ( pu.mergeType == MRG_TYPE_SUBPU_ATMVP );

    if (isSbTmvp)
    {
      // the SbTmvp use xSubPuMC which will need to access the motion buffer for subblock MV
      PU::spanMotionInfo(pu, mrgCtx);
    }
    else if (baseResult.cu.affine)
    {
      // Affine candidate: set the control-point MVs of list 0 first, then those of list 1.
      PU::setAllAffineMv(pu, pu.mvAffi[0][0], pu.mvAffi[0][1], pu.mvAffi[0][2], REF_PIC_LIST_0);
      PU::setAllAffineMv(pu, pu.mvAffi[1][0], pu.mvAffi[1][1], pu.mvAffi[1][2], REF_PIC_LIST_1);
    }
#if MULTI_PASS_DMVR
    else if( pu.bdmvrRefine )
    {
      // Each merge index owns two consecutive entries of the BDMVR sub-PU MV buffers.
      // NOTE(review): xCheckRDCostInterMultiHyp2Nx2N additionally selects m_mvBufBDMVR4BM when
      // pu.bmMergeFlag is set - confirm whether BM-merge base results can reach this function.
      const int bufIdx = pu.mergeIdx << 1;
#if TM_MRG
      if( pu.tmMergeFlag )
      {
        m_pcInterSearch->setBdmvrSubPuMvBuf( m_mvBufBDMVR4TM[bufIdx], m_mvBufBDMVR4TM[bufIdx + 1] );
      }
      else
#endif
      {
        m_pcInterSearch->setBdmvrSubPuMvBuf( m_mvBufBDMVR[bufIdx], m_mvBufBDMVR[bufIdx + 1] );
      }
    }
#endif

    m_pcInterSearch->predInterSearchAdditionalHypothesis(pu, baseResult, out);
  }
}

/** Tests inter candidates with additional prediction hypotheses for the current block.
 *
 *  Starting from the base results stored in m_baseResultsForMH, a pre-search repeatedly keeps
 *  the cheapest candidates and extends them through predInterSearchAdditionalHypothesisMulti()
 *  until no new candidate is produced. The cheapest getAddHypTries() candidates overall are then
 *  motion compensated and passed to xEncodeInterResidual().
 *
 *  The function returns without testing anything when the block is too small, when the
 *  explicit QP / temporal-layer tests below apply, when fast delta QP excludes the block size,
 *  or when there are no base results.
 *
 *  \param tempCS       working coding structure (re-initialised for every tested candidate)
 *  \param bestCS       current best coding structure
 *  \param partitioner  partitioner describing the current area
 *  \param encTestMode  test mode; its opts field has to be ETO_STANDARD
 */
void EncCu::xCheckRDCostInterMultiHyp2Nx2N(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode)
{
#if ENABLE_OBMC
  // Lowest cost seen so far among the candidates tested below; governs the update of the "without OBMC" storage.
  double bestOBMCCost = MAX_DOUBLE;
#endif
  // Block size restriction: luma area and the smaller luma dimension both have to exceed their limits.
  if (tempCS->area.Y().area() <= MULTI_HYP_PRED_RESTRICT_BLOCK_SIZE || std::min(tempCS->area.Y().width, tempCS->area.Y().height) < MULTI_HYP_PRED_RESTRICT_MIN_WH)
  {
    return;
  }
  const SPS &sps = *tempCS->sps;
  CHECK(!sps.getUseInterMultiHyp(), "Multi Hyp is not active");
  CHECK(!tempCS->slice->isInterB(), "Multi Hyp only allowed in B slices");
  CHECK(encTestMode.opts != ETO_STANDARD, "unknown encoding option to EncCu::xCheckRDCostInterMultiHyp2Nx2N()");

  if ((m_pcEncCfg->getBaseQP() > 32) || (m_pcEncCfg->getBaseQP() > 27 && tempCS->slice->getTLayer() >= 4)) // KRNOTE: explicit QP tests
    return;

  if (m_modeCtrl->getFastDeltaQp())
  {
    if (tempCS->area.lumaSize().width > tempCS->pcv->fastDeltaQPCuMaxSize)
    {
      return; // only check necessary 2Nx2N Inter in fast deltaqp mode
    }
  }

  // All candidates produced by the pre-search, later ordered by ascending cost.
  MEResultVec mhResults;
  const auto RDCostComp = [](const MEResult &x, const MEResult &y) { return x.cost < y.cost; };

  MergeCtx mrgCtx;
  if (sps.getSbTMVPEnabledFlag())
  {
    // Attach the sub-PU motion buffer, sized to the current block, to the merge context.
    Size bufSize = g_miScaling.scale(tempCS->area.lumaSize());
    mrgCtx.subPuMvpMiBuf = MotionBuf(m_SubPuMiBuf, bufSize);
  }

  // Hadamard-based pre-search
  {
    // Work on a copy: the list is sorted, truncated and replaced inside the loop.
    MEResultVec base = m_baseResultsForMH;

    if (base.empty())
      return;

    tempCS->initStructData(encTestMode.qp);
    CodingUnit &cu = tempCS->addCU(tempCS->area, partitioner.chType);
    PredictionUnit &pu = tempCS->addPU(cu, partitioner.chType);

    int iter = 0;
    do
    {
      MEResultVec out;
      // First iteration keeps getNumMHPCandsToTest() candidates, every later iteration keeps 2.
      const auto survivors = iter > 0 ? 2 : m_pcEncCfg->getNumMHPCandsToTest();
      iter++;

      std::stable_sort(base.begin(), base.end(), RDCostComp);
      if (base.size() > survivors)
        base.resize(survivors);
      predInterSearchAdditionalHypothesisMulti(base, out, pu, mrgCtx);
      mhResults.insert(mhResults.end(), out.begin(), out.end());
      // The new candidates become the bases of the next iteration; the loop ends once none are produced.
      base = out;
    } while (!base.empty());
  }

  std::stable_sort(mhResults.begin(), mhResults.end(), RDCostComp);

  // actual testing with "true" RD costs
  for (int i = 0; i < std::min((int)mhResults.size(), m_pcEncCfg->getAddHypTries()); ++i)
  {
    tempCS->initStructData(encTestMode.qp);
    CodingUnit &cu = tempCS->addCU(tempCS->area, partitioner.chType);
    PredictionUnit &pu = tempCS->addPU(cu, partitioner.chType);

    pu = mhResults[i].pu;
    cu = mhResults[i].cu;
#if JVET_Z0054_BLK_REF_PIC_REORDER
    if (!pu.mergeFlag && PU::useRefCombList(pu))
    {
      m_pcInterSearch->setUniRefIdxLC(pu);
    }
    else if (PU::useRefPairList(pu))
    {
      m_pcInterSearch->setBiRefPairIdx(pu);
    }
#endif

#if MULTI_PASS_DMVR
    if (pu.bdmvrRefine)
    {
      // Select the BDMVR sub-PU MV buffers matching the merge type (TM, BM or regular);
      // each merge index owns two consecutive buffer entries.
#if TM_MRG
      if( pu.tmMergeFlag )
      {
        m_pcInterSearch->setBdmvrSubPuMvBuf( m_mvBufBDMVR4TM[pu.mergeIdx << 1], m_mvBufBDMVR4TM[( pu.mergeIdx << 1 ) + 1] );
      }
      else
#endif
#if JVET_X0049_ADAPT_DMVR 
      if( pu.bmMergeFlag )
      {
        m_pcInterSearch->setBdmvrSubPuMvBuf( m_mvBufBDMVR4BM[pu.mergeIdx << 1], m_mvBufBDMVR4BM[( pu.mergeIdx << 1 ) + 1] );
      }
      else
#endif
      m_pcInterSearch->setBdmvrSubPuMvBuf(m_mvBufBDMVR[pu.mergeIdx << 1], m_mvBufBDMVR[(pu.mergeIdx << 1) + 1]);
    }
    else
    {
      PU::spanMotionInfo(pu, mrgCtx);
    }
#else
    PU::spanMotionInfo(pu, mrgCtx);
#endif
    cu.skip = false;
    cu.mmvdSkip = false;

    CHECK(cu.qtDepth != partitioner.currQtDepth, "Mismatch");
    CHECK(cu.btDepth != partitioner.currBtDepth, "Mismatch");
    CHECK(cu.mtDepth != partitioner.currMtDepth, "Mismatch");
    // mvRefine is raised only for the duration of the motion compensation (and the motion spanning below).
    pu.mvRefine = true;
    m_pcInterSearch->motionCompensation(pu);

#if MULTI_PASS_DMVR
    if (pu.bdmvrRefine)
    {
      // After motion compensation, span the motion info from the BDMVR buffers of the matching merge type
      // together with the BDOF sub-PU MV offsets of the inter search.
#if TM_MRG
      if( pu.tmMergeFlag )
      {
        PU::spanMotionInfo( pu, mrgCtx, m_mvBufBDMVR4TM[pu.mergeIdx << 1], m_mvBufBDMVR4TM[( pu.mergeIdx << 1 ) + 1], m_pcInterSearch->getBdofSubPuMvOffset() );
      }
      else
#endif
#if JVET_X0049_ADAPT_DMVR
      if( pu.bmMergeFlag )
      {
        PU::spanMotionInfo( pu, mrgCtx, m_mvBufBDMVR4BM[pu.mergeIdx << 1], m_mvBufBDMVR4BM[( pu.mergeIdx << 1 ) + 1], m_pcInterSearch->getBdofSubPuMvOffset() );
      }
      else
#endif
      PU::spanMotionInfo(pu, mrgCtx, m_mvBufBDMVR[pu.mergeIdx << 1], m_mvBufBDMVR[(pu.mergeIdx << 1) + 1], m_pcInterSearch->getBdofSubPuMvOffset());
    }
#endif
    pu.mvRefine = false;

#if ENABLE_OBMC //multi hyp inter IMV
    // Remember which structure holds the candidate and keep a copy of its prediction before OBMC is applied.
    CodingStructure *prevCS = tempCS;
    PelUnitBuf tempWoOBMCBuf = m_tempWoOBMCBuffer.subBuf(UnitAreaRelative(cu, cu));
    tempWoOBMCBuf.copyFrom(tempCS->getPredBuf(cu));
    cu.isobmcMC = true;
    cu.obmcFlag = true;
    m_pcInterSearch->subBlockOBMC(*cu.firstPU);
    cu.isobmcMC = false;
#endif
    xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode);
#if ENABLE_OBMC
    // If tempCS still points to the same structure, its cost is used; otherwise the pointer changed
    // during xEncodeInterResidual() and the cost is read from bestCS.
    double tempCost = (prevCS == tempCS) ? tempCS->cost : bestCS->cost;
    if (m_pTempCUWoOBMC && tempCost < bestOBMCCost)
    {
      // Store this candidate (structure, prediction without OBMC, prediction with OBMC) in the
      // per-block-size slots that xCheckRDCostInterWoOBMC() reads.
      const unsigned wIdx = gp_sizeIdxInfo->idxFrom(tempCS->area.lwidth());
      const unsigned hIdx = gp_sizeIdxInfo->idxFrom(tempCS->area.lheight());

      m_pTempCUWoOBMC[wIdx][hIdx]->clearCUs();
      m_pTempCUWoOBMC[wIdx][hIdx]->clearPUs();
      m_pTempCUWoOBMC[wIdx][hIdx]->clearTUs();
      m_pTempCUWoOBMC[wIdx][hIdx]->copyStructure(*prevCS, partitioner.chType);

      m_pPredBufWoOBMC[wIdx][hIdx].copyFrom(tempWoOBMCBuf);
      m_pTempCUWoOBMC[wIdx][hIdx]->getPredBuf(cu).copyFrom(prevCS->getPredBuf(cu));

      bestOBMCCost = tempCost;
    }
#endif
  }
}
#endif
#if ENABLE_OBMC
/** Re-tests the inter candidate stored for the current block size with OBMC switched off.
 *
 *  The candidate is read from m_pTempCUWoOBMC / m_pPredBufWoOBMC, indexed by the size indices
 *  of the luma width and height. It is only encoded when the luma SAD of the prediction
 *  without OBMC is lower than the luma SAD of the stored prediction.
 *
 *  \param tempCS       working coding structure; re-initialised and, if the test runs, filled from the stored candidate
 *  \param bestCS       current best coding structure
 *  \param partitioner  partitioner describing the current area
 *  \param encTestMode  test mode supplying the QP for initStructData() and forwarded to xEncodeInterResidual()
 */
void EncCu::xCheckRDCostInterWoOBMC(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode)
{
  if (!tempCS->sps->getUseOBMC())
  {
    return;
  }

  // only check necessary 2Nx2N Inter in fast deltaqp mode
  if (m_modeCtrl->getFastDeltaQp() && tempCS->area.lumaSize().width > tempCS->pcv->fastDeltaQPCuMaxSize)
  {
    return;
  }

  tempCS->initStructData(encTestMode.qp);

  const unsigned widthIdx  = gp_sizeIdxInfo->idxFrom(tempCS->area.lwidth());
  const unsigned heightIdx = gp_sizeIdxInfo->idxFrom(tempCS->area.lheight());

  CodingStructure* const storedCS = m_pTempCUWoOBMC[widthIdx][heightIdx];

  // Nothing has been stored for this block size.
  if (storedCS->cus.empty())
  {
    return;
  }

  CodingUnit *cu = storedCS->getCU(partitioner.chType);

  // Only a stored CU that has OBMC on and is neither intra, merge, IBC, GEO (nor LIC) is re-tested.
  const bool isEligible = cu->obmcFlag
    && !(cu->predMode == MODE_INTRA)
    && !cu->firstPU->mergeFlag
    && !CU::isIBC(*cu)
    && !cu->geoFlag
#if INTER_LIC
    && !cu->LICFlag
#endif
    ;
  if (!isEligible)
  {
    return;
  }

  // Luma SAD of both predictions against the original signal.
  const SPS    &sps          = *tempCS->sps;
  const int     lumaBitDepth = sps.getBitDepth(CHANNEL_TYPE_LUMA);
  const CPelBuf orgLuma      = tempCS->getOrgBuf(cu->Y());

  const Distortion sadWithoutOBMC = m_pcRdCost->getDistPart(orgLuma, m_pPredBufWoOBMC[widthIdx][heightIdx].Y(),
    lumaBitDepth, COMPONENT_Y, DF_SAD_FULL_NBIT);
  const Distortion sadStoredPred  = m_pcRdCost->getDistPart(orgLuma, storedCS->getPredBuf(cu->Y()),
    lumaBitDepth, COMPONENT_Y, DF_SAD_FULL_NBIT);

  // Scaling factor applied to the "without OBMC" SAD before the comparison.
  const double thresholdScale = 1.0;
  if (!(sadWithoutOBMC * thresholdScale < sadStoredPred))
  {
    return;
  }

  // Rebuild the candidate in tempCS from the stored structure and the prediction without OBMC.
  tempCS->copyStructure(*storedCS, partitioner.chType);
  tempCS->getPredBuf(*cu).copyFrom(m_pPredBufWoOBMC[widthIdx][heightIdx]);

  // From here on work on the copy inside tempCS and signal OBMC off.
  cu = tempCS->getCU(partitioner.chType);
  cu->obmcFlag = false;

  xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0);
}
#endif

#if JVET_X0049_ADAPT_DMVR
/** Evaluates the BM merge candidates (pu.bmMergeFlag) of a PU and inserts them into the candidate list.
 *
 *  For every tested merge candidate the first direction (pu.bmDir == 1) and, when indicated, the
 *  second direction (pu.bmDir == 2, stored at merge index candIdx + BM_MRG_MAX_NUM_CANDS) are
 *  refined at sub-PU level and motion compensated into \p singleMergeTempBuffer. The cost
 *  distortion + bits * lambda is handed to updateCandList(), and the prediction buffers are
 *  shifted so that the new prediction ends up at the insert position in \p acMergeTempBuffer.
 *
 *  \param tempCS                 not referenced in the function body
 *  \param cu                     coding unit under test; mode flags are overwritten
 *  \param pu                     prediction unit under test; merge flags are overwritten and partly reset on return
 *  \param mrgCtx                 merge candidates used for direction 1 (and for both directions without refined ARMC motion)
 *  \param mrgCtxDir2             merge candidates used for direction 2 when \p armcRefinedMotion is set
 *  \param armcRefinedMotion      when true, a pre-pass refines all candidates and calls adjustMergeCandidatesInOneCandidateGroup()
 *  \param acMergeTempBuffer      prediction buffers of the candidates kept in \p RdModeList
 *  \param singleMergeTempBuffer  scratch prediction buffer; swapped with an entry of \p acMergeTempBuffer on insertion
 *  \param uiNumMrgSATDCand       forwarded to updateCandList()
 *  \param RdModeList             candidate mode list updated through updateCandList()
 *  \param candCostList           candidate cost list updated through updateCandList()
 *  \param distParam              distortion parameters (by value); cur is pointed at the luma prediction before each call
 *  \param ctxStart               CABAC context state restored before every bit estimate
 *  \param applyBDMVR             per merge-index flags written in this function
 *
 *  NOTE(review): applyBDMVR is only declared when MULTI_PASS_DMVR && !ADAPT_DIRECTIONAL_DMVR_SKIP_SUBPU_BDOF_REFINE
 *  but is used unconditionally in the body - confirm the supported macro combinations.
 */
void EncCu::xCheckSATDCostBMMerge(CodingStructure*& tempCS,
  CodingUnit&       cu,
  PredictionUnit&   pu,
  MergeCtx&         mrgCtx,
#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
  MergeCtx&         mrgCtxDir2,
  bool              armcRefinedMotion,
#endif
  PelUnitBuf*       acMergeTempBuffer[MMVD_MRG_MAX_RD_NUM],
  PelUnitBuf*&      singleMergeTempBuffer,
  unsigned&         uiNumMrgSATDCand,
  static_vector<ModeInfo, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM>  &RdModeList,
  static_vector<double, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM>    &candCostList,
  DistParam         distParam,
  const TempCtx&          ctxStart
#if MULTI_PASS_DMVR && !ADAPT_DIRECTIONAL_DMVR_SKIP_SUBPU_BDOF_REFINE
  , bool*             applyBDMVR
#endif
)
{
  // Configure CU/PU as a BM merge candidate: every other merge variant is switched off.
  pu.mergeFlag = true;
  cu.mmvdSkip = false;
  cu.geoFlag = false;
  cu.affine = false;
  cu.imv = IMV_OFF;
  pu.ciipFlag = false;
#if CIIP_PDPC
  pu.ciipPDPC = false;
#endif
  pu.mmvdMergeFlag = false;
  pu.regularMergeFlag = false;
  pu.bmMergeFlag = true;
#if TM_MRG || (JVET_Z0084_IBC_TM && IBC_TM_MRG)
  pu.tmMergeFlag = false;
#endif
  pu.mvRefine = false;
#if INTER_LIC
  m_pcInterSearch->m_storeBeforeLIC = false;
#endif
  pu.bdmvrRefine = true;
  mrgCtx.setMergeInfo(pu, 0);

  // Weight applied to the estimated bits in the cost below (motion lambda times FRAC_BITS_SCALE).
  const double sqrtLambdaForFirstPassIntra = m_pcRdCost->getMotionLambda() * FRAC_BITS_SCALE;
  int insertPos = -1;
  // Number of merge candidates to test; the exact rule depends on the enabled tools.
#if JVET_Y0134_TMVP_NAMVP_CAND_REORDERING && JVET_W0090_ARMC_TM
#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
  uint32_t maxNumCand = armcRefinedMotion ? mrgCtx.numValidMergeCand : ((pu.cs->sps->getUseAML()) ? min(mrgCtx.numValidMergeCand, mrgCtx.numCandToTestEnc) : mrgCtx.numCandToTestEnc);
#else
  const uint32_t maxNumCand = (pu.cs->sps->getUseAML()) ? min(mrgCtx.numValidMergeCand, mrgCtx.numCandToTestEnc) : mrgCtx.numCandToTestEnc;
#endif
#else
#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
  uint32_t maxNumCand = armcRefinedMotion ? mrgCtx.numValidMergeCand : ((pu.cs->sps->getUseAML()) ? min(mrgCtx.numValidMergeCand, mrgCtx.numCandToTestEnc) : mrgCtx.numCandToTestEnc);
#else
  // NOTE(review): the inner #if below sits inside the #else of the same macro, so its first branch can never be taken.
#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
  uint32_t maxNumCand = armcRefinedMotion ? mrgCtx.numValidMergeCand : mrgCtx.numCandToTestEnc;
#else
  const uint32_t maxNumCand = mrgCtx.numCandToTestEnc;
#endif
#endif
#endif
  // Outputs of processBDMVRPU2Dir(): per-direction sub-PU refinement flag and refined MV.
  bool subPuRefine[2] = { false, false };
  Mv   finalMvDir[2];
#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
  bool hasAtLeastOne2nd = false;
  // Per merge index and direction: sub-PU refinement flags recorded during the pre-pass.
  bool subRefineList[BM_MRG_MAX_NUM_INIT_CANDS << 2][2] = {{false, false}, };
  bool subRefineListTmp[BM_MRG_MAX_NUM_INIT_CANDS << 2][2] = {{false, false}, };
  if (armcRefinedMotion)
  {
    // Pre-pass: refine every candidate, write the refined motion back into the merge contexts,
    // then let adjustMergeCandidatesInOneCandidateGroup() process each context.
    for (uint32_t candIdx = 0; candIdx < maxNumCand; candIdx++)
    {
      pu.cu->imv = mrgCtx.useAltHpelIf[candIdx] ? IMV_HPEL : 0;
      pu.cu->BcwIdx = mrgCtx.BcwIdx[candIdx];
      pu.mv[REF_PIC_LIST_0] = mrgCtx.mvFieldNeighbours[(candIdx << 1)].mv;
      pu.mv[REF_PIC_LIST_1] = mrgCtx.mvFieldNeighbours[(candIdx << 1) + 1].mv;
      pu.refIdx[REF_PIC_LIST_0] = mrgCtx.mvFieldNeighbours[(candIdx << 1)].refIdx;
      pu.refIdx[REF_PIC_LIST_1] = mrgCtx.mvFieldNeighbours[(candIdx << 1) + 1].refIdx;
      bool test2nd = m_pcInterSearch->processBDMVRPU2Dir(pu, subPuRefine, finalMvDir);
      hasAtLeastOne2nd |= test2nd;
      for (pu.bmDir = 1; pu.bmDir <= (test2nd ? 2 : 1); pu.bmDir++)
      {
        uint8_t curDir = pu.bmDir - 1;
        uint8_t refDir = 1 - curDir;
        uint32_t uiMergeCand = candIdx;
        // During the pre-pass, direction-2 entries are stored at an offset of BM_MRG_MAX_NUM_INIT_CANDS.
        if (pu.bmDir == 2)
        {
          uiMergeCand = candIdx + BM_MRG_MAX_NUM_INIT_CANDS;
        }
        applyBDMVR[uiMergeCand] = true;
        pu.mergeIdx = uiMergeCand;
        // The current direction takes the refined MV; the other one keeps the MV of the matching merge context.
        pu.mv[curDir] = finalMvDir[curDir];
        if (pu.bmDir == 1)
        {
          pu.mv[refDir] = mrgCtx.mvFieldNeighbours[(candIdx << 1) + refDir].mv;
        }
        else
        {
          pu.mv[refDir] = mrgCtxDir2.mvFieldNeighbours[(candIdx << 1) + refDir].mv;
        }
        // Write the resulting motion back into the merge context of this direction.
        if (pu.bmDir == 1)
        {
          mrgCtx.mvFieldNeighbours[2 * candIdx    ].setMvField( pu.mv[0], pu.refIdx[0] );
          mrgCtx.mvFieldNeighbours[2 * candIdx + 1].setMvField( pu.mv[1], pu.refIdx[1] );
        }
        else
        {
          mrgCtxDir2.mvFieldNeighbours[2 * candIdx    ].setMvField( pu.mv[0], pu.refIdx[0] );
          mrgCtxDir2.mvFieldNeighbours[2 * candIdx + 1].setMvField( pu.mv[1], pu.refIdx[1] );
        }
        subRefineList[uiMergeCand][curDir] = subPuRefine[curDir];
        subRefineListTmp[uiMergeCand][curDir] = subRefineList[uiMergeCand][curDir];
      }
      if (!test2nd)
      {
        // No second direction for this candidate: mark its direction-2 slot as not applicable.
        uint32_t uiMergeCand = candIdx + BM_MRG_MAX_NUM_INIT_CANDS;
        applyBDMVR[uiMergeCand] = false;
        pu.mv[0] = mrgCtxDir2.mvFieldNeighbours[(candIdx << 1)].mv;
        pu.mv[1] = mrgCtxDir2.mvFieldNeighbours[(candIdx << 1) + 1].mv;
        subRefineList[uiMergeCand][1] = subPuRefine[1];
        subRefineListTmp[uiMergeCand][1] = subRefineList[uiMergeCand][1];
      }
    }
    pu.bmDir = 0;
    m_pcInterSearch->adjustMergeCandidatesInOneCandidateGroup(pu, mrgCtx, NULL, NULL, NULL, mrgCtx.numValidMergeCand, subRefineList, subRefineListTmp);
    if (hasAtLeastOne2nd)
    {
      m_pcInterSearch->adjustMergeCandidatesInOneCandidateGroup(pu, mrgCtxDir2, applyBDMVR + BM_MRG_MAX_NUM_INIT_CANDS, NULL, NULL, mrgCtxDir2.numValidMergeCand, &subRefineList[BM_MRG_MAX_NUM_INIT_CANDS], &subRefineListTmp[BM_MRG_MAX_NUM_INIT_CANDS]);
      // Re-index the direction-2 flags from the BM_MRG_MAX_NUM_INIT_CANDS offset to the
      // BM_MRG_MAX_NUM_CANDS offset used by the main loop below.
      for (uint32_t candIdx = BM_MRG_MAX_NUM_CANDS; candIdx < 2*BM_MRG_MAX_NUM_CANDS; candIdx++)
      {
        subRefineList[candIdx][1] = subRefineList[candIdx - BM_MRG_MAX_NUM_CANDS + BM_MRG_MAX_NUM_INIT_CANDS][1];
      }
    }
    // The same re-indexing for applyBDMVR is done regardless of hasAtLeastOne2nd.
    for (uint32_t candIdx = BM_MRG_MAX_NUM_CANDS; candIdx < 2*BM_MRG_MAX_NUM_CANDS; candIdx++)
    {
      applyBDMVR[candIdx] = applyBDMVR[candIdx - BM_MRG_MAX_NUM_CANDS + BM_MRG_MAX_NUM_INIT_CANDS];
    }
    // Clamp both candidate lists to the maximum number of BM merge candidates of the SPS.
    if (mrgCtx.numValidMergeCand > pu.cs->sps->getMaxNumBMMergeCand())
    {
      mrgCtx.numValidMergeCand = pu.cs->sps->getMaxNumBMMergeCand();
    }
    if (mrgCtxDir2.numValidMergeCand > pu.cs->sps->getMaxNumBMMergeCand())
    {
      mrgCtxDir2.numValidMergeCand = pu.cs->sps->getMaxNumBMMergeCand();
    }
    maxNumCand = ::min(mrgCtx.numValidMergeCand, (int)pu.cs->sps->getMaxNumBMMergeCand());
  }
#endif
  // Main loop: build the prediction and cost of every candidate/direction and update the candidate list.
  for (uint32_t candIdx = 0; candIdx < maxNumCand; candIdx++)
  {
    pu.cu->imv = mrgCtx.useAltHpelIf[candIdx] ? IMV_HPEL : 0;
    pu.cu->BcwIdx = mrgCtx.BcwIdx[candIdx];
    pu.mv[REF_PIC_LIST_0] = mrgCtx.mvFieldNeighbours[(candIdx << 1) + 0].mv;
    pu.mv[REF_PIC_LIST_1] = mrgCtx.mvFieldNeighbours[(candIdx << 1) + 1].mv;
    pu.refIdx[REF_PIC_LIST_0] = mrgCtx.mvFieldNeighbours[(candIdx << 1) + 0].refIdx;
    pu.refIdx[REF_PIC_LIST_1] = mrgCtx.mvFieldNeighbours[(candIdx << 1) + 1].refIdx;
    // With refined ARMC motion the decision to test direction 2 was made in the pre-pass;
    // otherwise the PU-level refinement is run here.
#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
    bool test2nd = armcRefinedMotion ? applyBDMVR[candIdx + BM_MRG_MAX_NUM_CANDS] : m_pcInterSearch->processBDMVRPU2Dir(pu, subPuRefine, finalMvDir);
#else
    bool test2nd = m_pcInterSearch->processBDMVRPU2Dir(pu, subPuRefine, finalMvDir);
#endif
    for (pu.bmDir = 1; pu.bmDir <= (test2nd ? 2 : 1); pu.bmDir++)
    {
      uint8_t curDir = pu.bmDir - 1;
      uint8_t refDir = 1 - curDir;
      uint32_t uiMergeCand = candIdx;
      // Direction-2 candidates use merge indices offset by BM_MRG_MAX_NUM_CANDS.
      if (pu.bmDir == 2)
      {
        uiMergeCand = candIdx + BM_MRG_MAX_NUM_CANDS;
      }

      pu.mergeIdx = uiMergeCand;
#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
      if (armcRefinedMotion)
      {
        // Motion was already refined in the pre-pass: load it from the merge context of this direction.
        if (pu.bmDir == 1)
        {
          pu.cu->imv = mrgCtx.useAltHpelIf[candIdx] ? IMV_HPEL : 0;
          pu.cu->BcwIdx = mrgCtx.BcwIdx[candIdx];
          pu.mv[0] = mrgCtx.mvFieldNeighbours[(candIdx << 1)].mv;
          pu.mv[1] = mrgCtx.mvFieldNeighbours[(candIdx << 1) + 1].mv;
          pu.refIdx[REF_PIC_LIST_0] = mrgCtx.mvFieldNeighbours[(candIdx << 1)].refIdx;
          pu.refIdx[REF_PIC_LIST_1] = mrgCtx.mvFieldNeighbours[(candIdx << 1) + 1].refIdx;
        }
        else
        {
          pu.cu->imv = mrgCtxDir2.useAltHpelIf[candIdx] ? IMV_HPEL : 0;
          pu.cu->BcwIdx = mrgCtxDir2.BcwIdx[candIdx];
          pu.mv[0] = mrgCtxDir2.mvFieldNeighbours[(candIdx << 1)].mv;
          pu.mv[1] = mrgCtxDir2.mvFieldNeighbours[(candIdx << 1) + 1].mv;
          pu.refIdx[REF_PIC_LIST_0] = mrgCtxDir2.mvFieldNeighbours[(candIdx << 1)].refIdx;
          pu.refIdx[REF_PIC_LIST_1] = mrgCtxDir2.mvFieldNeighbours[(candIdx << 1) + 1].refIdx;
        }
      }
      else
      {
#endif
      // The current direction takes the refined MV; the other one keeps the merge candidate's MV.
      pu.mv[curDir] = finalMvDir[curDir];
      pu.mv[refDir] = mrgCtx.mvFieldNeighbours[(candIdx << 1) + refDir].mv;
      applyBDMVR[uiMergeCand] = true;
#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
      }
#endif
      pu.bdmvrRefine = true;
      // Each merge index owns two consecutive entries of the BM BDMVR sub-PU MV buffer.
      m_pcInterSearch->setBdmvrSubPuMvBuf(m_mvBufBDMVR4BM[uiMergeCand << 1], m_mvBufBDMVR4BM[(uiMergeCand << 1) + 1]);

#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
      if (armcRefinedMotion)
      {
        m_pcInterSearch->processBDMVRSubPU(pu, subRefineList[uiMergeCand][curDir]);
      }
      else
      {
#endif
      m_pcInterSearch->processBDMVRSubPU(pu, subPuRefine[curDir]);
#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
      }
#endif

      m_pcInterSearch->motionCompensation(pu, *singleMergeTempBuffer);
#if MULTI_PASS_DMVR 
      // Keep the BDOF sub-PU MV offsets produced for this candidate.
      ::memcpy(m_mvBufEncBDOF4BM[uiMergeCand], m_pcInterSearch->getBdofSubPuMvOffset(), sizeof(Mv) * BDOF_SUBPU_MAX_NUM);
#endif
      // Distortion of the luma prediction, measured with the function configured in distParam.
      distParam.cur = singleMergeTempBuffer->Y();
      Distortion uiSad = distParam.distFunc(distParam);

      // Bits are estimated from the initial context state.
      m_CABACEstimator->getCtx() = ctxStart;
      uint64_t fracBits = m_pcInterSearch->xCalcPuMeBits(pu);
      double cost = (double)uiSad + (double)fracBits * sqrtLambdaForFirstPassIntra;
      insertPos = -1;
      updateCandList(ModeInfo(cu, pu), cost, RdModeList, candCostList, uiNumMrgSATDCand, &insertPos);
      if (insertPos != -1 && insertPos < MMVD_MRG_MAX_RD_NUM)
      {
        // Move the last buffer up to insertPos (shifting the ones in between down by one), then swap it
        // with the scratch buffer so that the new prediction sits at insertPos.
        for (int i = int(RdModeList.size()) - 1; i > insertPos; i--)
        {
          swap(acMergeTempBuffer[i - 1], acMergeTempBuffer[i]);
        }
        swap(singleMergeTempBuffer, acMergeTempBuffer[insertPos]);
      }
    }
  }
  // Leave the PU/CU in regular-merge state for the caller.
  pu.bmDir = 0;
  pu.bmMergeFlag = false;
  pu.regularMergeFlag = true;
#if TM_MRG || (JVET_Z0084_IBC_TM && IBC_TM_MRG)
  pu.tmMergeFlag = false;
#endif
  cu.affine = false;
#if AFFINE_MMVD
  pu.afMmvdFlag = false;
#endif
#if MULTI_PASS_DMVR
  pu.bdmvrRefine = false;
#endif
}
#endif
//! \}