Skip to content
Snippets Groups Projects
EncCu.cpp 72.9 KiB
Newer Older
  • Learn to ignore specific revisions
  •     // LARGE CTU bug
        if( m_pcEncCfg->getQTBT() && m_pcEncCfg->getUseFastLCTU() )
        {
          unsigned minDepth = 0;
          unsigned maxDepth = g_aucLog2[tempCS->sps->getSpsNext().getCTUSize()] - g_aucLog2[tempCS->sps->getSpsNext().getMinQTSize( slice.getSliceType(), partitioner.chType )];
    
          if( auto ad = dynamic_cast<AdaptiveDepthPartitioner*>( &partitioner ) )
          {
            ad->setMaxMinDepth( minDepth, maxDepth, *tempCS );
          }
    
          if( minDepth > partitioner.currQtDepth )
          {
            // enforce QT
            enforceQT = true;
          }
        }
    #endif
    
        if( !enforceQT )
        {
          m_CABACEstimator->resetBits();
    
          if( partitioner.canSplit( CU_QUAD_SPLIT, *tempCS ) )
          {
            m_CABACEstimator->split_cu_flag( split == CU_QUAD_SPLIT, *tempCS, partitioner );
          }
          if( split != CU_QUAD_SPLIT )
          {
            m_CABACEstimator->split_cu_mode_mt( split, *tempCS, partitioner );
          }
    
          tempCS->fracBits += m_CABACEstimator->getEstFracBits(); // split bits
        }
      }
    
      tempCS->cost = m_pcRdCost->calcRdCost( tempCS->fracBits, tempCS->dist );
    
      // Check Delta QP bits for splitted structure
      xCheckDQP( *tempCS, partitioner, true );
    
      // If the configuration being tested exceeds the maximum number of bytes for a slice / slice-segment, then
      // a proper RD evaluation cannot be performed. Therefore, termination of the
      // slice/slice-segment must be made prior to this CTU.
      // This can be achieved by forcing the decision to be that of the rpcTempCU.
      // The exception is each slice / slice-segment must have at least one CTU.
      if (bestCS->cost != MAX_DOUBLE)
      {
    #if HEVC_TILES_WPP
        const TileMap& tileMap = *tempCS->picture->tileMap;
    #endif
    #if HEVC_TILES_WPP || HEVC_DEPENDENT_SLICES
        const uint32_t CtuAddr             = CU::getCtuAddr( *bestCS->getCU( partitioner.chType ) );
    #endif
        const bool isEndOfSlice        =    slice.getSliceMode() == FIXED_NUMBER_OF_BYTES
                                          && ((slice.getSliceBits() + CS::getEstBits(*bestCS)) > slice.getSliceArgument() << 3)
    #if HEVC_TILES_WPP
                                          && CtuAddr != tileMap.getCtuTsToRsAddrMap(slice.getSliceCurStartCtuTsAddr())
    #endif
    #if HEVC_DEPENDENT_SLICES
                                          && CtuAddr != tileMap.getCtuTsToRsAddrMap(slice.getSliceSegmentCurStartCtuTsAddr());
    #else
                                          ;
    #endif
    
    #if HEVC_DEPENDENT_SLICES
        const bool isEndOfSliceSegment =    slice.getSliceSegmentMode() == FIXED_NUMBER_OF_BYTES
                                          && ((slice.getSliceSegmentBits() + CS::getEstBits(*bestCS)) > slice.getSliceSegmentArgument() << 3)
                                          && CtuAddr != tileMap.getCtuTsToRsAddrMap(slice.getSliceSegmentCurStartCtuTsAddr());
                                              // Do not need to check slice condition for slice-segment since a slice-segment is a subset of a slice.
        if (isEndOfSlice || isEndOfSliceSegment)
    #else
        if(isEndOfSlice)
    #endif
        {
          bestCS->cost = MAX_DOUBLE;
        }
      }
    
    
      // RD check for sub partitioned coding structure.
      xCheckBestMode( tempCS, bestCS, partitioner, encTestMode );
    
      tempCS->releaseIntermediateData();
    
      tempCS->prevQP[partitioner.chType] = oldPrevQp;
    }
    
    
    // Evaluates intra prediction as a candidate coding mode for the current partition.
    //
    // Runs one pass with emtCuFlag == 0 and, when considerEmtSecondPass is set, a second pass with
    // emtCuFlag == 1. Each pass that is not skipped builds an intra CU in tempCS, runs the luma and/or
    // chroma intra searches, estimates the bits with the CABAC estimator, computes the RD cost and
    // hands the result to xCheckBestMode().
    //
    //   tempCS       working coding structure; re-initialised at the start of every executed pass
    //   bestCS       best coding structure so far; passed on to xCheckBestMode()
    //   partitioner  supplies the current area and channel type
    //   encTestMode  mode under test (qp, lossless flag and partSize are read here)
    void EncCu::xCheckRDCostIntra( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
    {
      // State fetched from the mode controller once, before any pass is run.
      double bestInterCost             = m_modeCtrl->getBestInterCost();
      double costSize2Nx2NemtFirstPass = m_modeCtrl->getEmtSize2Nx2NFirstPassCost();
      // Only written (never read) inside this function as shown.
      double costSizeNxNemtFirstPass   = MAX_DOUBLE;
      bool skipSecondEmtPass           = m_modeCtrl->getSkipSecondEMTPass();
      const SPS &sps                   = *tempCS->sps;
      const PPS &pps              = *tempCS->pps;
      // NOTE(review): bestCU is captured once here, but xCheckBestMode() is called inside the loop
      // below - confirm this pointer still refers to the best CU on the second pass.
      const CodingUnit *bestCU    = bestCS->getCU( partitioner.chType );
      const int maxSizeEMT        = pps.pcv->noRQT ? EMT_INTRA_MAX_CU_WITH_QTBT : EMT_INTRA_MAX_CU;
      // The second (EMT) pass is only considered for luma blocks no larger than maxSizeEMT in either
      // dimension when intra EMT is enabled in the SPS.
    #if HM_EMT_NSST_AS_IN_JEM
      uint8_t considerEmtSecondPass = ( sps.getSpsNext().getUseIntraEMT() && isLuma( partitioner.chType ) && partitioner.currArea().lwidth() <= maxSizeEMT && partitioner.currArea().lheight() <= maxSizeEMT ) ? 1 : 0;
    #else
      // NOTE(review): nsstIdx is not declared anywhere in this function as shown - verify that this
      // branch still compiles when HM_EMT_NSST_AS_IN_JEM is 0.
      uint8_t considerEmtSecondPass = ( sps.getSpsNext().getUseIntraEMT() && isLuma( partitioner.chType ) && partitioner.currArea().lwidth() <= maxSizeEMT && partitioner.currArea().lheight() <= maxSizeEMT && nsstIdx == 0 ) ? 1 : 0;
    #endif
    
      Distortion interHad = m_modeCtrl->getInterHad();
    
    
    
      // emtCuFlag == 0 is the first pass; emtCuFlag == 1 is the optional second pass.
      for( uint8_t emtCuFlag = 0; emtCuFlag <= considerEmtSecondPass; emtCuFlag++ )
      {
        //Possible early EMT tests interruptions
        //2) Second EMT pass. This "if clause" is necessary because of the NSST and PDPC "for loops".
        if( emtCuFlag && skipSecondEmtPass )
        {
          continue;
        }
        //3) if interHad is 0, only try further modes if some intra mode was already better than inter
        if( m_pcEncCfg->getUsePbIntraFast() && !tempCS->slice->isIntra() && bestCU && CU::isInter( *bestCS->getCU( partitioner.chType ) ) && interHad == 0 )
        {
          continue;
        }
    
        // Start this pass from a clean coding structure.
        tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
    
        CodingUnit &cu      = tempCS->addCU( CS::getArea( *tempCS, tempCS->area, partitioner.chType ), partitioner.chType );
    
        // Fields assigned after setCUData() overwrite whatever it may have set for them.
        partitioner.setCUData( cu );
        cu.slice            = tempCS->slice;
    #if HEVC_TILES_WPP
        cu.tileIdx          = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() );
    #endif
        cu.skip             = false;
        cu.partSize         = encTestMode.partSize;
        cu.predMode         = MODE_INTRA;
        cu.transQuantBypass = encTestMode.lossless;
        cu.chromaQpAdj      = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1;
        cu.qp               = encTestMode.qp;
      //cu.ipcm             = false;
        cu.emtFlag          = emtCuFlag;
    
        CU::addPUs( cu );
    
        // Hand the current interHad value to the coding structure before the luma search runs.
        tempCS->interHad    = interHad;
    
        if( isLuma( partitioner.chType ) )
        {
          m_pcIntraSearch->estIntraPredLumaQT( cu, partitioner );
    
          // With the fast PB-intra option, a distortion equal to the maximum Distortion value together
          // with interHad == 0 abandons this pass and records interHad = 0 in the mode controller.
          if (m_pcEncCfg->getUsePbIntraFast() && tempCS->dist == std::numeric_limits<Distortion>::max()
              && tempCS->interHad == 0)
          {
            interHad = 0;
            // JEM assumes only perfect reconstructions can from now on beat the inter mode
            m_modeCtrl->enforceInterHad( 0 );
            continue;
          }
    
          // Single-tree case: copy the luma reconstruction into the picture buffer before the chroma
          // search below runs.
          if( !CS::isDualITree( *tempCS ) )
          {
            cu.cs->picture->getRecoBuf( cu.Y() ).copyFrom( cu.cs->getRecoBuf( COMPONENT_Y ) );
          }
        }
    
        // Chroma is searched when the format has chroma and either this is the chroma channel or
        // luma and chroma share one tree.
        if( tempCS->area.chromaFormat != CHROMA_400 && ( partitioner.chType == CHANNEL_TYPE_CHROMA || !CS::isDualITree( *tempCS ) ) )
        {
          m_pcIntraSearch->estIntraPredChromaQT( cu, partitioner );
        }
    
        // rootCbf is the OR of the first TU's cbf over all valid transform blocks.
        cu.rootCbf = false;
    
        for( uint32_t t = 0; t < getNumberValidTBlocks( *cu.cs->pcv ); t++ )
        {
          cu.rootCbf |= cu.firstTU->cbf[t] != 0;
        }
    
        // Get total bits for current mode: encode CU
        m_CABACEstimator->resetBits();
    
        if( pps.getTransquantBypassEnabledFlag() )
        {
          m_CABACEstimator->cu_transquant_bypass_flag( cu );
        }
    
    
        {
          m_CABACEstimator->cu_skip_flag ( cu );
        }
        m_CABACEstimator->pred_mode      ( cu );
        m_CABACEstimator->cu_pred_data   ( cu );
        m_CABACEstimator->pcm_data       ( cu );
    
    
        // Encode Coefficients
        // Both flags are preset to true in the context passed to cu_residual(); the delta-QP bits are
        // added separately by xCheckDQP() below.
        CUCtx cuCtx;
        cuCtx.isDQPCoded = true;
        cuCtx.isChromaQpAdjCoded = true;
        m_CABACEstimator->cu_residual( cu, partitioner, cuCtx );
    
        tempCS->fracBits = m_CABACEstimator->getEstFracBits();
        tempCS->cost     = m_pcRdCost->calcRdCost(tempCS->fracBits, tempCS->dist);
    
        xEncodeDontSplit( *tempCS, partitioner );
    
        xCheckDQP( *tempCS, partitioner );
    
    
        // we save the cost of the modes for the first EMT pass
        // (stored into the 2Nx2N or the NxN variable depending on cu.partSize)
        if( !emtCuFlag ) static_cast< double& >( cu.partSize == SIZE_2Nx2N ? costSize2Nx2NemtFirstPass : costSizeNxNemtFirstPass ) = tempCS->cost;
    
    #if WCG_EXT
        DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) );
    #else
        DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda() );
    #endif
        xCheckBestMode( tempCS, bestCS, partitioner, encTestMode );
    
    
        //now we check whether the second pass of SIZE_2Nx2N and the whole Intra SIZE_NxN should be skipped or not
        if( !emtCuFlag && !tempCS->slice->isIntra() && bestCU && bestCU->predMode != MODE_INTRA && cu.partSize == SIZE_2Nx2N && m_pcEncCfg->getFastInterEMT() )
        {
          const double thEmtInterFastSkipIntra = 1.4; // Skip checking Intra if "2Nx2N using DCT2" is worse than best Inter mode
          if( costSize2Nx2NemtFirstPass > thEmtInterFastSkipIntra * bestInterCost )
          {
            // Record the decision both locally and in the mode controller, then stop testing.
            skipSecondEmtPass = true;
            m_modeCtrl->setSkipSecondEMTPass( true );
            break;
          }
        }
    
      } //for emtCuFlag
    }
    
    // Tests IPCM coding of the whole current area as a candidate mode.
    //
    // A single intra CU (with one PU and one TU spanning tempCS->area) is created with ipcm set,
    // IPCMSearch() is run on it, the signalling bits are estimated from the context stored in
    // m_CurrCtx->start, and the resulting RD cost is submitted to xCheckBestMode().
    //
    //   tempCS       working coding structure, re-initialised on entry
    //   bestCS       best coding structure so far; passed on to xCheckBestMode()
    //   partitioner  supplies the channel type and the CU data
    //   encTestMode  mode under test (qp and lossless flag are read here)
    void EncCu::xCheckIntraPCM(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
    {
      tempCS->initStructData( encTestMode.qp, encTestMode.lossless );

      // Build the PCM coding unit covering the full area of the working structure.
      CodingUnit &pcmCU = tempCS->addCU( tempCS->area, partitioner.chType );
      partitioner.setCUData( pcmCU );

      pcmCU.slice            = tempCS->slice;
    #if HEVC_TILES_WPP
      pcmCU.tileIdx          = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() );
    #endif
      pcmCU.predMode         = MODE_INTRA;
      pcmCU.partSize         = SIZE_2Nx2N;
      pcmCU.skip             = false;
      pcmCU.ipcm             = true;
      pcmCU.qp               = encTestMode.qp;
      pcmCU.transQuantBypass = encTestMode.lossless;
      if( pcmCU.transQuantBypass )
      {
        pcmCU.chromaQpAdj    = 0;
      }
      else
      {
        pcmCU.chromaQpAdj    = m_cuChromaQpOffsetIdxPlus1;
      }

      // One prediction unit and one transform unit, both spanning the whole area.
      tempCS->addPU( tempCS->area, partitioner.chType );
      tempCS->addTU( tempCS->area, partitioner.chType );

      m_pcIntraSearch->IPCMSearch( *tempCS, partitioner );

      // Estimate the signalling bits starting from the stored start context.
      m_CABACEstimator->getCtx() = m_CurrCtx->start;
      m_CABACEstimator->resetBits();

      const bool bypassFlagSignalled = tempCS->pps->getTransquantBypassEnabledFlag();
      if( bypassFlagSignalled )
      {
        m_CABACEstimator->cu_transquant_bypass_flag( pcmCU );
      }
      m_CABACEstimator->cu_skip_flag( pcmCU );
      m_CABACEstimator->pred_mode   ( pcmCU );
      m_CABACEstimator->pcm_data    ( pcmCU );

      // RD cost from the estimated bits and the distortion left in the structure by the search.
      tempCS->fracBits = m_CABACEstimator->getEstFracBits();
      tempCS->cost     = m_pcRdCost->calcRdCost( tempCS->fracBits, tempCS->dist );

      xEncodeDontSplit( *tempCS, partitioner );
      xCheckDQP       ( *tempCS, partitioner );

    #if WCG_EXT
      DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) );
    #else
      DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda() );
    #endif
      xCheckBestMode( tempCS, bestCS, partitioner, encTestMode );
    }
    
    // Accounts for the delta-QP signalling of a coding structure and normalises the QP of CUs that
    // carry no residual.
    //
    // When at least one CU in cs has rootCbf set, the bits of cu_qp_delta for the first CU are added
    // to cs.fracBits and cs.cost is recomputed. Every CU that precedes the first CU with rootCbf set
    // (all CUs when there is none) gets its qp replaced by the predicted QP.
    //
    //   cs           coding structure to update
    //   partitioner  supplies channel type, current depth and the implicit split
    //   bKeepCtx     true for the split case (see the CHECKs); when false the DeltaQP contexts of
    //                the estimator are saved before and restored after the bit estimation
    void EncCu::xCheckDQP( CodingStructure& cs, Partitioner& partitioner, bool bKeepCtx )
    {
      CHECK( bKeepCtx && cs.cus.size() <= 1 && partitioner.getImplicitSplit( cs ) == CU_DONT_SPLIT, "bKeepCtx should only be set in split case" );
      CHECK( !bKeepCtx && cs.cus.size() > 1, "bKeepCtx should never be set for non-split case" );

      // Nothing to do when delta QP is not in use.
      if( !cs.pps->getUseDQP() )
      {
        return;
      }

      // Split case: act only exactly at the maximum DQP depth.
      // Non-split case: act only at that depth or above it.
      const auto maxDqpDepth   = cs.pps->getMaxCuDQPDepth();
      const bool depthExcluded = bKeepCtx ? ( partitioner.currDepth != maxDqpDepth )
                                          : ( partitioner.currDepth >  maxDqpDepth );
      if( depthExcluded )
      {
        return;
      }

      CodingUnit* cuFirst = cs.getCU( partitioner.chType );

      CHECK( !cuFirst, "No CU available" );

      // Does any CU of the structure carry a residual?
      bool anyCoded = false;
      for( auto cuIt = cs.cus.begin(); cuIt != cs.cus.end() && !anyCoded; ++cuIt )
      {
        if( ( *cuIt )->rootCbf )
        {
          anyCoded = true;
        }
      }

      const int predQP = CU::predictQP( *cuFirst, cs.prevQP[partitioner.chType] );

      if( anyCoded )
      {
        const bool restoreCtx = !bKeepCtx;

        TempCtx ctxTemp( m_CtxCache );
        if( restoreCtx )
        {
          ctxTemp = SubCtx( Ctx::DeltaQP, m_CABACEstimator->getCtx() );
        }

        m_CABACEstimator->resetBits();
        m_CABACEstimator->cu_qp_delta( *cuFirst, predQP, cuFirst->qp );

        cs.fracBits += m_CABACEstimator->getEstFracBits(); // dQP bits
        cs.cost      = m_pcRdCost->calcRdCost( cs.fracBits, cs.dist );

        if( restoreCtx )
        {
          m_CABACEstimator->getCtx() = SubCtx( Ctx::DeltaQP, ctxTemp );
        }
      }

      // Reset the QP of every CU without residual up to the first coded CU. When no CU is coded the
      // loop never breaks, so every CU is reset to the predicted value.
      for( const auto &cu : cs.cus )
      {
        if( cu->rootCbf )
        {
          break;
        }
        cu->qp = predQP;
      }
    }
    
    // Copies the original samples of every valid component of every TU of the given CU into that
    // TU's PCM buffer.
    //
    //   cu  coding unit whose TUs are traversed; its chromaFormat decides how many components exist
    void EncCu::xFillPCMBuffer( CodingUnit &cu )
    {
      const uint32_t numComps = getNumberValidComponents( cu.chromaFormat );

      for( auto &currTU : CU::traverseTUs( cu ) )
      {
        for( uint32_t compIdx = 0; compIdx < numComps; ++compIdx )
        {
          const ComponentID  comp    = ComponentID( compIdx );
          const CompArea    &blkArea = currTU.blocks[ comp ];

          // Source: original picture samples of this block; target: the TU's PCM storage.
          const CPelBuf orgSamples = currTU.cs->getOrgBuf( blkArea );
                 PelBuf pcmSamples = currTU.getPcmbuf( comp );

          pcmSamples.copyFrom( orgSamples );
        }
      }
    }
    
    
    // Evaluates the 2Nx2N merge candidates of the current block as candidate coding modes.
    //
    // Steps, as implemented below:
    //   1. Build the merge candidate list for a temporary CU/PU covering tempCS->area.
    //   2. With fast merge enabled and the block not already known to be skip: motion-compensate
    //      every candidate into m_acMergeBuffer, rank the candidates by distortion plus a
    //      candidate-index bit estimate weighted by the motion lambda, and shorten the list.
    //   3. Run xEncodeInterResidual() for every remaining candidate in up to two passes
    //      (uiNoResidualPass 0 and 1; only pass 0 when lossless).
    //   4. After pass 0, optionally signal early-skip detection to the mode controller.
    //
    //   tempCS       working coding structure; re-initialised repeatedly
    //   bestCS       best coding structure so far; passed on to xEncodeInterResidual()
    //   partitioner  supplies channel type and CU data
    //   encTestMode  mode under test (qp and lossless flag are read here)
    void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
    {
      const Slice &slice = *tempCS->slice;
    
      CHECK( slice.getSliceType() == I_SLICE, "Merge modes not available for I-slices" );
    
      tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
    
      MergeCtx mergeCtx;
      const SPS &sps = *tempCS->sps;
    
      // With sub-PU MVP enabled, give the merge context a motion buffer backed by m_SubPuMiBuf.
      if( sps.getSpsNext().getUseSubPuMvp() )
      {
        Size bufSize = g_miScaling.scale( tempCS->area.lumaSize() );
        mergeCtx.subPuMvpMiBuf    = MotionBuf( m_SubPuMiBuf,    bufSize );
      }
    
      {
        // first get merge candidates
        // (using a CU/PU pair that lives only inside this scope and is not added to tempCS)
        CodingUnit cu( tempCS->area );
        cu.cs       = tempCS;
        cu.partSize = SIZE_2Nx2N;
        cu.predMode = MODE_INTER;
        cu.slice    = tempCS->slice;
    #if HEVC_TILES_WPP
        cu.tileIdx  = tempCS->picture->tileMap->getTileIdxMap(tempCS->area.lumaPos());
    #endif
    
        PredictionUnit pu( tempCS->area );
        pu.cu = &cu;
        pu.cs = tempCS;
    
        PU::getInterMergeCandidates(pu, mergeCtx);
      }
    
    
      // Per-candidate flag handed to xEncodeInterResidual() in pass 0 and consulted in pass 1.
      // Only the first numValidMergeCand entries are initialised.
      bool candHasNoResidual[MRG_MAX_NUM_CANDS];
      for (uint32_t ui = 0; ui < mergeCtx.numValidMergeCand; ui++)
      {
        candHasNoResidual[ui] = false;
      }
    
      bool                                        bestIsSkip       = false;
      unsigned                                    uiNumMrgSATDCand = mergeCtx.numValidMergeCand;
      PelUnitBuf                                  acMergeBuffer    [ MRG_MAX_NUM_CANDS ];
      static_vector<unsigned, MRG_MAX_NUM_CANDS>  RdModeList;
      // Set once the first pass below has filled acMergeBuffer with predictions for the candidates.
      bool                                        mrgTempBufSet    = false;
    
    
    #if DMVR_JVET_LOW_LATENCY_K0217
      Mv                                          refinedMvdL0[MRG_MAX_NUM_CANDS];
    #endif
    
    
      // Default order: candidates are tested in index order.
      for( unsigned i = 0; i < MRG_MAX_NUM_CANDS; i++ )
      {
        RdModeList.push_back( i );
      }
    
      if( m_pcEncCfg->getUseFastMerge() )
      {
        uiNumMrgSATDCand = NUM_MRG_SATD_CAND;
        bestIsSkip       = false;
    
        // Only a cache-based mode controller can report whether this area is already known as skip.
        if( auto blkCache = dynamic_cast< CacheBlkInfoCtrl* >( m_modeCtrl ) )
        {
          bestIsSkip = blkCache->isSkip( tempCS->area );
        }
    
        static_vector<double, MRG_MAX_NUM_CANDS> candCostList;
    
        // 1. Pass: get SATD-cost for selected candidates and reduce their count
        if( !bestIsSkip )
        {
          RdModeList.clear();
          mrgTempBufSet       = true;
          const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda( encTestMode.lossless );
    
          CodingUnit &cu      = tempCS->addCU( tempCS->area, partitioner.chType );
    
          partitioner.setCUData( cu );
          cu.slice            = tempCS->slice;
    #if HEVC_TILES_WPP
          cu.tileIdx          = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() );
    #endif
          cu.skip             = false;
          cu.partSize         = SIZE_2Nx2N;
        //cu.affine
          cu.predMode         = MODE_INTER;
        //cu.LICFlag
          cu.transQuantBypass = encTestMode.lossless;
          cu.chromaQpAdj      = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1;
          cu.qp               = encTestMode.qp;
        //cu.emtFlag  is set below
    
          PredictionUnit &pu  = tempCS->addPU( cu, partitioner.chType );
    
          // Luma distortion between the original block and a candidate prediction; the Hadamard
          // option is requested unless the mode is lossless. distParam.cur is redirected per
          // candidate inside the loop.
          DistParam distParam;
          const bool bUseHadamard= !encTestMode.lossless;
          m_pcRdCost->setDistParam (distParam, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y(), sps.getBitDepth (CHANNEL_TYPE_LUMA), COMPONENT_Y, bUseHadamard);
    
          // Area with origin (0,0) and the luma size of the block, used to view the merge buffers.
          const UnitArea localUnitArea( tempCS->area.chromaFormat, Area( 0, 0, tempCS->area.Y().width, tempCS->area.Y().height) );
          for( uint32_t uiMergeCand = 0; uiMergeCand < mergeCtx.numValidMergeCand; uiMergeCand++ )
          {
            acMergeBuffer[uiMergeCand] = m_acMergeBuffer[uiMergeCand].getBuf( localUnitArea );
    
            mergeCtx.setMergeInfo( pu, uiMergeCand );
    
            PU::spanMotionInfo( pu, mergeCtx );
    
            distParam.cur = acMergeBuffer[uiMergeCand].Y();
    
            m_pcInterSearch->motionCompensation( pu,  acMergeBuffer[uiMergeCand] );
            
            // For bi-predicted candidates of the default merge type, store the PU's motion vectors
            // as they are after motion compensation back into the merge context.
            if( mergeCtx.interDirNeighbours[uiMergeCand] == 3 && mergeCtx.mrgTypeNeighbours[uiMergeCand] == MRG_TYPE_DEFAULT_N )
            {
              mergeCtx.mvFieldNeighbours[2*uiMergeCand].mv   = pu.mv[0];
              mergeCtx.mvFieldNeighbours[2*uiMergeCand+1].mv = pu.mv[1];
    
    #if DMVR_JVET_LOW_LATENCY_K0217
              refinedMvdL0[uiMergeCand] = pu.mvd[0];
    #endif
    
            }
    
            Distortion uiSad = distParam.distFunc(distParam);
            // Bit estimate for the candidate index: index + 1, except that the last allowed index
            // costs one bit less.
            uint32_t uiBitsCand = uiMergeCand + 1;
            if( uiMergeCand == tempCS->slice->getMaxNumMergeCand() - 1 )
            {
              uiBitsCand--;
            }
            double cost     = (double)uiSad + (double)uiBitsCand * sqrtLambdaForFirstPass;
    
            updateCandList( uiMergeCand, cost, RdModeList, candCostList, uiNumMrgSATDCand );
            CHECK( std::min( uiMergeCand + 1, uiNumMrgSATDCand ) != RdModeList.size(), "" );
          }
          // Try to limit number of candidates using SATD-costs
          // (cut the list at the first candidate whose cost exceeds MRG_FAST_RATIO times the best)
          for( uint32_t i = 1; i < uiNumMrgSATDCand; i++ )
          {
            if( candCostList[i] > MRG_FAST_RATIO * candCostList[0] )
            {
              uiNumMrgSATDCand = i;
              break;
            }
          }
    
          // Discard the CU/PU added for the first pass before the full RD tests start.
          tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
        }
        else
        {
          // Block already known as skip: keep all valid candidates in index order.
          uiNumMrgSATDCand = mergeCtx.numValidMergeCand;
        }
      }
    
      // Lossless modes run only pass 0.
      const uint32_t iteration = encTestMode.lossless ? 1 : 2;
    
      // 2. Pass: check candidates using full RD test
      for( uint32_t uiNoResidualPass = 0; uiNoResidualPass < iteration; uiNoResidualPass++ )
      {
        for( uint32_t uiMrgHADIdx = 0; uiMrgHADIdx < uiNumMrgSATDCand; uiMrgHADIdx++ )
        {
          uint32_t uiMergeCand = RdModeList[uiMrgHADIdx];
          // Pass 1 skips candidates already flagged in candHasNoResidual during pass 0;
          // pass 0 is skipped entirely once bestIsSkip is set.
          if( ( (uiNoResidualPass != 0) && candHasNoResidual[uiMergeCand] )
           || ( (uiNoResidualPass == 0) && bestIsSkip ) )
          {
            continue;
          }
    
          // first get merge candidates
          CodingUnit &cu      = tempCS->addCU( tempCS->area, partitioner.chType );
    
          partitioner.setCUData( cu );
          cu.slice            = tempCS->slice;
    #if HEVC_TILES_WPP
          cu.tileIdx          = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() );
    #endif
          cu.skip             = false;
          cu.partSize         = SIZE_2Nx2N;
        //cu.affine
          cu.predMode         = MODE_INTER;
        //cu.LICFlag
          cu.transQuantBypass = encTestMode.lossless;
          cu.chromaQpAdj      = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1;
          cu.qp               = encTestMode.qp;
          PredictionUnit &pu  = tempCS->addPU( cu, partitioner.chType );
    
          mergeCtx.setMergeInfo( pu, uiMergeCand );
          PU::spanMotionInfo( pu, mergeCtx );
    
          // Reuse the prediction computed in the first pass when available; otherwise predict now.
          if( mrgTempBufSet )
          {
    
    #if DMVR_JVET_LOW_LATENCY_K0217
            pu.mvd[0] = refinedMvdL0[uiMergeCand];
    #endif
    
            tempCS->getPredBuf().copyFrom( acMergeBuffer[ uiMergeCand ]);
          }
          else
          {
            m_pcInterSearch->motionCompensation( pu );
            
          }
    
    
          // Only pass 0 passes the per-candidate flag for xEncodeInterResidual() to fill in.
          xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass
            , NULL
            , 1
            , uiNoResidualPass == 0 ? &candHasNoResidual[uiMergeCand] : NULL );
    
    
          // Fast decision: once the best CU has no residual, the rest of pass 0 is skipped.
          if( m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip )
          {
            bestIsSkip = bestCS->getCU( partitioner.chType )->rootCbf == 0;
          }
          tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
        }// end loop uiMrgHADIdx
    
        // Early skip detection, evaluated once after pass 0.
        if( uiNoResidualPass == 0 && m_pcEncCfg->getUseEarlySkipDetection() )
        {
          const CodingUnit     &bestCU = *bestCS->getCU( partitioner.chType );
          const PredictionUnit &bestPU = *bestCS->getPU( partitioner.chType );
    
          if( bestCU.rootCbf == 0 )
          {
            if( bestPU.mergeFlag )
            {
              m_modeCtrl->setEarlySkipDetected();
            }
            else if( m_pcEncCfg->getMotionEstimationSearchMethod() != MESEARCH_SELECTIVE )
            {
              // Non-merge best PU: early skip is signalled only when the summed absolute MVD over
              // the reference lists that have reference pictures is zero.
              int absolute_MV = 0;
    
              for( uint32_t uiRefListIdx = 0; uiRefListIdx < 2; uiRefListIdx++ )
              {
                if( slice.getNumRefIdx( RefPicList( uiRefListIdx ) ) > 0 )
                {
                  absolute_MV += bestPU.mvd[uiRefListIdx].getAbsHor() + bestPU.mvd[uiRefListIdx].getAbsVer();
                }
              }
    
              if( absolute_MV == 0 )
              {
                m_modeCtrl->setEarlySkipDetected();
              }
            }
          }
        }
      }
    }
    
    // Evaluates the affine merge mode for the current block as a candidate coding mode.
    //
    // Returns immediately when the mode controller reports fast delta QP or when the block is
    // narrower or shorter than 8 luma samples. Otherwise an affine merge CU is built in tempCS, the
    // affine merge candidate is fetched, and the mode is tested with xEncodeInterResidual() - first
    // as pass 0 and, unless the mode is lossless or pass 0 reported no residual, again as pass 1.
    //
    //   tempCS       working coding structure; re-initialised on entry and before the second pass
    //   bestCS       best coding structure so far; its area is used for the size check
    //   partitioner  supplies channel type and CU data
    //   encTestMode  mode under test (qp, lossless flag and partSize are read here)
    void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
    {
      if( m_modeCtrl->getFastDeltaQp() )
      {
        return;
      }

      // Affine merge is not tested for blocks smaller than 8 luma samples in either dimension.
      if ( bestCS->area.lumaSize().width < 8 || bestCS->area.lumaSize().height < 8 )
      {
        return;
      }

      MvField       affineMvField[2][3];
      unsigned char interDirNeighbours;
      int           numValidMergeCand;
      bool          hasNoResidual = false;

    #if JVET_L0646_GBI
      uint8_t       gbiIdx = GBI_DEFAULT;
    #endif



      tempCS->initStructData( encTestMode.qp, encTestMode.lossless );

      CodingUnit &cu      = tempCS->addCU( tempCS->area, partitioner.chType );

      partitioner.setCUData( cu );
      cu.slice            = tempCS->slice;
    #if HEVC_TILES_WPP
      cu.tileIdx          = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() );
    #endif
      cu.skip             = false;
      cu.partSize         = encTestMode.partSize;
      cu.affine           = true;
      cu.predMode         = MODE_INTER;
      cu.transQuantBypass = encTestMode.lossless;
      cu.chromaQpAdj      = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1;
      cu.qp               = encTestMode.qp;

      CU::addPUs( cu );

      cu.firstPU->mergeFlag = true;
      cu.firstPU->mergeIdx  = 0;

      // Fetch the affine merge candidate. The conditional must be closed right after the two
      // alternative calls: without the #endif the validity check and the whole motion setup below
      // would belong to the #else group and the directive would be left unterminated.
    #if JVET_L0646_GBI
      PU::getAffineMergeCand( *cu.firstPU, affineMvField, interDirNeighbours, gbiIdx, numValidMergeCand );
    #else
      PU::getAffineMergeCand( *cu.firstPU, affineMvField, interDirNeighbours, numValidMergeCand );
    #endif

      // -1 is treated as "no affine merge candidate": nothing to test.
      if( numValidMergeCand == -1 )
      {
        return;
      }

      cu.firstPU->interDir = interDirNeighbours;
      PU::setAllAffineMvField( *cu.firstPU, affineMvField[REF_PIC_LIST_0], REF_PIC_LIST_0 );
      PU::setAllAffineMvField( *cu.firstPU, affineMvField[REF_PIC_LIST_1], REF_PIC_LIST_1 );

    #if JVET_L0646_GBI
      cu.GBiIdx = gbiIdx;
    #endif


      PU::spanMotionInfo( *cu.firstPU );

      m_pcInterSearch->motionCompensation( cu );


      // Pass 0; hasNoResidual is filled in by xEncodeInterResidual().
      xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0
        , nullptr
        , 1
        , &hasNoResidual);


      // Pass 1 is run only when the mode is not lossless and pass 0 did not report "no residual".
      if( ! (encTestMode.lossless || hasNoResidual) )
      {
        // Rebuild tempCS from the current best structure, including its prediction signal.
        tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
        tempCS->copyStructure( *bestCS, partitioner.chType );
        tempCS->getPredBuf().copyFrom( bestCS->getPredBuf() );


        xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 1
          , nullptr
          , 1
          , &hasNoResidual);

      }
    }
    // Evaluates regular inter prediction (motion search) as a candidate coding mode.
    //
    // Without JVET_L0646_GBI a single inter CU is built in tempCS, predInterSearch() is run and the
    // result is tested with xEncodeInterResidual(). With JVET_L0646_GBI the same sequence is repeated
    // for the GBi indices in g_GbiSearchOrder, with several early-skip / early-exit shortcuts.
    //
    //   tempCS       working coding structure; re-initialised on entry and after each GBi iteration
    //   bestCS       best coding structure so far; passed on to xEncodeInterResidual()
    //   partitioner  supplies channel type and CU data
    //   encTestMode  mode under test (qp, lossless flag and partSize are read here)
    void EncCu::xCheckRDCostInter( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
    {
      tempCS->initStructData( encTestMode.qp, encTestMode.lossless );


    #if JVET_L0646_GBI

      m_pcInterSearch->setAffineModeSelected(false);

      // Reset the per-list best GBi bookkeeping when the slice reports getCheckLDC().
      if( tempCS->slice->getCheckLDC() )
      {
        m_bestGbiCost[0] = m_bestGbiCost[1] = std::numeric_limits<double>::max();
        m_bestGbiIdx[0] = m_bestGbiIdx[1] = -1;
      }

      m_pcInterSearch->resetBufferedUniMotions();
      // GBi indices are only iterated for B slices, with GBi enabled in the SPS and a block area of
      // at least GBI_SIZE_CONSTRAINT luma samples; otherwise a single iteration is run.
      int gbiLoopNum = (tempCS->slice->isInterB() ? GBI_NUM : 1);
      gbiLoopNum = (tempCS->sps->getSpsNext().getUseGBi() ? gbiLoopNum : 1);

      if( tempCS->area.lwidth() * tempCS->area.lheight() < GBI_SIZE_CONSTRAINT )
      {
        gbiLoopNum = 1;
      }

      // Best cost on entry; used as the reference for the early-exit threshold below.
      double curBestCost = bestCS->cost;
      double equGBiCost = MAX_DOUBLE;

      for( int gbiLoopIdx = 0; gbiLoopIdx < gbiLoopNum; gbiLoopIdx++ )
      {
        if( m_pcEncCfg->getUseGBiFast() )
        {
          auto blkCache = dynamic_cast< CacheBlkInfoCtrl* >(m_modeCtrl);

          if( blkCache )
          {
            bool isBestInter = blkCache->getInter(bestCS->area);
            uint8_t bestGBiIdx = blkCache->getGbiIdx(bestCS->area);

            // The cache reports inter for this area: test only the default index and the cached one.
            if( isBestInter && g_GbiSearchOrder[gbiLoopIdx] != GBI_DEFAULT && g_GbiSearchOrder[gbiLoopIdx] != bestGBiIdx )
            {
              continue;
            }
          }
        }
        // When getCheckLDC() is false only loop positions 0, 3 and 4 are tested.
        if( !tempCS->slice->getCheckLDC() )
        {
          if( gbiLoopIdx != 0 && gbiLoopIdx != 3 && gbiLoopIdx != 4 )
          {
            continue;
          }
        }
    #endif

      CodingUnit &cu      = tempCS->addCU( tempCS->area, partitioner.chType );

      partitioner.setCUData( cu );
      cu.slice            = tempCS->slice;
    #if HEVC_TILES_WPP
      cu.tileIdx          = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() );
    #endif
      cu.skip             = false;
      cu.partSize         = encTestMode.partSize;
    //cu.affine
      cu.predMode         = MODE_INTER;
      cu.transQuantBypass = encTestMode.lossless;
      cu.chromaQpAdj      = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1;
      cu.qp               = encTestMode.qp;
      CU::addPUs( cu );


    #if JVET_L0646_GBI
      cu.GBiIdx = g_GbiSearchOrder[gbiLoopIdx];
      uint8_t gbiIdx = cu.GBiIdx;
      bool  testGbi = (gbiIdx != GBI_DEFAULT);
    #endif

      m_pcInterSearch->predInterSearch( cu, partitioner );

      // Width index used to select the stored IMV coding structure passed on below.
      const unsigned wIdx = gp_sizeIdxInfo->idxFrom( tempCS->area.lwidth () );

    #if JVET_L0646_GBI
      gbiIdx = CU::getValidGbiIdx(cu);
      if( testGbi && gbiIdx == GBI_DEFAULT ) // Enabled GBi but the search results is uni.
      {
        tempCS->initStructData(encTestMode.qp, encTestMode.lossless);
        continue;
      }
      CHECK(!(testGbi || (!testGbi && gbiIdx == GBI_DEFAULT)), " !( bTestGbi || (!bTestGbi && gbiIdx == GBI_DEFAULT ) )");

      // Remember when the default-index search did not end up bi-predictive (interDir != 3).
      bool isEqualUni = false;
      if( m_pcEncCfg->getUseGBiFast() )
      {
        if( cu.firstPU->interDir != 3 && testGbi == 0 )
        {
          isEqualUni = true;
        }
      }
    #endif


      // The argument list must be closed here; the GBi cost pointer is the only optional argument.
      xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, 0
        , m_pImvTempCS ? m_pImvTempCS[wIdx][encTestMode.partSize] : nullptr
        , 1
        , nullptr

    #if JVET_L0646_GBI
        , &equGBiCost
    #endif
      );

    #if JVET_L0646_GBI
      if( g_GbiSearchOrder[gbiLoopIdx] == GBI_DEFAULT )
        m_pcInterSearch->setAffineModeSelected((bestCS->cus.front()->affine && !(bestCS->cus.front()->firstPU->mergeFlag)));

      tempCS->initStructData(encTestMode.qp, encTestMode.lossless);

      // Early exit once equGBiCost exceeds the entry cost by more than 5% (fast GBi only; with the
      // MAX_DOUBLE factor the test is not expected to trigger).
      const double skipTH = (m_pcEncCfg->getUseGBiFast() ? 1.05 : MAX_DOUBLE);
      if( equGBiCost > curBestCost * skipTH )
      {
        break;
      }

      if( m_pcEncCfg->getUseGBiFast() )
      {
        if( isEqualUni == true && m_pcEncCfg->getIntraPeriod() == -1 )
        {
          break;
        }
      }
      // NOTE(review): cu was added to tempCS before initStructData() was called above - confirm the
      // reference is still valid when xIsGBiSkip() reads it.
      if( g_GbiSearchOrder[gbiLoopIdx] == GBI_DEFAULT && xIsGBiSkip(cu) && m_pcEncCfg->getUseGBiFast() )
      {
        break;
      }
     }  // for( UChar gbiLoopIdx = 0; gbiLoopIdx < gbiLoopNum; gbiLoopIdx++ )
    #endif

    }
    
    
    
    
    
    bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
    {
      int iIMV = int( ( encTestMode.opts & ETO_IMV ) >> ETO_IMV_SHIFT );
    
    #if JVET_L0646_GBI
      m_pcInterSearch->setAffineModeSelected(false);
    #endif
    
      // Only int-Pel, 4-Pel and fast 4-Pel allowed
      CHECK( iIMV != 1 && iIMV != 2 && iIMV != 3, "Unsupported IMV Mode" );
      // Fast 4-Pel Mode
    
      EncTestMode encTestModeBase = encTestMode;                                        // copy for clearing non-IMV options
      encTestModeBase.opts        = EncTestModeOpts( encTestModeBase.opts & ETO_IMV );  // clear non-IMV options (is that intended?)
    
      tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
    
      CodingStructure* pcCUInfo2Reuse = nullptr;
    
      if( m_pImvTempCS && encTestMode.partSize != SIZE_2Nx2N && ( ( encTestMode.opts & ETO_FORCE_MERGE ) == 0 ) )
      {
        pcCUInfo2Reuse = m_pImvTempCS[gp_sizeIdxInfo->idxFrom( tempCS->area.lwidth() )][encTestMode.partSize];
    
        if( pcCUInfo2Reuse != nullptr )
        {
          CHECK( tempCS->area != pcCUInfo2Reuse->area, " mismatch" );
          tempCS->copyStructure( *pcCUInfo2Reuse, partitioner.chType );
          tempCS->fracBits = 0;
          tempCS->dist     = 0;
          tempCS->cost     = MAX_DOUBLE;
        }
      }
    
    
    #if JVET_L0646_GBI
      m_pcInterSearch->resetBufferedUniMotions();
      int gbiLoopNum = (tempCS->slice->isInterB() ? GBI_NUM : 1);
      gbiLoopNum = (pcCUInfo2Reuse != NULL ? 1 : gbiLoopNum);
      gbiLoopNum = (tempCS->slice->getSPS()->getSpsNext().getUseGBi() ? gbiLoopNum : 1);
    
      if( tempCS->area.lwidth() * tempCS->area.lheight() < GBI_SIZE_CONSTRAINT )
      {
        gbiLoopNum = 1;
      }
    
      double curBestCost = bestCS->cost;
      double equGBiCost = MAX_DOUBLE;
    
      for( int gbiLoopIdx = 0; gbiLoopIdx < gbiLoopNum; gbiLoopIdx++ )
      {
        if( m_pcEncCfg->getUseGBiFast() )
        {
          auto blkCache = dynamic_cast< CacheBlkInfoCtrl* >(m_modeCtrl);
    
          if( blkCache )
          {
            bool isBestInter = blkCache->getInter(bestCS->area);
            uint8_t bestGBiIdx = blkCache->getGbiIdx(bestCS->area);
    
            if( isBestInter && g_GbiSearchOrder[gbiLoopIdx] != GBI_DEFAULT && g_GbiSearchOrder[gbiLoopIdx] != bestGBiIdx )
            {
              continue;
            }
          }
        }
    
        if( !tempCS->slice->getCheckLDC() )
        {
          if( gbiLoopIdx != 0 && gbiLoopIdx != 3 && gbiLoopIdx != 4 )
          {
            continue;
          }
        }
    
        if( m_pcEncCfg->getUseGBiFast() && tempCS->slice->getCheckLDC() && g_GbiSearchOrder[gbiLoopIdx] != GBI_DEFAULT
          && (m_bestGbiIdx[0] >= 0 && g_GbiSearchOrder[gbiLoopIdx] != m_bestGbiIdx[0])
          && (m_bestGbiIdx[1] >= 0 && g_GbiSearchOrder[gbiLoopIdx] != m_bestGbiIdx[1]))
        {
          continue;
        }
    #endif
    
      CodingUnit &cu = ( pcCUInfo2Reuse != nullptr ) ? *tempCS->getCU( partitioner.chType ) : tempCS->addCU( tempCS->area, partitioner.chType );
    
      if( pcCUInfo2Reuse == nullptr )
      {
        partitioner.setCUData( cu );
        cu.slice            = tempCS->slice;
    #if HEVC_TILES_WPP
        cu.tileIdx          = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() );
    #endif
        cu.skip             = false;
        cu.partSize         = encTestMode.partSize;
      //cu.affine
        cu.predMode         = MODE_INTER;
        cu.transQuantBypass = encTestMode.lossless;
        cu.chromaQpAdj      = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1;
        cu.qp               = encTestMode.qp;
    
        CU::addPUs( cu );
      }
      else
      {
        CHECK( cu.partSize != encTestMode.partSize,    "Mismatch" );
        CHECK( cu.skip,                                "Mismatch" );
        CHECK( cu.qtDepth  != partitioner.currQtDepth, "Mismatch" );
        CHECK( cu.btDepth  != partitioner.currBtDepth, "Mismatch" );
        CHECK( cu.mtDepth  != partitioner.currMtDepth, "Mismatch" );
        CHECK( cu.depth    != partitioner.currDepth,   "Mismatch" );
      }
    
      cu.imv      = iIMV > 1 ? 2 : 1;
      cu.emtFlag  = false;
    
    
    #if JVET_L0646_GBI
      bool testGbi;
      uint8_t gbiIdx;
    #endif
    
      if( pcCUInfo2Reuse != nullptr )
      {
        // reuse the motion info from pcCUInfo2Reuse
        CU::resetMVDandMV2Int( cu, m_pcInterSearch );
    
    
    #if JVET_L0646_GBI
        CHECK(cu.GBiIdx < 0 || cu.GBiIdx >= GBI_NUM, "cu.GBiIdx < 0 || cu.GBiIdx >= GBI_NUM");
        gbiIdx = CU::getValidGbiIdx(cu);
        testGbi = (gbiIdx != GBI_DEFAULT);
    #endif
    
        if( !CU::hasSubCUNonZeroMVd( cu ) )
        {
          m_modeCtrl->useModeResult( encTestModeBase, tempCS, partitioner );
          return false;
        }
        else
        {
          m_pcInterSearch->motionCompensation( cu );
        }
      }
      else
      {
    
    #if JVET_L0646_GBI 
        cu.GBiIdx = g_GbiSearchOrder[gbiLoopIdx];
        gbiIdx = cu.GBiIdx;
        testGbi = (gbiIdx != GBI_DEFAULT);
    #endif
    
        m_pcInterSearch->predInterSearch( cu, partitioner );
    
    #if JVET_L0646_GBI
        gbiIdx = CU::getValidGbiIdx(cu);
    #endif
    
    #if JVET_L0646_GBI
      if( testGbi && gbiIdx == GBI_DEFAULT ) // Enabled GBi but the search results is uni.
      {