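// NOTE: This text was captured from a GitLab blame view of EncCu.cpp (116 KiB).
// Page navigation residue ("Skip to content", "Snippets / Groups / Projects",
// "Newer / Older", "Learn to ignore specific revisions") is not part of the source file.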
  •       tempNum = MMVD_ADD_NUM;
          bool allowDirection[4] = { true, true, true, true };
          for (uint32_t mergeCand = mergeCtx.numValidMergeCand; mergeCand < mergeCtx.numValidMergeCand + tempNum; mergeCand++)
          {
            const int mmvdMergeCand = mergeCand - mergeCtx.numValidMergeCand;
            int bitsBaseIdx = 0;
            int bitsRefineStep = 0;
            int bitsDirection = 2;
            int bitsCand = 0;
            int baseIdx;
            int refineStep;
            int direction;
            baseIdx = mmvdMergeCand / MMVD_MAX_REFINE_NUM;
            refineStep = (mmvdMergeCand - (baseIdx * MMVD_MAX_REFINE_NUM)) / 4;
            direction = (mmvdMergeCand - baseIdx * MMVD_MAX_REFINE_NUM - refineStep * 4) % 4;
            if (refineStep == 0)
            {
              allowDirection[direction] = true;
            }
            if (allowDirection[direction] == false)
            {
              continue;
            }
            bitsBaseIdx = baseIdx + 1;
            if (baseIdx == MMVD_BASE_MV_NUM - 1)
            {
              bitsBaseIdx--;
            }
    
            bitsRefineStep = refineStep + 1;
            if (refineStep == MMVD_REFINE_STEP - 1)
            {
              bitsRefineStep--;
            }
    
            bitsCand = bitsBaseIdx + bitsRefineStep + bitsDirection;
            bitsCand++; // for mmvd_flag
    
    #if !JVET_L0054_MMVD
            acMergeBuffer[mergeCand] = m_acMergeBuffer[mergeCand].getBuf(localUnitArea);
    #endif
            mergeCtx.setMmvdMergeCandiInfo(pu, mmvdMergeCand);
    
            PU::spanMotionInfo(pu, mergeCtx);
    #if JVET_L0054_MMVD
            distParam.cur = singleMergeTempBuffer->Y();
            m_pcInterSearch->motionCompensation(pu, *singleMergeTempBuffer);
    #else
            distParam.cur = acMergeBuffer[mergeCand].Y();
            m_pcInterSearch->motionCompensation(pu, acMergeBuffer[mergeCand]);
    #endif
    
            Distortion uiSad = distParam.distFunc(distParam);
    
    
    #if !JVET_L0054_MMVD
            uint32_t bitsCand = mergeCand + 1;
            if (mergeCand == tempCS->slice->getMaxNumMergeCand() - 1)
            {
              bitsCand--;
            }
    #endif
            double cost = (double)uiSad + (double)bitsCand * sqrtLambdaForFirstPass;
    #if JVET_L0054_MMVD
            allowDirection[direction] = cost >  1.3 * candCostList[0] ? 0 : 1;
    #endif
    #if JVET_L0054_MMVD
            insertPos = -1;
    
    #if JVET_L0100_MULTI_HYPOTHESIS_INTRA
            updateDoubleCandList(mergeCand, cost, RdModeList, candCostList, RdModeList2, (uint32_t)NUM_LUMA_MODE, uiNumMrgSATDCand, &insertPos);
    #else
    
            updateCandList(mergeCand, cost, RdModeList, candCostList, uiNumMrgSATDCand, &insertPos);
    
            if (insertPos != -1)
            {
              for (int i = int(RdModeList.size()) - 1; i > insertPos; i--)
              {
                swap(acMergeTempBuffer[i - 1], acMergeTempBuffer[i]);
              }
              swap(singleMergeTempBuffer, acMergeTempBuffer[insertPos]);
            }
    #else
            updateCandList(mergeCand, cost, RdModeList, candCostList, uiNumMrgSATDCand);
    #endif
    #if !JVET_L0054_MMVD
            CHECK(std::min(mergeCand + 1, uiNumMrgSATDCand) != RdModeList.size(), "");
    #endif
          }
    #endif
    
          // Try to limit number of candidates using SATD-costs
          for( uint32_t i = 1; i < uiNumMrgSATDCand; i++ )
          {
            if( candCostList[i] > MRG_FAST_RATIO * candCostList[0] )
            {
              uiNumMrgSATDCand = i;
              break;
            }
          }
    
    
    #if JVET_L0124_L0208_TRIANGLE
          setMergeBestSATDCost( candCostList[0] );
    #endif
    
    
    #if JVET_L0100_MULTI_HYPOTHESIS_INTRA
          if (isIntrainterEnabled)
          {
    
            pu.mhIntraFlag = true;
    
            for (uint32_t mergeCnt = 0; mergeCnt < uiNumMrgSATDCand; mergeCnt++)
            {
    #if JVET_L0054_MMVD
              if (RdModeList[mergeCnt] >= (MRG_MAX_NUM_CANDS + MMVD_ADD_NUM))
    #else
              if (RdModeList[mergeCnt] >= (MRG_MAX_NUM_CANDS + MRG_MAX_NUM_CANDS))
    #endif
              {
                pu.intraDir[0] = RdModeList2[mergeCnt];
                pu.intraDir[1] = DM_CHROMA_IDX;
                uint32_t bufIdx = (pu.intraDir[0] > 1) ? (pu.intraDir[0] == HOR_IDX ? 2 : 3) : pu.intraDir[0];
                bool isUseFilter = IntraPrediction::useFilteredIntraRefSamples(COMPONENT_Cb, pu, true, pu);
                m_pcIntraSearch->initIntraPatternChType(*pu.cu, pu.Cb(), isUseFilter);
                m_pcIntraSearch->predIntraAng(COMPONENT_Cb, pu.cs->getPredBuf(pu).Cb(), pu, isUseFilter);
                m_pcIntraSearch->switchBuffer(pu, COMPONENT_Cb, pu.cs->getPredBuf(pu).Cb(), m_pcIntraSearch->getPredictorPtr2(COMPONENT_Cb, bufIdx));
                isUseFilter = IntraPrediction::useFilteredIntraRefSamples(COMPONENT_Cr, pu, true, pu);
                m_pcIntraSearch->initIntraPatternChType(*pu.cu, pu.Cr(), isUseFilter);
                m_pcIntraSearch->predIntraAng(COMPONENT_Cr, pu.cs->getPredBuf(pu).Cr(), pu, isUseFilter);
                m_pcIntraSearch->switchBuffer(pu, COMPONENT_Cr, pu.cs->getPredBuf(pu).Cr(), m_pcIntraSearch->getPredictorPtr2(COMPONENT_Cr, bufIdx));
              }
            }
    
            pu.mhIntraFlag = false;
    
          tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
        }
        else
        {
    
    #if JVET_L0054_MMVD
          if (bestIsMMVDSkip)
          {
            uiNumMrgSATDCand = mergeCtx.numValidMergeCand + MMVD_ADD_NUM;
          }
          else
          {
            uiNumMrgSATDCand = mergeCtx.numValidMergeCand;
          }
    #else
    
          uiNumMrgSATDCand = mergeCtx.numValidMergeCand;
    
        }
      }
    
      const uint32_t iteration = encTestMode.lossless ? 1 : 2;
    
      // 2. Pass: check candidates using full RD test
      for( uint32_t uiNoResidualPass = 0; uiNoResidualPass < iteration; uiNoResidualPass++ )
      {
        for( uint32_t uiMrgHADIdx = 0; uiMrgHADIdx < uiNumMrgSATDCand; uiMrgHADIdx++ )
        {
          uint32_t uiMergeCand = RdModeList[uiMrgHADIdx];
    
    
    #if JVET_L0100_MULTI_HYPOTHESIS_INTRA
    #if JVET_L0054_MMVD
          if (uiNoResidualPass != 0 && uiMergeCand >= (MRG_MAX_NUM_CANDS + MMVD_ADD_NUM)) // intrainter does not support skip mode
    #else
          if (uiNoResidualPass != 0 && uiMergeCand >= (MRG_MAX_NUM_CANDS + MRG_MAX_NUM_CANDS)) // intrainter does not support skip mode
    #endif
          {
    #if JVET_L0054_MMVD
            uiMergeCand -= (MRG_MAX_NUM_CANDS + MMVD_ADD_NUM); // for skip, map back to normal merge candidate idx and try RDO
    #else
            uiMergeCand -= (MRG_MAX_NUM_CANDS + MRG_MAX_NUM_CANDS); // for skip, map back to normal merge candidate idx and try RDO
    #endif
            if (isTestSkipMerge[uiMergeCand])
            {
              continue;
            }
          }
    #endif
    
    
    #if JVET_L0054_MMVD
          if (((uiNoResidualPass != 0) && candHasNoResidual[uiMrgHADIdx])
    #else
    
          if( ( (uiNoResidualPass != 0) && candHasNoResidual[uiMergeCand] )
    
           || ( (uiNoResidualPass == 0) && bestIsSkip ) )
          {
            continue;
          }
    
          // first get merge candidates
          CodingUnit &cu      = tempCS->addCU( tempCS->area, partitioner.chType );
    
          partitioner.setCUData( cu );
          cu.slice            = tempCS->slice;
    #if HEVC_TILES_WPP
          cu.tileIdx          = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() );
    #endif
          cu.skip             = false;
    
    #if JVET_L0054_MMVD
          cu.mmvdSkip = false;
    
    #endif
    #if JVET_L0124_L0208_TRIANGLE
          cu.triangle         = false;
    
          cu.partSize         = SIZE_2Nx2N;
        //cu.affine
          cu.predMode         = MODE_INTER;
        //cu.LICFlag
          cu.transQuantBypass = encTestMode.lossless;
          cu.chromaQpAdj      = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1;
          cu.qp               = encTestMode.qp;
          PredictionUnit &pu  = tempCS->addPU( cu, partitioner.chType );
    
    
    #if JVET_L0100_MULTI_HYPOTHESIS_INTRA
    #if JVET_L0054_MMVD
          if (uiNoResidualPass == 0 && uiMergeCand >= (MRG_MAX_NUM_CANDS + MMVD_ADD_NUM))
    #else
          if (uiNoResidualPass == 0 && uiMergeCand >= (MRG_MAX_NUM_CANDS + MRG_MAX_NUM_CANDS))
    #endif
          {
    #if JVET_L0054_MMVD
            uiMergeCand -= (MRG_MAX_NUM_CANDS + MMVD_ADD_NUM);
            cu.mmvdSkip = false;
            mergeCtx.setMergeInfo(pu, uiMergeCand);
    #else
            uiMergeCand -= (MRG_MAX_NUM_CANDS + MRG_MAX_NUM_CANDS);
    #endif
    
            pu.mhIntraFlag = true;
    
            pu.intraDir[0] = RdModeList2[uiMrgHADIdx];
            CHECK(pu.intraDir[0]<0 || pu.intraDir[0]>(NUM_LUMA_MODE - 1), "out of intra mode");
            pu.intraDir[1] = DM_CHROMA_IDX;
          }
    #endif
    
    
    #if JVET_L0100_MULTI_HYPOTHESIS_INTRA
          else if (uiMergeCand >= mergeCtx.numValidMergeCand && uiMergeCand < MRG_MAX_NUM_CANDS + MMVD_ADD_NUM)
    #else
    
          if (uiMergeCand >= mergeCtx.numValidMergeCand)
    
          {
            cu.mmvdSkip = true;
            mergeCtx.setMmvdMergeCandiInfo(pu, uiMergeCand - mergeCtx.numValidMergeCand);
          }
          else
          {
            cu.mmvdSkip = false;
            mergeCtx.setMergeInfo(pu, uiMergeCand);
          }
    #else
    
    #if DMVR_JVET_LOW_LATENCY_K0217
            pu.mvd[0] = refinedMvdL0[uiMergeCand];
    #endif
    
    #if JVET_L0100_MULTI_HYPOTHESIS_INTRA
    
            if (pu.mhIntraFlag)
    
            {
              uint32_t bufIdx = (pu.intraDir[0] > 1) ? (pu.intraDir[0] == HOR_IDX ? 2 : 3) : pu.intraDir[0];
              PelBuf tmpBuf = tempCS->getPredBuf(pu).Y();
              tmpBuf.copyFrom(acMergeBuffer[uiMergeCand].Y());
              m_pcIntraSearch->geneWeightedPred(COMPONENT_Y, tmpBuf, pu, m_pcIntraSearch->getPredictorPtr2(COMPONENT_Y, bufIdx));
              tmpBuf = tempCS->getPredBuf(pu).Cb();
              tmpBuf.copyFrom(acMergeBuffer[uiMergeCand].Cb());
              m_pcIntraSearch->geneWeightedPred(COMPONENT_Cb, tmpBuf, pu, m_pcIntraSearch->getPredictorPtr2(COMPONENT_Cb, bufIdx));
              tmpBuf = tempCS->getPredBuf(pu).Cr();
              tmpBuf.copyFrom(acMergeBuffer[uiMergeCand].Cr());
              m_pcIntraSearch->geneWeightedPred(COMPONENT_Cr, tmpBuf, pu, m_pcIntraSearch->getPredictorPtr2(COMPONENT_Cr, bufIdx));
            }
            else
            {
    #if JVET_L0054_MMVD
              if (uiNoResidualPass != 0 && uiMergeCand < mergeCtx.numValidMergeCand && RdModeList[uiMrgHADIdx] >= (MRG_MAX_NUM_CANDS + MMVD_ADD_NUM))
              {
                tempCS->getPredBuf().copyFrom(acMergeBuffer[uiMergeCand]);
              }
              else
              {
                tempCS->getPredBuf().copyFrom(*acMergeTempBuffer[uiMrgHADIdx]);
              }
    #else
              tempCS->getPredBuf().copyFrom(acMergeBuffer[uiMergeCand]);
    #endif
            }
    #else
    
    #if JVET_L0054_MMVD
            tempCS->getPredBuf().copyFrom(*acMergeTempBuffer[uiMrgHADIdx]);
    #else
    
            tempCS->getPredBuf().copyFrom( acMergeBuffer[ uiMergeCand ]);
    
    #if JVET_L0100_MULTI_HYPOTHESIS_INTRA
    #if JVET_L0054_MMVD
    
          if (!cu.mmvdSkip && !pu.mhIntraFlag && uiNoResidualPass != 0)
    
          if (!pu.mhIntraFlag && uiNoResidualPass != 0)
    
    #endif
          {
            CHECK(uiMergeCand >= mergeCtx.numValidMergeCand, "out of normal merge");
            isTestSkipMerge[uiMergeCand] = true;
          }
    #endif
    
    
    #if JVET_L0054_MMVD
          xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass
            , NULL
            , 1
            , uiNoResidualPass == 0 ? &candHasNoResidual[uiMrgHADIdx] : NULL);
    #else
    
          xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass
            , NULL
            , 1
            , uiNoResidualPass == 0 ? &candHasNoResidual[uiMergeCand] : NULL );
    
    #if JVET_L0100_MULTI_HYPOTHESIS_INTRA
    
          if( m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip && !pu.mhIntraFlag)
    
          if( m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip )
    
          {
            bestIsSkip = bestCS->getCU( partitioner.chType )->rootCbf == 0;
          }
          tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
        }// end loop uiMrgHADIdx
    
        if( uiNoResidualPass == 0 && m_pcEncCfg->getUseEarlySkipDetection() )
        {
          const CodingUnit     &bestCU = *bestCS->getCU( partitioner.chType );
          const PredictionUnit &bestPU = *bestCS->getPU( partitioner.chType );
    
          if( bestCU.rootCbf == 0 )
          {
            if( bestPU.mergeFlag )
            {
              m_modeCtrl->setEarlySkipDetected();
            }
            else if( m_pcEncCfg->getMotionEstimationSearchMethod() != MESEARCH_SELECTIVE )
            {
              int absolute_MV = 0;
    
              for( uint32_t uiRefListIdx = 0; uiRefListIdx < 2; uiRefListIdx++ )
              {
                if( slice.getNumRefIdx( RefPicList( uiRefListIdx ) ) > 0 )
                {
                  absolute_MV += bestPU.mvd[uiRefListIdx].getAbsHor() + bestPU.mvd[uiRefListIdx].getAbsVer();
                }
              }
    
              if( absolute_MV == 0 )
              {
                m_modeCtrl->setEarlySkipDetected();
              }
            }
          }
        }
      }
    }
    
    
    #if JVET_L0124_L0208_TRIANGLE
    // Rate-distortion check of the triangle merge mode for the area held by tempCS.
    //
    // Steps performed here:
    //   1. Fetch the triangle merge candidates and motion-compensate each of the
    //      TRIANGLE_MAX_NUM_UNI_CANDS uni-prediction candidates into a view of m_acMergeBuffer.
    //   2. Unless the best mode so far is already a skip, build the luma weighted prediction of
    //      every candidate combination, cost it (distortion + index bits * lambda), and hand it to
    //      updateCandList; then prune the resulting list and weight chroma for the survivors only.
    //   3. Run xEncodeInterResidual on every remaining combination, in one pass when lossless and
    //      in two passes otherwise.
    //
    // tempCS      - working coding structure; re-initialised via initStructData after each trial
    // bestCS      - best coding structure so far; passed on to xEncodeInterResidual
    // partitioner - supplies the channel type and the CU data for every CU added here
    // encTestMode - supplies the qp and lossless flag used for all trials
    void EncCu::xCheckRDCostMergeTriangle2Nx2N( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
    {
      const Slice &slice = *tempCS->slice;
      const SPS   &sps   = *tempCS->sps;

      CHECK( slice.getSliceType() != B_SLICE, "Triangle mode is only applied to B-slices" );

      tempCS->initStructData( encTestMode.qp, encTestMode.lossless );

      // One flag per combination, all cleared; xEncodeInterResidual receives the address of the
      // matching flag during the first pass, and the second pass skips flagged combinations.
      bool noResidualFlags[TRIANGLE_MAX_NUM_CANDS] = {};

      bool bestIsSkip = false;
      if( m_pcEncCfg->getUseFastDecisionForMerge() )
      {
        bestIsSkip = ( bestCS->getCU( partitioner.chType )->rootCbf == 0 );
      }

      PelUnitBuf                                     uniPredBufs[TRIANGLE_MAX_NUM_UNI_CANDS];
      PelUnitBuf                                     weightedBufs[TRIANGLE_MAX_NUM_CANDS];
      static_vector<uint8_t, TRIANGLE_MAX_NUM_CANDS> rdModeList;
      static_vector<double,  TRIANGLE_MAX_NUM_CANDS> candCostList;

      if( auto blkCache = dynamic_cast< CacheBlkInfoCtrl* >( m_modeCtrl ) )
      {
        // the block cache can also mark this area as skip
        bestIsSkip = blkCache->isSkip( tempCS->area ) || bestIsSkip;
      }

      // Luma distortion against the original; Hadamard is requested unless coding lossless.
      DistParam  distParam;
      const bool useHadamard = !encTestMode.lossless;
      m_pcRdCost->setDistParam( distParam, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y(), sps.getBitDepth( CHANNEL_TYPE_LUMA ), COMPONENT_Y, useHadamard );

      // Area with the block's dimensions, anchored at (0,0), used to take views of the member buffers.
      const UnitArea blkArea( tempCS->area.chromaFormat, Area( 0, 0, tempCS->area.Y().width, tempCS->area.Y().height ) );

      const double lambdaFirstPass = m_pcRdCost->getMotionLambda( encTestMode.lossless );

      MergeCtx triangleCtx;
      {
        // Stand-alone CU/PU (not added to tempCS) used only to derive and predict the candidates.
        CodingUnit probeCu( tempCS->area );
        probeCu.cs       = tempCS;
        probeCu.partSize = SIZE_2Nx2N;
        probeCu.predMode = MODE_INTER;
        probeCu.slice    = tempCS->slice;
        probeCu.triangle = true;
    #if JVET_L0054_MMVD
        probeCu.mmvdSkip = false;
    #endif
    #if JVET_L0646_GBI
        probeCu.GBiIdx   = GBI_DEFAULT;
    #endif

        PredictionUnit probePu( tempCS->area );
        probePu.cu = &probeCu;
        probePu.cs = tempCS;

        PU::getTriangleMergeCandidates( probePu, triangleCtx );
        for( uint8_t uniIdx = 0; uniIdx < TRIANGLE_MAX_NUM_UNI_CANDS; uniIdx++ )
        {
          uniPredBufs[uniIdx] = m_acMergeBuffer[uniIdx].getBuf( blkArea );
          triangleCtx.setMergeInfo( probePu, uniIdx );
          PU::spanMotionInfo( probePu, triangleCtx );

          m_pcInterSearch->motionCompensation( probePu, uniPredBufs[uniIdx] );
        }
      }

      // Adds a CU to tempCS and fills in the fields shared by every triangle trial below.
      auto addTriangleCu = [&]() -> CodingUnit&
      {
        CodingUnit &newCu = tempCS->addCU( tempCS->area, partitioner.chType );

        partitioner.setCUData( newCu );
        newCu.slice            = tempCS->slice;
        newCu.skip             = false;
        newCu.partSize         = SIZE_2Nx2N;
        newCu.predMode         = MODE_INTER;
        newCu.transQuantBypass = encTestMode.lossless;
        newCu.chromaQpAdj      = newCu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1;
        newCu.qp               = encTestMode.qp;
        newCu.triangle         = true;
    #if JVET_L0054_MMVD
        newCu.mmvdSkip         = false;
    #endif
    #if JVET_L0646_GBI
        newCu.GBiIdx           = GBI_DEFAULT;
    #endif
        return newCu;
      };

      // Weighted buffers are only filled when the first-pass ranking below actually runs;
      // this is latched here because bestIsSkip may change inside the RD loop.
      const bool weightedBufsReady = !bestIsSkip;
      uint8_t    numSatdCands      = TRIANGLE_MAX_NUM_SATD_CANDS;

      if( bestIsSkip )
      {
        // No ranking: every combination is listed in index order.
        numSatdCands = TRIANGLE_MAX_NUM_CANDS;
        for( uint8_t combIdx = 0; combIdx < TRIANGLE_MAX_NUM_CANDS; combIdx++ )
        {
          rdModeList.push_back( combIdx );
        }
      }
      else
      {
        CodingUnit     &cu = addTriangleCu();
        PredictionUnit &pu = tempCS->addPU( cu, partitioner.chType );

        // Blocks whose log2 width and log2 height differ by 2 or more test only the first 30 combinations.
        const int32_t log2AspectDiff  = abs( g_aucLog2[cu.lwidth()] - g_aucLog2[cu.lheight()] );
        const uint8_t numCombinations = ( log2AspectDiff >= 2 ) ? 30 : TRIANGLE_MAX_NUM_CANDS;

        for( uint8_t combIdx = 0; combIdx < numCombinations; combIdx++ )
        {
          // table row: [0] split direction, [1] first candidate, [2] second candidate
          bool    splitDir = g_TriangleCombination[combIdx][0];
          uint8_t cand0    = g_TriangleCombination[combIdx][1];
          uint8_t cand1    = g_TriangleCombination[combIdx][2];

          pu.mergeIdx  = combIdx;
          pu.mergeFlag = true;

          weightedBufs[combIdx] = m_acTriangleWeightBuffer[combIdx].getBuf( blkArea );
          uniPredBufs[cand0]    = m_acMergeBuffer[cand0].getBuf( blkArea );
          uniPredBufs[cand1]    = m_acMergeBuffer[cand1].getBuf( blkArea );

          // luma only at this stage
          m_pcInterSearch->TriangleWeighting( pu, PU::isTriangleEhancedWeight( pu, triangleCtx, cand0, cand1 ), splitDir, CHANNEL_TYPE_LUMA, weightedBufs[combIdx], uniPredBufs[cand0], uniPredBufs[cand1] );

          distParam.cur = weightedBufs[combIdx].Y();

          Distortion dist    = distParam.distFunc( distParam );
          uint32_t   idxBits = g_TriangleIdxBins[combIdx];
          double     cost    = static_cast<double>( dist ) + static_cast<double>( idxBits ) * lambdaFirstPass;

          updateCandList( combIdx, cost, rdModeList, candCostList, numSatdCands );
        }

        // Cut the list at the first entry (index 0 included) whose cost exceeds either
        // MRG_FAST_RATIO times the cost of entry 0 or the value of getMergeBestSATDCost().
        uint8_t numKept = 0;
        while( numKept < numSatdCands )
        {
          const double candCost = candCostList[numKept];
          if( candCost > MRG_FAST_RATIO * candCostList[0] || candCost > getMergeBestSATDCost() )
          {
            break;
          }
          numKept++;
        }
        numSatdCands = numKept;

        // Chroma weighting is done only for the combinations that survived the pruning.
        for( uint8_t keptIdx = 0; keptIdx < numSatdCands; keptIdx++ )
        {
          uint8_t combIdx  = rdModeList[keptIdx];
          bool    splitDir = g_TriangleCombination[combIdx][0];
          uint8_t cand0    = g_TriangleCombination[combIdx][1];
          uint8_t cand1    = g_TriangleCombination[combIdx][2];

          pu.mergeIdx  = combIdx;
          pu.mergeFlag = true;

          m_pcInterSearch->TriangleWeighting( pu, PU::isTriangleEhancedWeight( pu, triangleCtx, cand0, cand1 ), splitDir, CHANNEL_TYPE_CHROMA, weightedBufs[combIdx], uniPredBufs[cand0], uniPredBufs[cand1] );
        }

        tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
      }

      // Full RD test. Pass 0 is bypassed per candidate while bestIsSkip holds; any later pass
      // bypasses the combinations whose flag in noResidualFlags is set.
      const uint8_t numPasses = encTestMode.lossless ? 1 : 2;
      for( uint8_t pass = 0; pass < numPasses; pass++ )
      {
        for( uint8_t rdIdx = 0; rdIdx < numSatdCands; rdIdx++ )
        {
          uint8_t combIdx = rdModeList[rdIdx];

          const bool bypass = ( pass == 0 ) ? bestIsSkip : noResidualFlags[combIdx];
          if( bypass )
          {
            continue;
          }

          bool    splitDir = g_TriangleCombination[combIdx][0];
          uint8_t cand0    = g_TriangleCombination[combIdx][1];
          uint8_t cand1    = g_TriangleCombination[combIdx][2];

          CodingUnit     &cu = addTriangleCu();
          PredictionUnit &pu = tempCS->addPU( cu, partitioner.chType );

          pu.mergeIdx  = combIdx;
          pu.mergeFlag = true;

          PU::spanTriangleMotionInfo( pu, triangleCtx, combIdx, splitDir, cand0, cand1 );

          if( weightedBufsReady )
          {
            // reuse the prediction produced during the ranking stage
            tempCS->getPredBuf().copyFrom( weightedBufs[combIdx] );
          }
          else
          {
            // ranking was skipped, so weight all channels straight into the prediction buffer
            uniPredBufs[cand0] = m_acMergeBuffer[cand0].getBuf( blkArea );
            uniPredBufs[cand1] = m_acMergeBuffer[cand1].getBuf( blkArea );
            PelUnitBuf predBuf = tempCS->getPredBuf();
            m_pcInterSearch->TriangleWeighting( pu, PU::isTriangleEhancedWeight( pu, triangleCtx, cand0, cand1 ), splitDir, MAX_NUM_CHANNEL_TYPE, predBuf, uniPredBufs[cand0], uniPredBufs[cand1] );
          }

          xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, pass, NULL, true, ( ( pass == 0 ) ? &noResidualFlags[combIdx] : NULL ) );

          if( m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip )
          {
            bestIsSkip = ( bestCS->getCU( partitioner.chType )->rootCbf == 0 );
          }
          tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
        } // end loop over ranked combinations
      }
    }
    #endif
    
    
    void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
    {
      if( m_modeCtrl->getFastDeltaQp() )
      {
        return;
      }
    
      if ( bestCS->area.lumaSize().width < 8 || bestCS->area.lumaSize().height < 8 )
      {
        return;
      }
    
    
    #if JVET_L0632_AFFINE_MERGE
      const Slice &slice = *tempCS->slice;
    
      CHECK( slice.getSliceType() == I_SLICE, "Affine Merge modes not available for I-slices" );
    
      tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
    
      AffineMergeCtx affineMergeCtx;
      const SPS &sps = *tempCS->sps;
    
    #if JVET_L0369_SUBBLOCK_MERGE
      MergeCtx mrgCtx;
      if ( sps.getSpsNext().getUseSubPuMvp() )
      {
        Size bufSize = g_miScaling.scale( tempCS->area.lumaSize() );
        mrgCtx.subPuMvpMiBuf = MotionBuf( m_SubPuMiBuf, bufSize );
        affineMergeCtx.mrgCtx = &mrgCtx;
      }
    #endif
    
      {
        // first get merge candidates
        CodingUnit cu( tempCS->area );
        cu.cs = tempCS;
        cu.partSize = SIZE_2Nx2N;
        cu.predMode = MODE_INTER;
        cu.slice = tempCS->slice;
    #if HEVC_TILES_WPP
        cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() );
    #endif
    
    // (blame-view residue, not part of the source: "Huanbang Chen's avatar" / "Huanbang Chen committed")
    #if JVET_L0054_MMVD
        cu.mmvdSkip = false;
    #endif
    
    
        PredictionUnit pu( tempCS->area );
        pu.cu = &cu;
        pu.cs = tempCS;
    
        PU::getAffineMergeCand( pu, affineMergeCtx );
    
        if ( affineMergeCtx.numValidMergeCand <= 0 )
        {
          return;
        }
      }
    
      bool candHasNoResidual[AFFINE_MRG_MAX_NUM_CANDS];
      for ( uint32_t ui = 0; ui < affineMergeCtx.numValidMergeCand; ui++ )
      {
        candHasNoResidual[ui] = false;
      }
    
      bool                                        bestIsSkip = false;
      uint32_t                                    uiNumMrgSATDCand = affineMergeCtx.numValidMergeCand;
      PelUnitBuf                                  acMergeBuffer[AFFINE_MRG_MAX_NUM_CANDS];
      static_vector<uint32_t, AFFINE_MRG_MAX_NUM_CANDS>  RdModeList;
      bool                                        mrgTempBufSet = false;
    
      for ( uint32_t i = 0; i < AFFINE_MRG_MAX_NUM_CANDS; i++ )
      {
        RdModeList.push_back( i );
      }
    
      if ( m_pcEncCfg->getUseFastMerge() )
      {
        uiNumMrgSATDCand = std::min( NUM_AFF_MRG_SATD_CAND, affineMergeCtx.numValidMergeCand );
        bestIsSkip = false;
    
        if ( auto blkCache = dynamic_cast<CacheBlkInfoCtrl*>(m_modeCtrl) )
        {
          bestIsSkip = blkCache->isSkip( tempCS->area );
        }
    
        static_vector<double, AFFINE_MRG_MAX_NUM_CANDS> candCostList;
    
        // 1. Pass: get SATD-cost for selected candidates and reduce their count
        if ( !bestIsSkip )
        {
          RdModeList.clear();
          mrgTempBufSet = true;
          const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda( encTestMode.lossless );
    
          CodingUnit &cu = tempCS->addCU( tempCS->area, partitioner.chType );
    
          partitioner.setCUData( cu );
          cu.slice = tempCS->slice;
    #if HEVC_TILES_WPP
          cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() );
    #endif
          cu.skip = false;
          cu.partSize = SIZE_2Nx2N;
          cu.affine = true;
          cu.predMode = MODE_INTER;
          cu.transQuantBypass = encTestMode.lossless;
          cu.chromaQpAdj = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1;
          cu.qp = encTestMode.qp;
    
          PredictionUnit &pu = tempCS->addPU( cu, partitioner.chType );
    
          DistParam distParam;
          const bool bUseHadamard = !encTestMode.lossless;
          m_pcRdCost->setDistParam( distParam, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y(), sps.getBitDepth( CHANNEL_TYPE_LUMA ), COMPONENT_Y, bUseHadamard );
    
          const UnitArea localUnitArea( tempCS->area.chromaFormat, Area( 0, 0, tempCS->area.Y().width, tempCS->area.Y().height ) );
    
          for ( uint32_t uiMergeCand = 0; uiMergeCand < affineMergeCtx.numValidMergeCand; uiMergeCand++ )
          {
            acMergeBuffer[uiMergeCand] = m_acMergeBuffer[uiMergeCand].getBuf( localUnitArea );
    
            // set merge information
            pu.interDir = affineMergeCtx.interDirNeighbours[uiMergeCand];
            pu.mergeFlag = true;
            pu.mergeIdx = uiMergeCand;
            cu.affineType = affineMergeCtx.affineType[uiMergeCand];
    #if JVET_L0646_GBI
            cu.GBiIdx = affineMergeCtx.GBiIdx[uiMergeCand];
    #endif
    
    #if JVET_L0369_SUBBLOCK_MERGE
            pu.mergeType = affineMergeCtx.mergeType[uiMergeCand];
            if ( pu.mergeType == MRG_TYPE_SUBPU_ATMVP )
            {
              pu.refIdx[0] = affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 0][0].refIdx;
              pu.refIdx[1] = affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1][0].refIdx;
              PU::spanMotionInfo( pu, mrgCtx );
            }
            else
            {
    #endif
              PU::setAllAffineMvField( pu, affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 0], REF_PIC_LIST_0 );
              PU::setAllAffineMvField( pu, affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1], REF_PIC_LIST_1 );
    
              PU::spanMotionInfo( pu );
    #if JVET_L0369_SUBBLOCK_MERGE
            }
    #endif
    
            distParam.cur = acMergeBuffer[uiMergeCand].Y();
    
            m_pcInterSearch->motionCompensation( pu, acMergeBuffer[uiMergeCand] );
    
            Distortion uiSad = distParam.distFunc( distParam );
            uint32_t   uiBitsCand = uiMergeCand + 1;
            if ( uiMergeCand == tempCS->slice->getMaxNumAffineMergeCand() - 1 )
            {
              uiBitsCand--;
            }
            double cost = (double)uiSad + (double)uiBitsCand * sqrtLambdaForFirstPass;
    
            updateCandList( uiMergeCand, cost, RdModeList, candCostList, uiNumMrgSATDCand );
    
            CHECK( std::min( uiMergeCand + 1, uiNumMrgSATDCand ) != RdModeList.size(), "" );
          }
    
          // Try to limit number of candidates using SATD-costs
          for ( uint32_t i = 1; i < uiNumMrgSATDCand; i++ )
          {
            if ( candCostList[i] > MRG_FAST_RATIO * candCostList[0] )
            {
              uiNumMrgSATDCand = i;
              break;
            }
          }
    
          tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
        }
        else
        {
          uiNumMrgSATDCand = affineMergeCtx.numValidMergeCand;
        }
      }
    
      const uint32_t iteration = encTestMode.lossless ? 1 : 2;
    
      // 2. Pass: check candidates using full RD test
      for ( uint32_t uiNoResidualPass = 0; uiNoResidualPass < iteration; uiNoResidualPass++ )
      {
        for ( uint32_t uiMrgHADIdx = 0; uiMrgHADIdx < uiNumMrgSATDCand; uiMrgHADIdx++ )
        {
          uint32_t uiMergeCand = RdModeList[uiMrgHADIdx];
    
          if ( ((uiNoResidualPass != 0) && candHasNoResidual[uiMergeCand])
            || ((uiNoResidualPass == 0) && bestIsSkip) )
          {
            continue;
          }
    
          // first get merge candidates
          CodingUnit &cu = tempCS->addCU( tempCS->area, partitioner.chType );
    
          partitioner.setCUData( cu );
          cu.slice = tempCS->slice;
    #if HEVC_TILES_WPP
          cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() );
    #endif
          cu.skip = false;
          cu.partSize = SIZE_2Nx2N;
          cu.affine = true;
          cu.predMode = MODE_INTER;
          cu.transQuantBypass = encTestMode.lossless;
          cu.chromaQpAdj = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1;
          cu.qp = encTestMode.qp;
          PredictionUnit &pu = tempCS->addPU( cu, partitioner.chType );
    
          // set merge information
          pu.mergeFlag = true;
          pu.mergeIdx = uiMergeCand;
          pu.interDir = affineMergeCtx.interDirNeighbours[uiMergeCand];
          cu.affineType = affineMergeCtx.affineType[uiMergeCand];
    #if JVET_L0646_GBI
          cu.GBiIdx = affineMergeCtx.GBiIdx[uiMergeCand];
    #endif
    
    #if JVET_L0369_SUBBLOCK_MERGE
          pu.mergeType = affineMergeCtx.mergeType[uiMergeCand];
          if ( pu.mergeType == MRG_TYPE_SUBPU_ATMVP )
          {
            pu.refIdx[0] = affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 0][0].refIdx;
            pu.refIdx[1] = affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1][0].refIdx;
            PU::spanMotionInfo( pu, mrgCtx );
          }
          else
          {
    #endif
            PU::setAllAffineMvField( pu, affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 0], REF_PIC_LIST_0 );
            PU::setAllAffineMvField( pu, affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1], REF_PIC_LIST_1 );
    
            PU::spanMotionInfo( pu );
    #if JVET_L0369_SUBBLOCK_MERGE
          }
    #endif
    
          if ( mrgTempBufSet )
          {
            tempCS->getPredBuf().copyFrom( acMergeBuffer[uiMergeCand] );
          }
          else
          {
            m_pcInterSearch->motionCompensation( pu );
          }
    
          xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, NULL, true, ((uiNoResidualPass == 0) ? &candHasNoResidual[uiMergeCand] : NULL) );
    
          if ( m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip )
          {
            bestIsSkip = bestCS->getCU( partitioner.chType )->rootCbf == 0;
          }
          tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
        }// end loop uiMrgHADIdx
    
        if ( uiNoResidualPass == 0 && m_pcEncCfg->getUseEarlySkipDetection() )
        {
          const CodingUnit     &bestCU = *bestCS->getCU( partitioner.chType );
          const PredictionUnit &bestPU = *bestCS->getPU( partitioner.chType );
    
          if ( bestCU.rootCbf == 0 )
          {
            if ( bestPU.mergeFlag )
            {
              m_modeCtrl->setEarlySkipDetected();
            }
            else if ( m_pcEncCfg->getMotionEstimationSearchMethod() != MESEARCH_SELECTIVE )
            {
              int absolute_MV = 0;
    
              for ( uint32_t uiRefListIdx = 0; uiRefListIdx < 2; uiRefListIdx++ )
              {
                if ( slice.getNumRefIdx( RefPicList( uiRefListIdx ) ) > 0 )
                {
                  absolute_MV += bestPU.mvd[uiRefListIdx].getAbsHor() + bestPU.mvd[uiRefListIdx].getAbsVer();
                }
              }
    
              if ( absolute_MV == 0 )
              {
                m_modeCtrl->setEarlySkipDetected();
              }
            }
          }
        }
      }
    #else
    
      MvField       affineMvField[2][3];
      unsigned char interDirNeighbours;
      int           numValidMergeCand;
      bool          hasNoResidual = false;
    
    #if JVET_L0646_GBI
      uint8_t       gbiIdx = GBI_DEFAULT;
    #endif
    
    
    
      tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
    
      CodingUnit &cu      = tempCS->addCU( tempCS->area, partitioner.chType );
    
      partitioner.setCUData( cu );
      cu.slice            = tempCS->slice;
    #if HEVC_TILES_WPP
      cu.tileIdx          = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() );
    #endif
      cu.skip             = false;
    
    #if JVET_L0054_MMVD
      cu.mmvdSkip = false;
    #endif
    
      cu.partSize         = encTestMode.partSize;
      cu.affine           = true;
      cu.predMode         = MODE_INTER;
      cu.transQuantBypass = encTestMode.lossless;
      cu.chromaQpAdj      = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1;
      cu.qp               = encTestMode.qp;
    
      CU::addPUs( cu );
    
      cu.firstPU->mergeFlag = true;
      cu.firstPU->mergeIdx  = 0;
    
    #if JVET_L0646_GBI
      PU::getAffineMergeCand( *cu.firstPU, affineMvField, interDirNeighbours, gbiIdx, numValidMergeCand );
    #else
    
      PU::getAffineMergeCand( *cu.firstPU, affineMvField, interDirNeighbours, numValidMergeCand );
    
      if( numValidMergeCand == -1 )
      {
        return;
      }
    
      cu.firstPU->interDir = interDirNeighbours;
      PU::setAllAffineMvField( *cu.firstPU, affineMvField[REF_PIC_LIST_0], REF_PIC_LIST_0 );
      PU::setAllAffineMvField( *cu.firstPU, affineMvField[REF_PIC_LIST_1], REF_PIC_LIST_1 );
    
    #if JVET_L0646_GBI
      cu.GBiIdx = gbiIdx;
    #endif
    
    
      PU::spanMotionInfo( *cu.firstPU );
    
      m_pcInterSearch->motionCompensation( cu );
    
    
      xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0
        , NULL
        , 1
        , &hasNoResidual);
    
    
      if( ! (encTestMode.lossless || hasNoResidual) )
      {
        tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
        tempCS->copyStructure( *bestCS, partitioner.chType );
        tempCS->getPredBuf().copyFrom( bestCS->getPredBuf() );
    
    
        xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 1
          , NULL
          , 1
          , &hasNoResidual);
    
    }
    void EncCu::xCheckRDCostInter( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
    {
      tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
    
    
    #if JVET_L0646_GBI
      
      m_pcInterSearch->setAffineModeSelected(false);
    
      if( tempCS->slice->getCheckLDC() )
      {
        m_bestGbiCost[0] = m_bestGbiCost[1] = std::numeric_limits<double>::max();
        m_bestGbiIdx[0] = m_bestGbiIdx[1] = -1;
      }
    
      m_pcInterSearch->resetBufferedUniMotions();
      int gbiLoopNum = (tempCS->slice->isInterB() ? GBI_NUM : 1);
      gbiLoopNum = (tempCS->sps->getSpsNext().getUseGBi() ? gbiLoopNum : 1);
    
      if( tempCS->area.lwidth() * tempCS->area.lheight() < GBI_SIZE_CONSTRAINT )
      {
        gbiLoopNum = 1;
      }
    
      double curBestCost = bestCS->cost;
      double equGBiCost = MAX_DOUBLE;
    
      for( int gbiLoopIdx = 0; gbiLoopIdx < gbiLoopNum; gbiLoopIdx++ )
      {
        if( m_pcEncCfg->getUseGBiFast() )
        {
          auto blkCache = dynamic_cast< CacheBlkInfoCtrl* >(m_modeCtrl);
    
          if( blkCache )
          {
            bool isBestInter = blkCache->getInter(bestCS->area);
            uint8_t bestGBiIdx = blkCache->getGbiIdx(bestCS->area);