Skip to content
Snippets Groups Projects
IntraSearch.cpp 220 KiB
Newer Older
  • Learn to ignore specific revisions
  •       if( sps.getUseLFNST() && !tmpValidReturnSplit )
          {
            splitIsSelected = false;
            break;
          }
    
          if( !cu.ispMode )
          {
            csSplit->setDecomp( partitioner.currArea().Y() );
          }
          else if( CU::isISPFirst( cu, partitioner.currArea().Y(), COMPONENT_Y ) )
          {
            csSplit->setDecomp( cu.Y() );
          }
    
          uiSplitCbfLuma |= TU::getCbfAtDepth( *csSplit->getTU( partitioner.currArea().lumaPos(), partitioner.chType, subTuCounter - 1 ), COMPONENT_Y, partitioner.currTrDepth );
          if( cu.ispMode )
          {
            //exit condition if the accumulated cost is already larger than the best cost so far (no impact in RD performance)
            if( csSplit->cost > bestCostSoFar )
            {
              earlySkipISP    = true;
              splitIsSelected = false;
              break;
            }
            else
            {
              //more restrictive exit condition
              bool tuIsDividedInRows = CU::divideTuInRows( cu );
    
              int nSubPartitions = tuIsDividedInRows ? cu.lheight() >> floorLog2(cu.firstTU->lheight()) : cu.lwidth() >> floorLog2(cu.firstTU->lwidth());
    
              double threshold = nSubPartitions == 2 ? 0.95 : subTuCounter == 1 ? 0.83 : 0.91;
    
              if( subTuCounter < nSubPartitions && csSplit->cost > bestCostSoFar*threshold )
              {
                earlySkipISP    = true;
                splitIsSelected = false;
                break;
              }
            }
          }
    
    
    
    
        } while( partitioner.nextPart( *csSplit ) );
    
        partitioner.exitCurrSplit();
    
        if( splitIsSelected )
        {
          for( auto &ptu : csSplit->tus )
          {
            if( currArea.Y().contains( ptu->Y() ) )
            {
              TU::setCbfAtDepth( *ptu, COMPONENT_Y, currDepth, uiSplitCbfLuma ? 1 : 0 );
            }
          }
    
          //----- restore context states -----
          m_CABACEstimator->getCtx() = ctxStart;
    
    
          cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_LUMA] = false;
          cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_CHROMA] = false;
          cuCtx.lfnstLastScanPos = false;
          cuCtx.violatesMtsCoeffConstraint = false;
    
    #if JVET_Q0516_MTS_SIGNALLING_DC_ONLY_COND 
          cuCtx.mtsLastScanPos = false;
    #endif
    
          csSplit->fracBits = xGetIntraFracBitsQT( *csSplit, partitioner, true, false, cu.ispMode ? 0 : -1, ispType, &cuCtx );
    
    
          //--- update cost ---
          csSplit->cost     = m_pcRdCost->calcRdCost(csSplit->fracBits, csSplit->dist);
    
        if( !sps.getUseLFNST() || validReturnFull || validReturnSplit )
    
          {
            // otherwise this would've happened in useSubStructure
            cs.picture->getRecoBuf( currArea.Y() ).copyFrom( cs.getRecoBuf( currArea.Y() ) );
            cs.picture->getPredBuf( currArea.Y() ).copyFrom( cs.getPredBuf( currArea.Y() ) );
          }
    
          if( cu.ispMode && earlySkipISP )
          {
            cs.cost = MAX_DOUBLE;
          }
          else
          {
            cs.cost = m_pcRdCost->calcRdCost( cs.fracBits, cs.dist );
            retVal = true;
          }
    
    bool IntraSearch::xRecurIntraCodingACTQT(CodingStructure &cs, Partitioner &partitioner, bool mtsCheckRangeFlag, int mtsFirstCheckId, int mtsLastCheckId, bool moreProbMTSIdxFirst)
    {
      const UnitArea &currArea = partitioner.currArea();
      uint32_t       currDepth = partitioner.currTrDepth;
      const Slice    &slice = *cs.slice;
      const SPS      &sps = *cs.sps;
    
      bool bCheckFull = !partitioner.canSplit(TU_MAX_TR_SPLIT, cs);
      bool bCheckSplit = !bCheckFull;
    
      TempCtx ctxStart(m_CtxCache, m_CABACEstimator->getCtx());
      TempCtx ctxBest(m_CtxCache);
    
      CodingStructure *csSplit = nullptr;
      CodingStructure *csFull = nullptr;
      if (bCheckSplit)
      {
        csSplit = &cs;
      }
      else if (bCheckFull)
      {
        csFull = &cs;
      }
    
      bool validReturnFull = false;
    
      if (bCheckFull)
      {
        TransformUnit        &tu = csFull->addTU(CS::getArea(*csFull, currArea, partitioner.chType), partitioner.chType);
        tu.depth = currDepth;
        const CodingUnit     &cu = *csFull->getCU(tu.Y().pos(), CHANNEL_TYPE_LUMA);
        const PredictionUnit &pu = *csFull->getPU(tu.Y().pos(), CHANNEL_TYPE_LUMA);
        CHECK(!tu.Y().valid() || !tu.Cb().valid() || !tu.Cr().valid(), "Invalid TU");
        CHECK(tu.cu != &cu, "wrong CU fetch");
        CHECK(cu.ispMode, "adaptive color transform cannot be applied to ISP");
        CHECK(pu.intraDir[CHANNEL_TYPE_CHROMA] != DM_CHROMA_IDX, "chroma should use DM mode for adaptive color transform");
    
        // 1. intra prediction and forward color transform
    
        PelUnitBuf orgBuf = csFull->getOrgBuf(tu);
        PelUnitBuf predBuf = csFull->getPredBuf(tu);
        PelUnitBuf resiBuf = csFull->getResiBuf(tu);
        PelUnitBuf orgResiBuf = csFull->getOrgResiBuf(tu);
    
        for (int i = 0; i < getNumberValidComponents(tu.chromaFormat); i++)
        {
          ComponentID          compID = (ComponentID)i;
          const CompArea       &area = tu.blocks[compID];
          const ChannelType    chType = toChannelType(compID);
    
          PelBuf         piOrg = orgBuf.bufs[compID];
          PelBuf         piPred = predBuf.bufs[compID];
          PelBuf         piResi = resiBuf.bufs[compID];
    
          initIntraPatternChType(*tu.cu, area);
          if (PU::isMIP(pu, chType))
          {
    
            predIntraMip(compID, piPred, pu);
          }
          else
          {
            predIntraAng(compID, piPred, pu);
          }
    
          piResi.copyFrom(piOrg);
    
          if (slice.getPicHeader()->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag() && compID == COMPONENT_Y)
    
          {
            CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
            PelBuf   tmpPred = m_tmpStorageLCU.getBuf(tmpArea);
            tmpPred.copyFrom(piPred);
            piResi.rspSignal(m_pcReshape->getFwdLUT());
            piResi.subtract(tmpPred);
          }
          else
            piResi.subtract(piPred);
        }
    
    
    #if JVET_Q0820_ACT
        resiBuf.colorSpaceConvert(orgResiBuf, true, cs.slice->clpRng(COMPONENT_Y));
    #else
    
        resiBuf.colorSpaceConvert(orgResiBuf, true);
    
    
        // 2. luma residual optimization 
        double     dSingleCostLuma = MAX_DOUBLE;
        bool       checkTransformSkip = sps.getTransformSkipEnabledFlag();
        int        bestLumaModeId = 0;
        uint8_t    nNumTransformCands = cu.mtsFlag ? 4 : 1;
        uint8_t    numTransformIndexCands = nNumTransformCands;
    
        const bool tsAllowed = TU::isTSAllowed(tu, COMPONENT_Y);
    
        const bool mtsAllowed = CU::isMTSAllowed(cu, COMPONENT_Y);
    
        std::vector<TrMode> trModes;
    
        if (sps.getUseLFNST())
        {
          checkTransformSkip &= tsAllowed;
          checkTransformSkip &= !cu.mtsFlag;
          checkTransformSkip &= !cu.lfnstIdx;
    
          if (!cu.mtsFlag && checkTransformSkip)
          {
            trModes.push_back(TrMode(0, true)); //DCT2
            trModes.push_back(TrMode(1, true)); //TS
          }
        }
        else
        {
    
    #if JVET_Q0820_ACT 
          if (m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING)
          {
            nNumTransformCands = 1;
            CHECK(!tsAllowed && !cu.bdpcmMode, "transform skip should be enabled for LS");
            if (cu.bdpcmMode)
            {
              trModes.push_back(TrMode(0, true));
            }
            else
            {
              trModes.push_back(TrMode(1, true));
            }
          }
          else
          {
    #endif 
    
          nNumTransformCands = 1 + (tsAllowed ? 1 : 0) + (mtsAllowed ? 4 : 0); // DCT + TS + 4 MTS = 6 tests
    
          trModes.push_back(TrMode(0, true)); //DCT2
          if (tsAllowed)
          {
            trModes.push_back(TrMode(1, true));
          }
          if (mtsAllowed)
          {
            for (int i = 2; i < 6; i++)
            {
              trModes.push_back(TrMode(i, true));
            }
          }
    
        }
    
        CodingStructure &saveLumaCS = *m_pSaveCS[0];
        TransformUnit   *tmpTU = nullptr;
        Distortion      singleDistTmpLuma = 0;
        uint64_t        singleTmpFracBits = 0;
        double          singleCostTmp = 0;
        int             firstCheckId = (sps.getUseLFNST() && mtsCheckRangeFlag && cu.mtsFlag) ? mtsFirstCheckId : 0;
        int             lastCheckId = sps.getUseLFNST() ? ((mtsCheckRangeFlag && cu.mtsFlag) ? (mtsLastCheckId + (int)checkTransformSkip) : (numTransformIndexCands - (firstCheckId + 1) + (int)checkTransformSkip)) : trModes[nNumTransformCands - 1].first;
        bool            isNotOnlyOneMode = sps.getUseLFNST() ? lastCheckId != firstCheckId : nNumTransformCands != 1;
    
        if (isNotOnlyOneMode)
        {
          saveLumaCS.pcv = csFull->pcv;
          saveLumaCS.picture = csFull->picture;
          saveLumaCS.area.repositionTo(csFull->area);
          saveLumaCS.clearTUs();
          tmpTU = &saveLumaCS.addTU(currArea, partitioner.chType);
        }
    
        bool    cbfBestMode = false;
        bool    cbfBestModeValid = false;
        bool    cbfDCT2 = true;
    
    
    #if JVET_Q0820_ACT
        if (m_pcEncCfg->getCostMode() != COST_LOSSLESS_CODING)
    #endif
    
        m_pcRdCost->lambdaAdjustColorTrans(true, COMPONENT_Y);
    
    
    #if JVET_Q0820_ACT
        for (int modeId = firstCheckId; modeId <= ((m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING) ? (nNumTransformCands - 1) : lastCheckId); modeId++)
    #else
    
        for (int modeId = firstCheckId; modeId <= lastCheckId; modeId++)
    
        {
          uint8_t transformIndex = modeId;
          csFull->getResiBuf(tu.Y()).copyFrom(csFull->getOrgResiBuf(tu.Y()));
    
          m_CABACEstimator->getCtx() = ctxStart;
          m_CABACEstimator->resetBits();
    
          if (sps.getUseLFNST())
          {
            if ((transformIndex < lastCheckId) || ((transformIndex == lastCheckId) && !checkTransformSkip)) //we avoid this if the mode is transformSkip
            {
              // Skip checking other transform candidates if zero CBF is encountered and it is the best transform so far
              if (m_pcEncCfg->getUseFastLFNST() && transformIndex && !cbfBestMode && cbfBestModeValid)
              {
                continue;
              }
            }
          }
          else
          {
    
    #if JVET_AHG14_LOSSLESS
            if (!(m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING))
            {
    #endif
    
            if (!cbfDCT2 || (m_pcEncCfg->getUseTransformSkipFast() && bestLumaModeId == 1))
            {
              break;
            }
            if (!trModes[modeId].second)
            {
              continue;
            }
    
    #if JVET_AHG14_LOSSLESS
            }
    #endif
    
            tu.mtsIdx[COMPONENT_Y] = trModes[modeId].first;
          }
    
          singleDistTmpLuma = 0;
          if (sps.getUseLFNST())
          {
            if (cu.mtsFlag)
            {
              if (moreProbMTSIdxFirst)
              {
                uint32_t uiIntraMode = pu.intraDir[CHANNEL_TYPE_LUMA];
    
                if (transformIndex == 1)
                {
                  tu.mtsIdx[COMPONENT_Y] = (uiIntraMode < 34) ? MTS_DST7_DCT8 : MTS_DCT8_DST7;
                }
                else if (transformIndex == 2)
                {
                  tu.mtsIdx[COMPONENT_Y] = (uiIntraMode < 34) ? MTS_DCT8_DST7 : MTS_DST7_DCT8;
                }
                else
                {
                  tu.mtsIdx[COMPONENT_Y] = MTS_DST7_DST7 + transformIndex;
                }
              }
              else
              {
                tu.mtsIdx[COMPONENT_Y] = MTS_DST7_DST7 + transformIndex;
              }
            }
            else
            {
              tu.mtsIdx[COMPONENT_Y] = transformIndex;
            }
    
            if (!cu.mtsFlag && checkTransformSkip)
            {
              xIntraCodingACTTUBlock(tu, COMPONENT_Y, singleDistTmpLuma, modeId == 0 ? &trModes : nullptr, true);
              if (modeId == 0)
              {
                for (int i = 0; i < 2; i++)
                {
                  if (trModes[i].second)
                  {
                    lastCheckId = trModes[i].first;
                  }
                }
              }
            }
            else
            {
              xIntraCodingACTTUBlock(tu, COMPONENT_Y, singleDistTmpLuma);
            }
          }
          else
          {
            if (nNumTransformCands > 1)
            {
              xIntraCodingACTTUBlock(tu, COMPONENT_Y, singleDistTmpLuma, modeId == 0 ? &trModes : nullptr, true);
              if (modeId == 0)
              {
                for (int i = 0; i < nNumTransformCands; i++)
                {
                  if (trModes[i].second)
                  {
                    lastCheckId = trModes[i].first;
                  }
                }
              }
            }
            else
            {
              xIntraCodingACTTUBlock(tu, COMPONENT_Y, singleDistTmpLuma);
            }
          }
    
    
    #if JVET_Q0516_MTS_SIGNALLING_DC_ONLY_COND 
          CUCtx cuCtx;
          cuCtx.isDQPCoded = true;
          cuCtx.isChromaQpAdjCoded = true;
    #endif
    
          //----- determine rate and r-d cost -----
          if ((sps.getUseLFNST() ? (modeId == lastCheckId && modeId != 0 && checkTransformSkip) : (trModes[modeId].first != 0)) && !TU::getCbfAtDepth(tu, COMPONENT_Y, currDepth))
          {
        // To avoid coding the TS flag when the CBF is zero, the combination of TS with a zero CBF is forbidden.
    
    #if JVET_Q0820_ACT
            if (m_pcEncCfg->getCostMode() != COST_LOSSLESS_CODING)
    #endif
    
            singleCostTmp = MAX_DOUBLE;
    
    #if JVET_Q0820_ACT
            else
            {
              singleTmpFracBits = xGetIntraFracBitsQT(*csFull, partitioner, true, false, -1, TU_NO_ISP);
              singleCostTmp = m_pcRdCost->calcRdCost(singleTmpFracBits, singleDistTmpLuma, false);
            }
    #endif
    
    #if JVET_Q0516_MTS_SIGNALLING_DC_ONLY_COND 
            singleTmpFracBits = xGetIntraFracBitsQT(*csFull, partitioner, true, false, -1, TU_NO_ISP, &cuCtx);
            
            if (tu.mtsIdx[COMPONENT_Y] > MTS_SKIP)
            {
              if (!cuCtx.mtsLastScanPos)
              {
                singleCostTmp = MAX_DOUBLE;
              }
              else
              {
    
    #if JVET_Q0820_ACT
                singleCostTmp = m_pcRdCost->calcRdCost(singleTmpFracBits, singleDistTmpLuma, false);
    #else
    
                singleCostTmp = m_pcRdCost->calcRdCost(singleTmpFracBits, singleDistTmpLuma);
    
            singleTmpFracBits = xGetIntraFracBitsQT(*csFull, partitioner, true, false, -1, TU_NO_ISP);
    
    #if JVET_Q0820_ACT
            singleCostTmp = m_pcRdCost->calcRdCost(singleTmpFracBits, singleDistTmpLuma, false);
    #else
    
            singleCostTmp = m_pcRdCost->calcRdCost(singleTmpFracBits, singleDistTmpLuma);
    
          }
    
          if (singleCostTmp < dSingleCostLuma)
          {
            dSingleCostLuma = singleCostTmp;
            validReturnFull = true;
    
            if (sps.getUseLFNST())
            {
              bestLumaModeId = modeId;
              cbfBestMode = TU::getCbfAtDepth(tu, COMPONENT_Y, currDepth);
              cbfBestModeValid = true;
            }
            else
            {
              bestLumaModeId = trModes[modeId].first;
              if (trModes[modeId].first == 0)
              {
                cbfDCT2 = TU::getCbfAtDepth(tu, COMPONENT_Y, currDepth);
              }
            }
    
            if (bestLumaModeId != lastCheckId)
            {
              saveLumaCS.getResiBuf(tu.Y()).copyFrom(csFull->getResiBuf(tu.Y()));
              tmpTU->copyComponentFrom(tu, COMPONENT_Y);
              ctxBest = m_CABACEstimator->getCtx();
            }
          }
        }
    
    
    #if JVET_Q0820_ACT
        if (m_pcEncCfg->getCostMode() != COST_LOSSLESS_CODING)
    #endif
    
        m_pcRdCost->lambdaAdjustColorTrans(false, COMPONENT_Y);
    
        if (sps.getUseLFNST())
        {
          if (!validReturnFull)
          {
            csFull->cost = MAX_DOUBLE;
            return false;
          }
        }
        else
        {
          CHECK(!validReturnFull, "no transform mode was tested for luma");
        }
    
        csFull->setDecomp(currArea.Y(), true);
        csFull->setDecomp(currArea.Cb(), true);
    
        if (bestLumaModeId != lastCheckId)
        {
          csFull->getResiBuf(tu.Y()).copyFrom(saveLumaCS.getResiBuf(tu.Y()));
          tu.copyComponentFrom(*tmpTU, COMPONENT_Y);
          m_CABACEstimator->getCtx() = ctxBest;
        }
    
        // 3. chroma residual optimization
        CodingStructure &saveChromaCS = *m_pSaveCS[1];
        saveChromaCS.pcv = csFull->pcv;
        saveChromaCS.picture = csFull->picture;
        saveChromaCS.area.repositionTo(csFull->area);
    
        saveChromaCS.initStructData(MAX_INT, true);
    
        tmpTU = &saveChromaCS.addTU(currArea, partitioner.chType);
    
        CompArea&  cbArea = tu.blocks[COMPONENT_Cb];
        CompArea&  crArea = tu.blocks[COMPONENT_Cr];
    
    
        ctxStart = m_CABACEstimator->getCtx();
        m_CABACEstimator->resetBits();
    
        bool doReshaping = (slice.getPicHeader()->getLmcsEnabledFlag() && slice.getPicHeader()->getLmcsChromaResidualScaleFlag() && (slice.isIntra() || m_pcReshape->getCTUFlag()) && (cbArea.width * cbArea.height > 4));
    
        if (doReshaping)
        {
          const Area      area = tu.Y().valid() ? tu.Y() : Area(recalcPosition(tu.chromaFormat, tu.chType, CHANNEL_TYPE_LUMA, tu.blocks[tu.chType].pos()), recalcSize(tu.chromaFormat, tu.chType, CHANNEL_TYPE_LUMA, tu.blocks[tu.chType].size()));
          const CompArea &areaY = CompArea(COMPONENT_Y, tu.chromaFormat, area);
          int             adj = m_pcReshape->calculateChromaAdjVpduNei(tu, areaY);
          tu.setChromaAdj(adj);
        }
    
        CompStorage  orgResiCb[5], orgResiCr[5]; // 0:std, 1-3:jointCbCr (placeholder at this stage), 4:crossComp
        orgResiCb[0].create(cbArea);
        orgResiCr[0].create(crArea);
        orgResiCb[0].copyFrom(csFull->getOrgResiBuf(cbArea));
        orgResiCr[0].copyFrom(csFull->getOrgResiBuf(crArea));
        if (doReshaping)
        {
          int cResScaleInv = tu.getChromaAdj();
          orgResiCb[0].scaleSignal(cResScaleInv, 1, slice.clpRng(COMPONENT_Cb));
          orgResiCr[0].scaleSignal(cResScaleInv, 1, slice.clpRng(COMPONENT_Cr));
        }
    
        // 3.1 regular chroma residual coding
        csFull->getResiBuf(cbArea).copyFrom(orgResiCb[0]);
        csFull->getResiBuf(crArea).copyFrom(orgResiCr[0]);
    
        for (uint32_t c = COMPONENT_Cb; c < ::getNumberValidTBlocks(*csFull->pcv); c++)
        {
          const ComponentID compID = ComponentID(c);
    
    #if JVET_Q0820_ACT 
          double  dSingleBestCostChroma = MAX_DOUBLE;
          int     bestModeId = -1;
    
    #if JVET_Q0784_LFNST_COMBINATION
          bool    tsAllowed = TU::isTSAllowed(tu, compID) && (m_pcEncCfg->getUseChromaTS()) && !cu.lfnstIdx;
    #else
    
          bool    tsAllowed = TU::isTSAllowed(tu, compID) && (m_pcEncCfg->getUseChromaTS());
    
          uint8_t numTransformCands = 1 + (tsAllowed ? 1 : 0);  // DCT + TS = 2 tests
          bool        cbfDCT2 = true;
    
          trModes.clear();
    
          if (m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING)
          {
            numTransformCands = 1;
            CHECK(!tsAllowed && !cu.bdpcmModeChroma, "transform skip should be enabled for LS");
            if (cu.bdpcmModeChroma)
            {
              trModes.push_back(TrMode(0, true));
            }
            else
            {
              trModes.push_back(TrMode(1, true));
            }
          }
          else
          {
            trModes.push_back(TrMode(0, true));                    // DCT
            if (tsAllowed)
            {
              trModes.push_back(TrMode(1, true));                  // TS
            }
          }
    
          if (m_pcEncCfg->getCostMode() != COST_LOSSLESS_CODING)
            m_pcRdCost->lambdaAdjustColorTrans(true, compID);
    
          TempCtx ctxBegin(m_CtxCache);
          ctxBegin = m_CABACEstimator->getCtx();
    
          for (int modeId = 0; modeId < numTransformCands; modeId++)
          {
            if (m_pcEncCfg->getCostMode() != COST_LOSSLESS_CODING)
            {
              if (modeId && !cbfDCT2)
              {
                continue;
              }
              if (!trModes[modeId].second)
              {
                continue;
              }
            }
    
            if (modeId > 0)
            {
              m_CABACEstimator->getCtx() = ctxBegin;
            }
    
            tu.mtsIdx[compID] = trModes[modeId].first;
    #endif
    
          Distortion singleDistChroma = 0;
    
    #if JVET_Q0820_ACT
          if (numTransformCands > 1)
          {
            xIntraCodingACTTUBlock(tu, compID, singleDistChroma, modeId == 0 ? &trModes : nullptr, true);
          }
          else
    #endif 
    
          xIntraCodingACTTUBlock(tu, compID, singleDistChroma);
    
    #if JVET_Q0820_ACT
          if (!tu.mtsIdx[compID])
          {
            cbfDCT2 = TU::getCbfAtDepth(tu, compID, currDepth);
          }
          uint64_t fracBitChroma = xGetIntraFracBitsQTChroma(tu, compID);
          double   dSingleCostChroma = m_pcRdCost->calcRdCost(fracBitChroma, singleDistChroma, false);
          if (dSingleCostChroma < dSingleBestCostChroma)
          {
            dSingleBestCostChroma = dSingleCostChroma;
            bestModeId = modeId;
            if (bestModeId != (numTransformCands - 1))
            {
              saveChromaCS.getResiBuf(tu.blocks[compID]).copyFrom(csFull->getResiBuf(tu.blocks[compID]));
              tmpTU->copyComponentFrom(tu, compID);
              ctxBest = m_CABACEstimator->getCtx();
            }
          }
          }
    
          if (bestModeId != (numTransformCands - 1))
          {
            csFull->getResiBuf(tu.blocks[compID]).copyFrom(saveChromaCS.getResiBuf(tu.blocks[compID]));
            tu.copyComponentFrom(*tmpTU, compID);
            m_CABACEstimator->getCtx() = ctxBest;
          }
    
          if (m_pcEncCfg->getCostMode() != COST_LOSSLESS_CODING)
            m_pcRdCost->lambdaAdjustColorTrans(false, compID);
    #else
    
          xGetIntraFracBitsQTChroma(tu, compID);
    
        }
    
        Position tuPos = tu.Y();
        tuPos.relativeTo(cu.Y());
        const UnitArea relativeUnitArea(tu.chromaFormat, Area(tuPos, tu.Y().size()));
        PelUnitBuf     invColorTransResidual = m_colorTransResiBuf.getBuf(relativeUnitArea);
    
    #if JVET_Q0820_ACT
        csFull->getResiBuf(tu).colorSpaceConvert(invColorTransResidual, false, cs.slice->clpRng(COMPONENT_Y));
    #else
    
        csFull->getResiBuf(tu).colorSpaceConvert(invColorTransResidual, false);
    
    
        Distortion totalDist = 0;
        for (uint32_t c = COMPONENT_Y; c < ::getNumberValidTBlocks(*csFull->pcv); c++)
        {
          const ComponentID compID = ComponentID(c);
          const CompArea&   area = tu.blocks[compID];
          PelBuf            piOrg = csFull->getOrgBuf(area);
          PelBuf            piReco = csFull->getRecoBuf(area);
          PelBuf            piPred = csFull->getPredBuf(area);
          PelBuf            piResi = invColorTransResidual.bufs[compID];
    
          piReco.reconstruct(piPred, piResi, cs.slice->clpRng(compID));
    
    
          if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcEncCfg->getLmcs()
    
            & slice.getPicHeader()->getLmcsEnabledFlag() && (m_pcReshape->getCTUFlag() || (isChroma(compID) && m_pcEncCfg->getReshapeIntraCMD()))))
    
          {
            const CPelBuf orgLuma = csFull->getOrgBuf(csFull->area.blocks[COMPONENT_Y]);
            if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()))
            {
              CompArea      tmpArea1(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
              PelBuf tmpRecLuma = m_tmpStorageLCU.getBuf(tmpArea1);
              tmpRecLuma.copyFrom(piReco);
              tmpRecLuma.rspSignal(m_pcReshape->getInvLUT());
              totalDist += m_pcRdCost->getDistPart(piOrg, tmpRecLuma, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
            }
            else
            {
              totalDist += m_pcRdCost->getDistPart(piOrg, piReco, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
            }
          }
          else
          {
            totalDist += m_pcRdCost->getDistPart(piOrg, piReco, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE);
          }
        }
    
        m_CABACEstimator->getCtx() = ctxStart;
        uint64_t totalBits = xGetIntraFracBitsQT(*csFull, partitioner, true, true, -1, TU_NO_ISP);
        double   totalCost = m_pcRdCost->calcRdCost(totalBits, totalDist);
    
        saveChromaCS.getResiBuf(cbArea).copyFrom(csFull->getResiBuf(cbArea));
        saveChromaCS.getResiBuf(crArea).copyFrom(csFull->getResiBuf(crArea));
        saveChromaCS.getRecoBuf(tu).copyFrom(csFull->getRecoBuf(tu));
        tmpTU->copyComponentFrom(tu, COMPONENT_Cb);
        tmpTU->copyComponentFrom(tu, COMPONENT_Cr);
        ctxBest = m_CABACEstimator->getCtx();
    
        // 3.2 jointCbCr
        double     bestCostJointCbCr = totalCost;
        Distortion bestDistJointCbCr = totalDist;
        uint64_t   bestBitsJointCbCr = totalBits;
        int        bestJointCbCr = tu.jointCbCr; assert(!bestJointCbCr);
    
        bool       lastIsBest = false;
        std::vector<int>  jointCbfMasksToTest;
        if (sps.getJointCbCrEnabledFlag() && (TU::getCbf(tu, COMPONENT_Cb) || TU::getCbf(tu, COMPONENT_Cr)))
        {
          jointCbfMasksToTest = m_pcTrQuant->selectICTCandidates(tu, orgResiCb, orgResiCr);
        }
    
        for (int cbfMask : jointCbfMasksToTest)
        {
    
          m_CABACEstimator->getCtx() = ctxStart;
          m_CABACEstimator->resetBits();
    
          Distortion distTmp = 0;
    
          tu.jointCbCr = (uint8_t)cbfMask;
    
    
    #if JVET_Q0512_ENC_CHROMA_TS_ACT
          ComponentID codeCompId = ((cbfMask >> 1) ? COMPONENT_Cb : COMPONENT_Cr);
          ComponentID otherCompId = ((codeCompId == COMPONENT_Cb) ? COMPONENT_Cr : COMPONENT_Cb);
    #if JVET_Q0784_LFNST_COMBINATION
          bool        tsAllowed = TU::isTSAllowed(tu, codeCompId) && (m_pcEncCfg->getUseChromaTS()) && !cu.lfnstIdx;
    #else
          bool        tsAllowed = TU::isTSAllowed(tu, codeCompId) && (m_pcEncCfg->getUseChromaTS());
    #endif
          uint8_t     numTransformCands = 1 + (tsAllowed ? 1 : 0); // DCT + TS = 2 tests
          bool        cbfDCT2 = true;
    
          trModes.clear();
          trModes.push_back(TrMode(0, true)); // DCT2 
          if (tsAllowed)
          {
            trModes.push_back(TrMode(1, true));//TS
          }
    
          for (int modeId = 0; modeId < numTransformCands; modeId++)
          {
            if (modeId && !cbfDCT2)
            {
              continue;
            }
            if (!trModes[modeId].second)
            {
              continue;
            }
            Distortion distTmp = 0;
            tu.mtsIdx[codeCompId] = trModes[modeId].first;
            tu.mtsIdx[otherCompId] = MTS_DCT2_DCT2;
            m_CABACEstimator->getCtx() = ctxStart;
    #endif
    
          csFull->getResiBuf(cbArea).copyFrom(orgResiCb[cbfMask]);
          csFull->getResiBuf(crArea).copyFrom(orgResiCr[cbfMask]);
    
    #if JVET_Q0512_ENC_CHROMA_TS_ACT
          if (nNumTransformCands > 1)
          {
            xIntraCodingACTTUBlock(tu, COMPONENT_Cb, distTmp, modeId == 0 ? &trModes : nullptr, true);
          }
          else
    #endif
    
          xIntraCodingACTTUBlock(tu, COMPONENT_Cb, distTmp);
    
          double   costTmp = std::numeric_limits<double>::max();
          uint64_t bitsTmp = 0;
          if (distTmp < std::numeric_limits<Distortion>::max())
          {
    
    #if JVET_Q0512_ENC_CHROMA_TS_ACT
            if (!tu.mtsIdx[codeCompId])
            {
              cbfDCT2 = true;
            }
    #endif
    
    #if JVET_Q0820_ACT
            csFull->getResiBuf(tu).colorSpaceConvert(invColorTransResidual, false, csFull->slice->clpRng(COMPONENT_Y));
    #else
    
            csFull->getResiBuf(tu).colorSpaceConvert(invColorTransResidual, false);
    
            distTmp = 0;
            for (uint32_t c = COMPONENT_Y; c < ::getNumberValidTBlocks(*csFull->pcv); c++)
            {
              const ComponentID compID = ComponentID(c);
              const CompArea&   area = tu.blocks[compID];
              PelBuf            piOrg = csFull->getOrgBuf(area);
              PelBuf            piReco = csFull->getRecoBuf(area);
              PelBuf            piPred = csFull->getPredBuf(area);
              PelBuf            piResi = invColorTransResidual.bufs[compID];
    
              piReco.reconstruct(piPred, piResi, cs.slice->clpRng(compID));
    
              if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcEncCfg->getLmcs()
    
                & slice.getPicHeader()->getLmcsEnabledFlag() && (m_pcReshape->getCTUFlag() || (isChroma(compID) && m_pcEncCfg->getReshapeIntraCMD()))))
    
              {
                const CPelBuf orgLuma = csFull->getOrgBuf(csFull->area.blocks[COMPONENT_Y]);
                if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()))
                {
                  CompArea      tmpArea1(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
                  PelBuf tmpRecLuma = m_tmpStorageLCU.getBuf(tmpArea1);
                  tmpRecLuma.copyFrom(piReco);
                  tmpRecLuma.rspSignal(m_pcReshape->getInvLUT());
                  distTmp += m_pcRdCost->getDistPart(piOrg, tmpRecLuma, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
                }
                else
                {
                  distTmp += m_pcRdCost->getDistPart(piOrg, piReco, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
                }
              }
              else
              {
                distTmp += m_pcRdCost->getDistPart(piOrg, piReco, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE);
              }
            }
    
            bitsTmp = xGetIntraFracBitsQT(*csFull, partitioner, true, true, -1, TU_NO_ISP);
            costTmp = m_pcRdCost->calcRdCost(bitsTmp, distTmp);
          }
    
    #if JVET_Q0512_ENC_CHROMA_TS_ACT
          else if (!tu.mtsIdx[codeCompId])
          {
            cbfDCT2 = false;
          }
    #endif 
    
    
          if (costTmp < bestCostJointCbCr)
          {
            bestCostJointCbCr = costTmp;
            bestDistJointCbCr = distTmp;
            bestBitsJointCbCr = bitsTmp;
            bestJointCbCr = tu.jointCbCr;
    
    #if JVET_Q0512_ENC_CHROMA_TS_ACT
            lastIsBest = (cbfMask == jointCbfMasksToTest.back() && modeId == (numTransformCands - 1));
    #else
    
            lastIsBest = (cbfMask == jointCbfMasksToTest.back());
    
    
            // store data
            if (!lastIsBest)
            {
              saveChromaCS.getResiBuf(cbArea).copyFrom(csFull->getResiBuf(cbArea));
              saveChromaCS.getResiBuf(crArea).copyFrom(csFull->getResiBuf(crArea));
              saveChromaCS.getRecoBuf(tu).copyFrom(csFull->getRecoBuf(tu));
              tmpTU->copyComponentFrom(tu, COMPONENT_Cb);
              tmpTU->copyComponentFrom(tu, COMPONENT_Cr);
    
              ctxBest = m_CABACEstimator->getCtx();
            }
          }
    
        }
    
        if (!lastIsBest)
        {
          csFull->getResiBuf(cbArea).copyFrom(saveChromaCS.getResiBuf(cbArea));
          csFull->getResiBuf(crArea).copyFrom(saveChromaCS.getResiBuf(crArea));
          csFull->getRecoBuf(tu).copyFrom(saveChromaCS.getRecoBuf(tu));
          tu.copyComponentFrom(*tmpTU, COMPONENT_Cb);
          tu.copyComponentFrom(*tmpTU, COMPONENT_Cr);
    
          m_CABACEstimator->getCtx() = ctxBest;
        }
        tu.jointCbCr = bestJointCbCr;
        csFull->picture->getRecoBuf(tu).copyFrom(csFull->getRecoBuf(tu));
    
        csFull->dist += bestDistJointCbCr;
        csFull->fracBits += bestBitsJointCbCr;
        csFull->cost = m_pcRdCost->calcRdCost(csFull->fracBits, csFull->dist);
      }
    
      bool validReturnSplit = false;
      if (bCheckSplit)
      {
        if (partitioner.canSplit(TU_MAX_TR_SPLIT, *csSplit))
        {
          partitioner.splitCurrArea(TU_MAX_TR_SPLIT, *csSplit);
        }
    
        bool splitIsSelected = true;
        do
        {
          bool tmpValidReturnSplit = xRecurIntraCodingACTQT(*csSplit, partitioner, mtsCheckRangeFlag, mtsFirstCheckId, mtsLastCheckId, moreProbMTSIdxFirst);
          if (sps.getUseLFNST())
          {
            if (!tmpValidReturnSplit)
            {
              splitIsSelected = false;
              break;
            }
          }
          else
          {
            CHECK(!tmpValidReturnSplit, "invalid RD of sub-TU partitions for ACT");
          }
        } while (partitioner.nextPart(*csSplit));
    
        partitioner.exitCurrSplit();
    
        if (splitIsSelected)
        {
          unsigned compCbf[3] = { 0, 0, 0 };
          for (auto &currTU : csSplit->traverseTUs(currArea, partitioner.chType))
          {
            for (unsigned ch = 0; ch < getNumberValidTBlocks(*csSplit->pcv); ch++)
            {
              compCbf[ch] |= (TU::getCbfAtDepth(currTU, ComponentID(ch), currDepth + 1) ? 1 : 0);
            }
          }
    
          for (auto &currTU : csSplit->traverseTUs(currArea, partitioner.chType))
          {
            TU::setCbfAtDepth(currTU, COMPONENT_Y, currDepth, compCbf[COMPONENT_Y]);
            TU::setCbfAtDepth(currTU, COMPONENT_Cb, currDepth, compCbf[COMPONENT_Cb]);
            TU::setCbfAtDepth(currTU, COMPONENT_Cr, currDepth, compCbf[COMPONENT_Cr]);
          }
    
          m_CABACEstimator->getCtx() = ctxStart;
          csSplit->fracBits = xGetIntraFracBitsQT(*csSplit, partitioner, true, true, -1, TU_NO_ISP);
          csSplit->cost = m_pcRdCost->calcRdCost(csSplit->fracBits, csSplit->dist);
    
          validReturnSplit = true;
        }
      }
    
      bool retVal = false;
      if (csFull || csSplit)
      {
        if (sps.getUseLFNST())
        {
          if (validReturnFull || validReturnSplit)
          {
            retVal = true;
          }
        }
        else
        {
          CHECK(!validReturnFull && !validReturnSplit, "illegal TU optimization");
          retVal = true;
        }
      }
      return retVal;
    }
    
    
    ChromaCbfs IntraSearch::xRecurIntraChromaCodingQT( CodingStructure &cs, Partitioner& partitioner, const double bestCostSoFar, const PartSplit ispType )
    
      const bool keepResi                 = cs.sps->getUseLMChroma() || KEEP_PRED_AND_RESI_SIGNALS;
    
      if( !currArea.Cb().valid() ) return ChromaCbfs( false );
    
    
      TransformUnit &currTU               = *cs.getTU( currArea.chromaPos(), CHANNEL_TYPE_CHROMA );
      const PredictionUnit &pu            = *cs.getPU( currArea.chromaPos(), CHANNEL_TYPE_CHROMA );
    
    
      uint32_t     currDepth                  = partitioner.currTrDepth;
      const PPS &pps                      = *cs.pps;
      ChromaCbfs cbfs                     ( false );
    
      if (currDepth == currTU.depth)
      {
        if (!currArea.Cb().valid() || !currArea.Cr().valid())
        {
          return cbfs;
        }
    
    
        CodingStructure &saveCS = *m_pSaveCS[1];
        saveCS.pcv      = cs.pcv;
        saveCS.picture  = cs.picture;
        saveCS.area.repositionTo( cs.area );
    
        saveCS.initStructData( MAX_INT, true );
    
        if( !currTU.cu->isSepTree() && currTU.cu->ispMode )
    
        {
          saveCS.clearCUs();
          CodingUnit& auxCU = saveCS.addCU( *currTU.cu, partitioner.chType );
          auxCU.ispMode = currTU.cu->ispMode;
          saveCS.sps = currTU.cs->sps;
          saveCS.clearPUs();
          saveCS.addPU( *currTU.cu->firstPU, partitioner.chType );
        }
    
    
        TransformUnit &tmpTU = saveCS.addTU(currArea, partitioner.chType);
    
    
        cs.setDecomp(currArea.Cb(), true); // set in advance (required for Cb2/Cr2 in 4:2:2 video)
    
        const unsigned      numTBlocks  = ::getNumberValidTBlocks( *cs.pcv );
    
        CompArea&  cbArea         = currTU.blocks[COMPONENT_Cb];
        CompArea&  crArea         = currTU.blocks[COMPONENT_Cr];
        double     bestCostCb     = MAX_DOUBLE;