Skip to content
Snippets Groups Projects
IntraSearch.cpp 235 KiB
Newer Older
  • Learn to ignore specific revisions
  •     }
        cs.cost += singleCostTmp;
        cs.dist += singleDistTmpLuma;
        cs.fracBits += singleTmpFracBits;
    Santiago de Luxán Hernández's avatar
    Santiago de Luxán Hernández committed
        splitCbfLuma |= TU::getCbfAtDepth(*cs.getTU(partitioner.currArea().lumaPos(), partitioner.chType, subTuCounter - 1), COMPONENT_Y, partitioner.currTrDepth);
        int nSubPartitions = m_ispTestedModes[cu.lfnstIdx].numTotalParts[cu.ispMode - 1];
        int nSubPartitions = m_ispTestedModes.numTotalParts[cu.ispMode - 1];
        if (subTuCounter < nSubPartitions)
          // exit condition if the accumulated cost is already larger than the best cost so far (no impact in RD performance)
          if (cs.cost > bestCostSoFar)
            earlySkipISP = true;
          else if (subTuCounter < nSubPartitions)
            // more restrictive exit condition
            double threshold = nSubPartitions == 2 ? 0.95 : subTuCounter == 1 ? 0.83 : 0.91;
            if (subTuCounter < nSubPartitions && cs.cost > bestCostSoFar * threshold)
              earlySkipISP = true;
      } while (partitioner.nextPart(cs));   // subpartitions loop
      const UnitArea& currArea = partitioner.currArea();
      const uint32_t  currDepth = partitioner.currTrDepth;
      if (earlySkipISP)
        cs.cost = MAX_DOUBLE;
        cs.cost = m_pcRdCost->calcRdCost(cs.fracBits, cs.dist);
        // The cost check is necessary here again to avoid superfluous operations if the maximum number of coded subpartitions was reached and yet ISP did not win
        if (cs.cost < bestCostSoFar)
          for (auto& ptu : cs.tus)
            if (currArea.Y().contains(ptu->Y()))
    Santiago de Luxán Hernández's avatar
    Santiago de Luxán Hernández committed
              TU::setCbfAtDepth(*ptu, COMPONENT_Y, currDepth, splitCbfLuma ? 1 : 0);
          cs.cost = MAX_DOUBLE;
          earlySkipISP = true;
      return !earlySkipISP;
    // Rate-distortion evaluation of luma intra coding for the area currently
    // addressed by the partitioner. Depending on what the partitioner allows,
    // the function tests the unsplit ("full") TU with a list of transform
    // candidates and/or recurses into sub-partitions (TU_MAX_TR_SPLIT, or the
    // ISP split type when cu.ispMode is set).
    //
    // Parameters (as used in the visible code):
    //   cs                  - coding structure that receives cost, dist and fracBits.
    //   partitioner         - supplies currArea(), currTrDepth, chType and the split iteration.
    //   bestCostSoFar       - reference cost used for the ISP early-exit comparisons.
    //   subTuIdx            - initial value of the running sub-TU counter.
    //   ispType             - split type queried and applied when cu.ispMode is set.
    //   ispIsCurrentWinner  - enables the fast-ISP threshold test on non-ISP candidates.
    //   mtsCheckRangeFlag, mtsFirstCheckId, mtsLastCheckId
    //                       - restrict the range of candidate ids when LFNST is
    //                         enabled and cu.mtsFlag is set.
    //   moreProbMTSIdxFirst - selects an intra-mode dependent order of the MTS indices.
    //
    // NOTE(review): this listing is incomplete. Lines consisting only of braces,
    // else, do, #else and #endif are not visible, retVal / numSig / bestDCT2cost
    // are used without a visible declaration, and a few statements occur twice
    // (they look like the old and the new revision of the same line). The return
    // statement is not visible either; presumably retVal is returned - confirm
    // against the complete file before relying on any comment below.
    bool IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &partitioner, const double bestCostSoFar, const int subTuIdx, const PartSplit ispType, const bool ispIsCurrentWinner, bool mtsCheckRangeFlag, int mtsFirstCheckId, int mtsLastCheckId, bool moreProbMTSIdxFirst )
            // Running sub-TU index; starts at the value supplied by the caller.
            int   subTuCounter = subTuIdx;
      const UnitArea &currArea = partitioner.currArea();
      const CodingUnit     &cu = *cs.getCU( currArea.lumaPos(), partitioner.chType );
            bool  earlySkipISP = false;
    Tung Nguyen's avatar
    Tung Nguyen committed
      uint32_t currDepth       = partitioner.currTrDepth;
      const SPS &sps           = *cs.sps;
      const PPS &pps           = *cs.pps;
      const bool keepResi      = pps.getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() || KEEP_PRED_AND_RESI_SIGNALS;
      bool bCheckFull          = true;
      bool bCheckSplit         = false;
    Karsten Suehring's avatar
    Karsten Suehring committed
      // Default decision: test the unsplit TU only when a TU_MAX_TR_SPLIT split is
      // not possible, otherwise test the split.
      bCheckFull               = !partitioner.canSplit( TU_MAX_TR_SPLIT, cs );
      bCheckSplit              = partitioner.canSplit( TU_MAX_TR_SPLIT, cs );
      // With ISP active the decision follows the ISP split type instead.
      if( cu.ispMode )
        bCheckSplit = partitioner.canSplit( ispType, cs );
        bCheckFull = !bCheckSplit;
      // Best result found so far for the unsplit TU.
      double     dSingleCost                        = MAX_DOUBLE;
      Distortion uiSingleDistLuma                   = 0;
      uint64_t   singleFracBits                     = 0;
      bool       checkTransformSkip                 = sps.getTransformSkipEnabledFlag();
      int        bestModeId[ MAX_NUM_COMPONENT ]    = { 0, 0, 0 };
      uint8_t    nNumTransformCands                 = cu.mtsFlag ? 4 : 1;
      uint8_t    numTransformIndexCands             = nNumTransformCands;
      // Snapshot of the entropy-coder context at entry; it is written back to the
      // estimator at several points below before bits are estimated again.
      const TempCtx ctxStart  ( m_CtxCache, m_CABACEstimator->getCtx() );
      TempCtx       ctxBest   ( m_CtxCache );
      CodingStructure *csSplit = nullptr;
      CodingStructure *csFull  = nullptr;
      CUCtx cuCtx;
      cuCtx.isDQPCoded = true;
      cuCtx.isChromaQpAdjCoded = true;
      bool validReturnFull = false;
      // ---- Unsplit (full) TU evaluation ----
      // NOTE(review): csFull is dereferenced below, but only its nullptr
      // initialisation is visible in this listing; the assignment must be on a
      // line that is not shown - confirm against the complete file.
      if( bCheckFull )
        csFull->cost = 0.0;
        TransformUnit &tu = csFull->addTU( CS::getArea( *csFull, currArea, partitioner.chType ), partitioner.chType );
        tu.depth = currDepth;
        const bool tsAllowed  = TU::isTSAllowed( tu, COMPONENT_Y );
        // NOTE(review): two declarations of mtsAllowed follow; they look like two
        // revisions of the same line and cannot both be compiled in one scope.
        const bool mtsAllowed = CU::isMTSAllowed( cu, COMPONENT_Y );
        const bool mtsAllowed = TU::isMTSAllowed( tu, COMPONENT_Y );
        // Build the list of transform candidates (TrMode id plus an enabled flag).
        // NOTE(review): the LFNST and non-LFNST paths appear flattened here, so the
        // exact branch structure cannot be read from this listing.
        std::vector<TrMode> trModes;
        if( sps.getUseLFNST() )
          checkTransformSkip &= tsAllowed;
          checkTransformSkip &= !cu.mtsFlag;
          checkTransformSkip &= !cu.lfnstIdx;
          if( !cu.mtsFlag && checkTransformSkip )
            trModes.push_back( TrMode( 0, true ) ); //DCT2
            trModes.push_back( TrMode( 1, true ) ); //TS
          nNumTransformCands = 1 + ( tsAllowed ? 1 : 0 ) + ( mtsAllowed ? 4 : 0 ); // DCT + TS + 4 MTS = 6 tests
          trModes.push_back( TrMode( 0, true ) ); //DCT2
          if( tsAllowed )
            trModes.push_back( TrMode( 1, true ) );
          if( mtsAllowed )
            for( int i = 2; i < 6; i++ )
              trModes.push_back( TrMode( i, true ) );
        CHECK( !tu.Y().valid(), "Invalid TU" );
        CodingStructure &saveCS = *m_pSaveCS[0];
        TransformUnit *tmpTU = nullptr;
        // Per-candidate scratch values, overwritten on every loop iteration.
        Distortion singleDistTmpLuma = 0;
        uint64_t     singleTmpFracBits = 0;
        double     singleCostTmp     = 0;
        // First and last candidate id visited by the loop below.
        int        firstCheckId      = ( sps.getUseLFNST() && mtsCheckRangeFlag && cu.mtsFlag ) ? mtsFirstCheckId : 0;
        //we add the MTS candidates to the loop. TransformSkip will still be the last one to be checked (when modeId == lastCheckId) as long as checkTransformSkip is true
        int        lastCheckId       = sps.getUseLFNST() ? ( ( mtsCheckRangeFlag && cu.mtsFlag ) ? ( mtsLastCheckId + ( int ) checkTransformSkip ) : ( numTransformIndexCands - ( firstCheckId + 1 ) + ( int ) checkTransformSkip ) ) :
                                       trModes[ nNumTransformCands - 1 ].first;
        bool isNotOnlyOneMode        = sps.getUseLFNST() ? lastCheckId != firstCheckId : nNumTransformCands != 1;
        // When more than one candidate is tested, a scratch TU in saveCS is set up;
        // further down the best candidate so far is copied into it.
        if( isNotOnlyOneMode )
          saveCS.pcv     = cs.pcv;
          saveCS.picture = cs.picture;
          tmpTU = &saveCS.addTU(currArea, partitioner.chType);
        // Candidate loop (below): each mode id selects tu.mtsIdx, codes the luma
        // block through xIntraCodingTUBlock, derives the RD cost from the estimated
        // bits and the distortion, and keeps the cheapest candidate in dSingleCost.
        // The threshold computed below is 1 unless fast ISP is enabled, this CU is
        // not ISP, ISP is the current winner and more than one candidate exists; in
        // that case it is 1 + 1.4 / sqrt( width * height ) and scales bestCostSoFar
        // in the comparison against bestDCT2cost.
        bool    cbfBestMode      = false;
        bool    cbfBestModeValid = false;
    Tung Nguyen's avatar
    Tung Nguyen committed
        bool    cbfDCT2  = true;
        double threshold = m_pcEncCfg->getUseFastISP() && !cu.ispMode && ispIsCurrentWinner && nNumTransformCands > 1 ? 1 + 1.4 / sqrt( cu.lwidth() * cu.lheight() ) : 1;
        for( int modeId = firstCheckId; modeId <= ( sps.getUseLFNST() ? lastCheckId : ( nNumTransformCands - 1 ) ); modeId++ )
          uint8_t transformIndex = modeId;
          if( sps.getUseLFNST() )
            if( ( transformIndex < lastCheckId ) || ( ( transformIndex == lastCheckId ) && !checkTransformSkip ) ) //we avoid this if the mode is transformSkip
              // Skip checking other transform candidates if zero CBF is encountered and it is the best transform so far
              if( m_pcEncCfg->getUseFastLFNST() && transformIndex && !cbfBestMode && cbfBestModeValid )
            if( !( m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING ) )
    #if JVET_P0058_CHROMA_TS
            if( !cbfDCT2 || ( m_pcEncCfg->getUseTransformSkipFast() && bestModeId[ COMPONENT_Y ] == MTS_SKIP))
            if( !cbfDCT2 || ( m_pcEncCfg->getUseTransformSkipFast() && bestModeId[ COMPONENT_Y ] == 1 ) )
            if( !trModes[ modeId ].second )
            //we compare the DCT-II cost against the best ISP cost so far (except for TS)
    #if JVET_P0058_CHROMA_TS
            if (m_pcEncCfg->getUseFastISP() && !cu.ispMode && ispIsCurrentWinner && trModes[modeId].first != MTS_DCT2_DCT2 && (trModes[modeId].first != MTS_SKIP || !tsAllowed) && bestDCT2cost > bestCostSoFar * threshold)
            if( m_pcEncCfg->getUseFastISP() && !cu.ispMode && ispIsCurrentWinner && trModes[ modeId ].first != 0 && ( trModes[ modeId ].first != 1 || !tsAllowed ) && bestDCT2cost > bestCostSoFar * threshold )
    #if JVET_P0058_CHROMA_TS
            tu.mtsIdx[COMPONENT_Y] = trModes[modeId].first;
            tu.mtsIdx = trModes[ modeId ].first;
          if ((modeId != firstCheckId) && isNotOnlyOneMode)
            m_CABACEstimator->getCtx() = ctxStart;
          int default0Save1Load2 = 0;
          singleDistTmpLuma = 0;
          if( modeId == firstCheckId && ( sps.getUseLFNST() ? ( modeId != lastCheckId ) : ( nNumTransformCands > 1 ) ) )
            if( sps.getUseLFNST() && !cbfBestModeValid )
              default0Save1Load2 = 1;
              default0Save1Load2 = 2;
          if( sps.getUseLFNST() )
            if( cu.mtsFlag )
              if( moreProbMTSIdxFirst )
                const ChannelType     chType      = toChannelType( COMPONENT_Y );
                const CompArea&       area        = tu.blocks[ COMPONENT_Y ];
                const PredictionUnit& pu          = *cs.getPU( area.pos(), chType );
                uint32_t              uiIntraMode = pu.intraDir[ chType ];
                if( transformIndex == 1 )
    #if JVET_P0058_CHROMA_TS
                  tu.mtsIdx[COMPONENT_Y] = (uiIntraMode < 34) ? MTS_DST7_DCT8 : MTS_DCT8_DST7;
                  tu.mtsIdx = ( uiIntraMode < 34 ) ? MTS_DST7_DCT8 : MTS_DCT8_DST7;
                else if( transformIndex == 2 )
    #if JVET_P0058_CHROMA_TS
                  tu.mtsIdx[COMPONENT_Y] = (uiIntraMode < 34) ? MTS_DCT8_DST7 : MTS_DST7_DCT8;
                  tu.mtsIdx = ( uiIntraMode < 34 ) ? MTS_DCT8_DST7 : MTS_DST7_DCT8;
    #if JVET_P0058_CHROMA_TS
                  tu.mtsIdx[COMPONENT_Y] = MTS_DST7_DST7 + transformIndex;
                  tu.mtsIdx = MTS_DST7_DST7 + transformIndex;
    #if JVET_P0058_CHROMA_TS
                tu.mtsIdx[COMPONENT_Y] = MTS_DST7_DST7 + transformIndex;
                tu.mtsIdx = MTS_DST7_DST7 + transformIndex;
    #if JVET_P0058_CHROMA_TS
              tu.mtsIdx[COMPONENT_Y] = transformIndex;
              tu.mtsIdx = transformIndex;
            if( !cu.mtsFlag && checkTransformSkip )
              xIntraCodingTUBlock( tu, COMPONENT_Y, false, singleDistTmpLuma, default0Save1Load2, &numSig, modeId == 0 ? &trModes : nullptr, true );
              if( modeId == 0 )
                for( int i = 0; i < 2; i++ )
                  if( trModes[ i ].second )
                    lastCheckId = trModes[ i ].first;
              xIntraCodingTUBlock( tu, COMPONENT_Y, false, singleDistTmpLuma, default0Save1Load2, &numSig );
            if( nNumTransformCands > 1 )
              xIntraCodingTUBlock( tu, COMPONENT_Y, false, singleDistTmpLuma, default0Save1Load2, &numSig, modeId == 0 ? &trModes : nullptr, true );
              if( modeId == 0 )
                for( int i = 0; i < nNumTransformCands; i++ )
                  if( trModes[ i ].second )
                    lastCheckId = trModes[ i ].first;
              xIntraCodingTUBlock( tu, COMPONENT_Y, false, singleDistTmpLuma, default0Save1Load2, &numSig );
          if( ( sps.getUseLFNST() ? ( modeId == lastCheckId && modeId != 0 && checkTransformSkip ) : ( trModes[ modeId ].first != 0 ) ) && !TU::getCbfAtDepth( tu, COMPONENT_Y, currDepth ) )
            //In order not to code TS flag when cbf is zero, the case for TS with cbf being zero is forbidden.
            singleCostTmp = MAX_DOUBLE;
            if( cu.ispMode && m_pcRdCost->calcRdCost( csFull->fracBits, csFull->dist + singleDistTmpLuma ) > bestCostSoFar )
              earlySkipISP = true;
              singleTmpFracBits = xGetIntraFracBitsQT( *csFull, partitioner, true, false, subTuCounter, ispType, &cuCtx );
              singleTmpFracBits = xGetIntraFracBitsQT( *csFull, partitioner, true, false, subTuCounter, ispType );
            singleCostTmp     = m_pcRdCost->calcRdCost( singleTmpFracBits, singleDistTmpLuma );
          if ( !cu.ispMode && nNumTransformCands > 1 && modeId == firstCheckId )
            bestDCT2cost = singleCostTmp;
          if (singleCostTmp < dSingleCost)
            dSingleCost       = singleCostTmp;
            uiSingleDistLuma  = singleDistTmpLuma;
            singleFracBits    = singleTmpFracBits;
    Tung Nguyen's avatar
    Tung Nguyen committed
              // Record the winning candidate id and the luma CBF it produced.
              bestModeId[ COMPONENT_Y ] = modeId;
              cbfBestMode = TU::getCbfAtDepth( tu, COMPONENT_Y, currDepth );
              cbfBestModeValid = true;
              validReturnFull = true;
    Tung Nguyen's avatar
    Tung Nguyen committed
              bestModeId[ COMPONENT_Y ] = trModes[ modeId ].first;
              if( trModes[ modeId ].first == 0 )
                cbfDCT2 = TU::getCbfAtDepth( tu, COMPONENT_Y, currDepth );
            if( bestModeId[COMPONENT_Y] != lastCheckId )
              saveCS.getPredBuf( tu.Y() ).copyFrom( csFull->getPredBuf( tu.Y() ) );
              saveCS.getRecoBuf( tu.Y() ).copyFrom( csFull->getRecoBuf( tu.Y() ) );
              if( keepResi )
                saveCS.getResiBuf   ( tu.Y() ).copyFrom( csFull->getResiBuf   ( tu.Y() ) );
                saveCS.getOrgResiBuf( tu.Y() ).copyFrom( csFull->getOrgResiBuf( tu.Y() ) );
              tmpTU->copyComponentFrom( tu, COMPONENT_Y );
              ctxBest = m_CABACEstimator->getCtx();
        if( sps.getUseLFNST() && !validReturnFull )
          csFull->cost = MAX_DOUBLE;
            ctxBest = m_CABACEstimator->getCtx();
          if( bestModeId[COMPONENT_Y] != lastCheckId )
            csFull->getPredBuf( tu.Y() ).copyFrom( saveCS.getPredBuf( tu.Y() ) );
            csFull->getRecoBuf( tu.Y() ).copyFrom( saveCS.getRecoBuf( tu.Y() ) );
            if( keepResi )
              csFull->getResiBuf   ( tu.Y() ).copyFrom( saveCS.getResiBuf   ( tu.Y() ) );
              csFull->getOrgResiBuf( tu.Y() ).copyFrom( saveCS.getOrgResiBuf( tu.Y() ) );
            tu.copyComponentFrom( *tmpTU, COMPONENT_Y );
            if( !bCheckSplit )
              m_CABACEstimator->getCtx() = ctxBest;
          else if( bCheckSplit )
            ctxBest = m_CABACEstimator->getCtx();
          csFull->cost     += dSingleCost;
          csFull->dist     += uiSingleDistLuma;
          csFull->fracBits += singleFracBits;
      bool validReturnSplit = false;
      if( bCheckSplit )
        //----- store full entropy coding status, load original entropy coding status -----
        if( bCheckFull )
          m_CABACEstimator->getCtx() = ctxStart;
        //----- code splitted block -----
        csSplit->cost = 0;
        bool uiSplitCbfLuma  = false;
        bool splitIsSelected = true;
    Karsten Suehring's avatar
    Karsten Suehring committed
        // Select the split to iterate over: TU_MAX_TR_SPLIT when it is possible,
        // the ISP split type when cu.ispMode is set.
        // NOTE(review): the splitCurrArea call for TU_MAX_TR_SPLIT and the opening
        // of the do-while loop closed further down are not visible in this listing.
        if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
        if( cu.ispMode )
          partitioner.splitCurrArea( ispType, *csSplit );
          // Recurse into the current sub-partition. ispIsCurrentWinner is passed as
          // false and moreProbMTSIdxFirst is not passed (presumably it has a default
          // in the declaration - confirm in the header).
          bool tmpValidReturnSplit = xRecurIntraCodingLumaQT( *csSplit, partitioner, bestCostSoFar, subTuCounter, ispType, false, mtsCheckRangeFlag, mtsFirstCheckId, mtsLastCheckId );
          // Advance the sub-TU counter unless it holds -1.
          subTuCounter += subTuCounter != -1 ? 1 : 0;
          if( sps.getUseLFNST() && !tmpValidReturnSplit )
            splitIsSelected = false;
          if( !cu.ispMode )
            csSplit->setDecomp( partitioner.currArea().Y() );
          else if( CU::isISPFirst( cu, partitioner.currArea().Y(), COMPONENT_Y ) )
            csSplit->setDecomp( cu.Y() );
          // Fold the luma CBF of the sub-partition just coded into the split CBF.
          uiSplitCbfLuma |= TU::getCbfAtDepth( *csSplit->getTU( partitioner.currArea().lumaPos(), partitioner.chType, subTuCounter - 1 ), COMPONENT_Y, partitioner.currTrDepth );
          if( cu.ispMode )
            //exit condition if the accumulated cost is already larger than the best cost so far (no impact in RD performance)
            if( csSplit->cost > bestCostSoFar )
              earlySkipISP    = true;
              splitIsSelected = false;
              //more restrictive exit condition
              // The number of sub-partitions is the CU size divided by the size of
              // its first TU along the split direction (rows use heights, otherwise
              // widths).
              bool tuIsDividedInRows = CU::divideTuInRows( cu );
              int nSubPartitions = tuIsDividedInRows ? cu.lheight() >> floorLog2(cu.firstTU->lheight()) : cu.lwidth() >> floorLog2(cu.firstTU->lwidth());
              double threshold = nSubPartitions == 2 ? 0.95 : subTuCounter == 1 ? 0.83 : 0.91;
              if( subTuCounter < nSubPartitions && csSplit->cost > bestCostSoFar*threshold )
                earlySkipISP    = true;
                splitIsSelected = false;
        } while( partitioner.nextPart( *csSplit ) );
        // If the split is still selected: write the combined luma CBF to every TU
        // inside the current area, reset the estimator to the entry context,
        // re-estimate the bits of the whole split and recompute its cost.
        if( splitIsSelected )
          for( auto &ptu : csSplit->tus )
            if( currArea.Y().contains( ptu->Y() ) )
              TU::setCbfAtDepth( *ptu, COMPONENT_Y, currDepth, uiSplitCbfLuma ? 1 : 0 );
          //----- restore context states -----
          m_CABACEstimator->getCtx() = ctxStart;
          cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_LUMA] = false;
          cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_CHROMA] = false;
          cuCtx.lfnstLastScanPos = false;
          cuCtx.violatesMtsCoeffConstraint = false;
          // NOTE(review): two versions of the same call follow (with and without
          // &cuCtx); they look like two revisions of one line.
          csSplit->fracBits = xGetIntraFracBitsQT( *csSplit, partitioner, true, false, cu.ispMode ? 0 : -1, ispType, &cuCtx );
          csSplit->fracBits = xGetIntraFracBitsQT( *csSplit, partitioner, true, false, cu.ispMode ? 0 : -1, ispType );
          //--- update cost ---
          csSplit->cost     = m_pcRdCost->calcRdCost(csSplit->fracBits, csSplit->dist);
        // Results are published only when LFNST is disabled or at least one of the
        // two paths reported a valid result.
        if( !sps.getUseLFNST() || validReturnFull || validReturnSplit )
            // otherwise this would've happened in useSubStructure
            cs.picture->getRecoBuf( currArea.Y() ).copyFrom( cs.getRecoBuf( currArea.Y() ) );
            cs.picture->getPredBuf( currArea.Y() ).copyFrom( cs.getPredBuf( currArea.Y() ) );
          // An early ISP skip marks the structure with MAX_DOUBLE cost.
          // NOTE(review): the else that presumably separates the two cs.cost
          // assignments below is not visible in this listing.
          if( cu.ispMode && earlySkipISP )
            cs.cost = MAX_DOUBLE;
            cs.cost = m_pcRdCost->calcRdCost( cs.fracBits, cs.dist );
            retVal = true;
    bool IntraSearch::xRecurIntraCodingACTQT(CodingStructure &cs, Partitioner &partitioner, bool mtsCheckRangeFlag, int mtsFirstCheckId, int mtsLastCheckId, bool moreProbMTSIdxFirst)
      const UnitArea &currArea = partitioner.currArea();
      uint32_t       currDepth = partitioner.currTrDepth;
      const Slice    &slice = *cs.slice;
      const SPS      &sps = *cs.sps;
      bool bCheckFull = !partitioner.canSplit(TU_MAX_TR_SPLIT, cs);
      bool bCheckSplit = !bCheckFull;
      TempCtx ctxStart(m_CtxCache, m_CABACEstimator->getCtx());
      TempCtx ctxBest(m_CtxCache);
      CodingStructure *csSplit = nullptr;
      CodingStructure *csFull = nullptr;
      if (bCheckSplit)
        csSplit = &cs;
      else if (bCheckFull)
        csFull = &cs;
      bool validReturnFull = false;
      if (bCheckFull)
        TransformUnit        &tu = csFull->addTU(CS::getArea(*csFull, currArea, partitioner.chType), partitioner.chType);
        tu.depth = currDepth;
        const CodingUnit     &cu = *csFull->getCU(tu.Y().pos(), CHANNEL_TYPE_LUMA);
        const PredictionUnit &pu = *csFull->getPU(tu.Y().pos(), CHANNEL_TYPE_LUMA);
        CHECK(!tu.Y().valid() || !tu.Cb().valid() || !tu.Cr().valid(), "Invalid TU");
        CHECK( != &cu, "wrong CU fetch");
        CHECK(cu.ispMode, "adaptive color transform cannot be applied to ISP");
        CHECK(pu.intraDir[CHANNEL_TYPE_CHROMA] != DM_CHROMA_IDX, "chroma should use DM mode for adaptive color transform");
        // 1. intra prediction and forward color transform
        PelUnitBuf orgBuf = csFull->getOrgBuf(tu);
        PelUnitBuf predBuf = csFull->getPredBuf(tu);
        PelUnitBuf resiBuf = csFull->getResiBuf(tu);
        PelUnitBuf orgResiBuf = csFull->getOrgResiBuf(tu);
        for (int i = 0; i < getNumberValidComponents(tu.chromaFormat); i++)
          ComponentID          compID = (ComponentID)i;
          const CompArea       &area = tu.blocks[compID];
          const ChannelType    chType = toChannelType(compID);
          PelBuf         piOrg = orgBuf.bufs[compID];
          PelBuf         piPred = predBuf.bufs[compID];
          PelBuf         piResi = resiBuf.bufs[compID];
          initIntraPatternChType(*, area);
          if (PU::isMIP(pu, chType))
            initIntraMip(pu, area);
            predIntraMip(compID, piPred, pu);
            predIntraAng(compID, piPred, pu);
          if (slice.getPicHeader()->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag() && compID == COMPONENT_Y)
          if (slice.getLmcsEnabledFlag() && m_pcReshape->getCTUFlag() && compID == COMPONENT_Y)
            CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
            PelBuf   tmpPred = m_tmpStorageLCU.getBuf(tmpArea);
        resiBuf.colorSpaceConvert(orgResiBuf, true);
        // 2. luma residual optimization 
        double     dSingleCostLuma = MAX_DOUBLE;
        bool       checkTransformSkip = sps.getTransformSkipEnabledFlag();
        int        bestLumaModeId = 0;
        uint8_t    nNumTransformCands = cu.mtsFlag ? 4 : 1;
        uint8_t    numTransformIndexCands = nNumTransformCands;
        const bool tsAllowed = TU::isTSAllowed(tu, COMPONENT_Y);
        const bool mtsAllowed = CU::isMTSAllowed(cu, COMPONENT_Y);
        const bool mtsAllowed = TU::isMTSAllowed(tu, COMPONENT_Y);
        std::vector<TrMode> trModes;
        if (sps.getUseLFNST())
          checkTransformSkip &= tsAllowed;
          checkTransformSkip &= !cu.mtsFlag;
          checkTransformSkip &= !cu.lfnstIdx;
          if (!cu.mtsFlag && checkTransformSkip)
            trModes.push_back(TrMode(0, true)); //DCT2
            trModes.push_back(TrMode(1, true)); //TS
          nNumTransformCands = 1 + (tsAllowed ? 1 : 0) + (mtsAllowed ? 4 : 0); // DCT + TS + 4 MTS = 6 tests
          trModes.push_back(TrMode(0, true)); //DCT2
          if (tsAllowed)
            trModes.push_back(TrMode(1, true));
          if (mtsAllowed)
            for (int i = 2; i < 6; i++)
              trModes.push_back(TrMode(i, true));
        CodingStructure &saveLumaCS = *m_pSaveCS[0];
        TransformUnit   *tmpTU = nullptr;
        Distortion      singleDistTmpLuma = 0;
        uint64_t        singleTmpFracBits = 0;
        double          singleCostTmp = 0;
        int             firstCheckId = (sps.getUseLFNST() && mtsCheckRangeFlag && cu.mtsFlag) ? mtsFirstCheckId : 0;
        int             lastCheckId = sps.getUseLFNST() ? ((mtsCheckRangeFlag && cu.mtsFlag) ? (mtsLastCheckId + (int)checkTransformSkip) : (numTransformIndexCands - (firstCheckId + 1) + (int)checkTransformSkip)) : trModes[nNumTransformCands - 1].first;
        bool            isNotOnlyOneMode = sps.getUseLFNST() ? lastCheckId != firstCheckId : nNumTransformCands != 1;
        if (isNotOnlyOneMode)
          saveLumaCS.pcv = csFull->pcv;
          saveLumaCS.picture = csFull->picture;
          tmpTU = &saveLumaCS.addTU(currArea, partitioner.chType);
        bool    cbfBestMode = false;
        bool    cbfBestModeValid = false;
        bool    cbfDCT2 = true;
        m_pcRdCost->lambdaAdjustColorTrans(true, COMPONENT_Y);
        for (int modeId = firstCheckId; modeId <= lastCheckId; modeId++)
          uint8_t transformIndex = modeId;
          m_CABACEstimator->getCtx() = ctxStart;
          if (sps.getUseLFNST())
            if ((transformIndex < lastCheckId) || ((transformIndex == lastCheckId) && !checkTransformSkip)) //we avoid this if the mode is transformSkip
              // Skip checking other transform candidates if zero CBF is encountered and it is the best transform so far
              if (m_pcEncCfg->getUseFastLFNST() && transformIndex && !cbfBestMode && cbfBestModeValid)
            if (!(m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING))
            if (!cbfDCT2 || (m_pcEncCfg->getUseTransformSkipFast() && bestLumaModeId == 1))
            if (!trModes[modeId].second)
    #if JVET_P0058_CHROMA_TS
            tu.mtsIdx[COMPONENT_Y] = trModes[modeId].first;
            tu.mtsIdx = trModes[modeId].first;
          singleDistTmpLuma = 0;
          if (sps.getUseLFNST())
            if (cu.mtsFlag)
              if (moreProbMTSIdxFirst)
                uint32_t uiIntraMode = pu.intraDir[CHANNEL_TYPE_LUMA];
                if (transformIndex == 1)
    #if JVET_P0058_CHROMA_TS
                  tu.mtsIdx[COMPONENT_Y] = (uiIntraMode < 34) ? MTS_DST7_DCT8 : MTS_DCT8_DST7;
                  tu.mtsIdx = (uiIntraMode < 34) ? MTS_DST7_DCT8 : MTS_DCT8_DST7;
                else if (transformIndex == 2)
    #if JVET_P0058_CHROMA_TS
                  tu.mtsIdx[COMPONENT_Y] = (uiIntraMode < 34) ? MTS_DCT8_DST7 : MTS_DST7_DCT8;
                  tu.mtsIdx = (uiIntraMode < 34) ? MTS_DCT8_DST7 : MTS_DST7_DCT8;
    #if JVET_P0058_CHROMA_TS
                  tu.mtsIdx[COMPONENT_Y] = MTS_DST7_DST7 + transformIndex;
                  tu.mtsIdx = MTS_DST7_DST7 + transformIndex;
    #if JVET_P0058_CHROMA_TS
                tu.mtsIdx[COMPONENT_Y] = MTS_DST7_DST7 + transformIndex;
                tu.mtsIdx = MTS_DST7_DST7 + transformIndex;
    #if JVET_P0058_CHROMA_TS
              tu.mtsIdx[COMPONENT_Y] = transformIndex;
              tu.mtsIdx = transformIndex;
            if (!cu.mtsFlag && checkTransformSkip)
              xIntraCodingACTTUBlock(tu, COMPONENT_Y, singleDistTmpLuma, modeId == 0 ? &trModes : nullptr, true);
              if (modeId == 0)
                for (int i = 0; i < 2; i++)
                  if (trModes[i].second)
                    lastCheckId = trModes[i].first;
              xIntraCodingACTTUBlock(tu, COMPONENT_Y, singleDistTmpLuma);
            if (nNumTransformCands > 1)
              xIntraCodingACTTUBlock(tu, COMPONENT_Y, singleDistTmpLuma, modeId == 0 ? &trModes : nullptr, true);
              if (modeId == 0)
                for (int i = 0; i < nNumTransformCands; i++)
                  if (trModes[i].second)
                    lastCheckId = trModes[i].first;
              xIntraCodingACTTUBlock(tu, COMPONENT_Y, singleDistTmpLuma);
          //----- determine rate and r-d cost -----
          if ((sps.getUseLFNST() ? (modeId == lastCheckId && modeId != 0 && checkTransformSkip) : (trModes[modeId].first != 0)) && !TU::getCbfAtDepth(tu, COMPONENT_Y, currDepth))
            //In order not to code TS flag when cbf is zero, the case for TS with cbf being zero is forbidden.
            singleCostTmp = MAX_DOUBLE;
            singleTmpFracBits = xGetIntraFracBitsQT(*csFull, partitioner, true, false, -1, TU_NO_ISP);
            singleCostTmp = m_pcRdCost->calcRdCost(singleTmpFracBits, singleDistTmpLuma);
          if (singleCostTmp < dSingleCostLuma)
            dSingleCostLuma = singleCostTmp;
            validReturnFull = true;
            if (sps.getUseLFNST())
              bestLumaModeId = modeId;
              cbfBestMode = TU::getCbfAtDepth(tu, COMPONENT_Y, currDepth);
              cbfBestModeValid = true;
              bestLumaModeId = trModes[modeId].first;
              if (trModes[modeId].first == 0)
                cbfDCT2 = TU::getCbfAtDepth(tu, COMPONENT_Y, currDepth);
            if (bestLumaModeId != lastCheckId)
              tmpTU->copyComponentFrom(tu, COMPONENT_Y);
              ctxBest = m_CABACEstimator->getCtx();
        m_pcRdCost->lambdaAdjustColorTrans(false, COMPONENT_Y);
        if (sps.getUseLFNST())
          if (!validReturnFull)
            csFull->cost = MAX_DOUBLE;
            return false;
          CHECK(!validReturnFull, "no transform mode was tested for luma");
        csFull->setDecomp(currArea.Y(), true);
        csFull->setDecomp(currArea.Cb(), true);
        if (bestLumaModeId != lastCheckId)
          tu.copyComponentFrom(*tmpTU, COMPONENT_Y);
          m_CABACEstimator->getCtx() = ctxBest;
        // 3 chroma residual optimization
        CodingStructure &saveChromaCS = *m_pSaveCS[1];
        saveChromaCS.pcv = csFull->pcv;
        saveChromaCS.picture = csFull->picture;
        saveChromaCS.initStructData(MAX_INT, false, true);
        tmpTU = &saveChromaCS.addTU(currArea, partitioner.chType);
        CompArea&  cbArea = tu.blocks[COMPONENT_Cb];
        CompArea&  crArea = tu.blocks[COMPONENT_Cr];
        ctxStart = m_CABACEstimator->getCtx();
        tu.jointCbCr = 0;
        bool doReshaping = (slice.getPicHeader()->getLmcsEnabledFlag() && slice.getPicHeader()->getLmcsChromaResidualScaleFlag() && (slice.isIntra() || m_pcReshape->getCTUFlag()) && (cbArea.width * cbArea.height > 4));
        bool doReshaping = (slice.getLmcsEnabledFlag() && slice.getLmcsChromaResidualScaleFlag() && (slice.isIntra() || m_pcReshape->getCTUFlag()) && (cbArea.width * cbArea.height > 4));
        if (doReshaping)
          const Area      area = tu.Y().valid() ? tu.Y() : Area(recalcPosition(tu.chromaFormat, tu.chType, CHANNEL_TYPE_LUMA, tu.blocks[tu.chType].pos()), recalcSize(tu.chromaFormat, tu.chType, CHANNEL_TYPE_LUMA, tu.blocks[tu.chType].size()));
          const CompArea &areaY = CompArea(COMPONENT_Y, tu.chromaFormat, area);
          int             adj = m_pcReshape->calculateChromaAdjVpduNei(tu, areaY);