// IntraSearch.cpp (235 KiB) -- excerpt captured from a repository web view; page navigation text removed.
  •       tu.setChromaAdj(adj);
        }
    
        CompStorage  orgResiCb[5], orgResiCr[5]; // 0:std, 1-3:jointCbCr (placeholder at this stage), 4:crossComp
        orgResiCb[0].create(cbArea);
        orgResiCr[0].create(crArea);
        orgResiCb[0].copyFrom(csFull->getOrgResiBuf(cbArea));
        orgResiCr[0].copyFrom(csFull->getOrgResiBuf(crArea));
        if (doReshaping)
        {
          int cResScaleInv = tu.getChromaAdj();
          orgResiCb[0].scaleSignal(cResScaleInv, 1, slice.clpRng(COMPONENT_Cb));
          orgResiCr[0].scaleSignal(cResScaleInv, 1, slice.clpRng(COMPONENT_Cr));
        }
    
        // 3.1 regular chroma residual coding
        csFull->getResiBuf(cbArea).copyFrom(orgResiCb[0]);
        csFull->getResiBuf(crArea).copyFrom(orgResiCr[0]);
    
        for (uint32_t c = COMPONENT_Cb; c < ::getNumberValidTBlocks(*csFull->pcv); c++)
        {
          const ComponentID compID = ComponentID(c);
          Distortion singleDistChroma = 0;
          xIntraCodingACTTUBlock(tu, compID, singleDistChroma);
          xGetIntraFracBitsQTChroma(tu, compID);
        }
    
        Position tuPos = tu.Y();
        tuPos.relativeTo(cu.Y());
        const UnitArea relativeUnitArea(tu.chromaFormat, Area(tuPos, tu.Y().size()));
        PelUnitBuf     invColorTransResidual = m_colorTransResiBuf.getBuf(relativeUnitArea);
        csFull->getResiBuf(tu).colorSpaceConvert(invColorTransResidual, false);
    
        Distortion totalDist = 0;
        for (uint32_t c = COMPONENT_Y; c < ::getNumberValidTBlocks(*csFull->pcv); c++)
        {
          const ComponentID compID = ComponentID(c);
          const CompArea&   area = tu.blocks[compID];
          PelBuf            piOrg = csFull->getOrgBuf(area);
          PelBuf            piReco = csFull->getRecoBuf(area);
          PelBuf            piPred = csFull->getPredBuf(area);
          PelBuf            piResi = invColorTransResidual.bufs[compID];
    
          piReco.reconstruct(piPred, piResi, cs.slice->clpRng(compID));
    
          if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcEncCfg->getReshaper()
    
    #if JVET_P1006_PICTURE_HEADER
            & slice.getPicHeader()->getLmcsEnabledFlag() && (m_pcReshape->getCTUFlag() || (isChroma(compID) && m_pcEncCfg->getReshapeIntraCMD()))))
    #else
    
            & slice.getLmcsEnabledFlag() && (m_pcReshape->getCTUFlag() || (isChroma(compID) && m_pcEncCfg->getReshapeIntraCMD()))))
    
          {
            const CPelBuf orgLuma = csFull->getOrgBuf(csFull->area.blocks[COMPONENT_Y]);
            if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()))
            {
              CompArea      tmpArea1(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
              PelBuf tmpRecLuma = m_tmpStorageLCU.getBuf(tmpArea1);
              tmpRecLuma.copyFrom(piReco);
              tmpRecLuma.rspSignal(m_pcReshape->getInvLUT());
              totalDist += m_pcRdCost->getDistPart(piOrg, tmpRecLuma, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
            }
            else
            {
              totalDist += m_pcRdCost->getDistPart(piOrg, piReco, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
            }
          }
          else
          {
            totalDist += m_pcRdCost->getDistPart(piOrg, piReco, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE);
          }
        }
    
        m_CABACEstimator->getCtx() = ctxStart;
        uint64_t totalBits = xGetIntraFracBitsQT(*csFull, partitioner, true, true, -1, TU_NO_ISP);
        double   totalCost = m_pcRdCost->calcRdCost(totalBits, totalDist);
    
        saveChromaCS.getResiBuf(cbArea).copyFrom(csFull->getResiBuf(cbArea));
        saveChromaCS.getResiBuf(crArea).copyFrom(csFull->getResiBuf(crArea));
        saveChromaCS.getRecoBuf(tu).copyFrom(csFull->getRecoBuf(tu));
        tmpTU->copyComponentFrom(tu, COMPONENT_Cb);
        tmpTU->copyComponentFrom(tu, COMPONENT_Cr);
        ctxBest = m_CABACEstimator->getCtx();
    
        // 3.2 jointCbCr
        double     bestCostJointCbCr = totalCost;
        Distortion bestDistJointCbCr = totalDist;
        uint64_t   bestBitsJointCbCr = totalBits;
        int        bestJointCbCr = tu.jointCbCr; assert(!bestJointCbCr);
    
        bool       lastIsBest = false;
        std::vector<int>  jointCbfMasksToTest;
        if (sps.getJointCbCrEnabledFlag() && (TU::getCbf(tu, COMPONENT_Cb) || TU::getCbf(tu, COMPONENT_Cr)))
        {
          jointCbfMasksToTest = m_pcTrQuant->selectICTCandidates(tu, orgResiCb, orgResiCr);
        }
    
        for (int cbfMask : jointCbfMasksToTest)
        {
          m_CABACEstimator->getCtx() = ctxStart;
          m_CABACEstimator->resetBits();
    
          Distortion distTmp = 0;
          tu.jointCbCr = (uint8_t)cbfMask;
    
          csFull->getResiBuf(cbArea).copyFrom(orgResiCb[cbfMask]);
          csFull->getResiBuf(crArea).copyFrom(orgResiCr[cbfMask]);
          xIntraCodingACTTUBlock(tu, COMPONENT_Cb, distTmp);
    
          double   costTmp = std::numeric_limits<double>::max();
          uint64_t bitsTmp = 0;
          if (distTmp < std::numeric_limits<Distortion>::max())
          {
            csFull->getResiBuf(tu).colorSpaceConvert(invColorTransResidual, false);
            distTmp = 0;
            for (uint32_t c = COMPONENT_Y; c < ::getNumberValidTBlocks(*csFull->pcv); c++)
            {
              const ComponentID compID = ComponentID(c);
              const CompArea&   area = tu.blocks[compID];
              PelBuf            piOrg = csFull->getOrgBuf(area);
              PelBuf            piReco = csFull->getRecoBuf(area);
              PelBuf            piPred = csFull->getPredBuf(area);
              PelBuf            piResi = invColorTransResidual.bufs[compID];
    
              piReco.reconstruct(piPred, piResi, cs.slice->clpRng(compID));
              if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcEncCfg->getReshaper()
    
    #if JVET_P1006_PICTURE_HEADER
                & slice.getPicHeader()->getLmcsEnabledFlag() && (m_pcReshape->getCTUFlag() || (isChroma(compID) && m_pcEncCfg->getReshapeIntraCMD()))))
    #else
    
                & slice.getLmcsEnabledFlag() && (m_pcReshape->getCTUFlag() || (isChroma(compID) && m_pcEncCfg->getReshapeIntraCMD()))))
    
              {
                const CPelBuf orgLuma = csFull->getOrgBuf(csFull->area.blocks[COMPONENT_Y]);
                if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()))
                {
                  CompArea      tmpArea1(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
                  PelBuf tmpRecLuma = m_tmpStorageLCU.getBuf(tmpArea1);
                  tmpRecLuma.copyFrom(piReco);
                  tmpRecLuma.rspSignal(m_pcReshape->getInvLUT());
                  distTmp += m_pcRdCost->getDistPart(piOrg, tmpRecLuma, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
                }
                else
                {
                  distTmp += m_pcRdCost->getDistPart(piOrg, piReco, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
                }
              }
              else
              {
                distTmp += m_pcRdCost->getDistPart(piOrg, piReco, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE);
              }
            }
    
            bitsTmp = xGetIntraFracBitsQT(*csFull, partitioner, true, true, -1, TU_NO_ISP);
            costTmp = m_pcRdCost->calcRdCost(bitsTmp, distTmp);
          }
    
          if (costTmp < bestCostJointCbCr)
          {
            bestCostJointCbCr = costTmp;
            bestDistJointCbCr = distTmp;
            bestBitsJointCbCr = bitsTmp;
            bestJointCbCr = tu.jointCbCr;
            lastIsBest = (cbfMask == jointCbfMasksToTest.back());
    
            // store data
            if (!lastIsBest)
            {
              saveChromaCS.getResiBuf(cbArea).copyFrom(csFull->getResiBuf(cbArea));
              saveChromaCS.getResiBuf(crArea).copyFrom(csFull->getResiBuf(crArea));
              saveChromaCS.getRecoBuf(tu).copyFrom(csFull->getRecoBuf(tu));
              tmpTU->copyComponentFrom(tu, COMPONENT_Cb);
              tmpTU->copyComponentFrom(tu, COMPONENT_Cr);
    
              ctxBest = m_CABACEstimator->getCtx();
            }
          }
        }
    
        if (!lastIsBest)
        {
          csFull->getResiBuf(cbArea).copyFrom(saveChromaCS.getResiBuf(cbArea));
          csFull->getResiBuf(crArea).copyFrom(saveChromaCS.getResiBuf(crArea));
          csFull->getRecoBuf(tu).copyFrom(saveChromaCS.getRecoBuf(tu));
          tu.copyComponentFrom(*tmpTU, COMPONENT_Cb);
          tu.copyComponentFrom(*tmpTU, COMPONENT_Cr);
    
          m_CABACEstimator->getCtx() = ctxBest;
        }
        tu.jointCbCr = bestJointCbCr;
        csFull->picture->getRecoBuf(tu).copyFrom(csFull->getRecoBuf(tu));
    
        csFull->dist += bestDistJointCbCr;
        csFull->fracBits += bestBitsJointCbCr;
        csFull->cost = m_pcRdCost->calcRdCost(csFull->fracBits, csFull->dist);
      }
    
      bool validReturnSplit = false;
      if (bCheckSplit)
      {
        if (partitioner.canSplit(TU_MAX_TR_SPLIT, *csSplit))
        {
          partitioner.splitCurrArea(TU_MAX_TR_SPLIT, *csSplit);
        }
    
        bool splitIsSelected = true;
        do
        {
          bool tmpValidReturnSplit = xRecurIntraCodingACTQT(*csSplit, partitioner, mtsCheckRangeFlag, mtsFirstCheckId, mtsLastCheckId, moreProbMTSIdxFirst);
          if (sps.getUseLFNST())
          {
            if (!tmpValidReturnSplit)
            {
              splitIsSelected = false;
              break;
            }
          }
          else
          {
            CHECK(!tmpValidReturnSplit, "invalid RD of sub-TU partitions for ACT");
          }
        } while (partitioner.nextPart(*csSplit));
    
        partitioner.exitCurrSplit();
    
        if (splitIsSelected)
        {
          unsigned compCbf[3] = { 0, 0, 0 };
          for (auto &currTU : csSplit->traverseTUs(currArea, partitioner.chType))
          {
            for (unsigned ch = 0; ch < getNumberValidTBlocks(*csSplit->pcv); ch++)
            {
              compCbf[ch] |= (TU::getCbfAtDepth(currTU, ComponentID(ch), currDepth + 1) ? 1 : 0);
            }
          }
    
          for (auto &currTU : csSplit->traverseTUs(currArea, partitioner.chType))
          {
            TU::setCbfAtDepth(currTU, COMPONENT_Y, currDepth, compCbf[COMPONENT_Y]);
            TU::setCbfAtDepth(currTU, COMPONENT_Cb, currDepth, compCbf[COMPONENT_Cb]);
            TU::setCbfAtDepth(currTU, COMPONENT_Cr, currDepth, compCbf[COMPONENT_Cr]);
          }
    
          m_CABACEstimator->getCtx() = ctxStart;
          csSplit->fracBits = xGetIntraFracBitsQT(*csSplit, partitioner, true, true, -1, TU_NO_ISP);
          csSplit->cost = m_pcRdCost->calcRdCost(csSplit->fracBits, csSplit->dist);
    
          validReturnSplit = true;
        }
      }
    
      bool retVal = false;
      if (csFull || csSplit)
      {
        if (sps.getUseLFNST())
        {
          if (validReturnFull || validReturnSplit)
          {
            retVal = true;
          }
        }
        else
        {
          CHECK(!validReturnFull && !validReturnSplit, "illegal TU optimization");
          retVal = true;
        }
      }
      return retVal;
    }
    #endif
    
    
    // Rate-distortion search for the chroma (Cb/Cr) residual coding of the current
    // transform-tree node.
    //
    // Leaf case (partitioner.currTrDepth equals the TU depth):
    //   1. Cb and Cr are predicted once, the residuals are formed and optionally scaled
    //      (chroma residual scaling) and cross-component predicted.
    //   2. Cb and Cr are coded separately; every candidate is costed and the best one per
    //      component is kept in saveCS / tmpTU together with the CABAC contexts (ctxBest).
    //   3. The joint Cb-Cr candidates returned by selectICTCandidates are costed against the
    //      sum of the separate costs.
    //   4. The best result is restored into cs, copied to the picture buffers, and its
    //      distortion is added to cs.dist.
    // Non-leaf case: the area is split and the function recurses over the parts, OR-ing the
    // chroma CBFs of the children into SplitCbfs.
    //
    // cs             coding structure whose pred/resi/reco buffers are searched and updated
    // partitioner    provides the current transform depth and the split handling
    // bestCostSoFar  cost bound for the early exit after Cb when luma uses ISP
    // ispType        split applied when TU_MAX_TR_SPLIT is not possible and the CU uses ISP
    // returns        the ChromaCbfs object cbfs; in the leaf case it is filled from the TU CBFs
    //
    // NOTE(review): this listing appears to have lost lines during extraction (the opening
    // brace of the body, several #endif and brace lines, the declarations of currArea and
    // lumaUsesISP, the do-statement matching the trailing while) and it contains non-code
    // residue lines (committer avatar text). Restore from the upstream file before compiling.
    ChromaCbfs IntraSearch::xRecurIntraChromaCodingQT( CodingStructure &cs, Partitioner& partitioner, const double bestCostSoFar, const PartSplit ispType )
    
      const bool keepResi                 = cs.sps->getUseLMChroma() || KEEP_PRED_AND_RESI_SIGNALS;
    
      if( !currArea.Cb().valid() ) return ChromaCbfs( false );
    
    
      TransformUnit &currTU               = *cs.getTU( currArea.chromaPos(), CHANNEL_TYPE_CHROMA );
      const PredictionUnit &pu            = *cs.getPU( currArea.chromaPos(), CHANNEL_TYPE_CHROMA );
    
    
      uint32_t     currDepth                  = partitioner.currTrDepth;
      const PPS &pps                      = *cs.pps;
      ChromaCbfs cbfs                     ( false );
    
      // Leaf case: the TU sits at the current transform depth, so it is coded here.
      if (currDepth == currTU.depth)
      {
        if (!currArea.Cb().valid() || !currArea.Cr().valid())
        {
          return cbfs;
        }
    
    
        // Scratch coding structure that holds the best candidate found so far.
        CodingStructure &saveCS = *m_pSaveCS[1];
        saveCS.pcv      = cs.pcv;
        saveCS.picture  = cs.picture;
        saveCS.area.repositionTo( cs.area );
    
        saveCS.initStructData( MAX_INT, false, true );
    
        if( !currTU.cu->isSepTree() && currTU.cu->ispMode )
    
        {
          saveCS.clearCUs();
          CodingUnit& auxCU = saveCS.addCU( *currTU.cu, partitioner.chType );
          auxCU.ispMode = currTU.cu->ispMode;
          saveCS.sps = currTU.cs->sps;
          saveCS.clearPUs();
          saveCS.addPU( *currTU.cu->firstPU, partitioner.chType );
        }
    
    
        TransformUnit &tmpTU = saveCS.addTU(currArea, partitioner.chType);
    
    
        cs.setDecomp(currArea.Cb(), true); // set in advance (required for Cb2/Cr2 in 4:2:2 video)
    
        const unsigned      numTBlocks  = ::getNumberValidTBlocks( *cs.pcv );
    
        CompArea&  cbArea         = currTU.blocks[COMPONENT_Cb];
        CompArea&  crArea         = currTU.blocks[COMPONENT_Cr];
        double     bestCostCb     = MAX_DOUBLE;
        double     bestCostCr     = MAX_DOUBLE;
        Distortion bestDistCb     = 0;
        Distortion bestDistCr     = 0;
        int        maxModesTested = 0;
        bool       earlyExitISP   = false;
    
        // CABAC context snapshots: state at TU entry (ctxStartTU), state at the start of the
        // component being coded (ctxStart), and state after the best candidate (ctxBest).
        TempCtx ctxStartTU( m_CtxCache );
        TempCtx ctxStart  ( m_CtxCache );
        TempCtx ctxBest   ( m_CtxCache );
    
        ctxStartTU       = m_CABACEstimator->getCtx();
        currTU.jointCbCr = 0;
    
        // Do predictions here to avoid repeating the "default0Save1Load2" stuff
    
    #if JVET_P0059_CHROMA_BDPCM
        int  predMode   = pu.cu->bdpcmModeChroma ? BDPCM_IDX : PU::getFinalIntraMode(pu, CHANNEL_TYPE_CHROMA);
    #else
    
        int  predMode   = PU::getFinalIntraMode( pu, CHANNEL_TYPE_CHROMA );
    
        PelBuf piPredCb = cs.getPredBuf(cbArea);
        PelBuf piPredCr = cs.getPredBuf(crArea);
    
        initIntraPatternChType( *currTU.cu, cbArea);
        initIntraPatternChType( *currTU.cu, crArea);
    
        if( PU::isLMCMode( predMode ) )
        {
          xGetLumaRecPixels( pu, cbArea );
          predIntraChromaLM( COMPONENT_Cb, piPredCb, pu, cbArea, predMode );
          predIntraChromaLM( COMPONENT_Cr, piPredCr, pu, crArea, predMode );
        }
        else
        {
    
          predIntraAng( COMPONENT_Cb, piPredCb, pu);
          predIntraAng( COMPONENT_Cr, piPredCr, pu);
    
        // determination of chroma residuals including reshaping and cross-component prediction
        //----- get chroma residuals -----
        PelBuf resiCb  = cs.getResiBuf(cbArea);
        PelBuf resiCr  = cs.getResiBuf(crArea);
        resiCb.copyFrom( cs.getOrgBuf (cbArea) );
        resiCr.copyFrom( cs.getOrgBuf (crArea) );
        resiCb.subtract( piPredCb );
        resiCr.subtract( piPredCr );
    
        //----- get reshape parameter ----
    
    Brian Heng's avatar
    Brian Heng committed
    #if JVET_P1006_PICTURE_HEADER
        bool doReshaping = ( cs.picHeader->getLmcsEnabledFlag() && cs.picHeader->getLmcsChromaResidualScaleFlag()
    #else
    
        bool doReshaping = ( cs.slice->getLmcsEnabledFlag() && cs.slice->getLmcsChromaResidualScaleFlag()
    
    Brian Heng's avatar
    Brian Heng committed
    #endif
    
                             && (cs.slice->isIntra() || m_pcReshape->getCTUFlag()) && (cbArea.width * cbArea.height > 4) );
        if( doReshaping )
        {
          const Area area = currTU.Y().valid() ? currTU.Y() : Area(recalcPosition(currTU.chromaFormat, currTU.chType, CHANNEL_TYPE_LUMA, currTU.blocks[currTU.chType].pos()), recalcSize(currTU.chromaFormat, currTU.chType, CHANNEL_TYPE_LUMA, currTU.blocks[currTU.chType].size()));
    
          const CompArea &areaY = CompArea(COMPONENT_Y, currTU.chromaFormat, area);
          int adj = m_pcReshape->calculateChromaAdjVpduNei(currTU, areaY);
    
          currTU.setChromaAdj(adj);
        }
    
        //----- get cross component prediction parameters -----
        bool checkCrossComponentPrediction = PU::isChromaIntraModeCrossCheckMode( pu ) && pps.getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() && TU::getCbf( currTU, COMPONENT_Y );
        int  compAlpha[MAX_NUM_COMPONENT] = { 0, 0, 0 };
        if( checkCrossComponentPrediction )
        {
          compAlpha[COMPONENT_Cb] = xCalcCrossComponentPredictionAlpha( currTU, COMPONENT_Cb, m_pcEncCfg->getUseReconBasedCrossCPredictionEstimate() );
          compAlpha[COMPONENT_Cr] = xCalcCrossComponentPredictionAlpha( currTU, COMPONENT_Cr, m_pcEncCfg->getUseReconBasedCrossCPredictionEstimate() );
          if( compAlpha[COMPONENT_Cb] == 0 && compAlpha[COMPONENT_Cr] == 0 )
          {
            checkCrossComponentPrediction = false;
          }
        }
    
        //===== store original residual signals (std and crossCompPred) =====
        CompStorage  orgResiCb[5], orgResiCr[5]; // 0:std, 1-3:jointCbCr (placeholder at this stage), 4:crossComp
        // With the step of 4, k is 0 and, only when cross-component prediction is tested, 4.
        for( int k = 0; k < (checkCrossComponentPrediction?5:1); k+=4 )
        {
          orgResiCb[k].create( cbArea );
          orgResiCr[k].create( crArea );
          if( k >= 4 ) {
            CrossComponentPrediction::crossComponentPrediction( currTU, COMPONENT_Cb, cs.getResiBuf(currTU.Y()), resiCb, orgResiCb[k], false);
            CrossComponentPrediction::crossComponentPrediction( currTU, COMPONENT_Cr, cs.getResiBuf(currTU.Y()), resiCr, orgResiCr[k], false);
          } else {
            orgResiCb[k].copyFrom( resiCb );
            orgResiCr[k].copyFrom( resiCr );
          }
          if( doReshaping )
          {
            int cResScaleInv = currTU.getChromaAdj();
            orgResiCb[k].scaleSignal( cResScaleInv, 1, currTU.cu->cs->slice->clpRng(COMPONENT_Cb) );
            orgResiCr[k].scaleSignal( cResScaleInv, 1, currTU.cu->cs->slice->clpRng(COMPONENT_Cr) );
          }
        }
    
    
        // Separate coding of Cb and Cr: every candidate of the component is costed and the
        // cheapest one is remembered.
        for( uint32_t c = COMPONENT_Cb; c < numTBlocks; c++)
        {
          const ComponentID compID  = ComponentID(c);
          const CompArea&   area    = currTU.blocks[compID];
    
          double     dSingleCost    = MAX_DOUBLE;
          int        bestModeId     = 0;
          Distortion singleDistCTmp = 0;
          double     singleCostTmp  = 0;
          const int  crossCPredictionModesToTest = checkCrossComponentPrediction ? 2 : 1;
    
    #if JVET_P0058_CHROMA_TS
          const bool tsAllowed = TU::isTSAllowed(currTU, compID) && (m_pcEncCfg->getUseChromaTS());
          uint8_t nNumTransformCands = 1 + (tsAllowed ? 1 : 0); // DCT + TS = 2 tests
          std::vector<TrMode> trModes;
          trModes.push_back(TrMode(0, true)); // DCT2
    
          if (tsAllowed)
          {
              trModes.push_back(TrMode(1, true));//TS
          }
          CHECK(!currTU.Cb().valid(), "Invalid TU");
    #endif
    
    #if JVET_P0058_CHROMA_TS
          const int  totalModesToTest            = crossCPredictionModesToTest * nNumTransformCands;
          bool cbfDCT2 = true;
    #else
    
    Tung Nguyen's avatar
    Tung Nguyen committed
          const int  totalModesToTest            = crossCPredictionModesToTest;
    
          const bool isOneMode                   = false;
          maxModesTested                         = totalModesToTest > maxModesTested ? totalModesToTest : maxModesTested;
    
          int currModeId = 0;
          int default0Save1Load2 = 0;
    
    
          if (!isOneMode)
          {
            ctxStart = m_CABACEstimator->getCtx();
          }
    
    
    #if JVET_P0058_CHROMA_TS
          for (int modeId = 0; modeId < nNumTransformCands; modeId++)
    #endif
    
          {
            for (int crossCPredictionModeId = 0; crossCPredictionModeId < crossCPredictionModesToTest; crossCPredictionModeId++)
            {
    
              resiCb.copyFrom( orgResiCb[4*crossCPredictionModeId] );
              resiCr.copyFrom( orgResiCr[4*crossCPredictionModeId] );
    
              currTU.compAlpha    [compID] = ( crossCPredictionModeId ? compAlpha[compID] : 0 );
    
    #if JVET_P0058_CHROMA_TS
    
    #if JVET_P0059_CHROMA_BDPCM
              currTU.mtsIdx[compID] = currTU.cu->bdpcmModeChroma ? MTS_SKIP : trModes[modeId].first;
    #else
    
              currTU.mtsIdx[compID] = trModes[modeId].first;
    
              currModeId++;
    
              const bool isFirstMode = (currModeId == 1);
    
              const bool isLastMode  = false; // Always store output to saveCS and tmpTU
    
    #if JVET_AHG14_LOSSLESS
              if( !( m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING ) )
              {
    #endif
    
    #if JVET_P0058_CHROMA_TS
               //if DCT2's cbf==0, skip ts search
              if (!cbfDCT2 && trModes[modeId].first == MTS_SKIP)
              {
                  break;
              }
              if (!trModes[modeId].second)
              {
                  continue;
              }
    #endif
    
    #if JVET_AHG14_LOSSLESS
              }
    #endif
    
              if (!isFirstMode) // if not first mode to be tested
              {
                m_CABACEstimator->getCtx() = ctxStart;
              }
    
              singleDistCTmp = 0;
    
    
    #if JVET_P0058_CHROMA_TS
              if (nNumTransformCands > 1)
              {
                  xIntraCodingTUBlock(currTU, compID, crossCPredictionModeId != 0, singleDistCTmp, default0Save1Load2, nullptr, modeId == 0 ? &trModes : nullptr, true);
              }
              else
              {
                  xIntraCodingTUBlock(currTU, compID, crossCPredictionModeId != 0, singleDistCTmp, default0Save1Load2);
              }
    #else
    
              xIntraCodingTUBlock( currTU, compID, crossCPredictionModeId != 0, singleDistCTmp, default0Save1Load2 );
    
    #if JVET_P0058_CHROMA_TS
    
    #if JVET_P0059_CHROMA_BDPCM
              if (((crossCPredictionModeId == 1) && (currTU.compAlpha[compID] == 0)) || ((currTU.mtsIdx[compID] == MTS_SKIP && !currTU.cu->bdpcmModeChroma) && !TU::getCbf(currTU, compID))) //In order not to code TS flag when cbf is zero, the case for TS with cbf being zero is forbidden.
    #else
    
              if (((crossCPredictionModeId == 1) && (currTU.compAlpha[compID] == 0)) || ((currTU.mtsIdx[compID] == MTS_SKIP) && !TU::getCbf(currTU, compID))) //In order not to code TS flag when cbf is zero, the case for TS with cbf being zero is forbidden.
    
    Tung Nguyen's avatar
    Tung Nguyen committed
              if( ( ( crossCPredictionModeId == 1 ) && ( currTU.compAlpha[compID] == 0 ) ) ) //In order not to code TS flag when cbf is zero, the case for TS with cbf being zero is forbidden.
    
              else if( lumaUsesISP && bestCostSoFar != MAX_DOUBLE && c == COMPONENT_Cb )
              {
                uint64_t fracBitsTmp = xGetIntraFracBitsQTSingleChromaComponent( cs, partitioner, ComponentID( c ) );
                singleCostTmp = m_pcRdCost->calcRdCost( fracBitsTmp, singleDistCTmp );
                if( isOneMode || ( !isOneMode && !isLastMode ) )
                {
                  m_CABACEstimator->getCtx() = ctxStart;
                }
              }
    
              else if( !isOneMode )
              {
                uint64_t fracBitsTmp = xGetIntraFracBitsQTChroma( currTU, compID );
                singleCostTmp = m_pcRdCost->calcRdCost( fracBitsTmp, singleDistCTmp );
              }
    
              if( singleCostTmp < dSingleCost )
              {
    
                dSingleCost = singleCostTmp;
                bestModeId  = currModeId;
    
                if ( c == COMPONENT_Cb )
                {
                  bestCostCb = singleCostTmp;
                  bestDistCb = singleDistCTmp;
                }
                else
                {
                  bestCostCr = singleCostTmp;
                  bestDistCr = singleDistCTmp;
                }
    
    #if JVET_P0058_CHROMA_TS
                if (currTU.mtsIdx[compID] == MTS_DCT2_DCT2)
                {
                    cbfDCT2 = TU::getCbfAtDepth(currTU, compID, currDepth);
                }
    #endif
    
    
                if( !isLastMode )
                {
    #if KEEP_PRED_AND_RESI_SIGNALS
                  saveCS.getPredBuf   (area).copyFrom(cs.getPredBuf   (area));
                  saveCS.getOrgResiBuf(area).copyFrom(cs.getOrgResiBuf(area));
    
    Taoran Lu's avatar
    Taoran Lu committed
    #endif
                  saveCS.getPredBuf   (area).copyFrom(cs.getPredBuf   (area));
    
                  if( keepResi )
                  {
                    saveCS.getResiBuf (area).copyFrom(cs.getResiBuf   (area));
                  }
                  saveCS.getRecoBuf   (area).copyFrom(cs.getRecoBuf   (area));
    
                  tmpTU.copyComponentFrom(currTU, compID);
    
                  ctxBest = m_CABACEstimator->getCtx();
                }
              }
            }
          }
    
    
          if( lumaUsesISP && dSingleCost > bestCostSoFar && c == COMPONENT_Cb )
          {
            //Luma + Cb cost is already larger than the best cost, so we don't need to test Cr
            cs.dist = MAX_UINT;
            m_CABACEstimator->getCtx() = ctxStart;
    
            earlyExitISP               = true;
    
          // Done with one component of separate coding of Cr and Cb, just switch to the best Cb contexts if Cr coding is still to be done
          if ( c == COMPONENT_Cb && bestModeId < totalModesToTest)
          {
            m_CABACEstimator->getCtx() = ctxBest;
    
            currTU.copyComponentFrom(tmpTU, COMPONENT_Cb); // Cbf of Cb is needed to estimate cost for Cr Cbf
          }
        }
    
        // The joint Cb-Cr search and the final restore only run when the ISP early exit
        // above did not trigger.
        if ( !earlyExitISP )
        {
          // Test using joint chroma residual coding
          double     bestCostCbCr   = bestCostCb + bestCostCr;
          Distortion bestDistCbCr   = bestDistCb + bestDistCr;
          int        bestJointCbCr  = 0;
    
          bool       lastIsBest     = false;
          std::vector<int>  jointCbfMasksToTest;
    
          if ( cs.sps->getJointCbCrEnabledFlag() && (TU::getCbf(tmpTU, COMPONENT_Cb) || TU::getCbf(tmpTU, COMPONENT_Cr)))
    
    Fangdong Chen's avatar
    Fangdong Chen committed
            jointCbfMasksToTest = m_pcTrQuant->selectICTCandidates(currTU, orgResiCb, orgResiCr);
    
          }
          for( int cbfMask : jointCbfMasksToTest )
    
          {
            Distortion distTmp = 0;
    
            currTU.jointCbCr               = (uint8_t)cbfMask;
    
            currTU.compAlpha[COMPONENT_Cb] = 0;
    
            currTU.compAlpha[COMPONENT_Cr] = 0;
    
    #if JVET_P0058_CHROMA_TS
            // encoder bugfix: initialize mtsIdx for chroma under JointCbCrMode.
            currTU.mtsIdx[COMPONENT_Cb] = currTU.mtsIdx[COMPONENT_Cr]  = MTS_DCT2_DCT2;
    #endif
    
            // Every joint candidate starts from the context state saved at TU entry.
            m_CABACEstimator->getCtx() = ctxStartTU;
    
            resiCb.copyFrom( orgResiCb[cbfMask] );
            resiCr.copyFrom( orgResiCr[cbfMask] );
            xIntraCodingTUBlock( currTU, COMPONENT_Cb, false, distTmp, 0 );
    
            double costTmp = std::numeric_limits<double>::max();
            if( distTmp < std::numeric_limits<Distortion>::max() )
            {
              uint64_t bits  = xGetIntraFracBitsQTChroma( currTU, COMPONENT_Cb );
              costTmp = m_pcRdCost->calcRdCost( bits, distTmp );
            }
    
            if( costTmp < bestCostCbCr )
            {
              bestCostCbCr  = costTmp;
              bestDistCbCr  = distTmp;
    
              bestJointCbCr = currTU.jointCbCr;
    
              // store data
              if( cbfMask != jointCbfMasksToTest.back() )
              {
    #if KEEP_PRED_AND_RESI_SIGNALS
                saveCS.getOrgResiBuf(cbArea).copyFrom(cs.getOrgResiBuf(cbArea));
                saveCS.getOrgResiBuf(crArea).copyFrom(cs.getOrgResiBuf(crArea));
    #endif
                saveCS.getPredBuf   (cbArea).copyFrom(cs.getPredBuf   (cbArea));
                saveCS.getPredBuf   (crArea).copyFrom(cs.getPredBuf   (crArea));
                if( keepResi )
                {
                  saveCS.getResiBuf (cbArea).copyFrom(cs.getResiBuf   (cbArea));
                  saveCS.getResiBuf (crArea).copyFrom(cs.getResiBuf   (crArea));
                }
                saveCS.getRecoBuf   (cbArea).copyFrom(cs.getRecoBuf   (cbArea));
                saveCS.getRecoBuf   (crArea).copyFrom(cs.getRecoBuf   (crArea));
    
                tmpTU.copyComponentFrom(currTU, COMPONENT_Cb);
                tmpTU.copyComponentFrom(currTU, COMPONENT_Cr);
    
                ctxBest = m_CABACEstimator->getCtx();
              }
              else
              {
                // The best candidate is the last one tested, so cs already holds it.
                lastIsBest = true;
              }
    
          // Retrieve the best CU data (unless it was the very last one tested)
    
          if ( !( maxModesTested == 1 && jointCbfMasksToTest.empty() ) && !lastIsBest )
    
          {
    #if KEEP_PRED_AND_RESI_SIGNALS
            cs.getPredBuf   (cbArea).copyFrom(saveCS.getPredBuf   (cbArea));
            cs.getOrgResiBuf(cbArea).copyFrom(saveCS.getOrgResiBuf(cbArea));
            cs.getPredBuf   (crArea).copyFrom(saveCS.getPredBuf   (crArea));
            cs.getOrgResiBuf(crArea).copyFrom(saveCS.getOrgResiBuf(crArea));
    #endif
            cs.getPredBuf   (cbArea).copyFrom(saveCS.getPredBuf   (cbArea));
            cs.getPredBuf   (crArea).copyFrom(saveCS.getPredBuf   (crArea));
    
            if( keepResi )
            {
              cs.getResiBuf (cbArea).copyFrom(saveCS.getResiBuf   (cbArea));
              cs.getResiBuf (crArea).copyFrom(saveCS.getResiBuf   (crArea));
            }
            cs.getRecoBuf   (cbArea).copyFrom(saveCS.getRecoBuf   (cbArea));
            cs.getRecoBuf   (crArea).copyFrom(saveCS.getRecoBuf   (crArea));
    
            currTU.copyComponentFrom(tmpTU, COMPONENT_Cb);
            currTU.copyComponentFrom(tmpTU, COMPONENT_Cr);
    
            m_CABACEstimator->getCtx() = ctxBest;
          }
    
          // Copy results to the picture structures
          cs.picture->getRecoBuf(cbArea).copyFrom(cs.getRecoBuf(cbArea));
          cs.picture->getRecoBuf(crArea).copyFrom(cs.getRecoBuf(crArea));
          cs.picture->getPredBuf(cbArea).copyFrom(cs.getPredBuf(cbArea));
          cs.picture->getPredBuf(crArea).copyFrom(cs.getPredBuf(crArea));
    
          cbfs.cbf(COMPONENT_Cb) = TU::getCbf(currTU, COMPONENT_Cb);
          cbfs.cbf(COMPONENT_Cr) = TU::getCbf(currTU, COMPONENT_Cr);
    
          // The joint mode is only kept when at least one chroma CBF is set.
          currTU.jointCbCr = ( (cbfs.cbf(COMPONENT_Cb) + cbfs.cbf(COMPONENT_Cr)) ? bestJointCbCr : 0 );
    
          cs.dist         += bestDistCbCr;
    
        }
      }
      else
      {
        // Not at TU depth: split the current area and recurse into every part.
        unsigned    numValidTBlocks   = ::getNumberValidTBlocks( *cs.pcv );
        ChromaCbfs  SplitCbfs         ( false );
    
        if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
        {
          partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
        }
    
        else if( currTU.cu->ispMode )
        {
          partitioner.splitCurrArea( ispType, cs );
        }
    
          ChromaCbfs subCbfs = xRecurIntraChromaCodingQT( cs, partitioner, bestCostSoFar, ispType );
    
    
          for( uint32_t ch = COMPONENT_Cb; ch < numValidTBlocks; ch++ )
          {
            const ComponentID compID = ComponentID( ch );
            SplitCbfs.cbf( compID ) |= subCbfs.cbf( compID );
          }
        } while( partitioner.nextPart( cs ) );
    
        partitioner.exitCurrSplit();
    
    
        // cs.dist == MAX_UINT is the marker written by the ISP early exit in the leaf case.
        if( lumaUsesISP && cs.dist == MAX_UINT )
        {
          return cbfs;
        }
    
          if( !lumaUsesISP )
          {
            // Write the merged child CBFs to every TU that lies inside the current area.
            for( auto &ptu : cs.tus )
            {
              if( currArea.Cb().contains( ptu->Cb() ) || ( !ptu->Cb().valid() && currArea.Y().contains( ptu->Y() ) ) )
              {
                TU::setCbfAtDepth( *ptu, COMPONENT_Cb, currDepth, SplitCbfs.Cb );
                TU::setCbfAtDepth( *ptu, COMPONENT_Cr, currDepth, SplitCbfs.Cr );
              }
            }
          }
    
        }
      }
    
      return cbfs;
    }
    
    // Estimates the fractional bit cost of signalling intra mode uiMode for pu on
    // channel chType and returns m_CABACEstimator->getEstFracBits().
    // Unless pu.mhIntraFlag is set, uiMode is swapped into pu.intraDir[chType] for
    // the duration of the estimate and swapped back before returning, so pu is
    // left with its previous direction.
    // NOTE(review): as listed here the block opened after isLuma( chType ) is never
    // closed and no non-luma branch is visible; lines appear to be missing from
    // this listing - confirm against the repository copy.
    uint64_t IntraSearch::xFracModeBitsIntra(PredictionUnit &pu, const uint32_t &uiMode, const ChannelType &chType)
    {
      // Mutable copy: uiMode is a const reference and cannot take part in std::swap.
      uint32_t orgMode = uiMode;
    
    
      // Install the mode under test; orgMode then holds the previous direction.
      if (!pu.mhIntraFlag)
    
      std::swap(orgMode, pu.intraDir[chType]);
    
      // Start counting from zero so the returned value covers only this mode.
      m_CABACEstimator->resetBits();
    
      if( isLuma( chType ) )
      {
    
        // The luma mode syntax is only estimated when mhIntraFlag is not set.
        if (!pu.mhIntraFlag)
    
        {
          m_CABACEstimator->intra_luma_pred_mode(pu);
        }
    
      // Restore the direction that was in pu before the estimate.
      if ( !pu.mhIntraFlag )
    
      std::swap(orgMode, pu.intraDir[chType]);
    
      return m_CABACEstimator->getEstFracBits();
    }
    
    
    #if JVET_P0517_ADAPTIVE_COLOR_TRANSFORM 
    
    // Inserts one candidate (mode, cost, bdpcmMode) into three parallel arrays that
    // hold candNum entries, and increments candNum.
    //
    // The candidate is placed at the lowest index whose stored cost is strictly
    // greater than cost; entries from that index onwards are moved up by one. If no
    // stored cost is greater (including the empty list) the candidate is appended.
    // On arrays that are already in ascending cost order this keeps them ordered,
    // with equal-cost candidates staying in arrival order.
    //
    // \param mode           candidate mode to store
    // \param cost           RD cost of the candidate
    // \param bdpcmMode      BDPCM mode stored alongside the candidate
    // \param rdModeList     mode array, updated in place
    // \param rdCostList     cost array, updated in place
    // \param bdpcmModeList  BDPCM mode array, updated in place
    // \param candNum        number of valid entries; incremented on return
    //
    // The arrays must have room for FAST_UDI_MAX_RDMODE_NUM entries. The capacity
    // CHECK fires when the list is already full, before anything is written, so a
    // full list is reported without first storing one element past the end.
    void IntraSearch::sortRdModeListFirstColorSpace(ModeInfo mode, double cost, char bdpcmMode, ModeInfo* rdModeList, double* rdCostList, char* bdpcmModeList, int& candNum)
    {
      // Same condition as checking (candNum + 1 > capacity) after the insertion,
      // but evaluated before index candNum is written.
      CHECK(candNum >= FAST_UDI_MAX_RDMODE_NUM, "exceed intra mode candidate list capacity");
    
      // Lowest index with a strictly larger stored cost; candNum means "append".
      int insertPos = candNum;
      for (int pos = 0; pos < candNum; pos++)
      {
        if (cost < rdCostList[pos])
        {
          insertPos = pos;
          break;
        }
      }
    
      // Open a gap at insertPos, moving from the back so nothing is overwritten.
      for (int i = candNum; i > insertPos; i--)
      {
        rdModeList[i]    = rdModeList[i - 1];
        rdCostList[i]    = rdCostList[i - 1];
        bdpcmModeList[i] = bdpcmModeList[i - 1];
      }
    
      rdModeList[insertPos]    = mode;
      rdCostList[insertPos]    = cost;
      bdpcmModeList[insertPos] = bdpcmMode;
      candNum++;
    }
    
    // Clears the saved first-colour-space RD candidate tables.
    // For every class index i the saved-candidate count is set to 0, and every slot
    // j of that class gets a default-constructed-style ModeInfo, BDPCM mode 0 and
    // cost MAX_DOUBLE.
    // NOTE(review): the #if / #else inside the inner loop has no matching #endif in
    // this listing; a line appears to be missing - confirm against the repository
    // copy.
    void IntraSearch::invalidateBestRdModeFirstColorSpace()
    {
      // Number of classes (first index) and slots per class (second index).
      // NOTE(review): the meaning of the factors 4 and 2 is not visible here.
      int numSaveRdClass = 4 * NUM_LFNST_NUM_PER_SET * 2;
      int savedRdModeListSize = FAST_UDI_MAX_RDMODE_NUM;
    
      for (int i = 0; i < numSaveRdClass; i++)
      {
        m_numSavedRdModeFirstColorSpace[i] = 0;
        for (int j = 0; j < savedRdModeListSize; j++)
        {
    
    #if JVET_P0803_COMBINED_MIP_CLEANUP
          m_savedRdModeFirstColorSpace[i][j] = ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, 0);
    #else
    
          m_savedRdModeFirstColorSpace[i][j] = ModeInfo(false, 0, NOT_INTRA_SUBPARTITIONS, 0);
    
          m_savedBDPCMModeFirstColorSpace[i][j] = 0;
          // MAX_DOUBLE marks the slot as holding no usable cost.
          m_savedRdCostFirstColorSpace[i][j] = MAX_DOUBLE;
        }
      }
    }
    #endif
    
    
    void IntraSearch::encPredIntraDPCM( const ComponentID &compID, PelBuf &pOrg, PelBuf &pDst, const uint32_t &uiDirMode )
    {
      CHECK( pOrg.buf == 0, "Encoder DPCM called without original buffer" );
    
    
      const int srcStride = m_refBufferStride[compID];
    
      CPelBuf   pSrc = CPelBuf(getPredictorPtr(compID), srcStride, m_leftRefLength + 1);
    
      // Sample Adaptive intra-Prediction (SAP)
      if( uiDirMode == HOR_IDX )
      {
        // left column filled with reference samples, remaining columns filled with pOrg data
        for( int y = 0; y < pDst.height; y++ )
        {
    
          pDst.at(0, y) = pSrc.at(1 + y, 1);
    
        }
        CPelBuf orgRest  = pOrg.subBuf( 0, 0, pOrg.width - 1, pOrg.height );
        PelBuf  predRest = pDst.subBuf( 1, 0, pDst.width - 1, pDst.height );
    
        predRest.copyFrom( orgRest );
      }
      else // VER_IDX
      {
        // top row filled with reference samples, remaining rows filled with pOrg data
        for( int x = 0; x < pDst.width; x++ )
        {
          pDst.at( x, 0 ) = pSrc.at( 1 + x, 0 );
        }
        CPelBuf orgRest  = pOrg.subBuf( 0, 0, pOrg.width, pOrg.height - 1 );
        PelBuf  predRest = pDst.subBuf( 0, 1, pDst.width, pDst.height - 1 );
    
        predRest.copyFrom( orgRest );
      }
    }
    
    // Tells whether the first-pass intra estimation should use the DPCM predictor
    // for this PU and direction: RDPCM must be enabled for the PU's coding unit,
    // the coding unit must use transquant bypass, and the direction must be purely
    // horizontal or purely vertical.
    bool IntraSearch::useDPCMForFirstPassIntraEstimation( const PredictionUnit &pu, const uint32_t &uiDirMode )
    {
      if( !CU::isRDPCMEnabled( *pu.cu ) )
      {
        return false;
      }
      if( !pu.cu->transQuantBypass )
      {
        return false;
      }
      return uiDirMode == HOR_IDX || uiDirMode == VER_IDX;
    }
    
    // Reduces the candidate list produced by the Hadamard-cost pass before full RD.
    //
    // Candidates from candModeList are copied, in order, into a temporary list
    // subject to per-type limits (counters numConv and numMip); for PUs wider and
    // taller than 8 luma samples, up to three MIP modes chosen from mipHadCost are
    // then appended with cost 0 if not already present (only the first such mode
    // when fastMip is set). The temporary lists replace candModeList and
    // candCostList, and numModesForFullRD is set to the resulting list size.
    //
    // \param candModeList       in/out candidate modes
    // \param candCostList       in/out costs, parallel to candModeList
    // \param numModesForFullRD  in: requested count; out: size of the reduced list
    // \param thresholdHadCost   factor applied to the first candidate cost
    // \param mipHadCost         Hadamard costs indexed by MIP mode
    // \param pu                 prediction unit being searched
    // \param fastMip            stop after appending one extra MIP mode
    //
    // NOTE(review): this listing is damaged - the braces and the else between the
    // two addMode assignments are missing, three #if / #else groups have no
    // matching #endif, and page residue ("... avatar", "... committed") sits
    // between statements. Confirm every detail against the repository copy.
    template<typename T, size_t N>
    void IntraSearch::reduceHadCandList(static_vector<T, N>& candModeList, static_vector<double, N>& candCostList, int& numModesForFullRD, const double thresholdHadCost, const double* mipHadCost, const PredictionUnit &pu, const bool fastMip)
    {
      // Half of the requested number of modes; used as the MIP quota below.
      const int maxCandPerType = numModesForFullRD >> 1;
      static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> tempRdModeList;
      static_vector<double, FAST_UDI_MAX_RDMODE_NUM> tempCandCostList;
      // Cost of the first candidate; reference value for the threshold test.
      const double minCost = candCostList[0];
      // Set when the input list holds more entries than requested.
      bool keepOneMip = candModeList.size() > numModesForFullRD;
    
      int numConv = 0;
      int numMip = 0;
      // The last entry is skipped unless keepOneMip is set.
      for (int idx = 0; idx < candModeList.size() - (keepOneMip?0:1); idx++)
      {
        bool addMode = false;
    
    Philipp Merkle's avatar
    Philipp Merkle committed
        const ModeInfo& orgMode = candModeList[idx];
    
    Philipp Merkle's avatar
    Philipp Merkle committed
        // Candidates without mipFlg: at most three are kept.
        if (!orgMode.mipFlg)
    
          addMode = (numConv < 3);
          numConv += addMode ? 1:0;
    
          // Kept while under the quota, or when the cost is below
          // thresholdHadCost * minCost, or once when keepOneMip is still set.
          addMode = ( numMip < maxCandPerType || (candCostList[idx] < thresholdHadCost * minCost) || keepOneMip );
          keepOneMip = false;
          numMip += addMode ? 1:0;
        }
        if( addMode )
        {
    
    Philipp Merkle's avatar
    Philipp Merkle committed
          tempRdModeList.push_back(orgMode);
    
          tempCandCostList.push_back(candCostList[idx]);
        }
      }
    
      if ((pu.lwidth() > 8 && pu.lheight() > 8))
      {
        // Sort MIP candidates by Hadamard cost
    
    #if JVET_P0803_COMBINED_MIP_CLEANUP
        const int transpOff = getNumModesMip( pu.Y() );
    #else
    
        const int transpOff = getNumModesMip(pu.Y()) / 2;
    
    Philipp Merkle's avatar
    Philipp Merkle committed
        static_vector<uint8_t, FAST_UDI_MAX_RDMODE_NUM> sortedMipModes(0);
        static_vector<double, FAST_UDI_MAX_RDMODE_NUM> sortedMipCost(0);
    
    #if JVET_P0803_COMBINED_MIP_CLEANUP
        for( uint8_t mode : { 0, 1, 2 } )
    #else
    
        for (uint8_t mode : { 3, 4, 5 })
    
        {
          // Of mode and mode + transpOff, take the one with the lower cost.
          uint8_t candMode = mode + uint8_t((mipHadCost[mode + transpOff] < mipHadCost[mode]) ? transpOff : 0);
          updateCandList(candMode, mipHadCost[candMode], sortedMipModes, sortedMipCost, 3);
        }
    
        // Append MIP mode to RD mode list
    
        // Size before appending: only pre-existing entries are checked for duplicates.
        const int modeListSize = int(tempRdModeList.size());
    
        for (int idx = 0; idx < 3; idx++)
        {
    
    #if JVET_P0803_COMBINED_MIP_CLEANUP
          const bool     isTransposed = (sortedMipModes[idx] >= transpOff ? true : false);
          const uint32_t mipIdx       = (isTransposed ? sortedMipModes[idx] - transpOff : sortedMipModes[idx]);
          const ModeInfo mipMode( true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, mipIdx );
    #else
    
          const ModeInfo mipMode(true, 0, NOT_INTRA_SUBPARTITIONS, sortedMipModes[idx]);
    
          bool alreadyIncluded = false;
    
          for (int modeListIdx = 0; modeListIdx < modeListSize; modeListIdx++)
    
          {
            if (tempRdModeList[modeListIdx] == mipMode)
            {
              alreadyIncluded = true;
              break;
            }
          }
    
          if (!alreadyIncluded)
          {
            // Appended modes carry cost 0 rather than their Hadamard cost.
            tempRdModeList.push_back(mipMode);
            tempCandCostList.push_back(0);
            if( fastMip ) break;
          }
        }
      }
    
      // Publish the reduced lists and report their size to the caller.
      candModeList = tempRdModeList;
      candCostList = tempCandCostList;
      numModesForFullRD = int(candModeList.size());
    }
    
    // It decides which modes from the ISP lists can be full RD tested
    void IntraSearch::xGetNextISPMode(ModeInfo& modeInfo, const ModeInfo* lastMode, const Size cuSize)
    {