Skip to content
Snippets Groups Projects
IntraSearch.cpp 166 KiB
Newer Older
  • Learn to ignore specific revisions
  •           m_rdModeListWithoutMrlVer.resize(std::min<size_t>(m_rdModeListWithoutMrlVer.size(), maxSize));
    
            }
          }
          if (maxSize == 0)
          {
            cs.dist = std::numeric_limits<Distortion>::max();
            cs.interHad = 0;
    
            //===== reset context models =====
            m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag);
            m_CABACEstimator->getCtx() = SubCtx(Ctx::MipMode, ctxStartMipMode);
            m_CABACEstimator->getCtx() = SubCtx(Ctx::ISPMode, ctxStartIspMode);
            m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaPlanarFlag, ctxStartPlanarFlag);
            m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMpmFlag, ctxStartIntraMode);
            m_CABACEstimator->getCtx() = SubCtx(Ctx::MultiRefLineIdx, ctxStartMrlIdx);
    
    
    #if JVET_O0502_ISP_CLEANUP
        int numNonISPModes = (int)uiRdModeList.size();
    #endif
    
    
    #if JVET_O0502_ISP_CLEANUP
    
          //we create a single full RD list that includes all intra modes using regular intra, MRL and ISP
    
          const int maxNumRDModesISP = 16;
          for (int i = 0; i < maxNumRDModesISP; i++)
            uiRdModeList.push_back(ModeInfo(false, 0, INTRA_SUBPARTITIONS_RESERVED, 0));
    #else
    
          //we create a single full RD list that includes all intra modes using regular intra, MRL and ISP
          auto* firstIspList  = ispOptions[1] == HOR_INTRA_SUBPARTITIONS ? &m_rdModeListWithoutMrlHor : &m_rdModeListWithoutMrlVer;
          auto* secondIspList = ispOptions[1] == HOR_INTRA_SUBPARTITIONS ? &m_rdModeListWithoutMrlVer : &m_rdModeListWithoutMrlHor;
    
    
          if( !sps.getUseLFNST() && m_pcEncCfg->getUseFastISP() )
    
            CHECKD( uiRdModeList.size() > CandCostList.size(), "Error: CandCostList size" );
            // find the first non-MRL, non-MIP mode
            int indexFirstMode = int(uiRdModeList.size()) - 1; // default is last mode
            for (int k = 0; k < int(uiRdModeList.size()); k++)
            {
              if (uiRdModeList[k].mRefId == 0 && uiRdModeList[k].mipFlg == false)
              {
                indexFirstMode = k;
                break;
              }
            }
            // move the mode indicated by indexFirstMode to the beginning
            for (int idx = indexFirstMode - 1; idx >= 0; idx--)
            {
              std::swap(uiRdModeList[idx], uiRdModeList[idx + 1]);
              std::swap(CandCostList[idx], CandCostList[idx + 1]);
            }
            //insert all ISP modes after the first non-mrl mode
            uiRdModeList.insert(uiRdModeList.begin() + 1, secondIspList->begin(), secondIspList->end());
            uiRdModeList.insert(uiRdModeList.begin() + 1, firstIspList->begin(), firstIspList->end());
    
          }
          else
          {
            //insert all ISP modes at the end of the current list
            uiRdModeList.insert( uiRdModeList.end(), secondIspList->begin(), secondIspList->end() );
            uiRdModeList.insert( uiRdModeList.end(), firstIspList->begin() , firstIspList->end()  );
          }
    
        ModeInfo       uiBestPUMode;
    
        int            bestBDPCMMode = 0;
        double         bestCostNonBDPCM = MAX_DOUBLE;
    
    
        CodingStructure *csTemp = m_pTempCS[gp_sizeIdxInfo->idxFrom( cu.lwidth() )][gp_sizeIdxInfo->idxFrom( cu.lheight() )];
        CodingStructure *csBest = m_pBestCS[gp_sizeIdxInfo->idxFrom( cu.lwidth() )][gp_sizeIdxInfo->idxFrom( cu.lheight() )];
    
        csTemp->slice = cs.slice;
        csBest->slice = cs.slice;
        csTemp->initStructData();
        csBest->initStructData();
    
    #if JVET_O0050_LOCAL_DUAL_TREE
        csTemp->picture = cs.picture;
        csBest->picture = cs.picture;
    #endif
    
    #if !JVET_O0925_MIP_SIMPLIFICATIONS
    
        m_bestCostNonMip = MAX_DOUBLE;
    
        static_vector<int, FAST_UDI_MAX_RDMODE_NUM> rdModeIdxList;
    
    Philipp Merkle's avatar
    Philipp Merkle committed
        static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> rdModeListTemp;
    
        for( int i = 0; i < uiRdModeList.size(); i++)
    
          if( !uiRdModeList[i].mipFlg && uiRdModeList[i].ispMod==NOT_INTRA_SUBPARTITIONS )
    
    Philipp Merkle's avatar
    Philipp Merkle committed
            rdModeListTemp.push_back( uiRdModeList[i] );
    
            rdModeIdxList.push_back( i );
    
        for( int i = 0; i < uiRdModeList.size(); i++)
    
          if( uiRdModeList[i].mipFlg || uiRdModeList[i].ispMod!=NOT_INTRA_SUBPARTITIONS )
    
    Philipp Merkle's avatar
    Philipp Merkle committed
            rdModeListTemp.push_back( uiRdModeList[i] );
    
            rdModeIdxList.push_back( i );
    
    #if JVET_O0925_MIP_SIMPLIFICATIONS
    
    Philipp Merkle's avatar
    Philipp Merkle committed
          uiRdModeList.resize(rdModeListTemp.size());
    
        for( int i = 0; i < uiRdModeList.size(); i++)
    
    Philipp Merkle's avatar
    Philipp Merkle committed
          uiRdModeList[i] = rdModeListTemp[i];
    
    #if JVET_O0925_MIP_SIMPLIFICATIONS
        else
        {
    
    Philipp Merkle's avatar
    Philipp Merkle committed
          static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> rdModeListTemp;
    
          for( int i = 0; i < uiRdModeList.size(); i++ )
          {
            if( !uiRdModeList[i].mipFlg  )
            {
    
    Philipp Merkle's avatar
    Philipp Merkle committed
              rdModeListTemp.push_back( uiRdModeList[i] );
    
    Philipp Merkle's avatar
    Philipp Merkle committed
          uiRdModeList.resize(rdModeListTemp.size());
          for( int i = 0; i < rdModeListTemp.size(); i++ )
    
    Philipp Merkle's avatar
    Philipp Merkle committed
            uiRdModeList[i] = rdModeListTemp[i];
    
        // just to be sure
        numModesForFullRD = ( int ) uiRdModeList.size();
    
    #if !JVET_O0502_ISP_CLEANUP
    
        PartSplit intraSubPartitionsProcOrder = TU_NO_ISP;
        int       bestNormalIntraModeIndex    = -1;
    
        TUIntraSubPartitioner subTuPartitioner( partitioner );
    
        if( !cu.ispMode && !cu.mtsFlag )
        {
          m_modeCtrl->setMtsFirstPassNoIspCost( MAX_DOUBLE );
        }
    
    #if !JVET_O0502_ISP_CLEANUP
    
        bool      ispHorAllZeroCbfs = false, ispVerAllZeroCbfs = false;
    
        for (int mode = -2 * int(testBDPCM); mode < (int)uiRdModeList.size(); mode++)
    
        {
          // set CU/PU to luma prediction mode
          ModeInfo uiOrgMode;
    
    Muhammed Coban's avatar
    Muhammed Coban committed
          if ( mode < 0 )
    
    #if JVET_O0315_RDPCM_INTRAMODE_ALIGN
            uiOrgMode = ModeInfo(false, 0, NOT_INTRA_SUBPARTITIONS, cu.bdpcmMode == 2 ? VER_IDX : HOR_IDX);
    #else
    
            unsigned mpm_pred[NUM_MOST_PROBABLE_MODES];
            PU::getIntraMPMs(pu, mpm_pred);
            uiOrgMode = ModeInfo(false, 0, NOT_INTRA_SUBPARTITIONS, mpm_pred[0]);
    
            cu.mipFlag                     = uiOrgMode.mipFlg;
            cu.ispMode                     = uiOrgMode.ispMod;
            pu.multiRefIdx                 = uiOrgMode.mRefId;
            pu.intraDir[CHANNEL_TYPE_LUMA] = uiOrgMode.modeId;
          }
          else
          {
            cu.bdpcmMode = 0;
    
    #if JVET_O0502_ISP_CLEANUP
            if (uiRdModeList[mode].ispMod == INTRA_SUBPARTITIONS_RESERVED)
            {
              if (mode == numNonISPModes) // the list needs to be sorted only once
              {
                xSortISPCandList(bestCurrentCost, csBest->cost);
              }
              xGetNextISPMode(uiRdModeList[mode], (mode > 0 ? &uiRdModeList[mode - 1] : nullptr), Size(width, height));
              if (uiRdModeList[mode].ispMod == INTRA_SUBPARTITIONS_RESERVED)
                continue;
            }
    #endif
    
            uiOrgMode = uiRdModeList[mode];
    
          cu.mipFlag                     = uiOrgMode.mipFlg;
          cu.ispMode                     = uiOrgMode.ispMod;
          pu.multiRefIdx                 = uiOrgMode.mRefId;
          pu.intraDir[CHANNEL_TYPE_LUMA] = uiOrgMode.modeId;
    
          CHECK(cu.mipFlag && pu.multiRefIdx, "Error: combination of MIP and MRL not supported");
          CHECK(pu.multiRefIdx && (pu.intraDir[0] == PLANAR_IDX), "Error: combination of MRL and Planar mode not supported");
          CHECK(cu.ispMode && cu.mipFlag, "Error: combination of ISP and MIP not supported");
          CHECK(cu.ispMode && pu.multiRefIdx, "Error: combination of ISP and MRL not supported");
    
    #if !JVET_O0502_ISP_CLEANUP
    
            if( cu.ispMode )
            {
              intraSubPartitionsProcOrder = CU::getISPType( cu, COMPONENT_Y );
              bool tuIsDividedInRows = CU::divideTuInRows( cu );
    
              if ( ( tuIsDividedInRows && ispHorAllZeroCbfs ) || ( !tuIsDividedInRows && ispVerAllZeroCbfs ) )
              {
                continue;
              }
    
              if( m_intraModeDiagRatio.at( bestNormalIntraModeIndex ) > 1.25 )
              {
                continue;
              }
    
              if( ( m_intraModeHorVerRatio.at( bestNormalIntraModeIndex ) > 1.25 && tuIsDividedInRows ) || ( m_intraModeHorVerRatio.at( bestNormalIntraModeIndex ) < 0.8 && !tuIsDividedInRows ) )
    
    
          // set context models
          m_CABACEstimator->getCtx() = ctxStart;
    
          // determine residual for partition
          cs.initSubStructure( *csTemp, partitioner.chType, cs.area, true );
    
    
          bool tmpValidReturn = false;
    
    #if JVET_O0502_ISP_CLEANUP
            tmpValidReturn = xIntraCodingLumaISP(*csTemp, subTuPartitioner, bestCurrentCost);
            if (csTemp->tus.size() == 0)
            {
              // no TUs were coded
              csTemp->cost = MAX_DOUBLE;
              continue;
            }
            if (!cu.mtsFlag && !cu.lfnstIdx)
            {
              // we save the data for future tests
              m_ispTestedModes.setModeResults((ISPType)cu.ispMode, (int)uiOrgMode.modeId, (int)csTemp->tus.size(), csTemp->cus[0]->firstTU->cbf[COMPONENT_Y] ? csTemp->cost : MAX_DOUBLE, csBest->cost);
            }
    #else
    
            tmpValidReturn = xRecurIntraCodingLumaQT( *csTemp, subTuPartitioner, bestCurrentCost, 0, intraSubPartitionsProcOrder, false,
                                                      mtsCheckRangeFlag, mtsFirstCheckId, mtsLastCheckId, moreProbMTSIdxFirst );
    
    #if !JVET_O0925_MIP_SIMPLIFICATIONS
    
            if( ! fastMip )
            {
              m_bestCostNonMip = MAX_DOUBLE;
            }
    
            tmpValidReturn = xRecurIntraCodingLumaQT( *csTemp, partitioner, uiBestPUMode.ispMod ? bestCurrentCost : MAX_DOUBLE, -1, TU_NO_ISP, uiBestPUMode.ispMod,
                                                      mtsCheckRangeFlag, mtsFirstCheckId, mtsLastCheckId, moreProbMTSIdxFirst );
    
    #if JVET_O0502_ISP_CLEANUP
          if (!cu.ispMode && !cu.mtsFlag && !cu.lfnstIdx && !cu.bdpcmMode && !pu.multiRefIdx && !cu.mipFlag && testISP)
          {
            m_regIntraRDListWithCosts.push_back(ModeInfoWithCost(cu.mipFlag, pu.multiRefIdx, cu.ispMode, uiOrgMode.modeId, csTemp->cost));
          }
    #endif
    
    
          if( cu.ispMode && !csTemp->cus[0]->firstTU->cbf[COMPONENT_Y] )
          {
    
    #if !JVET_O0502_ISP_CLEANUP
    
            if( !sps.getUseLFNST() )
    
              if ( cu.ispMode == HOR_INTRA_SUBPARTITIONS )
              {
                ispHorAllZeroCbfs |= ( m_pcEncCfg->getUseFastISP() && csTemp->tus[0]->lheight() > 2 && csTemp->cost >= bestCurrentCost );
              }
              else
              {
                ispVerAllZeroCbfs |= ( m_pcEncCfg->getUseFastISP() && csTemp->tus[0]->lwidth() > 2 && csTemp->cost >= bestCurrentCost );
              }
    
    Nan Hu's avatar
    Nan Hu committed
            csTemp->costDbOffset = 0;
    
          validReturn |= tmpValidReturn;
    
          if( sps.getUseLFNST() && mtsUsageFlag == 1 && !cu.ispMode && mode >= 0 )
          {
    
            m_modeCostStore[ lfnstIdx ][ testMip ? rdModeIdxList[ mode ] : mode ] = tmpValidReturn ? csTemp->cost : ( MAX_DOUBLE / 2.0 ); //(MAX_DOUBLE / 2.0) ??
    
    #if JVET_O0502_ISP_CLEANUP
          DTRACE(g_trace_ctx, D_INTRA_COST, "IntraCost T [x=%d,y=%d,w=%d,h=%d] %f (%d,%d,%d,%d,%d,%d) \n", cu.blocks[0].x,
            cu.blocks[0].y, (int)width, (int)height, csTemp->cost, uiOrgMode.modeId, uiOrgMode.ispMod,
            pu.multiRefIdx, cu.mipFlag, cu.lfnstIdx, cu.mtsFlag);
    #else
    
          DTRACE( g_trace_ctx, D_INTRA_COST, "IntraCost T %f (%d) \n", csTemp->cost, uiOrgMode.modeId );
    
            // check r-d cost
            if( csTemp->cost < csBest->cost )
            {
              std::swap( csTemp, csBest );
    
              uiBestPUMode  = uiOrgMode;
              bestBDPCMMode = cu.bdpcmMode;
              if( sps.getUseLFNST() && mtsUsageFlag == 1 && !cu.ispMode )
              {
                m_bestModeCostStore[ lfnstIdx ] = csBest->cost; //cs.cost;
    
                m_bestModeCostValid[ lfnstIdx ] = true;
    
              }
              if( csBest->cost < bestCurrentCost )
              {
                bestCurrentCost = csBest->cost;
              }
              if( !cu.ispMode && !cu.mtsFlag )
              {
                m_modeCtrl->setMtsFirstPassNoIspCost( csBest->cost );
              }
            }
            if( !cu.ispMode && !cu.bdpcmMode && csBest->cost < bestCostNonBDPCM )
    
              bestCostNonBDPCM = csBest->cost;
    
    #if !JVET_O0502_ISP_CLEANUP
    
              bestNormalIntraModeIndex = mode;
    
    #if JVET_O0050_LOCAL_DUAL_TREE
          if( cu.isConsIntra() && !cu.slice->isIntra() && csBest->cost != MAX_DOUBLE && costInterCU != COST_UNKNOWN && mode >= 0 )
          {
    
            if( m_pcEncCfg->getUseFastLocalDualTree() )
    
              //Note: only try one intra mode, which is especially useful to reduce EncT for LDB case (around 4%)
    
              break;
            }
            else
            {
              if( csBest->cost > costInterCU * 1.5 )
              {
                break;
              }
            }
          }
    #endif
    
        cu.ispMode = uiBestPUMode.ispMod;
    
        if( validReturn )
        {
          cs.useSubStructure( *csBest, partitioner.chType, pu.singleChan( CHANNEL_TYPE_LUMA ), true, true, keepResi, keepResi );
        }
    
        if( validReturn )
        {
          //=== update PU data ====
          cu.mipFlag = uiBestPUMode.mipFlg;
          pu.multiRefIdx = uiBestPUMode.mRefId;
          pu.intraDir[ CHANNEL_TYPE_LUMA ] = uiBestPUMode.modeId;
          cu.bdpcmMode = bestBDPCMMode;
        }
    
      }
    
      //===== reset context models =====
      m_CABACEstimator->getCtx() = ctxStart;
    
    // Chroma intra mode decision for one CU.
    //
    // Visible flow:
    //   1. Fetch the candidate list via PU::getIntraChromaCandModes.
    //   2. Pre-score candidates with a distortion measure on Cb + Cr and
    //      disable the reducedModeNumber (2) candidates that sort last.
    //   3. For each remaining candidate run xRecurIntraChromaCodingQT, compute
    //      the RD cost and keep the cheapest result in the temporary saveCS.
    //   4. Copy the winner back from saveCS into cs / cs.picture / the original TUs.
    //
    // cu             : coding unit whose chroma mode is searched; cu.ispMode is
    //                  reset to 0 at the end when luma used ISP and no chroma
    //                  candidate produced a cost below maxCostAllowed.
    // partitioner    : supplies the current area / channel type and is used for
    //                  the TU split.
    // maxCostAllowed : initial value of bestCostSoFar, the cost bound handed to
    //                  xRecurIntraChromaCodingQT.
    //
    // On exit pu.intraDir[1] and cs.dist hold the best mode and its distortion,
    // and the CABAC contexts are restored to their state at entry.
    //
    // NOTE(review): several #if / #else pairs below have no visible #endif and
    // some statements appear to be missing between them - confirm against the
    // upstream file before relying on this listing.
    void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner, const double maxCostAllowed )
    
    {
      const ChromaFormat format   = cu.chromaFormat;
      const uint32_t    numberValidComponents = getNumberValidComponents(format);
      CodingStructure &cs = *cu.cs;
      // snapshot of the CABAC contexts, restored before every candidate and at exit
      const TempCtx ctxStart  ( m_CtxCache, m_CABACEstimator->getCtx() );
    
      cs.setDecomp( cs.area.Cb(), false );
    
    
      double    bestCostSoFar = maxCostAllowed;
    
      // NOTE(review): no #endif is visible for the conditional below - confirm
    #if JVET_O0050_LOCAL_DUAL_TREE
      bool      lumaUsesISP   = !cu.isSepTree() && cu.ispMode;
    #else
    
      bool      lumaUsesISP   = !CS::isDualITree( *cu.cs ) && cu.ispMode;
    
      PartSplit ispType       = lumaUsesISP ? CU::getISPType( cu, COMPONENT_Y ) : TU_NO_ISP;
      CHECK( cu.ispMode && bestCostSoFar < 0, "bestCostSoFar must be positive!" );
    
    
      auto &pu = *cu.firstPU;
    
      {
        // running best over all tested chroma candidates
        uint32_t       uiBestMode = 0;
        Distortion uiBestDist = 0;
        double     dBestCost = MAX_DOUBLE;
    
        //----- init mode list ----
        {
          uint32_t  uiMinMode = 0;
          uint32_t  uiMaxMode = NUM_CHROMA_MODE;
    
          //----- check chroma modes -----
          uint32_t chromaCandModes[ NUM_CHROMA_MODE ];
          PU::getIntraChromaCandModes( pu, chromaCandModes );
    
          // create a temporary CS
          CodingStructure &saveCS = *m_pSaveCS[0];
          saveCS.pcv      = cs.pcv;
          saveCS.picture  = cs.picture;
          saveCS.area.repositionTo( cs.area );
          saveCS.clearTUs();
    
    
          // NOTE(review): the statements guarded by the two conditionals below and
          // their #endif lines are not visible here - confirm against upstream
    #if JVET_O0050_LOCAL_DUAL_TREE
          if( !cu.isSepTree() && cu.ispMode )
    #else
    
          if( !CS::isDualITree( cs ) && cu.ispMode )
    
    #if JVET_O0050_LOCAL_DUAL_TREE
          if( cu.isSepTree() )
    #else
    
          {
            // add one TU per part of the maximum-transform split when that split
            // is allowed, otherwise a single TU covering the current area
            if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
            {
              partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
    
              do
              {
                cs.addTU( CS::getArea( cs, partitioner.currArea(), partitioner.chType ), partitioner.chType ).depth = partitioner.currTrDepth;
              } while( partitioner.nextPart( cs ) );
    
              partitioner.exitCurrSplit();
            }
            else
            cs.addTU( CS::getArea( cs, partitioner.currArea(), partitioner.chType ), partitioner.chType );
          }
    
          // TUs of cs that are mirrored in saveCS, kept in the same order as saveCS.tus
          std::vector<TransformUnit*> orgTUs;
    
    
          if( lumaUsesISP )
          {
            // give saveCS its own CU/PU copy carrying the ISP mode
            CodingUnit& auxCU = saveCS.addCU( cu, partitioner.chType );
            auxCU.ispMode = cu.ispMode;
            saveCS.sps = cu.cs->sps;
            saveCS.addPU( *cu.firstPU, partitioner.chType );
          }
    
    
    
          // create a store for the TUs
          for( const auto &ptu : cs.tus )
          {
            // for split TUs in HEVC, add the TUs without Chroma parts for correct setting of Cbfs
    
            if( lumaUsesISP || pu.contains( *ptu, CHANNEL_TYPE_CHROMA ) )
    
            {
              saveCS.addTU( *ptu, partitioner.chType );
              orgTUs.push_back( ptu );
            }
          }
    
          // SATD pre-selecting.
          int satdModeList[NUM_CHROMA_MODE];
          int64_t satdSortedCost[NUM_CHROMA_MODE];
          for (int i = 0; i < NUM_CHROMA_MODE; i++)
          {
            satdSortedCost[i] = 0; // for the mode not pre-select by SATD, do RDO by default, so set the initial value 0.
            satdModeList[i] = 0;
          }
          bool modeIsEnable[NUM_INTRA_MODE + 1]; // use intra mode idx to check whether enable
          for (int i = 0; i < NUM_INTRA_MODE + 1; i++)
          {
            modeIsEnable[i] = 1;
          }
    
          DistParam distParam;
    
          // Hadamard-based distortion is requested unless the CU bypasses transform and quantisation
          const bool useHadamard = !cu.transQuantBypass;
    
          pu.intraDir[1] = MDLM_L_IDX; // temporary assigned, just to indicate this is a MDLM mode. for luma down-sampling operation.
    
          initIntraPatternChType(cu, pu.Cb());
          initIntraPatternChType(cu, pu.Cr());
          xGetLumaRecPixels(pu, pu.Cb());
    
          // pre-score every candidate: predict Cb and Cr and sum both distortions
          for (int idx = uiMinMode; idx <= uiMaxMode - 1; idx++)
          {
            int mode = chromaCandModes[idx];
            satdModeList[idx] = mode;
            if (PU::isLMCMode(mode) && !PU::isLMCModeEnabled(pu, mode))
            {
              continue;
            }
    
            // these modes keep their initial cost of 0 and therefore sort to the front
            if ((mode == LM_CHROMA_IDX) || (mode == PLANAR_IDX) || (mode == DM_CHROMA_IDX)) // only pre-check regular modes and MDLM modes, not including DM ,Planar, and LM
    
            {
              continue;
            }
            pu.intraDir[1] = mode; // temporary assigned, for SATD checking.
    
            int64_t sad = 0;
            // NOTE(review): this local cs shadows the function-level cs declared above
            CodingStructure& cs = *(pu.cs);
    
            CompArea areaCb = pu.Cb();
            PelBuf orgCb = cs.getOrgBuf(areaCb);
            PelBuf predCb = cs.getPredBuf(areaCb);
    
            m_pcRdCost->setDistParam(distParam, orgCb, predCb, pu.cs->sps->getBitDepth(CHANNEL_TYPE_CHROMA), COMPONENT_Cb, useHadamard);
            distParam.applyWeight = false;
    
            if (PU::isLMCMode(mode))
            {
              predIntraChromaLM(COMPONENT_Cb, predCb, pu, areaCb, mode);
            }
            else
            {
    
              initPredIntraParams(pu, pu.Cb(), *pu.cs->sps);
              predIntraAng(COMPONENT_Cb, predCb, pu);
    
            }
    
            sad += distParam.distFunc(distParam);
    
            CompArea areaCr = pu.Cr();
            PelBuf orgCr = cs.getOrgBuf(areaCr);
            PelBuf predCr = cs.getPredBuf(areaCr);
    
            m_pcRdCost->setDistParam(distParam, orgCr, predCr, pu.cs->sps->getBitDepth(CHANNEL_TYPE_CHROMA), COMPONENT_Cr, useHadamard);
            distParam.applyWeight = false;
    
            if (PU::isLMCMode(mode))
            {
              predIntraChromaLM(COMPONENT_Cr, predCr, pu, areaCr, mode);
            }
            else
            {
    
              initPredIntraParams(pu, pu.Cr(), *pu.cs->sps);
              predIntraAng(COMPONENT_Cr, predCr, pu);
    
            }
            sad += distParam.distFunc(distParam);
            satdSortedCost[idx] = sad;
          }
          // sort the mode based on the cost from small to large.
          // (pairwise exchange sort; satdModeList and satdSortedCost are permuted together)
          int tempIdx = 0;
          int64_t tempCost = 0;
          for (int i = uiMinMode; i <= uiMaxMode - 1; i++)
          {
            for (int j = i + 1; j <= uiMaxMode - 1; j++)
            {
              if (satdSortedCost[j] < satdSortedCost[i])
              {
                tempIdx = satdModeList[i];
                satdModeList[i] = satdModeList[j];
                satdModeList[j] = tempIdx;
    
                tempCost = satdSortedCost[i];
                satdSortedCost[i] = satdSortedCost[j];
                satdSortedCost[j] = tempCost;
    
              }
            }
          }
          int reducedModeNumber = 2; // reduce the number of chroma modes
          for (int i = 0; i < reducedModeNumber; i++)
          {
            modeIsEnable[satdModeList[uiMaxMode - 1 - i]] = 0; // disable the last reducedModeNumber modes
          }
    
    
          // save the dist
          Distortion baseDist = cs.dist;
    
          // full RD test of every candidate that survived the pre-selection
          for (uint32_t uiMode = uiMinMode; uiMode < uiMaxMode; uiMode++)
          {
            const int chromaIntraMode = chromaCandModes[uiMode];
            if( PU::isLMCMode( chromaIntraMode ) && ! PU::isLMCModeEnabled( pu, chromaIntraMode ) )
            {
              continue;
            }
    
            if (!modeIsEnable[chromaIntraMode] && PU::isLMCModeEnabled(pu, chromaIntraMode)) // when CCLM is disable, then MDLM is disable. not use satd checking
            {
              continue;
            }
    
            cs.setDecomp( pu.Cb(), false );
            cs.dist = baseDist;
            //----- restore context models -----
            m_CABACEstimator->getCtx() = ctxStart;
    
            //----- chroma coding -----
            pu.intraDir[1] = chromaIntraMode;
    
    
            xRecurIntraChromaCodingQT( cs, partitioner, bestCostSoFar, ispType );
            // with luma ISP, a distortion of MAX_UINT marks this candidate as not usable
            if( lumaUsesISP && cs.dist == MAX_UINT )
            {
              continue;
            }
    
            // NOTE(review): the #else branch, the #endif and the statement guarded
            // by this condition are not visible here - confirm against upstream
    #if JVET_O1136_TS_BDPCM_SIGNALLING
            if (cs.sps->getTransformSkipEnabledFlag())
    #else
    
            uint64_t fracBits   = xGetIntraFracBitsQT( cs, partitioner, false, true, -1, ispType );
    
            Distortion uiDist = cs.dist;
            // only the distortion added on top of baseDist is charged to this candidate
            double    dCost   = m_pcRdCost->calcRdCost( fracBits, uiDist - baseDist );
    
            //----- compare -----
            if( dCost < dBestCost )
            {
    
              // the cost bound is tightened only when luma uses ISP
              if( lumaUsesISP && dCost < bestCostSoFar )
              {
                bestCostSoFar = dCost;
              }
    
              // stash buffers and TU data of the new best candidate in saveCS
              for( uint32_t i = getFirstComponentOfChannel( CHANNEL_TYPE_CHROMA ); i < numberValidComponents; i++ )
              {
                const CompArea &area = pu.blocks[i];
    
                saveCS.getRecoBuf     ( area ).copyFrom( cs.getRecoBuf   ( area ) );
    #if KEEP_PRED_AND_RESI_SIGNALS
                saveCS.getPredBuf     ( area ).copyFrom( cs.getPredBuf   ( area ) );
                saveCS.getResiBuf     ( area ).copyFrom( cs.getResiBuf   ( area ) );
    
    Taoran Lu's avatar
    Taoran Lu committed
    #endif
                saveCS.getPredBuf     ( area ).copyFrom( cs.getPredBuf   (area ) );
                cs.picture->getPredBuf( area ).copyFrom( cs.getPredBuf   (area ) );
    
                cs.picture->getRecoBuf( area ).copyFrom( cs.getRecoBuf( area ) );
    
                for( uint32_t j = 0; j < saveCS.tus.size(); j++ )
                {
                  saveCS.tus[j]->copyComponentFrom( *orgTUs[j], area.compID );
                }
              }
    
              dBestCost  = dCost;
              uiBestDist = uiDist;
              uiBestMode = chromaIntraMode;
            }
          }
    
          for( uint32_t i = getFirstComponentOfChannel( CHANNEL_TYPE_CHROMA ); i < numberValidComponents; i++ )
          {
            const CompArea &area = pu.blocks[i];
    
            cs.getRecoBuf         ( area ).copyFrom( saveCS.getRecoBuf( area ) );
    #if KEEP_PRED_AND_RESI_SIGNALS
            cs.getPredBuf         ( area ).copyFrom( saveCS.getPredBuf( area ) );
            cs.getResiBuf         ( area ).copyFrom( saveCS.getResiBuf( area ) );
    #endif
    
    Taoran Lu's avatar
    Taoran Lu committed
            cs.getPredBuf         ( area ).copyFrom( saveCS.getPredBuf( area ) );
            cs.picture->getPredBuf( area ).copyFrom( cs.getPredBuf    ( area ) );
    
    
            cs.picture->getRecoBuf( area ).copyFrom( cs.    getRecoBuf( area ) );
    
            // write the stored TU data of the best candidate back into the original TUs
            for( uint32_t j = 0; j < saveCS.tus.size(); j++ )
            {
              orgTUs[ j ]->copyComponentFrom( *saveCS.tus[ j ], area.compID );
            }
          }
        }
    
        // publish the winning chroma mode and its distortion
        pu.intraDir[1] = uiBestMode;
        cs.dist        = uiBestDist;
      }
    
      //----- restore context models -----
      m_CABACEstimator->getCtx() = ctxStart;
    
      // bestCostSoFar never dropped below the allowed maximum: switch ISP off for this CU
      if( lumaUsesISP && bestCostSoFar >= maxCostAllowed )
      {
        cu.ispMode = 0;
      }
    
    }
    
    // PCM-codes the current coding structure.
    //
    // Every component in the range [compStr, compEnd] is passed to xEncPCM:
    //   - separate/dual tree, chroma channel : Cb .. Cr
    //   - separate/dual tree, luma channel   : Y only
    //   - otherwise                          : Y .. Cr
    // Afterwards the prediction, residual and original-residual buffers are
    // zero-filled, dist / fracBits / cost are reset to 0, the area is marked as
    // decomposed and the (zeroed) prediction is copied into the picture buffer.
    //
    // cs          : coding structure being coded; its buffers and cost fields are overwritten
    // partitioner : supplies the channel type (and, with JVET_O0050, the tree type)
    void IntraSearch::IPCMSearch(CodingStructure &cs, Partitioner& partitioner)
    {
    #if JVET_O0050_LOCAL_DUAL_TREE
      const ComponentID compStr = (partitioner.isSepTree(cs) && !isLuma( partitioner.chType)) ? COMPONENT_Cb : COMPONENT_Y;
      const ComponentID compEnd = (partitioner.isSepTree(cs) && isLuma( partitioner.chType)) ? COMPONENT_Y : COMPONENT_Cr;
    #else
      const ComponentID compStr = (CS::isDualITree(cs) && !isLuma(partitioner.chType)) ? COMPONENT_Cb: COMPONENT_Y;
      const ComponentID compEnd = (CS::isDualITree(cs) && isLuma(partitioner.chType)) ? COMPONENT_Y : COMPONENT_Cr;
    #endif

      for( ComponentID compID = compStr; compID <= compEnd; compID = ComponentID(compID+1) )
      {
        xEncPCM(cs, partitioner, compID);
      }

      cs.getPredBuf().fill(0);
      cs.getResiBuf().fill(0);
      cs.getOrgResiBuf().fill(0);

      cs.dist     = 0;
      cs.fracBits = 0;
      cs.cost     = 0;

      cs.setDecomp(cs.area);

      // keep the picture-level prediction buffer in sync with the zeroed CS prediction
      cs.picture->getPredBuf(cs.area).copyFrom(cs.getPredBuf());
    }
    
    // PCM-encodes one colour component of the current TU.
    //
    // The original samples are copied into a temporary buffer and each sample is
    // right-shifted by (channel bit depth - PCM bit depth) into the TU PCM
    // buffer; the reconstruction is that PCM sample shifted back left by the
    // same amount. For luma, when the slice has LMCS enabled and the reshaper
    // CTU flag is set, the temporary copy is first mapped through the forward
    // reshaping LUT.
    //
    // cs          : coding structure providing the SPS, slice and sample buffers
    // partitioner : supplies the channel type used to look up the TU
    // compID      : component to encode
    void IntraSearch::xEncPCM(CodingStructure &cs, Partitioner& partitioner, const ComponentID &compID)
    {
      TransformUnit &tu = *cs.getTU( partitioner.chType );

      const int      channelBitDepth = cs.sps->getBitDepth(toChannelType(compID));
      const uint32_t uiPCMBitDepth   = cs.sps->getPCMBitDepth(toChannelType(compID));

      const int pcmShiftRight = (channelBitDepth - int(uiPCMBitDepth));

      // validate the shift before any buffer is touched
      CHECK(pcmShiftRight < 0, "Negative shift");

      CompArea  area    = tu.blocks[compID];
      PelBuf    pcmBuf  = tu.getPcmbuf  (compID);
      PelBuf    recBuf  = cs.getRecoBuf ( area );
      CPelBuf   orgBuf  = cs.getOrgBuf  ( area );

      // work on a private copy of the original so it can be reshaped in place
      CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
      PelBuf   tempOrgBuf = m_tmpStorageLCU.getBuf(tmpArea);
      tempOrgBuf.copyFrom(orgBuf);

      if (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag() && compID == COMPONENT_Y)
      {
        tempOrgBuf.rspSignal(m_pcReshape->getFwdLUT());
      }

      for (uint32_t uiY = 0; uiY < pcmBuf.height; uiY++)
      {
        for (uint32_t uiX = 0; uiX < pcmBuf.width; uiX++)
        {
          // Encode
          pcmBuf.at(uiX, uiY) = tempOrgBuf.at(uiX, uiY) >> pcmShiftRight;

          // Reconstruction
          recBuf.at(uiX, uiY) = pcmBuf.at(uiX, uiY) << pcmShiftRight;
        }
      }
    }
    
    
    #if JVET_O0050_LOCAL_DUAL_TREE
    // Appends one (area, cost) pair to the per-SCIPU store. Once
    // NUM_INTER_CU_INFO_SAVE entries are held, further calls leave the store
    // unchanged.
    void IntraSearch::saveCuAreaCostInSCIPU( Area area, double cost )
    {
      if( m_numCuInSCIPU >= NUM_INTER_CU_INFO_SAVE )
      {
        return; // store is full
      }

      const auto slot = m_numCuInSCIPU;
      m_cuAreaInSCIPU[slot] = area;
      m_cuCostInSCIPU[slot] = cost;
      m_numCuInSCIPU = slot + 1;
    }
    
    // Clears the per-SCIPU store: every slot gets a default-constructed Area
    // and a cost of 0, and the entry counter is reset to 0.
    void IntraSearch::initCuAreaCostInSCIPU()
    {
      int slot = 0;
      while( slot < NUM_INTER_CU_INFO_SAVE )
      {
        m_cuAreaInSCIPU[slot] = Area();
        m_cuCostInSCIPU[slot] = 0;
        ++slot;
      }

      m_numCuInSCIPU = 0;
    }
    #endif
    
    void IntraSearch::PLTSearch(CodingStructure &cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp)
    
      CodingUnit    &cu = *cs.getCU(partitioner.chType);
    
      TransformUnit &tu = *cs.getTU(partitioner.chType);
    
      uint32_t height = cu.block(compBegin).height;
      uint32_t width = cu.block(compBegin).width;
    
      m_orgCtxRD = PLTCtx(m_CABACEstimator->getCtx());
    
      if (m_pcEncCfg->getReshaper() && (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))
      {
        cs.getPredBuf().copyFrom(cs.getOrgBuf());
        cs.getPredBuf().Y().rspSignal(m_pcReshape->getFwdLUT());
      }
    
    
      Pel  *runLength = tu.getRunLens (compBegin);
      bool *runType   = tu.getRunTypes(compBegin);
    
      cu.lastPLTSize[compBegin] = cs.prevPLT.curPLTSize[compBegin];
      //derive palette
      derivePLTLossy(cs, partitioner, compBegin, numComp);
      reorderPLT(cs, partitioner, compBegin, numComp);
    
    // -------------------------------------------------------------------------------------------------------------------
    
      //calculate palette index
      preCalcPLTIndex(cs, partitioner, compBegin, numComp);
      //derive run
    
      uint64_t bits = MAX_UINT;
      deriveRunAndCalcBits(cs, partitioner, compBegin, numComp, PLT_SCAN_HORTRAV, bits);
    
      if ((cu.curPLTSize[compBegin] + cu.useEscape[compBegin]) > 1)
      {
    
        deriveRunAndCalcBits(cs, partitioner, compBegin, numComp, PLT_SCAN_VERTRAV, bits);
    
    Yung-Hsuan Chao (Jessie)'s avatar
    Yung-Hsuan Chao (Jessie) committed
      cu.useRotation[compBegin] = m_bestScanRotationMode;
    
      memcpy(runType, m_runTypeRD, sizeof(bool)*width*height);
      memcpy(runLength, m_runLengthRD, sizeof(Pel)*width*height);
    
      //reconstruct pixel
      PelBuf    curPLTIdx = tu.getcurPLTIdx(compBegin);
    
      for (uint32_t y = 0; y < height; y++)
    
        for (uint32_t x = 0; x < width; x++)
    
          if (curPLTIdx.at(x, y) == cu.curPLTSize[compBegin])
    
    // Intra search
    
          }
          else
          {
            for (uint32_t compID = compBegin; compID < (compBegin + numComp); compID++)
            {
    
              CompArea area = cu.blocks[compID];
              PelBuf   recBuf = cs.getRecoBuf(area);
    
              uint32_t scaleX = getComponentScaleX((ComponentID)COMPONENT_Cb, cs.sps->getChromaFormatIdc());
              uint32_t scaleY = getComponentScaleY((ComponentID)COMPONENT_Cb, cs.sps->getChromaFormatIdc());
              if (compBegin != COMPONENT_Y || compID == COMPONENT_Y)
              {
    
                recBuf.at(x, y) = cu.curPLT[compID][curPLTIdx.at(x, y)];
    
              else if (compBegin == COMPONENT_Y && compID != COMPONENT_Y && y % (1 << scaleY) == 0 && x % (1 << scaleX) == 0)
    
                recBuf.at(x >> scaleX, y >> scaleY) = cu.curPLT[compID][curPLTIdx.at(x, y)];
    
              }
            }
          }
        }
      }
    
      cs.getPredBuf().fill(0);
      cs.getResiBuf().fill(0);
      cs.getOrgResiBuf().fill(0);
    
      cs.fracBits = MAX_UINT;
      cs.cost = MAX_DOUBLE;
      Distortion distortion = 0;
      for (uint32_t comp = compBegin; comp < (compBegin + numComp); comp++)
      {
        const ComponentID compID = ComponentID(comp);
        CPelBuf reco = cs.getRecoBuf(compID);
        CPelBuf org = cs.getOrgBuf(compID);
    
        if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (
          m_pcEncCfg->getReshaper() && (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())))
        {
          const CPelBuf orgLuma = cs.getOrgBuf(cs.area.blocks[COMPONENT_Y]);
    
          if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()))
          {
            const CompArea &areaY = cu.Y();
    
            CompArea tmpArea1(COMPONENT_Y, areaY.chromaFormat, Position(0, 0), areaY.size());
            PelBuf   tmpRecLuma = m_tmpStorageLCU.getBuf(tmpArea1);
    
            tmpRecLuma.copyFrom(reco);
            tmpRecLuma.rspSignal(m_pcReshape->getInvLUT());
            distortion += m_pcRdCost->getDistPart(org, tmpRecLuma, cs.sps->getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
          }
          else
          {
            distortion += m_pcRdCost->getDistPart(org, reco, cs.sps->getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
          }
        }
        else
    
          distortion += m_pcRdCost->getDistPart(org, reco, cs.sps->getBitDepth(toChannelType(compID)), compID, DF_SSE);
      }
    
      cs.dist += distortion;
      const CompArea &area = cu.blocks[compBegin];
      cs.setDecomp(area);
      cs.picture->getRecoBuf(area).copyFrom(cs.getRecoBuf(area));
    
    // Derives the palette runs for one traverse-scan direction, estimates the bits needed to
    // signal the resulting palette info, and records this direction as the best one when it is
    // cheaper than the best cost seen so far.
    //
    // \param cs           coding structure holding the current CU/TU
    // \param partitioner  supplies the channel type used to look up the CU/TU
    // \param compBegin    first component covered by the palette
    // \param numComp      number of components covered by the palette
    // \param pltScanMode  scan direction to evaluate (PLT_SCAN_VERTRAV selects the rotated scan)
    // \param minBits      in/out: lowest estimated bit cost so far; lowered when this scan wins
    //
    // Side effects: overwrites cu.useRotation[compBegin], m_scanOrder and the TU run buffers for
    // the evaluated scan. When the scan wins, the run buffers are saved to m_runTypeRD /
    // m_runLengthRD and m_bestScanRotationMode is updated.
    void IntraSearch::deriveRunAndCalcBits(CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp, PLTScanMode pltScanMode, uint64_t& minBits)
    {
      CodingUnit    &cu = *cs.getCU(partitioner.chType);
      TransformUnit &tu = *cs.getTU(partitioner.chType);

      uint32_t height = cu.block(compBegin).height;
      uint32_t width = cu.block(compBegin).width;
      Pel  *runLength = tu.getRunLens (compBegin);
      bool *runType   = tu.getRunTypes(compBegin);

      // Select the traverse scan table matching the requested direction and block size.
      cu.useRotation[compBegin] = (pltScanMode == PLT_SCAN_VERTRAV);
      m_scanOrder = g_scanOrder[SCAN_UNGROUPED][(cu.useRotation[compBegin]) ? SCAN_TRAV_VER : SCAN_TRAV_HOR][gp_sizeIdxInfo->idxFrom(width)][gp_sizeIdxInfo->idxFrom(height)];

      deriveRun(cs, partitioner, compBegin);

      // Estimate the cost of the palette syntax from the original context state.
      m_CABACEstimator->getCtx() = PLTCtx(m_orgCtxRD);
      m_CABACEstimator->resetBits();
      CUCtx cuCtx;
      // NOTE(review): presumably flags delta-QP / chroma-QP-adjust as already coded so that
      // cu_palette_info() does not add their bits to the estimate - confirm.
      cuCtx.isDQPCoded = true;
      cuCtx.isChromaQpAdjCoded = true;
      m_CABACEstimator->cu_palette_info(cu, compBegin, numComp, cuCtx);

      uint64_t bitsTemp = m_CABACEstimator->getEstFracBits();
      if (minBits > bitsTemp)
      {
        // This scan is the cheapest so far: remember it together with its run buffers.
        m_bestScanRotationMode = pltScanMode;
        memcpy(m_runTypeRD, runType, sizeof(bool)*width*height);
        memcpy(m_runLengthRD, runLength, sizeof(Pel)*width*height);
        minBits = bitsTemp;
      }
    }
    
    // Partitions the block, in m_scanOrder order, into palette runs and writes the chosen run
    // type and run length for every covered position into the TU run buffers.
    //
    // At each start position an index run (calIndexRun) and a copy run (calCopyRun) are
    // evaluated. Both are costed with getRunBits() starting from the same saved context state
    // (m_storeCtxRun); the run with the lower average bits per pixel is committed, with ties
    // going to the copy run. The context state reached by the committed run becomes the
    // starting state for the next position.
    //
    // \param cs           coding structure holding the current CU/TU
    // \param partitioner  supplies the channel type used to look up the CU/TU
    // \param compBegin    first component covered by the palette
    //
    // NOTE(review): the braces, the handling of the "neither run valid" case and the two
    // `idx += ...` advances were not present in the text under review and have been
    // reconstructed from the loop condition and the trailing assert(idx == total) - confirm
    // against the repository version.
    void IntraSearch::deriveRun(CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin)
    {
      CodingUnit    &cu = *cs.getCU(partitioner.chType);
      TransformUnit &tu = *cs.getTU(partitioner.chType);

      uint32_t height = cu.block(compBegin).height;
      uint32_t width = cu.block(compBegin).width;
      uint32_t total = height * width, idx = 0;
      uint32_t startPos = 0;

      uint64_t indexBits = 0, runBitsIndex = 0, runBitsCopy = 0;

      m_storeCtxRun = PLTCtx(m_orgCtxRD);

      PLTtypeBuf  runType = tu.getrunType(compBegin);
      PelBuf      runLength = tu.getrunLength(compBegin);

      while (idx < total)
      {
        startPos = idx;
        double aveBitsPerPix[NUM_PLT_RUN];

        // Candidate 1: index run, costed from the saved context state.
        uint32_t indexRun = 0;
        bool runValid = calIndexRun(cs, partitioner, startPos, total, indexRun, compBegin);
        m_CABACEstimator->getCtx() = PLTCtx(m_storeCtxRun);
        aveBitsPerPix[PLT_RUN_INDEX] = runValid ? getRunBits(cu, indexRun, startPos, PLT_RUN_INDEX, &indexBits, &runBitsIndex, compBegin) : MAX_DOUBLE;
        m_storeCtxRunIndex = PLTCtx(m_CABACEstimator->getCtx());

        // Candidate 2: copy run, costed from the same saved context state.
        uint32_t copyRun = 0;
        bool copyValid = calCopyRun(cs, partitioner, startPos, total, copyRun, compBegin);
        m_CABACEstimator->getCtx() = PLTCtx(m_storeCtxRun);
        aveBitsPerPix[PLT_RUN_COPY] = copyValid ? getRunBits(cu, copyRun, startPos, PLT_RUN_COPY, &indexBits, &runBitsCopy, compBegin) : MAX_DOUBLE;
        m_storeCtxRunCopy = PLTCtx(m_CABACEstimator->getCtx());

        if (copyValid == 0 && runValid == 0)
        {
          // Neither run type can be coded here, so idx cannot advance. Treat it as an
          // invariant violation and leave the loop so builds without asserts do not hang.
          assert(0);
          break;
        }
        else
        {
          if (aveBitsPerPix[PLT_RUN_COPY] <= aveBitsPerPix[PLT_RUN_INDEX])
          {
            for (uint32_t runidx = 0; runidx < copyRun; runidx++)
            {
              uint32_t posy = m_scanOrder[idx + runidx].y;
              uint32_t posx = m_scanOrder[idx + runidx].x;
              runType.at(posx, posy) = PLT_RUN_COPY;
              runLength.at(posx, posy) = copyRun;
            }
            idx += copyRun;   // continue after the pixels covered by this run
            m_storeCtxRun = PLTCtx(m_storeCtxRunCopy);
          }
          else
          {
            for (uint32_t runidx = 0; runidx < indexRun; runidx++)
            {
              uint32_t posy = m_scanOrder[idx + runidx].y;
              uint32_t posx = m_scanOrder[idx + runidx].x;
              runType.at(posx, posy) = PLT_RUN_INDEX;
              runLength.at(posx, posy) = indexRun;
            }
            idx += indexRun;  // continue after the pixels covered by this run
            m_storeCtxRun = PLTCtx(m_storeCtxRunIndex);
          }
        }
      }
      assert(idx == total);
    }
    
    double IntraSearch::getRunBits(const CodingUnit&  cu, uint32_t run, uint32_t strPos, PLTRunMode paletteRunMode, uint64_t* indexBits, uint64_t* runBits, ComponentID compBegin)
    
      TransformUnit&   tu = *cu.firstTU;
    
      uint32_t height = cu.block(compBegin).height;
      uint32_t width  = cu.block(compBegin).width;
      uint32_t endPos = height*width;
      PLTtypeBuf runType   = tu.getrunType(compBegin);
      PelBuf     curPLTIdx = tu.getcurPLTIdx(compBegin);
      uint32_t   indexMaxSize = (cu.useEscape[compBegin]) ? (cu.curPLTSize[compBegin] + 1) : cu.curPLTSize[compBegin];
    
    
      m_CABACEstimator->resetBits();
      ///////////////// encode Run Type
    
    Yung-Hsuan Chao (Jessie)'s avatar
    Yung-Hsuan Chao (Jessie) committed
      m_CABACEstimator->encodeRunType(cu, runType, strPos, m_scanOrder, compBegin);
    
      uint64_t runTypeBits = m_CABACEstimator->getEstFracBits();
    
      uint32_t curLevel = 0;
      switch (paletteRunMode)
      {
      case PLT_RUN_INDEX:
    
        curLevel = m_CABACEstimator->writePLTIndex(cu, strPos, curPLTIdx, runType, indexMaxSize, compBegin);
        *indexBits = m_CABACEstimator->getEstFracBits() - runTypeBits;
    
        m_CABACEstimator->cu_run_val(run - 1, PLT_RUN_INDEX, curLevel, endPos - strPos - 1);
    
        *runBits = m_CABACEstimator->getEstFracBits() - runTypeBits - (*indexBits);
    
        break;
      case PLT_RUN_COPY:
        m_CABACEstimator->cu_run_val(run - 1, PLT_RUN_COPY, curLevel, endPos - strPos - 1);
    
        *runBits = m_CABACEstimator->getEstFracBits() - runTypeBits;
    
      double costPerPixel = (double)m_CABACEstimator->getEstFracBits() / (double)run;
      return costPerPixel;
    
    void IntraSearch::preCalcPLTIndex(CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp)
    
      CodingUnit &cu = *cs.getCU(partitioner.chType);
      TransformUnit &tu = *cs.getTU(partitioner.chType);
      const int  channelBitDepth_L = cs.sps->getBitDepth(CHANNEL_TYPE_LUMA);
      const int  channelBitDepth_C = cs.sps->getBitDepth(CHANNEL_TYPE_CHROMA);