Skip to content
Snippets Groups Projects
IntraSearch.cpp 322 KiB
Newer Older
  • Learn to ignore specific revisions
  •     orgResiCr[0].copyFrom( resiCr );
        if( doReshaping )
        {
          int cResScaleInv = currTU.getChromaAdj();
          orgResiCb[0].scaleSignal( cResScaleInv, 1, currTU.cu->cs->slice->clpRng(COMPONENT_Cb) );
          orgResiCr[0].scaleSignal( cResScaleInv, 1, currTU.cu->cs->slice->clpRng(COMPONENT_Cr) );
        }
    
        for( uint32_t c = COMPONENT_Cb; c < numTBlocks; c++)
        {
          const ComponentID compID  = ComponentID(c);
          const CompArea&   area    = currTU.blocks[compID];
    
          double     dSingleCost    = MAX_DOUBLE;
          int        bestModeId     = 0;
          Distortion singleDistCTmp = 0;
          double     singleCostTmp  = 0;
    
          const bool tsAllowed = TU::isTSAllowed(currTU, compID) && m_pcEncCfg->getUseChromaTS() && !currTU.cu->lfnstIdx;
    
          uint8_t nNumTransformCands = 1 + (tsAllowed ? 1 : 0); // DCT + TS = 2 tests
          std::vector<TrMode> trModes;
    
          if (m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && slice.isLossless())
    
          {
            nNumTransformCands = 1;
            CHECK(!tsAllowed && !currTU.cu->bdpcmModeChroma, "transform skip should be enabled for LS");
            if (currTU.cu->bdpcmModeChroma)
            {
              trModes.push_back(TrMode(0, true));
            }
            else
            {
              trModes.push_back(TrMode(1, true));
            }
          }
          else
          {
    
            trModes.push_back(TrMode(0, true));   // DCT2
    
            if (tsAllowed)
            {
              trModes.push_back(TrMode(1, true));   // TS
            }
    
          CHECK(!currTU.Cb().valid(), "Invalid TU");
    
    
          const int  totalModesToTest            = nNumTransformCands;
    
          bool cbfDCT2 = true;
    
          const bool isOneMode                   = false;
          maxModesTested                         = totalModesToTest > maxModesTested ? totalModesToTest : maxModesTested;
    
          int currModeId = 0;
          int default0Save1Load2 = 0;
    
          if (!isOneMode)
          {
            ctxStart = m_CABACEstimator->getCtx();
          }
    
    
          for (int modeId = 0; modeId < nNumTransformCands; modeId++)
    
            resiCb.copyFrom(orgResiCb[0]);
            resiCr.copyFrom(orgResiCr[0]);
            currTU.mtsIdx[compID] = currTU.cu->bdpcmModeChroma ? MTS_SKIP : trModes[modeId].first;
    
            const bool isFirstMode = (currModeId == 1);
            const bool isLastMode  = false;   // Always store output to saveCS and tmpTU
            if (!(m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && slice.isLossless()))
            {
              // if DCT2's cbf==0, skip ts search
    
              if (!cbfDCT2 && trModes[modeId].first == MTS_SKIP)
              {
                  break;
              }
              if (!trModes[modeId].second)
              {
                  continue;
              }
    
            if (!isFirstMode)   // if not first mode to be tested
            {
              m_CABACEstimator->getCtx() = ctxStart;
            }
    
            singleDistCTmp = 0;
    
            if (nNumTransformCands > 1)
            {
              xIntraCodingTUBlock(currTU, compID, singleDistCTmp, default0Save1Load2, nullptr,
                                  modeId == 0 ? &trModes : nullptr, true);
            }
            else
            {
              xIntraCodingTUBlock(currTU, compID, singleDistCTmp, default0Save1Load2);
            }
    
            if (((currTU.mtsIdx[compID] == MTS_SKIP && !currTU.cu->bdpcmModeChroma)
                 && !TU::getCbf(currTU, compID)))   // In order not to code TS flag when cbf is zero, the case for TS with
                                                    // cbf being zero is forbidden.
            {
              if (m_pcEncCfg->getCostMode() != COST_LOSSLESS_CODING || !slice.isLossless())
    
                singleCostTmp = MAX_DOUBLE;
    
                uint64_t fracBitsTmp = xGetIntraFracBitsQTChroma(currTU, compID);
                singleCostTmp        = m_pcRdCost->calcRdCost(fracBitsTmp, singleDistCTmp);
    
            }
            else if (lumaUsesISP && bestCostSoFar != MAX_DOUBLE && c == COMPONENT_Cb)
            {
              uint64_t fracBitsTmp = xGetIntraFracBitsQTSingleChromaComponent(cs, partitioner, ComponentID(c));
              singleCostTmp        = m_pcRdCost->calcRdCost(fracBitsTmp, singleDistCTmp);
              if (isOneMode || (!isOneMode && !isLastMode))
    
                m_CABACEstimator->getCtx() = ctxStart;
    
            }
            else if (!isOneMode)
            {
              uint64_t fracBitsTmp = xGetIntraFracBitsQTChroma(currTU, compID);
              singleCostTmp        = m_pcRdCost->calcRdCost(fracBitsTmp, singleDistCTmp);
            }
    
            if (singleCostTmp < dSingleCost)
            {
              dSingleCost = singleCostTmp;
              bestModeId  = currModeId;
    
              if (c == COMPONENT_Cb)
    
                bestCostCb = singleCostTmp;
                bestDistCb = singleDistCTmp;
    
                bestCostCr = singleCostTmp;
                bestDistCr = singleDistCTmp;
    
              if (currTU.mtsIdx[compID] == MTS_DCT2_DCT2)
    
                cbfDCT2 = TU::getCbfAtDepth(currTU, compID, currDepth);
              }
    
                saveCS.getPredBuf(area).copyFrom(cs.getPredBuf(area));
                saveCS.getOrgResiBuf(area).copyFrom(cs.getOrgResiBuf(area));
    
    Taoran Lu's avatar
    Taoran Lu committed
    #endif
    
                saveCS.getPredBuf(area).copyFrom(cs.getPredBuf(area));
                if (keepResi)
                {
                  saveCS.getResiBuf(area).copyFrom(cs.getResiBuf(area));
                }
                saveCS.getRecoBuf(area).copyFrom(cs.getRecoBuf(area));
    
                tmpTU.copyComponentFrom(currTU, compID);
    
                ctxBest = m_CABACEstimator->getCtx();
    
          if( lumaUsesISP && dSingleCost > bestCostSoFar && c == COMPONENT_Cb )
          {
            //Luma + Cb cost is already larger than the best cost, so we don't need to test Cr
            cs.dist = MAX_UINT;
            m_CABACEstimator->getCtx() = ctxStart;
    
            earlyExitISP               = true;
    
          // Done with one component of separate coding of Cr and Cb, just switch to the best Cb contexts if Cr coding is still to be done
    
          if ((c == COMPONENT_Cb && bestModeId < totalModesToTest) || (c == COMPONENT_Cb && m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && slice.isLossless()))
    
          {
            m_CABACEstimator->getCtx() = ctxBest;
    
            currTU.copyComponentFrom(tmpTU, COMPONENT_Cb); // Cbf of Cb is needed to estimate cost for Cr Cbf
          }
        }
    
        if ( !earlyExitISP )
        {
          // Test using joint chroma residual coding
          double     bestCostCbCr   = bestCostCb + bestCostCr;
          Distortion bestDistCbCr   = bestDistCb + bestDistCr;
          int        bestJointCbCr  = 0;
    
          std::vector<int>  jointCbfMasksToTest;
    
          if ( cs.sps->getJointCbCrEnabledFlag() && (TU::getCbf(tmpTU, COMPONENT_Cb) || TU::getCbf(tmpTU, COMPONENT_Cr)))
    
    Fangdong Chen's avatar
    Fangdong Chen committed
            jointCbfMasksToTest = m_pcTrQuant->selectICTCandidates(currTU, orgResiCb, orgResiCr);
    
          bool checkDCTOnly = (TU::getCbf(tmpTU, COMPONENT_Cb) && tmpTU.mtsIdx[COMPONENT_Cb] == MTS_DCT2_DCT2 && !TU::getCbf(tmpTU, COMPONENT_Cr)) ||
                              (TU::getCbf(tmpTU, COMPONENT_Cr) && tmpTU.mtsIdx[COMPONENT_Cr] == MTS_DCT2_DCT2 && !TU::getCbf(tmpTU, COMPONENT_Cb)) ||
                              (TU::getCbf(tmpTU, COMPONENT_Cb) && tmpTU.mtsIdx[COMPONENT_Cb] == MTS_DCT2_DCT2 && TU::getCbf(tmpTU, COMPONENT_Cr) && tmpTU.mtsIdx[COMPONENT_Cr] == MTS_DCT2_DCT2);
    
          bool checkTSOnly = (TU::getCbf(tmpTU, COMPONENT_Cb) && tmpTU.mtsIdx[COMPONENT_Cb] == MTS_SKIP && !TU::getCbf(tmpTU, COMPONENT_Cr)) ||
                             (TU::getCbf(tmpTU, COMPONENT_Cr) && tmpTU.mtsIdx[COMPONENT_Cr] == MTS_SKIP && !TU::getCbf(tmpTU, COMPONENT_Cb)) ||
                             (TU::getCbf(tmpTU, COMPONENT_Cb) && tmpTU.mtsIdx[COMPONENT_Cb] == MTS_SKIP && TU::getCbf(tmpTU, COMPONENT_Cr) && tmpTU.mtsIdx[COMPONENT_Cr] == MTS_SKIP);
    
          if (jointCbfMasksToTest.size() && currTU.cu->bdpcmModeChroma)
          {
            CHECK(!checkTSOnly || checkDCTOnly, "bdpcm only allows transform skip");
          }
    
          for( int cbfMask : jointCbfMasksToTest )
    
            currTU.jointCbCr               = (uint8_t)cbfMask;
    
            ComponentID codeCompId = ((currTU.jointCbCr >> 1) ? COMPONENT_Cb : COMPONENT_Cr);
            ComponentID otherCompId = ((codeCompId == COMPONENT_Cb) ? COMPONENT_Cr : COMPONENT_Cb);
    
            bool        tsAllowed = TU::isTSAllowed(currTU, codeCompId) && (m_pcEncCfg->getUseChromaTS()) && !currTU.cu->lfnstIdx;
    
            uint8_t     numTransformCands = 1 + (tsAllowed ? 1 : 0); // DCT + TS = 2 tests
            bool        cbfDCT2 = true;
    
            std::vector<TrMode> trModes;
            if (checkDCTOnly || checkTSOnly)
            {
              numTransformCands = 1;
            }
    
            if (!checkTSOnly || currTU.cu->bdpcmModeChroma)
            {
    
              trModes.push_back(TrMode(0, true)); // DCT2
    
            }
            if (tsAllowed && !checkDCTOnly)
            {
              trModes.push_back(TrMode(1, true));//TS
            }
            for (int modeId = 0; modeId < numTransformCands; modeId++)
            {
              if (modeId && !cbfDCT2)
              {
                continue;
              }
              if (!trModes[modeId].second)
              {
                continue;
              }
              Distortion distTmp = 0;
              currTU.mtsIdx[codeCompId] = currTU.cu->bdpcmModeChroma ? MTS_SKIP : trModes[modeId].first;
              currTU.mtsIdx[otherCompId] = MTS_DCT2_DCT2;
    
              m_CABACEstimator->getCtx() = ctxStartTU;
    
              resiCb.copyFrom(orgResiCb[cbfMask]);
              resiCr.copyFrom(orgResiCr[cbfMask]);
              if (numTransformCands > 1)
    
                xIntraCodingTUBlock(currTU, COMPONENT_Cb, distTmp, 0, nullptr, modeId == 0 ? &trModes : nullptr, true);
              }
              else
              {
                xIntraCodingTUBlock(currTU, COMPONENT_Cb, distTmp, 0);
              }
              double costTmp = std::numeric_limits<double>::max();
              if (distTmp < std::numeric_limits<Distortion>::max())
              {
                uint64_t bits = xGetIntraFracBitsQTChroma(currTU, COMPONENT_Cb);
                costTmp       = m_pcRdCost->calcRdCost(bits, distTmp);
                if (!currTU.mtsIdx[codeCompId])
                {
                  cbfDCT2 = true;
                }
              }
              else if (!currTU.mtsIdx[codeCompId])
              {
                cbfDCT2 = false;
    
              if (costTmp < bestCostCbCr)
    
                bestCostCbCr  = costTmp;
                bestDistCbCr  = distTmp;
                bestJointCbCr = currTU.jointCbCr;
    
                // store data
                {
    
    #if KEEP_PRED_AND_RESI_SIGNALS
    
                  saveCS.getOrgResiBuf(cbArea).copyFrom(cs.getOrgResiBuf(cbArea));
                  saveCS.getOrgResiBuf(crArea).copyFrom(cs.getOrgResiBuf(crArea));
    
                  saveCS.getPredBuf(cbArea).copyFrom(cs.getPredBuf(cbArea));
                  saveCS.getPredBuf(crArea).copyFrom(cs.getPredBuf(crArea));
                  if (keepResi)
                  {
                    saveCS.getResiBuf(cbArea).copyFrom(cs.getResiBuf(cbArea));
                    saveCS.getResiBuf(crArea).copyFrom(cs.getResiBuf(crArea));
                  }
                  saveCS.getRecoBuf(cbArea).copyFrom(cs.getRecoBuf(cbArea));
                  saveCS.getRecoBuf(crArea).copyFrom(cs.getRecoBuf(crArea));
    
                  tmpTU.copyComponentFrom(currTU, COMPONENT_Cb);
                  tmpTU.copyComponentFrom(currTU, COMPONENT_Cr);
    
                  ctxBest = m_CABACEstimator->getCtx();
                }
    
          // Retrieve the best CU data (unless it was the very last one tested)
          {
    #if KEEP_PRED_AND_RESI_SIGNALS
            cs.getPredBuf   (cbArea).copyFrom(saveCS.getPredBuf   (cbArea));
            cs.getOrgResiBuf(cbArea).copyFrom(saveCS.getOrgResiBuf(cbArea));
            cs.getPredBuf   (crArea).copyFrom(saveCS.getPredBuf   (crArea));
            cs.getOrgResiBuf(crArea).copyFrom(saveCS.getOrgResiBuf(crArea));
    #endif
            cs.getPredBuf   (cbArea).copyFrom(saveCS.getPredBuf   (cbArea));
            cs.getPredBuf   (crArea).copyFrom(saveCS.getPredBuf   (crArea));
    
            if( keepResi )
            {
              cs.getResiBuf (cbArea).copyFrom(saveCS.getResiBuf   (cbArea));
              cs.getResiBuf (crArea).copyFrom(saveCS.getResiBuf   (crArea));
            }
            cs.getRecoBuf   (cbArea).copyFrom(saveCS.getRecoBuf   (cbArea));
            cs.getRecoBuf   (crArea).copyFrom(saveCS.getRecoBuf   (crArea));
    
            currTU.copyComponentFrom(tmpTU, COMPONENT_Cb);
            currTU.copyComponentFrom(tmpTU, COMPONENT_Cr);
    
            m_CABACEstimator->getCtx() = ctxBest;
          }
    
          // Copy results to the picture structures
    
    Seungwook Hong's avatar
    Seungwook Hong committed
    #if JVET_Z0118_GDR
          cs.updateReconMotIPM(cbArea);
    #else
    
          cs.picture->getRecoBuf(cbArea).copyFrom(cs.getRecoBuf(cbArea));
    
    Seungwook Hong's avatar
    Seungwook Hong committed
    #endif
    
    #if JVET_Z0118_GDR
          cs.updateReconMotIPM(crArea);
    #else
    
          cs.picture->getRecoBuf(crArea).copyFrom(cs.getRecoBuf(crArea));
    
    Seungwook Hong's avatar
    Seungwook Hong committed
    #endif
    
          cs.picture->getPredBuf(cbArea).copyFrom(cs.getPredBuf(cbArea));
          cs.picture->getPredBuf(crArea).copyFrom(cs.getPredBuf(crArea));
    
          cbfs.cbf(COMPONENT_Cb) = TU::getCbf(currTU, COMPONENT_Cb);
          cbfs.cbf(COMPONENT_Cr) = TU::getCbf(currTU, COMPONENT_Cr);
    
          currTU.jointCbCr = ( (cbfs.cbf(COMPONENT_Cb) + cbfs.cbf(COMPONENT_Cr)) ? bestJointCbCr : 0 );
    
          cs.dist         += bestDistCbCr;
    
        }
      }
      else
      {
        unsigned    numValidTBlocks   = ::getNumberValidTBlocks( *cs.pcv );
        ChromaCbfs  SplitCbfs         ( false );
    
        if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
        {
          partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
        }
    
        else if( currTU.cu->ispMode )
        {
          partitioner.splitCurrArea( ispType, cs );
        }
    
          ChromaCbfs subCbfs = xRecurIntraChromaCodingQT( cs, partitioner, bestCostSoFar, ispType );
    
    
          for( uint32_t ch = COMPONENT_Cb; ch < numValidTBlocks; ch++ )
          {
            const ComponentID compID = ComponentID( ch );
            SplitCbfs.cbf( compID ) |= subCbfs.cbf( compID );
          }
        } while( partitioner.nextPart( cs ) );
    
        partitioner.exitCurrSplit();
    
    
        if( lumaUsesISP && cs.dist == MAX_UINT )
        {
          return cbfs;
        }
    
        cbfs.Cb |= SplitCbfs.Cb;
        cbfs.Cr |= SplitCbfs.Cr;
    
        if (!lumaUsesISP)
        {
          for (auto &ptu: cs.tus)
    
            if (currArea.Cb().contains(ptu->Cb()) || (!ptu->Cb().valid() && currArea.Y().contains(ptu->Y())))
    
              TU::setCbfAtDepth(*ptu, COMPONENT_Cb, currDepth, SplitCbfs.Cb);
              TU::setCbfAtDepth(*ptu, COMPONENT_Cr, currDepth, SplitCbfs.Cr);
    
        }
      }
    
      return cbfs;
    }
    
    // Returns the CABAC estimator's fractional bit count after coding the intra prediction mode uiMode
    // for prediction unit pu and channel type chType.
    //
    // Steps visible in this function:
    //   - uiMode is stored in a uint8_t local (orgMode), i.e. it is narrowed to 8 bits;
    //   - orgMode is swapped with pu.intraDir[chType] so that pu temporarily carries the mode under test;
    //   - m_CABACEstimator->resetBits() is called before the mode syntax is coded;
    //   - for a luma channel type, m_CABACEstimator->intra_luma_pred_mode(pu) is invoked;
    //   - the swap is repeated to put the previous pu.intraDir[chType] value back;
    //   - m_CABACEstimator->getEstFracBits() is returned.
    //
    // pu is taken by non-const reference because pu.intraDir[chType] is modified temporarily.
    //
    // NOTE(review): this copy of the function looks incomplete. Each "#if JVET_Y0065_GPM_INTRA ... #else"
    // has no visible #else arm or #endif, the block opened after isLuma(chType) has no visible closing
    // brace, and no non-luma branch is visible. Confirm against the repository before relying on the
    // description above for the guarded swaps.
    uint64_t IntraSearch::xFracModeBitsIntra(PredictionUnit &pu, const uint32_t &uiMode, const ChannelType &chType)
    {
    
    Vadim Seregin's avatar
    Vadim Seregin committed
      uint8_t orgMode = uiMode;
    
    #if JVET_Y0065_GPM_INTRA
      if (!pu.ciipFlag && !pu.gpmIntraFlag)
    #else
    
      std::swap(orgMode, pu.intraDir[chType]);
    
      m_CABACEstimator->resetBits();
    
      if( isLuma( chType ) )
      {
    
    #if JVET_Y0065_GPM_INTRA
        if (!pu.ciipFlag && !pu.gpmIntraFlag)
    #else
    
        {
          m_CABACEstimator->intra_luma_pred_mode(pu);
        }
    
    #if JVET_Y0065_GPM_INTRA
      if ( !pu.ciipFlag && !pu.gpmIntraFlag )
    #else
    
      std::swap(orgMode, pu.intraDir[chType]);
    
      return m_CABACEstimator->getEstFracBits();
    }
    
    
    // Inserts one candidate into the cost-ordered candidate lists kept for the first color space.
    //
    // rdModeList, rdCostList and bdpcmModeList are parallel arrays: entry i of each describes the same
    // candidate. The new candidate is written at the lowest index whose stored cost is strictly greater
    // than cost (entries from that index on are shifted up by one); if no stored cost is greater, the
    // candidate is appended. Candidates with equal cost therefore keep their insertion order.
    //
    //   mode          - mode to insert
    //   cost          - cost of that mode, used as the ordering key
    //   bdpcmMode     - BDPCM mode stored alongside mode
    //   rdModeList    - candidate modes, updated in place
    //   rdCostList    - candidate costs, updated in place
    //   bdpcmModeList - candidate BDPCM modes, updated in place
    //   candNum       - number of valid entries on entry; incremented by one on successful return
    //
    // The lists may hold at most FAST_UDI_MAX_RDMODE_NUM entries. The capacity is checked BEFORE anything is
    // written, so a full list triggers the CHECK instead of first writing one element past the end.
    void IntraSearch::sortRdModeListFirstColorSpace(ModeInfo mode, double cost, char bdpcmMode, ModeInfo* rdModeList, double* rdCostList, char* bdpcmModeList, int& candNum)
    {
      // Inserting into a list that already holds FAST_UDI_MAX_RDMODE_NUM entries would touch index
      // FAST_UDI_MAX_RDMODE_NUM; refuse up front rather than after the out-of-bounds write.
      CHECK(candNum >= FAST_UDI_MAX_RDMODE_NUM, "exceed intra mode candidate list capacity");

      // Lowest position holding a strictly larger cost; defaults to "append" (also covers an empty list).
      int insertPos = candNum;
      for (int pos = 0; pos < candNum; pos++)
      {
        if (cost < rdCostList[pos])
        {
          insertPos = pos;
          break;
        }
      }

      // Open a gap at insertPos by moving the tail up one slot, starting from the last entry.
      for (int i = candNum; i > insertPos; i--)
      {
        rdModeList[i]    = rdModeList[i - 1];
        rdCostList[i]    = rdCostList[i - 1];
        bdpcmModeList[i] = bdpcmModeList[i - 1];
      }

      rdModeList[insertPos]    = mode;
      rdCostList[insertPos]    = cost;
      bdpcmModeList[insertPos] = bdpcmMode;
      candNum++;
    }
    
    void IntraSearch::invalidateBestRdModeFirstColorSpace()
    {
      int numSaveRdClass = 4 * NUM_LFNST_NUM_PER_SET * 2;
      int savedRdModeListSize = FAST_UDI_MAX_RDMODE_NUM;
    
      for (int i = 0; i < numSaveRdClass; i++)
      {
        m_numSavedRdModeFirstColorSpace[i] = 0;
        for (int j = 0; j < savedRdModeListSize; j++)
        {
    
          m_savedRdModeFirstColorSpace[i][j] = ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, 0);
    
          m_savedBDPCMModeFirstColorSpace[i][j] = 0;
          m_savedRdCostFirstColorSpace[i][j] = MAX_DOUBLE;
        }
      }
    }
    
    // Rebuilds the list of candidate modes that go on to full RD testing, starting from the incoming
    // candidate list and its costs.
    //
    // What the visible lines do:
    //   1. Walk candModeList (the last entry is skipped unless the list is longer than numModesForFullRD)
    //      and copy selected entries, with their costs, into temporary lists. An entry whose mipFlg is
    //      unset is admitted while fewer than three such entries have been taken (numConv < 3). A second
    //      rule admits an entry when numMip < maxCandPerType, when its cost is below
    //      thresholdHadCost * minCost (minCost being candCostList[0]), or when keepOneMip is still set;
    //      keepOneMip is cleared after its first use.
    //   2. When pu.lwidth() and pu.lheight() are both greater than 8: for MIP modes 0, 1 and 2 pick the
    //      transposed index (mode + transpOff) if its mipHadCost entry is lower, feed the picks through
    //      updateCandList, and add each resulting MIP mode that is not already in the temporary list.
    //   3. Under JVET_AB0157_TMRL, when additionally CU::allowTmrl(*pu.cu) holds: feed MRL_LIST_SIZE entries
    //      of tmrlCostList through updateCandList and add up to three TMRL modes not already present.
    //   4. Under JVET_AC0105_DIRECTIONAL_PLANAR: feed the two entries of dirPlanarCostList through
    //      updateCandList and add the first resulting mode that is not already present.
    //   5. Assign the temporary lists to candModeList / candCostList and set numModesForFullRD to the
    //      new list size.
    //
    //   candModeList       - in/out: candidate modes; replaced by the reduced list
    //   candCostList       - in/out: costs matching candModeList; replaced alongside it
    //   numModesForFullRD  - in/out: requested number of modes on entry, size of the reduced list on return
    //   thresholdHadCost   - factor applied to the lowest cost in the admission test of step 1
    //   mipHadCost         - per-MIP-mode costs, indexed by mode and by mode + transpOff
    //   pu                 - prediction unit whose dimensions gate steps 2 and 3
    //   fastMip            - not referenced in the lines visible here
    //   tmrlCostList       - (JVET_AB0157_TMRL) costs of the TMRL list entries; each must differ from MAX_DOUBLE
    //   dirPlanarCostList  - (JVET_AC0105_DIRECTIONAL_PLANAR) two costs; each must differ from MAX_DOUBLE
    //
    // NOTE(review): this copy of the function is missing lines - the closing ')' of the parameter list,
    // what is presumably an "else" between the two admission rules of step 1, several braces, and the
    // #endif of the JVET_AB0157_TMRL section are not visible. Confirm the control flow against the repository.
    template<typename T, size_t N>
    
    void IntraSearch::reduceHadCandList(static_vector<T, N>& candModeList, static_vector<double, N>& candCostList, int& numModesForFullRD, const double thresholdHadCost, const double* mipHadCost, const PredictionUnit &pu, const bool fastMip
    #if JVET_AB0157_TMRL
      , const double* tmrlCostList
    #endif
    
    #if JVET_AC0105_DIRECTIONAL_PLANAR
      , const double* dirPlanarCostList
    #endif
    
    {
      // Upper bound used by the numMip admission test in the loop below: half of the requested mode count.
      const int maxCandPerType = numModesForFullRD >> 1;
      static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> tempRdModeList;
      static_vector<double, FAST_UDI_MAX_RDMODE_NUM> tempCandCostList;
      const double minCost = candCostList[0];
      bool keepOneMip = candModeList.size() > numModesForFullRD;
    
      // First pass: decide per incoming candidate whether it is copied into the temporary lists.
      int numConv = 0;
      int numMip = 0;
      for (int idx = 0; idx < candModeList.size() - (keepOneMip?0:1); idx++)
      {
        bool addMode = false;
    
    Philipp Merkle's avatar
    Philipp Merkle committed
        const ModeInfo& orgMode = candModeList[idx];
    
    Philipp Merkle's avatar
    Philipp Merkle committed
        if (!orgMode.mipFlg)
    
          addMode = (numConv < 3);
          numConv += addMode ? 1:0;
    
          addMode = ( numMip < maxCandPerType || (candCostList[idx] < thresholdHadCost * minCost) || keepOneMip );
          keepOneMip = false;
          numMip += addMode ? 1:0;
        }
        if( addMode )
        {
    
    Philipp Merkle's avatar
    Philipp Merkle committed
          tempRdModeList.push_back(orgMode);
    
          tempCandCostList.push_back(candCostList[idx]);
        }
      }
    
      if ((pu.lwidth() > 8 && pu.lheight() > 8))
      {
        // Sort MIP candidates by Hadamard cost
    
        const int transpOff = getNumModesMip( pu.Y() );
    
    Philipp Merkle's avatar
    Philipp Merkle committed
        static_vector<uint8_t, FAST_UDI_MAX_RDMODE_NUM> sortedMipModes(0);
        static_vector<double, FAST_UDI_MAX_RDMODE_NUM> sortedMipCost(0);
    
        for( uint8_t mode : { 0, 1, 2 } )
    
        {
          uint8_t candMode = mode + uint8_t((mipHadCost[mode + transpOff] < mipHadCost[mode]) ? transpOff : 0);
          updateCandList(candMode, mipHadCost[candMode], sortedMipModes, sortedMipCost, 3);
        }
    
        // Append MIP mode to RD mode list
    
        const int modeListSize = int(tempRdModeList.size());
    
        for (int idx = 0; idx < 3; idx++)
        {
    
          const bool     isTransposed = (sortedMipModes[idx] >= transpOff ? true : false);
          const uint32_t mipIdx       = (isTransposed ? sortedMipModes[idx] - transpOff : sortedMipModes[idx]);
          const ModeInfo mipMode( true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, mipIdx );
    
          bool alreadyIncluded = false;
    
          for (int modeListIdx = 0; modeListIdx < modeListSize; modeListIdx++)
    
          {
            if (tempRdModeList[modeListIdx] == mipMode)
            {
              alreadyIncluded = true;
              break;
            }
          }
    
          if (!alreadyIncluded)
          {
    
    fan wang's avatar
    fan wang committed
    #if JVET_AB0155_SGPM
            updateCandList(mipMode, sortedMipCost[idx], tempRdModeList, tempCandCostList, tempRdModeList.size() + 1);
    #else
    
            tempRdModeList.push_back(mipMode);
            tempCandCostList.push_back(0);
    
    fan wang's avatar
    fan wang committed
    #endif
    
    #if JVET_AB0157_TMRL
    
      if (pu.lwidth() > 8 && pu.lheight() > 8 && CU::allowTmrl(*pu.cu))
    
      {
        // Sort TMRL candidates by cost.
        static_vector<uint8_t, FAST_UDI_MAX_RDMODE_NUM> sortedTmrlModes(0);
    
        static_vector<double, FAST_UDI_MAX_RDMODE_NUM>  sortedTmrlCost(0);
    
        for (uint8_t tmrlListIdx = 0; tmrlListIdx < MRL_LIST_SIZE; tmrlListIdx++)
        {
          CHECK(tmrlCostList[tmrlListIdx] == MAX_DOUBLE, "tmrlCostList is not filled.");
          updateCandList(tmrlListIdx, tmrlCostList[tmrlListIdx], sortedTmrlModes, sortedTmrlCost, 3);
        }
    
        // Append TMRL mode to RD mode list
        const int modeListSize = int(tempRdModeList.size());
        for (int idx = 0; idx < 3; idx++)
        {
    
          const uint8_t  tmrlListIdx = sortedTmrlModes[idx];
    
          const ModeInfo tmrlMode(false, false, tmrlListIdx + MAX_REF_LINE_IDX, NOT_INTRA_SUBPARTITIONS, 0);
    
          bool           alreadyIncluded = false;
    
          for (int modeListIdx = 0; modeListIdx < modeListSize; modeListIdx++)
          {
            if (tempRdModeList[modeListIdx] == tmrlMode)
            {
              alreadyIncluded = true;
              break;
            }
          }
    
          if (!alreadyIncluded)
          {
    
            const auto numRd = tempRdModeList.size() + 1;
            updateCandList(tmrlMode, sortedTmrlCost[idx], tempRdModeList, tempCandCostList, numRd);
    
    #if JVET_AC0105_DIRECTIONAL_PLANAR
      // Feed both directional planar costs through updateCandList, then add the first entry of the
      // resulting list that is not already among the candidates (the loop below stops after one insertion).
      static_vector<uint8_t, FAST_UDI_MAX_RDMODE_NUM> sortedDirPlanarModes(0);
      static_vector<double, FAST_UDI_MAX_RDMODE_NUM>  sortedDirPlanarCost(0);
      for (uint8_t Idx = 0; Idx < 2; Idx++)
      {
        CHECK(dirPlanarCostList[Idx] == MAX_DOUBLE, "dirPlanarCostList is not filled.");
        updateCandList(Idx, dirPlanarCostList[Idx], sortedDirPlanarModes, sortedDirPlanarCost, 2);
      }
    
      const int modeListSize = int(tempRdModeList.size());
      for (int idx = 0; idx < 2; idx++)
      {
        const uint8_t  dirPlanarListIdx = sortedDirPlanarModes[idx];
        const ModeInfo dirPlanarMode(false, false, 0, NOT_INTRA_SUBPARTITIONS,
                                      dirPlanarListIdx == 0 ? PL_HOR_IDX : PL_VER_IDX);
        bool alreadyIncluded = false;
        for (int modeListIdx = 0; modeListIdx < modeListSize; modeListIdx++)
        {
          if (tempRdModeList[modeListIdx] == dirPlanarMode)
          {
            alreadyIncluded = true;
            break;
          }
        }
    
        if (!alreadyIncluded)
        {
          const auto numRd = tempRdModeList.size() + 1;
          updateCandList(dirPlanarMode, sortedDirPlanarCost[idx], tempRdModeList, tempCandCostList, numRd);
          break;
        }
      }
    #endif
    
    
      // Hand the reduced lists back to the caller and report their length.
      candModeList = tempRdModeList;
      candCostList = tempCandCostList;
      numModesForFullRD = int(candModeList.size());
    }
    
    // It decides which modes from the ISP lists can be full RD tested
    void IntraSearch::xGetNextISPMode(ModeInfo& modeInfo, const ModeInfo* lastMode, const Size cuSize)
    {
      static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM>* rdModeLists[2] = { &m_ispCandListHor, &m_ispCandListVer };
    
    
      const int curIspLfnstIdx = m_curIspLfnstIdx;
      if (curIspLfnstIdx >= NUM_LFNST_NUM_PER_SET)
      {
        //All lfnst indices have been checked
        return;
      }
    
    
      ISPType nextISPcandSplitType;
    
      auto& ispTestedModes = m_ispTestedModes[curIspLfnstIdx];
    
      const bool horSplitIsTerminated = ispTestedModes.splitIsFinished[HOR_INTRA_SUBPARTITIONS - 1];
      const bool verSplitIsTerminated = ispTestedModes.splitIsFinished[VER_INTRA_SUBPARTITIONS - 1];
      if (!horSplitIsTerminated && !verSplitIsTerminated)
    
      {
        nextISPcandSplitType = !lastMode ? HOR_INTRA_SUBPARTITIONS : lastMode->ispMod == HOR_INTRA_SUBPARTITIONS ? VER_INTRA_SUBPARTITIONS : HOR_INTRA_SUBPARTITIONS;
      }
    
      else if (!horSplitIsTerminated && verSplitIsTerminated)
    
      {
        nextISPcandSplitType = HOR_INTRA_SUBPARTITIONS;
      }
    
      else if (horSplitIsTerminated && !verSplitIsTerminated)
    
      {
        nextISPcandSplitType = VER_INTRA_SUBPARTITIONS;
      }
      else
      {
    
        xFinishISPModes();
    
        return;   // no more modes will be tested
      }
    
    
      int maxNumSubPartitions = ispTestedModes.numTotalParts[nextISPcandSplitType - 1];
    
      // We try to break the split here for lfnst > 0 according to the first mode
    
      if (curIspLfnstIdx > 0 && ispTestedModes.numTestedModes[nextISPcandSplitType - 1] == 1)
      {
        int firstModeThisSplit = ispTestedModes.getTestedIntraMode(nextISPcandSplitType, 0);
        int numSubPartsFirstModeThisSplit = ispTestedModes.getNumCompletedSubParts(nextISPcandSplitType, firstModeThisSplit);
        CHECK(numSubPartsFirstModeThisSplit < 0, "wrong number of subpartitions!");
        bool stopThisSplit = false;
        bool stopThisSplitAllLfnsts = false;
        if (numSubPartsFirstModeThisSplit < maxNumSubPartitions)
        {
          stopThisSplit = true;
          if (m_pcEncCfg->getUseFastISP() && curIspLfnstIdx == 1 && numSubPartsFirstModeThisSplit < maxNumSubPartitions - 1)
          {
            stopThisSplitAllLfnsts = true;
          }
        }
    
        if (stopThisSplit)
        {
          ispTestedModes.splitIsFinished[nextISPcandSplitType - 1] = true;
          if (curIspLfnstIdx == 1 && stopThisSplitAllLfnsts)
          {
            m_ispTestedModes[2].splitIsFinished[nextISPcandSplitType - 1] = true;
          }
          return;
        }
      }
    
    
      // We try to break the split here for lfnst = 0 or all lfnst indices according to the first two modes
    
      if (curIspLfnstIdx == 0 && ispTestedModes.numTestedModes[nextISPcandSplitType - 1] == 2)
    
      {
        // Split stop criteria after checking the performance of previously tested intra modes
        const int thresholdSplit1 = maxNumSubPartitions;
    
        bool stopThisSplitForAllLFNSTs = false;
        const int thresholdSplit1ForAllLFNSTs = maxNumSubPartitions - 1;
    
        int mode1 = ispTestedModes.getTestedIntraMode((ISPType)nextISPcandSplitType, 0);
    
    Jie's avatar
    Jie committed
    #if ENABLE_DIMD && !JVET_V0087_DIMD_NO_ISP
    
    Vadim Seregin's avatar
    Vadim Seregin committed
        mode1 = ( mode1 == DC_IDX || mode1 == DIMD_IDX ) ? -1 : mode1;
    #else
    
        mode1 = mode1 == DC_IDX ? -1 : mode1;
    
    Vadim Seregin's avatar
    Vadim Seregin committed
    #endif
    
        int numSubPartsBestMode1 = mode1 != -1 ? ispTestedModes.getNumCompletedSubParts((ISPType)nextISPcandSplitType, mode1) : -1;
        int mode2 = ispTestedModes.getTestedIntraMode((ISPType)nextISPcandSplitType, 1);
    
    Jie's avatar
    Jie committed
    #if ENABLE_DIMD && !JVET_V0087_DIMD_NO_ISP
    
    Vadim Seregin's avatar
    Vadim Seregin committed
        mode2 = ( mode2 == DC_IDX || mode2 == DIMD_IDX ) ? -1 : mode2;
    #else
    
        mode2 = mode2 == DC_IDX ? -1 : mode2;
    
    Vadim Seregin's avatar
    Vadim Seregin committed
    #endif
    
        int numSubPartsBestMode2 = mode2 != -1 ? ispTestedModes.getNumCompletedSubParts((ISPType)nextISPcandSplitType, mode2) : -1;
    
    
        // 1) The 2 most promising modes do not reach a certain number of sub-partitions
        if (numSubPartsBestMode1 != -1 && numSubPartsBestMode2 != -1)
        {
          if (numSubPartsBestMode1 < thresholdSplit1 && numSubPartsBestMode2 < thresholdSplit1)
          {
    
            if (curIspLfnstIdx == 0 && numSubPartsBestMode1 < thresholdSplit1ForAllLFNSTs && numSubPartsBestMode2 < thresholdSplit1ForAllLFNSTs)
            {
              stopThisSplitForAllLFNSTs = true;
            }
    
          else
          {
            //we stop also if the cost is MAX_DOUBLE for both modes
            double mode1Cost = ispTestedModes.getRDCost(nextISPcandSplitType, mode1);
            double mode2Cost = ispTestedModes.getRDCost(nextISPcandSplitType, mode2);
            if (!(mode1Cost < MAX_DOUBLE || mode2Cost < MAX_DOUBLE))
            {
              stopThisSplit = true;
            }
          }
    
          // 2) One split type may be discarded by comparing the number of sub-partitions of the best angle modes of both splits
    
          ISPType otherSplit = nextISPcandSplitType == HOR_INTRA_SUBPARTITIONS ? VER_INTRA_SUBPARTITIONS : HOR_INTRA_SUBPARTITIONS;
          int  numSubPartsBestMode2OtherSplit = mode2 != -1 ? ispTestedModes.getNumCompletedSubParts(otherSplit, mode2) : -1;
    
          if (numSubPartsBestMode2OtherSplit != -1 && numSubPartsBestMode2 != -1 && ispTestedModes.bestSplitSoFar != nextISPcandSplitType)
    
            if (numSubPartsBestMode2OtherSplit > numSubPartsBestMode2)
    
            {
              stopThisSplit = true;
            }
    
            // both have the same number of subpartitions
            else if (numSubPartsBestMode2OtherSplit == numSubPartsBestMode2)
    
              // both have the maximum number of subpartitions, so it compares RD costs to decide
              if (numSubPartsBestMode2OtherSplit == maxNumSubPartitions)
    
                double rdCostBestMode2ThisSplit = ispTestedModes.getRDCost(nextISPcandSplitType, mode2);
                double rdCostBestMode2OtherSplit = ispTestedModes.getRDCost(otherSplit, mode2);
                double threshold = 1.3;
                if (rdCostBestMode2ThisSplit == MAX_DOUBLE || rdCostBestMode2OtherSplit < rdCostBestMode2ThisSplit * threshold)
                {
                  stopThisSplit = true;
                }
              }
              else // none of them reached the maximum number of subpartitions with the best angle modes, so it compares the results with the planar mode
              {
                int  numSubPartsBestMode1OtherSplit = mode1 != -1 ? ispTestedModes.getNumCompletedSubParts(otherSplit, mode1) : -1;
                if (numSubPartsBestMode1OtherSplit != -1 && numSubPartsBestMode1 != -1 && numSubPartsBestMode1OtherSplit > numSubPartsBestMode1)
                {
                  stopThisSplit = true;
                }
    
          ispTestedModes.splitIsFinished[nextISPcandSplitType - 1] = true;
    
          if (stopThisSplitForAllLFNSTs)
          {
            for (int lfnstIdx = 1; lfnstIdx < NUM_LFNST_NUM_PER_SET; lfnstIdx++)
            {
              m_ispTestedModes[lfnstIdx].splitIsFinished[nextISPcandSplitType - 1] = true;
            }
          }
    
          return;
        }
      }
    
      // Now a new mode is retrieved from the list and it has to be decided whether it should be tested or not
    
      if (ispTestedModes.candIndexInList[nextISPcandSplitType - 1] < rdModeLists[nextISPcandSplitType - 1]->size())
    
        ModeInfo candidate = rdModeLists[nextISPcandSplitType - 1]->at(ispTestedModes.candIndexInList[nextISPcandSplitType - 1]);
        ispTestedModes.candIndexInList[nextISPcandSplitType - 1]++;
    
    
        // extra modes are only tested if ISP has won so far
    
        if (ispTestedModes.candIndexInList[nextISPcandSplitType - 1] > ispTestedModes.numOrigModesToTest)
    
          if (ispTestedModes.bestSplitSoFar != candidate.ispMod || ispTestedModes.bestModeSoFar == PLANAR_IDX)
    
            ispTestedModes.splitIsFinished[nextISPcandSplitType - 1] = true;
    
            return;
          }
        }
    
        bool testCandidate = true;
    
        // we look for a reference mode that has already been tested within the window and decide to test the new one according to the reference mode costs
    
        if (
    
    #if ENABLE_DIMD && !JVET_V0087_DIMD_NO_ISP
    
          candidate.modeId != DIMD_IDX &&
    
    #endif
    #if JVET_W0123_TIMD_FUSION
          candidate.modeId != TIMD_IDX &&
    
    #endif
    #if JVET_AC0105_DIRECTIONAL_PLANAR
          candidate.modeId != PL_HOR_IDX && candidate.modeId != PL_VER_IDX &&
    
    #endif
          maxNumSubPartitions > 2 && (curIspLfnstIdx > 0 || (candidate.modeId >= DC_IDX && ispTestedModes.numTestedModes[nextISPcandSplitType - 1] >= 2)))
    
          int       refLfnstIdx = -1;
    
          const int angWindowSize = 5;
          int       numSubPartsLeftMode, numSubPartsRightMode, numSubPartsRefMode, leftIntraMode = -1, rightIntraMode = -1;
          int       windowSize = candidate.modeId > DC_IDX ? angWindowSize : 1;
    
          int       numSamples = cuSize.width << floorLog2(cuSize.height);
    
          int       numSubPartsLimit = numSamples >= 256 ? maxNumSubPartitions - 1 : 2;
    
    
          xFindAlreadyTestedNearbyIntraModes(curIspLfnstIdx, (int)candidate.modeId, &refLfnstIdx, &leftIntraMode, &rightIntraMode, (ISPType)candidate.ispMod, windowSize);
    
          if (refLfnstIdx != -1 && refLfnstIdx != curIspLfnstIdx)
          {
            CHECK(leftIntraMode != candidate.modeId || rightIntraMode != candidate.modeId, "wrong intra mode and lfnstIdx values!");
            numSubPartsRefMode = m_ispTestedModes[refLfnstIdx].getNumCompletedSubParts((ISPType)candidate.ispMod, candidate.modeId);
            CHECK(numSubPartsRefMode <= 0, "Wrong value of the number of subpartitions completed!");
          }
          else
          {
            numSubPartsLeftMode = leftIntraMode != -1 ? ispTestedModes.getNumCompletedSubParts((ISPType)candidate.ispMod, leftIntraMode) : -1;
            numSubPartsRightMode = rightIntraMode != -1 ? ispTestedModes.getNumCompletedSubParts((ISPType)candidate.ispMod, rightIntraMode) : -1;
    
            numSubPartsRefMode = std::max(numSubPartsLeftMode, numSubPartsRightMode);
          }
    
    
          if (numSubPartsRefMode > 0)
          {
            // The mode was found. Now we check the condition
            testCandidate = numSubPartsRefMode > numSubPartsLimit;
          }
        }
    
        if (testCandidate)
        {
          modeInfo = candidate;
        }
      }
    
      else
      {
        //the end of the list was reached, so the split is invalidated
        ispTestedModes.splitIsFinished[nextISPcandSplitType - 1] = true;
      }
    
    // Looks for an already tested ISP intra mode that can act as a reference for currentIntraMode.
    //
    // Two searches are made, in this order:
    //   1) when lfnstIdx > 0: the very same intra mode, tested with another LFNST index
    //      (index 1 is tried first when lfnstIdx == 2, then index 0);
    //   2) otherwise: the modes at distance 1..windowSize on either side of currentIntraMode,
    //      tested with the same lfnstIdx.
    //
    //   lfnstIdx          LFNST index under test; used to index m_ispTestedModes
    //   currentIntraMode  intra mode a reference is wanted for
    //   refLfnstIdx       [out] LFNST index the reference was tested with, -1 if nothing was found
    //   leftIntraMode     [out] reference mode found on the left side, -1 if none
    //   rightIntraMode    [out] reference mode found on the right side, -1 if none
    //   ispOption         ISP split type; (ispOption - 1) selects the modeHasBeenTested entry
    //   windowSize        largest distance from currentIntraMode that is inspected
    //
    // When search 1) succeeds, both leftIntraMode and rightIntraMode are set to currentIntraMode.
    void IntraSearch::xFindAlreadyTestedNearbyIntraModes(int lfnstIdx, int currentIntraMode, int* refLfnstIdx, int* leftIntraMode, int* rightIntraMode, ISPType ispOption, int windowSize)
    {
      // "nothing found" is the default answer
      *leftIntraMode  = -1;
      *rightIntraMode = -1;
      *refLfnstIdx    = -1;

      const unsigned st = ispOption - 1;

      //first we check if the exact intra mode was already tested for another lfnstIdx value
      if (lfnstIdx > 0)
      {
        bool sameIntraModeFound = false;
        if (lfnstIdx == 2 && m_ispTestedModes[1].modeHasBeenTested[currentIntraMode][st])
        {
          sameIntraModeFound = true;
          *refLfnstIdx = 1;
        }
        else if (m_ispTestedModes[0].modeHasBeenTested[currentIntraMode][st])
        {
          sameIntraModeFound = true;
          *refLfnstIdx = 0;
        }

        if (sameIntraModeFound)
        {
          *leftIntraMode  = currentIntraMode;
          *rightIntraMode = currentIntraMode;
          return;
        }
      }

      //The mode has not been checked for another lfnstIdx value, so now we look for a similar mode within a window using the same lfnstIdx
      for (int k = 1; k <= windowSize; k++)
      {
        // Stepping left below mode 2 wraps around through NUM_LUMA_MODE
        // (presumably onto the top of the angular range -- assumes NUM_LUMA_MODE == 67, TODO confirm).
        const int off      = currentIntraMode - 2 - k;
        const int leftMode = (off < 0) ? NUM_LUMA_MODE + off : currentIntraMode - k;
        // For modes above DC_IDX the modulo keeps the right neighbour inside [2, 66];
        // for the remaining modes the only right-hand candidate is PLANAR_IDX.
        const int rightMode = currentIntraMode > DC_IDX ? ((currentIntraMode - 2 + k) % 65) + 2 : PLANAR_IDX;

        // a neighbour that coincides with the current mode is never a valid reference
        const bool leftModeFound  = leftMode  != currentIntraMode ? m_ispTestedModes[lfnstIdx].modeHasBeenTested[leftMode][st]  : false;
        const bool rightModeFound = rightMode != currentIntraMode ? m_ispTestedModes[lfnstIdx].modeHasBeenTested[rightMode][st] : false;

        if (leftModeFound || rightModeFound)
        {
          *leftIntraMode  = leftModeFound ? leftMode : -1;
          *rightIntraMode = rightModeFound ? rightMode : -1;
          *refLfnstIdx    = lfnstIdx;
          // stop at the closest hit so that a farther mode cannot overwrite it
          break;
        }
      }
    }
    
    //It prepares the list of potential intra modes candidates that will be tested using RD costs
    bool IntraSearch::xSortISPCandList(double bestCostSoFar, double bestNonISPCost, ModeInfo bestNonISPMode)
    
      int bestISPModeInRelCU = -1;
      m_modeCtrl->setStopNonDCT2Transforms(false);
    
      if (m_pcEncCfg->getUseFastISP())
      {
        //we check if the ISP tests can be cancelled
        double thSkipISP = 1.4;
        if (bestNonISPCost > bestCostSoFar * thSkipISP)
        {
          for (int splitIdx = 0; splitIdx < NUM_INTRA_SUBPARTITIONS_MODES - 1; splitIdx++)
          {
            for (int j = 0; j < NUM_LFNST_NUM_PER_SET; j++)
            {
              m_ispTestedModes[j].splitIsFinished[splitIdx] = true;
            }
          }
          return false;
        }
        if (!updateISPStatusFromRelCU(bestNonISPCost, bestNonISPMode, bestISPModeInRelCU))
        {
          return false;
        }
      }
    
    
      for (int k = 0; k < m_ispCandListHor.size(); k++)
      {
        m_ispCandListHor.at(k).ispMod = HOR_INTRA_SUBPARTITIONS; //we set the correct ISP split type value
      }
    
      auto origHadList = m_ispCandListHor;   // save the original hadamard list of regular intra
      bool modeIsInList[NUM_LUMA_MODE] = { false };
    
      m_ispCandListHor.clear();
      m_ispCandListVer.clear();
    
      // we sort the normal intra modes according to their full RD costs
    
      std::stable_sort(m_regIntraRDListWithCosts.begin(), m_regIntraRDListWithCosts.end(), ModeInfoWithCost::compareModeInfoWithCost);
    
    
      // we get the best angle from the regular intra list
      int bestNormalIntraAngle = -1;
      for (int modeIdx = 0; modeIdx < m_regIntraRDListWithCosts.size(); modeIdx++)
      {
        if (bestNormalIntraAngle == -1 && m_regIntraRDListWithCosts.at(modeIdx).modeId > DC_IDX)
        {
          bestNormalIntraAngle = m_regIntraRDListWithCosts.at(modeIdx).modeId;
          break;