Skip to content
Snippets Groups Projects
IntraSearch.cpp 218 KiB
Newer Older
  • Learn to ignore specific revisions
  •     for (int mode = isSecondColorSpace ? 0 : -2 * int(testBDPCM); mode < (int)uiRdModeList.size(); mode++)
    
        {
          // set CU/PU to luma prediction mode
          ModeInfo uiOrgMode;
    
          if (sps.getUseColorTrans() && !m_pcEncCfg->getRGBFormatFlag() && isSecondColorSpace && mode)
          {
            continue;
          }
    
          if (mode < 0 || (isSecondColorSpace && m_savedBDPCMModeFirstColorSpace[m_savedRdModeIdx][mode]))
    
            cu.bdpcmMode = mode < 0 ? -mode : m_savedBDPCMModeFirstColorSpace[m_savedRdModeIdx][mode];
    
            uiOrgMode = ModeInfo( false, false, 0, NOT_INTRA_SUBPARTITIONS, cu.bdpcmMode == 2 ? VER_IDX : HOR_IDX );
    
            uiOrgMode = uiRdModeList[mode];
          }
          if (!cu.bdpcmMode && uiRdModeList[mode].ispMod == INTRA_SUBPARTITIONS_RESERVED)
    
          {
            if (mode == numNonISPModes)   // the list needs to be sorted only once
    
              if (m_pcEncCfg->getUseFastISP())
    
                m_modeCtrl->setBestPredModeDCT2(uiBestPUMode.modeId);
    
              if (!xSortISPCandList(bestCurrentCost, csBest->cost, uiBestPUMode))
              {
                break;
              }
            }
            xGetNextISPMode(uiRdModeList[mode], (mode > 0 ? &uiRdModeList[mode - 1] : nullptr), Size(width, height));
            if (uiRdModeList[mode].ispMod == INTRA_SUBPARTITIONS_RESERVED)
            {
              continue;
    
            cu.lfnstIdx = m_curIspLfnstIdx;
            uiOrgMode   = uiRdModeList[mode];
          }
    
          cu.mipFlag                     = uiOrgMode.mipFlg;
    
          pu.mipTransposedFlag           = uiOrgMode.mipTrFlg;
    
          cu.ispMode                     = uiOrgMode.ispMod;
          pu.multiRefIdx                 = uiOrgMode.mRefId;
          pu.intraDir[CHANNEL_TYPE_LUMA] = uiOrgMode.modeId;
    
          CHECK(cu.mipFlag && pu.multiRefIdx, "Error: combination of MIP and MRL not supported");
          CHECK(pu.multiRefIdx && (pu.intraDir[0] == PLANAR_IDX), "Error: combination of MRL and Planar mode not supported");
          CHECK(cu.ispMode && cu.mipFlag, "Error: combination of ISP and MIP not supported");
          CHECK(cu.ispMode && pu.multiRefIdx, "Error: combination of ISP and MRL not supported");
    
          CHECK(cu.ispMode&& cu.colorTransform, "Error: combination of ISP and ACT not supported");
    
    
          pu.intraDir[CHANNEL_TYPE_CHROMA] = cu.colorTransform ? DM_CHROMA_IDX : pu.intraDir[CHANNEL_TYPE_CHROMA];
    
    
          // set context models
          m_CABACEstimator->getCtx() = ctxStart;
    
          // determine residual for partition
          cs.initSubStructure( *csTemp, partitioner.chType, cs.area, true );
    
    
          bool tmpValidReturn = false;
    
            if ( m_pcEncCfg->getUseFastISP() )
            {
              m_modeCtrl->setISPWasTested(true);
            }
    
            tmpValidReturn = xIntraCodingLumaISP(*csTemp, subTuPartitioner, bestCurrentCost);
            if (csTemp->tus.size() == 0)
            {
              // no TUs were coded
              csTemp->cost = MAX_DOUBLE;
              continue;
            }
    
            // we save the data for future tests
            m_ispTestedModes[m_curIspLfnstIdx].setModeResults((ISPType)cu.ispMode, (int)uiOrgMode.modeId, (int)csTemp->tus.size(), csTemp->cus[0]->firstTU->cbf[COMPONENT_Y] ? csTemp->cost : MAX_DOUBLE, csBest->cost);
            csTemp->cost = !tmpValidReturn ? MAX_DOUBLE : csTemp->cost;
    
            if (cu.colorTransform)
            {
              tmpValidReturn = xRecurIntraCodingACTQT(*csTemp, partitioner, mtsCheckRangeFlag, mtsFirstCheckId, mtsLastCheckId, moreProbMTSIdxFirst);
            }
            else
    
            {
              tmpValidReturn = xRecurIntraCodingLumaQT(
                *csTemp, partitioner, uiBestPUMode.ispMod ? bestCurrentCost : MAX_DOUBLE, -1, TU_NO_ISP,
                uiBestPUMode.ispMod, mtsCheckRangeFlag, mtsFirstCheckId, mtsLastCheckId, moreProbMTSIdxFirst);
            }
    
          if (!cu.ispMode && !cu.mtsFlag && !cu.lfnstIdx && !cu.bdpcmMode && !pu.multiRefIdx && !cu.mipFlag && testISP)
          {
    
            m_regIntraRDListWithCosts.push_back( ModeInfoWithCost( cu.mipFlag, pu.mipTransposedFlag, pu.multiRefIdx, cu.ispMode, uiOrgMode.modeId, csTemp->cost ) );
    
          if( cu.ispMode && !csTemp->cus[0]->firstTU->cbf[COMPONENT_Y] )
          {
            csTemp->cost = MAX_DOUBLE;
    
    Nan Hu's avatar
    Nan Hu committed
            csTemp->costDbOffset = 0;
    
          validReturn |= tmpValidReturn;
    
          if( sps.getUseLFNST() && mtsUsageFlag == 1 && !cu.ispMode && mode >= 0 )
          {
    
            m_modeCostStore[lfnstIdx][mode] = tmpValidReturn ? csTemp->cost : (MAX_DOUBLE / 2.0); //(MAX_DOUBLE / 2.0) ??
    
          DTRACE(g_trace_ctx, D_INTRA_COST, "IntraCost T [x=%d,y=%d,w=%d,h=%d] %f (%d,%d,%d,%d,%d,%d) \n", cu.blocks[0].x,
    
                 cu.blocks[0].y, (int) width, (int) height, csTemp->cost, uiOrgMode.modeId, uiOrgMode.ispMod,
                 pu.multiRefIdx, cu.mipFlag, cu.lfnstIdx, cu.mtsFlag);
    
            if (isFirstColorSpace)
            {
              if (m_pcEncCfg->getRGBFormatFlag() || !cu.ispMode)
              {
                sortRdModeListFirstColorSpace(uiOrgMode, csTemp->cost, cu.bdpcmMode, m_savedRdModeFirstColorSpace[m_savedRdModeIdx], m_savedRdCostFirstColorSpace[m_savedRdModeIdx], m_savedBDPCMModeFirstColorSpace[m_savedRdModeIdx], m_numSavedRdModeFirstColorSpace[m_savedRdModeIdx]);
              }
            }
    
            // check r-d cost
            if( csTemp->cost < csBest->cost )
            {
              std::swap( csTemp, csBest );
    
              uiBestPUMode  = uiOrgMode;
              bestBDPCMMode = cu.bdpcmMode;
              if( sps.getUseLFNST() && mtsUsageFlag == 1 && !cu.ispMode )
              {
                m_bestModeCostStore[ lfnstIdx ] = csBest->cost; //cs.cost;
    
                m_bestModeCostValid[ lfnstIdx ] = true;
    
              }
              if( csBest->cost < bestCurrentCost )
              {
                bestCurrentCost = csBest->cost;
              }
    
              if ( cu.ispMode )
              {
                m_modeCtrl->setIspCost(csBest->cost);
                bestLfnstIdx = cu.lfnstIdx;
              }
              else if ( testISP )
              {
                m_modeCtrl->setMtsFirstPassNoIspCost(csBest->cost);
              }
    
            }
            if( !cu.ispMode && !cu.bdpcmMode && csBest->cost < bestCostNonBDPCM )
    
              bestCostNonBDPCM = csBest->cost;
    
          if( m_pcEncCfg->getFastLocalDualTreeMode() )
    
            if( cu.isConsIntra() && !cu.slice->isIntra() && csBest->cost != MAX_DOUBLE && costInterCU != COST_UNKNOWN && mode >= 0 )
    
              if( m_pcEncCfg->getFastLocalDualTreeMode() == 2 )
    
                //Note: only try one intra mode, which is especially useful to reduce EncT for LDB case (around 4%)
    
          if (sps.getUseColorTrans() && !CS::isDualITree(cs))
          {
            if ((m_pcEncCfg->getRGBFormatFlag() && !cu.colorTransform) && csBest->cost != MAX_DOUBLE && bestCS->cost != MAX_DOUBLE && mode >= 0)
            {
              if (csBest->cost > bestCS->cost)
              {
                break;
              }
            }
          }
    
        cu.ispMode = uiBestPUMode.ispMod;
    
        cu.lfnstIdx = bestLfnstIdx;
    
            cs.useSubStructure(*csBest, partitioner.chType, pu, true, true, KEEP_PRED_AND_RESI_SIGNALS, KEEP_PRED_AND_RESI_SIGNALS, true);
    
            cs.useSubStructure(*csBest, partitioner.chType, pu.singleChan(CHANNEL_TYPE_LUMA), true, true, KEEP_PRED_AND_RESI_SIGNALS,
    
        if( validReturn )
        {
          //=== update PU data ====
          cu.mipFlag = uiBestPUMode.mipFlg;
    
          pu.mipTransposedFlag             = uiBestPUMode.mipTrFlg;
    
          pu.multiRefIdx = uiBestPUMode.mRefId;
          pu.intraDir[ CHANNEL_TYPE_LUMA ] = uiBestPUMode.modeId;
          cu.bdpcmMode = bestBDPCMMode;
    
          if (cu.colorTransform)
          {
            CHECK(pu.intraDir[CHANNEL_TYPE_CHROMA] != DM_CHROMA_IDX, "chroma should use DM mode for adaptive color transform");
          }
    
      }
    
      //===== reset context models =====
      m_CABACEstimator->getCtx() = ctxStart;
    
    /**
     * Chroma intra mode decision for one coding unit.
     *
     * Visible flow: fetch the chroma candidate modes, pre-screen them with a SAD/SATD estimate on Cb and Cr,
     * disable the two worst-ranked candidates, then run xRecurIntraChromaCodingQT for every remaining candidate
     * (plus two BDPCM passes when allowed) and keep the one with the lowest RD cost. The winner is written to
     * pu.intraDir[1], cs.dist and cu.bdpcmModeChroma, and its TU data and buffers are copied back from saveCS.
     *
     * \param cu              coding unit being searched; cu.firstPU is the prediction unit that is updated
     * \param partitioner     supplies the current area and channel type for the TUs that are added and coded
     * \param maxCostAllowed  initial value of bestCostSoFar; when luma uses ISP and bestCostSoFar is still
     *                        >= this value at the end, cu.ispMode is reset to 0
     *
     * NOTE(review): this listing is missing lines (unbalanced braces, if-headers without bodies, baseDist is
     * used but not declared in the visible text). Check against the complete file before relying on details.
     */
    void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner, const double maxCostAllowed )
    
    {
      const ChromaFormat format   = cu.chromaFormat;
      const uint32_t    numberValidComponents = getNumberValidComponents(format);
      CodingStructure &cs = *cu.cs;
      // snapshot of the CABAC contexts; restored before every candidate and once more at the end
      const TempCtx ctxStart  ( m_CtxCache, m_CABACEstimator->getCtx() );
    
      cs.setDecomp( cs.area.Cb(), false );
    
    
      // cost bound passed to xRecurIntraChromaCodingQT; in the visible code it is only lowered when luma uses ISP
      double    bestCostSoFar = maxCostAllowed;
    
      // ISP on luma only affects chroma when luma and chroma share one tree
      bool      lumaUsesISP   = !cu.isSepTree() && cu.ispMode;
    
      PartSplit ispType       = lumaUsesISP ? CU::getISPType( cu, COMPONENT_Y ) : TU_NO_ISP;
      CHECK( cu.ispMode && bestCostSoFar < 0, "bestCostSoFar must be positive!" );
    
    
      auto &pu = *cu.firstPU;
    
      {
        // best candidate found so far (mode, distortion, RD cost, BDPCM setting)
        uint32_t       uiBestMode = 0;
        Distortion uiBestDist = 0;
        double     dBestCost = MAX_DOUBLE;
    
        int32_t bestBDPCMMode = 0;
    
          int32_t  uiMinMode = 0;
          int32_t  uiMaxMode = NUM_CHROMA_MODE;
    
          //----- check chroma modes -----
          uint32_t chromaCandModes[ NUM_CHROMA_MODE ];
          PU::getIntraChromaCandModes( pu, chromaCandModes );
    
          // create a temporary CS
          CodingStructure &saveCS = *m_pSaveCS[0];
          saveCS.pcv      = cs.pcv;
          saveCS.picture  = cs.picture;
          saveCS.area.repositionTo( cs.area );
          saveCS.clearTUs();
    
    
          // NOTE(review): the body of the next if is not present in this listing
          if( !cu.isSepTree() && cu.ispMode )
    
          if( cu.isSepTree() )
    
          {
            // add one TU per part when the area can be split with TU_MAX_TR_SPLIT, otherwise a single TU
            if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
            {
              partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
    
              do
              {
                cs.addTU( CS::getArea( cs, partitioner.currArea(), partitioner.chType ), partitioner.chType ).depth = partitioner.currTrDepth;
              } while( partitioner.nextPart( cs ) );
    
              partitioner.exitCurrSplit();
            }
            else
            cs.addTU( CS::getArea( cs, partitioner.currArea(), partitioner.chType ), partitioner.chType );
          }
    
          // TUs of cs that have a copy in saveCS, in the same order as saveCS.tus
          std::vector<TransformUnit*> orgTUs;
    
    
          if( lumaUsesISP )
          {
            CodingUnit& auxCU = saveCS.addCU( cu, partitioner.chType );
            auxCU.ispMode = cu.ispMode;
            saveCS.sps = cu.cs->sps;
            saveCS.addPU( *cu.firstPU, partitioner.chType );
          }
    
    
    
          // create a store for the TUs
          for( const auto &ptu : cs.tus )
          {
            // for split TUs in HEVC, add the TUs without Chroma parts for correct setting of Cbfs
    
            if( lumaUsesISP || pu.contains( *ptu, CHANNEL_TYPE_CHROMA ) )
    
            {
              saveCS.addTU( *ptu, partitioner.chType );
              orgTUs.push_back( ptu );
            }
          }
    
          // SATD pre-selecting.
          int satdModeList[NUM_CHROMA_MODE];
          int64_t satdSortedCost[NUM_CHROMA_MODE];
          for (int i = 0; i < NUM_CHROMA_MODE; i++)
          {
            satdSortedCost[i] = 0; // for the mode not pre-select by SATD, do RDO by default, so set the initial value 0.
            satdModeList[i] = 0;
          }
          bool modeIsEnable[NUM_INTRA_MODE + 1]; // use intra mode idx to check whether enable
          for (int i = 0; i < NUM_INTRA_MODE + 1; i++)
          {
            modeIsEnable[i] = 1;
          }
    
          DistParam distParamSad;
          DistParam distParamSatd;
    
          pu.intraDir[1] = MDLM_L_IDX; // temporary assigned, just to indicate this is a MDLM mode. for luma down-sampling operation.
    
          initIntraPatternChType(cu, pu.Cb());
          initIntraPatternChType(cu, pu.Cr());
          xGetLumaRecPixels(pu, pu.Cb());
    
          // estimate a cost for each candidate; skipped candidates keep cost 0 and therefore sort to the front
          for (int idx = uiMinMode; idx <= uiMaxMode - 1; idx++)
          {
            int mode = chromaCandModes[idx];
            satdModeList[idx] = mode;
            if (PU::isLMCMode(mode) && !PU::isLMCModeEnabled(pu, mode))
            {
              continue;
            }
    
            if ((mode == LM_CHROMA_IDX) || (mode == PLANAR_IDX) || (mode == DM_CHROMA_IDX)) // only pre-check regular modes and MDLM modes, not including DM ,Planar, and LM
    
            {
              continue;
            }
            pu.intraDir[1] = mode; // temporary assigned, for SATD checking.
    
    
            int64_t sad = 0;
            int64_t sadCb = 0;
            int64_t satdCb = 0;
            int64_t sadCr = 0;
            int64_t satdCr = 0;
    
            // note: this local cs shadows the function-level cs inside the loop body
            CodingStructure& cs = *(pu.cs);
    
            CompArea areaCb = pu.Cb();
            PelBuf orgCb = cs.getOrgBuf(areaCb);
            PelBuf predCb = cs.getPredBuf(areaCb);
    
            m_pcRdCost->setDistParam(distParamSad, orgCb, predCb, pu.cs->sps->getBitDepth(CHANNEL_TYPE_CHROMA), COMPONENT_Cb, false);
            m_pcRdCost->setDistParam(distParamSatd, orgCb, predCb, pu.cs->sps->getBitDepth(CHANNEL_TYPE_CHROMA), COMPONENT_Cb, true);
            distParamSad.applyWeight = false;
            distParamSatd.applyWeight = false;
    
            if (PU::isLMCMode(mode))
            {
              predIntraChromaLM(COMPONENT_Cb, predCb, pu, areaCb, mode);
            }
            else
            {
    
              initPredIntraParams(pu, pu.Cb(), *pu.cs->sps);
              predIntraAng(COMPONENT_Cb, predCb, pu);
    
            // Cb: SAD is doubled, then the smaller of doubled SAD and SATD is accumulated
            sadCb = distParamSad.distFunc(distParamSad) * 2;
            satdCb = distParamSatd.distFunc(distParamSatd);
            sad += std::min(sadCb, satdCb);
    
            CompArea areaCr = pu.Cr();
            PelBuf orgCr = cs.getOrgBuf(areaCr);
            PelBuf predCr = cs.getPredBuf(areaCr);
    
            m_pcRdCost->setDistParam(distParamSad, orgCr, predCr, pu.cs->sps->getBitDepth(CHANNEL_TYPE_CHROMA), COMPONENT_Cr, false);
            m_pcRdCost->setDistParam(distParamSatd, orgCr, predCr, pu.cs->sps->getBitDepth(CHANNEL_TYPE_CHROMA), COMPONENT_Cr, true);
            distParamSad.applyWeight = false;
            distParamSatd.applyWeight = false;
    
            if (PU::isLMCMode(mode))
            {
              predIntraChromaLM(COMPONENT_Cr, predCr, pu, areaCr, mode);
            }
            else
            {
    
              initPredIntraParams(pu, pu.Cr(), *pu.cs->sps);
              predIntraAng(COMPONENT_Cr, predCr, pu);
    
            // Cr: same rule as for Cb
            sadCr = distParamSad.distFunc(distParamSad) * 2;
            satdCr = distParamSatd.distFunc(distParamSatd);
            sad += std::min(sadCr, satdCr);
    
            satdSortedCost[idx] = sad;
          }
          // sort the mode based on the cost from small to large.
          int tempIdx = 0;
          int64_t tempCost = 0;
          for (int i = uiMinMode; i <= uiMaxMode - 1; i++)
          {
            for (int j = i + 1; j <= uiMaxMode - 1; j++)
            {
              if (satdSortedCost[j] < satdSortedCost[i])
              {
                tempIdx = satdModeList[i];
                satdModeList[i] = satdModeList[j];
                satdModeList[j] = tempIdx;
    
                tempCost = satdSortedCost[i];
                satdSortedCost[i] = satdSortedCost[j];
                satdSortedCost[j] = tempCost;
    
              }
            }
          }
          int reducedModeNumber = 2; // reduce the number of chroma modes
          for (int i = 0; i < reducedModeNumber; i++)
          {
            modeIsEnable[satdModeList[uiMaxMode - 1 - i]] = 0; // disable the last reducedModeNumber modes
          }
    
          bool testBDPCM = true;
          testBDPCM = testBDPCM && CU::bdpcmAllowed(cu, COMPONENT_Cb) && cu.ispMode == 0 && cu.mtsFlag == 0 && cu.lfnstIdx == 0;
          // when testBDPCM is true the loop starts at uiMinMode - 2; negative uiMode values are the BDPCM passes
          for (int32_t uiMode = uiMinMode - (2 * int(testBDPCM)); uiMode < uiMaxMode; uiMode++)
    
            int chromaIntraMode;
    
    
            if (uiMode < 0)
            {
                cu.bdpcmModeChroma = -uiMode;
    
                chromaIntraMode = cu.bdpcmModeChroma == 2 ? chromaCandModes[1] : chromaCandModes[2];
    
              chromaIntraMode = chromaCandModes[uiMode];
    
    
              cu.bdpcmModeChroma = 0;
              if( PU::isLMCMode( chromaIntraMode ) && ! PU::isLMCModeEnabled( pu, chromaIntraMode ) )
              {
                continue;
              }
              if (!modeIsEnable[chromaIntraMode] && PU::isLMCModeEnabled(pu, chromaIntraMode)) // when CCLM is disable, then MDLM is disable. not use satd checking
              {
                continue;
              }
    
            cs.setDecomp( pu.Cb(), false );
            // NOTE(review): baseDist is not declared anywhere in the visible lines of this listing
            cs.dist = baseDist;
            //----- restore context models -----
            m_CABACEstimator->getCtx() = ctxStart;
    
            //----- chroma coding -----
            pu.intraDir[1] = chromaIntraMode;
    
    
            xRecurIntraChromaCodingQT( cs, partitioner, bestCostSoFar, ispType );
            // with luma ISP, a distortion of MAX_UINT causes this candidate to be skipped
            if( lumaUsesISP && cs.dist == MAX_UINT )
            {
              continue;
            }
    
            if (cs.sps->getTransformSkipEnabledFlag())
    
            uint64_t fracBits   = xGetIntraFracBitsQT( cs, partitioner, false, true, -1, ispType );
    
            Distortion uiDist = cs.dist;
            // the RD cost uses only the distortion added on top of baseDist
            double    dCost   = m_pcRdCost->calcRdCost( fracBits, uiDist - baseDist );
    
            //----- compare -----
            if( dCost < dBestCost )
            {
    
              if( lumaUsesISP && dCost < bestCostSoFar )
              {
                bestCostSoFar = dCost;
              }
    
              // keep a copy of the winning buffers and TU data in saveCS
              for( uint32_t i = getFirstComponentOfChannel( CHANNEL_TYPE_CHROMA ); i < numberValidComponents; i++ )
              {
                const CompArea &area = pu.blocks[i];
    
                saveCS.getRecoBuf     ( area ).copyFrom( cs.getRecoBuf   ( area ) );
    #if KEEP_PRED_AND_RESI_SIGNALS
                saveCS.getPredBuf     ( area ).copyFrom( cs.getPredBuf   ( area ) );
                saveCS.getResiBuf     ( area ).copyFrom( cs.getResiBuf   ( area ) );
    
    Taoran Lu's avatar
    Taoran Lu committed
    #endif
                saveCS.getPredBuf     ( area ).copyFrom( cs.getPredBuf   (area ) );
                cs.picture->getPredBuf( area ).copyFrom( cs.getPredBuf   (area ) );
    
                cs.picture->getRecoBuf( area ).copyFrom( cs.getRecoBuf( area ) );
    
                for( uint32_t j = 0; j < saveCS.tus.size(); j++ )
                {
                  saveCS.tus[j]->copyComponentFrom( *orgTUs[j], area.compID );
                }
              }
    
              dBestCost  = dCost;
              uiBestDist = uiDist;
              uiBestMode = chromaIntraMode;
    
              bestBDPCMMode = cu.bdpcmModeChroma;
    
            }
          }
    
          for( uint32_t i = getFirstComponentOfChannel( CHANNEL_TYPE_CHROMA ); i < numberValidComponents; i++ )
          {
            const CompArea &area = pu.blocks[i];
    
            cs.getRecoBuf         ( area ).copyFrom( saveCS.getRecoBuf( area ) );
    #if KEEP_PRED_AND_RESI_SIGNALS
            cs.getPredBuf         ( area ).copyFrom( saveCS.getPredBuf( area ) );
            cs.getResiBuf         ( area ).copyFrom( saveCS.getResiBuf( area ) );
    #endif
    
    Taoran Lu's avatar
    Taoran Lu committed
            cs.getPredBuf         ( area ).copyFrom( saveCS.getPredBuf( area ) );
            cs.picture->getPredBuf( area ).copyFrom( cs.getPredBuf    ( area ) );
    
    
            cs.picture->getRecoBuf( area ).copyFrom( cs.    getRecoBuf( area ) );
    
            for( uint32_t j = 0; j < saveCS.tus.size(); j++ )
            {
              orgTUs[ j ]->copyComponentFrom( *saveCS.tus[ j ], area.compID );
            }
          }
        }
    
        // publish the winning mode, its distortion and its BDPCM setting
        pu.intraDir[1] = uiBestMode;
        cs.dist        = uiBestDist;
    
        cu.bdpcmModeChroma = bestBDPCMMode;
    
      }
    
      //----- restore context models -----
      m_CABACEstimator->getCtx() = ctxStart;
    
      // in the visible code bestCostSoFar is only lowered at the compare step, so reaching this with
      // bestCostSoFar >= maxCostAllowed means no candidate beat the bound; luma ISP is then switched off
      if( lumaUsesISP && bestCostSoFar >= maxCostAllowed )
      {
        cu.ispMode = 0;
      }
    
    /**
     * Append one (area, cost) pair to the m_cuAreaInSCIPU / m_cuCostInSCIPU arrays.
     *
     * \param area  area to record in the next free slot
     * \param cost  cost to record alongside it
     *
     * The arrays hold NUM_INTER_CU_INFO_SAVE entries; once that many are stored the call does nothing.
     */
    void IntraSearch::saveCuAreaCostInSCIPU( Area area, double cost )
    {
      // storage exhausted: silently drop the entry
      if( !( m_numCuInSCIPU < NUM_INTER_CU_INFO_SAVE ) )
      {
        return;
      }

      m_cuAreaInSCIPU[m_numCuInSCIPU] = area;
      m_cuCostInSCIPU[m_numCuInSCIPU] = cost;
      ++m_numCuInSCIPU;
    }
    
    /**
     * Reset the area/cost store filled by saveCuAreaCostInSCIPU: every slot gets a default-constructed
     * Area and a cost of 0, and the entry counter goes back to 0.
     */
    void IntraSearch::initCuAreaCostInSCIPU()
    {
      m_numCuInSCIPU = 0;

      int slot = 0;
      while( slot < NUM_INTER_CU_INFO_SAVE )
      {
        m_cuCostInSCIPU[slot] = 0;
        m_cuAreaInSCIPU[slot] = Area();
        ++slot;
      }
    }
    
    /**
     * Palette (PLT) search for the CU returned by cs.getCU(partitioner.chType).
     *
     * Visible flow: derive and reorder the palette, pre-compute per-pixel index errors, optimise the index map
     * with the horizontal scan (and the vertical scan when more than one index is available), drop palette
     * entries that no pixel uses, remap the stored indices, rebuild the reconstruction, and add the resulting
     * distortion to cs.dist.
     *
     * \param cs           coding structure holding the CU/TU, the org/pred/reco buffers and prevPLT
     * \param partitioner  supplies the channel type and is forwarded to the helper routines
     * \param compBegin    first component handled by this call
     * \param numComp      number of consecutive components handled, starting at compBegin
     *
     * NOTE(review): this listing is missing lines (no opening brace after the signature, several unbalanced
     * braces further down). Check against the complete file before relying on details.
     */
    void IntraSearch::PLTSearch(CodingStructure &cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp)
    
      CodingUnit    &cu = *cs.getCU(partitioner.chType);
    
      TransformUnit &tu = *cs.getTU(partitioner.chType);
    
      uint32_t height = cu.block(compBegin).height;
      uint32_t width = cu.block(compBegin).width;
    
      // with LMCS active, the prediction buffer is loaded with the original samples and its luma plane
      // is passed through the forward LUT
      if (m_pcEncCfg->getLmcs() && (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))
    
      {
        cs.getPredBuf().copyFrom(cs.getOrgBuf());
        cs.getPredBuf().Y().rspSignal(m_pcReshape->getFwdLUT());
      }
    
      // for a local separate tree the previous palette size of compBegin is taken from luma
      if( cu.isLocalSepTree() )
        cs.prevPLT.curPLTSize[compBegin] = cs.prevPLT.curPLTSize[COMPONENT_Y];
    
      cu.lastPLTSize[compBegin] = cs.prevPLT.curPLTSize[compBegin];
      //derive palette
      derivePLTLossy(cs, partitioner, compBegin, numComp);
      reorderPLT(cs, partitioner, compBegin, numComp);
    
      // handed to deriveIndexMap; read below to decide which palette entries are kept
      bool idxExist[MAXPLTSIZE + 1] = { false };
    
      preCalcPLTIndexRD(cs, partitioner, compBegin, numComp); // Pre-calculate distortions for each pixel
    
      // shared by both deriveIndexMap calls
      double rdCost = MAX_DOUBLE;
    
      deriveIndexMap(cs, partitioner, compBegin, numComp, PLT_SCAN_HORTRAV, rdCost, idxExist); // Optimize palette index map (horizontal scan)
    
      // the vertical scan is only tried when palette entries plus escape give more than one index
      if ((cu.curPLTSize[compBegin] + cu.useEscape[compBegin]) > 1)
      {
    
        deriveIndexMap(cs, partitioner, compBegin, numComp, PLT_SCAN_VERTRAV, rdCost, idxExist); // Optimize palette index map (vertical scan)
      }
      // Remove unused palette entries
      uint8_t newPLTSize = 0;
      // old palette index -> new palette index; -1 marks an entry that is dropped
      int idxMapping[MAXPLTSIZE + 1];
      memset(idxMapping, -1, sizeof(int) * (MAXPLTSIZE + 1));
      for (int i = 0; i < cu.curPLTSize[compBegin]; i++)
      {
        if (idxExist[i])
        {
          idxMapping[i] = newPLTSize;
          newPLTSize++;
        }
    
      // the index one past the last palette entry maps to newPLTSize when escape is in use
      idxMapping[cu.curPLTSize[compBegin]] = cu.useEscape[compBegin]? newPLTSize: -1;
      if (newPLTSize != cu.curPLTSize[compBegin]) // there exist unused palette entries
      { // update palette table and reuseflag
        Pel curPLTtmp[MAX_NUM_COMPONENT][MAXPLTSIZE];
        int reuseFlagIdx = 0, curPLTtmpIdx = 0, reuseEntrySize = 0;
        memset(cu.reuseflag[compBegin], false, sizeof(bool) * MAXPLTPREDSIZE);
    
        // component range whose palette colours are compacted; widened below for a local separate tree
        int compBeginTmp = compBegin;
        int numCompTmp   = numComp;
    
        if( cu.isLocalSepTree() )
    
          memset(cu.reuseflag[COMPONENT_Y], false, sizeof(bool) * MAXPLTPREDSIZE);
    
          compBeginTmp = COMPONENT_Y;
          numCompTmp   = (cu.chromaFormat != CHROMA_400) ? 3 : 1;
        }
    
        for (int curIdx = 0; curIdx < cu.curPLTSize[compBegin]; curIdx++)
        {
          if (idxExist[curIdx])
          {
    
            for (int comp = compBeginTmp; comp < (compBeginTmp + numCompTmp); comp++)
    
              curPLTtmp[comp][curPLTtmpIdx] = cu.curPLT[comp][curIdx];
    
            // Update reuse flags
            if (curIdx < cu.reusePLTSize[compBegin])
            {
              bool match = false;
              // reuseFlagIdx is not reset between entries, so the scan of prevPLT resumes where it stopped
              for (; reuseFlagIdx < cs.prevPLT.curPLTSize[compBegin]; reuseFlagIdx++)
              {
                bool matchTmp = true;
                for (int comp = compBegin; comp < (compBegin + numComp); comp++)
                {
                  matchTmp = matchTmp && (curPLTtmp[comp][curPLTtmpIdx] == cs.prevPLT.curPLT[comp][reuseFlagIdx]);
                }
                if (matchTmp)
                {
                  match = true;
                  break;
                }
              }
              if (match)
              {
                cu.reuseflag[compBegin][reuseFlagIdx] = true;
    
                if( cu.isLocalSepTree() )
                  cu.reuseflag[COMPONENT_Y][reuseFlagIdx] = true;
    
                reuseEntrySize++;
              }
            }
            curPLTtmpIdx++;
          }
        }
        cu.reusePLTSize[compBegin] = reuseEntrySize;
        // update palette table
        cu.curPLTSize[compBegin] = newPLTSize;
    
        if( cu.isLocalSepTree() )
          cu.curPLTSize[COMPONENT_Y] = newPLTSize;
    
        for (int comp = compBeginTmp; comp < (compBeginTmp + numCompTmp); comp++)
    
          memcpy( cu.curPLT[comp], curPLTtmp[comp], sizeof(Pel)*cu.curPLTSize[compBegin]);
      }
    
    Yung-Hsuan Chao (Jessie)'s avatar
    Yung-Hsuan Chao (Jessie) committed
      cu.useRotation[compBegin] = m_bestScanRotationMode;
    
      int indexMaxSize = cu.useEscape[compBegin] ? (cu.curPLTSize[compBegin] + 1) : cu.curPLTSize[compBegin];
      if (indexMaxSize <= 1)
      {
        cu.useRotation[compBegin] = false;
      }
    
      //reconstruct pixel
      PelBuf    curPLTIdx = tu.getcurPLTIdx(compBegin);
    
      for (uint32_t y = 0; y < height; y++)
    
        for (uint32_t x = 0; x < width; x++)
    
          curPLTIdx.at(x, y) = idxMapping[curPLTIdx.at(x, y)];
    
          if (curPLTIdx.at(x, y) == cu.curPLTSize[compBegin])
    
            calcPixelPred(cs, partitioner, y, x, compBegin, numComp);
    
          }
          else
          {
            for (uint32_t compID = compBegin; compID < (compBegin + numComp); compID++)
            {
    
              CompArea area = cu.blocks[compID];
              PelBuf   recBuf = cs.getRecoBuf(area);
    
              uint32_t scaleX = getComponentScaleX((ComponentID)COMPONENT_Cb, cs.sps->getChromaFormatIdc());
              uint32_t scaleY = getComponentScaleY((ComponentID)COMPONENT_Cb, cs.sps->getChromaFormatIdc());
              if (compBegin != COMPONENT_Y || compID == COMPONENT_Y)
              {
    
                recBuf.at(x, y) = cu.curPLT[compID][curPLTIdx.at(x, y)];
    
              else if (compBegin == COMPONENT_Y && compID != COMPONENT_Y && y % (1 << scaleY) == 0 && x % (1 << scaleX) == 0)
    
                recBuf.at(x >> scaleX, y >> scaleY) = cu.curPLT[compID][curPLTIdx.at(x, y)];
    
              }
            }
          }
        }
      }
    
      cs.getPredBuf().fill(0);
      cs.getResiBuf().fill(0);
      cs.getOrgResiBuf().fill(0);
    
      cs.fracBits = MAX_UINT;
      cs.cost = MAX_DOUBLE;
      Distortion distortion = 0;
      for (uint32_t comp = compBegin; comp < (compBegin + numComp); comp++)
      {
        const ComponentID compID = ComponentID(comp);
        CPelBuf reco = cs.getRecoBuf(compID);
        CPelBuf org = cs.getOrgBuf(compID);
    
        if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (
          m_pcEncCfg->getLmcs() && (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())))
    
        {
          const CPelBuf orgLuma = cs.getOrgBuf(cs.area.blocks[COMPONENT_Y]);
    
          if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()))
          {
            const CompArea &areaY = cu.Y();
    
            CompArea tmpArea1(COMPONENT_Y, areaY.chromaFormat, Position(0, 0), areaY.size());
            PelBuf   tmpRecLuma = m_tmpStorageLCU.getBuf(tmpArea1);
    
            tmpRecLuma.copyFrom(reco);
            tmpRecLuma.rspSignal(m_pcReshape->getInvLUT());
            distortion += m_pcRdCost->getDistPart(org, tmpRecLuma, cs.sps->getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
          }
          else
          {
            distortion += m_pcRdCost->getDistPart(org, reco, cs.sps->getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
          }
        }
        else
    
          distortion += m_pcRdCost->getDistPart(org, reco, cs.sps->getBitDepth(toChannelType(compID)), compID, DF_SSE);
      }
    
      cs.dist += distortion;
      const CompArea &area = cu.blocks[compBegin];
      cs.setDecomp(area);
      cs.picture->getRecoBuf(area).copyFrom(cs.getRecoBuf(area));
    
    /**
     * Quantise one sample per component and compute the value it reconstructs to.
     *
     * For every component ch in [compBegin, compBegin + numComp):
     *   - paPixelValue[ch] = max(0, (orgBuf[ch] * scale + roundingOffset) >> (QUANT_SHIFT + qp / 6))
     *   - paRecoValue[ch]  = the de-quantised paPixelValue[ch], clipped to the bit depth of the component
     * where qp is cQP.Qp(true) of the TU returned by cs.getTU(partitioner.chType).
     *
     * \param cs            coding structure that provides the CU and TU for partitioner.chType
     * \param partitioner   only its chType is read
     * \param orgBuf        input samples, indexed by component id
     * \param paPixelValue  output: quantised levels, indexed by component id
     * \param paRecoValue   output: reconstructed samples, indexed by component id
     * \param compBegin     first component to process
     * \param numComp       number of consecutive components to process
     *
     * All three arrays are indexed by component id (not by offset from compBegin), so they must have at
     * least compBegin + numComp elements.
     */
    void IntraSearch::calcPixelPredRD(CodingStructure& cs, Partitioner& partitioner, Pel* orgBuf, Pel* paPixelValue, Pel* paRecoValue, ComponentID compBegin, uint32_t numComp)
    {
      CodingUnit    &cu = *cs.getCU(partitioner.chType);
      TransformUnit &tu = *cs.getTU(partitioner.chType);

      for (uint32_t ch = compBegin; ch < (compBegin + numComp); ch++)
      {
        const ComponentID compID = ComponentID(ch);

        // per-component quantisation parameters
        QpParam cQP(tu, compID);
        const int qp    = cQP.Qp(true);
        const int qpRem = qp % 6;
        const int qpPer = qp / 6;

        // forward quantiser: scale, add half of the divisor for rounding, shift right
        const int quantiserScale      = g_quantScales[0][qpRem];
        const int quantiserRightShift = QUANT_SHIFT + qpPer;
        const int rightShiftOffset    = 1 << (quantiserRightShift - 1);

        // inverse quantiser: fixed shift with its own rounding offset
        const int invQuantiserRightShift = IQUANT_SHIFT;
        const int add                    = 1 << (invQuantiserRightShift - 1);

        const int channelBitDepth = cu.cs->sps->getBitDepth(toChannelType(compID));

        paPixelValue[ch] = Pel(std::max<int>(0, ((orgBuf[ch] * quantiserScale + rightShiftOffset) >> quantiserRightShift)));
        assert(paPixelValue[ch] < (1 << (channelBitDepth + 1)));

        // Keep the de-quantised value in an int until it has been clipped. Storing it in a Pel first would
        // narrow it before ClipBD runs, so an out-of-range intermediate could wrap instead of saturating.
        const int recoUnclipped = (((paPixelValue[ch] * g_invQuantScales[0][qpRem]) << qpPer) + add) >> invQuantiserRightShift;
        paRecoValue[ch] = Pel(ClipBD<int>(recoUnclipped, channelBitDepth));
      }
    }
    
    /** Pre-computes, for every pixel of the current CU, the cost of each palette index choice.
     *
     *  For each raster position the function fills
     *    - m_indexError[i][rasPos] for every palette entry i < cu.curPLTSize[compBegin] with the
     *      distortion of representing the pixel by that entry,
     *    - m_indexError[cu.curPLTSize[compBegin]][rasPos] with the RD cost of coding the pixel as an
     *      escape color (distortion + lambda * escape bins),
     *    - m_minErrorIndexMap[rasPos] with the index (palette entry or escape) of minimum cost.
     *
     *  \param cs          coding structure providing the CU, slice, SPS and sample buffers
     *  \param partitioner partitioner; only its chType is used here to select the CU
     *  \param compBegin   first component covered by the palette
     *  \param numComp     number of components covered by the palette
     */
    void IntraSearch::preCalcPLTIndexRD(CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp)
    {
      CodingUnit &cu = *cs.getCU(partitioner.chType);
      uint32_t height = cu.block(compBegin).height;
      uint32_t width = cu.block(compBegin).width;

      bool lossless = (m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && cs.slice->isLossless());

      // Select the source samples per component: the prediction buffer is read when LMCS is active
      // for this CTU, the original buffer otherwise.
      // NOTE(review): presumably the prediction buffer holds the reshaped original at this point - confirm with caller.
      CPelBuf   orgBuf[3];
      for (int comp = compBegin; comp < (compBegin + numComp); comp++)
      {
        CompArea  area = cu.blocks[comp];

        if (m_pcEncCfg->getLmcs() && (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))
        {
          orgBuf[comp] = cs.getPredBuf(area);
        }
        else
        {
          orgBuf[comp] = cs.getOrgBuf(area);
        }
      }

      int rasPos;
      uint32_t scaleX = getComponentScaleX(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
      uint32_t scaleY = getComponentScaleY(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
      for (uint32_t y = 0; y < height; y++)
      {
        for (uint32_t x = 0; x < width; x++)
        {
          rasPos = y * width + x;
          // chroma discard: when luma is the first component, only luma is evaluated at positions
          // where (y & scaleY) or (x & scaleX) is non-zero
          bool discardChroma = (compBegin == COMPONENT_Y) && (y&scaleY || x&scaleX);
          Pel curPel[3];
          for (int comp = compBegin; comp < (compBegin + numComp); comp++)
          {
            // chroma samples are fetched at the sub-sampled position when the palette starts at luma
            uint32_t pX1 = (comp > 0 && compBegin == COMPONENT_Y) ? (x >> scaleX) : x;
            uint32_t pY1 = (comp > 0 && compBegin == COMPONENT_Y) ? (y >> scaleY) : y;
            curPel[comp] = orgBuf[comp].at(pX1, pY1);
          }

          uint8_t  pltIdx = 0;
          double minError = MAX_DOUBLE;
          uint8_t  bestIdx = 0;

          // start every palette entry at "infinite" cost; entries not evaluated below keep this value
          for (uint8_t z = 0; z < cu.curPLTSize[compBegin]; z++)
          {
            m_indexError[z][rasPos] = minError;
          }

          while (pltIdx < cu.curPLTSize[compBegin])
          {
            uint64_t sqrtError = 0;

            if (lossless)
            {
              // lossless: only an exact match (zero absolute difference) is accepted, and the
              // search stops at the first such entry
              for (int comp = compBegin; comp < (discardChroma ? 1 : (compBegin + numComp)); comp++)
              {
                sqrtError += int64_t(abs(curPel[comp] - cu.curPLT[comp][pltIdx]));
              }
              if (sqrtError == 0)
              {
                m_indexError[pltIdx][rasPos] = (double) sqrtError;
                minError                     = (double) sqrtError;
                bestIdx                      = pltIdx;
                break;
              }
            }
            else
            {
              // lossy: squared error, with chroma contributions scaled by ENC_CHROMA_WEIGHTING
              for (int comp = compBegin; comp < (discardChroma ? 1 : (compBegin + numComp)); comp++)
              {
                int64_t tmpErr = int64_t(curPel[comp] - cu.curPLT[comp][pltIdx]);
                if (isChroma((ComponentID)comp))
                {
                  sqrtError += uint64_t(tmpErr*tmpErr*ENC_CHROMA_WEIGHTING);
                }
                else
                {
                  sqrtError += tmpErr*tmpErr;
                }
              }
              m_indexError[pltIdx][rasPos] = (double)sqrtError;
              if (sqrtError < minError)
              {
                minError = (double)sqrtError;
                bestIdx = pltIdx;
              }
            }

            // advance for both the lossless and the lossy path
            pltIdx++;
          }

          Pel paPixelValue[3], paRecoValue[3];

          calcPixelPredRD(cs, partitioner, curPel, paPixelValue, paRecoValue, compBegin, numComp);

          // RD cost of signalling this pixel as an escape color
          uint64_t error = 0, rate = 0;
          for (int comp = compBegin; comp < (discardChroma ? 1 : (compBegin + numComp)); comp++)
          {
            if (lossless)
            {
              // lossless: no distortion term, the rate is looked up with the unquantized sample value
              rate += m_escapeNumBins[curPel[comp]];
            }
            else
            {
              int64_t tmpErr = int64_t(curPel[comp] - paRecoValue[comp]);
              if (isChroma((ComponentID)comp))
              {
                error += uint64_t(tmpErr*tmpErr*ENC_CHROMA_WEIGHTING);
              }
              else
              {
                error += tmpErr*tmpErr;
              }
              rate += m_escapeNumBins[paPixelValue[comp]]; // encode quantized escape color
            }
          }
          double rdCost = (double)error + m_pcRdCost->getLambda()*(double)rate;
          // the escape cost is stored one slot past the last palette entry
          m_indexError[cu.curPLTSize[compBegin]][rasPos] = rdCost;

          if (rdCost < minError)
          {
            minError = rdCost;
            bestIdx = (uint8_t)cu.curPLTSize[compBegin];
          }
          m_minErrorIndexMap[rasPos] = bestIdx; // save the optimal index of the current pixel
        }
      }
    }
    
    
    void IntraSearch::deriveIndexMap(CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp, PLTScanMode pltScanMode, double& dMinCost, bool* idxExist)
    
    {
      CodingUnit    &cu = *cs.getCU(partitioner.chType);
      TransformUnit &tu = *cs.getTU(partitioner.chType);
      uint32_t      height = cu.block(compBegin).height;
      uint32_t      width = cu.block(compBegin).width;
    
      int   total     = height*width;
      Pel  *runIndex = tu.getPLTIndex(compBegin);
      bool *runType  = tu.getRunTypes(compBegin);
      m_scanOrder = g_scanOrder[SCAN_UNGROUPED][pltScanMode ? SCAN_TRAV_VER : SCAN_TRAV_HOR][gp_sizeIdxInfo->idxFrom(width)][gp_sizeIdxInfo->idxFrom(height)];
    // Trellis initialization
      for (int i = 0; i < 2; i++)
      {
        memset(m_prevRunTypeRDOQ[i], 0, sizeof(Pel)*NUM_TRELLIS_STATE);
        memset(m_prevRunPosRDOQ[i],  0, sizeof(int)*NUM_TRELLIS_STATE);
        memset(m_stateCostRDOQ[i],  0, sizeof (double)*NUM_TRELLIS_STATE);
      }
      for (int state = 0; state < NUM_TRELLIS_STATE; state++)
      {
        m_statePtRDOQ[state][0] = 0;
      }
    // Context modeling
      const FracBitsAccess& fracBits = m_CABACEstimator->getCtx().getFracBitsAcess();
      BinFracBits fracBitsPltCopyFlagIndex[RUN_IDX_THRE + 1];
      for (int dist = 0; dist <= RUN_IDX_THRE; dist++)
      {
        const unsigned  ctxId = DeriveCtx::CtxPltCopyFlag(PLT_RUN_INDEX, dist);
        fracBitsPltCopyFlagIndex[dist] = fracBits.getFracBitsArray(Ctx::IdxRunModel( ctxId ) );
      }
      BinFracBits fracBitsPltCopyFlagAbove[RUN_IDX_THRE + 1];
      for (int dist = 0; dist <= RUN_IDX_THRE; dist++)
      {
        const unsigned  ctxId = DeriveCtx::CtxPltCopyFlag(PLT_RUN_COPY, dist);
        fracBitsPltCopyFlagAbove[dist] = fracBits.getFracBitsArray(Ctx::CopyRunModel( ctxId ) );
      }
      const BinFracBits fracBitsPltRunType = fracBits.getFracBitsArray( Ctx::RunTypeFlag() );
    
    // Trellis RDO per CG
      bool contTrellisRD = true;
      for (int subSetId = 0; ( subSetId <= (total - 1) >> LOG2_PALETTE_CG_SIZE ) && contTrellisRD; subSetId++)
      {
        int minSubPos = subSetId << LOG2_PALETTE_CG_SIZE;
        int maxSubPos = minSubPos + (1 << LOG2_PALETTE_CG_SIZE);
        maxSubPos = (maxSubPos > total) ? total : maxSubPos; // if last position is out of the current CU size
        contTrellisRD = deriveSubblockIndexMap(cs, partitioner, compBegin, pltScanMode, minSubPos, maxSubPos, fracBitsPltRunType, fracBitsPltCopyFlagIndex, fracBitsPltCopyFlagAbove, dMinCost, (bool)pltScanMode);
      }
      if (!contTrellisRD)
      {
        return;
      }
    
    
    // best state at the last scan position
      double  sumRdCost = MAX_DOUBLE;
      uint8_t bestState = 0;
      for (uint8_t state = 0; state < NUM_TRELLIS_STATE; state++)
      {
        if (m_stateCostRDOQ[0][state] < sumRdCost)
        {
          sumRdCost = m_stateCostRDOQ[0][state];
          bestState = state;
        }
      }
    
         bool checkRunTable  [MAX_CU_BLKSIZE_PLT*MAX_CU_BLKSIZE_PLT];
      uint8_t checkIndexTable[MAX_CU_BLKSIZE_PLT*MAX_CU_BLKSIZE_PLT];
      uint8_t bestStateTable [MAX_CU_BLKSIZE_PLT*MAX_CU_BLKSIZE_PLT];
      uint8_t nextState = bestState;
    // best trellis path
      for (int i = (width*height - 1); i >= 0; i--)
      {
        bestStateTable[i] = nextState;
        int rasterPos = m_scanOrder[i].idx;
        nextState = m_statePtRDOQ[nextState][rasterPos];
      }
    // reconstruct index and runs based on the state pointers
      for (int i = 0; i < (width*height); i++)
      {
        int rasterPos = m_scanOrder[i].idx;
        int  abovePos = (pltScanMode == PLT_SCAN_HORTRAV) ? m_scanOrder[i].idx - width : m_scanOrder[i].idx - 1;
            nextState = bestStateTable[i];
        if ( nextState == 0 ) // same as the previous
        {
          checkRunTable[rasterPos] = checkRunTable[ m_scanOrder[i - 1].idx ];
          if ( checkRunTable[rasterPos] == PLT_RUN_INDEX )
          {
            checkIndexTable[rasterPos] = checkIndexTable[m_scanOrder[i - 1].idx];
          }
          else
          {
            checkIndexTable[rasterPos] = checkIndexTable[ abovePos ];
          }
        }
        else if (nextState == 1) // CopyAbove mode
        {
          checkRunTable[rasterPos] = PLT_RUN_COPY;
          checkIndexTable[rasterPos] = checkIndexTable[abovePos];
        }
        else if (nextState == 2) // Index mode
        {
          checkRunTable[rasterPos] = PLT_RUN_INDEX;
          checkIndexTable[rasterPos] = m_minErrorIndexMap[rasterPos];
        }
      }
    
    // Escape flag
      m_bestEscape = false;
      for (int pos = 0; pos < (width*height); pos++)
      {
        uint8_t index = checkIndexTable[pos];
        if (index == cu.curPLTSize[compBegin])