Skip to content
Snippets Groups Projects
IntraSearch.cpp 442 KiB
Newer Older
  • Learn to ignore specific revisions
  •   TransformUnit &tu = *cs.getTU(partitioner.chType);
      uint32_t      height = cu.block(compBegin).height;
      uint32_t      width = cu.block(compBegin).width;
    
      int   total     = height*width;
      Pel  *runIndex = tu.getPLTIndex(compBegin);
      bool *runType  = tu.getRunTypes(compBegin);
      m_scanOrder = g_scanOrder[SCAN_UNGROUPED][pltScanMode ? SCAN_TRAV_VER : SCAN_TRAV_HOR][gp_sizeIdxInfo->idxFrom(width)][gp_sizeIdxInfo->idxFrom(height)];
    // Trellis initialization
      for (int i = 0; i < 2; i++)
      {
        memset(m_prevRunTypeRDOQ[i], 0, sizeof(Pel)*NUM_TRELLIS_STATE);
        memset(m_prevRunPosRDOQ[i],  0, sizeof(int)*NUM_TRELLIS_STATE);
        memset(m_stateCostRDOQ[i],  0, sizeof (double)*NUM_TRELLIS_STATE);
      }
      for (int state = 0; state < NUM_TRELLIS_STATE; state++)
      {
        m_statePtRDOQ[state][0] = 0;
      }
    // Context modeling
      const FracBitsAccess& fracBits = m_CABACEstimator->getCtx().getFracBitsAcess();
      BinFracBits fracBitsPltCopyFlagIndex[RUN_IDX_THRE + 1];
      for (int dist = 0; dist <= RUN_IDX_THRE; dist++)
      {
        const unsigned  ctxId = DeriveCtx::CtxPltCopyFlag(PLT_RUN_INDEX, dist);
        fracBitsPltCopyFlagIndex[dist] = fracBits.getFracBitsArray(Ctx::IdxRunModel( ctxId ) );
      }
      BinFracBits fracBitsPltCopyFlagAbove[RUN_IDX_THRE + 1];
      for (int dist = 0; dist <= RUN_IDX_THRE; dist++)
      {
        const unsigned  ctxId = DeriveCtx::CtxPltCopyFlag(PLT_RUN_COPY, dist);
        fracBitsPltCopyFlagAbove[dist] = fracBits.getFracBitsArray(Ctx::CopyRunModel( ctxId ) );
      }
      const BinFracBits fracBitsPltRunType = fracBits.getFracBitsArray( Ctx::RunTypeFlag() );
    
    // Trellis RDO per CG
      bool contTrellisRD = true;
      for (int subSetId = 0; ( subSetId <= (total - 1) >> LOG2_PALETTE_CG_SIZE ) && contTrellisRD; subSetId++)
      {
        int minSubPos = subSetId << LOG2_PALETTE_CG_SIZE;
        int maxSubPos = minSubPos + (1 << LOG2_PALETTE_CG_SIZE);
        maxSubPos = (maxSubPos > total) ? total : maxSubPos; // if last position is out of the current CU size
        contTrellisRD = deriveSubblockIndexMap(cs, partitioner, compBegin, pltScanMode, minSubPos, maxSubPos, fracBitsPltRunType, fracBitsPltCopyFlagIndex, fracBitsPltCopyFlagAbove, dMinCost, (bool)pltScanMode);
      }
      if (!contTrellisRD)
      {
        return;
      }
    
    
    // best state at the last scan position
      double  sumRdCost = MAX_DOUBLE;
      uint8_t bestState = 0;
      for (uint8_t state = 0; state < NUM_TRELLIS_STATE; state++)
      {
        if (m_stateCostRDOQ[0][state] < sumRdCost)
        {
          sumRdCost = m_stateCostRDOQ[0][state];
          bestState = state;
        }
      }
    
         bool checkRunTable  [MAX_CU_BLKSIZE_PLT*MAX_CU_BLKSIZE_PLT];
      uint8_t checkIndexTable[MAX_CU_BLKSIZE_PLT*MAX_CU_BLKSIZE_PLT];
      uint8_t bestStateTable [MAX_CU_BLKSIZE_PLT*MAX_CU_BLKSIZE_PLT];
      uint8_t nextState = bestState;
    // best trellis path
      for (int i = (width*height - 1); i >= 0; i--)
      {
        bestStateTable[i] = nextState;
        int rasterPos = m_scanOrder[i].idx;
        nextState = m_statePtRDOQ[nextState][rasterPos];
      }
    // reconstruct index and runs based on the state pointers
      for (int i = 0; i < (width*height); i++)
      {
        int rasterPos = m_scanOrder[i].idx;
        int  abovePos = (pltScanMode == PLT_SCAN_HORTRAV) ? m_scanOrder[i].idx - width : m_scanOrder[i].idx - 1;
            nextState = bestStateTable[i];
        if ( nextState == 0 ) // same as the previous
        {
          checkRunTable[rasterPos] = checkRunTable[ m_scanOrder[i - 1].idx ];
          if ( checkRunTable[rasterPos] == PLT_RUN_INDEX )
          {
            checkIndexTable[rasterPos] = checkIndexTable[m_scanOrder[i - 1].idx];
          }
          else
          {
            checkIndexTable[rasterPos] = checkIndexTable[ abovePos ];
          }
        }
        else if (nextState == 1) // CopyAbove mode
        {
          checkRunTable[rasterPos] = PLT_RUN_COPY;
          checkIndexTable[rasterPos] = checkIndexTable[abovePos];
        }
        else if (nextState == 2) // Index mode
        {
          checkRunTable[rasterPos] = PLT_RUN_INDEX;
          checkIndexTable[rasterPos] = m_minErrorIndexMap[rasterPos];
        }
      }
    
    // Escape flag
      m_bestEscape = false;
      for (int pos = 0; pos < (width*height); pos++)
      {
        uint8_t index = checkIndexTable[pos];
        if (index == cu.curPLTSize[compBegin])
        {
          m_bestEscape = true;
          break;
        }
      }
    
    // Horizontal scan v.s vertical scan
      if (sumRdCost < dMinCost)
      {
        cu.useEscape[compBegin] = m_bestEscape;
        m_bestScanRotationMode = pltScanMode;
    
        memset(idxExist, false, sizeof(bool) * (MAXPLTSIZE + 1));
    
        for (int pos = 0; pos < (width*height); pos++)
        {
          runIndex[pos] = checkIndexTable[pos];
          runType[pos] = checkRunTable[pos];
    
          idxExist[checkIndexTable[pos]] = true;
    
        }
        dMinCost = sumRdCost;
      }
    }
    
    // Advances the palette run/index trellis over the scan positions [minSubPos, maxSubPos).
    //
    // Each scan position has NUM_TRELLIS_STATE candidate states:
    //   state 0 - same run type/index as the previously scanned sample,
    //   state 1 - copy-above mode,
    //   state 2 - index mode using m_minErrorIndexMap.
    // For every (position, state) the cheapest predecessor state is selected with rateDistOptPLT()
    // and stored in m_statePtRDOQ so that the caller can trace the best path backwards.
    //
    // Returns false (0) only when useRotate is set and the cost of every state at some position is
    // already >= minCost; otherwise returns true (1) after the last position has been processed.
    bool IntraSearch::deriveSubblockIndexMap(
      CodingStructure& cs,
      Partitioner&  partitioner,
      ComponentID   compBegin,
      PLTScanMode   pltScanMode,
      int           minSubPos,                      // first scan position to process (inclusive)
      int           maxSubPos,                      // last scan position to process (exclusive)
      const BinFracBits& fracBitsPltRunType,        // rate table forwarded to rateDistOptPLT() for the run-type flag
      const BinFracBits* fracBitsPltIndexINDEX,     // rate tables used when the last coded run was PLT_RUN_INDEX
      const BinFracBits* fracBitsPltIndexCOPY,      // rate tables used when the last coded run was not PLT_RUN_INDEX
      const double minCost,                         // cost to beat; only consulted when useRotate is true
      bool         useRotate                        // enables the early termination against minCost
    )
    {
      CodingUnit &cu    = *cs.getCU(partitioner.chType);
      uint32_t   height = cu.block(compBegin).height;
      uint32_t   width  = cu.block(compBegin).width;
      int indexMaxValue = cu.curPLTSize[compBegin];

      // refId selects the trellis buffers of the previous position; 1 - refId receives the current one.
      int refId = 0;
      int currRasterPos, currScanPos, prevScanPos, aboveScanPos, roffset;
      // Scan-line length (log2) and history length (two scan lines) in the scan direction.
      int log2Width = (pltScanMode == PLT_SCAN_HORTRAV) ? floorLog2(width): floorLog2(height);
      int buffersize = (pltScanMode == PLT_SCAN_HORTRAV) ? 2*width: 2*height;
      for (int curPos = minSubPos; curPos < maxSubPos; curPos++)
      {
        currRasterPos = m_scanOrder[curPos].idx;
        // The per-state history maps are indexed by scan position modulo buffersize.
        prevScanPos = (curPos == 0) ? 0 : (curPos - 1) % buffersize;
        // roffset is the scan position at which the current scan line starts.
        roffset = (curPos >> log2Width) << log2Width;
        // Mirrored offset inside the previous scan line: (line start - 1) - (offset within current line).
        aboveScanPos = roffset - (curPos - roffset + 1);
        aboveScanPos %= buffersize;
        currScanPos = curPos % buffersize;
        if ((pltScanMode == PLT_SCAN_HORTRAV && curPos < width) || (pltScanMode == PLT_SCAN_VERTRAV && curPos < height))
        {
          aboveScanPos = -1; // first column/row: above row is not valid
        }


    // State numbering used below:
    //   0: same as previous scanned sample
    //   1: Copy_Above mode

    //   2: Index mode

    // Loop over the candidate state of the current position

        for ( int curState = 0; curState < NUM_TRELLIS_STATE; curState++ )

        {
          double    minRdCost          = MAX_DOUBLE;
          int       minState           = 0; // best prevState
          uint8_t   bestRunIndex       = 0;
          bool      bestRunType        = 0;
          bool      bestPrevCodedType  = 0;
          int       bestPrevCodedPos   = 0;
          // State 0 needs a previous sample and state 1 needs a valid sample above.
          if ( ( curState == 0 && curPos == 0 ) || ( curState == 1 && aboveScanPos < 0 ) ) // state not available
          {
            m_stateCostRDOQ[1 - refId][curState] = MAX_DOUBLE;
            continue;
          }

          bool    runType  = 0;
          uint8_t runIndex = 0;
          if ( curState == 1 ) // 2nd state: Copy_Above mode
          {
            runType = PLT_RUN_COPY;
          }

          else if ( curState == 2 ) // 3rd state: Index mode

          {
            runType = PLT_RUN_INDEX;
            runIndex = m_minErrorIndexMap[currRasterPos];
          }

    // Loop over the candidate predecessor state (state chosen at the previous position)

          for ( int stateID = 0; stateID < NUM_TRELLIS_STATE; stateID++ )

          {
            // Predecessor states that were marked unavailable cannot be extended.
            if ( m_stateCostRDOQ[refId][stateID] == MAX_DOUBLE )
            {
              continue;
            }
            if ( curState == 0 ) // 1st state: same as previous scanned sample
            {
              // Inherit the run type of the predecessor path; the index comes from the previous
              // sample for index runs and from the sample above otherwise.
              runType = m_runMapRDOQ[refId][stateID][prevScanPos];
              runIndex = ( runType == PLT_RUN_INDEX ) ? m_indexMapRDOQ[refId][stateID][ prevScanPos ] : m_indexMapRDOQ[refId][stateID][ aboveScanPos ];
            }
            else if ( curState == 1 ) // 2nd state: Copy_Above mode
            {
              runIndex = m_indexMapRDOQ[refId][stateID][aboveScanPos];
            }
            bool    prevRunType   = m_runMapRDOQ[refId][stateID][prevScanPos];
            uint8_t prevRunIndex  = m_indexMapRDOQ[refId][stateID][prevScanPos];
            uint8_t aboveRunIndex = (aboveScanPos >= 0) ? m_indexMapRDOQ[refId][stateID][aboveScanPos] : 0;
            // Number of positions between the last coded run start on this path and the current position.
            int      dist = curPos - m_prevRunPosRDOQ[refId][stateID] - 1;
            double rdCost = m_stateCostRDOQ[refId][stateID];
            // The accumulated cost alone is already no better than the best candidate so far.
            if ( rdCost >= minRdCost ) continue;


    // Calculate Rd cost
    // prevCodedRunType / prevCodedPos are local copies that rateDistOptPLT() may update by reference.

            bool prevCodedRunType = m_prevRunTypeRDOQ[refId][stateID];
            int  prevCodedPos     = m_prevRunPosRDOQ [refId][stateID];
            const BinFracBits* fracBitsPt = (m_prevRunTypeRDOQ[refId][stateID] == PLT_RUN_INDEX) ? fracBitsPltIndexINDEX : fracBitsPltIndexCOPY;
            rdCost += rateDistOptPLT(runType, runIndex, prevRunType, prevRunIndex, aboveRunIndex, prevCodedRunType, prevCodedPos, curPos, (pltScanMode == PLT_SCAN_HORTRAV) ? width : height, dist, indexMaxValue, fracBitsPt, fracBitsPltRunType);
            if (rdCost < minRdCost) // update minState ( minRdCost )
            {
              minRdCost    = rdCost;
              minState     = stateID;
              bestRunType  = runType;
              bestRunIndex = runIndex;
              bestPrevCodedType = prevCodedRunType;
              bestPrevCodedPos  = prevCodedPos;
            }
          }
    // Update trellis info of current state
          m_stateCostRDOQ  [1 - refId][curState]  = minRdCost;
          m_prevRunTypeRDOQ[1 - refId][curState]  = bestPrevCodedType;
          m_prevRunPosRDOQ [1 - refId][curState]  = bestPrevCodedPos;
          // Back pointer for the path trace; indexed by raster position, not scan position.
          m_statePtRDOQ[curState][currRasterPos] = minState;
          // Carry over the history of the winning predecessor, then record the current sample.
          int buffer2update = std::min(buffersize, curPos);
          memcpy(m_indexMapRDOQ[1 - refId][curState], m_indexMapRDOQ[refId][minState], sizeof(uint8_t)*buffer2update);
          memcpy(m_runMapRDOQ[1 - refId][curState], m_runMapRDOQ[refId][minState], sizeof(bool)*buffer2update);
          m_indexMapRDOQ[1 - refId][curState][currScanPos] = bestRunIndex;
          m_runMapRDOQ  [1 - refId][curState][currScanPos] = bestRunType;
        }

        if (useRotate) // early terminate: Rd cost >= min cost in horizontal scan
        {
          // Give up as soon as no state at this position can beat minCost.
          if ((m_stateCostRDOQ[1 - refId][0] >= minCost) &&
             (m_stateCostRDOQ[1 - refId][1] >= minCost) &&
             (m_stateCostRDOQ[1 - refId][2] >= minCost) )
          {
            return 0;
          }
        }
        // The buffers just written become the "previous position" buffers of the next iteration.
        refId = 1 - refId;
      }
      return 1;
    }
    
    double IntraSearch::rateDistOptPLT(
      bool      runType,
      uint8_t   runIndex,
      bool      prevRunType,
      uint8_t   prevRunIndex,
      uint8_t   aboveRunIndex,
      bool&     prevCodedRunType,
      int&      prevCodedPos,
      int       scanPos,
      uint32_t  width,
      int       dist,
      int       indexMaxValue,
      const BinFracBits* IndexfracBits,
      const BinFracBits& TypefracBits)
    {
      double rdCost = 0.0;
      bool identityFlag = !( (runType != prevRunType) || ( (runType == PLT_RUN_INDEX) && (runIndex != prevRunIndex) ) );
    
      if ( ( !identityFlag && runType == PLT_RUN_INDEX ) || scanPos == 0 ) // encode index value
      {
        uint8_t refIndex = (prevRunType == PLT_RUN_INDEX) ? prevRunIndex : aboveRunIndex;
        refIndex = (scanPos == 0) ? ( indexMaxValue + 1) : refIndex;
        if ( runIndex == refIndex )
        {
          rdCost = MAX_DOUBLE;
          return rdCost;
        }
    
        rdCost += m_pcRdCost->getLambda()*(m_truncBinBits[(runIndex > refIndex) ? runIndex - 1 : runIndex][(scanPos == 0) ? (indexMaxValue + 1) : indexMaxValue] << SCALE_BITS);
    
      rdCost += m_indexError[runIndex][m_scanOrder[scanPos].idx] * (1 << SCALE_BITS);
    
        rdCost += m_pcRdCost->getLambda()*( identityFlag ? (IndexfracBits[(dist < RUN_IDX_THRE) ? dist : RUN_IDX_THRE].intBits[1]) : (IndexfracBits[(dist < RUN_IDX_THRE) ? dist : RUN_IDX_THRE].intBits[0] ) );
    
      }
      if ( !identityFlag && scanPos >= width && prevRunType != PLT_RUN_COPY )
      {
    
        rdCost += m_pcRdCost->getLambda()*TypefracBits.intBits[runType];
    
      }
      if (!identityFlag || scanPos == 0)
      {
        prevCodedRunType = runType;
        prevCodedPos = scanPos;
      }
      return rdCost;
    }
    // Number of bins needed to code `symbol` with an Exp-Golomb code of order `count`:
    // one prefix bin per escalation step, one terminating bin, and a suffix whose length is
    // the order reached after the escalation.
    uint32_t IntraSearch::getEpExGolombNumBins(uint32_t symbol, uint32_t count)
    {
      uint32_t prefixBins = 0;
      // Each step removes the current bucket (2^count) from the symbol and doubles the bucket.
      for (uint32_t bucket = (uint32_t)(1 << count); symbol >= bucket; bucket = (uint32_t)(1 << count))
      {
        symbol -= bucket;
        ++prefixBins;
        ++count;
      }
      const uint32_t totalBins = prefixBins + 1 + count;
      assert(totalBins <= 32);
      return totalBins;
    }
    
    // Code length, in bits, of `symbol` in a truncated binary code over `maxSymbol` symbols.
    // With k such that 2^k <= maxSymbol < 2^(k+1) (enforced by the asserts below), the first
    // 2^(k+1) - maxSymbol symbols take k bits and the remaining ones take k + 1 bits.
    // Requires symbol < maxSymbol.
    uint32_t IntraSearch::getTruncBinBits(uint32_t symbol, uint32_t maxSymbol)
    {
      uint32_t log2Floor;
      if (maxSymbol <= 256)
      {
        // Small alphabets: exponent comes from the lookup table.
        log2Floor = g_tbMax[maxSymbol];
      }
      else
      {
        // Large alphabets: count the powers of two, starting at 2^8, that do not exceed maxSymbol.
        log2Floor = 7;
        for (uint32_t pow2 = 1 << 8; pow2 <= maxSymbol; pow2 <<= 1)
        {
          log2Floor++;
        }
      }

      const uint32_t basePow2 = 1 << log2Floor;
      assert(basePow2 <= maxSymbol);
      assert((basePow2 << 1) > maxSymbol);
      assert(symbol < maxSymbol);

      // Number of symbols beyond the power of two; these force the longer code words.
      const uint32_t excess = maxSymbol - basePow2;
      assert(excess < basePow2);

      return (symbol < basePow2 - excess) ? log2Floor : log2Floor + 1;
    }
    
    // Fills the bit-count lookup tables used by the palette rate estimation:
    //   m_truncBinBits[j][i] - truncated binary code length of symbol j in an alphabet of i symbols
    //                          (entries with j >= i stay zero),
    //   m_escapeNumBins[i]   - bin count of getEpExGolombNumBins(i, 5).
    // Both tables are sized by m_symbolSize; bitDepth is not used by this function.
    void IntraSearch::initTBCTable(int bitDepth)
    {
      for (uint32_t i = 0; i < m_symbolSize; i++)
      {
        memset(m_truncBinBits[i], 0, sizeof(uint16_t)*(m_symbolSize + 1));
      }
      // Alphabet sizes run up to and including m_symbolSize; symbols are always below the size.
      for (uint32_t i = 0; i < (m_symbolSize + 1); i++)
      {
        for (uint32_t j = 0; j < i; j++)
        {
          m_truncBinBits[j][i] = getTruncBinBits(j, i);
        }
      }
      memset(m_escapeNumBins, 0, sizeof(uint16_t)*m_symbolSize);
      for (uint32_t i = 0; i < m_symbolSize; i++)
      {
        m_escapeNumBins[i] = getEpExGolombNumBins(i, 5);
      }
    }

    // Derives the escape value and the reconstructed sample for position (xPos, yPos) of the
    // current CU, for each component in [compBegin, compBegin + numComp).
    //
    // Lossless: the source sample is stored as escape value and as reconstruction.
    // Lossy:    the source sample is quantised with the TU's QP, the level is stored as escape
    //           value, and the de-quantised, bit-depth-clipped value is stored as reconstruction.
    //
    // When the CU starts at luma (compBegin == COMPONENT_Y), chroma components are addressed at
    // the sub-sampled position and only processed for luma positions that are multiples of the
    // chroma sub-sampling step.
    void IntraSearch::calcPixelPred(CodingStructure& cs, Partitioner& partitioner, uint32_t yPos, uint32_t xPos, ComponentID compBegin, uint32_t numComp)
    {
      CodingUnit    &cu = *cs.getCU(partitioner.chType);
      TransformUnit &tu = *cs.getTU(partitioner.chType);
      bool lossless = (m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && cs.slice->isLossless());

      // Source samples: the prediction buffer is read instead of the original buffer when LMCS is active.
      CPelBuf   orgBuf[3];
      for (int comp = compBegin; comp < (compBegin + numComp); comp++)
      {
        CompArea  area = cu.blocks[comp];
        if (m_pcEncCfg->getLmcs() && (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))
        {
          orgBuf[comp] = cs.getPredBuf(area);
        }
        else
        {
          orgBuf[comp] = cs.getOrgBuf(area);
        }
      }

      // Per-component quantiser parameters; only needed (and only set up) for lossy coding.
      int qp[3];
      int qpRem[3];
      int qpPer[3];
      int quantiserScale[3];
      int quantiserRightShift[3];
      int rightShiftOffset[3];
      int invquantiserRightShift[3];
      int add[3];
      if (!lossless)
      {
        for (uint32_t ch = compBegin; ch < (compBegin + numComp); ch++)
        {
          QpParam cQP(tu, ComponentID(ch));
          qp[ch]                     = cQP.Qp(true);
          qpRem[ch]                  = qp[ch] % 6;
          qpPer[ch]                  = qp[ch] / 6;
          quantiserScale[ch]         = g_quantScales[0][qpRem[ch]];
          quantiserRightShift[ch]    = QUANT_SHIFT + qpPer[ch];
          rightShiftOffset[ch]       = 1 << (quantiserRightShift[ch] - 1);
          invquantiserRightShift[ch] = IQUANT_SHIFT;
          add[ch]                    = 1 << (invquantiserRightShift[ch] - 1);
        }
      }

      uint32_t scaleX = getComponentScaleX(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
      uint32_t scaleY = getComponentScaleY(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
      for (uint32_t ch = compBegin; ch < (compBegin + numComp); ch++)
      {
        const int channelBitDepth = cu.cs->sps->getBitDepth(toChannelType((ComponentID)ch));
        CompArea  area = cu.blocks[ch];
        PelBuf    recBuf = cs.getRecoBuf(area);
        PLTescapeBuf escapeValue = tu.getescapeValue((ComponentID)ch);

        // Sample position inside this component's buffers.
        uint32_t posX = xPos;
        uint32_t posY = yPos;
        if (compBegin == COMPONENT_Y && ch > 0)
        {
          // Chroma of a joint luma+chroma CU: one chroma sample per sub-sampling cell.
          if (yPos % (1 << scaleY) != 0 || xPos % (1 << scaleX) != 0)
          {
            continue;
          }
          posY = yPos >> scaleY;
          posX = xPos >> scaleX;
        }

        if (lossless)
        {
          escapeValue.at(posX, posY) = orgBuf[ch].at(posX, posY);
    #if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT_VS
          recBuf.at(posX, posY)      = orgBuf[ch].at(posX, posY);
    #else
          recBuf.at(posX, posY)      = escapeValue.at(posX, posY);
    #endif
        }
        else
        {
    #if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT_VS
          escapeValue.at(posX, posY) = std::max<TCoeff>(0, ((orgBuf[ch].at(posX, posY) * quantiserScale[ch] + rightShiftOffset[ch]) >> quantiserRightShift[ch]));
          assert(escapeValue.at(posX, posY) < (TCoeff(1) << (channelBitDepth + 1)));
          TCoeff value = (((escapeValue.at(posX, posY)*g_invQuantScales[0][qpRem[ch]]) << qpPer[ch]) + add[ch]) >> invquantiserRightShift[ch];
          recBuf.at(posX, posY) = Pel(ClipBD<TCoeff>(value, channelBitDepth));//to be checked
    #else
          escapeValue.at(posX, posY) = TCoeff(std::max<int>(0, ((orgBuf[ch].at(posX, posY) * quantiserScale[ch] + rightShiftOffset[ch]) >> quantiserRightShift[ch])));
          assert(escapeValue.at(posX, posY) < (1 << (channelBitDepth + 1)));
          recBuf.at(posX, posY) = (((escapeValue.at(posX, posY)*g_invQuantScales[0][qpRem[ch]]) << qpPer[ch]) + add[ch]) >> invquantiserRightShift[ch];
          recBuf.at(posX, posY) = Pel(ClipBD<int>(recBuf.at(posX, posY), channelBitDepth));//to be checked
    #endif
        }
      }
    }
    
    void IntraSearch::derivePLTLossy(CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp)
    
      CodingUnit &cu = *cs.getCU(partitioner.chType);
      const int channelBitDepth_L = cs.sps->getBitDepth(CHANNEL_TYPE_LUMA);
      const int channelBitDepth_C = cs.sps->getBitDepth(CHANNEL_TYPE_CHROMA);
    
    Vadim Seregin's avatar
    Vadim Seregin committed
    
    
      bool lossless        = (m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && cs.slice->isLossless());
    
      int  pcmShiftRight_L = (channelBitDepth_L - PLT_ENCBITDEPTH);
      int  pcmShiftRight_C = (channelBitDepth_C - PLT_ENCBITDEPTH);
      if (lossless)
      {
        pcmShiftRight_L = 0;
        pcmShiftRight_C = 0;
      }
    
    Vadim Seregin's avatar
    Vadim Seregin committed
    #if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
    
      int maxPltSize = cu.isSepTree() ? MAXPLTSIZE_DUALTREE : MAXPLTSIZE;
    
    Vadim Seregin's avatar
    Vadim Seregin committed
    #else
      int maxPltSize = CS::isDualITree(cs) ? MAXPLTSIZE_DUALTREE : MAXPLTSIZE;
    #endif
    
      uint32_t height = cu.block(compBegin).height;
      uint32_t width = cu.block(compBegin).width;
    
    
      CPelBuf   orgBuf[3];
      for (int comp = compBegin; comp < (compBegin + numComp); comp++)
      {
        CompArea  area = cu.blocks[comp];
    
        if (m_pcEncCfg->getLmcs() && (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))
    
        {
          orgBuf[comp] = cs.getPredBuf(area);
        }
        else
        {
          orgBuf[comp] = cs.getOrgBuf(area);
        }
      }
    
    
      TransformUnit &tu = *cs.getTU(partitioner.chType);
      QpParam cQP(tu, compBegin);
      int qp = cQP.Qp(true) - 12;
      qp = (qp < 0) ? 0 : ((qp > 56) ? 56 : qp);
      int errorLimit = g_paletteQuant[qp];
    
      uint32_t totalSize = height*width;
      SortingElement *pelList = new SortingElement[totalSize];
      SortingElement  element;
      SortingElement *pelListSort = new SortingElement[MAXPLTSIZE + 1];
    
      uint32_t dictMaxSize = maxPltSize;
    
      int last = -1;
    
      uint32_t scaleX = getComponentScaleX(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
      uint32_t scaleY = getComponentScaleY(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
    
      for (uint32_t y = 0; y < height; y++)
      {
        for (uint32_t x = 0; x < width; x++)
        {
          uint32_t org[3], pX, pY;
          for (int comp = compBegin; comp < (compBegin + numComp); comp++)
          {
            pX = (comp > 0 && compBegin == COMPONENT_Y) ? (x >> scaleX) : x;
            pY = (comp > 0 && compBegin == COMPONENT_Y) ? (y >> scaleY) : y;
            org[comp] = orgBuf[comp].at(pX, pY);
          }
          element.setAll(org, compBegin, numComp);
    
          ComponentID tmpCompBegin = compBegin;
          int tmpNumComp = numComp;
    
          if( cs.sps->getChromaFormatIdc() != CHROMA_444 &&
              numComp == 3 &&
    
             (x != ((x >> scaleX) << scaleX) || (y != ((y >> scaleY) << scaleY))) )
          {
            tmpCompBegin = COMPONENT_Y;
            tmpNumComp   = 1;
          }
    
          int besti = last, bestSAD = (last == -1) ? MAX_UINT : pelList[last].getSAD(element, cs.sps->getBitDepths(), tmpCompBegin, tmpNumComp, lossless);
          if (lossless)
          {
            if (bestSAD)
            {
              for (int i = idx - 1; i >= 0; i--)
              {
                uint32_t sad = pelList[i].getSAD(element, cs.sps->getBitDepths(), tmpCompBegin, tmpNumComp, lossless);
                if (sad == 0)
                {
                  bestSAD = sad;
                  besti   = i;
                  break;
                }
              }
            }
          }
          else
          {
    
              for (int i = idx - 1; i >= 0; i--)
    
                uint32_t sad = pelList[i].getSAD(element, cs.sps->getBitDepths(), tmpCompBegin, tmpNumComp, lossless);
                if (sad < bestSAD)
                {
                  bestSAD = sad;
                  besti   = i;
                  if (!sad)
                  {
                    break;
                  }
                }
    
          if (besti >= 0 && pelList[besti].almostEqualData(element, errorLimit, cs.sps->getBitDepths(), tmpCompBegin, tmpNumComp, lossless))
    
          {
            pelList[besti].addElement(element, tmpCompBegin, tmpNumComp);
            last = besti;
          }
          else
          {
            pelList[idx].copyDataFrom(element, tmpCompBegin, tmpNumComp);
            for (int comp = tmpCompBegin; comp < (tmpCompBegin + tmpNumComp); comp++)
    
              pelList[idx].setCnt(1, comp);
    
            last = idx;
            idx++;
          }
        }
      }
    
      if( cs.sps->getChromaFormatIdc() != CHROMA_444 && numComp == 3 )
      {
        for( int i = 0; i < idx; i++ )
        {
          pelList[i].setCnt( pelList[i].getCnt(COMPONENT_Y) + (pelList[i].getCnt(COMPONENT_Cb) >> 2), MAX_NUM_COMPONENT);
        }
      }
      else
      {
        if( compBegin == 0 )
        {
          for( int i = 0; i < idx; i++ )
          {
            pelList[i].setCnt(pelList[i].getCnt(COMPONENT_Y), COMPONENT_Cb);
            pelList[i].setCnt(pelList[i].getCnt(COMPONENT_Y), COMPONENT_Cr);
            pelList[i].setCnt(pelList[i].getCnt(COMPONENT_Y), MAX_NUM_COMPONENT);
          }
        }
        else
        {
          for( int i = 0; i < idx; i++ )
          {
            pelList[i].setCnt(pelList[i].getCnt(COMPONENT_Cb), COMPONENT_Y);
            pelList[i].setCnt(pelList[i].getCnt(COMPONENT_Cb), MAX_NUM_COMPONENT);
          }
        }
      }
    
      for (int i = 0; i < dictMaxSize; i++)
    
        pelListSort[i].setCnt(0, COMPONENT_Y);
        pelListSort[i].setCnt(0, COMPONENT_Cb);
        pelListSort[i].setCnt(0, COMPONENT_Cr);
        pelListSort[i].setCnt(0, MAX_NUM_COMPONENT);
    
        pelListSort[i].resetAll(compBegin, numComp);
    
      dictMaxSize = 1;
      for (int i = 0; i < idx; i++)
    
        if( pelList[i].getCnt(MAX_NUM_COMPONENT) > pelListSort[dictMaxSize - 1].getCnt(MAX_NUM_COMPONENT) )
    
          for (j = dictMaxSize; j > 0; j--)
    
            if (pelList[i].getCnt(MAX_NUM_COMPONENT) > pelListSort[j - 1].getCnt(MAX_NUM_COMPONENT))
    
              pelListSort[j].copyAllFrom(pelListSort[j - 1], compBegin, numComp);
    
              dictMaxSize = std::min(dictMaxSize + 1, (uint32_t)maxPltSize);
    
          pelListSort[j].copyAllFrom(pelList[i], compBegin, numComp);
    
      uint32_t paletteSize = 0;
    
      uint64_t numColorBits = 0;
      for (int comp = compBegin; comp < (compBegin + numComp); comp++)
      {
        numColorBits += (comp > 0) ? channelBitDepth_C : channelBitDepth_L;
      }
    
      const int plt_lambda_shift = (compBegin > 0) ? pcmShiftRight_C : pcmShiftRight_L;
      double    bitCost          = m_pcRdCost->getLambda() / (double) (1 << (2 * plt_lambda_shift)) * numColorBits;
    
      bool   reuseflag[MAXPLTPREDSIZE] = { false };
      int    run;
      double reuseflagCost;
    
      for (int i = 0; i < maxPltSize; i++)
    
      {
        if( pelListSort[i].getCnt(MAX_NUM_COMPONENT) )
        {
          ComponentID tmpCompBegin = compBegin;
          int tmpNumComp = numComp;
          if( cs.sps->getChromaFormatIdc() != CHROMA_444 && numComp == 3 && pelListSort[i].getCnt(COMPONENT_Cb) == 0 )
          {
            tmpCompBegin = COMPONENT_Y;
            tmpNumComp   = 1;
          }
    
          for( int comp = tmpCompBegin; comp < (tmpCompBegin + tmpNumComp); comp++ )
          {
            int half = pelListSort[i].getCnt(comp) >> 1;
            cu.curPLT[comp][paletteSize] = (pelListSort[i].getSumData(comp) + half) / pelListSort[i].getCnt(comp);
          }
    
          int best = -1;
          if( errorLimit )
          {
            double pal[MAX_NUM_COMPONENT], err = 0.0, bestCost = 0.0;
            for( int comp = tmpCompBegin; comp < (tmpCompBegin + tmpNumComp); comp++ )
            {
              pal[comp] = pelListSort[i].getSumData(comp) / (double)pelListSort[i].getCnt(comp);
              err = pal[comp] - cu.curPLT[comp][paletteSize];
              if( isChroma((ComponentID) comp) )
              {
                bestCost += (err * err * PLT_CHROMA_WEIGHTING) / (1 << (2 * pcmShiftRight_C)) * pelListSort[i].getCnt(comp);
              }
              else
              {
                bestCost += (err * err) / (1 << (2 * pcmShiftRight_L)) * pelListSort[i].getCnt(comp);
              }
            }
            bestCost += bitCost;
    
            for( int t = 0; t < cs.prevPLT.curPLTSize[compBegin]; t++ )
            {
              double cost = 0.0;
              for( int comp = tmpCompBegin; comp < (tmpCompBegin + tmpNumComp); comp++ )
              {
                err = pal[comp] - cs.prevPLT.curPLT[comp][t];
                if( isChroma((ComponentID) comp) )
                {
                  cost += (err * err * PLT_CHROMA_WEIGHTING) / (1 << (2 * pcmShiftRight_C)) * pelListSort[i].getCnt(comp);
                }
                else
                {
                  cost += (err * err) / (1 << (2 * pcmShiftRight_L)) * pelListSort[i].getCnt(comp);
                }
              }
              run = 0;
              for (int t2 = t; t2 >= 0; t2--)
              {
                if (!reuseflag[t2])
                {
                  run++;
                }
                else
                {
                  break;
                }
              }
              reuseflagCost = m_pcRdCost->getLambda() / (double)(1 << (2 * plt_lambda_shift)) * getEpExGolombNumBins(run ? run + 1 : run, 0);
              cost += reuseflagCost;
    
              if( cost < bestCost )
              {
                best = t;
                bestCost = cost;
              }
            }
            if( best != -1 )
            {
              for( int comp = tmpCompBegin; comp < (tmpCompBegin + tmpNumComp); comp++ )
              {
                cu.curPLT[comp][paletteSize] = cs.prevPLT.curPLT[comp][best];
              }
              reuseflag[best] = true;
            }
          }
    
          bool duplicate = false;
          if( pelListSort[i].getCnt(MAX_NUM_COMPONENT) == 1 && best == -1 )
          {
            duplicate = true;
          }
          else
          {
            for( int t = 0; t < paletteSize; t++ )
            {
              bool duplicateTmp = true;
              for( int comp = tmpCompBegin; comp < (tmpCompBegin + tmpNumComp); comp++ )
              {
                duplicateTmp = duplicateTmp && (cu.curPLT[comp][paletteSize] == cu.curPLT[comp][t]);
              }
              if( duplicateTmp )
              {
                duplicate = true;
                break;
              }
            }
          }
          if( !duplicate )
          {
            if( cs.sps->getChromaFormatIdc() != CHROMA_444 && numComp == 3 && pelListSort[i].getCnt(COMPONENT_Cb) == 0 )
            {
              if( best != -1 )
              {
                cu.curPLT[COMPONENT_Cb][paletteSize] = cs.prevPLT.curPLT[COMPONENT_Cb][best];
                cu.curPLT[COMPONENT_Cr][paletteSize] = cs.prevPLT.curPLT[COMPONENT_Cr][best];
              }
              else
              {
                cu.curPLT[COMPONENT_Cb][paletteSize] = 1 << (channelBitDepth_C - 1);
                cu.curPLT[COMPONENT_Cr][paletteSize] = 1 << (channelBitDepth_C - 1);
              }
            }
            paletteSize++;
          }
        }
        else
        {
          break;
        }
      }
    
      cu.curPLTSize[compBegin] = paletteSize;
    
    Vadim Seregin's avatar
    Vadim Seregin committed
    #if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
    
      if( cu.isLocalSepTree() )
    
        cu.curPLTSize[COMPONENT_Y] = paletteSize;
    
    #endif
    
      delete[] pelList;
      delete[] pelListSort;
    
    // -------------------------------------------------------------------------------------------------------------------
    // Intra search
    // -------------------------------------------------------------------------------------------------------------------
    
    
    // Passes the intra CU header syntax elements of the current partition to
    // m_CABACEstimator.
    //
    // \param cs           coding structure the CU and PUs are fetched from
    // \param partitioner  supplies the current area and channel type
    // \param bLuma        when true, the luma part of the header is processed
    // \param bChroma      when true, the chroma part of the header is processed
    // \param subTuIdx     sub-TU index; only consulted when cu.ispMode is non-zero
    void IntraSearch::xEncIntraHeader( CodingStructure &cs, Partitioner &partitioner, const bool &bLuma, const bool &bChroma, const int subTuIdx )
    {
      CodingUnit &cu = *cs.getCU( partitioner.chType );

      if (bLuma)
      {
        // With ISP the first sub-TU is identified by its index; otherwise the
        // current area must start at the luma position of the coding structure.
        bool isFirst = cu.ispMode ? subTuIdx == 0 : partitioner.currArea().lumaPos() == cs.area.lumaPos();

        // CU header
        if (isFirst)
        {
    #if JVET_AD0208_IBC_ADAPT_FOR_CAM_CAPTURED_CONTENTS
          if ((!cs.slice->isIntra() || cs.slice->getUseIBC() || cs.slice->getSPS()->getPLTMode())
    #else
          if ((!cs.slice->isIntra() || cs.slice->getSPS()->getIBCFlag() || cs.slice->getSPS()->getPLTMode())
    #endif
              && cu.Y().valid())
          {
            m_CABACEstimator->cu_skip_flag( cu );
            m_CABACEstimator->pred_mode   ( cu );
          }
    #if ENABLE_DIMD
          m_CABACEstimator->cu_dimd_flag(cu);
    #endif
          // NOTE(review): the reviewed text had a gap at this point; confirm against
          // the repository that no further header statement belongs here.
        }

        PredictionUnit &pu = *cs.getPU(partitioner.currArea().lumaPos(), partitioner.chType);

        // luma prediction mode
        if (isFirst)
        {
          // pred_mode was not issued by the CU header above when the luma block is invalid.
          if ( !cu.Y().valid())
          {
            m_CABACEstimator->pred_mode( cu );
          }
          m_CABACEstimator->bdpcm_mode( cu, COMPONENT_Y );
          m_CABACEstimator->intra_luma_pred_mode( pu );
        }
      }

      if (bChroma)
      {
        // Chroma is "first" only when Cb is valid and the current area starts at
        // the chroma position of the coding structure.
        bool isFirst = partitioner.currArea().Cb().valid() && partitioner.currArea().chromaPos() == cs.area.chromaPos();

        PredictionUnit &pu = *cs.getPU( partitioner.currArea().chromaPos(), CHANNEL_TYPE_CHROMA );

        if( isFirst )
        {
          m_CABACEstimator->bdpcm_mode( cu, ComponentID(CHANNEL_TYPE_CHROMA) );
          m_CABACEstimator->intra_chroma_pred_mode( pu );
        }
      }
    }

    // Walks the transform tree of the current partition and passes the coded
    // block flags (cbf) to m_CABACEstimator, recursing into sub-partitions when
    // the TU lies deeper than the current transform depth.
    //
    // \param cs           coding structure the TU is fetched from
    // \param partitioner  current area / transform depth; split and restored here when recursing
    // \param bLuma        when true, the luma cbf is processed at leaf level
    // \param bChroma      when true, the chroma cbfs are processed
    // \param subTuIdx     index of the sub-TU to start from; -1 leaves the counter untouched
    // \param ispType      split used for the ISP case; TU_NO_ISP skips the ISP cbf handling
    void IntraSearch::xEncSubdivCbfQT( CodingStructure &cs, Partitioner &partitioner, const bool &bLuma, const bool &bChroma, const int subTuIdx, const PartSplit ispType )
    {
      const UnitArea &currArea = partitioner.currArea();
              int subTuCounter = subTuIdx;
      TransformUnit &currTU = *cs.getTU( currArea.blocks[partitioner.chType], partitioner.chType, subTuCounter );
      CodingUnit    &currCU = *currTU.cu;

      uint32_t currDepth           = partitioner.currTrDepth;

      const bool subdiv        = currTU.depth > currDepth;

      ComponentID compID = partitioner.chType == CHANNEL_TYPE_LUMA ? COMPONENT_Y : COMPONENT_Cb;

      // Sanity checks on the relation between the partitioner state and the TU depth.
      if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
      {
        CHECK( !subdiv, "TU split implied" );
      }
      else
      {
        CHECK( subdiv && !currCU.ispMode && isLuma( compID ), "No TU subdivision is allowed with QTBT" );
      }

      if (bChroma)
      {
        const bool chromaCbfISP = currArea.blocks[COMPONENT_Cb].valid() && currCU.ispMode && !subdiv;
        if ( !currCU.ispMode || chromaCbfISP )
        {
          const uint32_t numberValidComponents = getNumberValidComponents(currArea.chromaFormat);
          const uint32_t cbfDepth              = (chromaCbfISP ? currDepth - 1 : currDepth);

          for (uint32_t ch = COMPONENT_Cb; ch < numberValidComponents; ch++)
          {
            // Named differently from the outer compID to avoid shadowing it.
            const ComponentID chromaCompID = ComponentID(ch);

            if (currDepth == 0 || TU::getCbfAtDepth(currTU, chromaCompID, currDepth - 1) || chromaCbfISP)
            {
              // The Cr cbf is passed together with the Cb cbf at the same depth.
              const bool prevCbf = (chromaCompID == COMPONENT_Cr ? TU::getCbfAtDepth(currTU, COMPONENT_Cb, currDepth) : false);
              m_CABACEstimator->cbf_comp(cs, TU::getCbfAtDepth(currTU, chromaCompID, currDepth), currArea.blocks[chromaCompID],
                                         cbfDepth, prevCbf);
            }
          }
        }
      }

      if (subdiv)
      {
        if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
        {
          partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
        }
        else if( currCU.ispMode && isLuma( compID ) )
        {
          partitioner.splitCurrArea( ispType, cs );
        }
        else
        {
          THROW("Cannot perform an implicit split!");
        }

        do
        {
          xEncSubdivCbfQT( cs, partitioner, bLuma, bChroma, subTuCounter, ispType );
          // A counter of -1 is left as is; otherwise advance to the next sub-TU.
          subTuCounter += subTuCounter != -1 ? 1 : 0;
        } while( partitioner.nextPart( cs ) );

        partitioner.exitCurrSplit();
      }
      else
      {
        //===== Cbfs =====
        if (bLuma)
        {
          bool previousCbf       = false;
          bool lastCbfIsInferred = false;
          if( ispType != TU_NO_ISP )
          {
            bool rootCbfSoFar = false;

            uint32_t nTus = currCU.ispMode == HOR_INTRA_SUBPARTITIONS ? currCU.lheight() >> floorLog2(currTU.lheight()) : currCU.lwidth() >> floorLog2(currTU.lwidth());

            // For the last sub-TU: if none of the preceding sub-TUs has a luma
            // cbf, the last cbf is treated as inferred and is not passed on.
            if( subTuCounter == nTus - 1 )
            {
              TransformUnit* tuPointer = currCU.firstTU;
              for( int tuIdx = 0; tuIdx < nTus - 1; tuIdx++ )
              {
                rootCbfSoFar |= TU::getCbfAtDepth( *tuPointer, COMPONENT_Y, currDepth );
                tuPointer = tuPointer->next;
              }
              if( !rootCbfSoFar )
              {
                lastCbfIsInferred = true;
              }
            }
            if( !lastCbfIsInferred )
            {
              previousCbf = TU::getPrevTuCbfAtDepth( currTU, COMPONENT_Y, partitioner.currTrDepth );
            }
          }
          if( !lastCbfIsInferred )
          {
            m_CABACEstimator->cbf_comp( cs, TU::getCbfAtDepth( currTU, COMPONENT_Y, currDepth ), currTU.Y(), currTU.depth, previousCbf, currCU.ispMode );
          }
        }
      }
    }

    void IntraSearch::xEncCoeffQT( CodingStructure &cs, Partitioner &partitioner, const ComponentID compID, const int subTuIdx, const PartSplit ispType, CUCtx* cuCtx )
    
    {
      const UnitArea &currArea  = partitioner.currArea();