Skip to content
Snippets Groups Projects
EncSlice.cpp 77 KiB
Newer Older
  • Learn to ignore specific revisions
  • /* The copyright in this software is being made available under the BSD
     * License, included below. This software may be subject to other third party
     * and contributor rights, including patent rights, and no such rights are
     * granted under this license.
     *
     * Copyright (c) 2010-2018, ITU/ISO/IEC
     * All rights reserved.
     *
     * Redistribution and use in source and binary forms, with or without
     * modification, are permitted provided that the following conditions are met:
     *
     *  * Redistributions of source code must retain the above copyright notice,
     *    this list of conditions and the following disclaimer.
     *  * Redistributions in binary form must reproduce the above copyright notice,
     *    this list of conditions and the following disclaimer in the documentation
     *    and/or other materials provided with the distribution.
     *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
     *    be used to endorse or promote products derived from this software without
     *    specific prior written permission.
     *
     * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
     * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
     * THE POSSIBILITY OF SUCH DAMAGE.
     */
    
    /** \file     EncSlice.cpp
        \brief    slice encoder class
    */
    
    #include "EncSlice.h"
    
    #include "EncLib.h"
    #include "CommonLib/UnitTools.h"
    #include "CommonLib/Picture.h"
    
    #if K0149_BLOCK_STATISTICS
    #include "CommonLib/dtrace_blockstatistics.h"
    #endif
    
    
    #if ENABLE_WPP_PARALLELISM
    #include <mutex>
    extern recursive_mutex g_cache_mutex;
    #endif
    
    #include <math.h>
    
    //! \ingroup EncoderLib
    //! \{
    
    // ====================================================================================================================
    // Constructor / destructor / create / destroy
    // ====================================================================================================================
    
    /// Default constructor. The CABAC table index starts at I_SLICE; when QPA is
    /// compiled in, the adapted luma QP starts at -1.
    EncSlice::EncSlice()
      : m_encCABACTableIdx( I_SLICE )
    #if ENABLE_QPA
      , m_adaptedLumaQP( -1 )
    #endif
    {}
    
    /// Destructor: delegates all clean-up to destroy().
    EncSlice::~EncSlice()
    {
      this->destroy();
    }
    
    /// Currently a no-op: the body is empty and none of the arguments are read.
    void EncSlice::create( int iWidth, int iHeight, ChromaFormat chromaFormat, uint32_t iMaxCUWidth, uint32_t iMaxCUHeight, uint8_t uhTotalDepth )
    {
      // nothing to set up
    }
    
    /// Empties the per-picture QP and lambda candidate tables.
    void EncSlice::destroy()
    {
      m_viRdPicQp.clear();      // integer QP candidates
      m_vdRdPicQp.clear();      // double-precision QP candidates
      m_vdRdPicLambda.clear();  // lambda candidates
    }
    
    /// Caches the helper objects owned by the encoder library and sizes the
    /// lambda/QP candidate tables.
    /// \param pcEncLib  encoder library; also stored as the configuration object
    /// \param sps       sequence parameter set handed to the CABAC writer/estimator lookup
    void EncSlice::init( EncLib* pcEncLib, const SPS& sps )
    {
      // the same object serves as library handle and as configuration
      m_pcLib             = pcEncLib;
      m_pcCfg             = pcEncLib;

      // encoder components
      m_pcListPic         = pcEncLib->getListPic();
      m_pcGOPEncoder      = pcEncLib->getGOPEncoder();
      m_pcCuEncoder       = pcEncLib->getCuEncoder();
      m_pcInterSearch     = pcEncLib->getInterSearch();
      m_pcTrQuant         = pcEncLib->getTrQuant();
      m_pcRdCost          = pcEncLib->getRdCost();
      m_pcRateCtrl        = pcEncLib->getRateCtrl();

      // entropy coding
      m_CABACWriter       = pcEncLib->getCABACEncoder()->getCABACWriter   ( &sps );
      m_CABACEstimator    = pcEncLib->getCABACEncoder()->getCABACEstimator( &sps );

      // one table entry per QP candidate: the base QP plus +/- every delta up to DeltaQpRD
      const auto numQpCandidates = m_pcCfg->getDeltaQpRD() * 2 + 1;
      m_vdRdPicLambda.resize( numQpCandidates );
      m_vdRdPicQp.resize    ( numQpCandidates );
      m_viRdPicQp.resize    ( numQpCandidates );
    }
    
    /// Distributes a slice lambda to the RD cost, the transform/quantisation
    /// (RDOQ) and the slice (SAO), deriving per-chroma-component distortion
    /// weights from the luma/chroma QP difference.
    /// \param slice    slice whose PPS/SPS and chroma QP deltas are read and whose lambdas are set
    /// \param dLambda  luma lambda
    /// \param iQP      luma QP the lambda belongs to
    void
    EncSlice::setUpLambda( Slice* slice, const double dLambda, int iQP)
    {
      // RdCost holds a single lambda (luma and chroma bits are not separated);
      // chroma is handled by weighting its distortion instead.
      m_pcRdCost->setLambda( dLambda, slice->getSPS()->getBitDepths() );

      // entry 0 is luma, the remaining entries are filled in below
      double componentLambdas[MAX_NUM_COMPONENT] = { dLambda };

      for( uint32_t c = 1; c < MAX_NUM_COMPONENT; c++ )
      {
        const ComponentID chromaId = ComponentID( c );
        const int qpOffset         = slice->getPPS()->getQpOffset( chromaId ) + slice->getSliceChromaQpDelta( chromaId );

        // map the offset luma QP to a chroma QP; a negative sum falls back to the luma QP
        int chromaQP = iQP;
        if( !( iQP + qpOffset < 0 ) )
        {
          chromaQP = getScaledChromaQP( iQP + qpOffset, m_pcCfg->getChromaFormatIdc() );
        }

        // weight reflects both the chroma QP mapping and the chroma QP offset
        double distWeight = pow( 2.0, ( iQP - chromaQP ) / 3.0 );

        if( m_pcCfg->getDepQuantEnabledFlag() )
        {
          // raise the chroma weight for dependent quantization to limit the bit-rate shift from chroma to luma
          if( m_pcCfg->getGOPSize() >= 8 )
          {
            distWeight *= pow( 2.0, 0.1/3.0 );
          }
          else
          {
            distWeight *= pow( 2.0, 0.2/3.0 );
          }
        }

        m_pcRdCost->setDistortionWeight( chromaId, distWeight );
    #if ENABLE_WPP_PARALLELISM
        // mirror the weight into the RdCost instances of the additional WPP data sets
        const int numWppSets = m_pcLib->getNumWppThreads() + m_pcLib->getNumWppExtraLines();
        for( int setIdx = 1; setIdx < numWppSets; setIdx++ )
        {
          m_pcLib->getRdCost( slice->getPic()->scheduler.getWppDataId( setIdx ) )->setDistortionWeight( chromaId, distWeight );
        }
    #endif
        componentLambdas[c] = dLambda / distWeight;
      }

      // RDOQ
    #if RDOQ_CHROMA_LAMBDA
      m_pcTrQuant->setLambdas( componentLambdas );
    #else
      m_pcTrQuant->setLambda( dLambda );
    #endif

      // SAO
      slice->setLambdas( componentLambdas );
    }
    
    
    #if ENABLE_QPA
    
    /// Returns 3*log2(d) rounded to the nearest integer.
    /// Any input below 1.5e-13 (zero and negative values included) yields -128.
    static inline int apprI3Log2 (const double d)
    {
      if (d < 1.5e-13)
      {
        return -128;
      }

      const double tripleLog2 = 3.0 * log (d) / log (2.0);

      return int (floor (tripleLog2 + 0.5));
    }
    
    static void filterAndCalculateAverageEnergies (const Pel* pSrc, const int  iSrcStride,
                                                   double &hpEner,  const int  iHeight,    const int iWidth,
                                                   const uint32_t uBitDepth /* luma bit-depth (4-16) */)
    {
      uint64_t saAct = 0;
    
      // skip first row as there may be a black border frame
      pSrc += iSrcStride;
      // center rows
      for (int y = 1; y < iHeight - 1; y++)
      {
        // skip column as there may be a black border frame
    
        for (int x = 1; x < iWidth - 1; x++) // and columns
        {
          const int f = 12 * (int)pSrc[x  ] - 2 * ((int)pSrc[x-1] + (int)pSrc[x+1] + (int)pSrc[x  -iSrcStride] + (int)pSrc[x  +iSrcStride])
                           - (int)pSrc[x-1-iSrcStride] - (int)pSrc[x+1-iSrcStride] - (int)pSrc[x-1+iSrcStride] - (int)pSrc[x+1+iSrcStride];
          saAct += abs (f);
        }
        // skip column as there may be a black border frame
        pSrc += iSrcStride;
      }
      // skip last row as there may be a black border frame
    
      hpEner = double(saAct) / double((iWidth - 2) * (iHeight - 2));
    
      // lower limit, compensate for highpass amplification
      if (hpEner < double(1 << (uBitDepth - 4))) hpEner = double(1 << (uBitDepth - 4));
    }
    
    #ifndef GLOBAL_AVERAGING
      #define GLOBAL_AVERAGING 1 // "global" averaging of a_k across a set instead of one picture
    #endif
    
    #if GLOBAL_AVERAGING
    /// Returns a fixed reference activity (square root of the a_pic value) that
    /// depends only on the picture dimensions and the bit depth.
    /// \param picOrig    buffer whose width and height select the resolution class
    /// \param uBitDepth  bit depth; the value scales with 2^(uBitDepth/2)
    static double getAveragePictureEnergy (const CPelBuf picOrig, const uint32_t uBitDepth)
    {
      const double baseEnergy = 5.65625 * double(1 << (uBitDepth >> 1));

      // large pictures (UHD/4K): both dimensions above the threshold
      if (picOrig.width > 2048 && picOrig.height > 1280)
      {
        return baseEnergy * (4.0 / 5.65625);
      }

      // small pictures (480p class): either dimension at or below the threshold
      if (picOrig.width <= 1024 || picOrig.height <= 640)
      {
        return baseEnergy * (8.0 / 5.65625);
      }

      return baseEnergy;
    }
    #endif
    
    /// Derives slice-level chroma QP offsets from the high-pass activity of each
    /// colour component and stores them in the encoder configuration.
    /// \param pcPic     picture whose original samples are analysed
    /// \param pcSlice   slice providing the SPS bit depths
    /// \param pcEncCfg  configuration; receives the intra/periodic chroma QP offsets
    /// \param sliceQP   slice QP used as base for the averaged adapted luma QP
    /// \return the averaged adapted luma QP of the first chroma component processed,
    ///         or -1 if no chroma component was processed
    static int applyQPAdaptationChroma (Picture* const pcPic, Slice* const pcSlice, EncCfg* const pcEncCfg, const int sliceQP)
    {
      double activity[MAX_NUM_COMPONENT] = {0.0, 0.0, 0.0};
      int    chromaOffsets[2]            = {0, 0};
      int    lumaQPToReturn              = -1;

      for (uint32_t c = 0; c < getNumberValidComponents (pcPic->chromaFormat); c++)
      {
        const ComponentID compID  = (ComponentID)c;
        const CPelBuf     origBuf = pcPic->getOrigBuf (pcPic->block (compID));

        // chroma is analysed with a bit depth reduced by one
        filterAndCalculateAverageEnergies (origBuf.buf, origBuf.stride, activity[c], origBuf.height, origBuf.width,
                                           pcSlice->getSPS()->getBitDepth (toChannelType (compID)) - (isChroma (compID) ? 1 : 0));

        if (!isChroma (compID))
        {
          continue; // luma only contributes its activity (index 0)
        }

        // offset from the chroma-to-luma activity ratio; zero unless chroma is more than half as active as luma
        int activityOffset = 0;
        if (!(2.0 * activity[c] <= activity[0]))
        {
          activityOffset = apprI3Log2 (2.0 * activity[c] / activity[0]);
        }

       #if GLOBAL_AVERAGING
        int meanLumaQP = Clip3 (0, MAX_QP, sliceQP + apprI3Log2 (activity[0] / getAveragePictureEnergy (pcPic->getOrigBuf().Y(), pcSlice->getSPS()->getBitDepth (CH_L))));
       #else
        int meanLumaQP = Clip3 (0, MAX_QP, sliceQP); // mean slice QP
       #endif
       #if SHARP_LUMA_DELTA_QP

        // shift the mean picture QP index according to the picture's average luma value (Sharp)
        if (pcEncCfg->getLumaLevelToDeltaQPMapping().mode == LUMALVL_TO_DQP_NUM_MODES)
        {
          const CPelBuf lumaBuf = pcPic->getOrigBuf().Y();
          uint64_t lumaMean = 0;

          for (SizeType row = 0; row < lumaBuf.height; row++)
          {
            for (SizeType col = 0; col < lumaBuf.width; col++)
            {
              lumaMean += (uint64_t)lumaBuf.at (col, row);
            }
          }
          lumaMean = (lumaMean + (lumaBuf.area() >> 1)) / lumaBuf.area(); // rounded mean

          meanLumaQP = Clip3 (0, MAX_QP, meanLumaQP + 1 - int((3 * lumaMean * lumaMean) >> uint64_t (2 * pcSlice->getSPS()->getBitDepth (CH_L) - 1)));
        }
       #endif
        // QP difference introduced by the luma-to-chroma QP mapping at this luma QP
        const int mappingDQP = meanLumaQP - getScaledChromaQP (meanLumaQP, pcEncCfg->getChromaFormatIdc());

        // the activity-based part is capped at 3
        chromaOffsets[c-1] = std::min (3 + mappingDQP, activityOffset + mappingDQP);

        if (lumaQPToReturn < 0)
        {
          lumaQPToReturn = meanLumaQP; // keep the first one for the caller
        }
      }

      pcEncCfg->setSliceChromaOffsetQpIntraOrPeriodic (pcEncCfg->getSliceChromaOffsetQpPeriodicity(), chromaOffsets);

      return lumaQPToReturn;
    }
    
    #endif // ENABLE_QPA
    
    
    /**
     - non-referenced frame marking
     - QP computation based on temporal structure
     - lambda computation based on QP
     - set temporal layer ID and the parameter sets
     .
     \param pcPic         picture class
     \param pocLast       POC of last picture
     \param pocCurr       current POC
     \param iGOPid        POC offset for hierarchical structure
     \param rpcSlice      slice header class (set to the first slice of pcPic)
     \param isField       true for field coding
     \param isEncodeLtRef when composite reference is enabled and this flag is set, the picture output flag is cleared
     */
    
    // Initialises the first slice of pcPic for encoding: output flag, POC, GOP depth,
    // slice type, QP/lambda candidate tables, chroma QP deltas, deblocking parameters,
    // temporal layer and partitioning limits.
    // NOTE(review): several lines of this function appear to be missing from this copy
    // (see the notes at the depth loop, the slice-type decision and the final #else);
    // confirm against the upstream file before changing any logic here.
    void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr, const int iGOPid, Slice*& rpcSlice, const bool isField
      , bool isEncodeLtRef
    )
    
    {
      double dQP;
      double dLambda;
    
      // always operate on the first slice of the picture and reset it
      rpcSlice = pcPic->slices[0];
      rpcSlice->setSliceBits(0);
      rpcSlice->setPic( pcPic );
      rpcSlice->initSlice();
    
      // with composite reference enabled, GOP size and intra period are doubled below
      int multipleFactor = pcPic->cs->sps->getSpsNext().getUseCompositeRef() ? 2 : 1;
      if (pcPic->cs->sps->getSpsNext().getUseCompositeRef() && isEncodeLtRef)
      {
        rpcSlice->setPicOutputFlag(false);
      }
      else
      {
        rpcSlice->setPicOutputFlag(true);
      }
    
      rpcSlice->setPOC( pocCurr );
      rpcSlice->setDepQuantEnabledFlag( m_pcCfg->getDepQuantEnabledFlag() );
    #if HEVC_USE_SIGN_HIDING
      rpcSlice->setSignDataHidingEnabledFlag( m_pcCfg->getSignDataHidingEnabledFlag() );
    #endif
    
    #if SHARP_LUMA_DELTA_QP
      pcPic->fieldPic = isField;
      m_gopID = iGOPid;
    #endif
    
      // depth computation based on GOP size
      int depth;
      {
        int poc = rpcSlice->getPOC();
        if(isField)
        {
          poc = (poc/2) % (m_pcCfg->getGOPSize()/2);
        }
        else
        {
    
          poc = poc % (m_pcCfg->getGOPSize() * multipleFactor);
    
          int step = m_pcCfg->getGOPSize() * multipleFactor;
    
          // NOTE(review): `i` is not declared and `depth` is not initialised anywhere in
          // the visible text; an enclosing loop over `i` (and the poc == 0 case) appears
          // to have been lost. The braces below do not balance as written.
            for (int j = i; j<(m_pcCfg->getGOPSize() * multipleFactor); j += step)
    
            {
              if ( j == poc )
              {
                i=0;
                break;
              }
            }
            step >>= 1;
            depth++;
          }
        }
    
        // second field of a field pair sits one level deeper when harmonisation is enabled
        if(m_pcCfg->getHarmonizeGopFirstFieldCoupleEnabled() && poc != 0)
        {
          if (isField && ((rpcSlice->getPOC() % 2) == 1))
          {
            depth++;
          }
        }
      }
    
      // slice type
      SliceType eSliceType;
    
      eSliceType=B_SLICE;
      if(!(isField && pocLast == 1) || !m_pcCfg->getEfficientFieldIRAPEnabled())
      {
        if(m_pcCfg->getDecodingRefreshType() == 3)
        {
    
          // NOTE(review): the two assignments below look like alternative branches
          // (with/without the field adjustment); as written both execute in sequence.
          eSliceType = (pocLast == 0 || pocCurr % (m_pcCfg->getIntraPeriod() * multipleFactor) == 0 || m_pcGOPEncoder->getGOPSize() == 0) ? I_SLICE : eSliceType;
    
          eSliceType = (pocLast == 0 || (pocCurr - (isField ? 1 : 0)) % (m_pcCfg->getIntraPeriod() * multipleFactor) == 0 || m_pcGOPEncoder->getGOPSize() == 0) ? I_SLICE : eSliceType;
    
        }
      }
    
      rpcSlice->setSliceType    ( eSliceType );
    
      // ------------------------------------------------------------------------------------------------------------------
      // Non-referenced frame marking
      // ------------------------------------------------------------------------------------------------------------------
    
      if(pocLast == 0)
      {
        rpcSlice->setTemporalLayerNonReferenceFlag(false);
      }
      else
      {
        rpcSlice->setTemporalLayerNonReferenceFlag(!m_pcCfg->getGOPEntry(iGOPid).m_refPic);
      }
      pcPic->referenced = true;
    
      // ------------------------------------------------------------------------------------------------------------------
      // QP setting
      // ------------------------------------------------------------------------------------------------------------------
    
    #if X0038_LAMBDA_FROM_QP_CAPABILITY
      dQP = m_pcCfg->getQPForPicture(iGOPid, rpcSlice);
    #else
      dQP = m_pcCfg->getBaseQP();
      if(eSliceType!=I_SLICE)
      {
        // the GOP-entry QP offset is skipped only in the lossless configuration tested here
    #if SHARP_LUMA_DELTA_QP
        if (!(( m_pcCfg->getMaxDeltaQP() == 0) && (!m_pcCfg->getLumaLevelToDeltaQPMapping().isEnabled()) && (dQP == -rpcSlice->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA) ) && (rpcSlice->getPPS()->getTransquantBypassEnabledFlag())))
    #else
        if (!(( m_pcCfg->getMaxDeltaQP() == 0 ) && (dQP == -rpcSlice->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA) ) && (rpcSlice->getPPS()->getTransquantBypassEnabledFlag())))
    #endif
        {
          dQP += m_pcCfg->getGOPEntry(iGOPid).m_QPOffset;
        }
      }
    
      // modify QP
      const int* pdQPs = m_pcCfg->getdQPs();
      if ( pdQPs )
      {
        dQP += pdQPs[ rpcSlice->getPOC() ];
      }
    
      if (m_pcCfg->getCostMode()==COST_LOSSLESS_CODING)
      {
        dQP=LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP;
        m_pcCfg->setDeltaQpRD(0);
      }
    #endif
    
      // ------------------------------------------------------------------------------------------------------------------
      // Lambda computation
      // ------------------------------------------------------------------------------------------------------------------
    
    #if X0038_LAMBDA_FROM_QP_CAPABILITY
      const int temporalId=m_pcCfg->getGOPEntry(iGOPid).m_temporalId;
    #if !SHARP_LUMA_DELTA_QP
      const std::vector<double> &intraLambdaModifiers=m_pcCfg->getIntraLambdaModifier();
    #endif
    #endif
      int iQP;
      double dOrigQP = dQP;
    
      // pre-compute lambda and QP values for all possible QP candidates
      for ( int iDQpIdx = 0; iDQpIdx < 2 * m_pcCfg->getDeltaQpRD() + 1; iDQpIdx++ )
      {
        // compute QP value
        // candidate order: index 0 -> +0, 1 -> -1, 2 -> +1, 3 -> -2, 4 -> +2, ...
        dQP = dOrigQP + ((iDQpIdx+1)>>1)*(iDQpIdx%2 ? -1 : 1);
    #if SHARP_LUMA_DELTA_QP
        dLambda = calculateLambda(rpcSlice, iGOPid, depth, dQP, dQP, iQP );
    #else
        // compute lambda value
        int    NumberBFrames = ( m_pcCfg->getGOPSize() - 1 );
        int    SHIFT_QP = 12;
    
        int    bitdepth_luma_qp_scale =
          6
          * (rpcSlice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA) - 8
             - DISTORTION_PRECISION_ADJUSTMENT(rpcSlice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA)));
        double qp_temp = (double) dQP + bitdepth_luma_qp_scale - SHIFT_QP;
    #if FULL_NBIT
        double qp_temp_orig = (double) dQP - SHIFT_QP;
    #endif
        // Case #1: I or P-slices (key-frame)
        double dQPFactor = m_pcCfg->getGOPEntry(iGOPid).m_QPFactor;
        if ( eSliceType==I_SLICE )
        {
          if (m_pcCfg->getIntraQpFactor()>=0.0 && m_pcCfg->getGOPEntry(iGOPid).m_sliceType != I_SLICE)
          {
            dQPFactor=m_pcCfg->getIntraQpFactor();
          }
          else
          {
    #if X0038_LAMBDA_FROM_QP_CAPABILITY
            if(m_pcCfg->getLambdaFromQPEnable())
            {
              dQPFactor=0.57;
            }
            else
            {
    #endif
            double dLambda_scale = 1.0 - Clip3( 0.0, 0.5, 0.05*(double)(isField ? NumberBFrames/2 : NumberBFrames) );
    
            dQPFactor=0.57*dLambda_scale;
    #if X0038_LAMBDA_FROM_QP_CAPABILITY
            }
    #endif
          }
        }
    #if X0038_LAMBDA_FROM_QP_CAPABILITY
        else if( m_pcCfg->getLambdaFromQPEnable() )
        {
          dQPFactor=0.57;
        }
    #endif
    
        dLambda = dQPFactor*pow( 2.0, qp_temp/3.0 );
    
    #if X0038_LAMBDA_FROM_QP_CAPABILITY
        if(!m_pcCfg->getLambdaFromQPEnable() && depth>0)
    #else
        if ( depth>0 )
    #endif
        {
    #if FULL_NBIT
            dLambda *= Clip3( 2.00, 4.00, (qp_temp_orig / 6.0) ); // (j == B_SLICE && p_cur_frm->layer != 0 )
    #else
            dLambda *= Clip3( 2.00, 4.00, (qp_temp / 6.0) ); // (j == B_SLICE && p_cur_frm->layer != 0 )
    #endif
        }
    
        // if hadamard is used in ME process
        if ( !m_pcCfg->getUseHADME() && rpcSlice->getSliceType( ) != I_SLICE )
        {
          dLambda *= 0.95;
        }
    
    #if X0038_LAMBDA_FROM_QP_CAPABILITY
        double lambdaModifier;
        if( rpcSlice->getSliceType( ) != I_SLICE || intraLambdaModifiers.empty())
        {
          lambdaModifier = m_pcCfg->getLambdaModifier( temporalId );
        }
        else
        {
          // temporal IDs beyond the list reuse its last entry
          lambdaModifier = intraLambdaModifiers[ (temporalId < intraLambdaModifiers.size()) ? temporalId : (intraLambdaModifiers.size()-1) ];
        }
        dLambda *= lambdaModifier;
    #endif
    
    
        iQP = Clip3( -rpcSlice->getSPS()->getQpBDOffset( CHANNEL_TYPE_LUMA ), MAX_QP, (int) floor( dQP + 0.5 ) );
    
    #endif
    
        m_vdRdPicLambda[iDQpIdx] = dLambda;
        m_vdRdPicQp    [iDQpIdx] = dQP;
        m_viRdPicQp    [iDQpIdx] = iQP;
      }
    
      // obtain dQP = 0 case
      dLambda = m_vdRdPicLambda[0];
      dQP     = m_vdRdPicQp    [0];
      iQP     = m_viRdPicQp    [0];
    
    #if !X0038_LAMBDA_FROM_QP_CAPABILITY
      const int temporalId=m_pcCfg->getGOPEntry(iGOPid).m_temporalId;
      const std::vector<double> &intraLambdaModifiers=m_pcCfg->getIntraLambdaModifier();
    #endif
    
    #if W0038_CQP_ADJ
    
     #if ENABLE_QPA
      m_adaptedLumaQP = -1;
    
      // chroma QP adaptation runs for intra slices or at the configured POC periodicity, and never with rate control
      if ((m_pcCfg->getUsePerceptQPA() || m_pcCfg->getSliceChromaOffsetQpPeriodicity() > 0) && !m_pcCfg->getUseRateCtrl() && rpcSlice->getPPS()->getSliceChromaQpFlag() &&
          (rpcSlice->isIntra() || (m_pcCfg->getSliceChromaOffsetQpPeriodicity() > 0 && (rpcSlice->getPOC() % m_pcCfg->getSliceChromaOffsetQpPeriodicity()) == 0)))
      {
        m_adaptedLumaQP = applyQPAdaptationChroma (pcPic, rpcSlice, m_pcCfg, iQP);
      }
     #endif
    
      if(rpcSlice->getPPS()->getSliceChromaQpFlag())
      {
    
        const bool bUseIntraOrPeriodicOffset = rpcSlice->isIntra() || (m_pcCfg->getSliceChromaOffsetQpPeriodicity() > 0 && (rpcSlice->getPOC() % m_pcCfg->getSliceChromaOffsetQpPeriodicity()) == 0);
        int cbQP = bUseIntraOrPeriodicOffset ? m_pcCfg->getSliceChromaOffsetQpIntraOrPeriodic(false) : m_pcCfg->getGOPEntry(iGOPid).m_CbQPoffset;
        int crQP = bUseIntraOrPeriodicOffset ? m_pcCfg->getSliceChromaOffsetQpIntraOrPeriodic(true)  : m_pcCfg->getGOPEntry(iGOPid).m_CrQPoffset;
    
    
        // clip so that PPS offset plus slice delta stays within [-12, 12]; the CHECKs below verify this
        cbQP = Clip3( -12, 12, cbQP + rpcSlice->getPPS()->getQpOffset(COMPONENT_Cb) ) - rpcSlice->getPPS()->getQpOffset(COMPONENT_Cb);
        crQP = Clip3( -12, 12, crQP + rpcSlice->getPPS()->getQpOffset(COMPONENT_Cr) ) - rpcSlice->getPPS()->getQpOffset(COMPONENT_Cr);
        rpcSlice->setSliceChromaQpDelta(COMPONENT_Cb, Clip3( -12, 12, cbQP));
        CHECK(!(rpcSlice->getSliceChromaQpDelta(COMPONENT_Cb)+rpcSlice->getPPS()->getQpOffset(COMPONENT_Cb)<=12 && rpcSlice->getSliceChromaQpDelta(COMPONENT_Cb)+rpcSlice->getPPS()->getQpOffset(COMPONENT_Cb)>=-12), "Unspecified error");
        rpcSlice->setSliceChromaQpDelta(COMPONENT_Cr, Clip3( -12, 12, crQP));
        CHECK(!(rpcSlice->getSliceChromaQpDelta(COMPONENT_Cr)+rpcSlice->getPPS()->getQpOffset(COMPONENT_Cr)<=12 && rpcSlice->getSliceChromaQpDelta(COMPONENT_Cr)+rpcSlice->getPPS()->getQpOffset(COMPONENT_Cr)>=-12), "Unspecified error");
      }
      else
      {
        rpcSlice->setSliceChromaQpDelta( COMPONENT_Cb, 0 );
        rpcSlice->setSliceChromaQpDelta( COMPONENT_Cr, 0 );
      }
    #endif
    
    #if !X0038_LAMBDA_FROM_QP_CAPABILITY
      double lambdaModifier;
      if( rpcSlice->getSliceType( ) != I_SLICE || intraLambdaModifiers.empty())
      {
        lambdaModifier = m_pcCfg->getLambdaModifier( temporalId );
      }
      else
      {
        lambdaModifier = intraLambdaModifiers[ (temporalId < intraLambdaModifiers.size()) ? temporalId : (intraLambdaModifiers.size()-1) ];
      }
    
      dLambda *= lambdaModifier;
    #endif
    
      setUpLambda(rpcSlice, dLambda, iQP);
    
    #if WCG_EXT
      // cost = Distortion + Lambda*R,
      // when QP is adjusted by luma, distortion is changed, so we have to adjust lambda to match the distortion, then the cost function becomes
      // costA = Distortion + AdjustedLambda * R          -- currently, costA is still used when calculating intermediate cost of using SAD, HAD, residual etc.
      // an alternative way is to weight the distortion to before the luma QP adjustment, then the cost function becomes
      // costB = weightedDistortion + Lambda * R          -- currently, costB is used to calculate final cost, and when DF_FUNC is DF_DEFAULT
      m_pcRdCost->saveUnadjustedLambda();
    #endif
    
      if (m_pcCfg->getFastMEForGenBLowDelayEnabled())
      {
        // restore original slice type
    
        if(!(isField && pocLast == 1) || !m_pcCfg->getEfficientFieldIRAPEnabled())
        {
          if(m_pcCfg->getDecodingRefreshType() == 3)
          {
    
            // NOTE(review): same pattern as in the first slice-type decision above - both
            // assignments execute as written; they may originally have been alternatives.
            eSliceType = (pocLast == 0 || (pocCurr) % (m_pcCfg->getIntraPeriod() * multipleFactor) == 0 || m_pcGOPEncoder->getGOPSize() == 0) ? I_SLICE : eSliceType;
    
            eSliceType = (pocLast == 0 || (pocCurr - (isField ? 1 : 0)) % (m_pcCfg->getIntraPeriod() * multipleFactor) == 0 || m_pcGOPEncoder->getGOPSize() == 0) ? I_SLICE : eSliceType;
    
          }
        }
    
        rpcSlice->setSliceType        ( eSliceType );
      }
    
      if (m_pcCfg->getUseRecalculateQPAccordingToLambda())
      {
        dQP = xGetQPValueAccordingToLambda( dLambda );
    
        iQP = Clip3( -rpcSlice->getSPS()->getQpBDOffset( CHANNEL_TYPE_LUMA ), MAX_QP, (int) floor( dQP + 0.5 ) );
    
      }
    
      rpcSlice->setSliceQp           ( iQP );
      rpcSlice->setSliceQpDelta      ( 0 );
    #if !W0038_CQP_ADJ
      rpcSlice->setSliceChromaQpDelta( COMPONENT_Cb, 0 );
      rpcSlice->setSliceChromaQpDelta( COMPONENT_Cr, 0 );
    #endif
      rpcSlice->setUseChromaQpAdj( rpcSlice->getPPS()->getPpsRangeExtension().getChromaQpOffsetListEnabledFlag() );
      rpcSlice->setNumRefIdx(REF_PIC_LIST_0,m_pcCfg->getGOPEntry(iGOPid).m_numRefPicsActive);
      rpcSlice->setNumRefIdx(REF_PIC_LIST_1,m_pcCfg->getGOPEntry(iGOPid).m_numRefPicsActive);
    
      // deblocking: metric mode forces an override with zero offsets; otherwise follow the
      // PPS control flags, adding the GOP-entry offsets only for overridden non-intra slices
      if ( m_pcCfg->getDeblockingFilterMetric() )
      {
        rpcSlice->setDeblockingFilterOverrideFlag(true);
        rpcSlice->setDeblockingFilterDisable(false);
        rpcSlice->setDeblockingFilterBetaOffsetDiv2( 0 );
        rpcSlice->setDeblockingFilterTcOffsetDiv2( 0 );
      }
      else if (rpcSlice->getPPS()->getDeblockingFilterControlPresentFlag())
      {
        rpcSlice->setDeblockingFilterOverrideFlag( rpcSlice->getPPS()->getDeblockingFilterOverrideEnabledFlag() );
        rpcSlice->setDeblockingFilterDisable( rpcSlice->getPPS()->getPPSDeblockingFilterDisabledFlag() );
        if ( !rpcSlice->getDeblockingFilterDisable())
        {
          if ( rpcSlice->getDeblockingFilterOverrideFlag() && eSliceType!=I_SLICE)
          {
            rpcSlice->setDeblockingFilterBetaOffsetDiv2( m_pcCfg->getGOPEntry(iGOPid).m_betaOffsetDiv2 + m_pcCfg->getLoopFilterBetaOffset()  );
            rpcSlice->setDeblockingFilterTcOffsetDiv2( m_pcCfg->getGOPEntry(iGOPid).m_tcOffsetDiv2 + m_pcCfg->getLoopFilterTcOffset() );
          }
          else
          {
            rpcSlice->setDeblockingFilterBetaOffsetDiv2( m_pcCfg->getLoopFilterBetaOffset() );
            rpcSlice->setDeblockingFilterTcOffsetDiv2( m_pcCfg->getLoopFilterTcOffset() );
          }
        }
      }
      else
      {
        rpcSlice->setDeblockingFilterOverrideFlag( false );
        rpcSlice->setDeblockingFilterDisable( false );
        rpcSlice->setDeblockingFilterBetaOffsetDiv2( 0 );
        rpcSlice->setDeblockingFilterTcOffsetDiv2( 0 );
      }
    
      rpcSlice->setDepth            ( depth );
    
      // temporal layer comes from the GOP entry, except that intra slices are always layer 0
      pcPic->layer =  temporalId;
      if(eSliceType==I_SLICE)
      {
        pcPic->layer = 0;
      }
      rpcSlice->setTLayer( pcPic->layer );
    
      rpcSlice->setSliceMode            ( m_pcCfg->getSliceMode()            );
      rpcSlice->setSliceArgument        ( m_pcCfg->getSliceArgument()        );
    #if HEVC_DEPENDENT_SLICES
      rpcSlice->setSliceSegmentMode     ( m_pcCfg->getSliceSegmentMode()     );
      rpcSlice->setSliceSegmentArgument ( m_pcCfg->getSliceSegmentArgument() );
    #endif
      rpcSlice->setMaxNumMergeCand      ( m_pcCfg->getMaxNumMergeCand()      );
    
    #if JVET_L0217_L0678_PARTITION_HIGHLEVEL_CONSTRAINT
      // partitioning limits are taken from the SPS; intra slices use the dedicated intra values
      rpcSlice->setSplitConsOverrideFlag(false);
      rpcSlice->setMinQTSize( rpcSlice->getSPS()->getSpsNext().getMinQTSize(eSliceType));
      rpcSlice->setMaxBTDepth( rpcSlice->isIntra() ? rpcSlice->getSPS()->getSpsNext().getMaxBTDepthI() : rpcSlice->getSPS()->getSpsNext().getMaxBTDepth() );
      rpcSlice->setMaxBTSize( rpcSlice->isIntra() ? rpcSlice->getSPS()->getSpsNext().getMaxBTSizeI() : rpcSlice->getSPS()->getSpsNext().getMaxBTSize() );
      rpcSlice->setMaxTTSize( rpcSlice->isIntra() ? rpcSlice->getSPS()->getSpsNext().getMaxTTSizeI() : rpcSlice->getSPS()->getSpsNext().getMaxTTSize() );
      if ( eSliceType == I_SLICE && rpcSlice->getSPS()->getSpsNext().getUseDualITree() )
      {
        rpcSlice->setMinQTSizeIChroma( rpcSlice->getSPS()->getSpsNext().getMinQTSize(eSliceType, CHANNEL_TYPE_CHROMA) );
        rpcSlice->setMaxBTDepthIChroma( rpcSlice->getSPS()->getSpsNext().getMaxBTDepthIChroma() );
        rpcSlice->setMaxBTSizeIChroma( rpcSlice->getSPS()->getSpsNext().getMaxBTSizeIChroma() );
        rpcSlice->setMaxTTSizeIChroma( rpcSlice->getSPS()->getSpsNext().getMaxTTSizeIChroma() );
      }
    #else
    
      rpcSlice->setMaxBTSize            ( rpcSlice->isIntra() ? MAX_BT_SIZE : MAX_BT_SIZE_INTER );
    
      // NOTE(review): no #endif for the #if/#else above is visible before the closing brace.
    }
    
    
    #if SHARP_LUMA_DELTA_QP
    double EncSlice::calculateLambda( const Slice*     slice,
                                      const int        GOPid, // entry in the GOP table
                                      const int        depth, // slice GOP hierarchical depth.
                                      const double     refQP, // initial slice-level QP
                                      const double     dQP,   // initial double-precision QP
                                            int       &iQP )  // returned integer QP.
    {
      enum   SliceType eSliceType    = slice->getSliceType();
      const  bool      isField       = slice->getPic()->fieldPic;
      const  int       NumberBFrames = ( m_pcCfg->getGOPSize() - 1 );
      const  int       SHIFT_QP      = 12;
    #if X0038_LAMBDA_FROM_QP_CAPABILITY
      const int temporalId=m_pcCfg->getGOPEntry(GOPid).m_temporalId;
      const std::vector<double> &intraLambdaModifiers=m_pcCfg->getIntraLambdaModifier();
    #endif
    
      int bitdepth_luma_qp_scale = 6
                                   * (slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA) - 8
                                      - DISTORTION_PRECISION_ADJUSTMENT(slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA)));
      double qp_temp = dQP + bitdepth_luma_qp_scale - SHIFT_QP;
      // Case #1: I or P-slices (key-frame)
      double dQPFactor = m_pcCfg->getGOPEntry(GOPid).m_QPFactor;
      if ( eSliceType==I_SLICE )
      {
        if (m_pcCfg->getIntraQpFactor()>=0.0 && m_pcCfg->getGOPEntry(GOPid).m_sliceType != I_SLICE)
        {
          dQPFactor=m_pcCfg->getIntraQpFactor();
        }
        else
        {
    #if X0038_LAMBDA_FROM_QP_CAPABILITY
          if(m_pcCfg->getLambdaFromQPEnable())
          {
            dQPFactor=0.57;
          }
          else
          {
    #endif
            double dLambda_scale = 1.0 - Clip3( 0.0, 0.5, 0.05*(double)(isField ? NumberBFrames/2 : NumberBFrames) );
            dQPFactor=0.57*dLambda_scale;
    #if X0038_LAMBDA_FROM_QP_CAPABILITY
          }
    #endif
        }
      }
    #if X0038_LAMBDA_FROM_QP_CAPABILITY
      else if( m_pcCfg->getLambdaFromQPEnable() )
      {
        dQPFactor=0.57;
      }
    #endif
    
      double dLambda = dQPFactor*pow( 2.0, qp_temp/3.0 );
    
    #if X0038_LAMBDA_FROM_QP_CAPABILITY
      if( !(m_pcCfg->getLambdaFromQPEnable()) && depth>0 )
    #else
      if ( depth>0 )
    #endif
      {
        double qp_temp_ref = refQP + bitdepth_luma_qp_scale - SHIFT_QP;
        dLambda *= Clip3(2.00, 4.00, (qp_temp_ref / 6.0));   // (j == B_SLICE && p_cur_frm->layer != 0 )
      }
    
      // if hadamard is used in ME process
      if ( !m_pcCfg->getUseHADME() && slice->getSliceType( ) != I_SLICE )
      {
        dLambda *= 0.95;
      }
    
    #if X0038_LAMBDA_FROM_QP_CAPABILITY
      double lambdaModifier;
      if( eSliceType != I_SLICE || intraLambdaModifiers.empty())
      {
        lambdaModifier = m_pcCfg->getLambdaModifier( temporalId );
      }
      else
      {
        lambdaModifier = intraLambdaModifiers[ (temporalId < intraLambdaModifiers.size()) ? temporalId : (intraLambdaModifiers.size()-1) ];
      }
      dLambda *= lambdaModifier;
    #endif
    
    
      iQP = Clip3( -slice->getSPS()->getQpBDOffset( CHANNEL_TYPE_LUMA ), MAX_QP, (int) floor( dQP + 0.5 ) );
    
    
      if( m_pcCfg->getDepQuantEnabledFlag() )
      {
        dLambda *= pow( 2.0, 0.25/3.0 ); // slight lambda adjustment for dependent quantization (due to different slope of quantizer)
      }
    
      // NOTE: the lambda modifiers that are sometimes applied later might be best always applied in here.
      return dLambda;
    }
    #endif
    
    //! Assign a new QP to the first slice of the given picture and redo its lambda set-up.
    //! \param pic      picture whose slice at index 0 is updated
    //! \param sliceQP  QP written to the slice and forwarded to setUpLambda()
    //! \param lambda   lambda value forwarded to setUpLambda()
    void EncSlice::resetQP( Picture* pic, int sliceQP, double lambda )
    {
      Slice* const firstSlice = pic->slices[0];

      // the slice QP is written before the lambda set-up is invoked (same order as before)
      firstSlice->setSliceQp( sliceQP );
      setUpLambda( firstSlice, lambda, sliceQP );
    }
    
    #if ENABLE_QPA
    
    //! Adapt the QP of the CTUs in the tile-scan address range [startAddr, boundingAddr) of a slice.
    //!
    //! Two modes are visible below:
    //!  - frame-wise QPA, or slice QP already at MAX_QP: one fixed QP is derived, the slice lambdas and the
    //!    slice QP / slice QP base are rescaled to it, and that QP is written for every CTU of the range;
    //!  - otherwise: one QP per CTU is derived from pcPic->m_uEnerHpCtu[] relative to the picture energy,
    //!    optionally adjusted by the mean CTU luma, reduced for low-activity CTUs, and lightly smoothed
    //!    against already processed neighbouring CTUs.
    //!
    //! pcPic->m_iOffsetCtu[] is read as a per-CTU luma offset (used as mean luma / DC value) and is then
    //! overwritten with the resulting per-CTU QP.
    //!
    //! \param pcPic                   picture providing the original luma buffer, m_uEnerHpCtu[] and m_iOffsetCtu[]
    //! \param pcSlice                 slice whose QP and lambdas may be updated
    //! \param pcv                     pre-calculated values; widthInCtus, maxCUWidth and maxCUHeight are used
    //! \param startAddr               first CTU address (tile scan) of the range
    //! \param boundingAddr            one past the last CTU address (tile scan) of the range
    //! \param useSharpLumaDQP         enables the luma-dependent QP offset (code under SHARP_LUMA_DELTA_QP)
    //! \param hpEnerAvg               average energy value; also the normaliser unless GLOBAL_AVERAGING is set
    //! \param hpEnerMax               maximum energy value, used only when the slice QP is already at MAX_QP
    //! \param useFrameWiseQPA         selects the single-QP-per-range mode
    //! \param previouslyAdaptedLumaQP if non-negative, used directly as the fixed QP in frame-wise mode and
    //!                                suppresses the luma-dependent adjustment of the fixed QP
    //! \return true if the slice QP and lambdas were modified, false otherwise
    //!
    //! NOTE(review): this copy of the function does not look self-consistent (see the notes further down);
    //! confirm every flagged spot against the upstream file before relying on it.
    static bool applyQPAdaptation (Picture* const pcPic,     Slice* const pcSlice,        const PreCalcValues& pcv,
    
                                   const uint32_t startAddr, const uint32_t boundingAddr, const bool useSharpLumaDQP,
    
                                   const double hpEnerAvg,   const double hpEnerMax,      const bool useFrameWiseQPA, const int previouslyAdaptedLumaQP = -1)
    
    {
      const int  iBitDepth   = pcSlice->getSPS()->getBitDepth (CHANNEL_TYPE_LUMA);
      const int  iQPIndex    = pcSlice->getSliceQp(); // initial QP index for current slice, used in following loops
    #if HEVC_TILES_WPP
      // used to map tile-scan CTU addresses to raster-scan addresses
      const TileMap& tileMap = *pcPic->tileMap;
    #endif
      bool   sliceQPModified = false;
    
      // hpEnerPic is stored as a reciprocal so the loops below multiply instead of divide
    #if GLOBAL_AVERAGING
      const double hpEnerPic = 1.0 / getAveragePictureEnergy (pcPic->getOrigBuf().Y(), iBitDepth); // inverse, speed
    #else
      const double hpEnerPic = 1.0 / hpEnerAvg; // speedup: multiply instead of divide in loop below; 1.0 for tuning
    #endif
    
      // ---- mode 1: a single fixed QP for the whole CTU range ----
      // NOTE(review): no opening brace and no declaration of iQPFixed are visible after this condition,
      // although the closing brace / else further down and all uses of iQPFixed require them.
      // Lines appear to be missing from this copy - confirm against upstream.
      if (useFrameWiseQPA || (iQPIndex >= MAX_QP))
    
        if (useFrameWiseQPA)
        {
          // reuse a previously adapted QP when one is supplied, otherwise derive it from the average energy
          // (apprI3Log2 is defined elsewhere; presumably an integer approximation of 3*log2(x) - TODO confirm)
          iQPFixed = (previouslyAdaptedLumaQP < 0) ? Clip3 (0, MAX_QP, iQPIndex + apprI3Log2 (hpEnerAvg * hpEnerPic)) : previouslyAdaptedLumaQP; // average-activity slice QP
        }
        else
        {
          // rounded mean of the offsets obtained from the average and the maximum energy
          iQPFixed = Clip3 (0, MAX_QP, iQPIndex + ((apprI3Log2 (hpEnerAvg * hpEnerPic) + apprI3Log2 (hpEnerMax * hpEnerPic) + 1) >> 1)); // adapted slice QP = (mean(QP) + max(QP)) / 2
        }
    
    #if SHARP_LUMA_DELTA_QP
    
        // change new fixed QP based on average CTU luma value (Sharp)
    
        if (useSharpLumaDQP && (iQPIndex < MAX_QP) && (previouslyAdaptedLumaQP < 0))
    
        {
          uint64_t uAvgLuma = 0;
    
          // accumulate the per-CTU luma offsets of the range ...
          for (uint32_t ctuTsAddr = startAddr; ctuTsAddr < boundingAddr; ctuTsAddr++)
          {
    #if HEVC_TILES_WPP
            const uint32_t ctuRsAddr = tileMap.getCtuTsToRsAddrMap (ctuTsAddr);
    #else
            const uint32_t ctuRsAddr = ctuTsAddr;
    #endif
    
            uAvgLuma += (uint64_t)pcPic->m_iOffsetCtu[ctuRsAddr];
          }
          // ... and turn the sum into a rounded average over the number of CTUs
          // NOTE(review): divides by (boundingAddr - startAddr); assumes the range is non-empty - confirm with caller
          uAvgLuma = (uAvgLuma + ((boundingAddr - startAddr) >> 1)) / (boundingAddr - startAddr);
    
          // offset is 1 - ((3 * avg^2) >> (2 * bitDepth - 1)), i.e. the QP is lowered as the average luma grows
          iQPFixed = Clip3 (0, MAX_QP, iQPFixed + 1 - int((3 * uAvgLuma * uAvgLuma) >> uint64_t(2 * iBitDepth - 1)));
        }
    #endif
    
    
        // a slice that is already at the maximum QP stays there
        if (iQPIndex >= MAX_QP) iQPFixed = MAX_QP;
    
        {
          // scale all component lambdas by 2^((new QP - old QP) / 3) to match the changed QP
          const double* oldLambdas = pcSlice->getLambdas();
          const double  corrFactor = pow (2.0, double(iQPFixed - iQPIndex) / 3.0);
          const double  newLambdas[MAX_NUM_COMPONENT] = {oldLambdas[0] * corrFactor, oldLambdas[1] * corrFactor, oldLambdas[2] * corrFactor};
    
          // the slice QP base is expected to still equal the slice QP read on entry
          CHECK (iQPIndex != pcSlice->getSliceQpBase(), "Invalid slice QP!");
          pcSlice->setLambdas (newLambdas);
          pcSlice->setSliceQp (iQPFixed); // update the slice/base QPs
          pcSlice->setSliceQpBase (iQPFixed);
    
          sliceQPModified = true;
        }
    
        // overwrite the per-CTU luma offsets with the fixed QP
        for (uint32_t ctuTsAddr = startAddr; ctuTsAddr < boundingAddr; ctuTsAddr++)
        {
    #if HEVC_TILES_WPP
          const uint32_t ctuRsAddr = tileMap.getCtuTsToRsAddrMap (ctuTsAddr);
    #else
          const uint32_t ctuRsAddr = ctuTsAddr;
    #endif
    
          pcPic->m_iOffsetCtu[ctuRsAddr] = (Pel)iQPFixed; // fixed QPs
        }
      }
      // ---- mode 2: an individual QP per CTU ----
      else
      {
        for (uint32_t ctuTsAddr = startAddr; ctuTsAddr < boundingAddr; ctuTsAddr++)
        {
    #if HEVC_TILES_WPP
          const uint32_t ctuRsAddr = tileMap.getCtuTsToRsAddrMap (ctuTsAddr);
    #else
          const uint32_t ctuRsAddr = ctuTsAddr;
    #endif
    
    
          // initial CTU QP: slice QP plus an offset derived from the CTU energy relative to the picture energy
          int iQPAdapt = Clip3 (0, MAX_QP, iQPIndex + apprI3Log2 (pcPic->m_uEnerHpCtu[ctuRsAddr] * hpEnerPic));
    
          // NOTE(review): two alternative conditions followed by an #endif with no visible #if / #else -
          // presumably a preprocessor switch selecting one of them was lost in this copy; confirm upstream.
          if (pcv.widthInCtus > 1) // try to enforce CTU SNR greater than zero dB
    
          if (!pcSlice->isIntra()) // try to enforce CTU SNR greater than zero dB
    
    #endif
          {
            // value stored for this CTU before it is replaced by the QP; used as mean luma and as DC value below
            const Pel      dcOffset   = pcPic->m_iOffsetCtu[ctuRsAddr];
    #if SHARP_LUMA_DELTA_QP
    
            // change adaptive QP based on mean CTU luma value (Sharp)
            if (useSharpLumaDQP)
            {
              const uint64_t uAvgLuma   = (uint64_t)dcOffset;
    
    
              // same luma-dependent offset as in the fixed-QP mode; only the lower bound is enforced here,
              // the upper bound is applied further down under SHARP_LUMA_DELTA_QP
              iQPAdapt = std::max (0, iQPAdapt + 1 - int((3 * uAvgLuma * uAvgLuma) >> uint64_t(2 * iBitDepth - 1)));
    
            // NOTE(review): neither the closing brace of "if (useSharpLumaDQP)" nor the #endif of the
            // SHARP_LUMA_DELTA_QP section is visible before this point - lines appear to be missing.
            // reference level computed from the inverse quantizer scale of the current CTU QP and the bit depth
            const uint32_t uRefScale  = g_invQuantScales[iQPAdapt % 6] << ((iQPAdapt / 6) + iBitDepth - 4);
    
            // luma area of this CTU, clipped to the picture
            const CompArea subArea    = clipArea (CompArea (COMPONENT_Y, pcPic->chromaFormat, Area ((ctuRsAddr % pcv.widthInCtus) * pcv.maxCUWidth, (ctuRsAddr / pcv.widthInCtus) * pcv.maxCUHeight, pcv.maxCUWidth, pcv.maxCUHeight)), pcPic->Y());
            const Pel*     pSrc       = pcPic->getOrigBuf (subArea).buf;
            const SizeType iSrcStride = pcPic->getOrigBuf (subArea).stride;
            const SizeType iSrcHeight = pcPic->getOrigBuf (subArea).height;
            const SizeType iSrcWidth  = pcPic->getOrigBuf (subArea).width;
            uint32_t uAbsDCless = 0;
    
            // compute sum of absolute DC-less (high-pass) luma values
            for (SizeType h = 0; h < iSrcHeight; h++)
            {
              for (SizeType w = 0; w < iSrcWidth; w++)
              {
                uAbsDCless += (uint32_t)abs (pSrc[w] - dcOffset);
              }
              pSrc += iSrcStride;
            }
    
            // rescale the sum to the area of a 64x64 block (with rounding)
            if (iSrcHeight >= 64 || iSrcWidth >= 64)  // normalization
            {
              const uint64_t blockSize = uint64_t(iSrcWidth * iSrcHeight);
    
              uAbsDCless = uint32_t((uint64_t(uAbsDCless) * 64*64 + (blockSize >> 1)) / blockSize);
            }
    
            // lower bound of 64*64, i.e. an average of 1 per sample of a 64x64 block
            if (uAbsDCless < 64*64) uAbsDCless = 64*64;  // limit to 1
    
            // reduce QP index if CTU would be fully quantized to zero
            if (uAbsDCless < uRefScale)
            {
    
              // the reduction is bounded below by a limit that depends on the slice QP and is never positive
              const int limit  = std::min (0, ((iQPIndex + 4) >> 3) - 6);
              const int redVal = std::max (limit, apprI3Log2 ((double)uAbsDCless / (double)uRefScale));
    
              iQPAdapt = std::max (0, iQPAdapt + redVal);
    
            }
    #if SHARP_LUMA_DELTA_QP
    
            if (iQPAdapt > MAX_QP) iQPAdapt = MAX_QP;
    #endif
          }
    
          // from here on m_iOffsetCtu[ctuRsAddr] holds the QP of this CTU instead of its luma offset
          pcPic->m_iOffsetCtu[ctuRsAddr] = (Pel)iQPAdapt; // adapted QPs
    
    
          if (pcv.widthInCtus > 1) // try to reduce local bitrate peaks via minimum smoothing of the adapted QPs
    
          {
            // iQPAdapt is reused: first as the horizontal CTU position, then as the smoothing candidate
            iQPAdapt = ctuRsAddr % pcv.widthInCtus; // horizontal offset
            if (iQPAdapt == 0)
            {
              // first CTU of a row: candidate is the value two raster positions back, or 0 near the picture start
              iQPAdapt = (ctuRsAddr > 1) ? pcPic->m_iOffsetCtu[ctuRsAddr - 2] : 0;
            }
            else // iQPAdapt >= 1
            {
    
              // candidate is the minimum of the current CTU and, if it exists in this row, the CTU two to the left
              iQPAdapt = (iQPAdapt > 1) ? std::min (pcPic->m_iOffsetCtu[ctuRsAddr - 2], pcPic->m_iOffsetCtu[ctuRsAddr]) : pcPic->m_iOffsetCtu[ctuRsAddr];
    
              // NOTE(review): the index ctuRsAddr - 1 - widthInCtus (above-left CTU) is used without a visible
              // check that a row above exists; a guard may have been lost in this copy - confirm upstream.
              iQPAdapt = std::min (iQPAdapt, (int)pcPic->m_iOffsetCtu[ctuRsAddr - 1 - pcv.widthInCtus]);
    
            }
            // raise the previous CTU (raster order) to the candidate if its value is lower
            if ((ctuRsAddr > 0) && (pcPic->m_iOffsetCtu[ctuRsAddr - 1] < (Pel)iQPAdapt))
            {
              pcPic->m_iOffsetCtu[ctuRsAddr - 1] = (Pel)iQPAdapt;
            }
            // no later iteration will revisit the final CTU, so it is smoothed here against its left and above CTUs
            if ((ctuTsAddr == boundingAddr - 1) && (ctuRsAddr > pcv.widthInCtus)) // last CTU in the given slice
            {
    
              iQPAdapt = std::min (pcPic->m_iOffsetCtu[ctuRsAddr - 1], pcPic->m_iOffsetCtu[ctuRsAddr - pcv.widthInCtus]);
    
              if (pcPic->m_iOffsetCtu[ctuRsAddr] < (Pel)iQPAdapt)
              {
                pcPic->m_iOffsetCtu[ctuRsAddr] = (Pel)iQPAdapt;
              }
            }
          }
        } // end iteration over all CTUs in current slice
      }
    
      // true only if the fixed-QP mode rewrote the slice QP and lambdas
      return sliceQPModified;
    }
    #endif // ENABLE_QPA
    
    // ====================================================================================================================
    // Public member functions
    // ====================================================================================================================
    
    //! set adaptive search range based on poc difference
    void EncSlice::setSearchRange( Slice* pcSlice )
    {
      int iCurrPOC = pcSlice->getPOC();
      int iRefPOC;
      int iGOPSize = m_pcCfg->getGOPSize();
      int iOffset = (iGOPSize >> 1);
      int iMaxSR = m_pcCfg->getSearchRange();
      int iNumPredDir = pcSlice->isInterP() ? 1 : 2;
    
      for (int iDir = 0; iDir < iNumPredDir; iDir++)