    /* The copyright in this software is being made available under the BSD
     * License, included below. This software may be subject to other third party
     * and contributor rights, including patent rights, and no such rights are
     * granted under this license.
     *
    
     * Copyright (c) 2010-2019, ITU/ISO/IEC
    
     * All rights reserved.
     *
     * Redistribution and use in source and binary forms, with or without
     * modification, are permitted provided that the following conditions are met:
     *
     *  * Redistributions of source code must retain the above copyright notice,
     *    this list of conditions and the following disclaimer.
     *  * Redistributions in binary form must reproduce the above copyright notice,
     *    this list of conditions and the following disclaimer in the documentation
     *    and/or other materials provided with the distribution.
     *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
     *    be used to endorse or promote products derived from this software without
     *    specific prior written permission.
     *
     * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
     * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
     * THE POSSIBILITY OF SUCH DAMAGE.
     */
    
    /** \file     QuantRDOQ.cpp
        \brief    transform and quantization class
    */
    
    #include "QuantRDOQ.h"
    
    #include "UnitTools.h"
    #include "ContextModelling.h"
    #include "CodingStructure.h"
    #include "CrossCompPrediction.h"
    
    #include "dtrace_next.h"
    #include "dtrace_buffer.h"
    
    #include <stdlib.h>
    #include <limits>
    #include <memory.h>
    
    
    // Per-coefficient-group accumulator used by the RDOQ search in this file.
    // The instance is cleared with memset() at the start of every coefficient group,
    // so the type has to stay a plain aggregate.
    struct coeffGroupRDStats
    {
      int    iNNZbeforePos0;       // presumably the number of non-zero levels found before scan position 0 of the group -- TODO confirm against the code that updates it
      double d64CodedLevelandDist; // distortion and level cost only
      double d64UncodedDist;    // all zero coded block distortion
      double d64SigCost;           // presumably the accumulated significance-flag cost of the group -- TODO confirm
      double d64SigCost_0;         // presumably the significance-flag cost tied to position 0 of the group -- TODO confirm
    };
    
    
    //! \ingroup CommonLib
    //! \{
    
    // ====================================================================================================================
    // Constants
    // ====================================================================================================================
    
    
    // ====================================================================================================================
    // Static functions
    // ====================================================================================================================
    
    // ====================================================================================================================
    // QuantRDOQ class member functions
    // ====================================================================================================================
    
    
    /** Construct an RDOQ quantiser on top of the base Quant state.
     * \param other quantiser handed to the Quant base constructor; may be nullptr,
     *              otherwise it has to be a QuantRDOQ (enforced by the CHECK below)
     */
    QuantRDOQ::QuantRDOQ( const Quant* other ) : Quant( other )
    {
      // The error-scale tables are set up from the RDOQ view of the source object.
      const QuantRDOQ* const srcRdoq = dynamic_cast<const QuantRDOQ*>( other );

      // A non-null source of any other dynamic type is rejected.
      CHECK( other && !srcRdoq, "The RDOQ cast must be successfull!" );

      xInitScalingList( srcRdoq );
    }
    
    /** Destructor: hands the error-scale tables to xDestroyScalingList(), which
     *  releases them only when this object owns them.
     */
    QuantRDOQ::~QuantRDOQ()
    {
      this->xDestroyScalingList();
    }
    
    
    
    
    /** Get the best level in RD sense
     *
     * Picks, for one scan position, the quantized level with the smallest cost among the
     * candidates uiMaxAbsLevel and (when uiMaxAbsLevel > 1) uiMaxAbsLevel - 1. When the position
     * is not the last one and uiMaxAbsLevel < 3, level 0 is a candidate as well.
     * The cost of a non-zero candidate is
     *   (lLevelDouble - (level << iQBits))^2 * errorScale + xGetICost( xGetICRate( level, ... ) ) + significance cost.
     *
     * \param rd64CodedCost     out: cost of the returned level (MAX_DOUBLE if no candidate was evaluated)
     * \param rd64CodedCost0    in:  cost of coding this position as zero, excluding the significance flag
     * \param rd64CodedCostSig  out: significance-flag part of the cost of the returned level
     * \param lLevelDouble      scaled absolute coefficient, i.e. the value before the right shift by iQBits
     * \param uiMaxAbsLevel     largest candidate level
     * \param fracBitsSig       significance-flag bit costs; only dereferenced when bLast is false
     * \param fracBitsPar       bit costs forwarded to xGetICRate()
     * \param fracBitsGt1       bit costs forwarded to xGetICRate()
     * \param fracBitsGt2       bit costs forwarded to xGetICRate()
     * \param remRegBins        forwarded to xGetICRate()
     * \param goRiceZero        forwarded to xGetICRate()
     * \param ui16AbsGoRice     Rice parameter forwarded to xGetICRate()
     * \param iQBits            quantizer shift used to rescale a candidate level for the distortion term
     * \param errorScale        factor applied to the squared error
     * \param bLast             true when the position is the last significant one (no significance flag is costed)
     * \param useLimitedPrefixLength  not read in the body shown here; xGetICRate() is called with a literal true
     * \param maxLog2TrDynamicRange   forwarded to xGetICRate()
     * \returns best quantized transform level for given scan position
     *
     * NOTE(review): the "#if !JVET_O0052_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT" inside the parameter list has no
     * matching #endif in this listing, and the #else call below uses remGt2Bins, which is not declared among
     * the visible parameters. Lines appear to be missing -- confirm against the upstream file before building.
     */
    inline uint32_t QuantRDOQ::xGetCodedLevel( double&            rd64CodedCost,
                                           double&            rd64CodedCost0,
                                           double&            rd64CodedCostSig,
                                           Intermediate_Int   lLevelDouble,
                                           uint32_t               uiMaxAbsLevel,
                                           const BinFracBits* fracBitsSig,
                                           const BinFracBits& fracBitsPar,
                                           const BinFracBits& fracBitsGt1,
                                           const BinFracBits& fracBitsGt2,
    
    #if !JVET_O0052_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT
    
                                           const int          remRegBins,
                                           unsigned           goRiceZero,
    
                                           uint16_t             ui16AbsGoRice,
                                           int                iQBits,
                                           double             errorScale,
                                           bool               bLast,
                                           bool               useLimitedPrefixLength,
                                           const int          maxLog2TrDynamicRange
                                         ) const
    {
      double dCurrCostSig   = 0;
      uint32_t   uiBestAbsLevel = 0;
    
      // Seed the running minimum: with the cost of coding a zero when zero is a candidate,
      // otherwise with MAX_DOUBLE so that the first evaluated level always wins.
      if( !bLast && uiMaxAbsLevel < 3 )
      {
        rd64CodedCostSig    = xGetRateSigCoef( *fracBitsSig, 0 );
        rd64CodedCost       = rd64CodedCost0 + rd64CodedCostSig;
        if( uiMaxAbsLevel == 0 )
        {
          // nothing but zero to choose from
          return uiBestAbsLevel;
        }
      }
      else
      {
        rd64CodedCost       = MAX_DOUBLE;
      }
    
      // Every non-zero candidate pays for a significance flag of 1, except at the last position.
      if( !bLast )
      {
        dCurrCostSig        = xGetRateSigCoef( *fracBitsSig, 1 );
      }
    
      // Only uiMaxAbsLevel and the level directly below it are tried; the lower bound never drops below 1.
      uint32_t uiMinAbsLevel    = ( uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1 );
      for( int uiAbsLevel  = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- )
      {
        // quantization error of this candidate, measured in the scaled (pre-shift) domain
        double dErr         = double( lLevelDouble  - ( Intermediate_Int(uiAbsLevel) << iQBits ) );
    
    #if JVET_O0052_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT
    	double dCurrCost    = dErr * dErr * errorScale + xGetICost( xGetICRate( uiAbsLevel, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, ui16AbsGoRice, true, maxLog2TrDynamicRange ) );
    #else
    	double dCurrCost    = dErr * dErr * errorScale + xGetICost( xGetICRate( uiAbsLevel, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, ui16AbsGoRice, true, maxLog2TrDynamicRange ) );
    #endif
    
        dCurrCost          += dCurrCostSig;
    
        // strict '<': on a tie the candidate evaluated earlier (the larger level, or zero) is kept
        if( dCurrCost < rd64CodedCost )
        {
          uiBestAbsLevel    = uiAbsLevel;
          rd64CodedCost     = dCurrCost;
          rd64CodedCostSig  = dCurrCostSig;
        }
      }
    
      return uiBestAbsLevel;
    }
    
    /** Calculates the rate for a specific absolute transform level
     * \param uiAbsLevel             absolute quantized level whose rate is estimated
     * \param fracBitsPar            bit costs of the parity flag, indexed by flag value
     * \param fracBitsGt1            bit costs of the greater-than-1 flag, indexed by flag value
     * \param fracBitsGt2            bit costs of the greater-than-2 flag, indexed by flag value
     * \param remRegBins             not read in the body shown here (see the note below)
     * \param goRiceZero             value used to remap the level onto the coded symbol in the first branch
     * \param ui16AbsGoRice          Rice parameter used for the remainder binarization
     * \param useLimitedPrefixLength selects the binarization with a bounded prefix length
     * \param maxLog2TrDynamicRange  used to derive the maximum prefix length and the escape suffix length
     * \returns rate of the given absolute transform level, in units scaled by (1 << SCALE_BITS)
     *
     * NOTE(review): this listing looks incomplete and should be confirmed against the upstream file:
     *  - the "#if !JVET_O0052_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT" in the parameter list has no matching #endif;
     *  - the first braced block below is unconditional and ends in a return, so everything after it is
     *    unreachable as written (a guarding condition, presumably on remRegBins, seems to be missing);
     *  - in the second part 'length' is used without a visible declaration;
     *  - in the "uiAbsLevel >= cthres" branch two sets of flag costs are added back to back, which suggests
     *    that a branch boundary was lost.
     */
    inline int QuantRDOQ::xGetICRate( const uint32_t         uiAbsLevel,
                                      const BinFracBits& fracBitsPar,
                                      const BinFracBits& fracBitsGt1,
                                      const BinFracBits& fracBitsGt2,
    
    #if !JVET_O0052_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT
    
                                      const int          remRegBins,
                                      unsigned           goRiceZero,
    
                                      const uint16_t       ui16AbsGoRice,
                                      const bool         useLimitedPrefixLength,
                                      const int          maxLog2TrDynamicRange  ) const
    {
    
      // First variant: the whole level is costed as a sign bit plus a binarized symbol;
      // none of the context-coded flag costs are added.
      {
        int       iRate   = int( xGetIEPRate() ); // cost of sign bit
        // remap the level onto the coded symbol: 0 -> goRiceZero, 1..goRiceZero -> level - 1, larger levels unchanged
        uint32_t  symbol  = ( uiAbsLevel == 0 ? goRiceZero : uiAbsLevel <= goRiceZero ? uiAbsLevel-1 : uiAbsLevel );
        uint32_t  length;
    
        const int threshold = COEF_REMAIN_BIN_REDUCTION;
    
        if( symbol < ( threshold << ui16AbsGoRice ) )
        {
          // short code: (symbol >> rice) prefix bins, one terminating bin and 'rice' suffix bins
          length = symbol >> ui16AbsGoRice;
          iRate += ( length + 1 + ui16AbsGoRice ) << SCALE_BITS;
        }
        else if( useLimitedPrefixLength )
        {
          // escape code whose prefix length is capped at maximumPrefixLength
          const uint32_t maximumPrefixLength = ( 32 - ( COEF_REMAIN_BIN_REDUCTION + maxLog2TrDynamicRange ) );
    
          uint32_t prefixLength = 0;
          uint32_t suffix = ( symbol >> ui16AbsGoRice ) - COEF_REMAIN_BIN_REDUCTION;
    
          // smallest prefixLength (up to the cap) with suffix <= (2 << prefixLength) - 2
          while( ( prefixLength < maximumPrefixLength ) && ( suffix > ( ( 2 << prefixLength ) - 2 ) ) )
          {
            prefixLength++;
          }
    
          // at the cap the suffix has a fixed length, otherwise it grows with the prefix
          const uint32_t suffixLength = ( prefixLength == maximumPrefixLength ) ? ( maxLog2TrDynamicRange - ui16AbsGoRice ) : ( prefixLength + 1/*separator*/ );
    
          iRate += ( COEF_REMAIN_BIN_REDUCTION + prefixLength + suffixLength + ui16AbsGoRice ) << SCALE_BITS;
        }
        else
        {
          // escape code without a prefix cap: 'length' grows until the remaining symbol fits into 'length' bits
          length = ui16AbsGoRice;
          symbol = symbol - ( threshold << ui16AbsGoRice );
          while( symbol >= ( 1 << length ) )
          {
            symbol -= ( 1 << ( length++ ) );
          }
          iRate += ( threshold + length + 1 - ui16AbsGoRice + length ) << SCALE_BITS;
        }
        return iRate;
      }
    
      // Second variant: sign bit plus flag costs, with a binarized remainder for levels >= cthres.
      // NOTE(review): unreachable as written because of the unconditional return above.
      int iRate = int( xGetIEPRate() ); // cost of sign bit
    
      const uint32_t cthres = 4;
    
      if( uiAbsLevel >= cthres )
      {
        // the remainder is coded in steps of two; the parity is carried by the parity flag
        uint32_t symbol = ( uiAbsLevel - cthres ) >> 1;
    
        const int threshold = COEF_REMAIN_BIN_REDUCTION;
    
        // same three-way binarization cost as in the first variant
        if( symbol < ( threshold << ui16AbsGoRice ) )
        {
          length = symbol >> ui16AbsGoRice;
          iRate += ( length + 1 + ui16AbsGoRice ) << SCALE_BITS;
        }
        else if( useLimitedPrefixLength )
        {
          const uint32_t maximumPrefixLength = ( 32 - ( COEF_REMAIN_BIN_REDUCTION + maxLog2TrDynamicRange ) );
    
          uint32_t prefixLength = 0;
          uint32_t suffix = ( symbol >> ui16AbsGoRice ) - COEF_REMAIN_BIN_REDUCTION;
    
          while( ( prefixLength < maximumPrefixLength ) && ( suffix > ( ( 2 << prefixLength ) - 2 ) ) )
          {
            prefixLength++;
          }
    
          const uint32_t suffixLength = ( prefixLength == maximumPrefixLength ) ? ( maxLog2TrDynamicRange - ui16AbsGoRice ) : ( prefixLength + 1/*separator*/ );
    
          iRate += ( COEF_REMAIN_BIN_REDUCTION + prefixLength + suffixLength + ui16AbsGoRice ) << SCALE_BITS;
        }
        else
        {
          length = ui16AbsGoRice;
          symbol = symbol - ( threshold << ui16AbsGoRice );
          while( symbol >= ( 1 << length ) )
          {
            symbol -= ( 1 << ( length++ ) );
          }
          iRate += ( threshold + length + 1 - ui16AbsGoRice + length ) << SCALE_BITS;
        }
    
    
        // flag costs: gt1 = 1, parity of (level - 2), gt2 = 1
        iRate += fracBitsGt1.intBits[1];
        iRate += fracBitsPar.intBits[( uiAbsLevel - 2 ) & 1];
    
        iRate += fracBitsGt2.intBits[1];
    
        // NOTE(review): a second set of flag costs (gt1 = 1, parity = 0, gt2 = 0) is added in the same branch;
        // this looks like the body of a separate branch whose header is missing from the listing -- confirm.
        iRate += fracBitsGt1.intBits[1];
        iRate += fracBitsPar.intBits[0];
        iRate += fracBitsGt2.intBits[0];
    
      }
      else if( uiAbsLevel == 3 )
      {
        // flag costs: gt1 = 1, parity = 1, gt2 = 0
        iRate += fracBitsGt1.intBits[1];
        iRate += fracBitsPar.intBits[1];
        iRate += fracBitsGt2.intBits[0];
      }
      else
      {
        // every other level is costed as zero here (this also discards the sign bit added above)
        iRate = 0;
      }
      return  iRate;
    }
    
    /** Cost of signalling the significance flag of a coefficient group
     * \param fracBitsSigCG            bit costs of the flag, indexed by flag value
     * \param uiSignificanceCoeffGroup value of the flag, used as index into fracBitsSigCG.intBits
     * \returns xGetICost() of the selected bit cost
     */
    inline double QuantRDOQ::xGetRateSigCoeffGroup( const BinFracBits& fracBitsSigCG, unsigned uiSignificanceCoeffGroup ) const
    {
      const auto flagBits = fracBitsSigCG.intBits[uiSignificanceCoeffGroup];
      return xGetICost( flagBits );
    }
    
    /** Calculates the cost of signaling the last significant coefficient in the block
     * \param uiPosX X coordinate of the last significant coefficient
     * \param uiPosY Y coordinate of the last significant coefficient
     * \param component colour component ID
     * \returns cost of last significant coefficient
     */
    /*
     * \param uiWidth width of the transform unit (TU)
    */
    inline double QuantRDOQ::xGetRateLast( const int* lastBitsX, const int* lastBitsY, unsigned PosX, unsigned PosY ) const
    {
      uint32_t    CtxX  = g_uiGroupIdx[PosX];
      uint32_t    CtxY  = g_uiGroupIdx[PosY];
      double  Cost  = lastBitsX[ CtxX ] + lastBitsY[ CtxY ];
      if( CtxX > 3 )
      {
        Cost += xGetIEPRate() * ((CtxX-2)>>1);
      }
      if( CtxY > 3 )
      {
        Cost += xGetIEPRate() * ((CtxY-2)>>1);
      }
      return xGetICost( Cost );
    }
    
    
    /** Cost of signalling the significance flag of a single coefficient
     * \param fracBitsSig    bit costs of the flag, indexed by flag value
     * \param uiSignificance value of the flag, used as index into fracBitsSig.intBits
     * \returns xGetICost() of the selected bit cost
     */
    inline double QuantRDOQ::xGetRateSigCoef( const BinFracBits& fracBitsSig, unsigned uiSignificance ) const
    {
      const auto flagBits = fracBitsSig.intBits[uiSignificance];
      return xGetICost( flagBits );
    }
    
    /** Convert a rate into a cost
     * \param dRate rate to convert
     * \returns dRate weighted by the member m_dLambda
     */
    inline double QuantRDOQ::xGetICost( double dRate ) const
    {
      const double weightedRate = dRate * m_dLambda;
      return weightedRate;
    }
    
    /** Rate of one equiprobable bit
     * \returns the constant 32768 (2^15), in the same scaled units as the other rates in this file
     */
    inline double QuantRDOQ::xGetIEPRate() const
    {
      return 32768.0;
    }
    
    
    
    /** Install a scaling list and refresh the RDOQ error-scale tables
     * \param scalingList            scaling list forwarded to Quant::setScalingList()
     * \param maxLog2TrDynamicRange  per-channel-type dynamic range, forwarded to the base class and to xSetErrScaleCoeff()
     * \param bitDepths              reference to bit depth array for all channels
     *
     * After the base class has been updated, the error-scale coefficients are recomputed for every
     * list and every QP remainder. Only equal X/Y size indices are visited here.
     */
    void QuantRDOQ::setScalingList(ScalingList *scalingList, const int maxLog2TrDynamicRange[MAX_NUM_CHANNEL_TYPE], const BitDepths &bitDepths)
    {
      Quant::setScalingList( scalingList, maxLog2TrDynamicRange, bitDepths );

      const int qpRemBegin = 0;
      const int qpRemEnd   = SCALING_LIST_REM_NUM;

      for( uint32_t sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++ )
      {
        for( uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++ )
        {
          for( int qpRem = qpRemBegin; qpRem < qpRemEnd; qpRem++ )
          {
            // same size index for both dimensions
            xSetErrScaleCoeff( listId, sizeId, sizeId, qpRem, maxLog2TrDynamicRange, bitDepths );
          }
        }
      }
    }
    
    
    
    
    double QuantRDOQ::xGetErrScaleCoeff(const bool needsSqrt2, SizeType width, SizeType height, int qp, const int maxLog2TrDynamicRange, const int channelBitDepth)
    {
      const int iTransformShift = getTransformShift(channelBitDepth, Size(width, height), maxLog2TrDynamicRange);
      double    dErrScale = (double)(1 << SCALE_BITS);                                // Compensate for scaling of bitcount in Lagrange cost function
      double    dTransShift = (double)iTransformShift + (needsSqrt2 ? -0.5 : 0.0);
      dErrScale = dErrScale * pow(2.0, (-2.0*dTransShift));                     // Compensate for scaling through forward transform
      const int  QStep = g_quantScales[needsSqrt2 ? 1 : 0][qp];
      double    finalErrScale = dErrScale / QStep / QStep / (1 << (DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth) << 1));
      return    finalErrScale;
    }
    
    
    
    
    /** set error scale coefficients
     * \param list                   list ID; 0 and MAX_NUM_COMPONENT are treated as luma, everything else as chroma
     * \param sizeX                  index into g_scalingListSizeX giving the block width
     * \param sizeY                  index into g_scalingListSizeX giving the block height
     * \param qp                     quantization parameter
     * \param maxLog2TrDynamicRange  per-channel-type dynamic range
     * \param bitDepths              reference to bit depth array for all channels
     *
     * Fills the per-coefficient table returned by xGetErrScaleCoeffSL() and the single value returned by
     * xGetErrScaleCoeffNoScalingList() for the given list / size / qp combination.
     */
    void QuantRDOQ::xSetErrScaleCoeff( uint32_t list, uint32_t sizeX, uint32_t sizeY, int qp, const int maxLog2TrDynamicRange[MAX_NUM_CHANNEL_TYPE], const BitDepths &bitDepths )
    {
      const int blkWidth  = g_scalingListSizeX[sizeX];
      const int blkHeight = g_scalingListSizeX[sizeY];

      const bool        isLumaList = ( list == 0 ) || ( list == MAX_NUM_COMPONENT );
      const ChannelType chType     = isLumaList ? CHANNEL_TYPE_LUMA : CHANNEL_TYPE_CHROMA;
      const int         bitDepth   = bitDepths.recon[chType];

      // Represents scaling through forward transform
      const int transformShift = getTransformShift( bitDepth, Size( g_scalingListSizeX[sizeX], g_scalingListSizeX[sizeY] ), maxLog2TrDynamicRange[chType] );

      const uint32_t numCoeff = blkWidth * blkHeight;

      int*    quantCoeff = getQuantCoeff( list, qp, sizeX, sizeY );
      double* errScale   = xGetErrScaleCoeffSL( list, sizeX, sizeY, qp );

      // An odd sum of the log2 dimensions needs the sqrt(2) adjustment.
      const bool   needsSqrt2     = ( ( g_aucLog2[blkWidth] + g_aucLog2[blkHeight] ) & 1 ) == 1;
      const double effectiveShift = static_cast<double>( transformShift ) + ( needsSqrt2 ? -0.5 : 0.0 );

      // Compensate for scaling of bitcount in Lagrange cost function ...
      double baseScale = static_cast<double>( 1 << SCALE_BITS );
      // ... and for scaling through forward transform.
      baseScale = baseScale * pow( 2.0, ( -2.0 * effectiveShift ) );

      // Divisor shared by every entry written below.
      const int precisionDiv = 1 << ( DISTORTION_PRECISION_ADJUSTMENT( bitDepths.recon[chType] ) << 1 );

      // Per-coefficient error scale for use with a scaling list.
      for( uint32_t idx = 0; idx < numCoeff; idx++ )
      {
        const int coeff = quantCoeff[idx];
        errScale[idx]   = baseScale / coeff / coeff / precisionDiv;
      }

      // Single error scale for the case without a scaling list.
      const int quantStep = g_quantScales[needsSqrt2][qp];

      xGetErrScaleCoeffNoScalingList( list, sizeX, sizeY, qp ) = baseScale / quantStep / quantStep / precisionDiv;
    }
    
    /** Install the flat scaling list and refresh the RDOQ error-scale tables
     * \param maxLog2TrDynamicRange  per-channel-type dynamic range, forwarded to the base class and to xSetErrScaleCoeff()
     * \param bitDepths              reference to bit depth array for all channels
     *
     * After the base class has been updated, the error-scale coefficients are recomputed for every
     * combination of X size index, Y size index, list and QP remainder.
     */
    void QuantRDOQ::setFlatScalingList(const int maxLog2TrDynamicRange[MAX_NUM_CHANNEL_TYPE], const BitDepths &bitDepths)
    {
      Quant::setFlatScalingList( maxLog2TrDynamicRange, bitDepths );

      const int qpRemBegin = 0;
      const int qpRemEnd   = SCALING_LIST_REM_NUM;

      for( uint32_t idX = 0; idX < SCALING_LIST_SIZE_NUM; idX++ )
      {
        for( uint32_t idY = 0; idY < SCALING_LIST_SIZE_NUM; idY++ )
        {
          for( uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++ )
          {
            for( int qpRem = qpRemBegin; qpRem < qpRemEnd; qpRem++ )
            {
              xSetErrScaleCoeff( listId, idX, idY, qpRem, maxLog2TrDynamicRange, bitDepths );
            }
          }
        }
      }
    }
    
    /** Set up the error-scale table pointers
     * \param other  quantiser whose tables are shared, or nullptr to allocate tables owned by this object
     *
     * With a null 'other' every entry gets a freshly allocated array of width * height doubles and this
     * object is marked as the owner; otherwise every entry points at the corresponding array of 'other'.
     */
    void QuantRDOQ::xInitScalingList( const QuantRDOQ* other )
    {
      m_isErrScaleListOwner = ( other == nullptr );

      for( uint32_t idX = 0; idX < SCALING_LIST_SIZE_NUM; idX++ )
      {
        for( uint32_t idY = 0; idY < SCALING_LIST_SIZE_NUM; idY++ )
        {
          for( uint32_t qpRem = 0; qpRem < SCALING_LIST_REM_NUM; qpRem++ )
          {
            for( uint32_t list = 0; list < SCALING_LIST_NUM; list++ )
            {
              auto& table = m_errScale[idX][idY][list][qpRem];

              if( !m_isErrScaleListOwner )
              {
                // share the table of the source object
                table = other->m_errScale[idX][idY][list][qpRem];
              }
              else
              {
                // own table, one entry per coefficient of the block
                table = new double[g_scalingListSizeX[idX] * g_scalingListSizeX[idY]];
              }
            }
          }
        }
      }
    }
    
    /** Release the error-scale tables
     *
     * Does nothing unless this object owns the tables (see m_isErrScaleListOwner); otherwise every
     * table is released with delete[].
     */
    void QuantRDOQ::xDestroyScalingList()
    {
      if( m_isErrScaleListOwner )
      {
        for( uint32_t idX = 0; idX < SCALING_LIST_SIZE_NUM; idX++ )
        {
          for( uint32_t idY = 0; idY < SCALING_LIST_SIZE_NUM; idY++ )
          {
            for( uint32_t list = 0; list < SCALING_LIST_NUM; list++ )
            {
              for( uint32_t qpRem = 0; qpRem < SCALING_LIST_REM_NUM; qpRem++ )
              {
                // delete[] on a null pointer is a no-op, so no separate null test is needed
                delete [] m_errScale[idX][idY][list][qpRem];
              }
            }
          }
        }
      }
    }
    
    
    /** Quantise one component of a transform unit, using RDOQ where it is enabled
     * \param tu       transform unit; its coefficient buffer for compID receives the quantised levels
     * \param compID   colour component to quantise
     * \param pSrc     transform coefficients to quantise
     * \param uiAbsSum output of the selected quantiser; set to 0 when selective RDOQ skips the block
     * \param cQP      quantisation parameters
     * \param ctx      context forwarded to the selected quantiser
     *
     * RDOQ is used when the matching enable flag (m_useRDOQTS for transform-skip luma, m_useRDOQ otherwise)
     * is set, the block is larger than 2 in both dimensions (not required for ISP luma blocks) and the
     * component is luma or RDOQ_CHROMA is enabled. In every other case Quant::quant() is called.
     */
    void QuantRDOQ::quant(TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &pSrc, TCoeff &uiAbsSum, const QpParam &cQP, const Ctx& ctx)
    {
      const CompArea &rect      = tu.blocks[compID];
      const uint32_t uiWidth        = rect.width;
      const uint32_t uiHeight       = rect.height;

      const CCoeffBuf &piCoef   = pSrc;
            CoeffBuf   piQCoef  = tu.getCoeffs(compID);

      // Transform skip is only considered for luma here.
      const bool useTransformSkip      = tu.mtsIdx==MTS_SKIP && isLuma(compID);

      bool useRDOQ = useTransformSkip ? m_useRDOQTS : m_useRDOQ;

      // Except for ISP luma blocks, RDOQ needs more than 2 samples in each dimension.
      if( !tu.cu->ispMode || !isLuma(compID) )
      {
        useRDOQ &= uiWidth > 2;
        useRDOQ &= uiHeight > 2;
      }

      if (useRDOQ && (isLuma(compID) || RDOQ_CHROMA))
      {
    #if T0196_SELECTIVE_RDOQ
        if (!m_useSelectiveRDOQ || xNeedRDOQ(tu, compID, piCoef, cQP))
        {
    #endif

          // useTransformSkip already implies a luma component, so no further isLuma() test is needed.
          if( useTransformSkip )
          {
            if( tu.cu->bdpcmMode )
            {
              forwardRDPCM( tu, compID, pSrc, uiAbsSum, cQP, ctx );
            }
            else
            {
              xRateDistOptQuantTS( tu, compID, pSrc, uiAbsSum, cQP, ctx );
            }
          }
          else
          {
            xRateDistOptQuant( tu, compID, pSrc, uiAbsSum, cQP, ctx );
          }

    #if T0196_SELECTIVE_RDOQ
        }
        else
        {
          // selective RDOQ decided that the block is not worth coding: emit an all-zero block
          piQCoef.fill(0);
          uiAbsSum = 0;
        }
    #endif
      }
      else
      {
        Quant::quant( tu, compID, pSrc, uiAbsSum, cQP, ctx );
      }
    }
    
    
    
    void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &pSrc, TCoeff &uiAbsSum, const QpParam &cQP, const Ctx &ctx)
    {
      const FracBitsAccess& fracBits = ctx.getFracBitsAcess();
    
      const SPS &sps            = *tu.cs->sps;
      const CompArea &rect      = tu.blocks[compID];
      const uint32_t uiWidth        = rect.width;
      const uint32_t uiHeight       = rect.height;
      const ChannelType chType  = toChannelType(compID);
      const int channelBitDepth = sps.getBitDepth( chType );
    
      const bool extendedPrecision     = sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag();
      const int  maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(chType);
    
    
      const bool useIntraSubPartitions = tu.cu->ispMode && isLuma(compID);
    
      /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
      * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
      * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
      * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
      */
    
      // Represents scaling through forward transform
      int iTransformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange);
    
    
      if (tu.mtsIdx==MTS_SKIP && extendedPrecision)
    
      {
        iTransformShift = std::max<int>(0, iTransformShift);
      }
    
      double     d64BlockUncodedCost               = 0;
      const uint32_t uiLog2BlockWidth                  = g_aucLog2[uiWidth];
      const uint32_t uiLog2BlockHeight                 = g_aucLog2[uiHeight];
      const uint32_t uiMaxNumCoeff                     = rect.area();
    
      CHECK(compID >= MAX_NUM_TBLOCKS, "Invalid component ID");
    
      int scalingListType = getScalingListType(tu.cu->predMode, compID);
    
      CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");
    
      const TCoeff *plSrcCoeff = pSrc.buf;
            TCoeff *piDstCoeff = tu.getCoeffs(compID).buf;
    
      double *pdCostCoeff  = m_pdCostCoeff;
      double *pdCostSig    = m_pdCostSig;
      double *pdCostCoeff0 = m_pdCostCoeff0;
      int    *rateIncUp    = m_rateIncUp;
      int    *rateIncDown  = m_rateIncDown;
      int    *sigRateDelta = m_sigRateDelta;
      TCoeff *deltaU       = m_deltaU;
    
    
      memset(piDstCoeff, 0, sizeof(*piDstCoeff) * uiMaxNumCoeff);
    
      memset( m_pdCostCoeff,  0, sizeof( double ) *  uiMaxNumCoeff );
      memset( m_pdCostSig,    0, sizeof( double ) *  uiMaxNumCoeff );
      memset( m_rateIncUp,    0, sizeof( int    ) *  uiMaxNumCoeff );
      memset( m_rateIncDown,  0, sizeof( int    ) *  uiMaxNumCoeff );
      memset( m_sigRateDelta, 0, sizeof( int    ) *  uiMaxNumCoeff );
      memset( m_deltaU,       0, sizeof( TCoeff ) *  uiMaxNumCoeff );
    
    
    
      const bool needSqrtAdjustment= TU::needsBlockSizeTrafoScale( tu, compID );
    
    #if JVET_O0919_TS_MIN_QP
      const bool   isTransformSkip = tu.mtsIdx==MTS_SKIP && isLuma(compID);
      const double *const pdErrScale = xGetErrScaleCoeffSL(scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem(isTransformSkip));
      const int    *const piQCoef    = getQuantCoeff(scalingListType, cQP.rem(isTransformSkip), uiLog2BlockWidth, uiLog2BlockHeight);
    #else
    
    Chen-Yen Lai's avatar
    Chen-Yen Lai committed
      const double *const pdErrScale = xGetErrScaleCoeffSL(scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem);
      const int    *const piQCoef    = getQuantCoeff(scalingListType, cQP.rem, uiLog2BlockWidth, uiLog2BlockHeight);
    
      const bool   isTransformSkip = tu.mtsIdx==MTS_SKIP && isLuma(compID);
    
      const bool   enableScalingLists             = getUseScalingList(uiWidth, uiHeight, isTransformSkip);
    
    #if JVET_O0919_TS_MIN_QP
      const int    defaultQuantisationCoefficient = g_quantScales[ needSqrtAdjustment ?1:0][cQP.rem(isTransformSkip)];
      const double defaultErrorScale              = xGetErrScaleCoeffNoScalingList(scalingListType, (uiLog2BlockWidth-1), (uiLog2BlockHeight-1), cQP.rem(isTransformSkip));
    #else
    
      const int    defaultQuantisationCoefficient = g_quantScales[ needSqrtAdjustment ?1:0][cQP.rem];
      const double defaultErrorScale              = xGetErrScaleCoeffNoScalingList(scalingListType, (uiLog2BlockWidth-1), (uiLog2BlockHeight-1), cQP.rem);
    
    #endif
    #if JVET_O0919_TS_MIN_QP
      const int iQBits = QUANT_SHIFT + cQP.per(isTransformSkip) + iTransformShift + (needSqrtAdjustment?-1:0);                   // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
    #else
    
      const int iQBits = QUANT_SHIFT + cQP.per + iTransformShift + (needSqrtAdjustment?-1:0);                   // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
    
    
    
      const TCoeff entropyCodingMinimum = -(1 << maxLog2TrDynamicRange);
      const TCoeff entropyCodingMaximum =  (1 << maxLog2TrDynamicRange) - 1;
    
      CoeffCodingContext cctx(tu, compID, tu.cs->slice->getSignDataHidingEnabledFlag());
      const int    iCGSizeM1      = (1 << cctx.log2CGSize()) - 1;
    
      int     iCGLastScanPos      = -1;
      double  d64BaseCost         = 0;
      int     iLastScanPos        = -1;
    
    
    #if JVET_O0052_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT
      int ctxBinSampleRatio = (compID == COMPONENT_Y) ? MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_LUMA : MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_CHROMA;
      int remRegBins = (uiWidth * uiHeight * ctxBinSampleRatio) >> 4;
    #else
    
      bool      is2x2subblock = ( iCGSizeM1 == 3 );
      int       remGt2Bins    = ( is2x2subblock ? MAX_NUM_GT2_BINS_2x2SUBBLOCK : MAX_NUM_GT2_BINS_4x4SUBBLOCK );
    
      int       remRegBins    = ( is2x2subblock ? MAX_NUM_REG_BINS_2x2SUBBLOCK : MAX_NUM_REG_BINS_4x4SUBBLOCK );
    
    
      double *pdCostCoeffGroupSig = m_pdCostCoeffGroupSig;
      memset( pdCostCoeffGroupSig, 0, ( uiMaxNumCoeff >> cctx.log2CGSize() ) * sizeof( double ) );
    
      const int iCGNum = std::min<int>(JVET_C0024_ZERO_OUT_TH, uiWidth) * std::min<int>(JVET_C0024_ZERO_OUT_TH, uiHeight) >> cctx.log2CGSize();
    
      int iScanPos;
      coeffGroupRDStats rdStats;
    
    #if ENABLE_TRACING
      DTRACE( g_trace_ctx, D_RDOQ, "%d: %3d, %3d, %dx%d, comp=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), rect.x, rect.y, rect.width, rect.height, compID );
    #endif
    
    
      const uint32_t lfnstIdx = tu.cu->lfnstIdx;
    
    
    
      for (int subSetId = iCGNum - 1; subSetId >= 0; subSetId--)
      {
        cctx.initSubblock( subSetId );
    
    
        uint32_t maxNonZeroPosInCG = iCGSizeM1;
        if( lfnstIdx > 0 && ( ( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8 && cctx.cgPosX() == 0 && cctx.cgPosY() == 0 ) ) )
        {
          maxNonZeroPosInCG = 7;
        }
    
    
        memset( &rdStats, 0, sizeof (coeffGroupRDStats));
    
    
        for( int iScanPosinCG = iCGSizeM1; iScanPosinCG > maxNonZeroPosInCG; iScanPosinCG-- )
        {
          iScanPos = cctx.minSubPos() + iScanPosinCG;
          uint32_t    blkPos = cctx.blockPos( iScanPos );
          piDstCoeff[ blkPos ] = 0;
        }
        for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- )
    
        {
          iScanPos = cctx.minSubPos() + iScanPosinCG;
          //===== quantization =====
          uint32_t    uiBlkPos          = cctx.blockPos(iScanPos);
    
          // set coeff
          const int    quantisationCoefficient = (enableScalingLists) ? piQCoef   [uiBlkPos]               : defaultQuantisationCoefficient;
          const double errorScale              = (enableScalingLists) ? pdErrScale[uiBlkPos]               : defaultErrorScale;
          const int64_t  tmpLevel                = int64_t(abs(plSrcCoeff[ uiBlkPos ])) * quantisationCoefficient;
    
          const Intermediate_Int lLevelDouble  = (Intermediate_Int)std::min<int64_t>(tmpLevel, std::numeric_limits<Intermediate_Int>::max() - (Intermediate_Int(1) << (iQBits - 1)));
    
          uint32_t uiMaxAbsLevel        = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t((lLevelDouble + (Intermediate_Int(1) << (iQBits - 1))) >> iQBits));
    
          const double dErr         = double( lLevelDouble );
          pdCostCoeff0[ iScanPos ]  = dErr * dErr * errorScale;
          d64BlockUncodedCost      += pdCostCoeff0[ iScanPos ];
          piDstCoeff[ uiBlkPos ]    = uiMaxAbsLevel;
    
          if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
          {
            iLastScanPos            = iScanPos;
            iCGLastScanPos          = cctx.subSetId();
          }
    
          if ( iLastScanPos >= 0 )
          {
    
    #if ENABLE_TRACING
            uint32_t uiCGPosY = cctx.cgPosX();
            uint32_t uiCGPosX = cctx.cgPosY();
            uint32_t uiPosY = cctx.posY( iScanPos );
            uint32_t uiPosX = cctx.posX( iScanPos );
            DTRACE( g_trace_ctx, D_RDOQ, "%d [%d][%d][%2d:%2d][%2d:%2d]", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), iScanPos, uiBlkPos, uiCGPosX, uiCGPosY, uiPosX, uiPosY );
    #endif
            //===== coefficient level estimation =====
            unsigned ctxIdSig = 0;
            if( iScanPos != iLastScanPos )
            {
              ctxIdSig = cctx.sigCtxIdAbs( iScanPos, piDstCoeff, 0 );
            }
            uint32_t    uiLevel;
            uint8_t ctxOffset     = cctx.ctxOffsetAbs     ();
            uint32_t    uiParCtx      = cctx.parityCtxIdAbs   ( ctxOffset );
            uint32_t    uiGt1Ctx      = cctx.greater1CtxIdAbs ( ctxOffset );
            uint32_t    uiGt2Ctx      = cctx.greater2CtxIdAbs ( ctxOffset );
    
              unsigned  sumAbs = cctx.templateAbsSum( iScanPos, piDstCoeff, 0 );
    
              goRiceParam             = g_auiGoRiceParsCoeff   [ sumAbs ];
              goRiceZero              = g_auiGoRicePosCoeff0[0][ sumAbs ];
            }
    
    
            const BinFracBits fracBitsPar = fracBits.getFracBitsArray( uiParCtx );
            const BinFracBits fracBitsGt1 = fracBits.getFracBitsArray( uiGt1Ctx );
            const BinFracBits fracBitsGt2 = fracBits.getFracBitsArray( uiGt2Ctx );
    
            if( iScanPos == iLastScanPos )
            {
    
    #if JVET_O0052_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT
              uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
                                        lLevelDouble, uiMaxAbsLevel, nullptr, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 1, extendedPrecision, maxLog2TrDynamicRange );
    #else
    
              uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
                                        lLevelDouble, uiMaxAbsLevel, nullptr, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 1, extendedPrecision, maxLog2TrDynamicRange );
    
            }
            else
            {
              DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig );
    
              const BinFracBits fracBitsSig = fracBits.getFracBitsArray( ctxIdSig );
    
    #if JVET_O0052_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT
              uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
                                        lLevelDouble, uiMaxAbsLevel, &fracBitsSig, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 0, extendedPrecision, maxLog2TrDynamicRange );
    #else
    
              uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
                                        lLevelDouble, uiMaxAbsLevel, &fracBitsSig, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 0, extendedPrecision, maxLog2TrDynamicRange );
    
              sigRateDelta[ uiBlkPos ] = ( remRegBins < 4 ? 0 : fracBitsSig.intBits[1] - fracBitsSig.intBits[0] );
    
            }
    
            DTRACE( g_trace_ctx, D_RDOQ, " Lev=%d \n", uiLevel );
            DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ, " CostC0=%d\n", (int64_t)( pdCostCoeff0[iScanPos] ) );
            DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ, " CostC =%d\n", (int64_t)( pdCostCoeff[iScanPos] ) );
    
            deltaU[ uiBlkPos ]        = TCoeff((lLevelDouble - (Intermediate_Int(uiLevel) << iQBits)) >> (iQBits-8));
    
            if( uiLevel > 0 )
            {
    
    #if JVET_O0052_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT
              int rateNow              = xGetICRate( uiLevel,   fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange );
              rateIncUp   [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow;
              rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow;
    #else
    
              int rateNow              = xGetICRate( uiLevel,   fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange );
              rateIncUp   [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow;
              rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow;
    
    #if JVET_O0052_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT
                int rateNow            = xGetICRate( uiLevel,   fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange );
                rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow;
    #else
    
                int rateNow            = xGetICRate( uiLevel,   fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange );
                rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow;
    
              }
              else
              {
                rateIncUp [ uiBlkPos ] = fracBitsGt1.intBits[ 0 ];
              }
    
            }
            piDstCoeff[ uiBlkPos ] = uiLevel;
            d64BaseCost           += pdCostCoeff [ iScanPos ];
    
    
            if( ( (iScanPos & iCGSizeM1) == 0 ) && ( iScanPos > 0 ) )
            {
    
    #if !JVET_O0052_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT
    
              remGt2Bins    = ( is2x2subblock ? MAX_NUM_GT2_BINS_2x2SUBBLOCK : MAX_NUM_GT2_BINS_4x4SUBBLOCK );
              remRegBins    = ( is2x2subblock ? MAX_NUM_REG_BINS_2x2SUBBLOCK : MAX_NUM_REG_BINS_4x4SUBBLOCK ) - remGt2Bins;
    
            else if( remRegBins >= 4 )
    
              int  sumAll = cctx.templateAbsSum(iScanPos, piDstCoeff, 4);
              goRiceParam = g_auiGoRiceParsCoeff[sumAll];
    
              remRegBins -= (uiLevel < 2 ? uiLevel : 3) + (iScanPos != iLastScanPos);
    
          }
          else
          {
            d64BaseCost    += pdCostCoeff0[ iScanPos ];
          }
          rdStats.d64SigCost += pdCostSig[ iScanPos ];
          if (iScanPosinCG == 0 )
          {
            rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
          }
          if (piDstCoeff[ uiBlkPos ] )
          {
            cctx.setSigGroup();
            rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
            rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
            if ( iScanPosinCG != 0 )
            {
              rdStats.iNNZbeforePos0++;
            }
          }
        } //end for (iScanPosinCG)
    
        if (iCGLastScanPos >= 0)
        {
          if( cctx.subSetId() )
          {
            if( !cctx.isSigGroup() )
            {
              const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId() );
              d64BaseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 0) - rdStats.d64SigCost;
              pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
            }
            else
            {
              if (cctx.subSetId() < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
              {
                if ( rdStats.iNNZbeforePos0 == 0 )
                {
                  d64BaseCost -= rdStats.d64SigCost_0;
                  rdStats.d64SigCost -= rdStats.d64SigCost_0;
                }
                // rd-cost if SigCoeffGroupFlag = 0, initialization
                double d64CostZeroCG = d64BaseCost;
    
                const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId() );
    
                if (cctx.subSetId() < iCGLastScanPos)
                {
                  d64BaseCost  += xGetRateSigCoeffGroup(fracBitsSigGroup,1);
                  d64CostZeroCG += xGetRateSigCoeffGroup(fracBitsSigGroup,0);
                  pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup,1);
                }
    
                // try to convert the current coeff group from non-zero to all-zero
                d64CostZeroCG += rdStats.d64UncodedDist;  // distortion for resetting non-zero levels to zero levels
                d64CostZeroCG -= rdStats.d64CodedLevelandDist;   // distortion and level cost for keeping all non-zero levels
                d64CostZeroCG -= rdStats.d64SigCost;     // sig cost for all coeffs, including zero levels and non-zero levels
    
                                                         // if we can save cost, change this block to all-zero block
                if ( d64CostZeroCG < d64BaseCost )
                {
                  cctx.resetSigGroup();
                  d64BaseCost = d64CostZeroCG;
                  if (cctx.subSetId() < iCGLastScanPos)
                  {
                    pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup,0);
                  }
                  // reset coeffs to 0 in this block
    
                  for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- )
    
                  {
                    iScanPos      = cctx.minSubPos() + iScanPosinCG;
                    uint32_t uiBlkPos = cctx.blockPos( iScanPos );
    
                    if (piDstCoeff[ uiBlkPos ])
                    {
                      piDstCoeff [ uiBlkPos ] = 0;
                      pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
                      pdCostSig  [ iScanPos ] = 0;
                    }
                  }
                } // end if ( d64CostZeroCG < d64BaseCost )
              }
            } // end else branch of if( !cctx.isSigGroup() )
          }
          else
          {
            cctx.setSigGroup();
          }
        }
      } //end for (cctx.subSetId)
    
    
      //===== estimate last position =====
      if ( iLastScanPos < 0 )
      {
        return;
      }
    
      double  d64BestCost         = 0;
      int     iBestLastIdxP1      = 0;
    
    
      if( !CU::isIntra( *tu.cu ) && isLuma( compID ) && tu.depth == 0 )
      {
        const BinFracBits fracBitsQtRootCbf = fracBits.getFracBitsArray( Ctx::QtRootCbf() );
        d64BestCost  = d64BlockUncodedCost + xGetICost( fracBitsQtRootCbf.intBits[ 0 ] );
        d64BaseCost += xGetICost( fracBitsQtRootCbf.intBits[ 1 ] );
      }
      else
      {
    
        bool previousCbf       = tu.cbf[COMPONENT_Cb];
        bool lastCbfIsInferred = false;
        if( useIntraSubPartitions )
        {
          bool rootCbfSoFar       = false;
          bool isLastSubPartition = CU::isISPLast(*tu.cu, tu.Y(), compID);
          uint32_t nTus = tu.cu->ispMode == HOR_INTRA_SUBPARTITIONS ? tu.cu->lheight() >> g_aucLog2[tu.lheight()] : tu.cu->lwidth() >> g_aucLog2[tu.lwidth()];
          if( isLastSubPartition )
          {
            TransformUnit* tuPointer = tu.cu->firstTU;
            for( int tuIdx = 0; tuIdx < nTus - 1; tuIdx++ )
            {
              rootCbfSoFar |= TU::getCbfAtDepth(*tuPointer, COMPONENT_Y, tu.depth);
              tuPointer     = tuPointer->next;
            }
            if( !rootCbfSoFar )
            {
              lastCbfIsInferred = true;
            }
          }
          if( !lastCbfIsInferred )
          {
            previousCbf = TU::getPrevTuCbfAtDepth(tu, compID, tu.depth);
          }
        }
    
    #if JVET_O0193_REMOVE_TR_DEPTH_IN_CBF_CTX
        BinFracBits fracBitsQtCbf = fracBits.getFracBitsArray( Ctx::QtCbf[compID]( DeriveCtx::CtxQtCbf( rect.compID, previousCbf, useIntraSubPartitions ) ) );
    #else
    
        BinFracBits fracBitsQtCbf = fracBits.getFracBitsArray( Ctx::QtCbf[compID]( DeriveCtx::CtxQtCbf( rect.compID, tu.depth, previousCbf, useIntraSubPartitions ) ) );
    
    
        if( !lastCbfIsInferred )
        {
          d64BestCost  = d64BlockUncodedCost + xGetICost(fracBitsQtCbf.intBits[0]);
          d64BaseCost += xGetICost(fracBitsQtCbf.intBits[1]);
        }
        else
        {
          d64BestCost  = d64BlockUncodedCost;
        }
    
      }
    
      int lastBitsX[LAST_SIGNIFICANT_GROUPS] = { 0 };
      int lastBitsY[LAST_SIGNIFICANT_GROUPS] = { 0 };
      {
    
        int dim1 = std::min<int>(JVET_C0024_ZERO_OUT_TH, uiWidth);
        int dim2 = std::min<int>(JVET_C0024_ZERO_OUT_TH, uiHeight);