Skip to content
Snippets Groups Projects
QuantRDOQ.cpp 75.5 KiB
Newer Older
  • Learn to ignore specific revisions
  •     for ( ctxId = 0; ctxId < g_uiGroupIdx[dim2-1]; ctxId++)
        {
          const BinFracBits fB = fracBits.getFracBitsArray( cctx.lastYCtxId(ctxId) );
          lastBitsY[ ctxId ]   = bitsY + fB.intBits[ 0 ];
          bitsY               +=         fB.intBits[ 1 ];
        }
        lastBitsY[ctxId] = bitsY;
      }
    
    
      bool bFoundLast = false;
      for (int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
      {
        d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
        if (cctx.isSigGroup( iCGScanPos ) )
        {
    
          uint32_t maxNonZeroPosInCG = iCGSizeM1;
          if( lfnstIdx > 0 && ( ( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8 && cctx.cgPosX() == 0 && cctx.cgPosY() == 0 ) ) )
          {
            maxNonZeroPosInCG = 7;
          }
          for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- )
    
          {
            iScanPos = iCGScanPos * (iCGSizeM1 + 1) + iScanPosinCG;
    
            if (iScanPos > iLastScanPos)
            {
              continue;
            }
            uint32_t   uiBlkPos     = cctx.blockPos( iScanPos );
    
            if( piDstCoeff[ uiBlkPos ] )
            {
              uint32_t   uiPosY = uiBlkPos >> uiLog2BlockWidth;
              uint32_t   uiPosX = uiBlkPos - ( uiPosY << uiLog2BlockWidth );
              double d64CostLast  = xGetRateLast( lastBitsX, lastBitsY, uiPosX, uiPosY );
    
              double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
    
              if( totalCost < d64BestCost )
              {
                iBestLastIdxP1  = iScanPos + 1;
                d64BestCost     = totalCost;
              }
              if( piDstCoeff[ uiBlkPos ] > 1 )
              {
                bFoundLast = true;
                break;
              }
              d64BaseCost      -= pdCostCoeff[ iScanPos ];
              d64BaseCost      += pdCostCoeff0[ iScanPos ];
            }
            else
            {
              d64BaseCost      -= pdCostSig[ iScanPos ];
            }
          } //end for
          if (bFoundLast)
          {
            break;
          }
        } // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
        DTRACE( g_trace_ctx, D_RDOQ_COST, "%d: %3d, %3d, %dx%d, comp=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ_COST ), rect.x, rect.y, rect.width, rect.height, compID );
        DTRACE( g_trace_ctx, D_RDOQ_COST, "Uncoded=%d\n", (int64_t)( d64BlockUncodedCost ) );
        DTRACE( g_trace_ctx, D_RDOQ_COST, "Coded  =%d\n", (int64_t)( d64BaseCost ) );
    
      } // end for
    
    
      for ( int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
      {
        int blkPos = cctx.blockPos( scanPos );
        TCoeff level = piDstCoeff[ blkPos ];
        uiAbsSum += level;
        piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
      }
    
      //===== clean uncoded coefficients =====
      for ( int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
      {
        piDstCoeff[ cctx.blockPos( scanPos ) ] = 0;
      }
    
      if( cctx.signHiding() && uiAbsSum>=2)
      {
    
        const double inverseQuantScale = double(g_invQuantScales[0][cQP.rem(isTransformSkip)]);
        int64_t rdFactor = (int64_t)(inverseQuantScale * inverseQuantScale * (1 << (2 * cQP.per(isTransformSkip))) / m_dLambda / 16
                                      / (1 << (2 * DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth)))
    
    Hongtao Wang's avatar
    Hongtao Wang committed
                                 + 0.5);
    
        for (int subSet = iCGNum - 1; subSet >= 0; subSet--)
    
        {
          int  subPos         = subSet << cctx.log2CGSize();
          int  firstNZPosInCG = iCGSizeM1 + 1, lastNZPosInCG = -1;
          absSum = 0 ;
    
          for( n = iCGSizeM1; n >= 0; --n )
          {
            if( piDstCoeff[ cctx.blockPos( n + subPos )] )
            {
              lastNZPosInCG = n;
              break;
            }
          }
    
          for( n = 0; n <= iCGSizeM1; n++ )
          {
            if( piDstCoeff[ cctx.blockPos( n + subPos )] )
            {
              firstNZPosInCG = n;
              break;
            }
          }
    
          for( n = firstNZPosInCG; n <= lastNZPosInCG; n++ )
          {
            absSum += int(piDstCoeff[ cctx.blockPos( n + subPos )]);
          }
    
          if(lastNZPosInCG>=0 && lastCG==-1)
          {
            lastCG = 1;
          }
    
          if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
          {
            uint32_t signbit = (piDstCoeff[cctx.blockPos(subPos+firstNZPosInCG)]>0?0:1);
            if( signbit!=(absSum&0x1) )  // hide but need tune
            {
              // calculate the cost
              int64_t minCostInc = std::numeric_limits<int64_t>::max(), curCost = std::numeric_limits<int64_t>::max();
              int minPos = -1, finalChange = 0, curChange = 0;
    
              for( n = (lastCG == 1 ? lastNZPosInCG : iCGSizeM1); n >= 0; --n )
              {
                uint32_t uiBlkPos   = cctx.blockPos( n + subPos );
                if(piDstCoeff[ uiBlkPos ] != 0 )
                {
                  int64_t costUp   = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos];
                  int64_t costDown = rdFactor * (   deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
                    -   ((abs(piDstCoeff[uiBlkPos]) == 1) ? sigRateDelta[uiBlkPos] : 0);
    
                  if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
                  {
                    costDown -= (4<<SCALE_BITS);
                  }
    
                  if(costUp<costDown)
                  {
                    curCost = costUp;
                    curChange =  1;
                  }
                  else
                  {
                    curChange = -1;
                    if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
                    {
                      curCost = std::numeric_limits<int64_t>::max();
                    }
                    else
                    {
                      curCost = costDown;
                    }
                  }
                }
                else
                {
                  curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<SCALE_BITS) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
                  curChange = 1 ;
    
                  if(n<firstNZPosInCG)
                  {
                    uint32_t thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
                    if(thissignbit != signbit )
                    {
                      curCost = std::numeric_limits<int64_t>::max();
                    }
                  }
                }
    
                if( curCost<minCostInc)
                {
                  minCostInc = curCost;
                  finalChange = curChange;
                  minPos = uiBlkPos;
                }
              }
    
              if(piDstCoeff[minPos] == entropyCodingMaximum || piDstCoeff[minPos] == entropyCodingMinimum)
              {
                finalChange = -1;
              }
    
              if(plSrcCoeff[minPos]>=0)
              {
                piDstCoeff[minPos] += finalChange ;
              }
              else
              {
                piDstCoeff[minPos] -= finalChange ;
              }
            }
          }
    
          if(lastCG==1)
          {
            lastCG=0 ;
          }
        }
      }
    }
    
    
    /** Rate-distortion optimised quantisation using the transform-skip (TS) context helpers.
     *
     * For every sub-block, each coefficient is quantised to a small set of candidate
     * levels, the cheapest candidate is selected by xGetCodedLevelTSPred(), and the
     * whole sub-block is then optionally zeroed when that yields a lower RD cost.
     *
     * \param tu      transform unit; quantised levels are written to tu.getCoeffs(compID)
     * \param compID  colour component being quantised
     * \param coeffs  source (unquantised) coefficients
     * \param absSum  in/out: the absolute value of every final level is added to it
     * \param qp      quantisation parameter (per/rem are queried per transform-skip state)
     * \param ctx     entropy-coding context used to obtain fractional bit estimates
     */
    void QuantRDOQ::xRateDistOptQuantTS( TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &coeffs, TCoeff &absSum, const QpParam &qp, const Ctx &ctx )
    {
      const FracBitsAccess& fracBits = ctx.getFracBitsAcess();

      const SPS &sps            = *tu.cs->sps;
      const CompArea &rect      = tu.blocks[compID];
      const uint32_t width      = rect.width;
      const uint32_t height     = rect.height;
      const ChannelType chType  = toChannelType(compID);
      const int channelBitDepth = sps.getBitDepth( chType );

      const bool extendedPrecision     = sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag();
      const int  maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(chType);

      int transformShift = getTransformShift( channelBitDepth, rect.size(), maxLog2TrDynamicRange );

      if( extendedPrecision )
      {
        transformShift = std::max<int>( 0, transformShift );
      }

            double   blockUncodedCost                   = 0;
      const uint32_t maxNumCoeff                        = rect.area();

      CHECK( compID >= MAX_NUM_TBLOCKS, "Invalid component ID" );

      int scalingListType = getScalingListType( tu.cu->predMode, compID );
      CHECK( scalingListType >= SCALING_LIST_NUM, "Invalid scaling list" );

      const TCoeff *srcCoeff = coeffs.buf;
            TCoeff *dstCoeff = tu.getCoeffs( compID ).buf;

      double *costCoeff  = m_pdCostCoeff;
      double *costSig    = m_pdCostSig;
      double *costCoeff0 = m_pdCostCoeff0;

      memset( m_pdCostCoeff,  0, sizeof( double ) *  maxNumCoeff );
      memset( m_pdCostSig,    0, sizeof( double ) *  maxNumCoeff );

      const bool   needsSqrt2Scale = TU::needsSqrt2Scale( tu, compID );  // should always be false - transform-skipped blocks don't require sqrt(2) compensation.

    #if JVET_P0058_CHROMA_TS
      const bool   isTransformSkip = (tu.mtsIdx[compID] == MTS_SKIP);
    #else
    #if JVET_P0059_CHROMA_BDPCM
      const bool   isTransformSkip = (tu.mtsIdx == MTS_SKIP && isLuma(compID)) || ( tu.cu->bdpcmModeChroma && isChroma(compID) );
    #else
      const bool   isTransformSkip = tu.mtsIdx==MTS_SKIP && isLuma(compID);
    #endif
    #endif

    #if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE
      const int    qBits = QUANT_SHIFT + qp.per(isTransformSkip) + (isTransformSkip ? 0 : transformShift) + (needsSqrt2Scale ? -1 : 0);  // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
    #else
      const int    qBits = QUANT_SHIFT + qp.per(isTransformSkip) + transformShift + ( needsSqrt2Scale ? -1 : 0 );  // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
    #endif

      const int    quantisationCoefficient = g_quantScales[needsSqrt2Scale?1:0][qp.rem(isTransformSkip)];

    #if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE
      const double errorScale              = xGetErrScaleCoeff( TU::needsSqrt2Scale(tu, compID), width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth, isTransformSkip);
    #else
      const double errorScale              = xGetErrScaleCoeff( TU::needsSqrt2Scale( tu, compID ), width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth );
    #endif

      const TCoeff entropyCodingMaximum = ( 1 << maxLog2TrDynamicRange ) - 1;

      // Up to three candidate levels are tested per coefficient: rounded, rounded-1 and (conditionally) floor+1.
      uint32_t coeffLevels[3];
      double   coeffLevelError[4];

      CoeffCodingContext cctx( tu, compID, tu.cs->slice->getSignDataHidingEnabledFlag() );
      const int sbSizeM1    = ( 1 << cctx.log2CGSize() ) - 1;
      double    baseCost    = 0;
      uint32_t  goRiceParam = 0;

      double *costSigSubBlock = m_pdCostCoeffGroupSig;
      memset( costSigSubBlock, 0, ( maxNumCoeff >> cctx.log2CGSize() ) * sizeof( double ) );

      const int sbNum = width * height >> cctx.log2CGSize();
      int scanPos;
      coeffGroupRDStats rdStats;

      bool anySigCG = false;

    #if JVET_P0072_SIMPLIFIED_TSRC
      // Budget of context-coded bins for the whole block: 1.75 bins per coefficient.
      int maxCtxBins = (cctx.maxNumCoeff() * 7) >> 2;
      cctx.setNumCtxBins(maxCtxBins);
    #endif

      for( int sbId = 0; sbId < sbNum; sbId++ )
      {
        cctx.initSubblock( sbId );

        int noCoeffCoded = 0;
        baseCost = 0.0;

        memset( &rdStats, 0, sizeof (coeffGroupRDStats));

    #if JVET_P0072_SIMPLIFIED_TSRC
        rdStats.iNumSbbCtxBins = 0;
    #endif

        for( int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++ )
        {
          int lastPosCoded = sbSizeM1;

          scanPos = cctx.minSubPos() + scanPosInSB;
          //===== quantization =====
          uint32_t blkPos = cctx.blockPos( scanPos );

          // set coeff
          const int64_t          tmpLevel    = int64_t( abs( srcCoeff[blkPos] ) ) * quantisationCoefficient;
          // clamp so that adding the rounding offset below cannot overflow Intermediate_Int
          const Intermediate_Int levelDouble = (Intermediate_Int)std::min<int64_t>( tmpLevel, std::numeric_limits<Intermediate_Int>::max() - ( Intermediate_Int( 1 ) << ( qBits - 1 ) ) );

          uint32_t roundAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t((levelDouble + (Intermediate_Int(1) << (qBits - 1))) >> qBits));
          uint32_t minAbsLevel = (roundAbsLevel > 1 ? roundAbsLevel - 1 : 1);

          uint32_t downAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t(levelDouble >> qBits));
          uint32_t upAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), downAbsLevel + 1);

          m_testedLevels = 0;
          coeffLevels[m_testedLevels++] = roundAbsLevel;

          if (minAbsLevel != roundAbsLevel)
            coeffLevels[m_testedLevels++] = minAbsLevel;

          int rightPixel, belowPixel, predPixel;

          cctx.neighTS(rightPixel, belowPixel, scanPos, dstCoeff);
          predPixel = cctx.deriveModCoeff(rightPixel, belowPixel, upAbsLevel, 0);

          // the rounded-up level is only worth testing when it maps to a modified level of 1
          if (upAbsLevel != roundAbsLevel && upAbsLevel != minAbsLevel && predPixel == 1)
            coeffLevels[m_testedLevels++] = upAbsLevel;

          double dErr = double(levelDouble);
          coeffLevelError[0] = dErr * dErr * errorScale;

          costCoeff0[scanPos] = coeffLevelError[0];
          blockUncodedCost   += costCoeff0[ scanPos ];
          dstCoeff[blkPos]    = coeffLevels[0];

          //===== coefficient level estimation =====
                unsigned    ctxIdSig = cctx.sigCtxIdAbsTS( scanPos, dstCoeff );
                uint32_t    cLevel;
          const BinFracBits fracBitsPar = fracBits.getFracBitsArray( cctx.parityCtxIdAbsTS() );

          goRiceParam = cctx.templateAbsSumTS( scanPos, dstCoeff );

          unsigned ctxIdSign = cctx.signCtxIdAbsTS(scanPos, dstCoeff, 0);
          const BinFracBits fracBitsSign = fracBits.getFracBitsArray(ctxIdSign);

          const uint8_t     sign         = srcCoeff[ blkPos ] < 0 ? 1 : 0;

          DTRACE_COND( ( coeffLevels[0] != 0 ), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig );

          unsigned gt1CtxId = cctx.lrg1CtxIdAbsTS(scanPos, dstCoeff, 0);
          const BinFracBits fracBitsGr1 = fracBits.getFracBitsArray(gt1CtxId);

          const BinFracBits fracBitsSig = fracBits.getFracBitsArray( ctxIdSig );

          // true only for the final position of a sub-block in which no level has been coded so far
          bool lastCoeff = false; //
          if (scanPosInSB == lastPosCoded && noCoeffCoded == 0)
          {
            lastCoeff = true;
          }

    #if JVET_P0072_SIMPLIFIED_TSRC
          int numUsedCtxBins = 0;
          cLevel = xGetCodedLevelTSPred(costCoeff[scanPos], costCoeff0[scanPos], costSig[scanPos], levelDouble, qBits, errorScale, coeffLevels, coeffLevelError,
                                        &fracBitsSig, fracBitsPar, cctx, fracBits, fracBitsSign, fracBitsGr1, sign, rightPixel, belowPixel, goRiceParam, lastCoeff, extendedPrecision, maxLog2TrDynamicRange, numUsedCtxBins);

          cctx.decimateNumCtxBins(numUsedCtxBins);
          rdStats.iNumSbbCtxBins += numUsedCtxBins;
    #else
          cLevel = xGetCodedLevelTSPred( costCoeff[ scanPos ], costCoeff0[ scanPos ], costSig[ scanPos ], levelDouble, qBits, errorScale, coeffLevels, coeffLevelError,
                                        &fracBitsSig, fracBitsPar, cctx, fracBits, fracBitsSign, fracBitsGr1, sign, rightPixel, belowPixel, goRiceParam, lastCoeff, extendedPrecision, maxLog2TrDynamicRange);
    #endif

          if (cLevel > 0)
          {
            noCoeffCoded++;
          }

          TCoeff level = cLevel;
          dstCoeff[blkPos] = (level != 0 && srcCoeff[blkPos] < 0) ? -level : level;

          baseCost           += costCoeff[ scanPos ];
          rdStats.d64SigCost += costSig[ scanPos ];

          if( dstCoeff[ blkPos ] )
          {
            cctx.setSigGroup();
            rdStats.d64CodedLevelandDist += costCoeff [ scanPos ] - costSig[ scanPos ];
            rdStats.d64UncodedDist       += costCoeff0[ scanPos ];
          }
        } //end for (iScanPosinCG)

        if( !cctx.isSigGroup() )
        {
          const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId( true ) );
          baseCost += xGetRateSigCoeffGroup( fracBitsSigGroup, 0 ) - rdStats.d64SigCost;
          costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup( fracBitsSigGroup, 0 );

    #if JVET_P0072_SIMPLIFIED_TSRC
          cctx.increaseNumCtxBins(rdStats.iNumSbbCtxBins); // skip sub-block
    #endif
        }
        else if( sbId != sbNum - 1 || anySigCG )
        {
          // rd-cost if SigCoeffGroupFlag = 0, initialization
          double costZeroSB = baseCost;

          const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId( true ) );

          baseCost   += xGetRateSigCoeffGroup( fracBitsSigGroup, 1 );
          costZeroSB += xGetRateSigCoeffGroup( fracBitsSigGroup, 0 );
          costSigSubBlock[ cctx.subSetId() ] = xGetRateSigCoeffGroup( fracBitsSigGroup, 1 );

          costZeroSB += rdStats.d64UncodedDist;         // distortion for resetting non-zero levels to zero levels
          costZeroSB -= rdStats.d64CodedLevelandDist;   // distortion and level cost for keeping all non-zero levels
          costZeroSB -= rdStats.d64SigCost;             // sig cost for all coeffs, including zero levels and non-zero levels

          if( costZeroSB < baseCost )
          {
            cctx.resetSigGroup();
            baseCost = costZeroSB;
            costSigSubBlock[ cctx.subSetId() ] = xGetRateSigCoeffGroup( fracBitsSigGroup, 0 );

    #if JVET_P0072_SIMPLIFIED_TSRC
            cctx.increaseNumCtxBins(rdStats.iNumSbbCtxBins); // skip sub-block
    #endif

            for( int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++ )
            {
              scanPos = cctx.minSubPos() + scanPosInSB;
              uint32_t blkPos = cctx.blockPos( scanPos );

              if( dstCoeff[ blkPos ] )
              {
                dstCoeff[ blkPos ] = 0;
                costCoeff[ scanPos ] = costCoeff0[ scanPos ];
                costSig[ scanPos] = 0;
              }
            }
          }
          else
          {
            anySigCG = true;
          }
        }
      }

      //===== estimate last position =====
      // NOTE(review): the end of this function was missing from the reviewed text; the
      // accumulation below is reconstructed from the otherwise unused 'level' local and
      // the 'absSum' out-parameter - confirm against the upstream file.
      for( int scanPos = 0; scanPos < maxNumCoeff; scanPos++ )
      {
        int blkPos = cctx.blockPos( scanPos );
        TCoeff level = dstCoeff[ blkPos ];
        absSum += abs( level );
      }
    }

    /** Rate-distortion optimised quantisation with forward residual DPCM prediction.
     *
     * Each source coefficient is predicted from the reconstructed value of its left
     * (dirMode == 1) or upper (otherwise) neighbour held in m_fullCoeff; the prediction
     * difference is quantised, the level is chosen by xGetCodedLevelTSPred(), and the
     * reconstruction buffer is updated by dequantising the chosen level. Sub-blocks are
     * zeroed afterwards when that lowers the RD cost.
     *
     * \param tu      transform unit; quantised levels are written to tu.getCoeffs(compID)
     * \param compID  colour component being quantised
     * \param coeffs  source residual; coeffs.stride is used to address the neighbour in m_fullCoeff
     * \param absSum  in/out: the absolute value of every final level is added to it
     * \param qp      quantisation parameter (per/rem are queried per transform-skip state)
     * \param ctx     entropy-coding context used to obtain fractional bit estimates
     */
    void QuantRDOQ::forwardRDPCM( TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &coeffs, TCoeff &absSum, const QpParam &qp, const Ctx &ctx )
    {
      const FracBitsAccess& fracBits = ctx.getFracBitsAcess();

      const SPS &sps = *tu.cs->sps;
      const CompArea &rect = tu.blocks[compID];
      const uint32_t width = rect.width;
      const uint32_t height = rect.height;
      const ChannelType chType = toChannelType(compID);
      const int channelBitDepth = sps.getBitDepth(chType);

      const bool extendedPrecision = sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag();
      const int  maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(chType);

    #if JVET_P0059_CHROMA_BDPCM
      const int  dirMode = isLuma(compID) ? tu.cu->bdpcmMode : tu.cu->bdpcmModeChroma;
    #else
      const int  dirMode = tu.cu->bdpcmMode;
    #endif

      int transformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange);

      if (extendedPrecision)
      {
        transformShift = std::max<int>(0, transformShift);
      }

      double   blockUncodedCost = 0;
      const uint32_t maxNumCoeff = rect.area();

      CHECK(compID >= MAX_NUM_TBLOCKS, "Invalid component ID");

      int scalingListType = getScalingListType(tu.cu->predMode, compID);
      CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");

      const TCoeff *srcCoeff = coeffs.buf;
      TCoeff *dstCoeff = tu.getCoeffs(compID).buf;

      double *costCoeff = m_pdCostCoeff;
      double *costSig = m_pdCostSig;
      double *costCoeff0 = m_pdCostCoeff0;

      memset(m_pdCostCoeff, 0, sizeof(double) *  maxNumCoeff);
      memset(m_pdCostSig, 0, sizeof(double) *  maxNumCoeff);
      memset(m_fullCoeff, 0, sizeof(TCoeff) * maxNumCoeff);

      const bool   needsSqrt2Scale = TU::needsSqrt2Scale(tu, compID);  // should always be false - transform-skipped blocks don't require sqrt(2) compensation.

    #if JVET_P0058_CHROMA_TS
      const bool   isTransformSkip = (tu.mtsIdx[compID] == MTS_SKIP);
    #else
    #if JVET_P0059_CHROMA_BDPCM
      const bool   isTransformSkip = (tu.mtsIdx == MTS_SKIP && isLuma(compID)) || (tu.cu->bdpcmModeChroma && isChroma(compID) );
    #else
      const bool   isTransformSkip = tu.mtsIdx==MTS_SKIP && isLuma(compID);
    #endif
    #endif

    #if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE
      const int    qBits = QUANT_SHIFT + qp.per(isTransformSkip) + (isTransformSkip? 0 : transformShift) + ( needsSqrt2Scale ? -1 : 0);  // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
    #else
      const int    qBits = QUANT_SHIFT + qp.per(isTransformSkip) + transformShift + ( needsSqrt2Scale ? -1 : 0 );  // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
    #endif

      const int    quantisationCoefficient = g_quantScales[needsSqrt2Scale ? 1 : 0][qp.rem(isTransformSkip)];

    #if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE
      const double errorScale = xGetErrScaleCoeff(TU::needsSqrt2Scale(tu, compID), width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth, isTransformSkip);
    #else
      const double errorScale = xGetErrScaleCoeff(TU::needsSqrt2Scale(tu, compID), width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth);
    #endif

      // Parameters for dequantising each chosen level back into the reconstruction buffer.
      TrQuantParams trQuantParams;

    #if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE
      trQuantParams.rightShift = (IQUANT_SHIFT - ((isTransformSkip ? 0 : transformShift) + qp.per(isTransformSkip)));
    #else
      trQuantParams.rightShift = (IQUANT_SHIFT - (transformShift + qp.per(isTransformSkip)));
    #endif

      trQuantParams.qScale = g_invQuantScales[needsSqrt2Scale ? 1 : 0][qp.rem(isTransformSkip)];

      const TCoeff entropyCodingMaximum = (1 << maxLog2TrDynamicRange) - 1;

      uint32_t coeffLevels[3];
      double   coeffLevelError[4];

      CoeffCodingContext cctx(tu, compID, tu.cs->slice->getSignDataHidingEnabledFlag());
      const int sbSizeM1 = (1 << cctx.log2CGSize()) - 1;
      double    baseCost = 0;
      uint32_t  goRiceParam = 0;

      double *costSigSubBlock = m_pdCostCoeffGroupSig;
      memset(costSigSubBlock, 0, (maxNumCoeff >> cctx.log2CGSize()) * sizeof(double));

      const int sbNum = width * height >> cctx.log2CGSize();
      int scanPos;
      coeffGroupRDStats rdStats;

      bool anySigCG = false;

    #if JVET_P0072_SIMPLIFIED_TSRC
      // Budget of context-coded bins for the whole block: 1.75 bins per coefficient.
      int maxCtxBins = (cctx.maxNumCoeff() * 7) >> 2;
      cctx.setNumCtxBins(maxCtxBins);
    #endif

      for (int sbId = 0; sbId < sbNum; sbId++)
      {
        cctx.initSubblock(sbId);

        int noCoeffCoded = 0;
        baseCost = 0.0;

        memset(&rdStats, 0, sizeof(coeffGroupRDStats));

    #if JVET_P0072_SIMPLIFIED_TSRC
        rdStats.iNumSbbCtxBins = 0;
    #endif

        for (int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++)
        {
          int lastPosCoded = sbSizeM1;

          scanPos = cctx.minSubPos() + scanPosInSB;
          //===== quantization =====
          uint32_t blkPos = cctx.blockPos(scanPos);

          // Predictor: reconstructed left neighbour when dirMode == 1, upper neighbour otherwise;
          // zero in the first column / row of the block (posNb is not dereferenced then).
          const int posX = cctx.posX(scanPos);
          const int posY = cctx.posY(scanPos);
          const int posS = (1 == dirMode) ? posX : posY;
          const int posNb = (1 == dirMode) ? (posX - 1) + posY * coeffs.stride : posX + (posY - 1) * coeffs.stride;
          TCoeff predCoeff = (0 != posS) ? m_fullCoeff[posNb] : 0;

          // set coeff
          const int64_t          tmpLevel = int64_t(abs(srcCoeff[blkPos] - predCoeff)) * quantisationCoefficient;
          // clamp so that adding the rounding offset below cannot overflow Intermediate_Int
          const Intermediate_Int levelDouble = (Intermediate_Int)std::min<int64_t>(tmpLevel, std::numeric_limits<Intermediate_Int>::max() - (Intermediate_Int(1) << (qBits - 1)));

          uint32_t roundAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t((levelDouble + (Intermediate_Int(1) << (qBits - 1))) >> qBits));
          uint32_t minAbsLevel = (roundAbsLevel > 1 ? roundAbsLevel - 1 : 1);

          m_testedLevels = 0;
          coeffLevels[m_testedLevels++] = roundAbsLevel;

          if (minAbsLevel != roundAbsLevel)
            coeffLevels[m_testedLevels++] = minAbsLevel;

          double dErr = double(levelDouble);
          coeffLevelError[0]  = dErr * dErr * errorScale;

          costCoeff0[scanPos] = coeffLevelError[0];
          blockUncodedCost   += costCoeff0[scanPos];
          dstCoeff[blkPos]    = coeffLevels[0];

          //===== coefficient level estimation =====
          unsigned    ctxIdSig = cctx.sigCtxIdAbsTS(scanPos, dstCoeff);
          uint32_t    cLevel;
          const BinFracBits fracBitsPar = fracBits.getFracBitsArray(cctx.parityCtxIdAbsTS());

          goRiceParam = cctx.templateAbsSumTS(scanPos, dstCoeff);

          unsigned ctxIdSign = cctx.signCtxIdAbsTS(scanPos, dstCoeff, dirMode);
          const BinFracBits fracBitsSign = fracBits.getFracBitsArray(ctxIdSign);

          // sign of the prediction residual, not of the source coefficient itself
          const uint8_t     sign = srcCoeff[blkPos] - predCoeff < 0 ? 1 : 0;

          unsigned gt1CtxId = cctx.lrg1CtxIdAbsTS(scanPos, dstCoeff, dirMode);
          const BinFracBits fracBitsGr1 = fracBits.getFracBitsArray(gt1CtxId);

          DTRACE_COND((dstCoeff[blkPos] != 0), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig);

          const BinFracBits fracBitsSig = fracBits.getFracBitsArray(ctxIdSig);

          // true only for the final position of a sub-block in which no level has been coded so far
          bool lastCoeff = false; //
          if (scanPosInSB == lastPosCoded && noCoeffCoded == 0)
          {
            lastCoeff = true;
          }

          int rightPixel, belowPixel;
          cctx.neighTS(rightPixel, belowPixel, scanPos, dstCoeff);

    #if JVET_P0072_SIMPLIFIED_TSRC
          int numUsedCtxBins = 0;
          cLevel = xGetCodedLevelTSPred(costCoeff[scanPos], costCoeff0[scanPos], costSig[scanPos], levelDouble, qBits, errorScale, coeffLevels, coeffLevelError,
            &fracBitsSig, fracBitsPar, cctx, fracBits, fracBitsSign, fracBitsGr1, sign, rightPixel, belowPixel, goRiceParam, lastCoeff, extendedPrecision, maxLog2TrDynamicRange, numUsedCtxBins);
          cctx.decimateNumCtxBins(numUsedCtxBins);
          rdStats.iNumSbbCtxBins += numUsedCtxBins;
    #else
          cLevel = xGetCodedLevelTSPred(costCoeff[scanPos], costCoeff0[scanPos], costSig[scanPos], levelDouble, qBits, errorScale, coeffLevels, coeffLevelError,
            &fracBitsSig, fracBitsPar, cctx, fracBits, fracBitsSign, fracBitsGr1, sign, rightPixel, belowPixel, goRiceParam, lastCoeff, extendedPrecision, maxLog2TrDynamicRange);
    #endif

          if (cLevel > 0)
          {
            noCoeffCoded++;
          }

          dstCoeff[blkPos] = cLevel;

          if (sign)
          {
            dstCoeff[blkPos] = -dstCoeff[blkPos];
          }
          // reconstruct this position so that later positions can predict from it
          xDequantSample( m_fullCoeff[blkPos], dstCoeff[blkPos], trQuantParams );
          m_fullCoeff[blkPos] += predCoeff;

          baseCost += costCoeff[scanPos];
          rdStats.d64SigCost += costSig[scanPos];

          if (dstCoeff[blkPos])
          {
            cctx.setSigGroup();
            rdStats.d64CodedLevelandDist += costCoeff[scanPos] - costSig[scanPos];
            rdStats.d64UncodedDist += costCoeff0[scanPos];
          }
        } //end for (iScanPosinCG)

        if (!cctx.isSigGroup())
        {
          const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray(cctx.sigGroupCtxId(true));
          baseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 0) - rdStats.d64SigCost;
          costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0);

    #if JVET_P0072_SIMPLIFIED_TSRC
          cctx.increaseNumCtxBins(rdStats.iNumSbbCtxBins); // skip sub-block
    #endif
        }
        else if (sbId != sbNum - 1 || anySigCG)
        {
          // rd-cost if SigCoeffGroupFlag = 0, initialization
          double costZeroSB = baseCost;

          const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray(cctx.sigGroupCtxId(true));

          baseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 1);
          costZeroSB += xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
          costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 1);

          costZeroSB += rdStats.d64UncodedDist;         // distortion for resetting non-zero levels to zero levels
          costZeroSB -= rdStats.d64CodedLevelandDist;   // distortion and level cost for keeping all non-zero levels
          costZeroSB -= rdStats.d64SigCost;             // sig cost for all coeffs, including zero levels and non-zero levels

          if (costZeroSB < baseCost)
          {
            cctx.resetSigGroup();
            baseCost = costZeroSB;
            costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0);

    #if JVET_P0072_SIMPLIFIED_TSRC
            cctx.increaseNumCtxBins(rdStats.iNumSbbCtxBins); // skip sub-block
    #endif

            for (int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++)
            {
              scanPos = cctx.minSubPos() + scanPosInSB;
              uint32_t blkPos = cctx.blockPos(scanPos);

              const int posX = cctx.posX(scanPos);
              const int posY = cctx.posY(scanPos);
              const int posS = (1 == dirMode) ? posX : posY;
              const int posNb = (1 == dirMode) ? (posX - 1) + posY * coeffs.stride : posX + (posY - 1) * coeffs.stride;
              // With the level forced to zero the reconstruction equals the predictor.
              // Indexed by blkPos, as everywhere else m_fullCoeff is written or read; the
              // previous code used scanPos here, which addresses a different sample.
              // NOTE(review): this changes encoder decisions - confirm against reference output.
              m_fullCoeff[blkPos] = (0 != posS) ? m_fullCoeff[posNb] : 0;

              if (dstCoeff[blkPos])
              {
                dstCoeff[blkPos] = 0;
                costCoeff[scanPos] = costCoeff0[scanPos];
                costSig[scanPos] = 0;
              }
            }
          }
          else
          {
            anySigCG = true;
          }
        }
      }

      //===== estimate last position =====
      // Report the magnitude of all surviving levels through the absSum out-parameter;
      // previously 'level' was read but never used, so absSum was left untouched.
      for (int scanPos = 0; scanPos < maxNumCoeff; scanPos++)
      {
        int blkPos = cctx.blockPos(scanPos);
        TCoeff level = dstCoeff[blkPos];
        absSum += abs(level);
      }
    }
    
    /** Dequantise a single level.
     *
     * Computes coeff * qScale and then either right-shifts with rounding
     * (rightShift > 0) or scales up by 2^(-rightShift) (rightShift <= 0).
     *
     * \param pRes           receives the dequantised value
     * \param coeff          quantised level (may be negative)
     * \param trQuantParams  supplies qScale and rightShift
     */
    void QuantRDOQ::xDequantSample(TCoeff& pRes, TCoeff& coeff, const TrQuantParams& trQuantParams)
    {
      const Intermediate_Int scaled = Intermediate_Int(coeff) * trQuantParams.qScale;

      if (trQuantParams.rightShift > 0)
      {
        // add half of the divisor so that the shift rounds instead of truncating
        const Intermediate_Int qAdd = Intermediate_Int(1) << (trQuantParams.rightShift - 1);
        pRes = TCoeff((scaled + qAdd) >> trQuantParams.rightShift);
      }
      else
      {
        // Multiply by the power of two rather than left-shifting: 'scaled' is negative for
        // negative levels, and left-shifting a negative signed value is undefined before C++20.
        // (assumes Intermediate_Int is a signed integer type - TODO confirm)
        const Intermediate_Int leftGain = Intermediate_Int(1) << (-trQuantParams.rightShift);
        pRes = TCoeff(scaled * leftGain);
      }
    }
    
    
    /** Chooses, among the candidate absolute levels in coeffLevels, the one with the
     *  lowest cost, where the cost of a candidate is its scaled squared error plus the
     *  cost of the rate returned by xGetICRateTS (plus a significance cost, see below).
     *
     * \param rd64CodedCost          in/out: cost of the best choice found so far; overwritten
     *                               whenever a candidate is cheaper
     * \param rd64CodedCost0         cost term that is combined with the "zero" significance cost
     * \param rd64CodedCostSig       out: significance cost belonging to the selected choice
     * \param levelDouble            unquantized magnitude that the candidates are compared against
     * \param qBits                  shift that brings a candidate level to the scale of levelDouble
     * \param errorScale             factor applied to the squared error
     * \param coeffLevels            candidate absolute levels; m_testedLevels entries are read
     * \param coeffLevelError        out: entries 1..m_testedLevels receive the scaled squared errors
     * \param fracBitsSig            bit costs used for the significance flag
     * \param fracBitsPar            forwarded to xGetICRateTS
     * \param cctx                   coding context; queried for numCtxBins() and deriveModCoeff()
     * \param fracBitsAccess         forwarded to xGetICRateTS
     * \param fracBitsSign           forwarded to xGetICRateTS
     * \param fracBitsGt1            forwarded to xGetICRateTS
     * \param sign                   forwarded to xGetICRateTS
     * \param rightPixel             neighbour value passed to cctx.deriveModCoeff()
     * \param belowPixel             neighbour value passed to cctx.deriveModCoeff()
     * \param ricePar                forwarded to xGetICRateTS
     * \param isLast                 when true, no significance cost is added for the candidates
     * \param useLimitedPrefixLength forwarded to xGetICRateTS
     * \param maxLog2TrDynamicRange  forwarded to xGetICRateTS
     * \param numUsedCtxBins         (JVET_P0072_SIMPLIFIED_TSRC only) out: reset to 0 on entry,
     *                               then incremented as shown below
     * \return the selected absolute level (bestAbsLevel)
     *
     * NOTE(review): this listing seems to be missing lines of the function -- the
     * condition opening the branch that is closed by "} else" below, some #endif
     * directives, and the end of the function (closing braces / final return) are
     * not present. Confirm against the upstream source before relying on it.
     */
    inline uint32_t QuantRDOQ::xGetCodedLevelTSPred(double&            rd64CodedCost,
      double&            rd64CodedCost0,
      double&            rd64CodedCostSig,
      Intermediate_Int    levelDouble,
      int                 qBits,
      double              errorScale,
      uint32_t coeffLevels[],
      double coeffLevelError[],
      const BinFracBits* fracBitsSig,
      const BinFracBits& fracBitsPar,
      CoeffCodingContext& cctx,
      const FracBitsAccess& fracBitsAccess,
      const BinFracBits& fracBitsSign,
      const BinFracBits& fracBitsGt1,
      const uint8_t      sign,
      int                rightPixel,
      int                belowPixel,
      uint16_t           ricePar,
      bool               isLast,
      bool               useLimitedPrefixLength,
      const int          maxLog2TrDynamicRange
    
    #if JVET_P0072_SIMPLIFIED_TSRC
      , int&               numUsedCtxBins
    #endif
    
    ) const
    {
      // Significance cost of a non-zero candidate; stays 0 when isLast is set.
      double currCostSig = 0;
      uint32_t   bestAbsLevel = 0;
    
    #if JVET_P0072_SIMPLIFIED_TSRC
      numUsedCtxBins = 0;
      // Context-bin count reported by xGetICRateTS for the best candidate so far.
      int numBestCtxBin = 0;
    #endif
    
    #if JVET_P0072_SIMPLIFIED_TSRC
        // Cost of signalling "not significant": taken from the context model when
        // cctx.numCtxBins() >= 4, otherwise a flat cost of (1 << SCALE_BITS) is used.
        if (cctx.numCtxBins() >= 4)
          rd64CodedCostSig = xGetRateSigCoef(*fracBitsSig, 0);
        else
          rd64CodedCostSig = xGetICost(1 << SCALE_BITS);
    #else
    
        rd64CodedCostSig = xGetRateSigCoef(*fracBitsSig, 0);
    
        // Cost of coding the coefficient as zero.
        rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig;
    
    #if JVET_P0072_SIMPLIFIED_TSRC
        if (cctx.numCtxBins() >= 4)
          numUsedCtxBins++;
    #endif
    
        // Smallest candidate is already zero: nothing else to evaluate.
        if (coeffLevels[0] == 0)
        {
          return bestAbsLevel;
        }
      }
      else
      {
        // No zero-level cost available here: start from the maximum so that the
        // first evaluated candidate is always accepted.
        rd64CodedCost = MAX_DOUBLE;
      }
    
      if (!isLast)
      {
    
    #if JVET_P0072_SIMPLIFIED_TSRC
        // Cost of signalling "significant", with the same context / flat-cost split as above.
        if (cctx.numCtxBins() >= 4)
          currCostSig = xGetRateSigCoef(*fracBitsSig, 1);
        else
          currCostSig = xGetICost(1 << SCALE_BITS);
    #else
    
        currCostSig = xGetRateSigCoef(*fracBitsSig, 1);
    
    #endif
    #if JVET_P0072_SIMPLIFIED_TSRC
        if (coeffLevels[0] >= 3 && cctx.numCtxBins() >= 4)
          numUsedCtxBins++;
    #endif
    
      }
    
      // Evaluate every candidate; coeffLevels is indexed from 0, coeffLevelError from 1.
      for (int errorInd = 1; errorInd <= m_testedLevels; errorInd++)
      {
        int absLevel = coeffLevels[errorInd - 1];
        double dErr = 0.0;
        // Difference between the unquantized magnitude and the candidate scaled by qBits.
        dErr = double(levelDouble - (Intermediate_Int(absLevel) << qBits));
        coeffLevelError[errorInd] = dErr * dErr * errorScale;
    
    Kato Yusuke's avatar
    Kato Yusuke committed
    #if JVET_P0298_DISABLE_LEVELMAPPING_IN_BYPASS
        // The level is remapped via deriveModCoeff() only when cctx.numCtxBins() >= 4;
        // otherwise the candidate level is used unchanged.
        int modAbsLevel = absLevel;
    
    Kato Yusuke's avatar
    Kato Yusuke committed
        if (cctx.numCtxBins() >= 4) 
        {
    
    Kato Yusuke's avatar
    Kato Yusuke committed
          modAbsLevel = cctx.deriveModCoeff(rightPixel, belowPixel, absLevel, m_bdpcm);
        }
    #else
    
        int modAbsLevel = cctx.deriveModCoeff(rightPixel, belowPixel, absLevel, m_bdpcm);
    
    Kato Yusuke's avatar
    Kato Yusuke committed
    #endif
    
    #if JVET_P0072_SIMPLIFIED_TSRC
        // numCtxBins is filled in by xGetICRateTS for this candidate.
        int numCtxBins = 0;
        double dCurrCost = coeffLevelError[errorInd] + xGetICost(xGetICRateTS(modAbsLevel, fracBitsPar, cctx, fracBitsAccess, fracBitsSign, fracBitsGt1, numCtxBins, sign, ricePar, useLimitedPrefixLength, maxLog2TrDynamicRange));
    #else
    
        double dCurrCost = coeffLevelError[errorInd] + xGetICost(xGetICRateTS(modAbsLevel, fracBitsPar, cctx, fracBitsAccess, fracBitsSign, fracBitsGt1, sign, ricePar, useLimitedPrefixLength, maxLog2TrDynamicRange));
    
    #endif
    
    #if JVET_P0072_SIMPLIFIED_TSRC
        if (cctx.numCtxBins() >= 4)
          dCurrCost += currCostSig; // if cctx.numCtxBins < 4, xGetICRateTS returns a rate that already includes the sign cost; nothing more to add
    #else
    
    
        // Keep the cheapest candidate together with its costs.
        if (dCurrCost < rd64CodedCost)
        {
          bestAbsLevel = absLevel;
          rd64CodedCost = dCurrCost;
          rd64CodedCostSig = currCostSig;
    
    #if JVET_P0072_SIMPLIFIED_TSRC
          numBestCtxBin = numCtxBins;
    #endif
    
    #if JVET_P0072_SIMPLIFIED_TSRC
      // Account for the context bins of the selected candidate.
      numUsedCtxBins += numBestCtxBin;
    #endif
    
    
    inline int QuantRDOQ::xGetICRateTS( const uint32_t            absLevel,
                                        const BinFracBits&        fracBitsPar,
                                        const CoeffCodingContext& cctx,
                                        const FracBitsAccess&     fracBitsAccess,
                                        const BinFracBits&        fracBitsSign,
    
    #if JVET_P0072_SIMPLIFIED_TSRC
                                        int&                      numCtxBins,
    #endif
    
                                        const uint8_t             sign,
                                        const uint16_t            ricePar,
                                        const bool                useLimitedPrefixLength,
                                        const int                 maxLog2TrDynamicRange  ) const
    {
    
     
      
    #if JVET_P0072_SIMPLIFIED_TSRC
      if (cctx.numCtxBins() < 4) // Full by-pass coding 
      {
        int rate = absLevel ? (1 << SCALE_BITS) : 0; // 1 bit to signal sign of non-zero 
    
        uint32_t symbol = absLevel;
    
        uint32_t length;
        const int threshold = COEF_REMAIN_BIN_REDUCTION;
        if (symbol < (threshold << ricePar))
        {
          length = symbol >> ricePar;
          rate += (length + 1 + ricePar) << SCALE_BITS;
        }
        else if (useLimitedPrefixLength)
        {
          const uint32_t maximumPrefixLength = (32 - (COEF_REMAIN_BIN_REDUCTION + maxLog2TrDynamicRange));
    
          uint32_t prefixLength = 0;
          uint32_t suffix = (symbol >> ricePar) - COEF_REMAIN_BIN_REDUCTION;
    
          while ((prefixLength < maximumPrefixLength) && (suffix > ((2 << prefixLength) - 2)))
          {
            prefixLength++;
          }
    
          const uint32_t suffixLength = (prefixLength == maximumPrefixLength) ? (maxLog2TrDynamicRange - ricePar) : (prefixLength + 1/*separator*/);
    
          rate += (COEF_REMAIN_BIN_REDUCTION + prefixLength + suffixLength + ricePar) << SCALE_BITS;
        }
        else
        {
          length = ricePar;
          symbol = symbol - (threshold << ricePar);
          while (symbol >= (1 << length))
          {
            symbol -= (1 << (length++));
          }
          rate += (threshold + length + 1 - ricePar + length) << SCALE_BITS;
        }
    
        return rate;
      }
    
      else if (cctx.numCtxBins() >= 4 && cctx.numCtxBins() < 8) // First pass context coding and all by-pass coding ( Sign flag is not counted here)
      {
        int rate = fracBitsSign.intBits[sign]; // sign bits
        if (absLevel)
          numCtxBins++;
    
        if (absLevel > 1)
        {
          rate += fracBitsGt1.intBits[1];
          rate += fracBitsPar.intBits[(absLevel - 2) & 1];
    
          numCtxBins += 2;
    
          int cutoffVal = 2;
    
          if (absLevel >= cutoffVal)
          {
            uint32_t symbol = (absLevel - cutoffVal) >> 1;
            uint32_t length;
            const int threshold = COEF_REMAIN_BIN_REDUCTION;
            if (symbol < (threshold << ricePar))
            {
              length = symbol >> ricePar;
              rate += (length + 1 + ricePar) << SCALE_BITS;
            }
            else if (useLimitedPrefixLength)
            {
              const uint32_t maximumPrefixLength = (32 - (COEF_REMAIN_BIN_REDUCTION + maxLog2TrDynamicRange));
    
              uint32_t prefixLength = 0;
              uint32_t suffix = (symbol >> ricePar) - COEF_REMAIN_BIN_REDUCTION;
    
              while ((prefixLength < maximumPrefixLength) && (suffix > ((2 << prefixLength) - 2)))
              {
                prefixLength++;
              }
    
              const uint32_t suffixLength = (prefixLength == maximumPrefixLength) ? (maxLog2TrDynamicRange - ricePar) : (prefixLength + 1/*separator*/);
    
              rate += (COEF_REMAIN_BIN_REDUCTION + prefixLength + suffixLength + ricePar) << SCALE_BITS;
            }
            else
            {
              length = ricePar;
              symbol = symbol - (threshold << ricePar);
              while (symbol >= (1 << length))
              {
                symbol -= (1 << (length++));
              }
              rate += (threshold + length + 1 - ricePar + length) << SCALE_BITS;
            }
          }
        }
        else if (absLevel == 1)
        {
          rate += fracBitsGt1.intBits[0];
          numCtxBins++;
        }
        else
        {
          rate = 0;
        }
        return rate;
    
      }
    
    #endif
      
      
    
      int rate = fracBitsSign.intBits[sign];
    
    
    #if JVET_P0072_SIMPLIFIED_TSRC
      if (absLevel)
        numCtxBins++;
    #endif