Skip to content
Snippets Groups Projects
QuantRDOQ.cpp 65.3 KiB
Newer Older
  • Learn to ignore specific revisions
  •             d64BestCost     = totalCost;
              }
              if( piDstCoeff[ uiBlkPos ] > 1 )
              {
                bFoundLast = true;
                break;
              }
              d64BaseCost      -= pdCostCoeff[ iScanPos ];
              d64BaseCost      += pdCostCoeff0[ iScanPos ];
            }
            else
            {
              d64BaseCost      -= pdCostSig[ iScanPos ];
            }
          } //end for
          if (bFoundLast)
          {
            break;
          }
        } // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
        DTRACE( g_trace_ctx, D_RDOQ_COST, "%d: %3d, %3d, %dx%d, comp=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ_COST ), rect.x, rect.y, rect.width, rect.height, compID );
        DTRACE( g_trace_ctx, D_RDOQ_COST, "Uncoded=%d\n", (int64_t)( d64BlockUncodedCost ) );
        DTRACE( g_trace_ctx, D_RDOQ_COST, "Coded  =%d\n", (int64_t)( d64BaseCost ) );
    
      } // end for
    
    
      for ( int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
      {
        int blkPos = cctx.blockPos( scanPos );
        TCoeff level = piDstCoeff[ blkPos ];
        uiAbsSum += level;
        piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
      }
    
      //===== clean uncoded coefficients =====
      for ( int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
      {
        piDstCoeff[ cctx.blockPos( scanPos ) ] = 0;
      }
    
      if( cctx.signHiding() && uiAbsSum>=2)
      {
    
        const double inverseQuantScale = double(g_invQuantScales[0][cQP.rem(isTransformSkip)]);
        int64_t rdFactor = (int64_t)(inverseQuantScale * inverseQuantScale * (1 << (2 * cQP.per(isTransformSkip))) / m_dLambda / 16
                                      / (1 << (2 * DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth)))
    
    Hongtao Wang's avatar
    Hongtao Wang committed
                                 + 0.5);
    
        for (int subSet = iCGNum - 1; subSet >= 0; subSet--)
    
        {
          int  subPos         = subSet << cctx.log2CGSize();
          int  firstNZPosInCG = iCGSizeM1 + 1, lastNZPosInCG = -1;
          absSum = 0 ;
    
          for( n = iCGSizeM1; n >= 0; --n )
          {
            if( piDstCoeff[ cctx.blockPos( n + subPos )] )
            {
              lastNZPosInCG = n;
              break;
            }
          }
    
          for( n = 0; n <= iCGSizeM1; n++ )
          {
            if( piDstCoeff[ cctx.blockPos( n + subPos )] )
            {
              firstNZPosInCG = n;
              break;
            }
          }
    
          for( n = firstNZPosInCG; n <= lastNZPosInCG; n++ )
          {
            absSum += int(piDstCoeff[ cctx.blockPos( n + subPos )]);
          }
    
          if(lastNZPosInCG>=0 && lastCG==-1)
          {
            lastCG = 1;
          }
    
          if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
          {
            uint32_t signbit = (piDstCoeff[cctx.blockPos(subPos+firstNZPosInCG)]>0?0:1);
            if( signbit!=(absSum&0x1) )  // hide but need tune
            {
              // calculate the cost
              int64_t minCostInc = std::numeric_limits<int64_t>::max(), curCost = std::numeric_limits<int64_t>::max();
              int minPos = -1, finalChange = 0, curChange = 0;
    
              for( n = (lastCG == 1 ? lastNZPosInCG : iCGSizeM1); n >= 0; --n )
              {
                uint32_t uiBlkPos   = cctx.blockPos( n + subPos );
                if(piDstCoeff[ uiBlkPos ] != 0 )
                {
                  int64_t costUp   = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos];
                  int64_t costDown = rdFactor * (   deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
                    -   ((abs(piDstCoeff[uiBlkPos]) == 1) ? sigRateDelta[uiBlkPos] : 0);
    
                  if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
                  {
                    costDown -= (4<<SCALE_BITS);
                  }
    
                  if(costUp<costDown)
                  {
                    curCost = costUp;
                    curChange =  1;
                  }
                  else
                  {
                    curChange = -1;
                    if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
                    {
                      curCost = std::numeric_limits<int64_t>::max();
                    }
                    else
                    {
                      curCost = costDown;
                    }
                  }
                }
                else
                {
                  curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<SCALE_BITS) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
                  curChange = 1 ;
    
                  if(n<firstNZPosInCG)
                  {
                    uint32_t thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
                    if(thissignbit != signbit )
                    {
                      curCost = std::numeric_limits<int64_t>::max();
                    }
                  }
                }
    
                if( curCost<minCostInc)
                {
                  minCostInc = curCost;
                  finalChange = curChange;
                  minPos = uiBlkPos;
                }
              }
    
              if(piDstCoeff[minPos] == entropyCodingMaximum || piDstCoeff[minPos] == entropyCodingMinimum)
              {
                finalChange = -1;
              }
    
              if(plSrcCoeff[minPos]>=0)
              {
                piDstCoeff[minPos] += finalChange ;
              }
              else
              {
                piDstCoeff[minPos] -= finalChange ;
              }
            }
          }
    
          if(lastCG==1)
          {
            lastCG=0 ;
          }
        }
      }
    }
    
    
    void QuantRDOQ::xRateDistOptQuantTS( TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &coeffs, TCoeff &absSum, const QpParam &qp, const Ctx &ctx )
    {
      const FracBitsAccess& fracBits = ctx.getFracBitsAcess();
    
      const SPS &sps            = *tu.cs->sps;
      const CompArea &rect      = tu.blocks[compID];
      const uint32_t width      = rect.width;
      const uint32_t height     = rect.height;
      const ChannelType chType  = toChannelType(compID);
      const int channelBitDepth = sps.getBitDepth( chType );
    
      const bool extendedPrecision     = sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag();
      const int  maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(chType);
    
      int transformShift = getTransformShift( channelBitDepth, rect.size(), maxLog2TrDynamicRange );
    
      if( extendedPrecision )
      {
        transformShift = std::max<int>( 0, transformShift );
      }
    
            double   blockUncodedCost                   = 0;
      const uint32_t maxNumCoeff                        = rect.area();
    
      CHECK( compID >= MAX_NUM_TBLOCKS, "Invalid component ID" );
    
      int scalingListType = getScalingListType( tu.cu->predMode, compID );
      CHECK( scalingListType >= SCALING_LIST_NUM, "Invalid scaling list" );
    
      const TCoeff *srcCoeff = coeffs.buf;
            TCoeff *dstCoeff = tu.getCoeffs( compID ).buf;
    
      double *costCoeff  = m_pdCostCoeff;
      double *costSig    = m_pdCostSig;
      double *costCoeff0 = m_pdCostCoeff0;
    
      memset( m_pdCostCoeff,  0, sizeof( double ) *  maxNumCoeff );
      memset( m_pdCostSig,    0, sizeof( double ) *  maxNumCoeff );
    
    
      const bool   needsSqrt2Scale = TU::needsSqrt2Scale( tu, compID );  // should always be false - transform-skipped blocks don't require sqrt(2) compensation.
    
      const bool   isTransformSkip = tu.mtsIdx==MTS_SKIP && isLuma(compID);
      const int    qBits = QUANT_SHIFT + qp.per(isTransformSkip) + transformShift + ( needsSqrt2Scale ? -1 : 0 );  // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
      const int    quantisationCoefficient = g_quantScales[needsSqrt2Scale?1:0][qp.rem(isTransformSkip)];
      const double errorScale              = xGetErrScaleCoeff( TU::needsSqrt2Scale( tu, compID ), width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth );
    
    
      const TCoeff entropyCodingMaximum = ( 1 << maxLog2TrDynamicRange ) - 1;
    
    
      uint32_t coeffLevels[3];
      double   coeffLevelError[4];
    
    
      CoeffCodingContext cctx( tu, compID, tu.cs->slice->getSignDataHidingEnabledFlag() );
      const int sbSizeM1    = ( 1 << cctx.log2CGSize() ) - 1;
      double    baseCost    = 0;
      uint32_t  goRiceParam = 0;
    
      double *costSigSubBlock = m_pdCostCoeffGroupSig;
      memset( costSigSubBlock, 0, ( maxNumCoeff >> cctx.log2CGSize() ) * sizeof( double ) );
    
      const int sbNum = width * height >> cctx.log2CGSize();
      int scanPos;
      coeffGroupRDStats rdStats;
    
      bool anySigCG = false;
    
      for( int sbId = 0; sbId < sbNum; sbId++ )
      {
        cctx.initSubblock( sbId );
    
    
    Hongtao Wang's avatar
    Hongtao Wang committed
        int noCoeffCoded = 0;
        baseCost = 0.0;
    
        memset( &rdStats, 0, sizeof (coeffGroupRDStats));
    
        for( int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++ )
        {
    
    Hongtao Wang's avatar
    Hongtao Wang committed
          int lastPosCoded = sbSizeM1;
    
          scanPos = cctx.minSubPos() + scanPosInSB;
          //===== quantization =====
          uint32_t blkPos = cctx.blockPos( scanPos );
    
          // set coeff
          const int64_t          tmpLevel    = int64_t( abs( srcCoeff[blkPos] ) ) * quantisationCoefficient;
          const Intermediate_Int levelDouble = (Intermediate_Int)std::min<int64_t>( tmpLevel, std::numeric_limits<Intermediate_Int>::max() - ( Intermediate_Int( 1 ) << ( qBits - 1 ) ) );
    
    
          uint32_t roundAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t((levelDouble + (Intermediate_Int(1) << (qBits - 1))) >> qBits));
          uint32_t minAbsLevel = (roundAbsLevel > 1 ? roundAbsLevel - 1 : 1);
    
          uint32_t downAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t(levelDouble >> qBits));
          uint32_t upAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), downAbsLevel + 1);
    
          m_testedLevels = 0;
          coeffLevels[m_testedLevels++] = roundAbsLevel;
    
          if (minAbsLevel != roundAbsLevel)
            coeffLevels[m_testedLevels++] = minAbsLevel;
    
          int rightPixel, belowPixel, predPixel;
    
          cctx.neighTS(rightPixel, belowPixel, scanPos, dstCoeff);
          predPixel = cctx.deriveModCoeff(rightPixel, belowPixel, upAbsLevel, 0);
    
          if (upAbsLevel != roundAbsLevel && upAbsLevel != minAbsLevel && predPixel == 1)
            coeffLevels[m_testedLevels++] = upAbsLevel;
    
          double dErr = double(levelDouble);
          coeffLevelError[0] = dErr * dErr * errorScale;
    
          costCoeff0[scanPos] = coeffLevelError[0];
          blockUncodedCost   += costCoeff0[ scanPos ];
          dstCoeff[blkPos]    = coeffLevels[0];
    
    
          //===== coefficient level estimation =====
                unsigned    ctxIdSig = cctx.sigCtxIdAbsTS( scanPos, dstCoeff );
                uint32_t    cLevel;
          const BinFracBits fracBitsPar = fracBits.getFracBitsArray( cctx.parityCtxIdAbsTS() );
    
          goRiceParam = cctx.templateAbsSumTS( scanPos, dstCoeff );
    
          unsigned ctxIdSign = cctx.signCtxIdAbsTS(scanPos, dstCoeff, 0);
          const BinFracBits fracBitsSign = fracBits.getFracBitsArray(ctxIdSign);
    
          const uint8_t     sign         = srcCoeff[ blkPos ] < 0 ? 1 : 0;
    
    
          DTRACE_COND( ( coeffLevels[0] != 0 ), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig );
    
          unsigned gt1CtxId = cctx.lrg1CtxIdAbsTS(scanPos, dstCoeff, 0);
          const BinFracBits fracBitsGr1 = fracBits.getFracBitsArray(gt1CtxId);
    
    
          const BinFracBits fracBitsSig = fracBits.getFracBitsArray( ctxIdSig );
    
    Hongtao Wang's avatar
    Hongtao Wang committed
          bool lastCoeff = false; //
          if (scanPosInSB == lastPosCoded && noCoeffCoded == 0)
          {
            lastCoeff = true;
          }
    
          cLevel = xGetCodedLevelTSPred( costCoeff[ scanPos ], costCoeff0[ scanPos ], costSig[ scanPos ], levelDouble, qBits, errorScale, coeffLevels, coeffLevelError,
                                        &fracBitsSig, fracBitsPar, cctx, fracBits, fracBitsSign, fracBitsGr1, sign, rightPixel, belowPixel, goRiceParam, lastCoeff, extendedPrecision, maxLog2TrDynamicRange);
    
    
    Hongtao Wang's avatar
    Hongtao Wang committed
          if (cLevel > 0)
          {
            noCoeffCoded++;
          }
    
    
          TCoeff level = cLevel;
          dstCoeff[blkPos] = (level != 0 && srcCoeff[blkPos] < 0) ? -level : level;
    
          baseCost           += costCoeff[ scanPos ];
          rdStats.d64SigCost += costSig[ scanPos ];
    
          if( dstCoeff[ blkPos ] )
          {
            cctx.setSigGroup();
            rdStats.d64CodedLevelandDist += costCoeff [ scanPos ] - costSig[ scanPos ];
            rdStats.d64UncodedDist       += costCoeff0[ scanPos ];
          }
        } //end for (iScanPosinCG)
    
        if( !cctx.isSigGroup() )
        {
          const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId( true ) );
          baseCost += xGetRateSigCoeffGroup( fracBitsSigGroup, 0 ) - rdStats.d64SigCost;
          costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup( fracBitsSigGroup, 0 );
        }
    
    Hongtao Wang's avatar
    Hongtao Wang committed
        else if( sbId != sbNum - 1 || anySigCG )
    
        {
          // rd-cost if SigCoeffGroupFlag = 0, initialization
          double costZeroSB = baseCost;
    
          const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId( true ) );
    
          baseCost   += xGetRateSigCoeffGroup( fracBitsSigGroup, 1 );
          costZeroSB += xGetRateSigCoeffGroup( fracBitsSigGroup, 0 );
          costSigSubBlock[ cctx.subSetId() ] = xGetRateSigCoeffGroup( fracBitsSigGroup, 1 );
    
          costZeroSB += rdStats.d64UncodedDist;         // distortion for resetting non-zero levels to zero levels
          costZeroSB -= rdStats.d64CodedLevelandDist;   // distortion and level cost for keeping all non-zero levels
          costZeroSB -= rdStats.d64SigCost;             // sig cost for all coeffs, including zero levels and non-zerl levels
    
          if( costZeroSB < baseCost )
          {
            cctx.resetSigGroup();
            baseCost = costZeroSB;
            costSigSubBlock[ cctx.subSetId() ] = xGetRateSigCoeffGroup( fracBitsSigGroup, 0 );
    
    
    Hongtao Wang's avatar
    Hongtao Wang committed
            for( int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++ )
    
            {
              scanPos = cctx.minSubPos() + scanPosInSB;
              uint32_t blkPos = cctx.blockPos( scanPos );
    
              if( dstCoeff[ blkPos ] )
              {
                dstCoeff[ blkPos ] = 0;
                costCoeff[ scanPos ] = costCoeff0[ scanPos ];
                costSig[ scanPos] = 0;
              }
            }
          }
          else
          {
            anySigCG = true;
          }
        }
      }
    
      //===== estimate last position =====
      for( int scanPos = 0; scanPos < maxNumCoeff; scanPos++ )
      {
        int blkPos = cctx.blockPos( scanPos );
        TCoeff level = dstCoeff[ blkPos ];
    
    /** Rate-distortion optimised quantisation of a transform-skip block with sample-wise prediction.
     *
     *  Every source coefficient is predicted from the already reconstructed (dequantised)
     *  neighbour kept in m_fullCoeff: the left neighbour when tu.cu->bdpcmMode == 1, the
     *  neighbour above otherwise. The prediction residual is quantised, the level is chosen by
     *  xGetCodedLevelTSPred(), and the reconstruction is written back to m_fullCoeff so that it
     *  can serve as predictor for the following positions. After each sub-block, the cost of
     *  coding the sub-block is compared with the cost of zeroing it completely.
     *
     *  \param tu      transform unit; its coefficient buffer for compID receives the signed levels
     *  \param compID  colour component being quantised
     *  \param coeffs  source coefficients (buffer and stride are read)
     *  \param absSum  accumulates the sum of absolute quantised levels (added to, not reset here)
     *  \param qp      quantisation parameter (per/rem are read)
     *  \param ctx     context set providing the fractional bit estimates
     */
    void QuantRDOQ::forwardRDPCM( TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &coeffs, TCoeff &absSum, const QpParam &qp, const Ctx &ctx )
    {
      const FracBitsAccess& fracBits = ctx.getFracBitsAcess();

      const SPS &sps = *tu.cs->sps;
      const CompArea &rect = tu.blocks[compID];
      const uint32_t width = rect.width;
      const uint32_t height = rect.height;
      const ChannelType chType = toChannelType(compID);
      const int channelBitDepth = sps.getBitDepth(chType);

      const bool extendedPrecision = sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag();
      const int  maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(chType);
      const int  dirMode = tu.cu->bdpcmMode;

      int transformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange);

      if (extendedPrecision)
      {
        transformShift = std::max<int>(0, transformShift);
      }

      const uint32_t maxNumCoeff = rect.area();

      CHECK(compID >= MAX_NUM_TBLOCKS, "Invalid component ID");

      int scalingListType = getScalingListType(tu.cu->predMode, compID);
      CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");

      const TCoeff *srcCoeff = coeffs.buf;
      TCoeff *dstCoeff = tu.getCoeffs(compID).buf;

      double *costCoeff = m_pdCostCoeff;
      double *costSig = m_pdCostSig;
      double *costCoeff0 = m_pdCostCoeff0;

      memset(m_pdCostCoeff, 0, sizeof(double) *  maxNumCoeff);
      memset(m_pdCostSig, 0, sizeof(double) *  maxNumCoeff);
      memset(m_fullCoeff, 0, sizeof(TCoeff) * maxNumCoeff);

      const bool   needsSqrt2Scale = TU::needsSqrt2Scale(tu, compID);  // should always be false - transform-skipped blocks don't require sqrt(2) compensation.

      const bool   isTransformSkip = tu.mtsIdx==MTS_SKIP && isLuma(compID);
      const int    qBits = QUANT_SHIFT + qp.per(isTransformSkip) + transformShift + ( needsSqrt2Scale ? -1 : 0 );  // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
      const int    quantisationCoefficient = g_quantScales[needsSqrt2Scale ? 1 : 0][qp.rem(isTransformSkip)];
      const double errorScale = xGetErrScaleCoeff(needsSqrt2Scale, width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth);

      // parameters of the inverse quantiser used to rebuild the predictor samples
      TrQuantParams trQuantParams;

      trQuantParams.rightShift = (IQUANT_SHIFT - (transformShift + qp.per(isTransformSkip)));
      trQuantParams.qScale = g_invQuantScales[needsSqrt2Scale ? 1 : 0][qp.rem(isTransformSkip)];

      const TCoeff entropyCodingMaximum = (1 << maxLog2TrDynamicRange) - 1;

      uint32_t coeffLevels[3];
      double   coeffLevelError[4];

      CoeffCodingContext cctx(tu, compID, tu.cs->slice->getSignDataHidingEnabledFlag());
      const int sbSizeM1 = (1 << cctx.log2CGSize()) - 1;
      double    baseCost = 0;
      uint32_t  goRiceParam = 0;

      double *costSigSubBlock = m_pdCostCoeffGroupSig;
      memset(costSigSubBlock, 0, (maxNumCoeff >> cctx.log2CGSize()) * sizeof(double));

      const int sbNum = width * height >> cctx.log2CGSize();
      int scanPos;
      coeffGroupRDStats rdStats;

      bool anySigCG = false;

      for (int sbId = 0; sbId < sbNum; sbId++)
      {
        cctx.initSubblock(sbId);

        int noCoeffCoded = 0;   // number of non-zero levels chosen so far in this sub-block
        baseCost = 0.0;

        memset(&rdStats, 0, sizeof(coeffGroupRDStats));

        for (int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++)
        {
          scanPos = cctx.minSubPos() + scanPosInSB;
          //===== quantization =====
          uint32_t blkPos = cctx.blockPos(scanPos);

          // predictor: reconstructed neighbour to the left (dirMode == 1) or above (otherwise);
          // positions in the first column / row have no neighbour and are predicted from 0
          const int posX = cctx.posX(scanPos);
          const int posY = cctx.posY(scanPos);
          const int posS = (1 == dirMode) ? posX : posY;
          const int posNb = (1 == dirMode) ? (posX - 1) + posY * coeffs.stride : posX + (posY - 1) * coeffs.stride;
          TCoeff predCoeff = (0 != posS) ? m_fullCoeff[posNb] : 0;

          // set coeff
          const int64_t          tmpLevel = int64_t(abs(srcCoeff[blkPos] - predCoeff)) * quantisationCoefficient;
          const Intermediate_Int levelDouble = (Intermediate_Int)std::min<int64_t>(tmpLevel, std::numeric_limits<Intermediate_Int>::max() - (Intermediate_Int(1) << (qBits - 1)));

          uint32_t roundAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t((levelDouble + (Intermediate_Int(1) << (qBits - 1))) >> qBits));
          uint32_t minAbsLevel = (roundAbsLevel > 1 ? roundAbsLevel - 1 : 1);

          // candidate levels handed to xGetCodedLevelTSPred(): the rounded level and, if different, one alternative
          m_testedLevels = 0;
          coeffLevels[m_testedLevels++] = roundAbsLevel;

          if (minAbsLevel != roundAbsLevel)
          {
            coeffLevels[m_testedLevels++] = minAbsLevel;
          }

          double dErr = double(levelDouble);
          coeffLevelError[0]  = dErr * dErr * errorScale;

          costCoeff0[scanPos] = coeffLevelError[0];
          dstCoeff[blkPos]    = coeffLevels[0];

          //===== coefficient level estimation =====
          unsigned    ctxIdSig = cctx.sigCtxIdAbsTS(scanPos, dstCoeff);
          uint32_t    cLevel;
          const BinFracBits fracBitsPar = fracBits.getFracBitsArray(cctx.parityCtxIdAbsTS());

          goRiceParam = cctx.templateAbsSumTS(scanPos, dstCoeff);

          unsigned ctxIdSign = cctx.signCtxIdAbsTS(scanPos, dstCoeff, dirMode);
          const BinFracBits fracBitsSign = fracBits.getFracBitsArray(ctxIdSign);

          const uint8_t     sign = srcCoeff[blkPos] - predCoeff < 0 ? 1 : 0;

          unsigned gt1CtxId = cctx.lrg1CtxIdAbsTS(scanPos, dstCoeff, dirMode);
          const BinFracBits fracBitsGr1 = fracBits.getFracBitsArray(gt1CtxId);

          DTRACE_COND((dstCoeff[blkPos] != 0), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig);

          const BinFracBits fracBitsSig = fracBits.getFracBitsArray(ctxIdSig);

          // last position of a sub-block in which no non-zero level has been chosen yet
          const bool lastCoeff = (scanPosInSB == sbSizeM1) && (noCoeffCoded == 0);

          int rightPixel, belowPixel;
          cctx.neighTS(rightPixel, belowPixel, scanPos, dstCoeff);
          cLevel = xGetCodedLevelTSPred(costCoeff[scanPos], costCoeff0[scanPos], costSig[scanPos], levelDouble, qBits, errorScale, coeffLevels, coeffLevelError,
            &fracBitsSig, fracBitsPar, cctx, fracBits, fracBitsSign, fracBitsGr1, sign, rightPixel, belowPixel, goRiceParam, lastCoeff, extendedPrecision, maxLog2TrDynamicRange);

          if (cLevel > 0)
          {
            noCoeffCoded++;
          }

          dstCoeff[blkPos] = cLevel;

          if (sign)
          {
            dstCoeff[blkPos] = -dstCoeff[blkPos];
          }

          // reconstruct this position so that it can act as predictor for later positions
          xDequantSample( m_fullCoeff[blkPos], dstCoeff[blkPos], trQuantParams );
          m_fullCoeff[blkPos] += predCoeff;

          baseCost += costCoeff[scanPos];
          rdStats.d64SigCost += costSig[scanPos];

          if (dstCoeff[blkPos])
          {
            cctx.setSigGroup();
            rdStats.d64CodedLevelandDist += costCoeff[scanPos] - costSig[scanPos];
            rdStats.d64UncodedDist += costCoeff0[scanPos];
          }
        } //end for (iScanPosinCG)

        if (!cctx.isSigGroup())
        {
          const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray(cctx.sigGroupCtxId(true));
          baseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 0) - rdStats.d64SigCost;
          costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
        }
        else if (sbId != sbNum - 1 || anySigCG)
        {
          // rd-cost if SigCoeffGroupFlag = 0, initialization
          double costZeroSB = baseCost;

          const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray(cctx.sigGroupCtxId(true));

          baseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 1);
          costZeroSB += xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
          costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 1);

          costZeroSB += rdStats.d64UncodedDist;         // distortion for resetting non-zero levels to zero levels
          costZeroSB -= rdStats.d64CodedLevelandDist;   // distortion and level cost for keeping all non-zero levels
          costZeroSB -= rdStats.d64SigCost;             // sig cost for all coeffs, including zero levels and non-zero levels

          if (costZeroSB < baseCost)
          {
            cctx.resetSigGroup();
            baseCost = costZeroSB;
            costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0);

            for (int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++)
            {
              scanPos = cctx.minSubPos() + scanPosInSB;
              uint32_t blkPos = cctx.blockPos(scanPos);

              const int posX = cctx.posX(scanPos);
              const int posY = cctx.posY(scanPos);
              const int posS = (1 == dirMode) ? posX : posY;
              const int posNb = (1 == dirMode) ? (posX - 1) + posY * coeffs.stride : posX + (posY - 1) * coeffs.stride;
              // With a zero level the reconstruction equals its predictor. m_fullCoeff is addressed
              // by block position everywhere else (blkPos / posNb), so the update must use blkPos
              // as well; the previous code indexed it with the scan position here.
              m_fullCoeff[blkPos] = (0 != posS) ? m_fullCoeff[posNb] : 0;

              if (dstCoeff[blkPos])
              {
                dstCoeff[blkPos] = 0;
                costCoeff[scanPos] = costCoeff0[scanPos];
                costSig[scanPos] = 0;
              }
            }
          }
          else
          {
            anySigCG = true;
          }
        }
      }

      //===== accumulate the absolute level sum =====
      // dstCoeff already carries the sign, hence abs(). Without this accumulation the
      // output parameter absSum would never be written by this function.
      for (int pos = 0; pos < int(maxNumCoeff); pos++)
      {
        const TCoeff level = dstCoeff[cctx.blockPos(pos)];
        absSum += abs(level);
      }
    }
    
    /** Dequantises a single level.
     *
     *  Computes coeff * trQuantParams.qScale and then either applies a rounding right shift by
     *  trQuantParams.rightShift (when it is positive) or scales the product up by
     *  2^(-rightShift) (when it is zero or negative).
     *
     *  \param pRes           receives the dequantised value
     *  \param coeff          quantised level (only read)
     *  \param trQuantParams  inverse quantiser scale and shift
     */
    void QuantRDOQ::xDequantSample(TCoeff& pRes, TCoeff& coeff, const TrQuantParams& trQuantParams)
    {
      const auto scaled = Intermediate_Int(coeff) * trQuantParams.qScale;

      if (trQuantParams.rightShift > 0)
      {
        // add half of the divisor so that the shift rounds instead of truncating
        const Intermediate_Int qAdd = Intermediate_Int(1) << (trQuantParams.rightShift - 1);
        pRes = TCoeff((scaled + qAdd) >> trQuantParams.rightShift);
      }
      else
      {
        // 'scaled' is negative for negative levels, and left-shifting a negative signed value
        // is undefined behaviour before C++20; multiply by the power of two instead.
        const Intermediate_Int gain = Intermediate_Int(1) << -trQuantParams.rightShift;
        pRes = TCoeff(scaled * gain);
      }
    }
    
    
    /** Picks the cheapest level among the candidates in coeffLevels[0 .. m_testedLevels-1].
     *
     *  Unless this is the last position (isLast) or the first candidate is 3 or larger, the zero
     *  level competes as well, with cost rd64CodedCost0 plus the rate of a significance flag of 0.
     *  For every candidate the distortion is stored in coeffLevelError[index + 1] and the rate is
     *  estimated by xGetICRateTS() on the level returned by cctx.deriveModCoeff().
     *
     *  \param rd64CodedCost     out: cost of the selected level
     *  \param rd64CodedCost0    in:  distortion when the coefficient is quantised to zero
     *  \param rd64CodedCostSig  out: significance-flag cost belonging to the selected level
     *  \return the selected absolute level (0 when the zero level wins)
     */
    inline uint32_t QuantRDOQ::xGetCodedLevelTSPred(double&            rd64CodedCost,
      double&            rd64CodedCost0,
      double&            rd64CodedCostSig,
      Intermediate_Int    levelDouble,
      int                 qBits,
      double              errorScale,
      uint32_t coeffLevels[],
      double coeffLevelError[],
      const BinFracBits* fracBitsSig,
      const BinFracBits& fracBitsPar,
      CoeffCodingContext& cctx,
      const FracBitsAccess& fracBitsAccess,
      const BinFracBits& fracBitsSign,
      const BinFracBits& fracBitsGt1,
      const uint8_t      sign,
      int                rightPixel,
      int                belowPixel,
      uint16_t           ricePar,
      bool               isLast,
      bool               useLimitedPrefixLength,
      const int          maxLog2TrDynamicRange
    ) const
    {
      uint32_t chosenLevel = 0;

      const bool zeroIsCandidate = !isLast && coeffLevels[0] < 3;
      if (!zeroIsCandidate)
      {
        // zero is not allowed here: any tested level will beat this initial cost
        rd64CodedCost = MAX_DOUBLE;
      }
      else
      {
        rd64CodedCostSig = xGetRateSigCoef(*fracBitsSig, 0);
        rd64CodedCost    = rd64CodedCost0 + rd64CodedCostSig;
        if (coeffLevels[0] == 0)
        {
          // rounding already gave zero, nothing else to test
          return chosenLevel;
        }
      }

      // rate of signalling "significant"; charged to every non-zero candidate unless isLast
      double nonZeroSigCost = 0;
      if (!isLast)
      {
        nonZeroSigCost = xGetRateSigCoef(*fracBitsSig, 1);
      }

      for (int cand = 0; cand < m_testedLevels; cand++)
      {
        const int    candLevel = coeffLevels[cand];
        const double residual  = double(levelDouble - (Intermediate_Int(candLevel) << qBits));

        // slot 0 of coeffLevelError belongs to the zero level, so candidates start at slot 1
        coeffLevelError[cand + 1] = residual * residual * errorScale;

        const int modLevel = cctx.deriveModCoeff(rightPixel, belowPixel, candLevel, m_bdpcm);
        double candCost = coeffLevelError[cand + 1] + xGetICost(xGetICRateTS(modLevel, fracBitsPar, cctx, fracBitsAccess, fracBitsSign, fracBitsGt1, sign, ricePar, useLimitedPrefixLength, maxLog2TrDynamicRange));
        candCost += nonZeroSigCost;

        if (candCost < rd64CodedCost)
        {
          chosenLevel      = candLevel;
          rd64CodedCost    = candCost;
          rd64CodedCostSig = nonZeroSigCost;
        }
      }

      return chosenLevel;
    }
    
    
    inline int QuantRDOQ::xGetICRateTS( const uint32_t            absLevel,
                                        const BinFracBits&        fracBitsPar,
                                        const CoeffCodingContext& cctx,
                                        const FracBitsAccess&     fracBitsAccess,
                                        const BinFracBits&        fracBitsSign,
    
                                        const uint8_t             sign,
                                        const uint16_t            ricePar,
                                        const bool                useLimitedPrefixLength,
                                        const int                 maxLog2TrDynamicRange  ) const
    {
      int rate = fracBitsSign.intBits[sign];
    
    
      if( absLevel > 1 )
      {
        rate += fracBitsGt1.intBits[1];
        rate += fracBitsPar.intBits[( absLevel - 2 ) & 1];
    
              int cutoffVal = 2;
        const int numGtBins = 4;
        for( int i = 0; i < numGtBins; i++ )
        {
          if( absLevel >= cutoffVal )
          {
            const uint16_t ctxGtX = cctx.greaterXCtxIdAbsTS( cutoffVal>>1 );
            const BinFracBits &fracBitsGtX = fracBitsAccess.getFracBitsArray( ctxGtX );
            unsigned gtX = ( absLevel >= ( cutoffVal + 2 ) );
            rate += fracBitsGtX.intBits[gtX];
          }
          cutoffVal += 2;
        }
    
        if( absLevel >= cutoffVal )
        {
          uint32_t symbol = ( absLevel - cutoffVal ) >> 1;
          uint32_t length;
          const int threshold = COEF_REMAIN_BIN_REDUCTION;
          if( symbol < ( threshold << ricePar ) )
          {
            length = symbol >> ricePar;
            rate  += ( length + 1 + ricePar ) << SCALE_BITS;
          }
          else if( useLimitedPrefixLength )
          {
            const uint32_t maximumPrefixLength = ( 32 - ( COEF_REMAIN_BIN_REDUCTION + maxLog2TrDynamicRange ) );
    
            uint32_t prefixLength = 0;
            uint32_t suffix = ( symbol >> ricePar ) - COEF_REMAIN_BIN_REDUCTION;
    
            while( ( prefixLength < maximumPrefixLength ) && ( suffix > ( ( 2 << prefixLength ) - 2 ) ) )
            {
              prefixLength++;
            }
    
            const uint32_t suffixLength = ( prefixLength == maximumPrefixLength ) ? ( maxLog2TrDynamicRange - ricePar ) : ( prefixLength + 1/*separator*/ );
    
            rate += ( COEF_REMAIN_BIN_REDUCTION + prefixLength + suffixLength + ricePar ) << SCALE_BITS;
          }
          else
          {
            length = ricePar;
            symbol = symbol - ( threshold << ricePar );
            while( symbol >= ( 1 << length ) )
            {
              symbol -= ( 1 << ( length++ ) );
            }
            rate += ( threshold + length + 1 - ricePar + length ) << SCALE_BITS;
          }
        }
      }
      else if( absLevel == 1 )
      {
        rate += fracBitsGt1.intBits[0];
      }
      else
      {
        rate = 0;
      }
      return rate;
    }