    /* The copyright in this software is being made available under the BSD
     * License, included below. This software may be subject to other third party
     * and contributor rights, including patent rights, and no such rights are
     * granted under this license.
     *
    
     * Copyright (c) 2010-2019, ITU/ISO/IEC
    
     * All rights reserved.
     *
     * Redistribution and use in source and binary forms, with or without
     * modification, are permitted provided that the following conditions are met:
     *
     *  * Redistributions of source code must retain the above copyright notice,
     *    this list of conditions and the following disclaimer.
     *  * Redistributions in binary form must reproduce the above copyright notice,
     *    this list of conditions and the following disclaimer in the documentation
     *    and/or other materials provided with the distribution.
     *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
     *    be used to endorse or promote products derived from this software without
     *    specific prior written permission.
     *
     * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
     * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
     * THE POSSIBILITY OF SUCH DAMAGE.
     */
    
    /** \file     QuantRDOQ.cpp
        \brief    transform and quantization class
    */
    
    #include "QuantRDOQ.h"
    
    #include "UnitTools.h"
    #include "ContextModelling.h"
    #include "CodingStructure.h"
    #include "CrossCompPrediction.h"
    
    #include "dtrace_next.h"
    #include "dtrace_buffer.h"
    
    #include <stdlib.h>
    #include <limits>
    #include <memory.h>
    
    
    // Rate-distortion bookkeeping for a single coefficient group.
    // xRateDistOptQuant keeps one instance and zeroes it with memset at the start of
    // every sub-block iteration, so the members must stay plain data.
    // NOTE(review): the code that fills the members lies outside the visible part of the
    // file; the hedged descriptions below are derived from the member names only.
    struct coeffGroupRDStats
    {
      int    iNNZbeforePos0;       // presumably the number of non-zero levels found before position 0 of the group -- TODO confirm
      double d64CodedLevelandDist; // distortion and level cost only
      double d64UncodedDist;    // all zero coded block distortion
      double d64SigCost;           // presumably the accumulated significance-flag cost of the group -- TODO confirm
      double d64SigCost_0;         // presumably the significance-flag cost attributed to position 0 -- TODO confirm
    };
    
    
    //! \ingroup CommonLib
    //! \{
    
    // ====================================================================================================================
    // Constants
    // ====================================================================================================================
    
    
    // ====================================================================================================================
    // Static functions
    // ====================================================================================================================
    
    // ====================================================================================================================
    // QuantRDOQ class member functions
    // ====================================================================================================================
    
    
    /** Build an RDOQ quantiser, optionally on top of an existing quantiser.
     *
     * \param other quantiser passed on to the Quant base constructor; may be null.
     *              A non-null value has to be a QuantRDOQ, otherwise the CHECK fires.
     *
     * With scaling lists compiled in, the error-scale tables are set up through
     * xInitScalingList(), which allocates them when \p other is null and otherwise
     * reuses the pointers held by \p other.
     */
    QuantRDOQ::QuantRDOQ( const Quant* other ) : Quant( other )
    {
      const QuantRDOQ* const otherAsRdoq = dynamic_cast<const QuantRDOQ*>( other );

      // A null result is only acceptable when no source quantiser was supplied at all.
      CHECK( other != nullptr && otherAsRdoq == nullptr, "The RDOQ cast must be successfull!" );
    #if HEVC_USE_SCALING_LISTS
      xInitScalingList( otherAsRdoq );
    #endif
    }
    
    // Destructor. With scaling lists compiled in it hands off to xDestroyScalingList(),
    // which releases the error-scale tables only when this instance owns them.
    QuantRDOQ::~QuantRDOQ()
    {
    #if HEVC_USE_SCALING_LISTS
      xDestroyScalingList();
    #endif
    }
    
    
    
    
    /** Choose the quantised level with the lowest rate-distortion cost for one coefficient.
     *
     * The non-zero candidates are uiMaxAbsLevel and, when uiMaxAbsLevel is larger than 1,
     * uiMaxAbsLevel - 1. Level 0 takes part in the comparison only when the coefficient is
     * not the last one (bLast is false) and uiMaxAbsLevel is below 3.
     *
     * \param rd64CodedCost          [out] cost of the selected level (MAX_DOUBLE if nothing was evaluated)
     * \param rd64CodedCost0         [in]  cost of level 0 without its significance flag
     * \param rd64CodedCostSig       [out] significance-flag cost of the selected level; only written
     *                                     when level 0 is evaluated or a non-zero candidate wins
     * \param lLevelDouble           scaled coefficient magnitude before the right shift by iQBits
     * \param uiMaxAbsLevel          largest level that is considered
     * \param fracBitsSig            rate table of the significance flag; dereferenced only when bLast is false
     * \param fracBitsPar            rate table handed to xGetICRate()
     * \param fracBitsGt1            rate table handed to xGetICRate()
     * \param fracBitsGt2            rate table handed to xGetICRate()
     * \param remGt2Bins             handed to xGetICRate()
     * \param remRegBins             handed to xGetICRate()
     * \param goRiceZero             handed to xGetICRate()
     * \param ui16AbsGoRice          handed to xGetICRate()
     * \param iQBits                 shift that maps a level back onto the scale of lLevelDouble
     * \param errorScale             factor applied to the squared quantisation error
     * \param bLast                  true if no significance flag has to be paid for this coefficient
     * \param useLimitedPrefixLength handed to xGetICRate()
     * \param maxLog2TrDynamicRange  handed to xGetICRate()
     * \returns the selected absolute level
     */
    inline uint32_t QuantRDOQ::xGetCodedLevel( double&            rd64CodedCost,
                                           double&            rd64CodedCost0,
                                           double&            rd64CodedCostSig,
                                           Intermediate_Int   lLevelDouble,
                                           uint32_t               uiMaxAbsLevel,
                                           const BinFracBits* fracBitsSig,
                                           const BinFracBits& fracBitsPar,
                                           const BinFracBits& fracBitsGt1,
                                           const BinFracBits& fracBitsGt2,

                                           const int          remGt2Bins,
                                           const int          remRegBins,
                                           unsigned           goRiceZero,

                                           uint16_t             ui16AbsGoRice,
                                           int                iQBits,
                                           double             errorScale,
                                           bool               bLast,
                                           bool               useLimitedPrefixLength,
                                           const int          maxLog2TrDynamicRange
                                         ) const
    {
      uint32_t selectedLevel = 0;

      // Level 0 is the initial best guess only for small, non-last coefficients.
      const bool zeroIsCandidate = !bLast && uiMaxAbsLevel < 3;
      if( zeroIsCandidate )
      {
        rd64CodedCostSig = xGetRateSigCoef( *fracBitsSig, 0 );
        rd64CodedCost    = rd64CodedCost0 + rd64CodedCostSig;
      }
      else
      {
        rd64CodedCost    = MAX_DOUBLE;
      }

      if( zeroIsCandidate && uiMaxAbsLevel == 0 )
      {
        return selectedLevel;
      }

      // Every non-zero candidate pays the same "significant" flag, unless it is the last coefficient.
      const double sigCostNonZero = bLast ? 0.0 : xGetRateSigCoef( *fracBitsSig, 1 );

      const uint32_t lowestLevel = ( uiMaxAbsLevel > 1 ) ? uiMaxAbsLevel - 1 : 1;
      int candidate = uiMaxAbsLevel;
      while( candidate >= lowestLevel )
      {
        const double quantError = double( lLevelDouble - ( Intermediate_Int( candidate ) << iQBits ) );
        const int    levelRate  = xGetICRate( candidate, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, ui16AbsGoRice, useLimitedPrefixLength, maxLog2TrDynamicRange );

        double candidateCost = quantError * quantError * errorScale + xGetICost( levelRate );
        candidateCost       += sigCostNonZero;

        if( candidateCost < rd64CodedCost )
        {
          selectedLevel     = candidate;
          rd64CodedCost     = candidateCost;
          rd64CodedCostSig  = sigCostNonZero;
        }

        --candidate;
      }

      return selectedLevel;
    }
    
    /** Estimate the rate of coding one absolute transform level.
     *
     * Bit counts are scaled by 1 << SCALE_BITS. Two paths exist:
     *  - remRegBins < 3: the level is remapped with goRiceZero and priced as a sign bit plus
     *    the prefix/suffix code selected by ui16AbsGoRice; no flag rate tables are used.
     *  - otherwise: a sign bit plus the gt1 / parity / gt2 flag rates. Levels at or above the
     *    threshold (4 when remGt2Bins is non-zero, 2 otherwise) additionally pay for the
     *    remainder ( uiAbsLevel - threshold ) >> 1 with the same prefix/suffix code.
     *    Level 0 costs nothing on this path.
     *
     * \param uiAbsLevel             absolute level to be priced
     * \param fracBitsPar            rate table of the parity flag
     * \param fracBitsGt1            rate table of the greater-than-1 flag
     * \param fracBitsGt2            rate table of the greater-than-2 flag
     * \param remGt2Bins             non-zero if a greater-than-2 flag can still be coded
     * \param remRegBins             remaining regular bins; fewer than 3 selects the first path
     * \param goRiceZero             symbol that represents level 0 on the first path
     * \param ui16AbsGoRice          Rice parameter of the remainder code
     * \param useLimitedPrefixLength selects the escape code with a bounded prefix length
     * \param maxLog2TrDynamicRange  bounds prefix and suffix length of the limited escape code
     * \returns estimated rate of the given absolute transform level
     */
    inline int QuantRDOQ::xGetICRate( const uint32_t         uiAbsLevel,
                                      const BinFracBits& fracBitsPar,
                                      const BinFracBits& fracBitsGt1,
                                      const BinFracBits& fracBitsGt2,

                                      const int          remGt2Bins,
                                      const int          remRegBins,
                                      unsigned           goRiceZero,

                                      const uint16_t       ui16AbsGoRice,
                                      const bool         useLimitedPrefixLength,
                                      const int          maxLog2TrDynamicRange  ) const
    {

      if( remRegBins < 3 )
      {
        int       iRate   = int( xGetIEPRate() ); // cost of sign bit
        // Level 0 is sent as goRiceZero; levels up to goRiceZero move down by one to make room for it.
        uint32_t  symbol  = ( uiAbsLevel == 0 ? goRiceZero : uiAbsLevel <= goRiceZero ? uiAbsLevel-1 : uiAbsLevel );
        uint32_t  length;
        const int threshold = g_auiGoRiceRange[ui16AbsGoRice];
        if( symbol < ( threshold << ui16AbsGoRice ) )
        {
          // Plain prefix + fixed-length suffix of ui16AbsGoRice bits.
          length = symbol >> ui16AbsGoRice;
          iRate += ( length + 1 + ui16AbsGoRice ) << SCALE_BITS;
        }
        else if( useLimitedPrefixLength )
        {
          const uint32_t maximumPrefixLength = ( 32 - ( COEF_REMAIN_BIN_REDUCTION + maxLog2TrDynamicRange ) );

          uint32_t prefixLength = 0;
          uint32_t suffix = ( symbol >> ui16AbsGoRice ) - COEF_REMAIN_BIN_REDUCTION;

          while( ( prefixLength < maximumPrefixLength ) && ( suffix > ( ( 2 << prefixLength ) - 2 ) ) )
          {
            prefixLength++;
          }

          const uint32_t suffixLength = ( prefixLength == maximumPrefixLength ) ? ( maxLog2TrDynamicRange - ui16AbsGoRice ) : ( prefixLength + 1/*separator*/ );

          iRate += ( COEF_REMAIN_BIN_REDUCTION + prefixLength + suffixLength + ui16AbsGoRice ) << SCALE_BITS;
        }
        else
        {
          // Escape code whose suffix grows by one bit per additional prefix bit.
          length = ui16AbsGoRice;
          symbol = symbol - ( threshold << ui16AbsGoRice );
          while( symbol >= ( 1 << length ) )
          {
            symbol -= ( 1 << ( length++ ) );
          }
          iRate += ( threshold + length + 1 - ui16AbsGoRice + length ) << SCALE_BITS;
        }
        return iRate;
      }

      int iRate = int( xGetIEPRate() ); // cost of sign bit
      const uint32_t cthres = ( remGt2Bins ? 4 : 2 );
      if( uiAbsLevel >= cthres )
      {
        // Remainder above the flag-coded range, in steps of two (the parity flag carries the LSB).
        uint32_t symbol = ( uiAbsLevel - cthres ) >> 1;

        uint32_t length;
        const int threshold = g_auiGoRiceRange[ui16AbsGoRice];
        if( symbol < ( threshold << ui16AbsGoRice ) )
        {
          length = symbol >> ui16AbsGoRice;
          iRate += ( length + 1 + ui16AbsGoRice ) << SCALE_BITS;
        }
        else if( useLimitedPrefixLength )
        {
          const uint32_t maximumPrefixLength = ( 32 - ( COEF_REMAIN_BIN_REDUCTION + maxLog2TrDynamicRange ) );

          uint32_t prefixLength = 0;
          uint32_t suffix = ( symbol >> ui16AbsGoRice ) - COEF_REMAIN_BIN_REDUCTION;

          while( ( prefixLength < maximumPrefixLength ) && ( suffix > ( ( 2 << prefixLength ) - 2 ) ) )
          {
            prefixLength++;
          }

          const uint32_t suffixLength = ( prefixLength == maximumPrefixLength ) ? ( maxLog2TrDynamicRange - ui16AbsGoRice ) : ( prefixLength + 1/*separator*/ );

          iRate += ( COEF_REMAIN_BIN_REDUCTION + prefixLength + suffixLength + ui16AbsGoRice ) << SCALE_BITS;
        }
        else
        {
          length = ui16AbsGoRice;
          symbol = symbol - ( threshold << ui16AbsGoRice );
          while( symbol >= ( 1 << length ) )
          {
            symbol -= ( 1 << ( length++ ) );
          }
          iRate += ( threshold + length + 1 - ui16AbsGoRice + length ) << SCALE_BITS;
        }

        // Flags that precede the remainder: gt1 = 1, parity, and gt2 = 1 when it is still coded.
        iRate += fracBitsGt1.intBits[1];
        iRate += fracBitsPar.intBits[( uiAbsLevel - 2 ) & 1];
        if( remGt2Bins )
        {
          iRate += fracBitsGt2.intBits[1];
        }
      }
      else if( uiAbsLevel == 1 )
      {
        // Level 1: only gt1 = 0 follows the significance flag.
        iRate += fracBitsGt1.intBits[0];
      }
      else if( uiAbsLevel == 2 )
      {
        // Level 2: gt1 = 1, parity = 0, gt2 = 0.
        iRate += fracBitsGt1.intBits[1];
        iRate += fracBitsPar.intBits[0];
        iRate += fracBitsGt2.intBits[0];
      }
      else if( uiAbsLevel == 3 )
      {
        // Level 3: gt1 = 1, parity = 1, gt2 = 0.
        iRate += fracBitsGt1.intBits[1];
        iRate += fracBitsPar.intBits[1];
        iRate += fracBitsGt2.intBits[0];
      }
      else
      {
        // Level 0 carries neither a sign bit nor any flag on this path.
        iRate = 0;
      }
      return  iRate;
    }
    
    /** Cost of signalling a coefficient-group significance flag.
     * \param fracBitsSigCG            rate table of the flag, indexed by the flag value
     * \param uiSignificanceCoeffGroup flag value to be priced
     * \returns the table rate converted to a cost by xGetICost()
     */
    inline double QuantRDOQ::xGetRateSigCoeffGroup( const BinFracBits& fracBitsSigCG, unsigned uiSignificanceCoeffGroup ) const
    {
      const auto flagRate = fracBitsSigCG.intBits[uiSignificanceCoeffGroup];
      return xGetICost( flagRate );
    }
    
    /** Calculates the cost of signaling the last significant coefficient in the block
     * \param uiPosX X coordinate of the last significant coefficient
     * \param uiPosY Y coordinate of the last significant coefficient
     * \param component colour component ID
     * \returns cost of last significant coefficient
     */
    /*
     * \param uiWidth width of the transform unit (TU)
    */
    inline double QuantRDOQ::xGetRateLast( const int* lastBitsX, const int* lastBitsY, unsigned PosX, unsigned PosY ) const
    {
      uint32_t    CtxX  = g_uiGroupIdx[PosX];
      uint32_t    CtxY  = g_uiGroupIdx[PosY];
      double  Cost  = lastBitsX[ CtxX ] + lastBitsY[ CtxY ];
      if( CtxX > 3 )
      {
        Cost += xGetIEPRate() * ((CtxX-2)>>1);
      }
      if( CtxY > 3 )
      {
        Cost += xGetIEPRate() * ((CtxY-2)>>1);
      }
      return xGetICost( Cost );
    }
    
    
    /** Cost of signalling a coefficient significance flag.
     * \param fracBitsSig    rate table of the flag, indexed by the flag value
     * \param uiSignificance flag value to be priced
     * \returns the table rate converted to a cost by xGetICost()
     */
    inline double QuantRDOQ::xGetRateSigCoef( const BinFracBits& fracBitsSig, unsigned uiSignificance ) const
    {
      const auto flagRate = fracBitsSig.intBits[uiSignificance];
      return xGetICost( flagRate );
    }
    
    /** Convert a rate into a cost by weighting it with the Lagrange multiplier m_dLambda.
     * \param dRate rate to be converted
     * \returns m_dLambda times the given rate
     */
    inline double QuantRDOQ::xGetICost( double dRate ) const
    {
      const double weightedRate = dRate * m_dLambda;
      return weightedRate;
    }
    
    /** Rate of one equiprobable bit.
     * \returns the constant 32768 (2^15)
     */
    inline double QuantRDOQ::xGetIEPRate() const
    {
      const double equiprobableBitRate = 32768.0;
      return equiprobableBitRate;
    }
    
    
    
    #if HEVC_USE_SCALING_LISTS
    /** Install a scaling list and refresh the matching error-scale tables.
     * \param scalingList            scaling list handed to Quant::setScalingList()
     * \param maxLog2TrDynamicRange  per-channel-type values handed to the base class and to xSetErrScaleCoeff()
     * \param bitDepths              reference to bit depth array for all channels
     *
     * Only combinations with identical X and Y size index are refreshed here.
     */
    void QuantRDOQ::setScalingList(ScalingList *scalingList, const int maxLog2TrDynamicRange[MAX_NUM_CHANNEL_TYPE], const BitDepths &bitDepths)
    {
      Quant::setScalingList( scalingList, maxLog2TrDynamicRange, bitDepths );

      const int qpBegin = 0;
      const int qpEnd   = SCALING_LIST_REM_NUM;

      for( uint32_t sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; ++sizeId )
      {
        for( uint32_t listId = 0; listId < SCALING_LIST_NUM; ++listId )
        {
          for( int qpRem = qpBegin; qpRem < qpEnd; ++qpRem )
          {
            xSetErrScaleCoeff( listId, sizeId, sizeId, qpRem, maxLog2TrDynamicRange, bitDepths );
          }
        }
      }
    }
    
    
    
    #if HM_QTBT_AS_IN_JEM_QUANT
    #endif
    #else
    
    /** Error scale of a block when no scaling list tables are compiled in.
     *
     * The scale is 2^SCALE_BITS, reduced by twice the transform shift and twice the
     * distortion precision adjustment, divided by the squared quantiser scale of \p qp.
     * With HM_QTBT_AS_IN_JEM_QUANT, blocks flagged by TU::needsBlockSizeTrafoScale() use
     * a transform shift lowered by 0.5 and a quantiser scale multiplied by 181/128.
     *
     * \param width                 block width
     * \param height                block height
     * \param qp                    index into g_quantScales
     * \param maxLog2TrDynamicRange handed to getTransformShift()
     * \param channelBitDepth       bit depth of the channel
     * \returns factor that is applied to squared quantisation errors
     */
    double QuantRDOQ::xGetErrScaleCoeff( SizeType width, SizeType height, int qp, const int maxLog2TrDynamicRange, const int channelBitDepth )
    {
      const int transformShift = getTransformShift( channelBitDepth, Size( width, height ), maxLog2TrDynamicRange );
    #if HM_QTBT_AS_IN_JEM_QUANT
      const bool   sqrt2Block     = TU::needsBlockSizeTrafoScale( Size( width, height ) );
      const double effectiveShift = double( transformShift ) + ( sqrt2Block ? -0.5 : 0.0 );

      // 2^SCALE_BITS matches the scaling of bit counts; the power of two undoes the forward transform scaling.
      const double numerator = double( 1 << SCALE_BITS ) * pow( 2.0, ( -2.0 * effectiveShift ) );
      const int    qStep     = sqrt2Block ? ( ( g_quantScales[qp] * 181 ) >> 7 ) : g_quantScales[qp];

      return numerator / qStep / qStep / ( 1 << ( DISTORTION_PRECISION_ADJUSTMENT( channelBitDepth ) << 1 ) );
    #else
      const int    scaleShift = SCALE_BITS - ( ( transformShift + DISTORTION_PRECISION_ADJUSTMENT( channelBitDepth ) ) << 1 );
      const double numerator  = exp2( double( scaleShift ) );
      const auto   quantScale = g_quantScales[qp];

      return numerator / double( quantScale * quantScale );
    #endif
    }
    #endif
    
    
    
    #if HEVC_USE_SCALING_LISTS
    /** Fill the error-scale table of one (list, sizeX, sizeY, qp) combination.
     *
     * Every table entry is the common error scale divided by the squared quantiser
     * coefficient at the same position. The companion value returned by
     * xGetErrScaleCoeffNoScalingList() is computed the same way from g_quantScales[qp].
     *
     * \param list                   list ID; 0 and MAX_NUM_COMPONENT select luma, all other values chroma
     * \param sizeX                  index into g_scalingListSizeX that gives the block width
     * \param sizeY                  index into g_scalingListSizeX that gives the block height
     * \param qp                     quantization parameter index used for the tables and g_quantScales
     * \param maxLog2TrDynamicRange  per-channel-type values handed to getTransformShift()
     * \param bitDepths              reference to bit depth array for all channels
     */
    void QuantRDOQ::xSetErrScaleCoeff( uint32_t list, uint32_t sizeX, uint32_t sizeY, int qp, const int maxLog2TrDynamicRange[MAX_NUM_CHANNEL_TYPE], const BitDepths &bitDepths )
    {
      const int width = g_scalingListSizeX[sizeX];
      const int height = g_scalingListSizeX[sizeY];
      const ChannelType channelType = ( ( list == 0 ) || ( list == MAX_NUM_COMPONENT ) ) ? CHANNEL_TYPE_LUMA : CHANNEL_TYPE_CHROMA;
      const int channelBitDepth = bitDepths.recon[channelType];
      const int iTransformShift = getTransformShift( channelBitDepth, Size( g_scalingListSizeX[sizeX], g_scalingListSizeX[sizeY] ), maxLog2TrDynamicRange[channelType] );  // Represents scaling through forward transform

      const uint32_t uiMaxNumCoeff = width * height;
      int    *piQuantcoeff = getQuantCoeff( list, qp, sizeX, sizeY );
      double *pdErrScale   = xGetErrScaleCoeff( list, sizeX, sizeY, qp );

    #if HM_QTBT_AS_IN_JEM_QUANT
      double dErrScale = (double)( 1 << SCALE_BITS );                                // Compensate for scaling of bitcount in Lagrange cost function

      bool   needsSrqt2 = TU::needsBlockSizeTrafoScale( Size( g_scalingListSizeX[sizeX], g_scalingListSizeX[sizeY] ) );// ( ( (sizeX+sizeY) & 1 ) !=0 );
      double dTransShift = (double)iTransformShift + ( needsSrqt2 ? -0.5 : 0.0 );
      dErrScale = dErrScale*pow( 2.0, ( -2.0*dTransShift ) );                     // Compensate for scaling through forward transform

      for( uint32_t i = 0; i < uiMaxNumCoeff; i++ )
      {
        pdErrScale[i] = dErrScale / piQuantcoeff[i] / piQuantcoeff[i]
                        / (1 << (DISTORTION_PRECISION_ADJUSTMENT(bitDepths.recon[channelType]) << 1));
      }

      int QStep = ( needsSrqt2 ? ( ( g_quantScales[qp] * 181 ) >> 7 ) : g_quantScales[qp] );

      xGetErrScaleCoeffNoScalingList(list, sizeX, sizeY, qp) =
        dErrScale / QStep / QStep / (1 << (DISTORTION_PRECISION_ADJUSTMENT(bitDepths.recon[channelType]) << 1));
    #else
      int errShift = SCALE_BITS - ((iTransformShift + DISTORTION_PRECISION_ADJUSTMENT(bitDepths.recon[channelType])) << 1);
      double dErrScale = exp2( double( errShift ) );
      for( uint32_t i = 0; i < uiMaxNumCoeff; i++ )
      {
        // Square in double: the product of two int coefficients overflows int once a coefficient exceeds 46340.
        const double quantCoeff = double( piQuantcoeff[i] );
        pdErrScale[i] = dErrScale / ( quantCoeff * quantCoeff );
      }
      const double quantScale = double( g_quantScales[qp] );
      xGetErrScaleCoeffNoScalingList( list, sizeX, sizeY, qp ) = dErrScale / ( quantScale * quantScale );
    #endif
    }
    
    /** Install the flat scaling list and refresh all error-scale tables.
     * \param maxLog2TrDynamicRange  per-channel-type values handed to the base class and to xSetErrScaleCoeff()
     * \param bitDepths              reference to bit depth array for all channels
     *
     * Unlike setScalingList(), every combination of X and Y size index is refreshed.
     */
    void QuantRDOQ::setFlatScalingList(const int maxLog2TrDynamicRange[MAX_NUM_CHANNEL_TYPE], const BitDepths &bitDepths)
    {
      Quant::setFlatScalingList( maxLog2TrDynamicRange, bitDepths );

      const int qpBegin = 0;
      const int qpEnd   = SCALING_LIST_REM_NUM;

      for( uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; ++sizeIdX )
      {
        for( uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; ++sizeIdY )
        {
          for( uint32_t listId = 0; listId < SCALING_LIST_NUM; ++listId )
          {
            for( int qpRem = qpBegin; qpRem < qpEnd; ++qpRem )
            {
              xSetErrScaleCoeff( listId, sizeIdX, sizeIdY, qpRem, maxLog2TrDynamicRange, bitDepths );
            }
          }
        }
      }
    }
    
    /** Set up the error-scale table pointers.
     * \param other instance whose tables are reused, or null to allocate fresh tables
     *
     * With a null \p other this instance becomes the owner and allocates one array of
     * width * height doubles per (sizeX, sizeY, list, qp) entry. Otherwise the pointers
     * of \p other are copied and ownership stays with that instance.
     */
    void QuantRDOQ::xInitScalingList( const QuantRDOQ* other )
    {
      m_isErrScaleListOwner = ( other == nullptr );

      for( uint32_t sx = 0; sx < SCALING_LIST_SIZE_NUM; ++sx )
      {
        for( uint32_t sy = 0; sy < SCALING_LIST_SIZE_NUM; ++sy )
        {
          for( uint32_t qpRem = 0; qpRem < SCALING_LIST_REM_NUM; ++qpRem )
          {
            for( uint32_t list = 0; list < SCALING_LIST_NUM; ++list )
            {
              if( !m_isErrScaleListOwner )
              {
                // Borrow the table from the source instance.
                m_errScale[sx][sy][list][qpRem] = other->m_errScale[sx][sy][list][qpRem];
                continue;
              }

              m_errScale[sx][sy][list][qpRem] = new double[g_scalingListSizeX[sx] * g_scalingListSizeX[sy]];
            }
          }
        }
      }
    }
    
    /** Release the error-scale tables.
     *
     * Nothing happens unless this instance owns the tables; borrowed pointers are left alone.
     */
    void QuantRDOQ::xDestroyScalingList()
    {
      if( m_isErrScaleListOwner )
      {
        for( uint32_t sx = 0; sx < SCALING_LIST_SIZE_NUM; ++sx )
        {
          for( uint32_t sy = 0; sy < SCALING_LIST_SIZE_NUM; ++sy )
          {
            for( uint32_t list = 0; list < SCALING_LIST_NUM; ++list )
            {
              for( uint32_t qpRem = 0; qpRem < SCALING_LIST_REM_NUM; ++qpRem )
              {
                if( !m_errScale[sx][sy][list][qpRem] )
                {
                  continue;
                }
                delete [] m_errScale[sx][sy][list][qpRem];
              }
            }
          }
        }
      }
    }
    #endif
    
    
    /** Quantise one transform block, using RDOQ where it is enabled and applicable.
     *
     * RDOQ runs when the matching switch (m_useRDOQTS for transform-skip blocks, m_useRDOQ
     * otherwise) is set, both block dimensions exceed 2, and the component is luma or
     * RDOQ_CHROMA is non-zero. With T0196_SELECTIVE_RDOQ and m_useSelectiveRDOQ, a block for
     * which xNeedRDOQ() returns false is zeroed instead. All other blocks go through
     * Quant::quant().
     *
     * \param tu       transform unit that owns the block and receives the levels
     * \param compID   component to be quantised
     * \param pSrc     transform coefficients
     * \param uiAbsSum [out] set by the selected quantiser; 0 when the block is zeroed
     * \param cQP      quantisation parameters
     * \param ctx      context state handed to the selected quantiser
     */
    void QuantRDOQ::quant(TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &pSrc, TCoeff &uiAbsSum, const QpParam &cQP, const Ctx& ctx)
    {
      const CompArea &area        = tu.blocks[compID];
      const uint32_t  blockWidth  = area.width;
      const uint32_t  blockHeight = area.height;

      const CCoeffBuf &srcCoeffs  = pSrc;
            CoeffBuf   dstCoeffs  = tu.getCoeffs(compID);

      const bool isTransformSkip  = tu.transformSkip[compID];
      const bool rdoqConfigured   = isTransformSkip ? m_useRDOQTS : m_useRDOQ;
      const bool rdoqAllowed      = rdoqConfigured && blockWidth > 2 && blockHeight > 2;

      if( !( rdoqAllowed && ( isLuma( compID ) || RDOQ_CHROMA ) ) )
      {
        Quant::quant( tu, compID, pSrc, uiAbsSum, cQP, ctx );
        return;
      }

    #if T0196_SELECTIVE_RDOQ
      if( m_useSelectiveRDOQ && !xNeedRDOQ( tu, compID, srcCoeffs, cQP ) )
      {
        // Selective RDOQ decided the block is not worth coding: emit all-zero levels.
        dstCoeffs.fill(0);
        uiAbsSum = 0;
        return;
      }
    #endif

      xRateDistOptQuant( tu, compID, pSrc, uiAbsSum, cQP, ctx );
    }
    
    
    
    void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &pSrc, TCoeff &uiAbsSum, const QpParam &cQP, const Ctx &ctx)
    {
      const FracBitsAccess& fracBits = ctx.getFracBitsAcess();
    
      const SPS &sps            = *tu.cs->sps;
      const CompArea &rect      = tu.blocks[compID];
      const uint32_t uiWidth        = rect.width;
      const uint32_t uiHeight       = rect.height;
      const ChannelType chType  = toChannelType(compID);
      const int channelBitDepth = sps.getBitDepth( chType );
    
      const bool extendedPrecision     = sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag();
      const int  maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(chType);
    
      /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
      * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
      * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
      * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
      */
    
      // Represents scaling through forward transform
      int iTransformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange);
    
      if (tu.transformSkip[compID] && extendedPrecision)
      {
        iTransformShift = std::max<int>(0, iTransformShift);
      }
    
      double     d64BlockUncodedCost               = 0;
      const uint32_t uiLog2BlockWidth                  = g_aucLog2[uiWidth];
    #if HEVC_USE_SCALING_LISTS
      const uint32_t uiLog2BlockHeight                 = g_aucLog2[uiHeight];
    #endif
      const uint32_t uiMaxNumCoeff                     = rect.area();
    
      CHECK(compID >= MAX_NUM_TBLOCKS, "Invalid component ID");
    
    #if HEVC_USE_SCALING_LISTS
      int scalingListType = getScalingListType(tu.cu->predMode, compID);
    
      CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");
    #endif
    
      const TCoeff *plSrcCoeff = pSrc.buf;
            TCoeff *piDstCoeff = tu.getCoeffs(compID).buf;
    
      double *pdCostCoeff  = m_pdCostCoeff;
      double *pdCostSig    = m_pdCostSig;
      double *pdCostCoeff0 = m_pdCostCoeff0;
    #if HEVC_USE_SIGN_HIDING
      int    *rateIncUp    = m_rateIncUp;
      int    *rateIncDown  = m_rateIncDown;
      int    *sigRateDelta = m_sigRateDelta;
      TCoeff *deltaU       = m_deltaU;
    #endif
    
      memset( m_pdCostCoeff,  0, sizeof( double ) *  uiMaxNumCoeff );
      memset( m_pdCostSig,    0, sizeof( double ) *  uiMaxNumCoeff );
    #if HEVC_USE_SIGN_HIDING
      memset( m_rateIncUp,    0, sizeof( int    ) *  uiMaxNumCoeff );
      memset( m_rateIncDown,  0, sizeof( int    ) *  uiMaxNumCoeff );
      memset( m_sigRateDelta, 0, sizeof( int    ) *  uiMaxNumCoeff );
      memset( m_deltaU,       0, sizeof( TCoeff ) *  uiMaxNumCoeff );
    #endif
    
    
      const int iQBits = QUANT_SHIFT + cQP.per + iTransformShift;                   // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
    
    #if HEVC_USE_SCALING_LISTS
      const double *const pdErrScale = xGetErrScaleCoeff(scalingListType, (uiLog2BlockWidth-1), (uiLog2BlockHeight-1), cQP.rem);
      const int    *const piQCoef    = getQuantCoeff(scalingListType, cQP.rem, (uiLog2BlockWidth-1), (uiLog2BlockHeight-1));
    
      const bool   enableScalingLists             = getUseScalingList(uiWidth, uiHeight, tu.transformSkip[compID]);
    #if HM_QTBT_AS_IN_JEM_QUANT
      const int    defaultQuantisationCoefficient = ( TU::needsSqrt2Scale( rect ) ? ( g_quantScales[cQP.rem] * 181 ) >> 7 : g_quantScales[cQP.rem] );
      const double defaultErrorScale              = xGetErrScaleCoeffNoScalingList(scalingListType, (uiLog2BlockWidth-1), (uiLog2BlockHeight-1), cQP.rem);
    #else
      const double blkErrScale                    = ( TU::needsQP3Offset( tu, compID ) ? 2.0 : 1.0 );
      const int    defaultQuantisationCoefficient = g_quantScales[cQP.rem];
      const double defaultErrorScale              = blkErrScale * xGetErrScaleCoeffNoScalingList( scalingListType, ( uiLog2BlockWidth - 1 ), ( uiLog2BlockHeight - 1 ), cQP.rem );
    #endif
    #else //HEVC_USE_SCALING_LISTS
    #if HM_QTBT_AS_IN_JEM_QUANT
      const int    quantisationCoefficient = ( TU::needsSqrt2Scale( rect ) ? ( g_quantScales[cQP.rem] * 181 ) >> 7 : g_quantScales[cQP.rem] );
      const double errorScale              = xGetErrScaleCoeff( uiWidth, uiHeight, cQP.rem, maxLog2TrDynamicRange, channelBitDepth );
    #else
      const double blkErrScale             = ( TU::needsQP3Offset( tu, compID ) ? 2.0 : 1.0 );
      const int    quantisationCoefficient = g_quantScales[cQP.rem];
      const double errorScale              = blkErrScale * xGetErrScaleCoeff( uiWidth, uiHeight, cQP.rem, maxLog2TrDynamicRange, channelBitDepth );
    #endif
    #endif//HEVC_USE_SCALING_LISTS
    
    
    #if HEVC_USE_SIGN_HIDING
      const TCoeff entropyCodingMinimum = -(1 << maxLog2TrDynamicRange);
    #endif
      const TCoeff entropyCodingMaximum =  (1 << maxLog2TrDynamicRange) - 1;
    
    #if HEVC_USE_SIGN_HIDING
      CoeffCodingContext cctx(tu, compID, tu.cs->slice->getSignDataHidingEnabledFlag());
    #else
      CoeffCodingContext cctx(tu, compID);
    #endif
      const int    iCGSizeM1      = (1 << cctx.log2CGSize()) - 1;
    
      int     iCGLastScanPos      = -1;
      double  d64BaseCost         = 0;
      int     iLastScanPos        = -1;
    
    
      bool      is2x2subblock = ( iCGSizeM1 == 3 );
      int       remGt2Bins    = ( is2x2subblock ? MAX_NUM_GT2_BINS_2x2SUBBLOCK : MAX_NUM_GT2_BINS_4x4SUBBLOCK );
      int       remRegBins    = ( is2x2subblock ? MAX_NUM_REG_BINS_2x2SUBBLOCK : MAX_NUM_REG_BINS_4x4SUBBLOCK ) - remGt2Bins;
      uint32_t  goRiceParam   = 0;
    
    
      double *pdCostCoeffGroupSig = m_pdCostCoeffGroupSig;
      memset( pdCostCoeffGroupSig, 0, ( uiMaxNumCoeff >> cctx.log2CGSize() ) * sizeof( double ) );
    
      const int iCGNum  = uiWidth * uiHeight >> cctx.log2CGSize();
      int iScanPos;
      coeffGroupRDStats rdStats;
    
    #if ENABLE_TRACING
      DTRACE( g_trace_ctx, D_RDOQ, "%d: %3d, %3d, %dx%d, comp=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), rect.x, rect.y, rect.width, rect.height, compID );
    #endif
    
    
    
      for (int subSetId = iCGNum - 1; subSetId >= 0; subSetId--)
      {
        cctx.initSubblock( subSetId );
    
        memset( &rdStats, 0, sizeof (coeffGroupRDStats));
    
        for (int iScanPosinCG = iCGSizeM1; iScanPosinCG >= 0; iScanPosinCG--)
        {
          iScanPos = cctx.minSubPos() + iScanPosinCG;
          //===== quantization =====
          uint32_t    uiBlkPos          = cctx.blockPos(iScanPos);
    
          // set coeff
    #if HEVC_USE_SCALING_LISTS
          const int    quantisationCoefficient = (enableScalingLists) ? piQCoef   [uiBlkPos]               : defaultQuantisationCoefficient;
    #if HM_QTBT_AS_IN_JEM_QUANT
          const double errorScale              = (enableScalingLists) ? pdErrScale[uiBlkPos]               : defaultErrorScale;
    #else
          const double errorScale              = (enableScalingLists) ? pdErrScale[uiBlkPos] * blkErrScale : defaultErrorScale;
    #endif
    #endif
          const int64_t  tmpLevel                = int64_t(abs(plSrcCoeff[ uiBlkPos ])) * quantisationCoefficient;
    
          const Intermediate_Int lLevelDouble  = (Intermediate_Int)std::min<int64_t>(tmpLevel, std::numeric_limits<Intermediate_Int>::max() - (Intermediate_Int(1) << (iQBits - 1)));
    
          uint32_t uiMaxAbsLevel        = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t((lLevelDouble + (Intermediate_Int(1) << (iQBits - 1))) >> iQBits));
    
          const double dErr         = double( lLevelDouble );
          pdCostCoeff0[ iScanPos ]  = dErr * dErr * errorScale;
          d64BlockUncodedCost      += pdCostCoeff0[ iScanPos ];
          piDstCoeff[ uiBlkPos ]    = uiMaxAbsLevel;
    
          if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
          {
            iLastScanPos            = iScanPos;
            iCGLastScanPos          = cctx.subSetId();
          }
    
          if ( iLastScanPos >= 0 )
          {
    
    #if ENABLE_TRACING
            uint32_t uiCGPosY = cctx.cgPosX();
            uint32_t uiCGPosX = cctx.cgPosY();
            uint32_t uiPosY = cctx.posY( iScanPos );
            uint32_t uiPosX = cctx.posX( iScanPos );
            DTRACE( g_trace_ctx, D_RDOQ, "%d [%d][%d][%2d:%2d][%2d:%2d]", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), iScanPos, uiBlkPos, uiCGPosX, uiCGPosY, uiPosX, uiPosY );
    #endif
            //===== coefficient level estimation =====
            unsigned ctxIdSig = 0;
            if( iScanPos != iLastScanPos )
            {
              ctxIdSig = cctx.sigCtxIdAbs( iScanPos, piDstCoeff, 0 );
            }
            uint32_t    uiLevel;
            uint8_t ctxOffset     = cctx.ctxOffsetAbs     ();
            uint32_t    uiParCtx      = cctx.parityCtxIdAbs   ( ctxOffset );
            uint32_t    uiGt1Ctx      = cctx.greater1CtxIdAbs ( ctxOffset );
            uint32_t    uiGt2Ctx      = cctx.greater2CtxIdAbs ( ctxOffset );
    
            uint32_t    goRiceZero    = 0;
            if( remRegBins < 3 )
            {
              unsigned  sumAbs        = cctx.templateAbsSum( iScanPos, piDstCoeff );
              goRiceParam             = g_auiGoRiceParsCoeff   [ sumAbs ];
              goRiceZero              = g_auiGoRicePosCoeff0[0][ sumAbs ];
            }
    
    
            const BinFracBits fracBitsPar = fracBits.getFracBitsArray( uiParCtx );
            const BinFracBits fracBitsGt1 = fracBits.getFracBitsArray( uiGt1Ctx );
            const BinFracBits fracBitsGt2 = fracBits.getFracBitsArray( uiGt2Ctx );
    
            if( iScanPos == iLastScanPos )
            {
    
              uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
                                        lLevelDouble, uiMaxAbsLevel, nullptr, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 1, extendedPrecision, maxLog2TrDynamicRange );
    
            }
            else
            {
              DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig );
    
              const BinFracBits fracBitsSig = fracBits.getFracBitsArray( ctxIdSig );
    
              uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
                                        lLevelDouble, uiMaxAbsLevel, &fracBitsSig, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 0, extendedPrecision, maxLog2TrDynamicRange );
    #if HEVC_USE_SIGN_HIDING
              sigRateDelta[ uiBlkPos ] = ( remRegBins < 3 ? 0 : fracBitsSig.intBits[1] - fracBitsSig.intBits[0] );
    
    #endif
            }
    
            DTRACE( g_trace_ctx, D_RDOQ, " Lev=%d \n", uiLevel );
            DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ, " CostC0=%d\n", (int64_t)( pdCostCoeff0[iScanPos] ) );
            DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ, " CostC =%d\n", (int64_t)( pdCostCoeff[iScanPos] ) );
    
    #if HEVC_USE_SIGN_HIDING
            deltaU[ uiBlkPos ]        = TCoeff((lLevelDouble - (Intermediate_Int(uiLevel) << iQBits)) >> (iQBits-8));
    
            if( uiLevel > 0 )
            {
    
              int rateNow              = xGetICRate( uiLevel,   fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange );
              rateIncUp   [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow;
              rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow;
    
              if( remRegBins < 3 )
              {
                int rateNow            = xGetICRate( uiLevel,   fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange );
                rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow;
              }
              else
              {
                rateIncUp [ uiBlkPos ] = fracBitsGt1.intBits[ 0 ];
              }
    
            }
    #endif
            piDstCoeff[ uiBlkPos ] = uiLevel;
            d64BaseCost           += pdCostCoeff [ iScanPos ];
    
    
            if( ( (iScanPos & iCGSizeM1) == 0 ) && ( iScanPos > 0 ) )
            {
              remGt2Bins    = ( is2x2subblock ? MAX_NUM_GT2_BINS_2x2SUBBLOCK : MAX_NUM_GT2_BINS_4x4SUBBLOCK );
              remRegBins    = ( is2x2subblock ? MAX_NUM_REG_BINS_2x2SUBBLOCK : MAX_NUM_REG_BINS_4x4SUBBLOCK ) - remGt2Bins;
              goRiceParam   = 0;
            }
            else if( remRegBins >= 3 )
            {
              const uint32_t baseLevel = ( remGt2Bins ? 4 : 2 );
              if( goRiceParam < 3 && ((uiLevel-baseLevel)>>1) > (3<<goRiceParam)-1 )
              {
                goRiceParam++;
              }
              if( uiLevel >= 2 && remGt2Bins )
              {
                remGt2Bins--;
              }
              remRegBins -= std::min<int>( uiLevel, 2 ) + (iScanPos != iLastScanPos);
            }
    
          }
          else
          {
            d64BaseCost    += pdCostCoeff0[ iScanPos ];
          }
          rdStats.d64SigCost += pdCostSig[ iScanPos ];
          if (iScanPosinCG == 0 )
          {
            rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
          }
          if (piDstCoeff[ uiBlkPos ] )
          {
            cctx.setSigGroup();
            rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
            rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
            if ( iScanPosinCG != 0 )
            {
              rdStats.iNNZbeforePos0++;
            }
          }
        } //end for (iScanPosinCG)
    
        if (iCGLastScanPos >= 0)
        {
          if( cctx.subSetId() )
          {
            if( !cctx.isSigGroup() )
            {
              const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId() );
              d64BaseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 0) - rdStats.d64SigCost;
              pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
            }
            else
            {
              if (cctx.subSetId() < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
              {
                if ( rdStats.iNNZbeforePos0 == 0 )
                {
                  d64BaseCost -= rdStats.d64SigCost_0;
                  rdStats.d64SigCost -= rdStats.d64SigCost_0;
                }
                // rd-cost if SigCoeffGroupFlag = 0, initialization
                double d64CostZeroCG = d64BaseCost;
    
                const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId() );
    
                if (cctx.subSetId() < iCGLastScanPos)
                {
                  d64BaseCost  += xGetRateSigCoeffGroup(fracBitsSigGroup,1);
                  d64CostZeroCG += xGetRateSigCoeffGroup(fracBitsSigGroup,0);
                  pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup,1);
                }
    
                // try to convert the current coeff group from non-zero to all-zero
                d64CostZeroCG += rdStats.d64UncodedDist;  // distortion for resetting non-zero levels to zero levels
                d64CostZeroCG -= rdStats.d64CodedLevelandDist;   // distortion and level cost for keeping all non-zero levels
                d64CostZeroCG -= rdStats.d64SigCost;     // sig cost for all coeffs, including zero levels and non-zero levels
    
                                                         // if we can save cost, change this block to all-zero block
                if ( d64CostZeroCG < d64BaseCost )
                {
                  cctx.resetSigGroup();
                  d64BaseCost = d64CostZeroCG;
                  if (cctx.subSetId() < iCGLastScanPos)
                  {
                    pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup,0);
                  }
                  // reset coeffs to 0 in this block
                  for (int iScanPosinCG = iCGSizeM1; iScanPosinCG >= 0; iScanPosinCG--)
                  {
                    iScanPos      = cctx.minSubPos() + iScanPosinCG;
                    uint32_t uiBlkPos = cctx.blockPos( iScanPos );
    
                    if (piDstCoeff[ uiBlkPos ])
                    {
                      piDstCoeff [ uiBlkPos ] = 0;
                      pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
                      pdCostSig  [ iScanPos ] = 0;
                    }
                  }
                } // end if ( d64CostZeroCG < d64BaseCost )
              }
            } // end else branch of if( !cctx.isSigGroup() )
          }
          else
          {
            cctx.setSigGroup();
          }
        }
      } //end for (cctx.subSetId)
    
    
      //===== estimate last position =====
      if ( iLastScanPos < 0 )
      {
        return;
      }
    
      double  d64BestCost         = 0;
      int     iBestLastIdxP1      = 0;
    
    
      if( !CU::isIntra( *tu.cu ) && isLuma( compID ) && tu.depth == 0 )
      {
        const BinFracBits fracBitsQtRootCbf = fracBits.getFracBitsArray( Ctx::QtRootCbf() );
        d64BestCost  = d64BlockUncodedCost + xGetICost( fracBitsQtRootCbf.intBits[ 0 ] );
        d64BaseCost += xGetICost( fracBitsQtRootCbf.intBits[ 1 ] );
      }
      else
      {
        BinFracBits fracBitsQtCbf = fracBits.getFracBitsArray( Ctx::QtCbf[compID]( DeriveCtx::CtxQtCbf( rect.compID, tu.depth, tu.cbf[COMPONENT_Cb] ) ) );
    
        d64BestCost  = d64BlockUncodedCost + xGetICost( fracBitsQtCbf.intBits[0] );
        d64BaseCost += xGetICost( fracBitsQtCbf.intBits[1] );
      }
    
      int lastBitsX[LAST_SIGNIFICANT_GROUPS] = { 0 };
      int lastBitsY[LAST_SIGNIFICANT_GROUPS] = { 0 };
      {
    #if HEVC_USE_MDCS
        int dim1  = ( cctx.scanType() == SCAN_VER ? uiHeight : uiWidth  );
        int dim2  = ( cctx.scanType() == SCAN_VER ? uiWidth  : uiHeight );
    #else
        int dim1  = uiWidth;
        int dim2  = uiHeight;
    #endif
        int bitsX = 0;
        int bitsY = 0;
        int ctxId;
        //X-coordinate
        for ( ctxId = 0; ctxId < g_uiGroupIdx[dim1-1]; ctxId++)
        {
          const BinFracBits fB = fracBits.getFracBitsArray( cctx.lastXCtxId(ctxId) );
          lastBitsX[ ctxId ]   = bitsX + fB.intBits[ 0 ];
          bitsX               +=         fB.intBits[ 1 ];
        }
        lastBitsX[ctxId] = bitsX;
        //Y-coordinate
        for ( ctxId = 0; ctxId < g_uiGroupIdx[dim2-1]; ctxId++)
        {
          const BinFracBits fB = fracBits.getFracBitsArray( cctx.lastYCtxId(ctxId) );
          lastBitsY[ ctxId ]   = bitsY + fB.intBits[ 0 ];
          bitsY               +=         fB.intBits[ 1 ];
        }
        lastBitsY[ctxId] = bitsY;
      }
    
    
      bool bFoundLast = false;
      for (int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
      {
        d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
        if (cctx.isSigGroup( iCGScanPos ) )
        {
          for (int iScanPosinCG = iCGSizeM1; iScanPosinCG >= 0; iScanPosinCG--)
          {
            iScanPos = iCGScanPos * (iCGSizeM1 + 1) + iScanPosinCG;
    
            if (iScanPos > iLastScanPos)
            {