Skip to content
Snippets Groups Projects
TrQuant.cpp 47.3 KiB
Newer Older
  • Learn to ignore specific revisions
    /* The copyright in this software is being made available under the BSD
     * License, included below. This software may be subject to other third party
     * and contributor rights, including patent rights, and no such rights are
     * granted under this license.
     *
    
     * Copyright (c) 2010-2019, ITU/ISO/IEC
    
     * All rights reserved.
     *
     * Redistribution and use in source and binary forms, with or without
     * modification, are permitted provided that the following conditions are met:
     *
     *  * Redistributions of source code must retain the above copyright notice,
     *    this list of conditions and the following disclaimer.
     *  * Redistributions in binary form must reproduce the above copyright notice,
     *    this list of conditions and the following disclaimer in the documentation
     *    and/or other materials provided with the distribution.
     *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
     *    be used to endorse or promote products derived from this software without
     *    specific prior written permission.
     *
     * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
     * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
     * THE POSSIBILITY OF SUCH DAMAGE.
     */
    
    /** \file     TrQuant.cpp
        \brief    transform and quantization class
    */
    
    #include "TrQuant.h"
    #include "TrQuant_EMT.h"
    
    #include "UnitTools.h"
    #include "ContextModelling.h"
    #include "CodingStructure.h"
    #include "CrossCompPrediction.h"
    
    
    #include "dtrace_buffer.h"
    
    #include <stdlib.h>
    #include <limits>
    #include <memory.h>
    
    #include "QuantRDOQ.h"
    #include "DepQuant.h"
    
    #if RExt__DECODER_DEBUG_TOOL_STATISTICS
    #include "CommonLib/CodingStatistics.h"
    #endif
    
    //! Plain record of rate-distortion statistics for one coefficient group.
    //! NOTE(review): nothing in the visible part of this file reads or writes this struct; the
    //! per-field notes below only restate the original comments and the field names.
    struct coeffGroupRDStats
    {
      int    iNNZbeforePos0;       // presumably the number of non-zero levels before position 0 — TODO confirm
      double d64CodedLevelandDist; // distortion and level cost only
      double d64UncodedDist;    // all zero coded block distortion
      double d64SigCost;           // presumably a significance-signalling cost — TODO confirm
      double d64SigCost_0;         // presumably the significance cost attributed to position 0 — TODO confirm
    };
    
    //! Forward 1-D transform kernels, looked up as fastFwdTrans[transform type][size index].
    //! The rows list, in this order, the DCT2, DCT8 and DST7 kernels; the columns list the 2-, 4-, 8-,
    //! 16-, 32- and 64-point variants. TrQuant::xT computes the column as floorLog2( size ) - 1 and uses
    //! the transform-type value directly as the row, so the row order has to match the numeric values of
    //! the transform-type constants (assumed DCT2, DCT8, DST7 — confirm against their definition).
    //! nullptr marks combinations without a kernel (2-point and 64-point DCT8 / DST7).
    FwdTrans *fastFwdTrans[NUM_TRANS_TYPE][g_numTransformMatrixSizes] =
    {
      { fastForwardDCT2_B2, fastForwardDCT2_B4, fastForwardDCT2_B8, fastForwardDCT2_B16, fastForwardDCT2_B32, fastForwardDCT2_B64 },
      { nullptr,            fastForwardDCT8_B4, fastForwardDCT8_B8, fastForwardDCT8_B16, fastForwardDCT8_B32, nullptr },
      { nullptr,            fastForwardDST7_B4, fastForwardDST7_B8, fastForwardDST7_B16, fastForwardDST7_B32, nullptr },
    };
    
    //! Inverse 1-D transform kernels, declared with the same [NUM_TRANS_TYPE][g_numTransformMatrixSizes]
    //! shape as the forward table: rows list the DCT2, DCT8 and DST7 kernels, columns the 2-, 4-, 8-,
    //! 16-, 32- and 64-point variants, and nullptr marks combinations without a kernel.
    //! NOTE(review): the code that indexes this table is outside the visible part of the file;
    //! presumably it uses the same [type][floorLog2( size ) - 1] scheme as the forward path — confirm.
    InvTrans *fastInvTrans[NUM_TRANS_TYPE][g_numTransformMatrixSizes] =
    {
      { fastInverseDCT2_B2, fastInverseDCT2_B4, fastInverseDCT2_B8, fastInverseDCT2_B16, fastInverseDCT2_B32, fastInverseDCT2_B64 },
      { nullptr,            fastInverseDCT8_B4, fastInverseDCT8_B8, fastInverseDCT8_B16, fastInverseDCT8_B32, nullptr },
      { nullptr,            fastInverseDST7_B4, fastInverseDST7_B8, fastInverseDST7_B16, fastInverseDST7_B32, nullptr },
    };
    
    //! \ingroup CommonLib
    //! \{
    
    
    //! Returns d * d, evaluated in 64-bit arithmetic so the product of two ints cannot overflow.
    static inline int64_t square( const int d )
    {
      const int64_t widened = d;
      return widened * widened;
    }
    
    template<int signedMode> std::pair<int64_t,int64_t> fwdTransformCbCr( const PelBuf &resCb, const PelBuf &resCr, PelBuf& resC1, PelBuf& resC2 )
    {
      const Pel*  cb  = resCb.buf;
      const Pel*  cr  = resCr.buf;
      Pel*        c1  = resC1.buf;
      Pel*        c2  = resC2.buf;
      int64_t     d1  = 0;
      int64_t     d2  = 0;
      for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride, c1 += resC1.stride, c2 += resC2.stride )
      {
        for( SizeType x = 0; x < resCb.width; x++ )
        {
          int cbx = cb[x], crx = cr[x];
          if      ( signedMode ==  1 )
    
            c1[x] = Pel( ( 4*cbx + 2*crx ) / 5 );
            d1   += square( cbx - c1[x] ) + square( crx - (c1[x]>>1) );
    
          else if ( signedMode == -1 )
          {
            c1[x] = Pel( ( 4*cbx - 2*crx ) / 5 );
            d1   += square( cbx - c1[x] ) + square( crx - (-c1[x]>>1) );
    
          else if ( signedMode ==  2 )
          {
            c1[x] = Pel( ( cbx + crx ) / 2 );
            d1   += square( cbx - c1[x] ) + square( crx - c1[x] );
    
          else if ( signedMode == -2 )
          {
            c1[x] = Pel( ( cbx - crx ) / 2 );
            d1   += square( cbx - c1[x] ) + square( crx + c1[x] );
    
          else if ( signedMode ==  3 )
          {
            c2[x] = Pel( ( 4*crx + 2*cbx ) / 5 );
            d1   += square( cbx - (c2[x]>>1) ) + square( crx - c2[x] );
          }
          else if ( signedMode == -3 )
          {
            c2[x] = Pel( ( 4*crx - 2*cbx ) / 5 );
            d1   += square( cbx - (-c2[x]>>1) ) + square( crx - c2[x] );
    
          else
          {
            d1   += square( cbx );
            d2   += square( crx );
          }
        }
      }
      return std::make_pair(d1,d2);
    }
    
    //! Inverse inter-chroma transform, performed in place.
    //!
    //! For modes +-1 and +-2 the joint residual is read from resCb and resCr is derived from it;
    //! for modes +-3 the joint residual is read from resCr and resCb is derived from it.
    //! Mode 0 leaves both buffers untouched.
    //!
    //! \tparam signedMode  joint-coding mode in [-3, 3]
    //! \param  resCb       Cb residual buffer; its width and height drive the loops
    //! \param  resCr       Cr residual buffer
    template<int signedMode> void invTransformCbCr( PelBuf &resCb, PelBuf &resCr )
    {
      Pel* rowCb = resCb.buf;
      Pel* rowCr = resCr.buf;

      for( SizeType row = 0; row < resCb.height; row++ )
      {
        for( SizeType col = 0; col < resCb.width; col++ )
        {
          // signedMode is a compile-time constant, so a single case remains per instantiation
          switch( signedMode )
          {
          case  1: rowCr[col] =  rowCb[col] >> 1; break;
          case -1: rowCr[col] = -rowCb[col] >> 1; break;
          case  2: rowCr[col] =  rowCb[col];      break;
          case -2: rowCr[col] = -rowCb[col];      break;
          case  3: rowCb[col] =  rowCr[col] >> 1; break;
          case -3: rowCb[col] = -rowCr[col] >> 1; break;
          default:                                break;
          }
        }

        rowCb += resCb.stride;
        rowCr += resCr.stride;
      }
    }
    
    
    // ====================================================================================================================
    // TrQuant class member functions
    // ====================================================================================================================
    //! Constructs a TrQuant without a quantiser (init() creates one) and fills the two
    //! inter-chroma-transform dispatch tables.
    TrQuant::TrQuant() : m_quant( nullptr )
    {
      // Both table pointers are offset by maxAbsIctMode into their backing arrays so that the
      // signed mode (negative values included) can be used directly as the index.
      m_invICT = m_invICTMem + maxAbsIctMode;
      m_fwdICT = m_fwdICTMem + maxAbsIctMode;

      // inverse transforms, modes -3 .. 3
      m_invICT[-3] = invTransformCbCr<-3>;
      m_invICT[-2] = invTransformCbCr<-2>;
      m_invICT[-1] = invTransformCbCr<-1>;
      m_invICT[ 0] = invTransformCbCr< 0>;
      m_invICT[ 1] = invTransformCbCr< 1>;
      m_invICT[ 2] = invTransformCbCr< 2>;
      m_invICT[ 3] = invTransformCbCr< 3>;

      // forward transforms, modes -3 .. 3
      m_fwdICT[-3] = fwdTransformCbCr<-3>;
      m_fwdICT[-2] = fwdTransformCbCr<-2>;
      m_fwdICT[-1] = fwdTransformCbCr<-1>;
      m_fwdICT[ 0] = fwdTransformCbCr< 0>;
      m_fwdICT[ 1] = fwdTransformCbCr< 1>;
      m_fwdICT[ 2] = fwdTransformCbCr< 2>;
      m_fwdICT[ 3] = fwdTransformCbCr< 3>;
    }
    
    //! Releases the quantiser owned by this object.
    TrQuant::~TrQuant()
    {
      // deleting a null pointer is a no-op, so no guard is needed
      delete m_quant;
      m_quant = nullptr;
    }
    
    #if ENABLE_SPLIT_PARALLELISM
    //! Copies the quantiser state of \p other into this object's quantiser.
    //! Both objects must already own a quantiser: the pointers are dereferenced unchecked.
    void TrQuant::copyState( const TrQuant& other )
    {
      auto& sourceQuant = *other.m_quant;
      m_quant->copyState( sourceQuant );
    }
    #endif
    
    //! Forwards a dequantisation request for one component of \p tu to the owned quantiser.
    //! \param tu        transform unit being processed
    //! \param dstCoeff  buffer handed to the quantiser as the destination
    //! \param compID    colour component
    //! \param cQP       quantisation parameters for that component
    void TrQuant::xDeQuant( const TransformUnit &tu, CoeffBuf &dstCoeff, const ComponentID &compID, const QpParam &cQP )
    {
      m_quant->dequant( tu, dstCoeff, compID, cQP );
    }
    
    //! (Re)creates the quantiser owned by this object and initialises it.
    //! \param otherQuant        passed to the DepQuant constructor
    //! \param uiMaxTrSize       forwarded to the quantiser's init()
    //! \param bUseRDOQ          forwarded to the quantiser's init()
    //! \param bUseRDOQTS        forwarded to the quantiser's init()
    //! \param useSelectiveRDOQ  forwarded to the quantiser's init(); exists only when T0196_SELECTIVE_RDOQ is set
    //! \param bEnc              passed to the DepQuant constructor
    void TrQuant::init( const Quant* otherQuant,
                        const uint32_t uiMaxTrSize,
                        const bool bUseRDOQ,
                        const bool bUseRDOQTS,
    #if T0196_SELECTIVE_RDOQ
                        const bool useSelectiveRDOQ,
    #endif
                        const bool bEnc
    )
    {
      // Drop any previous quantiser first; the pointer is nulled so that it does not dangle
      // if the allocation below throws.
      delete m_quant;
      m_quant = nullptr;

      m_quant = new DepQuant( otherQuant, bEnc );

      if( m_quant )
      {
        // useSelectiveRDOQ only exists under T0196_SELECTIVE_RDOQ, so the call must be guarded
        // by the same macro to keep the file compilable when the macro is disabled.
    #if T0196_SELECTIVE_RDOQ
        m_quant->init( uiMaxTrSize, bUseRDOQ, bUseRDOQTS, useSelectiveRDOQ );
    #else
        // NOTE(review): assumes the quantiser's init() drops its last parameter under the same
        // macro — confirm against Quant.h.
        m_quant->init( uiMaxTrSize, bUseRDOQ, bUseRDOQTS );
    #endif
      }
    }
    
    
    //! Forward LFNST kernel: multiplies the input vector by the first zeroOutSize rows of the selected
    //! matrix and zero-fills the rest of the output.
    //! \param src          input vector with trSize entries (48 when size > 4, otherwise 16)
    //! \param dst          output vector with trSize entries
    //! \param mode         first index into g_lfnst8x8 / g_lfnst4x4
    //! \param index        second index into the kernel tables; must be below 3
    //! \param size         sub-block size; values above 4 select the 8x8 tables
    //! \param zeroOutSize  number of output coefficients that are computed; the callers in this file
    //!                     pass 8 or 16, and the value must not exceed trSize
    void TrQuant::fwdLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize )
    {
      // validate the index before it is used to address the kernel tables
      assert( index < 3 );

      const int8_t* trMat  = ( size > 4 ) ? g_lfnst8x8[ mode ][ index ][ 0 ] : g_lfnst4x4[ mode ][ index ][ 0 ];
      const int     trSize = ( size > 4 ) ? 48 : 16;
      int*          out    = dst;

      for( int j = 0; j < zeroOutSize; j++ )
      {
        int*          srcPtr   = src;
        const int8_t* trMatTmp = trMat;   // row j of the matrix, trSize entries
        int           coef     = 0;
        for( int i = 0; i < trSize; i++ )
        {
          coef += *srcPtr++ * *trMatTmp++;
        }
        *out++ = ( coef + 64 ) >> 7;      // round and drop the 7 fractional bits
        trMat += trSize;
      }

      // coefficients beyond zeroOutSize are forced to zero
      ::memset( out, 0, ( trSize - zeroOutSize ) * sizeof( int ) );
    }
    
    //! Inverse LFNST kernel: multiplies the first zeroOutSize input coefficients by the transpose of the
    //! selected matrix, then rounds and clips each of the trSize results to a 16-bit signed range.
    //! \param src          input coefficients; only the first zeroOutSize entries are read
    //! \param dst          output vector with trSize entries (48 when size > 4, otherwise 16)
    //! \param mode         first index into g_lfnst8x8 / g_lfnst4x4
    //! \param index        second index into the kernel tables; must be below 3
    //! \param size         sub-block size; values above 4 select the 8x8 tables
    //! \param zeroOutSize  number of input coefficients that take part in the product
    void TrQuant::invLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize )
    {
      // validate the index before it is used to address the kernel tables
      assert( index < 3 );

      int             maxLog2TrDynamicRange =  15;   // fixed here, not taken from the SPS
      const TCoeff    outputMinimum         = -( 1 << maxLog2TrDynamicRange );
      const TCoeff    outputMaximum         =  ( 1 << maxLog2TrDynamicRange ) - 1;
      const int8_t*   trMat                 =  ( size > 4 ) ? g_lfnst8x8[ mode ][ index ][ 0 ] : g_lfnst4x4[ mode ][ index ][ 0 ];
      const int       trSize                =  ( size > 4 ) ? 48 : 16;
      int*            out                   =  dst;

      for( int j = 0; j < trSize; j++ )
      {
        int           resi     = 0;
        const int8_t* trMatTmp = trMat;   // column j of the matrix, walked with a stride of trSize
        int*          srcPtr   = src;
        for( int i = 0; i < zeroOutSize; i++ )
        {
          resi += *srcPtr++ * *trMatTmp;
          trMatTmp += trSize;
        }
        *out++ = Clip3( outputMinimum, outputMaximum, ( int ) ( resi + 64 ) >> 7 );
        trMat++;
      }
    }
    
    //! Maps a (possibly wide-angle) intra prediction mode onto the non-negative mode index used by
    //! the LFNST code in this file.
    //! \param wideAngPredMode  prediction mode; may be negative or >= NUM_LUMA_MODE
    //! \return the mode itself when it lies in [0, NUM_LUMA_MODE); otherwise the mode shifted by
    //!         NUM_EXT_LUMA_MODE / 2, plus NUM_LUMA_MODE when the input was negative
    uint32_t TrQuant::getLFNSTIntraMode( int wideAngPredMode )
    {
      if( wideAngPredMode < 0 )
      {
        return static_cast<uint32_t>( wideAngPredMode + ( NUM_EXT_LUMA_MODE >> 1 ) + NUM_LUMA_MODE );
      }

      if( wideAngPredMode >= NUM_LUMA_MODE )
      {
        return static_cast<uint32_t>( wideAngPredMode + ( NUM_EXT_LUMA_MODE >> 1 ) );
      }

      return static_cast<uint32_t>( wideAngPredMode );
    }
    
    //! Tells whether the LFNST input / output block has to be transposed for \p intraMode.
    //! \return for modes below NUM_LUMA_MODE: true when the mode is above DIA_IDX;
    //!         for all other modes: true when the mode is at least NUM_LUMA_MODE + NUM_EXT_LUMA_MODE / 2
    bool TrQuant::getTransposeFlag( uint32_t intraMode )
    {
      if( intraMode >= NUM_LUMA_MODE )
      {
        return intraMode >= ( NUM_LUMA_MODE + ( NUM_EXT_LUMA_MODE >> 1 ) );
      }

      return intraMode > DIA_IDX;
    }
    
    void TrQuant::xInvLfnst( const TransformUnit &tu, const ComponentID compID )
    {
      const CompArea& area     = tu.blocks[ compID ];
      const uint32_t  width    = area.width;
      const uint32_t  height   = area.height;
      const uint32_t  lfnstIdx = tu.cu->lfnstIdx;
    
    
    #if JVET_P0058_CHROMA_TS
      if (lfnstIdx && tu.mtsIdx[compID] != MTS_SKIP && width >= 4 && height >= 4)
    #else
    
      if( lfnstIdx && tu.mtsIdx != MTS_SKIP && width >= 4 && height >= 4 )
    
      {
        const bool whge3 = width >= 8 && height >= 8;
        const ScanElement * scan = whge3 ? g_coefTopLeftDiagScan8x8[ gp_sizeIdxInfo->idxFrom( width ) ] : g_scanOrder[ SCAN_GROUPED_4x4 ][ SCAN_DIAG ][ gp_sizeIdxInfo->idxFrom( width ) ][ gp_sizeIdxInfo->idxFrom( height ) ];
        uint32_t intraMode = PU::getFinalIntraMode( *tu.cs->getPU( area.pos(), toChannelType( compID ) ), toChannelType( compID ) );
    
        if( PU::isLMCMode( tu.cs->getPU( area.pos(), toChannelType( compID ) )->intraDir[ toChannelType( compID ) ] ) )
        {
    
          intraMode = PU::getCoLocatedIntraLumaMode( *tu.cs->getPU( area.pos(), toChannelType( compID ) ) );
    
        if (PU::isMIP(*tu.cs->getPU(area.pos(), toChannelType(compID)), toChannelType(compID)))
        {
          intraMode = PLANAR_IDX;
        }
    
        CHECK( intraMode >= NUM_INTRA_MODE - 1, "Invalid intra mode" );
    
        if( lfnstIdx < 3 )
        {
          intraMode = getLFNSTIntraMode( PU::getWideAngIntraMode( tu, intraMode, compID ) );
    #if RExt__DECODER_DEBUG_TOOL_STATISTICS
          CodingStatistics::IncrementStatisticTool( CodingStatisticsClassType { STATS__TOOL_LFNST, width, height, compID } );
    #endif
          bool          transposeFlag   = getTransposeFlag( intraMode );
          const int     sbSize          = whge3 ? 8 : 4;
          bool          tu4x4Flag       = ( width == 4 && height == 4 );
          bool          tu8x8Flag       = ( width == 8 && height == 8 );
          TCoeff*       lfnstTemp;
          TCoeff*       coeffTemp;
    
    Mischa Siekmann's avatar
    Mischa Siekmann committed
              int y;
              lfnstTemp = m_tempInMatrix; // inverse spectral rearrangement
    
              TCoeff * dst = lfnstTemp;
              const ScanElement * scanPtr = scan;
              for( y = 0; y < 16; y++ )
              {
                *dst++ = coeffTemp[ scanPtr->idx ];
                scanPtr++;
              }
    
              invLfnstNxN( m_tempInMatrix, m_tempOutMatrix, g_lfnstLut[ intraMode ], lfnstIdx - 1, sbSize, ( tu4x4Flag || tu8x8Flag ) ? 8 : 16 );
    
              lfnstTemp = m_tempOutMatrix; // inverse spectral rearrangement
    
              if( transposeFlag )
              {
                if( sbSize == 4 )
                {
                  for( y = 0; y < 4; y++ )
                  {
                    coeffTemp[ 0 ] = lfnstTemp[ 0 ];  coeffTemp[ 1 ] = lfnstTemp[  4 ];
                    coeffTemp[ 2 ] = lfnstTemp[ 8 ];  coeffTemp[ 3 ] = lfnstTemp[ 12 ];
                    lfnstTemp++;
                    coeffTemp += width;
                  }
                }
                else // ( sbSize == 8 )
                {
                  for( y = 0; y < 8; y++ )
                  {
                    coeffTemp[ 0 ] = lfnstTemp[  0 ];  coeffTemp[ 1 ] = lfnstTemp[  8 ];
                    coeffTemp[ 2 ] = lfnstTemp[ 16 ];  coeffTemp[ 3 ] = lfnstTemp[ 24 ];
    
                    if( y < 4 )
    
                    {
                      coeffTemp[ 4 ] = lfnstTemp[ 32 ];  coeffTemp[ 5 ] = lfnstTemp[ 36 ];
                      coeffTemp[ 6 ] = lfnstTemp[ 40 ];  coeffTemp[ 7 ] = lfnstTemp[ 44 ];
                    }
                    lfnstTemp++;
                    coeffTemp += width;
                  }
                }
              }
              else
              {
                for( y = 0; y < sbSize; y++ )
                {
                  uint32_t uiStride = ( y < 4 ) ? sbSize : 4;
                  ::memcpy( coeffTemp, lfnstTemp, uiStride * sizeof( TCoeff ) );
                  lfnstTemp += uiStride;
                  coeffTemp += width;
                }
              }
        }
      }
    }
    
    void TrQuant::xFwdLfnst( const TransformUnit &tu, const ComponentID compID, const bool loadTr )
    {
      const CompArea& area     = tu.blocks[ compID ];
      const uint32_t  width    = area.width;
      const uint32_t  height   = area.height;
      const uint32_t  lfnstIdx = tu.cu->lfnstIdx;
    
    
    #if JVET_P0058_CHROMA_TS
      if( lfnstIdx && tu.mtsIdx[compID] != MTS_SKIP && width >= 4 && height >= 4)
    #else
    
      if( lfnstIdx && tu.mtsIdx != MTS_SKIP && width >= 4 && height >= 4 )
    
      {
        const bool whge3 = width >= 8 && height >= 8;
        const ScanElement * scan = whge3 ? g_coefTopLeftDiagScan8x8[ gp_sizeIdxInfo->idxFrom( width ) ] : g_scanOrder[ SCAN_GROUPED_4x4 ][ SCAN_DIAG ][ gp_sizeIdxInfo->idxFrom( width ) ][ gp_sizeIdxInfo->idxFrom( height ) ];
        uint32_t intraMode = PU::getFinalIntraMode( *tu.cs->getPU( area.pos(), toChannelType( compID ) ), toChannelType( compID ) );
    
        if( PU::isLMCMode( tu.cs->getPU( area.pos(), toChannelType( compID ) )->intraDir[ toChannelType( compID ) ] ) )
        {
    
          intraMode = PU::getCoLocatedIntraLumaMode( *tu.cs->getPU( area.pos(), toChannelType( compID ) ) );
    
        if (PU::isMIP(*tu.cs->getPU(area.pos(), toChannelType(compID)), toChannelType(compID)))
        {
          intraMode = PLANAR_IDX;
        }
    
        CHECK( intraMode >= NUM_INTRA_MODE - 1, "Invalid intra mode" );
    
        if( lfnstIdx < 3 )
        {
          intraMode = getLFNSTIntraMode( PU::getWideAngIntraMode( tu, intraMode, compID ) );
    
          bool            transposeFlag   = getTransposeFlag( intraMode );
          const int       sbSize          = whge3 ? 8 : 4;
          bool            tu4x4Flag       = ( width == 4 && height == 4 );
          bool            tu8x8Flag       = ( width == 8 && height == 8 );
          TCoeff*         lfnstTemp;
          TCoeff*         coeffTemp;
    
    #if JVET_P0058_CHROMA_TS
          TCoeff *        tempCoeff = loadTr ? m_mtsCoeffs[tu.mtsIdx[compID]] : m_tempCoeff;
    #else
    
          TCoeff *        tempCoeff = loadTr ? m_mtsCoeffs[tu.mtsIdx] : m_tempCoeff;
    
    Mischa Siekmann's avatar
    Mischa Siekmann committed
              int y;
              lfnstTemp = m_tempInMatrix; // forward low frequency non-separable transform
              coeffTemp = tempCoeff;
    
    
              if( transposeFlag )
              {
                if( sbSize == 4 )
                {
                  for( y = 0; y < 4; y++ )
                  {
                    lfnstTemp[ 0 ] = coeffTemp[ 0 ];  lfnstTemp[  4 ] = coeffTemp[ 1 ];
                    lfnstTemp[ 8 ] = coeffTemp[ 2 ];  lfnstTemp[ 12 ] = coeffTemp[ 3 ];
                    lfnstTemp++;
                    coeffTemp += width;
                  }
                }
                else // ( sbSize == 8 )
                {
                  for( y = 0; y < 8; y++ )
                  {
                    lfnstTemp[  0 ] = coeffTemp[ 0 ];  lfnstTemp[  8 ] = coeffTemp[ 1 ];
                    lfnstTemp[ 16 ] = coeffTemp[ 2 ];  lfnstTemp[ 24 ] = coeffTemp[ 3 ];
    
                    if( y < 4 )
    
                    {
                      lfnstTemp[ 32 ] = coeffTemp[ 4 ];  lfnstTemp[ 36 ] = coeffTemp[ 5 ];
                      lfnstTemp[ 40 ] = coeffTemp[ 6 ];  lfnstTemp[ 44 ] = coeffTemp[ 7 ];
                    }
                    lfnstTemp++;
                    coeffTemp += width;
                  }
                }
              }
              else
              {
                for( y = 0; y < sbSize; y++ )
                {
                  uint32_t uiStride = ( y < 4 ) ? sbSize : 4;
                  ::memcpy( lfnstTemp, coeffTemp, uiStride * sizeof( TCoeff ) );
                  lfnstTemp += uiStride;
                  coeffTemp += width;
                }
              }
    
              fwdLfnstNxN( m_tempInMatrix, m_tempOutMatrix, g_lfnstLut[ intraMode ], lfnstIdx - 1, sbSize, ( tu4x4Flag || tu8x8Flag ) ? 8 : 16 );
    
              lfnstTemp = m_tempOutMatrix; // forward spectral rearrangement
    
    Mischa Siekmann's avatar
    Mischa Siekmann committed
              coeffTemp = tempCoeff;
    
              const ScanElement * scanPtr = scan;
              int lfnstCoeffNum = ( sbSize == 4 ) ? sbSize * sbSize : 48;
              for( y = 0; y < lfnstCoeffNum; y++ )
              {
                coeffTemp[ scanPtr->idx ] = *lfnstTemp++;
                scanPtr++;
              }
        }
      }
    }
    
    
    
    //! Reconstructs the residual of one component of a transform unit.
    //!
    //! With transquant bypass the coded coefficients are copied to the residual directly (rotated by
    //! 180 degrees when TU::isNonTransformedResidualRotated says so). Otherwise the levels are
    //! dequantised into m_tempCoeff, the inverse LFNST is applied when the SPS enables it, and either
    //! the transform-skip path or the inverse primary transform produces the residual.
    //! invRdpcmNxN() is invoked on the result in every case.
    //!
    //! NOTE(review): the "#else" / "#endif" directives of the transform-skip condition were absent
    //! from the reviewed text and are restored here — confirm against the upstream file.
    //!
    //! \param tu      transform unit to reconstruct
    //! \param compID  colour component
    //! \param pResi   receives the residual
    //! \param cQP     quantisation parameters for the component
    void TrQuant::invTransformNxN( TransformUnit &tu, const ComponentID &compID, PelBuf &pResi, const QpParam &cQP )
    {
      const CompArea &area    = tu.blocks[compID];
      const uint32_t uiWidth      = area.width;
      const uint32_t uiHeight     = area.height;

      CHECK( uiWidth > tu.cs->sps->getMaxTbSize() || uiHeight > tu.cs->sps->getMaxTbSize(), "Maximal allowed transformation size exceeded!" );

      if (tu.cu->transQuantBypass)
      {
        // where should this logic go?
        const bool rotateResidual = TU::isNonTransformedResidualRotated(tu, compID);
        const CCoeffBuf pCoeff    = tu.getCoeffs(compID);

        for (uint32_t y = 0; y < uiHeight; y++)
        {
          for (uint32_t x = 0; x < uiWidth; x++)
          {
            pResi.at(x, y) = rotateResidual ? pCoeff.at(pCoeff.width - x - 1, pCoeff.height - y - 1) : pCoeff.at(x, y);
          }
        }
      }
      else
      {
        CoeffBuf tempCoeff = CoeffBuf(m_tempCoeff, area);

        xDeQuant( tu, tempCoeff, compID, cQP );

        DTRACE_COEFF_BUF( D_TCOEFF, tempCoeff, tu, tu.cu->predMode, compID );

        if( tu.cs->sps->getUseLFNST() )
        {
          xInvLfnst( tu, compID );
        }

    #if JVET_P0058_CHROMA_TS
        if( tu.mtsIdx[compID] == MTS_SKIP )
    #else
    #if JVET_P0059_CHROMA_BDPCM
        if( (isLuma(compID) && tu.mtsIdx == MTS_SKIP) || (isChroma(compID) && tu.cu->bdpcmModeChroma))
    #else
        if( isLuma(compID) && tu.mtsIdx == MTS_SKIP )
    #endif
    #endif
        {
          xITransformSkip( tempCoeff, pResi, tu, compID );
        }
        else
        {
          xIT( tu, compID, tempCoeff, pResi );
        }
      }

      //DTRACE_BLOCK_COEFF(tu.getCoeffs(compID), tu, tu.cu->predMode, compID);
      DTRACE_PEL_BUF( D_RESIDUALS, pResi, tu, tu.cu->predMode, compID);
      invRdpcmNxN(tu, compID, pResi);
    }
    
    //! Undoes residual DPCM on a reconstructed residual block, in place.
    //!
    //! Runs only when CU::isRDPCMEnabled() holds and the block is transform-skipped or
    //! transquant-bypassed. For intra CUs the direction follows the final intra mode (VER_IDX ->
    //! vertical, HOR_IDX -> horizontal, anything else -> off); otherwise it is read from tu.rdpcm.
    //! Each sample from the second row / column onwards becomes the running sum along the DPCM
    //! direction; the sum itself is kept unclipped while the stored value is clipped to the Pel range.
    //!
    //! NOTE(review): the "#endif" closing the JVET_P0058_CHROMA_TS alternative was absent from the
    //! reviewed text and is restored here — confirm against the upstream file.
    //!
    //! \param tu          transform unit the residual belongs to
    //! \param compID      colour component
    //! \param pcResidual  residual block, updated in place
    void TrQuant::invRdpcmNxN(TransformUnit& tu, const ComponentID &compID, PelBuf &pcResidual)
    {
      const CompArea &area    = tu.blocks[compID];

    #if JVET_P0058_CHROMA_TS
      if (CU::isRDPCMEnabled(*tu.cu) && (tu.mtsIdx[compID] == MTS_SKIP || tu.cu->transQuantBypass))
    #else
      if (CU::isRDPCMEnabled(*tu.cu) && (tu.mtsIdx==MTS_SKIP || tu.cu->transQuantBypass))
    #endif
      {
        const uint32_t uiWidth  = area.width;
        const uint32_t uiHeight = area.height;

        RDPCMMode rdpcmMode = RDPCM_OFF;

        if (tu.cu->predMode == MODE_INTRA)
        {
          const ChannelType chType = toChannelType(compID);
          const uint32_t uiChFinalMode = PU::getFinalIntraMode(*tu.cs->getPU(area.pos(), chType), chType);

          if (uiChFinalMode == VER_IDX || uiChFinalMode == HOR_IDX)
          {
            rdpcmMode = (uiChFinalMode == VER_IDX) ? RDPCM_VER : RDPCM_HOR;
          }
        }
        else  // not intra case
        {
          rdpcmMode = RDPCMMode(tu.rdpcm[compID]);
        }

        const TCoeff pelMin = (TCoeff) std::numeric_limits<Pel>::min();
        const TCoeff pelMax = (TCoeff) std::numeric_limits<Pel>::max();

        if (rdpcmMode == RDPCM_VER)
        {
          for (uint32_t uiX = 0; uiX < uiWidth; uiX++)
          {
            TCoeff accumulator = pcResidual.at(uiX, 0); // 32-bit accumulator

            for (uint32_t uiY = 1; uiY < uiHeight; uiY++)
            {
              accumulator            += pcResidual.at(uiX, uiY);
              pcResidual.at(uiX, uiY) = (Pel) Clip3<TCoeff>(pelMin, pelMax, accumulator);
            }
          }
        }
        else if (rdpcmMode == RDPCM_HOR)
        {
          for (uint32_t uiY = 0; uiY < uiHeight; uiY++)
          {
            TCoeff accumulator = pcResidual.at(0, uiY);

            for (uint32_t uiX = 1; uiX < uiWidth; uiX++)
            {
              accumulator            += pcResidual.at(uiX, uiY);
              pcResidual.at(uiX, uiY) = (Pel) Clip3<TCoeff>(pelMin, pelMax, accumulator);
            }
          }
        }
      }
    }
    
    
    
    //! Runs the forward inter-chroma transform selected by TU::getICTMode( tu, jointCbCr ).
    //! All four buffers must have the same size.
    //! \param tu         transform unit the residuals belong to
    //! \param resCb      original Cb residual
    //! \param resCr      original Cr residual
    //! \param resC1      first output buffer handed to the transform
    //! \param resC2      second output buffer handed to the transform
    //! \param jointCbCr  value passed on to TU::getICTMode() to pick the mode
    //! \return the distortion pair reported by the selected transform
    std::pair<int64_t,int64_t> TrQuant::fwdTransformICT( const TransformUnit &tu, const PelBuf &resCb, const PelBuf &resCr, PelBuf &resC1, PelBuf &resC2, int jointCbCr )
    {
      CHECK( Size(resCb) != Size(resCr), "resCb and resCr have different sizes" );
      CHECK( Size(resCb) != Size(resC1), "resCb and resC1 have different sizes" );
      CHECK( Size(resCb) != Size(resC2), "resCb and resC2 have different sizes" );

      const auto ictMode = TU::getICTMode(tu, jointCbCr);
      return m_fwdICT[ ictMode ]( resCb, resCr, resC1, resC2 );
    }
    
    //! Runs the inverse inter-chroma transform selected by TU::getICTMode( tu ) on the two chroma
    //! residual buffers, in place. Both buffers must have the same size.
    void TrQuant::invTransformICT( const TransformUnit &tu, PelBuf &resCb, PelBuf &resCr )
    {
      CHECK( Size(resCb) != Size(resCr), "resCb and resCr have different sizes" );

      const auto ictMode = TU::getICTMode(tu);
      m_invICT[ ictMode ]( resCb, resCr );
    }
    
    std::vector<int> TrQuant::selectICTCandidates( const TransformUnit &tu, CompStorage* resCb, CompStorage* resCr )
    {
      CHECK( !resCb[0].valid() || !resCr[0].valid(), "standard components are not valid" );
    
      if( !CU::isIntra( *tu.cu ) )
      {
        int cbfMask = 3;
        resCb[cbfMask].create( tu.blocks[COMPONENT_Cb] );
        resCr[cbfMask].create( tu.blocks[COMPONENT_Cr] );
        fwdTransformICT( tu, resCb[0], resCr[0], resCb[cbfMask], resCr[cbfMask], cbfMask );
        std::vector<int> cbfMasksToTest;
        cbfMasksToTest.push_back( cbfMask );
        return cbfMasksToTest;
      }
    
      std::pair<int64_t,int64_t> pairDist[4];
      for( int cbfMask = 0; cbfMask < 4; cbfMask++ )
      {
        if( cbfMask )
        {
          CHECK( resCb[cbfMask].valid() || resCr[cbfMask].valid(), "target components for cbfMask=" << cbfMask << " are already present" );
          resCb[cbfMask].create( tu.blocks[COMPONENT_Cb] );
          resCr[cbfMask].create( tu.blocks[COMPONENT_Cr] );
        }
        pairDist[cbfMask] = fwdTransformICT( tu, resCb[0], resCr[0], resCb[cbfMask], resCr[cbfMask], cbfMask );
      }
    
      std::vector<int> cbfMasksToTest;
      int64_t minDist1  = std::min<int64_t>( pairDist[0].first, pairDist[0].second );
      int64_t minDist2  = std::numeric_limits<int64_t>::max();
      int     cbfMask1  = 0;
      int     cbfMask2  = 0;
    
      for( int cbfMask : { 1, 2, 3 } )
    
      {
        if( pairDist[cbfMask].first < minDist1 )
        {
          cbfMask2  = cbfMask1; minDist2  = minDist1;
          cbfMask1  = cbfMask;  minDist1  = pairDist[cbfMask1].first;
        }
        else if( pairDist[cbfMask].first < minDist2 )
        {
          cbfMask2  = cbfMask;  minDist2  = pairDist[cbfMask2].first;
        }
      }
      if( cbfMask1 )
      {
        cbfMasksToTest.push_back( cbfMask1 );
      }
      if( cbfMask2 && ( ( minDist2 < (9*minDist1)/8 ) || ( !cbfMask1 && minDist2 < (3*minDist1)/2 ) ) )
      {
        cbfMasksToTest.push_back( cbfMask2 );
      }
    
      return cbfMasksToTest;
    }
    
    
    
    
    // ------------------------------------------------------------------------------------------------
    // Logical transform
    // ------------------------------------------------------------------------------------------------
    
    
    void TrQuant::getTrTypes(const TransformUnit tu, const ComponentID compID, int &trTypeHor, int &trTypeVer)
    
      const bool isExplicitMTS = (CU::isIntra(*tu.cu) ? tu.cs->sps->getUseIntraMTS() : tu.cs->sps->getUseInterMTS() && CU::isInter(*tu.cu)) && isLuma(compID);
    
      const bool isImplicitMTS = CU::isIntra(*tu.cu) && tu.cs->sps->getUseImplicitMTS() && isLuma(compID) && tu.cu->lfnstIdx == 0 && tu.cu->mipFlag == 0;
    
      const bool isISP = CU::isIntra(*tu.cu) && tu.cu->ispMode && isLuma(compID);
      const bool isSBT = CU::isInter(*tu.cu) && tu.cu->sbtInfo && isLuma(compID);
    
      trTypeHor = DCT2;
      trTypeVer = DCT2;
    
      if (!tu.cs->sps->getUseMTS())
        return;
    
    
      if (isImplicitMTS || isISP)
      {
        int  width = tu.blocks[compID].width;
        int  height = tu.blocks[compID].height;
        bool widthDstOk = width >= 4 && width <= 16;
        bool heightDstOk = height >= 4 && height <= 16;
    
        if (widthDstOk)
          trTypeHor = DST7;
        if (heightDstOk)
          trTypeVer = DST7;
        return;
      }
    
    
      if (isSBT)
    
      {
        uint8_t sbtIdx = tu.cu->getSbtIdx();
        uint8_t sbtPos = tu.cu->getSbtPos();
    
        if( sbtIdx == SBT_VER_HALF || sbtIdx == SBT_VER_QUAD )
        {
          assert( tu.lwidth() <= MTS_INTER_MAX_CU_SIZE );
          if( tu.lheight() > MTS_INTER_MAX_CU_SIZE )
          {
            trTypeHor = trTypeVer = DCT2;
          }
          else
          {
            if( sbtPos == SBT_POS0 )  { trTypeHor = DCT8;  trTypeVer = DST7; }
            else                      { trTypeHor = DST7;  trTypeVer = DST7; }
          }
        }
        else
        {
          assert( tu.lheight() <= MTS_INTER_MAX_CU_SIZE );
          if( tu.lwidth() > MTS_INTER_MAX_CU_SIZE )
          {
            trTypeHor = trTypeVer = DCT2;
          }
          else
          {
            if( sbtPos == SBT_POS0 )  { trTypeHor = DST7;  trTypeVer = DCT8; }
            else                      { trTypeHor = DST7;  trTypeVer = DST7; }
          }
        }
        return;
      }
    
    #if JVET_P0058_CHROMA_TS
        if (tu.mtsIdx[compID] > MTS_SKIP)
    #else
    
        if (tu.mtsIdx > MTS_SKIP)
    
    #if JVET_P0058_CHROMA_TS
          int indHor = (tu.mtsIdx[compID] - MTS_DST7_DST7) & 1;
          int indVer = (tu.mtsIdx[compID] - MTS_DST7_DST7) >> 1;
    #else
    
          int indHor = (tu.mtsIdx - MTS_DST7_DST7) & 1;
          int indVer = (tu.mtsIdx - MTS_DST7_DST7) >> 1;
    
          trTypeHor = indHor ? DCT8 : DST7;
          trTypeVer = indVer ? DCT8 : DST7;
        }
      }
    }
    
    
    
    void TrQuant::xT( const TransformUnit &tu, const ComponentID &compID, const CPelBuf &resi, CoeffBuf &dstCoeff, const int width, const int height )
    
    {
      const unsigned maxLog2TrDynamicRange  = tu.cs->sps->getMaxLog2TrDynamicRange( toChannelType( compID ) );
      const unsigned bitDepth               = tu.cs->sps->getBitDepth(              toChannelType( compID ) );
      const int      TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_FORWARD];
    
      const uint32_t transformWidthIndex    = floorLog2(width ) - 1;  // nLog2WidthMinus1, since transform start from 2-point
      const uint32_t transformHeightIndex   = floorLog2(height) - 1;  // nLog2HeightMinus1, since transform start from 2-point
    
      int trTypeHor = DCT2;
      int trTypeVer = DCT2;
    
      getTrTypes ( tu, compID, trTypeHor, trTypeVer );
    
    Mischa Siekmann's avatar
    Mischa Siekmann committed
      int  skipWidth  = ( trTypeHor != DCT2 && width  == 32 ) ? 16 : width  > JVET_C0024_ZERO_OUT_TH ? width  - JVET_C0024_ZERO_OUT_TH : 0;
      int  skipHeight = ( trTypeVer != DCT2 && height == 32 ) ? 16 : height > JVET_C0024_ZERO_OUT_TH ? height - JVET_C0024_ZERO_OUT_TH : 0;
      if( tu.cs->sps->getUseLFNST() && tu.cu->lfnstIdx )
      {
        if( (width == 4 && height > 4) || (width > 4 && height == 4) )
        {
          skipWidth  = width  - 4;
          skipHeight = height - 4;
        }
        else if( (width >= 8 && height >= 8) )
        {
          skipWidth  = width  - 8;
          skipHeight = height - 8;
        }
      }
    
    #if RExt__DECODER_DEBUG_TOOL_STATISTICS
    
      if ( trTypeHor != DCT2 )
      {
    
        CodingStatistics::IncrementStatisticTool( CodingStatisticsClassType{ STATS__TOOL_EMT, uint32_t( width ), uint32_t( height ), compID } );
    
      ALIGN_DATA( MEMORY_ALIGN_DEF_SIZE, TCoeff block[MAX_TB_SIZEY * MAX_TB_SIZEY] );
    
      const Pel *resiBuf    = resi.buf;
      const int  resiStride = resi.stride;
    
          block[( y * width ) + x] = resiBuf[( y * resiStride ) + x];
    
      if( width > 1 && height > 1 ) // 2-D transform
      {
    
        const int      shift_1st              = ((floorLog2(width )) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange + COM16_C806_TRANS_PREC;
        const int      shift_2nd              =  (floorLog2(height))            + TRANSFORM_MATRIX_SHIFT                          + COM16_C806_TRANS_PREC;
    
        CHECK( shift_1st < 0, "Negative shift" );
        CHECK( shift_2nd < 0, "Negative shift" );
    
      TCoeff *tmp = ( TCoeff * ) alloca( width * height * sizeof( TCoeff ) );
    
      fastFwdTrans[trTypeHor][transformWidthIndex ](block,        tmp, shift_1st, height,        0, skipWidth);
      fastFwdTrans[trTypeVer][transformHeightIndex](tmp, dstCoeff.buf, shift_2nd, width, skipWidth, skipHeight);
    
      else if( height == 1 ) //1-D horizontal transform
    
        const int      shift              = ((floorLog2(width )) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange + COM16_C806_TRANS_PREC;
    
        CHECK( shift < 0, "Negative shift" );
    
        CHECKD( ( transformWidthIndex < 0 ), "There is a problem with the width." );
    
        fastFwdTrans[trTypeHor][transformWidthIndex]( block, dstCoeff.buf, shift, 1, 0, skipWidth );
    
      }
      else //if (iWidth == 1) //1-D vertical transform
      {
    
        int shift = ( ( floorLog2(height) ) + bitDepth + TRANSFORM_MATRIX_SHIFT ) - maxLog2TrDynamicRange + COM16_C806_TRANS_PREC;
    
        CHECK( shift < 0, "Negative shift" );
    
        CHECKD( ( transformHeightIndex < 0 ), "There is a problem with the height." );
    
        fastFwdTrans[trTypeVer][transformHeightIndex]( block, dstCoeff.buf, shift, 1, 0, skipHeight );
    
    }
    
    void TrQuant::xIT( const TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &pCoeff, PelBuf &pResidual )
    {
    
      const int      width                  = pCoeff.width;
      const int      height                 = pCoeff.height;
    
      const unsigned maxLog2TrDynamicRange  = tu.cs->sps->getMaxLog2TrDynamicRange( toChannelType( compID ) );
      const unsigned bitDepth               = tu.cs->sps->getBitDepth(              toChannelType( compID ) );
      const int      TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_INVERSE];
      const TCoeff   clipMinimum            = -( 1 << maxLog2TrDynamicRange );
      const TCoeff   clipMaximum            =  ( 1 << maxLog2TrDynamicRange ) - 1;
    
      const uint32_t transformWidthIndex    = floorLog2(width ) - 1;                                // nLog2WidthMinus1, since transform start from 2-point
      const uint32_t transformHeightIndex   = floorLog2(height) - 1;                                // nLog2HeightMinus1, since transform start from 2-point
    
      int trTypeHor = DCT2;
      int trTypeVer = DCT2;
    
      getTrTypes ( tu, compID, trTypeHor, trTypeVer );
    
    Mischa Siekmann's avatar
    Mischa Siekmann committed
      int skipWidth  = ( trTypeHor != DCT2 && width  == 32 ) ? 16 : width  > JVET_C0024_ZERO_OUT_TH ? width  - JVET_C0024_ZERO_OUT_TH : 0;
      int skipHeight = ( trTypeVer != DCT2 && height == 32 ) ? 16 : height > JVET_C0024_ZERO_OUT_TH ? height - JVET_C0024_ZERO_OUT_TH : 0;
      if( tu.cs->sps->getUseLFNST() && tu.cu->lfnstIdx )
      {
        if( (width == 4 && height > 4) || (width > 4 && height == 4) )
        {
          skipWidth  = width  - 4;
          skipHeight = height - 4;
        }
        else if( (width >= 8 && height >= 8) )
        {
          skipWidth  = width  - 8;
          skipHeight = height - 8;
        }
      }
    
      TCoeff *block = ( TCoeff * ) alloca( width * height * sizeof( TCoeff ) );
    
      if( width > 1 && height > 1 ) //2-D transform
      {
    
        const int      shift_1st              =   TRANSFORM_MATRIX_SHIFT + 1 + COM16_C806_TRANS_PREC; // 1 has been added to shift_1st at the expense of shift_2nd
        const int      shift_2nd              = ( TRANSFORM_MATRIX_SHIFT + maxLog2TrDynamicRange - 1 ) - bitDepth + COM16_C806_TRANS_PREC;
        CHECK( shift_1st < 0, "Negative shift" );
        CHECK( shift_2nd < 0, "Negative shift" );
    
        TCoeff *tmp = ( TCoeff * ) alloca( width * height * sizeof( TCoeff ) );
    
      fastInvTrans[trTypeVer][transformHeightIndex](pCoeff.buf, tmp, shift_1st, width, skipWidth, skipHeight, clipMinimum, clipMaximum);
      fastInvTrans[trTypeHor][transformWidthIndex] (tmp,      block, shift_2nd, height,         0, skipWidth, clipMinimum, clipMaximum);
    
      }
      else if( width == 1 ) //1-D vertical transform
      {
    
        int shift = ( TRANSFORM_MATRIX_SHIFT + maxLog2TrDynamicRange - 1 ) - bitDepth + COM16_C806_TRANS_PREC;
        CHECK( shift < 0, "Negative shift" );
    
        CHECK( ( transformHeightIndex < 0 ), "There is a problem with the height." );
    
        fastInvTrans[trTypeVer][transformHeightIndex]( pCoeff.buf, block, shift + 1, 1, 0, skipHeight, clipMinimum, clipMaximum );
    
      }
      else //if(iHeight == 1) //1-D horizontal transform
      {
    
        const int      shift              = ( TRANSFORM_MATRIX_SHIFT + maxLog2TrDynamicRange - 1 ) - bitDepth + COM16_C806_TRANS_PREC;
        CHECK( shift < 0, "Negative shift" );
    
        CHECK( ( transformWidthIndex < 0 ), "There is a problem with the width." );
    
        fastInvTrans[trTypeHor][transformWidthIndex]( pCoeff.buf, block, shift + 1, 1, 0, skipWidth, clipMinimum, clipMaximum );
    
      Pel *resiBuf    = pResidual.buf;
      int  resiStride = pResidual.stride;
    
          resiBuf[( y * resiStride ) + x] = Pel( block[( y * width ) + x] );
    
      }
    }
    
    /** Wrapper function between HM interface and core NxN transform skipping
     */
    void TrQuant::xITransformSkip(const CCoeffBuf     &pCoeff,
                                        PelBuf        &pResidual,
                                  const TransformUnit &tu,
                                  const ComponentID   &compID)
    {
      const CompArea &area      = tu.blocks[compID];
      const int width           = area.width;
      const int height          = area.height;
    
    
    #if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE
      for (uint32_t y = 0; y < height; y++)
      {
          for (uint32_t x = 0; x < width; x++)
          {
              pResidual.at(x, y) = Pel(pCoeff.at(x, y));
          }
      }
    #else
    
      const int maxLog2TrDynamicRange = tu.cs->sps->getMaxLog2TrDynamicRange(toChannelType(compID));
      const int channelBitDepth = tu.cs->sps->getBitDepth(toChannelType(compID));
    
    
    #if JVET_P0058_CHROMA_TS && RExt__DECODER_DEBUG_TOOL_STATISTICS
      CodingStatistics::IncrementStatisticTool(CodingStatisticsClassType{ STATS__TOOL_EMT, uint32_t(width), uint32_t(height), compID });
    #endif
    
    
      int iTransformShift = getTransformShift(channelBitDepth, area.size(), maxLog2TrDynamicRange);
      if( tu.cs->sps->getSpsRangeExtension().getExtendedPrecisionProcessingFlag() )
      {
        iTransformShift = std::max<int>( 0, iTransformShift );
      }
    
      int iWHScale = 1;
    
      const bool rotateResidual = TU::isNonTransformedResidualRotated( tu, compID );
    
      if( iTransformShift >= 0 )
      {
        const TCoeff offset = iTransformShift == 0 ? 0 : ( 1 << ( iTransformShift - 1 ) );
    
        for( uint32_t y = 0; y < height; y++ )
        {
          for( uint32_t x = 0; x < width; x++ )
          {
            pResidual.at( x, y ) = Pel( ( ( rotateResidual ? pCoeff.at( pCoeff.width - x - 1, pCoeff.height - y - 1 ) : pCoeff.at( x, y ) ) * iWHScale + offset ) >> iTransformShift );
          }
        }
      }
      else //for very high bit depths
      {
        iTransformShift = -iTransformShift;
    
        for( uint32_t y = 0; y < height; y++ )
        {
          for( uint32_t x = 0; x < width; x++ )
          {
            pResidual.at( x, y ) = Pel( ( rotateResidual ? pCoeff.at( pCoeff.width - x - 1, pCoeff.height - y - 1 ) : pCoeff.at( x, y ) )  * iWHScale << iTransformShift );
          }
        }
      }