Skip to content
Snippets Groups Projects
IntraPrediction.cpp 67.1 KiB
Newer Older
  • Learn to ignore specific revisions
  • /* The copyright in this software is being made available under the BSD
     * License, included below. This software may be subject to other third party
     * and contributor rights, including patent rights, and no such rights are
     * granted under this license.
     *
     * Copyright (c) 2010-2018, ITU/ISO/IEC
     * All rights reserved.
     *
     * Redistribution and use in source and binary forms, with or without
     * modification, are permitted provided that the following conditions are met:
     *
     *  * Redistributions of source code must retain the above copyright notice,
     *    this list of conditions and the following disclaimer.
     *  * Redistributions in binary form must reproduce the above copyright notice,
     *    this list of conditions and the following disclaimer in the documentation
     *    and/or other materials provided with the distribution.
     *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
     *    be used to endorse or promote products derived from this software without
     *    specific prior written permission.
     *
     * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
     * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
     * THE POSSIBILITY OF SUCH DAMAGE.
     */
    
    /** \file     IntraPrediction.cpp
        \brief    intra prediction class
    */
    
    #include "IntraPrediction.h"
    
    #include "Unit.h"
    #include "UnitTools.h"
    #include "Buffer.h"
    
    #include "dtrace_next.h"
    #include "Rom.h"
    
    #include <memory.h>
    
    
    #if JVET_L0628_4TAP_INTRA
    #include "CommonLib/InterpolationFilter.h"
    #endif //JVET_L0628_4TAP_INTRA
    
    
    //! \ingroup CommonLib
    //! \{
    
    // ====================================================================================================================
    // Tables
    // ====================================================================================================================
    
    const uint8_t IntraPrediction::m_aucIntraFilter[MAX_NUM_CHANNEL_TYPE][MAX_INTRA_FILTER_DEPTHS] =
    {
      { // Luma
        20, //   1xn
        20, //   2xn
        20, //   4xn
        14, //   8xn
        2,  //  16xn
        0,  //  32xn
    
    #if HM_MDIS_AS_IN_JEM && !JVET_L0628_4TAP_INTRA
    
        20, //  64xn
    #else
        0,  //  64xn
    #endif
        0,  // 128xn
      },
      { // Chroma
        40, //   1xn
        40, //   2xn
        40, //   4xn
        28, //   8xn
        4,  //  16xn
        0,  //  32xn
    
    #if HM_MDIS_AS_IN_JEM && !JVET_L0628_4TAP_INTRA
    
    #if JVET_L0628_4TAP_INTRA
    // 4-tap smoothing filter coefficients used by xPredIntraAng() for luma blocks when
    // the filtered reference samples are requested (selected there when the cubic filter is not used).
    // The table has one row per 1/32 fractional position (it is indexed with deltaFract, which is
    // masked to 0..31) and one column per tap. Every row sums to 64, matching the
    // "+ 32 >> 6" normalisation applied where the taps are combined.
    const TFilterCoeff g_intraGaussFilter[32][4] = {
      { 16, 32, 16, 0 },
      { 15, 29, 17, 3 },
      { 15, 29, 17, 3 },
      { 14, 29, 18, 3 },
      { 13, 29, 18, 4 },
      { 13, 28, 19, 4 },
      { 13, 28, 19, 4 },
      { 12, 28, 20, 4 },
      { 11, 28, 20, 5 },
      { 11, 27, 21, 5 },
      { 10, 27, 22, 5 },
      { 9, 27, 22, 6 },
      { 9, 26, 23, 6 },
      { 9, 26, 23, 6 },
      { 8, 25, 24, 7 },
      { 8, 25, 24, 7 },
      { 8, 24, 24, 8 },
      { 7, 24, 25, 8 },
      { 7, 24, 25, 8 },
      { 6, 23, 26, 9 },
      { 6, 23, 26, 9 },
      { 6, 22, 27, 9 },
      { 5, 22, 27, 10 },
      { 5, 21, 27, 11 },
      { 5, 20, 28, 11 },
      { 4, 20, 28, 12 },
      { 4, 19, 28, 13 },
      { 4, 19, 28, 13 },
      { 4, 18, 29, 13 },
      { 3, 18, 29, 14 },
      { 3, 17, 29, 15 },
      { 3, 17, 29, 15 } 
    };
    #endif
    
    
    // ====================================================================================================================
    // Constructor / destructor / initialize
    // ====================================================================================================================
    
    IntraPrediction::IntraPrediction()
    :
      m_currChromaFormat( NUM_CHROMA_FORMAT )
    {
      // All working buffers start out unallocated; init() creates them on demand.
      m_piTemp = nullptr;

    #if JVET_L0338_MDLM
      m_pMdlmTemp = nullptr;
    #endif

      for (uint32_t comp = 0; comp < MAX_NUM_COMPONENT; comp++)
      {
        auto &slots = m_piYuvExt[comp];
        for (uint32_t kind = 0; kind < NUM_PRED_BUF; kind++)
        {
          slots[kind] = nullptr;
        }
      }
    }
    
    // Releases every buffer owned by this object by delegating to destroy().
    IntraPrediction::~IntraPrediction()
    {
      this->destroy();
    }
    
    void IntraPrediction::destroy()
    {
      for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++)
      {
        for (uint32_t buf = 0; buf < NUM_PRED_BUF; buf++)
        {
          delete[] m_piYuvExt[ch][buf];
          m_piYuvExt[ch][buf] = nullptr;
        }
      }
    
      delete[] m_piTemp;
      m_piTemp = nullptr;
    
    #if JVET_L0338_MDLM
      delete[] m_pMdlmTemp;
      m_pMdlmTemp = nullptr;
    #endif
    
    }
    
    // (Re-)initialises the predictor for a chroma format and luma bit depth:
    // allocates the reference-sample buffers if they do not exist yet, rebuilds the
    // m_auShiftLM lookup table, and allocates the temporary buffers.
    void IntraPrediction::init(ChromaFormat chromaFormatIDC, const unsigned bitDepthY)
    {
      // A format change after a previous initialisation invalidates the buffers: drop them all.
      const bool alreadyAllocated = (m_piYuvExt[COMPONENT_Y][PRED_BUF_UNFILTERED] != nullptr);
      if (alreadyAllocated && m_currChromaFormat != chromaFormatIDC)
      {
        destroy();
      }

      m_currChromaFormat = chromaFormatIDC;

      // The first luma buffer doubles as the "anything allocated yet?" flag.
      if (m_piYuvExt[COMPONENT_Y][PRED_BUF_UNFILTERED] == nullptr)
      {
        m_iYuvExtSize = (MAX_CU_SIZE * 2 + 1) * (MAX_CU_SIZE * 2 + 1);

        for (uint32_t comp = 0; comp < MAX_NUM_COMPONENT; comp++)
        {
          auto &slots = m_piYuvExt[comp];
          for (uint32_t kind = 0; kind < NUM_PRED_BUF; kind++)
          {
            slots[kind] = new Pel[m_iYuvExtSize];
          }
        }
      }

      // Entry k holds round( 2^(bitDepthY + 4) / (k + 32) ) for k = 0..31.
      const int shift = bitDepthY + 4;
      for (int entry = 0; entry < 32; entry++)
      {
        const int divisor = entry + 32;
        m_auShiftLM[entry] = ((1 << shift) + divisor / 2) / divisor;
      }

      if (m_piTemp == nullptr)
      {
        m_piTemp = new Pel[(MAX_CU_SIZE + 1) * (MAX_CU_SIZE + 1)];
      }

    #if JVET_L0338_MDLM
      if (m_pMdlmTemp == nullptr)
      {
        // MDLM will use top-above and left-below samples, hence the doubled dimensions.
        m_pMdlmTemp = new Pel[(2 * MAX_CU_SIZE + 1)*(2 * MAX_CU_SIZE + 1)];
      }
    #endif
    }
    
    // ====================================================================================================================
    // Public member functions
    // ====================================================================================================================
    
    // Function for calculating DC value of the reference samples used in Intra prediction
    //NOTE: Bit-Limit - 25-bit source
    // Returns the rounded mean of the reference samples bordering a block of size dstSize.
    // Square blocks average the top row and the left column; non-square blocks average
    // only the longer of the two sides, so the divisor is always looked up via g_aucLog2.
    Pel IntraPrediction::xGetPredValDc( const CPelBuf &pSrc, const Size &dstSize )
    {
      CHECK( dstSize.width == 0 || dstSize.height == 0, "Empty area provided" );

      const int  numCols     = dstSize.width;
      const int  numRows     = dstSize.height;
      const auto divisor     = (numCols == numRows) ? (numCols << 1) : std::max(numCols, numRows);
      const auto log2Divisor = g_aucLog2[divisor];
      const auto rounding    = (divisor >> 1);

      int accum = 0;

      // Top reference row takes part when the block is at least as wide as it is tall.
      if ( numCols >= numRows )
      {
        for( int col = 1; col <= numCols; col++ )
        {
          accum += pSrc.at( col, 0 );
        }
      }

      // Left reference column takes part when the block is at least as tall as it is wide.
      if ( numCols <= numRows )
      {
        for( int row = 1; row <= numRows; row++ )
        {
          accum += pSrc.at( 0, row );
        }
      }

      return (accum + rounding) >> log2Divisor;
    }
    
      // Maps an intra mode to its wide-angle replacement for non-square blocks.
      // Modes outside (DC_IDX, VDIA_IDX] and modes of square blocks are returned unchanged.
      int IntraPrediction::getWideAngle( int width, int height, int predMode )
      {
        const bool isAngular = ( predMode > DC_IDX && predMode <= VDIA_IDX );
        if ( !isAngular )
        {
          return predMode;
        }

        // Number of modes remapped grows with the aspect ratio, capped at a log2 difference of 2.
        const int log2Diff  = abs(g_aucLog2[width] - g_aucLog2[height]);
        const int modeShift = (std::min(2, log2Diff) << 2) + 2;

        if ( width > height )
        {
          // Wide block: the lowest modes are moved beyond VDIA_IDX.
          if ( predMode < 2 + modeShift )
          {
            predMode += (VDIA_IDX - 1);
          }
        }
        else if ( height > width )
        {
          // Tall block: the highest modes are moved below mode 2.
          if ( predMode > VDIA_IDX - modeShift )
          {
            predMode -= (VDIA_IDX - 1);
          }
        }
        return predMode;
      }
    
      // Sets m_topRefLength / m_leftRefLength for the given area: twice the block
      // dimension, with the reference along the shorter side extended for non-square blocks.
      void IntraPrediction::setReferenceArrayLengths( const CompArea &area )
      {
        const int blkW = area.width;
        const int blkH = area.height;

        m_topRefLength  = (blkW << 1);
        m_leftRefLength = (blkH << 1);

        if( blkW == blkH )
        {
          return; // square block: no extension
        }

        const int ratioShift = std::min(2, abs(g_aucLog2[blkW] - g_aucLog2[blkH]));
        const int longSide   = std::max(blkW, blkH);
        const int shortSide  = std::min(blkW, blkH);
        const int extension  = (longSide >> ratioShift) - shortSide + ((longSide + 31) >> 5);

        if( blkW > blkH )
        {
          m_leftRefLength += extension;
        }
        else
        {
          m_topRefLength  += extension;
        }
      }
    
    /** Generates the intra prediction for one block and applies PDPC where applicable.
     *
     * \param compId                 component to predict (mapped through MAP_CHROMA before use)
     * \param piPred                 destination buffer; its width/height define the block size
     * \param pu                     prediction unit supplying the intra mode, SPS and clipping range
     * \param useFilteredPredSamples selects the filtered or unfiltered reference-sample buffer
     *
     * The reference samples are read from the buffer returned by getPredictorPtr(); they are laid
     * out with a row stride of m_topRefLength + 1 and span m_leftRefLength + 1 rows.
     * After the mode-specific prediction, planar, DC, horizontal and vertical modes are blended
     * with the unfiltered boundary samples using position-dependent weights (PDPC).
     */
    void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, const PredictionUnit &pu, const bool useFilteredPredSamples )
    {
      const ComponentID    compID       = MAP_CHROMA( compId );
      const ChannelType    channelType  = toChannelType( compID );
      const int            iWidth       = piPred.width;
      const int            iHeight      = piPred.height;
      const uint32_t           uiDirMode    = PU::getFinalIntraMode( pu, channelType );


      CHECK( g_aucLog2[iWidth] < 2 && pu.cs->pcv->noChroma2x2, "Size not allowed" );
      CHECK( g_aucLog2[iWidth] > 7, "Size not allowed" );
      CHECK( iWidth != iHeight && !pu.cs->pcv->rectCUs, "Rectangular block are only allowed with QTBT" );

      // Dimensions of the reference-sample buffer: one extra sample for the top-left corner.
      const int  srcStride  = m_topRefLength  + 1;
      const int  srcHStride = m_leftRefLength + 1;

      Pel *ptrSrc = getPredictorPtr(compID, useFilteredPredSamples);
      const ClpRng& clpRng(pu.cu->cs->slice->clpRng(compID));

      switch (uiDirMode)
      {
        case(PLANAR_IDX): xPredIntraPlanar(CPelBuf(ptrSrc, srcStride, srcHStride), piPred, *pu.cs->sps); break;
        case(DC_IDX):     xPredIntraDc(CPelBuf(ptrSrc, srcStride, srcHStride), piPred, channelType, false); break;

    #if JVET_L0628_4TAP_INTRA
        case(2): 
        case(DIA_IDX):
        case(VDIA_IDX):
          if (getWideAngle(iWidth, iHeight, uiDirMode) == static_cast<int>(uiDirMode)) // check if uiDirMode is not wide-angle
          {
            xPredIntraAng(CPelBuf(ptrSrc, srcStride, srcHStride), piPred, channelType, uiDirMode, clpRng, *pu.cs->sps, useFilteredPredSamples); 
            break;
          }
          // fall through (intentional): wide-angle remapped modes are predicted from the unfiltered samples
        default:          xPredIntraAng(CPelBuf(getPredictorPtr(compID, false), srcStride, srcHStride), piPred, channelType, uiDirMode, clpRng, *pu.cs->sps, useFilteredPredSamples); break;
    #else //JVET_L0628_4TAP_INTRA

        default:          xPredIntraAng(CPelBuf(ptrSrc, srcStride, srcHStride), piPred, channelType, uiDirMode, clpRng, *pu.cs->sps, false); break;

    #endif //JVET_L0628_4TAP_INTRA

      }

      bool pdpcCondition = (uiDirMode == PLANAR_IDX || uiDirMode == DC_IDX || uiDirMode == HOR_IDX || uiDirMode == VER_IDX);
      if (pdpcCondition)
      {
        // The view must be srcHStride rows tall (same geometry as the views passed to the
        // predictors above): the left column is read at rows 1..iHeight, which exceeds
        // srcStride rows for tall blocks.
        const CPelBuf srcBuf = CPelBuf(ptrSrc, srcStride, srcHStride);
        PelBuf dstBuf = piPred;
        // Controls how quickly the boundary weights decay with distance from the block edge.
        const int scale = ((g_aucLog2[iWidth] - 2 + g_aucLog2[iHeight] - 2 + 2) >> 2);
        CHECK(scale < 0 || scale > 31, "PDPC: scale < 0 || scale > 31");

        if (uiDirMode == PLANAR_IDX)
        {
          // Blend with both the left and the top neighbour.
          for (int y = 0; y < iHeight; y++)
          {
            int wT = 32 >> std::min(31, ((y << 1) >> scale));
            const Pel left = srcBuf.at(0, y + 1);
            for (int x = 0; x < iWidth; x++)
            {
              const Pel top = srcBuf.at(x + 1, 0);
              int wL = 32 >> std::min(31, ((x << 1) >> scale));
              dstBuf.at(x, y) = ClipPel((wL * left + wT * top + (64 - wL - wT) * dstBuf.at(x, y) + 32) >> 6, clpRng);
            }
          }
        }
        else if (uiDirMode == DC_IDX)
        {
          // Blend with left and top neighbours and subtract a weighted top-left corner sample.
          const Pel topLeft = srcBuf.at(0, 0);
          for (int y = 0; y < iHeight; y++)
          {
            int wT = 32 >> std::min(31, ((y << 1) >> scale));
            const Pel left = srcBuf.at(0, y + 1);
            for (int x = 0; x < iWidth; x++)
            {
              const Pel top = srcBuf.at(x + 1, 0);
              int wL = 32 >> std::min(31, ((x << 1) >> scale));
              int wTL = (wL >> 4) + (wT >> 4);
              dstBuf.at(x, y) = ClipPel((wL * left + wT * top - wTL * topLeft + (64 - wL - wT + wTL) * dstBuf.at(x, y) + 32) >> 6, clpRng);
            }
          }
        }
        else if (uiDirMode == HOR_IDX)
        {
          // Horizontal mode: only the top neighbour (and the corner) contribute.
          const Pel topLeft = srcBuf.at(0, 0);
          for (int y = 0; y < iHeight; y++)
          {
            int wT = 32 >> std::min(31, ((y << 1) >> scale));
            for (int x = 0; x < iWidth; x++)
            {
              const Pel top = srcBuf.at(x + 1, 0);
              int wTL = wT;
              dstBuf.at(x, y) = ClipPel((wT * top - wTL * topLeft + (64 - wT + wTL) * dstBuf.at(x, y) + 32) >> 6, clpRng);
            }
          }
        }
        else if (uiDirMode == VER_IDX)
        {
          // Vertical mode: only the left neighbour (and the corner) contribute.
          const Pel topLeft = srcBuf.at(0, 0);
          for (int y = 0; y < iHeight; y++)
          {
            const Pel left = srcBuf.at(0, y + 1);
            for (int x = 0; x < iWidth; x++)
            {
              int wL = 32 >> std::min(31, ((x << 1) >> scale));
              int wTL = wL;
              dstBuf.at(x, y) = ClipPel((wL * left - wTL * topLeft + (64 - wL + wTL) * dstBuf.at(x, y) + 32) >> 6, clpRng);
            }
          }
        }
      }
    }
    void IntraPrediction::predIntraChromaLM(const ComponentID compID, PelBuf &piPred, const PredictionUnit &pu, const CompArea& chromaArea, int intraDir)
    {
      int  iLumaStride = 0;
      PelBuf Temp;
    
    #if JVET_L0338_MDLM
      if ((intraDir == MDLM_L_IDX) || (intraDir == MDLM_T_IDX))
      {
        iLumaStride = 2 * MAX_CU_SIZE + 1;
        Temp = PelBuf(m_pMdlmTemp + iLumaStride + 1, iLumaStride, Size(chromaArea));
      }
      else
      {
    #endif
    
      iLumaStride = MAX_CU_SIZE + 1;
      Temp = PelBuf(m_piTemp + iLumaStride + 1, iLumaStride, Size(chromaArea));
    
    #if JVET_L0338_MDLM
      }
    #endif
    
      int a, b, iShift;
      xGetLMParameters(pu, compID, chromaArea, a, b, iShift);
    
      ////// final prediction
      piPred.copyFrom(Temp);
      piPred.linearTransform(a, iShift, b, true, pu.cs->slice->clpRng(compID));
    }
    
    // Writes four rounded averages of the 2x2 sample group whose top-left sample is piSrc[0]
    // (next row at piSrc + iRecStride) into position i of the four destination arrays.
    // The availability flags are not used by this implementation.
    void IntraPrediction::xFilterGroup(Pel* pMulDst[], int i, Pel const * const piSrc, int iRecStride, bool bAboveAvaillable, bool bLeftAvaillable)
    {
      Pel const * const lowerRow = piSrc + iRecStride;

      // [0]: mean of the right-hand column of the group.
      pMulDst[0][i] = (piSrc[1] + lowerRow[1] + 1) >> 1;

      // [1]: mean of the lower row of the group.
      pMulDst[1][i] = (lowerRow[0] + lowerRow[1] + 1) >> 1;

      // [3]: mean of the upper row of the group.
      pMulDst[3][i] = (piSrc[0] + piSrc[1] + 1) >> 1;

      // [2]: mean of all four samples.
      pMulDst[2][i] = (piSrc[0] + piSrc[1] + lowerRow[0] + lowerRow[1] + 2) >> 2;
    }
    
    
    
    /** Function for deriving planar intra prediction. This function derives the prediction samples for planar mode (intra coding).
     */
    
    //NOTE: Bit-Limit - 24-bit source
    void IntraPrediction::xPredIntraPlanar( const CPelBuf &pSrc, PelBuf &pDst, const SPS& sps )
    {
      const uint32_t width  = pDst.width;
      const uint32_t height = pDst.height;
      const uint32_t log2W  = g_aucLog2[ width ];
      const uint32_t log2H  = g_aucLog2[ height ];
    
      int leftColumn[MAX_CU_SIZE + 1], topRow[MAX_CU_SIZE + 1], bottomRow[MAX_CU_SIZE], rightColumn[MAX_CU_SIZE];
      const uint32_t offset = width * height;
    
      // Get left and above reference column and row
      for( int k = 0; k < width + 1; k++ )
      {
        topRow[k] = pSrc.at( k + 1, 0 );
      }
    
      for( int k = 0; k < height + 1; k++ )
      {
        leftColumn[k] = pSrc.at( 0, k + 1 );
      }
    
      // Prepare intermediate variables used in interpolation
      int bottomLeft = leftColumn[height];
      int topRight = topRow[width];
    
      for( int k = 0; k < width; k++ )
      {
        bottomRow[k] = bottomLeft - topRow[k];
        topRow[k]    = topRow[k] << log2H;
      }
    
      for( int k = 0; k < height; k++ )
      {
        rightColumn[k] = topRight - leftColumn[k];
        leftColumn[k]  = leftColumn[k] << log2W;
      }
    
      const uint32_t finalShift = 1 + log2W + log2H;
      const uint32_t stride     = pDst.stride;
      Pel*       pred       = pDst.buf;
      for( int y = 0; y < height; y++, pred += stride )
      {
        int horPred = leftColumn[y];
    
        for( int x = 0; x < width; x++ )
        {
          horPred += rightColumn[y];
          topRow[x] += bottomRow[x];
    
          int vertPred = topRow[x];
          pred[x]      = ( ( horPred << log2H ) + ( vertPred << log2W ) + offset ) >> finalShift;
        }
      }
    }
    
    
    
    
    void IntraPrediction::xPredIntraDc( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const bool enableBoundaryFilter )
    {
      const Pel dcval = xGetPredValDc( pSrc, pDst );
      pDst.fill( dcval );
    
    #if HEVC_USE_DC_PREDFILTERING
      if( enableBoundaryFilter )
      {
        xDCPredFiltering( pSrc, pDst, channelType );
      }
    #endif
    }
    
    #if HEVC_USE_DC_PREDFILTERING
    /** Smooths the top row and left column of a DC-predicted block towards the adjacent
     *  reference samples. Only luma blocks within the maximum filtered size are touched.
     */
    void IntraPrediction::xDCPredFiltering(const CPelBuf &pSrc, PelBuf &pDst, const ChannelType &channelType)
    {
      const uint32_t iWidth  = pDst.width;
      const uint32_t iHeight = pDst.height;

      if (!isLuma(channelType) || (iWidth > MAXIMUM_INTRA_FILTERED_WIDTH) || (iHeight > MAXIMUM_INTRA_FILTERED_HEIGHT))
      {
        return;
      }

      // Corner sample: mixes the top and the left neighbour with the predicted value.
      pDst.at(0, 0) = (Pel)((pSrc.at(1, 0) + pSrc.at(0, 1) + 2 * pDst.at(0, 0) + 2) >> 2);

      // Remaining top row: 1/4 neighbour above, 3/4 predicted value.
      for ( int col = 1; col < iWidth; col++ )
      {
        pDst.at(col, 0) = (Pel)((pSrc.at(col + 1, 0)  +  3 * pDst.at(col, 0) + 2) >> 2);
      }

      // Remaining left column: 1/4 neighbour to the left, 3/4 predicted value.
      for ( int row = 1; row < iHeight; row++ )
      {
        pDst.at(0, row) = (Pel)((pSrc.at(0, row + 1) + 3 * pDst.at(0, row) + 2) >> 2);
      }
    }
    #endif
    
    // Function for deriving the angular Intra predictions
    
    /** Function for deriving the simplified angular intra predictions.
    *
    * This function derives the prediction samples for the angular mode based on the prediction direction indicated by
    * the prediction mode index. The prediction direction is given by the displacement of the bottom row of the block and
    * the reference row above the block in the case of vertical prediction or displacement of the rightmost column
    * of the block and reference column left from the block in the case of the horizontal prediction. The displacement
    * is signalled at 1/32 pixel accuracy. When projection of the predicted pixel falls in between reference samples,
    * the predicted value for the pixel is linearly interpolated from the reference samples. All reference samples are taken
    * from the extended main reference.
    */
    //NOTE: Bit-Limit - 25-bit source
    #if HEVC_USE_HOR_VER_PREDFILTERING
    void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const uint32_t dirMode, const ClpRng& clpRng, const bool bEnableEdgeFilters, const SPS& sps, const bool enableBoundaryFilter )

    #elif JVET_L0628_4TAP_INTRA
    void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const uint32_t dirMode, const ClpRng& clpRng, const SPS& sps, const bool useFilteredPredSamples )

    #else
    void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const uint32_t dirMode, const ClpRng& clpRng, const SPS& sps, const bool enableBoundaryFilter )
    #endif
    {
      // Overview of the steps below:
      //  1. map dirMode to its wide-angle equivalent and derive the signed angle,
      //  2. build the main/side reference arrays (refMain / refSide),
      //  3. predict row by row into pDstBuf; horizontal modes are computed transposed
      //     into tempArray and flipped into pDst at the very end,
      //  4. apply the position-dependent blending (PDPC) for the diagonal and near-diagonal modes.
      //
      // NOTE(review): in this copy of the file several lines of the function appear to be
      // missing (see the notes further down) - confirm against the upstream source before editing.
      int width =int(pDst.width);
      int height=int(pDst.height);

      CHECK( !( dirMode > DC_IDX && dirMode < NUM_LUMA_MODE ), "Invalid intra dir" );

      int              predMode           = getWideAngle(width, height, dirMode);
      // Modes from DIA_IDX upwards are treated as vertical; the others as horizontal.
      const bool       bIsModeVer         = predMode >= DIA_IDX;
      // Signed distance of the mode from the pure vertical / pure horizontal mode.
      const int        intraPredAngleMode = (bIsModeVer) ? predMode - VER_IDX : -(predMode - HOR_IDX);
      const int        absAngMode         = abs(intraPredAngleMode);
      const int        signAng            = intraPredAngleMode < 0 ? -1 : 1;
    #if HEVC_USE_HOR_VER_PREDFILTERING
      const bool       edgeFilter         = bEnableEdgeFilters && isLuma(channelType) && (width <= MAXIMUM_INTRA_FILTERED_WIDTH) && (height <= MAXIMUM_INTRA_FILTERED_HEIGHT);
    #endif

      // Set bitshifts and scale the angle parameter to block size

      // angTable: displacement per row in 1/32 sample units (deltaPos is split with >> 5 and & 31 below);
      // invAngTable: the matching inverse angle, scaled as stated in the trailing comment.
      static const int angTable[27]    = { 0,    1,    2,    3,    5,    7,    9,   11,   13,   15,   17,   19,   21,   23,   26,   29,   32,   35,  39,  45,  49,  54,  60,  68,  79,  93, 114 };
      static const int invAngTable[27] = { 0, 8192, 4096, 2731, 1638, 1170,  910,  745,  630,  546,  482,  431,  390,  356,  315,  282,  256,  234, 210, 182, 167, 152, 137, 120, 104,  88,  72 }; // (256 * 32) / Angle

      int invAngle                    = invAngTable[absAngMode];
      int absAng                      = angTable   [absAngMode];
      int intraPredAngle              = signAng * absAng;

      Pel* refMain;
      Pel* refSide;


      // Local copies of the top row and left column of pSrc.
      Pel  refAbove[2 * MAX_CU_SIZE + 3];
      Pel  refLeft [2 * MAX_CU_SIZE + 3];



      // Initialize the Main and Left reference array.
      if (intraPredAngle < 0)
      {

    #if JVET_L0628_4TAP_INTRA
        // These locals shadow the outer width/height and are one larger than the block size.
        auto width    = int(pDst.width) +1;
        auto height   = int(pDst.height)+1;  
        auto lastIdx  = bIsModeVer ? width : height;
        auto firstIdx = ( ((bIsModeVer ? height : width) -1) * intraPredAngle ) >> 5;
    #endif //JVET_L0628_4TAP_INTRA

        // Both arrays are stored with an offset so that negative indices of refMain stay inside the array.
        for( int x = 0; x < width + 1; x++ )
        {
          refAbove[x + height - 1] = pSrc.at( x, 0 );
        }
        for( int y = 0; y < height + 1; y++ )
        {
          refLeft[y + width - 1] = pSrc.at( 0, y );
        }
        refMain = (bIsModeVer ? refAbove + height : refLeft  + width ) - 1;
        refSide = (bIsModeVer ? refLeft  + width  : refAbove + height) - 1;

        // Extend the Main reference to the left.
        int invAngleSum    = 128;       // rounding for (shift by 8)

    #if JVET_L0628_4TAP_INTRA
        for( int k = -1; k > firstIdx; k-- )
    #else //JVET_L0628_4TAP_INTRA

        const int refMainOffsetPreScale = bIsModeVer ? height : width;
        for( int k = -1; k > (refMainOffsetPreScale * intraPredAngle) >> 5; k-- )

    #endif //JVET_L0628_4TAP_INTRA

        {
          // Project side reference samples onto the negative part of the main reference.
          invAngleSum += invAngle;
          refMain[k] = refSide[invAngleSum>>8];
        }

    #if JVET_L0628_4TAP_INTRA
        // Replicate the end samples so the 4-tap filter can read one sample past either end.
        refMain[lastIdx] = refMain[lastIdx-1];
        refMain[firstIdx] = refMain[firstIdx+1];
    #endif //JVET_L0628_4TAP_INTRA

        // NOTE(review): from here to the closing brace of this if-statement the text looks
        // truncated: x and y are used below without an enclosing loop, and the "else" branch
        // for non-negative angles that these statements presumably belong to is not visible.
    #if JVET_L0628_4TAP_INTRA
          refAbove[x+1] = pSrc.at(x, 0);
    #else //JVET_L0628_4TAP_INTRA

    #endif //JVET_L0628_4TAP_INTRA

    #if JVET_L0628_4TAP_INTRA
          refLeft[y+1]  = pSrc.at(0, y);
    #else //JVET_L0628_4TAP_INTRA

    #endif //JVET_L0628_4TAP_INTRA

        }
        refMain = bIsModeVer ? refAbove : refLeft ;
        refSide = bIsModeVer ? refLeft  : refAbove;


    #if JVET_L0628_4TAP_INTRA
        // Skip the padding element written at index 0 and replicate both end samples.
        refMain++;
        refSide++;
        refMain[-1] = refMain[0];
        auto lastIdx = 1 + ((bIsModeVer) ? m_topRefLength : m_leftRefLength);
        refMain[lastIdx] = refMain[lastIdx-1];
    #endif //JVET_L0628_4TAP_INTRA

      }

      // swap width/height if we are doing a horizontal mode:
      Pel tempArray[MAX_CU_SIZE*MAX_CU_SIZE];
      const int dstStride = bIsModeVer ? pDst.stride : MAX_CU_SIZE;
      Pel *pDstBuf = bIsModeVer ? pDst.buf : tempArray;
      if (!bIsModeVer)
      {
        std::swap(width, height);
      }


      if( intraPredAngle == 0 )  // pure vertical or pure horizontal
      {
        // Every row is a plain copy of the main reference.
        for( int y = 0; y < height; y++ )
        {
          for( int x = 0; x < width; x++ )
          {
            pDstBuf[y*dstStride + x] = refMain[x + 1];
          }
        }
    #if HEVC_USE_HOR_VER_PREDFILTERING
        if (edgeFilter)
        {
          // Adjust the first column by half the gradient along the side reference.
          for( int y = 0; y < height; y++ )
          {
            pDstBuf[y*dstStride] = ClipPel( pDstBuf[y*dstStride] + ( ( refSide[y + 1] - refSide[0] ) >> 1 ), clpRng );
          }
        }
    #endif
      }
      else
      {
        Pel *pDsty=pDstBuf;

        for (int y=0, deltaPos=intraPredAngle; y<height; y++, deltaPos+=intraPredAngle, pDsty+=dstStride)
        {
          // Integer and fractional (1/32) part of this row's displacement along the main reference.
          const int deltaInt   = deltaPos >> 5;
          const int deltaFract = deltaPos & (32 - 1);


          // NOTE(review): the preprocessor chain below is unbalanced in this copy (the #elif/#else
          // arms are empty and an #endif is missing) - lines appear to have been lost.
    #if JVET_L0628_4TAP_INTRA
          if( (absAng & (32 - 1)) != 0 ) // use 4-tap interpolation only for intra prediction modes with fractional displacements
    #elif HM_4TAPIF_AS_IN_JEM

    #else //JVET_L0628_4TAP_INTRA

    #if JVET_L0628_4TAP_INTRA
            if( isLuma(channelType) )
            {
              Pel                        p[4];
              // Cubic (chroma interpolation table) filter for unfiltered references, Gaussian table otherwise.
              const bool                 useCubicFilter = !useFilteredPredSamples;
              TFilterCoeff const * const f              = (useCubicFilter) ? InterpolationFilter::getChromaFilterTable(deltaFract) : g_intraGaussFilter[deltaFract];

              int         refMainIndex   = deltaInt + 1;

              for( int x = 0; x < width; x++, refMainIndex++ )
              {
                // Four consecutive reference samples around the projected position.
                p[0] = refMain[refMainIndex - 1];
                p[1] = refMain[refMainIndex];
                p[2] = refMain[refMainIndex + 1];
                p[3] = refMain[refMainIndex + 2];

                pDstBuf[y*dstStride + x] = static_cast<Pel>((static_cast<int>(f[0] * p[0]) + static_cast<int>(f[1] * p[1]) + static_cast<int>(f[2] * p[2]) + static_cast<int>(f[3] * p[3]) + 32) >> 6);

                if( useCubicFilter ) // only cubic filter has negative coefficients and requires clipping
                {
                  pDstBuf[y*dstStride + x] = ClipPel( pDstBuf[y*dstStride + x], clpRng );
                }
              }
            }
            else
    #endif //JVET_L0628_4TAP_INTRA

            {
              // Do linear filtering
              const Pel *pRM = refMain + deltaInt + 1;
              int lastRefMainPel = *pRM++;
              for( int x = 0; x < width; pRM++, x++ )
              {
                int thisRefMainPel = *pRM;
                pDsty[x + 0] = ( Pel ) ( ( ( 32 - deltaFract )*lastRefMainPel + deltaFract*thisRefMainPel + 16 ) >> 5 );
                lastRefMainPel = thisRefMainPel;
              }
            }
          }
          else
          {
            // Just copy the integer samples
            for( int x = 0; x < width; x++ )
            {
              pDsty[x] = refMain[x + deltaInt + 1];
            }
          }
          // Position-dependent blending of the current row with reference samples.
          const int numModes = 8;
          const int scale = ((g_aucLog2[width] - 2 + g_aucLog2[height] - 2 + 2) >> 2);
          CHECK(scale < 0 || scale > 31, "PDPC: scale < 0 || scale > 31");

          if (predMode == 2 || predMode == VDIA_IDX)
          {
            // Diagonal modes: blend with the samples found at index x + y + 2 of both references.
            int wT = 16 >> std::min(31, ((y << 1) >> scale));

            for (int x = 0; x < width; x++)
            {
              int wL = 16 >> std::min(31, ((x << 1) >> scale));
              if (wT + wL == 0) break; // both weights have decayed to zero; stop processing this row

              int c = x + y + 1;

              const Pel left = (wL != 0) ? refSide[c + 1] : 0;
              const Pel top  = (wT != 0) ? refMain[c + 1] : 0;


              pDsty[x] = ClipPel((wL * left + wT * top + (64 - wL - wT) * pDsty[x] + 32) >> 6, clpRng);
            }
          }
          else if ((predMode >= VDIA_IDX - numModes && predMode != VDIA_IDX) || (predMode != 2 && predMode <= (2 + numModes)))
          {
            // Modes within numModes of either diagonal: blend with a side-reference sample
            // that is linearly interpolated at 1/64 precision using the inverse angle.
            int invAngleSum0 = 2;
            for (int x = 0; x < width; x++)
            {
              invAngleSum0 += invAngle;
              int deltaPos0 = invAngleSum0 >> 2;
              int deltaFrac0 = deltaPos0 & 63;
              int deltaInt0 = deltaPos0 >> 6;

              int deltay = y + deltaInt0 + 1;
              // Stop once the projected position runs past the available side reference.
              if (deltay >(bIsModeVer ? m_leftRefLength : m_topRefLength) - 1) break;

              int wL = 32 >> std::min(31, ((x << 1) >> scale));
              if (wL == 0) break;
              Pel *p = refSide + deltay;

              Pel left = (((64 - deltaFrac0) * p[0] + deltaFrac0 * p[1] + 32) >> 6);
              pDsty[x] = ClipPel((wL * left + (64 - wL) * pDsty[x] + 32) >> 6, clpRng);
            }
          }
        }
    #if HEVC_USE_HOR_VER_PREDFILTERING
        if( edgeFilter && absAng <= 1 )
        {
          // Near-vertical/horizontal angles: adjust the first column by a quarter of the side gradient.
          for( int y = 0; y < height; y++ )
          {
            pDstBuf[y*dstStride] = ClipPel( pDstBuf[y*dstStride] + ((refSide[y + 1] - refSide[0]) >> 2), clpRng );
          }
        }
    #endif
      }

      // Flip the block if this is the horizontal mode
      if( !bIsModeVer )
      {
        // width/height were swapped above, so (y, x) here addresses column y, row x of pDst.
        for( int y = 0; y < height; y++ )
        {
          for( int x = 0; x < width; x++ )
          {
            pDst.at( y, x ) = pDstBuf[x];
          }
          pDstBuf += dstStride;
        }
      }
    }
    
    
    // True when RDPCM is enabled for the PU's coding unit, the unit is coded with
    // transquant bypass, and the mode is purely horizontal or purely vertical.
    bool IntraPrediction::useDPCMForFirstPassIntraEstimation(const PredictionUnit &pu, const uint32_t &uiDirMode)
    {
      if( !CU::isRDPCMEnabled(*pu.cu) )
      {
        return false;
      }
      if( !pu.cu->transQuantBypass )
      {
        return false;
      }
      return uiDirMode == HOR_IDX || uiDirMode == VER_IDX;
    }
    
    // Forward declarations of the neighbour-availability helpers used when filling the
    // reference samples (the definitions are not part of this section of the file).
    // isAboveLeftAvailable() reports a single flag; the other helpers take a validFlags
    // array and return an int - presumably the number of available units; verify against
    // the definitions.
    inline bool isAboveLeftAvailable  ( const CodingUnit &cu, const ChannelType &chType, const Position &posLT );
    inline int  isAboveAvailable      ( const CodingUnit &cu, const ChannelType &chType, const Position &posLT, const uint32_t uiNumUnitsInPU, const uint32_t unitWidth, bool *validFlags );
    inline int  isLeftAvailable       ( const CodingUnit &cu, const ChannelType &chType, const Position &posLT, const uint32_t uiNumUnitsInPU, const uint32_t unitWidth, bool *validFlags );
    inline int  isAboveRightAvailable ( const CodingUnit &cu, const ChannelType &chType, const Position &posRT, const uint32_t uiNumUnitsInPU, const uint32_t unitHeight, bool *validFlags );
    inline int  isBelowLeftAvailable  ( const CodingUnit &cu, const ChannelType &chType, const Position &posLB, const uint32_t uiNumUnitsInPU, const uint32_t unitHeight, bool *validFlags );
    
    void IntraPrediction::initIntraPatternChType(const CodingUnit &cu, const CompArea &area, const bool bFilterRefSamples)
    {
      const CodingStructure& cs   = *cu.cs;
    
      Pel *refBufUnfiltered   = m_piYuvExt[area.compID][PRED_BUF_UNFILTERED];
      Pel *refBufFiltered     = m_piYuvExt[area.compID][PRED_BUF_FILTERED];
    
      setReferenceArrayLengths(area);
    
      // ----- Step 1: unfiltered reference samples -----
      xFillReferenceSamples( cs.picture->getRecoBuf( area ), refBufUnfiltered, area, cu );
      // ----- Step 2: filtered reference samples -----
      if( bFilterRefSamples )
      {
        xFilterReferenceSamples( refBufUnfiltered, refBufFiltered, area, *cs.sps );
      }
    }
    
    void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBufUnfiltered, const CompArea &area, const CodingUnit &cu )
    {
      const ChannelType      chType = toChannelType( area.compID );
      const CodingStructure &cs     = *cu.cs;
      const SPS             &sps    = *cs.sps;
      const PreCalcValues   &pcv    = *cs.pcv;
    
      const int  tuWidth            = area.width;
      const int  tuHeight           = area.height;
      const int  predSize           = m_topRefLength;
      const int  predHSize          = m_leftRefLength;
      const int  predStride         = predSize + 1;
    
      const bool noShift            = pcv.noChroma2x2 && area.width == 4; // don't shift on the lowest level (chroma not-split)
      const int  unitWidth          = pcv.minCUWidth  >> (noShift ? 0 : getComponentScaleX( area.compID, sps.getChromaFormatIdc() ));
      const int  unitHeight         = pcv.minCUHeight >> (noShift ? 0 : getComponentScaleY( area.compID, sps.getChromaFormatIdc() ));
    
      const int  totalAboveUnits    = (predSize + (unitWidth - 1)) / unitWidth;
      const int  totalLeftUnits     = (predHSize + (unitHeight - 1)) / unitHeight;
      const int  totalUnits         = totalAboveUnits + totalLeftUnits + 1; //+1 for top-left
      const int  numAboveUnits      = std::max<int>( tuWidth / unitWidth, 1 );
      const int  numLeftUnits       = std::max<int>( tuHeight / unitHeight, 1 );
      const int  numAboveRightUnits = totalAboveUnits - numAboveUnits;
      const int  numLeftBelowUnits  = totalLeftUnits - numLeftUnits;
    
      CHECK( numAboveUnits <= 0 || numLeftUnits <= 0 || numAboveRightUnits <= 0 || numLeftBelowUnits <= 0, "Size not supported" );
    
      // ----- Step 1: analyze neighborhood -----
      const Position posLT          = area;
      const Position posRT          = area.topRight();
      const Position posLB          = area.bottomLeft();
    
      bool  neighborFlags[4 * MAX_NUM_PART_IDXS_IN_CTU_WIDTH + 1];
      int   numIntraNeighbor = 0;
    
      memset( neighborFlags, 0, totalUnits );
    
      neighborFlags[totalLeftUnits] = isAboveLeftAvailable( cu, chType, posLT );
      numIntraNeighbor += neighborFlags[totalLeftUnits] ? 1 : 0;
      numIntraNeighbor += isAboveAvailable     ( cu, chType, posLT, numAboveUnits,      unitWidth,  (neighborFlags + totalLeftUnits + 1) );
      numIntraNeighbor += isAboveRightAvailable( cu, chType, posRT, numAboveRightUnits, unitWidth,  (neighborFlags + totalLeftUnits + 1 + numAboveUnits) );
      numIntraNeighbor += isLeftAvailable      ( cu, chType, posLT, numLeftUnits,       unitHeight, (neighborFlags + totalLeftUnits - 1) );
      numIntraNeighbor += isBelowLeftAvailable ( cu, chType, posLB, numLeftBelowUnits,  unitHeight, (neighborFlags + totalLeftUnits - 1 - numLeftUnits) );
    
      // ----- Step 2: fill reference samples (depending on neighborhood) -----
      CHECK((predHSize + 1) * predStride > m_iYuvExtSize, "Reference sample area not supported");
    
      const Pel*  srcBuf    = recoBuf.buf;
      const int   srcStride = recoBuf.stride;
            Pel*  ptrDst    = refBufUnfiltered;
      const Pel*  ptrSrc;
      const Pel   valueDC   = 1 << (sps.getBitDepth( chType ) - 1);
    
    
      if( numIntraNeighbor == 0 )
      {
        // Fill border with DC value
        for( int j = 0; j <= predSize; j++ ) { ptrDst[j]            = valueDC; }
        for( int i = 1; i <= predHSize; i++ ) { ptrDst[i*predStride] = valueDC; }
      }
      else if( numIntraNeighbor == totalUnits )
      {
        // Fill top-left border and top and top right with rec. samples
        ptrSrc = srcBuf - srcStride - 1;
        for( int j = 0; j <= predSize; j++ ) { ptrDst[j] = ptrSrc[j]; }
        // Fill left and below left border with rec. samples
        ptrSrc = srcBuf - 1;
        for( int i = 1; i <= predHSize; i++ ) { ptrDst[i*predStride] = *(ptrSrc); ptrSrc += srcStride; }
      }
      else // reference samples are partially available
      {
        // BB: old implementation using tmpLineBuf
        // ---------------------------------------
        Pel  tmpLineBuf[5 * MAX_CU_SIZE];
        Pel* ptrTmp;
        int  unitIdx;
    
        // Initialize
        const int totalSamples = (totalLeftUnits * unitHeight) + ((totalAboveUnits + 1) * unitWidth); // all above units have "unitWidth" samples each, all left/below-left units have "unitHeight" samples each
        for( int k = 0; k < totalSamples; k++ ) { tmpLineBuf[k] = valueDC; }
    
        // Fill top-left sample
        ptrSrc = srcBuf - srcStride - 1;
        ptrTmp = tmpLineBuf + (totalLeftUnits * unitHeight);
        unitIdx = totalLeftUnits;
        if( neighborFlags[unitIdx] )
        {
          Pel topLeftVal = ptrSrc[0];
          for( int j = 0; j < unitWidth; j++ ) { ptrTmp[j] = topLeftVal; }
        }
    
        // Fill left & below-left samples (downwards)
        ptrSrc += srcStride;
        ptrTmp--;
        unitIdx--;
    
        for( int k = 0; k < totalLeftUnits; k++ )
        {
          if( neighborFlags[unitIdx] )
          {
            for( int i = 0; i < unitHeight; i++ ) { ptrTmp[-i] = ptrSrc[i*srcStride]; }
          }
          ptrSrc += unitHeight*srcStride;
          ptrTmp -= unitHeight;
          unitIdx--;
        }
    
        // Fill above & above-right samples (left-to-right) (each unit has "unitWidth" samples)
        ptrSrc = srcBuf - srcStride;
        ptrTmp = tmpLineBuf + (totalLeftUnits * unitHeight) + unitWidth; // offset line buffer by totalLeftUnits*unitHeight (for left/below-left) + unitWidth (for above-left)
        unitIdx = totalLeftUnits + 1;
        for( int k = 0; k < totalAboveUnits; k++ )
        {
          if( neighborFlags[unitIdx] )
          {
            for( int j = 0; j < unitWidth; j++ ) { ptrTmp[j] = ptrSrc[j]; }
          }
          ptrSrc += unitWidth;
          ptrTmp += unitWidth;
          unitIdx++;
        }
    
        // Pad reference samples when necessary
        int  currUnit       = 0;
        Pel* ptrTmpCurrUnit = tmpLineBuf;
    
        if( !neighborFlags[0] )
        {
          int nextUnit = 1;
          while( nextUnit < totalUnits && !neighborFlags[nextUnit] )
          {
            nextUnit++;
          }
          Pel* ptrTmpRef = tmpLineBuf + ((nextUnit < totalLeftUnits) ? (nextUnit * unitHeight) : ((totalLeftUnits * (unitHeight - unitWidth)) + (nextUnit * unitWidth)));
          const Pel refSample = *ptrTmpRef;
          // Pad unavailable samples with new value
          // fill left column
          while( currUnit < std::min<int>( nextUnit, totalLeftUnits ) )
          {
            for( int i = 0; i < unitHeight; i++ ) { ptrTmpCurrUnit[i] = refSample; }
            ptrTmpCurrUnit += unitHeight;
            currUnit++;
          }
          // fill top row
          while( currUnit < nextUnit )
          {
            for( int j = 0; j < unitWidth; j++ ) { ptrTmpCurrUnit[j] = refSample; }
            ptrTmpCurrUnit += unitWidth;
            currUnit++;
          }
        }
    
        // pad all other reference samples.