Skip to content
Snippets Groups Projects
InterPrediction.cpp 74.8 KiB
Newer Older
  • Learn to ignore specific revisions
  • 
      Pel*          gradX0 = m_gradX0;
      Pel*          gradX1 = m_gradX1;
      Pel*          gradY0 = m_gradY0;
      Pel*          gradY1 = m_gradY1;
    
      int           stridePredMC = widthG + 2;
      const Pel*    srcY0 = m_filteredBlockTmp[2][COMPONENT_Y] + stridePredMC + 1;
      const Pel*    srcY1 = m_filteredBlockTmp[3][COMPONENT_Y] + stridePredMC + 1;
      const int     src0Stride = stridePredMC;
      const int     src1Stride = stridePredMC;
    
      Pel*          dstY = yuvDst.Y().buf;
      const int     dstStride = yuvDst.Y().stride;
      const Pel*    srcY0Temp = srcY0;
      const Pel*    srcY1Temp = srcY1;
    
    
      for (int refList = 0; refList < NUM_REF_PIC_LIST_01; refList++)
      {
        Pel* dstTempPtr = m_filteredBlockTmp[2 + refList][COMPONENT_Y] + stridePredMC + 1;
    
        Pel* gradY = (refList == 0) ? m_gradY0 : m_gradY1;
        Pel* gradX = (refList == 0) ? m_gradX0 : m_gradX1;
    
    #if JVET_M0063_BDOF_FIX
        xBioGradFilter(dstTempPtr, stridePredMC, widthG, heightG, widthG, gradX, gradY, clipBitDepths.recon[toChannelType(COMPONENT_Y)]);
    #else
    
        xBioGradFilter(dstTempPtr, stridePredMC, widthG, heightG, widthG, gradX, gradY);
    
        Pel* padStr = m_filteredBlockTmp[2 + refList][COMPONENT_Y] + 2 * stridePredMC + 2;
        for (int y = 0; y< height; y++)
    
          padStr[-1] = padStr[0];
          padStr[width] = padStr[width - 1];
          padStr += stridePredMC;
    
        padStr = m_filteredBlockTmp[2 + refList][COMPONENT_Y] + 2 * stridePredMC + 1;
        ::memcpy(padStr - stridePredMC, padStr, sizeof(Pel)*(widthG));
        ::memcpy(padStr + height*stridePredMC, padStr + (height - 1)*stridePredMC, sizeof(Pel)*(widthG));
    
      }
    
      const ClpRng& clpRng = pu.cu->cs->slice->clpRng(COMPONENT_Y);
      const int   bitDepth = clipBitDepths.recon[toChannelType(COMPONENT_Y)];
      const int   shiftNum = IF_INTERNAL_PREC + 1 - bitDepth;
      const int   offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
    
    #if JVET_M0063_BDOF_FIX
      const int   limit = (bitDepth>12)? 2 : ((int)1 << (4 + IF_INTERNAL_PREC - bitDepth - 5));
    #else
    
      const int   limit = ((int)1 << (4 + IF_INTERNAL_PREC - bitDepth - 5));
    
      int*     dotProductTemp1 = m_dotProduct1;
      int*     dotProductTemp2 = m_dotProduct2;
      int*     dotProductTemp3 = m_dotProduct3;
      int*     dotProductTemp5 = m_dotProduct5;
      int*     dotProductTemp6 = m_dotProduct6;
    
    #if JVET_M0063_BDOF_FIX
      xCalcBIOPar(srcY0Temp, srcY1Temp, gradX0, gradX1, gradY0, gradY1, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, src0Stride, src1Stride, widthG, widthG, heightG, bitDepth);
    #else
    
      xCalcBIOPar(srcY0Temp, srcY1Temp, gradX0, gradX1, gradY0, gradY1, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, src0Stride, src1Stride, widthG, widthG, heightG);
    
      int xUnit = (width >> 2);
      int yUnit = (height >> 2);
    
      Pel *dstY0 = dstY;
      gradX0 = m_gradX0; gradX1 = m_gradX1;
      gradY0 = m_gradY0; gradY1 = m_gradY1;
    
    
      for (int yu = 0; yu < yUnit; yu++)
      {
        for (int xu = 0; xu < xUnit; xu++)
        {
          if (m_bioPredSubBlkDist[yu*xUnit + xu] < m_bioSubBlkDistThres)
          {
    
            srcY0Temp = srcY0 + (stridePredMC + 1) + ((yu*src0Stride + xu) << 2);
            srcY1Temp = srcY1 + (stridePredMC + 1) + ((yu*src1Stride + xu) << 2);
            dstY0 = dstY + ((yu*dstStride + xu) << 2);
    
            PelBuf dstPelBuf(dstY0, dstStride, Size(4, 4));
            dstPelBuf.addAvg(CPelBuf(srcY0Temp, src0Stride, Size(4, 4)), CPelBuf(srcY1Temp, src1Stride, Size(4, 4)), clpRng);
    
            continue;
          }
    
          int     sGxdI = 0, sGydI = 0, sGxGy = 0, sGx2 = 0, sGy2 = 0;
          int     tmpx = 0, tmpy = 0;
    
          dotProductTemp1 = m_dotProduct1 + offsetPos + ((yu*widthG + xu) << 2);
          dotProductTemp2 = m_dotProduct2 + offsetPos + ((yu*widthG + xu) << 2);
          dotProductTemp3 = m_dotProduct3 + offsetPos + ((yu*widthG + xu) << 2);
          dotProductTemp5 = m_dotProduct5 + offsetPos + ((yu*widthG + xu) << 2);
          dotProductTemp6 = m_dotProduct6 + offsetPos + ((yu*widthG + xu) << 2);
    
          xCalcBlkGradient(xu << 2, yu << 2, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, sGx2, sGy2, sGxGy, sGxdI, sGydI, widthG, heightG, (1 << 2));
    
    
          if (sGx2 > 0)
          {
            tmpx = rightShiftMSB(sGxdI << 3, sGx2);
            tmpx = Clip3(-limit, limit, tmpx);
          }
          if (sGy2 > 0)
          {
            int     mainsGxGy = sGxGy >> 12;
            int     secsGxGy = sGxGy & ((1 << 12) - 1);
            int     tmpData = tmpx * mainsGxGy;
            tmpData = ((tmpData << 12) + tmpx*secsGxGy) >> 1;
            tmpy = rightShiftMSB(((sGydI << 3) - tmpData), sGy2);
            tmpy = Clip3(-limit, limit, tmpy);
          }
    
    
          srcY0Temp = srcY0 + (stridePredMC + 1) + ((yu*src0Stride + xu) << 2);
          srcY1Temp = srcY1 + (stridePredMC + 1) + ((yu*src0Stride + xu) << 2);
          gradX0 = m_gradX0 + offsetPos + ((yu*widthG + xu) << 2);
          gradX1 = m_gradX1 + offsetPos + ((yu*widthG + xu) << 2);
          gradY0 = m_gradY0 + offsetPos + ((yu*widthG + xu) << 2);
          gradY1 = m_gradY1 + offsetPos + ((yu*widthG + xu) << 2);
    
          dstY0 = dstY + ((yu*dstStride + xu) << 2);
    
          xAddBIOAvg4(srcY0Temp, src0Stride, srcY1Temp, src1Stride, dstY0, dstStride, gradX0, gradX1, gradY0, gradY1, widthG, (1 << 2), (1 << 2), (int)tmpx, (int)tmpy, shiftNum, offset, clpRng);
    
    Daniel Luo's avatar
    Daniel Luo committed
    #if !JVET_M0487_INT_EXTEND
    
    // Filters the four border strips (top, left, bottom, right) of a
    // width x height block from src into dst. Each strip is `dim` samples
    // thick; the left/right strips skip the `dim` rows already covered by the
    // top and bottom strips. fracX / fracY choose between a horizontal-only,
    // vertical-only or two-pass (horizontal then vertical) filter.
    // NOTE(review): the trailing `1` passed to filterHor/filterVer presumably
    // selects the bilinear filter - confirm against InterpolationFilter.
    void InterPrediction::bioSampleExtendBilinearFilter(Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int dim, int fracX, int fracY, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng)
    {
      Pel const* pSrc = NULL;
      Pel*       pDst = NULL;
    
    
      // Tap count used to size the extra rows needed by the two-pass case.
      int vFilterSize = NTAPS_BILINEAR;
    
      // Dimensions of the strip currently being filtered.
      int widthTmp = 0;
      int heightTmp = 0;
    
      for (int cand = 0; cand < 4; cand++)  // top, left, bottom and right
      {
    
        // Select the start pointers and the size of the current strip.
        if (cand == 0)  // top
        {
          pSrc = src;
          pDst = dst;
          widthTmp = width;
          heightTmp = dim;
        }
        else if (cand == 1)  // left
        {
          pSrc = src + dim*srcStride;
          pDst = dst + dim*dstStride;
          widthTmp = dim;
          heightTmp = height - 2 * dim;
        }
        else if (cand == 2)  // bottom
        {
          pSrc = src + (height - dim)*srcStride;
          pDst = dst + (height - dim)*dstStride;
          widthTmp = width;
          heightTmp = dim;
        }
        else if (cand == 3)  // right
        {
          pSrc = src + dim*srcStride + width - dim;
          pDst = dst + dim*dstStride + width - dim;
          widthTmp = dim;
          heightTmp = height - 2 * dim;
        }
    
        if (fracY == 0)
        {
          // No vertical fraction: a single horizontal pass writes straight to dst.
          m_if.filterHor(COMPONENT_Y, pSrc, srcStride, pDst, dstStride, widthTmp, heightTmp, fracX, isLast, fmt, clpRng, 1);
        }
        else if (fracX == 0)
        {
          // No horizontal fraction: a single vertical pass writes straight to dst.
          m_if.filterVer(COMPONENT_Y, pSrc, srcStride, pDst, dstStride, widthTmp, heightTmp, fracY, true, isLast, fmt, clpRng, 1);
        }
        else
        {
          // Two-pass case: intermediate rows go to m_filteredBlockTmp[0][COMPONENT_Y],
          // addressed with a stride of `width`.
          PelBuf tmpBuf = PelBuf(m_filteredBlockTmp[0][COMPONENT_Y], Size(width, height));
          tmpBuf.stride = width;
    
          // Horizontal pass starts ((vFilterSize >> 1) - 1) rows above the strip and
          // produces heightTmp + vFilterSize - 1 rows for the vertical pass to consume.
          m_if.filterHor(COMPONENT_Y, pSrc - ((vFilterSize >> 1) - 1) * srcStride, srcStride, tmpBuf.buf, tmpBuf.stride, widthTmp, heightTmp + vFilterSize - 1, fracX, false, fmt, clpRng, 1);
    
          JVET_J0090_SET_CACHE_ENABLE( false );
    
          // Vertical pass reads the intermediate rows, offset back to the strip's first row.
          m_if.filterVer(COMPONENT_Y, tmpBuf.buf + ((vFilterSize >> 1) - 1) * tmpBuf.stride, tmpBuf.stride, pDst, dstStride, widthTmp, heightTmp, fracY, false, isLast, fmt, clpRng, 1);
    
          JVET_J0090_SET_CACHE_ENABLE( true );
    
    Daniel Luo's avatar
    Daniel Luo committed
    #endif
    
    
    bool InterPrediction::xCalcBiPredSubBlkDist(const PredictionUnit &pu, const Pel* pYuvSrc0, const int src0Stride, const Pel* pYuvSrc1, const int src1Stride, const BitDepths &clipBitDepths)
    {
      const int     width = pu.lwidth();
      const int     height = pu.lheight();
      const int     clipbd = clipBitDepths.recon[toChannelType(COMPONENT_Y)];
      const uint32_t distortionShift = DISTORTION_PRECISION_ADJUSTMENT(clipbd);
      const int     shift = std::max<int>(2, (IF_INTERNAL_PREC - clipbd));
      const int     xUnit = (width >> 2);
      const int     yUnit = (height >> 2);
    
      m_bioDistThres = (shift <= 5) ? (((32 << (clipbd - 8))*width*height) >> (5 - shift)) : (((32 << (clipbd - 8))*width*height) << (shift - 5));
      m_bioSubBlkDistThres = (shift <= 5) ? (((64 << (clipbd - 8)) << 4) >> (5 - shift)) : (((64 << (clipbd - 8)) << 4) << (shift - 5));
    
      m_bioDistThres >>= distortionShift;
      m_bioSubBlkDistThres >>= distortionShift;
    
      DistParam cDistParam;
      Distortion dist = 0;
      for (int yu = 0, blkIdx = 0; yu < yUnit; yu++)
      {
        for (int xu = 0; xu < xUnit; xu++, blkIdx++)
        {
          const Pel* pPred0 = pYuvSrc0 + ((yu*src0Stride + xu) << 2);
          const Pel* pPred1 = pYuvSrc1 + ((yu*src1Stride + xu) << 2);
    
          m_pcRdCost->setDistParam(cDistParam, pPred0, pPred1, src0Stride, src1Stride, clipbd, COMPONENT_Y, (1 << 2), (1 << 2), 0, 1, false, true);
          m_bioPredSubBlkDist[blkIdx] = cDistParam.distFunc(cDistParam);
          dist += m_bioPredSubBlkDist[blkIdx];
        }
      }
    
      return (dist >= m_bioDistThres);
    }
    
    
    // Forwarding wrapper: passes every argument, unchanged and in the same
    // order, to g_pelBufOP.addBIOAvg4.
    void InterPrediction::xAddBIOAvg4(const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng)
    {
      g_pelBufOP.addBIOAvg4(
        src0, src0Stride,                     // first prediction signal
        src1, src1Stride,                     // second prediction signal
        dst, dstStride,                       // output
        gradX0, gradX1, gradY0, gradY1,       // gradient planes
        gradStride,
        width, height,
        tmpx, tmpy,
        shift, offset,
        clpRng);
    }
    
    
    #if JVET_M0063_BDOF_FIX
    // Forwarding wrapper (JVET_M0063_BDOF_FIX variant): passes all arguments,
    // including bitDepth, straight through to g_pelBufOP.bioGradFilter.
    void InterPrediction::xBioGradFilter(Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, int bitDepth)
    {
      g_pelBufOP.bioGradFilter(
        pSrc, srcStride,
        width, height,
        gradStride,
        gradX, gradY,
        bitDepth);
    }
    
    // Forwarding wrapper (JVET_M0063_BDOF_FIX variant): passes all arguments,
    // including bitDepth, straight through to g_pelBufOP.calcBIOPar.
    void InterPrediction::xCalcBIOPar(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, int bitDepth)
    {
      g_pelBufOP.calcBIOPar(
        srcY0Temp, srcY1Temp,
        gradX0, gradX1, gradY0, gradY1,
        dotProductTemp1, dotProductTemp2, dotProductTemp3,
        dotProductTemp5, dotProductTemp6,
        src0Stride, src1Stride, gradStride,
        widthG, heightG,
        bitDepth);
    }
    #else
    
    // Forwarding wrapper (variant without a bit-depth argument): passes all
    // arguments straight through to g_pelBufOP.bioGradFilter.
    void InterPrediction::xBioGradFilter(Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY)
    {
      g_pelBufOP.bioGradFilter(
        pSrc, srcStride,
        width, height,
        gradStride,
        gradX, gradY);
    }
    
    // Forwarding wrapper (variant without a bit-depth argument): passes all
    // arguments straight through to g_pelBufOP.calcBIOPar.
    void InterPrediction::xCalcBIOPar(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG)
    {
      g_pelBufOP.calcBIOPar(
        srcY0Temp, srcY1Temp,
        gradX0, gradX1, gradY0, gradY1,
        dotProductTemp1, dotProductTemp2, dotProductTemp3,
        dotProductTemp5, dotProductTemp6,
        src0Stride, src1Stride, gradStride,
        widthG, heightG);
    }
    
    
    // Forwarding wrapper: passes the block position, the five input arrays, the
    // five output sums (by reference) and the size arguments straight through
    // to g_pelBufOP.calcBlkGradient.
    void InterPrediction::xCalcBlkGradient(int sx, int sy, int    *arraysGx2, int     *arraysGxGy, int     *arraysGxdI, int     *arraysGy2, int     *arraysGydI, int     &sGx2, int     &sGy2, int     &sGxGy, int     &sGxdI, int     &sGydI, int width, int height, int unitSize)
    {
      g_pelBufOP.calcBlkGradient(
        sx, sy,
        arraysGx2, arraysGxGy, arraysGxdI, arraysGy2, arraysGydI,
        sGx2, sGy2, sGxGy, sGxdI, sGydI,
        width, height,
        unitSize);
    }
    
    void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied )
    
    {
      const int iRefIdx0 = pu.refIdx[0];
      const int iRefIdx1 = pu.refIdx[1];
    
      if( iRefIdx0 >= 0 && iRefIdx1 >= 0 )
      {
    
        if( pu.cu->GBiIdx != GBI_DEFAULT )
        {
    
          CHECK(bioApplied, "GBi is disallowed with BIO");
    
          pcYuvDst.addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, pu.cu->GBiIdx);
          return;
        }
    
          const int  src0Stride = pu.lwidth() + 2 * BIO_EXTEND_SIZE + 2;
          const int  src1Stride = pu.lwidth() + 2 * BIO_EXTEND_SIZE + 2;
    
          const Pel* pSrcY0 = m_filteredBlockTmp[2][COMPONENT_Y] + 2 * src0Stride + 2;
          const Pel* pSrcY1 = m_filteredBlockTmp[3][COMPONENT_Y] + 2 * src1Stride + 2;
    
          bool bioEnabled = xCalcBiPredSubBlkDist(pu, pSrcY0, src0Stride, pSrcY1, src1Stride, clipBitDepths);
          if (bioEnabled)
          {
            applyBiOptFlow(pu, pcYuvSrc0, pcYuvSrc1, iRefIdx0, iRefIdx1, pcYuvDst, clipBitDepths);
          }
          else
          {
            pcYuvDst.bufs[0].addAvg(CPelBuf(pSrcY0, src0Stride, pu.lumaSize()), CPelBuf(pSrcY1, src1Stride, pu.lumaSize()), clpRngs.comp[0]);
          }
        }
    
        pcYuvDst.addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, bioApplied);
    
        if( pu.cu->triangle )
        {
          pcYuvDst.copyFrom( pcYuvSrc0 );
        }
        else
    
        pcYuvDst.copyClip( pcYuvSrc0, clpRngs );
      }
      else if( iRefIdx0 < 0 && iRefIdx1 >= 0 )
      {
    
        if( pu.cu->triangle )
        {
          pcYuvDst.copyFrom( pcYuvSrc1 );
        }
        else
    
    void InterPrediction::motionCompensation( PredictionUnit &pu, PelUnitBuf &predBuf, const RefPicList &eRefPicList 
    
    Xiaozhong Xu's avatar
    Xiaozhong Xu committed
      , const bool luma, const bool chroma
    
    Yu Han's avatar
    Yu Han committed
      // dual tree handling for IBC as the only ref
    
    Xiaozhong Xu's avatar
    Xiaozhong Xu committed
      if (!luma || !chroma)
      {
        if (!luma && chroma)
        {
          xChromaMC(pu, predBuf);
          return;
        }
        else // (luma && !chroma)
        {
          xPredInterUni(pu, eRefPicList, predBuf, false
            , false
            , luma, chroma);
          return;
        }
      }
      // else, go with regular MC below
    
            CodingStructure &cs = *pu.cs;
      const PPS &pps            = *cs.pps;
      const SliceType sliceType =  cs.slice->getSliceType();
    
      if( eRefPicList != REF_PIC_LIST_X )
      {
        if( ( ( sliceType == P_SLICE && pps.getUseWP() ) || ( sliceType == B_SLICE && pps.getWPBiPred() ) ) )
        {
    
          xPredInterUni         ( pu,          eRefPicList, predBuf, true 
            , false
            , true, true
          );
    
          xWeightedPredictionUni( pu, predBuf, eRefPicList, predBuf, -1, m_maxCompIDToPred );
        }
        else
        {
    
          xPredInterUni( pu, eRefPicList, predBuf, false 
            , false
            , true, true
          );
    
    Yu Han's avatar
    Yu Han committed
        if (pu.mergeType != MRG_TYPE_DEFAULT_N && pu.mergeType != MRG_TYPE_IBC)
    
        {
          xSubPuMC( pu, predBuf, eRefPicList );
        }
        else if( xCheckIdenticalMotion( pu ) )
        {
    
          xPredInterUni( pu, REF_PIC_LIST_0, predBuf, false 
            , false
            , true, true
          );
    
    void InterPrediction::motionCompensation( CodingUnit &cu, const RefPicList &eRefPicList 
    
    Xiaozhong Xu's avatar
    Xiaozhong Xu committed
      , const bool luma, const bool chroma
    
    {
      for( auto &pu : CU::traversePUs( cu ) )
      {
        PelUnitBuf predBuf = cu.cs->getPredBuf( pu );
    
    #if JVET_M0147_DMVR
        pu.mvRefine = true;
    #endif
    
    Xiaozhong Xu's avatar
    Xiaozhong Xu committed
        motionCompensation( pu, predBuf, eRefPicList 
          , luma, chroma
        );
    
    #if JVET_M0147_DMVR
        pu.mvRefine = false;
    #endif
    
    void InterPrediction::motionCompensation( PredictionUnit &pu, const RefPicList &eRefPicList /*= REF_PIC_LIST_X*/ 
    
    Xiaozhong Xu's avatar
    Xiaozhong Xu committed
      , const bool luma, const bool chroma
    
      motionCompensation( pu, predBuf, eRefPicList 
    
    Xiaozhong Xu's avatar
    Xiaozhong Xu committed
        , luma, chroma
    
    // Right-shifts numer by an amount derived from the magnitude of denom:
    // for denom >= 1 the shift equals the index of denom's highest set bit.
    // NOTE(review): for denom <= 0 the search below stops at msbIdx == 0, which
    // makes shiftIdx -1; the call sites visible in this file only pass a
    // positive denom - confirm no other caller does otherwise.
    int InterPrediction::rightShiftMSB(int numer, int denom)
    {
      int     d;
      int msbIdx = 0;
      // Find the smallest msbIdx for which denom < 2^msbIdx.
      for (msbIdx = 0; msbIdx<32; msbIdx++)
      {
        if (denom < ((int)1 << msbIdx))
        {
          break;
        }
      }
    
      // One less than that is the position of denom's highest set bit.
      int shiftIdx = msbIdx - 1;
      d = (numer >> shiftIdx);
    
    rlliao's avatar
    rlliao committed
    void InterPrediction::motionCompensation4Triangle( CodingUnit &cu, MergeCtx &triangleMrgCtx, const bool splitDir, const uint8_t candIdx0, const uint8_t candIdx1 )
    
    {
      for( auto &pu : CU::traversePUs( cu ) )
      {
        const UnitArea localUnitArea( cu.cs->area.chromaFormat, Area( 0, 0, pu.lwidth(), pu.lheight() ) );
    
    rlliao's avatar
    rlliao committed
        PelUnitBuf tmpTriangleBuf = m_triangleBuf.getBuf( localUnitArea );
    
        PelUnitBuf predBuf        = cu.cs->getPredBuf( pu );
         
    
    rlliao's avatar
    rlliao committed
        triangleMrgCtx.setMergeInfo( pu, candIdx0 );
    
        PU::spanMotionInfo( pu );
        motionCompensation( pu, tmpTriangleBuf );
       
    
    rlliao's avatar
    rlliao committed
        triangleMrgCtx.setMergeInfo( pu, candIdx1 );
    
        PU::spanMotionInfo( pu );
        motionCompensation( pu, predBuf );
    
    
    #if JVET_M0328_KEEP_ONE_WEIGHT_GROUP
        weightedTriangleBlk( pu, splitDir, MAX_NUM_CHANNEL_TYPE, predBuf, tmpTriangleBuf, predBuf );
    #else
    
    rlliao's avatar
    rlliao committed
        weightedTriangleBlk( pu, PU::getTriangleWeights(pu, triangleMrgCtx, candIdx0, candIdx1), splitDir, MAX_NUM_CHANNEL_TYPE, predBuf, tmpTriangleBuf, predBuf );
    
    #if JVET_M0328_KEEP_ONE_WEIGHT_GROUP
    void InterPrediction::weightedTriangleBlk( PredictionUnit &pu, const bool splitDir, int32_t channel, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 )
    #else
    
    rlliao's avatar
    rlliao committed
    void InterPrediction::weightedTriangleBlk( PredictionUnit &pu, bool weights, const bool splitDir, int32_t channel, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 )
    
    #if JVET_M0328_KEEP_ONE_WEIGHT_GROUP
      if( channel == CHANNEL_TYPE_LUMA )
      {
        xWeightedTriangleBlk( pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, predDst, predSrc0, predSrc1 );
      }
      else if( channel == CHANNEL_TYPE_CHROMA )
      {
        xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 );
        xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 );
      }
      else
      {
        xWeightedTriangleBlk( pu, pu.lumaSize().width,   pu.lumaSize().height,   COMPONENT_Y,  splitDir, predDst, predSrc0, predSrc1 );
        xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 );
        xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 );
      }
    #else
    
    rlliao's avatar
    rlliao committed
      if( channel == CHANNEL_TYPE_LUMA )
    
    rlliao's avatar
    rlliao committed
        xWeightedTriangleBlk( pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, weights, predDst, predSrc0, predSrc1 );
    
    rlliao's avatar
    rlliao committed
      else if( channel == CHANNEL_TYPE_CHROMA )
      {
        xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, weights, predDst, predSrc0, predSrc1 );
        xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, weights, predDst, predSrc0, predSrc1 );
      }
      else
      {
        xWeightedTriangleBlk( pu, pu.lumaSize().width,   pu.lumaSize().height,   COMPONENT_Y,  splitDir, weights, predDst, predSrc0, predSrc1 );
        xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, weights, predDst, predSrc0, predSrc1 );
        xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, weights, predDst, predSrc0, predSrc1 );
      }
    
    #if JVET_M0328_KEEP_ONE_WEIGHT_GROUP
    void InterPrediction::xWeightedTriangleBlk( const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const bool splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 )
    #else
    
    rlliao's avatar
    rlliao committed
    void InterPrediction::xWeightedTriangleBlk( const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const bool splitDir, const bool weights, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 )
    
    rlliao's avatar
    rlliao committed
    {
      Pel*    dst        = predDst .get(compIdx).buf;
      Pel*    src0       = predSrc0.get(compIdx).buf;
      Pel*    src1       = predSrc1.get(compIdx).buf;
      int32_t strideDst  = predDst .get(compIdx).stride  - width;
      int32_t strideSrc0 = predSrc0.get(compIdx).stride  - width;
      int32_t strideSrc1 = predSrc1.get(compIdx).stride  - width;
    
      const char    log2WeightBase    = 3;
      const ClpRng  clipRng           = pu.cu->slice->clpRngs().comp[compIdx];
      const int32_t clipbd            = clipRng.bd;
      const int32_t shiftDefault      = std::max<int>(2, (IF_INTERNAL_PREC - clipbd));
      const int32_t offsetDefault     = (1<<(shiftDefault-1)) + IF_INTERNAL_OFFS;
      const int32_t shiftWeighted     = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + log2WeightBase;
      const int32_t offsetWeighted    = (1 << (shiftWeighted - 1)) + (IF_INTERNAL_OFFS << log2WeightBase);
                                      
      const int32_t ratioWH           = (width > height) ? (width / height) : 1;
      const int32_t ratioHW           = (width > height) ? 1 : (height / width);
    
    #if JVET_M0328_KEEP_ONE_WEIGHT_GROUP
    
      const bool    longWeight        = (compIdx == COMPONENT_Y) || ( predDst.chromaFormat == CHROMA_444 );
      const int32_t weightedLength    = longWeight ? 7 : 3;
    
    rlliao's avatar
    rlliao committed
      const Pel*    pelWeighted       = (compIdx == COMPONENT_Y) ? g_trianglePelWeightedLuma[splitDir][weights] : g_trianglePelWeightedChroma[predDst.chromaFormat == CHROMA_444 ? 0 : 1][splitDir][weights];
      const int32_t weightedLength    = (compIdx == COMPONENT_Y) ? g_triangleWeightLengthLuma[weights] : g_triangleWeightLengthChroma[predDst.chromaFormat == CHROMA_444 ? 0 : 1][weights];
    
    rlliao's avatar
    rlliao committed
            int32_t weightedStartPos  = ( splitDir == 0 ) ? ( 0 - (weightedLength >> 1) * ratioWH ) : ( width - ((weightedLength + 1) >> 1) * ratioWH );
            int32_t weightedEndPos    = weightedStartPos + weightedLength * ratioWH - 1;
            int32_t weightedPosoffset =( splitDir == 0 ) ? ratioWH : -ratioWH;
      
    
    #if JVET_M0328_KEEP_ONE_WEIGHT_GROUP
            Pel     tmpPelWeighted;
            int32_t weightIdx;
    #else
    
    rlliao's avatar
    rlliao committed
      const Pel*    tmpPelWeighted;
    
    rlliao's avatar
    rlliao committed
            int32_t x, y, tmpX, tmpY, tmpWeightedStart, tmpWeightedEnd;
      
      for( y = 0; y < height; y+= ratioHW )
    
    rlliao's avatar
    rlliao committed
        for( tmpY = ratioHW; tmpY > 0; tmpY-- )
    
    rlliao's avatar
    rlliao committed
          for( x = 0; x < weightedStartPos; x++ )
    
    rlliao's avatar
    rlliao committed
            *dst++ = ClipPel( rightShift( (splitDir == 0 ? *src1 : *src0) + offsetDefault, shiftDefault), clipRng );
            src0++;
            src1++;
    
    rlliao's avatar
    rlliao committed
          tmpWeightedStart = std::max((int32_t)0, weightedStartPos);
          tmpWeightedEnd   = std::min(weightedEndPos, (int32_t)(width - 1));
    
    #if JVET_M0328_KEEP_ONE_WEIGHT_GROUP
          weightIdx        = 1;
    #else
    
    rlliao's avatar
    rlliao committed
          tmpPelWeighted   = pelWeighted;
    
    rlliao's avatar
    rlliao committed
          if( weightedStartPos < 0 )
    
    #if JVET_M0328_KEEP_ONE_WEIGHT_GROUP
            weightIdx     += abs(weightedStartPos) / ratioWH;
    #else
    
    rlliao's avatar
    rlliao committed
            tmpPelWeighted += abs(weightedStartPos) / ratioWH;
    
    rlliao's avatar
    rlliao committed
          for( x = tmpWeightedStart; x <= tmpWeightedEnd; x+= ratioWH )
    
    rlliao's avatar
    rlliao committed
            for( tmpX = ratioWH; tmpX > 0; tmpX-- )
    
    #if JVET_M0328_KEEP_ONE_WEIGHT_GROUP
    
              tmpPelWeighted = Clip3( 1, 7, longWeight ? weightIdx : (weightIdx * 2));
    
              tmpPelWeighted = splitDir ? ( 8 - tmpPelWeighted ) : tmpPelWeighted;
              *dst++         = ClipPel( rightShift( (tmpPelWeighted*(*src0++) + ((8 - tmpPelWeighted) * (*src1++)) + offsetWeighted), shiftWeighted ), clipRng );
    #else
    
    rlliao's avatar
    rlliao committed
              *dst++ = ClipPel( rightShift( ((*tmpPelWeighted)*(*src0++) + ((8 - (*tmpPelWeighted)) * (*src1++)) + offsetWeighted), shiftWeighted ), clipRng );
    
    #if JVET_M0328_KEEP_ONE_WEIGHT_GROUP
            weightIdx ++;
    #else
    
    rlliao's avatar
    rlliao committed
            tmpPelWeighted++;
    
    rlliao's avatar
    rlliao committed
          for( x = weightedEndPos + 1; x < width; x++ )
    
    rlliao's avatar
    rlliao committed
            *dst++ = ClipPel( rightShift( (splitDir == 0 ? *src0 : *src1) + offsetDefault, shiftDefault ), clipRng );
            src0++;
            src1++;
    
    rlliao's avatar
    rlliao committed
    
          dst  += strideDst;
          src0 += strideSrc0;
          src1 += strideSrc1;
    
    rlliao's avatar
    rlliao committed
        weightedStartPos += weightedPosoffset;
        weightedEndPos   += weightedPosoffset;
    
    1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 
1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000
    #if JVET_M0147_DMVR
    // All 64 bits set: the largest value a uint64_t can hold.
    const uint64_t   MAX_UINT64 = 0xFFFFFFFFFFFFFFFFU;
    // For every colour component of pu, copies the reference samples addressed
    // by the (clipped) motion vector of list refId into pcPad - enlarged by the
    // interpolation filter support - and then pads the copied area on all sides.
    // The stride of each pcPad component buffer is overwritten with a fixed value.
    void InterPrediction::xPrefetchPad(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId)
    {
      const Picture* refPic      = pu.cu->slice->getRefPic(refId, pu.refIdx[refId]);
      const int      lumaMvShift = (MV_FRACTIONAL_BITS_INTERNAL);

      for (int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++)
      {
        const ComponentID comp   = (ComponentID)compIdx;
        PelBuf&           padBuf = pcPad.bufs[compIdx];

        padBuf.stride = (MAX_CU_SIZE + (DMVR_NUM_ITERATION << 1) + NTAPS_LUMA);

        const int numTaps     = (compIdx == (COMPONENT_Y)) ? NTAPS_LUMA : NTAPS_CHROMA;
        const int scaleX      = getComponentScaleX(comp, pu.chromaFormat);
        const int compMvShift = lumaMvShift + scaleX;
        const int padOffset   = (DMVR_NUM_ITERATION) * (padBuf.stride + 1);
        const int padSize     = (DMVR_NUM_ITERATION) >> scaleX;

        // The fetched area grows by the filter support in both dimensions.
        int fetchWidth  = padBuf.width;
        int fetchHeight = padBuf.height;
        fetchWidth  += (numTaps - 1);
        fetchHeight += (numTaps - 1);

        // Move the vector up-left by the taps that precede the centre sample, then clip it.
        const int tapShift = (((numTaps >> 1) - 1) << compMvShift);
        Mv fetchMv(pu.mv[refId].getHor(), pu.mv[refId].getVer());
        fetchMv += Mv(-tapShift, -tapShift);
        clipMv(fetchMv, pu.lumaPos(), pu.lumaSize(),*pu.cs->sps);

        /* Pre-fetch similar to HEVC*/
        const Position refPos = pu.blocks[compIdx].pos().offset(fetchMv.getHor() >> compMvShift, fetchMv.getVer() >> compMvShift);
        CPelBuf        refBuf = refPic->getRecoBuf(CompArea(comp, pu.chromaFormat, refPos, pu.blocks[compIdx].size()));
        g_pelBufOP.copyBuffer((Pel *)refBuf.buf, refBuf.stride, ((Pel *)padBuf.buf) + padOffset, padBuf.stride, fetchWidth, fetchHeight);

        /*padding on all side of size DMVR_PAD_LENGTH*/
        g_pelBufOP.padding(padBuf.buf + padOffset, padBuf.stride, fetchWidth, fetchHeight, padSize);
      }
    }
    // Restricted signed division producing a 3-bit magnitude.
    // |N| is compared against 8*D, 4*D and 2*D in turn (subtracting on each
    // hit), so for positive D the magnitude is min(7, |N| / (2*D)). The sign of
    // the result follows the sign of N.
    inline int32_t div_for_maxq7(int64_t N, int64_t D)
    {
      const bool negative = (N < 0);
      if (negative)
      {
        N = -N;
      }

      int64_t threshold = (D << 3);
      int32_t quotient  = 0;

      // One quotient bit per step, most significant first.
      for (int step = 0; step < 3; ++step)
      {
        quotient = (quotient << 1);
        if (N >= threshold)
        {
          N -= threshold;
          quotient++;
        }
        threshold = (threshold >> 1);
      }

      return negative ? (-quotient) : quotient;
    }
    
    // Derives a sub-pel motion vector delta per axis from five cost values.
    // sadBuffer[0] is the centre cost; sadBuffer[1]/[3] are the horizontal pair
    // and sadBuffer[2]/[4] the vertical pair. deltaMv[0] receives the horizontal
    // result and deltaMv[1] the vertical one. An axis whose denominator is zero
    // leaves its deltaMv entry untouched.
    void xSubPelErrorSrfc(uint64_t *sadBuffer, int32_t *deltaMv)
    {
      const int32_t  subPelLevel = 4;/*1: half pel, 2: Qpel, 3:1/8, 4: 1/16*/
      const uint64_t centreCost  = sadBuffer[0];

      // axis 0: horizontal (entries 1 and 3), axis 1: vertical (entries 2 and 4)
      for (int axis = 0; axis < 2; ++axis)
      {
        const uint64_t costA = sadBuffer[1 + axis];
        const uint64_t costB = sadBuffer[3 + axis];

        const int64_t numer = (int64_t)((costA - costB) << subPelLevel);
        const int64_t denom = (int64_t)((costA + costB - (centreCost << 1)));

        if (0 == denom)
        {
          continue;
        }

        if ((costA != centreCost) && (costB != centreCost))
        {
          deltaMv[axis] = div_for_maxq7(numer, denom);
        }
        else if (costA == centreCost)
        {
          deltaMv[axis] = -8;// half pel
        }
        else
        {
          deltaMv[axis] = 8;// half pel
        }
      }
      return;
    }
    
    void InterPrediction::xBIPMVRefine(int bd, Pel *pRefL0, Pel *pRefL1, uint64_t& minCost, int16_t *delta_mv, uint64_t *pSADsArray, int width, int height)
    {
      const int32_t refStrideL0 = m_biLinearBufStride;
      const int32_t refStrideL1 = m_biLinearBufStride;
      Pel *pRefL0Orig = pRefL0;
      Pel *pRefL1Orig = pRefL1;
      for (int nIdx = SAD_BOTTOM; nIdx <= SAD_TOP_LEFT; ++nIdx)
      {
        int32_t SadOffset = ((m_pSearchOffset[nIdx].getVer() * ((DMVR_NUM_ITERATION << 1) + 1)) + m_pSearchOffset[nIdx].getHor());
        pRefL0 = pRefL0Orig + m_pSearchOffset[nIdx].hor + (m_pSearchOffset[nIdx].ver * refStrideL0);
        pRefL1 = pRefL1Orig - m_pSearchOffset[nIdx].hor - (m_pSearchOffset[nIdx].ver * refStrideL1);
        if (*(pSADsArray + SadOffset) == MAX_UINT64)
        {
          const uint64_t cost = xDMVRCost(bd, pRefL0, refStrideL0, pRefL1, refStrideL1, width, height);
          *(pSADsArray + SadOffset) = cost;
        }
        if (nIdx == SAD_LEFT)
        {
          int32_t down = -1, right = -1;
          if (pSADsArray[(((DMVR_NUM_ITERATION << 1) + 1))] < pSADsArray[-(((DMVR_NUM_ITERATION << 1) + 1))])
          {
            down = 1;
          }
          if (pSADsArray[1] < pSADsArray[-1])
          {
            right = 1;
          }
          m_pSearchOffset[SAD_TOP_LEFT].set(right, down);
        }
        if (*(pSADsArray + SadOffset) < minCost)
        {
          minCost = *(pSADsArray + SadOffset);
          delta_mv[0] = m_pSearchOffset[nIdx].getHor();
          delta_mv[1] = m_pSearchOffset[nIdx].getVer();
        }
      }
    }
    
    /**
     * Final motion compensation for both reference lists, reading from the padded
     * reference buffers instead of the reference picture.
     *
     * For every list and every component the integer displacement between the
     * (clipped) PU motion vector and the (clipped) merge motion vector is turned
     * into a sample offset inside the padded buffer, and xPredInterBlk() is
     * invoked with that source pointer and the padded buffer's stride.
     *
     * \param pu           prediction unit; pu.mv[] supplies the motion vectors used here
     * \param pcYuvSrc0    destination prediction buffer for list 0
     * \param pcYuvSrc1    destination prediction buffer for list 1
     * \param pcPad0       padded reference samples for list 0
     * \param pcPad1       padded reference samples for list 1
     * \param bBIOApplied  forwarded to xPredInterBlk()
     * \param mergeMV      starting (merge) motion vector per list
     */
    void InterPrediction::xFinalPaddedMCForDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvSrc0, PelUnitBuf &pcYuvSrc1, PelUnitBuf &pcPad0, PelUnitBuf &pcPad1, const bool bBIOApplied
      , const Mv mergeMV[NUM_REF_PIC_LIST_01]
    )
    {
      /*always high precision MVs are used*/
      const int mvshift = 4;

      for (int k = 0; k < NUM_REF_PIC_LIST_01; k++)
      {
        const RefPicList refId = (RefPicList)k;

        // first pass works on the list-0 buffers, every later pass on the list-1 buffers
        PelUnitBuf dstPred   = (k == 0) ? pcYuvSrc0 : pcYuvSrc1;
        PelUnitBuf paddedRef = (k == 0) ? pcPad0 : pcPad1;

        Mv cMv = pu.mv[refId];
        m_iRefListIdx = refId;
        const Picture* refPic = pu.cu->slice->getRefPic(refId, pu.refIdx[refId]);
        clipMv(cMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps);

        Mv startMv = mergeMV[refId];
        clipMv(startMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps);

        for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++)
        {
          // NOTE(review): the horizontal component scale is used for the vertical MV
          // component as well - confirm this is intended for chroma formats where the
          // horizontal and vertical scales differ
          const int mvshiftTemp    = mvshift + getComponentScaleX((ComponentID)compID, pu.chromaFormat);
          const int leftPixelExtra = (compID == COMPONENT_Y) ? ((NTAPS_LUMA >> 1) - 1) : ((NTAPS_CHROMA >> 1) - 1);

          // whole-sample movement of the final MV relative to the starting MV
          const int deltaIntMvX = (cMv.getHor() >> mvshiftTemp) - (startMv.getHor() >> mvshiftTemp);
          const int deltaIntMvY = (cMv.getVer() >> mvshiftTemp) - (startMv.getVer() >> mvshiftTemp);

          CHECK((abs(deltaIntMvX) > DMVR_NUM_ITERATION) || (abs(deltaIntMvY) > DMVR_NUM_ITERATION), "not expected DMVR movement");

          PelBuf &srcBuf = paddedRef.bufs[compID];

          // skip the padding and the filter margin, then apply the integer movement
          int offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (srcBuf.stride + 1);
          offset += (deltaIntMvY) * srcBuf.stride;
          offset += (deltaIntMvX);

          xPredInterBlk((ComponentID)compID, pu, refPic, cMv, dstPred, true, pu.cs->slice->getClpRngs().comp[compID],
            bBIOApplied, false, 0, 0, 0, (srcBuf.buf + offset), srcBuf.stride);
        }
      }
    }
    
    uint64_t InterPrediction::xDMVRCost(int iBitDepth, Pel* pOrg, uint32_t uiRefStride, const Pel* pRef, uint32_t uiOrgStride, int iWidth, int iHeight)
    {
      DistParam cDistParam;
      cDistParam.applyWeight = false;
      cDistParam.useMR = false;
      m_pcRdCost->setDistParam(cDistParam, pOrg, pRef, uiOrgStride, uiRefStride, iBitDepth, COMPONENT_Y, iWidth, iHeight , 1);
      uint64_t uiCost = cDistParam.distFunc(cDistParam);
      return uiCost;
    }
    
    /**
     * Adds a sub-pel correction to the accumulated DMVR displacement.
     *
     * The correction is applied only when notZeroCost is set and the last
     * refinement step reported no movement (delta_mv is (0, 0)). In that case
     * the centre SAD and its left, top, right and bottom neighbours in the SAD
     * grid are handed to xSubPelErrorSrfc() and the result is added to
     * total_delta_mv.
     *
     * \param notZeroCost     false disables the correction
     * \param total_delta_mv  in/out: accumulated displacement (hor, ver)
     * \param delta_mv        displacement found by the last integer step
     * \param pSADsArray      pointer to the centre entry of the SAD grid, row pitch (DMVR_NUM_ITERATION << 1) + 1
     */
    void xDMVRSubPixelErrorSurface(bool notZeroCost, int16_t *total_delta_mv, int16_t *delta_mv, uint64_t *pSADsArray)
    {
      if (!notZeroCost || delta_mv[0] != 0 || delta_mv[1] != 0)
      {
        return;
      }

      const int sadStride = (((DMVR_NUM_ITERATION << 1) + 1));

      // centre, left, top, right, bottom
      uint64_t crossSads[5] =
      {
        pSADsArray[0],
        pSADsArray[-1],
        pSADsArray[-sadStride],
        pSADsArray[1],
        pSADsArray[sadStride]
      };

      int32_t subPelDelta[2] = { 0, 0 };
      xSubPelErrorSrfc(crossSads, subPelDelta);

      total_delta_mv[0] += subPelDelta[0];
      total_delta_mv[1] += subPelDelta[1];
    }
    
    /**
     * Produces the luma predictions for both reference lists that the DMVR
     * search operates on.
     *
     * For each list the clipped PU motion vector is used as the starting MV.
     * xPredInterBlk() reads from the list's padded reference buffer
     * (m_cYuvRefBuffDMVRL0 / m_cYuvRefBuffDMVRL1) and writes into the list's
     * temporary buffer (m_cYuvPredTempDMVRL0 / m_cYuvPredTempDMVRL1), which is
     * wrapped with a stride of MAX_CU_SIZE + 2 * DMVR_NUM_ITERATION and a size
     * enlarged by 2 * DMVR_NUM_ITERATION in each dimension.
     *
     * \param pu       prediction unit supplying motion vectors, reference indices and luma size
     * \param clpRngs  clipping ranges; the luma entry is forwarded to xPredInterBlk()
     */
    void InterPrediction::xinitMC(PredictionUnit& pu, const ClpRngs &clpRngs)
    {
      const RefPicList refLists[2]   = { REF_PIC_LIST_0, REF_PIC_LIST_1 };
      const int        refIndices[2] = { pu.refIdx[0], pu.refIdx[1] };

      /*use merge MV as starting MV*/
      Mv startingMv[2] = { pu.mv[REF_PIC_LIST_0], pu.mv[REF_PIC_LIST_1] };

      /*Clip the starting MVs*/
      clipMv(startingMv[0], pu.lumaPos(), pu.lumaSize(), *pu.cs->sps);
      clipMv(startingMv[1], pu.lumaPos(), pu.lumaSize(), *pu.cs->sps);

      const int leftPixelExtra = (NTAPS_LUMA >> 1) - 1;

      /*MC for refinement: L0 first, then L1*/
      for (int list = 0; list < 2; list++)
      {
        auto &paddedRef = (list == 0) ? m_cYuvRefBuffDMVRL0 : m_cYuvRefBuffDMVRL1;
        Pel  *predTemp  = (list == 0) ? m_cYuvPredTempDMVRL0 : m_cYuvPredTempDMVRL1;

        // the three terms net out to leftPixelExtra * (stride + 1)
        int offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (paddedRef.bufs[COMPONENT_Y].stride + 1);
        offset += (-(int)DMVR_NUM_ITERATION)* (int)paddedRef.bufs[COMPONENT_Y].stride;
        offset += (-(int)DMVR_NUM_ITERATION);

        PelBuf srcBuf = paddedRef.bufs[COMPONENT_Y];

        // block size grown by the search range on every side
        const auto extWidth  = pu.lwidth() + (DMVR_NUM_ITERATION << 1);
        const auto extHeight = pu.lheight() + (DMVR_NUM_ITERATION << 1);

        PelUnitBuf yuvPredTemp = PelUnitBuf(pu.chromaFormat, PelBuf(predTemp,
          (MAX_CU_SIZE + (DMVR_NUM_ITERATION << 1)), extWidth, extHeight));

        // NOTE(review): the trailing 'true' presumably selects the bilinear MC path - confirm against xPredInterBlk
        xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(refLists[list], refIndices[list]), startingMv[list], yuvPredTemp, true, clpRngs.comp[COMPONENT_Y],
          false, false, extWidth, extHeight, true, ((Pel *)srcBuf.buf) + offset, srcBuf.stride
        );
      }
    }
    
    void InterPrediction::xProcessDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvDst, const ClpRngs &clpRngs, const bool bBIOApplied)
    {  
      bool bDMVRApplied = true;
      
      int iterationCount = DMVR_NUM_ITERATION;
      /*Always High Precision*/
      int mvShift = MV_FRACTIONAL_BITS_INTERNAL;
      
      /*use merge MV as starting MV*/
      Mv mergeMv[] = { pu.mv[REF_PIC_LIST_0] , pu.mv[REF_PIC_LIST_1] };
      
      m_biLinearBufStride = (MAX_CU_SIZE + (DMVR_NUM_ITERATION << 1));  
    
      int dy = std::min<int>(pu.lumaSize().height, DMVR_SUBCU_HEIGHT);
      int dx = std::min<int>(pu.lumaSize().width,  DMVR_SUBCU_WIDTH);
      /*L0 Padding*/
      m_cYuvRefBuffDMVRL0 = (pu.chromaFormat == CHROMA_400 ?
        PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y())) :
        PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y()),
          PelBuf(m_cRefSamplesDMVRL0[1], pcYuvDst.Cb()), PelBuf(m_cRefSamplesDMVRL0[2], pcYuvDst.Cr())));
    
      xPrefetchPad(pu, m_cYuvRefBuffDMVRL0, REF_PIC_LIST_0);
    
      /*L1 Padding*/
      m_cYuvRefBuffDMVRL1 = (pu.chromaFormat == CHROMA_400 ?
        PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y())) :
        PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y()), PelBuf(m_cRefSamplesDMVRL1[1], pcYuvDst.Cb()),
          PelBuf(m_cRefSamplesDMVRL1[2], pcYuvDst.Cr())));
    
      xPrefetchPad(pu, m_cYuvRefBuffDMVRL1, REF_PIC_LIST_1);
    
      xinitMC(pu, clpRngs);
    
      // point mc buffer to centre point to avoid multiplication to reach each iteration to the beginning
      Pel *biLinearPredL0 = m_cYuvPredTempDMVRL0 + (iterationCount * m_biLinearBufStride) + iterationCount;
      Pel *biLinearPredL1 = m_cYuvPredTempDMVRL1 + (iterationCount * m_biLinearBufStride) + iterationCount;
    
      Position puPos = pu.lumaPos();
      
      int bd = pu.cs->slice->getClpRngs().comp[COMPONENT_Y].bd;
      if (bDMVRApplied)
      {
        int num = 0;
    
        int yStart = 0;
        for (int y = puPos.y; y < (puPos.y + pu.lumaSize().height); y = y + dy, yStart = yStart + dy)
        {
          for (int x = puPos.x, xStart = 0; x < (puPos.x + pu.lumaSize().width); x = x + dx, xStart = xStart + dx)
          {       
            uint64_t minCost = MAX_UINT64;
            bool notZeroCost = true;
            int16_t totalDeltaMV[2] = { 0,0 };
            int16_t deltaMV[2] = { 0, 0 };
            uint64_t  *pSADsArray;
            for (int i = 0; i < (((DMVR_NUM_ITERATION << 1) + 1) * ((DMVR_NUM_ITERATION << 1) + 1)); i++)
            {
              m_SADsArray[i] = MAX_UINT64;
            }
            pSADsArray = &m_SADsArray[(((DMVR_NUM_ITERATION << 1) + 1) * ((DMVR_NUM_ITERATION << 1) + 1)) >> 1];
    
            Pel *addrL0Centre = biLinearPredL0 + yStart * m_biLinearBufStride + xStart;
            Pel *addrL1Centre = biLinearPredL1 + yStart * m_biLinearBufStride + xStart;
            for (int i = 0; i < iterationCount; i++)
            {
              deltaMV[0] = 0;
              deltaMV[1] = 0;
              Pel *addrL0 = addrL0Centre + totalDeltaMV[0] + (totalDeltaMV[1] * m_biLinearBufStride);
              Pel *addrL1 = addrL1Centre - totalDeltaMV[0] - (totalDeltaMV[1] * m_biLinearBufStride);
              if (i == 0)
              {
                minCost = xDMVRCost(clpRngs.comp[COMPONENT_Y].bd, addrL0, m_biLinearBufStride, addrL1, m_biLinearBufStride, dx, dy);
                if (minCost < ((4 * dx * (dy >> 1/*for alternate line*/))))
                {
                  notZeroCost = false;
                  break;
                }
                pSADsArray[0] = minCost;
              }
              if (!minCost)
              {
                notZeroCost = false;
                break;
              }
    
              xBIPMVRefine(bd, addrL0, addrL1, minCost, deltaMV, pSADsArray, dx, dy);
    
              if (deltaMV[0] == 0 && deltaMV[1] == 0)
              {
                break;
              }
              totalDeltaMV[0] += deltaMV[0];
              totalDeltaMV[1] += deltaMV[1];
              pSADsArray += ((deltaMV[1] * (((DMVR_NUM_ITERATION << 1) + 1))) + deltaMV[0]);
            }
    
            totalDeltaMV[0] = (totalDeltaMV[0] << mvShift);
            totalDeltaMV[1] = (totalDeltaMV[1] << mvShift);
            xDMVRSubPixelErrorSurface(notZeroCost, totalDeltaMV, deltaMV, pSADsArray);
            
            pu.mvdL0SubPu[num] = Mv(totalDeltaMV[0], totalDeltaMV[1]);
    
            num++;
          }
        }
      }
    
      {
        PredictionUnit subPu = pu;
        subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(puPos.x, puPos.y, dx, dy)));
        PelUnitBuf           m_cYuvRefBuffSubCuDMVRL0;
        PelUnitBuf           m_cYuvRefBuffSubCuDMVRL1;
        PelUnitBuf srcPred0 = (pu.chromaFormat == CHROMA_400 ?
          PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvDst.Y())) :
          PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvDst.Y()), PelBuf(m_acYuvPred[0][1], pcYuvDst.Cb()), PelBuf(m_acYuvPred[0][2], pcYuvDst.Cr())));
        PelUnitBuf srcPred1 = (pu.chromaFormat == CHROMA_400 ?
          PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvDst.Y())) :
          PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvDst.Y()), PelBuf(m_acYuvPred[1][1], pcYuvDst.Cb()), PelBuf(m_acYuvPred[1][2], pcYuvDst.Cr())));
    
        srcPred0 = srcPred0.subBuf(UnitAreaRelative(pu, subPu));
        srcPred1 = srcPred1.subBuf(UnitAreaRelative(pu, subPu));
        PelUnitBuf subPredBuf = pcYuvDst.subBuf(UnitAreaRelative(pu, subPu));
    
        int x = 0, y = 0;