Skip to content
Snippets Groups Projects
InterPrediction.cpp 121 KiB
Newer Older
  • Learn to ignore specific revisions
  • #if JVET_O0280_SIMD_TRIANGLE_WEIGHTING
        m_if.weightedTriangleBlk( pu, pu.lumaSize().width,   pu.lumaSize().height,   COMPONENT_Y,  splitDir, predDst, predSrc0, predSrc1 );
        m_if.weightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 );
        m_if.weightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 );
    #else
    
        xWeightedTriangleBlk( pu, pu.lumaSize().width,   pu.lumaSize().height,   COMPONENT_Y,  splitDir, predDst, predSrc0, predSrc1 );
        xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 );
        xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 );
    
    void InterPrediction::xWeightedTriangleBlk( const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const bool splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 )
    
    rlliao's avatar
    rlliao committed
    {
      Pel*    dst        = predDst .get(compIdx).buf;
      Pel*    src0       = predSrc0.get(compIdx).buf;
      Pel*    src1       = predSrc1.get(compIdx).buf;
      int32_t strideDst  = predDst .get(compIdx).stride  - width;
      int32_t strideSrc0 = predSrc0.get(compIdx).stride  - width;
      int32_t strideSrc1 = predSrc1.get(compIdx).stride  - width;
    
      const char    log2WeightBase    = 3;
      const ClpRng  clipRng           = pu.cu->slice->clpRngs().comp[compIdx];
      const int32_t clipbd            = clipRng.bd;
      const int32_t shiftDefault      = std::max<int>(2, (IF_INTERNAL_PREC - clipbd));
      const int32_t offsetDefault     = (1<<(shiftDefault-1)) + IF_INTERNAL_OFFS;
      const int32_t shiftWeighted     = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + log2WeightBase;
      const int32_t offsetWeighted    = (1 << (shiftWeighted - 1)) + (IF_INTERNAL_OFFS << log2WeightBase);
    
    rlliao's avatar
    rlliao committed
      const int32_t ratioWH           = (width > height) ? (width / height) : 1;
      const int32_t ratioHW           = (width > height) ? 1 : (height / width);
    
    
      const bool    longWeight        = (compIdx == COMPONENT_Y);
    
      const int32_t weightedLength    = longWeight ? 7 : 3;
    
    rlliao's avatar
    rlliao committed
            int32_t weightedStartPos  = ( splitDir == 0 ) ? ( 0 - (weightedLength >> 1) * ratioWH ) : ( width - ((weightedLength + 1) >> 1) * ratioWH );
            int32_t weightedEndPos    = weightedStartPos + weightedLength * ratioWH - 1;
            int32_t weightedPosoffset =( splitDir == 0 ) ? ratioWH : -ratioWH;
    
            Pel     tmpPelWeighted;
            int32_t weightIdx;
    
    rlliao's avatar
    rlliao committed
            int32_t x, y, tmpX, tmpY, tmpWeightedStart, tmpWeightedEnd;
    
    rlliao's avatar
    rlliao committed
      for( y = 0; y < height; y+= ratioHW )
    
    rlliao's avatar
    rlliao committed
        for( tmpY = ratioHW; tmpY > 0; tmpY-- )
    
    rlliao's avatar
    rlliao committed
          for( x = 0; x < weightedStartPos; x++ )
    
    rlliao's avatar
    rlliao committed
            *dst++ = ClipPel( rightShift( (splitDir == 0 ? *src1 : *src0) + offsetDefault, shiftDefault), clipRng );
            src0++;
            src1++;
    
    rlliao's avatar
    rlliao committed
          tmpWeightedStart = std::max((int32_t)0, weightedStartPos);
          tmpWeightedEnd   = std::min(weightedEndPos, (int32_t)(width - 1));
    
    rlliao's avatar
    rlliao committed
          if( weightedStartPos < 0 )
    
            weightIdx     += abs(weightedStartPos) / ratioWH;
    
    rlliao's avatar
    rlliao committed
          for( x = tmpWeightedStart; x <= tmpWeightedEnd; x+= ratioWH )
    
    rlliao's avatar
    rlliao committed
            for( tmpX = ratioWH; tmpX > 0; tmpX-- )
    
              tmpPelWeighted = Clip3( 1, 7, longWeight ? weightIdx : (weightIdx * 2));
    
              tmpPelWeighted = splitDir ? ( 8 - tmpPelWeighted ) : tmpPelWeighted;
              *dst++         = ClipPel( rightShift( (tmpPelWeighted*(*src0++) + ((8 - tmpPelWeighted) * (*src1++)) + offsetWeighted), shiftWeighted ), clipRng );
    
    rlliao's avatar
    rlliao committed
          for( x = weightedEndPos + 1; x < width; x++ )
    
    rlliao's avatar
    rlliao committed
            *dst++ = ClipPel( rightShift( (splitDir == 0 ? *src0 : *src1) + offsetDefault, shiftDefault ), clipRng );
            src0++;
            src1++;
    
    rlliao's avatar
    rlliao committed
    
          dst  += strideDst;
          src0 += strideSrc0;
          src1 += strideSrc1;
    
    rlliao's avatar
    rlliao committed
        weightedStartPos += weightedPosoffset;
        weightedEndPos   += weightedPosoffset;
    
    #if JVET_O0297_DMVR_PADDING // For Dec speedup
    void InterPrediction::xPrefetch(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId, bool forLuma)
    {
      int offset, width, height;
      Mv cMv;
    
    #if JVET_O1164_RPR
      const Picture* refPic = pu.cu->slice->getRefPic( refId, pu.refIdx[refId] )->unscaledPic;
    #else
    
      const Picture* refPic = pu.cu->slice->getRefPic(refId, pu.refIdx[refId]);
    
      int mvShift = (MV_FRACTIONAL_BITS_INTERNAL);
    
      int start = 0;
      int end = MAX_NUM_COMPONENT;
    
      start = forLuma ? 0 : 1;
      end = forLuma ? 1 : MAX_NUM_COMPONENT;
    
      for (int compID = start; compID < end; compID++)
      {
        cMv = Mv(pu.mv[refId].getHor(), pu.mv[refId].getVer());
        pcPad.bufs[compID].stride = (pcPad.bufs[compID].width + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA);
        int filtersize = (compID == (COMPONENT_Y)) ? NTAPS_LUMA : NTAPS_CHROMA;
        width = pcPad.bufs[compID].width;
        height = pcPad.bufs[compID].height;
        offset = (DMVR_NUM_ITERATION) * (pcPad.bufs[compID].stride + 1);
        int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat);
        width += (filtersize - 1);
        height += (filtersize - 1);
        cMv += Mv(-(((filtersize >> 1) - 1) << mvshiftTemp),
          -(((filtersize >> 1) - 1) << mvshiftTemp));
    
        bool wrapRef = false;
    
        if( pu.cs->sps->getWrapAroundEnabledFlag() )
    
          wrapRef = wrapClipMv( cMv, pu.blocks[0].pos(), pu.blocks[0].size(), pu.cs->sps, pu.cs->pps );   
        }
        else
        {
          clipMv( cMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps );
        }
    #else
        if( pu.cs->sps->getWrapAroundEnabledFlag() ) 
    
          wrapRef = wrapClipMv( cMv, pu.blocks[0].pos(), pu.blocks[0].size(), pu.cs->sps);
    
        }
        else
        {
          clipMv(cMv, pu.lumaPos(), pu.lumaSize(),*pu.cs->sps);
        }
    
        /* Pre-fetch similar to HEVC*/
        {
          CPelBuf refBuf;
          Position Rec_offset = pu.blocks[compID].pos().offset(cMv.getHor() >> mvshiftTemp, cMv.getVer() >> mvshiftTemp);
    
          refBuf = refPic->getRecoBuf(CompArea((ComponentID)compID, pu.chromaFormat, Rec_offset, pu.blocks[compID].size()), wrapRef);
    
          PelBuf &dstBuf = pcPad.bufs[compID];
          g_pelBufOP.copyBuffer((Pel *)refBuf.buf, refBuf.stride, ((Pel *)dstBuf.buf) + offset, dstBuf.stride, width, height);
        }
      }
    }
    // Applies g_pelBufOP.padding() to every colour component of pcPad.
    //
    // For each component the padded region is the component's width/height
    // extended by (filter taps - 1), starting DMVR_NUM_ITERATION rows and
    // DMVR_NUM_ITERATION columns into the buffer. The pad size is
    // DMVR_NUM_ITERATION shifted down by the component's horizontal scale.
    //
    // pu     only pu.chromaFormat is read (to derive the component scale).
    // pcPad  buffers to pad; their stride must already be set by the caller.
    // refId  not used by this function; kept for interface compatibility.
    void InterPrediction::xPad(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId)
    {
      for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++)
      {
        const int filtersize = (compID == (COMPONENT_Y)) ? NTAPS_LUMA : NTAPS_CHROMA;

        // Area to pad around: block size plus the extra samples the interpolation filter needs.
        const int width  = static_cast<int>(pcPad.bufs[compID].width)  + (filtersize - 1);
        const int height = static_cast<int>(pcPad.bufs[compID].height) + (filtersize - 1);

        // Skip DMVR_NUM_ITERATION rows and columns to reach the first sample of that area.
        const int offset  = (DMVR_NUM_ITERATION) * (pcPad.bufs[compID].stride + 1);
        const int padsize = (DMVR_NUM_ITERATION) >> getComponentScaleX((ComponentID)compID, pu.chromaFormat);

        /* padding on all sides of size DMVR_PAD_LENGTH */
        g_pelBufOP.padding(pcPad.bufs[compID].buf + offset, pcPad.bufs[compID].stride, width, height, padsize);
      }
    }
    #else
    
    void InterPrediction::xPrefetchPad(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId)
    {
      int offset, width, height;
      int padsize;
      Mv cMv;
      const Picture* refPic = pu.cu->slice->getRefPic(refId, pu.refIdx[refId]);
    
      int mvShift = (MV_FRACTIONAL_BITS_INTERNAL);
    
      for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++)
      {
        cMv = Mv(pu.mv[refId].getHor(), pu.mv[refId].getVer());
    
        pcPad.bufs[compID].stride = (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA);
    
        int filtersize = (compID == (COMPONENT_Y)) ? NTAPS_LUMA : NTAPS_CHROMA;
        width = pcPad.bufs[compID].width;
        height = pcPad.bufs[compID].height;
        offset = (DMVR_NUM_ITERATION) * (pcPad.bufs[compID].stride + 1);
        padsize = (DMVR_NUM_ITERATION) >> getComponentScaleX((ComponentID)compID, pu.chromaFormat);
    
        int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat);
    
        width += (filtersize - 1);
        height += (filtersize - 1);
        cMv += Mv(-(((filtersize >> 1) - 1) << mvshiftTemp),
          -(((filtersize >> 1) - 1) << mvshiftTemp));
    
        if( pu.cs->sps->getWrapAroundEnabledFlag() )
    
    #if JVET_O1164_PS
          wrapRef = wrapClipMv( cMv, pu.blocks[0].pos(), pu.blocks[0].size(), pu.cs->sps, pu.cs->pps );
    #else
    
          wrapRef = wrapClipMv( cMv, pu.blocks[0].pos(), pu.blocks[0].size(), pu.cs->sps);
    
    #if JVET_O1164_PS
          clipMv( cMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps );
    #else
    
          clipMv(cMv, pu.lumaPos(), pu.lumaSize(),*pu.cs->sps);
    
        /* Pre-fetch similar to HEVC*/
        {
          CPelBuf refBuf;
          Position Rec_offset = pu.blocks[compID].pos().offset(cMv.getHor() >> mvshiftTemp, cMv.getVer() >> mvshiftTemp);
    
          refBuf = refPic->getRecoBuf(CompArea((ComponentID)compID, pu.chromaFormat, Rec_offset, pu.blocks[compID].size()), wrapRef);
    
          PelBuf &dstBuf = pcPad.bufs[compID];
          g_pelBufOP.copyBuffer((Pel *)refBuf.buf, refBuf.stride, ((Pel *)dstBuf.buf) + offset, dstBuf.stride, width, height);
    
    #if JVET_J0090_MEMORY_BANDWITH_MEASURE
          JVET_J0090_SET_REF_PICTURE( refPic, (ComponentID)compID );
          for ( int row = 0 ; row < height ; row++ )
          {
            for ( int col = 0 ; col < width ; col++ )
            {
              JVET_J0090_CACHE_ACCESS( ((Pel *)refBuf.buf) + row * refBuf.stride + col, __FILE__, __LINE__ );
            }
          }
    #endif
    
        }
        /*padding on all side of size DMVR_PAD_LENGTH*/
        {
          g_pelBufOP.padding(pcPad.bufs[compID].buf + offset, pcPad.bufs[compID].stride, width, height, padsize);
        }
      }
    }
    
    // Sign-preserving shift-and-subtract division producing a 3-bit magnitude.
    //
    // |N| is compared in turn against (D << 3), that value halved, and that
    // value halved again; each comparison contributes one quotient bit, most
    // significant first. The sign of N is re-applied to the result.
    //
    // For D > 0 (and no overflow in D << 3) the result magnitude equals
    // min(7, |N| / (2 * D)).
    // NOTE(review): behaviour for D <= 0 is not meaningful - presumably the
    // caller guarantees a positive denominator; confirm.
    //
    // N  numerator, may be negative
    // D  denominator
    // returns a value in [-7, 7]
    inline int32_t div_for_maxq7(int64_t N, int64_t D)
    {
      const bool negative  = (N < 0);
      int64_t    remainder = negative ? -N : N;
      int64_t    divisor   = (D << 3);
      int32_t    quotient  = 0;

      // Two most significant bits: compare, subtract, then make room for the next bit.
      for (int step = 0; step < 2; ++step)
      {
        if (remainder >= divisor)
        {
          remainder -= divisor;
          quotient |= 1;
        }
        quotient <<= 1;
        divisor >>= 1;
      }

      // Least significant bit: the remainder is not needed afterwards, so no subtraction.
      if (remainder >= divisor)
      {
        quotient |= 1;
      }

      return negative ? -quotient : quotient;
    }
    
    // Derives a sub-sample offset per axis from five cost values.
    //
    // sadBuffer  five costs: [0] is the centre; [1] and [3] form the pair used
    //            for deltaMv[0], [2] and [4] the pair used for deltaMv[1]
    //            (the vertical one).
    // deltaMv    two-element output. An entry is left untouched when
    //            cost[a] + cost[b] - 2 * cost[0] evaluates to zero for its axis.
    //
    // When one neighbour of the pair equals the centre cost the offset is fixed
    // at -8 or +8 (half pel); otherwise it is div_for_maxq7() of the scaled
    // neighbour difference over the expression above.
    void xSubPelErrorSrfc(uint64_t *sadBuffer, int32_t *deltaMv)
    {
      const int32_t subPelShift = 4; /*1: half pel, 2: Qpel, 3:1/8, 4: 1/16*/

      // axis 0 uses neighbours [1]/[3], axis 1 (vertical) uses [2]/[4]
      for (int axis = 0; axis < 2; ++axis)
      {
        const uint64_t costCentre = sadBuffer[0];
        const uint64_t costFirst  = sadBuffer[1 + axis];
        const uint64_t costSecond = sadBuffer[3 + axis];

        // Arithmetic is done on the unsigned costs and only then reinterpreted as signed.
        const int64_t numerator   = (int64_t)((costFirst - costSecond) << subPelShift);
        const int64_t denominator = (int64_t)((costFirst + costSecond - (costCentre << 1)));

        if (0 == denominator)
        {
          continue; // nothing to estimate for this axis; keep the caller's value
        }

        if (costFirst == costCentre)
        {
          deltaMv[axis] = -8; // half pel
        }
        else if (costSecond == costCentre)
        {
          deltaMv[axis] = 8; // half pel
        }
        else
        {
          deltaMv[axis] = div_for_maxq7(numerator, denominator);
        }
      }
    }
    
    
    void InterPrediction::xBIPMVRefine(int bd, Pel *pRefL0, Pel *pRefL1, uint64_t& minCost, int16_t *deltaMV, uint64_t *pSADsArray, int width, int height)
    
    {
      const int32_t refStrideL0 = m_biLinearBufStride;
      const int32_t refStrideL1 = m_biLinearBufStride;
      Pel *pRefL0Orig = pRefL0;
      Pel *pRefL1Orig = pRefL1;
    
    Jeeva Raj A's avatar
    Jeeva Raj A committed
      for (int nIdx = 0; (nIdx < 25); ++nIdx)
    
        int32_t sadOffset = ((m_pSearchOffset[nIdx].getVer() * ((2 * DMVR_NUM_ITERATION) + 1)) + m_pSearchOffset[nIdx].getHor());
    
        pRefL0 = pRefL0Orig + m_pSearchOffset[nIdx].hor + (m_pSearchOffset[nIdx].ver * refStrideL0);
        pRefL1 = pRefL1Orig - m_pSearchOffset[nIdx].hor - (m_pSearchOffset[nIdx].ver * refStrideL1);
    
        if (*(pSADsArray + sadOffset) == MAX_UINT64)
    
        {
          const uint64_t cost = xDMVRCost(bd, pRefL0, refStrideL0, pRefL1, refStrideL1, width, height);
    
          *(pSADsArray + sadOffset) = cost;
    
        if (*(pSADsArray + sadOffset) < minCost)
    
          minCost = *(pSADsArray + sadOffset);
          deltaMV[0] = m_pSearchOffset[nIdx].getHor();
          deltaMV[1] = m_pSearchOffset[nIdx].getVer();
    
    void InterPrediction::xFinalPaddedMCForDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvSrc0, PelUnitBuf &pcYuvSrc1, PelUnitBuf &pcPad0, PelUnitBuf &pcPad1, const bool bioApplied
    
      , const Mv mergeMV[NUM_REF_PIC_LIST_01]
    
    #if JVET_O0297_DMVR_PADDING // For Dec speedup
      , bool blockMoved
    #endif
    
    )
    {
      int offset, deltaIntMvX, deltaIntMvY;
    
      PelUnitBuf pcYUVTemp = pcYuvSrc0;
      PelUnitBuf pcPadTemp = pcPad0;
      /*always high precision MVs are used*/
    
      int mvShift = MV_FRACTIONAL_BITS_INTERNAL;
    
    
      for (int k = 0; k < NUM_REF_PIC_LIST_01; k++)
      {
        RefPicList refId = (RefPicList)k;
        Mv cMv = pu.mv[refId];
        m_iRefListIdx = refId;
    
    #if JVET_O1164_RPR
        const Picture* refPic = pu.cu->slice->getRefPic( refId, pu.refIdx[refId] )->unscaledPic;
    #else
    
        const Picture* refPic = pu.cu->slice->getRefPic(refId, pu.refIdx[refId]);
    
        Mv cMvClipped = cMv;
    
    #if JVET_O1164_PS
        clipMv( cMvClipped, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps );
    #else
    
        clipMv(cMvClipped, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps);
    
        if( g_mctsDecCheckEnabled && !MCTSHelper::checkMvForMCTSConstraint( pu, startMv, MV_PRECISION_INTERNAL ) )
        {
          const Area& tileArea = pu.cs->picture->mctsInfo.getTileArea();
          printf( "Attempt an access over tile boundary at block %d,%d %d,%d with MV %d,%d (in Tile TL: %d,%d BR: %d,%d)\n",
            pu.lx(), pu.ly(), pu.lwidth(), pu.lheight(), startMv.getHor(), startMv.getVer(), tileArea.topLeft().x, tileArea.topLeft().y, tileArea.bottomRight().x, tileArea.bottomRight().y );
          THROW( "MCTS constraint failed!" );
        }
    
        for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++)
        {
    
    #if JVET_O0297_DMVR_PADDING // For Dec speedup
    
          Pel *srcBufPelPtr = NULL;
          int pcPadstride = 0;
          if (blockMoved || (compID == 0))
          {
            pcPadstride = pcPadTemp.bufs[compID].stride;
            int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat);
            int leftPixelExtra;
            if (compID == COMPONENT_Y)
            {
              leftPixelExtra = (NTAPS_LUMA >> 1) - 1;
            }
            else
            {
              leftPixelExtra = (NTAPS_CHROMA >> 1) - 1;
            }
            PelBuf &srcBuf = pcPadTemp.bufs[compID];
            deltaIntMvX = (cMv.getHor() >> mvshiftTemp) -
              (startMv.getHor() >> mvshiftTemp);
            deltaIntMvY = (cMv.getVer() >> mvshiftTemp) -
              (startMv.getVer() >> mvshiftTemp);
    
            CHECK((abs(deltaIntMvX) > DMVR_NUM_ITERATION) || (abs(deltaIntMvY) > DMVR_NUM_ITERATION), "not expected DMVR movement");
    
            offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (pcPadTemp.bufs[compID].stride + 1);
            offset += (deltaIntMvY)* pcPadTemp.bufs[compID].stride;
            offset += (deltaIntMvX);
            srcBufPelPtr = (srcBuf.buf + offset);
          }
    
    
    #if JVET_O1164_RPR
          xPredInterBlk( (ComponentID)compID, pu, refPic, cMvClipped, pcYUVTemp, true, pu.cs->slice->getClpRngs().comp[compID],
            bioApplied, false, pu.cu->slice->getScalingRatio( refId, pu.refIdx[refId] ), 0, 0, 0, srcBufPelPtr, pcPadstride );
    #else
    
          xPredInterBlk((ComponentID)compID, pu, refPic, cMvClipped, pcYUVTemp, true, pu.cs->slice->getClpRngs().comp[compID],
            bioApplied, false, 0, 0, 0, srcBufPelPtr, pcPadstride);
    
          int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat);
    
          int leftPixelExtra;
          if (compID == COMPONENT_Y)
          {
            leftPixelExtra = (NTAPS_LUMA >> 1) - 1;
          }
          else
          {
            leftPixelExtra = (NTAPS_CHROMA >> 1) - 1;
          }
    
          deltaIntMvX = (cMv.getHor() >> mvshiftTemp) -
            (startMv.getHor() >> mvshiftTemp);
          deltaIntMvY = (cMv.getVer() >> mvshiftTemp) -
            (startMv.getVer() >> mvshiftTemp);
    
          CHECK((abs(deltaIntMvX) > DMVR_NUM_ITERATION) || (abs(deltaIntMvY) > DMVR_NUM_ITERATION), "not expected DMVR movement");
    
          offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (pcPadTemp.bufs[compID].stride + 1);
          offset += (deltaIntMvY)* pcPadTemp.bufs[compID].stride;
          offset += (deltaIntMvX);
          PelBuf &srcBuf = pcPadTemp.bufs[compID];
    
          xPredInterBlk((ComponentID)compID, pu, refPic, cMvClipped, pcYUVTemp, true, pu.cs->slice->getClpRngs().comp[compID],
    
            bioApplied, false, 0, 0, 0, (srcBuf.buf + offset), pcPadTemp.bufs[compID].stride);
    
    uint64_t InterPrediction::xDMVRCost(int bitDepth, Pel* pOrg, uint32_t refStride, const Pel* pRef, uint32_t orgStride, int width, int height)
    
    {
      DistParam cDistParam;
      cDistParam.applyWeight = false;
      cDistParam.useMR = false;
    
      m_pcRdCost->setDistParam(cDistParam, pOrg, pRef, orgStride, refStride, bitDepth, COMPONENT_Y, width, height, 1);
    
      uint64_t uiCost = cDistParam.distFunc(cDistParam);
    
      return uiCost>>1;
    
    void xDMVRSubPixelErrorSurface(bool notZeroCost, int16_t *totalDeltaMV, int16_t *deltaMV, uint64_t *pSADsArray)
    
      int sadStride = (((2 * DMVR_NUM_ITERATION) + 1));
    
    Jeeva Raj A's avatar
    Jeeva Raj A committed
      if (notZeroCost && (abs(totalDeltaMV[0]) != (2 << MV_FRACTIONAL_BITS_INTERNAL))
        && (abs(totalDeltaMV[1]) != (2 << MV_FRACTIONAL_BITS_INTERNAL)))
    
        int32_t tempDeltaMv[2] = { 0,0 };
    
        sadbuffer[0] = pSADsArray[0];
        sadbuffer[1] = pSADsArray[-1];
        sadbuffer[2] = pSADsArray[-sadStride];
        sadbuffer[3] = pSADsArray[1];
        sadbuffer[4] = pSADsArray[sadStride];
    
        xSubPelErrorSrfc(sadbuffer, tempDeltaMv);
        totalDeltaMV[0] += tempDeltaMv[0];
        totalDeltaMV[1] += tempDeltaMv[1];
    
      }
    }
    
    void InterPrediction::xinitMC(PredictionUnit& pu, const ClpRngs &clpRngs)
    {
    
      const int refIdx0 = pu.refIdx[0];
      const int refIdx1 = pu.refIdx[1];
    
      /*use merge MV as starting MV*/
    
      Mv mergeMVL0(pu.mv[REF_PIC_LIST_0]);
      Mv mergeMVL1(pu.mv[REF_PIC_LIST_1]);
    
    #if JVET_O1164_PS
      clipMv( mergeMVL0, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps );
      clipMv( mergeMVL1, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps );
    #else
    
      clipMv(mergeMVL0, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps);
      clipMv(mergeMVL1, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps);
    
    
      /*L0 MC for refinement*/
      {
        int offset;
        int leftPixelExtra = (NTAPS_LUMA >> 1) - 1;
        offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (m_cYuvRefBuffDMVRL0.bufs[COMPONENT_Y].stride + 1);
        offset += (-(int)DMVR_NUM_ITERATION)* (int)m_cYuvRefBuffDMVRL0.bufs[COMPONENT_Y].stride;
        offset += (-(int)DMVR_NUM_ITERATION);
        PelBuf srcBuf = m_cYuvRefBuffDMVRL0.bufs[COMPONENT_Y];
        PelUnitBuf yuvPredTempL0 = PelUnitBuf(pu.chromaFormat, PelBuf(m_cYuvPredTempDMVRL0,
    
    #if JVET_O0297_DMVR_PADDING // For Dec speedup
          m_biLinearBufStride
    #else
          (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION))
    
          , pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION)));
    
    #if JVET_O1164_RPR
        xPredInterBlk( COMPONENT_Y, pu, pu.cu->slice->getRefPic( REF_PIC_LIST_0, refIdx0 )->unscaledPic, mergeMVL0, yuvPredTempL0, true, clpRngs.comp[COMPONENT_Y],
    
          false, false, pu.cu->slice->getScalingRatio( REF_PIC_LIST_0, refIdx0 ), pu.lwidth() + ( 2 * DMVR_NUM_ITERATION ), pu.lheight() + ( 2 * DMVR_NUM_ITERATION ), true, ( (Pel *)srcBuf.buf ) + offset, srcBuf.stride );
    
        xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(REF_PIC_LIST_0, refIdx0), mergeMVL0, yuvPredTempL0, true, clpRngs.comp[COMPONENT_Y],
          false, false, pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION), true, ((Pel *)srcBuf.buf) + offset, srcBuf.stride
    
      }
    
      /*L1 MC for refinement*/
      {
        int offset;
        int leftPixelExtra = (NTAPS_LUMA >> 1) - 1;
        offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (m_cYuvRefBuffDMVRL1.bufs[COMPONENT_Y].stride + 1);
        offset += (-(int)DMVR_NUM_ITERATION)* (int)m_cYuvRefBuffDMVRL1.bufs[COMPONENT_Y].stride;
        offset += (-(int)DMVR_NUM_ITERATION);
        PelBuf srcBuf = m_cYuvRefBuffDMVRL1.bufs[COMPONENT_Y];
        PelUnitBuf yuvPredTempL1 = PelUnitBuf(pu.chromaFormat, PelBuf(m_cYuvPredTempDMVRL1,
    
    #if JVET_O0297_DMVR_PADDING // For Dec speedup
          m_biLinearBufStride
    #else
          (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION))
    #endif
          , pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION)));
    
    #if JVET_O1164_RPR
        xPredInterBlk( COMPONENT_Y, pu, pu.cu->slice->getRefPic( REF_PIC_LIST_1, refIdx1 )->unscaledPic, mergeMVL1, yuvPredTempL1, true, clpRngs.comp[COMPONENT_Y],
    
          false, false, pu.cu->slice->getScalingRatio( REF_PIC_LIST_1, refIdx1 ), pu.lwidth() + ( 2 * DMVR_NUM_ITERATION ), pu.lheight() + ( 2 * DMVR_NUM_ITERATION ), true, ( (Pel *)srcBuf.buf ) + offset, srcBuf.stride );
    
        xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(REF_PIC_LIST_1, refIdx1), mergeMVL1, yuvPredTempL1, true, clpRngs.comp[COMPONENT_Y],
          false, false, pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION), true, ((Pel *)srcBuf.buf) + offset, srcBuf.stride
    
    void InterPrediction::xProcessDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvDst, const ClpRngs &clpRngs, const bool bioApplied)
    
    Jeeva Raj A's avatar
    Jeeva Raj A committed
      int iterationCount = 1;
    
      /*Always High Precision*/
      int mvShift = MV_FRACTIONAL_BITS_INTERNAL;
    
      /*use merge MV as starting MV*/
      Mv mergeMv[] = { pu.mv[REF_PIC_LIST_0] , pu.mv[REF_PIC_LIST_1] };
    
    
      m_biLinearBufStride = (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION));
    
    
      int dy = std::min<int>(pu.lumaSize().height, DMVR_SUBCU_HEIGHT);
      int dx = std::min<int>(pu.lumaSize().width,  DMVR_SUBCU_WIDTH);
    
    #if !JVET_O0297_DMVR_PADDING
    
      /*L0 Padding*/
      m_cYuvRefBuffDMVRL0 = (pu.chromaFormat == CHROMA_400 ?
        PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y())) :
        PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y()),
          PelBuf(m_cRefSamplesDMVRL0[1], pcYuvDst.Cb()), PelBuf(m_cRefSamplesDMVRL0[2], pcYuvDst.Cr())));
    
      xPrefetchPad(pu, m_cYuvRefBuffDMVRL0, REF_PIC_LIST_0);
    
      /*L1 Padding*/
      m_cYuvRefBuffDMVRL1 = (pu.chromaFormat == CHROMA_400 ?
        PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y())) :
        PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y()), PelBuf(m_cRefSamplesDMVRL1[1], pcYuvDst.Cb()),
          PelBuf(m_cRefSamplesDMVRL1[2], pcYuvDst.Cr())));
    
      xPrefetchPad(pu, m_cYuvRefBuffDMVRL1, REF_PIC_LIST_1);
    
    
      JVET_J0090_SET_CACHE_ENABLE( false );
    
      xinitMC(pu, clpRngs);
    
      // point the MC buffer to its centre point to avoid a multiplication in each iteration to reach it from the beginning
    
    Jeeva Raj A's avatar
    Jeeva Raj A committed
      Pel *biLinearPredL0 = m_cYuvPredTempDMVRL0 + (DMVR_NUM_ITERATION * m_biLinearBufStride) + DMVR_NUM_ITERATION;
      Pel *biLinearPredL1 = m_cYuvPredTempDMVRL1 + (DMVR_NUM_ITERATION * m_biLinearBufStride) + DMVR_NUM_ITERATION;
    
      Position puPos = pu.lumaPos();
    
      int bd = pu.cs->slice->getClpRngs().comp[COMPONENT_Y].bd;
    
      int            bioEnabledThres = 2 * dy * dx;
    
    Chen-Yen Lai's avatar
    Chen-Yen Lai committed
      bool           bioAppliedType[MAX_NUM_SUBCU_DMVR];
    
    #if JVET_O0297_DMVR_PADDING // For Dec speedup
        int scaleX = getComponentScaleX(COMPONENT_Cb, pu.chromaFormat);
        int scaleY = getComponentScaleY(COMPONENT_Cb, pu.chromaFormat);
        m_biLinearBufStride = (dx + (2 * DMVR_NUM_ITERATION));
        // point the MC buffer to its centre point to avoid a multiplication in each iteration to reach it from the beginning
        Pel *biLinearPredL0 = m_cYuvPredTempDMVRL0 + (DMVR_NUM_ITERATION * m_biLinearBufStride) + DMVR_NUM_ITERATION;
        Pel *biLinearPredL1 = m_cYuvPredTempDMVRL1 + (DMVR_NUM_ITERATION * m_biLinearBufStride) + DMVR_NUM_ITERATION;
    
        PredictionUnit subPu = pu;
        subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(puPos.x, puPos.y, dx, dy)));
        m_cYuvRefBuffDMVRL0 = (pu.chromaFormat == CHROMA_400 ?
          PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y())) :
          PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y()),
            PelBuf(m_cRefSamplesDMVRL0[1], pcYuvDst.Cb()), PelBuf(m_cRefSamplesDMVRL0[2], pcYuvDst.Cr())));
        m_cYuvRefBuffDMVRL0 = m_cYuvRefBuffDMVRL0.subBuf(UnitAreaRelative(pu, subPu));
    
        m_cYuvRefBuffDMVRL1 = (pu.chromaFormat == CHROMA_400 ?
          PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y())) :
          PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y()), PelBuf(m_cRefSamplesDMVRL1[1], pcYuvDst.Cb()),
            PelBuf(m_cRefSamplesDMVRL1[2], pcYuvDst.Cr())));
        m_cYuvRefBuffDMVRL1 = m_cYuvRefBuffDMVRL1.subBuf(UnitAreaRelative(pu, subPu));
    
        PelUnitBuf srcPred0 = (pu.chromaFormat == CHROMA_400 ?
          PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvDst.Y())) :
          PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvDst.Y()), PelBuf(m_acYuvPred[0][1], pcYuvDst.Cb()), PelBuf(m_acYuvPred[0][2], pcYuvDst.Cr())));
        PelUnitBuf srcPred1 = (pu.chromaFormat == CHROMA_400 ?
          PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvDst.Y())) :
          PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvDst.Y()), PelBuf(m_acYuvPred[1][1], pcYuvDst.Cb()), PelBuf(m_acYuvPred[1][2], pcYuvDst.Cr())));
    
        srcPred0 = srcPred0.subBuf(UnitAreaRelative(pu, subPu));
        srcPred1 = srcPred1.subBuf(UnitAreaRelative(pu, subPu));
    #endif
    
    
        int yStart = 0;
        for (int y = puPos.y; y < (puPos.y + pu.lumaSize().height); y = y + dy, yStart = yStart + dy)
        {
          for (int x = puPos.x, xStart = 0; x < (puPos.x + pu.lumaSize().width); x = x + dx, xStart = xStart + dx)
    
    #if JVET_O0297_DMVR_PADDING
            PredictionUnit subPu = pu;
            subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(x, y, dx, dy)));
    #if! JVET_O0297_DMVR_PADDING // For Dec speedup
            /*L0 Padding*/
    #if! JVET_O0297_DMVR_PADDING // For Dec speedup
            m_cYuvRefBuffDMVRL0 = (pu.chromaFormat == CHROMA_400 ?
              PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y())) :
              PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y()),
                PelBuf(m_cRefSamplesDMVRL0[1], pcYuvDst.Cb()), PelBuf(m_cRefSamplesDMVRL0[2], pcYuvDst.Cr())));
            m_cYuvRefBuffDMVRL0 = m_cYuvRefBuffDMVRL0.subBuf(UnitAreaRelative(pu, subPu));
    #endif
            xPrefetchPad(subPu, m_cYuvRefBuffDMVRL0, REF_PIC_LIST_0);
    
            /*L1 Padding*/
    #if! JVET_O0297_DMVR_PADDING // For Dec speedup
            m_cYuvRefBuffDMVRL1 = (pu.chromaFormat == CHROMA_400 ?
              PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y())) :
              PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y()), PelBuf(m_cRefSamplesDMVRL1[1], pcYuvDst.Cb()),
                PelBuf(m_cRefSamplesDMVRL1[2], pcYuvDst.Cr())));
            m_cYuvRefBuffDMVRL1 = m_cYuvRefBuffDMVRL1.subBuf(UnitAreaRelative(pu, subPu));
    #endif
            xPrefetchPad(subPu, m_cYuvRefBuffDMVRL1, REF_PIC_LIST_1);
    #else
            xPrefetch(subPu, m_cYuvRefBuffDMVRL0, REF_PIC_LIST_0, 1);
            xPrefetch(subPu, m_cYuvRefBuffDMVRL1, REF_PIC_LIST_1, 1);
    #endif
    
            xinitMC(subPu, clpRngs);
    
    #if! JVET_O0297_DMVR_PADDING // For Dec speedup
            // point the MC buffer to its centre point to avoid a multiplication in each iteration to reach it from the beginning
            Pel *biLinearPredL0 = m_cYuvPredTempDMVRL0 + (DMVR_NUM_ITERATION * m_biLinearBufStride) + DMVR_NUM_ITERATION;
            Pel *biLinearPredL1 = m_cYuvPredTempDMVRL1 + (DMVR_NUM_ITERATION * m_biLinearBufStride) + DMVR_NUM_ITERATION;
    #endif
    #endif
    
            uint64_t minCost = MAX_UINT64;
            bool notZeroCost = true;
            int16_t totalDeltaMV[2] = { 0,0 };
            int16_t deltaMV[2] = { 0, 0 };
            uint64_t  *pSADsArray;
    
            for (int i = 0; i < (((2 * DMVR_NUM_ITERATION) + 1) * ((2 * DMVR_NUM_ITERATION) + 1)); i++)
    
            pSADsArray = &m_SADsArray[(((2 * DMVR_NUM_ITERATION) + 1) * ((2 * DMVR_NUM_ITERATION) + 1)) >> 1];
    
    #if !JVET_O0297_DMVR_PADDING
    
            Pel *addrL0Centre = biLinearPredL0 + yStart * m_biLinearBufStride + xStart;
            Pel *addrL1Centre = biLinearPredL1 + yStart * m_biLinearBufStride + xStart;
    
            for (int i = 0; i < iterationCount; i++)
            {
              deltaMV[0] = 0;
              deltaMV[1] = 0;
    
    #if JVET_O0297_DMVR_PADDING
              Pel *addrL0 = biLinearPredL0 + totalDeltaMV[0] + (totalDeltaMV[1] * m_biLinearBufStride);
              Pel *addrL1 = biLinearPredL1 - totalDeltaMV[0] - (totalDeltaMV[1] * m_biLinearBufStride);
    #else
    
              Pel *addrL0 = addrL0Centre + totalDeltaMV[0] + (totalDeltaMV[1] * m_biLinearBufStride);
              Pel *addrL1 = addrL1Centre - totalDeltaMV[0] - (totalDeltaMV[1] * m_biLinearBufStride);
    
              if (i == 0)
              {
                minCost = xDMVRCost(clpRngs.comp[COMPONENT_Y].bd, addrL0, m_biLinearBufStride, addrL1, m_biLinearBufStride, dx, dy);
    
    Yi-Wen Chen's avatar
    Yi-Wen Chen committed
    #if JVET_O0590_REDUCE_DMVR_ORIG_MV_COST
    
                minCost -= (minCost >>2);
    
    Yi-Wen Chen's avatar
    Yi-Wen Chen committed
    #endif
    
                if (minCost < (dx * dy))
    
                {
                  notZeroCost = false;
                  break;
                }
                pSADsArray[0] = minCost;
              }
              if (!minCost)
              {
                notZeroCost = false;
                break;
              }
    
              xBIPMVRefine(bd, addrL0, addrL1, minCost, deltaMV, pSADsArray, dx, dy);
    
              if (deltaMV[0] == 0 && deltaMV[1] == 0)
              {
                break;
              }
              totalDeltaMV[0] += deltaMV[0];
              totalDeltaMV[1] += deltaMV[1];
    
              pSADsArray += ((deltaMV[1] * (((2 * DMVR_NUM_ITERATION) + 1))) + deltaMV[0]);
    
    #if JVET_O0055_INT_DMVR_DIS_BDOF
    
    Chen-Yen Lai's avatar
    Chen-Yen Lai committed
            bioAppliedType[num] = (minCost < bioEnabledThres) ? false : bioApplied;
    
            totalDeltaMV[0] = (totalDeltaMV[0] << mvShift);
            totalDeltaMV[1] = (totalDeltaMV[1] << mvShift);
            xDMVRSubPixelErrorSurface(notZeroCost, totalDeltaMV, deltaMV, pSADsArray);
    
            pu.mvdL0SubPu[num] = Mv(totalDeltaMV[0], totalDeltaMV[1]);
    
    #if JVET_O0297_DMVR_PADDING
    #if! JVET_O0297_DMVR_PADDING // For Dec speedup
            PelUnitBuf srcPred0 = (pu.chromaFormat == CHROMA_400 ?
            PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvDst.Y())) :
            PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvDst.Y()), PelBuf(m_acYuvPred[0][1], pcYuvDst.Cb()), PelBuf(m_acYuvPred[0][2], pcYuvDst.Cr())));
            PelUnitBuf srcPred1 = (pu.chromaFormat == CHROMA_400 ?
            PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvDst.Y())) :
            PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvDst.Y()), PelBuf(m_acYuvPred[1][1], pcYuvDst.Cb()), PelBuf(m_acYuvPred[1][2], pcYuvDst.Cr())));
    
            srcPred0 = srcPred0.subBuf(UnitAreaRelative(pu, subPu));
            srcPred1 = srcPred1.subBuf(UnitAreaRelative(pu, subPu));
    #endif
            PelUnitBuf subPredBuf = pcYuvDst.subBuf(UnitAreaRelative(pu, subPu));
    
    #if JVET_O0297_DMVR_PADDING // For Dec speedup
            bool blockMoved = false;
            if (pu.mvdL0SubPu[num] != Mv(0, 0))
            {
              blockMoved = true;
              xPrefetch(subPu, m_cYuvRefBuffDMVRL0, REF_PIC_LIST_0, 0);
              xPrefetch(subPu, m_cYuvRefBuffDMVRL1, REF_PIC_LIST_1, 0);
              xPad(subPu, m_cYuvRefBuffDMVRL0, REF_PIC_LIST_0);
              xPad(subPu, m_cYuvRefBuffDMVRL1, REF_PIC_LIST_1);
            }
    #endif
    
            int dstStride[MAX_NUM_COMPONENT] = { pcYuvDst.bufs[COMPONENT_Y].stride, pcYuvDst.bufs[COMPONENT_Cb].stride, pcYuvDst.bufs[COMPONENT_Cr].stride };
            subPu.mv[0] = mergeMv[REF_PIC_LIST_0] + pu.mvdL0SubPu[num];
            subPu.mv[1] = mergeMv[REF_PIC_LIST_1] - pu.mvdL0SubPu[num];
    
            subPu.mv[0].clipToStorageBitDepth();
            subPu.mv[1].clipToStorageBitDepth();
    
    
    #if JVET_O0055_INT_DMVR_DIS_BDOF
    
            xFinalPaddedMCForDMVR(subPu, srcPred0, srcPred1, m_cYuvRefBuffDMVRL0, m_cYuvRefBuffDMVRL1, bioAppliedType[num], mergeMv
    #else
            xFinalPaddedMCForDMVR(subPu, srcPred0, srcPred1, m_cYuvRefBuffDMVRL0, m_cYuvRefBuffDMVRL1, bioApplied, mergeMv
    #endif
    #if JVET_O0297_DMVR_PADDING // For Dec speedup
              , blockMoved
    #endif
            );
    
            subPredBuf.bufs[COMPONENT_Y].buf = pcYuvDst.bufs[COMPONENT_Y].buf + xStart + yStart * dstStride[COMPONENT_Y];
    
    #if !JVET_O0297_DMVR_PADDING // For Dec speedup
            int scaleX = getComponentScaleX(COMPONENT_Cb, pu.chromaFormat);
            int scaleY = getComponentScaleY(COMPONENT_Cb, pu.chromaFormat);
    #endif
            subPredBuf.bufs[COMPONENT_Cb].buf = pcYuvDst.bufs[COMPONENT_Cb].buf + (xStart >> scaleX) + ((yStart >> scaleY) * dstStride[COMPONENT_Cb]);
    
    #if !JVET_O0297_DMVR_PADDING // For Dec speedup
            scaleX = getComponentScaleX(COMPONENT_Cr, pu.chromaFormat);
            scaleY = getComponentScaleY(COMPONENT_Cr, pu.chromaFormat);
    #endif
            subPredBuf.bufs[COMPONENT_Cr].buf = pcYuvDst.bufs[COMPONENT_Cr].buf + (xStart >> scaleX) + ((yStart >> scaleY) * dstStride[COMPONENT_Cr]);
    
    
    #if JVET_O0055_INT_DMVR_DIS_BDOF
    
            xWeightedAverage(subPu, srcPred0, srcPred1, subPredBuf, subPu.cu->slice->getSPS()->getBitDepths(), subPu.cu->slice->clpRngs(), bioAppliedType[num]);
    #else
    
            xWeightedAverage(subPu, srcPred0, srcPred1, subPredBuf, subPu.cu->slice->getSPS()->getBitDepths(), subPu.cu->slice->clpRngs(), bioApplied);
    
    #if !JVET_O0297_DMVR_PADDING
    
      {
        PredictionUnit subPu = pu;
        subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(puPos.x, puPos.y, dx, dy)));
        PelUnitBuf           m_cYuvRefBuffSubCuDMVRL0;
        PelUnitBuf           m_cYuvRefBuffSubCuDMVRL1;
        PelUnitBuf srcPred0 = (pu.chromaFormat == CHROMA_400 ?
          PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvDst.Y())) :
          PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvDst.Y()), PelBuf(m_acYuvPred[0][1], pcYuvDst.Cb()), PelBuf(m_acYuvPred[0][2], pcYuvDst.Cr())));
        PelUnitBuf srcPred1 = (pu.chromaFormat == CHROMA_400 ?
          PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvDst.Y())) :
          PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvDst.Y()), PelBuf(m_acYuvPred[1][1], pcYuvDst.Cb()), PelBuf(m_acYuvPred[1][2], pcYuvDst.Cr())));
    
        srcPred0 = srcPred0.subBuf(UnitAreaRelative(pu, subPu));
        srcPred1 = srcPred1.subBuf(UnitAreaRelative(pu, subPu));
        PelUnitBuf subPredBuf = pcYuvDst.subBuf(UnitAreaRelative(pu, subPu));
    
        int x = 0, y = 0;
        int xStart = 0, yStart = 0;
    
    
        int dstStride[MAX_NUM_COMPONENT] = { pcYuvDst.bufs[COMPONENT_Y].stride, pcYuvDst.bufs[COMPONENT_Cb].stride, pcYuvDst.bufs[COMPONENT_Cr].stride };
        for (y = puPos.y; y < (puPos.y + pu.lumaSize().height); y = y + dy, yStart = yStart + dy)
        {
          for (x = puPos.x, xStart = 0; x < (puPos.x + pu.lumaSize().width); x = x + dx, xStart = xStart + dx)
          {
            subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(x, y, dx, dy)));
    
            subPu.mv[0] = mergeMv[REF_PIC_LIST_0] + pu.mvdL0SubPu[num];
            subPu.mv[1] = mergeMv[REF_PIC_LIST_1] - pu.mvdL0SubPu[num];
    
            subPu.mv[0].clipToStorageBitDepth();
            subPu.mv[1].clipToStorageBitDepth();
    
            m_cYuvRefBuffSubCuDMVRL0 = m_cYuvRefBuffDMVRL0.subBuf(UnitAreaRelative(pu, subPu));
            m_cYuvRefBuffSubCuDMVRL1 = m_cYuvRefBuffDMVRL1.subBuf(UnitAreaRelative(pu, subPu));
    
    #if JVET_O0055_INT_DMVR_DIS_BDOF
    
    Chen-Yen Lai's avatar
    Chen-Yen Lai committed
            xFinalPaddedMCForDMVR(subPu, srcPred0, srcPred1, m_cYuvRefBuffSubCuDMVRL0, m_cYuvRefBuffSubCuDMVRL1, bioAppliedType[num], mergeMv);
    
            xFinalPaddedMCForDMVR(subPu, srcPred0, srcPred1, m_cYuvRefBuffSubCuDMVRL0, m_cYuvRefBuffSubCuDMVRL1, bioApplied, mergeMv);
    
    
            subPredBuf.bufs[COMPONENT_Y].buf  = pcYuvDst.bufs[COMPONENT_Y].buf + xStart + yStart * dstStride[COMPONENT_Y];
    
            int scaleX = getComponentScaleX(COMPONENT_Cb, pu.chromaFormat);
            int scaleY =  getComponentScaleY(COMPONENT_Cb, pu.chromaFormat);
            subPredBuf.bufs[COMPONENT_Cb].buf = pcYuvDst.bufs[COMPONENT_Cb].buf + (xStart >> scaleX) + ((yStart >> scaleY) * dstStride[COMPONENT_Cb]);
    
            scaleX =  getComponentScaleX(COMPONENT_Cr, pu.chromaFormat);
            scaleY =  getComponentScaleY(COMPONENT_Cr, pu.chromaFormat);
            subPredBuf.bufs[COMPONENT_Cr].buf = pcYuvDst.bufs[COMPONENT_Cr].buf + (xStart >> scaleX) + ((yStart >> scaleY) * dstStride[COMPONENT_Cr]);
    
    
    #if JVET_O0055_INT_DMVR_DIS_BDOF
    
            xWeightedAverage(subPu, srcPred0, srcPred1, subPredBuf, subPu.cu->slice->getSPS()->getBitDepths(), subPu.cu->slice->clpRngs(), bioAppliedType[num]);
    #else
    
            xWeightedAverage(subPu, srcPred0, srcPred1, subPredBuf, subPu.cu->slice->getSPS()->getBitDepths(), subPu.cu->slice->clpRngs(), bioApplied);
    
    #endif
      JVET_J0090_SET_CACHE_ENABLE(true);
    
    #if JVET_J0090_MEMORY_BANDWITH_MEASURE
    // Registers `cache` as the cache model of this object and of its interpolation
    // filter (m_if), then re-initialises the interpolation filter, passing it the
    // negated value of cache->isCacheEnable().
    // `cache` is dereferenced unconditionally and therefore must not be null.
    void InterPrediction::cacheAssign( CacheModel *cache )
    {
      m_cacheModel = cache;
      m_if.cacheAssign( cache );

      // Queried only after the filter has received the cache, as in the original order.
      const bool cacheIsOff = !cache->isCacheEnable();
      m_if.initInterpolationFilter( cacheIsOff );
    }
    #endif
    
    
    #if JVET_O1170_IBC_VIRTUAL_BUFFER
    // Copies the reconstructed samples of `cu` into the IBC buffer (m_IBCBuffer).
    //
    // For every valid component block of every TU of `cu`, the block position is
    // mapped into the buffer by masking:
    //   - horizontally with ((m_IBCBufferWidth >> shiftSample) - 1),
    //   - vertically   with ((1 << ctuSizeLog2) - 1),
    // and the reconstruction (cu.cs->getRecoBuf) of that block is copied to the
    // mapped position.
    //
    // NOTE(review): the masks act as a modulo only if the buffer width and the CTU
    // size are powers of two - presumably guaranteed by the caller, confirm.
    void InterPrediction::xFillIBCBuffer(CodingUnit &cu)
    {
      for (auto &currTU : CU::traverseTUs(cu))
      {
        for (const CompArea &area : currTU.blocks)
        {
          // Skip component blocks that are not valid for this TU.
          if (!area.valid())
          {
            continue;
          }

          const unsigned int lcuWidth = cu.cs->slice->getSPS()->getMaxCUWidth();
          // Horizontal sub-sampling shift of this component; also applied to the CTU size below.
          const int shiftSample = ::getComponentScaleX(area.compID, cu.chromaFormat);
          const int ctuSizeLog2 = floorLog2(lcuWidth) - shiftSample;

          // Position of the block inside the IBC buffer.
          const int pux = area.x & ((m_IBCBufferWidth >> shiftSample) - 1);
          const int puy = area.y & (( 1 << ctuSizeLog2 ) - 1);
          const CompArea dstArea = CompArea(area.compID, cu.chromaFormat, Position(pux, puy), Size(area.width, area.height));

          CPelBuf srcBuf = cu.cs->getRecoBuf(area);
          PelBuf dstBuf = m_IBCBuffer.getBuf(dstArea);

          dstBuf.copyFrom(srcBuf);
        }
      }
    }
    
    // Builds the IBC prediction of component `compID` of `pu` by copying samples
    // from the IBC buffer (m_IBCBuffer) into predBuf.bufs[compID].
    //
    // Side effect: pu.bv is overwritten with pu.mv[REF_PIC_LIST_0] converted from
    // MV_PRECISION_INTERNAL to MV_PRECISION_INT.
    //
    // The reference position is the block position displaced by the block vector,
    // masked into the buffer. If the reference block would run past the right
    // edge of the buffer, it is copied in two parts: the columns up to the edge
    // first, then the remaining columns starting at buffer column 0.
    void InterPrediction::xIntraBlockCopy(PredictionUnit &pu, PelUnitBuf &predBuf, const ComponentID compID)
    {
      const unsigned int lcuWidth = pu.cs->slice->getSPS()->getMaxCUWidth();
      int shiftSample = ::getComponentScaleX(compID, pu.chromaFormat);

      const int ctuSizeLog2 = floorLog2(lcuWidth) - shiftSample;

      pu.bv = pu.mv[REF_PIC_LIST_0];
      pu.bv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT);
      int refx, refy;
      if (compID == COMPONENT_Y)
      {
        refx = pu.Y().x + pu.bv.hor;
        refy = pu.Y().y + pu.bv.ver;
      }
      else
      {//Cb or Cr
        // NOTE(review): the vertical component is shifted by the horizontal scale
        // (getComponentScaleX); confirm this is intended for all chroma formats.
        refx = pu.Cb().x + (pu.bv.hor >> shiftSample);
        refy = pu.Cb().y + (pu.bv.ver >> shiftSample);
      }
      // Map the reference position into the buffer.
      refx &= ((m_IBCBufferWidth >> shiftSample) - 1);
      refy &= ((1 << ctuSizeLog2) - 1);

      if (refx + predBuf.bufs[compID].width <= (m_IBCBufferWidth >> shiftSample))
      {
        // The reference block lies entirely inside the buffer width: single copy.
        const CompArea srcArea = CompArea(compID, pu.chromaFormat, Position(refx, refy), Size(predBuf.bufs[compID].width, predBuf.bufs[compID].height));
        const CPelBuf refBuf = m_IBCBuffer.getBuf(srcArea);
        predBuf.bufs[compID].copyFrom(refBuf);
      }
      else
      {//wrap around
        // First part: columns from refx up to the right edge of the buffer.
        int width = (m_IBCBufferWidth >> shiftSample) - refx;
        CompArea srcArea = CompArea(compID, pu.chromaFormat, Position(refx, refy), Size(width, predBuf.bufs[compID].height));
        CPelBuf srcBuf = m_IBCBuffer.getBuf(srcArea);
        PelBuf dstBuf = PelBuf(predBuf.bufs[compID].bufAt(Position(0, 0)), predBuf.bufs[compID].stride, Size(width, predBuf.bufs[compID].height));
        dstBuf.copyFrom(srcBuf);

        // Second part: the remaining columns, read from buffer column 0 and
        // written right after the first part in the destination.
        width = refx + predBuf.bufs[compID].width - (m_IBCBufferWidth >> shiftSample);
        srcArea = CompArea(compID, pu.chromaFormat, Position(0, refy), Size(width, predBuf.bufs[compID].height));
        srcBuf = m_IBCBuffer.getBuf(srcArea);
        dstBuf = PelBuf(predBuf.bufs[compID].bufAt(Position((m_IBCBufferWidth >> shiftSample) - refx, 0)), predBuf.bufs[compID].stride, Size(width, predBuf.bufs[compID].height));
        dstBuf.copyFrom(srcBuf);
      }
    }
    
    #if JVET_O1170_CHECK_BV_AT_DECODER
    void InterPrediction::resetIBCBuffer(const ChromaFormat chromaFormatIDC, const int ctuSize)
    {
      const UnitArea area = UnitArea(chromaFormatIDC, Area(0, 0, m_IBCBufferWidth, ctuSize));
      m_IBCBuffer.getBuf(area).fill(-1);
    }
    
    void InterPrediction::resetVPDUforIBC(const ChromaFormat chromaFormatIDC, const int ctuSize, const int vSize, const int xPos, const int yPos)
    {
      const UnitArea area = UnitArea(chromaFormatIDC, Area(xPos & (m_IBCBufferWidth - 1), yPos & (ctuSize - 1), vSize, vSize));
      m_IBCBuffer.getBuf(area).fill(-1);
    }
    
    bool InterPrediction::isLumaBvValid(const int ctuSize, const int xCb, const int yCb, const int width, const int height, const int xBv, const int yBv)
    {
      if(((yCb + yBv) & (ctuSize - 1)) + height > ctuSize)
      {
        return false;
      }
      int refTLx = xCb + xBv;
      int refTLy = (yCb + yBv) & (ctuSize - 1);
      PelBuf buf = m_IBCBuffer.Y();
      for(int x = 0; x < width; x += 4)
      {
        for(int y = 0; y < height; y += 4)
        {
          if(buf.at((x + refTLx) & (m_IBCBufferWidth - 1), y + refTLy) == -1) return false;
          if(buf.at((x + 3 + refTLx) & (m_IBCBufferWidth - 1), y + refTLy) == -1) return false;
          if(buf.at((x + refTLx) & (m_IBCBufferWidth - 1), y + 3 + refTLy) == -1) return false;
          if(buf.at((x + 3 + refTLx) & (m_IBCBufferWidth - 1), y + 3 + refTLy) == -1) return false;
        }
      }
      return true;
    }
    #endif
    #endif
    
    bool InterPrediction::xPredInterBlkRPR( const std::pair<int, int>& scalingRatio, const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv& mv, PelUnitBuf& dstPic, const bool bi, const bool wrapRef, const ClpRng& clpRng )
    
    {
      const ChromaFormat  chFmt = pu.chromaFormat;
      const bool          rndRes = !bi;
    
      int shiftHor = MV_FRACTIONAL_BITS_INTERNAL + ::getComponentScaleX( compID, chFmt );
      int shiftVer = MV_FRACTIONAL_BITS_INTERNAL + ::getComponentScaleY( compID, chFmt );
    
      PelBuf &dstBuf = dstPic.bufs[compID];
      unsigned width = dstBuf.width;
      unsigned height = dstBuf.height;
      CPelBuf refBuf;