Skip to content
Snippets Groups Projects
InterPrediction.cpp 76.7 KiB
Newer Older
  • Learn to ignore specific revisions
  •   int           offsetPos = widthG*BIO_EXTEND_SIZE + BIO_EXTEND_SIZE;
    
      Pel*          gradX0 = m_gradX0;
      Pel*          gradX1 = m_gradX1;
      Pel*          gradY0 = m_gradY0;
      Pel*          gradY1 = m_gradY1;
    
      int           stridePredMC = widthG + 2;
      const Pel*    srcY0 = m_filteredBlockTmp[2][COMPONENT_Y] + stridePredMC + 1;
      const Pel*    srcY1 = m_filteredBlockTmp[3][COMPONENT_Y] + stridePredMC + 1;
      const int     src0Stride = stridePredMC;
      const int     src1Stride = stridePredMC;
    
      Pel*          dstY = yuvDst.Y().buf;
      const int     dstStride = yuvDst.Y().stride;
      const Pel*    srcY0Temp = srcY0;
      const Pel*    srcY1Temp = srcY1;
    
    
      for (int refList = 0; refList < NUM_REF_PIC_LIST_01; refList++)
      {
        Pel* dstTempPtr = m_filteredBlockTmp[2 + refList][COMPONENT_Y] + stridePredMC + 1;
    
        Pel* gradY = (refList == 0) ? m_gradY0 : m_gradY1;
        Pel* gradX = (refList == 0) ? m_gradX0 : m_gradX1;
    
        xBioGradFilter(dstTempPtr, stridePredMC, widthG, heightG, widthG, gradX, gradY, clipBitDepths.recon[toChannelType(COMPONENT_Y)]);
    
        Pel* padStr = m_filteredBlockTmp[2 + refList][COMPONENT_Y] + 2 * stridePredMC + 2;
        for (int y = 0; y< height; y++)
    
          padStr[-1] = padStr[0];
          padStr[width] = padStr[width - 1];
          padStr += stridePredMC;
    
        padStr = m_filteredBlockTmp[2 + refList][COMPONENT_Y] + 2 * stridePredMC + 1;
        ::memcpy(padStr - stridePredMC, padStr, sizeof(Pel)*(widthG));
        ::memcpy(padStr + height*stridePredMC, padStr + (height - 1)*stridePredMC, sizeof(Pel)*(widthG));
    
      }
    
      const ClpRng& clpRng = pu.cu->cs->slice->clpRng(COMPONENT_Y);
      const int   bitDepth = clipBitDepths.recon[toChannelType(COMPONENT_Y)];
      const int   shiftNum = IF_INTERNAL_PREC + 1 - bitDepth;
      const int   offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
    
      const int   limit = (1<<(std::max<int>(5, bitDepth - 7)));
    
      int*     dotProductTemp1 = m_dotProduct1;
      int*     dotProductTemp2 = m_dotProduct2;
      int*     dotProductTemp3 = m_dotProduct3;
      int*     dotProductTemp5 = m_dotProduct5;
      int*     dotProductTemp6 = m_dotProduct6;
    
      xCalcBIOPar(srcY0Temp, srcY1Temp, gradX0, gradX1, gradY0, gradY1, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, src0Stride, src1Stride, widthG, widthG, heightG, bitDepth);
    
      int xUnit = (width >> 2);
      int yUnit = (height >> 2);
    
      Pel *dstY0 = dstY;
      gradX0 = m_gradX0; gradX1 = m_gradX1;
      gradY0 = m_gradY0; gradY1 = m_gradY1;
    
    
      for (int yu = 0; yu < yUnit; yu++)
      {
        for (int xu = 0; xu < xUnit; xu++)
        {
    
          if (m_bioPredSubBlkDist[yu*xUnit + xu] < m_bioSubBlkDistThres)
          {
    
            srcY0Temp = srcY0 + (stridePredMC + 1) + ((yu*src0Stride + xu) << 2);
            srcY1Temp = srcY1 + (stridePredMC + 1) + ((yu*src1Stride + xu) << 2);
            dstY0 = dstY + ((yu*dstStride + xu) << 2);
    
            PelBuf dstPelBuf(dstY0, dstStride, Size(4, 4));
            dstPelBuf.addAvg(CPelBuf(srcY0Temp, src0Stride, Size(4, 4)), CPelBuf(srcY1Temp, src1Stride, Size(4, 4)), clpRng);
    
    
          int     sGxdI = 0, sGydI = 0, sGxGy = 0, sGx2 = 0, sGy2 = 0;
          int     tmpx = 0, tmpy = 0;
    
          dotProductTemp1 = m_dotProduct1 + offsetPos + ((yu*widthG + xu) << 2);
          dotProductTemp2 = m_dotProduct2 + offsetPos + ((yu*widthG + xu) << 2);
          dotProductTemp3 = m_dotProduct3 + offsetPos + ((yu*widthG + xu) << 2);
          dotProductTemp5 = m_dotProduct5 + offsetPos + ((yu*widthG + xu) << 2);
          dotProductTemp6 = m_dotProduct6 + offsetPos + ((yu*widthG + xu) << 2);
    
          xCalcBlkGradient(xu << 2, yu << 2, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, sGx2, sGy2, sGxGy, sGxdI, sGydI, widthG, heightG, (1 << 2));
    
    
          if (sGx2 > 0)
          {
            tmpx = rightShiftMSB(sGxdI << 3, sGx2);
            tmpx = Clip3(-limit, limit, tmpx);
          }
          if (sGy2 > 0)
          {
            int     mainsGxGy = sGxGy >> 12;
            int     secsGxGy = sGxGy & ((1 << 12) - 1);
            int     tmpData = tmpx * mainsGxGy;
            tmpData = ((tmpData << 12) + tmpx*secsGxGy) >> 1;
            tmpy = rightShiftMSB(((sGydI << 3) - tmpData), sGy2);
            tmpy = Clip3(-limit, limit, tmpy);
          }
    
    
          srcY0Temp = srcY0 + (stridePredMC + 1) + ((yu*src0Stride + xu) << 2);
          srcY1Temp = srcY1 + (stridePredMC + 1) + ((yu*src0Stride + xu) << 2);
          gradX0 = m_gradX0 + offsetPos + ((yu*widthG + xu) << 2);
          gradX1 = m_gradX1 + offsetPos + ((yu*widthG + xu) << 2);
          gradY0 = m_gradY0 + offsetPos + ((yu*widthG + xu) << 2);
          gradY1 = m_gradY1 + offsetPos + ((yu*widthG + xu) << 2);
    
          dstY0 = dstY + ((yu*dstStride + xu) << 2);
    
          xAddBIOAvg4(srcY0Temp, src0Stride, srcY1Temp, src1Stride, dstY0, dstStride, gradX0, gradX1, gradY0, gradY1, widthG, (1 << 2), (1 << 2), (int)tmpx, (int)tmpy, shiftNum, offset, clpRng);
    
        }  // xu
      }  // yu
    }
    
    
    bool InterPrediction::xCalcBiPredSubBlkDist(const PredictionUnit &pu, const Pel* pYuvSrc0, const int src0Stride, const Pel* pYuvSrc1, const int src1Stride, const BitDepths &clipBitDepths)
    {
      const int     width = pu.lwidth();
      const int     height = pu.lheight();
      const int     clipbd = clipBitDepths.recon[toChannelType(COMPONENT_Y)];
      const uint32_t distortionShift = DISTORTION_PRECISION_ADJUSTMENT(clipbd);
      const int     shift = std::max<int>(2, (IF_INTERNAL_PREC - clipbd));
      const int     xUnit = (width >> 2);
      const int     yUnit = (height >> 2);
    
      m_bioDistThres = (shift <= 5) ? (((32 << (clipbd - 8))*width*height) >> (5 - shift)) : (((32 << (clipbd - 8))*width*height) << (shift - 5));
    
      m_bioSubBlkDistThres = (shift <= 5) ? (((64 << (clipbd - 8)) << 4) >> (5 - shift)) : (((64 << (clipbd - 8)) << 4) << (shift - 5)); 
    
    
      m_bioDistThres >>= distortionShift;
      m_bioSubBlkDistThres >>= distortionShift;
    
      DistParam cDistParam;
      Distortion dist = 0;
      for (int yu = 0, blkIdx = 0; yu < yUnit; yu++)
      {
        for (int xu = 0; xu < xUnit; xu++, blkIdx++)
        {
          const Pel* pPred0 = pYuvSrc0 + ((yu*src0Stride + xu) << 2);
          const Pel* pPred1 = pYuvSrc1 + ((yu*src1Stride + xu) << 2);
    
          m_pcRdCost->setDistParam(cDistParam, pPred0, pPred1, src0Stride, src1Stride, clipbd, COMPONENT_Y, (1 << 2), (1 << 2), 0, 1, false, true);
          m_bioPredSubBlkDist[blkIdx] = cDistParam.distFunc(cDistParam);
          dist += m_bioPredSubBlkDist[blkIdx];
        }
      }
    
      return (dist >= m_bioDistThres);
    }
    
    
    /// Thin forwarding wrapper: hands every argument, unchanged and in the same
    /// order, to the addBIOAvg4 entry of the global g_pelBufOP operation table.
    ///
    /// \param src0, src0Stride   first prediction and its stride
    /// \param src1, src1Stride   second prediction and its stride
    /// \param dst, dstStride     destination and its stride
    /// \param gradX0..gradY1     gradient buffers sharing the stride gradStride
    /// \param width, height      size of the processed area
    /// \param tmpx, tmpy         motion refinement terms passed through as-is
    /// \param shift, offset      rounding parameters passed through as-is
    /// \param clpRng             clipping range passed through as-is
    void InterPrediction::xAddBIOAvg4(const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng)
    {
      g_pelBufOP.addBIOAvg4(
        src0, src0Stride,
        src1, src1Stride,
        dst, dstStride,
        gradX0, gradX1,
        gradY0, gradY1,
        gradStride,
        width, height,
        tmpx, tmpy,
        shift, offset,
        clpRng);
    }
    
    
    /// Thin forwarding wrapper around the bioGradFilter entry of the global
    /// g_pelBufOP operation table; all arguments are passed through unchanged.
    ///
    /// \param pSrc, srcStride   source samples and their stride
    /// \param width, height     size of the processed area
    /// \param gradStride        stride shared by gradX and gradY
    /// \param gradX, gradY      output gradient buffers
    /// \param bitDepth          bit depth passed through as-is
    void InterPrediction::xBioGradFilter(Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, int bitDepth)
    {
      g_pelBufOP.bioGradFilter(
        pSrc, srcStride,
        width, height,
        gradStride,
        gradX, gradY,
        bitDepth);
    }
    
    /// Thin forwarding wrapper around the calcBIOPar entry of the global
    /// g_pelBufOP operation table; all arguments are passed through unchanged.
    ///
    /// \param srcY0Temp, srcY1Temp        the two luma predictions
    /// \param gradX0..gradY1              gradient buffers of both predictions
    /// \param dotProductTemp1..6          output buffers filled by calcBIOPar
    /// \param src0Stride, src1Stride      strides of the two predictions
    /// \param gradStride                  stride of the gradient buffers
    /// \param widthG, heightG             size of the processed area
    /// \param bitDepth                    bit depth passed through as-is
    void InterPrediction::xCalcBIOPar(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, int bitDepth)
    {
      g_pelBufOP.calcBIOPar(
        srcY0Temp, srcY1Temp,
        gradX0, gradX1,
        gradY0, gradY1,
        dotProductTemp1, dotProductTemp2, dotProductTemp3,
        dotProductTemp5, dotProductTemp6,
        src0Stride, src1Stride,
        gradStride,
        widthG, heightG,
        bitDepth);
    }
    
    
    /// Thin forwarding wrapper around the calcBlkGradient entry of the global
    /// g_pelBufOP operation table; all arguments are passed through unchanged.
    ///
    /// \param sx, sy                      position of the unit inside the block
    /// \param arraysGx2..arraysGydI       input arrays, forwarded in declaration order
    /// \param sGx2, sGy2, sGxGy, sGxdI, sGydI  output sums, written by calcBlkGradient
    /// \param width, height               dimensions of the input arrays
    /// \param unitSize                    size of the unit being summed
    void InterPrediction::xCalcBlkGradient(int sx, int sy, int    *arraysGx2, int     *arraysGxGy, int     *arraysGxdI, int     *arraysGy2, int     *arraysGydI, int     &sGx2, int     &sGy2, int     &sGxGy, int     &sGxdI, int     &sGydI, int width, int height, int unitSize)
    {
      g_pelBufOP.calcBlkGradient(
        sx, sy,
        arraysGx2, arraysGxGy, arraysGxdI, arraysGy2, arraysGydI,
        sGx2, sGy2, sGxGy, sGxdI, sGydI,
        width, height,
        unitSize);
    }
    
    #if JVET_O0108_DIS_DMVR_BDOF_CIIP
    void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied, PelUnitBuf* yuvDstTmp /*= NULL*/)
    #else
    
    void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied )
    
    {
      const int iRefIdx0 = pu.refIdx[0];
      const int iRefIdx1 = pu.refIdx[1];
    
      if( iRefIdx0 >= 0 && iRefIdx1 >= 0 )
      {
    
        if( pu.cu->GBiIdx != GBI_DEFAULT )
        {
    
          CHECK(bioApplied, "GBi is disallowed with BIO");
    
          pcYuvDst.addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, pu.cu->GBiIdx);
    
    #if JVET_O0108_DIS_DMVR_BDOF_CIIP
          if (yuvDstTmp)
            yuvDstTmp->copyFrom(pcYuvDst);
    #endif
    
          const int  src0Stride = pu.lwidth() + 2 * BIO_EXTEND_SIZE + 2;
          const int  src1Stride = pu.lwidth() + 2 * BIO_EXTEND_SIZE + 2;
    
          const Pel* pSrcY0 = m_filteredBlockTmp[2][COMPONENT_Y] + 2 * src0Stride + 2;
          const Pel* pSrcY1 = m_filteredBlockTmp[3][COMPONENT_Y] + 2 * src1Stride + 2;
    
    
    #if JVET_O0055_INT_DMVR_DIS_BDOF 
          bool bioEnabled = true;
    #else
    
          bool bioEnabled = xCalcBiPredSubBlkDist(pu, pSrcY0, src0Stride, pSrcY1, src1Stride, clipBitDepths);
    
          if (bioEnabled)
          {
            applyBiOptFlow(pu, pcYuvSrc0, pcYuvSrc1, iRefIdx0, iRefIdx1, pcYuvDst, clipBitDepths);
    
    #if JVET_O0108_DIS_DMVR_BDOF_CIIP
            if (yuvDstTmp)
              yuvDstTmp->bufs[0].addAvg(CPelBuf(pSrcY0, src0Stride, pu.lumaSize()), CPelBuf(pSrcY1, src1Stride, pu.lumaSize()), clpRngs.comp[0]);
    #endif
    
          }
          else
          {
            pcYuvDst.bufs[0].addAvg(CPelBuf(pSrcY0, src0Stride, pu.lumaSize()), CPelBuf(pSrcY1, src1Stride, pu.lumaSize()), clpRngs.comp[0]);
    
    #if JVET_O0108_DIS_DMVR_BDOF_CIIP
            if (yuvDstTmp)
              yuvDstTmp->bufs[0].copyFrom(pcYuvDst.bufs[0]);
    #endif
    
        if (pu.cs->pps->getWPBiPred())
        {
          const int iRefIdx0 = pu.refIdx[0];
          const int iRefIdx1 = pu.refIdx[1];
          WPScalingParam  *pwp0;
          WPScalingParam  *pwp1;
          getWpScaling(pu.cu->slice, iRefIdx0, iRefIdx1, pwp0, pwp1);
          if (!bioApplied)
          {
            addWeightBiComponent(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, pcYuvDst, true, COMPONENT_Y);
          }
          addWeightBiComponent(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, pcYuvDst, true, COMPONENT_Cb);
          addWeightBiComponent(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, pcYuvDst, true, COMPONENT_Cr);
        }
        else
        {
          pcYuvDst.addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, bioApplied);
        }
    
    #if JVET_O0108_DIS_DMVR_BDOF_CIIP
        if (yuvDstTmp)
        {
          if (bioApplied)
          {
            yuvDstTmp->bufs[1].copyFrom(pcYuvDst.bufs[1]);
            yuvDstTmp->bufs[2].copyFrom(pcYuvDst.bufs[2]);
          }
          else
            yuvDstTmp->copyFrom(pcYuvDst);
        }
    #endif
    
        if( pu.cu->triangle )
        {
          pcYuvDst.copyFrom( pcYuvSrc0 );
        }
        else
    
    #if JVET_O0108_DIS_DMVR_BDOF_CIIP
        if (yuvDstTmp)
          yuvDstTmp->copyFrom(pcYuvDst);
    #endif
    
        if( pu.cu->triangle )
        {
          pcYuvDst.copyFrom( pcYuvSrc1 );
        }
        else
    
    #if JVET_O0108_DIS_DMVR_BDOF_CIIP
        if (yuvDstTmp)
          yuvDstTmp->copyFrom(pcYuvDst);
    #endif
    
    void InterPrediction::motionCompensation( PredictionUnit &pu, PelUnitBuf &predBuf, const RefPicList &eRefPicList
    
    Xiaozhong Xu's avatar
    Xiaozhong Xu committed
      , const bool luma, const bool chroma
    
    #if JVET_O0108_DIS_DMVR_BDOF_CIIP
      , PelUnitBuf* predBufWOBIO /*= NULL*/
    #endif
    
    #if JVET_O0108_DIS_DMVR_BDOF_CIIP
      CHECK(predBufWOBIO && pu.mhIntraFlag, "the case should not happen!");
    #endif
    
    
    Yu Han's avatar
    Yu Han committed
      // dual tree handling for IBC as the only ref
    
      if ((!luma || !chroma) && eRefPicList == REF_PIC_LIST_0)
    
    Xiaozhong Xu's avatar
    Xiaozhong Xu committed
      {
        if (!luma && chroma)
        {
          xChromaMC(pu, predBuf);
          return;
        }
        else // (luma && !chroma)
        {
          xPredInterUni(pu, eRefPicList, predBuf, false
            , false
            , luma, chroma);
          return;
        }
      }
      // else, go with regular MC below
    
            CodingStructure &cs = *pu.cs;
      const PPS &pps            = *cs.pps;
      const SliceType sliceType =  cs.slice->getSliceType();
    
      if( eRefPicList != REF_PIC_LIST_X )
      {
    
    #if JVET_O0108_DIS_DMVR_BDOF_CIIP
        CHECK(predBufWOBIO != NULL, "the case should not happen!");
    #endif
    
        if( ( ( sliceType == P_SLICE && pps.getUseWP() ) || ( sliceType == B_SLICE && pps.getWPBiPred() ) ) )
        {
    
          xPredInterUni         ( pu,          eRefPicList, predBuf, true
    
          xWeightedPredictionUni( pu, predBuf, eRefPicList, predBuf, -1, m_maxCompIDToPred );
        }
        else
        {
    
          xPredInterUni( pu, eRefPicList, predBuf, false
    
        CHECK( !pu.cu->affine && pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0 && ( pu.lwidth() + pu.lheight() == 12 ), "invalid 4x8/8x4 bi-predicted blocks" );
        WPScalingParam *wp0;
        WPScalingParam *wp1;
        int refIdx0 = pu.refIdx[REF_PIC_LIST_0];
        int refIdx1 = pu.refIdx[REF_PIC_LIST_1];
        pu.cs->slice->getWpScaling(REF_PIC_LIST_0, refIdx0, wp0);
        pu.cs->slice->getWpScaling(REF_PIC_LIST_1, refIdx1, wp1);
    
        bool bioApplied = false;
        const Slice &slice = *pu.cs->slice;
        if (pu.cs->sps->getBDOFEnabledFlag())
        {
    
          if (pu.cu->affine || m_subPuMC)
          {
            bioApplied = false;
          }
          else
          {
    
            const bool biocheck0 = !((wp0[COMPONENT_Y].bPresentFlag || wp1[COMPONENT_Y].bPresentFlag) && slice.getSliceType() == B_SLICE);
    
            const bool biocheck1 = !(pps.getUseWP() && slice.getSliceType() == P_SLICE);
            if (biocheck0
              && biocheck1
              && PU::isBiPredFromDifferentDir(pu)
    
              && pu.Y().height != 4
    
    #if JVET_O0108_DIS_DMVR_BDOF_CIIP
          if (bioApplied && pu.mhIntraFlag)
          {
            bioApplied = false;
          }
    #endif
    
    
          if (bioApplied && pu.cu->smvdMode)
    
            bioApplied = false;
          }
    
          if (pu.cu->cs->sps->getUseGBi() && bioApplied && pu.cu->GBiIdx != GBI_DEFAULT)
    
            bioApplied = false;
          }
    
          if (pu.mmvdEncOptMode == 2 && pu.mmvdMergeFlag)
    
            bioApplied = false;
          }
        }
        bool dmvrApplied = false;
        dmvrApplied = (pu.mvRefine) && PU::checkDMVRCondition(pu);
        if ((pu.lumaSize().width > MAX_BDOF_APPLICATION_REGION || pu.lumaSize().height > MAX_BDOF_APPLICATION_REGION) && pu.mergeType != MRG_TYPE_SUBPU_ATMVP && (bioApplied && !dmvrApplied))
        {
    
    #if JVET_O0108_DIS_DMVR_BDOF_CIIP
          xSubPuBio(pu, predBuf, eRefPicList, predBufWOBIO);
    #else
    
          xSubPuBio(pu, predBuf, eRefPicList);
    
    Yu Han's avatar
    Yu Han committed
        if (pu.mergeType != MRG_TYPE_DEFAULT_N && pu.mergeType != MRG_TYPE_IBC)
    
    #if JVET_O0108_DIS_DMVR_BDOF_CIIP
          CHECK(predBufWOBIO != NULL, "the case should not happen!");
    #endif 
    
          xSubPuMC( pu, predBuf, eRefPicList );
        }
        else if( xCheckIdenticalMotion( pu ) )
        {
    
          xPredInterUni( pu, REF_PIC_LIST_0, predBuf, false
    
    #if JVET_O0108_DIS_DMVR_BDOF_CIIP
          if (predBufWOBIO)
            predBufWOBIO->copyFrom(predBuf);
    #endif
    
    #if JVET_O0108_DIS_DMVR_BDOF_CIIP
          xPredInterBi(pu, predBuf, predBufWOBIO);
    #else
    
    void InterPrediction::motionCompensation( CodingUnit &cu, const RefPicList &eRefPicList
    
    Xiaozhong Xu's avatar
    Xiaozhong Xu committed
      , const bool luma, const bool chroma
    
    {
      for( auto &pu : CU::traversePUs( cu ) )
      {
        PelUnitBuf predBuf = cu.cs->getPredBuf( pu );
    
        motionCompensation( pu, predBuf, eRefPicList
    
    Xiaozhong Xu's avatar
    Xiaozhong Xu committed
          , luma, chroma
        );
    
    void InterPrediction::motionCompensation( PredictionUnit &pu, const RefPicList &eRefPicList /*= REF_PIC_LIST_X*/
    
    Xiaozhong Xu's avatar
    Xiaozhong Xu committed
      , const bool luma, const bool chroma
    
      motionCompensation( pu, predBuf, eRefPicList
    
    Xiaozhong Xu's avatar
    Xiaozhong Xu committed
        , luma, chroma
    
    int InterPrediction::rightShiftMSB(int numer, int denom)
    {
      int     d;
      int msbIdx = 0;
      for (msbIdx = 0; msbIdx<32; msbIdx++)
      {
        if (denom < ((int)1 << msbIdx))
        {
          break;
        }
      }
    
      int shiftIdx = msbIdx - 1;
      d = (numer >> shiftIdx);
    
    rlliao's avatar
    rlliao committed
    void InterPrediction::motionCompensation4Triangle( CodingUnit &cu, MergeCtx &triangleMrgCtx, const bool splitDir, const uint8_t candIdx0, const uint8_t candIdx1 )
    
    {
      for( auto &pu : CU::traversePUs( cu ) )
      {
        const UnitArea localUnitArea( cu.cs->area.chromaFormat, Area( 0, 0, pu.lwidth(), pu.lheight() ) );
    
    rlliao's avatar
    rlliao committed
        PelUnitBuf tmpTriangleBuf = m_triangleBuf.getBuf( localUnitArea );
    
        PelUnitBuf predBuf        = cu.cs->getPredBuf( pu );
    
    rlliao's avatar
    rlliao committed
        triangleMrgCtx.setMergeInfo( pu, candIdx0 );
    
        PU::spanMotionInfo( pu );
        motionCompensation( pu, tmpTriangleBuf );
    
    Valeri George's avatar
    Valeri George committed
        {
          if( g_mctsDecCheckEnabled && !MCTSHelper::checkMvBufferForMCTSConstraint( pu, true ) )
          {
            printf( "DECODER_TRIANGLE_PU: pu motion vector across tile boundaries (%d,%d,%d,%d)\n", pu.lx(), pu.ly(), pu.lwidth(), pu.lheight() );
          }
        }
    
    rlliao's avatar
    rlliao committed
        triangleMrgCtx.setMergeInfo( pu, candIdx1 );
    
        PU::spanMotionInfo( pu );
        motionCompensation( pu, predBuf );
    
    
    Valeri George's avatar
    Valeri George committed
        {
          if( g_mctsDecCheckEnabled && !MCTSHelper::checkMvBufferForMCTSConstraint( pu, true ) )
          {
            printf( "DECODER_TRIANGLE_PU: pu motion vector across tile boundaries (%d,%d,%d,%d)\n", pu.lx(), pu.ly(), pu.lwidth(), pu.lheight() );
          }
        }
    
        weightedTriangleBlk( pu, splitDir, MAX_NUM_CHANNEL_TYPE, predBuf, tmpTriangleBuf, predBuf );
    
    void InterPrediction::weightedTriangleBlk( PredictionUnit &pu, const bool splitDir, int32_t channel, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 )
    
      if( channel == CHANNEL_TYPE_LUMA )
      {
    
    #if JVET_O0280_SIMD_TRIANGLE_WEIGHTING
        m_if.weightedTriangleBlk( pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, predDst, predSrc0, predSrc1 );
    #else
    
        xWeightedTriangleBlk( pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, predDst, predSrc0, predSrc1 );
    
      }
      else if( channel == CHANNEL_TYPE_CHROMA )
      {
    
    #if JVET_O0280_SIMD_TRIANGLE_WEIGHTING
        m_if.weightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 );
        m_if.weightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 );
    #else
    
        xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 );
        xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 );
    
    #if JVET_O0280_SIMD_TRIANGLE_WEIGHTING
        m_if.weightedTriangleBlk( pu, pu.lumaSize().width,   pu.lumaSize().height,   COMPONENT_Y,  splitDir, predDst, predSrc0, predSrc1 );
        m_if.weightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 );
        m_if.weightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 );
    #else
    
        xWeightedTriangleBlk( pu, pu.lumaSize().width,   pu.lumaSize().height,   COMPONENT_Y,  splitDir, predDst, predSrc0, predSrc1 );
        xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 );
        xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 );
    
    void InterPrediction::xWeightedTriangleBlk( const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const bool splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 )
    
    rlliao's avatar
    rlliao committed
    {
      Pel*    dst        = predDst .get(compIdx).buf;
      Pel*    src0       = predSrc0.get(compIdx).buf;
      Pel*    src1       = predSrc1.get(compIdx).buf;
      int32_t strideDst  = predDst .get(compIdx).stride  - width;
      int32_t strideSrc0 = predSrc0.get(compIdx).stride  - width;
      int32_t strideSrc1 = predSrc1.get(compIdx).stride  - width;
    
      const char    log2WeightBase    = 3;
      const ClpRng  clipRng           = pu.cu->slice->clpRngs().comp[compIdx];
      const int32_t clipbd            = clipRng.bd;
      const int32_t shiftDefault      = std::max<int>(2, (IF_INTERNAL_PREC - clipbd));
      const int32_t offsetDefault     = (1<<(shiftDefault-1)) + IF_INTERNAL_OFFS;
      const int32_t shiftWeighted     = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + log2WeightBase;
      const int32_t offsetWeighted    = (1 << (shiftWeighted - 1)) + (IF_INTERNAL_OFFS << log2WeightBase);
    
    rlliao's avatar
    rlliao committed
      const int32_t ratioWH           = (width > height) ? (width / height) : 1;
      const int32_t ratioHW           = (width > height) ? 1 : (height / width);
    
    
      const bool    longWeight        = (compIdx == COMPONENT_Y);
    
      const int32_t weightedLength    = longWeight ? 7 : 3;
    
    rlliao's avatar
    rlliao committed
            int32_t weightedStartPos  = ( splitDir == 0 ) ? ( 0 - (weightedLength >> 1) * ratioWH ) : ( width - ((weightedLength + 1) >> 1) * ratioWH );
            int32_t weightedEndPos    = weightedStartPos + weightedLength * ratioWH - 1;
            int32_t weightedPosoffset =( splitDir == 0 ) ? ratioWH : -ratioWH;
    
            Pel     tmpPelWeighted;
            int32_t weightIdx;
    
    rlliao's avatar
    rlliao committed
            int32_t x, y, tmpX, tmpY, tmpWeightedStart, tmpWeightedEnd;
    
    rlliao's avatar
    rlliao committed
      for( y = 0; y < height; y+= ratioHW )
    
    rlliao's avatar
    rlliao committed
        for( tmpY = ratioHW; tmpY > 0; tmpY-- )
    
    rlliao's avatar
    rlliao committed
          for( x = 0; x < weightedStartPos; x++ )
    
    rlliao's avatar
    rlliao committed
            *dst++ = ClipPel( rightShift( (splitDir == 0 ? *src1 : *src0) + offsetDefault, shiftDefault), clipRng );
            src0++;
            src1++;
    
    rlliao's avatar
    rlliao committed
          tmpWeightedStart = std::max((int32_t)0, weightedStartPos);
          tmpWeightedEnd   = std::min(weightedEndPos, (int32_t)(width - 1));
    
    rlliao's avatar
    rlliao committed
          if( weightedStartPos < 0 )
    
            weightIdx     += abs(weightedStartPos) / ratioWH;
    
    rlliao's avatar
    rlliao committed
          for( x = tmpWeightedStart; x <= tmpWeightedEnd; x+= ratioWH )
    
    rlliao's avatar
    rlliao committed
            for( tmpX = ratioWH; tmpX > 0; tmpX-- )
    
              tmpPelWeighted = Clip3( 1, 7, longWeight ? weightIdx : (weightIdx * 2));
    
              tmpPelWeighted = splitDir ? ( 8 - tmpPelWeighted ) : tmpPelWeighted;
              *dst++         = ClipPel( rightShift( (tmpPelWeighted*(*src0++) + ((8 - tmpPelWeighted) * (*src1++)) + offsetWeighted), shiftWeighted ), clipRng );
    
    rlliao's avatar
    rlliao committed
          for( x = weightedEndPos + 1; x < width; x++ )
    
    rlliao's avatar
    rlliao committed
            *dst++ = ClipPel( rightShift( (splitDir == 0 ? *src0 : *src1) + offsetDefault, shiftDefault ), clipRng );
            src0++;
            src1++;
    
    rlliao's avatar
    rlliao committed
    
          dst  += strideDst;
          src0 += strideSrc0;
          src1 += strideSrc1;
    
    rlliao's avatar
    rlliao committed
        weightedStartPos += weightedPosoffset;
        weightedEndPos   += weightedPosoffset;
    
    /// Copies, for every colour component, a block of reference samples into the
    /// supplied buffer and pads it on all sides.
    ///
    /// Per component the function
    ///   1. forces the destination stride to MAX_CU_SIZE + 2*DMVR_NUM_ITERATION + NTAPS_LUMA,
    ///   2. enlarges the fetched area by (filter taps - 1) in both directions and
    ///      moves the motion vector back by (taps/2 - 1) samples,
    ///   3. clips the motion vector (wrap-around clip when the SPS enables it),
    ///   4. copies the reference samples to the buffer, offset by
    ///      DMVR_NUM_ITERATION rows and columns,
    ///   5. pads the copied area by DMVR_NUM_ITERATION >> horizontal chroma scale.
    ///
    /// \param pu     prediction unit supplying motion, position and chroma format
    /// \param pcPad  destination buffers; the stride of each component is overwritten
    /// \param refId  reference picture list to fetch from
    void InterPrediction::xPrefetchPad(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId)
    {
      const Picture* refPic      = pu.cu->slice->getRefPic(refId, pu.refIdx[refId]);
      const int      baseMvShift = (MV_FRACTIONAL_BITS_INTERNAL);

      for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++)
      {
        PelBuf &dstBuf = pcPad.bufs[compID];

        // The stride is fixed for all components, independent of the block size.
        dstBuf.stride = (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA);

        const int numTaps = (compID == (COMPONENT_Y)) ? NTAPS_LUMA : NTAPS_CHROMA;
        const int tapLead = (numTaps >> 1) - 1;

        // Area actually fetched: the buffer size plus the interpolation filter support.
        const int fetchWidth  = (int)dstBuf.width + (numTaps - 1);
        const int fetchHeight = (int)dstBuf.height + (numTaps - 1);

        // Skip DMVR_NUM_ITERATION rows and columns so that padding fits around the copy.
        const int dstOffset = (DMVR_NUM_ITERATION) * (dstBuf.stride + 1);

        // NOTE(review): the horizontal scale is applied to the vertical pad size and
        // vertical MV shift as well — confirm this is intended for chroma formats
        // whose vertical and horizontal subsampling differ.
        const auto scaleX      = getComponentScaleX((ComponentID)compID, pu.chromaFormat);
        const int  padSize     = (DMVR_NUM_ITERATION) >> scaleX;
        const int  compMvShift = baseMvShift + scaleX;

        // Start from the PU motion vector and step back to the first filter tap.
        Mv fetchMv(pu.mv[refId].getHor(), pu.mv[refId].getVer());
        fetchMv += Mv(-(tapLead << compMvShift), -(tapLead << compMvShift));

        bool wrapRef = false;
        if (pu.cs->sps->getWrapAroundEnabledFlag())
        {
          wrapRef = wrapClipMv(fetchMv, pu.blocks[0].pos(), pu.blocks[0].size(), pu.cs->sps);
        }
        else
        {
          clipMv(fetchMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps);
        }

        /* Pre-fetch similar to HEVC */
        {
          const Position refPos = pu.blocks[compID].pos().offset(fetchMv.getHor() >> compMvShift, fetchMv.getVer() >> compMvShift);

          CPelBuf refBuf = refPic->getRecoBuf(CompArea((ComponentID)compID, pu.chromaFormat, refPos, pu.blocks[compID].size()), wrapRef);

          g_pelBufOP.copyBuffer((Pel *)refBuf.buf, refBuf.stride, ((Pel *)dstBuf.buf) + dstOffset, dstBuf.stride, fetchWidth, fetchHeight);

    #if JVET_J0090_MEMORY_BANDWITH_MEASURE
          JVET_J0090_SET_REF_PICTURE( refPic, (ComponentID)compID );
          for ( int row = 0 ; row < fetchHeight ; row++ )
          {
            for ( int col = 0 ; col < fetchWidth ; col++ )
            {
              JVET_J0090_CACHE_ACCESS( ((Pel *)refBuf.buf) + row * refBuf.stride + col, __FILE__, __LINE__ );
            }
          }
    #endif

        }

        /* pad the copied area on every side by padSize samples */
        g_pelBufOP.padding(dstBuf.buf + dstOffset, dstBuf.stride, fetchWidth, fetchHeight, padSize);
      }
    }
    /// Three-step restoring division producing a signed quotient of at most 7.
    ///
    /// The magnitude of N is compared against 8*D, 4*D and 2*D in turn, so for a
    /// positive D the result is min(7, floor(|N| / (2*D))) carrying the sign of N.
    /// No division instruction is used.
    ///
    /// \param N  numerator; its sign is transferred to the result
    /// \param D  denominator; not sign-corrected here, callers pass it as-is
    /// \return   quotient in the range [-7, 7]
    inline int32_t div_for_maxq7(int64_t N, int64_t D)
    {
      const bool negative = (N < 0);
      if (negative)
      {
        N = -N;
      }

      int32_t quotient  = 0;
      int64_t threshold = (D << 3);

      // Produce the three quotient bits, most significant first.
      for (int bit = 0; bit < 3; ++bit)
      {
        quotient <<= 1;
        if (N >= threshold)
        {
          N -= threshold;
          quotient |= 1;
        }
        threshold >>= 1;
      }

      return negative ? -quotient : quotient;
    }
    
    /// Derives a sub-sample motion vector offset per axis from five costs.
    ///
    /// sadBuffer[0] is the centre cost. The pair (sadBuffer[1], sadBuffer[3])
    /// drives deltaMv[0] and the pair (sadBuffer[2], sadBuffer[4]) drives
    /// deltaMv[1] (the vertical component). For each axis:
    ///   - if first + second - 2*centre is zero, deltaMv for that axis is left untouched;
    ///   - if the first cost equals the centre cost the offset is -8;
    ///   - otherwise, if the second cost equals the centre cost the offset is +8;
    ///   - otherwise the offset is div_for_maxq7((first - second) << 4, denominator).
    ///
    /// \param sadBuffer  five costs laid out as described above
    /// \param deltaMv    two-element output; entries are only written when the
    ///                   corresponding denominator is non-zero
    void xSubPelErrorSrfc(uint64_t *sadBuffer, int32_t *deltaMv)
    {
      const int32_t subPelShift = 4; /* 1: half pel, 2: quarter pel, 3: 1/8, 4: 1/16 */

      // axis 0 uses costs 1 and 3, axis 1 (vertical) uses costs 2 and 4
      for (int axis = 0; axis < 2; ++axis)
      {
        const uint64_t centreCost = sadBuffer[0];
        const uint64_t firstCost  = sadBuffer[1 + axis];
        const uint64_t secondCost = sadBuffer[3 + axis];

        const int64_t denominator = (int64_t)((firstCost + secondCost - (centreCost << 1)));
        if (0 == denominator)
        {
          continue;
        }

        if (firstCost == centreCost)
        {
          deltaMv[axis] = -8; // half pel
        }
        else if (secondCost == centreCost)
        {
          deltaMv[axis] = 8; // half pel
        }
        else
        {
          // The unsigned difference is reinterpreted as signed after scaling.
          const int64_t numerator = (int64_t)((firstCost - secondCost) << subPelShift);
          deltaMv[axis] = div_for_maxq7(numerator, denominator);
        }
      }
      return;
    }
    
    
    void InterPrediction::xBIPMVRefine(int bd, Pel *pRefL0, Pel *pRefL1, uint64_t& minCost, int16_t *deltaMV, uint64_t *pSADsArray, int width, int height)
    
    {
      const int32_t refStrideL0 = m_biLinearBufStride;
      const int32_t refStrideL1 = m_biLinearBufStride;
      Pel *pRefL0Orig = pRefL0;
      Pel *pRefL1Orig = pRefL1;
    
    Jeeva Raj A's avatar
    Jeeva Raj A committed
      for (int nIdx = 0; (nIdx < 25); ++nIdx)
    
        int32_t sadOffset = ((m_pSearchOffset[nIdx].getVer() * ((2 * DMVR_NUM_ITERATION) + 1)) + m_pSearchOffset[nIdx].getHor());
    
        pRefL0 = pRefL0Orig + m_pSearchOffset[nIdx].hor + (m_pSearchOffset[nIdx].ver * refStrideL0);
        pRefL1 = pRefL1Orig - m_pSearchOffset[nIdx].hor - (m_pSearchOffset[nIdx].ver * refStrideL1);
    
        if (*(pSADsArray + sadOffset) == MAX_UINT64)
    
        {
          const uint64_t cost = xDMVRCost(bd, pRefL0, refStrideL0, pRefL1, refStrideL1, width, height);
    
          *(pSADsArray + sadOffset) = cost;
    
        if (*(pSADsArray + sadOffset) < minCost)
    
          minCost = *(pSADsArray + sadOffset);
          deltaMV[0] = m_pSearchOffset[nIdx].getHor();
          deltaMV[1] = m_pSearchOffset[nIdx].getVer();
    
    // Final motion compensation after DMVR, reading from the pre-fetched padded
    // reference buffers instead of the reference pictures.
    //
    // pu                prediction unit; pu.mv[] holds the MVs used for the final MC
    // pcYuvSrc0/1       destination prediction buffers for list 0 / list 1
    // pcPad0/1          padded reference sample buffers for list 0 / list 1
    // bioApplied        forwarded unchanged to xPredInterBlk()
    // mergeMV           per-list starting MVs; the integer displacement of pu.mv[]
    //                   relative to these selects the read position in the padded buffer
    //
    // Throws (via THROW) when the MCTS check is enabled and the starting MV
    // violates the tile constraint; CHECKs that the integer displacement never
    // exceeds DMVR_NUM_ITERATION in either direction.
    void InterPrediction::xFinalPaddedMCForDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvSrc0, PelUnitBuf &pcYuvSrc1, PelUnitBuf &pcPad0, PelUnitBuf &pcPad1, const bool bioApplied
      , const Mv mergeMV[NUM_REF_PIC_LIST_01]
    )
    {
      int offset, deltaIntMvX, deltaIntMvY;

      // The first pass of the list loop works on the list-0 buffers.
      PelUnitBuf pcYUVTemp = pcYuvSrc0;
      PelUnitBuf pcPadTemp = pcPad0;
      /*always high precision MVs are used*/
      int mvShift = MV_FRACTIONAL_BITS_INTERNAL;

      for (int k = 0; k < NUM_REF_PIC_LIST_01; k++)
      {
        RefPicList refId = (RefPicList)k;
        Mv cMv = pu.mv[refId];
        m_iRefListIdx = refId;
        const Picture* refPic = pu.cu->slice->getRefPic(refId, pu.refIdx[refId]);

        Mv cMvClipped = cMv;
        clipMv(cMvClipped, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps);

        // MV the padded buffer of this list was fetched around.
        Mv startMv = mergeMV[refId];

        if( g_mctsDecCheckEnabled && !MCTSHelper::checkMvForMCTSConstraint( pu, startMv, MV_PRECISION_INTERNAL ) )
        {
          const Area& tileArea = pu.cs->picture->mctsInfo.getTileArea();
          printf( "Attempt an access over tile boundary at block %d,%d %d,%d with MV %d,%d (in Tile TL: %d,%d BR: %d,%d)\n",
            pu.lx(), pu.ly(), pu.lwidth(), pu.lheight(), startMv.getHor(), startMv.getVer(), tileArea.topLeft().x, tileArea.topLeft().y, tileArea.bottomRight().x, tileArea.bottomRight().y );
          THROW( "MCTS constraint failed!" );
        }

        for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++)
        {
          // NOTE(review): the horizontal component scale is applied to both the
          // horizontal and the vertical MV below - confirm this is intended for
          // chroma formats whose vertical scale differs.
          int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat);

          // Samples the interpolation filter needs to the left of / above the block.
          int leftPixelExtra;
          if (compID == COMPONENT_Y)
          {
            leftPixelExtra = (NTAPS_LUMA >> 1) - 1;
          }
          else
          {
            leftPixelExtra = (NTAPS_CHROMA >> 1) - 1;
          }

          // Integer-sample displacement of the final MV relative to the starting MV.
          deltaIntMvX = (cMv.getHor() >> mvshiftTemp) -
            (startMv.getHor() >> mvshiftTemp);
          deltaIntMvY = (cMv.getVer() >> mvshiftTemp) -
            (startMv.getVer() >> mvshiftTemp);

          CHECK((abs(deltaIntMvX) > DMVR_NUM_ITERATION) || (abs(deltaIntMvY) > DMVR_NUM_ITERATION), "not expected DMVR movement");

          // Skip the padding margin diagonally, then apply the integer displacement.
          offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (pcPadTemp.bufs[compID].stride + 1);
          offset += (deltaIntMvY)* pcPadTemp.bufs[compID].stride;
          offset += (deltaIntMvX);
          PelBuf &srcBuf = pcPadTemp.bufs[compID];

          xPredInterBlk((ComponentID)compID, pu, refPic, cMvClipped, pcYUVTemp, true, pu.cs->slice->getClpRngs().comp[compID],
            bioApplied, false, 0, 0, 0, (srcBuf.buf + offset), pcPadTemp.bufs[compID].stride);
        }

        // The remaining pass works on the list-1 buffers.
        pcYUVTemp = pcYuvSrc1;
        pcPadTemp = pcPad1;
      }
    }
    
    uint64_t InterPrediction::xDMVRCost(int bitDepth, Pel* pOrg, uint32_t refStride, const Pel* pRef, uint32_t orgStride, int width, int height)
    
    {
      DistParam cDistParam;
      cDistParam.applyWeight = false;
      cDistParam.useMR = false;
    
      m_pcRdCost->setDistParam(cDistParam, pOrg, pRef, orgStride, refStride, bitDepth, COMPONENT_Y, width, height, 1);
    
      uint64_t uiCost = cDistParam.distFunc(cDistParam);
      return uiCost;
    }
    
    
    void xDMVRSubPixelErrorSurface(bool notZeroCost, int16_t *totalDeltaMV, int16_t *deltaMV, uint64_t *pSADsArray)
    
      int sadStride = (((2 * DMVR_NUM_ITERATION) + 1));
    
    Jeeva Raj A's avatar
    Jeeva Raj A committed
      if (notZeroCost && (abs(totalDeltaMV[0]) != (2 << MV_FRACTIONAL_BITS_INTERNAL))
        && (abs(totalDeltaMV[1]) != (2 << MV_FRACTIONAL_BITS_INTERNAL)))
    
        int32_t tempDeltaMv[2] = { 0,0 };
    
        sadbuffer[0] = pSADsArray[0];
        sadbuffer[1] = pSADsArray[-1];
        sadbuffer[2] = pSADsArray[-sadStride];
        sadbuffer[3] = pSADsArray[1];
        sadbuffer[4] = pSADsArray[sadStride];
    
        xSubPelErrorSrfc(sadbuffer, tempDeltaMv);
        totalDeltaMV[0] += tempDeltaMv[0];
        totalDeltaMV[1] += tempDeltaMv[1];
    
      }
    }
    
    // Produces the luma predictions of both reference lists that the DMVR search
    // operates on.
    //
    // For each list the (clipped) merge MV of the PU is used to predict a block
    // enlarged by DMVR_NUM_ITERATION samples on every side, reading from the
    // padded reference buffer m_cYuvRefBuffDMVRL0/L1 and writing to
    // m_cYuvPredTempDMVRL0/L1 with a row stride of MAX_CU_SIZE + 2 * DMVR_NUM_ITERATION.
    //
    // pu       prediction unit supplying MVs, reference indices and block size
    // clpRngs  clipping ranges; only the luma entry is used
    //
    // NOTE(review): the flag arguments passed to xPredInterBlk() presumably
    // select the bilinear DMVR interpolation path - confirm against its declaration.
    void InterPrediction::xinitMC(PredictionUnit& pu, const ClpRngs &clpRngs)
    {
      const int refIdx0 = pu.refIdx[0];
      const int refIdx1 = pu.refIdx[1];

      /*use merge MV as starting MV*/
      Mv mergeMVL0(pu.mv[REF_PIC_LIST_0]);
      Mv mergeMVL1(pu.mv[REF_PIC_LIST_1]);

      clipMv(mergeMVL0, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps);
      clipMv(mergeMVL1, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps);

      /*L0 MC for refinement*/
      {
        int offset;
        int leftPixelExtra = (NTAPS_LUMA >> 1) - 1;
        // Skip the padding margin diagonally, then step back DMVR_NUM_ITERATION
        // rows and columns so the enlarged block starts at its top-left corner.
        offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (m_cYuvRefBuffDMVRL0.bufs[COMPONENT_Y].stride + 1);
        offset += (-(int)DMVR_NUM_ITERATION)* (int)m_cYuvRefBuffDMVRL0.bufs[COMPONENT_Y].stride;
        offset += (-(int)DMVR_NUM_ITERATION);
        PelBuf srcBuf = m_cYuvRefBuffDMVRL0.bufs[COMPONENT_Y];
        PelUnitBuf yuvPredTempL0 = PelUnitBuf(pu.chromaFormat, PelBuf(m_cYuvPredTempDMVRL0,
          (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION)), pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION)));

        xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(REF_PIC_LIST_0, refIdx0), mergeMVL0, yuvPredTempL0, true, clpRngs.comp[COMPONENT_Y],
          false, false, pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION), true, ((Pel *)srcBuf.buf) + offset, srcBuf.stride
        );
      }

      /*L1 MC for refinement*/
      {
        int offset;
        int leftPixelExtra = (NTAPS_LUMA >> 1) - 1;
        // Same source offset computation as for L0, on the list-1 padded buffer.
        offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (m_cYuvRefBuffDMVRL1.bufs[COMPONENT_Y].stride + 1);
        offset += (-(int)DMVR_NUM_ITERATION)* (int)m_cYuvRefBuffDMVRL1.bufs[COMPONENT_Y].stride;
        offset += (-(int)DMVR_NUM_ITERATION);
        PelBuf srcBuf = m_cYuvRefBuffDMVRL1.bufs[COMPONENT_Y];
        PelUnitBuf yuvPredTempL1 = PelUnitBuf(pu.chromaFormat, PelBuf(m_cYuvPredTempDMVRL1,
          (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION)), pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION)));

        xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(REF_PIC_LIST_1, refIdx1), mergeMVL1, yuvPredTempL1, true, clpRngs.comp[COMPONENT_Y],
          false, false, pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION), true, ((Pel *)srcBuf.buf) + offset, srcBuf.stride
        );
      }
    }
    
    void InterPrediction::xProcessDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvDst, const ClpRngs &clpRngs, const bool bioApplied)
    
    Jeeva Raj A's avatar
    Jeeva Raj A committed
      int iterationCount = 1;
    
      /*Always High Precision*/
      int mvShift = MV_FRACTIONAL_BITS_INTERNAL;
    
      /*use merge MV as starting MV*/
      Mv mergeMv[] = { pu.mv[REF_PIC_LIST_0] , pu.mv[REF_PIC_LIST_1] };
    
    
      m_biLinearBufStride = (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION));
    
    
      int dy = std::min<int>(pu.lumaSize().height, DMVR_SUBCU_HEIGHT);
      int dx = std::min<int>(pu.lumaSize().width,  DMVR_SUBCU_WIDTH);
      /*L0 Padding*/
      m_cYuvRefBuffDMVRL0 = (pu.chromaFormat == CHROMA_400 ?
        PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y())) :
        PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y()),
          PelBuf(m_cRefSamplesDMVRL0[1], pcYuvDst.Cb()), PelBuf(m_cRefSamplesDMVRL0[2], pcYuvDst.Cr())));
    
      xPrefetchPad(pu, m_cYuvRefBuffDMVRL0, REF_PIC_LIST_0);
    
      /*L1 Padding*/
      m_cYuvRefBuffDMVRL1 = (pu.chromaFormat == CHROMA_400 ?
        PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y())) :
        PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y()), PelBuf(m_cRefSamplesDMVRL1[1], pcYuvDst.Cb()),
          PelBuf(m_cRefSamplesDMVRL1[2], pcYuvDst.Cr())));
    
      xPrefetchPad(pu, m_cYuvRefBuffDMVRL1, REF_PIC_LIST_1);
    
    
      JVET_J0090_SET_CACHE_ENABLE( false );
    
      xinitMC(pu, clpRngs);
    
      // point the MC buffers at the centre position so that each iteration can be reached without multiplying back from the beginning
    
    Jeeva Raj A's avatar
    Jeeva Raj A committed
      Pel *biLinearPredL0 = m_cYuvPredTempDMVRL0 + (DMVR_NUM_ITERATION * m_biLinearBufStride) + DMVR_NUM_ITERATION;
      Pel *biLinearPredL1 = m_cYuvPredTempDMVRL1 + (DMVR_NUM_ITERATION * m_biLinearBufStride) + DMVR_NUM_ITERATION;
    
    
      Position puPos = pu.lumaPos();
    
      int bd = pu.cs->slice->getClpRngs().comp[COMPONENT_Y].bd;
    
    #if JVET_O0055_INT_DMVR_DIS_BDOF
      int            bioEnabledThres = 8 * (dy >> 1) * dx;
    
    Chen-Yen Lai's avatar
    Chen-Yen Lai committed
      bool           bioAppliedType[MAX_NUM_SUBCU_DMVR];
    
      {
        int num = 0;
    
        int yStart = 0;
        for (int y = puPos.y; y < (puPos.y + pu.lumaSize().height); y = y + dy, yStart = yStart + dy)
        {
          for (int x = puPos.x, xStart = 0; x < (puPos.x + pu.lumaSize().width); x = x + dx, xStart = xStart + dx)
    
            uint64_t minCost = MAX_UINT64;
            bool notZeroCost = true;
            int16_t totalDeltaMV[2] = { 0,0 };
            int16_t deltaMV[2] = { 0, 0 };
            uint64_t  *pSADsArray;
    
            for (int i = 0; i < (((2 * DMVR_NUM_ITERATION) + 1) * ((2 * DMVR_NUM_ITERATION) + 1)); i++)
    
            pSADsArray = &m_SADsArray[(((2 * DMVR_NUM_ITERATION) + 1) * ((2 * DMVR_NUM_ITERATION) + 1)) >> 1];
    
    
            Pel *addrL0Centre = biLinearPredL0 + yStart * m_biLinearBufStride + xStart;
            Pel *addrL1Centre = biLinearPredL1 + yStart * m_biLinearBufStride + xStart;
            for (int i = 0; i < iterationCount; i++)
            {
              deltaMV[0] = 0;
              deltaMV[1] = 0;
              Pel *addrL0 = addrL0Centre + totalDeltaMV[0] + (totalDeltaMV[1] * m_biLinearBufStride);
              Pel *addrL1 = addrL1Centre - totalDeltaMV[0] - (totalDeltaMV[1] * m_biLinearBufStride);
              if (i == 0)
              {
                minCost = xDMVRCost(clpRngs.comp[COMPONENT_Y].bd, addrL0, m_biLinearBufStride, addrL1, m_biLinearBufStride, dx, dy);
    
    Yi-Wen Chen's avatar
    Yi-Wen Chen committed
    #if JVET_O0590_REDUCE_DMVR_ORIG_MV_COST
                minCost -= (minCost >>2);            
    #endif
    
                if (minCost < ((4 * dx * (dy >> 1/*for alternate line*/))))
                {
                  notZeroCost = false;
                  break;
                }
                pSADsArray[0] = minCost;
              }
              if (!minCost)