Skip to content
Snippets Groups Projects
InterSearch.cpp 223 KiB
Newer Older
  • Learn to ignore specific revisions
  •           xTZ8PointDiamondSearch( cStruct, 0, 0, iDist, false );
              if ( bTestZeroVectorStop && (cStruct.uiBestRound > 0) ) // stop criterion
              {
                break;
              }
            }
          }
        }
      }
      else
      {
        // Test also zero neighbourhood but with half the range
        // It was reported that the original (above) search scheme using bTestZeroVectorStart did not
        // make sense since one would have already checked the zero candidate earlier
        // and thus the conditions for that test would have not been satisfied
        if (bTestZeroVectorStart == true && bBestCandidateZero != true)
        {
          for ( iDist = 1; iDist <= (iSearchRange >> 1); iDist*=2 )
          {
            xTZ8PointDiamondSearch( cStruct, 0, 0, iDist, false );
            if ( bTestZeroVectorStop && (cStruct.uiBestRound > 2) ) // stop criterion
            {
              break;
            }
          }
        }
      }
    
      // calculate only the 2 missing points instead of 8 points if cStruct.uiBestDistance == 1
      if ( cStruct.uiBestDistance == 1 )
      {
        cStruct.uiBestDistance = 0;
        xTZ2PointSearch( cStruct );
      }
    
      // raster search if distance is too big
      if (bUseAdaptiveRaster)
      {
        int iWindowSize     = iRaster;
        SearchRange localsr = sr;
    
        if (!(bEnableRasterSearch && ( ((int)(cStruct.uiBestDistance) >= iRaster))))
        {
          iWindowSize ++;
          localsr.left   /= 2;
          localsr.right  /= 2;
          localsr.top    /= 2;
          localsr.bottom /= 2;
        }
        cStruct.uiBestDistance = iWindowSize;
        for ( iStartY = localsr.top; iStartY <= localsr.bottom; iStartY += iWindowSize )
        {
          for ( iStartX = localsr.left; iStartX <= localsr.right; iStartX += iWindowSize )
          {
            xTZSearchHelp( cStruct, iStartX, iStartY, 0, iWindowSize );
          }
        }
      }
      else
      {
        if ( bEnableRasterSearch && ( ((int)(cStruct.uiBestDistance) >= iRaster) || bAlwaysRasterSearch ) )
        {
          cStruct.uiBestDistance = iRaster;
          for ( iStartY = sr.top; iStartY <= sr.bottom; iStartY += iRaster )
          {
            for ( iStartX = sr.left; iStartX <= sr.right; iStartX += iRaster )
            {
              xTZSearchHelp( cStruct, iStartX, iStartY, 0, iRaster );
            }
          }
        }
      }
    
      // raster refinement
    
      if ( bRasterRefinementEnable && cStruct.uiBestDistance > 0 )
      {
        while ( cStruct.uiBestDistance > 0 )
        {
          iStartX = cStruct.iBestX;
          iStartY = cStruct.iBestY;
          if ( cStruct.uiBestDistance > 1 )
          {
            iDist = cStruct.uiBestDistance >>= 1;
            if ( bRasterRefinementDiamond == 1 )
            {
              xTZ8PointDiamondSearch ( cStruct, iStartX, iStartY, iDist, bRasterRefinementCornersForDiamondDist1 );
            }
            else
            {
              xTZ8PointSquareSearch  ( cStruct, iStartX, iStartY, iDist );
            }
          }
    
          // calculate only the 2 missing points instead of 8 points if cStruct.uiBestDistance == 1
          if ( cStruct.uiBestDistance == 1 )
          {
            cStruct.uiBestDistance = 0;
            if ( cStruct.ucPointNr != 0 )
            {
              xTZ2PointSearch( cStruct );
            }
          }
        }
      }
    
      // star refinement
      if ( bStarRefinementEnable && cStruct.uiBestDistance > 0 )
      {
        while ( cStruct.uiBestDistance > 0 )
        {
          iStartX = cStruct.iBestX;
          iStartY = cStruct.iBestY;
          cStruct.uiBestDistance = 0;
          cStruct.ucPointNr = 0;
          for ( iDist = 1; iDist < iSearchRange + 1; iDist*=2 )
          {
            if ( bStarRefinementDiamond == 1 )
            {
              xTZ8PointDiamondSearch ( cStruct, iStartX, iStartY, iDist, bStarRefinementCornersForDiamondDist1 );
            }
            else
            {
              xTZ8PointSquareSearch  ( cStruct, iStartX, iStartY, iDist );
            }
            if ( bStarRefinementStop && (cStruct.uiBestRound >= uiStarRefinementRounds) ) // stop criterion
            {
              break;
            }
          }
    
          // calculate only the 2 missing points instead of 8 points if cStruct.uiBestDistance == 1
          if ( cStruct.uiBestDistance == 1 )
          {
            cStruct.uiBestDistance = 0;
            if ( cStruct.ucPointNr != 0 )
            {
              xTZ2PointSearch( cStruct );
            }
          }
        }
      }
    
      // write out best match
      rcMv.set( cStruct.iBestX, cStruct.iBestY );
      ruiSAD = cStruct.uiBestSad - m_pcRdCost->getCostOfVectorWithPredictor( cStruct.iBestX, cStruct.iBestY, cStruct.imvShift );
    }
    
    
    void InterSearch::xTZSearchSelective( const PredictionUnit& pu,
                                          IntTZSearchStruct&    cStruct,
                                          Mv                    &rcMv,
                                          Distortion            &ruiSAD,
                                          const Mv* const       pIntegerMv2Nx2NPred )
    {
      const bool bTestZeroVector          = true;
      const bool bEnableRasterSearch      = true;
      const bool bAlwaysRasterSearch      = false;  // 1: BETTER but factor 15x slower
      const bool bStarRefinementEnable    = true;   // enable either star refinement or raster refinement
      const bool bStarRefinementDiamond   = true;   // 1 = xTZ8PointDiamondSearch   0 = xTZ8PointSquareSearch
      const bool bStarRefinementStop      = false;
      const uint32_t uiStarRefinementRounds   = 2;  // star refinement stop X rounds after best match (must be >=1)
      const int  iSearchRange             = m_iSearchRange;
      const int  iSearchRangeInitial      = m_iSearchRange >> 2;
      const int  uiSearchStep             = 4;
      const int  iMVDistThresh            = 8;
    
      int   iStartX                 = 0;
      int   iStartY                 = 0;
      int   iDist                   = 0;
    
      rcMv.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
    
    Philippe Hanhart's avatar
    Philippe Hanhart committed
      clipMv( rcMv, pu.cu->lumaPos(),
              pu.cu->lumaSize(),
              *pu.cs->sps );
    
      rcMv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
    
      rcMv.divideByPowerOf2(2);
    
      // init TZSearchStruct
      cStruct.uiBestSad = std::numeric_limits<Distortion>::max();
      cStruct.iBestX = 0;
      cStruct.iBestY = 0;
    
      m_cDistParam.maximumDistortionForEarlyExit = cStruct.uiBestSad;
      m_pcRdCost->setDistParam( m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, cStruct.subShiftMode );
    
    
      // set rcMv (Median predictor) as start point and as best point
      xTZSearchHelp( cStruct, rcMv.getHor(), rcMv.getVer(), 0, 0 );
    
      // test whether zero Mv is better start point than Median predictor
      if ( bTestZeroVector )
      {
        xTZSearchHelp( cStruct, 0, 0, 0, 0 );
      }
    
      SearchRange& sr = cStruct.searchRange;
    
      if ( pIntegerMv2Nx2NPred != 0 )
      {
        Mv integerMv2Nx2NPred = *pIntegerMv2Nx2NPred;
    
        integerMv2Nx2NPred.changePrecision(MV_PRECISION_INT, MV_PRECISION_INTERNAL);
    
    Philippe Hanhart's avatar
    Philippe Hanhart committed
        clipMv( integerMv2Nx2NPred, pu.cu->lumaPos(),
                pu.cu->lumaSize(),
                *pu.cs->sps );
    
        integerMv2Nx2NPred.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
    
        integerMv2Nx2NPred.divideByPowerOf2(2);
    
        xTZSearchHelp( cStruct, integerMv2Nx2NPred.getHor(), integerMv2Nx2NPred.getVer(), 0, 0);
    
      }
      {
        // set search range
        Mv currBestMv(cStruct.iBestX, cStruct.iBestY );
        currBestMv <<= 2;
    
        xSetSearchRange( pu, currBestMv, m_iSearchRange, sr
          , cStruct
        );
    
      }
    
      // Initial search
      int iBestX = cStruct.iBestX;
      int iBestY = cStruct.iBestY;
      int iFirstSrchRngHorLeft    = ((iBestX - iSearchRangeInitial) > sr.left)   ? (iBestX - iSearchRangeInitial) : sr.left;
      int iFirstSrchRngVerTop     = ((iBestY - iSearchRangeInitial) > sr.top)    ? (iBestY - iSearchRangeInitial) : sr.top;
      int iFirstSrchRngHorRight   = ((iBestX + iSearchRangeInitial) < sr.right)  ? (iBestX + iSearchRangeInitial) : sr.right;
      int iFirstSrchRngVerBottom  = ((iBestY + iSearchRangeInitial) < sr.bottom) ? (iBestY + iSearchRangeInitial) : sr.bottom;
    
      for ( iStartY = iFirstSrchRngVerTop; iStartY <= iFirstSrchRngVerBottom; iStartY += uiSearchStep )
      {
        for ( iStartX = iFirstSrchRngHorLeft; iStartX <= iFirstSrchRngHorRight; iStartX += uiSearchStep )
        {
          xTZSearchHelp( cStruct, iStartX, iStartY, 0, 0 );
          xTZ8PointDiamondSearch ( cStruct, iStartX, iStartY, 1, false );
          xTZ8PointDiamondSearch ( cStruct, iStartX, iStartY, 2, false );
        }
      }
    
      int iMaxMVDistToPred = (abs(cStruct.iBestX - iBestX) > iMVDistThresh || abs(cStruct.iBestY - iBestY) > iMVDistThresh);
    
      //full search with early exit if MV is distant from predictors
      if ( bEnableRasterSearch && (iMaxMVDistToPred || bAlwaysRasterSearch) )
      {
        for ( iStartY = sr.top; iStartY <= sr.bottom; iStartY += 1 )
        {
          for ( iStartX = sr.left; iStartX <= sr.right; iStartX += 1 )
          {
            xTZSearchHelp( cStruct, iStartX, iStartY, 0, 1 );
          }
        }
      }
      //Smaller MV, refine around predictor
      else if ( bStarRefinementEnable && cStruct.uiBestDistance > 0 )
      {
        // start refinement
        while ( cStruct.uiBestDistance > 0 )
        {
          iStartX = cStruct.iBestX;
          iStartY = cStruct.iBestY;
          cStruct.uiBestDistance = 0;
          cStruct.ucPointNr = 0;
          for ( iDist = 1; iDist < iSearchRange + 1; iDist*=2 )
          {
            if ( bStarRefinementDiamond == 1 )
            {
              xTZ8PointDiamondSearch ( cStruct, iStartX, iStartY, iDist, false );
            }
            else
            {
              xTZ8PointSquareSearch  ( cStruct, iStartX, iStartY, iDist );
            }
            if ( bStarRefinementStop && (cStruct.uiBestRound >= uiStarRefinementRounds) ) // stop criterion
            {
              break;
            }
          }
    
          // calculate only 2 missing points instead 8 points if cStrukt.uiBestDistance == 1
          if ( cStruct.uiBestDistance == 1 )
          {
            cStruct.uiBestDistance = 0;
            if ( cStruct.ucPointNr != 0 )
            {
              xTZ2PointSearch( cStruct );
            }
          }
        }
      }
    
      // write out best match
      rcMv.set( cStruct.iBestX, cStruct.iBestY );
      ruiSAD = cStruct.uiBestSad - m_pcRdCost->getCostOfVectorWithPredictor( cStruct.iBestX, cStruct.iBestY, cStruct.imvShift );
    }
    
    // Refines an integer motion vector for a CU that uses IMV (pu.cu->imv != 0).
    //
    // The incoming MV and its 8 neighbours (spaced 1 << cStruct.imvShift apart after the MV
    // has been shifted left by 2) are evaluated against every AMVP candidate in amvpInfo. The
    // combination with the lowest distortion + MV cost is written back.
    //
    // pu       - prediction unit being searched
    // cStruct  - search state providing the reference buffer, stride, pattern and imvShift
    // rcMv     - in: integer MV, out: best refined MV (shifted left by 2 relative to the input scale)
    // rcMvPred - in: current MV predictor (must equal amvpInfo.mvCand[riMVPIdx]), out: selected predictor
    // riMVPIdx - in/out: index of the selected AMVP candidate
    // ruiBits  - in/out: bit count; the old MVP index cost is replaced by the bits of the selected combination
    // ruiCost  - out: cost of the selected combination
    // amvpInfo - AMVP candidate list
    // fWeight  - factor applied to the measured distortion
    void InterSearch::xPatternSearchIntRefine(PredictionUnit& pu, IntTZSearchStruct&  cStruct, Mv& rcMv, Mv& rcMvPred, int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost, const AMVPInfo& amvpInfo, double fWeight)
    {

      CHECK( pu.cu->imv == 0,                       "xPatternSearchIntRefine(): IMV not used.");
      CHECK( amvpInfo.mvCand[riMVPIdx] != rcMvPred, "xPatternSearchIntRefine(): MvPred issue.");

      const SPS &sps = *pu.cs->sps;
      m_pcRdCost->setDistParam( m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, 0, 1, m_pcEncCfg->getUseHADME() && !pu.cu->transQuantBypass );

      // input MV rcMV has integer resolution
      // -> shift it to QPEL
      rcMv <<= 2;
      // -> set MV scale for cost calculation to QPEL (0)
      m_pcRdCost->setCostScale ( 0 );

      Distortion  uiDist, uiSATD = 0;
      Distortion  uiBestDist  = std::numeric_limits<Distortion>::max();
      // subtract old MVP costs because costs for all newly tested MVPs are added in here
      ruiBits -= m_auiMVPIdxCost[riMVPIdx][AMVP_MAX_NUM_CANDS];

      Mv cBestMv = rcMv;
      Mv cBaseMvd[2];
      int iBestBits = 0;
      int iBestMVPIdx = riMVPIdx;
      // centre position first, followed by its 8 neighbours
      int testPos[9][2] = { { 0, 0}, { -1, -1},{ -1, 0},{ -1, 1},{ 0, -1},{ 0, 1},{ 1, -1},{ 1, 0},{ 1, 1} };


      cBaseMvd[0] = (rcMv - amvpInfo.mvCand[0]);
      cBaseMvd[1] = (rcMv - amvpInfo.mvCand[1]);
      CHECK( (cBaseMvd[0].getHor() & 0x03) != 0 || (cBaseMvd[0].getVer() & 0x03) != 0 , "xPatternSearchIntRefine(): AMVP cand 0 Mvd issue.");
      CHECK( (cBaseMvd[1].getHor() & 0x03) != 0 || (cBaseMvd[1].getVer() & 0x03) != 0 , "xPatternSearchIntRefine(): AMVP cand 1 Mvd issue.");


      cBaseMvd[0].roundToAmvrSignalPrecision(MV_PRECISION_QUARTER, pu.cu->imv);
      cBaseMvd[1].roundToAmvrSignalPrecision(MV_PRECISION_QUARTER, pu.cu->imv);


      int mvOffset = 1 << cStruct.imvShift;

      // test best integer position and all 8 neighboring positions
      for (int pos = 0; pos < 9; pos ++)
      {
        Mv cTestMv[2];
        // test both AMVP candidates for each position
        for (int iMVPIdx = 0; iMVPIdx < amvpInfo.numCand; iMVPIdx++)
        {
          cTestMv[iMVPIdx].set(testPos[pos][0]*mvOffset, testPos[pos][1]*mvOffset);
          cTestMv[iMVPIdx] += cBaseMvd[iMVPIdx];
          cTestMv[iMVPIdx] += amvpInfo.mvCand[iMVPIdx];

          // the distortion only has to be measured again when the second candidate
          // leads to a different test MV than the first one
          if ( iMVPIdx == 0 || cTestMv[0] != cTestMv[1])
          {
            Mv cTempMV = cTestMv[iMVPIdx];

            cTempMV.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);

            clipMv(cTempMV, pu.cu->lumaPos(),
                   pu.cu->lumaSize(),
                   sps);

            cTempMV.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);

            m_cDistParam.cur.buf = cStruct.piRefY  + cStruct.iRefStride * (cTempMV.getVer() >>  2) + (cTempMV.getHor() >> 2);
            uiDist = uiSATD = (Distortion) (m_cDistParam.distFunc( m_cDistParam ) * fWeight);
          }
          else
          {
            // same test MV as for candidate 0: reuse its distortion
            uiDist = uiSATD;
          }

          int iMvBits = m_auiMVPIdxCost[iMVPIdx][AMVP_MAX_NUM_CANDS];
          m_pcRdCost->setPredictor( amvpInfo.mvCand[iMVPIdx] );
          iMvBits += m_pcRdCost->getBitsOfVectorWithPredictor( cTestMv[iMVPIdx].getHor(), cTestMv[iMVPIdx].getVer(), cStruct.imvShift );
          uiDist += m_pcRdCost->getCostOfVectorWithPredictor( cTestMv[iMVPIdx].getHor(), cTestMv[iMVPIdx].getVer(), cStruct.imvShift );

          if (uiDist < uiBestDist)
          {
            uiBestDist = uiDist;
            cBestMv = cTestMv[iMVPIdx];
            iBestMVPIdx = iMVPIdx;
            iBestBits = iMvBits;
          }
        }
      }

      rcMv = cBestMv;
      rcMvPred = amvpInfo.mvCand[iBestMVPIdx];
      riMVPIdx = iBestMVPIdx;
      m_pcRdCost->setPredictor( rcMvPred );

      ruiBits += iBestBits;
      // taken from JEM 5.0
      // verify since it makes no sense to subtract Lambda*(Rmvd+Rmvpidx) from D+Lambda(Rmvd)
      // this would take the rate for the MVP idx out of the cost calculation
      // however this rate is always 1 so impact is small
      ruiCost = uiBestDist - m_pcRdCost->getCost(iBestBits) + m_pcRdCost->getCost(ruiBits);
      // taken from JEM 5.0
      // verify since it makes no sense to add rate for MVDs twice
      ruiBits += m_pcRdCost->getBitsOfVectorWithPredictor(rcMv.getHor(), rcMv.getVer(), cStruct.imvShift);
    }
    
    void InterSearch::xPatternSearchFracDIF(
      const PredictionUnit& pu,
      RefPicList            eRefPicList,
      int                   iRefIdx,
      IntTZSearchStruct&    cStruct,
      const Mv&             rcMvInt,
      Mv&                   rcMvHalf,
      Mv&                   rcMvQter,
      Distortion&           ruiCost
    )
    {
      const bool bIsLosslessCoded = pu.cu->transQuantBypass;
    
      //  Reference pattern initialization (integer scale)
      int         iOffset    = rcMvInt.getHor() + rcMvInt.getVer() * cStruct.iRefStride;
      CPelBuf cPatternRoi(cStruct.piRefY + iOffset, cStruct.iRefStride, *cStruct.pcPatternKey);
    
    
    
      if (cStruct.imvShift || (pu.cs->sps->getSpsNext().getUseCompositeRef() && cStruct.zeroMV))
    
      {
        m_pcRdCost->setDistParam( m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY + iOffset, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, 0, 1, m_pcEncCfg->getUseHADME() && !bIsLosslessCoded );
        ruiCost = m_cDistParam.distFunc( m_cDistParam );
        ruiCost += m_pcRdCost->getCostOfVectorWithPredictor( rcMvInt.getHor(), rcMvInt.getVer(), cStruct.imvShift );
        return;
      }
    
      //  Half-pel refinement
      m_pcRdCost->setCostScale(1);
      xExtDIFUpSamplingH ( &cPatternRoi );
    
      rcMvHalf = rcMvInt;   rcMvHalf <<= 1;    // for mv-cost
      Mv baseRefMv(0, 0);
      ruiCost = xPatternRefinement(cStruct.pcPatternKey, baseRefMv, 2, rcMvHalf, !bIsLosslessCoded);
    
      //  quarter-pel refinement
      m_pcRdCost->setCostScale( 0 );
      xExtDIFUpSamplingQ ( &cPatternRoi, rcMvHalf );
      baseRefMv = rcMvHalf;
      baseRefMv <<= 1;
    
      rcMvQter = rcMvInt;    rcMvQter <<= 1;    // for mv-cost
      rcMvQter += rcMvHalf;  rcMvQter <<= 1;
      ruiCost = xPatternRefinement( cStruct.pcPatternKey, baseRefMv, 1, rcMvQter, !bIsLosslessCoded );
    }
    
    
    #if JVET_M0444_SMVD
    // Returns the DF_HAD luma distortion between origBuf and the bi-prediction formed from
    // cCurMvField (list eCurRefPicList) and cTarMvField (the opposite list). The two luma
    // predictions are combined with a plain average when gbiIdx == GBI_DEFAULT and with a
    // weighted average otherwise.
    Distortion InterSearch::xGetSymmetricCost( PredictionUnit& pu, PelUnitBuf& origBuf, RefPicList eCurRefPicList, const MvField& cCurMvField, MvField& cTarMvField, int gbiIdx )
    {
      const RefPicList eTarRefPicList = (RefPicList)(1 - (int)eCurRefPicList);

      // Luma prediction for one list into that list's temporary prediction storage
      auto predictLuma = [&]( RefPicList list, const MvField& field ) -> PelUnitBuf
      {
        PelUnitBuf     pred   = m_tmpPredStorage[list].getBuf( UnitAreaRelative( *pu.cu, pu ) );
        const Picture* refPic = pu.cu->slice->getRefPic( list, field.refIdx );
        Mv             mv     = field.mv;
        mv.changePrecision( MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL );
        clipMv( mv, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps );
        xPredInterBlk( COMPONENT_Y, pu, refPic, mv, pred, true, pu.cu->slice->clpRng( COMPONENT_Y ), false, false );
        return pred;
      };

      PelUnitBuf predCur = predictLuma( eCurRefPicList, cCurMvField );
      PelUnitBuf predTar = predictLuma( eTarRefPicList, cTarMvField );

      // Combine both predictions
      PelUnitBuf biPred = m_tmpStorageLCU.getBuf( UnitAreaRelative( *pu.cu, pu ) );
      if ( gbiIdx == GBI_DEFAULT )
      {
        biPred.Y().addAvg( predCur.Y(), predTar.Y(), pu.cu->slice->clpRng( COMPONENT_Y ) );
      }
      else
      {
        biPred.Y().addWeightedAvg( predCur.Y(), predTar.Y(), pu.cu->slice->clpRng( COMPONENT_Y ), gbiIdx );
      }

      // Distortion of the combined prediction against the original
      return m_pcRdCost->getDistPart( biPred.Y(), origBuf.Y(), pu.cs->sps->getBitDepth( CHANNEL_TYPE_LUMA ), COMPONENT_Y, DF_HAD );
    }
    
    // Iterative pattern search for a symmetric MV pair.
    //
    // Each round offsets the current MV (rCurMvField) by the pattern's directions, scaled by
    // 1 << nSearchStepShift, derives the mirrored target MV from the two predictors and keeps
    // the pair whose MVD cost + xGetSymmetricCost() is below uiMinCost. The next round only
    // visits directions adjacent to the winning one; the search ends when a round brings no
    // improvement or after uiMaxSearchRounds rounds.
    //
    // SearchPattern: 0 = cross, 1 = square, 2 = diamond, 3 = hexagon; anything else throws.
    // Returns the lowest cost found (uiMinCost if nothing improved).
    Distortion InterSearch::xSymmeticRefineMvSearch( PredictionUnit &pu, PelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred
      , RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion uiMinCost, int SearchPattern, int nSearchStepShift, uint32_t uiMaxSearchRounds, int gbiIdx )
    {
      const Mv crossOffsets[4]   = { Mv( 0 , 1 ) , Mv( 1 , 0 ) , Mv( 0 , -1 ) , Mv( -1 ,  0 ) };
      const Mv squareOffsets[8]  = { Mv( -1 , 1 ) , Mv( 0 , 1 ) , Mv( 1 ,  1 ) , Mv( 1 ,  0 ) , Mv( 1 , -1 ) , Mv( 0 , -1 ) , Mv( -1 , -1 ) , Mv( -1 , 0 ) };
      const Mv diamondOffsets[8] = { Mv( 0 , 2 ) , Mv( 1 , 1 ) , Mv( 2 ,  0 ) , Mv( 1 , -1 ) , Mv( 0 , -2 ) , Mv( -1 , -1 ) , Mv( -2 ,  0 ) , Mv( -1 , 1 ) };
      const Mv hexagonOffsets[6] = { Mv( 2 , 0 ) , Mv( 1 , 2 ) , Mv( -1 ,  2 ) , Mv( -2 ,  0 ) , Mv( -1 , -2 ) , Mv( 1 , -2 ) };

      // Direction window [firstIdx, lastIdx] and the wrap-around parameters of the pattern
      int       firstIdx = 0;
      int       lastIdx  = 0;
      int       wrapAdd  = 0;
      int       wrapMask = 0;
      const Mv* offsets  = nullptr;

      switch ( SearchPattern )
      {
      case 0:
        lastIdx  = 3;
        wrapAdd  = 4;
        wrapMask = 0x03;
        offsets  = crossOffsets;
        break;
      case 1:
        lastIdx  = 7;
        wrapAdd  = 8;
        wrapMask = 0x07;
        offsets  = squareOffsets;
        break;
      case 2:
        lastIdx  = 7;
        wrapAdd  = 8;
        wrapMask = 0x07;
        offsets  = diamondOffsets;
        break;
      case 3:
        // six directions: wrapped explicitly in the loop instead of by mask
        lastIdx  = 5;
        offsets  = hexagonOffsets;
        break;
      default:
        THROW( "Invalid search pattern" );
      }

      for ( uint32_t round = 0; round < uiMaxSearchRounds; round++ )
      {
        int           bestDir = -1;
        const MvField centre  = rCurMvField;

        for ( int idx = firstIdx; idx <= lastIdx; idx++ )
        {
          // map the (possibly out-of-range) window index onto a valid direction
          int dir;
          if ( SearchPattern == 3 )
          {
            if ( idx < 0 )
            {
              dir = idx + 6;
            }
            else if ( idx >= 6 )
            {
              dir = idx - 6;
            }
            else
            {
              dir = idx;
            }
          }
          else
          {
            dir = (idx + wrapAdd) & wrapMask;
          }

          Mv step = offsets[dir];
          step <<= nSearchStepShift;

          MvField cand = centre;
          MvField mirrored;
          cand.mv += step;

          // MVD cost of the candidate
          m_pcRdCost->setPredictor( rcMvCurPred );
          m_pcRdCost->setCostScale( 0 );
          uint32_t   candBits = m_pcRdCost->getBitsOfVectorWithPredictor( cand.mv.getHor(), cand.mv.getVer(), (pu.cu->imv << 1) );
          Distortion candCost = m_pcRdCost->getCost( candBits );

          // target MV: target predictor minus the candidate's MVD
          mirrored.refIdx = rTarMvField.refIdx;
          mirrored.mv.set( rcMvTarPred.hor - (cand.mv.hor - rcMvCurPred.hor), rcMvTarPred.ver - (cand.mv.ver - rcMvCurPred.ver) );
          candCost += xGetSymmetricCost( pu, origBuf, eRefPicList, cand, mirrored, gbiIdx );

          if ( candCost < uiMinCost )
          {
            uiMinCost   = candCost;
            rCurMvField = cand;
            rTarMvField = mirrored;
            bestDir     = dir;
          }
        }

        // no direction improved the cost: done
        if ( bestDir == -1 )
        {
          break;
        }

        // restrict the next round to the neighbourhood of the winning direction
        const int span = ( SearchPattern == 1 || SearchPattern == 2 ) ? 2 - (bestDir & 0x01) : 1;
        firstIdx = bestDir - span;
        lastIdx  = bestDir + span;
      }

      return(uiMinCost);
    }
    
    
    // Symmetric motion estimation: a diamond-pattern refinement followed by a single
    // cross-pattern round. Step size and the number of diamond rounds depend on pu.cu->imv.
    // ruiCost is the starting cost on entry and the refined cost on return.
    void InterSearch::xSymmetricMotionEstimation( PredictionUnit& pu, PelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred, RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion& ruiCost, int gbiIdx )
    {
      const int stepShift     = pu.cu->imv << 1;   // search step is 1 << stepShift
      const int diamondRounds = 8 >> pu.cu->imv;
      const int crossRounds   = 1;

      // pattern 2 = diamond
      ruiCost = xSymmeticRefineMvSearch( pu, origBuf, rcMvCurPred, rcMvTarPred, eRefPicList, rCurMvField, rTarMvField, ruiCost, 2, stepShift, diamondRounds, gbiIdx );
      // pattern 0 = cross
      ruiCost = xSymmeticRefineMvSearch( pu, origBuf, rcMvCurPred, rcMvTarPred, eRefPicList, rCurMvField, rTarMvField, ruiCost, 0, stepShift, crossRounds, gbiIdx );
    }
    #endif // JVET_M0444_SMVD
    
    
    void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
                                              PelUnitBuf&           origBuf,
                                              int                   puIdx,
                                              uint32_t&                 lastMode,
                                              Distortion&           affineCost,
                                              Mv                    hevcMv[2][33]
                                            , Mv                    mvAffine4Para[2][33][3]
                                            , int                   refIdx4Para[2]
    
                                            , uint8_t               gbiIdx
                                            , bool                  enforceGBiPred
                                            , uint32_t              gbiIdxBits
    
                                             )
    {
      const Slice &slice = *pu.cu->slice;
    
      affineCost = std::numeric_limits<Distortion>::max();
    
      Mv        cMvZero;
      Mv        aacMv[2][3];
      Mv        cMvBi[2][3];
      Mv        cMvTemp[2][33][3];
    
      int       iNumPredDir = slice.isInterP() ? 1 : 2;
    
      int mvNum = 2;
      mvNum = pu.cu->affineType ? 3 : 2;
    
      // Mvp
      Mv        cMvPred[2][33][3];
      Mv        cMvPredBi[2][33][3];
      int       aaiMvpIdxBi[2][33];
      int       aaiMvpIdx[2][33];
      int       aaiMvpNum[2][33];
    
      AffineAMVPInfo aacAffineAMVPInfo[2][33];
      AffineAMVPInfo affiAMVPInfoTemp[2];
    
      int           iRefIdx[2]={0,0}; // If un-initialized, may cause SEGV in bi-directional prediction iterative stage.
      int           iRefIdxBi[2];
    
      uint32_t          uiMbBits[3] = {1, 1, 0};
    
      int           iRefStart, iRefEnd;
    
      int           bestBiPRefIdxL1 = 0;
      int           bestBiPMvpL1 = 0;
      Distortion biPDistTemp = std::numeric_limits<Distortion>::max();
    
      Distortion    uiCost[2] = { std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max() };
      Distortion    uiCostBi  = std::numeric_limits<Distortion>::max();
      Distortion    uiCostTemp;
    
    
      uint32_t          uiBitsTemp;
      Distortion    bestBiPDist = std::numeric_limits<Distortion>::max();
    
      Distortion    uiCostTempL0[MAX_NUM_REF];
      for (int iNumRef=0; iNumRef < MAX_NUM_REF; iNumRef++)
      {
        uiCostTempL0[iNumRef] = std::numeric_limits<Distortion>::max();
      }
      uint32_t uiBitsTempL0[MAX_NUM_REF];
    
      Mv            mvValidList1[4];
      int           refIdxValidList1 = 0;
      uint32_t          bitsValidList1 = MAX_UINT;
      Distortion costValidList1 = std::numeric_limits<Distortion>::max();
      Mv            mvHevc[3];
    
    #if JVET_M0246_AFFINE_AMVR
      const bool changeToHighPrec  = pu.cu->imv != 1;
      const bool affineAmvrEnabled = pu.cu->slice->getSPS()->getAffineAmvrEnabledFlag();
    #endif
    
    Karsten Suehring's avatar
    Karsten Suehring committed
      xGetBlkBits( slice.isInterP(), puIdx, lastMode, uiMbBits);
    
      if( gbiIdx != GBI_DEFAULT )
      {
        pu.cu->GBiIdx = gbiIdx;
      }
    
      // Uni-directional prediction
      for ( int iRefList = 0; iRefList < iNumPredDir; iRefList++ )
      {
        RefPicList  eRefPicList = ( iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 );
    
    Xiaozhong Xu's avatar
    Xiaozhong Xu committed
        int refPicNumber = slice.getNumRefIdx(eRefPicList);
    
    Yu Han's avatar
    Yu Han committed
        if (slice.getSPS()->getSpsNext().getIBCMode() && eRefPicList == REF_PIC_LIST_0)
    
    Xiaozhong Xu's avatar
    Xiaozhong Xu committed
        {
          refPicNumber--;
        }
        for (int iRefIdxTemp = 0; iRefIdxTemp < refPicNumber; iRefIdxTemp++)
    
        {
          // Get RefIdx bits
          uiBitsTemp = uiMbBits[iRefList];
          if ( slice.getNumRefIdx(eRefPicList) > 1 )
          {
            uiBitsTemp += iRefIdxTemp+1;
            if ( iRefIdxTemp == slice.getNumRefIdx(eRefPicList)-1 )
            {
              uiBitsTemp--;
            }
          }
    
          // Do Affine AMVP
          xEstimateAffineAMVP( pu, affiAMVPInfoTemp[eRefPicList], origBuf, eRefPicList, iRefIdxTemp, cMvPred[iRefList][iRefIdxTemp], &biPDistTemp );
    
    #if JVET_M0246_AFFINE_AMVR
          if ( affineAmvrEnabled )
          {
            biPDistTemp += m_pcRdCost->getCost( xCalcAffineMVBits( pu, cMvPred[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp] ) );
          }
    #endif
    
          aaiMvpIdx[iRefList][iRefIdxTemp] = pu.mvpIdx[eRefPicList];
          aaiMvpNum[iRefList][iRefIdxTemp] = pu.mvpNum[eRefPicList];;
          if ( pu.cu->affineType == AFFINEMODEL_6PARAM && refIdx4Para[iRefList] != iRefIdxTemp )
          {
            xCopyAffineAMVPInfo( affiAMVPInfoTemp[eRefPicList], aacAffineAMVPInfo[iRefList][iRefIdxTemp] );
            continue;
          }
    
          // use the hevc ME result as start search position when it is better than the mvp
          for ( int i=0; i<3; i++ )
          {
            mvHevc[i] = hevcMv[iRefList][iRefIdxTemp];
    
    #if JVET_M0246_AFFINE_AMVR
            if ( pu.cu->imv == 1 )
            {
              mvHevc[i].changePrecision( MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL );
            }
            else if ( pu.cu->imv == 2 )
            {
              mvHevc[i].roundToPrecision( MV_PRECISION_QUARTER, MV_PRECISION_INT );
            }
    #endif
    
          }
          PelUnitBuf predBuf = m_tmpStorageLCU.getBuf( UnitAreaRelative(*pu.cu, pu) );
    
          Distortion uiCandCost = xGetAffineTemplateCost(pu, origBuf, predBuf, mvHevc, aaiMvpIdx[iRefList][iRefIdxTemp],
                                                         AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdxTemp);
    
    
    #if JVET_M0246_AFFINE_AMVR
          if ( affineAmvrEnabled )
          {
            uiCandCost += m_pcRdCost->getCost( xCalcAffineMVBits( pu, mvHevc, cMvPred[iRefList][iRefIdxTemp] ) );
          }
    
          //check stored affine motion
          bool affine4Para    = pu.cu->affineType == AFFINEMODEL_4PARAM;
          bool savedParaAvail = pu.cu->imv && ( ( m_affineMotion.affine4ParaRefIdx[iRefList] == iRefIdxTemp && affine4Para && m_affineMotion.affine4ParaAvail ) || 
                                                ( m_affineMotion.affine6ParaRefIdx[iRefList] == iRefIdxTemp && !affine4Para && m_affineMotion.affine6ParaAvail ) );
    
          if ( savedParaAvail )
          {
            Mv mvFour[3];
            for ( int i = 0; i < mvNum; i++ )
            {
              mvFour[i] = affine4Para ? m_affineMotion.acMvAffine4Para[iRefList][i] : m_affineMotion.acMvAffine6Para[iRefList][i];
              if ( pu.cu->imv != 1 )
              {
                mvFour[i].roundToPrecision( MV_PRECISION_INTERNAL, pu.cu->imv == 2 ? MV_PRECISION_INT : MV_PRECISION_QUARTER );
                mvFour[i].changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER );
              }
            }
    
            Distortion candCostInherit = xGetAffineTemplateCost( pu, origBuf, predBuf, mvFour, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdxTemp );
            candCostInherit += m_pcRdCost->getCost( xCalcAffineMVBits( pu, mvFour, cMvPred[iRefList][iRefIdxTemp] ) );
    
            if ( candCostInherit < uiCandCost )
            {
              uiCandCost = candCostInherit;
              memcpy( mvHevc, mvFour, 3 * sizeof( Mv ) );
            }
          }
    #endif
    
    
          if (pu.cu->affineType == AFFINEMODEL_4PARAM && m_affMVListSize
            && (!pu.cu->cs->sps->getSpsNext().getUseGBi() || gbiIdx == GBI_DEFAULT)
            )
          {
            int shift = MAX_CU_DEPTH;
            for (int i = 0; i < m_affMVListSize; i++)
            {
              AffineMVInfo *mvInfo = m_affMVList + ((m_affMVListIdx - i - 1 + m_affMVListMaxSize) % (m_affMVListMaxSize));
              //check;
              int j = 0;
              for (; j < i; j++)
              {
                AffineMVInfo *prevMvInfo = m_affMVList + ((m_affMVListIdx - j - 1 + m_affMVListMaxSize) % (m_affMVListMaxSize));
                if ((mvInfo->affMVs[iRefList][iRefIdxTemp][0] == prevMvInfo->affMVs[iRefList][iRefIdxTemp][0]) &&
                  (mvInfo->affMVs[iRefList][iRefIdxTemp][1] == prevMvInfo->affMVs[iRefList][iRefIdxTemp][1])
                  && (mvInfo->x == prevMvInfo->x) && (mvInfo->y == prevMvInfo->y)
                  && (mvInfo->w == prevMvInfo->w)
                  )
                {
                  break;
                }
              }
              if (j < i)
                continue;
    
              Mv mvTmp[3], *nbMv = mvInfo->affMVs[iRefList][iRefIdxTemp];
              int vx, vy;
              int dMvHorX, dMvHorY, dMvVerX, dMvVerY;
              int mvScaleHor = nbMv[0].getHor() << shift;
              int mvScaleVer = nbMv[0].getVer() << shift;
              Mv dMv = nbMv[1] - nbMv[0];
    
              mvScaleHor <<= MV_FRACTIONAL_BITS_DIFF;
              mvScaleVer <<= MV_FRACTIONAL_BITS_DIFF;
              dMv <<= MV_FRACTIONAL_BITS_DIFF;
    
              dMvHorX = dMv.getHor() << (shift - g_aucLog2[mvInfo->w]);
              dMvHorY = dMv.getVer() << (shift - g_aucLog2[mvInfo->w]);
              dMvVerX = -dMvHorY;
              dMvVerY = dMvHorX;
              vx = mvScaleHor + dMvHorX * (pu.Y().x - mvInfo->x) + dMvVerX * (pu.Y().y - mvInfo->y);
              vy = mvScaleVer + dMvHorY * (pu.Y().x - mvInfo->x) + dMvVerY * (pu.Y().y - mvInfo->y);
              roundAffineMv(vx, vy, shift);
              mvTmp[0] = Mv(vx, vy);
    
    #if JVET_M0145_AFFINE_MV_CLIP
              mvTmp[0].clipToStorageBitDepth();
    #endif
    
              clipMv(mvTmp[0], pu.cu->lumaPos(),
                     pu.cu->lumaSize(),
                     *pu.cs->sps);
    
    #if JVET_M0246_AFFINE_AMVR
              if ( pu.cu->imv == 2 )
              {
                mvTmp[0].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT );
              }
              else if ( pu.cu->imv == 0 )
    #endif
    
              mvTmp[0].roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
    
              vx = mvScaleHor + dMvHorX * (pu.Y().x + pu.Y().width - mvInfo->x) + dMvVerX * (pu.Y().y - mvInfo->y);
              vy = mvScaleVer + dMvHorY * (pu.Y().x + pu.Y().width - mvInfo->x) + dMvVerY * (pu.Y().y - mvInfo->y);
              roundAffineMv(vx, vy, shift);
              mvTmp[1] = Mv(vx, vy);
    
    #if JVET_M0145_AFFINE_MV_CLIP
              mvTmp[1].clipToStorageBitDepth();
    #endif
    
              clipMv(mvTmp[1], pu.cu->lumaPos(),
                     pu.cu->lumaSize(),
                     *pu.cs->sps);
    
    #if JVET_M0246_AFFINE_AMVR
              if ( pu.cu->imv != 1 )
              {
                mvTmp[1].roundToPrecision( MV_PRECISION_INTERNAL, pu.cu->imv == 2 ? MV_PRECISION_INT : MV_PRECISION_QUARTER );
                mvTmp[0].changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER );
                mvTmp[1].changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER );
              }
    #else
    
              mvTmp[1].roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
              mvTmp[0].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
              mvTmp[1].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
    
              Distortion tmpCost = xGetAffineTemplateCost(pu, origBuf, predBuf, mvTmp, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdxTemp);
    
    #if JVET_M0246_AFFINE_AMVR
              if ( affineAmvrEnabled )
              {
                tmpCost += m_pcRdCost->getCost( xCalcAffineMVBits( pu, mvTmp, cMvPred[iRefList][iRefIdxTemp] ) );
              }
    #endif
    
              if (tmpCost < uiCandCost)
              {
                uiCandCost = tmpCost;
                std::memcpy(mvHevc, mvTmp, 3 * sizeof(Mv));
              }
            }
          }
    
          if ( pu.cu->affineType == AFFINEMODEL_6PARAM )
          {
            Mv mvFour[3];
    
    #if JVET_M0246_AFFINE_AMVR
            if ( pu.cu->imv != 1 )
            {
    #endif
              mvAffine4Para[iRefList][iRefIdxTemp][0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
              mvAffine4Para[iRefList][iRefIdxTemp][1].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
    #if JVET_M0246_AFFINE_AMVR
            }
    #endif
    
            mvFour[0] = mvAffine4Para[iRefList][iRefIdxTemp][0];
            mvFour[1] = mvAffine4Para[iRefList][iRefIdxTemp][1];
    
    #if JVET_M0246_AFFINE_AMVR
            if ( pu.cu->imv != 1 )
            {
    #endif
              mvAffine4Para[iRefList][iRefIdxTemp][0].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
              mvAffine4Para[iRefList][iRefIdxTemp][1].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
    #if JVET_M0246_AFFINE_AMVR
            }
    #endif
    
            int shift = MAX_CU_DEPTH;
            int vx2 = (mvFour[0].getHor() << shift) - ((mvFour[1].getVer() - mvFour[0].getVer()) << (shift + g_aucLog2[pu.lheight()] - g_aucLog2[pu.lwidth()]));
            int vy2 = (mvFour[0].getVer() << shift) + ((mvFour[1].getHor() - mvFour[0].getHor()) << (shift + g_aucLog2[pu.lheight()] - g_aucLog2[pu.lwidth()]));
            vx2 >>= shift;
            vy2 >>= shift;
    
            mvFour[2].hor = vx2;
            mvFour[2].ver = vy2;
    
    #if JVET_M0145_AFFINE_MV_CLIP
            mvFour[2].clipToStorageBitDepth();
    #endif
    
    #if JVET_M0246_AFFINE_AMVR
            if ( pu.cu->imv != 1 )
            {
              mvFour[0].roundToPrecision( MV_PRECISION_INTERNAL, pu.cu->imv == 2 ? MV_PRECISION_INT : MV_PRECISION_QUARTER );
              mvFour[1].roundToPrecision( MV_PRECISION_INTERNAL, pu.cu->imv == 2 ? MV_PRECISION_INT : MV_PRECISION_QUARTER );
              mvFour[2].roundToPrecision( MV_PRECISION_INTERNAL, pu.cu->imv == 2 ? MV_PRECISION_INT : MV_PRECISION_QUARTER );
            }
    #else
    
            mvFour[2].roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
    
            for (int i = 0; i < 3; i++)
            {
    
    #if JVET_M0246_AFFINE_AMVR
              if ( pu.cu->imv != 1 )
              {
                mvFour[i].changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER );
              }
    #else
    
              mvFour[i].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
    
            Distortion uiCandCostInherit = xGetAffineTemplateCost( pu, origBuf, predBuf, mvFour, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdxTemp );
    
    #if JVET_M0246_AFFINE_AMVR
            if ( affineAmvrEnabled )
            {
              uiCandCostInherit += m_pcRdCost->getCost( xCalcAffineMVBits( pu, mvFour, cMvPred[iRefList][iRefIdxTemp] ) );
            }
    #endif
    
            if ( uiCandCostInherit < uiCandCost )
            {
              uiCandCost = uiCandCostInherit;
              for ( int i = 0; i < 3; i++ )
              {
                mvHevc[i] = mvFour[i];
              }
            }
          }
    
          if ( uiCandCost < biPDistTemp )
          {
            ::memcpy( cMvTemp[iRefList][iRefIdxTemp], mvHevc, sizeof(Mv)*3 );
          }
          else
          {
            ::memcpy( cMvTemp[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp], sizeof(Mv)*3 );
          }
    
          // GPB list 1, save the best MvpIdx, RefIdx and Cost
          if ( slice.getMvdL1ZeroFlag() && iRefList==1 && biPDistTemp < bestBiPDist )
          {
            bestBiPDist = biPDistTemp;
            bestBiPMvpL1 = aaiMvpIdx[iRefList][iRefIdxTemp];
            bestBiPRefIdxL1 = iRefIdxTemp;
          }
    
          // Update bits
          uiBitsTemp += m_auiMVPIdxCost[aaiMvpIdx[iRefList][iRefIdxTemp]][AMVP_MAX_NUM_CANDS];
    
          if ( m_pcEncCfg->getFastMEForGenBLowDelayEnabled() && iRefList == 1 )   // list 1
          {
    
            if ( slice.getList1IdxToList0Idx( iRefIdxTemp ) >= 0 && (pu.cu->affineType != AFFINEMODEL_6PARAM || slice.getList1IdxToList0Idx( iRefIdxTemp ) == refIdx4Para[0]) )
    
            {
              int iList1ToList0Idx = slice.getList1IdxToList0Idx( iRefIdxTemp );
              ::memcpy( cMvTemp[1][iRefIdxTemp], cMvTemp[0][iList1ToList0Idx], sizeof(Mv)*3 );
              uiCostTemp = uiCostTempL0[iList1ToList0Idx];
    
              uiCostTemp -= m_pcRdCost->getCost( uiBitsTempL0[iList1ToList0Idx] );
    
    #if JVET_M0246_AFFINE_AMVR
              uiBitsTemp += xCalcAffineMVBits( pu, cMvTemp[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp] );
    #else
    
              for (int iVerIdx = 0; iVerIdx < mvNum; iVerIdx++)
              {
                m_pcRdCost->setPredictor( cMvPred[iRefList][iRefIdxTemp][iVerIdx] );
    
                const int shift = 0;
    
                Mv secondPred;
                if ( iVerIdx != 0 )
                {
                  secondPred = cMvPred[iRefList][iRefIdxTemp][iVerIdx] + (cMvTemp[1][iRefIdxTemp][0] - cMvPred[1][iRefIdxTemp][0]);
                  m_pcRdCost->setPredictor( secondPred );
                }
                uiBitsTemp += m_pcRdCost->getBitsOfVectorWithPredictor( cMvTemp[1][iRefIdxTemp][iVerIdx].getHor()>>shift, cMvTemp[1][iRefIdxTemp][iVerIdx].getVer()>>shift, 0 );
              }
    
              /*calculate the correct cost*/
              uiCostTemp += m_pcRdCost->getCost( uiBitsTemp );
              DTRACE( g_trace_ctx, D_COMMON, " (%d) uiCostTemp=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiCostTemp );
            }
            else
            {
              xAffineMotionEstimation( pu, origBuf, eRefPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp );
            }
          }
          else
          {
            xAffineMotionEstimation( pu, origBuf, eRefPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp );
          }
    
          if(pu.cu->cs->sps->getSpsNext().getUseGBi() && pu.cu->GBiIdx == GBI_DEFAULT && pu.cu->slice->isInterB())
          {
    
            m_uniMotions.setReadModeAffine(true, (uint8_t)iRefList, (uint8_t)iRefIdxTemp, pu.cu->affineType);
    
            m_uniMotions.copyAffineMvFrom(cMvTemp[iRefList][iRefIdxTemp], uiCostTemp - m_pcRdCost->getCost(uiBitsTemp), (uint8_t)iRefList, (uint8_t)iRefIdxTemp, pu.cu->affineType);
    
          // Set best AMVP Index
          xCopyAffineAMVPInfo( affiAMVPInfoTemp[eRefPicList], aacAffineAMVPInfo[iRefList][iRefIdxTemp] );
          xCheckBestAffineMVP( pu, affiAMVPInfoTemp[eRefPicList], eRefPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp );
    
          if ( iRefList == 0 )
          {
            uiCostTempL0[iRefIdxTemp] = uiCostTemp;
            uiBitsTempL0[iRefIdxTemp] = uiBitsTemp;
          }
          DTRACE( g_trace_ctx, D_COMMON, " (%d) uiCostTemp=%d, uiCost[iRefList]=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiCostTemp, uiCost[iRefList] );
          if ( uiCostTemp < uiCost[iRefList] )
          {
            uiCost[iRefList] = uiCostTemp;
            uiBits[iRefList] = uiBitsTemp; // storing for bi-prediction