Skip to content
Snippets Groups Projects
InterSearch.cpp 223 KiB
Newer Older
  • Learn to ignore specific revisions
  •                 costStart = cost;
                    cMvPredSym[curRefList] = aacAMVPInfo[curRefList][refIdxCur].mvCand[i];
                    cMvPredSym[tarRefList] = aacAMVPInfo[tarRefList][refIdxTar].mvCand[j];
                    mvpIdxSym[curRefList] = i;
                    mvpIdxSym[tarRefList] = j;
                  }
                }
              }
              cCurMvField.mv = cMvPredSym[curRefList];
              cTarMvField.mv = cMvPredSym[tarRefList];
    
              m_pcRdCost->setCostScale(0);
              m_pcRdCost->setPredictor(cMvPredSym[curRefList]);
              uint32_t bits = m_pcRdCost->getBitsOfVectorWithPredictor(cCurMvField.mv.hor, cCurMvField.mv.ver, (pu.cu->imv << 1));
              bits += m_auiMVPIdxCost[mvpIdxSym[curRefList]][AMVP_MAX_NUM_CANDS];
              bits += m_auiMVPIdxCost[mvpIdxSym[tarRefList]][AMVP_MAX_NUM_CANDS];
              costStart += m_pcRdCost->getCost(bits);
    
              std::vector<Mv> symmvdCands;
              symmvdCands.push_back(cMvTemp[curRefList][refIdxCur]);
              if (iRefIdxBi[curRefList] == refIdxCur && cMvBi[curRefList] != cMvTemp[curRefList][refIdxCur])
              {
                symmvdCands.push_back(cMvBi[curRefList]);
              }
    
              for (auto mvStart : symmvdCands)
              {
                bool checked = false; //if it has been checkin in the mvPred.
                for (int i = 0; i < aacAMVPInfo[curRefList][refIdxCur].numCand && !checked; i++)
                {
                  checked |= (mvStart == aacAMVPInfo[curRefList][refIdxCur].mvCand[i]);
                }
                if (checked)
                  break;
    
                Distortion bestCost = costStart;
                symmvdCheckBestMvp(pu, origBuf, mvStart, (RefPicList)curRefList, aacAMVPInfo, gbiIdx, cMvPredSym, mvpIdxSym, costStart);
                if (costStart < bestCost)
                {
                  cCurMvField.setMvField(mvStart, refIdxCur);
                  cTarMvField.setMvField(mvStart.getSymmvdMv(cMvPredSym[curRefList], cMvPredSym[tarRefList]), refIdxTar);
                }
              }
              Mv startPtMv = cCurMvField.mv;
    
              Distortion mvpCost = m_pcRdCost->getCost(m_auiMVPIdxCost[mvpIdxSym[curRefList]][AMVP_MAX_NUM_CANDS] + m_auiMVPIdxCost[mvpIdxSym[tarRefList]][AMVP_MAX_NUM_CANDS]);
              symCost = costStart - mvpCost;
              
              // ME
              xSymmetricMotionEstimation( pu, origBuf, cMvPredSym[curRefList], cMvPredSym[tarRefList], eCurRefList, cCurMvField, cTarMvField, symCost, gbiIdx );
    
              symCost += mvpCost;
    
              if (startPtMv != cCurMvField.mv)
              { // if ME change MV, run a final check for best MVP.
                symmvdCheckBestMvp(pu, origBuf, cCurMvField.mv, (RefPicList)curRefList, aacAMVPInfo, gbiIdx, cMvPredSym, mvpIdxSym, symCost, true);
              }
    
              bits = uiMbBits[2];
              bits += 1; // add one bit for symmetrical MVD mode
              bits += ((cs.slice->getSPS()->getSpsNext().getUseGBi() == true) ? getWeightIdxBits(gbiIdx) : 0);
              symCost += m_pcRdCost->getCost(bits);
              cTarMvField.setMvField(cCurMvField.mv.getSymmvdMv(cMvPredSym[curRefList], cMvPredSym[tarRefList]), refIdxTar);
    
              // save results
              if ( symCost < uiCostBi )
              {
                uiCostBi = symCost;
                symMode = 1 + curRefList;
    
                cMvBi[curRefList] = cCurMvField.mv;
                iRefIdxBi[curRefList] = cCurMvField.refIdx;
                aaiMvpIdxBi[curRefList][cCurMvField.refIdx] = mvpIdxSym[curRefList];
                cMvPredBi[curRefList][iRefIdxBi[curRefList]] = cMvPredSym[curRefList];
    
                cMvBi[tarRefList] = cTarMvField.mv;
                iRefIdxBi[tarRefList] = cTarMvField.refIdx;
                aaiMvpIdxBi[tarRefList][cTarMvField.refIdx] = mvpIdxSym[tarRefList];
                cMvPredBi[tarRefList][iRefIdxBi[tarRefList]] = cMvPredSym[tarRefList];
              }
            }
    #endif
    
          } // if (B_SLICE)
    
    
    
          //  Clear Motion Field
        pu.mv    [REF_PIC_LIST_0] = Mv();
        pu.mv    [REF_PIC_LIST_1] = Mv();
        pu.mvd   [REF_PIC_LIST_0] = cMvZero;
        pu.mvd   [REF_PIC_LIST_1] = cMvZero;
        pu.refIdx[REF_PIC_LIST_0] = NOT_VALID;
        pu.refIdx[REF_PIC_LIST_1] = NOT_VALID;
        pu.mvpIdx[REF_PIC_LIST_0] = NOT_VALID;
        pu.mvpIdx[REF_PIC_LIST_1] = NOT_VALID;
        pu.mvpNum[REF_PIC_LIST_0] = NOT_VALID;
        pu.mvpNum[REF_PIC_LIST_1] = NOT_VALID;
    
    
        // Set Motion Field
    
        cMv    [1] = mvValidList1;
        iRefIdx[1] = refIdxValidList1;
        uiBits [1] = bitsValidList1;
        uiCost [1] = costValidList1;
    
    
        if( enforceGBiPred )
        {
          uiCost[0] = uiCost[1] = MAX_UINT;
        }
    
          uiLastModeTemp = uiLastMode;
          if ( uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1])
          {
            uiLastMode = 2;
            pu.mv    [REF_PIC_LIST_0] = cMvBi[0];
            pu.mv    [REF_PIC_LIST_1] = cMvBi[1];
    
            pu.mv[REF_PIC_LIST_0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); 
            pu.mv[REF_PIC_LIST_1].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); 
    
            pu.mvd   [REF_PIC_LIST_0] = cMvBi[0] - cMvPredBi[0][iRefIdxBi[0]];
            pu.mvd   [REF_PIC_LIST_1] = cMvBi[1] - cMvPredBi[1][iRefIdxBi[1]];
            pu.refIdx[REF_PIC_LIST_0] = iRefIdxBi[0];
            pu.refIdx[REF_PIC_LIST_1] = iRefIdxBi[1];
            pu.mvpIdx[REF_PIC_LIST_0] = aaiMvpIdxBi[0][iRefIdxBi[0]];
            pu.mvpIdx[REF_PIC_LIST_1] = aaiMvpIdxBi[1][iRefIdxBi[1]];
            pu.mvpNum[REF_PIC_LIST_0] = aaiMvpNum[0][iRefIdxBi[0]];
            pu.mvpNum[REF_PIC_LIST_1] = aaiMvpNum[1][iRefIdxBi[1]];
            pu.interDir = 3;
    
    
    #if JVET_M0444_SMVD
            pu.cu->smvdMode = symMode;
    #endif
    
          }
          else if ( uiCost[0] <= uiCost[1] )
          {
            uiLastMode = 0;
            pu.mv    [REF_PIC_LIST_0] = cMv[0];
    
            pu.mv    [REF_PIC_LIST_0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); 
    
            pu.mvd   [REF_PIC_LIST_0] = cMv[0] - cMvPred[0][iRefIdx[0]];
            pu.refIdx[REF_PIC_LIST_0] = iRefIdx[0];
            pu.mvpIdx[REF_PIC_LIST_0] = aaiMvpIdx[0][iRefIdx[0]];
            pu.mvpNum[REF_PIC_LIST_0] = aaiMvpNum[0][iRefIdx[0]];
            pu.interDir = 1;
          }
          else
          {
            uiLastMode = 1;
            pu.mv    [REF_PIC_LIST_1] = cMv[1];
    
            pu.mv    [REF_PIC_LIST_1].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); 
    
            pu.mvd   [REF_PIC_LIST_1] = cMv[1] - cMvPred[1][iRefIdx[1]];
            pu.refIdx[REF_PIC_LIST_1] = iRefIdx[1];
            pu.mvpIdx[REF_PIC_LIST_1] = aaiMvpIdx[1][iRefIdx[1]];
            pu.mvpNum[REF_PIC_LIST_1] = aaiMvpNum[1][iRefIdx[1]];
            pu.interDir = 2;
          }
    
    
          if( gbiIdx != GBI_DEFAULT )
          {
            cu.GBiIdx = GBI_DEFAULT; // Reset to default for the Non-NormalMC modes.
          }
    
    Karsten Suehring's avatar
    Karsten Suehring committed
        uiHevcCost = ( uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1] ) ? uiCostBi : ( ( uiCost[0] <= uiCost[1] ) ? uiCost[0] : uiCost[1] );
    
    #if JVET_M0246_AFFINE_AMVR
        }
    #endif
        if (cu.Y().width > 8 && cu.Y().height > 8 && cu.slice->getSPS()->getSpsNext().getUseAffine() 
    #if JVET_M0246_AFFINE_AMVR
          && checkAffine
    #else
          && cu.imv == 0
    #endif
    
          && (gbiIdx == GBI_DEFAULT || m_affineModeSelected || !m_pcEncCfg->getUseGBiFast())
          )
    
          m_hevcCost = uiHevcCost;
    
          // save normal hevc result
          uint32_t uiMRGIndex = pu.mergeIdx;
          bool bMergeFlag = pu.mergeFlag;
          uint32_t uiInterDir = pu.interDir;
    
    #if JVET_M0444_SMVD
          int  iSymMode = cu.smvdMode;
    #endif
    
    
          Mv cMvd[2];
          uint32_t uiMvpIdx[2], uiMvpNum[2];
          uiMvpIdx[0] = pu.mvpIdx[REF_PIC_LIST_0];
          uiMvpIdx[1] = pu.mvpIdx[REF_PIC_LIST_1];
          uiMvpNum[0] = pu.mvpNum[REF_PIC_LIST_0];
          uiMvpNum[1] = pu.mvpNum[REF_PIC_LIST_1];
          cMvd[0]     = pu.mvd[REF_PIC_LIST_0];
          cMvd[1]     = pu.mvd[REF_PIC_LIST_1];
    
          MvField cHevcMvField[2];
          cHevcMvField[0].setMvField( pu.mv[REF_PIC_LIST_0], pu.refIdx[REF_PIC_LIST_0] );
          cHevcMvField[1].setMvField( pu.mv[REF_PIC_LIST_1], pu.refIdx[REF_PIC_LIST_1] );
    
          // do affine ME & Merge
          cu.affineType = AFFINEMODEL_4PARAM;
          Mv acMvAffine4Para[2][33][3];
          int refIdx4Para[2] = { -1, -1 };
    
    
          xPredAffineInterSearch(pu, origBuf, puIdx, uiLastModeTemp, uiAffineCost, cMvHevcTemp, acMvAffine4Para, refIdx4Para, gbiIdx, enforceGBiPred,
            ((cu.slice->getSPS()->getSpsNext().getUseGBi() == true) ? getWeightIdxBits(gbiIdx) : 0));
    
    
    #if JVET_M0246_AFFINE_AMVR
          if ( pu.cu->imv == 0 )
          {
            storeAffineMotion( pu.mvAffi, pu.refIdx, AFFINEMODEL_4PARAM, gbiIdx );
          }
    #endif
    
    
          if ( cu.slice->getSPS()->getSpsNext().getUseAffineType() )
          {
            if ( uiAffineCost < uiHevcCost * 1.05 ) ///< condition for 6 parameter affine ME
            {
              // save 4 parameter results
              Mv bestMv[2][3], bestMvd[2][3];
              int bestMvpIdx[2], bestMvpNum[2], bestRefIdx[2];
              uint8_t bestInterDir;
    
              bestInterDir = pu.interDir;
              bestRefIdx[0] = pu.refIdx[0];
              bestRefIdx[1] = pu.refIdx[1];
              bestMvpIdx[0] = pu.mvpIdx[0];
              bestMvpIdx[1] = pu.mvpIdx[1];
              bestMvpNum[0] = pu.mvpNum[0];
              bestMvpNum[1] = pu.mvpNum[1];
    
              for ( int refList = 0; refList < 2; refList++ )
              {
    
                bestMv[refList][0] = pu.mvAffi[refList][0];
                bestMv[refList][1] = pu.mvAffi[refList][1];
                bestMv[refList][2] = pu.mvAffi[refList][2];
    
                bestMvd[refList][0] = pu.mvdAffi[refList][0];
                bestMvd[refList][1] = pu.mvdAffi[refList][1];
                bestMvd[refList][2] = pu.mvdAffi[refList][2];
              }
    
              refIdx4Para[0] = bestRefIdx[0];
              refIdx4Para[1] = bestRefIdx[1];
    
              Distortion uiAffine6Cost = std::numeric_limits<Distortion>::max();
              cu.affineType = AFFINEMODEL_6PARAM;
    
              xPredAffineInterSearch(pu, origBuf, puIdx, uiLastModeTemp, uiAffine6Cost, cMvHevcTemp, acMvAffine4Para, refIdx4Para, gbiIdx, enforceGBiPred,
                ((cu.slice->getSPS()->getSpsNext().getUseGBi() == true) ? getWeightIdxBits(gbiIdx) : 0));
    
    #if JVET_M0246_AFFINE_AMVR
              if ( pu.cu->imv == 0 )
              {
                storeAffineMotion( pu.mvAffi, pu.refIdx, AFFINEMODEL_6PARAM, gbiIdx );
              }
    #endif
    
    
              // reset to 4 parameter affine inter mode
              if ( uiAffineCost <= uiAffine6Cost )
              {
                cu.affineType = AFFINEMODEL_4PARAM;
                pu.interDir = bestInterDir;
                pu.refIdx[0] = bestRefIdx[0];
                pu.refIdx[1] = bestRefIdx[1];
                pu.mvpIdx[0] = bestMvpIdx[0];
                pu.mvpIdx[1] = bestMvpIdx[1];
                pu.mvpNum[0] = bestMvpNum[0];
                pu.mvpNum[1] = bestMvpNum[1];
    
                for ( int verIdx = 0; verIdx < 3; verIdx++ )
                {
                  pu.mvdAffi[REF_PIC_LIST_0][verIdx] = bestMvd[0][verIdx];
                  pu.mvdAffi[REF_PIC_LIST_1][verIdx] = bestMvd[1][verIdx];
                }
    
    
                PU::setAllAffineMv( pu, bestMv[0][0], bestMv[0][1], bestMv[0][2], REF_PIC_LIST_0 
                  , false
                );
                PU::setAllAffineMv( pu, bestMv[1][0], bestMv[1][1], bestMv[1][2], REF_PIC_LIST_1 
                  , false
                );
    
              }
              else
              {
                uiAffineCost = uiAffine6Cost;
              }
            }
    
            uiAffineCost += m_pcRdCost->getCost( 1 ); // add one bit for affine_type
          }
    
          if ( uiHevcCost <= uiAffineCost )
          {
            // set hevc me result
            cu.affine = false;
            pu.mergeFlag = bMergeFlag;
            pu.mergeIdx = uiMRGIndex;
            pu.interDir = uiInterDir;
    
    #if JVET_M0444_SMVD
            cu.smvdMode = iSymMode;
    #endif
    
            pu.mv    [REF_PIC_LIST_0] = cHevcMvField[0].mv;
            pu.refIdx[REF_PIC_LIST_0] = cHevcMvField[0].refIdx;
            pu.mv    [REF_PIC_LIST_1] = cHevcMvField[1].mv;
            pu.refIdx[REF_PIC_LIST_1] = cHevcMvField[1].refIdx;
            pu.mvpIdx[REF_PIC_LIST_0] = uiMvpIdx[0];
            pu.mvpIdx[REF_PIC_LIST_1] = uiMvpIdx[1];
            pu.mvpNum[REF_PIC_LIST_0] = uiMvpNum[0];
            pu.mvpNum[REF_PIC_LIST_1] = uiMvpNum[1];
            pu.mvd[REF_PIC_LIST_0] = cMvd[0];
            pu.mvd[REF_PIC_LIST_1] = cMvd[1];
          }
          else
          {
    
    #if JVET_M0444_SMVD
            cu.smvdMode = 0;
    #endif
    
            CHECK( !cu.affine, "Wrong." );
            uiLastMode = uiLastModeTemp;
          }
        }
    
        if( cu.firstPU->interDir == 3 && !cu.firstPU->mergeFlag )
        {
          if (gbiIdx != GBI_DEFAULT)
          {
            cu.GBiIdx = gbiIdx;
          }
        }
    
        m_maxCompIDToPred = MAX_NUM_COMPONENT;
    
        {
          PU::spanMotionInfo( pu, mergeCtx );
        }
    
        //  MC
        PelUnitBuf predBuf = pu.cs->getPredBuf(pu);
    
    #if JVET_M0246_AFFINE_AMVR
        if ( gbiIdx == GBI_DEFAULT || !m_affineMotion.affine4ParaAvail || !m_affineMotion.affine6ParaAvail )
        {
          m_affineMotion.hevcCost[pu.cu->imv] = uiHevcCost;
        }
    #endif
    
        motionCompensation( pu, predBuf, REF_PIC_LIST_X );
        puIdx++;
      }
    
      setWpScalingDistParam( -1, REF_PIC_LIST_X, cu.cs->slice );
    
      return;
    }
    
    
    #if JVET_M0246_AFFINE_AMVR
    /**
     * Estimate the number of bits needed to signal the control-point MVs of an affine candidate.
     *
     * \param pu          prediction unit; pu.cu->affineType selects 3 (non-zero) or 2 (zero) control
     *                    points, pu.cu->imv == 2 selects an MVD shift of MV_FRACTIONAL_BITS_DIFF
     * \param acMvTemp    control-point MVs to be coded
     * \param acMvPred    control-point MV predictors
     * \param mvHighPrec  when true, the first control point is converted from internal to quarter
     *                    precision before it is used for prediction, and every MV component is
     *                    right-shifted by MV_FRACTIONAL_BITS_DIFF before the bits are counted
     * \return sum of the bit estimates of all control points
     *
     * Side effects: sets the cost scale of m_pcRdCost to 0 and leaves its predictor set to the one
     * used for the last control point.
     */
    uint32_t InterSearch::xCalcAffineMVBits( PredictionUnit& pu, Mv acMvTemp[3], Mv acMvPred[3], bool mvHighPrec )
    {
      const int          mvNum    = pu.cu->affineType ? 3 : 2;
      const int          shift    = mvHighPrec ? MV_FRACTIONAL_BITS_DIFF : 0;
      const unsigned int mvdShift = pu.cu->imv == 2 ? MV_FRACTIONAL_BITS_DIFF : 0;

      Mv tempMv0 = acMvTemp[0];
      if ( mvHighPrec )
      {
        tempMv0.changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER );
      }

      m_pcRdCost->setCostScale( 0 );
      uint32_t bitsTemp = 0;

      for ( int verIdx = 0; verIdx < mvNum; verIdx++ )
      {
        if ( verIdx == 0 )
        {
          m_pcRdCost->setPredictor( acMvPred[0] );
        }
        else
        {
          // Later control points are predicted from their own predictor plus the difference
          // between the first control point and its predictor.
          const Mv secondPred = acMvPred[verIdx] + ( tempMv0 - acMvPred[0] );
          m_pcRdCost->setPredictor( secondPred );
        }

        bitsTemp += m_pcRdCost->getBitsOfVectorWithPredictor( acMvTemp[verIdx].getHor() >> shift, acMvTemp[verIdx].getVer() >> shift, mvdShift );
      }

      return bitsTemp;
    }
    #endif
    
    
    // AMVP
    /**
     * Pick the AMVP candidate with the lowest template cost for one reference picture.
     *
     * \param pu           prediction unit; pu.mvpIdx / pu.mvpNum of eRefPicList are written
     * \param origBuf      original samples the candidates are compared against
     * \param eRefPicList  reference picture list
     * \param iRefIdx      reference index inside that list
     * \param rcMvPred     [out] the selected predictor
     * \param rAMVPInfo    candidate list; filled here via PU::fillMvpCand() unless bFilled is true
     * \param bFilled      true when the caller already filled rAMVPInfo
     * \param puiDistBiP   [out] receives the cost of the selected candidate; it is only written
     *                     when at least one candidate is evaluated, and is skipped when null
     */
    void InterSearch::xEstimateMvPredAMVP( PredictionUnit& pu, PelUnitBuf& origBuf, RefPicList eRefPicList, int iRefIdx, Mv& rcMvPred, AMVPInfo& rAMVPInfo, bool bFilled, Distortion* puiDistBiP )
    {
      // Fill the MV candidates
      if( !bFilled )
      {
        PU::fillMvpCand( pu, eRefPicList, iRefIdx, rAMVPInfo );
      }

      // Start from candidate 0 so that the outputs are defined even if the loop below never
      // finds a candidate with a cost below the initial maximum.
      int        iBestIdx   = 0;
      Mv         cBestMv    = rAMVPInfo.mvCand[0];
      Distortion uiBestCost = std::numeric_limits<Distortion>::max();

      PelUnitBuf predBuf = m_tmpStorageLCU.getBuf( UnitAreaRelative(*pu.cu, pu) );

      //-- Check Minimum Cost.
      for( int i = 0; i < rAMVPInfo.numCand; i++ )
      {
        const Distortion uiTmpCost = xGetTemplateCost( pu, origBuf, predBuf, rAMVPInfo.mvCand[i], i, AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdx );
        if( uiBestCost > uiTmpCost )
        {
          uiBestCost = uiTmpCost;
          cBestMv    = rAMVPInfo.mvCand[i];
          iBestIdx   = i;
          // The cost output is optional: do not dereference a null pointer.
          if( puiDistBiP != nullptr )
          {
            *puiDistBiP = uiTmpCost;
          }
        }
      }

      // Setting Best MVP
      rcMvPred               = cBestMv;
      pu.mvpIdx[eRefPicList] = iBestIdx;
      pu.mvpNum[eRefPicList] = rAMVPInfo.numCand;
    }
    
    /**
     * Number of bits used to signal MVP index iIdx out of iNum candidates.
     *
     * With a single candidate nothing is signalled (0 bits). Index 0 costs one bit. Any other
     * index costs iIdx bits, plus one more bit unless it is the last index (iNum - 1).
     *
     * \param iIdx  candidate index, expected in [0, iNum)
     * \param iNum  number of candidates
     * \return bit count as described above
     */
    uint32_t InterSearch::xGetMvpIdxBits(int iIdx, int iNum)
    {
      CHECK(iIdx < 0 || iNum < 0 || iIdx >= iNum, "Invalid parameters");

      if (iNum == 1)
      {
        return 0;
      }
      if (iIdx == 0)
      {
        return 1;
      }

      // One leading bit plus (iIdx - 1) further bits.
      uint32_t numBits = 1;
      numBits += (iIdx - 1);

      // Every index except the last one needs an additional bit.
      const bool isLastIndex = !(iNum - 1 > iIdx);
      return isLastIndex ? numBits : numBits + 1;
    }
    
    
    Karsten Suehring's avatar
    Karsten Suehring committed
    void InterSearch::xGetBlkBits( bool bPSlice, int iPartIdx, uint32_t uiLastMode, uint32_t uiBlkBit[3])
    
    Karsten Suehring's avatar
    Karsten Suehring committed
      uiBlkBit[0] = (! bPSlice) ? 3 : 1;
      uiBlkBit[1] = 3;
      uiBlkBit[2] = 5;
    
    }
    
    /**
     * Copy the candidate count and the first numCand MV candidates from pSrc to pDst.
     * Entries of pDst->mvCand at or beyond pSrc->numCand are left untouched.
     */
    void InterSearch::xCopyAMVPInfo (AMVPInfo* pSrc, AMVPInfo* pDst)
    {
      pDst->numCand = pSrc->numCand;

      int candIdx = 0;
      while (candIdx < pSrc->numCand)
      {
        pDst->mvCand[candIdx] = pSrc->mvCand[candIdx];
        ++candIdx;
      }
    }
    
    /**
     * Re-select the MVP candidate that minimises the signalling bits for an already chosen MV.
     *
     * For every candidate in amvpInfo the bits of coding cMv against that candidate plus the
     * MVP-index bits are compared with those of the current choice. If a strictly cheaper
     * candidate exists, rcMvPred, riMVPIdx, ruiBits and ruiCost are updated accordingly.
     *
     * \param eRefPicList  not used inside this function
     * \param cMv          motion vector whose coding cost is evaluated
     * \param rcMvPred     [in,out] current / updated predictor
     * \param riMVPIdx     [in,out] current / updated predictor index
     * \param amvpInfo     candidate list
     * \param ruiBits      [in,out] total bits; the MV/MVP part is swapped for the new one
     * \param ruiCost      [in,out] total cost; the bit-cost part is swapped for the new one
     * \param imv          the function returns immediately, changing nothing, when this is > 0
     */
    void InterSearch::xCheckBestMVP ( RefPicList eRefPicList, Mv cMv, Mv& rcMvPred, int& riMVPIdx, AMVPInfo& amvpInfo, uint32_t& ruiBits, Distortion& ruiCost, const uint8_t imv )
    {
      if( imv > 0 )
      {
        return;
      }
      // imv is 0 past the guard above, so this shift evaluates to 0.
      const unsigned mvdShift = imv << 1;

      CHECK(amvpInfo.mvCand[riMVPIdx] != rcMvPred, "Invalid MV prediction candidate");

      // With fewer than two candidates there is nothing to choose between.
      if (amvpInfo.numCand < 2)
      {
        return;
      }

      m_pcRdCost->setCostScale ( 0    );

      // Bits of the current predictor: MVD bits plus MVP-index bits.
      m_pcRdCost->setPredictor( rcMvPred );
      int currentMvBits = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor(), cMv.getVer(), mvdShift);
      currentMvBits += m_auiMVPIdxCost[riMVPIdx][AMVP_MAX_NUM_CANDS];

      int cheapestIdx  = riMVPIdx;
      int cheapestBits = currentMvBits;

      for (int candIdx = 0; candIdx < amvpInfo.numCand; candIdx++)
      {
        if (candIdx != riMVPIdx)
        {
          m_pcRdCost->setPredictor( amvpInfo.mvCand[candIdx] );
          int candBits = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor(), cMv.getVer(), mvdShift);
          candBits += m_auiMVPIdxCost[candIdx][AMVP_MAX_NUM_CANDS];

          // Strict comparison: ties keep the earlier choice.
          if (candBits < cheapestBits)
          {
            cheapestBits = candBits;
            cheapestIdx  = candIdx;
          }
        }
      }

      if (cheapestIdx == riMVPIdx)
      {
        return;  // current predictor is already the cheapest
      }

      rcMvPred = amvpInfo.mvCand[cheapestIdx];
      riMVPIdx = cheapestIdx;

      // Swap the old MV/MVP bit contribution for the new one in both bits and cost.
      const uint32_t previousBits = ruiBits;
      ruiBits = previousBits - currentMvBits + cheapestBits;
      ruiCost = (ruiCost - m_pcRdCost->getCost( previousBits ))  + m_pcRdCost->getCost( ruiBits );
    }
    
    
    /**
     * Cost of using one AMVP candidate as the motion vector of the luma block.
     *
     * The candidate is converted from quarter to internal precision, clipped with clipMv(), and
     * used to predict the luma block into predBuf. The result is the SAD between origBuf and
     * predBuf (luma) plus the cost of signalling the MVP index.
     *
     * \param pu           prediction unit
     * \param origBuf      original samples
     * \param predBuf      scratch buffer that receives the luma prediction
     * \param cMvCand      candidate MV (passed by value; modified locally only)
     * \param iMVPIdx      candidate index, first index into m_auiMVPIdxCost
     * \param iMVPNum      second index into m_auiMVPIdxCost
     * \param eRefPicList  reference picture list
     * \param iRefIdx      reference index
     * \return SAD distortion plus MVP-index cost
     */
    Distortion InterSearch::xGetTemplateCost( const PredictionUnit& pu,
                                              PelUnitBuf& origBuf,
                                              PelUnitBuf& predBuf,
                                              Mv          cMvCand,
                                              int         iMVPIdx,
                                              int         iMVPNum,
                                              RefPicList  eRefPicList,
                                              int         iRefIdx
    )
    {
      const Picture* picRef = pu.cu->slice->getRefPic( eRefPicList, iRefIdx );

      cMvCand.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);

      clipMv( cMvCand, pu.cu->lumaPos(),
              pu.cu->lumaSize(),
              *pu.cs->sps );

      // prediction pattern: the flag is set only when weighted prediction is active in a P slice
      const bool bi = pu.cu->slice->testWeightPred() && pu.cu->slice->getSliceType()==P_SLICE;

      xPredInterBlk( COMPONENT_Y, pu, picRef, cMvCand, predBuf, bi, pu.cu->slice->clpRng( COMPONENT_Y ) );

      if ( bi )
      {
        xWeightedPredictionUni( pu, predBuf, eRefPicList, predBuf, iRefIdx, m_maxCompIDToPred );
      }

      // calc distortion
      Distortion uiCost = m_pcRdCost->getDistPart(origBuf.Y(), predBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_SAD);

      uiCost += m_pcRdCost->getCost( m_auiMVPIdxCost[iMVPIdx][iMVPNum] );

      return uiCost;
    }
    
    /**
     * Cost of using one set of affine control-point MV candidates for the luma block.
     *
     * The three candidate MVs are copied locally, converted from quarter to internal precision
     * (with JVET_M0246_AFFINE_AMVR enabled the conversion is skipped when pu.cu->imv == 1), and
     * used for an affine luma prediction into predBuf. The result is the Hadamard (DF_HAD)
     * distortion between origBuf and predBuf plus the cost of signalling the MVP index.
     *
     * \param pu           prediction unit
     * \param origBuf      original samples
     * \param predBuf      scratch buffer that receives the luma prediction
     * \param acMvCand     three control-point MV candidates; not modified
     * \param iMVPIdx      candidate index, first index into m_auiMVPIdxCost
     * \param iMVPNum      second index into m_auiMVPIdxCost
     * \param eRefPicList  reference picture list
     * \param iRefIdx      reference index
     * \return HAD distortion plus MVP-index cost
     */
    Distortion InterSearch::xGetAffineTemplateCost( PredictionUnit& pu, PelUnitBuf& origBuf, PelUnitBuf& predBuf, Mv acMvCand[3], int iMVPIdx, int iMVPNum, RefPicList eRefPicList, int iRefIdx )
    {
      const Picture* picRef = pu.cu->slice->getRefPic( eRefPicList, iRefIdx );

      // prediction pattern: the flag is set only when weighted prediction is active in a P slice
      const bool bi = pu.cu->slice->testWeightPred() && pu.cu->slice->getSliceType()==P_SLICE;

      // Local working copy so that the caller's candidates keep their precision.
      Mv mv[3] = { acMvCand[0], acMvCand[1], acMvCand[2] };

    #if JVET_M0246_AFFINE_AMVR
      if ( pu.cu->imv != 1 )
      {
    #endif
        mv[0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
        mv[1].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
        mv[2].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
    #if JVET_M0246_AFFINE_AMVR
      }
    #endif

      xPredAffineBlk(COMPONENT_Y, pu, picRef, mv, predBuf, bi, pu.cu->slice->clpRng(COMPONENT_Y));

      if( bi )
      {
        xWeightedPredictionUni( pu, predBuf, eRefPicList, predBuf, iRefIdx, m_maxCompIDToPred );
      }

      // calc distortion
      Distortion uiCost = m_pcRdCost->getDistPart( origBuf.Y(), predBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y
        , DF_HAD
      );

      uiCost += m_pcRdCost->getCost( m_auiMVPIdxCost[iMVPIdx][iMVPNum] );
      DTRACE( g_trace_ctx, D_COMMON, " (%d) affineTemplateCost=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiCost );
      return uiCost;
    }
    
    // Motion estimation for one PU against one reference picture.
    //
    // Runs an integer-pel search (full search, TZ search seeded from a cached MV, or the
    // configured fast search), followed by either fractional refinement (pu.cu->imv == 0) or
    // integer refinement (otherwise).
    //
    //   pu           prediction unit being searched
    //   origBuf      original samples
    //   eRefPicList  reference picture list
    //   rcMvPred     MV predictor; used as RD-cost predictor and as search start point
    //   iRefIdxPred  reference index
    //   rcMv         [in,out] resulting MV; for bi-prediction it also centres the search range
    //   riMVPIdx     [in,out] MVP index, only forwarded to the integer refinement
    //   ruiBits      [in,out] bit count; the MV bits are added to it
    //   ruiCost      [out] resulting cost
    //   amvpInfo     AMVP candidates, only forwarded to the integer refinement
    //   bBi          true when called as the refinement step of bi-prediction
    //
    // NOTE(review): this copy of the function is visibly truncated in two places (flagged inline
    // below) and does not compile as shown; restore the missing lines from the upstream file.
    void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, RefPicList eRefPicList, Mv& rcMvPred, int iRefIdxPred, Mv& rcMv, int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost, const AMVPInfo& amvpInfo, bool bBi)
    {
    
      // Uni-prediction with a non-default GBi index: reuse a buffered result when one is available.
      if( pu.cu->cs->sps->getSpsNext().getUseGBi() && pu.cu->GBiIdx != GBI_DEFAULT && !bBi && xReadBufferedUniMv(pu, eRefPicList, iRefIdxPred, rcMvPred, rcMv, ruiBits, ruiCost) )
      {
        return;
      }
    
      Mv cMvHalf, cMvQter;
    
      CHECK(eRefPicList >= MAX_NUM_REF_LIST_ADAPT_SR || iRefIdxPred>=int(MAX_IDX_ADAPT_SR), "Invalid reference picture list");
      m_iSearchRange = m_aaiAdaptSR[eRefPicList][iRefIdxPred];
    
      // Bi-prediction uses its own search range.
      int    iSrchRng   = (bBi ? m_bipredSearchRange : m_iSearchRange);
      // Distortion weight; stays 1.0 unless overridden in the bi-predictive branch below.
      double fWeight    = 1.0;
    
      PelUnitBuf  origBufTmp = m_tmpStorageLCU.getBuf( UnitAreaRelative(*pu.cu, pu) );
      PelUnitBuf* pBuf       = &origBuf;
    
      if(bBi) // Bi-predictive ME
      {
        // NOTE: Other buf contains predicted signal from another direction
        PelUnitBuf otherBuf = m_tmpPredStorage[1 - (int)eRefPicList].getBuf( UnitAreaRelative(*pu.cu, pu ));
        origBufTmp.copyFrom(origBuf);
    
        // NOTE(review): the call below is never closed (no ");") in this copy, and pBuf is never
        // redirected to origBufTmp although origBufTmp is prepared here -- lines appear to be
        // missing between the last argument and the fWeight assignment.
        origBufTmp.removeHighFreq( otherBuf, m_pcEncCfg->getClipForBiPredMeEnabled(), pu.cu->slice->clpRngs()
    
                                  ,getGbiWeight( pu.cu->GBiIdx, eRefPicList )
    
        fWeight = xGetMEDistortionWeight( pu.cu->GBiIdx, eRefPicList );
    
      }
      m_cDistParam.isBiPred = bBi;
    
      //  Search key pattern initialization
      CPelBuf  tmpPattern   = pBuf->Y();
      CPelBuf* pcPatternKey = &tmpPattern;
    
      m_lumaClpRng = pu.cs->slice->clpRng( COMPONENT_Y );
    
      // Reconstructed luma of the reference picture at the position of this PU.
      CPelBuf buf = pu.cu->slice->getRefPic(eRefPicList, iRefIdxPred)->getRecoBuf(pu.blocks[COMPONENT_Y]);
    
      IntTZSearchStruct cStruct;
      cStruct.pcPatternKey  = pcPatternKey;
      cStruct.iRefStride    = buf.stride;
      cStruct.piRefY        = buf.buf;
      cStruct.imvShift      = pu.cu->imv << 1;
    
      cStruct.inCtuSearch = false;
      cStruct.zeroMV = false;
      {
        // Composite reference enabled and a long-term reference picture: request the in-CTU search mode.
        if (pu.cs->sps->getSpsNext().getUseCompositeRef() && pu.cs->slice->getRefPic(eRefPicList, iRefIdxPred)->longTerm)
        {
          cStruct.inCtuSearch = true;
        }
      }
    
    
      // Null when m_modeCtrl is not a CacheBlkInfoCtrl.
      auto blkCache = dynamic_cast<CacheBlkInfoCtrl*>( m_modeCtrl );
    
      // bQTBTMV is never set to true in this function; bQTBTMV2 marks a cached integer MV hit.
      bool bQTBTMV  = false;
      bool bQTBTMV2 = false;
      Mv cIntMv;
      if( !bBi )
      {
        bool bValid = blkCache && blkCache->getMv( pu, eRefPicList, iRefIdxPred, cIntMv );
        if( bValid )
        {
          bQTBTMV2 = true;
          cIntMv <<= 2;
        }
      }
    
    
      m_pcRdCost->setPredictor( rcMvPred );
    
      m_pcRdCost->setCostScale(2);
    
      {
        setWpScalingDistParam(iRefIdxPred, eRefPicList, pu.cu->slice);
      }
    
      //  Do integer search
      if( ( m_motionEstimationSearchMethod == MESEARCH_FULL ) || bBi || bQTBTMV )
      {
        if( !bQTBTMV )
        {
    
          // Centre the range on the current MV for bi-prediction, on the predictor otherwise.
          xSetSearchRange(pu, (bBi ? rcMv : rcMvPred), iSrchRng, cStruct.searchRange
            , cStruct
          );
    
        }
        cStruct.subShiftMode = m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE3 ? 2 : 0;
        xPatternSearch( cStruct, rcMv, ruiCost);
      }
      else if( bQTBTMV2 )
      {
        // Start the TZ search from the cached integer MV.
        rcMv = cIntMv;
    
        cStruct.subShiftMode = ( !m_pcEncCfg->getRestrictMESampling() && m_pcEncCfg->getMotionEstimationSearchMethod() == MESEARCH_SELECTIVE ) ? 1 :
                                ( m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE3 ) ? 2 : 0;
        xTZSearch( pu, cStruct, rcMv, ruiCost, NULL, false, true );
      }
      else
      {
        cStruct.subShiftMode = ( !m_pcEncCfg->getRestrictMESampling() && m_pcEncCfg->getMotionEstimationSearchMethod() == MESEARCH_SELECTIVE ) ? 1 :
                                ( m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE3 ) ? 2 : 0;
        // Fast search starts from the predictor.
        rcMv = rcMvPred;
        const Mv *pIntegerMv2Nx2NPred = 0;
        xPatternSearchFast( pu, cStruct, rcMv, ruiCost, pIntegerMv2Nx2NPred );
        if( blkCache )
        {
          // Store the integer MV in the block cache.
          blkCache->setMv( pu.cs->area, eRefPicList, iRefIdxPred, rcMv );
        }
    
        // NOTE(review): the next two lines are residue from the repository web page, not C++.
        // The "else" that follows has lost its body, and the closing brace of the enclosing
        // else-block is missing as well -- restore both from the upstream file.
    Karsten Suehring's avatar
    Karsten Suehring committed
        else
    
      DTRACE( g_trace_ctx, D_ME, "%d %d %d :MECostFPel<L%d,%d>: %d,%d,%dx%d, %d", DTRACE_GET_COUNTER( g_trace_ctx, D_ME ), pu.cu->slice->getPOC(), 0, ( int ) eRefPicList, ( int ) bBi, pu.Y().x, pu.Y().y, pu.Y().width, pu.Y().height, ruiCost );
    
      // sub-pel refinement for sub-pel resolution
      if( pu.cu->imv == 0 )
      {
        xPatternSearchFracDIF( pu, eRefPicList, iRefIdxPred, cStruct, rcMv, cMvHalf, cMvQter, ruiCost );
        m_pcRdCost->setCostScale( 0 );
        // Combine the three stages: integer MV shifted left by 2, half-pel offset shifted left by 1, quarter-pel offset.
        rcMv <<= 2;
        rcMv  += ( cMvHalf <<= 1 );
        rcMv  += cMvQter;
        uint32_t uiMvBits = m_pcRdCost->getBitsOfVectorWithPredictor( rcMv.getHor(), rcMv.getVer(), cStruct.imvShift );
        ruiBits += uiMvBits;
        // Take the MV-bit cost out of the search cost, scale the remainder by fWeight, then add
        // the cost of the complete bit count.
        ruiCost = ( Distortion ) ( floor( fWeight * ( ( double ) ruiCost - ( double ) m_pcRdCost->getCost( uiMvBits ) ) ) + ( double ) m_pcRdCost->getCost( ruiBits ) );
      }
      else // integer refinement for integer-pel and 4-pel resolution
      {
        xPatternSearchIntRefine( pu, cStruct, rcMv, rcMvPred, riMVPIdx, ruiBits, ruiCost, amvpInfo, fWeight);
      }
    
      DTRACE(g_trace_ctx, D_ME, "   MECost<L%d,%d>: %6d (%d)  MV:%d,%d\n", (int)eRefPicList, (int)bBi, ruiCost, ruiBits, rcMv.getHor() << 2, rcMv.getVer() << 2);
    
    }
    
    
    
    void InterSearch::xSetSearchRange ( const PredictionUnit& pu,
                                        const Mv& cMvPred,
                                        const int iSrchRng,
    
                                        SearchRange& sr
                                      , IntTZSearchStruct& cStruct
    )
    
      const int iMvShift = MV_FRACTIONAL_BITS_INTERNAL;
    
      cFPMvPred.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); 
    
    Philippe Hanhart's avatar
    Philippe Hanhart committed
      clipMv( cFPMvPred, pu.cu->lumaPos(),
              pu.cu->lumaSize(),
              *pu.cs->sps );
    
      Mv mvTL(cFPMvPred.getHor() - (iSrchRng << iMvShift), cFPMvPred.getVer() - (iSrchRng << iMvShift));
      Mv mvBR(cFPMvPred.getHor() + (iSrchRng << iMvShift), cFPMvPred.getVer() + (iSrchRng << iMvShift));
    
      xClipMv( mvTL, pu.cu->lumaPos(),
    
    Philippe Hanhart's avatar
    Philippe Hanhart committed
              pu.cu->lumaSize(),
              *pu.cs->sps );
    
      xClipMv( mvBR, pu.cu->lumaPos(),
    
    Philippe Hanhart's avatar
    Philippe Hanhart committed
              pu.cu->lumaSize(),
              *pu.cs->sps );
    
    
      mvTL.divideByPowerOf2( iMvShift );
      mvBR.divideByPowerOf2( iMvShift );
    
      sr.left   = mvTL.hor;
      sr.top    = mvTL.ver;
      sr.right  = mvBR.hor;
      sr.bottom = mvBR.ver;
    
    
      if (pu.cs->sps->getSpsNext().getUseCompositeRef() && cStruct.inCtuSearch)
      {
        Position posRB = pu.Y().bottomRight();
        Position posTL = pu.Y().topLeft();
        const PreCalcValues *pcv = pu.cs->pcv;
        Position posRBinCTU(posRB.x & pcv->maxCUWidthMask, posRB.y & pcv->maxCUHeightMask);
        Position posLTinCTU = Position(posTL.x & pcv->maxCUWidthMask, posTL.y & pcv->maxCUHeightMask).offset(-4, -4);
        if (sr.left < -posLTinCTU.x)
          sr.left = -posLTinCTU.x;
        if (sr.top < -posLTinCTU.y)
          sr.top = -posLTinCTU.y;
        if (sr.right >((int)pcv->maxCUWidth - 4 - posRBinCTU.x))
          sr.right = (int)pcv->maxCUWidth - 4 - posRBinCTU.x;
        if (sr.bottom >((int)pcv->maxCUHeight - 4 - posRBinCTU.y))
          sr.bottom = (int)pcv->maxCUHeight - 4 - posRBinCTU.y;
        if (posLTinCTU.x == -4 || posLTinCTU.y == -4)
        {
          sr.left = sr.right = sr.bottom = sr.top = 0;
          cStruct.zeroMV = 1;
        }
        if (posRBinCTU.x == pcv->maxCUWidthMask || posRBinCTU.y == pcv->maxCUHeightMask)
        {
          sr.left = sr.right = sr.bottom = sr.top = 0;
          cStruct.zeroMV = 1;
        }
      }
    
    }
    
    
    /**
     * Exhaustive integer-pel search over cStruct.searchRange.
     *
     * Every position of the window is evaluated with the configured distortion function plus the
     * motion-vector cost; the first position with the strictly lowest total wins.
     *
     * \param cStruct  search context; uiBestSad receives the best total (distortion + MV cost)
     * \param rcMv     [out] best position (x, y)
     * \param ruiSAD   [out] best total with the MV cost of the best position subtracted again
     */
    void InterSearch::xPatternSearch( IntTZSearchStruct&    cStruct,
                                      Mv&            rcMv,
                                      Distortion&    ruiSAD )
    {
      //-- jclee for using the SAD function pointer
      m_pcRdCost->setDistParam( m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, cStruct.subShiftMode );

      const SearchRange& window = cStruct.searchRange;

      Distortion lowestCost = std::numeric_limits<Distortion>::max();
      int        lowestX    = 0;
      int        lowestY    = 0;

      // Pointer to the reference row of the current vertical offset.
      const Pel* refRow = cStruct.piRefY + (window.top * cStruct.iRefStride);
      for ( int offY = window.top; offY <= window.bottom; offY++, refRow += cStruct.iRefStride )
      {
        for ( int offX = window.left; offX <= window.right; offX++ )
        {
          m_cDistParam.cur.buf = refRow + offX;

          // distortion first, then the motion cost on top
          Distortion candCost = m_cDistParam.distFunc( m_cDistParam );
          candCost += m_pcRdCost->getCostOfVectorWithPredictor( offX, offY, cStruct.imvShift );

          if ( !( candCost < lowestCost ) )
          {
            continue;
          }

          lowestCost = candCost;
          lowestX    = offX;
          lowestY    = offY;
          // Record the new best total as the early-exit bound of the distortion parameters.
          m_cDistParam.maximumDistortionForEarlyExit = candCost;
        }
      }

      rcMv.set( lowestX, lowestY );

      cStruct.uiBestSad = lowestCost; // th for testing
      ruiSAD = lowestCost - m_pcRdCost->getCostOfVectorWithPredictor( lowestX, lowestY, cStruct.imvShift );
    }
    
    
    // Dispatches the fast integer motion search according to
    // m_motionEstimationSearchMethod:
    //   - MESEARCH_SELECTIVE         -> xTZSearchSelective
    //   - MESEARCH_DIAMOND           -> xTZSearch with extended settings off
    //   - MESEARCH_DIAMOND_ENHANCED  -> xTZSearch with extended settings on
    // Any other value (including MESEARCH_FULL, which is not expected here)
    // performs no search and leaves rcMv / ruiSAD untouched.
    //
    // All arguments are forwarded unchanged to the selected search routine.
    void InterSearch::xPatternSearchFast( const PredictionUnit& pu,
                                          IntTZSearchStruct&    cStruct,
                                          Mv&                   rcMv,
                                          Distortion&           ruiSAD,
                                          const Mv* const       pIntegerMv2Nx2NPred )
    {
      if ( m_motionEstimationSearchMethod == MESEARCH_SELECTIVE )
      {
        xTZSearchSelective( pu, cStruct, rcMv, ruiSAD, pIntegerMv2Nx2NPred );
        return;
      }

      const bool useEnhancedDiamond = ( m_motionEstimationSearchMethod == MESEARCH_DIAMOND_ENHANCED );
      const bool usePlainDiamond    = ( m_motionEstimationSearchMethod == MESEARCH_DIAMOND );

      if ( usePlainDiamond || useEnhancedDiamond )
      {
        // both diamond variants share xTZSearch; only the extended-settings flag differs
        xTZSearch( pu, cStruct, rcMv, ruiSAD, pIntegerMv2Nx2NPred, useEnhancedDiamond );
      }

      // MESEARCH_FULL (shouldn't get here) and unknown methods: nothing to do
    }
    
    
    void InterSearch::xTZSearch( const PredictionUnit& pu,
                                 IntTZSearchStruct&    cStruct,
                                 Mv&                   rcMv,
                                 Distortion&           ruiSAD,
                                 const Mv* const       pIntegerMv2Nx2NPred,
                                 const bool            bExtendedSettings,
                                 const bool            bFastSettings)
    {
      const bool bUseRasterInFastMode                    = true; //toggle this to further reduce runtime
    
      const bool bUseAdaptiveRaster                      = bExtendedSettings;
      const int  iRaster                                 = (bFastSettings && bUseRasterInFastMode) ? 8 : 5;
      const bool bTestZeroVector                         = true && !bFastSettings;
      const bool bTestZeroVectorStart                    = bExtendedSettings;
      const bool bTestZeroVectorStop                     = false;
      const bool bFirstSearchDiamond                     = true;  // 1 = xTZ8PointDiamondSearch   0 = xTZ8PointSquareSearch
      const bool bFirstCornersForDiamondDist1            = bExtendedSettings;
      const bool bFirstSearchStop                        = m_pcEncCfg->getFastMEAssumingSmootherMVEnabled();
      const uint32_t uiFirstSearchRounds                     = bFastSettings ? (bUseRasterInFastMode?3:2) : 3;     // first search stop X rounds after best match (must be >=1)
      const bool bEnableRasterSearch                     = bFastSettings ? bUseRasterInFastMode : true;
      const bool bAlwaysRasterSearch                     = bExtendedSettings;  // true: BETTER but factor 2 slower
      const bool bRasterRefinementEnable                 = false; // enable either raster refinement or star refinement
      const bool bRasterRefinementDiamond                = false; // 1 = xTZ8PointDiamondSearch   0 = xTZ8PointSquareSearch
      const bool bRasterRefinementCornersForDiamondDist1 = bExtendedSettings;
      const bool bStarRefinementEnable                   = true;  // enable either star refinement or raster refinement
      const bool bStarRefinementDiamond                  = true;  // 1 = xTZ8PointDiamondSearch   0 = xTZ8PointSquareSearch
      const bool bStarRefinementCornersForDiamondDist1   = bExtendedSettings;
      const bool bStarRefinementStop                     = false || bFastSettings;
      const uint32_t uiStarRefinementRounds                  = 2;  // star refinement stop X rounds after best match (must be >=1)
      const bool bNewZeroNeighbourhoodTest               = bExtendedSettings;
    
      int iSearchRange = m_iSearchRange;
    
      rcMv.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); 
    
      // clip the start vector while it is expressed at internal precision
      clipMv( rcMv, pu.cu->lumaPos(),
              pu.cu->lumaSize(),
              *pu.cs->sps );
    
      rcMv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); 
    
      rcMv.divideByPowerOf2(2);
    
      // init TZSearchStruct
      cStruct.uiBestSad = std::numeric_limits<Distortion>::max();
    
      //
      m_cDistParam.maximumDistortionForEarlyExit = cStruct.uiBestSad;
      m_pcRdCost->setDistParam( m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, cStruct.subShiftMode );
    
      // distortion
    
    
      // set rcMv (Median predictor) as start point and as best point
      xTZSearchHelp( cStruct, rcMv.getHor(), rcMv.getVer(), 0, 0 );
    
      // test whether zero Mv is better start point than Median predictor
      if ( bTestZeroVector )
      {
        if ((rcMv.getHor() != 0 || rcMv.getVer() != 0) &&
          (0 != cStruct.iBestX || 0 != cStruct.iBestY))
        {
          // only test 0-vector if not obviously previously tested.
          xTZSearchHelp( cStruct, 0, 0, 0, 0 );
        }
      }
    
      SearchRange& sr = cStruct.searchRange;
    
      if (pIntegerMv2Nx2NPred != 0)
      {
        Mv integerMv2Nx2NPred = *pIntegerMv2Nx2NPred;
    
        integerMv2Nx2NPred.changePrecision(MV_PRECISION_INT, MV_PRECISION_INTERNAL);
    
        // clip the 2Nx2N integer predictor while it is expressed at internal precision
        clipMv( integerMv2Nx2NPred, pu.cu->lumaPos(),
                pu.cu->lumaSize(),
                *pu.cs->sps );
    
        integerMv2Nx2NPred.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
    
        integerMv2Nx2NPred.divideByPowerOf2(2);
    
        if ((rcMv != integerMv2Nx2NPred) &&
          (integerMv2Nx2NPred.getHor() != cStruct.iBestX || integerMv2Nx2NPred.getVer() != cStruct.iBestY))
        {
          // only test integerMv2Nx2NPred if not obviously previously tested.
          xTZSearchHelp( cStruct, integerMv2Nx2NPred.getHor(), integerMv2Nx2NPred.getVer(), 0, 0);
        }
      }
      {
        // set search range
        Mv currBestMv(cStruct.iBestX, cStruct.iBestY );
        currBestMv <<= 2;
    
        xSetSearchRange(pu, currBestMv, m_iSearchRange >> (bFastSettings ? 1 : 0), sr
          , cStruct
        );
    
      }
    
      // start search
      int  iDist = 0;
      int  iStartX = cStruct.iBestX;
      int  iStartY = cStruct.iBestY;
    
      const bool bBestCandidateZero = (cStruct.iBestX == 0) && (cStruct.iBestY == 0);
    
      // first search around best position up to now.
      // The following works as a "subsampled/log" window search around the best candidate
      for ( iDist = 1; iDist <= iSearchRange; iDist*=2 )
      {
        if ( bFirstSearchDiamond == 1 )
        {
          xTZ8PointDiamondSearch ( cStruct, iStartX, iStartY, iDist, bFirstCornersForDiamondDist1 );
        }
        else
        {
          xTZ8PointSquareSearch  ( cStruct, iStartX, iStartY, iDist );
        }
    
        if ( bFirstSearchStop && ( cStruct.uiBestRound >= uiFirstSearchRounds ) ) // stop criterion
        {
          break;
        }
      }
    
      if (!bNewZeroNeighbourhoodTest)
      {
        // test whether zero Mv is a better start point than Median predictor
        if ( bTestZeroVectorStart && ((cStruct.iBestX != 0) || (cStruct.iBestY != 0)) )
        {
          xTZSearchHelp( cStruct, 0, 0, 0, 0 );
          if ( (cStruct.iBestX == 0) && (cStruct.iBestY == 0) )
          {
            // test its neighborhood
            for ( iDist = 1; iDist <= iSearchRange; iDist*=2 )
            {