Skip to content
Snippets Groups Projects
InterSearch.cpp 223 KiB
Newer Older
  • Learn to ignore specific revisions
  •         // set best motion
            ::memcpy( aacMv[iRefList], cMvTemp[iRefList][iRefIdxTemp], sizeof(Mv) * 3 );
            iRefIdx[iRefList] = iRefIdxTemp;
          }
    
          if ( iRefList == 1 && uiCostTemp < costValidList1 && slice.getList1IdxToList0Idx( iRefIdxTemp ) < 0 )
          {
            costValidList1 = uiCostTemp;
            bitsValidList1 = uiBitsTemp;
    
            // set motion
            memcpy( mvValidList1, cMvTemp[iRefList][iRefIdxTemp], sizeof(Mv)*3 );
            refIdxValidList1 = iRefIdxTemp;
          }
        } // End refIdx loop
      } // end Uni-prediction
    
      if ( pu.cu->affineType == AFFINEMODEL_4PARAM )
      {
        ::memcpy( mvAffine4Para, cMvTemp, sizeof( cMvTemp ) );
    
    #if JVET_M0246_AFFINE_AMVR
        if ( pu.cu->imv == 0 && ( !pu.cu->cs->sps->getSpsNext().getUseGBi() || gbiIdx == GBI_DEFAULT ) )
    #else
    
        if (!pu.cu->cs->sps->getSpsNext().getUseGBi() || gbiIdx == GBI_DEFAULT)
    
        {
          AffineMVInfo *affMVInfo = m_affMVList + m_affMVListIdx;
    
          //check;
          int j = 0;
          for (; j < m_affMVListSize; j++)
          {
            AffineMVInfo *prevMvInfo = m_affMVList + ((m_affMVListIdx - j - 1 + m_affMVListMaxSize) % (m_affMVListMaxSize));
            if ((pu.Y().x == prevMvInfo->x) && (pu.Y().y == prevMvInfo->y) && (pu.Y().width == prevMvInfo->w) && (pu.Y().height == prevMvInfo->h))
            {
              break;
            }
          }
          if (j < m_affMVListSize)
            affMVInfo = m_affMVList + ((m_affMVListIdx - j - 1 + m_affMVListMaxSize) % (m_affMVListMaxSize));
    
          ::memcpy(affMVInfo->affMVs, cMvTemp, sizeof(cMvTemp));
    
          if (j == m_affMVListSize)
          {
            affMVInfo->x = pu.Y().x;
            affMVInfo->y = pu.Y().y;
            affMVInfo->w = pu.Y().width;
            affMVInfo->h = pu.Y().height;
            m_affMVListSize = std::min(m_affMVListSize + 1, m_affMVListMaxSize);
            m_affMVListIdx = (m_affMVListIdx + 1) % (m_affMVListMaxSize);
          }
        }
    
      }
    
      // Bi-directional prediction
      if ( slice.isInterB() && !PU::isBipredRestriction(pu) )
      {
        // Set as best list0 and list1
        iRefIdxBi[0] = iRefIdx[0];
        iRefIdxBi[1] = iRefIdx[1];
    
        ::memcpy( cMvBi,       aacMv,     sizeof(aacMv)     );
        ::memcpy( cMvPredBi,   cMvPred,   sizeof(cMvPred)   );
        ::memcpy( aaiMvpIdxBi, aaiMvpIdx, sizeof(aaiMvpIdx) );
    
        uint32_t uiMotBits[2];
    
        if ( slice.getMvdL1ZeroFlag() ) // GPB, list 1 only use Mvp
        {
          xCopyAffineAMVPInfo( aacAffineAMVPInfo[1][bestBiPRefIdxL1], affiAMVPInfoTemp[REF_PIC_LIST_1] );
          pu.mvpIdx[REF_PIC_LIST_1] = bestBiPMvpL1;
          aaiMvpIdxBi[1][bestBiPRefIdxL1] = bestBiPMvpL1;
    
          // Set Mv for list1
          Mv pcMvTemp[3] = { affiAMVPInfoTemp[REF_PIC_LIST_1].mvCandLT[bestBiPMvpL1],
                             affiAMVPInfoTemp[REF_PIC_LIST_1].mvCandRT[bestBiPMvpL1],
                             affiAMVPInfoTemp[REF_PIC_LIST_1].mvCandLB[bestBiPMvpL1] };
          ::memcpy( cMvPredBi[1][bestBiPRefIdxL1], pcMvTemp, sizeof(Mv)*3 );
          ::memcpy( cMvBi[1],                      pcMvTemp, sizeof(Mv)*3 );
          ::memcpy( cMvTemp[1][bestBiPRefIdxL1],   pcMvTemp, sizeof(Mv)*3 );
          iRefIdxBi[1] = bestBiPRefIdxL1;
    
          // Get list1 prediction block
    
          PU::setAllAffineMv( pu, cMvBi[1][0], cMvBi[1][1], cMvBi[1][2], REF_PIC_LIST_1 
    
    #if JVET_M0246_AFFINE_AMVR
            , changeToHighPrec
    #else
    
          pu.refIdx[REF_PIC_LIST_1] = iRefIdxBi[1];
    
          PelUnitBuf predBufTmp = m_tmpPredStorage[REF_PIC_LIST_1].getBuf( UnitAreaRelative(*pu.cu, pu) );
          motionCompensation( pu, predBufTmp, REF_PIC_LIST_1 );
    
          // Update bits
          uiMotBits[0] = uiBits[0] - uiMbBits[0];
          uiMotBits[1] = uiMbBits[1];
    
          if( slice.getNumRefIdx(REF_PIC_LIST_1) > 1 )
          {
            uiMotBits[1] += bestBiPRefIdxL1+1;
            if( bestBiPRefIdxL1 == slice.getNumRefIdx(REF_PIC_LIST_1)-1 )
            {
              uiMotBits[1]--;
            }
          }
          uiMotBits[1] += m_auiMVPIdxCost[aaiMvpIdxBi[1][bestBiPRefIdxL1]][AMVP_MAX_NUM_CANDS];
          uiBits[2] = uiMbBits[2] + uiMotBits[0] + uiMotBits[1];
        }
        else
        {
          uiMotBits[0] = uiBits[0] - uiMbBits[0];
          uiMotBits[1] = uiBits[1] - uiMbBits[1];
          uiBits[2] = uiMbBits[2] + uiMotBits[0] + uiMotBits[1];
        }
    
        // 4-times iteration (default)
        int iNumIter = 4;
        // fast encoder setting or GPB: only one iteration
        if ( m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE2 || slice.getMvdL1ZeroFlag() )
        {
          iNumIter = 1;
        }
    
        for ( int iIter = 0; iIter < iNumIter; iIter++ )
        {
          // Set RefList
          int iRefList = iIter % 2;
          if ( m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE2 )
          {
            if( uiCost[0] <= uiCost[1] )
            {
              iRefList = 1;
            }
            else
            {
              iRefList = 0;
            }
    
            if( gbiIdx != GBI_DEFAULT )
            {
              iRefList = ( abs( getGbiWeight( gbiIdx, REF_PIC_LIST_0 ) ) > abs( getGbiWeight( gbiIdx, REF_PIC_LIST_1 ) ) ? 1 : 0 );
            }
    
          }
          else if ( iIter == 0 )
          {
            iRefList = 0;
          }
    
          // First iterate, get prediction block of opposite direction
          if( iIter == 0 && !slice.getMvdL1ZeroFlag() )
          {
    
            PU::setAllAffineMv( pu, aacMv[1-iRefList][0], aacMv[1-iRefList][1], aacMv[1-iRefList][2], RefPicList(1-iRefList) 
    
    #if JVET_M0246_AFFINE_AMVR
              , changeToHighPrec
    #else
    
            pu.refIdx[1-iRefList] = iRefIdx[1-iRefList];
    
            PelUnitBuf predBufTmp = m_tmpPredStorage[1 - iRefList].getBuf( UnitAreaRelative(*pu.cu, pu) );
            motionCompensation( pu, predBufTmp, RefPicList(1 - iRefList) );
          }
    
          RefPicList eRefPicList = ( iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 );
    
          if ( slice.getMvdL1ZeroFlag() ) // GPB, fix List 1, search List 0
          {
            iRefList = 0;
            eRefPicList = REF_PIC_LIST_0;
          }
    
          bool bChanged = false;
    
          iRefStart = 0;
          iRefEnd   = slice.getNumRefIdx(eRefPicList) - 1;
    
    Yu Han's avatar
    Yu Han committed
          if (slice.getSPS()->getSpsNext().getIBCMode() && eRefPicList == REF_PIC_LIST_0)
    
    Xiaozhong Xu's avatar
    Xiaozhong Xu committed
          {
            iRefEnd--;
          }
    
          for ( int iRefIdxTemp = iRefStart; iRefIdxTemp <= iRefEnd; iRefIdxTemp++ )
          {
            if ( pu.cu->affineType == AFFINEMODEL_6PARAM && refIdx4Para[iRefList] != iRefIdxTemp )
            {
              continue;
            }
    
            if(m_pcEncCfg->getUseGBiFast() && (gbiIdx != GBI_DEFAULT)
              && (pu.cu->slice->getRefPic(eRefPicList, iRefIdxTemp)->getPOC() == pu.cu->slice->getRefPic(RefPicList(1 - iRefList), pu.refIdx[1 - iRefList])->getPOC())
              && (pu.cu->affineType == AFFINEMODEL_4PARAM && pu.cu->slice->getTLayer()>1))
            {
              continue;
            }
    
            // update bits
            uiBitsTemp = uiMbBits[2] + uiMotBits[1-iRefList];
    
            uiBitsTemp += ((pu.cu->slice->getSPS()->getSpsNext().getUseGBi() == true) ? gbiIdxBits : 0);
    
            if( slice.getNumRefIdx(eRefPicList) > 1 )
            {
              uiBitsTemp += iRefIdxTemp+1;
              if ( iRefIdxTemp == slice.getNumRefIdx(eRefPicList)-1 )
              {
                uiBitsTemp--;
              }
            }
            uiBitsTemp += m_auiMVPIdxCost[aaiMvpIdxBi[iRefList][iRefIdxTemp]][AMVP_MAX_NUM_CANDS];
    
            // call Affine ME
            xAffineMotionEstimation( pu, origBuf, eRefPicList, cMvPredBi[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp, true );
            xCopyAffineAMVPInfo( aacAffineAMVPInfo[iRefList][iRefIdxTemp], affiAMVPInfoTemp[eRefPicList] );
            xCheckBestAffineMVP( pu, affiAMVPInfoTemp[eRefPicList], eRefPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPredBi[iRefList][iRefIdxTemp], aaiMvpIdxBi[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp );
    
            if ( uiCostTemp < uiCostBi )
            {
              bChanged = true;
              ::memcpy( cMvBi[iRefList], cMvTemp[iRefList][iRefIdxTemp], sizeof(Mv)*3 );
              iRefIdxBi[iRefList] = iRefIdxTemp;
    
              uiCostBi            = uiCostTemp;
              uiMotBits[iRefList] = uiBitsTemp - uiMbBits[2] - uiMotBits[1-iRefList];
    
              uiMotBits[iRefList] -= ((pu.cu->slice->getSPS()->getSpsNext().getUseGBi() == true) ? gbiIdxBits : 0);
    
              uiBits[2]           = uiBitsTemp;
    
              if ( iNumIter != 1 ) // MC for next iter
              {
                //  Set motion
    
                PU::setAllAffineMv( pu, cMvBi[iRefList][0], cMvBi[iRefList][1], cMvBi[iRefList][2], eRefPicList 
    
    #if JVET_M0246_AFFINE_AMVR
                  , changeToHighPrec
    #else
    
                pu.refIdx[eRefPicList] = iRefIdxBi[eRefPicList];
                PelUnitBuf predBufTmp = m_tmpPredStorage[iRefList].getBuf( UnitAreaRelative(*pu.cu, pu) );
                motionCompensation( pu, predBufTmp, eRefPicList );
              }
            }
          } // for loop-iRefIdxTemp
    
          if ( !bChanged )
          {
    
            if ((uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) || enforceGBiPred)
    
            {
              xCopyAffineAMVPInfo( aacAffineAMVPInfo[0][iRefIdxBi[0]], affiAMVPInfoTemp[REF_PIC_LIST_0] );
              xCheckBestAffineMVP( pu, affiAMVPInfoTemp[REF_PIC_LIST_0], REF_PIC_LIST_0, cMvBi[0], cMvPredBi[0][iRefIdxBi[0]], aaiMvpIdxBi[0][iRefIdxBi[0]], uiBits[2], uiCostBi );
    
              if ( !slice.getMvdL1ZeroFlag() )
              {
                xCopyAffineAMVPInfo( aacAffineAMVPInfo[1][iRefIdxBi[1]], affiAMVPInfoTemp[REF_PIC_LIST_1] );
                xCheckBestAffineMVP( pu, affiAMVPInfoTemp[REF_PIC_LIST_1], REF_PIC_LIST_1, cMvBi[1], cMvPredBi[1][iRefIdxBi[1]], aaiMvpIdxBi[1][iRefIdxBi[1]], uiBits[2], uiCostBi );
              }
            }
            break;
          }
        } // for loop-iter
      } // if (B_SLICE)
    
      pu.mv    [REF_PIC_LIST_0] = Mv();
      pu.mv    [REF_PIC_LIST_1] = Mv();
      pu.mvd   [REF_PIC_LIST_0] = cMvZero;
      pu.mvd   [REF_PIC_LIST_1] = cMvZero;
      pu.refIdx[REF_PIC_LIST_0] = NOT_VALID;
      pu.refIdx[REF_PIC_LIST_1] = NOT_VALID;
      pu.mvpIdx[REF_PIC_LIST_0] = NOT_VALID;
      pu.mvpIdx[REF_PIC_LIST_1] = NOT_VALID;
      pu.mvpNum[REF_PIC_LIST_0] = NOT_VALID;
      pu.mvpNum[REF_PIC_LIST_1] = NOT_VALID;
    
      for ( int verIdx = 0; verIdx < 3; verIdx++ )
      {
        pu.mvdAffi[REF_PIC_LIST_0][verIdx] = cMvZero;
        pu.mvdAffi[REF_PIC_LIST_1][verIdx] = cMvZero;
      }
    
      // Set Motion Field
      memcpy( aacMv[1], mvValidList1, sizeof(Mv)*3 );
      iRefIdx[1] = refIdxValidList1;
      uiBits[1]  = bitsValidList1;
      uiCost[1]  = costValidList1;
    
    
      if( enforceGBiPred )
      {
        uiCost[0] = uiCost[1] = MAX_UINT;
      }
    
      // Affine ME result set
      if ( uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1] ) // Bi
      {
        lastMode = 2;
        affineCost = uiCostBi;
    
    
        PU::setAllAffineMv( pu, cMvBi[0][0], cMvBi[0][1], cMvBi[0][2], REF_PIC_LIST_0 
    
    #if JVET_M0246_AFFINE_AMVR
          , changeToHighPrec
    #else
    
        );
        PU::setAllAffineMv( pu, cMvBi[1][0], cMvBi[1][1], cMvBi[1][2], REF_PIC_LIST_1 
    
    #if JVET_M0246_AFFINE_AMVR
          , changeToHighPrec
    #else
    
        pu.refIdx[REF_PIC_LIST_0] = iRefIdxBi[0];
        pu.refIdx[REF_PIC_LIST_1] = iRefIdxBi[1];
    
        for ( int verIdx = 0; verIdx < mvNum; verIdx++ )
        {
          pu.mvdAffi[REF_PIC_LIST_0][verIdx] = cMvBi[0][verIdx] - cMvPredBi[0][iRefIdxBi[0]][verIdx];
          pu.mvdAffi[REF_PIC_LIST_1][verIdx] = cMvBi[1][verIdx] - cMvPredBi[1][iRefIdxBi[1]][verIdx];
          if ( verIdx != 0 )
          {
            pu.mvdAffi[0][verIdx] = pu.mvdAffi[0][verIdx] - pu.mvdAffi[0][0];
            pu.mvdAffi[1][verIdx] = pu.mvdAffi[1][verIdx] - pu.mvdAffi[1][0];
          }
        }
    
        pu.interDir = 3;
    
        pu.mvpIdx[REF_PIC_LIST_0] = aaiMvpIdxBi[0][iRefIdxBi[0]];
        pu.mvpNum[REF_PIC_LIST_0] = aaiMvpNum[0][iRefIdxBi[0]];
        pu.mvpIdx[REF_PIC_LIST_1] = aaiMvpIdxBi[1][iRefIdxBi[1]];
        pu.mvpNum[REF_PIC_LIST_1] = aaiMvpNum[1][iRefIdxBi[1]];
      }
      else if ( uiCost[0] <= uiCost[1] ) // List 0
      {
        lastMode = 0;
        affineCost = uiCost[0];
    
    
        PU::setAllAffineMv( pu, aacMv[0][0], aacMv[0][1], aacMv[0][2], REF_PIC_LIST_0 
    
    #if JVET_M0246_AFFINE_AMVR
          , changeToHighPrec
    #else
    
        pu.refIdx[REF_PIC_LIST_0] = iRefIdx[0];
    
        for ( int verIdx = 0; verIdx < mvNum; verIdx++ )
        {
          pu.mvdAffi[REF_PIC_LIST_0][verIdx] = aacMv[0][verIdx] - cMvPred[0][iRefIdx[0]][verIdx];
          if ( verIdx != 0 )
          {
            pu.mvdAffi[0][verIdx] = pu.mvdAffi[0][verIdx] - pu.mvdAffi[0][0];
          }
        }
        pu.interDir = 1;
    
        pu.mvpIdx[REF_PIC_LIST_0] = aaiMvpIdx[0][iRefIdx[0]];
        pu.mvpNum[REF_PIC_LIST_0] = aaiMvpNum[0][iRefIdx[0]];
      }
      else
      {
        lastMode = 1;
        affineCost = uiCost[1];
    
    
        PU::setAllAffineMv( pu, aacMv[1][0], aacMv[1][1], aacMv[1][2], REF_PIC_LIST_1 
    
    #if JVET_M0246_AFFINE_AMVR
          , changeToHighPrec
    #else
    
        pu.refIdx[REF_PIC_LIST_1] = iRefIdx[1];
    
        for ( int verIdx = 0; verIdx < mvNum; verIdx++ )
        {
          pu.mvdAffi[REF_PIC_LIST_1][verIdx] = aacMv[1][verIdx] - cMvPred[1][iRefIdx[1]][verIdx];
          if ( verIdx != 0 )
          {
            pu.mvdAffi[1][verIdx] = pu.mvdAffi[1][verIdx] - pu.mvdAffi[1][0];
          }
        }
        pu.interDir = 2;
    
        pu.mvpIdx[REF_PIC_LIST_1] = aaiMvpIdx[1][iRefIdx[1]];
        pu.mvpNum[REF_PIC_LIST_1] = aaiMvpNum[1][iRefIdx[1]];
      }
    
      if( gbiIdx != GBI_DEFAULT )
      {
        pu.cu->GBiIdx = GBI_DEFAULT;
      }
    
    }
    
    // Solves a small linear system by Gaussian elimination with column-max
    // pivoting and writes the iOrder unknowns into dAffinePara.
    //
    // Layout as accessed below: rows 1..iOrder of dEqualCoeff hold the equations,
    // columns 0..iOrder-1 hold the coefficients and column iOrder holds the
    // right-hand side. Row 0 is only used as scratch storage while two rows are
    // exchanged, so its previous contents are overwritten. The matrix is
    // modified in place.
    //
    // Whenever a pivot equal to zero is encountered the function returns with
    // every entry of dAffinePara set to zero.
    void solveEqual( double** dEqualCoeff, int iOrder, double* dAffinePara )
    {
      const int numCols = iOrder + 1;   // coefficient columns plus the right-hand side
      const int lastRow = iOrder;       // index of the final equation row

      // Clears the whole solution vector.
      auto resetParams = [&]()
      {
        int idx = 0;
        while ( idx < iOrder )
        {
          dAffinePara[idx++] = 0.;
        }
      };

      resetParams();

      // Forward elimination: bring rows 1..iOrder into echelon form.
      for ( int pivotRow = 1; pivotRow < iOrder; ++pivotRow )
      {
        const int pivotCol = pivotRow - 1;

        // Pick the row with the largest magnitude in the pivot column
        // (the earliest row wins on ties).
        int    maxRow = pivotRow;
        double maxAbs = fabs( dEqualCoeff[pivotRow][pivotCol] );
        for ( int row = pivotRow + 1; row <= lastRow; ++row )
        {
          const double candidate = fabs( dEqualCoeff[row][pivotCol] );
          if ( candidate > maxAbs )
          {
            maxAbs = candidate;
            maxRow = row;
          }
        }

        // Exchange the two rows element by element, parking values in row 0.
        if ( maxRow != pivotRow )
        {
          double* const scratch = dEqualCoeff[0];
          double* const upper   = dEqualCoeff[pivotRow];
          double* const lower   = dEqualCoeff[maxRow];
          for ( int col = 0; col < numCols; ++col )
          {
            scratch[col] = upper[col];
            upper[col]   = lower[col];
            lower[col]   = scratch[col];
          }
        }

        // A zero pivot means the column cannot be eliminated; give up with
        // the all-zero solution written above.
        if ( dEqualCoeff[pivotRow][pivotCol] == 0. )
        {
          return;
        }

        // Subtract the scaled pivot row from every row beneath it. The pivot
        // column itself is not rewritten; it is never read again afterwards.
        for ( int row = pivotRow + 1; row <= lastRow; ++row )
        {
          for ( int col = pivotRow; col < numCols; ++col )
          {
            dEqualCoeff[row][col] -= dEqualCoeff[pivotRow][col] * dEqualCoeff[row][pivotCol] / dEqualCoeff[pivotRow][pivotCol];
          }
        }
      }

      // Back substitution, starting with the last unknown.
      if ( dEqualCoeff[lastRow][iOrder - 1] == 0. )
      {
        return;
      }
      dAffinePara[iOrder - 1] = dEqualCoeff[lastRow][iOrder] / dEqualCoeff[lastRow][iOrder - 1];

      for ( int unknown = iOrder - 2; unknown >= 0; --unknown )
      {
        const double* const equation = dEqualCoeff[unknown + 1];

        if ( equation[unknown] == 0. )
        {
          // Part of the solution has already been written; wipe it again.
          resetParams();
          return;
        }

        // Contribution of the unknowns that are already solved.
        double solvedPart = 0;
        for ( int col = unknown + 1; col < iOrder; ++col )
        {
          solvedPart += equation[col] * dAffinePara[col];
        }
        dAffinePara[unknown] = ( equation[iOrder] - solvedPart ) / equation[unknown];
      }
    }
    
    // Re-checks which affine AMVP candidate is the cheapest predictor, in MV
    // bits, for the control-point MVs in acMv.
    //
    //   pu             prediction unit; its cu supplies affineType and transQuantBypass
    //   affineAMVPInfo candidate list (mvCandLT / mvCandRT / mvCandLB, numCand)
    //   eRefPicList    reference list of the MVs (not used inside this function)
    //   acMv           control-point MVs being coded (read only)
    //   acMvPred       in: predictors of candidate riMVPIdx; out: predictors of the chosen candidate
    //   riMVPIdx       in: current candidate index; out: chosen candidate index
    //   ruiBits        in/out: total bits, adjusted by the change in MV bits
    //   ruiCost        in/out: total cost, adjusted by the change in bit cost
    //
    // Nothing is modified when fewer than two candidates exist or when no other
    // candidate needs strictly fewer bits than the current one.
    void InterSearch::xCheckBestAffineMVP( PredictionUnit &pu, AffineAMVPInfo &affineAMVPInfo, RefPicList eRefPicList, Mv acMv[3], Mv acMvPred[3], int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost )
    {
      if ( affineAMVPInfo.numCand < 2 )
      {
        return;
      }

      // Number of control-point MVs: 3 when affineType is non-zero, otherwise 2.
      int mvNum = pu.cu->affineType ? 3 : 2;

      m_pcRdCost->selectMotionLambda( pu.cu->transQuantBypass );
      m_pcRdCost->setCostScale ( 0 );

      int iBestMVPIdx = riMVPIdx;

      // Get origin MV bits
    #if JVET_M0246_AFFINE_AMVR
      Mv tmpPredMv[3];
      int iOrgMvBits = xCalcAffineMVBits( pu, acMv, acMvPred );
    #else
      int iOrgMvBits = 0;
      for ( int iVerIdx = 0; iVerIdx < mvNum; iVerIdx++ )
      {
        m_pcRdCost->setPredictor ( acMvPred[iVerIdx] );

        const int shift = 0;

        // For every vertex but the first, the predictor is shifted by the
        // difference between the first MV and its predictor.
        Mv secondPred;
        if ( iVerIdx != 0 )
        {
          secondPred = acMvPred[iVerIdx] + (acMv[0] - acMvPred[0]);
          m_pcRdCost->setPredictor( secondPred );
        }
        iOrgMvBits += m_pcRdCost->getBitsOfVectorWithPredictor( acMv[iVerIdx].getHor()>>shift, acMv[iVerIdx].getVer()>>shift, 0 );
      }
    #endif
      iOrgMvBits += m_auiMVPIdxCost[riMVPIdx][AMVP_MAX_NUM_CANDS];

      // Evaluate every other candidate and remember the one with the fewest bits.
      int iBestMvBits = iOrgMvBits;
      for (int iMVPIdx = 0; iMVPIdx < affineAMVPInfo.numCand; iMVPIdx++)
      {
        if (iMVPIdx == riMVPIdx)
        {
          continue;
        }

    #if JVET_M0246_AFFINE_AMVR
        tmpPredMv[0] = affineAMVPInfo.mvCandLT[iMVPIdx];
        tmpPredMv[1] = affineAMVPInfo.mvCandRT[iMVPIdx];
        if ( mvNum == 3 )
        {
          tmpPredMv[2] = affineAMVPInfo.mvCandLB[iMVPIdx];
        }
        int iMvBits = xCalcAffineMVBits( pu, acMv, tmpPredMv );
    #else
        int iMvBits = 0;
        for ( int iVerIdx = 0; iVerIdx < mvNum; iVerIdx++ )
        {
          m_pcRdCost->setPredictor( iVerIdx == 2 ? affineAMVPInfo.mvCandLB[iMVPIdx] :
            (iVerIdx == 1 ? affineAMVPInfo.mvCandRT[iMVPIdx] : affineAMVPInfo.mvCandLT[iMVPIdx]) );

          const int shift = 0;

          // Same predictor shift as in the origin-bits loop above, applied to
          // the candidate under test.
          Mv secondPred;
          if ( iVerIdx != 0 )
          {
            secondPred = (iVerIdx == 1 ? affineAMVPInfo.mvCandRT[iMVPIdx] : affineAMVPInfo.mvCandLB[iMVPIdx]) + (acMv[0] - affineAMVPInfo.mvCandLT[iMVPIdx]);

            m_pcRdCost->setPredictor( secondPred );
          }
          iMvBits += m_pcRdCost->getBitsOfVectorWithPredictor( acMv[iVerIdx].getHor()>>shift, acMv[iVerIdx].getVer()>>shift, 0 );
        }
    #endif

        iMvBits += m_auiMVPIdxCost[iMVPIdx][AMVP_MAX_NUM_CANDS];

        if (iMvBits < iBestMvBits)
        {
          iBestMvBits = iMvBits;
          iBestMVPIdx = iMVPIdx;
        }
      }

      if (iBestMVPIdx != riMVPIdx)  // if changed
      {
        // All three predictors are copied, including the third one when mvNum is 2.
        acMvPred[0] = affineAMVPInfo.mvCandLT[iBestMVPIdx];
        acMvPred[1] = affineAMVPInfo.mvCandRT[iBestMVPIdx];
        acMvPred[2] = affineAMVPInfo.mvCandLB[iBestMVPIdx];
        riMVPIdx = iBestMVPIdx;

        // Swap the old MV bits for the new ones and re-derive the cost from
        // the bit difference.
        uint32_t uiOrgBits = ruiBits;
        ruiBits = uiOrgBits - iOrgMvBits + iBestMvBits;
        ruiCost = (ruiCost - m_pcRdCost->getCost( uiOrgBits )) + m_pcRdCost->getCost( ruiBits );
      }
    }
    
    void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
                                               PelUnitBuf&     origBuf,
                                               RefPicList      eRefPicList,
                                               Mv              acMvPred[3],
                                               int             iRefIdxPred,
                                               Mv              acMv[3],
                                               uint32_t&           ruiBits,
                                               Distortion&     ruiCost,
    
      if( pu.cu->cs->sps->getSpsNext().getUseGBi() && pu.cu->GBiIdx != GBI_DEFAULT && !bBi && xReadBufferedAffineUniMv(pu, eRefPicList, iRefIdxPred, acMvPred, acMv, ruiBits, ruiCost) )
      {
        return;
      }
    
      const int width  = pu.Y().width;
      const int height = pu.Y().height;
    
      const Picture* refPic = pu.cu->slice->getRefPic(eRefPicList, iRefIdxPred);
    
      // Set Origin YUV: pcYuv
      PelUnitBuf*   pBuf = &origBuf;
      double        fWeight       = 1.0;
    
      PelUnitBuf  origBufTmp = m_tmpStorageLCU.getBuf( UnitAreaRelative( *pu.cu, pu ) );
    
      // if Bi, set to ( 2 * Org - ListX )
      if ( bBi )
      {
        // NOTE: Other buf contains predicted signal from another direction
        PelUnitBuf otherBuf = m_tmpPredStorage[1 - (int)eRefPicList].getBuf( UnitAreaRelative( *pu.cu, pu ) );
        origBufTmp.copyFrom(origBuf);
    
        origBufTmp.removeHighFreq(otherBuf, m_pcEncCfg->getClipForBiPredMeEnabled(), pu.cu->slice->clpRngs()
    
                                 ,getGbiWeight(pu.cu->GBiIdx, eRefPicList)
    
        fWeight = xGetMEDistortionWeight( pu.cu->GBiIdx, eRefPicList );
    
      }
    
      // pred YUV
      PelUnitBuf  predBuf = m_tmpAffiStorage.getBuf( UnitAreaRelative(*pu.cu, pu) );
    
      // Set start Mv position, use input mv as started search mv
      Mv acMvTemp[3];
      ::memcpy( acMvTemp, acMv, sizeof(Mv)*3 );
    
    #if JVET_M0246_AFFINE_AMVR
      if ( pu.cu->imv != 1 )
      {
    #endif
        acMvTemp[0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
        acMvTemp[1].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
        acMvTemp[2].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
    #if JVET_M0246_AFFINE_AMVR
      }
    #endif
    
      // Set delta mv
      // malloc buffer
      int iParaNum = pu.cu->affineType ? 7 : 5;
      int affineParaNum = iParaNum - 1;
      int mvNum = pu.cu->affineType ? 3 : 2;
      double **pdEqualCoeff;
      pdEqualCoeff = new double *[iParaNum];
      for ( int i = 0; i < iParaNum; i++ )
      {
        pdEqualCoeff[i] = new double[iParaNum];
      }
    
      int64_t  i64EqualCoeff[7][7];
      Pel    *piError = m_tmpAffiError;
      int    *pdDerivate[2];
      pdDerivate[0] = m_tmpAffiDeri[0];
      pdDerivate[1] = m_tmpAffiDeri[1];
    
      Distortion uiCostBest = std::numeric_limits<Distortion>::max();
      uint32_t uiBitsBest = 0;
    
      // do motion compensation with origin mv
    
    Philippe Hanhart's avatar
    Philippe Hanhart committed
      clipMv( acMvTemp[0], pu.cu->lumaPos(),
              pu.cu->lumaSize(),
              *pu.cs->sps );
      clipMv( acMvTemp[1], pu.cu->lumaPos(),
              pu.cu->lumaSize(),
              *pu.cs->sps );
    
    Philippe Hanhart's avatar
    Philippe Hanhart committed
        clipMv( acMvTemp[2], pu.cu->lumaPos(),
                pu.cu->lumaSize(),
                *pu.cs->sps );
    
    #if JVET_M0246_AFFINE_AMVR
      int mvdPrecision = ( pu.cu->imv == 1 ) ? 2 : 0;
      if ( pu.cu->imv == 2 )
      {
        acMvTemp[0].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT );
        acMvTemp[1].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT );
        if ( pu.cu->affineType == AFFINEMODEL_6PARAM )
        {
          acMvTemp[2].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT );
        }
      }
    #endif
    
      xPredAffineBlk( COMPONENT_Y, pu, refPic, acMvTemp, predBuf, false, pu.cs->slice->clpRng( COMPONENT_Y ) );
    
      // get error
      uiCostBest = m_pcRdCost->getDistPart( predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_HAD );
    
      // get cost with mv
      m_pcRdCost->setCostScale(0);
      uiBitsBest = ruiBits;
      DTRACE( g_trace_ctx, D_COMMON, " (%d) xx uiBitsBest=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiBitsBest );
    
    #if JVET_M0246_AFFINE_AMVR
      uiBitsBest += xCalcAffineMVBits( pu, acMvTemp, acMvPred, pu.cu->imv != 1 );
      DTRACE( g_trace_ctx, D_COMMON, " (%d) yy uiBitsBest=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiBitsBest );
    #else
    
      for ( int i = 0; i < mvNum; i++ )
      {
        DTRACE( g_trace_ctx, D_COMMON, "#mvPredForBits=(%d,%d) \n", acMvPred[i].getHor(), acMvPred[i].getVer() );
        m_pcRdCost->setPredictor( acMvPred[i] );
        DTRACE( g_trace_ctx, D_COMMON, "#mvForBits=(%d,%d) \n", acMvTemp[i].getHor(), acMvTemp[i].getVer() );
    
        Mv mv0 = acMvTemp[0];
        mv0.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
    
        const int shift = MV_FRACTIONAL_BITS_DIFF;
    
          secondPred.hor = acMvPred[i].hor + mv0.hor - acMvPred[0].hor;
          secondPred.ver = acMvPred[i].ver + mv0.ver - acMvPred[0].ver;
    
          m_pcRdCost->setPredictor( secondPred );
        }
        uiBitsBest += m_pcRdCost->getBitsOfVectorWithPredictor( acMvTemp[i].getHor()>>shift, acMvTemp[i].getVer()>>shift, 0 );
        DTRACE( g_trace_ctx, D_COMMON, " (%d) yy uiBitsBest=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiBitsBest );
      }
    
      uiCostBest = (Distortion)( floor( fWeight * (double)uiCostBest ) + (double)m_pcRdCost->getCost( uiBitsBest ) );
    
    
      DTRACE( g_trace_ctx, D_COMMON, " (%d) uiBitsBest=%d, uiCostBest=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiBitsBest, uiCostBest );
    
      ::memcpy( acMv, acMvTemp, sizeof(Mv) * 3 );
    
      const int bufStride = pBuf->Y().stride;
      const int predBufStride = predBuf.Y().stride;
    
      int iIterTime;
      if ( pu.cu->affineType == AFFINEMODEL_6PARAM )
      {
        iIterTime = bBi ? 3 : 4;
      }
      else
      {
        iIterTime = bBi ? 3 : 5;
      }
    
      if ( !pu.cu->cs->sps->getSpsNext().getUseAffineType() )
      {
        iIterTime = bBi ? 5 : 7;
      }
      for ( int iter=0; iter<iIterTime; iter++ )    // iterate loop
      {
        /*********************************************************************************
         *                         use gradient to update mv
         *********************************************************************************/
        // get Error Matrix
        Pel* pOrg  = pBuf->Y().buf;
        Pel* pPred = predBuf.Y().buf;
        for ( int j=0; j< height; j++ )
        {
          for ( int i=0; i< width; i++ )
          {
            piError[i + j * width] = pOrg[i] - pPred[i];
          }
          pOrg  += bufStride;
          pPred += predBufStride;
        }
    
        // sobel x direction
        // -1 0 1
        // -2 0 2
        // -1 0 1
        pPred = predBuf.Y().buf;
        m_HorizontalSobelFilter( pPred, predBufStride, pdDerivate[0], width, width, height );
    
        // sobel y direction
        // -1 -2 -1
        //  0  0  0
        //  1  2  1
        m_VerticalSobelFilter( pPred, predBufStride, pdDerivate[1], width, width, height );
    
        // solve delta x and y
        for ( int row = 0; row < iParaNum; row++ )
        {
          memset( &i64EqualCoeff[row][0], 0, iParaNum * sizeof( int64_t ) );
        }
    
        m_EqualCoeffComputer( piError, width, pdDerivate, width, i64EqualCoeff, width, height
          , (pu.cu->affineType == AFFINEMODEL_6PARAM)
        );
    
        for ( int row = 0; row < iParaNum; row++ )
        {
          for ( int i = 0; i < iParaNum; i++ )
          {
            pdEqualCoeff[row][i] = (double)i64EqualCoeff[row][i];
          }
        }
    
        double dAffinePara[6];
        double dDeltaMv[6];
        Mv acDeltaMv[3];
    
        solveEqual( pdEqualCoeff, affineParaNum, dAffinePara );
    
        // convert to delta mv
        dDeltaMv[0] = dAffinePara[0];
        dDeltaMv[2] = dAffinePara[2];
        if ( pu.cu->affineType == AFFINEMODEL_6PARAM )
        {
          dDeltaMv[1] = dAffinePara[1] * width + dAffinePara[0];
          dDeltaMv[3] = dAffinePara[3] * width + dAffinePara[2];
          dDeltaMv[4] = dAffinePara[4] * height + dAffinePara[0];
          dDeltaMv[5] = dAffinePara[5] * height + dAffinePara[2];
        }
        else
        {
          dDeltaMv[1] = dAffinePara[1] * width + dAffinePara[0];
          dDeltaMv[3] = -dAffinePara[3] * width + dAffinePara[2];
        }
    
    #if JVET_M0246_AFFINE_AMVR
        int mvShift = MV_FRACTIONAL_BITS_DIFF - mvdPrecision;
        int multiShift = 1 << ( MV_FRACTIONAL_BITS_DIFF + mvdPrecision );
    
        acDeltaMv[0] = Mv( ( int ) ( dDeltaMv[0] * multiShift + SIGN( dDeltaMv[0] ) * 0.5 ) << mvShift, ( int ) ( dDeltaMv[2] * multiShift + SIGN( dDeltaMv[2] ) * 0.5 ) << mvShift );
        acDeltaMv[1] = Mv( ( int ) ( dDeltaMv[1] * multiShift + SIGN( dDeltaMv[1] ) * 0.5 ) << mvShift, ( int ) ( dDeltaMv[3] * multiShift + SIGN( dDeltaMv[3] ) * 0.5 ) << mvShift );
    
        if ( pu.cu->affineType == AFFINEMODEL_6PARAM )
        {
          acDeltaMv[2] = Mv( ( int ) ( dDeltaMv[4] * multiShift + SIGN( dDeltaMv[4] ) * 0.5 ) << mvShift, ( int ) ( dDeltaMv[5] * multiShift + SIGN( dDeltaMv[5] ) * 0.5 ) << mvShift );
        }
    #else
    
        acDeltaMv[0] = Mv( (int)(dDeltaMv[0] * 4 + SIGN( dDeltaMv[0] ) * 0.5) << MV_FRACTIONAL_BITS_DIFF, (int)(dDeltaMv[2] * 4 + SIGN( dDeltaMv[2] ) * 0.5) << MV_FRACTIONAL_BITS_DIFF
    
        acDeltaMv[1] = Mv( (int)(dDeltaMv[1] * 4 + SIGN( dDeltaMv[1] ) * 0.5) << MV_FRACTIONAL_BITS_DIFF, (int)(dDeltaMv[3] * 4 + SIGN( dDeltaMv[3] ) * 0.5) << MV_FRACTIONAL_BITS_DIFF
    
          acDeltaMv[2] = Mv( (int)(dDeltaMv[4] * 4 + SIGN( dDeltaMv[4] ) * 0.5) << MV_FRACTIONAL_BITS_DIFF, (int)(dDeltaMv[5] * 4 + SIGN( dDeltaMv[5] ) * 0.5) << MV_FRACTIONAL_BITS_DIFF
    
        bool bAllZero = false;
        for ( int i = 0; i < mvNum; i++ )
        {
    
    #if JVET_M0246_AFFINE_AMVR
          Mv deltaMv = acDeltaMv[i];
          if ( pu.cu->imv == 2 )
          {
            deltaMv.roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_HALF );
          }
          if ( deltaMv.getHor() != 0 || deltaMv.getVer() != 0 )
    #else
    
          if ( acDeltaMv[i].getHor() != 0 || acDeltaMv[i].getVer() != 0 )
    
          {
            bAllZero = false;
            break;
          }
          bAllZero = true;
        }
    
        if ( bAllZero )
          break;
    
        // do motion compensation with updated mv
        for ( int i = 0; i < mvNum; i++ )
        {
          acMvTemp[i] += acDeltaMv[i];
    
    Kiran Misra's avatar
    Kiran Misra committed
    #if JVET_M0479_18BITS_MV_CLIP
          acMvTemp[i].hor = Clip3( -131072, 131071, acMvTemp[i].hor );
          acMvTemp[i].ver = Clip3( -131072, 131071, acMvTemp[i].ver );
    #else
    
          acMvTemp[i].hor = Clip3( -32768, 32767, acMvTemp[i].hor );
          acMvTemp[i].ver = Clip3( -32768, 32767, acMvTemp[i].ver );
    
    Kiran Misra's avatar
    Kiran Misra committed
    #endif
    
    #if JVET_M0246_AFFINE_AMVR
          if ( pu.cu->imv == 0 )
          {
    #endif
            acMvTemp[i].roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
    #if JVET_M0246_AFFINE_AMVR
          }
          else if ( pu.cu->imv == 2 )
          {
            acMvTemp[i].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT );
          }
    #endif
    
    Philippe Hanhart's avatar
    Philippe Hanhart committed
          clipMv(acMvTemp[i], pu.cu->lumaPos(),
                 pu.cu->lumaSize(),
                 *pu.cs->sps);
    
        }
        xPredAffineBlk( COMPONENT_Y, pu, refPic, acMvTemp, predBuf, false, pu.cu->slice->clpRng( COMPONENT_Y ) );
    
        // get error
        Distortion uiCostTemp = m_pcRdCost->getDistPart( predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_HAD );
        DTRACE( g_trace_ctx, D_COMMON, " (%d) uiCostTemp=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiCostTemp );
    
        // get cost with mv
        m_pcRdCost->setCostScale(0);
        uint32_t uiBitsTemp = ruiBits;
    
    #if JVET_M0246_AFFINE_AMVR
        uiBitsTemp += xCalcAffineMVBits( pu, acMvTemp, acMvPred, pu.cu->imv != 1 );
    #else
    
        for ( int i = 0; i < mvNum; i++ )
        {
          m_pcRdCost->setPredictor( acMvPred[i] );
    
          Mv mv0 = acMvTemp[0];
          mv0.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
    
          const int shift = MV_FRACTIONAL_BITS_DIFF;
    
            secondPred.hor = acMvPred[i].hor + mv0.hor - acMvPred[0].hor;
            secondPred.ver = acMvPred[i].ver + mv0.ver - acMvPred[0].ver;
    
            m_pcRdCost->setPredictor( secondPred );
          }
          uiBitsTemp += m_pcRdCost->getBitsOfVectorWithPredictor( acMvTemp[i].getHor()>>shift, acMvTemp[i].getVer()>>shift, 0 );
        }
    
        uiCostTemp = (Distortion)( floor( fWeight * (double)uiCostTemp ) + (double)m_pcRdCost->getCost( uiBitsTemp ) );
    
    
        // store best cost and mv
        if ( uiCostTemp < uiCostBest )
        {
          uiCostBest = uiCostTemp;
          uiBitsBest = uiBitsTemp;
          memcpy( acMv, acMvTemp, sizeof(Mv) * 3 );
        }
      }
    
    
      // Evaluates one candidate set of control-point MVs and keeps it if it beats the current best.
      //
      // ctrlPtMv: the control-point MVs to test (three entries are copied into acMv on success).
      //
      // Everything else is captured by reference from the enclosing function. On improvement the
      // lambda overwrites uiCostBest, uiBitsBest and acMv; it also overwrites predBuf and resets
      // the cost scale of m_pcRdCost to 0 on every call.
      auto checkCPMVRdCost = [&](Mv ctrlPtMv[3])
      {
        // Build the luma prediction for the candidate control-point MVs into predBuf.
        xPredAffineBlk(COMPONENT_Y, pu, refPic, ctrlPtMv, predBuf, false, pu.cu->slice->clpRng(COMPONENT_Y));
        // get error (DF_HAD distortion between the prediction and pBuf, luma only)
        Distortion costTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_HAD);
        // get cost with mv
        m_pcRdCost->setCostScale(0);
        // Start from the bit count supplied by the caller and add the MV bits.
        uint32_t bitsTemp = ruiBits;

    #if JVET_M0246_AFFINE_AMVR
        bitsTemp += xCalcAffineMVBits( pu, ctrlPtMv, acMvPred, pu.cu->imv != 1 );
    #else
        for (int i = 0; i < mvNum; i++)
        {
          m_pcRdCost->setPredictor(acMvPred[i]);

          Mv mv0 = ctrlPtMv[0];
          mv0.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);

          const int shift = MV_FRACTIONAL_BITS_DIFF;

          Mv secondPred;
          if (i != 0)
          {
            // For every control point after the first, offset its predictor by the
            // difference between the first control-point MV and that MV's predictor.
            secondPred.hor = acMvPred[i].hor + mv0.hor - acMvPred[0].hor;
            secondPred.ver = acMvPred[i].ver + mv0.ver - acMvPred[0].ver;
            m_pcRdCost->setPredictor(secondPred);
          }
          bitsTemp += m_pcRdCost->getBitsOfVectorWithPredictor(ctrlPtMv[i].getHor() >> shift, ctrlPtMv[i].getVer() >> shift, 0);
        }
    #endif

        // Total cost = weighted distortion (rounded down) + cost of the bits.
        // This must sit outside the preprocessor conditional so both bit-counting variants reach it.
        costTemp = (Distortion)(floor(fWeight * (double)costTemp) + (double)m_pcRdCost->getCost(bitsTemp));
        // store best cost and mv
        if (costTemp < uiCostBest)
        {
          uiCostBest = costTemp;
          uiBitsBest = bitsTemp;
          ::memcpy(acMv, ctrlPtMv, sizeof(Mv) * 3);
        }
      };
    
      if (uiCostBest <= AFFINE_ME_LIST_MVP_TH*m_hevcCost)
      {
        Mv mvPredTmp[3] = { acMvPred[0], acMvPred[1], acMvPred[2] };
    
    #if JVET_M0246_AFFINE_AMVR
        if ( pu.cu->imv != 1 )
        {
    #endif
          mvPredTmp[0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
          mvPredTmp[1].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
          mvPredTmp[2].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
    #if JVET_M0246_AFFINE_AMVR
        }
    #endif
    
        Mv mvME[3];
        ::memcpy(mvME, acMv, sizeof(Mv) * 3);
        Mv dMv = mvME[0] - mvPredTmp[0];
    
        for (int j = 0; j < mvNum; j++)
        {
          if ((!j && mvME[j] != mvPredTmp[j]) || (j && mvME[j] != (mvPredTmp[j] + dMv)))
          {
            ::memcpy(acMvTemp, mvME, sizeof(Mv) * 3);
            acMvTemp[j] = mvPredTmp[j];
    
            if (j)
              acMvTemp[j] += dMv;
    
            checkCPMVRdCost(acMvTemp);
          }
        }
    
        //keep the rotation/zoom;
        if (mvME[0] != mvPredTmp[0])
        {
          ::memcpy(acMvTemp, mvME, sizeof(Mv) * 3);
          for (int i = 1; i < mvNum; i++)
          {
            acMvTemp[i] -= dMv;
          }
          acMvTemp[0] = mvPredTmp[0];
    
          checkCPMVRdCost(acMvTemp);
        }
    
        //keep the translation; 
        if (pu.cu->affineType == AFFINEMODEL_6PARAM && mvME[1] != (mvPredTmp[1] + dMv) && mvME[2] != (mvPredTmp[2] + dMv))
        {
          ::memcpy(acMvTemp, mvME, sizeof(Mv) * 3);
    
          acMvTemp[1] = mvPredTmp[1] + dMv;
          acMvTemp[2] = mvPredTmp[2] + dMv;
    
          checkCPMVRdCost(acMvTemp);
        }
    
        {
          dMv = acMv[1] - acMv[0];
          if (pu.cu->affineType == AFFINEMODEL_4PARAM && (dMv.getAbsHor() > 4 || dMv.getAbsVer() > 4))
          {
            int testPos[4][2] = { { -1, 0 },{ 0, -1 },{ 0, 1 },{ 1, 0 } };
            Mv centerMv[3];
    
    #if JVET_M0246_AFFINE_AMVR
            const uint32_t mvShift = pu.cu->imv == 1 ? 0 : ( pu.cu->imv == 2 ? ( MV_FRACTIONAL_BITS_DIFF << 1 ) : MV_FRACTIONAL_BITS_DIFF );
    #endif
    
            ::memcpy(centerMv, acMv, sizeof(Mv) * 3);
            acMvTemp[0] = centerMv[0];
            for (int i = 0; i < 4; i++)
            {
    
    #if JVET_M0246_AFFINE_AMVR
              acMvTemp[1].set( centerMv[1].getHor() + ( testPos[i][0] << mvShift ), centerMv[1].getVer() + ( testPos[i][1] << mvShift ) );
    #else
    
              acMvTemp[1].set(centerMv[1].getHor() + (testPos[i][0] << MV_FRACTIONAL_BITS_DIFF), centerMv[1].getVer() + (testPos[i][1] << MV_FRACTIONAL_BITS_DIFF));