/* The copyright in this software is being made available under the BSD
 * License, included below. This software may be subject to other third party
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
 * Copyright (c) 2010-2019, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
 *    be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/** \file     EncSearch.cpp
 *  \brief    encoder inter search class
 */

#include "InterSearch.h"

#include "CommonLib/CommonDef.h"
#include "CommonLib/Rom.h"
#include "CommonLib/MotionInfo.h"
#include "CommonLib/Picture.h"
#include "CommonLib/UnitTools.h"
#include "CommonLib/dtrace_next.h"
#include "CommonLib/dtrace_buffer.h"
#include "CommonLib/MCTS.h"

#include "EncModeCtrl.h"
#include "EncLib.h"

#include <math.h>
#include <limits>

//! \ingroup EncoderLib
//! \{

// Offsets probed by xPatternRefinement when iFrac == 2.
static const Mv s_acMvRefineH[9] =
{
  Mv(  0,  0 ), // 0
  Mv(  0, -1 ), // 1
  Mv(  0,  1 ), // 2
  Mv( -1,  0 ), // 3
  Mv(  1,  0 ), // 4
  Mv( -1, -1 ), // 5
  Mv(  1, -1 ), // 6
  Mv( -1,  1 ), // 7
  Mv(  1,  1 )  // 8
};

// Offsets probed by xPatternRefinement for every other iFrac value
// (same nine offsets as above, visited in a different order).
static const Mv s_acMvRefineQ[9] =
{
  Mv(  0,  0 ), // 0
  Mv(  0, -1 ), // 1
  Mv(  0,  1 ), // 2
  Mv( -1, -1 ), // 5
  Mv(  1, -1 ), // 6
  Mv( -1,  0 ), // 3
  Mv(  1,  0 ), // 4
  Mv( -1,  1 ), // 7
  Mv(  1,  1 )  // 8
};

/// Puts the object into a well-defined "not initialized" state.
/// No buffers are allocated here; that happens in init().
InterSearch::InterSearch()
  : m_modeCtrl                    (nullptr)
  , m_pSplitCS                    (nullptr)
  , m_pFullCS                     (nullptr)
  , m_pcEncCfg                    (nullptr)
  , m_pcTrQuant                   (nullptr)
  , m_pcReshape                   (nullptr)
  , m_iSearchRange                (0)
  , m_bipredSearchRange           (0)
  , m_motionEstimationSearchMethod(MESEARCH_FULL)
  , m_CABACEstimator              (nullptr)
  , m_CtxCache                    (nullptr)
  , m_pTempPel                    (nullptr)
  , m_isInitialized               (false)
{
  // These pointers are tested / released by destroy(); give them a defined
  // value so they never hold an indeterminate address. Assigned in the body
  // (not the initializer list) so member declaration order does not matter.
  m_pSaveCS        = nullptr;
  m_tmpAffiError   = nullptr;
  m_tmpAffiDeri[0] = nullptr;
  m_tmpAffiDeri[1] = nullptr;

  for (int i=0; i<MAX_NUM_REF_LIST_ADAPT_SR; i++)
  {
    memset (m_aaiAdaptSR[i], 0, MAX_IDX_ADAPT_SR * sizeof (int));
  }
  for (int i=0; i<AMVP_MAX_NUM_CANDS+1; i++)
  {
    memset (m_auiMVPIdxCost[i], 0, (AMVP_MAX_NUM_CANDS+1) * sizeof (uint32_t) );
  }

  setWpScalingDistParam( -1, REF_PIC_LIST_X, nullptr );

  m_affMVList      = nullptr;
  m_affMVListSize  = 0;
  m_affMVListIdx   = 0;
  m_histBestSbt    = MAX_UCHAR;
  m_histBestMtsIdx = MAX_UCHAR;
}

/// Releases everything allocated by init() and returns the object to the
/// "not initialized" state. Must only be called on an initialized object.
void InterSearch::destroy()
{
  CHECK(!m_isInitialized, "Not initialized");

  // delete[] on a null pointer is a no-op, so no guards are needed.
  delete[] m_pTempPel;
  m_pTempPel = nullptr;

  // The coding-structure tables are only borrowed (see setTempBuffers()).
  m_pSplitCS = m_pFullCS = nullptr;
  m_pSaveCS  = nullptr;

  for(uint32_t i = 0; i < NUM_REF_PIC_LIST_01; i++)
  {
    m_tmpPredStorage[i].destroy();
  }
  m_tmpStorageLCU.destroy();
  m_tmpAffiStorage.destroy();

  // Null every released pointer so no dangling address survives destroy().
  delete[] m_tmpAffiError;
  m_tmpAffiError = nullptr;
  delete[] m_tmpAffiDeri[0];
  m_tmpAffiDeri[0] = nullptr;
  delete[] m_tmpAffiDeri[1];
  m_tmpAffiDeri[1] = nullptr;

  delete[] m_affMVList;
  m_affMVList     = nullptr;
  m_affMVListIdx  = 0;
  m_affMVListSize = 0;

  m_isInitialized = false;
}

/// Stores the caller-provided coding-structure tables; ownership is not taken
/// (destroy() only resets the pointers).
void InterSearch::setTempBuffers( CodingStructure ****pSplitCS, CodingStructure ****pFullCS, CodingStructure **pSaveCS )
{
  m_pSplitCS = pSplitCS;
  m_pFullCS  = pFullCS;
  m_pSaveCS  = pSaveCS;
}

#if ENABLE_SPLIT_PARALLELISM
/// Copies the adaptive search-range table from another instance.
void InterSearch::copyState( const InterSearch& other )
{
  memcpy( m_aaiAdaptSR, other.m_aaiAdaptSR, sizeof( m_aaiAdaptSR ) );
}
#endif

InterSearch::~InterSearch()
{
  if (m_isInitialized)
  {
    destroy();
  }
}

/// Stores the encoder configuration and helper objects, fills the adaptive
/// search-range and MVP-index cost tables and allocates the working buffers.
/// Must not be called on an already initialized object.
void InterSearch::init( EncCfg*        pcEncCfg,
                        TrQuant*       pcTrQuant,
                        int            iSearchRange,
                        int            bipredSearchRange,
                        MESearchMethod motionEstimationSearchMethod,
                        bool           useCompositeRef,
                        const uint32_t maxCUWidth,
                        const uint32_t maxCUHeight,
                        const uint32_t maxTotalCUDepth,
                        RdCost*        pcRdCost,
                        CABACWriter*   CABACEstimator,
                        CtxCache*      ctxCache
                      , EncReshape*    pcReshape
)
{
  CHECK(m_isInitialized, "Already initialized");

  m_numBVs = 0;
#if JVET_N0329_IBC_SEARCH_IMP
  for (int i = 0; i < IBC_NUM_CANDIDATES; i++)
  {
    m_defaultCachedBvs.m_bvCands[i].setZero();
  }
  m_defaultCachedBvs.currCnt = 0;
#else
  m_numBV16s = 0;
#endif

  m_pcEncCfg                     = pcEncCfg;
  m_pcTrQuant                    = pcTrQuant;
  m_iSearchRange                 = iSearchRange;
  m_bipredSearchRange            = bipredSearchRange;
  m_motionEstimationSearchMethod = motionEstimationSearchMethod;
  m_CABACEstimator               = CABACEstimator;
  m_CtxCache                     = ctxCache;
  m_useCompositeRef              = useCompositeRef;
  m_pcReshape                    = pcReshape;

  for( uint32_t iDir = 0; iDir < MAX_NUM_REF_LIST_ADAPT_SR; iDir++ )
  {
    for( uint32_t iRefIdx = 0; iRefIdx < MAX_IDX_ADAPT_SR; iRefIdx++ )
    {
      m_aaiAdaptSR[iDir][iRefIdx] = iSearchRange;
    }
  }

  // initialize motion cost
  for( int iNum = 0; iNum < AMVP_MAX_NUM_CANDS + 1; iNum++ )
  {
    for( int iIdx = 0; iIdx < AMVP_MAX_NUM_CANDS; iIdx++ )
    {
      if( iIdx < iNum )
      {
        m_auiMVPIdxCost[iIdx][iNum] = xGetMvpIdxBits( iIdx, iNum );
      }
      else
      {
        // index iIdx does not exist in a list of iNum candidates
        m_auiMVPIdxCost[iIdx][iNum] = MAX_UINT;
      }
    }
  }

  const ChromaFormat cform = pcEncCfg->getChromaFormatIdc();
  InterPrediction::init( pcRdCost, cform );

  // All temporary prediction buffers share the same MAX_CU_SIZE square area.
  const UnitArea maxCuArea( cform, Area( 0, 0, MAX_CU_SIZE, MAX_CU_SIZE ) );
  for( uint32_t i = 0; i < NUM_REF_PIC_LIST_01; i++ )
  {
    m_tmpPredStorage[i].create( maxCuArea );
  }
  m_tmpStorageLCU.create( maxCuArea );
  m_tmpAffiStorage.create( maxCuArea );
  m_tmpAffiError   = new Pel[MAX_CU_SIZE * MAX_CU_SIZE];
  m_tmpAffiDeri[0] = new int[MAX_CU_SIZE * MAX_CU_SIZE];
  m_tmpAffiDeri[1] = new int[MAX_CU_SIZE * MAX_CU_SIZE];
  m_pTempPel       = new Pel[maxCUWidth*maxCUHeight];

  // The list size depends on whether the intra period equals (uint32_t)-1.
  m_affMVListMaxSize = (pcEncCfg->getIntraPeriod() == (uint32_t)-1) ? AFFINE_ME_LIST_SIZE_LD : AFFINE_ME_LIST_SIZE;
  if (!m_affMVList)
  {
    m_affMVList = new AffineMVInfo[m_affMVListMaxSize];
  }
  m_affMVListIdx  = 0;
  m_affMVListSize = 0;

  m_isInitialized = true;
}

/// Clears the saved affine motion: zero MVs, reference indices -1,
/// costs at their maximum and both availability flags false.
void InterSearch::resetSavedAffineMotion()
{
  for ( int i = 0; i < 2; i++ )
  {
    for ( int j = 0; j < 2; j++ )
    {
      m_affineMotion.acMvAffine4Para[i][j] = Mv( 0, 0 );
      m_affineMotion.acMvAffine6Para[i][j] = Mv( 0, 0 );
    }
    m_affineMotion.acMvAffine6Para[i][2] = Mv( 0, 0 );

    m_affineMotion.affine4ParaRefIdx[i] = -1;
    m_affineMotion.affine6ParaRefIdx[i] = -1;
  }
  for ( int i = 0; i < 3; i++ )
  {
    m_affineMotion.hevcCost[i] = std::numeric_limits<Distortion>::max();
  }
  m_affineMotion.affine4ParaAvail = false;
  m_affineMotion.affine6ParaAvail = false;
}

/// Saves the given affine MVs / reference indices for the given model type.
/// A result obtained with a non-default gbiIdx only fills an empty slot.
void InterSearch::storeAffineMotion( Mv acAffineMv[2][3], int16_t affineRefIdx[2], EAffineModel affineType, int gbiIdx )
{
  if ( ( gbiIdx == GBI_DEFAULT || !m_affineMotion.affine6ParaAvail ) && affineType == AFFINEMODEL_6PARAM )
  {
    for ( int i = 0; i < 2; i++ )
    {
      for ( int j = 0; j < 3; j++ )
      {
        m_affineMotion.acMvAffine6Para[i][j] = acAffineMv[i][j];
      }
      m_affineMotion.affine6ParaRefIdx[i] = affineRefIdx[i];
    }
    m_affineMotion.affine6ParaAvail = true;
  }

  if ( ( gbiIdx == GBI_DEFAULT ||
!m_affineMotion.affine4ParaAvail ) && affineType == AFFINEMODEL_4PARAM )
  {
    // 4-parameter model: only the first two control-point MVs per list are stored
    for ( int i = 0; i < 2; i++ )
    {
      for ( int j = 0; j < 2; j++ )
      {
        m_affineMotion.acMvAffine4Para[i][j] = acAffineMv[i][j];
      }
      m_affineMotion.affine4ParaRefIdx[i] = affineRefIdx[i];
    }
    m_affineMotion.affine4ParaAvail = true;
  }
}

/// Evaluates one candidate position (iSearchX, iSearchY) and, if its cost
/// (distortion + MV cost) beats rcStruct.uiBestSad, records it as the new best
/// together with ucPointNr and uiDistance (uiBestRound is reset to 0).
/// Uses and temporarily modifies the shared m_cDistParam.
inline void InterSearch::xTZSearchHelp( IntTZSearchStruct& rcStruct, const int iSearchX, const int iSearchY, const uint8_t ucPointNr, const uint32_t uiDistance )
{
  Distortion  uiSad = 0;

//  CHECK(!( !( rcStruct.searchRange.left > iSearchX || rcStruct.searchRange.right < iSearchX || rcStruct.searchRange.top > iSearchY || rcStruct.searchRange.bottom < iSearchY )), "Unspecified error");

  // address of the candidate block inside the reference plane
  const Pel* const  piRefSrch = rcStruct.piRefY + iSearchY * rcStruct.iRefStride + iSearchX;

  m_cDistParam.cur.buf = piRefSrch;

  if( 1 == rcStruct.subShiftMode )
  {
    // motion cost
    Distortion uiBitCost = m_pcRdCost->getCostOfVectorWithPredictor( iSearchX, iSearchY, rcStruct.imvShift );

    // Skip search if bit cost is already larger than best SAD
    if (uiBitCost < rcStruct.uiBestSad)
    {
      Distortion uiTempSad = m_cDistParam.distFunc( m_cDistParam );

      if((uiTempSad + uiBitCost) < rcStruct.uiBestSad)
      {
        // it's not supposed that any member of DistParams is manipulated beside cur.buf
        // (org.buf and subShift are changed below, so both are saved and restored)
        int subShift = m_cDistParam.subShift;
        const Pel* pOrgCpy = m_cDistParam.org.buf;
        uiSad += uiTempSad >> m_cDistParam.subShift;

        // Refine step by step: each pass measures rows offset by
        // (stride << isubShift) and stops early once the scaled partial sum
        // plus the bit cost exceeds the current best.
        while( m_cDistParam.subShift > 0 )
        {
          int isubShift           = m_cDistParam.subShift -1;
          m_cDistParam.org.buf = rcStruct.pcPatternKey->buf + (rcStruct.pcPatternKey->stride << isubShift);
          m_cDistParam.cur.buf = piRefSrch + (rcStruct.iRefStride << isubShift);
          uiTempSad            = m_cDistParam.distFunc( m_cDistParam );
          uiSad               += uiTempSad >> m_cDistParam.subShift;

          if(((uiSad << isubShift) + uiBitCost) > rcStruct.uiBestSad)
          {
            break;
          }

          m_cDistParam.subShift--;
        }

        // subShift reached 0 only if the loop above was not left early
        if(m_cDistParam.subShift == 0)
        {
          uiSad += uiBitCost;

          if( uiSad < rcStruct.uiBestSad )
          {
            rcStruct.uiBestSad      = uiSad;
            rcStruct.iBestX         = iSearchX;
            rcStruct.iBestY         = iSearchY;
            rcStruct.uiBestDistance = uiDistance;
            rcStruct.uiBestRound    = 0;
            rcStruct.ucPointNr      = ucPointNr;
            m_cDistParam.maximumDistortionForEarlyExit = uiSad;
          }
        }

        // restore org ptr
        m_cDistParam.org.buf  = pOrgCpy;
        m_cDistParam.subShift = subShift;
      }
    }
  }
  else
  {
    uiSad = m_cDistParam.distFunc( m_cDistParam );

    // only add motion cost if uiSad is smaller than best. Otherwise pointless
    // to add motion cost.
    if( uiSad < rcStruct.uiBestSad )
    {
      // motion cost
      uiSad += m_pcRdCost->getCostOfVectorWithPredictor( iSearchX, iSearchY, rcStruct.imvShift );

      if( uiSad < rcStruct.uiBestSad )
      {
        rcStruct.uiBestSad      = uiSad;
        rcStruct.iBestX         = iSearchX;
        rcStruct.iBestY         = iSearchY;
        rcStruct.uiBestDistance = uiDistance;
        rcStruct.uiBestRound    = 0;
        rcStruct.ucPointNr      = ucPointNr;
        m_cDistParam.maximumDistortionForEarlyExit = uiSad;
      }
    }
  }
}

/// Tests two further positions next to the current best one. Which two is
/// looked up in the offset tables by rcStruct.ucPointNr; positions outside
/// the search range are skipped.
inline void InterSearch::xTZ2PointSearch( IntTZSearchStruct& rcStruct )
{
  const SearchRange& sr = rcStruct.searchRange;

  // offsets of the first (row 0) and second (row 1) point, indexed by ucPointNr
  static const int xOffset[2][9] = { {  0, -1, -1,  0, -1, +1, -1, -1, +1 }, {  0,  0, +1, +1, -1, +1,  0, +1,  0 } };
  static const int yOffset[2][9] = { {  0,  0, -1, -1, +1, -1,  0, +1,  0 }, {  0, -1, -1,  0, -1, +1, +1, +1, +1 } };

  // 2 point search,                   //   1 2 3
  // check only the 2 untested points  //   4 0 5
  // around the start point            //   6 7 8
  const int iX1 = rcStruct.iBestX + xOffset[0][rcStruct.ucPointNr];
  const int iX2 = rcStruct.iBestX + xOffset[1][rcStruct.ucPointNr];

  const int iY1 = rcStruct.iBestY + yOffset[0][rcStruct.ucPointNr];
  const int iY2 = rcStruct.iBestY + yOffset[1][rcStruct.ucPointNr];

  if( iX1 >= sr.left && iX1 <= sr.right && iY1 >= sr.top && iY1 <= sr.bottom )
  {
    xTZSearchHelp( rcStruct, iX1, iY1, 0, 2 );
  }

  if( iX2 >= sr.left && iX2 <= sr.right && iY2 >= sr.top && iY2 <= sr.bottom )
  {
    xTZSearchHelp( rcStruct, iX2, iY2, 0, 2 );
  }
}

/// Probes up to eight positions on the square at distance iDist around
/// (iStartX, iStartY); positions beyond the checked search-range borders
/// are skipped.
inline void InterSearch::xTZ8PointSquareSearch( IntTZSearchStruct& rcStruct, const int iStartX, const int iStartY, const int iDist )
{
  const SearchRange& sr = rcStruct.searchRange;
  // 8 point search,                   //   1 2 3
  // search
// around the start point            //   4 0 5
  // with the required distance        //   6 7 8
  CHECK( iDist == 0 , "Invalid distance");
  const int iTop        = iStartY - iDist;
  const int iBottom     = iStartY + iDist;
  const int iLeft       = iStartX - iDist;
  const int iRight      = iStartX + iDist;
  rcStruct.uiBestRound += 1;

  if ( iTop >= sr.top ) // check top
  {
    if ( iLeft >= sr.left ) // check top left
    {
      xTZSearchHelp( rcStruct, iLeft, iTop, 1, iDist );
    }
    // top middle
    xTZSearchHelp( rcStruct, iStartX, iTop, 2, iDist );

    if ( iRight <= sr.right ) // check top right
    {
      xTZSearchHelp( rcStruct, iRight, iTop, 3, iDist );
    }
  } // check top
  if ( iLeft >= sr.left ) // check middle left
  {
    xTZSearchHelp( rcStruct, iLeft, iStartY, 4, iDist );
  }
  if ( iRight <= sr.right ) // check middle right
  {
    xTZSearchHelp( rcStruct, iRight, iStartY, 5, iDist );
  }
  if ( iBottom <= sr.bottom ) // check bottom
  {
    if ( iLeft >= sr.left ) // check bottom left
    {
      xTZSearchHelp( rcStruct, iLeft, iBottom, 6, iDist );
    }
    // check bottom middle
    xTZSearchHelp( rcStruct, iStartX, iBottom, 7, iDist );

    if ( iRight <= sr.right ) // check bottom right
    {
      xTZSearchHelp( rcStruct, iRight, iBottom, 8, iDist );
    }
  } // check bottom
}

/// Probes a diamond-shaped set of positions at distance iDist around
/// (iStartX, iStartY):
///  - iDist == 1 : the 4 direct neighbours, plus the 4 corners when
///                 bCheckCornersAtDist1 is set;
///  - iDist <= 8 : 4 axis points at iDist and 4 diagonal points at iDist/2;
///  - iDist  > 8 : 4 axis points plus 3 intermediate points per diamond edge
///                 (all reported with point number 0).
/// When the whole diamond lies inside the search range the per-point border
/// checks are skipped.
inline void InterSearch::xTZ8PointDiamondSearch( IntTZSearchStruct& rcStruct,
                                                 const int iStartX,
                                                 const int iStartY,
                                                 const int iDist,
                                                 const bool bCheckCornersAtDist1 )
{
  const SearchRange& sr = rcStruct.searchRange;
  // 8 point search,                   //   1 2 3
  // search around the start point     //   4 0 5
  // with the required distance        //   6 7 8
  CHECK( iDist == 0, "Invalid distance" );
  const int iTop        = iStartY - iDist;
  const int iBottom     = iStartY + iDist;
  const int iLeft       = iStartX - iDist;
  const int iRight      = iStartX + iDist;
  rcStruct.uiBestRound += 1;

  if ( iDist == 1 )
  {
    if ( iTop >= sr.top ) // check top
    {
      if (bCheckCornersAtDist1)
      {
        if ( iLeft >= sr.left) // check top-left
        {
          xTZSearchHelp( rcStruct, iLeft, iTop, 1, iDist );
        }
        xTZSearchHelp( rcStruct, iStartX, iTop, 2, iDist );
        if ( iRight <= sr.right ) // check top-right
        {
          xTZSearchHelp( rcStruct, iRight, iTop, 3, iDist );
        }
      }
      else
      {
        xTZSearchHelp( rcStruct, iStartX, iTop, 2, iDist );
      }
    }
    if ( iLeft >= sr.left ) // check middle left
    {
      xTZSearchHelp( rcStruct, iLeft, iStartY, 4, iDist );
    }
    if ( iRight <= sr.right ) // check middle right
    {
      xTZSearchHelp( rcStruct, iRight, iStartY, 5, iDist );
    }
    if ( iBottom <= sr.bottom ) // check bottom
    {
      if (bCheckCornersAtDist1)
      {
        if ( iLeft >= sr.left) // check bottom-left
        {
          xTZSearchHelp( rcStruct, iLeft, iBottom, 6, iDist );
        }
        xTZSearchHelp( rcStruct, iStartX, iBottom, 7, iDist );
        if ( iRight <= sr.right ) // check bottom-right
        {
          xTZSearchHelp( rcStruct, iRight, iBottom, 8, iDist );
        }
      }
      else
      {
        xTZSearchHelp( rcStruct, iStartX, iBottom, 7, iDist );
      }
    }
  }
  else
  {
    if ( iDist <= 8 )
    {
      // diagonal points sit at half the distance
      const int iTop_2      = iStartY - (iDist>>1);
      const int iBottom_2   = iStartY + (iDist>>1);
      const int iLeft_2     = iStartX - (iDist>>1);
      const int iRight_2    = iStartX + (iDist>>1);

      if (  iTop >= sr.top && iLeft >= sr.left &&
           iRight <= sr.right && iBottom <= sr.bottom ) // check border
      {
        // whole diamond inside the search range: no per-point checks
        xTZSearchHelp( rcStruct, iStartX,  iTop,      2, iDist    );
        xTZSearchHelp( rcStruct, iLeft_2,  iTop_2,    1, iDist>>1 );
        xTZSearchHelp( rcStruct, iRight_2, iTop_2,    3, iDist>>1 );
        xTZSearchHelp( rcStruct, iLeft,    iStartY,   4, iDist    );
        xTZSearchHelp( rcStruct, iRight,   iStartY,   5, iDist    );
        xTZSearchHelp( rcStruct, iLeft_2,  iBottom_2, 6, iDist>>1 );
        xTZSearchHelp( rcStruct, iRight_2, iBottom_2, 8, iDist>>1 );
        xTZSearchHelp( rcStruct, iStartX,  iBottom,   7, iDist    );
      }
      else // check border
      {
        if ( iTop >= sr.top ) // check top
        {
          xTZSearchHelp( rcStruct, iStartX, iTop, 2, iDist );
        }
        if ( iTop_2 >= sr.top ) // check half top
        {
          if ( iLeft_2 >= sr.left ) // check half left
          {
            xTZSearchHelp( rcStruct, iLeft_2, iTop_2, 1, (iDist>>1) );
          }
          if ( iRight_2 <= sr.right ) // check half right
          {
            xTZSearchHelp( rcStruct, iRight_2, iTop_2, 3, (iDist>>1) );
          }
        } // check half top
        if ( iLeft >= sr.left ) // check left
        {
          xTZSearchHelp( rcStruct, iLeft, iStartY, 4, iDist );
        }
        if ( iRight <= sr.right ) // check right
        {
          xTZSearchHelp( rcStruct, iRight, iStartY, 5, iDist );
        }
        if ( iBottom_2 <= sr.bottom ) // check half bottom
        {
          if ( iLeft_2 >= sr.left ) // check half left
          {
            xTZSearchHelp( rcStruct, iLeft_2, iBottom_2, 6, (iDist>>1) );
          }
          if ( iRight_2 <= sr.right ) // check half right
          {
            xTZSearchHelp( rcStruct, iRight_2, iBottom_2, 8, (iDist>>1) );
          }
        } // check half bottom
        if ( iBottom <= sr.bottom ) // check bottom
        {
          xTZSearchHelp( rcStruct, iStartX, iBottom, 7, iDist );
        }
      } // check border
    }
    else // iDist > 8
    {
      if ( iTop >= sr.top && iLeft >= sr.left &&
           iRight <= sr.right && iBottom <= sr.bottom ) // check border
      {
        xTZSearchHelp( rcStruct, iStartX, iTop,    0, iDist );
        xTZSearchHelp( rcStruct, iLeft,   iStartY, 0, iDist );
        xTZSearchHelp( rcStruct, iRight,  iStartY, 0, iDist );
        xTZSearchHelp( rcStruct, iStartX, iBottom, 0, iDist );
        // three intermediate points on each of the four diamond edges
        for ( int index = 1; index < 4; index++ )
        {
          const int iPosYT = iTop    + ((iDist>>2) * index);
          const int iPosYB = iBottom - ((iDist>>2) * index);
          const int iPosXL = iStartX - ((iDist>>2) * index);
          const int iPosXR = iStartX + ((iDist>>2) * index);
          xTZSearchHelp( rcStruct, iPosXL, iPosYT, 0, iDist );
          xTZSearchHelp( rcStruct, iPosXR, iPosYT, 0, iDist );
          xTZSearchHelp( rcStruct, iPosXL, iPosYB, 0, iDist );
          xTZSearchHelp( rcStruct, iPosXR, iPosYB, 0, iDist );
        }
      }
      else // check border
      {
        if ( iTop >= sr.top ) // check top
        {
          xTZSearchHelp( rcStruct, iStartX, iTop, 0, iDist );
        }
        if ( iLeft >= sr.left ) // check left
        {
          xTZSearchHelp( rcStruct, iLeft, iStartY, 0, iDist );
        }
        if ( iRight <= sr.right ) // check right
        {
          xTZSearchHelp( rcStruct, iRight, iStartY, 0, iDist );
        }
        if ( iBottom <= sr.bottom ) // check bottom
        {
          xTZSearchHelp( rcStruct, iStartX, iBottom, 0, iDist );
        }
        for ( int index = 1; index < 4; index++ )
        {
          const int iPosYT = iTop    + ((iDist>>2) * index);
          const int iPosYB = iBottom - ((iDist>>2) * index);
          const int iPosXL = iStartX - ((iDist>>2) * index);
          const int iPosXR = iStartX + ((iDist>>2) * index);

          if ( iPosYT >= sr.top ) // check top
          {
            if ( iPosXL >= sr.left ) // check left
            {
              xTZSearchHelp( rcStruct, iPosXL, iPosYT, 0, iDist );
            }
            if ( iPosXR <= sr.right ) // check right
            {
              xTZSearchHelp( rcStruct, iPosXR, iPosYT, 0, iDist );
            }
          } // check top
          if ( iPosYB <= sr.bottom ) // check bottom
          {
            if ( iPosXL >= sr.left ) // check left
            {
              xTZSearchHelp( rcStruct, iPosXL, iPosYB, 0, iDist );
            }
            if ( iPosXR <= sr.right ) // check right
            {
              xTZSearchHelp( rcStruct, iPosXR, iPosYB, 0, iDist );
            }
          } // check bottom
        } // for ...
      } // check border
    } // iDist <= 8
  } // iDist == 1
}

/// Sub-sample refinement: evaluates up to nine offsets around baseRefMv on the
/// pre-filtered blocks in m_filteredBlock and returns the lowest cost; the
/// winning offset is written back to rcMvFrac.
Distortion InterSearch::xPatternRefinement( const CPelBuf* pcPatternKey,
                                            Mv baseRefMv,
                                            int iFrac, Mv& rcMvFrac,
                                            bool bAllowUseOfHadamard )
{
  Distortion  uiDist;
  Distortion  uiDistBest  = std::numeric_limits<Distortion>::max();
  uint32_t        uiDirecBest = 0;

  Pel*  piRefPos;
  int iRefStride = pcPatternKey->width + 1;
  m_pcRdCost->setDistParam( m_cDistParam, *pcPatternKey, m_filteredBlock[0][0][0], iRefStride, m_lumaClpRng.bd, COMPONENT_Y, 0, 1, m_pcEncCfg->getUseHADME() && bAllowUseOfHadamard );

  const Mv* pcMvRefine = (iFrac == 2 ?
s_acMvRefineH : s_acMvRefineQ); for (uint32_t i = 0; i < 9; i++) { if (m_skipFracME && i > 0) { break; } Mv cMvTest = pcMvRefine[i]; cMvTest += baseRefMv; int horVal = cMvTest.getHor() * iFrac; int verVal = cMvTest.getVer() * iFrac; piRefPos = m_filteredBlock[verVal & 3][horVal & 3][0]; if (horVal == 2 && (verVal & 1) == 0) { piRefPos += 1; } if ((horVal & 1) == 0 && verVal == 2) { piRefPos += iRefStride; } cMvTest = pcMvRefine[i]; cMvTest += rcMvFrac; m_cDistParam.cur.buf = piRefPos; uiDist = m_cDistParam.distFunc( m_cDistParam ); uiDist += m_pcRdCost->getCostOfVectorWithPredictor( cMvTest.getHor(), cMvTest.getVer(), 0 ); if ( uiDist < uiDistBest ) { uiDistBest = uiDist; uiDirecBest = i; m_cDistParam.maximumDistortionForEarlyExit = uiDist; } } rcMvFrac = pcMvRefine[uiDirecBest]; return uiDistBest; } Distortion InterSearch::xGetInterPredictionError( PredictionUnit& pu, PelUnitBuf& origBuf, const RefPicList &eRefPicList ) { PelUnitBuf predBuf = m_tmpStorageLCU.getBuf( UnitAreaRelative(*pu.cu, pu) ); motionCompensation( pu, predBuf, eRefPicList ); DistParam cDistParam; cDistParam.applyWeight = false; #if JVET_N0329_IBC_SEARCH_IMP m_pcRdCost->setDistParam(cDistParam, origBuf.Y(), predBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, m_pcEncCfg->getUseHADME() && !pu.cu->transQuantBypass && !pu.cu->slice->getDisableSATDForRD()); #else m_pcRdCost->setDistParam( cDistParam, origBuf.Y(), predBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, m_pcEncCfg->getUseHADME() && !pu.cu->transQuantBypass ); #endif return (Distortion)cDistParam.distFunc( cDistParam ); } /// add ibc search functions here void InterSearch::xIBCSearchMVCandUpdate(Distortion sad, int x, int y, Distortion* sadBestCand, Mv* cMVCand) { int j = CHROMA_REFINEMENT_CANDIDATES - 1; if (sad < sadBestCand[CHROMA_REFINEMENT_CANDIDATES - 1]) { for (int t = CHROMA_REFINEMENT_CANDIDATES - 1; t >= 0; t--) { if (sad < sadBestCand[t]) j = t; } for (int k = CHROMA_REFINEMENT_CANDIDATES - 1; 
k > j; k--) { sadBestCand[k] = sadBestCand[k - 1]; cMVCand[k].set(cMVCand[k - 1].getHor(), cMVCand[k - 1].getVer()); } sadBestCand[j] = sad; cMVCand[j].set(x, y); } } int InterSearch::xIBCSearchMVChromaRefine(PredictionUnit& pu, int roiWidth, int roiHeight, int cuPelX, int cuPelY, Distortion* sadBestCand, Mv* cMVCand ) { if (!pu.Cb().valid()) return 0; int bestCandIdx = 0; Distortion sadBest = std::numeric_limits<Distortion>::max(); Distortion tempSad; Pel* pRef; Pel* pOrg; int refStride, orgStride; int width, height; int picWidth = pu.cs->slice->getSPS()->getPicWidthInLumaSamples(); int picHeight = pu.cs->slice->getSPS()->getPicHeightInLumaSamples(); UnitArea allCompBlocks(pu.chromaFormat, (Area)pu.block(COMPONENT_Y)); for (int cand = 0; cand < CHROMA_REFINEMENT_CANDIDATES; cand++) { #if JVET_N0329_IBC_SEARCH_IMP if (sadBestCand[cand] == std::numeric_limits<Distortion>::max()) { continue; } #endif if ((!cMVCand[cand].getHor()) && (!cMVCand[cand].getVer())) continue; if (((int)(cuPelY + cMVCand[cand].getVer() + roiHeight) >= picHeight) || ((cuPelY + cMVCand[cand].getVer()) < 0)) continue; if (((int)(cuPelX + cMVCand[cand].getHor() + roiWidth) >= picWidth) || ((cuPelX + cMVCand[cand].getHor()) < 0)) continue; tempSad = sadBestCand[cand]; pu.mv[0] = cMVCand[cand]; pu.mv[0].changePrecision(MV_PRECISION_INT, MV_PRECISION_INTERNAL); pu.interDir = 1; pu.refIdx[0] = pu.cs->slice->getNumRefIdx(REF_PIC_LIST_0); // last idx in the list PelUnitBuf predBufTmp = m_tmpPredStorage[REF_PIC_LIST_0].getBuf(UnitAreaRelative(*pu.cu, pu)); motionCompensation(pu, predBufTmp, REF_PIC_LIST_0); for (unsigned int ch = COMPONENT_Cb; ch < ::getNumberValidComponents(pu.chromaFormat); ch++) { width = roiWidth >> ::getComponentScaleX(ComponentID(ch), pu.chromaFormat); height = roiHeight >> ::getComponentScaleY(ComponentID(ch), pu.chromaFormat); PelUnitBuf origBuf = pu.cs->getOrgBuf(allCompBlocks); PelUnitBuf* pBuf = &origBuf; CPelBuf tmpPattern = pBuf->get(ComponentID(ch)); pOrg = 
(Pel*)tmpPattern.buf; Picture* refPic = pu.cu->slice->getPic(); const CPelBuf refBuf = refPic->getRecoBuf(allCompBlocks.blocks[ComponentID(ch)]); pRef = (Pel*)refBuf.buf; refStride = refBuf.stride; orgStride = tmpPattern.stride; //ComponentID compID = (ComponentID)ch; PelUnitBuf* pBufRef = &predBufTmp; CPelBuf tmpPatternRef = pBufRef->get(ComponentID(ch)); pRef = (Pel*)tmpPatternRef.buf; refStride = tmpPatternRef.stride; for (int row = 0; row < height; row++) { for (int col = 0; col < width; col++) { tempSad += ((abs(pRef[col] - pOrg[col])) >> (pu.cs->sps->getBitDepth(CHANNEL_TYPE_CHROMA) - 8)); } pRef += refStride; pOrg += orgStride; } } if (tempSad < sadBest) { sadBest = tempSad; bestCandIdx = cand; } } return bestCandIdx; } #if JVET_N0329_IBC_SEARCH_IMP static unsigned int xMergeCandLists(Mv *dst, unsigned int dn, unsigned int dstTotalLength, Mv *src, unsigned int sn) { for (unsigned int cand = 0; cand < sn && dn < dstTotalLength; cand++) #else static unsigned int xMergeCandLists(Mv *dst, unsigned int dn, Mv *src, unsigned int sn) { for (unsigned int cand = 0; cand < sn && dn<IBC_NUM_CANDIDATES; cand++) #endif { #if JVET_N0329_IBC_SEARCH_IMP if (src[cand] == Mv()) { continue; } #endif bool found = false; for (int j = 0; j<dn; j++) { if (src[cand] == dst[j]) { found = true; break; } } if (!found) { dst[dn] = src[cand]; dn++; } } return dn; } void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct& cStruct, Mv& rcMv, Distortion& ruiCost, Mv* pcMvSrchRngLT, Mv* pcMvSrchRngRB, Mv* pcMvPred) { const int srchRngHorLeft = pcMvSrchRngLT->getHor(); const int srchRngHorRight = pcMvSrchRngRB->getHor(); const int srchRngVerTop = pcMvSrchRngLT->getVer(); const int srchRngVerBottom = pcMvSrchRngRB->getVer(); const unsigned int lcuWidth = pu.cs->slice->getSPS()->getMaxCUWidth(); const int puPelOffsetX = 0; const int puPelOffsetY = 0; const int cuPelX = pu.Y().x; const int cuPelY = pu.Y().y; int roiWidth = pu.lwidth(); int roiHeight = pu.lheight(); 
Distortion sad; Distortion sadBest = std::numeric_limits<Distortion>::max(); int bestX = 0; int bestY = 0; const Pel* piRefSrch = cStruct.piRefY; int bestCandIdx = 0; Distortion sadBestCand[CHROMA_REFINEMENT_CANDIDATES]; Mv cMVCand[CHROMA_REFINEMENT_CANDIDATES]; for (int cand = 0; cand < CHROMA_REFINEMENT_CANDIDATES; cand++) { sadBestCand[cand] = std::numeric_limits<Distortion>::max(); cMVCand[cand].set(0, 0); } m_cDistParam.useMR = false; m_pcRdCost->setDistParam(m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, cStruct.subShiftMode); const int picWidth = pu.cs->slice->getSPS()->getPicWidthInLumaSamples(); const int picHeight = pu.cs->slice->getSPS()->getPicHeightInLumaSamples(); { m_cDistParam.subShift = 0; Distortion tempSadBest = 0; int srLeft = srchRngHorLeft, srRight = srchRngHorRight, srTop = srchRngVerTop, srBottom = srchRngVerBottom; #if JVET_N0329_IBC_SEARCH_IMP m_numBVs = 0; m_numBVs = xMergeCandLists(m_acBVs, m_numBVs, (2 * IBC_NUM_CANDIDATES), m_defaultCachedBvs.m_bvCands, m_defaultCachedBvs.currCnt); Mv cMvPredEncOnly[IBC_NUM_CANDIDATES]; #else if (roiWidth>8 || roiHeight>8) { m_numBVs = 0; } else if (roiWidth + roiHeight == 16) { m_numBVs = m_numBV16s; } Mv cMvPredEncOnly[16]; #endif int nbPreds = 0; PU::getIbcMVPsEncOnly(pu, cMvPredEncOnly, nbPreds); #if JVET_N0329_IBC_SEARCH_IMP m_numBVs = xMergeCandLists(m_acBVs, m_numBVs, (2 * IBC_NUM_CANDIDATES), cMvPredEncOnly, nbPreds); #else m_numBVs = xMergeCandLists(m_acBVs, m_numBVs, cMvPredEncOnly, nbPreds); #endif for (unsigned int cand = 0; cand < m_numBVs; cand++) { int xPred = m_acBVs[cand].getHor(); int yPred = m_acBVs[cand].getVer(); if (!(xPred == 0 && yPred == 0) && !((yPred < srTop) || (yPred > srBottom)) && !((xPred < srLeft) || (xPred > srRight))) { bool validCand = PU::isBlockVectorValid(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, 0, 0, xPred, yPred, lcuWidth); if (validCand) { sad = 
m_pcRdCost->getBvCostMultiplePreds(xPred, yPred, pu.cs->sps->getAMVREnabledFlag()); m_cDistParam.cur.buf = piRefSrch + cStruct.iRefStride * yPred + xPred; sad += m_cDistParam.distFunc(m_cDistParam); xIBCSearchMVCandUpdate(sad, xPred, yPred, sadBestCand, cMVCand); } } } bestX = cMVCand[0].getHor(); bestY = cMVCand[0].getVer(); rcMv.set(bestX, bestY); sadBest = sadBestCand[0]; const int boundY = (0 - roiHeight - puPelOffsetY); for (int y = std::max(srchRngVerTop, 0 - cuPelY); y <= boundY; ++y) { if (!PU::isBlockVectorValid(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, 0, 0, 0, y, lcuWidth)) { continue; } sad = m_pcRdCost->getBvCostMultiplePreds(0, y, pu.cs->sps->getAMVREnabledFlag()); m_cDistParam.cur.buf = piRefSrch + cStruct.iRefStride * y; sad += m_cDistParam.distFunc(m_cDistParam); xIBCSearchMVCandUpdate(sad, 0, y, sadBestCand, cMVCand); tempSadBest = sadBestCand[0]; if (sadBestCand[0] <= 3) { bestX = cMVCand[0].getHor(); bestY = cMVCand[0].getVer(); sadBest = sadBestCand[0]; rcMv.set(bestX, bestY); ruiCost = sadBest; goto end; } } const int boundX = std::max(srchRngHorLeft, -cuPelX); for (int x = 0 - roiWidth - puPelOffsetX; x >= boundX; --x) { if (!PU::isBlockVectorValid(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, 0, 0, x, 0, lcuWidth)) { continue; } sad = m_pcRdCost->getBvCostMultiplePreds(x, 0, pu.cs->sps->getAMVREnabledFlag()); m_cDistParam.cur.buf = piRefSrch + x; sad += m_cDistParam.distFunc(m_cDistParam); xIBCSearchMVCandUpdate(sad, x, 0, sadBestCand, cMVCand); tempSadBest = sadBestCand[0]; if (sadBestCand[0] <= 3) { bestX = cMVCand[0].getHor(); bestY = cMVCand[0].getVer(); sadBest = sadBestCand[0]; rcMv.set(bestX, bestY); ruiCost = sadBest; goto end; } } bestX = cMVCand[0].getHor(); bestY = cMVCand[0].getVer(); sadBest = sadBestCand[0]; if ((!bestX && !bestY) || (sadBest - m_pcRdCost->getBvCostMultiplePreds(bestX, bestY, pu.cs->sps->getAMVREnabledFlag()) <= 32)) { //chroma refine bestCandIdx = 
xIBCSearchMVChromaRefine(pu, roiWidth, roiHeight, cuPelX, cuPelY, sadBestCand, cMVCand); bestX = cMVCand[bestCandIdx].getHor(); bestY = cMVCand[bestCandIdx].getVer(); sadBest = sadBestCand[bestCandIdx]; rcMv.set(bestX, bestY); ruiCost = sadBest; goto end; } if (pu.lwidth() < 16 && pu.lheight() < 16) { for (int y = std::max(srchRngVerTop, -cuPelY); y <= srchRngVerBottom; y += 2) { if ((y == 0) || ((int)(cuPelY + y + roiHeight) >= picHeight)) continue; for (int x = std::max(srchRngHorLeft, -cuPelX); x <= srchRngHorRight; x++) { if ((x == 0) || ((int)(cuPelX + x + roiWidth) >= picWidth)) continue; if (!PU::isBlockVectorValid(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, 0, 0, x, y, lcuWidth)) { continue; } sad = m_pcRdCost->getBvCostMultiplePreds(x, y, pu.cs->sps->getAMVREnabledFlag()); m_cDistParam.cur.buf = piRefSrch + cStruct.iRefStride * y + x; sad += m_cDistParam.distFunc(m_cDistParam); xIBCSearchMVCandUpdate(sad, x, y, sadBestCand, cMVCand); } } bestX = cMVCand[0].getHor(); bestY = cMVCand[0].getVer(); sadBest = sadBestCand[0]; if (sadBest - m_pcRdCost->getBvCostMultiplePreds(bestX, bestY, pu.cs->sps->getAMVREnabledFlag()) <= 16) { //chroma refine bestCandIdx = xIBCSearchMVChromaRefine(pu, roiWidth, roiHeight, cuPelX, cuPelY, sadBestCand, cMVCand); bestX = cMVCand[bestCandIdx].getHor(); bestY = cMVCand[bestCandIdx].getVer(); sadBest = sadBestCand[bestCandIdx]; rcMv.set(bestX, bestY); ruiCost = sadBest; goto end; } for (int y = (std::max(srchRngVerTop, -cuPelY) + 1); y <= srchRngVerBottom; y += 2) { if ((y == 0) || ((int)(cuPelY + y + roiHeight) >= picHeight)) continue; for (int x = std::max(srchRngHorLeft, -cuPelX); x <= srchRngHorRight; x += 2) { if ((x == 0) || ((int)(cuPelX + x + roiWidth) >= picWidth)) continue; if (!PU::isBlockVectorValid(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, 0, 0, x, y, lcuWidth)) { continue; } sad = m_pcRdCost->getBvCostMultiplePreds(x, y, pu.cs->sps->getAMVREnabledFlag()); m_cDistParam.cur.buf = 
piRefSrch + cStruct.iRefStride * y + x; sad += m_cDistParam.distFunc(m_cDistParam); xIBCSearchMVCandUpdate(sad, x, y, sadBestCand, cMVCand); if (sadBestCand[0] <= 5) { //chroma refine & return bestCandIdx = xIBCSearchMVChromaRefine(pu, roiWidth, roiHeight, cuPelX, cuPelY, sadBestCand, cMVCand); bestX = cMVCand[bestCandIdx].getHor(); bestY = cMVCand[bestCandIdx].getVer(); sadBest = sadBestCand[bestCandIdx]; rcMv.set(bestX, bestY); ruiCost = sadBest; goto end; } } } bestX = cMVCand[0].getHor(); bestY = cMVCand[0].getVer(); sadBest = sadBestCand[0]; if ((sadBest >= tempSadBest) || ((sadBest - m_pcRdCost->getBvCostMultiplePreds(bestX, bestY, pu.cs->sps->getAMVREnabledFlag())) <= 32)) { //chroma refine bestCandIdx = xIBCSearchMVChromaRefine(pu, roiWidth, roiHeight, cuPelX, cuPelY, sadBestCand, cMVCand); bestX = cMVCand[bestCandIdx].getHor(); bestY = cMVCand[bestCandIdx].getVer(); sadBest = sadBestCand[bestCandIdx]; rcMv.set(bestX, bestY); ruiCost = sadBest; goto end; } tempSadBest = sadBestCand[0]; for (int y = (std::max(srchRngVerTop, -cuPelY) + 1); y <= srchRngVerBottom; y += 2) { if ((y == 0) || ((int)(cuPelY + y + roiHeight) >= picHeight)) continue; for (int x = (std::max(srchRngHorLeft, -cuPelX) + 1); x <= srchRngHorRight; x += 2) { if ((x == 0) || ((int)(cuPelX + x + roiWidth) >= picWidth)) continue; if (!PU::isBlockVectorValid(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, 0, 0, x, y, lcuWidth)) { continue; } sad = m_pcRdCost->getBvCostMultiplePreds(x, y, pu.cs->sps->getAMVREnabledFlag()); m_cDistParam.cur.buf = piRefSrch + cStruct.iRefStride * y + x; sad += m_cDistParam.distFunc(m_cDistParam); xIBCSearchMVCandUpdate(sad, x, y, sadBestCand, cMVCand); if (sadBestCand[0] <= 5) { //chroma refine & return bestCandIdx = xIBCSearchMVChromaRefine(pu, roiWidth, roiHeight, cuPelX, cuPelY, sadBestCand, cMVCand); bestX = cMVCand[bestCandIdx].getHor(); bestY = cMVCand[bestCandIdx].getVer(); sadBest = sadBestCand[bestCandIdx]; rcMv.set(bestX, bestY); ruiCost = 
sadBest; goto end; } } } } } bestCandIdx = xIBCSearchMVChromaRefine(pu, roiWidth, roiHeight, cuPelX, cuPelY, sadBestCand, cMVCand); bestX = cMVCand[bestCandIdx].getHor(); bestY = cMVCand[bestCandIdx].getVer(); sadBest = sadBestCand[bestCandIdx]; rcMv.set(bestX, bestY); ruiCost = sadBest; end: #if JVET_N0329_IBC_SEARCH_IMP m_numBVs = 0; m_numBVs = xMergeCandLists(m_acBVs, m_numBVs, (2 * IBC_NUM_CANDIDATES), m_defaultCachedBvs.m_bvCands, m_defaultCachedBvs.currCnt); m_defaultCachedBvs.currCnt = 0; m_defaultCachedBvs.currCnt = xMergeCandLists(m_defaultCachedBvs.m_bvCands, m_defaultCachedBvs.currCnt, IBC_NUM_CANDIDATES, cMVCand, CHROMA_REFINEMENT_CANDIDATES); m_defaultCachedBvs.currCnt = xMergeCandLists(m_defaultCachedBvs.m_bvCands, m_defaultCachedBvs.currCnt, IBC_NUM_CANDIDATES, m_acBVs, m_numBVs); for (unsigned int cand = 0; cand < CHROMA_REFINEMENT_CANDIDATES; cand++) { if (cMVCand[cand].getHor() == 0 && cMVCand[cand].getVer() == 0) { continue; } m_ctuRecord[pu.lumaPos()][pu.lumaSize()].bvRecord[cMVCand[cand]] = sadBestCand[cand]; } #else if (roiWidth + roiHeight > 8) { m_numBVs = xMergeCandLists(m_acBVs, m_numBVs, cMVCand, CHROMA_REFINEMENT_CANDIDATES); if (roiWidth + roiHeight == 32) { m_numBV16s = m_numBVs; } } #endif return; } // based on xMotionEstimation void InterSearch::xIBCEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Mv *pcMvPred, Mv &rcMv, Distortion &ruiCost, const int localSearchRangeX, const int localSearchRangeY ) { #if JVET_N0329_IBC_SEARCH_IMP const int iPicWidth = pu.cs->slice->getSPS()->getPicWidthInLumaSamples(); const int iPicHeight = pu.cs->slice->getSPS()->getPicHeightInLumaSamples(); const unsigned int lcuWidth = pu.cs->slice->getSPS()->getMaxCUWidth(); const int cuPelX = pu.Y().x; const int cuPelY = pu.Y().y; int iRoiWidth = pu.lwidth(); int iRoiHeight = pu.lheight(); PelUnitBuf* pBuf = &origBuf; // Search key pattern initialization CPelBuf tmpPattern = pBuf->Y(); CPelBuf* pcPatternKey = &tmpPattern; if 
((pu.cs->slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag())) { const CompArea &area = pu.blocks[COMPONENT_Y]; CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size()); PelBuf tmpOrgLuma = m_tmpStorageLCU.getBuf(tmpArea); tmpOrgLuma.copyFrom(tmpPattern); tmpOrgLuma.rspSignal(m_pcReshape->getFwdLUT()); pcPatternKey = (CPelBuf*)&tmpOrgLuma; } m_lumaClpRng = pu.cs->slice->clpRng(COMPONENT_Y); Picture* refPic = pu.cu->slice->getPic(); const CPelBuf refBuf = refPic->getRecoBuf(pu.blocks[COMPONENT_Y]); IntTZSearchStruct cStruct; cStruct.pcPatternKey = pcPatternKey; cStruct.iRefStride = refBuf.stride; cStruct.piRefY = refBuf.buf; cStruct.imvShift = pu.cu->imv << 1; cStruct.subShiftMode = 0; // used by intra pattern search function // disable weighted prediction setWpScalingDistParam(-1, REF_PIC_LIST_X, pu.cs->slice); m_pcRdCost->getMotionCost(0, pu.cu->transQuantBypass); m_pcRdCost->setPredictors(pcMvPred); m_pcRdCost->setCostScale(0); m_cDistParam.useMR = false; m_pcRdCost->setDistParam(m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, cStruct.subShiftMode); #endif bool buffered = false; if (m_pcEncCfg->getIBCFastMethod() & IBC_FAST_METHOD_BUFFERBV) { ruiCost = MAX_UINT; #if !JVET_N0329_IBC_SEARCH_IMP const int iPicWidth = pu.cs->slice->getSPS()->getPicWidthInLumaSamples(); const int iPicHeight = pu.cs->slice->getSPS()->getPicHeightInLumaSamples(); const int cuPelX = pu.Y().x; const int cuPelY = pu.Y().y; int iRoiWidth = pu.lwidth(); int iRoiHeight = pu.lheight(); #endif std::unordered_map<Mv, Distortion>& history = m_ctuRecord[pu.lumaPos()][pu.lumaSize()].bvRecord; #if !JVET_N0329_IBC_SEARCH_IMP const unsigned int lcuWidth = pu.cs->slice->getSPS()->getMaxCUWidth(); #endif for (std::unordered_map<Mv, Distortion>::iterator p = history.begin(); p != history.end(); p++) { const Mv& bv = p->first; int xBv = bv.hor; int yBv = bv.ver; if (PU::isBlockVectorValid(pu, cuPelX, cuPelY, 
iRoiWidth, iRoiHeight, iPicWidth, iPicHeight, 0, 0, xBv, yBv, lcuWidth)) { #if JVET_N0329_IBC_SEARCH_IMP buffered = true; Distortion sad = m_pcRdCost->getBvCostMultiplePreds(xBv, yBv, pu.cs->sps->getAMVREnabledFlag()); m_cDistParam.cur.buf = cStruct.piRefY + cStruct.iRefStride * yBv + xBv; sad += m_cDistParam.distFunc(m_cDistParam); if (sad < ruiCost) { rcMv = bv; ruiCost = sad; } else if (sad == ruiCost) { // stabilise the search through the unordered list if (bv.hor < rcMv.getHor() || (bv.hor == rcMv.getHor() && bv.ver < rcMv.getVer())) { // update the vector. rcMv = bv; } } #else if (p->second < ruiCost) { rcMv = bv; ruiCost = p->second; buffered = true; } else if (p->second == ruiCost) { // stabilise the search through the unordered list if (bv.hor < rcMv.getHor() || (bv.hor == rcMv.getHor() && bv.ver < rcMv.getVer())) { // update the vector. rcMv = bv; } } #endif } } #if JVET_N0329_IBC_SEARCH_IMP if (buffered) { Mv cMvPredEncOnly[IBC_NUM_CANDIDATES]; int nbPreds = 0; PU::getIbcMVPsEncOnly(pu, cMvPredEncOnly, nbPreds); for (unsigned int cand = 0; cand < nbPreds; cand++) { int xPred = cMvPredEncOnly[cand].getHor(); int yPred = cMvPredEncOnly[cand].getVer(); if (PU::isBlockVectorValid(pu, cuPelX, cuPelY, iRoiWidth, iRoiHeight, iPicWidth, iPicHeight, 0, 0, xPred, yPred, lcuWidth)) { Distortion sad = m_pcRdCost->getBvCostMultiplePreds(xPred, yPred, pu.cs->sps->getAMVREnabledFlag()); m_cDistParam.cur.buf = cStruct.piRefY + cStruct.iRefStride * yPred + xPred; sad += m_cDistParam.distFunc(m_cDistParam); if (sad < ruiCost) { rcMv.set(xPred, yPred); ruiCost = sad; } else if (sad == ruiCost) { // stabilise the search through the unordered list if (xPred < rcMv.getHor() || (xPred == rcMv.getHor() && yPred < rcMv.getVer())) { // update the vector. 
rcMv.set(xPred, yPred); } } m_ctuRecord[pu.lumaPos()][pu.lumaSize()].bvRecord[Mv(xPred, yPred)] = sad; } } } #endif } if (!buffered) { #if JVET_N0329_IBC_SEARCH_IMP Mv cMvSrchRngLT; Mv cMvSrchRngRB; // assume that intra BV is integer-pel precision xSetIntraSearchRange(pu, pu.lwidth(), pu.lheight(), localSearchRangeX, localSearchRangeY, cMvSrchRngLT, cMvSrchRngRB); #else Mv cMvSrchRngLT; Mv cMvSrchRngRB; //cMvSrchRngLT.highPrec = false; //cMvSrchRngRB.highPrec = false; PelUnitBuf* pBuf = &origBuf; // Search key pattern initialization CPelBuf tmpPattern = pBuf->Y(); CPelBuf* pcPatternKey = &tmpPattern; if ((pu.cs->slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag())) { const CompArea &area = pu.blocks[COMPONENT_Y]; CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size()); PelBuf tmpOrgLuma = m_tmpStorageLCU.getBuf(tmpArea); tmpOrgLuma.copyFrom(tmpPattern); tmpOrgLuma.rspSignal(m_pcReshape->getFwdLUT()); pcPatternKey = (CPelBuf*)&tmpOrgLuma; } m_lumaClpRng = pu.cs->slice->clpRng(COMPONENT_Y); Picture* refPic = pu.cu->slice->getPic(); const CPelBuf refBuf = refPic->getRecoBuf(pu.blocks[COMPONENT_Y]); IntTZSearchStruct cStruct; cStruct.pcPatternKey = pcPatternKey; cStruct.iRefStride = refBuf.stride; cStruct.piRefY = refBuf.buf; cStruct.imvShift = pu.cu->imv << 1; cStruct.subShiftMode = 0; // used by intra pattern search function // assume that intra BV is integer-pel precision xSetIntraSearchRange(pu, pu.lwidth(), pu.lheight(), localSearchRangeX, localSearchRangeY, cMvSrchRngLT, cMvSrchRngRB); // disable weighted prediction setWpScalingDistParam(-1, REF_PIC_LIST_X, pu.cs->slice); m_pcRdCost->getMotionCost(0, pu.cu->transQuantBypass); m_pcRdCost->setPredictors(pcMvPred); m_pcRdCost->setCostScale(0); #endif // Do integer search xIntraPatternSearch(pu, cStruct, rcMv, ruiCost, &cMvSrchRngLT, &cMvSrchRngRB, pcMvPred); } } // based on xSetSearchRange void InterSearch::xSetIntraSearchRange(PredictionUnit& pu, int iRoiWidth, int 
iRoiHeight, const int localSearchRangeX, const int localSearchRangeY, Mv& rcMvSrchRngLT, Mv& rcMvSrchRngRB) { const SPS &sps = *pu.cs->sps; int srLeft, srRight, srTop, srBottom; const int cuPelX = pu.Y().x; const int cuPelY = pu.Y().y; const int iPicWidth = pu.cs->slice->getSPS()->getPicWidthInLumaSamples(); const int iPicHeight = pu.cs->slice->getSPS()->getPicHeightInLumaSamples(); srLeft = -std::min(cuPelX, localSearchRangeX); srTop = -std::min(cuPelY, localSearchRangeY); srRight = std::min(iPicWidth - cuPelX - iRoiWidth, localSearchRangeX); srBottom = std::min(iPicHeight - cuPelY - iRoiHeight, localSearchRangeY); rcMvSrchRngLT.setHor(srLeft); rcMvSrchRngLT.setVer(srTop); rcMvSrchRngRB.setHor(srRight); rcMvSrchRngRB.setVer(srBottom); rcMvSrchRngLT <<= 2; rcMvSrchRngRB <<= 2; xClipMv(rcMvSrchRngLT, pu.cu->lumaPos(), pu.cu->lumaSize(), sps); xClipMv(rcMvSrchRngRB, pu.cu->lumaPos(), pu.cu->lumaSize(), sps); rcMvSrchRngLT >>= 2; rcMvSrchRngRB >>= 2; } bool InterSearch::predIBCSearch(CodingUnit& cu, Partitioner& partitioner, const int localSearchRangeX, const int localSearchRangeY, IbcHashMap& ibcHashMap) { Mv cMvSrchRngLT; Mv cMvSrchRngRB; Mv cMv; Mv cMvPred; for (auto &pu : CU::traversePUs(cu)) { m_maxCompIDToPred = MAX_NUM_COMPONENT; CHECK(pu.cu != &cu, "PU is contained in another CU"); ////////////////////////////////////////////////////////// /// ibc search pu.cu->imv = 2; AMVPInfo amvpInfo4Pel; PU::fillIBCMvpCand(pu, amvpInfo4Pel); pu.cu->imv = 0;// (Int)cu.cs->sps->getUseIMV(); // set as IMV=0 initially Mv cMv, cMvPred[2]; AMVPInfo amvpInfo; PU::fillIBCMvpCand(pu, amvpInfo); cMvPred[0].set(amvpInfo.mvCand[0].getHor() >> (2), amvpInfo.mvCand[0].getVer() >> (2)); // store in full pel accuracy, shift before use in search cMvPred[1].set(amvpInfo.mvCand[1].getHor() >> (2), amvpInfo.mvCand[1].getVer() >> (2)); int iBvpNum = 2; int bvpIdxBest = 0; cMv.setZero(); Distortion cost = 0; if (m_pcEncCfg->getIBCHashSearch()) { xxIBCHashSearch(pu, cMvPred, iBvpNum, cMv, 
bvpIdxBest, ibcHashMap); } if (cMv.getHor() == 0 && cMv.getVer() == 0) { // if hash search does not work or is not enabled PelUnitBuf origBuf = pu.cs->getOrgBuf(pu); xIBCEstimation(pu, origBuf, cMvPred, cMv, cost, localSearchRangeX, localSearchRangeY); } if (cMv.getHor() == 0 && cMv.getVer() == 0) { return false; } /// ibc search ///////////////////////////////////////////////////////// unsigned int bitsBVPBest, bitsBVPTemp; bitsBVPBest = MAX_INT; m_pcRdCost->setCostScale(0); for (int bvpIdxTemp = 0; bvpIdxTemp<iBvpNum; bvpIdxTemp++) { m_pcRdCost->setPredictor(cMvPred[bvpIdxTemp]); bitsBVPTemp = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor(), cMv.getVer(), 0); if (bitsBVPTemp < bitsBVPBest) { bitsBVPBest = bitsBVPTemp; bvpIdxBest = bvpIdxTemp; if (cu.cs->sps->getAMVREnabledFlag() && cMv != cMvPred[bvpIdxTemp]) pu.cu->imv = 1; // set as full-pel else pu.cu->imv = 0; // set as fractional-pel } unsigned int bitsBVPQP = MAX_UINT; Mv mvPredQuadPel; if ((cMv.getHor() % 4 == 0) && (cMv.getVer() % 4 == 0) && (pu.cs->sps->getAMVREnabledFlag())) { mvPredQuadPel = amvpInfo4Pel.mvCand[bvpIdxTemp];// cMvPred[bvpIdxTemp]; mvPredQuadPel >>= (4); m_pcRdCost->setPredictor(mvPredQuadPel); bitsBVPQP = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor() >> 2, cMv.getVer() >> 2, 0); } mvPredQuadPel <<= (2); if (bitsBVPQP < bitsBVPBest && cMv != mvPredQuadPel) { bitsBVPBest = bitsBVPQP; bvpIdxBest = bvpIdxTemp; if (cu.cs->sps->getAMVREnabledFlag()) pu.cu->imv = 2; // set as quad-pel } } pu.bv = cMv; cMv <<= (2); pu.mv[REF_PIC_LIST_0] = cMv; // store in fractional pel accuracy pu.mvpIdx[REF_PIC_LIST_0] = bvpIdxBest; if(pu.cu->imv == 2 && cMv != amvpInfo4Pel.mvCand[bvpIdxBest]) pu.mvd[REF_PIC_LIST_0] = cMv - amvpInfo4Pel.mvCand[bvpIdxBest]; else pu.mvd[REF_PIC_LIST_0] = cMv - amvpInfo.mvCand[bvpIdxBest]; if (pu.mvd[REF_PIC_LIST_0] == Mv(0, 0)) pu.cu->imv = 0; if (pu.cu->imv == 2) assert((cMv.getHor() % 16 == 0) && (cMv.getVer() % 16 == 0)); if (cu.cs->sps->getAMVREnabledFlag()) 
assert(pu.cu->imv>0 || pu.mvd[REF_PIC_LIST_0] == Mv()); if (!cu.cs->sps->getAMVREnabledFlag()) pu.mvd[REF_PIC_LIST_0] >>= (2); pu.refIdx[REF_PIC_LIST_0] = MAX_NUM_REF; pu.mv[REF_PIC_LIST_0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); #if !JVET_N0329_IBC_SEARCH_IMP m_ctuRecord[cu.lumaPos()][cu.lumaSize()].bvRecord[pu.bv] = cost; #endif } return true; } void InterSearch::xxIBCHashSearch(PredictionUnit& pu, Mv* mvPred, int numMvPred, Mv &mv, int& idxMvPred, IbcHashMap& ibcHashMap) { mv.setZero(); m_pcRdCost->setCostScale(0); std::vector<Position> candPos; if (ibcHashMap.ibcHashMatch(pu.Y(), candPos, *pu.cs, m_pcEncCfg->getIBCHashSearchMaxCand(), m_pcEncCfg->getIBCHashSearchRange4SmallBlk())) { unsigned int minCost = MAX_UINT; const unsigned int lcuWidth = pu.cs->slice->getSPS()->getMaxCUWidth(); const int cuPelX = pu.Y().x; const int cuPelY = pu.Y().y; const int picWidth = pu.cs->slice->getSPS()->getPicWidthInLumaSamples(); const int picHeight = pu.cs->slice->getSPS()->getPicHeightInLumaSamples(); int roiWidth = pu.lwidth(); int roiHeight = pu.lheight(); for (std::vector<Position>::iterator pos = candPos.begin(); pos != candPos.end(); pos++) { Position bottomRight = pos->offset(pu.Y().width - 1, pu.Y().height - 1); if (pu.cs->isDecomp(*pos, pu.cs->chType) && pu.cs->isDecomp(bottomRight, pu.cs->chType)) { Position tmp = *pos - pu.Y().pos(); Mv candMv; candMv.set(tmp.x, tmp.y); if (!PU::isBlockVectorValid(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, 0, 0, candMv.getHor(), candMv.getVer(), lcuWidth)) { continue; } for (int n = 0; n < numMvPred; n++) { m_pcRdCost->setPredictor(mvPred[n]); unsigned int cost = m_pcRdCost->getBitsOfVectorWithPredictor(candMv.getHor(), candMv.getVer(), 0); if (cost < minCost) { mv = candMv; idxMvPred = n; minCost = cost; } int costQuadPel = MAX_UINT; if ((candMv.getHor() % 4 == 0) && (candMv.getVer() % 4 == 0) && (pu.cs->sps->getAMVREnabledFlag())) { Mv mvPredQuadPel; int imvShift = 2; int offset = 1 << 
(imvShift - 1); #if JVET_N0335_N0085_MV_ROUNDING int x = (mvPred[n].hor + offset - (mvPred[n].hor >= 0)) >> 2; int y = (mvPred[n].ver + offset - (mvPred[n].ver >= 0)) >> 2; mvPredQuadPel.set(x, y); #else mvPredQuadPel.set(((mvPred[n].hor + offset) >> 2), ((mvPred[n].ver + offset) >> 2)); #endif m_pcRdCost->setPredictor(mvPredQuadPel); costQuadPel = m_pcRdCost->getBitsOfVectorWithPredictor(candMv.getHor() >> 2, candMv.getVer() >> 2, 0); } if (costQuadPel < minCost) { mv = candMv; idxMvPred = n; minCost = costQuadPel; } } } } } } void InterSearch::addToSortList(std::list<BlockHash>& listBlockHash, std::list<int>& listCost, int cost, const BlockHash& blockHash) { std::list<BlockHash>::iterator itBlockHash = listBlockHash.begin(); std::list<int>::iterator itCost = listCost.begin(); while (itCost != listCost.end()) { if (cost < (*itCost)) { listCost.insert(itCost, cost); listBlockHash.insert(itBlockHash, blockHash); return; } ++itCost; ++itBlockHash; } listCost.push_back(cost); listBlockHash.push_back(blockHash); } void InterSearch::selectMatchesInter(const MapIterator& itBegin, int count, std::list<BlockHash>& listBlockHash, const BlockHash& currBlockHash) { const int maxReturnNumber = 5; listBlockHash.clear(); std::list<int> listCost; listCost.clear(); MapIterator it = itBegin; for (int i = 0; i < count; i++, it++) { if ((*it).hashValue2 != currBlockHash.hashValue2) { continue; } int currCost = RdCost::xGetExpGolombNumberOfBits((*it).x - currBlockHash.x) + RdCost::xGetExpGolombNumberOfBits((*it).y - currBlockHash.y); if (listBlockHash.size() < maxReturnNumber) { addToSortList(listBlockHash, listCost, currCost, (*it)); } else if (!listCost.empty() && currCost < listCost.back()) { listCost.pop_back(); listBlockHash.pop_back(); addToSortList(listBlockHash, listCost, currCost, (*it)); } } } int InterSearch::xHashInterPredME(const PredictionUnit& pu, RefPicList currRefPicList, int currRefPicIndex, Mv bestMv[5]) { int width = pu.cu->lumaSize().width; int height = 
pu.cu->lumaSize().height; int xPos = pu.cu->lumaPos().x; int yPos = pu.cu->lumaPos().y; uint32_t hashValue1; uint32_t hashValue2; if (!TComHash::getBlockHashValue((pu.cs->picture->getOrigBuf()), width, height, xPos, yPos, pu.cu->slice->getSPS()->getBitDepths(), hashValue1, hashValue2)) { return 0; } BlockHash currBlockHash; currBlockHash.x = xPos; currBlockHash.y = yPos; currBlockHash.hashValue2 = hashValue2; int count = static_cast<int>(pu.cu->slice->getRefPic(currRefPicList, currRefPicIndex)->getHashMap()->count(hashValue1)); if (count == 0) { return 0; } list<BlockHash> listBlockHash; selectMatchesInter(pu.cu->slice->getRefPic(currRefPicList, currRefPicIndex)->getHashMap()->getFirstIterator(hashValue1), count, listBlockHash, currBlockHash); if (listBlockHash.empty()) { return 0; } int totalSize = 0; list<BlockHash>::iterator it = listBlockHash.begin(); for (int i = 0; i < 5 && i < listBlockHash.size(); i++, it++) { bestMv[i].set((*it).x - currBlockHash.x, (*it).y - currBlockHash.y); totalSize++; } return totalSize; } bool InterSearch::xHashInterEstimation(PredictionUnit& pu, RefPicList& bestRefPicList, int& bestRefIndex, Mv& bestMv, Mv& bestMvd, int& bestMVPIndex, bool& isPerfectMatch) { int width = pu.cu->lumaSize().width; int height = pu.cu->lumaSize().height; int xPos = pu.cu->lumaPos().x; int yPos = pu.cu->lumaPos().y; uint32_t hashValue1; uint32_t hashValue2; Distortion bestCost = UINT64_MAX; if (!TComHash::getBlockHashValue((pu.cs->picture->getOrigBuf()), width, height, xPos, yPos, pu.cu->slice->getSPS()->getBitDepths(), hashValue1, hashValue2)) { return false; } BlockHash currBlockHash; currBlockHash.x = xPos; currBlockHash.y = yPos; currBlockHash.hashValue2 = hashValue2; m_pcRdCost->setDistParam(m_cDistParam, pu.cs->getOrgBuf(pu).Y(), 0, 0, m_lumaClpRng.bd, COMPONENT_Y, 0, 1, false); int imvBest = 0; int numPredDir = pu.cu->slice->isInterP() ? 1 : 2; for (int refList = 0; refList < numPredDir; refList++) { RefPicList eRefPicList = (refList == 0) ? 
REF_PIC_LIST_0 : REF_PIC_LIST_1; int refPicNumber = pu.cu->slice->getNumRefIdx(eRefPicList); for (int refIdx = 0; refIdx < refPicNumber; refIdx++) { int bitsOnRefIdx = 1; if (refPicNumber > 1) { bitsOnRefIdx += refIdx + 1; if (refIdx == refPicNumber - 1) { bitsOnRefIdx--; } } if (refList == 0 || pu.cu->slice->getList1IdxToList0Idx(refIdx) < 0) { int count = static_cast<int>(pu.cu->slice->getRefPic(eRefPicList, refIdx)->getHashMap()->count(hashValue1)); if (count == 0) { continue; } list<BlockHash> listBlockHash; selectMatchesInter(pu.cu->slice->getRefPic(eRefPicList, refIdx)->getHashMap()->getFirstIterator(hashValue1), count, listBlockHash, currBlockHash); if (listBlockHash.empty()) { continue; } AMVPInfo currAMVPInfoPel; AMVPInfo currAMVPInfo4Pel; pu.cu->imv = 2; PU::fillMvpCand(pu, eRefPicList, refIdx, currAMVPInfo4Pel); pu.cu->imv = 1; PU::fillMvpCand(pu, eRefPicList, refIdx, currAMVPInfoPel); AMVPInfo currAMVPInfoQPel; pu.cu->imv = 0; PU::fillMvpCand(pu, eRefPicList, refIdx, currAMVPInfoQPel); CHECK(currAMVPInfoPel.numCand <= 1, "Wrong") const Pel* refBufStart = pu.cu->slice->getRefPic(eRefPicList, refIdx)->getRecoBuf().get(COMPONENT_Y).buf; const int refStride = pu.cu->slice->getRefPic(eRefPicList, refIdx)->getRecoBuf().get(COMPONENT_Y).stride; m_cDistParam.cur.stride = refStride; m_pcRdCost->selectMotionLambda(pu.cu->transQuantBypass); m_pcRdCost->setCostScale(0); list<BlockHash>::iterator it; for (it = listBlockHash.begin(); it != listBlockHash.end(); ++it) { int curMVPIdx = 0; unsigned int curMVPbits = MAX_UINT; Mv cMv((*it).x - currBlockHash.x, (*it).y - currBlockHash.y); cMv.changePrecision(MV_PRECISION_INT, MV_PRECISION_QUARTER); for (int mvpIdxTemp = 0; mvpIdxTemp < 2; mvpIdxTemp++) { Mv cMvPredPel = currAMVPInfoQPel.mvCand[mvpIdxTemp]; m_pcRdCost->setPredictor(cMvPredPel); unsigned int tempMVPbits = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor(), cMv.getVer(), 0); if (tempMVPbits < curMVPbits) { curMVPbits = tempMVPbits; curMVPIdx = mvpIdxTemp; 
pu.cu->imv = 0; } if (pu.cu->slice->getSPS()->getAMVREnabledFlag()) { unsigned int bitsMVP1Pel = MAX_UINT; Mv mvPred1Pel = currAMVPInfoPel.mvCand[mvpIdxTemp]; m_pcRdCost->setPredictor(mvPred1Pel); bitsMVP1Pel = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor(), cMv.getVer(), 2); if (bitsMVP1Pel < curMVPbits) { curMVPbits = bitsMVP1Pel; curMVPIdx = mvpIdxTemp; pu.cu->imv = 1; } if ((cMv.getHor() % 16 == 0) && (cMv.getVer() % 16 == 0)) { unsigned int bitsMVP4Pel = MAX_UINT; Mv mvPred4Pel = currAMVPInfo4Pel.mvCand[mvpIdxTemp]; m_pcRdCost->setPredictor(mvPred4Pel); bitsMVP4Pel = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor(), cMv.getVer(), 4); if (bitsMVP4Pel < curMVPbits) { curMVPbits = bitsMVP4Pel; curMVPIdx = mvpIdxTemp; pu.cu->imv = 2; } } } } curMVPbits += bitsOnRefIdx; m_cDistParam.cur.buf = refBufStart + (*it).y*refStride + (*it).x; Distortion currSad = m_cDistParam.distFunc(m_cDistParam); Distortion currCost = currSad + m_pcRdCost->getCost(curMVPbits); if (!isPerfectMatch) { if (pu.cu->slice->getRefPic(eRefPicList, refIdx)->slices[0]->getSliceQp() <= pu.cu->slice->getSliceQp()) { isPerfectMatch = true; } } if (currCost < bestCost) { bestCost = currCost; bestRefPicList = eRefPicList; bestRefIndex = refIdx; bestMv = cMv; bestMVPIndex = curMVPIdx; imvBest = pu.cu->imv; if (pu.cu->imv == 2) { bestMvd = cMv - currAMVPInfo4Pel.mvCand[curMVPIdx]; } else if (pu.cu->imv == 1) { bestMvd = cMv - currAMVPInfoPel.mvCand[curMVPIdx]; } else { bestMvd = cMv - currAMVPInfoQPel.mvCand[curMVPIdx]; } } } } } } pu.cu->imv = imvBest; if (bestMvd == Mv(0, 0)) { pu.cu->imv = 0; return false; } return (bestCost < MAX_INT); } bool InterSearch::predInterHashSearch(CodingUnit& cu, Partitioner& partitioner, bool& isPerfectMatch) { Mv bestMv, bestMvd; RefPicList bestRefPicList; int bestRefIndex; int bestMVPIndex; auto &pu = *cu.firstPU; Mv cMvZero; pu.mv[REF_PIC_LIST_0] = Mv(); pu.mv[REF_PIC_LIST_1] = Mv(); pu.mvd[REF_PIC_LIST_0] = cMvZero; pu.mvd[REF_PIC_LIST_1] = cMvZero; 
pu.refIdx[REF_PIC_LIST_0] = NOT_VALID; pu.refIdx[REF_PIC_LIST_1] = NOT_VALID; pu.mvpIdx[REF_PIC_LIST_0] = NOT_VALID; pu.mvpIdx[REF_PIC_LIST_1] = NOT_VALID; pu.mvpNum[REF_PIC_LIST_0] = NOT_VALID; pu.mvpNum[REF_PIC_LIST_1] = NOT_VALID; if (xHashInterEstimation(pu, bestRefPicList, bestRefIndex, bestMv, bestMvd, bestMVPIndex, isPerfectMatch)) { pu.interDir = static_cast<int>(bestRefPicList) + 1; pu.mv[bestRefPicList] = bestMv; pu.mv[bestRefPicList].hor <<= MV_FRACTIONAL_BITS_DIFF; pu.mv[bestRefPicList].ver <<= MV_FRACTIONAL_BITS_DIFF; pu.mvd[bestRefPicList] = bestMvd; pu.refIdx[bestRefPicList] = bestRefIndex; pu.mvpIdx[bestRefPicList] = bestMVPIndex; pu.mvpNum[bestRefPicList] = 2; PU::spanMotionInfo(pu); PelUnitBuf predBuf = pu.cs->getPredBuf(pu); motionCompensation(pu, predBuf, REF_PIC_LIST_X); return true; } else { return false; } return true; } //! search of the best candidate for inter prediction void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) { CodingStructure& cs = *cu.cs; AMVPInfo amvp[2]; Mv cMvSrchRngLT; Mv cMvSrchRngRB; Mv cMvZero; Mv cMv[2]; Mv cMvBi[2]; Mv cMvTemp[2][33]; Mv cMvHevcTemp[2][33]; int iNumPredDir = cs.slice->isInterP() ? 1 : 2; Mv cMvPred[2][33]; Mv cMvPredBi[2][33]; int aaiMvpIdxBi[2][33]; int aaiMvpIdx[2][33]; int aaiMvpNum[2][33]; AMVPInfo aacAMVPInfo[2][33]; int iRefIdx[2]={0,0}; //If un-initialized, may cause SEGV in bi-directional prediction iterative stage. int iRefIdxBi[2] = { -1, -1 }; uint32_t uiMbBits[3] = {1, 1, 0}; uint32_t uiLastMode = 0; uint32_t uiLastModeTemp = 0; int iRefStart, iRefEnd; int symMode = 0; int bestBiPRefIdxL1 = 0; int bestBiPMvpL1 = 0; Distortion biPDistTemp = std::numeric_limits<Distortion>::max(); uint8_t gbiIdx = (cu.cs->slice->isInterB() ? 
cu.GBiIdx : GBI_DEFAULT); bool enforceGBiPred = false; MergeCtx mergeCtx; // Loop over Prediction Units CHECK(!cu.firstPU, "CU does not contain any PUs"); uint32_t puIdx = 0; auto &pu = *cu.firstPU; WPScalingParam *wp0; WPScalingParam *wp1; int tryBipred = 0; bool checkAffine = pu.cu->imv == 0 || pu.cu->slice->getSPS()->getAffineAmvrEnabledFlag(); bool checkNonAffine = pu.cu->imv == 0 || ( pu.cu->slice->getSPS()->getAMVREnabledFlag() && pu.cu->imv <= (pu.cu->slice->getSPS()->getAMVREnabledFlag() ? IMV_4PEL : 0)); CodingUnit *bestCU = pu.cu->cs->bestCS != nullptr ? pu.cu->cs->bestCS->getCU( CHANNEL_TYPE_LUMA ) : nullptr; bool trySmvd = ( bestCU != nullptr && pu.cu->imv == 2 && checkAffine ) ? ( !bestCU->firstPU->mergeFlag && !bestCU->affine ) : true; if ( pu.cu->imv && bestCU != nullptr && checkAffine ) { checkAffine = !( bestCU->firstPU->mergeFlag || !bestCU->affine ); } if ( pu.cu->imv == 2 && checkNonAffine && pu.cu->slice->getSPS()->getAffineAmvrEnabledFlag() ) { checkNonAffine = m_affineMotion.hevcCost[1] < m_affineMotion.hevcCost[0] * 1.06f; } { // motion estimation only evaluates luma component m_maxCompIDToPred = MAX_NUM_COMPONENT; // m_maxCompIDToPred = COMPONENT_Y; CHECK(pu.cu != &cu, "PU is contained in another CU"); if (cu.cs->sps->getSBTMVPEnabledFlag()) { Size bufSize = g_miScaling.scale(pu.lumaSize()); mergeCtx.subPuMvpMiBuf = MotionBuf(m_SubPuMiBuf, bufSize); } PU::spanMotionInfo( pu ); Distortion uiHevcCost = std::numeric_limits<Distortion>::max(); Distortion uiAffineCost = std::numeric_limits<Distortion>::max(); Distortion uiCost[2] = { std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max() }; Distortion uiCostBi = std::numeric_limits<Distortion>::max(); Distortion uiCostTemp; uint32_t uiBits[3]; uint32_t uiBitsTemp; Distortion bestBiPDist = std::numeric_limits<Distortion>::max(); Distortion uiCostTempL0[MAX_NUM_REF]; for (int iNumRef=0; iNumRef < MAX_NUM_REF; iNumRef++) { uiCostTempL0[iNumRef] = 
std::numeric_limits<Distortion>::max(); } uint32_t uiBitsTempL0[MAX_NUM_REF]; Mv mvValidList1; int refIdxValidList1 = 0; uint32_t bitsValidList1 = MAX_UINT; Distortion costValidList1 = std::numeric_limits<Distortion>::max(); PelUnitBuf origBuf = pu.cs->getOrgBuf( pu ); xGetBlkBits( cs.slice->isInterP(), puIdx, uiLastMode, uiMbBits ); m_pcRdCost->selectMotionLambda( cu.transQuantBypass ); unsigned imvShift = pu.cu->imv << 1; if ( checkNonAffine ) { // Uni-directional prediction for ( int iRefList = 0; iRefList < iNumPredDir; iRefList++ ) { RefPicList eRefPicList = ( iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 ); for (int iRefIdxTemp = 0; iRefIdxTemp < cs.slice->getNumRefIdx(eRefPicList); iRefIdxTemp++) { uiBitsTemp = uiMbBits[iRefList]; if ( cs.slice->getNumRefIdx(eRefPicList) > 1 ) { uiBitsTemp += iRefIdxTemp+1; if ( iRefIdxTemp == cs.slice->getNumRefIdx(eRefPicList)-1 ) { uiBitsTemp--; } } xEstimateMvPredAMVP( pu, origBuf, eRefPicList, iRefIdxTemp, cMvPred[iRefList][iRefIdxTemp], amvp[eRefPicList], false, &biPDistTemp); aaiMvpIdx[iRefList][iRefIdxTemp] = pu.mvpIdx[eRefPicList]; aaiMvpNum[iRefList][iRefIdxTemp] = pu.mvpNum[eRefPicList]; if(cs.slice->getMvdL1ZeroFlag() && iRefList==1 && biPDistTemp < bestBiPDist) { bestBiPDist = biPDistTemp; bestBiPMvpL1 = aaiMvpIdx[iRefList][iRefIdxTemp]; bestBiPRefIdxL1 = iRefIdxTemp; } uiBitsTemp += m_auiMVPIdxCost[aaiMvpIdx[iRefList][iRefIdxTemp]][AMVP_MAX_NUM_CANDS]; if ( m_pcEncCfg->getFastMEForGenBLowDelayEnabled() && iRefList == 1 ) // list 1 { if ( cs.slice->getList1IdxToList0Idx( iRefIdxTemp ) >= 0 ) { cMvTemp[1][iRefIdxTemp] = cMvTemp[0][cs.slice->getList1IdxToList0Idx( iRefIdxTemp )]; uiCostTemp = uiCostTempL0[cs.slice->getList1IdxToList0Idx( iRefIdxTemp )]; /*first subtract the bit-rate part of the cost of the other list*/ uiCostTemp -= m_pcRdCost->getCost( uiBitsTempL0[cs.slice->getList1IdxToList0Idx( iRefIdxTemp )] ); /*correct the bit-rate part of the current ref*/ m_pcRdCost->setPredictor ( 
cMvPred[iRefList][iRefIdxTemp] ); uiBitsTemp += m_pcRdCost->getBitsOfVectorWithPredictor( cMvTemp[1][iRefIdxTemp].getHor(), cMvTemp[1][iRefIdxTemp].getVer(), imvShift ); /*calculate the correct cost*/ uiCostTemp += m_pcRdCost->getCost( uiBitsTemp ); } else { xMotionEstimation( pu, origBuf, eRefPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp, amvp[eRefPicList] ); } } else { xMotionEstimation( pu, origBuf, eRefPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp, amvp[eRefPicList] ); } if( cu.cs->sps->getUseGBi() && cu.GBiIdx == GBI_DEFAULT && cu.cs->slice->isInterB() ) { const bool checkIdentical = true; m_uniMotions.setReadMode(checkIdentical, (uint32_t)iRefList, (uint32_t)iRefIdxTemp); m_uniMotions.copyFrom(cMvTemp[iRefList][iRefIdxTemp], uiCostTemp - m_pcRdCost->getCost(uiBitsTemp), (uint32_t)iRefList, (uint32_t)iRefIdxTemp); } xCopyAMVPInfo( &amvp[eRefPicList], &aacAMVPInfo[iRefList][iRefIdxTemp]); // must always be done ( also when AMVP_MODE = AM_NONE ) xCheckBestMVP( eRefPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], amvp[eRefPicList], uiBitsTemp, uiCostTemp, pu.cu->imv ); if ( iRefList == 0 ) { uiCostTempL0[iRefIdxTemp] = uiCostTemp; uiBitsTempL0[iRefIdxTemp] = uiBitsTemp; } if ( uiCostTemp < uiCost[iRefList] ) { uiCost[iRefList] = uiCostTemp; uiBits[iRefList] = uiBitsTemp; // storing for bi-prediction // set motion cMv [iRefList] = cMvTemp[iRefList][iRefIdxTemp]; iRefIdx[iRefList] = iRefIdxTemp; } if ( iRefList == 1 && uiCostTemp < costValidList1 && cs.slice->getList1IdxToList0Idx( iRefIdxTemp ) < 0 ) { costValidList1 = uiCostTemp; bitsValidList1 = uiBitsTemp; // set motion mvValidList1 = cMvTemp[iRefList][iRefIdxTemp]; refIdxValidList1 = iRefIdxTemp; } } } if (cu.Y().width > 8 && cu.Y().height > 8 && 
cu.slice->getSPS()->getUseAffine() && checkAffine && (gbiIdx == GBI_DEFAULT || m_affineModeSelected || !m_pcEncCfg->getUseGBiFast()) ) { ::memcpy( cMvHevcTemp, cMvTemp, sizeof( cMvTemp ) ); } // Bi-predictive Motion estimation if( ( cs.slice->isInterB() ) && ( PU::isBipredRestriction( pu ) == false ) && (cu.slice->getCheckLDC() || gbiIdx == GBI_DEFAULT || !m_affineModeSelected || !m_pcEncCfg->getUseGBiFast()) ) { bool doBiPred = true; tryBipred = 1; cMvBi[0] = cMv[0]; cMvBi[1] = cMv[1]; iRefIdxBi[0] = iRefIdx[0]; iRefIdxBi[1] = iRefIdx[1]; ::memcpy( cMvPredBi, cMvPred, sizeof( cMvPred ) ); ::memcpy( aaiMvpIdxBi, aaiMvpIdx, sizeof( aaiMvpIdx ) ); uint32_t uiMotBits[2]; if(cs.slice->getMvdL1ZeroFlag()) { xCopyAMVPInfo(&aacAMVPInfo[1][bestBiPRefIdxL1], &amvp[REF_PIC_LIST_1]); aaiMvpIdxBi[1][bestBiPRefIdxL1] = bestBiPMvpL1; cMvPredBi [1][bestBiPRefIdxL1] = amvp[REF_PIC_LIST_1].mvCand[bestBiPMvpL1]; cMvBi [1] = cMvPredBi[1][bestBiPRefIdxL1]; iRefIdxBi[1] = bestBiPRefIdxL1; pu.mv [REF_PIC_LIST_1] = cMvBi[1]; pu.mv[REF_PIC_LIST_1].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); pu.refIdx[REF_PIC_LIST_1] = iRefIdxBi[1]; pu.mvpIdx[REF_PIC_LIST_1] = bestBiPMvpL1; if( m_pcEncCfg->getMCTSEncConstraint() ) { Mv restrictedMv = pu.mv[REF_PIC_LIST_1]; Area curTileAreaRestricted; curTileAreaRestricted = pu.cs->picture->mctsInfo.getTileAreaSubPelRestricted( pu ); MCTSHelper::clipMvToArea( restrictedMv, pu.cu->Y(), curTileAreaRestricted, *pu.cs->sps ); // If sub-pel filter samples are not inside of allowed area if( restrictedMv != pu.mv[REF_PIC_LIST_1] ) { uiCostBi = std::numeric_limits<Distortion>::max(); doBiPred = false; } } PelUnitBuf predBufTmp = m_tmpPredStorage[REF_PIC_LIST_1].getBuf( UnitAreaRelative(cu, pu) ); motionCompensation( pu, predBufTmp, REF_PIC_LIST_1 ); uiMotBits[0] = uiBits[0] - uiMbBits[0]; uiMotBits[1] = uiMbBits[1]; if ( cs.slice->getNumRefIdx(REF_PIC_LIST_1) > 1 ) { uiMotBits[1] += bestBiPRefIdxL1 + 1; if ( bestBiPRefIdxL1 == 
cs.slice->getNumRefIdx(REF_PIC_LIST_1)-1 ) { uiMotBits[1]--; } } uiMotBits[1] += m_auiMVPIdxCost[aaiMvpIdxBi[1][bestBiPRefIdxL1]][AMVP_MAX_NUM_CANDS]; uiBits[2] = uiMbBits[2] + uiMotBits[0] + uiMotBits[1]; cMvTemp[1][bestBiPRefIdxL1] = cMvBi[1]; } else { uiMotBits[0] = uiBits[0] - uiMbBits[0]; uiMotBits[1] = uiBits[1] - uiMbBits[1]; uiBits[2] = uiMbBits[2] + uiMotBits[0] + uiMotBits[1]; } if( doBiPred ) { // 4-times iteration (default) int iNumIter = 4; // fast encoder setting: only one iteration if ( m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE2 || cs.slice->getMvdL1ZeroFlag() ) { iNumIter = 1; } enforceGBiPred = (gbiIdx != GBI_DEFAULT); for ( int iIter = 0; iIter < iNumIter; iIter++ ) { int iRefList = iIter % 2; if ( m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE2 ) { if( uiCost[0] <= uiCost[1] ) { iRefList = 1; } else { iRefList = 0; } if( gbiIdx != GBI_DEFAULT ) { iRefList = ( abs( getGbiWeight(gbiIdx, REF_PIC_LIST_0 ) ) > abs( getGbiWeight(gbiIdx, REF_PIC_LIST_1 ) ) ? 1 : 0 ); } } else if ( iIter == 0 ) { iRefList = 0; } if ( iIter == 0 && !cs.slice->getMvdL1ZeroFlag()) { pu.mv [1 - iRefList] = cMv [1 - iRefList]; pu.mv[1 - iRefList].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); pu.refIdx[1 - iRefList] = iRefIdx[1 - iRefList]; PelUnitBuf predBufTmp = m_tmpPredStorage[1 - iRefList].getBuf( UnitAreaRelative(cu, pu) ); motionCompensation( pu, predBufTmp, RefPicList(1 - iRefList) ); } RefPicList eRefPicList = ( iRefList ? 
REF_PIC_LIST_1 : REF_PIC_LIST_0 ); if(cs.slice->getMvdL1ZeroFlag()) { iRefList = 0; eRefPicList = REF_PIC_LIST_0; } bool bChanged = false; iRefStart = 0; iRefEnd = cs.slice->getNumRefIdx(eRefPicList)-1; for (int iRefIdxTemp = iRefStart; iRefIdxTemp <= iRefEnd; iRefIdxTemp++) { if( m_pcEncCfg->getUseGBiFast() && (gbiIdx != GBI_DEFAULT) && (pu.cu->slice->getRefPic(eRefPicList, iRefIdxTemp)->getPOC() == pu.cu->slice->getRefPic(RefPicList(1 - iRefList), pu.refIdx[1 - iRefList])->getPOC()) && (!pu.cu->imv && pu.cu->slice->getTLayer()>1)) { continue; } uiBitsTemp = uiMbBits[2] + uiMotBits[1-iRefList]; uiBitsTemp += ((cs.slice->getSPS()->getUseGBi() == true) ? getWeightIdxBits(gbiIdx) : 0); if ( cs.slice->getNumRefIdx(eRefPicList) > 1 ) { uiBitsTemp += iRefIdxTemp+1; if ( iRefIdxTemp == cs.slice->getNumRefIdx(eRefPicList)-1 ) { uiBitsTemp--; } } uiBitsTemp += m_auiMVPIdxCost[aaiMvpIdxBi[iRefList][iRefIdxTemp]][AMVP_MAX_NUM_CANDS]; if ( cs.slice->getBiDirPred() ) { uiBitsTemp += 1; // add one bit for symmetrical MVD mode } // call ME xCopyAMVPInfo(&aacAMVPInfo[iRefList][iRefIdxTemp], &amvp[eRefPicList] ); xMotionEstimation ( pu, origBuf, eRefPicList, cMvPredBi[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], aaiMvpIdxBi[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp, amvp[eRefPicList], true ); xCheckBestMVP( eRefPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPredBi[iRefList][iRefIdxTemp], aaiMvpIdxBi[iRefList][iRefIdxTemp], amvp[eRefPicList], uiBitsTemp, uiCostTemp, pu.cu->imv); if ( uiCostTemp < uiCostBi ) { bChanged = true; cMvBi[iRefList] = cMvTemp[iRefList][iRefIdxTemp]; iRefIdxBi[iRefList] = iRefIdxTemp; uiCostBi = uiCostTemp; uiMotBits[iRefList] = uiBitsTemp - uiMbBits[2] - uiMotBits[1-iRefList]; uiMotBits[iRefList] -= ((cs.slice->getSPS()->getUseGBi() == true) ? 
getWeightIdxBits(gbiIdx) : 0); uiBits[2] = uiBitsTemp; if(iNumIter!=1) { // Set motion pu.mv [eRefPicList] = cMvBi [iRefList]; pu.mv[eRefPicList].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); pu.refIdx[eRefPicList] = iRefIdxBi[iRefList]; PelUnitBuf predBufTmp = m_tmpPredStorage[iRefList].getBuf( UnitAreaRelative(cu, pu) ); motionCompensation( pu, predBufTmp, eRefPicList ); } } } // for loop-iRefIdxTemp if ( !bChanged ) { if ((uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) || enforceGBiPred) { xCopyAMVPInfo(&aacAMVPInfo[0][iRefIdxBi[0]], &amvp[REF_PIC_LIST_0]); xCheckBestMVP( REF_PIC_LIST_0, cMvBi[0], cMvPredBi[0][iRefIdxBi[0]], aaiMvpIdxBi[0][iRefIdxBi[0]], amvp[eRefPicList], uiBits[2], uiCostBi, pu.cu->imv); if(!cs.slice->getMvdL1ZeroFlag()) { xCopyAMVPInfo(&aacAMVPInfo[1][iRefIdxBi[1]], &amvp[REF_PIC_LIST_1]); xCheckBestMVP( REF_PIC_LIST_1, cMvBi[1], cMvPredBi[1][iRefIdxBi[1]], aaiMvpIdxBi[1][iRefIdxBi[1]], amvp[eRefPicList], uiBits[2], uiCostBi, pu.cu->imv); } } break; } } // for loop-iter } cu.refIdxBi[0] = iRefIdxBi[0]; cu.refIdxBi[1] = iRefIdxBi[1]; if ( cs.slice->getBiDirPred() && trySmvd ) { Distortion symCost; Mv cMvPredSym[2]; int mvpIdxSym[2]; int curRefList = REF_PIC_LIST_0; int tarRefList = 1 - curRefList; RefPicList eCurRefList = (curRefList ? 
REF_PIC_LIST_1 : REF_PIC_LIST_0); int refIdxCur = cs.slice->getSymRefIdx( curRefList ); int refIdxTar = cs.slice->getSymRefIdx( tarRefList ); MvField cCurMvField, cTarMvField; Distortion costStart = std::numeric_limits<Distortion>::max(); for ( int i = 0; i < aacAMVPInfo[curRefList][refIdxCur].numCand; i++ ) { for ( int j = 0; j < aacAMVPInfo[tarRefList][refIdxTar].numCand; j++ ) { cCurMvField.setMvField( aacAMVPInfo[curRefList][refIdxCur].mvCand[i], refIdxCur ); cTarMvField.setMvField( aacAMVPInfo[tarRefList][refIdxTar].mvCand[j], refIdxTar ); Distortion cost = xGetSymmetricCost( pu, origBuf, eCurRefList, cCurMvField, cTarMvField, gbiIdx ); if ( cost < costStart ) { costStart = cost; cMvPredSym[curRefList] = aacAMVPInfo[curRefList][refIdxCur].mvCand[i]; cMvPredSym[tarRefList] = aacAMVPInfo[tarRefList][refIdxTar].mvCand[j]; mvpIdxSym[curRefList] = i; mvpIdxSym[tarRefList] = j; } } } cCurMvField.mv = cMvPredSym[curRefList]; cTarMvField.mv = cMvPredSym[tarRefList]; m_pcRdCost->setCostScale(0); m_pcRdCost->setPredictor(cMvPredSym[curRefList]); uint32_t bits = m_pcRdCost->getBitsOfVectorWithPredictor(cCurMvField.mv.hor, cCurMvField.mv.ver, (pu.cu->imv << 1)); bits += m_auiMVPIdxCost[mvpIdxSym[curRefList]][AMVP_MAX_NUM_CANDS]; bits += m_auiMVPIdxCost[mvpIdxSym[tarRefList]][AMVP_MAX_NUM_CANDS]; costStart += m_pcRdCost->getCost(bits); std::vector<Mv> symmvdCands; symmvdCands.push_back(cMvTemp[curRefList][refIdxCur]); if (iRefIdxBi[curRefList] == refIdxCur && cMvBi[curRefList] != cMvTemp[curRefList][refIdxCur]) { symmvdCands.push_back(cMvBi[curRefList]); } for (auto mvStart : symmvdCands) { bool checked = false; //if it has been checkin in the mvPred. 
for (int i = 0; i < aacAMVPInfo[curRefList][refIdxCur].numCand && !checked; i++) { checked |= (mvStart == aacAMVPInfo[curRefList][refIdxCur].mvCand[i]); } if (checked) break; Distortion bestCost = costStart; symmvdCheckBestMvp(pu, origBuf, mvStart, (RefPicList)curRefList, aacAMVPInfo, gbiIdx, cMvPredSym, mvpIdxSym, costStart); if (costStart < bestCost) { cCurMvField.setMvField(mvStart, refIdxCur); cTarMvField.setMvField(mvStart.getSymmvdMv(cMvPredSym[curRefList], cMvPredSym[tarRefList]), refIdxTar); } } Mv startPtMv = cCurMvField.mv; Distortion mvpCost = m_pcRdCost->getCost(m_auiMVPIdxCost[mvpIdxSym[curRefList]][AMVP_MAX_NUM_CANDS] + m_auiMVPIdxCost[mvpIdxSym[tarRefList]][AMVP_MAX_NUM_CANDS]); symCost = costStart - mvpCost; // ME xSymmetricMotionEstimation( pu, origBuf, cMvPredSym[curRefList], cMvPredSym[tarRefList], eCurRefList, cCurMvField, cTarMvField, symCost, gbiIdx ); symCost += mvpCost; if (startPtMv != cCurMvField.mv) { // if ME change MV, run a final check for best MVP. symmvdCheckBestMvp(pu, origBuf, cCurMvField.mv, (RefPicList)curRefList, aacAMVPInfo, gbiIdx, cMvPredSym, mvpIdxSym, symCost, true); } bits = uiMbBits[2]; bits += 1; // add one bit for #symmetrical MVD mode bits += ((cs.slice->getSPS()->getUseGBi() == true) ? 
getWeightIdxBits(gbiIdx) : 0); symCost += m_pcRdCost->getCost(bits); cTarMvField.setMvField(cCurMvField.mv.getSymmvdMv(cMvPredSym[curRefList], cMvPredSym[tarRefList]), refIdxTar); if( m_pcEncCfg->getMCTSEncConstraint() ) { if( !( MCTSHelper::checkMvForMCTSConstraint( pu, cCurMvField.mv ) && MCTSHelper::checkMvForMCTSConstraint( pu, cTarMvField.mv ) ) ) symCost = std::numeric_limits<Distortion>::max(); } // save results if ( symCost < uiCostBi ) { uiCostBi = symCost; symMode = 1 + curRefList; cMvBi[curRefList] = cCurMvField.mv; iRefIdxBi[curRefList] = cCurMvField.refIdx; aaiMvpIdxBi[curRefList][cCurMvField.refIdx] = mvpIdxSym[curRefList]; cMvPredBi[curRefList][iRefIdxBi[curRefList]] = cMvPredSym[curRefList]; cMvBi[tarRefList] = cTarMvField.mv; iRefIdxBi[tarRefList] = cTarMvField.refIdx; aaiMvpIdxBi[tarRefList][cTarMvField.refIdx] = mvpIdxSym[tarRefList]; cMvPredBi[tarRefList][iRefIdxBi[tarRefList]] = cMvPredSym[tarRefList]; } } } // if (B_SLICE) // Clear Motion Field pu.mv [REF_PIC_LIST_0] = Mv(); pu.mv [REF_PIC_LIST_1] = Mv(); pu.mvd [REF_PIC_LIST_0] = cMvZero; pu.mvd [REF_PIC_LIST_1] = cMvZero; pu.refIdx[REF_PIC_LIST_0] = NOT_VALID; pu.refIdx[REF_PIC_LIST_1] = NOT_VALID; pu.mvpIdx[REF_PIC_LIST_0] = NOT_VALID; pu.mvpIdx[REF_PIC_LIST_1] = NOT_VALID; pu.mvpNum[REF_PIC_LIST_0] = NOT_VALID; pu.mvpNum[REF_PIC_LIST_1] = NOT_VALID; // Set Motion Field cMv [1] = mvValidList1; iRefIdx[1] = refIdxValidList1; uiBits [1] = bitsValidList1; uiCost [1] = costValidList1; if (cu.cs->pps->getWPBiPred() == true && tryBipred && (gbiIdx != GBI_DEFAULT)) { CHECK(iRefIdxBi[0]<0, "Invalid picture reference index"); CHECK(iRefIdxBi[1]<0, "Invalid picture reference index"); cu.cs->slice->getWpScaling(REF_PIC_LIST_0, iRefIdxBi[0], wp0); cu.cs->slice->getWpScaling(REF_PIC_LIST_1, iRefIdxBi[1], wp1); if ((wp0[COMPONENT_Y].bPresentFlag || wp0[COMPONENT_Cb].bPresentFlag || wp0[COMPONENT_Cr].bPresentFlag || wp1[COMPONENT_Y].bPresentFlag || wp1[COMPONENT_Cb].bPresentFlag || 
wp1[COMPONENT_Cr].bPresentFlag)) { uiCostBi = MAX_UINT; enforceGBiPred = false; } } if( enforceGBiPred ) { uiCost[0] = uiCost[1] = MAX_UINT; } uiLastModeTemp = uiLastMode; if ( uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) { uiLastMode = 2; pu.mv [REF_PIC_LIST_0] = cMvBi[0]; pu.mv [REF_PIC_LIST_1] = cMvBi[1]; pu.mv[REF_PIC_LIST_0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); pu.mv[REF_PIC_LIST_1].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); pu.mvd [REF_PIC_LIST_0] = cMvBi[0] - cMvPredBi[0][iRefIdxBi[0]]; pu.mvd [REF_PIC_LIST_1] = cMvBi[1] - cMvPredBi[1][iRefIdxBi[1]]; pu.refIdx[REF_PIC_LIST_0] = iRefIdxBi[0]; pu.refIdx[REF_PIC_LIST_1] = iRefIdxBi[1]; pu.mvpIdx[REF_PIC_LIST_0] = aaiMvpIdxBi[0][iRefIdxBi[0]]; pu.mvpIdx[REF_PIC_LIST_1] = aaiMvpIdxBi[1][iRefIdxBi[1]]; pu.mvpNum[REF_PIC_LIST_0] = aaiMvpNum[0][iRefIdxBi[0]]; pu.mvpNum[REF_PIC_LIST_1] = aaiMvpNum[1][iRefIdxBi[1]]; pu.interDir = 3; pu.cu->smvdMode = symMode; } else if ( uiCost[0] <= uiCost[1] ) { uiLastMode = 0; pu.mv [REF_PIC_LIST_0] = cMv[0]; pu.mv [REF_PIC_LIST_0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); pu.mvd [REF_PIC_LIST_0] = cMv[0] - cMvPred[0][iRefIdx[0]]; pu.refIdx[REF_PIC_LIST_0] = iRefIdx[0]; pu.mvpIdx[REF_PIC_LIST_0] = aaiMvpIdx[0][iRefIdx[0]]; pu.mvpNum[REF_PIC_LIST_0] = aaiMvpNum[0][iRefIdx[0]]; pu.interDir = 1; } else { uiLastMode = 1; pu.mv [REF_PIC_LIST_1] = cMv[1]; pu.mv [REF_PIC_LIST_1].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); pu.mvd [REF_PIC_LIST_1] = cMv[1] - cMvPred[1][iRefIdx[1]]; pu.refIdx[REF_PIC_LIST_1] = iRefIdx[1]; pu.mvpIdx[REF_PIC_LIST_1] = aaiMvpIdx[1][iRefIdx[1]]; pu.mvpNum[REF_PIC_LIST_1] = aaiMvpNum[1][iRefIdx[1]]; pu.interDir = 2; } if( gbiIdx != GBI_DEFAULT ) { cu.GBiIdx = GBI_DEFAULT; // Reset to default for the Non-NormalMC modes. } uiHevcCost = ( uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1] ) ? uiCostBi : ( ( uiCost[0] <= uiCost[1] ) ? 
uiCost[0] : uiCost[1] ); } if (cu.Y().width > 8 && cu.Y().height > 8 && cu.slice->getSPS()->getUseAffine() && checkAffine && (gbiIdx == GBI_DEFAULT || m_affineModeSelected || !m_pcEncCfg->getUseGBiFast()) ) { m_hevcCost = uiHevcCost; // save normal hevc result uint32_t uiMRGIndex = pu.mergeIdx; bool bMergeFlag = pu.mergeFlag; uint32_t uiInterDir = pu.interDir; int iSymMode = cu.smvdMode; Mv cMvd[2]; uint32_t uiMvpIdx[2], uiMvpNum[2]; uiMvpIdx[0] = pu.mvpIdx[REF_PIC_LIST_0]; uiMvpIdx[1] = pu.mvpIdx[REF_PIC_LIST_1]; uiMvpNum[0] = pu.mvpNum[REF_PIC_LIST_0]; uiMvpNum[1] = pu.mvpNum[REF_PIC_LIST_1]; cMvd[0] = pu.mvd[REF_PIC_LIST_0]; cMvd[1] = pu.mvd[REF_PIC_LIST_1]; MvField cHevcMvField[2]; cHevcMvField[0].setMvField( pu.mv[REF_PIC_LIST_0], pu.refIdx[REF_PIC_LIST_0] ); cHevcMvField[1].setMvField( pu.mv[REF_PIC_LIST_1], pu.refIdx[REF_PIC_LIST_1] ); // do affine ME & Merge cu.affineType = AFFINEMODEL_4PARAM; Mv acMvAffine4Para[2][33][3]; int refIdx4Para[2] = { -1, -1 }; xPredAffineInterSearch(pu, origBuf, puIdx, uiLastModeTemp, uiAffineCost, cMvHevcTemp, acMvAffine4Para, refIdx4Para, gbiIdx, enforceGBiPred, ((cu.slice->getSPS()->getUseGBi() == true) ? 
getWeightIdxBits(gbiIdx) : 0)); if ( pu.cu->imv == 0 ) { storeAffineMotion( pu.mvAffi, pu.refIdx, AFFINEMODEL_4PARAM, gbiIdx ); } if ( cu.slice->getSPS()->getUseAffineType() ) { if ( uiAffineCost < uiHevcCost * 1.05 ) ///< condition for 6 parameter affine ME { // save 4 parameter results Mv bestMv[2][3], bestMvd[2][3]; int bestMvpIdx[2], bestMvpNum[2], bestRefIdx[2]; uint8_t bestInterDir; bestInterDir = pu.interDir; bestRefIdx[0] = pu.refIdx[0]; bestRefIdx[1] = pu.refIdx[1]; bestMvpIdx[0] = pu.mvpIdx[0]; bestMvpIdx[1] = pu.mvpIdx[1]; bestMvpNum[0] = pu.mvpNum[0]; bestMvpNum[1] = pu.mvpNum[1]; for ( int refList = 0; refList < 2; refList++ ) { bestMv[refList][0] = pu.mvAffi[refList][0]; bestMv[refList][1] = pu.mvAffi[refList][1]; bestMv[refList][2] = pu.mvAffi[refList][2]; bestMvd[refList][0] = pu.mvdAffi[refList][0]; bestMvd[refList][1] = pu.mvdAffi[refList][1]; bestMvd[refList][2] = pu.mvdAffi[refList][2]; } refIdx4Para[0] = bestRefIdx[0]; refIdx4Para[1] = bestRefIdx[1]; Distortion uiAffine6Cost = std::numeric_limits<Distortion>::max(); cu.affineType = AFFINEMODEL_6PARAM; xPredAffineInterSearch(pu, origBuf, puIdx, uiLastModeTemp, uiAffine6Cost, cMvHevcTemp, acMvAffine4Para, refIdx4Para, gbiIdx, enforceGBiPred, ((cu.slice->getSPS()->getUseGBi() == true) ? 
getWeightIdxBits(gbiIdx) : 0)); if ( pu.cu->imv == 0 ) { storeAffineMotion( pu.mvAffi, pu.refIdx, AFFINEMODEL_6PARAM, gbiIdx ); } // reset to 4 parameter affine inter mode if ( uiAffineCost <= uiAffine6Cost ) { cu.affineType = AFFINEMODEL_4PARAM; pu.interDir = bestInterDir; pu.refIdx[0] = bestRefIdx[0]; pu.refIdx[1] = bestRefIdx[1]; pu.mvpIdx[0] = bestMvpIdx[0]; pu.mvpIdx[1] = bestMvpIdx[1]; pu.mvpNum[0] = bestMvpNum[0]; pu.mvpNum[1] = bestMvpNum[1]; for ( int verIdx = 0; verIdx < 3; verIdx++ ) { pu.mvdAffi[REF_PIC_LIST_0][verIdx] = bestMvd[0][verIdx]; pu.mvdAffi[REF_PIC_LIST_1][verIdx] = bestMvd[1][verIdx]; } PU::setAllAffineMv( pu, bestMv[0][0], bestMv[0][1], bestMv[0][2], REF_PIC_LIST_0 , false ); PU::setAllAffineMv( pu, bestMv[1][0], bestMv[1][1], bestMv[1][2], REF_PIC_LIST_1 , false ); } else { uiAffineCost = uiAffine6Cost; } } uiAffineCost += m_pcRdCost->getCost( 1 ); // add one bit for affine_type } if( uiAffineCost < uiHevcCost ) { if( m_pcEncCfg->getMCTSEncConstraint() && !MCTSHelper::checkMvBufferForMCTSConstraint( pu ) ) { uiAffineCost = std::numeric_limits<Distortion>::max(); } } if ( uiHevcCost <= uiAffineCost ) { // set hevc me result cu.affine = false; pu.mergeFlag = bMergeFlag; pu.mergeIdx = uiMRGIndex; pu.interDir = uiInterDir; cu.smvdMode = iSymMode; pu.mv [REF_PIC_LIST_0] = cHevcMvField[0].mv; pu.refIdx[REF_PIC_LIST_0] = cHevcMvField[0].refIdx; pu.mv [REF_PIC_LIST_1] = cHevcMvField[1].mv; pu.refIdx[REF_PIC_LIST_1] = cHevcMvField[1].refIdx; pu.mvpIdx[REF_PIC_LIST_0] = uiMvpIdx[0]; pu.mvpIdx[REF_PIC_LIST_1] = uiMvpIdx[1]; pu.mvpNum[REF_PIC_LIST_0] = uiMvpNum[0]; pu.mvpNum[REF_PIC_LIST_1] = uiMvpNum[1]; pu.mvd[REF_PIC_LIST_0] = cMvd[0]; pu.mvd[REF_PIC_LIST_1] = cMvd[1]; } else { cu.smvdMode = 0; CHECK( !cu.affine, "Wrong." 
); uiLastMode = uiLastModeTemp; } } if( cu.firstPU->interDir == 3 && !cu.firstPU->mergeFlag ) { if (gbiIdx != GBI_DEFAULT) { cu.GBiIdx = gbiIdx; } } m_maxCompIDToPred = MAX_NUM_COMPONENT; { PU::spanMotionInfo( pu, mergeCtx ); } // MC PelUnitBuf predBuf = pu.cs->getPredBuf(pu); if ( gbiIdx == GBI_DEFAULT || !m_affineMotion.affine4ParaAvail || !m_affineMotion.affine6ParaAvail ) { m_affineMotion.hevcCost[pu.cu->imv] = uiHevcCost; } motionCompensation( pu, predBuf, REF_PIC_LIST_X ); puIdx++; } setWpScalingDistParam( -1, REF_PIC_LIST_X, cu.cs->slice ); return; } uint32_t InterSearch::xCalcAffineMVBits( PredictionUnit& pu, Mv acMvTemp[3], Mv acMvPred[3], bool mvHighPrec ) { int mvNum = pu.cu->affineType ? 3 : 2; Mv tempMv0 = acMvTemp[0]; const int shift = mvHighPrec ? MV_FRACTIONAL_BITS_DIFF : 0; const unsigned int mvdShift = pu.cu->imv == 2 ? MV_FRACTIONAL_BITS_DIFF : 0; Mv secondPred; if ( mvHighPrec ) { tempMv0.changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER ); } m_pcRdCost->setCostScale( 0 ); uint32_t bitsTemp = 0; for ( int verIdx = 0; verIdx < mvNum; verIdx++ ) { m_pcRdCost->setPredictor( acMvPred[verIdx] ); if ( verIdx != 0 ) { secondPred = acMvPred[verIdx] + ( tempMv0 - acMvPred[0] ); m_pcRdCost->setPredictor( secondPred ); } bitsTemp += m_pcRdCost->getBitsOfVectorWithPredictor( acMvTemp[verIdx].getHor() >> shift, acMvTemp[verIdx].getVer() >> shift, mvdShift ); } return bitsTemp; } // AMVP void InterSearch::xEstimateMvPredAMVP( PredictionUnit& pu, PelUnitBuf& origBuf, RefPicList eRefPicList, int iRefIdx, Mv& rcMvPred, AMVPInfo& rAMVPInfo, bool bFilled, Distortion* puiDistBiP ) { Mv cBestMv; int iBestIdx = 0; Distortion uiBestCost = std::numeric_limits<Distortion>::max(); int i; AMVPInfo* pcAMVPInfo = &rAMVPInfo; // Fill the MV Candidates if (!bFilled) { PU::fillMvpCand( pu, eRefPicList, iRefIdx, *pcAMVPInfo ); } // initialize Mvp index & Mvp iBestIdx = 0; cBestMv = pcAMVPInfo->mvCand[0]; PelUnitBuf predBuf = m_tmpStorageLCU.getBuf( 
UnitAreaRelative(*pu.cu, pu) ); //-- Check Minimum Cost. for( i = 0 ; i < pcAMVPInfo->numCand; i++) { Distortion uiTmpCost = xGetTemplateCost( pu, origBuf, predBuf, pcAMVPInfo->mvCand[i], i, AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdx ); if( uiBestCost > uiTmpCost ) { uiBestCost = uiTmpCost; cBestMv = pcAMVPInfo->mvCand[i]; iBestIdx = i; (*puiDistBiP) = uiTmpCost; } } // Setting Best MVP rcMvPred = cBestMv; pu.mvpIdx[eRefPicList] = iBestIdx; pu.mvpNum[eRefPicList] = pcAMVPInfo->numCand; return; } uint32_t InterSearch::xGetMvpIdxBits(int iIdx, int iNum) { CHECK(iIdx < 0 || iNum < 0 || iIdx >= iNum, "Invalid parameters"); if (iNum == 1) { return 0; } uint32_t uiLength = 1; int iTemp = iIdx; if ( iTemp == 0 ) { return uiLength; } bool bCodeLast = ( iNum-1 > iTemp ); uiLength += (iTemp-1); if( bCodeLast ) { uiLength++; } return uiLength; } void InterSearch::xGetBlkBits( bool bPSlice, int iPartIdx, uint32_t uiLastMode, uint32_t uiBlkBit[3]) { uiBlkBit[0] = (! bPSlice) ? 3 : 1; uiBlkBit[1] = 3; uiBlkBit[2] = 5; } void InterSearch::xCopyAMVPInfo (AMVPInfo* pSrc, AMVPInfo* pDst) { pDst->numCand = pSrc->numCand; for (int i = 0; i < pSrc->numCand; i++) { pDst->mvCand[i] = pSrc->mvCand[i]; } } void InterSearch::xCheckBestMVP ( RefPicList eRefPicList, Mv cMv, Mv& rcMvPred, int& riMVPIdx, AMVPInfo& amvpInfo, uint32_t& ruiBits, Distortion& ruiCost, const uint8_t imv ) { if( imv > 0 ) { return; } unsigned imvshift = imv << 1; AMVPInfo* pcAMVPInfo = &amvpInfo; CHECK(pcAMVPInfo->mvCand[riMVPIdx] != rcMvPred, "Invalid MV prediction candidate"); if (pcAMVPInfo->numCand < 2) { return; } m_pcRdCost->setCostScale ( 0 ); int iBestMVPIdx = riMVPIdx; m_pcRdCost->setPredictor( rcMvPred ); int iOrgMvBits = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor(), cMv.getVer(), imvshift); iOrgMvBits += m_auiMVPIdxCost[riMVPIdx][AMVP_MAX_NUM_CANDS]; int iBestMvBits = iOrgMvBits; for (int iMVPIdx = 0; iMVPIdx < pcAMVPInfo->numCand; iMVPIdx++) { if (iMVPIdx == riMVPIdx) { continue; } 
m_pcRdCost->setPredictor( pcAMVPInfo->mvCand[iMVPIdx] ); int iMvBits = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor(), cMv.getVer(), imvshift); iMvBits += m_auiMVPIdxCost[iMVPIdx][AMVP_MAX_NUM_CANDS]; if (iMvBits < iBestMvBits) { iBestMvBits = iMvBits; iBestMVPIdx = iMVPIdx; } } if (iBestMVPIdx != riMVPIdx) //if changed { rcMvPred = pcAMVPInfo->mvCand[iBestMVPIdx]; riMVPIdx = iBestMVPIdx; uint32_t uiOrgBits = ruiBits; ruiBits = uiOrgBits - iOrgMvBits + iBestMvBits; ruiCost = (ruiCost - m_pcRdCost->getCost( uiOrgBits )) + m_pcRdCost->getCost( ruiBits ); } } Distortion InterSearch::xGetTemplateCost( const PredictionUnit& pu, PelUnitBuf& origBuf, PelUnitBuf& predBuf, Mv cMvCand, int iMVPIdx, int iMVPNum, RefPicList eRefPicList, int iRefIdx ) { Distortion uiCost = std::numeric_limits<Distortion>::max(); const Picture* picRef = pu.cu->slice->getRefPic( eRefPicList, iRefIdx ); cMvCand.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); clipMv( cMvCand, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps ); // prediction pattern const bool bi = pu.cu->slice->testWeightPred() && pu.cu->slice->getSliceType()==P_SLICE; xPredInterBlk( COMPONENT_Y, pu, picRef, cMvCand, predBuf, bi, pu.cu->slice->clpRng( COMPONENT_Y ) , false , false ); if ( bi ) { xWeightedPredictionUni( pu, predBuf, eRefPicList, predBuf, iRefIdx, m_maxCompIDToPred ); } // calc distortion uiCost = m_pcRdCost->getDistPart(origBuf.Y(), predBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_SAD); uiCost += m_pcRdCost->getCost( m_auiMVPIdxCost[iMVPIdx][iMVPNum] ); return uiCost; } Distortion InterSearch::xGetAffineTemplateCost( PredictionUnit& pu, PelUnitBuf& origBuf, PelUnitBuf& predBuf, Mv acMvCand[3], int iMVPIdx, int iMVPNum, RefPicList eRefPicList, int iRefIdx ) { Distortion uiCost = std::numeric_limits<Distortion>::max(); const Picture* picRef = pu.cu->slice->getRefPic( eRefPicList, iRefIdx ); // prediction pattern const bool bi = pu.cu->slice->testWeightPred() && 
/** Motion estimation of one prediction unit against a single reference picture.
 *
 * Runs an integer-sample search followed by a refinement step and reports the
 * resulting motion vector, bit count and cost through the reference parameters.
 *
 * \param pu           prediction unit to search for
 * \param origBuf      original samples of the prediction unit
 * \param eRefPicList  reference picture list to search in
 * \param rcMvPred     MV predictor; installed as predictor in the RD cost object and
 *                     used as search start/centre for uni-prediction
 * \param iRefIdxPred  reference index inside eRefPicList
 * \param rcMv         [in,out] for bi-prediction the search range is centred on the
 *                     incoming value; on return holds the estimated MV
 * \param riMVPIdx     MVP index, only forwarded to xPatternSearchIntRefine()
 * \param ruiBits      [in,out] bit count; the MV bits are added in the sub-pel branch
 *                     (NOTE(review): the integer-refine helper presumably updates it too - confirm)
 * \param ruiCost      [out] cost of the estimated MV
 * \param amvpInfo     AMVP candidates, only forwarded to xPatternSearchIntRefine()
 * \param bBi          true when called from the iterative bi-prediction search
 */
void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, RefPicList eRefPicList, Mv& rcMvPred, int iRefIdxPred, Mv& rcMv, int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost, const AMVPInfo& amvpInfo, bool bBi)
{
  // Uni-prediction with a non-default GBi index: stop here when xReadBufferedUniMv()
  // reports success (presumably it has filled rcMv/ruiBits/ruiCost from an earlier
  // search - confirm against its definition).
  if( pu.cu->cs->sps->getUseGBi() && pu.cu->GBiIdx != GBI_DEFAULT && !bBi && xReadBufferedUniMv(pu, eRefPicList, iRefIdxPred, rcMvPred, rcMv, ruiBits, ruiCost) )
  {
    return;
  }

  Mv cMvHalf, cMvQter;

  CHECK(eRefPicList >= MAX_NUM_REF_LIST_ADAPT_SR || iRefIdxPred>=int(MAX_IDX_ADAPT_SR), "Invalid reference picture list");
  // Per-reference search range; note that the member is updated even for bi-prediction,
  // where the local range below comes from m_bipredSearchRange instead.
  m_iSearchRange = m_aaiAdaptSR[eRefPicList][iRefIdxPred];

  int    iSrchRng = (bBi ? m_bipredSearchRange : m_iSearchRange);
  double fWeight  = 1.0;

  PelUnitBuf  origBufTmp = m_tmpStorageLCU.getBuf( UnitAreaRelative(*pu.cu, pu) );
  PelUnitBuf* pBuf       = &origBuf;

  if(bBi) // Bi-predictive ME
  {
    // NOTE: Other buf contains predicted signal from another direction
    PelUnitBuf otherBuf = m_tmpPredStorage[1 - (int)eRefPicList].getBuf( UnitAreaRelative(*pu.cu, pu ));
    // Search against a modified copy of the original: removeHighFreq() combines it with
    // the other direction's prediction using the GBi weight of this list.
    origBufTmp.copyFrom(origBuf);
    origBufTmp.removeHighFreq( otherBuf, m_pcEncCfg->getClipForBiPredMeEnabled(), pu.cu->slice->clpRngs() ,getGbiWeight( pu.cu->GBiIdx, eRefPicList ) );
    pBuf = &origBufTmp;

    // Weight applied to the distortion part of the final cost (see the sub-pel branch below)
    fWeight = xGetMEDistortionWeight( pu.cu->GBiIdx, eRefPicList );
  }
  m_cDistParam.isBiPred = bBi;

  // Search key pattern initialization
  CPelBuf  tmpPattern   = pBuf->Y();
  CPelBuf* pcPatternKey = &tmpPattern;

  m_lumaClpRng = pu.cs->slice->clpRng( COMPONENT_Y );

  // Reconstructed luma of the reference picture at the position of this PU
  CPelBuf buf = pu.cu->slice->getRefPic(eRefPicList, iRefIdxPred)->getRecoBuf(pu.blocks[COMPONENT_Y]);

  IntTZSearchStruct cStruct;
  cStruct.pcPatternKey = pcPatternKey;
  cStruct.iRefStride = buf.stride;
  cStruct.piRefY = buf.buf;
  cStruct.imvShift = pu.cu->imv << 1;
  cStruct.inCtuSearch = false;
  cStruct.zeroMV = false;
  {
    // With composite reference enabled, long-term references get the restricted
    // "in CTU" search range handling of xSetSearchRange().
    if (m_useCompositeRef && pu.cs->slice->getRefPic(eRefPicList, iRefIdxPred)->longTerm)
    {
      cStruct.inCtuSearch = true;
    }
  }

  // Optional per-block MV cache; null when the mode controller is not a CacheBlkInfoCtrl
  auto blkCache = dynamic_cast<CacheBlkInfoCtrl*>( m_modeCtrl );

  bool bQTBTMV = false;   // never set to true in this function
  bool bQTBTMV2 = false;  // true when a cached MV is available for this PU/reference
  Mv cIntMv;
  if( !bBi )
  {
    bool bValid = blkCache && blkCache->getMv( pu, eRefPicList, iRefIdxPred, cIntMv );
    if( bValid )
    {
      bQTBTMV2 = true;
      // Cached MV is scaled by 4 before being used as the TZ search start
      // (presumably integer -> quarter-sample units - confirm against the cache).
      cIntMv <<= 2;
    }
  }

  m_pcRdCost->setPredictor( rcMvPred );
  m_pcRdCost->setCostScale(2);

  {
    setWpScalingDistParam(iRefIdxPred, eRefPicList, pu.cu->slice);
  }

  // Remember the reference being searched; xTZSearch() reads these for the hash-based ME
  m_currRefPicList = eRefPicList;
  m_currRefPicIndex = iRefIdxPred;
  m_skipFracME = false;

  // Do integer search
  if( ( m_motionEstimationSearchMethod == MESEARCH_FULL ) || bBi || bQTBTMV )
  {
    // Full search: always used for bi-prediction, centred on rcMv (bi) or the predictor (uni)
    if( !bQTBTMV )
    {
      xSetSearchRange(pu, (bBi ? rcMv : rcMvPred), iSrchRng, cStruct.searchRange , cStruct );
    }
    cStruct.subShiftMode = m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE3 ? 2 : 0;
    xPatternSearch( cStruct, rcMv, ruiCost);
  }
  else if( bQTBTMV2 )
  {
    // Cached MV available: TZ search with fast settings, started from the cached MV
    rcMv = cIntMv;

    cStruct.subShiftMode = ( !m_pcEncCfg->getRestrictMESampling() && m_pcEncCfg->getMotionEstimationSearchMethod() == MESEARCH_SELECTIVE ) ? 1 : ( m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE3 ) ? 2 : 0;
    xTZSearch( pu, cStruct, rcMv, ruiCost, NULL, false, true );
  }
  else
  {
    // Regular fast search started from the MV predictor
    cStruct.subShiftMode = ( !m_pcEncCfg->getRestrictMESampling() && m_pcEncCfg->getMotionEstimationSearchMethod() == MESEARCH_SELECTIVE ) ? 1 : ( m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE3 ) ? 2 : 0;
    rcMv = rcMvPred;
    const Mv *pIntegerMv2Nx2NPred = 0;
    xPatternSearchFast( pu, cStruct, rcMv, ruiCost, pIntegerMv2Nx2NPred );
    // Store the integer result: in the block cache when present, otherwise per reference
    if( blkCache )
    {
      blkCache->setMv( pu.cs->area, eRefPicList, iRefIdxPred, rcMv );
    }
    else
    {
      m_integerMv2Nx2N[eRefPicList][iRefIdxPred] = rcMv;
    }
  }

  DTRACE( g_trace_ctx, D_ME, "%d %d %d :MECostFPel<L%d,%d>: %d,%d,%dx%d, %d", DTRACE_GET_COUNTER( g_trace_ctx, D_ME ), pu.cu->slice->getPOC(), 0, ( int ) eRefPicList, ( int ) bBi, pu.Y().x, pu.Y().y, pu.Y().width, pu.Y().height, ruiCost );
  // sub-pel refinement for sub-pel resolution
  if( pu.cu->imv == 0 )
  {
    if( m_pcEncCfg->getMCTSEncConstraint() )
    {
      Area curTileAreaSubPelRestricted = pu.cs->picture->mctsInfo.getTileAreaSubPelRestricted( pu );

      // Area adjustment, because subpel refinement is going to (x-1;y-1) direction
      curTileAreaSubPelRestricted.x += 1;
      curTileAreaSubPelRestricted.y += 1;
      curTileAreaSubPelRestricted.width -= 1;
      curTileAreaSubPelRestricted.height -= 1;

      // Clip the integer MV into the adjusted area when the check below fails
      if( ! MCTSHelper::checkMvIsNotInRestrictedArea( pu, rcMv, curTileAreaSubPelRestricted, MV_PRECISION_INT ) )
      {
        MCTSHelper::clipMvToArea( rcMv, pu.Y(), curTileAreaSubPelRestricted, *pu.cs->sps, 0 );
      }
    }
    xPatternSearchFracDIF( pu, eRefPicList, iRefIdxPred, cStruct, rcMv, cMvHalf, cMvQter, ruiCost );
    m_pcRdCost->setCostScale( 0 );
    // Merge integer, half and quarter parts into one MV: 4*int + 2*half + quarter
    rcMv <<= 2;
    rcMv += ( cMvHalf <<= 1 );
    rcMv += cMvQter;
    uint32_t uiMvBits = m_pcRdCost->getBitsOfVectorWithPredictor( rcMv.getHor(), rcMv.getVer(), cStruct.imvShift );
    ruiBits += uiMvBits;
    // Take the MV bit cost out of the search cost, scale what remains by fWeight
    // (rounded down), then add the cost of the complete bit count.
    ruiCost = ( Distortion ) ( floor( fWeight * ( ( double ) ruiCost - ( double ) m_pcRdCost->getCost( uiMvBits ) ) ) + ( double ) m_pcRdCost->getCost( ruiBits ) );
  }
  else // integer refinement for integer-pel and 4-pel resolution
  {
    xPatternSearchIntRefine( pu, cStruct, rcMv, rcMvPred, riMVPIdx, ruiBits, ruiCost, amvpInfo, fWeight);
  }
  DTRACE(g_trace_ctx, D_ME, " MECost<L%d,%d>: %6d (%d) MV:%d,%d\n", (int)eRefPicList, (int)bBi, ruiCost, ruiBits, rcMv.getHor() << 2, rcMv.getVer() << 2);
}
iMvShift ); sr.left = mvTL.hor; sr.top = mvTL.ver; sr.right = mvBR.hor; sr.bottom = mvBR.ver; if (m_useCompositeRef && cStruct.inCtuSearch) { Position posRB = pu.Y().bottomRight(); Position posTL = pu.Y().topLeft(); const PreCalcValues *pcv = pu.cs->pcv; Position posRBinCTU(posRB.x & pcv->maxCUWidthMask, posRB.y & pcv->maxCUHeightMask); Position posLTinCTU = Position(posTL.x & pcv->maxCUWidthMask, posTL.y & pcv->maxCUHeightMask).offset(-4, -4); if (sr.left < -posLTinCTU.x) sr.left = -posLTinCTU.x; if (sr.top < -posLTinCTU.y) sr.top = -posLTinCTU.y; if (sr.right >((int)pcv->maxCUWidth - 4 - posRBinCTU.x)) sr.right = (int)pcv->maxCUWidth - 4 - posRBinCTU.x; if (sr.bottom >((int)pcv->maxCUHeight - 4 - posRBinCTU.y)) sr.bottom = (int)pcv->maxCUHeight - 4 - posRBinCTU.y; if (posLTinCTU.x == -4 || posLTinCTU.y == -4) { sr.left = sr.right = sr.bottom = sr.top = 0; cStruct.zeroMV = 1; } if (posRBinCTU.x == pcv->maxCUWidthMask || posRBinCTU.y == pcv->maxCUHeightMask) { sr.left = sr.right = sr.bottom = sr.top = 0; cStruct.zeroMV = 1; } } } void InterSearch::xPatternSearch( IntTZSearchStruct& cStruct, Mv& rcMv, Distortion& ruiSAD ) { Distortion uiSad; Distortion uiSadBest = std::numeric_limits<Distortion>::max(); int iBestX = 0; int iBestY = 0; //-- jclee for using the SAD function pointer m_pcRdCost->setDistParam( m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, cStruct.subShiftMode ); const SearchRange& sr = cStruct.searchRange; const Pel* piRef = cStruct.piRefY + (sr.top * cStruct.iRefStride); for ( int y = sr.top; y <= sr.bottom; y++ ) { for ( int x = sr.left; x <= sr.right; x++ ) { // find min. 
distortion position m_cDistParam.cur.buf = piRef + x; uiSad = m_cDistParam.distFunc( m_cDistParam ); // motion cost uiSad += m_pcRdCost->getCostOfVectorWithPredictor( x, y, cStruct.imvShift ); if ( uiSad < uiSadBest ) { uiSadBest = uiSad; iBestX = x; iBestY = y; m_cDistParam.maximumDistortionForEarlyExit = uiSad; } } piRef += cStruct.iRefStride; } rcMv.set( iBestX, iBestY ); cStruct.uiBestSad = uiSadBest; // th for testing ruiSAD = uiSadBest - m_pcRdCost->getCostOfVectorWithPredictor( iBestX, iBestY, cStruct.imvShift ); return; } void InterSearch::xPatternSearchFast( const PredictionUnit& pu, IntTZSearchStruct& cStruct, Mv& rcMv, Distortion& ruiSAD, const Mv* const pIntegerMv2Nx2NPred ) { switch ( m_motionEstimationSearchMethod ) { case MESEARCH_DIAMOND: xTZSearch ( pu, cStruct, rcMv, ruiSAD, pIntegerMv2Nx2NPred, false ); break; case MESEARCH_SELECTIVE: xTZSearchSelective( pu, cStruct, rcMv, ruiSAD, pIntegerMv2Nx2NPred ); break; case MESEARCH_DIAMOND_ENHANCED: xTZSearch ( pu, cStruct, rcMv, ruiSAD, pIntegerMv2Nx2NPred, true ); break; case MESEARCH_FULL: // shouldn't get here. default: break; } } void InterSearch::xTZSearch( const PredictionUnit& pu, IntTZSearchStruct& cStruct, Mv& rcMv, Distortion& ruiSAD, const Mv* const pIntegerMv2Nx2NPred, const bool bExtendedSettings, const bool bFastSettings) { const bool bUseRasterInFastMode = true; //toggle this to further reduce runtime const bool bUseAdaptiveRaster = bExtendedSettings; const int iRaster = (bFastSettings && bUseRasterInFastMode) ? 8 : 5; const bool bTestZeroVector = true && !bFastSettings; const bool bTestZeroVectorStart = bExtendedSettings; const bool bTestZeroVectorStop = false; const bool bFirstSearchDiamond = true; // 1 = xTZ8PointDiamondSearch 0 = xTZ8PointSquareSearch const bool bFirstCornersForDiamondDist1 = bExtendedSettings; const bool bFirstSearchStop = m_pcEncCfg->getFastMEAssumingSmootherMVEnabled(); const uint32_t uiFirstSearchRounds = bFastSettings ? 
(bUseRasterInFastMode?3:2) : 3; // first search stop X rounds after best match (must be >=1) const bool bEnableRasterSearch = bFastSettings ? bUseRasterInFastMode : true; const bool bAlwaysRasterSearch = bExtendedSettings; // true: BETTER but factor 2 slower const bool bRasterRefinementEnable = false; // enable either raster refinement or star refinement const bool bRasterRefinementDiamond = false; // 1 = xTZ8PointDiamondSearch 0 = xTZ8PointSquareSearch const bool bRasterRefinementCornersForDiamondDist1 = bExtendedSettings; const bool bStarRefinementEnable = true; // enable either star refinement or raster refinement const bool bStarRefinementDiamond = true; // 1 = xTZ8PointDiamondSearch 0 = xTZ8PointSquareSearch const bool bStarRefinementCornersForDiamondDist1 = bExtendedSettings; const bool bStarRefinementStop = false || bFastSettings; const uint32_t uiStarRefinementRounds = 2; // star refinement stop X rounds after best match (must be >=1) const bool bNewZeroNeighbourhoodTest = bExtendedSettings; int iSearchRange = m_iSearchRange; rcMv.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); if( m_pcEncCfg->getMCTSEncConstraint() ) { MCTSHelper::clipMvToArea( rcMv, pu.Y(), pu.cs->picture->mctsInfo.getTileArea(), *pu.cs->sps ); } else clipMv( rcMv, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps ); rcMv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); rcMv.divideByPowerOf2(2); // init TZSearchStruct cStruct.uiBestSad = std::numeric_limits<Distortion>::max(); // m_cDistParam.maximumDistortionForEarlyExit = cStruct.uiBestSad; m_pcRdCost->setDistParam( m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, cStruct.subShiftMode ); // distortion // set rcMv (Median predictor) as start point and as best point xTZSearchHelp( cStruct, rcMv.getHor(), rcMv.getVer(), 0, 0 ); // test whether zero Mv is better start point than Median predictor if ( bTestZeroVector ) { if ((rcMv.getHor() != 0 || rcMv.getVer() 
!= 0) && (0 != cStruct.iBestX || 0 != cStruct.iBestY)) { // only test 0-vector if not obviously previously tested. xTZSearchHelp( cStruct, 0, 0, 0, 0 ); } } SearchRange& sr = cStruct.searchRange; if (pIntegerMv2Nx2NPred != 0) { Mv integerMv2Nx2NPred = *pIntegerMv2Nx2NPred; integerMv2Nx2NPred.changePrecision(MV_PRECISION_INT, MV_PRECISION_INTERNAL); if( m_pcEncCfg->getMCTSEncConstraint() ) { MCTSHelper::clipMvToArea( integerMv2Nx2NPred, pu.Y(), pu.cs->picture->mctsInfo.getTileArea(), *pu.cs->sps ); } else clipMv( integerMv2Nx2NPred, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps ); integerMv2Nx2NPred.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); integerMv2Nx2NPred.divideByPowerOf2(2); if ((rcMv != integerMv2Nx2NPred) && (integerMv2Nx2NPred.getHor() != cStruct.iBestX || integerMv2Nx2NPred.getVer() != cStruct.iBestY)) { // only test integerMv2Nx2NPred if not obviously previously tested. xTZSearchHelp( cStruct, integerMv2Nx2NPred.getHor(), integerMv2Nx2NPred.getVer(), 0, 0); } } { // set search range Mv currBestMv(cStruct.iBestX, cStruct.iBestY ); currBestMv <<= 2; xSetSearchRange(pu, currBestMv, m_iSearchRange >> (bFastSettings ? 
1 : 0), sr , cStruct ); } if (m_pcEncCfg->getUseHashME()) { int width = pu.cu->lumaSize().width; int height = pu.cu->lumaSize().height; if ((width == height && width <= 64 && width >= 4) || (width == 8 && height == 4) || (width == 4 && height == 8)) { Mv otherMvps[5]; int numberOfOtherMvps; numberOfOtherMvps = xHashInterPredME(pu, m_currRefPicList, m_currRefPicIndex, otherMvps); for (int i = 0; i < numberOfOtherMvps; i++) { xTZSearchHelp(cStruct, otherMvps[i].getHor(), otherMvps[i].getVer(), 0, 0); } if (numberOfOtherMvps > 0) { // write out best match rcMv.set(cStruct.iBestX, cStruct.iBestY); ruiSAD = cStruct.uiBestSad - m_pcRdCost->getCostOfVectorWithPredictor(cStruct.iBestX, cStruct.iBestY, cStruct.imvShift); m_skipFracME = true; return; } } } // start search int iDist = 0; int iStartX = cStruct.iBestX; int iStartY = cStruct.iBestY; const bool bBestCandidateZero = (cStruct.iBestX == 0) && (cStruct.iBestY == 0); // first search around best position up to now. // The following works as a "subsampled/log" window search around the best candidate for ( iDist = 1; iDist <= iSearchRange; iDist*=2 ) { if ( bFirstSearchDiamond == 1 ) { xTZ8PointDiamondSearch ( cStruct, iStartX, iStartY, iDist, bFirstCornersForDiamondDist1 ); } else { xTZ8PointSquareSearch ( cStruct, iStartX, iStartY, iDist ); } if ( bFirstSearchStop && ( cStruct.uiBestRound >= uiFirstSearchRounds ) ) // stop criterion { break; } } if (!bNewZeroNeighbourhoodTest) { // test whether zero Mv is a better start point than Median predictor if ( bTestZeroVectorStart && ((cStruct.iBestX != 0) || (cStruct.iBestY != 0)) ) { xTZSearchHelp( cStruct, 0, 0, 0, 0 ); if ( (cStruct.iBestX == 0) && (cStruct.iBestY == 0) ) { // test its neighborhood for ( iDist = 1; iDist <= iSearchRange; iDist*=2 ) { xTZ8PointDiamondSearch( cStruct, 0, 0, iDist, false ); if ( bTestZeroVectorStop && (cStruct.uiBestRound > 0) ) // stop criterion { break; } } } } } else { // Test also zero neighbourhood but with half the range // It was 
reported that the original (above) search scheme using bTestZeroVectorStart did not // make sense since one would have already checked the zero candidate earlier // and thus the conditions for that test would have not been satisfied if (bTestZeroVectorStart == true && bBestCandidateZero != true) { for ( iDist = 1; iDist <= (iSearchRange >> 1); iDist*=2 ) { xTZ8PointDiamondSearch( cStruct, 0, 0, iDist, false ); if ( bTestZeroVectorStop && (cStruct.uiBestRound > 2) ) // stop criterion { break; } } } } // calculate only 2 missing points instead 8 points if cStruct.uiBestDistance == 1 if ( cStruct.uiBestDistance == 1 ) { cStruct.uiBestDistance = 0; xTZ2PointSearch( cStruct ); } // raster search if distance is too big if (bUseAdaptiveRaster) { int iWindowSize = iRaster; SearchRange localsr = sr; if (!(bEnableRasterSearch && ( ((int)(cStruct.uiBestDistance) >= iRaster)))) { iWindowSize ++; localsr.left /= 2; localsr.right /= 2; localsr.top /= 2; localsr.bottom /= 2; } cStruct.uiBestDistance = iWindowSize; for ( iStartY = localsr.top; iStartY <= localsr.bottom; iStartY += iWindowSize ) { for ( iStartX = localsr.left; iStartX <= localsr.right; iStartX += iWindowSize ) { xTZSearchHelp( cStruct, iStartX, iStartY, 0, iWindowSize ); } } } else { if ( bEnableRasterSearch && ( ((int)(cStruct.uiBestDistance) >= iRaster) || bAlwaysRasterSearch ) ) { cStruct.uiBestDistance = iRaster; for ( iStartY = sr.top; iStartY <= sr.bottom; iStartY += iRaster ) { for ( iStartX = sr.left; iStartX <= sr.right; iStartX += iRaster ) { xTZSearchHelp( cStruct, iStartX, iStartY, 0, iRaster ); } } } } // raster refinement if ( bRasterRefinementEnable && cStruct.uiBestDistance > 0 ) { while ( cStruct.uiBestDistance > 0 ) { iStartX = cStruct.iBestX; iStartY = cStruct.iBestY; if ( cStruct.uiBestDistance > 1 ) { iDist = cStruct.uiBestDistance >>= 1; if ( bRasterRefinementDiamond == 1 ) { xTZ8PointDiamondSearch ( cStruct, iStartX, iStartY, iDist, bRasterRefinementCornersForDiamondDist1 ); } else { 
xTZ8PointSquareSearch ( cStruct, iStartX, iStartY, iDist ); } } // calculate only 2 missing points instead 8 points if cStruct.uiBestDistance == 1 if ( cStruct.uiBestDistance == 1 ) { cStruct.uiBestDistance = 0; if ( cStruct.ucPointNr != 0 ) { xTZ2PointSearch( cStruct ); } } } } // star refinement if ( bStarRefinementEnable && cStruct.uiBestDistance > 0 ) { while ( cStruct.uiBestDistance > 0 ) { iStartX = cStruct.iBestX; iStartY = cStruct.iBestY; cStruct.uiBestDistance = 0; cStruct.ucPointNr = 0; for ( iDist = 1; iDist < iSearchRange + 1; iDist*=2 ) { if ( bStarRefinementDiamond == 1 ) { xTZ8PointDiamondSearch ( cStruct, iStartX, iStartY, iDist, bStarRefinementCornersForDiamondDist1 ); } else { xTZ8PointSquareSearch ( cStruct, iStartX, iStartY, iDist ); } if ( bStarRefinementStop && (cStruct.uiBestRound >= uiStarRefinementRounds) ) // stop criterion { break; } } // calculate only 2 missing points instead 8 points if cStrukt.uiBestDistance == 1 if ( cStruct.uiBestDistance == 1 ) { cStruct.uiBestDistance = 0; if ( cStruct.ucPointNr != 0 ) { xTZ2PointSearch( cStruct ); } } } } // write out best match rcMv.set( cStruct.iBestX, cStruct.iBestY ); ruiSAD = cStruct.uiBestSad - m_pcRdCost->getCostOfVectorWithPredictor( cStruct.iBestX, cStruct.iBestY, cStruct.imvShift ); } void InterSearch::xTZSearchSelective( const PredictionUnit& pu, IntTZSearchStruct& cStruct, Mv &rcMv, Distortion &ruiSAD, const Mv* const pIntegerMv2Nx2NPred ) { const bool bTestZeroVector = true; const bool bEnableRasterSearch = true; const bool bAlwaysRasterSearch = false; // 1: BETTER but factor 15x slower const bool bStarRefinementEnable = true; // enable either star refinement or raster refinement const bool bStarRefinementDiamond = true; // 1 = xTZ8PointDiamondSearch 0 = xTZ8PointSquareSearch const bool bStarRefinementStop = false; const uint32_t uiStarRefinementRounds = 2; // star refinement stop X rounds after best match (must be >=1) const int iSearchRange = m_iSearchRange; const int 
iSearchRangeInitial = m_iSearchRange >> 2; const int uiSearchStep = 4; const int iMVDistThresh = 8; int iStartX = 0; int iStartY = 0; int iDist = 0; rcMv.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); clipMv( rcMv, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps ); rcMv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); rcMv.divideByPowerOf2(2); // init TZSearchStruct cStruct.uiBestSad = std::numeric_limits<Distortion>::max(); cStruct.iBestX = 0; cStruct.iBestY = 0; m_cDistParam.maximumDistortionForEarlyExit = cStruct.uiBestSad; m_pcRdCost->setDistParam( m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, cStruct.subShiftMode ); // set rcMv (Median predictor) as start point and as best point xTZSearchHelp( cStruct, rcMv.getHor(), rcMv.getVer(), 0, 0 ); // test whether zero Mv is better start point than Median predictor if ( bTestZeroVector ) { xTZSearchHelp( cStruct, 0, 0, 0, 0 ); } SearchRange& sr = cStruct.searchRange; if ( pIntegerMv2Nx2NPred != 0 ) { Mv integerMv2Nx2NPred = *pIntegerMv2Nx2NPred; integerMv2Nx2NPred.changePrecision(MV_PRECISION_INT, MV_PRECISION_INTERNAL); clipMv( integerMv2Nx2NPred, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps ); integerMv2Nx2NPred.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); integerMv2Nx2NPred.divideByPowerOf2(2); xTZSearchHelp( cStruct, integerMv2Nx2NPred.getHor(), integerMv2Nx2NPred.getVer(), 0, 0); } { // set search range Mv currBestMv(cStruct.iBestX, cStruct.iBestY ); currBestMv <<= 2; xSetSearchRange( pu, currBestMv, m_iSearchRange, sr , cStruct ); } if (m_pcEncCfg->getUseHashME()) { int width = pu.cu->lumaSize().width; int height = pu.cu->lumaSize().height; if ((width == height && width <= 64 && width >= 4) || (width == 8 && height == 4) || (width == 4 && height == 8)) { Mv otherMvps[5]; int numberOfOtherMvps; numberOfOtherMvps = xHashInterPredME(pu, m_currRefPicList, m_currRefPicIndex, otherMvps); for (int i = 0; i < 
numberOfOtherMvps; i++) { xTZSearchHelp(cStruct, otherMvps[i].getHor(), otherMvps[i].getVer(), 0, 0); } if (numberOfOtherMvps > 0) { // write out best match rcMv.set(cStruct.iBestX, cStruct.iBestY); ruiSAD = cStruct.uiBestSad - m_pcRdCost->getCostOfVectorWithPredictor(cStruct.iBestX, cStruct.iBestY, cStruct.imvShift); m_skipFracME = true; return; } } } // Initial search int iBestX = cStruct.iBestX; int iBestY = cStruct.iBestY; int iFirstSrchRngHorLeft = ((iBestX - iSearchRangeInitial) > sr.left) ? (iBestX - iSearchRangeInitial) : sr.left; int iFirstSrchRngVerTop = ((iBestY - iSearchRangeInitial) > sr.top) ? (iBestY - iSearchRangeInitial) : sr.top; int iFirstSrchRngHorRight = ((iBestX + iSearchRangeInitial) < sr.right) ? (iBestX + iSearchRangeInitial) : sr.right; int iFirstSrchRngVerBottom = ((iBestY + iSearchRangeInitial) < sr.bottom) ? (iBestY + iSearchRangeInitial) : sr.bottom; for ( iStartY = iFirstSrchRngVerTop; iStartY <= iFirstSrchRngVerBottom; iStartY += uiSearchStep ) { for ( iStartX = iFirstSrchRngHorLeft; iStartX <= iFirstSrchRngHorRight; iStartX += uiSearchStep ) { xTZSearchHelp( cStruct, iStartX, iStartY, 0, 0 ); xTZ8PointDiamondSearch ( cStruct, iStartX, iStartY, 1, false ); xTZ8PointDiamondSearch ( cStruct, iStartX, iStartY, 2, false ); } } int iMaxMVDistToPred = (abs(cStruct.iBestX - iBestX) > iMVDistThresh || abs(cStruct.iBestY - iBestY) > iMVDistThresh); //full search with early exit if MV is distant from predictors if ( bEnableRasterSearch && (iMaxMVDistToPred || bAlwaysRasterSearch) ) { for ( iStartY = sr.top; iStartY <= sr.bottom; iStartY += 1 ) { for ( iStartX = sr.left; iStartX <= sr.right; iStartX += 1 ) { xTZSearchHelp( cStruct, iStartX, iStartY, 0, 1 ); } } } //Smaller MV, refine around predictor else if ( bStarRefinementEnable && cStruct.uiBestDistance > 0 ) { // start refinement while ( cStruct.uiBestDistance > 0 ) { iStartX = cStruct.iBestX; iStartY = cStruct.iBestY; cStruct.uiBestDistance = 0; cStruct.ucPointNr = 0; for ( iDist = 1; 
iDist < iSearchRange + 1; iDist*=2 ) { if ( bStarRefinementDiamond == 1 ) { xTZ8PointDiamondSearch ( cStruct, iStartX, iStartY, iDist, false ); } else { xTZ8PointSquareSearch ( cStruct, iStartX, iStartY, iDist ); } if ( bStarRefinementStop && (cStruct.uiBestRound >= uiStarRefinementRounds) ) // stop criterion { break; } } // calculate only 2 missing points instead 8 points if cStrukt.uiBestDistance == 1 if ( cStruct.uiBestDistance == 1 ) { cStruct.uiBestDistance = 0; if ( cStruct.ucPointNr != 0 ) { xTZ2PointSearch( cStruct ); } } } } // write out best match rcMv.set( cStruct.iBestX, cStruct.iBestY ); ruiSAD = cStruct.uiBestSad - m_pcRdCost->getCostOfVectorWithPredictor( cStruct.iBestX, cStruct.iBestY, cStruct.imvShift ); } void InterSearch::xPatternSearchIntRefine(PredictionUnit& pu, IntTZSearchStruct& cStruct, Mv& rcMv, Mv& rcMvPred, int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost, const AMVPInfo& amvpInfo, double fWeight) { CHECK( pu.cu->imv == 0, "xPatternSearchIntRefine(): IMV not used."); CHECK( amvpInfo.mvCand[riMVPIdx] != rcMvPred, "xPatternSearchIntRefine(): MvPred issue."); const SPS &sps = *pu.cs->sps; #if JVET_N0329_IBC_SEARCH_IMP m_pcRdCost->setDistParam(m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, 0, 1, m_pcEncCfg->getUseHADME() && !pu.cu->transQuantBypass && !pu.cs->slice->getDisableSATDForRD()); #else m_pcRdCost->setDistParam( m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, 0, 1, m_pcEncCfg->getUseHADME() && !pu.cu->transQuantBypass ); #endif // input MV rcMV has integer resolution // -> shift it to QPEL rcMv <<= 2; // -> set MV scale for cost calculation to QPEL (0) m_pcRdCost->setCostScale ( 0 ); Distortion uiDist, uiSATD = 0; Distortion uiBestDist = std::numeric_limits<Distortion>::max(); // subtract old MVP costs because costs for all newly tested MVPs are added in here ruiBits -= m_auiMVPIdxCost[riMVPIdx][AMVP_MAX_NUM_CANDS]; Mv 
cBestMv = rcMv; Mv cBaseMvd[2]; int iBestBits = 0; int iBestMVPIdx = riMVPIdx; int testPos[9][2] = { { 0, 0}, { -1, -1},{ -1, 0},{ -1, 1},{ 0, -1},{ 0, 1},{ 1, -1},{ 1, 0},{ 1, 1} }; cBaseMvd[0] = (rcMv - amvpInfo.mvCand[0]); cBaseMvd[1] = (rcMv - amvpInfo.mvCand[1]); CHECK( (cBaseMvd[0].getHor() & 0x03) != 0 || (cBaseMvd[0].getVer() & 0x03) != 0 , "xPatternSearchIntRefine(): AMVP cand 0 Mvd issue."); CHECK( (cBaseMvd[1].getHor() & 0x03) != 0 || (cBaseMvd[1].getVer() & 0x03) != 0 , "xPatternSearchIntRefine(): AMVP cand 1 Mvd issue."); cBaseMvd[0].roundToAmvrSignalPrecision(MV_PRECISION_QUARTER, pu.cu->imv); cBaseMvd[1].roundToAmvrSignalPrecision(MV_PRECISION_QUARTER, pu.cu->imv); int mvOffset = 1 << cStruct.imvShift; // test best integer position and all 8 neighboring positions for (int pos = 0; pos < 9; pos ++) { Mv cTestMv[2]; // test both AMVP candidates for each position for (int iMVPIdx = 0; iMVPIdx < amvpInfo.numCand; iMVPIdx++) { cTestMv[iMVPIdx].set(testPos[pos][0]*mvOffset, testPos[pos][1]*mvOffset); cTestMv[iMVPIdx] += cBaseMvd[iMVPIdx]; cTestMv[iMVPIdx] += amvpInfo.mvCand[iMVPIdx]; // MCTS and IMV if( m_pcEncCfg->getMCTSEncConstraint() ) { Mv cTestMVRestr = cTestMv[iMVPIdx]; cTestMVRestr.changePrecision( MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL ); MCTSHelper::clipMvToArea( cTestMVRestr, pu.cu->Y(), pu.cs->picture->mctsInfo.getTileAreaIntPelRestricted( pu ), *pu.cs->sps ); cTestMVRestr.changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER ); if( cTestMVRestr != cTestMv[iMVPIdx] ) { // Skip this IMV pos, cause clipping affects IMV scaling continue; } } if ( iMVPIdx == 0 || cTestMv[0] != cTestMv[1]) { Mv cTempMV = cTestMv[iMVPIdx]; if( !m_pcEncCfg->getMCTSEncConstraint() ) { cTempMV.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); clipMv(cTempMV, pu.cu->lumaPos(), pu.cu->lumaSize(), sps); cTempMV.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); } m_cDistParam.cur.buf = cStruct.piRefY + cStruct.iRefStride * 
(cTempMV.getVer() >> 2) + (cTempMV.getHor() >> 2); uiDist = uiSATD = (Distortion) (m_cDistParam.distFunc( m_cDistParam ) * fWeight); } else { uiDist = uiSATD; } int iMvBits = m_auiMVPIdxCost[iMVPIdx][AMVP_MAX_NUM_CANDS]; m_pcRdCost->setPredictor( amvpInfo.mvCand[iMVPIdx] ); iMvBits += m_pcRdCost->getBitsOfVectorWithPredictor( cTestMv[iMVPIdx].getHor(), cTestMv[iMVPIdx].getVer(), cStruct.imvShift ); #if JVET_N0168_AMVR_ME_MODIFICATION uiDist += m_pcRdCost->getCost(iMvBits); #else uiDist += m_pcRdCost->getCostOfVectorWithPredictor( cTestMv[iMVPIdx].getHor(), cTestMv[iMVPIdx].getVer(), cStruct.imvShift ); #endif if (uiDist < uiBestDist) { uiBestDist = uiDist; cBestMv = cTestMv[iMVPIdx]; iBestMVPIdx = iMVPIdx; iBestBits = iMvBits; } } } if( uiBestDist == std::numeric_limits<Distortion>::max() ) { ruiCost = std::numeric_limits<Distortion>::max(); return; } rcMv = cBestMv; rcMvPred = amvpInfo.mvCand[iBestMVPIdx]; riMVPIdx = iBestMVPIdx; m_pcRdCost->setPredictor( rcMvPred ); ruiBits += iBestBits; // taken from JEM 5.0 // verify since it makes no sence to subtract Lamda*(Rmvd+Rmvpidx) from D+Lamda(Rmvd) // this would take the rate for the MVP idx out of the cost calculation // however this rate is always 1 so impact is small ruiCost = uiBestDist - m_pcRdCost->getCost(iBestBits) + m_pcRdCost->getCost(ruiBits); // taken from JEM 5.0 // verify since it makes no sense to add rate for MVDs twicce #if JVET_N0168_AMVR_ME_MODIFICATION == 0 ruiBits += m_pcRdCost->getBitsOfVectorWithPredictor(rcMv.getHor(), rcMv.getVer(), cStruct.imvShift); #endif return; } void InterSearch::xPatternSearchFracDIF( const PredictionUnit& pu, RefPicList eRefPicList, int iRefIdx, IntTZSearchStruct& cStruct, const Mv& rcMvInt, Mv& rcMvHalf, Mv& rcMvQter, Distortion& ruiCost ) { const bool bIsLosslessCoded = pu.cu->transQuantBypass; // Reference pattern initialization (integer scale) int iOffset = rcMvInt.getHor() + rcMvInt.getVer() * cStruct.iRefStride; CPelBuf cPatternRoi(cStruct.piRefY + iOffset, 
cStruct.iRefStride, *cStruct.pcPatternKey); if (m_skipFracME) { Mv baseRefMv(0, 0); rcMvHalf.setZero(); m_pcRdCost->setCostScale(0); xExtDIFUpSamplingH(&cPatternRoi); rcMvQter = rcMvInt; rcMvQter <<= 2; // for mv-cost #if JVET_N0329_IBC_SEARCH_IMP ruiCost = xPatternRefinement(cStruct.pcPatternKey, baseRefMv, 1, rcMvQter, !bIsLosslessCoded && !pu.cs->slice->getDisableSATDForRD()); #else ruiCost = xPatternRefinement(cStruct.pcPatternKey, baseRefMv, 1, rcMvQter, !bIsLosslessCoded); #endif return; } if (cStruct.imvShift || (m_useCompositeRef && cStruct.zeroMV)) { #if JVET_N0329_IBC_SEARCH_IMP m_pcRdCost->setDistParam(m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY + iOffset, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, 0, 1, m_pcEncCfg->getUseHADME() && !bIsLosslessCoded && !pu.cs->slice->getDisableSATDForRD()); #else m_pcRdCost->setDistParam( m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY + iOffset, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, 0, 1, m_pcEncCfg->getUseHADME() && !bIsLosslessCoded ); #endif ruiCost = m_cDistParam.distFunc( m_cDistParam ); ruiCost += m_pcRdCost->getCostOfVectorWithPredictor( rcMvInt.getHor(), rcMvInt.getVer(), cStruct.imvShift ); return; } // Half-pel refinement m_pcRdCost->setCostScale(1); xExtDIFUpSamplingH ( &cPatternRoi ); rcMvHalf = rcMvInt; rcMvHalf <<= 1; // for mv-cost Mv baseRefMv(0, 0); #if JVET_N0329_IBC_SEARCH_IMP ruiCost = xPatternRefinement(cStruct.pcPatternKey, baseRefMv, 2, rcMvHalf, (!bIsLosslessCoded && !pu.cs->slice->getDisableSATDForRD())); #else ruiCost = xPatternRefinement(cStruct.pcPatternKey, baseRefMv, 2, rcMvHalf, !bIsLosslessCoded); #endif // quarter-pel refinement m_pcRdCost->setCostScale( 0 ); xExtDIFUpSamplingQ ( &cPatternRoi, rcMvHalf ); baseRefMv = rcMvHalf; baseRefMv <<= 1; rcMvQter = rcMvInt; rcMvQter <<= 1; // for mv-cost rcMvQter += rcMvHalf; rcMvQter <<= 1; #if JVET_N0329_IBC_SEARCH_IMP ruiCost = xPatternRefinement(cStruct.pcPatternKey, baseRefMv, 1, rcMvQter, (!bIsLosslessCoded && 
!pu.cs->slice->getDisableSATDForRD())); #else ruiCost = xPatternRefinement( cStruct.pcPatternKey, baseRefMv, 1, rcMvQter, !bIsLosslessCoded ); #endif } Distortion InterSearch::xGetSymmetricCost( PredictionUnit& pu, PelUnitBuf& origBuf, RefPicList eCurRefPicList, const MvField& cCurMvField, MvField& cTarMvField, int gbiIdx ) { Distortion cost = std::numeric_limits<Distortion>::max(); RefPicList eTarRefPicList = (RefPicList)(1 - (int)eCurRefPicList); // get prediction of eCurRefPicList PelUnitBuf predBufA = m_tmpPredStorage[eCurRefPicList].getBuf( UnitAreaRelative( *pu.cu, pu ) ); const Picture* picRefA = pu.cu->slice->getRefPic( eCurRefPicList, cCurMvField.refIdx ); Mv mvA = cCurMvField.mv; mvA.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); clipMv( mvA, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps ); xPredInterBlk( COMPONENT_Y, pu, picRefA, mvA, predBufA, true, pu.cu->slice->clpRng( COMPONENT_Y ), false, false ); // get prediction of eTarRefPicList PelUnitBuf predBufB = m_tmpPredStorage[eTarRefPicList].getBuf( UnitAreaRelative( *pu.cu, pu ) ); const Picture* picRefB = pu.cu->slice->getRefPic( eTarRefPicList, cTarMvField.refIdx ); Mv mvB = cTarMvField.mv; mvB.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); clipMv( mvB, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps ); xPredInterBlk( COMPONENT_Y, pu, picRefB, mvB, predBufB, true, pu.cu->slice->clpRng( COMPONENT_Y ), false, false ); PelUnitBuf bufTmp = m_tmpStorageLCU.getBuf( UnitAreaRelative( *pu.cu, pu ) ); if (gbiIdx != GBI_DEFAULT) bufTmp.Y().addWeightedAvg(predBufA.Y(), predBufB.Y(), pu.cu->slice->clpRng(COMPONENT_Y), gbiIdx); else bufTmp.Y().addAvg( predBufA.Y(), predBufB.Y(), pu.cu->slice->clpRng( COMPONENT_Y ) ); // calc distortion #if JVET_N0329_IBC_SEARCH_IMP DFunc distFunc = (!pu.cu->transQuantBypass && !pu.cu->slice->getDisableSATDForRD()) ? 
DF_HAD : DF_SAD; cost = m_pcRdCost->getDistPart(bufTmp.Y(), origBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, distFunc); #else cost = m_pcRdCost->getDistPart(bufTmp.Y(), origBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_HAD); #endif return(cost); } Distortion InterSearch::xSymmeticRefineMvSearch( PredictionUnit &pu, PelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred , RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion uiMinCost, int SearchPattern, int nSearchStepShift, uint32_t uiMaxSearchRounds, int gbiIdx ) { const Mv mvSearchOffsetCross[4] = { Mv( 0 , 1 ) , Mv( 1 , 0 ) , Mv( 0 , -1 ) , Mv( -1 , 0 ) }; const Mv mvSearchOffsetSquare[8] = { Mv( -1 , 1 ) , Mv( 0 , 1 ) , Mv( 1 , 1 ) , Mv( 1 , 0 ) , Mv( 1 , -1 ) , Mv( 0 , -1 ) , Mv( -1 , -1 ) , Mv( -1 , 0 ) }; const Mv mvSearchOffsetDiamond[8] = { Mv( 0 , 2 ) , Mv( 1 , 1 ) , Mv( 2 , 0 ) , Mv( 1 , -1 ) , Mv( 0 , -2 ) , Mv( -1 , -1 ) , Mv( -2 , 0 ) , Mv( -1 , 1 ) }; const Mv mvSearchOffsetHexagon[6] = { Mv( 2 , 0 ) , Mv( 1 , 2 ) , Mv( -1 , 2 ) , Mv( -2 , 0 ) , Mv( -1 , -2 ) , Mv( 1 , -2 ) }; int nDirectStart = 0, nDirectEnd = 0, nDirectRounding = 0, nDirectMask = 0; const Mv * pSearchOffset; if ( SearchPattern == 0 ) { nDirectEnd = 3; nDirectRounding = 4; nDirectMask = 0x03; pSearchOffset = mvSearchOffsetCross; } else if ( SearchPattern == 1 ) { nDirectEnd = 7; nDirectRounding = 8; nDirectMask = 0x07; pSearchOffset = mvSearchOffsetSquare; } else if ( SearchPattern == 2 ) { nDirectEnd = 7; nDirectRounding = 8; nDirectMask = 0x07; pSearchOffset = mvSearchOffsetDiamond; } else if ( SearchPattern == 3 ) { nDirectEnd = 5; pSearchOffset = mvSearchOffsetHexagon; } else { THROW( "Invalid search pattern" ); } int nBestDirect; for ( uint32_t uiRound = 0; uiRound < uiMaxSearchRounds; uiRound++ ) { nBestDirect = -1; MvField mvCurCenter = rCurMvField; for ( int nIdx = nDirectStart; nIdx <= nDirectEnd; nIdx++ ) { int nDirect; if ( SearchPattern == 3 ) { nDirect 
= nIdx < 0 ? nIdx + 6 : nIdx >= 6 ? nIdx - 6 : nIdx; } else { nDirect = (nIdx + nDirectRounding) & nDirectMask; } Mv mvOffset = pSearchOffset[nDirect]; mvOffset <<= nSearchStepShift; MvField mvCand = mvCurCenter, mvPair; mvCand.mv += mvOffset; if( m_pcEncCfg->getMCTSEncConstraint() ) { if( !( MCTSHelper::checkMvForMCTSConstraint( pu, mvCand.mv ) ) ) continue; // Skip this this pos } // get MVD cost m_pcRdCost->setPredictor( rcMvCurPred ); m_pcRdCost->setCostScale( 0 ); uint32_t uiMvBits = m_pcRdCost->getBitsOfVectorWithPredictor( mvCand.mv.getHor(), mvCand.mv.getVer(), (pu.cu->imv << 1) ); Distortion uiCost = m_pcRdCost->getCost( uiMvBits ); // get MVD pair and set target MV mvPair.refIdx = rTarMvField.refIdx; mvPair.mv.set( rcMvTarPred.hor - (mvCand.mv.hor - rcMvCurPred.hor), rcMvTarPred.ver - (mvCand.mv.ver - rcMvCurPred.ver) ); if( m_pcEncCfg->getMCTSEncConstraint() ) { if( !( MCTSHelper::checkMvForMCTSConstraint( pu, mvPair.mv ) ) ) continue; // Skip this this pos } uiCost += xGetSymmetricCost( pu, origBuf, eRefPicList, mvCand, mvPair, gbiIdx ); if ( uiCost < uiMinCost ) { uiMinCost = uiCost; rCurMvField = mvCand; rTarMvField = mvPair; nBestDirect = nDirect; } } if ( nBestDirect == -1 ) { break; } int nStep = 1; if ( SearchPattern == 1 || SearchPattern == 2 ) { nStep = 2 - (nBestDirect & 0x01); } nDirectStart = nBestDirect - nStep; nDirectEnd = nBestDirect + nStep; } return(uiMinCost); } void InterSearch::xSymmetricMotionEstimation( PredictionUnit& pu, PelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred, RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion& ruiCost, int gbiIdx ) { // Refine Search int nSearchStepShift = 0; int nDiamondRound = 8; int nCrossRound = 1; nSearchStepShift += (pu.cu->imv << 1); nDiamondRound >>= pu.cu->imv; ruiCost = xSymmeticRefineMvSearch( pu, origBuf, rcMvCurPred, rcMvTarPred, eRefPicList, rCurMvField, rTarMvField, ruiCost, 2, nSearchStepShift, nDiamondRound, gbiIdx ); ruiCost = xSymmeticRefineMvSearch( 
pu, origBuf, rcMvCurPred, rcMvTarPred, eRefPicList, rCurMvField, rTarMvField, ruiCost, 0, nSearchStepShift, nCrossRound, gbiIdx ); } void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, PelUnitBuf& origBuf, int puIdx, uint32_t& lastMode, Distortion& affineCost, Mv hevcMv[2][33] , Mv mvAffine4Para[2][33][3] , int refIdx4Para[2] , uint8_t gbiIdx , bool enforceGBiPred , uint32_t gbiIdxBits ) { const Slice &slice = *pu.cu->slice; affineCost = std::numeric_limits<Distortion>::max(); Mv cMvZero; Mv aacMv[2][3]; Mv cMvBi[2][3]; Mv cMvTemp[2][33][3]; int iNumPredDir = slice.isInterP() ? 1 : 2; int mvNum = 2; mvNum = pu.cu->affineType ? 3 : 2; // Mvp Mv cMvPred[2][33][3]; Mv cMvPredBi[2][33][3]; int aaiMvpIdxBi[2][33]; int aaiMvpIdx[2][33]; int aaiMvpNum[2][33]; AffineAMVPInfo aacAffineAMVPInfo[2][33]; AffineAMVPInfo affiAMVPInfoTemp[2]; int iRefIdx[2]={0,0}; // If un-initialized, may cause SEGV in bi-directional prediction iterative stage. int iRefIdxBi[2]; uint32_t uiMbBits[3] = {1, 1, 0}; int iRefStart, iRefEnd; int bestBiPRefIdxL1 = 0; int bestBiPMvpL1 = 0; Distortion biPDistTemp = std::numeric_limits<Distortion>::max(); Distortion uiCost[2] = { std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max() }; Distortion uiCostBi = std::numeric_limits<Distortion>::max(); Distortion uiCostTemp; uint32_t uiBits[3] = { 0 }; uint32_t uiBitsTemp; Distortion bestBiPDist = std::numeric_limits<Distortion>::max(); Distortion uiCostTempL0[MAX_NUM_REF]; for (int iNumRef=0; iNumRef < MAX_NUM_REF; iNumRef++) { uiCostTempL0[iNumRef] = std::numeric_limits<Distortion>::max(); } uint32_t uiBitsTempL0[MAX_NUM_REF]; Mv mvValidList1[4]; int refIdxValidList1 = 0; uint32_t bitsValidList1 = MAX_UINT; Distortion costValidList1 = std::numeric_limits<Distortion>::max(); Mv mvHevc[3]; const bool changeToHighPrec = pu.cu->imv != 1; const bool affineAmvrEnabled = pu.cu->slice->getSPS()->getAffineAmvrEnabledFlag(); int tryBipred = 0; WPScalingParam *wp0; WPScalingParam *wp1; 
xGetBlkBits( slice.isInterP(), puIdx, lastMode, uiMbBits); pu.cu->affine = true; pu.mergeFlag = false; if( gbiIdx != GBI_DEFAULT ) { pu.cu->GBiIdx = gbiIdx; } // Uni-directional prediction for ( int iRefList = 0; iRefList < iNumPredDir; iRefList++ ) { RefPicList eRefPicList = ( iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 ); #if JVET_N0068_AFFINE_MEM_BW pu.interDir = ( iRefList ? 2 : 1 ); #endif for (int iRefIdxTemp = 0; iRefIdxTemp < slice.getNumRefIdx(eRefPicList); iRefIdxTemp++) { // Get RefIdx bits uiBitsTemp = uiMbBits[iRefList]; if ( slice.getNumRefIdx(eRefPicList) > 1 ) { uiBitsTemp += iRefIdxTemp+1; if ( iRefIdxTemp == slice.getNumRefIdx(eRefPicList)-1 ) { uiBitsTemp--; } } // Do Affine AMVP xEstimateAffineAMVP( pu, affiAMVPInfoTemp[eRefPicList], origBuf, eRefPicList, iRefIdxTemp, cMvPred[iRefList][iRefIdxTemp], &biPDistTemp ); if ( affineAmvrEnabled ) { biPDistTemp += m_pcRdCost->getCost( xCalcAffineMVBits( pu, cMvPred[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp] ) ); } aaiMvpIdx[iRefList][iRefIdxTemp] = pu.mvpIdx[eRefPicList]; aaiMvpNum[iRefList][iRefIdxTemp] = pu.mvpNum[eRefPicList];; if ( pu.cu->affineType == AFFINEMODEL_6PARAM && refIdx4Para[iRefList] != iRefIdxTemp ) { xCopyAffineAMVPInfo( affiAMVPInfoTemp[eRefPicList], aacAffineAMVPInfo[iRefList][iRefIdxTemp] ); continue; } // set hevc ME result as start search position when it is best than mvp for ( int i=0; i<3; i++ ) { mvHevc[i] = hevcMv[iRefList][iRefIdxTemp]; if ( pu.cu->imv == 1 ) { mvHevc[i].changePrecision( MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL ); } else if ( pu.cu->imv == 2 ) { mvHevc[i].roundToPrecision( MV_PRECISION_QUARTER, MV_PRECISION_INT ); } } PelUnitBuf predBuf = m_tmpStorageLCU.getBuf( UnitAreaRelative(*pu.cu, pu) ); Distortion uiCandCost = xGetAffineTemplateCost(pu, origBuf, predBuf, mvHevc, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdxTemp); if ( affineAmvrEnabled ) { uiCandCost += m_pcRdCost->getCost( xCalcAffineMVBits( pu, mvHevc, 
cMvPred[iRefList][iRefIdxTemp] ) ); } //check stored affine motion bool affine4Para = pu.cu->affineType == AFFINEMODEL_4PARAM; bool savedParaAvail = pu.cu->imv && ( ( m_affineMotion.affine4ParaRefIdx[iRefList] == iRefIdxTemp && affine4Para && m_affineMotion.affine4ParaAvail ) || ( m_affineMotion.affine6ParaRefIdx[iRefList] == iRefIdxTemp && !affine4Para && m_affineMotion.affine6ParaAvail ) ); if ( savedParaAvail ) { Mv mvFour[3]; for ( int i = 0; i < mvNum; i++ ) { mvFour[i] = affine4Para ? m_affineMotion.acMvAffine4Para[iRefList][i] : m_affineMotion.acMvAffine6Para[iRefList][i]; if ( pu.cu->imv != 1 ) { mvFour[i].roundToPrecision( MV_PRECISION_INTERNAL, pu.cu->imv == 2 ? MV_PRECISION_INT : MV_PRECISION_QUARTER ); mvFour[i].changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER ); } } Distortion candCostInherit = xGetAffineTemplateCost( pu, origBuf, predBuf, mvFour, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdxTemp ); candCostInherit += m_pcRdCost->getCost( xCalcAffineMVBits( pu, mvFour, cMvPred[iRefList][iRefIdxTemp] ) ); if ( candCostInherit < uiCandCost ) { uiCandCost = candCostInherit; memcpy( mvHevc, mvFour, 3 * sizeof( Mv ) ); } } if (pu.cu->affineType == AFFINEMODEL_4PARAM && m_affMVListSize && (!pu.cu->cs->sps->getUseGBi() || gbiIdx == GBI_DEFAULT) ) { int shift = MAX_CU_DEPTH; for (int i = 0; i < m_affMVListSize; i++) { AffineMVInfo *mvInfo = m_affMVList + ((m_affMVListIdx - i - 1 + m_affMVListMaxSize) % (m_affMVListMaxSize)); //check; int j = 0; for (; j < i; j++) { AffineMVInfo *prevMvInfo = m_affMVList + ((m_affMVListIdx - j - 1 + m_affMVListMaxSize) % (m_affMVListMaxSize)); if ((mvInfo->affMVs[iRefList][iRefIdxTemp][0] == prevMvInfo->affMVs[iRefList][iRefIdxTemp][0]) && (mvInfo->affMVs[iRefList][iRefIdxTemp][1] == prevMvInfo->affMVs[iRefList][iRefIdxTemp][1]) && (mvInfo->x == prevMvInfo->x) && (mvInfo->y == prevMvInfo->y) && (mvInfo->w == prevMvInfo->w) ) { break; } } if (j < i) continue; Mv mvTmp[3], *nbMv = 
mvInfo->affMVs[iRefList][iRefIdxTemp]; int vx, vy; int dMvHorX, dMvHorY, dMvVerX, dMvVerY; int mvScaleHor = nbMv[0].getHor() << shift; int mvScaleVer = nbMv[0].getVer() << shift; Mv dMv = nbMv[1] - nbMv[0]; mvScaleHor <<= MV_FRACTIONAL_BITS_DIFF; mvScaleVer <<= MV_FRACTIONAL_BITS_DIFF; dMv <<= MV_FRACTIONAL_BITS_DIFF; dMvHorX = dMv.getHor() << (shift - g_aucLog2[mvInfo->w]); dMvHorY = dMv.getVer() << (shift - g_aucLog2[mvInfo->w]); dMvVerX = -dMvHorY; dMvVerY = dMvHorX; vx = mvScaleHor + dMvHorX * (pu.Y().x - mvInfo->x) + dMvVerX * (pu.Y().y - mvInfo->y); vy = mvScaleVer + dMvHorY * (pu.Y().x - mvInfo->x) + dMvVerY * (pu.Y().y - mvInfo->y); roundAffineMv(vx, vy, shift); mvTmp[0] = Mv(vx, vy); mvTmp[0].clipToStorageBitDepth(); clipMv(mvTmp[0], pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps); if ( pu.cu->imv == 2 ) { mvTmp[0].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT ); } else if ( pu.cu->imv == 0 ) mvTmp[0].roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); vx = mvScaleHor + dMvHorX * (pu.Y().x + pu.Y().width - mvInfo->x) + dMvVerX * (pu.Y().y - mvInfo->y); vy = mvScaleVer + dMvHorY * (pu.Y().x + pu.Y().width - mvInfo->x) + dMvVerY * (pu.Y().y - mvInfo->y); roundAffineMv(vx, vy, shift); mvTmp[1] = Mv(vx, vy); mvTmp[1].clipToStorageBitDepth(); clipMv(mvTmp[1], pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps); if ( pu.cu->imv != 1 ) { mvTmp[1].roundToPrecision( MV_PRECISION_INTERNAL, pu.cu->imv == 2 ? 
MV_PRECISION_INT : MV_PRECISION_QUARTER ); mvTmp[0].changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER ); mvTmp[1].changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER ); } Distortion tmpCost = xGetAffineTemplateCost(pu, origBuf, predBuf, mvTmp, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdxTemp); if ( affineAmvrEnabled ) { tmpCost += m_pcRdCost->getCost( xCalcAffineMVBits( pu, mvTmp, cMvPred[iRefList][iRefIdxTemp] ) ); } if (tmpCost < uiCandCost) { uiCandCost = tmpCost; std::memcpy(mvHevc, mvTmp, 3 * sizeof(Mv)); } } } if ( pu.cu->affineType == AFFINEMODEL_6PARAM ) { Mv mvFour[3]; if ( pu.cu->imv != 1 ) { mvAffine4Para[iRefList][iRefIdxTemp][0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); mvAffine4Para[iRefList][iRefIdxTemp][1].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); } mvFour[0] = mvAffine4Para[iRefList][iRefIdxTemp][0]; mvFour[1] = mvAffine4Para[iRefList][iRefIdxTemp][1]; if ( pu.cu->imv != 1 ) { mvAffine4Para[iRefList][iRefIdxTemp][0].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); mvAffine4Para[iRefList][iRefIdxTemp][1].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); } int shift = MAX_CU_DEPTH; int vx2 = (mvFour[0].getHor() << shift) - ((mvFour[1].getVer() - mvFour[0].getVer()) << (shift + g_aucLog2[pu.lheight()] - g_aucLog2[pu.lwidth()])); int vy2 = (mvFour[0].getVer() << shift) + ((mvFour[1].getHor() - mvFour[0].getHor()) << (shift + g_aucLog2[pu.lheight()] - g_aucLog2[pu.lwidth()])); #if JVET_N0335_N0085_MV_ROUNDING int offset = (1 << (shift - 1)); vx2 = (vx2 + offset - (vx2 >= 0)) >> shift; vy2 = (vy2 + offset - (vy2 >= 0)) >> shift; #else vx2 >>= shift; vy2 >>= shift; #endif mvFour[2].hor = vx2; mvFour[2].ver = vy2; mvFour[2].clipToStorageBitDepth(); if ( pu.cu->imv != 1 ) { mvFour[0].roundToPrecision( MV_PRECISION_INTERNAL, pu.cu->imv == 2 ? 
MV_PRECISION_INT : MV_PRECISION_QUARTER ); mvFour[1].roundToPrecision( MV_PRECISION_INTERNAL, pu.cu->imv == 2 ? MV_PRECISION_INT : MV_PRECISION_QUARTER ); mvFour[2].roundToPrecision( MV_PRECISION_INTERNAL, pu.cu->imv == 2 ? MV_PRECISION_INT : MV_PRECISION_QUARTER ); } for (int i = 0; i < 3; i++) { if ( pu.cu->imv != 1 ) { mvFour[i].changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER ); } } Distortion uiCandCostInherit = xGetAffineTemplateCost( pu, origBuf, predBuf, mvFour, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdxTemp ); if ( affineAmvrEnabled ) { uiCandCostInherit += m_pcRdCost->getCost( xCalcAffineMVBits( pu, mvFour, cMvPred[iRefList][iRefIdxTemp] ) ); } if ( uiCandCostInherit < uiCandCost ) { uiCandCost = uiCandCostInherit; for ( int i = 0; i < 3; i++ ) { mvHevc[i] = mvFour[i]; } } } if ( uiCandCost < biPDistTemp ) { ::memcpy( cMvTemp[iRefList][iRefIdxTemp], mvHevc, sizeof(Mv)*3 ); } else { ::memcpy( cMvTemp[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp], sizeof(Mv)*3 ); } // GPB list 1, save the best MvpIdx, RefIdx and Cost if ( slice.getMvdL1ZeroFlag() && iRefList==1 && biPDistTemp < bestBiPDist ) { bestBiPDist = biPDistTemp; bestBiPMvpL1 = aaiMvpIdx[iRefList][iRefIdxTemp]; bestBiPRefIdxL1 = iRefIdxTemp; } // Update bits uiBitsTemp += m_auiMVPIdxCost[aaiMvpIdx[iRefList][iRefIdxTemp]][AMVP_MAX_NUM_CANDS]; if ( m_pcEncCfg->getFastMEForGenBLowDelayEnabled() && iRefList == 1 ) // list 1 { if ( slice.getList1IdxToList0Idx( iRefIdxTemp ) >= 0 && (pu.cu->affineType != AFFINEMODEL_6PARAM || slice.getList1IdxToList0Idx( iRefIdxTemp ) == refIdx4Para[0]) ) { int iList1ToList0Idx = slice.getList1IdxToList0Idx( iRefIdxTemp ); ::memcpy( cMvTemp[1][iRefIdxTemp], cMvTemp[0][iList1ToList0Idx], sizeof(Mv)*3 ); uiCostTemp = uiCostTempL0[iList1ToList0Idx]; uiCostTemp -= m_pcRdCost->getCost( uiBitsTempL0[iList1ToList0Idx] ); uiBitsTemp += xCalcAffineMVBits( pu, cMvTemp[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp] ); 
/*calculate the correct cost*/ uiCostTemp += m_pcRdCost->getCost( uiBitsTemp ); DTRACE( g_trace_ctx, D_COMMON, " (%d) uiCostTemp=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiCostTemp ); } else { xAffineMotionEstimation( pu, origBuf, eRefPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp , aaiMvpIdx[iRefList][iRefIdxTemp], affiAMVPInfoTemp[eRefPicList] ); } } else { xAffineMotionEstimation( pu, origBuf, eRefPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp , aaiMvpIdx[iRefList][iRefIdxTemp], affiAMVPInfoTemp[eRefPicList] ); } if(pu.cu->cs->sps->getUseGBi() && pu.cu->GBiIdx == GBI_DEFAULT && pu.cu->slice->isInterB()) { m_uniMotions.setReadModeAffine(true, (uint8_t)iRefList, (uint8_t)iRefIdxTemp, pu.cu->affineType); m_uniMotions.copyAffineMvFrom(cMvTemp[iRefList][iRefIdxTemp], uiCostTemp - m_pcRdCost->getCost(uiBitsTemp), (uint8_t)iRefList, (uint8_t)iRefIdxTemp, pu.cu->affineType , aaiMvpIdx[iRefList][iRefIdxTemp] ); } // Set best AMVP Index xCopyAffineAMVPInfo( affiAMVPInfoTemp[eRefPicList], aacAffineAMVPInfo[iRefList][iRefIdxTemp] ); if ( pu.cu->imv != 2 || !m_pcEncCfg->getUseAffineAmvrEncOpt() ) xCheckBestAffineMVP( pu, affiAMVPInfoTemp[eRefPicList], eRefPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp ); if ( iRefList == 0 ) { uiCostTempL0[iRefIdxTemp] = uiCostTemp; uiBitsTempL0[iRefIdxTemp] = uiBitsTemp; } DTRACE( g_trace_ctx, D_COMMON, " (%d) uiCostTemp=%d, uiCost[iRefList]=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiCostTemp, uiCost[iRefList] ); if ( uiCostTemp < uiCost[iRefList] ) { uiCost[iRefList] = uiCostTemp; uiBits[iRefList] = uiBitsTemp; // storing for bi-prediction // set best motion ::memcpy( aacMv[iRefList], cMvTemp[iRefList][iRefIdxTemp], sizeof(Mv) * 3 ); iRefIdx[iRefList] = iRefIdxTemp; } if ( iRefList == 1 && uiCostTemp < costValidList1 && 
slice.getList1IdxToList0Idx( iRefIdxTemp ) < 0 ) { costValidList1 = uiCostTemp; bitsValidList1 = uiBitsTemp; // set motion memcpy( mvValidList1, cMvTemp[iRefList][iRefIdxTemp], sizeof(Mv)*3 ); refIdxValidList1 = iRefIdxTemp; } } // End refIdx loop } // end Uni-prediction if ( pu.cu->affineType == AFFINEMODEL_4PARAM ) { ::memcpy( mvAffine4Para, cMvTemp, sizeof( cMvTemp ) ); if ( pu.cu->imv == 0 && ( !pu.cu->cs->sps->getUseGBi() || gbiIdx == GBI_DEFAULT ) ) { AffineMVInfo *affMVInfo = m_affMVList + m_affMVListIdx; //check; int j = 0; for (; j < m_affMVListSize; j++) { AffineMVInfo *prevMvInfo = m_affMVList + ((m_affMVListIdx - j - 1 + m_affMVListMaxSize) % (m_affMVListMaxSize)); if ((pu.Y().x == prevMvInfo->x) && (pu.Y().y == prevMvInfo->y) && (pu.Y().width == prevMvInfo->w) && (pu.Y().height == prevMvInfo->h)) { break; } } if (j < m_affMVListSize) affMVInfo = m_affMVList + ((m_affMVListIdx - j - 1 + m_affMVListMaxSize) % (m_affMVListMaxSize)); ::memcpy(affMVInfo->affMVs, cMvTemp, sizeof(cMvTemp)); if (j == m_affMVListSize) { affMVInfo->x = pu.Y().x; affMVInfo->y = pu.Y().y; affMVInfo->w = pu.Y().width; affMVInfo->h = pu.Y().height; m_affMVListSize = std::min(m_affMVListSize + 1, m_affMVListMaxSize); m_affMVListIdx = (m_affMVListIdx + 1) % (m_affMVListMaxSize); } } } // Bi-directional prediction if ( slice.isInterB() && !PU::isBipredRestriction(pu) ) { tryBipred = 1; #if JVET_N0068_AFFINE_MEM_BW pu.interDir = 3; #endif // Set as best list0 and list1 iRefIdxBi[0] = iRefIdx[0]; iRefIdxBi[1] = iRefIdx[1]; ::memcpy( cMvBi, aacMv, sizeof(aacMv) ); ::memcpy( cMvPredBi, cMvPred, sizeof(cMvPred) ); ::memcpy( aaiMvpIdxBi, aaiMvpIdx, sizeof(aaiMvpIdx) ); uint32_t uiMotBits[2]; bool doBiPred = true; if ( slice.getMvdL1ZeroFlag() ) // GPB, list 1 only use Mvp { xCopyAffineAMVPInfo( aacAffineAMVPInfo[1][bestBiPRefIdxL1], affiAMVPInfoTemp[REF_PIC_LIST_1] ); pu.mvpIdx[REF_PIC_LIST_1] = bestBiPMvpL1; aaiMvpIdxBi[1][bestBiPRefIdxL1] = bestBiPMvpL1; // Set Mv for list1 Mv pcMvTemp[3] 
= { affiAMVPInfoTemp[REF_PIC_LIST_1].mvCandLT[bestBiPMvpL1], affiAMVPInfoTemp[REF_PIC_LIST_1].mvCandRT[bestBiPMvpL1], affiAMVPInfoTemp[REF_PIC_LIST_1].mvCandLB[bestBiPMvpL1] }; ::memcpy( cMvPredBi[1][bestBiPRefIdxL1], pcMvTemp, sizeof(Mv)*3 ); ::memcpy( cMvBi[1], pcMvTemp, sizeof(Mv)*3 ); ::memcpy( cMvTemp[1][bestBiPRefIdxL1], pcMvTemp, sizeof(Mv)*3 ); iRefIdxBi[1] = bestBiPRefIdxL1; if( m_pcEncCfg->getMCTSEncConstraint() ) { Area curTileAreaRestricted; curTileAreaRestricted = pu.cs->picture->mctsInfo.getTileAreaSubPelRestricted( pu ); for( int i = 0; i < mvNum; i++ ) { Mv restrictedMv = pcMvTemp[i]; restrictedMv.changePrecision( MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL ); MCTSHelper::clipMvToArea( restrictedMv, pu.cu->Y(), curTileAreaRestricted, *pu.cs->sps ); restrictedMv.changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER ); // If sub-pel filter samples are not inside of allowed area if( restrictedMv != pcMvTemp[i] ) { uiCostBi = std::numeric_limits<Distortion>::max(); doBiPred = false; } } } // Get list1 prediction block PU::setAllAffineMv( pu, cMvBi[1][0], cMvBi[1][1], cMvBi[1][2], REF_PIC_LIST_1 , changeToHighPrec ); pu.refIdx[REF_PIC_LIST_1] = iRefIdxBi[1]; PelUnitBuf predBufTmp = m_tmpPredStorage[REF_PIC_LIST_1].getBuf( UnitAreaRelative(*pu.cu, pu) ); motionCompensation( pu, predBufTmp, REF_PIC_LIST_1 ); // Update bits uiMotBits[0] = uiBits[0] - uiMbBits[0]; uiMotBits[1] = uiMbBits[1]; if( slice.getNumRefIdx(REF_PIC_LIST_1) > 1 ) { uiMotBits[1] += bestBiPRefIdxL1+1; if( bestBiPRefIdxL1 == slice.getNumRefIdx(REF_PIC_LIST_1)-1 ) { uiMotBits[1]--; } } uiMotBits[1] += m_auiMVPIdxCost[aaiMvpIdxBi[1][bestBiPRefIdxL1]][AMVP_MAX_NUM_CANDS]; uiBits[2] = uiMbBits[2] + uiMotBits[0] + uiMotBits[1]; } else { uiMotBits[0] = uiBits[0] - uiMbBits[0]; uiMotBits[1] = uiBits[1] - uiMbBits[1]; uiBits[2] = uiMbBits[2] + uiMotBits[0] + uiMotBits[1]; } if( doBiPred ) { // 4-times iteration (default) int iNumIter = 4; // fast encoder setting or GPB: only one iteration 
if ( m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE2 || slice.getMvdL1ZeroFlag() ) { iNumIter = 1; } for ( int iIter = 0; iIter < iNumIter; iIter++ ) { // Set RefList int iRefList = iIter % 2; if ( m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE2 ) { if( uiCost[0] <= uiCost[1] ) { iRefList = 1; } else { iRefList = 0; } if( gbiIdx != GBI_DEFAULT ) { iRefList = ( abs( getGbiWeight( gbiIdx, REF_PIC_LIST_0 ) ) > abs( getGbiWeight( gbiIdx, REF_PIC_LIST_1 ) ) ? 1 : 0 ); } } else if ( iIter == 0 ) { iRefList = 0; } // First iterate, get prediction block of opposite direction if( iIter == 0 && !slice.getMvdL1ZeroFlag() ) { PU::setAllAffineMv( pu, aacMv[1-iRefList][0], aacMv[1-iRefList][1], aacMv[1-iRefList][2], RefPicList(1-iRefList) , changeToHighPrec ); pu.refIdx[1-iRefList] = iRefIdx[1-iRefList]; PelUnitBuf predBufTmp = m_tmpPredStorage[1 - iRefList].getBuf( UnitAreaRelative(*pu.cu, pu) ); motionCompensation( pu, predBufTmp, RefPicList(1 - iRefList) ); } RefPicList eRefPicList = ( iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 ); if ( slice.getMvdL1ZeroFlag() ) // GPB, fix List 1, search List 0 { iRefList = 0; eRefPicList = REF_PIC_LIST_0; } bool bChanged = false; iRefStart = 0; iRefEnd = slice.getNumRefIdx(eRefPicList) - 1; for ( int iRefIdxTemp = iRefStart; iRefIdxTemp <= iRefEnd; iRefIdxTemp++ ) { if ( pu.cu->affineType == AFFINEMODEL_6PARAM && refIdx4Para[iRefList] != iRefIdxTemp ) { continue; } if(m_pcEncCfg->getUseGBiFast() && (gbiIdx != GBI_DEFAULT) && (pu.cu->slice->getRefPic(eRefPicList, iRefIdxTemp)->getPOC() == pu.cu->slice->getRefPic(RefPicList(1 - iRefList), pu.refIdx[1 - iRefList])->getPOC()) && (pu.cu->affineType == AFFINEMODEL_4PARAM && pu.cu->slice->getTLayer()>1)) { continue; } // update bits uiBitsTemp = uiMbBits[2] + uiMotBits[1-iRefList]; uiBitsTemp += ((pu.cu->slice->getSPS()->getUseGBi() == true) ? 
gbiIdxBits : 0); if( slice.getNumRefIdx(eRefPicList) > 1 ) { uiBitsTemp += iRefIdxTemp+1; if ( iRefIdxTemp == slice.getNumRefIdx(eRefPicList)-1 ) { uiBitsTemp--; } } uiBitsTemp += m_auiMVPIdxCost[aaiMvpIdxBi[iRefList][iRefIdxTemp]][AMVP_MAX_NUM_CANDS]; // call Affine ME xAffineMotionEstimation( pu, origBuf, eRefPicList, cMvPredBi[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp, aaiMvpIdxBi[iRefList][iRefIdxTemp], aacAffineAMVPInfo[iRefList][iRefIdxTemp], true ); xCopyAffineAMVPInfo( aacAffineAMVPInfo[iRefList][iRefIdxTemp], affiAMVPInfoTemp[eRefPicList] ); if ( pu.cu->imv != 2 || !m_pcEncCfg->getUseAffineAmvrEncOpt() ) xCheckBestAffineMVP( pu, affiAMVPInfoTemp[eRefPicList], eRefPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPredBi[iRefList][iRefIdxTemp], aaiMvpIdxBi[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp ); if ( uiCostTemp < uiCostBi ) { bChanged = true; ::memcpy( cMvBi[iRefList], cMvTemp[iRefList][iRefIdxTemp], sizeof(Mv)*3 ); iRefIdxBi[iRefList] = iRefIdxTemp; uiCostBi = uiCostTemp; uiMotBits[iRefList] = uiBitsTemp - uiMbBits[2] - uiMotBits[1-iRefList]; uiMotBits[iRefList] -= ((pu.cu->slice->getSPS()->getUseGBi() == true) ? 
gbiIdxBits : 0); uiBits[2] = uiBitsTemp; if ( iNumIter != 1 ) // MC for next iter { // Set motion PU::setAllAffineMv( pu, cMvBi[iRefList][0], cMvBi[iRefList][1], cMvBi[iRefList][2], eRefPicList , changeToHighPrec ); pu.refIdx[eRefPicList] = iRefIdxBi[eRefPicList]; PelUnitBuf predBufTmp = m_tmpPredStorage[iRefList].getBuf( UnitAreaRelative(*pu.cu, pu) ); motionCompensation( pu, predBufTmp, eRefPicList ); } } } // for loop-iRefIdxTemp if ( !bChanged ) { if ((uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) || enforceGBiPred) { xCopyAffineAMVPInfo( aacAffineAMVPInfo[0][iRefIdxBi[0]], affiAMVPInfoTemp[REF_PIC_LIST_0] ); xCheckBestAffineMVP( pu, affiAMVPInfoTemp[REF_PIC_LIST_0], REF_PIC_LIST_0, cMvBi[0], cMvPredBi[0][iRefIdxBi[0]], aaiMvpIdxBi[0][iRefIdxBi[0]], uiBits[2], uiCostBi ); if ( !slice.getMvdL1ZeroFlag() ) { xCopyAffineAMVPInfo( aacAffineAMVPInfo[1][iRefIdxBi[1]], affiAMVPInfoTemp[REF_PIC_LIST_1] ); xCheckBestAffineMVP( pu, affiAMVPInfoTemp[REF_PIC_LIST_1], REF_PIC_LIST_1, cMvBi[1], cMvPredBi[1][iRefIdxBi[1]], aaiMvpIdxBi[1][iRefIdxBi[1]], uiBits[2], uiCostBi ); } } break; } } // for loop-iter } } // if (B_SLICE) pu.mv [REF_PIC_LIST_0] = Mv(); pu.mv [REF_PIC_LIST_1] = Mv(); pu.mvd [REF_PIC_LIST_0] = cMvZero; pu.mvd [REF_PIC_LIST_1] = cMvZero; pu.refIdx[REF_PIC_LIST_0] = NOT_VALID; pu.refIdx[REF_PIC_LIST_1] = NOT_VALID; pu.mvpIdx[REF_PIC_LIST_0] = NOT_VALID; pu.mvpIdx[REF_PIC_LIST_1] = NOT_VALID; pu.mvpNum[REF_PIC_LIST_0] = NOT_VALID; pu.mvpNum[REF_PIC_LIST_1] = NOT_VALID; for ( int verIdx = 0; verIdx < 3; verIdx++ ) { pu.mvdAffi[REF_PIC_LIST_0][verIdx] = cMvZero; pu.mvdAffi[REF_PIC_LIST_1][verIdx] = cMvZero; } // Set Motion Field memcpy( aacMv[1], mvValidList1, sizeof(Mv)*3 ); iRefIdx[1] = refIdxValidList1; uiBits[1] = bitsValidList1; uiCost[1] = costValidList1; if (pu.cs->pps->getWPBiPred() == true && tryBipred && (gbiIdx != GBI_DEFAULT)) { CHECK(iRefIdxBi[0]<0, "Invalid picture reference index"); CHECK(iRefIdxBi[1]<0, "Invalid picture reference index"); 
pu.cs->slice->getWpScaling(REF_PIC_LIST_0, iRefIdxBi[0], wp0); pu.cs->slice->getWpScaling(REF_PIC_LIST_1, iRefIdxBi[1], wp1); if ((wp0[COMPONENT_Y].bPresentFlag || wp0[COMPONENT_Cb].bPresentFlag || wp0[COMPONENT_Cr].bPresentFlag || wp1[COMPONENT_Y].bPresentFlag || wp1[COMPONENT_Cb].bPresentFlag || wp1[COMPONENT_Cr].bPresentFlag)) { uiCostBi = MAX_UINT; enforceGBiPred = false; } } if( enforceGBiPred ) { uiCost[0] = uiCost[1] = MAX_UINT; } // Affine ME result set if ( uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1] ) // Bi { lastMode = 2; affineCost = uiCostBi; #if JVET_N0068_AFFINE_MEM_BW pu.interDir = 3; #endif PU::setAllAffineMv( pu, cMvBi[0][0], cMvBi[0][1], cMvBi[0][2], REF_PIC_LIST_0 , changeToHighPrec ); PU::setAllAffineMv( pu, cMvBi[1][0], cMvBi[1][1], cMvBi[1][2], REF_PIC_LIST_1 , changeToHighPrec ); pu.refIdx[REF_PIC_LIST_0] = iRefIdxBi[0]; pu.refIdx[REF_PIC_LIST_1] = iRefIdxBi[1]; for ( int verIdx = 0; verIdx < mvNum; verIdx++ ) { pu.mvdAffi[REF_PIC_LIST_0][verIdx] = cMvBi[0][verIdx] - cMvPredBi[0][iRefIdxBi[0]][verIdx]; pu.mvdAffi[REF_PIC_LIST_1][verIdx] = cMvBi[1][verIdx] - cMvPredBi[1][iRefIdxBi[1]][verIdx]; if ( verIdx != 0 ) { pu.mvdAffi[0][verIdx] = pu.mvdAffi[0][verIdx] - pu.mvdAffi[0][0]; pu.mvdAffi[1][verIdx] = pu.mvdAffi[1][verIdx] - pu.mvdAffi[1][0]; } } #if !JVET_N0068_AFFINE_MEM_BW pu.interDir = 3; #endif pu.mvpIdx[REF_PIC_LIST_0] = aaiMvpIdxBi[0][iRefIdxBi[0]]; pu.mvpNum[REF_PIC_LIST_0] = aaiMvpNum[0][iRefIdxBi[0]]; pu.mvpIdx[REF_PIC_LIST_1] = aaiMvpIdxBi[1][iRefIdxBi[1]]; pu.mvpNum[REF_PIC_LIST_1] = aaiMvpNum[1][iRefIdxBi[1]]; } else if ( uiCost[0] <= uiCost[1] ) // List 0 { lastMode = 0; affineCost = uiCost[0]; #if JVET_N0068_AFFINE_MEM_BW pu.interDir = 1; #endif PU::setAllAffineMv( pu, aacMv[0][0], aacMv[0][1], aacMv[0][2], REF_PIC_LIST_0 , changeToHighPrec ); pu.refIdx[REF_PIC_LIST_0] = iRefIdx[0]; for ( int verIdx = 0; verIdx < mvNum; verIdx++ ) { pu.mvdAffi[REF_PIC_LIST_0][verIdx] = aacMv[0][verIdx] - cMvPred[0][iRefIdx[0]][verIdx]; if ( 
verIdx != 0 ) { pu.mvdAffi[0][verIdx] = pu.mvdAffi[0][verIdx] - pu.mvdAffi[0][0]; } } #if !JVET_N0068_AFFINE_MEM_BW pu.interDir = 1; #endif pu.mvpIdx[REF_PIC_LIST_0] = aaiMvpIdx[0][iRefIdx[0]]; pu.mvpNum[REF_PIC_LIST_0] = aaiMvpNum[0][iRefIdx[0]]; } else { lastMode = 1; affineCost = uiCost[1]; #if JVET_N0068_AFFINE_MEM_BW pu.interDir = 2; #endif PU::setAllAffineMv( pu, aacMv[1][0], aacMv[1][1], aacMv[1][2], REF_PIC_LIST_1 , changeToHighPrec ); pu.refIdx[REF_PIC_LIST_1] = iRefIdx[1]; for ( int verIdx = 0; verIdx < mvNum; verIdx++ ) { pu.mvdAffi[REF_PIC_LIST_1][verIdx] = aacMv[1][verIdx] - cMvPred[1][iRefIdx[1]][verIdx]; if ( verIdx != 0 ) { pu.mvdAffi[1][verIdx] = pu.mvdAffi[1][verIdx] - pu.mvdAffi[1][0]; } } #if !JVET_N0068_AFFINE_MEM_BW pu.interDir = 2; #endif pu.mvpIdx[REF_PIC_LIST_1] = aaiMvpIdx[1][iRefIdx[1]]; pu.mvpNum[REF_PIC_LIST_1] = aaiMvpNum[1][iRefIdx[1]]; } if( gbiIdx != GBI_DEFAULT ) { pu.cu->GBiIdx = GBI_DEFAULT; } } void solveEqual( double** dEqualCoeff, int iOrder, double* dAffinePara ) { for ( int k = 0; k < iOrder; k++ ) { dAffinePara[k] = 0.; } // row echelon for ( int i = 1; i < iOrder; i++ ) { // find column max double temp = fabs(dEqualCoeff[i][i-1]); int tempIdx = i; for ( int j = i+1; j < iOrder+1; j++ ) { if ( fabs(dEqualCoeff[j][i-1]) > temp ) { temp = fabs(dEqualCoeff[j][i-1]); tempIdx = j; } } // swap line if ( tempIdx != i ) { for ( int j = 0; j < iOrder+1; j++ ) { dEqualCoeff[0][j] = dEqualCoeff[i][j]; dEqualCoeff[i][j] = dEqualCoeff[tempIdx][j]; dEqualCoeff[tempIdx][j] = dEqualCoeff[0][j]; } } // elimination first column if ( dEqualCoeff[i][i - 1] == 0. ) { return; } for ( int j = i+1; j < iOrder+1; j++ ) { for ( int k = i; k < iOrder+1; k++ ) { dEqualCoeff[j][k] = dEqualCoeff[j][k] - dEqualCoeff[i][k] * dEqualCoeff[j][i-1] / dEqualCoeff[i][i-1]; } } } if ( dEqualCoeff[iOrder][iOrder - 1] == 0. 
) { return; } dAffinePara[iOrder-1] = dEqualCoeff[iOrder][iOrder] / dEqualCoeff[iOrder][iOrder-1]; for ( int i = iOrder-2; i >= 0; i-- ) { if ( dEqualCoeff[i + 1][i] == 0. ) { for ( int k = 0; k < iOrder; k++ ) { dAffinePara[k] = 0.; } return; } double temp = 0; for ( int j = i+1; j < iOrder; j++ ) { temp += dEqualCoeff[i+1][j] * dAffinePara[j]; } dAffinePara[i] = ( dEqualCoeff[i+1][iOrder] - temp ) / dEqualCoeff[i+1][i]; } } void InterSearch::xCheckBestAffineMVP( PredictionUnit &pu, AffineAMVPInfo &affineAMVPInfo, RefPicList eRefPicList, Mv acMv[3], Mv acMvPred[3], int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost ) { if ( affineAMVPInfo.numCand < 2 ) { return; } int mvNum = pu.cu->affineType ? 3 : 2; m_pcRdCost->selectMotionLambda( pu.cu->transQuantBypass ); m_pcRdCost->setCostScale ( 0 ); int iBestMVPIdx = riMVPIdx; // Get origin MV bits Mv tmpPredMv[3]; int iOrgMvBits = xCalcAffineMVBits( pu, acMv, acMvPred ); iOrgMvBits += m_auiMVPIdxCost[riMVPIdx][AMVP_MAX_NUM_CANDS]; int iBestMvBits = iOrgMvBits; for (int iMVPIdx = 0; iMVPIdx < affineAMVPInfo.numCand; iMVPIdx++) { if (iMVPIdx == riMVPIdx) { continue; } tmpPredMv[0] = affineAMVPInfo.mvCandLT[iMVPIdx]; tmpPredMv[1] = affineAMVPInfo.mvCandRT[iMVPIdx]; if ( mvNum == 3 ) { tmpPredMv[2] = affineAMVPInfo.mvCandLB[iMVPIdx]; } int iMvBits = xCalcAffineMVBits( pu, acMv, tmpPredMv ); iMvBits += m_auiMVPIdxCost[iMVPIdx][AMVP_MAX_NUM_CANDS]; if (iMvBits < iBestMvBits) { iBestMvBits = iMvBits; iBestMVPIdx = iMVPIdx; } } if (iBestMVPIdx != riMVPIdx) // if changed { acMvPred[0] = affineAMVPInfo.mvCandLT[iBestMVPIdx]; acMvPred[1] = affineAMVPInfo.mvCandRT[iBestMVPIdx]; acMvPred[2] = affineAMVPInfo.mvCandLB[iBestMVPIdx]; riMVPIdx = iBestMVPIdx; uint32_t uiOrgBits = ruiBits; ruiBits = uiOrgBits - iOrgMvBits + iBestMvBits; ruiCost = (ruiCost - m_pcRdCost->getCost( uiOrgBits )) + m_pcRdCost->getCost( ruiBits ); } } void InterSearch::xAffineMotionEstimation( PredictionUnit& pu, PelUnitBuf& origBuf, RefPicList eRefPicList, Mv 
acMvPred[3], int iRefIdxPred, Mv acMv[3], uint32_t& ruiBits, Distortion& ruiCost, int& mvpIdx, const AffineAMVPInfo& aamvpi, bool bBi) { if( pu.cu->cs->sps->getUseGBi() && pu.cu->GBiIdx != GBI_DEFAULT && !bBi && xReadBufferedAffineUniMv(pu, eRefPicList, iRefIdxPred, acMvPred, acMv, ruiBits, ruiCost , mvpIdx, aamvpi ) ) { return; } uint32_t dirBits = ruiBits - m_auiMVPIdxCost[mvpIdx][aamvpi.numCand]; int bestMvpIdx = mvpIdx; const int width = pu.Y().width; const int height = pu.Y().height; const Picture* refPic = pu.cu->slice->getRefPic(eRefPicList, iRefIdxPred); // Set Origin YUV: pcYuv PelUnitBuf* pBuf = &origBuf; double fWeight = 1.0; PelUnitBuf origBufTmp = m_tmpStorageLCU.getBuf( UnitAreaRelative( *pu.cu, pu ) ); #if JVET_N0329_IBC_SEARCH_IMP enum DFunc distFunc = (pu.cu->transQuantBypass || pu.cs->slice->getDisableSATDForRD()) ? DF_SAD : DF_HAD; #endif // if Bi, set to ( 2 * Org - ListX ) if ( bBi ) { // NOTE: Other buf contains predicted signal from another direction PelUnitBuf otherBuf = m_tmpPredStorage[1 - (int)eRefPicList].getBuf( UnitAreaRelative( *pu.cu, pu ) ); origBufTmp.copyFrom(origBuf); origBufTmp.removeHighFreq(otherBuf, m_pcEncCfg->getClipForBiPredMeEnabled(), pu.cu->slice->clpRngs() ,getGbiWeight(pu.cu->GBiIdx, eRefPicList) ); pBuf = &origBufTmp; fWeight = xGetMEDistortionWeight( pu.cu->GBiIdx, eRefPicList ); } // pred YUV PelUnitBuf predBuf = m_tmpAffiStorage.getBuf( UnitAreaRelative(*pu.cu, pu) ); // Set start Mv position, use input mv as started search mv Mv acMvTemp[3]; ::memcpy( acMvTemp, acMv, sizeof(Mv)*3 ); if ( pu.cu->imv != 1 ) { acMvTemp[0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); acMvTemp[1].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); acMvTemp[2].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); } // Set delta mv // malloc buffer int iParaNum = pu.cu->affineType ? 7 : 5; int affineParaNum = iParaNum - 1; int mvNum = pu.cu->affineType ? 
3 : 2; double **pdEqualCoeff; pdEqualCoeff = new double *[iParaNum]; for ( int i = 0; i < iParaNum; i++ ) { pdEqualCoeff[i] = new double[iParaNum]; } int64_t i64EqualCoeff[7][7]; Pel *piError = m_tmpAffiError; int *pdDerivate[2]; pdDerivate[0] = m_tmpAffiDeri[0]; pdDerivate[1] = m_tmpAffiDeri[1]; Distortion uiCostBest = std::numeric_limits<Distortion>::max(); uint32_t uiBitsBest = 0; // do motion compensation with origin mv if( m_pcEncCfg->getMCTSEncConstraint() ) { Area curTileAreaRestricted = pu.cs->picture->mctsInfo.getTileAreaSubPelRestricted( pu ); MCTSHelper::clipMvToArea( acMvTemp[0], pu.cu->Y(), curTileAreaRestricted, *pu.cs->sps ); MCTSHelper::clipMvToArea( acMvTemp[1], pu.cu->Y(), curTileAreaRestricted, *pu.cs->sps ); if( pu.cu->affineType == AFFINEMODEL_6PARAM ) { MCTSHelper::clipMvToArea( acMvTemp[2], pu.cu->Y(), curTileAreaRestricted, *pu.cs->sps ); } } else clipMv( acMvTemp[0], pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps ); clipMv( acMvTemp[1], pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps ); if ( pu.cu->affineType == AFFINEMODEL_6PARAM ) { clipMv( acMvTemp[2], pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps ); } int mvdPrecision = ( pu.cu->imv == 1 ) ? 
2 : 0; if ( pu.cu->imv == 2 ) { acMvTemp[0].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT ); acMvTemp[1].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT ); if ( pu.cu->affineType == AFFINEMODEL_6PARAM ) { acMvTemp[2].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT ); } } xPredAffineBlk( COMPONENT_Y, pu, refPic, acMvTemp, predBuf, false, pu.cs->slice->clpRng( COMPONENT_Y ) ); // get error #if JVET_N0329_IBC_SEARCH_IMP uiCostBest = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, distFunc); #else uiCostBest = m_pcRdCost->getDistPart( predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_HAD ); #endif // get cost with mv m_pcRdCost->setCostScale(0); uiBitsBest = ruiBits; if ( pu.cu->imv == 2 && m_pcEncCfg->getUseAffineAmvrEncOpt() ) { uiBitsBest = dirBits + xDetermineBestMvp( pu, acMvTemp, mvpIdx, aamvpi ); acMvPred[0] = aamvpi.mvCandLT[mvpIdx]; acMvPred[1] = aamvpi.mvCandRT[mvpIdx]; acMvPred[2] = aamvpi.mvCandLB[mvpIdx]; } else { DTRACE( g_trace_ctx, D_COMMON, " (%d) xx uiBitsBest=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiBitsBest ); uiBitsBest += xCalcAffineMVBits( pu, acMvTemp, acMvPred, pu.cu->imv != 1 ); DTRACE( g_trace_ctx, D_COMMON, " (%d) yy uiBitsBest=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiBitsBest ); } uiCostBest = (Distortion)( floor( fWeight * (double)uiCostBest ) + (double)m_pcRdCost->getCost( uiBitsBest ) ); DTRACE( g_trace_ctx, D_COMMON, " (%d) uiBitsBest=%d, uiCostBest=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiBitsBest, uiCostBest ); ::memcpy( acMv, acMvTemp, sizeof(Mv) * 3 ); const int bufStride = pBuf->Y().stride; const int predBufStride = predBuf.Y().stride; Mv prevIterMv[7][3]; int iIterTime; if ( pu.cu->affineType == AFFINEMODEL_6PARAM ) { iIterTime = bBi ? 3 : 4; } else { iIterTime = bBi ? 3 : 5; } if ( !pu.cu->cs->sps->getUseAffineType() ) { iIterTime = bBi ? 
5 : 7; } for ( int iter=0; iter<iIterTime; iter++ ) // iterate loop { memcpy( prevIterMv[iter], acMvTemp, sizeof( Mv ) * 3 ); /********************************************************************************* * use gradient to update mv *********************************************************************************/ // get Error Matrix Pel* pOrg = pBuf->Y().buf; Pel* pPred = predBuf.Y().buf; for ( int j=0; j< height; j++ ) { for ( int i=0; i< width; i++ ) { piError[i + j * width] = pOrg[i] - pPred[i]; } pOrg += bufStride; pPred += predBufStride; } // sobel x direction // -1 0 1 // -2 0 2 // -1 0 1 pPred = predBuf.Y().buf; m_HorizontalSobelFilter( pPred, predBufStride, pdDerivate[0], width, width, height ); // sobel y direction // -1 -2 -1 // 0 0 0 // 1 2 1 m_VerticalSobelFilter( pPred, predBufStride, pdDerivate[1], width, width, height ); // solve delta x and y for ( int row = 0; row < iParaNum; row++ ) { memset( &i64EqualCoeff[row][0], 0, iParaNum * sizeof( int64_t ) ); } m_EqualCoeffComputer( piError, width, pdDerivate, width, i64EqualCoeff, width, height , (pu.cu->affineType == AFFINEMODEL_6PARAM) ); for ( int row = 0; row < iParaNum; row++ ) { for ( int i = 0; i < iParaNum; i++ ) { pdEqualCoeff[row][i] = (double)i64EqualCoeff[row][i]; } } double dAffinePara[6]; double dDeltaMv[6]; Mv acDeltaMv[3]; solveEqual( pdEqualCoeff, affineParaNum, dAffinePara ); // convert to delta mv dDeltaMv[0] = dAffinePara[0]; dDeltaMv[2] = dAffinePara[2]; if ( pu.cu->affineType == AFFINEMODEL_6PARAM ) { dDeltaMv[1] = dAffinePara[1] * width + dAffinePara[0]; dDeltaMv[3] = dAffinePara[3] * width + dAffinePara[2]; dDeltaMv[4] = dAffinePara[4] * height + dAffinePara[0]; dDeltaMv[5] = dAffinePara[5] * height + dAffinePara[2]; } else { dDeltaMv[1] = dAffinePara[1] * width + dAffinePara[0]; dDeltaMv[3] = -dAffinePara[3] * width + dAffinePara[2]; } int mvShift = MV_FRACTIONAL_BITS_DIFF - mvdPrecision; int multiShift = 1 << ( MV_FRACTIONAL_BITS_DIFF + mvdPrecision ); acDeltaMv[0] = Mv( ( 
int ) ( dDeltaMv[0] * multiShift + SIGN( dDeltaMv[0] ) * 0.5 ) << mvShift, ( int ) ( dDeltaMv[2] * multiShift + SIGN( dDeltaMv[2] ) * 0.5 ) << mvShift ); acDeltaMv[1] = Mv( ( int ) ( dDeltaMv[1] * multiShift + SIGN( dDeltaMv[1] ) * 0.5 ) << mvShift, ( int ) ( dDeltaMv[3] * multiShift + SIGN( dDeltaMv[3] ) * 0.5 ) << mvShift ); if ( pu.cu->affineType == AFFINEMODEL_6PARAM ) { acDeltaMv[2] = Mv( ( int ) ( dDeltaMv[4] * multiShift + SIGN( dDeltaMv[4] ) * 0.5 ) << mvShift, ( int ) ( dDeltaMv[5] * multiShift + SIGN( dDeltaMv[5] ) * 0.5 ) << mvShift ); } if ( !m_pcEncCfg->getUseAffineAmvrEncOpt() ) { bool bAllZero = false; for ( int i = 0; i < mvNum; i++ ) { Mv deltaMv = acDeltaMv[i]; if ( pu.cu->imv == 2 ) { deltaMv.roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_HALF ); } if ( deltaMv.getHor() != 0 || deltaMv.getVer() != 0 ) { bAllZero = false; break; } bAllZero = true; } if ( bAllZero ) break; } // do motion compensation with updated mv for ( int i = 0; i < mvNum; i++ ) { acMvTemp[i] += acDeltaMv[i]; acMvTemp[i].hor = Clip3( -131072, 131071, acMvTemp[i].hor ); acMvTemp[i].ver = Clip3( -131072, 131071, acMvTemp[i].ver ); if ( pu.cu->imv == 0 ) { acMvTemp[i].roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); } else if ( pu.cu->imv == 2 ) { acMvTemp[i].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT ); } if( m_pcEncCfg->getMCTSEncConstraint() ) { MCTSHelper::clipMvToArea( acMvTemp[i], pu.cu->Y(), pu.cs->picture->mctsInfo.getTileAreaSubPelRestricted( pu ), *pu.cs->sps ); } else { clipMv(acMvTemp[i], pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps); } } if ( m_pcEncCfg->getUseAffineAmvrEncOpt() ) { bool identical = false; for ( int k = iter; k >= 0; k-- ) { if ( acMvTemp[0] == prevIterMv[k][0] && acMvTemp[1] == prevIterMv[k][1] ) { identical = pu.cu->affineType ? 
acMvTemp[2] == prevIterMv[k][2] : true; if ( identical ) { break; } } } if ( identical ) { break; } } xPredAffineBlk( COMPONENT_Y, pu, refPic, acMvTemp, predBuf, false, pu.cu->slice->clpRng( COMPONENT_Y ) ); // get error #if JVET_N0329_IBC_SEARCH_IMP Distortion uiCostTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, distFunc); #else Distortion uiCostTemp = m_pcRdCost->getDistPart( predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_HAD ); #endif DTRACE( g_trace_ctx, D_COMMON, " (%d) uiCostTemp=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiCostTemp ); // get cost with mv m_pcRdCost->setCostScale(0); uint32_t uiBitsTemp = ruiBits; if ( pu.cu->imv == 2 && m_pcEncCfg->getUseAffineAmvrEncOpt() ) { uiBitsTemp = dirBits + xDetermineBestMvp( pu, acMvTemp, bestMvpIdx, aamvpi ); acMvPred[0] = aamvpi.mvCandLT[bestMvpIdx]; acMvPred[1] = aamvpi.mvCandRT[bestMvpIdx]; acMvPred[2] = aamvpi.mvCandLB[bestMvpIdx]; } else { uiBitsTemp += xCalcAffineMVBits( pu, acMvTemp, acMvPred, pu.cu->imv != 1 ); } uiCostTemp = (Distortion)( floor( fWeight * (double)uiCostTemp ) + (double)m_pcRdCost->getCost( uiBitsTemp ) ); // store best cost and mv if ( uiCostTemp < uiCostBest ) { uiCostBest = uiCostTemp; uiBitsBest = uiBitsTemp; memcpy( acMv, acMvTemp, sizeof(Mv) * 3 ); mvpIdx = bestMvpIdx; } } auto checkCPMVRdCost = [&](Mv ctrlPtMv[3]) { xPredAffineBlk(COMPONENT_Y, pu, refPic, ctrlPtMv, predBuf, false, pu.cu->slice->clpRng(COMPONENT_Y)); // get error #if JVET_N0329_IBC_SEARCH_IMP Distortion costTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, distFunc); #else Distortion costTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_HAD); #endif // get cost with mv m_pcRdCost->setCostScale(0); uint32_t bitsTemp = ruiBits; bitsTemp += xCalcAffineMVBits( pu, ctrlPtMv, acMvPred, 
pu.cu->imv != 1 ); costTemp = (Distortion)(floor(fWeight * (double)costTemp) + (double)m_pcRdCost->getCost(bitsTemp)); // store best cost and mv if (costTemp < uiCostBest) { uiCostBest = costTemp; uiBitsBest = bitsTemp; ::memcpy(acMv, ctrlPtMv, sizeof(Mv) * 3); } }; if (uiCostBest <= AFFINE_ME_LIST_MVP_TH*m_hevcCost) { //search 8 nearest neighbors; integer distance int testPos[8][2] = { { -1, 0 },{ 0, -1 },{ 0, 1 },{ 1, 0 },{ -1, -1 },{ -1, 1 },{ 1, 1 },{ 1, -1 } }; const uint32_t mvShift = pu.cu->imv == 1 ? 0 : ( pu.cu->imv == 2 ? ( MV_FRACTIONAL_BITS_DIFF << 1 ) : MV_FRACTIONAL_BITS_DIFF ); const int maxSearchRound = 3; if ( m_pcEncCfg->getUseAffineAmvrEncOpt() && m_pcEncCfg->getIntraPeriod() != ( uint32_t ) -1 && pu.cu->imv ) { for ( int rnd = 0; rnd < ( pu.cu->slice->getTLayer() <= 2 ? maxSearchRound : maxSearchRound - 1 ); rnd++ ) { bool modelChange = false; //search the model parameters with finear granularity; for ( int j = 0; j < mvNum; j++ ) { for ( int iter = 0; iter < 2; iter++ ) { Mv centerMv[3]; memcpy( centerMv, acMv, sizeof( Mv ) * 3 ); memcpy( acMvTemp, acMv, sizeof( Mv ) * 3 ); for ( int i = ( iter ? 0: 4 ); i < ( iter ? 
4 : 8 ); i++ ) { acMvTemp[j].set( centerMv[j].getHor() + ( testPos[i][0] << mvShift ), centerMv[j].getVer() + ( testPos[i][1] << mvShift ) ); clipMv( acMvTemp[j], pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps ); xPredAffineBlk( COMPONENT_Y, pu, refPic, acMvTemp, predBuf, false, pu.cu->slice->clpRng( COMPONENT_Y ) ); #if JVET_N0329_IBC_SEARCH_IMP Distortion costTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, distFunc); #else Distortion costTemp = m_pcRdCost->getDistPart( predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth( CHANNEL_TYPE_LUMA ), COMPONENT_Y, DF_HAD ); #endif uint32_t bitsTemp = ruiBits; bitsTemp += xCalcAffineMVBits( pu, acMvTemp, acMvPred, pu.cu->imv != 1 ); costTemp = ( Distortion ) ( floor( fWeight * ( double ) costTemp ) + ( double ) m_pcRdCost->getCost( bitsTemp ) ); if ( costTemp < uiCostBest ) { uiCostBest = costTemp; uiBitsBest = bitsTemp; ::memcpy( acMv, acMvTemp, sizeof( Mv ) * 3 ); modelChange = true; } } } } if ( !modelChange ) { break; } } } Mv mvPredTmp[3] = { acMvPred[0], acMvPred[1], acMvPred[2] }; if ( pu.cu->imv != 1 ) { mvPredTmp[0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); mvPredTmp[1].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); mvPredTmp[2].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); } Mv mvME[3]; ::memcpy(mvME, acMv, sizeof(Mv) * 3); Mv dMv = mvME[0] - mvPredTmp[0]; for (int j = 0; j < mvNum; j++) { if ((!j && mvME[j] != mvPredTmp[j]) || (j && mvME[j] != (mvPredTmp[j] + dMv))) { ::memcpy(acMvTemp, mvME, sizeof(Mv) * 3); acMvTemp[j] = mvPredTmp[j]; if (j) acMvTemp[j] += dMv; checkCPMVRdCost(acMvTemp); } } //keep the rotation/zoom; if (mvME[0] != mvPredTmp[0]) { ::memcpy(acMvTemp, mvME, sizeof(Mv) * 3); for (int i = 1; i < mvNum; i++) { acMvTemp[i] -= dMv; } acMvTemp[0] = mvPredTmp[0]; checkCPMVRdCost(acMvTemp); } //keep the translation; if (pu.cu->affineType == AFFINEMODEL_6PARAM && mvME[1] != (mvPredTmp[1] + dMv) 
&& mvME[2] != (mvPredTmp[2] + dMv)) { ::memcpy(acMvTemp, mvME, sizeof(Mv) * 3); acMvTemp[1] = mvPredTmp[1] + dMv; acMvTemp[2] = mvPredTmp[2] + dMv; checkCPMVRdCost(acMvTemp); } { dMv = acMv[1] - acMv[0]; if (pu.cu->affineType == AFFINEMODEL_4PARAM && (dMv.getAbsHor() > 4 || dMv.getAbsVer() > 4)) { int testPos[4][2] = { { -1, 0 },{ 0, -1 },{ 0, 1 },{ 1, 0 } }; Mv centerMv[3]; const uint32_t mvShift = pu.cu->imv == 1 ? 0 : ( pu.cu->imv == 2 ? ( MV_FRACTIONAL_BITS_DIFF << 1 ) : MV_FRACTIONAL_BITS_DIFF ); ::memcpy(centerMv, acMv, sizeof(Mv) * 3); acMvTemp[0] = centerMv[0]; for (int i = 0; i < 4; i++) { acMvTemp[1].set( centerMv[1].getHor() + ( testPos[i][0] << mvShift ), centerMv[1].getVer() + ( testPos[i][1] << mvShift ) ); checkCPMVRdCost(acMvTemp); } } } } if ( pu.cu->imv != 1 ) { acMv[0].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); acMv[1].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); acMv[2].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); } acMvPred[0] = aamvpi.mvCandLT[mvpIdx]; acMvPred[1] = aamvpi.mvCandRT[mvpIdx]; acMvPred[2] = aamvpi.mvCandLB[mvpIdx]; // free buffer for (int i = 0; i<iParaNum; i++) delete[]pdEqualCoeff[i]; delete[]pdEqualCoeff; ruiBits = uiBitsBest; ruiCost = uiCostBest; DTRACE( g_trace_ctx, D_COMMON, " (%d) uiBitsBest=%d, uiCostBest=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiBitsBest, uiCostBest ); } void InterSearch::xEstimateAffineAMVP( PredictionUnit& pu, AffineAMVPInfo& affineAMVPInfo, PelUnitBuf& origBuf, RefPicList eRefPicList, int iRefIdx, Mv acMvPred[3], Distortion* puiDistBiP ) { Mv bestMvLT, bestMvRT, bestMvLB; int iBestIdx = 0; Distortion uiBestCost = std::numeric_limits<Distortion>::max(); // Fill the MV Candidates PU::fillAffineMvpCand( pu, eRefPicList, iRefIdx, affineAMVPInfo ); CHECK( affineAMVPInfo.numCand == 0, "Assertion failed." 
); PelUnitBuf predBuf = m_tmpStorageLCU.getBuf( UnitAreaRelative(*pu.cu, pu) ); // initialize Mvp index & Mvp iBestIdx = 0; for( int i = 0 ; i < affineAMVPInfo.numCand; i++ ) { Mv mv[3] = { affineAMVPInfo.mvCandLT[i], affineAMVPInfo.mvCandRT[i], affineAMVPInfo.mvCandLB[i] }; Distortion uiTmpCost = xGetAffineTemplateCost( pu, origBuf, predBuf, mv, i, AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdx ); if ( uiBestCost > uiTmpCost ) { uiBestCost = uiTmpCost; bestMvLT = affineAMVPInfo.mvCandLT[i]; bestMvRT = affineAMVPInfo.mvCandRT[i]; bestMvLB = affineAMVPInfo.mvCandLB[i]; iBestIdx = i; *puiDistBiP = uiTmpCost; } } // Setting Best MVP acMvPred[0] = bestMvLT; acMvPred[1] = bestMvRT; acMvPred[2] = bestMvLB; pu.mvpIdx[eRefPicList] = iBestIdx; pu.mvpNum[eRefPicList] = affineAMVPInfo.numCand; DTRACE( g_trace_ctx, D_COMMON, "#estAffi=%d \n", affineAMVPInfo.numCand ); } void InterSearch::xCopyAffineAMVPInfo (AffineAMVPInfo& src, AffineAMVPInfo& dst) { dst.numCand = src.numCand; DTRACE( g_trace_ctx, D_COMMON, " (%d) #copyAffi=%d \n", DTRACE_GET_COUNTER( g_trace_ctx, D_COMMON ), src.numCand ); ::memcpy( dst.mvCandLT, src.mvCandLT, sizeof(Mv)*src.numCand ); ::memcpy( dst.mvCandRT, src.mvCandRT, sizeof(Mv)*src.numCand ); ::memcpy( dst.mvCandLB, src.mvCandLB, sizeof(Mv)*src.numCand ); } /** * \brief Generate half-sample interpolated block * * \param pattern Reference picture ROI * \param biPred Flag indicating whether block is for biprediction */ void InterSearch::xExtDIFUpSamplingH( CPelBuf* pattern ) { const ClpRng& clpRng = m_lumaClpRng; int width = pattern->width; int height = pattern->height; int srcStride = pattern->stride; int intStride = width + 1; int dstStride = width + 1; Pel *intPtr; Pel *dstPtr; int filterSize = NTAPS_LUMA; int halfFilterSize = (filterSize>>1); const Pel *srcPtr = pattern->buf - halfFilterSize*srcStride - 1; const ChromaFormat chFmt = m_currChromaFormat; m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, m_filteredBlockTmp[0][0], intStride, width + 1, height + 
filterSize, 0 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng); if (!m_skipFracME) { m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, m_filteredBlockTmp[2][0], intStride, width + 1, height + filterSize, 2 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng); } intPtr = m_filteredBlockTmp[0][0] + halfFilterSize * intStride + 1; dstPtr = m_filteredBlock[0][0][0]; m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng); if (m_skipFracME) { return; } intPtr = m_filteredBlockTmp[0][0] + (halfFilterSize - 1) * intStride + 1; dstPtr = m_filteredBlock[2][0][0]; m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng); intPtr = m_filteredBlockTmp[2][0] + halfFilterSize * intStride; dstPtr = m_filteredBlock[0][2][0]; m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 1, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng); intPtr = m_filteredBlockTmp[2][0] + (halfFilterSize - 1) * intStride; dstPtr = m_filteredBlock[2][2][0]; m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 1, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng); } /** * \brief Generate quarter-sample interpolated blocks * * \param pattern Reference picture ROI * \param halfPelRef Half-pel mv * \param biPred Flag indicating whether block is for biprediction */ void InterSearch::xExtDIFUpSamplingQ( CPelBuf* pattern, Mv halfPelRef ) { const ClpRng& clpRng = m_lumaClpRng; int width = pattern->width; int height = pattern->height; int srcStride = pattern->stride; Pel const* srcPtr; int intStride = width + 1; int dstStride = width + 1; Pel *intPtr; Pel *dstPtr; int filterSize = NTAPS_LUMA; int halfFilterSize = (filterSize>>1); int extHeight = (halfPelRef.getVer() == 0) ? 
height + filterSize : height + filterSize-1; const ChromaFormat chFmt = m_currChromaFormat; // Horizontal filter 1/4 srcPtr = pattern->buf - halfFilterSize * srcStride - 1; intPtr = m_filteredBlockTmp[1][0]; if (halfPelRef.getVer() > 0) { srcPtr += srcStride; } if (halfPelRef.getHor() >= 0) { srcPtr += 1; } m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, intPtr, intStride, width, extHeight, 1 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng); // Horizontal filter 3/4 srcPtr = pattern->buf - halfFilterSize*srcStride - 1; intPtr = m_filteredBlockTmp[3][0]; if (halfPelRef.getVer() > 0) { srcPtr += srcStride; } if (halfPelRef.getHor() > 0) { srcPtr += 1; } m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, intPtr, intStride, width, extHeight, 3 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng); // Generate @ 1,1 intPtr = m_filteredBlockTmp[1][0] + (halfFilterSize-1) * intStride; dstPtr = m_filteredBlock[1][1][0]; if (halfPelRef.getVer() == 0) { intPtr += intStride; } m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 1 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng); // Generate @ 3,1 intPtr = m_filteredBlockTmp[1][0] + (halfFilterSize-1) * intStride; dstPtr = m_filteredBlock[3][1][0]; m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 3 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng); if (halfPelRef.getVer() != 0) { // Generate @ 2,1 intPtr = m_filteredBlockTmp[1][0] + (halfFilterSize - 1) * intStride; dstPtr = m_filteredBlock[2][1][0]; if (halfPelRef.getVer() == 0) { intPtr += intStride; } m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng); // Generate @ 2,3 intPtr = m_filteredBlockTmp[3][0] + (halfFilterSize - 1) * intStride; dstPtr = m_filteredBlock[2][3][0]; if (halfPelRef.getVer() == 0) { intPtr += intStride; } m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 2 << 
MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng); } else { // Generate @ 0,1 intPtr = m_filteredBlockTmp[1][0] + halfFilterSize * intStride; dstPtr = m_filteredBlock[0][1][0]; m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng); // Generate @ 0,3 intPtr = m_filteredBlockTmp[3][0] + halfFilterSize * intStride; dstPtr = m_filteredBlock[0][3][0]; m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng); } if (halfPelRef.getHor() != 0) { // Generate @ 1,2 intPtr = m_filteredBlockTmp[2][0] + (halfFilterSize - 1) * intStride; dstPtr = m_filteredBlock[1][2][0]; if (halfPelRef.getHor() > 0) { intPtr += 1; } if (halfPelRef.getVer() >= 0) { intPtr += intStride; } m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 1 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng); // Generate @ 3,2 intPtr = m_filteredBlockTmp[2][0] + (halfFilterSize - 1) * intStride; dstPtr = m_filteredBlock[3][2][0]; if (halfPelRef.getHor() > 0) { intPtr += 1; } if (halfPelRef.getVer() > 0) { intPtr += intStride; } m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 3 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng); } else { // Generate @ 1,0 intPtr = m_filteredBlockTmp[0][0] + (halfFilterSize - 1) * intStride + 1; dstPtr = m_filteredBlock[1][0][0]; if (halfPelRef.getVer() >= 0) { intPtr += intStride; } m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 1 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng); // Generate @ 3,0 intPtr = m_filteredBlockTmp[0][0] + (halfFilterSize - 1) * intStride + 1; dstPtr = m_filteredBlock[3][0][0]; if (halfPelRef.getVer() > 0) { intPtr += intStride; } m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 3 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng); } // 
Generate @ 1,3 intPtr = m_filteredBlockTmp[3][0] + (halfFilterSize - 1) * intStride; dstPtr = m_filteredBlock[1][3][0]; if (halfPelRef.getVer() == 0) { intPtr += intStride; } m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 1 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng); // Generate @ 3,3 intPtr = m_filteredBlockTmp[3][0] + (halfFilterSize - 1) * intStride; dstPtr = m_filteredBlock[3][3][0]; m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 3 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng); } //! set wp tables void InterSearch::setWpScalingDistParam( int iRefIdx, RefPicList eRefPicListCur, Slice *pcSlice ) { if ( iRefIdx<0 ) { m_cDistParam.applyWeight = false; return; } WPScalingParam *wp0 , *wp1; m_cDistParam.applyWeight = ( pcSlice->getSliceType()==P_SLICE && pcSlice->testWeightPred() ) || ( pcSlice->getSliceType()==B_SLICE && pcSlice->testWeightBiPred() ) ; if ( !m_cDistParam.applyWeight ) { return; } int iRefIdx0 = ( eRefPicListCur == REF_PIC_LIST_0 ) ? iRefIdx : (-1); int iRefIdx1 = ( eRefPicListCur == REF_PIC_LIST_1 ) ? 
iRefIdx : (-1); getWpScaling( pcSlice, iRefIdx0, iRefIdx1, wp0 , wp1 ); if ( iRefIdx0 < 0 ) { wp0 = NULL; } if ( iRefIdx1 < 0 ) { wp1 = NULL; } m_cDistParam.wpCur = NULL; if ( eRefPicListCur == REF_PIC_LIST_0 ) { m_cDistParam.wpCur = wp0; } else { m_cDistParam.wpCur = wp1; } } void InterSearch::xEncodeInterResidualQT(CodingStructure &cs, Partitioner &partitioner, const ComponentID &compID) { const UnitArea& currArea = partitioner.currArea(); const TransformUnit &currTU = *cs.getTU(currArea.lumaPos(), partitioner.chType); const CodingUnit &cu = *currTU.cu; const unsigned currDepth = partitioner.currTrDepth; const bool bSubdiv = currDepth != currTU.depth; if (compID == MAX_NUM_TBLOCKS) // we are not processing a channel, instead we always recurse and code the CBFs { if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) ) { CHECK( !bSubdiv, "Not performing the implicit TU split" ); } else if( cu.sbtInfo && partitioner.canSplit( PartSplit( cu.getSbtTuSplit() ), cs ) ) { CHECK( !bSubdiv, "Not performing the implicit TU split - sbt" ); } else { CHECK( bSubdiv, "transformsplit not supported" ); } CHECK(CU::isIntra(cu), "Inter search provided with intra CU"); if( cu.chromaFormat != CHROMA_400 ) { const bool firstCbfOfCU = ( currDepth == 0 ); { if( firstCbfOfCU || TU::getCbfAtDepth( currTU, COMPONENT_Cb, currDepth - 1 ) ) { const bool chroma_cbf = TU::getCbfAtDepth( currTU, COMPONENT_Cb, currDepth ); if( !( cu.sbtInfo && currDepth == 1 ) ) m_CABACEstimator->cbf_comp( cs, chroma_cbf, currArea.blocks[COMPONENT_Cb], currDepth ); } if( firstCbfOfCU || TU::getCbfAtDepth( currTU, COMPONENT_Cr, currDepth - 1 ) ) { const bool chroma_cbf = TU::getCbfAtDepth( currTU, COMPONENT_Cr, currDepth ); if( !( cu.sbtInfo && currDepth == 1 ) ) m_CABACEstimator->cbf_comp( cs, chroma_cbf, currArea.blocks[COMPONENT_Cr], currDepth, TU::getCbfAtDepth( currTU, COMPONENT_Cb, currDepth ) ); } } } if( !bSubdiv && !( cu.sbtInfo && currTU.noResidual ) ) { m_CABACEstimator->cbf_comp( cs, TU::getCbfAtDepth( 
currTU, COMPONENT_Y, currDepth ), currArea.Y(), currDepth ); } } if (!bSubdiv) { if (compID != MAX_NUM_TBLOCKS) // we have already coded the CBFs, so now we code coefficients { if( currArea.blocks[compID].valid() ) { if( TU::hasCrossCompPredInfo( currTU, compID ) ) { m_CABACEstimator->cross_comp_pred( currTU, compID ); } if( TU::getCbf( currTU, compID ) ) { m_CABACEstimator->residual_coding( currTU, compID ); } } } } else { if( compID == MAX_NUM_TBLOCKS || TU::getCbfAtDepth( currTU, compID, currDepth ) ) { if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) ) { partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs ); } else if( cu.sbtInfo && partitioner.canSplit( PartSplit( cu.getSbtTuSplit() ), cs ) ) { partitioner.splitCurrArea( PartSplit( cu.getSbtTuSplit() ), cs ); } else THROW( "Implicit TU split not available!" ); do { xEncodeInterResidualQT( cs, partitioner, compID ); } while( partitioner.nextPart( cs ) ); partitioner.exitCurrSplit(); } } } void InterSearch::calcMinDistSbt( CodingStructure &cs, const CodingUnit& cu, const uint8_t sbtAllowed ) { if( !sbtAllowed ) { m_estMinDistSbt[NUMBER_SBT_MODE] = 0; for( int comp = 0; comp < getNumberValidTBlocks( *cs.pcv ); comp++ ) { const ComponentID compID = ComponentID( comp ); CPelBuf pred = cs.getPredBuf( compID ); CPelBuf org = cs.getOrgBuf( compID ); m_estMinDistSbt[NUMBER_SBT_MODE] += m_pcRdCost->getDistPart( org, pred, cs.sps->getBitDepth( toChannelType( compID ) ), compID, DF_SSE ); } return; } //SBT fast algorithm 2.1 : estimate a minimum RD cost of a SBT mode based on the luma distortion of uncoded part and coded part (assuming distorted can be reduced to 1/16); // if this cost is larger than the best cost, no need to try a specific SBT mode int cuWidth = cu.lwidth(); int cuHeight = cu.lheight(); int numPartX = cuWidth >= 16 ? 4 : ( cuWidth == 4 ? 1 : 2 ); int numPartY = cuHeight >= 16 ? 4 : ( cuHeight == 4 ? 
1 : 2 ); Distortion dist[4][4]; memset( dist, 0, sizeof( Distortion ) * 16 ); for( uint32_t c = 0; c < getNumberValidTBlocks( *cs.pcv ); c++ ) { const ComponentID compID = ComponentID( c ); const CompArea& compArea = cu.blocks[compID]; const CPelBuf orgPel = cs.getOrgBuf( compArea ); const CPelBuf predPel = cs.getPredBuf( compArea ); int lengthX = compArea.width / numPartX; int lengthY = compArea.height / numPartY; int strideOrg = orgPel.stride; int stridePred = predPel.stride; uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT( ( *cs.sps.getBitDepth( toChannelType( compID ) ) - 8 ) << 1 ); Intermediate_Int iTemp; //calc distY of 16 sub parts for( int j = 0; j < numPartY; j++ ) { for( int i = 0; i < numPartX; i++ ) { int posX = i * lengthX; int posY = j * lengthY; const Pel* ptrOrg = orgPel.bufAt( posX, posY ); const Pel* ptrPred = predPel.bufAt( posX, posY ); Distortion uiSum = 0; for( int n = 0; n < lengthY; n++ ) { for( int m = 0; m < lengthX; m++ ) { iTemp = ptrOrg[m] - ptrPred[m]; uiSum += Distortion( ( iTemp * iTemp ) >> uiShift ); } ptrOrg += strideOrg; ptrPred += stridePred; } if( isChroma( compID ) ) { uiSum = (Distortion)( uiSum * m_pcRdCost->getChromaWeight() ); } dist[j][i] += uiSum; } } } //SSE of a CU m_estMinDistSbt[NUMBER_SBT_MODE] = 0; for( int j = 0; j < numPartY; j++ ) { for( int i = 0; i < numPartX; i++ ) { m_estMinDistSbt[NUMBER_SBT_MODE] += dist[j][i]; } } //init per-mode dist for( int i = SBT_VER_H0; i < NUMBER_SBT_MODE; i++ ) { m_estMinDistSbt[i] = std::numeric_limits<uint64_t>::max(); } //SBT fast algorithm 1: not try SBT if the residual is too small to compensate bits for encoding residual info uint64_t minNonZeroResiFracBits = 12 << SCALE_BITS; if( m_pcRdCost->calcRdCost( 0, m_estMinDistSbt[NUMBER_SBT_MODE] ) < m_pcRdCost->calcRdCost( minNonZeroResiFracBits, 0 ) ) { m_skipSbtAll = true; return; } //derive estimated minDist of SBT = zero-residual part distortion + non-zero residual part distortion / 16 int shift = 5; Distortion 
distResiPart = 0, distNoResiPart = 0; if( CU::targetSbtAllowed( SBT_VER_HALF, sbtAllowed ) ) { int offsetResiPart = 0; int offsetNoResiPart = numPartX / 2; distResiPart = distNoResiPart = 0; assert( numPartX >= 2 ); for( int j = 0; j < numPartY; j++ ) { for( int i = 0; i < numPartX / 2; i++ ) { distResiPart += dist[j][i + offsetResiPart]; distNoResiPart += dist[j][i + offsetNoResiPart]; } } m_estMinDistSbt[SBT_VER_H0] = ( distResiPart >> shift ) + distNoResiPart; m_estMinDistSbt[SBT_VER_H1] = ( distNoResiPart >> shift ) + distResiPart; } if( CU::targetSbtAllowed( SBT_HOR_HALF, sbtAllowed ) ) { int offsetResiPart = 0; int offsetNoResiPart = numPartY / 2; assert( numPartY >= 2 ); distResiPart = distNoResiPart = 0; for( int j = 0; j < numPartY / 2; j++ ) { for( int i = 0; i < numPartX; i++ ) { distResiPart += dist[j + offsetResiPart][i]; distNoResiPart += dist[j + offsetNoResiPart][i]; } } m_estMinDistSbt[SBT_HOR_H0] = ( distResiPart >> shift ) + distNoResiPart; m_estMinDistSbt[SBT_HOR_H1] = ( distNoResiPart >> shift ) + distResiPart; } if( CU::targetSbtAllowed( SBT_VER_QUAD, sbtAllowed ) ) { assert( numPartX == 4 ); m_estMinDistSbt[SBT_VER_Q0] = m_estMinDistSbt[SBT_VER_Q1] = 0; for( int j = 0; j < numPartY; j++ ) { m_estMinDistSbt[SBT_VER_Q0] += dist[j][0] + ( ( dist[j][1] + dist[j][2] + dist[j][3] ) << shift ); m_estMinDistSbt[SBT_VER_Q1] += dist[j][3] + ( ( dist[j][0] + dist[j][1] + dist[j][2] ) << shift ); } m_estMinDistSbt[SBT_VER_Q0] = m_estMinDistSbt[SBT_VER_Q0] >> shift; m_estMinDistSbt[SBT_VER_Q1] = m_estMinDistSbt[SBT_VER_Q1] >> shift; } if( CU::targetSbtAllowed( SBT_HOR_QUAD, sbtAllowed ) ) { assert( numPartY == 4 ); m_estMinDistSbt[SBT_HOR_Q0] = m_estMinDistSbt[SBT_HOR_Q1] = 0; for( int i = 0; i < numPartX; i++ ) { m_estMinDistSbt[SBT_HOR_Q0] += dist[0][i] + ( ( dist[1][i] + dist[2][i] + dist[3][i] ) << shift ); m_estMinDistSbt[SBT_HOR_Q1] += dist[3][i] + ( ( dist[0][i] + dist[1][i] + dist[2][i] ) << shift ); } m_estMinDistSbt[SBT_HOR_Q0] = 
m_estMinDistSbt[SBT_HOR_Q0] >> shift; m_estMinDistSbt[SBT_HOR_Q1] = m_estMinDistSbt[SBT_HOR_Q1] >> shift; } //SBT fast algorithm 5: try N SBT modes with the lowest distortion Distortion temp[NUMBER_SBT_MODE]; memcpy( temp, m_estMinDistSbt, sizeof( Distortion ) * NUMBER_SBT_MODE ); memset( m_sbtRdoOrder, 255, NUMBER_SBT_MODE ); int startIdx = 0, numRDO; numRDO = CU::targetSbtAllowed( SBT_VER_HALF, sbtAllowed ) + CU::targetSbtAllowed( SBT_HOR_HALF, sbtAllowed ); numRDO = std::min( ( numRDO << 1 ), SBT_NUM_RDO ); for( int i = startIdx; i < startIdx + numRDO; i++ ) { Distortion minDist = std::numeric_limits<uint64_t>::max(); for( int n = SBT_VER_H0; n <= SBT_HOR_H1; n++ ) { if( temp[n] < minDist ) { minDist = temp[n]; m_sbtRdoOrder[i] = n; } } temp[m_sbtRdoOrder[i]] = std::numeric_limits<uint64_t>::max(); } startIdx += numRDO; numRDO = CU::targetSbtAllowed( SBT_VER_QUAD, sbtAllowed ) + CU::targetSbtAllowed( SBT_HOR_QUAD, sbtAllowed ); numRDO = std::min( ( numRDO << 1 ), SBT_NUM_RDO ); for( int i = startIdx; i < startIdx + numRDO; i++ ) { Distortion minDist = std::numeric_limits<uint64_t>::max(); for( int n = SBT_VER_Q0; n <= SBT_HOR_Q1; n++ ) { if( temp[n] < minDist ) { minDist = temp[n]; m_sbtRdoOrder[i] = n; } } temp[m_sbtRdoOrder[i]] = std::numeric_limits<uint64_t>::max(); } } uint8_t InterSearch::skipSbtByRDCost( int width, int height, int mtDepth, uint8_t sbtIdx, uint8_t sbtPos, double bestCost, Distortion distSbtOff, double costSbtOff, bool rootCbfSbtOff ) { int sbtMode = CU::getSbtMode( sbtIdx, sbtPos ); //SBT fast algorithm 2.2 : estimate a minimum RD cost of a SBT mode based on the luma distortion of uncoded part and coded part (assuming distorted can be reduced to 1/16); // if this cost is larger than the best cost, no need to try a specific SBT mode if( m_pcRdCost->calcRdCost( 11 << SCALE_BITS, m_estMinDistSbt[sbtMode] ) > bestCost ) { return 0; //early skip type 0 } if( costSbtOff != MAX_DOUBLE ) { if( !rootCbfSbtOff ) { //SBT fast algorithm 3: skip SBT 
when the residual is too small (estCost is more accurate than fast algorithm 1, counting PU mode bits) uint64_t minNonZeroResiFracBits = 10 << SCALE_BITS; Distortion distResiPart; if( sbtIdx == SBT_VER_HALF || sbtIdx == SBT_HOR_HALF ) { distResiPart = (Distortion)( ( ( m_estMinDistSbt[NUMBER_SBT_MODE] - m_estMinDistSbt[sbtMode] ) * 9 ) >> 4 ); } else { distResiPart = (Distortion)( ( ( m_estMinDistSbt[NUMBER_SBT_MODE] - m_estMinDistSbt[sbtMode] ) * 3 ) >> 3 ); } double estCost = ( costSbtOff - m_pcRdCost->calcRdCost( 0 << SCALE_BITS, distSbtOff ) ) + m_pcRdCost->calcRdCost( minNonZeroResiFracBits, m_estMinDistSbt[sbtMode] + distResiPart ); if( estCost > costSbtOff ) { return 1; } if( estCost > bestCost ) { return 2; } } else { //SBT fast algorithm 4: skip SBT when an estimated RD cost is larger than the bestCost double weight = sbtMode > SBT_HOR_H1 ? 0.4 : 0.6; double estCost = ( ( costSbtOff - m_pcRdCost->calcRdCost( 0 << SCALE_BITS, distSbtOff ) ) * weight ) + m_pcRdCost->calcRdCost( 0 << SCALE_BITS, m_estMinDistSbt[sbtMode] ); if( estCost > bestCost ) { return 3; } } } return MAX_UCHAR; } void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &partitioner, Distortion *puiZeroDist /*= NULL*/ , const bool luma, const bool chroma ) { const UnitArea& currArea = partitioner.currArea(); const SPS &sps = *cs.sps; #if JVET_N0671_RDCOST_FIX m_pcRdCost->setChromaFormat(sps.getChromaFormatIdc()); #endif const uint32_t numValidComp = getNumberValidComponents( sps.getChromaFormatIdc() ); const uint32_t numTBlocks = getNumberValidTBlocks ( *cs.pcv ); const CodingUnit &cu = *cs.getCU(partitioner.chType); const unsigned currDepth = partitioner.currTrDepth; bool bCheckFull = !partitioner.canSplit( TU_MAX_TR_SPLIT, cs ); if( cu.sbtInfo && partitioner.canSplit( PartSplit( cu.getSbtTuSplit() ), cs ) ) { bCheckFull = false; } bool bCheckSplit = !bCheckFull; // get temporary data CodingStructure *csSplit = nullptr; CodingStructure *csFull = nullptr; if 
(bCheckSplit) { csSplit = &cs; } else if (bCheckFull) { csFull = &cs; } Distortion uiSingleDist = 0; Distortion uiSingleDistComp [3] = { 0, 0, 0 }; TCoeff uiAbsSum [3] = { 0, 0, 0 }; const TempCtx ctxStart ( m_CtxCache, m_CABACEstimator->getCtx() ); TempCtx ctxBest ( m_CtxCache ); if (bCheckFull) { TransformUnit &tu = csFull->addTU(CS::isDualITree(cs) ? cu : currArea, partitioner.chType); tu.depth = currDepth; tu.mtsIdx = 0; tu.checkTuNoResidual( partitioner.currPartIdx() ); const Slice &slice = *cs.slice; if (slice.getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && slice.getReshapeInfo().getSliceReshapeChromaAdj()) { const CompArea &areaY = tu.blocks[COMPONENT_Y]; PelBuf piPredY = cs.getPredBuf(areaY); CompArea tmpArea(COMPONENT_Y, areaY.chromaFormat, Position(0, 0), areaY.size()); PelBuf tmpPred = m_tmpStorageLCU.getBuf(tmpArea); tmpPred.copyFrom(piPredY); if (!cu.firstPU->mhIntraFlag && !CU::isIBC(cu)) tmpPred.rspSignal(m_pcReshape->getFwdLUT()); const Pel avgLuma = tmpPred.computeAvg(); int adj = m_pcReshape->calculateChromaAdj(avgLuma); tu.setChromaAdj(adj); } double minCost [MAX_NUM_TBLOCKS]; m_CABACEstimator->resetBits(); memset(m_pTempPel, 0, sizeof(Pel) * tu.Y().area()); // not necessary needed for inside of recursion (only at the beginning) for (uint32_t i = 0; i < numTBlocks; i++) { minCost[i] = MAX_DOUBLE; } CodingStructure &saveCS = *m_pSaveCS[0]; saveCS.pcv = cs.pcv; saveCS.picture = cs.picture; saveCS.area.repositionTo(currArea); saveCS.clearTUs(); TransformUnit & bestTU = saveCS.addTU(CS::isDualITree(cs) ? 
cu : currArea, partitioner.chType); for( uint32_t c = 0; c < numTBlocks; c++ ) { const ComponentID compID = ComponentID(c); if (compID == COMPONENT_Y && !luma) continue; if (compID != COMPONENT_Y && !chroma) continue; const CompArea& compArea = tu.blocks[compID]; const int channelBitDepth = sps.getBitDepth(toChannelType(compID)); if( !tu.blocks[compID].valid() ) { continue; } const bool isCrossCPredictionAvailable = TU::hasCrossCompPredInfo( tu, compID ); int8_t preCalcAlpha = 0; const CPelBuf lumaResi = csFull->getResiBuf(tu.Y()); if (isCrossCPredictionAvailable) { csFull->getResiBuf( compArea ).copyFrom( cs.getOrgResiBuf( compArea ) ); preCalcAlpha = xCalcCrossComponentPredictionAlpha( tu, compID, m_pcEncCfg->getUseReconBasedCrossCPredictionEstimate() ); } const bool tsAllowed = TU::isTSAllowed ( tu, compID ); const bool mtsAllowed = TU::isMTSAllowed( tu, compID ); uint8_t nNumTransformCands = 1 + ( tsAllowed ? 1 : 0 ) + ( mtsAllowed ? 4 : 0 ); // DCT + TS + 4 MTS = 6 tests std::vector<TrMode> trModes; trModes.push_back( TrMode( 0, true ) ); //DCT2 nNumTransformCands = 1; //for a SBT-no-residual TU, the RDO process should be called once, in order to get the RD cost if( tsAllowed && !tu.noResidual ) { trModes.push_back( TrMode( 1, true ) ); nNumTransformCands++; } #if APPLY_SBT_SL_ON_MTS //skip MTS if DCT2 is the best if( mtsAllowed && ( !tu.cu->slice->getSPS()->getUseSBT() || CU::getSbtIdx( m_histBestSbt ) != SBT_OFF_DCT ) ) #else if( mtsAllowed ) #endif { for( int i = 2; i < 6; i++ ) { #if APPLY_SBT_SL_ON_MTS //skip the non-best Mts mode if( !tu.cu->slice->getSPS()->getUseSBT() || ( m_histBestMtsIdx == MAX_UCHAR || m_histBestMtsIdx == i ) ) { #endif trModes.push_back( TrMode( i, true ) ); nNumTransformCands++; #if APPLY_SBT_SL_ON_MTS } #endif } } const int crossCPredictionModesToTest = preCalcAlpha != 0 ? 
2 : 1; const int numTransformCandidates = nNumTransformCands; const bool isOneMode = crossCPredictionModesToTest == 1 && numTransformCandidates == 1; bool isLastBest = isOneMode; for( int transformMode = 0; transformMode < numTransformCandidates; transformMode++ ) { for( int crossCPredictionModeId = 0; crossCPredictionModeId < crossCPredictionModesToTest; crossCPredictionModeId++ ) { const bool isFirstMode = transformMode == 0 && crossCPredictionModeId == 0; const bool isLastMode = ( transformMode + 1 ) == numTransformCandidates && ( crossCPredictionModeId + 1 ) == crossCPredictionModesToTest; const bool bUseCrossCPrediction = crossCPredictionModeId != 0; // copy the original residual into the residual buffer csFull->getResiBuf(compArea).copyFrom(cs.getOrgResiBuf(compArea)); m_CABACEstimator->getCtx() = ctxStart; m_CABACEstimator->resetBits(); if( isLuma( compID ) ) { if( bestTU.mtsIdx == 1 && m_pcEncCfg->getUseTransformSkipFast() ) { continue; } if( !trModes[transformMode].second ) { continue; } tu.mtsIdx = trModes[transformMode].first; } tu.compAlpha[compID] = bUseCrossCPrediction ? 
preCalcAlpha : 0; const QpParam cQP(tu, compID); // note: uses tu.transformSkip[compID] #if RDOQ_CHROMA_LAMBDA m_pcTrQuant->selectLambda(compID); #endif if (slice.getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && isChroma(compID) && slice.getReshapeInfo().getSliceReshapeChromaAdj()) { double cRescale = round((double)(1 << CSCALE_FP_PREC) / (double)(tu.getChromaAdj())); m_pcTrQuant->setLambda(m_pcTrQuant->getLambda() / (cRescale*cRescale)); } TCoeff currAbsSum = 0; uint64_t currCompFracBits = 0; Distortion currCompDist = 0; double currCompCost = 0; uint64_t nonCoeffFracBits = 0; Distortion nonCoeffDist = 0; double nonCoeffCost = 0; if (bUseCrossCPrediction) { PelBuf resiBuf = csFull->getResiBuf( compArea ); crossComponentPrediction( tu, compID, lumaResi, resiBuf, resiBuf, false ); } if (slice.getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && isChroma(compID) && slice.getReshapeInfo().getSliceReshapeChromaAdj() && tu.blocks[compID].width*tu.blocks[compID].height > 4 ) { PelBuf resiBuf = csFull->getResiBuf(compArea); resiBuf.scaleSignal(tu.getChromaAdj(), 1, tu.cu->cs->slice->clpRng(compID)); } if( nNumTransformCands > 1 ) { if( transformMode == 0 ) { m_pcTrQuant->transformNxN( tu, compID, cQP, &trModes, CU::isIntra( *tu.cu ) ? 
m_pcEncCfg->getIntraMTSMaxCand() : m_pcEncCfg->getInterMTSMaxCand() ); tu.mtsIdx = trModes[0].first; } m_pcTrQuant->transformNxN( tu, compID, cQP, currAbsSum, m_CABACEstimator->getCtx(), true ); } else { m_pcTrQuant->transformNxN( tu, compID, cQP, currAbsSum, m_CABACEstimator->getCtx() ); } if (isFirstMode || (currAbsSum == 0)) { const CPelBuf zeroBuf(m_pTempPel, compArea); const CPelBuf orgResi = csFull->getOrgResiBuf( compArea ); if (bUseCrossCPrediction) { PelBuf resi = csFull->getResiBuf( compArea ); crossComponentPrediction( tu, compID, lumaResi, zeroBuf, resi, true ); nonCoeffDist = m_pcRdCost->getDistPart( orgResi, resi, channelBitDepth, compID, DF_SSE ); } else { nonCoeffDist = m_pcRdCost->getDistPart( zeroBuf, orgResi, channelBitDepth, compID, DF_SSE ); // initialized with zero residual distortion } if( !tu.noResidual ) { const bool prevCbf = ( compID == COMPONENT_Cr ? tu.cbf[COMPONENT_Cb] : false ); m_CABACEstimator->cbf_comp( *csFull, false, compArea, currDepth, prevCbf ); if( isCrossCPredictionAvailable ) { m_CABACEstimator->cross_comp_pred( tu, compID ); } } nonCoeffFracBits = m_CABACEstimator->getEstFracBits(); #if WCG_EXT if( m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() ) { nonCoeffCost = m_pcRdCost->calcRdCost(nonCoeffFracBits, nonCoeffDist, false); } else #endif nonCoeffCost = m_pcRdCost->calcRdCost(nonCoeffFracBits, nonCoeffDist); } if ((puiZeroDist != NULL) && isFirstMode) { *puiZeroDist += nonCoeffDist; // initialized with zero residual distortion } if (currAbsSum > 0) //if non-zero coefficients are present, a residual needs to be derived for further prediction { if (isFirstMode) { m_CABACEstimator->getCtx() = ctxStart; m_CABACEstimator->resetBits(); } const bool prevCbf = ( compID == COMPONENT_Cr ? 
tu.cbf[COMPONENT_Cb] : false ); m_CABACEstimator->cbf_comp( *csFull, true, compArea, currDepth, prevCbf ); if( isCrossCPredictionAvailable ) { m_CABACEstimator->cross_comp_pred( tu, compID ); } m_CABACEstimator->residual_coding( tu, compID ); currCompFracBits = m_CABACEstimator->getEstFracBits(); PelBuf resiBuf = csFull->getResiBuf(compArea); CPelBuf orgResiBuf = csFull->getOrgResiBuf(compArea); m_pcTrQuant->invTransformNxN(tu, compID, resiBuf, cQP); if (slice.getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && isChroma(compID) && slice.getReshapeInfo().getSliceReshapeChromaAdj() && tu.blocks[compID].width*tu.blocks[compID].height > 4 ) { resiBuf.scaleSignal(tu.getChromaAdj(), 0, tu.cu->cs->slice->clpRng(compID)); } if (bUseCrossCPrediction) { crossComponentPrediction( tu, compID, lumaResi, resiBuf, resiBuf, true ); } currCompDist = m_pcRdCost->getDistPart(orgResiBuf, resiBuf, channelBitDepth, compID, DF_SSE); #if WCG_EXT currCompCost = m_pcRdCost->calcRdCost(currCompFracBits, currCompDist, false); #else currCompCost = m_pcRdCost->calcRdCost(currCompFracBits, currCompDist); #endif if (csFull->isLossless) { nonCoeffCost = MAX_DOUBLE; } } else if( transformMode > 0 && !bUseCrossCPrediction ) { currCompCost = MAX_DOUBLE; } else { currCompFracBits = nonCoeffFracBits; currCompDist = nonCoeffDist; currCompCost = nonCoeffCost; tu.cbf[compID] = 0; } // evaluate if( ( currCompCost < minCost[compID] ) || ( transformMode == 1 && currCompCost == minCost[compID] ) ) { // copy component if (isFirstMode && ((nonCoeffCost < currCompCost) || (currAbsSum == 0))) // check for forced null { tu.getCoeffs( compID ).fill( 0 ); csFull->getResiBuf( compArea ).fill( 0 ); tu.cbf[compID] = 0; currAbsSum = 0; currCompFracBits = nonCoeffFracBits; currCompDist = nonCoeffDist; currCompCost = nonCoeffCost; } uiAbsSum[compID] = currAbsSum; uiSingleDistComp[compID] = currCompDist; minCost[compID] = currCompCost; if (uiAbsSum[compID] == 0) { if (bUseCrossCPrediction) { const 
CPelBuf zeroBuf( m_pTempPel, compArea ); PelBuf resiBuf = csFull->getResiBuf( compArea ); crossComponentPrediction( tu, compID, lumaResi, zeroBuf, resiBuf, true ); } } #if JVET_N0054_JOINT_CHROMA if( !isLastMode || (compID != COMPONENT_Y && !tu.noResidual) ) #else if( !isLastMode ) #endif { bestTU.copyComponentFrom( tu, compID ); saveCS.getResiBuf( compArea ).copyFrom( csFull->getResiBuf( compArea ) ); } isLastBest = isLastMode; } if( tu.noResidual ) { CHECK( currCompFracBits > 0 || currAbsSum, "currCompFracBits > 0 when tu noResidual" ); } } } if( !isLastBest ) { // copy component tu.copyComponentFrom( bestTU, compID ); csFull->getResiBuf( compArea ).copyFrom( saveCS.getResiBuf( compArea ) ); } } // component loop #if JVET_N0054_JOINT_CHROMA if ( chroma && tu.blocks[COMPONENT_Cb].valid() ) { const CompArea& cbArea = tu.blocks[COMPONENT_Cb]; const CompArea& crArea = tu.blocks[COMPONENT_Cr]; bool checkJointCbCr = !tu.noResidual && (TU::getCbf(tu, COMPONENT_Cb) || TU::getCbf(tu, COMPONENT_Cr)); if ( checkJointCbCr ) { const int channelBitDepth = sps.getBitDepth(toChannelType(COMPONENT_Cb)); double minCostCbCr = minCost[COMPONENT_Cb] + minCost[COMPONENT_Cr]; bool isLastBest = false; TCoeff currAbsSum = 0; uint64_t currCompFracBits = 0; Distortion currCompDistCb = 0; Distortion currCompDistCr = 0; double currCompCost = 0; tu.jointCbCr = 1; tu.getCoeffs(COMPONENT_Cr).fill(0); const QpParam cQP(tu, COMPONENT_Cb); // note: uses tu.transformSkip[compID] #if RDOQ_CHROMA_LAMBDA m_pcTrQuant->selectLambda(COMPONENT_Cb); #endif // Lambda is loosened for the joint mode with respect to single modes as the same residual is used for both chroma blocks m_pcTrQuant->setLambda( 0.60 * m_pcTrQuant->getLambda() ); m_CABACEstimator->getCtx() = ctxStart; m_CABACEstimator->resetBits(); // Copy the original residual into the residual buffer csFull->getResiBuf(cbArea).copyFrom(cs.getOrgResiBuf(cbArea)); csFull->getResiBuf(crArea).copyFrom(cs.getOrgResiBuf(crArea)); // Create joint residual 
and store it for Cb component: jointResi = (cbResi - crResi)/2 PelBuf cbResi = csFull->getResiBuf( cbArea ); PelBuf crResi = csFull->getResiBuf( crArea ); cbResi.subtractAndHalve( crResi ); bool reshape = slice.getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && slice.getReshapeInfo().getSliceReshapeChromaAdj() && tu.blocks[COMPONENT_Cb].width*tu.blocks[COMPONENT_Cb].height > 4 ; if ( reshape ) { double cRescale = round((double)(1 << CSCALE_FP_PREC) / (double)(tu.getChromaAdj())); m_pcTrQuant->setLambda(m_pcTrQuant->getLambda() / (cRescale*cRescale)); cbResi.scaleSignal(tu.getChromaAdj(), 1, tu.cu->cs->slice->clpRng(COMPONENT_Cb)); } m_pcTrQuant->transformNxN(tu, COMPONENT_Cb, cQP, currAbsSum, m_CABACEstimator->getCtx()); if (currAbsSum > 0) { // Set cfb also for Cr TU::setCbfAtDepth (tu, COMPONENT_Cr, tu.depth, true); m_CABACEstimator->cbf_comp( *csFull, true, cbArea, currDepth, false ); m_CABACEstimator->cbf_comp( *csFull, true, crArea, currDepth, true ); m_CABACEstimator->residual_coding( tu, COMPONENT_Cb ); m_CABACEstimator->joint_cb_cr ( tu ); // Could also call residual coding for Cr where this flag is sent currCompFracBits = m_CABACEstimator->getEstFracBits(); m_pcTrQuant->invTransformNxN(tu, COMPONENT_Cb, cbResi, cQP); if ( reshape ) cbResi.scaleSignal(tu.getChromaAdj(), 0, tu.cu->cs->slice->clpRng(COMPONENT_Cb));; crResi.copyAndNegate( cbResi ); currCompDistCb = m_pcRdCost->getDistPart(csFull->getOrgResiBuf(cbArea), cbResi, channelBitDepth, COMPONENT_Cb, DF_SSE); currCompDistCr = m_pcRdCost->getDistPart(csFull->getOrgResiBuf(crArea), crResi, channelBitDepth, COMPONENT_Cr, DF_SSE); #if WCG_EXT currCompCost = m_pcRdCost->calcRdCost(currCompFracBits, currCompDistCr + currCompDistCb, false); #else currCompCost = m_pcRdCost->calcRdCost(currCompFracBits, currCompDistCr + currCompDistCb); #endif } else currCompCost = MAX_DOUBLE; // evaluate if( currCompCost < minCostCbCr ) { uiAbsSum[COMPONENT_Cb] = currAbsSum; uiAbsSum[COMPONENT_Cr] = 
currAbsSum; uiSingleDistComp[COMPONENT_Cb] = currCompDistCb; uiSingleDistComp[COMPONENT_Cr] = currCompDistCr; minCostCbCr = currCompCost; isLastBest = true; } if( !isLastBest ) { // copy component tu.copyComponentFrom( bestTU, COMPONENT_Cb ); tu.copyComponentFrom( bestTU, COMPONENT_Cr ); csFull->getResiBuf( cbArea ).copyFrom( saveCS.getResiBuf( cbArea ) ); csFull->getResiBuf( crArea ).copyFrom( saveCS.getResiBuf( crArea ) ); } } } #endif // JVET_N0054_JOINT_CHROMA m_CABACEstimator->getCtx() = ctxStart; m_CABACEstimator->resetBits(); if( !tu.noResidual ) { static const ComponentID cbf_getComp[3] = { COMPONENT_Cb, COMPONENT_Cr, COMPONENT_Y }; for( unsigned c = 0; c < numTBlocks; c++) { const ComponentID compID = cbf_getComp[c]; if (compID == COMPONENT_Y && !luma) continue; if (compID != COMPONENT_Y && !chroma) continue; if( tu.blocks[compID].valid() ) { const bool prevCbf = ( compID == COMPONENT_Cr ? TU::getCbfAtDepth( tu, COMPONENT_Cb, currDepth ) : false ); m_CABACEstimator->cbf_comp( *csFull, TU::getCbfAtDepth( tu, compID, currDepth ), tu.blocks[compID], currDepth, prevCbf ); } } } for (uint32_t ch = 0; ch < numValidComp; ch++) { const ComponentID compID = ComponentID(ch); if (compID == COMPONENT_Y && !luma) continue; if (compID != COMPONENT_Y && !chroma) continue; if (tu.blocks[compID].valid()) { if( cs.pps->getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() && isChroma(compID) && uiAbsSum[COMPONENT_Y] ) { m_CABACEstimator->cross_comp_pred( tu, compID ); } if( TU::getCbf( tu, compID ) ) { m_CABACEstimator->residual_coding( tu, compID ); } uiSingleDist += uiSingleDistComp[compID]; } } if( tu.noResidual ) { CHECK( m_CABACEstimator->getEstFracBits() > 0, "no residual TU's bits shall be 0" ); } csFull->fracBits += m_CABACEstimator->getEstFracBits(); csFull->dist += uiSingleDist; #if WCG_EXT if( m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() ) { csFull->cost = m_pcRdCost->calcRdCost(csFull->fracBits, csFull->dist, false); } else #endif 
csFull->cost = m_pcRdCost->calcRdCost(csFull->fracBits, csFull->dist); } // check full // code sub-blocks if( bCheckSplit ) { if( bCheckFull ) { m_CABACEstimator->getCtx() = ctxStart; } if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) ) { partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs ); } else if( cu.sbtInfo && partitioner.canSplit( PartSplit( cu.getSbtTuSplit() ), cs ) ) { partitioner.splitCurrArea( PartSplit( cu.getSbtTuSplit() ), cs ); } else THROW( "Implicit TU split not available!" ); do { xEstimateInterResidualQT(*csSplit, partitioner, bCheckFull ? nullptr : puiZeroDist , luma, chroma ); csSplit->cost = m_pcRdCost->calcRdCost( csSplit->fracBits, csSplit->dist ); } while( partitioner.nextPart( *csSplit ) ); partitioner.exitCurrSplit(); unsigned anyCbfSet = 0; unsigned compCbf[3] = { 0, 0, 0 }; if( !bCheckFull ) { for( auto &currTU : csSplit->traverseTUs( currArea, partitioner.chType ) ) { for( unsigned ch = 0; ch < numTBlocks; ch++ ) { compCbf[ ch ] |= ( TU::getCbfAtDepth( currTU, ComponentID(ch), currDepth + 1 ) ? 
1 : 0 );
        }
      }

      {
        for( auto &currTU : csSplit->traverseTUs( currArea, partitioner.chType ) )
        {
          TU::setCbfAtDepth ( currTU, COMPONENT_Y, currDepth, compCbf[ COMPONENT_Y ] );
          if( currArea.chromaFormat != CHROMA_400 )
          {
            TU::setCbfAtDepth ( currTU, COMPONENT_Cb, currDepth, compCbf[ COMPONENT_Cb ] );
            TU::setCbfAtDepth ( currTU, COMPONENT_Cr, currDepth, compCbf[ COMPONENT_Cr ] );
          }
        }

        anyCbfSet = compCbf[ COMPONENT_Y ];
        if( currArea.chromaFormat != CHROMA_400 )
        {
          anyCbfSet |= compCbf[ COMPONENT_Cb ];
          anyCbfSet |= compCbf[ COMPONENT_Cr ];
        }
      }

      m_CABACEstimator->getCtx() = ctxStart;
      m_CABACEstimator->resetBits();

      // when compID isn't a channel, code Cbfs:
      xEncodeInterResidualQT( *csSplit, partitioner, MAX_NUM_TBLOCKS );
      for (uint32_t ch = 0; ch < numValidComp; ch++)
      {
        const ComponentID compID = ComponentID(ch);
        if (compID == COMPONENT_Y && !luma)
          continue;
        if (compID != COMPONENT_Y && !chroma)
          continue;
        xEncodeInterResidualQT( *csSplit, partitioner, ComponentID( ch ) );
      }

      csSplit->fracBits = m_CABACEstimator->getEstFracBits();
      csSplit->cost = m_pcRdCost->calcRdCost(csSplit->fracBits, csSplit->dist);

      if( bCheckFull && anyCbfSet && csSplit->cost < csFull->cost )
      {
        cs.useSubStructure( *csSplit, partitioner.chType, currArea, false, false, false, true );
        cs.cost = csSplit->cost;
      }
    }

    if( csSplit && csFull )
    {
      csSplit->releaseIntermediateData();
      csFull ->releaseIntermediateData();
    }
  }
}

/** Produces the reconstruction, distortion, bit count and RD cost of one inter CU.
 *
 *  With \p skipResidual set, the CU is flagged as skipped without residual: the prediction is copied into the
 *  reconstruction, an empty TU is added and only the skip-mode syntax (optional transquant-bypass flag, skip
 *  flag, merge information) is bit-estimated.
 *
 *  Otherwise the residual (original minus prediction) is formed, xEstimateInterResidualQT() is run on it, the
 *  outcome is compared with the cost of sending no residual at all, the CU is reconstructed from whichever
 *  alternative was kept and the distortion is re-measured on that reconstruction.
 *
 *  In both cases cs.dist, cs.fracBits and cs.cost are written before returning.
 *
 *  \param cs            coding structure that owns the CU, its sample buffers and the resulting costs
 *  \param partitioner   provides the channel type used to look up the CU / PU / TU and is forwarded to the helpers
 *  \param skipResidual  true to evaluate the CU in skip mode (no residual coding)
 *  \param luma          when false the luma component is left out of residual, reconstruction and distortion
 *  \param chroma        when false the chroma components are left out likewise
 */
void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &partitioner, const bool &skipResidual
  , const bool luma, const bool chroma
)
{
#if JVET_N0671_RDCOST_FIX
  m_pcRdCost->setChromaFormat(cs.sps->getChromaFormatIdc());
#endif
  CodingUnit &cu = *cs.getCU( partitioner.chType );
  const ChromaFormat format = cs.area.chromaFormat;;
  const int numValidComponents = getNumberValidComponents(format);
  const SPS &sps = *cs.sps;
  const PPS &pps = *cs.pps;

  if( skipResidual ) // No residual coding : SKIP mode
  {
    cu.skip = true;
    cu.rootCbf = false;

    CHECK( cu.sbtInfo != 0, "sbtInfo shall be 0 if CU has no residual" );
    cs.getResiBuf().fill(0);
    {
      // without a residual the reconstruction is the prediction itself
      cs.getRecoBuf().copyFrom(cs.getPredBuf() );
      if (m_pcEncCfg->getReshaper() && (cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag()) && !cu.firstPU->mhIntraFlag && !CU::isIBC(cu))
      {
        // reshaper active for this CTU: pass the luma reconstruction through the forward LUT
        cs.getRecoBuf().Y().rspSignal(m_pcReshape->getFwdLUT());
      }
    }

    // add an empty TU
    cs.addTU(CS::isDualITree(cs) ? cu : cs.area, partitioner.chType);
    Distortion distortion = 0;

    // accumulate the distortion of the selected components (reconstruction vs. original)
    for (int comp = 0; comp < numValidComponents; comp++)
    {
      const ComponentID compID = ComponentID(comp);
      if (compID == COMPONENT_Y && !luma)
        continue;
      if (compID != COMPONENT_Y && !chroma)
        continue;
      CPelBuf reco = cs.getRecoBuf (compID);
      CPelBuf org = cs.getOrgBuf (compID);
#if WCG_EXT
      if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || ( m_pcEncCfg->getReshaper() && (cs.slice->getReshapeInfo().getUseSliceReshaper()&& m_pcReshape->getCTUFlag())))
      {
        const CPelBuf orgLuma = cs.getOrgBuf( cs.area.blocks[COMPONENT_Y] );
        if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()))
        {
          // reshaper-only case, luma: measure against a copy of the reconstruction mapped through the inverse LUT
          const CompArea &areaY = cu.Y();
          CompArea tmpArea1(COMPONENT_Y, areaY.chromaFormat, Position(0, 0), areaY.size());
          PelBuf tmpRecLuma = m_tmpStorageLCU.getBuf(tmpArea1);
          tmpRecLuma.copyFrom(reco);
          tmpRecLuma.rspSignal(m_pcReshape->getInvLUT());
          distortion += m_pcRdCost->getDistPart(org, tmpRecLuma, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
        }
        else
        distortion += m_pcRdCost->getDistPart( org, reco, sps.getBitDepth( toChannelType( compID ) ), compID, DF_SSE_WTD, &orgLuma );
      }
      else
#endif
      distortion += m_pcRdCost->getDistPart( org, reco, sps.getBitDepth( toChannelType( compID ) ), compID, DF_SSE );
    }

    // estimate the bits of the skip-mode syntax, starting from a zeroed bit counter
    m_CABACEstimator->resetBits();

    if( pps.getTransquantBypassEnabledFlag() )
    {
      m_CABACEstimator->cu_transquant_bypass_flag( cu );
    }

    PredictionUnit &pu = *cs.getPU( partitioner.chType );

    m_CABACEstimator->cu_skip_flag ( cu );
    if (CU::isIBC(cu))
    {
      m_CABACEstimator->merge_idx(pu);
    }
    else
    {
      m_CABACEstimator->subblock_merge_flag( cu );
      m_CABACEstimator->triangle_mode ( cu );
      if (cu.mmvdSkip)
      {
        m_CABACEstimator->mmvd_merge_idx(pu);
      }
      else
      m_CABACEstimator->merge_idx ( pu );
    }
    cs.dist = distortion;
    cs.fracBits = m_CABACEstimator->getEstFracBits();
    cs.cost = m_pcRdCost->calcRdCost(cs.fracBits, cs.dist);

    return;
  }

  // Residual coding.
  // Build the residual = original - prediction for the requested components.
  if (luma)
  {
    cs.getResiBuf().bufs[0].copyFrom(cs.getOrgBuf().bufs[0]);
    if (cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag())
    {
      // reshaper active: the original is mapped through the forward LUT before subtracting; the prediction copy
      // is mapped too, except for mhIntra / IBC CUs
      const CompArea &areaY = cu.Y();
      CompArea tmpArea(COMPONENT_Y, areaY.chromaFormat, Position(0, 0), areaY.size());
      PelBuf tmpPred = m_tmpStorageLCU.getBuf(tmpArea);
      tmpPred.copyFrom(cs.getPredBuf(COMPONENT_Y));
      if (!cu.firstPU->mhIntraFlag && !CU::isIBC(cu))
        tmpPred.rspSignal(m_pcReshape->getFwdLUT());
      cs.getResiBuf(COMPONENT_Y).rspSignal(m_pcReshape->getFwdLUT());
      cs.getResiBuf(COMPONENT_Y).subtract(tmpPred);
    }
    else
    cs.getResiBuf().bufs[0].subtract(cs.getPredBuf().bufs[0]);
  }
  if (chroma)
  {
    cs.getResiBuf().bufs[1].copyFrom(cs.getOrgBuf().bufs[1]);
    cs.getResiBuf().bufs[2].copyFrom(cs.getOrgBuf().bufs[2]);
    cs.getResiBuf().bufs[1].subtract(cs.getPredBuf().bufs[1]);
    cs.getResiBuf().bufs[2].subtract(cs.getPredBuf().bufs[2]);
  }

  // receives the distortion of the all-zero residual, accumulated by xEstimateInterResidualQT()
  Distortion zeroDistortion = 0;

  // entropy-coder state on entry; restored before the final bit count further down
  const TempCtx ctxStart( m_CtxCache, m_CABACEstimator->getCtx() );

  // keep an untouched copy of the residual in the "original residual" buffers
  if (luma)
  {
    cs.getOrgResiBuf().bufs[0].copyFrom(cs.getResiBuf().bufs[0]);
  }
  if (chroma)
  {
    cs.getOrgResiBuf().bufs[1].copyFrom(cs.getResiBuf().bufs[1]);
    cs.getOrgResiBuf().bufs[2].copyFrom(cs.getResiBuf().bufs[2]);
  }
  xEstimateInterResidualQT(cs, partitioner, &zeroDistortion, luma, chroma);
  TransformUnit &firstTU = *cs.getTU( partitioner.chType );

  // bits of the root cbf syntax element coded with rootCbf == false
  cu.rootCbf = false;
  m_CABACEstimator->resetBits();
  m_CABACEstimator->rqt_root_cbf( cu );
  const uint64_t zeroFracBits = m_CABACEstimator->getEstFracBits();
  double zeroCost;
  {
    // in lossless mode zeroCost is set to cs.cost + 1, so it can never be lower than cs.cost
#if WCG_EXT
    if( m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() )
    {
      zeroCost = cs.isLossless ? ( cs.cost + 1 ) : m_pcRdCost->calcRdCost( zeroFracBits, zeroDistortion, false );
    }
    else
#endif
    zeroCost = cs.isLossless ? ( cs.cost + 1 ) : m_pcRdCost->calcRdCost( zeroFracBits, zeroDistortion );
  }

  // the root cbf is the OR of the depth-0 cbf flags of the first TU over all valid transform blocks
  const int numValidTBlocks = ::getNumberValidTBlocks( *cs.pcv );
  for (uint32_t i = 0; i < numValidTBlocks; i++)
  {
    cu.rootCbf |= TU::getCbfAtDepth(firstTU, ComponentID(i), 0);
  }

  // -------------------------------------------------------
  // If a block full of 0's is efficient, then just use 0's.
  // The costs at this point do not include header bits.

  if (zeroCost < cs.cost || !cu.rootCbf)
  {
    cu.sbtInfo = 0;
    cu.rootCbf = false;

    cs.clearTUs();

    // add a new "empty" TU spanning the whole CU
    TransformUnit& tu = cs.addTU(cu, partitioner.chType);

    for (int comp = 0; comp < numValidComponents; comp++)
    {
      tu.rdpcm[comp] = RDPCM_OFF;
    }
    cu.firstTU = cu.lastTU = &tu;
  }

  // all decisions now made. Fully encode the CU, including the headers:
  m_CABACEstimator->getCtx() = ctxStart;

  uint64_t finalFracBits = xGetSymbolFracBitsInter( cs, partitioner );
  // we've now encoded the CU, and so have a valid bit cost
  if (!cu.rootCbf)
  {
    if (luma)
    {
      cs.getResiBuf().bufs[0].fill(0); // Clear the residual image, if we didn't code it.
    }
    if (chroma)
    {
      cs.getResiBuf().bufs[1].fill(0); // Clear the residual image, if we didn't code it.
      cs.getResiBuf().bufs[2].fill(0); // Clear the residual image, if we didn't code it.
    }
  }

  // reconstruction = prediction + residual, clipped to the slice's clipping range
  if (luma)
  {
    if (cu.rootCbf && cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag())
    {
      // residual present and reshaper active: reconstruct from the prediction copy that went through the
      // forward LUT (not applied for mhIntra / IBC CUs)
      const CompArea &areaY = cu.Y();
      CompArea tmpArea(COMPONENT_Y, areaY.chromaFormat, Position(0, 0), areaY.size());
      PelBuf tmpPred = m_tmpStorageLCU.getBuf(tmpArea);
      tmpPred.copyFrom(cs.getPredBuf(COMPONENT_Y));
      if (!cu.firstPU->mhIntraFlag && !CU::isIBC(cu))
        tmpPred.rspSignal(m_pcReshape->getFwdLUT());

      cs.getRecoBuf(COMPONENT_Y).reconstruct(tmpPred, cs.getResiBuf(COMPONENT_Y), cs.slice->clpRng(COMPONENT_Y));
    }
    else
    {
      cs.getRecoBuf().bufs[0].reconstruct(cs.getPredBuf().bufs[0], cs.getResiBuf().bufs[0], cs.slice->clpRngs().comp[0]);
      if (cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && !cu.firstPU->mhIntraFlag && !CU::isIBC(cu))
      {
        // reached with the reshaper active only when rootCbf is false: map the reconstruction itself
        cs.getRecoBuf().bufs[0].rspSignal(m_pcReshape->getFwdLUT());
      }
    }
  }
  if (chroma)
  {
    cs.getRecoBuf().bufs[1].reconstruct(cs.getPredBuf().bufs[1], cs.getResiBuf().bufs[1], cs.slice->clpRngs().comp[1]);
    cs.getRecoBuf().bufs[2].reconstruct(cs.getPredBuf().bufs[2], cs.getResiBuf().bufs[2], cs.slice->clpRngs().comp[2]);
  }

  // update with clipped distortion and cost (previously unclipped reconstruction values were used)
  Distortion finalDistortion = 0;

  for (int comp = 0; comp < numValidComponents; comp++)
  {
    const ComponentID compID = ComponentID(comp);
    if (compID == COMPONENT_Y && !luma)
      continue;
    if (compID != COMPONENT_Y && !chroma)
      continue;
    CPelBuf reco = cs.getRecoBuf (compID);
    CPelBuf org = cs.getOrgBuf (compID);

#if WCG_EXT
    if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || ( m_pcEncCfg->getReshaper() && (cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() ) ) )
    {
      const CPelBuf orgLuma = cs.getOrgBuf( cs.area.blocks[COMPONENT_Y] );
      if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()) )
      {
        // reshaper-only case, luma: measure against a copy of the reconstruction mapped through the inverse LUT
        const CompArea &areaY = cu.Y();
        CompArea tmpArea1(COMPONENT_Y, areaY.chromaFormat, Position(0, 0), areaY.size());
        PelBuf tmpRecLuma = m_tmpStorageLCU.getBuf(tmpArea1);
        tmpRecLuma.copyFrom(reco);
        tmpRecLuma.rspSignal(m_pcReshape->getInvLUT());
        finalDistortion += m_pcRdCost->getDistPart(org, tmpRecLuma, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
      }
      else
      finalDistortion += m_pcRdCost->getDistPart(org, reco, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
    }
    else
#endif
    {
      finalDistortion += m_pcRdCost->getDistPart( org, reco, sps.getBitDepth( toChannelType( compID ) ), compID, DF_SSE );
    }
  }

  cs.dist = finalDistortion;
  cs.fracBits = finalFracBits;
  cs.cost = m_pcRdCost->calcRdCost(cs.fracBits, cs.dist);

  CHECK(cs.tus.size() == 0, "No TUs present");
}

/** Estimates the bits needed to signal the complete inter CU with the CABAC estimator.
 *
 *  A merged CU whose root cbf is zero is counted as a skipped CU (and cu.skip is set to true as a side effect);
 *  every other CU is counted with prediction mode, prediction data and residual syntax.
 *
 *  \param cs           coding structure containing the CU
 *  \param partitioner  provides the channel type for the CU lookup and is forwarded to cu_residual()
 *  \returns            estimated fractional bits
 */
uint64_t InterSearch::xGetSymbolFracBitsInter(CodingStructure &cs, Partitioner &partitioner)
{
  uint64_t fracBits = 0;
  CodingUnit &cu = *cs.getCU( partitioner.chType );

  m_CABACEstimator->resetBits();

  if( cu.firstPU->mergeFlag && !cu.rootCbf )
  {
    // merge without residual: signalled as skip
    cu.skip = true;

    if( cs.pps->getTransquantBypassEnabledFlag() )
    {
      m_CABACEstimator->cu_transquant_bypass_flag( cu );
    }
    m_CABACEstimator->cu_skip_flag ( cu );
    m_CABACEstimator->subblock_merge_flag( cu );
    m_CABACEstimator->triangle_mode ( cu );
    if (cu.mmvdSkip)
    {
      m_CABACEstimator->mmvd_merge_idx(*cu.firstPU);
    }
    else
    m_CABACEstimator->merge_idx ( *cu.firstPU );
    fracBits += m_CABACEstimator->getEstFracBits();
  }
  else
  {
    CHECK( cu.skip, "Skip flag has to be off at this point!"
); if( cs.pps->getTransquantBypassEnabledFlag() ) { m_CABACEstimator->cu_transquant_bypass_flag( cu ); } if (cu.Y().valid()) m_CABACEstimator->cu_skip_flag( cu ); m_CABACEstimator->pred_mode ( cu ); m_CABACEstimator->cu_pred_data( cu ); CUCtx cuCtx; cuCtx.isDQPCoded = true; cuCtx.isChromaQpAdjCoded = true; m_CABACEstimator->cu_residual ( cu, partitioner, cuCtx ); fracBits += m_CABACEstimator->getEstFracBits(); } return fracBits; } double InterSearch::xGetMEDistortionWeight(uint8_t gbiIdx, RefPicList eRefPicList) { if( gbiIdx != GBI_DEFAULT ) { return fabs((double)getGbiWeight(gbiIdx, eRefPicList) / (double)g_GbiWeightBase); } else { return 0.5; } } bool InterSearch::xReadBufferedUniMv(PredictionUnit& pu, RefPicList eRefPicList, int32_t iRefIdx, Mv& pcMvPred, Mv& rcMv, uint32_t& ruiBits, Distortion& ruiCost) { if (m_uniMotions.isReadMode((uint32_t)eRefPicList, (uint32_t)iRefIdx)) { m_uniMotions.copyTo(rcMv, ruiCost, (uint32_t)eRefPicList, (uint32_t)iRefIdx); m_pcRdCost->setPredictor(pcMvPred); m_pcRdCost->setCostScale(0); unsigned imvShift = pu.cu->imv << 1; uint32_t uiMvBits = m_pcRdCost->getBitsOfVectorWithPredictor(rcMv.getHor(), rcMv.getVer(), imvShift); ruiBits += uiMvBits; ruiCost += m_pcRdCost->getCost(ruiBits); return true; } return false; } bool InterSearch::xReadBufferedAffineUniMv(PredictionUnit& pu, RefPicList eRefPicList, int32_t iRefIdx, Mv acMvPred[3], Mv acMv[3], uint32_t& ruiBits, Distortion& ruiCost , int& mvpIdx, const AffineAMVPInfo& aamvpi ) { if (m_uniMotions.isReadModeAffine((uint32_t)eRefPicList, (uint32_t)iRefIdx, pu.cu->affineType)) { m_uniMotions.copyAffineMvTo(acMv, ruiCost, (uint32_t)eRefPicList, (uint32_t)iRefIdx, pu.cu->affineType , mvpIdx ); m_pcRdCost->setCostScale(0); acMvPred[0] = aamvpi.mvCandLT[mvpIdx]; acMvPred[1] = aamvpi.mvCandRT[mvpIdx]; acMvPred[2] = aamvpi.mvCandLB[mvpIdx]; uint32_t uiMvBits = 0; for (int iVerIdx = 0; iVerIdx<(pu.cu->affineType ? 
3 : 2); iVerIdx++) { if (iVerIdx) { m_pcRdCost->setPredictor(acMvPred[iVerIdx] + acMv[0] - acMvPred[0]); } else { m_pcRdCost->setPredictor(acMvPred[iVerIdx]); } uiMvBits += m_pcRdCost->getBitsOfVectorWithPredictor(acMv[iVerIdx].getHor(), acMv[iVerIdx].getVer(), 0); } ruiBits += uiMvBits; ruiCost += m_pcRdCost->getCost(ruiBits); return true; } return false; } void InterSearch::initWeightIdxBits() { for (int n = 0; n < GBI_NUM; ++n) { m_estWeightIdxBits[n] = deriveWeightIdxBits(n); } } void InterSearch::xClipMv( Mv& rcMv, const Position& pos, const struct Size& size, const SPS& sps ) { int mvShift = MV_FRACTIONAL_BITS_INTERNAL; int offset = 8; int horMax = ( sps.getPicWidthInLumaSamples() + offset - ( int ) pos.x - 1 ) << mvShift; int horMin = ( -( int ) sps.getMaxCUWidth() - offset - ( int ) pos.x + 1 ) << mvShift; int verMax = ( sps.getPicHeightInLumaSamples() + offset - ( int ) pos.y - 1 ) << mvShift; int verMin = ( -( int ) sps.getMaxCUHeight() - offset - ( int ) pos.y + 1 ) << mvShift; if( sps.getWrapAroundEnabledFlag() ) { int horMax = ( sps.getPicWidthInLumaSamples() + sps.getMaxCUWidth() - size.width + offset - ( int ) pos.x - 1 ) << mvShift; int horMin = ( -( int ) sps.getMaxCUWidth() - offset - ( int ) pos.x + 1 ) << mvShift; rcMv.setHor( std::min( horMax, std::max( horMin, rcMv.getHor() ) ) ); rcMv.setVer( std::min( verMax, std::max( verMin, rcMv.getVer() ) ) ); return; } rcMv.setHor( std::min( horMax, std::max( horMin, rcMv.getHor() ) ) ); rcMv.setVer( std::min( verMax, std::max( verMin, rcMv.getVer() ) ) ); } uint32_t InterSearch::xDetermineBestMvp( PredictionUnit& pu, Mv acMvTemp[3], int& mvpIdx, const AffineAMVPInfo& aamvpi ) { bool mvpUpdated = false; uint32_t minBits = std::numeric_limits<uint32_t>::max(); for ( int i = 0; i < aamvpi.numCand; i++ ) { Mv mvPred[3] = { aamvpi.mvCandLT[i], aamvpi.mvCandRT[i], aamvpi.mvCandLB[i] }; uint32_t candBits = m_auiMVPIdxCost[i][aamvpi.numCand]; candBits += xCalcAffineMVBits( pu, acMvTemp, mvPred, pu.cu->imv != 1 
); if ( candBits < minBits ) { minBits = candBits; mvpIdx = i; mvpUpdated = true; } } CHECK( !mvpUpdated, "xDetermineBestMvp() error" ); return minBits; } void InterSearch::symmvdCheckBestMvp( PredictionUnit& pu, PelUnitBuf& origBuf, Mv curMv, RefPicList curRefList, AMVPInfo amvpInfo[2][33], int32_t gbiIdx, Mv cMvPredSym[2], int32_t mvpIdxSym[2], Distortion& bestCost, bool skip ) { RefPicList tarRefList = (RefPicList)(1 - curRefList); int32_t refIdxCur = pu.cu->slice->getSymRefIdx(curRefList); int32_t refIdxTar = pu.cu->slice->getSymRefIdx(tarRefList); MvField cCurMvField, cTarMvField; cCurMvField.setMvField(curMv, refIdxCur); AMVPInfo& amvpCur = amvpInfo[curRefList][refIdxCur]; AMVPInfo& amvpTar = amvpInfo[tarRefList][refIdxTar]; m_pcRdCost->setCostScale(0); // get prediction of eCurRefPicList PelUnitBuf predBufA = m_tmpPredStorage[curRefList].getBuf(UnitAreaRelative(*pu.cu, pu)); const Picture* picRefA = pu.cu->slice->getRefPic(curRefList, cCurMvField.refIdx); Mv mvA = cCurMvField.mv; mvA.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); clipMv(mvA, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps); xPredInterBlk(COMPONENT_Y, pu, picRefA, mvA, predBufA, true, pu.cu->slice->clpRng(COMPONENT_Y), false, false); int32_t skipMvpIdx[2]; skipMvpIdx[0] = skip ? mvpIdxSym[0] : -1; skipMvpIdx[1] = skip ? 
mvpIdxSym[1] : -1; for (int i = 0; i < amvpCur.numCand; i++) { for (int j = 0; j < amvpTar.numCand; j++) { if (skipMvpIdx[curRefList] == i && skipMvpIdx[tarRefList] == j) continue; cTarMvField.setMvField(curMv.getSymmvdMv(amvpCur.mvCand[i], amvpTar.mvCand[j]), refIdxTar); // get prediction of eTarRefPicList PelUnitBuf predBufB = m_tmpPredStorage[tarRefList].getBuf(UnitAreaRelative(*pu.cu, pu)); const Picture* picRefB = pu.cu->slice->getRefPic(tarRefList, cTarMvField.refIdx); Mv mvB = cTarMvField.mv; mvB.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); clipMv(mvB, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps); xPredInterBlk(COMPONENT_Y, pu, picRefB, mvB, predBufB, true, pu.cu->slice->clpRng(COMPONENT_Y), false, false); PelUnitBuf bufTmp = m_tmpStorageLCU.getBuf(UnitAreaRelative(*pu.cu, pu)); if (gbiIdx != GBI_DEFAULT) bufTmp.Y().addWeightedAvg(predBufA.Y(), predBufB.Y(), pu.cu->slice->clpRng(COMPONENT_Y), gbiIdx); else bufTmp.Y().addAvg(predBufA.Y(), predBufB.Y(), pu.cu->slice->clpRng(COMPONENT_Y)); // calc distortion #if JVET_N0329_IBC_SEARCH_IMP DFunc distFunc = (!pu.cu->transQuantBypass && !pu.cu->slice->getDisableSATDForRD()) ? DF_HAD : DF_SAD; Distortion cost = m_pcRdCost->getDistPart(bufTmp.Y(), origBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, distFunc); #else Distortion cost = m_pcRdCost->getDistPart(bufTmp.Y(), origBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_HAD); #endif m_pcRdCost->setPredictor(amvpCur.mvCand[i]); uint32_t bits = m_pcRdCost->getBitsOfVectorWithPredictor(curMv.hor, curMv.ver, (pu.cu->imv << 1)); bits += m_auiMVPIdxCost[i][AMVP_MAX_NUM_CANDS]; bits += m_auiMVPIdxCost[j][AMVP_MAX_NUM_CANDS]; cost += m_pcRdCost->getCost(bits); if (cost < bestCost) { bestCost = cost; cMvPredSym[curRefList] = amvpCur.mvCand[i]; cMvPredSym[tarRefList] = amvpTar.mvCand[j]; mvpIdxSym[curRefList] = i; mvpIdxSym[tarRefList] = j; } } } }