/* The copyright in this software is being made available under the BSD * License, included below. This software may be subject to other third party * and contributor rights, including patent rights, and no such rights are * granted under this license. * * Copyright (c) 2010-2022, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may * be used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ /** \file Prediction.cpp \brief prediction class */ #include "InterPrediction.h" #include "Buffer.h" #include "UnitTools.h" #include "MCTS.h" #include <memory.h> #include <algorithm> #if INTER_LIC || (TM_AMVP || TM_MRG || JVET_Z0084_IBC_TM) || JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING || JVET_Z0061_TM_OBMC #include "Reshape.h" #endif #if ENABLE_SIMD_TMP #include "CommonDefX86.h" #endif //! \ingroup CommonLib //! \{ // ==================================================================================================================== // Constructor / destructor / initialize // ==================================================================================================================== #if JVET_Z0136_OOB bool InterPrediction::isMvOOB(const Mv& rcMv, const struct Position pos, const struct Size size, const SPS* sps, const PPS* pps, bool *mcMask, bool *mcMaskChroma, bool lumaOnly) { return g_pelBufOP.isMvOOB(rcMv, pos, size, sps, pps, mcMask, mcMaskChroma, lumaOnly, m_currChromaFormat); } bool InterPrediction::isMvOOBSubBlk(const Mv& rcMv, const struct Position pos, const struct Size size, const SPS* sps, const PPS* pps, bool *mcMask, int mcStride, bool *mcMaskChroma, int mcCStride, bool lumaOnly) { return g_pelBufOP.isMvOOBSubBlk(rcMv, pos, size, sps, pps, mcMask, mcStride, mcMaskChroma, mcCStride, lumaOnly, m_currChromaFormat); } #endif InterPrediction::InterPrediction() : #if INTER_LIC m_storeBeforeLIC (false), #endif #if INTER_LIC || (TM_AMVP || TM_MRG || JVET_Z0084_IBC_TM) // note: already refactor m_pcReshape ( nullptr ), #endif #if INTER_LIC || JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED m_pcLICRefLeftTemplate ( nullptr ), m_pcLICRefAboveTemplate( nullptr ), m_pcLICRecLeftTemplate ( nullptr ), m_pcLICRecAboveTemplate( nullptr ), #endif #if TM_AMVP || TM_MRG || JVET_Z0084_IBC_TM m_pcCurTplLeft ( nullptr ), m_pcCurTplAbove( nullptr ), m_pcRefTplLeft ( nullptr ), m_pcRefTplAbove( nullptr ), #endif m_currChromaFormat( NUM_CHROMA_FORMAT ) , 
m_maxCompIDToPred ( MAX_NUM_COMPONENT ) , m_pcRdCost ( nullptr ) , m_storedMv ( nullptr ) , m_skipPROF (false) , m_encOnly (false) , m_isBi (false) , m_gradX0(nullptr) , m_gradY0(nullptr) , m_gradX1(nullptr) , m_gradY1(nullptr) #if MULTI_PASS_DMVR || SAMPLE_BASED_BDOF , m_absGx(nullptr) , m_absGy(nullptr) , m_dIx(nullptr) , m_dIy(nullptr) , m_dI(nullptr) , m_signGxGy(nullptr) , m_tmpx_pixel_32bit(nullptr) , m_tmpy_pixel_32bit(nullptr) , m_sumAbsGX_pixel_32bit(nullptr) , m_sumAbsGY_pixel_32bit(nullptr) , m_sumDIX_pixel_32bit(nullptr) , m_sumDIY_pixel_32bit(nullptr) , m_sumSignGY_GX_pixel_32bit(nullptr) #endif , m_subPuMC(false) { for( uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++ ) { for( uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++ ) { m_acYuvPred[refList][ch] = nullptr; } } for( uint32_t c = 0; c < MAX_NUM_COMPONENT; c++ ) { for( uint32_t i = 0; i < LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS_SIGNAL; i++ ) { for( uint32_t j = 0; j < LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS_SIGNAL; j++ ) { m_filteredBlock[i][j][c] = nullptr; } m_filteredBlockTmp[i][c] = nullptr; } } #if JVET_AB0112_AFFINE_DMVR for (uint32_t i = 0; i < NUM_REF_PIC_LIST_01; i++) { m_affineDmvrBlockTmp[i] = nullptr; } #endif m_cYuvPredTempDMVRL1 = nullptr; m_cYuvPredTempDMVRL0 = nullptr; for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++) { m_cRefSamplesDMVRL0[ch] = nullptr; m_cRefSamplesDMVRL1[ch] = nullptr; } #if INTER_LIC m_LICMultApprox[0] = 0; for (int k = 1; k < 64; k++) { m_LICMultApprox[k] = ((1 << 15) + (k >> 1)) / k; } #endif #if MULTI_PASS_DMVR int mvSearchIdx_bilMrg = 0; #if JVET_X0049_BDMVR_SW_OPT uint16_t currtPrio = 0, currIdx = 0; ::memset(m_searchEnlargeOffsetNum, 0, sizeof(m_searchEnlargeOffsetNum)); #endif for (int y = -BDMVR_INTME_RANGE; y <= BDMVR_INTME_RANGE; y++) { for (int x = -BDMVR_INTME_RANGE; x <= BDMVR_INTME_RANGE; x++) { #if JVET_X0049_BDMVR_SW_OPT #else m_searchEnlargeOffsetBilMrg[mvSearchIdx_bilMrg] = Mv(x, y); #endif if ( (abs(x) + abs(y)) == 
0 ) { #if JVET_X0049_BDMVR_SW_OPT currtPrio = 0; currIdx = m_searchEnlargeOffsetNum[currtPrio]; m_searchEnlargeOffsetToIdx[currtPrio][currIdx] = mvSearchIdx_bilMrg; #else m_searchPriorityBilMrg[mvSearchIdx_bilMrg] = 0; #endif m_costShiftBilMrg1[mvSearchIdx_bilMrg] = 63; m_costShiftBilMrg2[mvSearchIdx_bilMrg++] = 63; } else if ( (abs(x) + abs(y)) < 4 ) { #if JVET_X0049_BDMVR_SW_OPT currtPrio = 1; currIdx = m_searchEnlargeOffsetNum[currtPrio]; m_searchEnlargeOffsetToIdx[currtPrio][currIdx] = mvSearchIdx_bilMrg; #else m_searchPriorityBilMrg[mvSearchIdx_bilMrg] = 1; #endif m_costShiftBilMrg1[mvSearchIdx_bilMrg] = 63; m_costShiftBilMrg2[mvSearchIdx_bilMrg++] = 63; } else if ((abs(x) + abs(y)) < 7) { #if JVET_X0049_BDMVR_SW_OPT currtPrio = 2; currIdx = m_searchEnlargeOffsetNum[currtPrio]; m_searchEnlargeOffsetToIdx[currtPrio][currIdx] = mvSearchIdx_bilMrg; #else m_searchPriorityBilMrg[mvSearchIdx_bilMrg] = 2; #endif m_costShiftBilMrg1[mvSearchIdx_bilMrg] = 2; m_costShiftBilMrg2[mvSearchIdx_bilMrg++] = 63; } else if ((abs(x) + abs(y)) < 11) { #if JVET_X0049_BDMVR_SW_OPT currtPrio = 3; currIdx = m_searchEnlargeOffsetNum[currtPrio]; m_searchEnlargeOffsetToIdx[currtPrio][currIdx] = mvSearchIdx_bilMrg; #else m_searchPriorityBilMrg[mvSearchIdx_bilMrg] = 3; #endif m_costShiftBilMrg1[mvSearchIdx_bilMrg] = 1; m_costShiftBilMrg2[mvSearchIdx_bilMrg++] = 63; } else { #if JVET_X0049_BDMVR_SW_OPT currtPrio = 4; currIdx = m_searchEnlargeOffsetNum[currtPrio]; m_searchEnlargeOffsetToIdx[currtPrio][currIdx] = mvSearchIdx_bilMrg; #else m_searchPriorityBilMrg[mvSearchIdx_bilMrg] = 4; #endif m_costShiftBilMrg1[mvSearchIdx_bilMrg] = 1; m_costShiftBilMrg2[mvSearchIdx_bilMrg++] = 2; } #if JVET_X0049_BDMVR_SW_OPT m_searchEnlargeOffsetBilMrg[currtPrio][currIdx] = Mv(x, y); m_searchEnlargeOffsetNum[currtPrio]++; #endif } } CHECK(mvSearchIdx_bilMrg != (2 * BDMVR_INTME_RANGE + 1) * (2 * BDMVR_INTME_RANGE + 1), "this is wrong, mvSearchIdx_bilMrg != (2 * BDMVR_INTME_RANGE + 1) * (2 * BDMVR_INTME_RANGE 
+ 1)"); #endif #if JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING || JVET_Z0061_TM_OBMC || JVET_AA0061_IBC_MBVD for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++) { for (uint32_t tmplt = 0; tmplt < 2; tmplt++) { m_acYuvCurAMLTemplate[tmplt][ch] = nullptr; m_acYuvRefAboveTemplate[tmplt][ch] = nullptr; m_acYuvRefLeftTemplate[tmplt][ch] = nullptr; m_acYuvRefAMLTemplate[tmplt][ch] = nullptr; } } #if JVET_Z0056_GPM_SPLIT_MODE_REORDERING for (uint32_t tmplt = 0; tmplt < 2 + 2; tmplt++) { m_acYuvRefAMLTemplatePart0[tmplt] = nullptr; m_acYuvRefAMLTemplatePart1[tmplt] = nullptr; } m_tplWeightTblInitialized = false; #endif #endif #if JVET_Z0061_TM_OBMC for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++) { for (uint32_t tmplt = 0; tmplt < 2; tmplt++) { m_acYuvRefAboveTemplateOBMC[tmplt][ch] = nullptr; m_acYuvRefLeftTemplateOBMC[tmplt][ch] = nullptr; m_acYuvBlendTemplateOBMC[tmplt][ch] = nullptr; } } #endif } InterPrediction::~InterPrediction() { destroy(); } void InterPrediction::destroy() { for( uint32_t i = 0; i < NUM_REF_PIC_LIST_01; i++ ) { for( uint32_t c = 0; c < MAX_NUM_COMPONENT; c++ ) { xFree( m_acYuvPred[i][c] ); m_acYuvPred[i][c] = nullptr; } } for( uint32_t c = 0; c < MAX_NUM_COMPONENT; c++ ) { for( uint32_t i = 0; i < LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS_SIGNAL; i++ ) { for( uint32_t j = 0; j < LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS_SIGNAL; j++ ) { xFree( m_filteredBlock[i][j][c] ); m_filteredBlock[i][j][c] = nullptr; } xFree( m_filteredBlockTmp[i][c] ); m_filteredBlockTmp[i][c] = nullptr; } } #if JVET_AB0112_AFFINE_DMVR for (uint32_t i = 0; i < 2; i++) { xFree(m_affineDmvrBlockTmp[i]); m_affineDmvrBlockTmp[i] = nullptr; } #endif m_geoPartBuf[0].destroy(); m_geoPartBuf[1].destroy(); m_colorTransResiBuf[0].destroy(); m_colorTransResiBuf[1].destroy(); m_colorTransResiBuf[2].destroy(); if (m_storedMv != nullptr) { delete[]m_storedMv; m_storedMv = nullptr; } xFree(m_gradX0); m_gradX0 = nullptr; xFree(m_gradY0); m_gradY0 = nullptr; 
xFree(m_gradX1); m_gradX1 = nullptr; xFree(m_gradY1); m_gradY1 = nullptr; #if MULTI_PASS_DMVR || SAMPLE_BASED_BDOF xFree(m_absGx); m_absGx = nullptr; xFree(m_absGy); m_absGy = nullptr; xFree(m_dIx); m_dIx = nullptr; xFree(m_dIy); m_dIy = nullptr; xFree(m_dI); m_dI = nullptr; xFree(m_signGxGy); m_signGxGy = nullptr; xFree(m_tmpx_pixel_32bit); m_tmpx_pixel_32bit = nullptr; xFree(m_tmpy_pixel_32bit); m_tmpy_pixel_32bit = nullptr; xFree(m_sumAbsGX_pixel_32bit); m_sumAbsGX_pixel_32bit = nullptr; xFree(m_sumAbsGY_pixel_32bit); m_sumAbsGY_pixel_32bit = nullptr; xFree(m_sumDIX_pixel_32bit); m_sumDIX_pixel_32bit = nullptr; xFree(m_sumDIY_pixel_32bit); m_sumDIY_pixel_32bit = nullptr; xFree(m_sumSignGY_GX_pixel_32bit); m_sumSignGY_GX_pixel_32bit = nullptr; #endif #if ENABLE_OBMC m_tmpObmcBufL0.destroy(); m_tmpObmcBufT0.destroy(); m_tmpSubObmcBuf.destroy(); #endif xFree(m_cYuvPredTempDMVRL0); m_cYuvPredTempDMVRL0 = nullptr; xFree(m_cYuvPredTempDMVRL1); m_cYuvPredTempDMVRL1 = nullptr; for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++) { xFree(m_cRefSamplesDMVRL0[ch]); m_cRefSamplesDMVRL0[ch] = nullptr; xFree(m_cRefSamplesDMVRL1[ch]); m_cRefSamplesDMVRL1[ch] = nullptr; } #if JVET_Z0118_GDR m_ibcBuffer0.destroy(); m_ibcBuffer1.destroy(); #else m_ibcBuffer.destroy(); #endif #if TM_AMVP || TM_MRG || JVET_Z0084_IBC_TM xFree(m_pcCurTplLeft ); m_pcCurTplLeft = nullptr; xFree(m_pcCurTplAbove); m_pcCurTplAbove = nullptr; xFree(m_pcRefTplLeft ); m_pcRefTplLeft = nullptr; xFree(m_pcRefTplAbove); m_pcRefTplAbove = nullptr; #endif #if INTER_LIC || JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED xFree(m_pcLICRefLeftTemplate); m_pcLICRefLeftTemplate = nullptr; xFree(m_pcLICRefAboveTemplate); m_pcLICRefAboveTemplate = nullptr; xFree(m_pcLICRecLeftTemplate); m_pcLICRecLeftTemplate = nullptr; xFree(m_pcLICRecAboveTemplate); m_pcLICRecAboveTemplate = nullptr; #endif #if MULTI_HYP_PRED m_additionalHypothesisStorage.destroy(); #endif #if JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING || 
JVET_Z0061_TM_OBMC || JVET_AA0061_IBC_MBVD for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++) { for (uint32_t tmplt = 0; tmplt < 2; tmplt++) { xFree(m_acYuvCurAMLTemplate[tmplt][ch]); xFree(m_acYuvRefAboveTemplate[tmplt][ch]); xFree(m_acYuvRefLeftTemplate[tmplt][ch]); xFree(m_acYuvRefAMLTemplate[tmplt][ch]); m_acYuvCurAMLTemplate[tmplt][ch] = nullptr; m_acYuvRefAboveTemplate[tmplt][ch] = nullptr; m_acYuvRefLeftTemplate[tmplt][ch] = nullptr; m_acYuvRefAMLTemplate[tmplt][ch] = nullptr; } } #if JVET_Z0056_GPM_SPLIT_MODE_REORDERING for (uint32_t tmplt = 0; tmplt < 2 + 2; tmplt++) { xFree(m_acYuvRefAMLTemplatePart0[tmplt]); xFree(m_acYuvRefAMLTemplatePart1[tmplt]); m_acYuvRefAMLTemplatePart0[tmplt] = nullptr; m_acYuvRefAMLTemplatePart1[tmplt] = nullptr; } #endif #endif #if JVET_Z0061_TM_OBMC for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++) { for (uint32_t tmplt = 0; tmplt < 2; tmplt++) { xFree(m_acYuvRefAboveTemplateOBMC[tmplt][ch]); xFree(m_acYuvRefLeftTemplateOBMC[tmplt][ch]); xFree(m_acYuvBlendTemplateOBMC[tmplt][ch]); m_acYuvRefAboveTemplateOBMC[tmplt][ch] = nullptr; m_acYuvRefLeftTemplateOBMC[tmplt][ch] = nullptr; m_acYuvBlendTemplateOBMC[tmplt][ch] = nullptr; } } #endif } #if INTER_LIC || (TM_AMVP || TM_MRG || JVET_Z0084_IBC_TM) || JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING || JVET_Z0061_TM_OBMC #if JVET_Z0153_IBC_EXT_REF void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC, const int ctuSize, Reshape* reshape, const int picWidth ) #else void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC, const int ctuSize, Reshape* reshape ) #endif #else void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC, const int ctuSize ) #endif { m_pcRdCost = pcRdCost; #if INTER_LIC || (TM_AMVP || TM_MRG || JVET_Z0084_IBC_TM) || JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING || JVET_Z0061_TM_OBMC m_pcReshape = reshape; #endif // if it has been initialised before, but the chroma format has 
changed, release the memory and start again. if( m_acYuvPred[REF_PIC_LIST_0][COMPONENT_Y] != nullptr && m_currChromaFormat != chromaFormatIDC ) { destroy(); } m_currChromaFormat = chromaFormatIDC; if( m_acYuvPred[REF_PIC_LIST_0][COMPONENT_Y] == nullptr ) // check if first is null (in which case, nothing initialised yet) { for( uint32_t c = 0; c < MAX_NUM_COMPONENT; c++ ) { #if IF_12TAP || MULTI_PASS_DMVR #if MULTI_PASS_DMVR int extendSize = std::max(2 * BIO_EXTEND_SIZE + 2, 2 * BDMVR_INTME_RANGE); #else int extendSize = std::max(2 * BIO_EXTEND_SIZE + 2, 2 * DMVR_NUM_ITERATION); #endif #if IF_12TAP int extWidth = MAX_CU_SIZE + extendSize + 32; #else int extWidth = MAX_CU_SIZE + extendSize + 16; #endif int extHeight = MAX_CU_SIZE + extendSize + 1; #else int extWidth = MAX_CU_SIZE + (2 * BIO_EXTEND_SIZE + 2) + 16; int extHeight = MAX_CU_SIZE + (2 * BIO_EXTEND_SIZE + 2) + 1; extWidth = extWidth > (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + 16) ? extWidth : MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + 16; extHeight = extHeight > (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + 1) ? 
extHeight : MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + 1; #endif for( uint32_t i = 0; i < LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS_SIGNAL; i++ ) { #if IF_12TAP m_filteredBlockTmp[i][c] = ( Pel* ) xMalloc( Pel, ( extWidth + 4 ) * ( extHeight + 15 + 4 ) ); #else m_filteredBlockTmp[i][c] = ( Pel* ) xMalloc( Pel, ( extWidth + 4 ) * ( extHeight + 7 + 4 ) ); #endif for( uint32_t j = 0; j < LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS_SIGNAL; j++ ) { m_filteredBlock[i][j][c] = ( Pel* ) xMalloc( Pel, extWidth * extHeight ); } } // new structure for( uint32_t i = 0; i < NUM_REF_PIC_LIST_01; i++ ) { m_acYuvPred[i][c] = ( Pel* ) xMalloc( Pel, MAX_CU_SIZE * MAX_CU_SIZE ); } } #if JVET_AB0112_AFFINE_DMVR int memBlockWidth = AFFINE_MIN_BLOCK_SIZE + (AFFINE_DMVR_SEARCH_RANGE << 1); int memBlockHeight = AFFINE_MIN_BLOCK_SIZE + (AFFINE_DMVR_SEARCH_RANGE << 1); int memBlockWidthOffset = BDMVR_SIMD_IF_FACTOR - (memBlockWidth & (BDMVR_SIMD_IF_FACTOR - 1)); memBlockWidthOffset &= (BDMVR_SIMD_IF_FACTOR - 1); int memBlockWidthExt = memBlockWidth + memBlockWidthOffset; // This ensures that iWidthExt is a factor-of-n number, assuming BDMVR_SIMD_IF_FACTOR is equal to n int memBlockNum = (MAX_CU_SIZE / AFFINE_MIN_BLOCK_SIZE)*(MAX_CU_SIZE / AFFINE_MIN_BLOCK_SIZE); for (uint32_t i = 0; i < 2; i++) { m_affineDmvrBlockTmp[i] = (Pel*)xMalloc(Pel, memBlockWidthExt*memBlockHeight*memBlockNum); } #endif m_geoPartBuf[0].create(UnitArea(chromaFormatIDC, Area(0, 0, MAX_CU_SIZE, MAX_CU_SIZE))); m_geoPartBuf[1].create(UnitArea(chromaFormatIDC, Area(0, 0, MAX_CU_SIZE, MAX_CU_SIZE))); m_colorTransResiBuf[0].create(UnitArea(chromaFormatIDC, Area(0, 0, MAX_CU_SIZE, MAX_CU_SIZE))); m_colorTransResiBuf[1].create(UnitArea(chromaFormatIDC, Area(0, 0, MAX_CU_SIZE, MAX_CU_SIZE))); m_colorTransResiBuf[2].create(UnitArea(chromaFormatIDC, Area(0, 0, MAX_CU_SIZE, MAX_CU_SIZE))); #if MULTI_HYP_PRED m_additionalHypothesisStorage.create(UnitArea(chromaFormatIDC, Area(0, 0, MAX_CU_SIZE, MAX_CU_SIZE))); #endif 
m_iRefListIdx = -1; m_gradX0 = (Pel*)xMalloc(Pel, BIO_TEMP_BUFFER_SIZE); m_gradY0 = (Pel*)xMalloc(Pel, BIO_TEMP_BUFFER_SIZE); m_gradX1 = (Pel*)xMalloc(Pel, BIO_TEMP_BUFFER_SIZE); m_gradY1 = (Pel*)xMalloc(Pel, BIO_TEMP_BUFFER_SIZE); #if MULTI_PASS_DMVR || SAMPLE_BASED_BDOF m_absGx = (Pel*)xMalloc(Pel, BIO_TEMP_BUFFER_SIZE); m_absGy = (Pel*)xMalloc(Pel, BIO_TEMP_BUFFER_SIZE); m_dIx = (Pel*)xMalloc(Pel, BIO_TEMP_BUFFER_SIZE); m_dIy = (Pel*)xMalloc(Pel, BIO_TEMP_BUFFER_SIZE); m_dI = (Pel*)xMalloc(Pel, BIO_TEMP_BUFFER_SIZE); m_signGxGy = (Pel*)xMalloc(Pel, BIO_TEMP_BUFFER_SIZE); m_tmpx_pixel_32bit = (int*)xMalloc(int, BDOF_SUBPU_SIZE); m_tmpy_pixel_32bit = (int*)xMalloc(int, BDOF_SUBPU_SIZE); m_sumAbsGX_pixel_32bit = (int*)xMalloc(int, BDOF_SUBPU_SIZE); m_sumAbsGY_pixel_32bit = (int*)xMalloc(int, BDOF_SUBPU_SIZE); m_sumDIX_pixel_32bit = (int*)xMalloc(int, BDOF_SUBPU_SIZE); m_sumDIY_pixel_32bit = (int*)xMalloc(int, BDOF_SUBPU_SIZE); m_sumSignGY_GX_pixel_32bit = (int*)xMalloc(int, BDOF_SUBPU_SIZE); #endif #if ENABLE_OBMC m_tmpObmcBufL0.create(UnitArea(chromaFormatIDC, Area(0, 0, 4, MAX_CU_SIZE))); m_tmpObmcBufT0.create(UnitArea(chromaFormatIDC, Area(0, 0, MAX_CU_SIZE, 4))); m_tmpSubObmcBuf.create(UnitArea(chromaFormatIDC, Area(0, 0, 20, 4))); m_tmpSubObmcBuf.bufs[0].memset(0); if(chromaFormatIDC != CHROMA_400) { m_tmpSubObmcBuf.bufs[1].memset(0); m_tmpSubObmcBuf.bufs[2].memset(0); } #endif } if (m_cYuvPredTempDMVRL0 == nullptr && m_cYuvPredTempDMVRL1 == nullptr) { m_cYuvPredTempDMVRL0 = (Pel*)xMalloc(Pel, (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION)) * (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION))); m_cYuvPredTempDMVRL1 = (Pel*)xMalloc(Pel, (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION)) * (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION))); for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++) { #if IF_12TAP m_cRefSamplesDMVRL0[ch] = (Pel*)xMalloc(Pel, (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA(0)) * (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA(0))); m_cRefSamplesDMVRL1[ch] = 
(Pel*)xMalloc(Pel, (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA(0)) * (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA(0))); #else m_cRefSamplesDMVRL0[ch] = (Pel*)xMalloc(Pel, (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA) * (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA)); m_cRefSamplesDMVRL1[ch] = (Pel*)xMalloc(Pel, (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA) * (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA)); #endif } } #if !JVET_J0090_MEMORY_BANDWITH_MEASURE m_if.initInterpolationFilter( true ); #endif #if TM_AMVP || TM_MRG || JVET_Z0084_IBC_TM if (m_pcCurTplLeft == nullptr) { m_pcCurTplLeft = (Pel*)xMalloc(Pel, TM_TPL_SIZE * MAX_CU_SIZE); m_pcCurTplAbove = (Pel*)xMalloc(Pel, TM_TPL_SIZE * MAX_CU_SIZE); m_pcRefTplLeft = (Pel*)xMalloc(Pel, TM_TPL_SIZE * MAX_CU_SIZE); m_pcRefTplAbove = (Pel*)xMalloc(Pel, TM_TPL_SIZE * MAX_CU_SIZE); } #endif #if INTER_LIC || JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED if (m_pcLICRefLeftTemplate == nullptr) { m_pcLICRefLeftTemplate = (Pel*)xMalloc(Pel, MAX_CU_SIZE); m_pcLICRefAboveTemplate = (Pel*)xMalloc(Pel, MAX_CU_SIZE); m_pcLICRecLeftTemplate = (Pel*)xMalloc(Pel, MAX_CU_SIZE); m_pcLICRecAboveTemplate = (Pel*)xMalloc(Pel, MAX_CU_SIZE); } #endif #if JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING || JVET_Z0061_TM_OBMC || JVET_AA0061_IBC_MBVD for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++) { for (uint32_t tmplt = 0; tmplt < 2; tmplt++) { if( m_acYuvCurAMLTemplate[tmplt][ch] == nullptr ) { m_acYuvCurAMLTemplate[tmplt][ch] = ( Pel* ) xMalloc( Pel, MAX_CU_SIZE * MAX_CU_SIZE ); m_acYuvRefAboveTemplate[tmplt][ch] = ( Pel* ) xMalloc( Pel, MAX_CU_SIZE * MAX_CU_SIZE ); m_acYuvRefLeftTemplate[tmplt][ch] = ( Pel* ) xMalloc( Pel, MAX_CU_SIZE * MAX_CU_SIZE ); m_acYuvRefAMLTemplate[tmplt][ch] = ( Pel* ) xMalloc( Pel, MAX_CU_SIZE * MAX_CU_SIZE ); } } } #if JVET_Z0056_GPM_SPLIT_MODE_REORDERING for (uint32_t tmplt = 0; tmplt < 2 + 2; tmplt++) { if( m_acYuvRefAMLTemplatePart0[tmplt] == nullptr ) { 
m_acYuvRefAMLTemplatePart0[tmplt] = ( Pel* ) xMalloc( Pel, GEO_MAX_CU_SIZE * GEO_MODE_SEL_TM_SIZE ); m_acYuvRefAMLTemplatePart1[tmplt] = ( Pel* ) xMalloc( Pel, GEO_MAX_CU_SIZE * GEO_MODE_SEL_TM_SIZE ); } } #endif #endif #if JVET_Z0061_TM_OBMC for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++) { for (uint32_t tmplt = 0; tmplt < 2; tmplt++) { if( m_acYuvRefAboveTemplateOBMC[tmplt][ch] == nullptr ) { m_acYuvRefAboveTemplateOBMC[tmplt][ch] = ( Pel * ) xMalloc( Pel, MAX_CU_SIZE * MAX_CU_SIZE ); m_acYuvRefLeftTemplateOBMC[tmplt][ch] = ( Pel * ) xMalloc( Pel, MAX_CU_SIZE * MAX_CU_SIZE ); m_acYuvBlendTemplateOBMC[tmplt][ch] = ( Pel * ) xMalloc( Pel, MAX_CU_SIZE * MAX_CU_SIZE ); } } } #endif if (m_storedMv == nullptr) { const int MVBUFFER_SIZE = MAX_CU_SIZE / MIN_PU_SIZE; m_storedMv = new Mv[MVBUFFER_SIZE*MVBUFFER_SIZE]; } #if JVET_Z0118_GDR #if JVET_Z0153_IBC_EXT_REF m_ibcBufferWidth = (picWidth + ctuSize - 1) / ctuSize * ctuSize; m_ibcBufferHeight = 3 * ctuSize; #if JVET_AA0106_IBCBUF_CTU256 if( 256 == ctuSize ) { m_ibcBufferHeight = 2 * ctuSize; } #endif if (m_ibcBuffer0.bufs.empty()) { m_ibcBuffer0.create(UnitArea(chromaFormatIDC, Area(0, 0, m_ibcBufferWidth, m_ibcBufferHeight))); } if (m_ibcBuffer1.bufs.empty()) { m_ibcBuffer1.create(UnitArea(chromaFormatIDC, Area(0, 0, m_ibcBufferWidth, m_ibcBufferHeight))); } #else m_ibcBufferWidth = g_IBCBufferSize / ctuSize; if (m_ibcBuffer0.bufs.empty()) { m_ibcBuffer0.create(UnitArea(chromaFormatIDC, Area(0, 0, m_ibcBufferWidth, ctuSize))); } if (m_ibcBuffer1.bufs.empty()) { m_ibcBuffer1.create(UnitArea(chromaFormatIDC, Area(0, 0, m_ibcBufferWidth, ctuSize))); } #endif #else if (m_ibcBuffer.bufs.empty()) { #if JVET_Z0153_IBC_EXT_REF m_ibcBufferWidth = (picWidth + ctuSize - 1) / ctuSize * ctuSize; m_ibcBufferHeight = 3 * ctuSize; #if JVET_AA0106_IBCBUF_CTU256 if( 256 == ctuSize ) { m_ibcBufferHeight = 2 * ctuSize; } #endif m_ibcBuffer.create(UnitArea(chromaFormatIDC, Area(0, 0, m_ibcBufferWidth, m_ibcBufferHeight))); #else 
m_ibcBufferWidth = g_IBCBufferSize / ctuSize; m_ibcBuffer.create(UnitArea(chromaFormatIDC, Area(0, 0, m_ibcBufferWidth, ctuSize))); #endif } #endif } // ==================================================================================================================== // Public member functions // ==================================================================================================================== bool InterPrediction::xCheckIdenticalMotion( const PredictionUnit &pu ) { const Slice &slice = *pu.cs->slice; if( slice.isInterB() && !pu.cs->pps->getWPBiPred() ) { if( pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0 ) { int RefPOCL0 = slice.getRefPic( REF_PIC_LIST_0, pu.refIdx[0] )->getPOC(); int RefPOCL1 = slice.getRefPic( REF_PIC_LIST_1, pu.refIdx[1] )->getPOC(); if( RefPOCL0 == RefPOCL1 ) { if( !pu.cu->affine ) { if( pu.mv[0] == pu.mv[1] ) { return true; } } else { if ( (pu.cu->affineType == AFFINEMODEL_4PARAM && (pu.mvAffi[0][0] == pu.mvAffi[1][0]) && (pu.mvAffi[0][1] == pu.mvAffi[1][1])) || (pu.cu->affineType == AFFINEMODEL_6PARAM && (pu.mvAffi[0][0] == pu.mvAffi[1][0]) && (pu.mvAffi[0][1] == pu.mvAffi[1][1]) && (pu.mvAffi[0][2] == pu.mvAffi[1][2])) ) { return true; } } } } } return false; } void InterPrediction::xSubPuMC( PredictionUnit& pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList /*= REF_PIC_LIST_X*/, const bool luma /*= true*/, const bool chroma /*= true*/) { #if MULTI_HYP_PRED CHECK(!pu.addHypData.empty(), "Multi Hyp: !pu.addHypData.empty()"); #endif // compute the location of the current PU Position puPos = pu.lumaPos(); Size puSize = pu.lumaSize(); int numPartLine, numPartCol, puHeight, puWidth; { numPartLine = std::max(puSize.width >> ATMVP_SUB_BLOCK_SIZE, 1u); numPartCol = std::max(puSize.height >> ATMVP_SUB_BLOCK_SIZE, 1u); puHeight = numPartCol == 1 ? puSize.height : 1 << ATMVP_SUB_BLOCK_SIZE; puWidth = numPartLine == 1 ? 
puSize.width : 1 << ATMVP_SUB_BLOCK_SIZE; } PredictionUnit subPu; subPu.cs = pu.cs; subPu.cu = pu.cu; subPu.mergeType = MRG_TYPE_DEFAULT_N; bool isAffine = pu.cu->affine; subPu.cu->affine = false; // join sub-pus containing the same motion bool verMC = puSize.height > puSize.width; int fstStart = (!verMC ? puPos.y : puPos.x); int secStart = (!verMC ? puPos.x : puPos.y); int fstEnd = (!verMC ? puPos.y + puSize.height : puPos.x + puSize.width); int secEnd = (!verMC ? puPos.x + puSize.width : puPos.y + puSize.height); int fstStep = (!verMC ? puHeight : puWidth); int secStep = (!verMC ? puWidth : puHeight); const bool isResamplingPossible = pu.cs->sps->getRprEnabledFlag(); const bool scaled = isResamplingPossible && ( pu.cu->slice->getRefPic( REF_PIC_LIST_0, 0 )->isRefScaled( pu.cs->pps ) || ( pu.cs->slice->getSliceType() == B_SLICE ? pu.cu->slice->getRefPic( REF_PIC_LIST_1, 0 )->isRefScaled( pu.cs->pps ) : false ) ); m_subPuMC = true; for (int fstDim = fstStart; fstDim < fstEnd; fstDim += fstStep) { for (int secDim = secStart; secDim < secEnd; secDim += secStep) { int x = !verMC ? secDim : fstDim; int y = !verMC ? fstDim : secDim; const MotionInfo &curMi = pu.getMotionInfo(Position{ x, y }); int length = secStep; int later = secDim + secStep; while (later < secEnd) { const MotionInfo &laterMi = !verMC ? pu.getMotionInfo(Position{ later, fstDim }) : pu.getMotionInfo(Position{ fstDim, later }); if (!scaled && laterMi == curMi #if INTER_LIC && laterMi.usesLIC == curMi.usesLIC #endif ) { length += secStep; } else { break; } later += secStep; } int dx = !verMC ? length : puWidth; int dy = !verMC ? 
puHeight : length; subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(x, y, dx, dy))); subPu = curMi; PelUnitBuf subPredBuf = predBuf.subBuf(UnitAreaRelative(pu, subPu)); subPu.mmvdEncOptMode = 0; subPu.mvRefine = false; motionCompensation(subPu, subPredBuf, eRefPicList, luma, chroma); secDim = later - secStep; } } m_subPuMC = false; pu.cu->affine = isAffine; } #if !BDOF_RM_CONSTRAINTS void InterPrediction::xSubPuBio(PredictionUnit& pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList /*= REF_PIC_LIST_X*/, PelUnitBuf* yuvDstTmp /*= NULL*/) { // compute the location of the current PU Position puPos = pu.lumaPos(); Size puSize = pu.lumaSize(); #if JVET_J0090_MEMORY_BANDWITH_MEASURE JVET_J0090_SET_CACHE_ENABLE(true); int mvShift = (MV_FRACTIONAL_BITS_INTERNAL); for (int k = 0; k < NUM_REF_PIC_LIST_01; k++) { RefPicList refId = (RefPicList)k; const Picture* refPic = pu.cu->slice->getRefPic(refId, pu.refIdx[refId]); for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++) { Mv cMv = pu.mv[refId]; int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat); int filtersize = (compID == (COMPONENT_Y)) ? 
NTAPS_LUMA : NTAPS_CHROMA; cMv += Mv(-(((filtersize >> 1) - 1) << mvshiftTemp), -(((filtersize >> 1) - 1) << mvshiftTemp)); bool wrapRef = false; if ( pu.cu->slice->getRefPic(refId, pu.refIdx[refId])->isWrapAroundEnabled( pu.cs->pps ) ) { wrapRef = wrapClipMv(cMv, pu.blocks[0].pos(), pu.blocks[0].size(), pu.cs->sps, pu.cs->pps); } else { clipMv(cMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps); } int width = predBuf.bufs[compID].width + (filtersize - 1); int height = predBuf.bufs[compID].height + (filtersize - 1); CPelBuf refBuf; Position recOffset = pu.blocks[compID].pos().offset(cMv.getHor() >> mvshiftTemp, cMv.getVer() >> mvshiftTemp); refBuf = refPic->getRecoBuf(CompArea((ComponentID)compID, pu.chromaFormat, recOffset, pu.blocks[compID].size()), wrapRef); JVET_J0090_SET_REF_PICTURE(refPic, (ComponentID)compID); for (int row = 0; row < height; row++) { for (int col = 0; col < width; col++) { JVET_J0090_CACHE_ACCESS(((Pel *)refBuf.buf) + row * refBuf.stride + col, __FILE__, __LINE__); } } } } JVET_J0090_SET_CACHE_ENABLE(false); #endif PredictionUnit subPu; subPu.cs = pu.cs; subPu.cu = pu.cu; subPu.mergeType = pu.mergeType; subPu.mmvdMergeFlag = pu.mmvdMergeFlag; subPu.mmvdEncOptMode = pu.mmvdEncOptMode; subPu.mergeFlag = pu.mergeFlag; subPu.ciipFlag = pu.ciipFlag; subPu.mvRefine = pu.mvRefine; #if TM_MRG || (JVET_Z0084_IBC_TM && IBC_TM_MRG) subPu.tmMergeFlag = pu.tmMergeFlag; #endif subPu.refIdx[0] = pu.refIdx[0]; subPu.refIdx[1] = pu.refIdx[1]; int fstStart = puPos.y; int secStart = puPos.x; int fstEnd = puPos.y + puSize.height; int secEnd = puPos.x + puSize.width; int fstStep = std::min((int)MAX_BDOF_APPLICATION_REGION, (int)puSize.height); int secStep = std::min((int)MAX_BDOF_APPLICATION_REGION, (int)puSize.width); for (int fstDim = fstStart; fstDim < fstEnd; fstDim += fstStep) { for (int secDim = secStart; secDim < secEnd; secDim += secStep) { int x = secDim; int y = fstDim; int dx = secStep; int dy = fstStep; #if !JVET_W0097_GPM_MMVD_TM const 
MotionInfo &curMi = pu.getMotionInfo(Position{ x, y });
#endif

      subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(x, y, dx, dy)));
#if JVET_W0097_GPM_MMVD_TM
      subPu.interDir = pu.interDir;
      for (uint32_t i = 0; i < NUM_REF_PIC_LIST_01; i++)
      {
        subPu.refIdx[i] = pu.refIdx[i];
        subPu.mv[i] = pu.mv[i];
      }
#else
      subPu = curMi;
#endif
      PelUnitBuf subPredBuf = predBuf.subBuf(UnitAreaRelative(pu, subPu));
      if (yuvDstTmp)
      {
        PelUnitBuf subPredBufTmp = yuvDstTmp->subBuf(UnitAreaRelative(pu, subPu));
        motionCompensation(subPu, subPredBuf, eRefPicList, true, true, &subPredBufTmp);
      }
      else
        motionCompensation(subPu, subPredBuf, eRefPicList);
    }
  }
  JVET_J0090_SET_CACHE_ENABLE(true);
}
#endif

/** Uni-directional prediction of one PU from a single reference picture list.
 *
 * Picks the motion vector(s) for \p eRefPicList (three affine control-point MVs for affine CUs,
 * otherwise the single PU MV), optionally clips the MV, and then predicts each requested colour
 * component through xPredAffineBlk() or xPredInterBlk().
 *
 * \param pu              prediction unit to predict
 * \param eRefPicList     reference picture list whose refIdx / MV are used
 * \param pcYuvPred       destination buffers; only components up to m_maxCompIDToPred are written
 * \param bi              forwarded unchanged to xPredAffineBlk() / xPredInterBlk()
 * \param bioApplied      forwarded to xPredInterBlk(); must be false for affine CUs (CHECKed below)
 * \param luma            when false, COMPONENT_Y is skipped
 * \param chroma          when false, all non-luma components are skipped
 * \param isBdofMvRefine  (MULTI_PASS_DMVR only) when set together with \p bioApplied, chroma is skipped
 */
#if MULTI_PASS_DMVR
void InterPrediction::xPredInterUni(const PredictionUnit &pu, const RefPicList &eRefPicList, PelUnitBuf &pcYuvPred, const bool &bi, const bool &bioApplied, const bool luma, const bool chroma, const bool isBdofMvRefine)
#else
void InterPrediction::xPredInterUni(const PredictionUnit &pu, const RefPicList &eRefPicList, PelUnitBuf &pcYuvPred, const bool &bi, const bool &bioApplied, const bool luma, const bool chroma)
#endif
{
  const SPS &sps = *pu.cs->sps;

  int iRefIdx = pu.refIdx[eRefPicList];
  Mv mv[3];   // mv[0] only for translational motion; mv[0..2] for affine control points
  bool isIBC = false;
#if !INTER_RM_SIZE_CONSTRAINTS
#if ENABLE_OBMC
  // The 4x4 size check is bypassed while this PU is being predicted with isobmcMC set.
  if (pu.cu->isobmcMC == false)
#endif
  CHECK( !CU::isIBC( *pu.cu ) && pu.lwidth() == 4 && pu.lheight() == 4, "invalid 4x4 inter blocks" );
#endif
  if (CU::isIBC(*pu.cu))
  {
    isIBC = true;
  }
  if( pu.cu->affine )
  {
    CHECK( iRefIdx < 0, "iRefIdx incorrect." );

    mv[0] = pu.mvAffi[eRefPicList][0];
    mv[1] = pu.mvAffi[eRefPicList][1];
    mv[2] = pu.mvAffi[eRefPicList][2];
  }
  else
  {
    mv[0] = pu.mv[eRefPicList];
  }

  if( !pu.cu->affine )
  {
    const bool isResamplingPossible = pu.cs->sps->getRprEnabledFlag();

    // Clip only for non-IBC blocks whose reference is not a scaled (RPR) picture,
    // and only when wrap-around is disabled in the PPS.
    if( !isIBC && ( !isResamplingPossible || !pu.cu->slice->getRefPic( eRefPicList, iRefIdx )->isRefScaled( pu.cs->pps ) ) )
    {
      if( !pu.cs->pps->getWrapAroundEnabledFlag() )
      {
#if JVET_AA0096_MC_BOUNDARY_PADDING
        if (bioApplied)
        {
          // With BDOF the clip position is shifted by -(BIO_EXTEND_SIZE + 1) in both directions.
          clipMv(mv[0], pu.lumaPos().offset(-(BIO_EXTEND_SIZE + 1), -(BIO_EXTEND_SIZE + 1)), pu.lumaSize(), sps, *pu.cs->pps);
        }
        else
        {
          clipMv(mv[0], pu.lumaPos(), pu.lumaSize(), sps, *pu.cs->pps);
        }
#else
        // NOTE(review): this variant clips against the CU position/size rather than the PU's.
        clipMv( mv[0], pu.cu->lumaPos(), pu.cu->lumaSize(), sps, *pu.cs->pps );
#endif
      }
    }
  }

  for( uint32_t comp = COMPONENT_Y; comp < pcYuvPred.bufs.size() && comp <= m_maxCompIDToPred; comp++ )
  {
    const ComponentID compID = ComponentID( comp );
    if (compID == COMPONENT_Y && !luma)
    {
      continue;
    }
    if (compID != COMPONENT_Y && !chroma)
    {
      continue;
    }
#if MULTI_PASS_DMVR
    // Chroma is not predicted here when BDOF is applied and the caller set isBdofMvRefine.
    if (compID != COMPONENT_Y && bioApplied && isBdofMvRefine)
    {
      continue;
    }
#endif
    if ( pu.cu->affine )
    {
      CHECK( bioApplied, "BIO is not allowed with affine" );
      m_iRefListIdx = eRefPicList;
      // Set only for Cb in a chroma-only call; passed straight to xPredAffineBlk().
      bool genChromaMv = (!luma && chroma && compID == COMPONENT_Cb);
#if JVET_Z0136_OOB
      xPredAffineBlk(compID, pu, pu.cu->slice->getRefPic(eRefPicList, iRefIdx)->unscaledPic, mv, pcYuvPred, bi, pu.cu->slice->clpRng(compID), eRefPicList, genChromaMv, pu.cu->slice->getScalingRatio(eRefPicList, iRefIdx));
#else
      xPredAffineBlk(compID, pu, pu.cu->slice->getRefPic(eRefPicList, iRefIdx)->unscaledPic, mv, pcYuvPred, bi, pu.cu->slice->clpRng(compID), genChromaMv, pu.cu->slice->getScalingRatio(eRefPicList, iRefIdx));
#endif
    }
    else
    {
      if (isIBC)
      {
        // IBC predicts from the picture returned by slice->getPic() instead of a reference-list entry.
        xPredInterBlk(compID, pu, pu.cu->slice->getPic(), mv[0], pcYuvPred, bi, pu.cu->slice->clpRng(compID), bioApplied, isIBC);
      }
      else
      {
        xPredInterBlk( compID, pu, pu.cu->slice->getRefPic( eRefPicList, iRefIdx )->unscaledPic, mv[0], pcYuvPred, bi, pu.cu->slice->clpRng( compID ), bioApplied, isIBC, pu.cu->slice->getScalingRatio( eRefPicList, iRefIdx ) );
      }
    }
  }
}

/** Bi-prediction of a PU in sub-blocks of at most BDOF_SUBPU_DIM x BDOF_SUBPU_DIM luma samples.
 *
 * For every sub-block the per-sub-block offset from m_bdofSubPuMvOffset is added to the list-0 MV
 * and subtracted from the list-1 MV, both lists are predicted with xPredInterUni() into
 * m_acYuvPred, and the two predictions are combined by xWeightedAverage() into \p pcYuvPred.
 *
 * \param pu         prediction unit; both refIdx entries must be valid (CHECKed)
 * \param pcYuvPred  destination buffers
 * \param luma       predict the luma component
 * \param chroma     predict the chroma components
 */
#if MULTI_PASS_DMVR
void InterPrediction::xPredInterBiSubPuBDOF(PredictionUnit &pu, PelUnitBuf &pcYuvPred, const bool luma, const bool chroma)
{
  const Slice &slice = *pu.cs->slice;
  bool bioApplied = true;

  // common variable for all subPu
  const bool lumaOnly = (luma && !chroma), chromaOnly = (!luma && chroma);
  const int  bioDy = std::min<int>(pu.lumaSize().height, BDOF_SUBPU_DIM);
  const int  bioDx = std::min<int>(pu.lumaSize().width, BDOF_SUBPU_DIM);
  const int  scaleX = getComponentScaleX(COMPONENT_Cb, pu.chromaFormat);
  const int  scaleY = getComponentScaleY(COMPONENT_Cb, pu.chromaFormat);
  // Read-only views over the per-list intermediate buffers m_acYuvPred[0] / m_acYuvPred[1],
  // given the same geometry as the destination planes.
  CPelUnitBuf srcPred0 = ( pu.chromaFormat == CHROMA_400 ?
                           CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvPred.Y())) :
                           CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvPred.Y()), PelBuf(m_acYuvPred[0][1], pcYuvPred.Cb()), PelBuf(m_acYuvPred[0][2], pcYuvPred.Cr())) );
  CPelUnitBuf srcPred1 = ( pu.chromaFormat == CHROMA_400 ?
CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvPred.Y())) :
                           CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvPred.Y()), PelBuf(m_acYuvPred[1][1], pcYuvPred.Cb()), PelBuf(m_acYuvPred[1][2], pcYuvPred.Cr())) );

  Position puPos = pu.lumaPos();
  PredictionUnit subPu = pu;   // working copy; its area and MVs are overwritten per sub-block
  CHECK(subPu.refIdx[0] < 0, "this is not possible for BDOF");
  CHECK(subPu.refIdx[1] < 0, "this is not possible for BDOF");

  // m_bdofSubPuMvOffset is indexed with a row pitch of BDOF_SUBPU_STRIDE; after each row of
  // sub-blocks the index is advanced by the unused remainder of that row.
  int bioSubPuIdx = 0;
  const int bioSubPuStrideIncr = BDOF_SUBPU_STRIDE - std::max(1, (int)(pu.lumaSize().width >> BDOF_SUBPU_DIM_LOG2));
  for (int y = puPos.y, yStart = 0; y < (puPos.y + pu.lumaSize().height); y = y + bioDy, yStart = yStart + bioDy)
  {
    for (int x = puPos.x, xStart = 0; x < (puPos.x + pu.lumaSize().width); x = x + bioDx, xStart = xStart + bioDx)
    {
      Mv bioMv = m_bdofSubPuMvOffset[bioSubPuIdx];
      subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(x, y, bioDx, bioDy)));
      // The offset is applied with opposite signs to the two lists, on top of either the
      // per-sub-PU MVs stored in m_bdmvrSubPuMvBuf (when pu.bdmvrRefine) or the PU MVs.
      if (pu.bdmvrRefine)
      {
        const int bdmvrSubPuIdx = (yStart >> DMVR_SUBCU_HEIGHT_LOG2) * DMVR_SUBPU_STRIDE + (xStart >> DMVR_SUBCU_WIDTH_LOG2);
        subPu.mv[0] = m_bdmvrSubPuMvBuf[0][bdmvrSubPuIdx] + bioMv;
        subPu.mv[1] = m_bdmvrSubPuMvBuf[1][bdmvrSubPuIdx] - bioMv;
      }
      else
      {
        subPu.mv[0] = pu.mv[0] + bioMv;
        subPu.mv[1] = pu.mv[1] - bioMv;
      }

      // inter pred to generate buf data
      for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++)
      {
        if( subPu.refIdx[refList] < 0)
        {
          continue;
        }

        RefPicList eRefPicList = (refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0);

        CHECK(CU::isIBC(*subPu.cu) && eRefPicList != REF_PIC_LIST_0, "Invalid interdir for ibc mode");
        CHECK(CU::isIBC(*subPu.cu) && subPu.refIdx[refList] != MAX_NUM_REF, "Invalid reference index for ibc mode");
        CHECK((CU::isInter(*subPu.cu) && subPu.refIdx[refList] >= slice.getNumRefIdx(eRefPicList)), "Invalid reference index");
        m_iRefListIdx = refList;

        // Writable view over this list's intermediate buffer, narrowed to the current sub-block.
        PelUnitBuf pcMbBuf = ( subPu.chromaFormat == CHROMA_400 ?
                               PelUnitBuf(subPu.chromaFormat, PelBuf(m_acYuvPred[refList][0], pcYuvPred.Y())) :
                               PelUnitBuf(subPu.chromaFormat, PelBuf(m_acYuvPred[refList][0], pcYuvPred.Y()), PelBuf(m_acYuvPred[refList][1], pcYuvPred.Cb()), PelBuf(m_acYuvPred[refList][2], pcYuvPred.Cr())) );
        pcMbBuf = pcMbBuf.subBuf(UnitAreaRelative(pu, subPu));

        if (bioMv.hor == 0 && bioMv.ver == 0)
        {
          // only chroma MC
          // (luma is not re-predicted here when the offset is zero; NOTE(review): presumably the
          //  luma result for this sub-block was produced by an earlier pass — confirm with caller)
          if (!lumaOnly)
          xPredInterUni ( subPu, eRefPicList, pcMbBuf, true, bioApplied, false, chroma, false );
        }
        else
        {
          xPredInterUni ( subPu, eRefPicList, pcMbBuf, true, bioApplied, luma, chroma, false );
        }
      }

      // prepare dst sub buf
      // The buf pointers are recomputed explicitly from the PU-relative offsets and parent strides.
      PelUnitBuf subYuvPredBuf = pcYuvPred.subBuf(UnitAreaRelative(pu, subPu));
      int dstStride[MAX_NUM_COMPONENT] = { pcYuvPred.bufs[COMPONENT_Y].stride,
                                           isChromaEnabled(pu.chromaFormat) ? pcYuvPred.bufs[COMPONENT_Cb].stride : 0,
                                           isChromaEnabled(pu.chromaFormat) ? pcYuvPred.bufs[COMPONENT_Cr].stride : 0};
      subYuvPredBuf.bufs[COMPONENT_Y].buf = pcYuvPred.bufs[COMPONENT_Y].buf + xStart + yStart * dstStride[COMPONENT_Y];
      if (isChromaEnabled(pu.chromaFormat))
      {
        subYuvPredBuf.bufs[COMPONENT_Cb].buf = pcYuvPred.bufs[COMPONENT_Cb].buf + (xStart >> scaleX) + ((yStart >> scaleY) * dstStride[COMPONENT_Cb]);
        subYuvPredBuf.bufs[COMPONENT_Cr].buf = pcYuvPred.bufs[COMPONENT_Cr].buf + (xStart >> scaleX) + ((yStart >> scaleY) * dstStride[COMPONENT_Cr]);
      }

      // prepare src sub buf
      // Note: the strides of srcPred0 are used to address both srcPred0 and srcPred1.
      int srcStride[MAX_NUM_COMPONENT] = { srcPred0.bufs[COMPONENT_Y].stride,
                                           isChromaEnabled(pu.chromaFormat) ? srcPred0.bufs[COMPONENT_Cb].stride : 0,
                                           isChromaEnabled(pu.chromaFormat) ? srcPred0.bufs[COMPONENT_Cr].stride : 0};
      CPelUnitBuf srcSubPred0 = srcPred0.subBuf(UnitAreaRelative(pu, subPu));
      CPelUnitBuf srcSubPred1 = srcPred1.subBuf(UnitAreaRelative(pu, subPu));
      srcSubPred0.bufs[COMPONENT_Y].buf = srcPred0.bufs[COMPONENT_Y].buf + xStart + yStart * srcStride[COMPONENT_Y];
      if (isChromaEnabled(pu.chromaFormat))
      {
        srcSubPred0.bufs[COMPONENT_Cb].buf = srcPred0.bufs[COMPONENT_Cb].buf + (xStart >> scaleX) + ((yStart >> scaleY) * srcStride[COMPONENT_Cb]);
        srcSubPred0.bufs[COMPONENT_Cr].buf = srcPred0.bufs[COMPONENT_Cr].buf + (xStart >> scaleX) + ((yStart >> scaleY) * srcStride[COMPONENT_Cr]);
      }
      srcSubPred1.bufs[COMPONENT_Y].buf = srcPred1.bufs[COMPONENT_Y].buf + xStart + yStart * srcStride[COMPONENT_Y];
      if (isChromaEnabled(pu.chromaFormat))
      {
        srcSubPred1.bufs[COMPONENT_Cb].buf = srcPred1.bufs[COMPONENT_Cb].buf + (xStart >> scaleX) + ((yStart >> scaleY) * srcStride[COMPONENT_Cb]);
        srcSubPred1.bufs[COMPONENT_Cr].buf = srcPred1.bufs[COMPONENT_Cr].buf + (xStart >> scaleX) + ((yStart >> scaleY) * srcStride[COMPONENT_Cr]);
      }

      // generate the dst buf
      {
        if (bioMv.hor == 0 && bioMv.ver == 0)
        {
          // only derive chroma prediction
#if JVET_Z0136_OOB
          if (!lumaOnly)
          {
            // Per-list flag returned by isMvOOB(); left false for a list whose reference picture
            // reports wrap-around enabled (JVET_AA0146_WRAP_AROUND_FIX).
            bool isOOB[2] = { false,false };
            if (pu.interDir == 3)
            {
#if JVET_AA0146_WRAP_AROUND_FIX
              if (subPu.cu->slice->getRefPic(REF_PIC_LIST_0, subPu.refIdx[0])->isWrapAroundEnabled( subPu.cs->pps ))
              {
                isOOB[0] = false;
              }
              else
#endif
              isOOB[0] = isMvOOB(subPu.mv[0], subPu.Y().topLeft(), subPu.lumaSize(), subPu.cu->slice->getSPS(), subPu.cu->slice->getPPS(), pu.cs->mcMask[0], pu.cs->mcMaskChroma[0]);
#if JVET_AA0146_WRAP_AROUND_FIX
              if (subPu.cu->slice->getRefPic(REF_PIC_LIST_1, subPu.refIdx[1])->isWrapAroundEnabled( subPu.cs->pps ))
              {
                isOOB[1] = false;
              }
              else
#endif
              isOOB[1] = isMvOOB(subPu.mv[1], subPu.Y().topLeft(), subPu.lumaSize(), subPu.cu->slice->getSPS(), subPu.cu->slice->getPPS(), pu.cs->mcMask[1], pu.cs->mcMaskChroma[1]);
              xWeightedAverage(false/*isBdofMvRefine*/, 0/*bdofBlockOffset*/, subPu, srcSubPred0, srcSubPred1, subYuvPredBuf, slice.getSPS()->getBitDepths(), slice.clpRngs(), false, lumaOnly, true, NULL, pu.cs->mcMask, subYuvPredBuf.Y().width, pu.cs->mcMaskChroma, pu.chromaFormat == CHROMA_400 ? 0 : subYuvPredBuf.Cb().width, isOOB);
            }
            else
            {
              // Same call as above, with isOOB left at { false, false }.
              xWeightedAverage(false/*isBdofMvRefine*/, 0/*bdofBlockOffset*/, subPu, srcSubPred0, srcSubPred1, subYuvPredBuf, slice.getSPS()->getBitDepths(), slice.clpRngs(), false, lumaOnly, true, NULL, pu.cs->mcMask, subYuvPredBuf.Y().width, pu.cs->mcMaskChroma, pu.chromaFormat == CHROMA_400 ? 0 : subYuvPredBuf.Cb().width, isOOB);
            }
          }
#else
          if (!lumaOnly)
          xWeightedAverage( false/*isBdofMvRefine*/, 0/*bdofBlockOffset*/, subPu, srcSubPred0, srcSubPred1, subYuvPredBuf, slice.getSPS()->getBitDepths(), slice.clpRngs(), false/*bioApplied*/, lumaOnly, true/*chromaOnly*/, NULL/*yuvPredTmp*/ );
#endif
        }
        else
        {
          // Non-zero offset: blend with bioApplied and the caller's luma/chroma selection.
#if JVET_Z0136_OOB
          bool isOOB[2] = { false,false };
          if (pu.interDir == 3)
          {
#if JVET_AA0146_WRAP_AROUND_FIX
            if (subPu.cu->slice->getRefPic(REF_PIC_LIST_0, subPu.refIdx[0])->isWrapAroundEnabled( subPu.cs->pps ))
            {
              isOOB[0] = false;
            }
            else
#endif
            isOOB[0] = isMvOOB(subPu.mv[0], subPu.Y().topLeft(), subPu.lumaSize(), subPu.cu->slice->getSPS(), subPu.cu->slice->getPPS(), pu.cs->mcMask[0], pu.cs->mcMaskChroma[0]);
#if JVET_AA0146_WRAP_AROUND_FIX
            if (subPu.cu->slice->getRefPic(REF_PIC_LIST_1, subPu.refIdx[1])->isWrapAroundEnabled( subPu.cs->pps ))
            {
              isOOB[1] = false;
            }
            else
#endif
            isOOB[1] = isMvOOB(subPu.mv[1], subPu.Y().topLeft(), subPu.lumaSize(), subPu.cu->slice->getSPS(), subPu.cu->slice->getPPS(), pu.cs->mcMask[1], pu.cs->mcMaskChroma[1]);
            xWeightedAverage(false/*isBdofMvRefine*/, 0/*bdofBlockOffset*/, subPu, srcSubPred0, srcSubPred1, subYuvPredBuf, slice.getSPS()->getBitDepths(), slice.clpRngs(), bioApplied, lumaOnly, chromaOnly, NULL, pu.cs->mcMask, subYuvPredBuf.Y().width, pu.cs->mcMaskChroma, pu.chromaFormat == CHROMA_400 ?
0 : subYuvPredBuf.Cb().width, isOOB); } else { xWeightedAverage(false/*isBdofMvRefine*/, 0/*bdofBlockOffset*/, subPu, srcSubPred0, srcSubPred1, subYuvPredBuf, slice.getSPS()->getBitDepths(), slice.clpRngs(), bioApplied, lumaOnly, chromaOnly, NULL, pu.cs->mcMask, subYuvPredBuf.Y().width, pu.cs->mcMaskChroma, pu.chromaFormat == CHROMA_400 ? 0 : subYuvPredBuf.Cb().width, isOOB); } #else xWeightedAverage( false/*isBdofMvRefine*/, 0/*bdofBlockOffset*/, subPu, srcSubPred0, srcSubPred1, subYuvPredBuf, slice.getSPS()->getBitDepths(), slice.clpRngs(), bioApplied, lumaOnly, chromaOnly, NULL/*yuvPredTmp*/ ); #endif } } bioSubPuIdx += 1; } bioSubPuIdx += bioSubPuStrideIncr; } } #endif #if MULTI_PASS_DMVR void InterPrediction::xPredInterBiBDMVR(PredictionUnit &pu, PelUnitBuf &pcYuvPred, const bool luma, const bool chroma, PelUnitBuf *yuvPredTmp /*= NULL*/) { const PPS &pps = *pu.cs->pps; const Slice &slice = *pu.cs->slice; #if !INTER_RM_SIZE_CONSTRAINTS #if ENABLE_OBMC if (pu.cu->isobmcMC == false) #endif CHECK( !pu.cu->affine && pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0 && ( pu.lwidth() + pu.lheight() == 12 ), "invalid 4x8/8x4 bi-predicted blocks" ); #endif int refIdx0 = pu.refIdx[REF_PIC_LIST_0]; int refIdx1 = pu.refIdx[REF_PIC_LIST_1]; const WPScalingParam *wp0 = pu.cs->slice->getWpScaling(REF_PIC_LIST_0, refIdx0); const WPScalingParam *wp1 = pu.cs->slice->getWpScaling(REF_PIC_LIST_1, refIdx1); bool bioApplied = false; if (pu.cs->sps->getBDOFEnabledFlag() && (!pu.cs->picHeader->getDisBdofFlag())) { #if INTER_LIC if (pu.cu->affine || m_subPuMC || pu.cu->LICFlag #if ENABLE_OBMC || pu.cu->isobmcMC #endif ) #else if (pu.cu->affine || m_subPuMC) #endif { bioApplied = false; } else { const bool biocheck0 = !((WPScalingParam::isWeighted(wp0) || WPScalingParam::isWeighted(wp1)) && slice.getSliceType() == B_SLICE); const bool biocheck1 = !(pps.getUseWP() && slice.getSliceType() == P_SLICE); if (biocheck0 && biocheck1 && PU::isBiPredFromDifferentDirEqDistPoc(pu) #if !BDOF_RM_CONSTRAINTS 
&& (pu.Y().height >= 8) && (pu.Y().width >= 8) && ((pu.Y().height * pu.Y().width) >= 128) #endif ) { bioApplied = true; } } if (bioApplied && pu.ciipFlag) { bioApplied = false; } if (bioApplied && pu.cu->smvdMode) { bioApplied = false; } if (pu.cu->cs->sps->getUseBcw() && bioApplied && pu.cu->BcwIdx != BCW_DEFAULT) { bioApplied = false; } } if (pu.mmvdEncOptMode == 2 && pu.mmvdMergeFlag) { bioApplied = false; } #if ENABLE_OBMC if (pu.cu->isobmcMC) { bioApplied = false; } #endif const bool isResamplingPossible = pu.cs->sps->getRprEnabledFlag(); bool dmvrApplied = false; dmvrApplied = (pu.mvRefine) && PU::checkDMVRCondition(pu); const bool refIsScaled = isResamplingPossible && ( ( refIdx0 < 0 ? false : pu.cu->slice->getRefPic( REF_PIC_LIST_0, refIdx0 )->isRefScaled( pu.cs->pps ) ) || ( refIdx1 < 0 ? false : pu.cu->slice->getRefPic( REF_PIC_LIST_1, refIdx1 )->isRefScaled( pu.cs->pps ) ) ); dmvrApplied = dmvrApplied && !refIsScaled; bioApplied = bioApplied && !refIsScaled; // common variable for all subPu const bool lumaOnly = (luma && !chroma), chromaOnly = (!luma && chroma); const int dy = std::min<int>(pu.lumaSize().height, DMVR_SUBCU_HEIGHT); const int dx = std::min<int>(pu.lumaSize().width, DMVR_SUBCU_WIDTH); const int scaleX = getComponentScaleX(COMPONENT_Cb, pu.chromaFormat); const int scaleY = getComponentScaleY(COMPONENT_Cb, pu.chromaFormat); CPelUnitBuf srcPred0 = ( pu.chromaFormat == CHROMA_400 ? CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvPred.Y())) : CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvPred.Y()), PelBuf(m_acYuvPred[0][1], pcYuvPred.Cb()), PelBuf(m_acYuvPred[0][2], pcYuvPred.Cr())) ); CPelUnitBuf srcPred1 = ( pu.chromaFormat == CHROMA_400 ? 
CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvPred.Y())) : CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvPred.Y()), PelBuf(m_acYuvPred[1][1], pcYuvPred.Cb()), PelBuf(m_acYuvPred[1][2], pcYuvPred.Cr())) ); Position puPos = pu.lumaPos(); PredictionUnit subPu = pu; int subPuIdx = 0; const int dmvrSubPuStrideIncr = DMVR_SUBPU_STRIDE - std::max(1, (int)(pu.lumaSize().width >> DMVR_SUBCU_WIDTH_LOG2)); int length = 0, later = 0; int width = pu.lwidth(), height = pu.lheight(); int subPuIdxColumn = 0; if (height > width) { for (int x = puPos.x, xStart = 0; x < (puPos.x + pu.lumaSize().width); x = x + dx, xStart = xStart + dx) { subPuIdx = subPuIdxColumn; for (int y = puPos.y, yStart = 0; y < (puPos.y + pu.lumaSize().height); y = y + dy, yStart = yStart + dy) { subPu.mv[0] = m_bdmvrSubPuMvBuf[0][subPuIdx]; subPu.mv[1] = m_bdmvrSubPuMvBuf[1][subPuIdx]; length = dy; later = yStart + dy; subPuIdx += DMVR_SUBPU_STRIDE; while (later < width) { Mv nextMv[2] = { m_bdmvrSubPuMvBuf[0][subPuIdx] , m_bdmvrSubPuMvBuf[1][subPuIdx] }; if (nextMv[0] == subPu.mv[0] && nextMv[1] == subPu.mv[1]) { length += dy; } else { break; } later += dy; subPuIdx += DMVR_SUBPU_STRIDE; } subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(x, y, dx, length))); // inter pred to generate buf data for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) { if (subPu.refIdx[refList] < 0) { continue; } RefPicList eRefPicList = (refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0); CHECK(CU::isIBC(*subPu.cu) && eRefPicList != REF_PIC_LIST_0, "Invalid interdir for ibc mode"); CHECK(CU::isIBC(*subPu.cu) && subPu.refIdx[refList] != MAX_NUM_REF, "Invalid reference index for ibc mode"); CHECK((CU::isInter(*subPu.cu) && subPu.refIdx[refList] >= slice.getNumRefIdx(eRefPicList)), "Invalid reference index"); m_iRefListIdx = refList; PelUnitBuf pcMbBuf = (subPu.chromaFormat == CHROMA_400 ? 
PelUnitBuf(subPu.chromaFormat, PelBuf(m_acYuvPred[refList][0], pcYuvPred.Y())) : PelUnitBuf(subPu.chromaFormat, PelBuf(m_acYuvPred[refList][0], pcYuvPred.Y()), PelBuf(m_acYuvPred[refList][1], pcYuvPred.Cb()), PelBuf(m_acYuvPred[refList][2], pcYuvPred.Cr()))); pcMbBuf = pcMbBuf.subBuf(UnitAreaRelative(pu, subPu)); if (subPu.refIdx[0] >= 0 && subPu.refIdx[1] >= 0) { bool isBdofMvRefineSkipChromaMC = (yuvPredTmp == NULL); xPredInterUni(subPu, eRefPicList, pcMbBuf, true , bioApplied, luma, chroma, isBdofMvRefineSkipChromaMC); } else { if (((pps.getUseWP() && slice.getSliceType() == P_SLICE) || (pps.getWPBiPred() && slice.getSliceType() == B_SLICE)) #if INTER_LIC && !subPu.cu->LICFlag #endif ) { xPredInterUni(subPu, eRefPicList, pcMbBuf, true , bioApplied , luma, chroma ); } else { xPredInterUni(subPu, eRefPicList, pcMbBuf, subPu.cu->geoFlag , bioApplied , luma, chroma ); } } } // prepare dst sub buf PelUnitBuf subYuvPredBuf = pcYuvPred.subBuf(UnitAreaRelative(pu, subPu)); int dstStride[MAX_NUM_COMPONENT] = { pcYuvPred.bufs[COMPONENT_Y].stride, isChromaEnabled(pu.chromaFormat) ? pcYuvPred.bufs[COMPONENT_Cb].stride : 0, isChromaEnabled(pu.chromaFormat) ? pcYuvPred.bufs[COMPONENT_Cr].stride : 0 }; subYuvPredBuf.bufs[COMPONENT_Y].buf = pcYuvPred.bufs[COMPONENT_Y].buf + xStart + yStart * dstStride[COMPONENT_Y]; if (isChromaEnabled(pu.chromaFormat)) { subYuvPredBuf.bufs[COMPONENT_Cb].buf = pcYuvPred.bufs[COMPONENT_Cb].buf + (xStart >> scaleX) + ((yStart >> scaleY) * dstStride[COMPONENT_Cb]); subYuvPredBuf.bufs[COMPONENT_Cr].buf = pcYuvPred.bufs[COMPONENT_Cr].buf + (xStart >> scaleX) + ((yStart >> scaleY) * dstStride[COMPONENT_Cr]); } // prepare src sub buf int srcStride[MAX_NUM_COMPONENT] = { srcPred0.bufs[COMPONENT_Y].stride, isChromaEnabled(pu.chromaFormat) ? srcPred0.bufs[COMPONENT_Cb].stride : 0, isChromaEnabled(pu.chromaFormat) ? 
srcPred0.bufs[COMPONENT_Cr].stride : 0 }; CPelUnitBuf srcSubPred0 = srcPred0.subBuf(UnitAreaRelative(pu, subPu)); CPelUnitBuf srcSubPred1 = srcPred1.subBuf(UnitAreaRelative(pu, subPu)); srcSubPred0.bufs[COMPONENT_Y].buf = srcPred0.bufs[COMPONENT_Y].buf + xStart + yStart * srcStride[COMPONENT_Y]; if (isChromaEnabled(pu.chromaFormat)) { srcSubPred0.bufs[COMPONENT_Cb].buf = srcPred0.bufs[COMPONENT_Cb].buf + (xStart >> scaleX) + ((yStart >> scaleY) * srcStride[COMPONENT_Cb]); srcSubPred0.bufs[COMPONENT_Cr].buf = srcPred0.bufs[COMPONENT_Cr].buf + (xStart >> scaleX) + ((yStart >> scaleY) * srcStride[COMPONENT_Cr]); } srcSubPred1.bufs[COMPONENT_Y].buf = srcPred1.bufs[COMPONENT_Y].buf + xStart + yStart * srcStride[COMPONENT_Y]; if (isChromaEnabled(pu.chromaFormat)) { srcSubPred1.bufs[COMPONENT_Cb].buf = srcPred1.bufs[COMPONENT_Cb].buf + (xStart >> scaleX) + ((yStart >> scaleY) * srcStride[COMPONENT_Cb]); srcSubPred1.bufs[COMPONENT_Cr].buf = srcPred1.bufs[COMPONENT_Cr].buf + (xStart >> scaleX) + ((yStart >> scaleY) * srcStride[COMPONENT_Cr]); } // generate the dst buf { const int bioSubPuOffset = (xStart >> BDOF_SUBPU_DIM_LOG2) + (yStart >> BDOF_SUBPU_DIM_LOG2) * BDOF_SUBPU_STRIDE; #if JVET_Z0136_OOB bool isOOB[2] = { false,false }; if (pu.interDir == 3) { #if JVET_AA0146_WRAP_AROUND_FIX if (subPu.cu->slice->getRefPic(REF_PIC_LIST_0, subPu.refIdx[0])->isWrapAroundEnabled( subPu.cs->pps )) { isOOB[0] = false; } else #endif isOOB[0] = isMvOOB(subPu.mv[0], subPu.Y().topLeft(), subPu.lumaSize(), subPu.cu->slice->getSPS(), subPu.cu->slice->getPPS(), pu.cs->mcMask[0], pu.cs->mcMaskChroma[0]); #if JVET_AA0146_WRAP_AROUND_FIX if (subPu.cu->slice->getRefPic(REF_PIC_LIST_1, subPu.refIdx[1])->isWrapAroundEnabled( subPu.cs->pps )) { isOOB[1] = false; } else #endif isOOB[1] = isMvOOB(subPu.mv[1], subPu.Y().topLeft(), subPu.lumaSize(), subPu.cu->slice->getSPS(), subPu.cu->slice->getPPS(), pu.cs->mcMask[1], pu.cs->mcMaskChroma[1]); xWeightedAverage(true/*isBdofMvRefine*/, 
bioSubPuOffset/*bdofBlockOffset*/, subPu, srcSubPred0, srcSubPred1, subYuvPredBuf, slice.getSPS()->getBitDepths(), slice.clpRngs(), bioApplied, lumaOnly, chromaOnly, yuvPredTmp, pu.cs->mcMask, subYuvPredBuf.Y().width, pu.cs->mcMaskChroma, pu.chromaFormat == CHROMA_400 ? 0 : subYuvPredBuf.Cb().width, isOOB); } else { xWeightedAverage(true/*isBdofMvRefine*/, bioSubPuOffset/*bdofBlockOffset*/, subPu, srcSubPred0, srcSubPred1, subYuvPredBuf, slice.getSPS()->getBitDepths(), slice.clpRngs(), bioApplied, lumaOnly, chromaOnly, yuvPredTmp, pu.cs->mcMask, subYuvPredBuf.Y().width, pu.cs->mcMaskChroma, pu.chromaFormat == CHROMA_400 ? 0 : subYuvPredBuf.Cb().width, isOOB); } #else xWeightedAverage(true/*isBdofMvRefine*/, bioSubPuOffset/*bdofBlockOffset*/, subPu, srcSubPred0, srcSubPred1, subYuvPredBuf, slice.getSPS()->getBitDepths(), slice.clpRngs(), bioApplied, lumaOnly, chromaOnly, yuvPredTmp); #endif } yStart = later - dy; y = puPos.y + yStart; } subPuIdxColumn++; } } else for (int y = puPos.y, yStart = 0; y < (puPos.y + pu.lumaSize().height); y = y + dy, yStart = yStart + dy) { for (int x = puPos.x, xStart = 0; x < (puPos.x + pu.lumaSize().width); x = x + dx, xStart = xStart + dx) { subPu.mv[0] = m_bdmvrSubPuMvBuf[0][subPuIdx]; subPu.mv[1] = m_bdmvrSubPuMvBuf[1][subPuIdx]; length = dx; later = xStart + dx; subPuIdx++; while (later < width) { Mv nextMv[2] = { m_bdmvrSubPuMvBuf[0][subPuIdx] , m_bdmvrSubPuMvBuf[1][subPuIdx] }; if (nextMv[0] == subPu.mv[0] && nextMv[1] == subPu.mv[1]) { length += dx; } else { break; } later += dx; subPuIdx++; } subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(x, y, length, dy))); // inter pred to generate buf data for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) { if( subPu.refIdx[refList] < 0) { continue; } RefPicList eRefPicList = (refList ? 
REF_PIC_LIST_1 : REF_PIC_LIST_0); CHECK(CU::isIBC(*subPu.cu) && eRefPicList != REF_PIC_LIST_0, "Invalid interdir for ibc mode"); CHECK(CU::isIBC(*subPu.cu) && subPu.refIdx[refList] != MAX_NUM_REF, "Invalid reference index for ibc mode"); CHECK((CU::isInter(*subPu.cu) && subPu.refIdx[refList] >= slice.getNumRefIdx(eRefPicList)), "Invalid reference index"); m_iRefListIdx = refList; PelUnitBuf pcMbBuf = ( subPu.chromaFormat == CHROMA_400 ? PelUnitBuf(subPu.chromaFormat, PelBuf(m_acYuvPred[refList][0], pcYuvPred.Y())) : PelUnitBuf(subPu.chromaFormat, PelBuf(m_acYuvPred[refList][0], pcYuvPred.Y()), PelBuf(m_acYuvPred[refList][1], pcYuvPred.Cb()), PelBuf(m_acYuvPred[refList][2], pcYuvPred.Cr())) ); pcMbBuf = pcMbBuf.subBuf(UnitAreaRelative(pu, subPu)); if (subPu.refIdx[0] >= 0 && subPu.refIdx[1] >= 0) { bool isBdofMvRefineSkipChromaMC = (yuvPredTmp == NULL); xPredInterUni ( subPu, eRefPicList, pcMbBuf, true , bioApplied, luma, chroma, isBdofMvRefineSkipChromaMC); } else { if( ( (pps.getUseWP() && slice.getSliceType() == P_SLICE) || (pps.getWPBiPred() && slice.getSliceType() == B_SLICE) ) #if INTER_LIC && !subPu.cu->LICFlag #endif ) { xPredInterUni ( subPu, eRefPicList, pcMbBuf, true , bioApplied , luma, chroma ); } else { xPredInterUni(subPu, eRefPicList, pcMbBuf, subPu.cu->geoFlag , bioApplied , luma, chroma ); } } } // prepare dst sub buf PelUnitBuf subYuvPredBuf = pcYuvPred.subBuf(UnitAreaRelative(pu, subPu)); int dstStride[MAX_NUM_COMPONENT] = { pcYuvPred.bufs[COMPONENT_Y].stride, isChromaEnabled(pu.chromaFormat) ? pcYuvPred.bufs[COMPONENT_Cb].stride : 0, isChromaEnabled(pu.chromaFormat) ? 
pcYuvPred.bufs[COMPONENT_Cr].stride : 0}; subYuvPredBuf.bufs[COMPONENT_Y].buf = pcYuvPred.bufs[COMPONENT_Y].buf + xStart + yStart * dstStride[COMPONENT_Y]; if (isChromaEnabled(pu.chromaFormat)) { subYuvPredBuf.bufs[COMPONENT_Cb].buf = pcYuvPred.bufs[COMPONENT_Cb].buf + (xStart >> scaleX) + ((yStart >> scaleY) * dstStride[COMPONENT_Cb]); subYuvPredBuf.bufs[COMPONENT_Cr].buf = pcYuvPred.bufs[COMPONENT_Cr].buf + (xStart >> scaleX) + ((yStart >> scaleY) * dstStride[COMPONENT_Cr]); } // prepare src sub buf int srcStride[MAX_NUM_COMPONENT] = { srcPred0.bufs[COMPONENT_Y].stride, isChromaEnabled(pu.chromaFormat) ? srcPred0.bufs[COMPONENT_Cb].stride : 0, isChromaEnabled(pu.chromaFormat) ? srcPred0.bufs[COMPONENT_Cr].stride : 0}; CPelUnitBuf srcSubPred0 = srcPred0.subBuf(UnitAreaRelative(pu, subPu)); CPelUnitBuf srcSubPred1 = srcPred1.subBuf(UnitAreaRelative(pu, subPu)); srcSubPred0.bufs[COMPONENT_Y].buf = srcPred0.bufs[COMPONENT_Y].buf + xStart + yStart * srcStride[COMPONENT_Y]; if (isChromaEnabled(pu.chromaFormat)) { srcSubPred0.bufs[COMPONENT_Cb].buf = srcPred0.bufs[COMPONENT_Cb].buf + (xStart >> scaleX) + ((yStart >> scaleY) * srcStride[COMPONENT_Cb]); srcSubPred0.bufs[COMPONENT_Cr].buf = srcPred0.bufs[COMPONENT_Cr].buf + (xStart >> scaleX) + ((yStart >> scaleY) * srcStride[COMPONENT_Cr]); } srcSubPred1.bufs[COMPONENT_Y].buf = srcPred1.bufs[COMPONENT_Y].buf + xStart + yStart * srcStride[COMPONENT_Y]; if (isChromaEnabled(pu.chromaFormat)) { srcSubPred1.bufs[COMPONENT_Cb].buf = srcPred1.bufs[COMPONENT_Cb].buf + (xStart >> scaleX) + ((yStart >> scaleY) * srcStride[COMPONENT_Cb]); srcSubPred1.bufs[COMPONENT_Cr].buf = srcPred1.bufs[COMPONENT_Cr].buf + (xStart >> scaleX) + ((yStart >> scaleY) * srcStride[COMPONENT_Cr]); } // generate the dst buf { const int bioSubPuOffset = (xStart >> BDOF_SUBPU_DIM_LOG2) + (yStart >> BDOF_SUBPU_DIM_LOG2) * BDOF_SUBPU_STRIDE; #if JVET_Z0136_OOB bool isOOB[2] = { false,false }; if (pu.interDir == 3) { #if JVET_AA0146_WRAP_AROUND_FIX if 
(subPu.cu->slice->getRefPic(REF_PIC_LIST_0, subPu.refIdx[0])->isWrapAroundEnabled( subPu.cs->pps )) { isOOB[0] = false; } else #endif isOOB[0] = isMvOOB(subPu.mv[0], subPu.Y().topLeft(), subPu.lumaSize(), subPu.cu->slice->getSPS(), subPu.cu->slice->getPPS(), pu.cs->mcMask[0], pu.cs->mcMaskChroma[0]); #if JVET_AA0146_WRAP_AROUND_FIX if (subPu.cu->slice->getRefPic(REF_PIC_LIST_1, subPu.refIdx[1])->isWrapAroundEnabled( subPu.cs->pps )) { isOOB[1] = false; } else #endif isOOB[1] = isMvOOB(subPu.mv[1], subPu.Y().topLeft(), subPu.lumaSize(), subPu.cu->slice->getSPS(), subPu.cu->slice->getPPS(), pu.cs->mcMask[1], pu.cs->mcMaskChroma[1]); xWeightedAverage(true/*isBdofMvRefine*/, bioSubPuOffset/*bdofBlockOffset*/, subPu, srcSubPred0, srcSubPred1, subYuvPredBuf, slice.getSPS()->getBitDepths(), slice.clpRngs(), bioApplied, lumaOnly, chromaOnly, yuvPredTmp, pu.cs->mcMask, subYuvPredBuf.Y().width, pu.cs->mcMaskChroma, pu.chromaFormat == CHROMA_400 ? 0 : subYuvPredBuf.Cb().width, isOOB); } else { xWeightedAverage(true/*isBdofMvRefine*/, bioSubPuOffset/*bdofBlockOffset*/, subPu, srcSubPred0, srcSubPred1, subYuvPredBuf, slice.getSPS()->getBitDepths(), slice.clpRngs(), bioApplied, lumaOnly, chromaOnly, yuvPredTmp, pu.cs->mcMask, subYuvPredBuf.Y().width, pu.cs->mcMaskChroma, pu.chromaFormat == CHROMA_400 ? 
0 : subYuvPredBuf.Cb().width, isOOB); } #else xWeightedAverage( true/*isBdofMvRefine*/, bioSubPuOffset/*bdofBlockOffset*/, subPu, srcSubPred0, srcSubPred1, subYuvPredBuf, slice.getSPS()->getBitDepths(), slice.clpRngs(), bioApplied, lumaOnly, chromaOnly, yuvPredTmp ); #endif } xStart = later - dx; x = puPos.x + xStart; } subPuIdx += dmvrSubPuStrideIncr; } } #endif void InterPrediction::xPredInterBi(PredictionUnit &pu, PelUnitBuf &pcYuvPred, const bool luma, const bool chroma, PelUnitBuf *yuvPredTmp /*= NULL*/) { const PPS &pps = *pu.cs->pps; const Slice &slice = *pu.cs->slice; #if MULTI_PASS_DMVR if ( pu.bdmvrRefine ) { if (yuvPredTmp && (pu.lwidth() > DMVR_SUBCU_WIDTH || pu.lheight() > DMVR_SUBCU_HEIGHT)) // pre-do MC for yuvPredTmp to avoid MC for yuvPredTmp within the subblock loop { for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) { CHECK(pu.refIdx[refList] == NOT_VALID, "pu.refIdx[refList] shouldn't be NOT_VALID.") RefPicList eRefPicList = (refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0); m_iRefListIdx = refList; PelUnitBuf pcMbBuf = (pu.chromaFormat == CHROMA_400 ? PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[refList][0], pcYuvPred.Y())) : PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[refList][0], pcYuvPred.Y()), PelBuf(m_acYuvPred[refList][1], pcYuvPred.Cb()), PelBuf(m_acYuvPred[refList][2], pcYuvPred.Cr()))); xPredInterUni(pu, eRefPicList, pcMbBuf, true, false, luma, chroma); } CPelUnitBuf srcPred0 = (pu.chromaFormat == CHROMA_400 ? CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvPred.Y())) : CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvPred.Y()), PelBuf(m_acYuvPred[0][1], pcYuvPred.Cb()), PelBuf(m_acYuvPred[0][2], pcYuvPred.Cr()))); CPelUnitBuf srcPred1 = (pu.chromaFormat == CHROMA_400 ? 
CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvPred.Y())) : CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvPred.Y()), PelBuf(m_acYuvPred[1][1], pcYuvPred.Cb()), PelBuf(m_acYuvPred[1][2], pcYuvPred.Cr()))); const bool lumaOnly = luma && !chroma; const bool chromaOnly = !luma && chroma; if (pps.getWPBiPred() && slice.getSliceType() == B_SLICE && pu.cu->BcwIdx == BCW_DEFAULT) { xWeightedPredictionBi(pu, srcPred0, srcPred1, *yuvPredTmp, m_maxCompIDToPred, lumaOnly, chromaOnly); } else if (pps.getUseWP() && slice.getSliceType() == P_SLICE) { xWeightedPredictionUni(pu, srcPred0, REF_PIC_LIST_0, *yuvPredTmp, -1, m_maxCompIDToPred, lumaOnly, chromaOnly); } else { #if JVET_Z0136_OOB bool isOOB[2] = { false,false }; if (pu.interDir == 3) { #if JVET_AA0146_WRAP_AROUND_FIX if (pu.cu->slice->getRefPic(REF_PIC_LIST_0, pu.refIdx[0])->isWrapAroundEnabled( pu.cs->pps )) { isOOB[0] = false; } else #endif isOOB[0] = isMvOOB(pu.mv[0], pu.Y().topLeft(), pu.lumaSize(), pu.cu->slice->getSPS(), pu.cu->slice->getPPS(), pu.cs->mcMask[0], pu.cs->mcMaskChroma[0]); #if JVET_AA0146_WRAP_AROUND_FIX if (pu.cu->slice->getRefPic(REF_PIC_LIST_1, pu.refIdx[1])->isWrapAroundEnabled( pu.cs->pps )) { isOOB[1] = false; } else #endif isOOB[1] = isMvOOB(pu.mv[1], pu.Y().topLeft(), pu.lumaSize(), pu.cu->slice->getSPS(), pu.cu->slice->getPPS(), pu.cs->mcMask[1], pu.cs->mcMaskChroma[1]); xWeightedAverage(false/*isBdofMvRefine*/, 0/*bioSubPuOffset*/, pu, srcPred0, srcPred1, *yuvPredTmp, slice.getSPS()->getBitDepths(), slice.clpRngs(), false, lumaOnly, chromaOnly, NULL, pu.cs->mcMask, yuvPredTmp->Y().width, pu.cs->mcMaskChroma, pu.chromaFormat == CHROMA_400 ? 0 : yuvPredTmp->Cb().width, isOOB); } else { xWeightedAverage(false/*isBdofMvRefine*/, 0/*bioSubPuOffset*/, pu, srcPred0, srcPred1, *yuvPredTmp, slice.getSPS()->getBitDepths(), slice.clpRngs(), false, lumaOnly, chromaOnly, NULL, pu.cs->mcMask, yuvPredTmp->Y().width, pu.cs->mcMaskChroma, pu.chromaFormat == CHROMA_400 ? 
0 : yuvPredTmp->Cb().width, isOOB); } #else xWeightedAverage(false/*isBdofMvRefine*/, 0/*bioSubPuOffset*/, pu, srcPred0, srcPred1, *yuvPredTmp, slice.getSPS()->getBitDepths(), slice.clpRngs(), false, lumaOnly, chromaOnly); #endif } yuvPredTmp = nullptr; } xPredInterBiBDMVR(pu, pcYuvPred, luma, chroma, yuvPredTmp); return; } #endif #if !INTER_RM_SIZE_CONSTRAINTS #if ENABLE_OBMC if (pu.cu->isobmcMC == false) #endif CHECK( !pu.cu->affine && pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0 && ( pu.lwidth() + pu.lheight() == 12 ), "invalid 4x8/8x4 bi-predicted blocks" ); #endif int refIdx0 = pu.refIdx[REF_PIC_LIST_0]; int refIdx1 = pu.refIdx[REF_PIC_LIST_1]; const WPScalingParam *wp0 = pu.cs->slice->getWpScaling(REF_PIC_LIST_0, refIdx0); const WPScalingParam *wp1 = pu.cs->slice->getWpScaling(REF_PIC_LIST_1, refIdx1); bool bioApplied = false; if (pu.cs->sps->getBDOFEnabledFlag() && (!pu.cs->picHeader->getDisBdofFlag())) { #if INTER_LIC if (pu.cu->affine || m_subPuMC || pu.cu->LICFlag) #else if (pu.cu->affine || m_subPuMC) #endif { bioApplied = false; } else { const bool biocheck0 = !((WPScalingParam::isWeighted(wp0) || WPScalingParam::isWeighted(wp1)) && slice.getSliceType() == B_SLICE); const bool biocheck1 = !(pps.getUseWP() && slice.getSliceType() == P_SLICE); if (biocheck0 && biocheck1 && PU::isBiPredFromDifferentDirEqDistPoc(pu) #if !BDOF_RM_CONSTRAINTS && (pu.Y().height >= 8) && (pu.Y().width >= 8) && ((pu.Y().height * pu.Y().width) >= 128) #endif ) { bioApplied = true; } } if (bioApplied && pu.ciipFlag) { bioApplied = false; } if (bioApplied && pu.cu->smvdMode) { bioApplied = false; } if (pu.cu->cs->sps->getUseBcw() && bioApplied && pu.cu->BcwIdx != BCW_DEFAULT) { bioApplied = false; } } if (pu.mmvdEncOptMode == 2 && pu.mmvdMergeFlag) { bioApplied = false; } #if ENABLE_OBMC if (pu.cu->isobmcMC) { bioApplied = false; } #endif bool dmvrApplied = false; dmvrApplied = (pu.mvRefine) && PU::checkDMVRCondition(pu); const bool isResamplingPossible = pu.cs->sps->getRprEnabledFlag(); 
const bool refIsScaled = isResamplingPossible && ( ( refIdx0 < 0 ? false : pu.cu->slice->getRefPic( REF_PIC_LIST_0, refIdx0 )->isRefScaled( pu.cs->pps ) ) || ( refIdx1 < 0 ? false : pu.cu->slice->getRefPic( REF_PIC_LIST_1, refIdx1 )->isRefScaled( pu.cs->pps ) ) ); dmvrApplied = dmvrApplied && !refIsScaled; bioApplied = bioApplied && !refIsScaled; #if MULTI_PASS_DMVR if (yuvPredTmp && bioApplied && (pu.lwidth() > BDOF_SUBPU_DIM || pu.lheight() > BDOF_SUBPU_DIM)) // pre-do MC for yuvPredTmp to avoid MC for yuvPredTmp within the subblock loop { for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) { CHECK(pu.refIdx[refList] == NOT_VALID, "pu.refIdx[refList] shouldn't be NOT_VALID.") RefPicList eRefPicList = (refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0); m_iRefListIdx = refList; PelUnitBuf pcMbBuf = (pu.chromaFormat == CHROMA_400 ? PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[refList][0], pcYuvPred.Y())) : PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[refList][0], pcYuvPred.Y()), PelBuf(m_acYuvPred[refList][1], pcYuvPred.Cb()), PelBuf(m_acYuvPred[refList][2], pcYuvPred.Cr()))); xPredInterUni(pu, eRefPicList, pcMbBuf, true, false, luma, chroma); } CPelUnitBuf srcPred0 = (pu.chromaFormat == CHROMA_400 ? CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvPred.Y())) : CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvPred.Y()), PelBuf(m_acYuvPred[0][1], pcYuvPred.Cb()), PelBuf(m_acYuvPred[0][2], pcYuvPred.Cr()))); CPelUnitBuf srcPred1 = (pu.chromaFormat == CHROMA_400 ? 
CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvPred.Y())) : CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvPred.Y()), PelBuf(m_acYuvPred[1][1], pcYuvPred.Cb()), PelBuf(m_acYuvPred[1][2], pcYuvPred.Cr()))); const bool lumaOnly = luma && !chroma; const bool chromaOnly = !luma && chroma; if (pps.getWPBiPred() && slice.getSliceType() == B_SLICE && pu.cu->BcwIdx == BCW_DEFAULT) { xWeightedPredictionBi(pu, srcPred0, srcPred1, *yuvPredTmp, m_maxCompIDToPred, lumaOnly, chromaOnly); } else if (pps.getUseWP() && slice.getSliceType() == P_SLICE) { xWeightedPredictionUni(pu, srcPred0, REF_PIC_LIST_0, *yuvPredTmp, -1, m_maxCompIDToPred, lumaOnly, chromaOnly); } else { #if JVET_Z0136_OOB bool isOOB[2] = { false,false }; if (pu.interDir == 3) { #if JVET_AA0146_WRAP_AROUND_FIX if (pu.cu->slice->getRefPic(REF_PIC_LIST_0, pu.refIdx[0])->isWrapAroundEnabled( pu.cs->pps )) { isOOB[0] = false; } else #endif isOOB[0] = isMvOOB(pu.mv[0], pu.Y().topLeft(), pu.lumaSize(), pu.cu->slice->getSPS(), pu.cu->slice->getPPS(), pu.cs->mcMask[0], pu.cs->mcMaskChroma[0]); #if JVET_AA0146_WRAP_AROUND_FIX if (pu.cu->slice->getRefPic(REF_PIC_LIST_1, pu.refIdx[1])->isWrapAroundEnabled( pu.cs->pps )) { isOOB[1] = false; } else #endif isOOB[1] = isMvOOB(pu.mv[1], pu.Y().topLeft(), pu.lumaSize(), pu.cu->slice->getSPS(), pu.cu->slice->getPPS(), pu.cs->mcMask[1], pu.cs->mcMaskChroma[1]); xWeightedAverage(false/*isBdofMvRefine*/, 0/*bioSubPuOffset*/, pu, srcPred0, srcPred1, *yuvPredTmp, slice.getSPS()->getBitDepths(), slice.clpRngs(), false, lumaOnly, chromaOnly, NULL, pu.cs->mcMask, yuvPredTmp->Y().width, pu.cs->mcMaskChroma, pu.chromaFormat == CHROMA_400 ? 0 : yuvPredTmp->Cb().width, isOOB); } else { xWeightedAverage(false/*isBdofMvRefine*/, 0/*bioSubPuOffset*/, pu, srcPred0, srcPred1, *yuvPredTmp, slice.getSPS()->getBitDepths(), slice.clpRngs(), false, lumaOnly, chromaOnly, NULL, pu.cs->mcMask, yuvPredTmp->Y().width, pu.cs->mcMaskChroma, pu.chromaFormat == CHROMA_400 ? 
0 : yuvPredTmp->Cb().width, isOOB); } #else xWeightedAverage(false, 0/*bioSubPuOffset*/, pu, srcPred0, srcPred1, *yuvPredTmp, slice.getSPS()->getBitDepths(), slice.clpRngs(), false, lumaOnly, chromaOnly); #endif } yuvPredTmp = nullptr; } #endif for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) { if( pu.refIdx[refList] < 0) { continue; } RefPicList eRefPicList = (refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0); CHECK(CU::isIBC(*pu.cu) && eRefPicList != REF_PIC_LIST_0, "Invalid interdir for ibc mode"); CHECK(CU::isIBC(*pu.cu) && pu.refIdx[refList] != MAX_NUM_REF, "Invalid reference index for ibc mode"); CHECK((CU::isInter(*pu.cu) && pu.refIdx[refList] >= slice.getNumRefIdx(eRefPicList)), "Invalid reference index"); m_iRefListIdx = refList; PelUnitBuf pcMbBuf = ( pu.chromaFormat == CHROMA_400 ? PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[refList][0], pcYuvPred.Y())) : PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[refList][0], pcYuvPred.Y()), PelBuf(m_acYuvPred[refList][1], pcYuvPred.Cb()), PelBuf(m_acYuvPred[refList][2], pcYuvPred.Cr())) ); if (pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0) { if (dmvrApplied) { if (yuvPredTmp) { xPredInterUni(pu, eRefPicList, pcMbBuf, true, false, luma, chroma); } continue; } #if MULTI_PASS_DMVR bool isBdofMvRefineSkipChromaMC = (yuvPredTmp == NULL); xPredInterUni(pu, eRefPicList, pcMbBuf, true, bioApplied, luma, chroma, isBdofMvRefineSkipChromaMC); #else xPredInterUni(pu, eRefPicList, pcMbBuf, true, bioApplied, luma, chroma); #endif } else { if( ( (pps.getUseWP() && slice.getSliceType() == P_SLICE) || (pps.getWPBiPred() && slice.getSliceType() == B_SLICE) ) #if INTER_LIC && !pu.cu->LICFlag #endif ) { xPredInterUni(pu, eRefPicList, pcMbBuf, true, bioApplied, luma, chroma); } else { xPredInterUni(pu, eRefPicList, pcMbBuf, pu.cu->geoFlag, bioApplied, luma, chroma); } } } CPelUnitBuf srcPred0 = ( pu.chromaFormat == CHROMA_400 ? 
CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvPred.Y())) : CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvPred.Y()), PelBuf(m_acYuvPred[0][1], pcYuvPred.Cb()), PelBuf(m_acYuvPred[0][2], pcYuvPred.Cr())) ); CPelUnitBuf srcPred1 = ( pu.chromaFormat == CHROMA_400 ? CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvPred.Y())) : CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvPred.Y()), PelBuf(m_acYuvPred[1][1], pcYuvPred.Cb()), PelBuf(m_acYuvPred[1][2], pcYuvPred.Cr())) ); const bool lumaOnly = luma && !chroma; const bool chromaOnly = !luma && chroma; if( !pu.cu->geoFlag && (!dmvrApplied) && (!bioApplied) && pps.getWPBiPred() && slice.getSliceType() == B_SLICE && pu.cu->BcwIdx == BCW_DEFAULT) { xWeightedPredictionBi( pu, srcPred0, srcPred1, pcYuvPred, m_maxCompIDToPred, lumaOnly, chromaOnly ); if (yuvPredTmp) { yuvPredTmp->copyFrom(pcYuvPred); } } else if( !pu.cu->geoFlag && pps.getUseWP() && slice.getSliceType() == P_SLICE ) { xWeightedPredictionUni( pu, srcPred0, REF_PIC_LIST_0, pcYuvPred, -1, m_maxCompIDToPred, lumaOnly, chromaOnly ); if (yuvPredTmp) { yuvPredTmp->copyFrom(pcYuvPred); } } else { if (dmvrApplied) { if (yuvPredTmp) { yuvPredTmp->addAvg(srcPred0, srcPred1, slice.clpRngs(), false); } xProcessDMVR(pu, pcYuvPred, slice.clpRngs(), bioApplied); } else { #if MULTI_PASS_DMVR #if JVET_Z0136_OOB bool isOOB[2] = { false,false }; if (pu.interDir == 3) { if (pu.cu->affine && pu.mergeType != MRG_TYPE_SUBPU_ATMVP) // affine { bool *pMcMask0 = pu.cs->mcMask[0]; bool *pMcMask1 = pu.cs->mcMask[1]; for (int h = 0; h < (int)pu.lumaSize().height && (!isOOB[0] || !isOOB[1]); h++) { for (int w = 0; w < (int)pu.lumaSize().width && (!isOOB[0] || !isOOB[1]); w++) { isOOB[0] |= pMcMask0[w]; isOOB[1] |= pMcMask1[w]; } pMcMask0 += (int)pu.lumaSize().width; pMcMask1 += (int)pu.lumaSize().width; } } else { #if JVET_AA0146_WRAP_AROUND_FIX if (pu.cu->slice->getRefPic(REF_PIC_LIST_0, pu.refIdx[0])->isWrapAroundEnabled( pu.cs->pps )) { 
isOOB[0] = false; } else #endif isOOB[0] = isMvOOB(pu.mv[0], pu.Y().topLeft(), pu.lumaSize(), pu.cu->slice->getSPS(), pu.cu->slice->getPPS(), pu.cs->mcMask[0], pu.cs->mcMaskChroma[0]); #if JVET_AA0146_WRAP_AROUND_FIX if (pu.cu->slice->getRefPic(REF_PIC_LIST_1, pu.refIdx[1])->isWrapAroundEnabled( pu.cs->pps )) { isOOB[1] = false; } else #endif isOOB[1] = isMvOOB(pu.mv[1], pu.Y().topLeft(), pu.lumaSize(), pu.cu->slice->getSPS(), pu.cu->slice->getPPS(), pu.cs->mcMask[1], pu.cs->mcMaskChroma[1]); } xWeightedAverage(true/*isBdofMvRefine*/, 0/*bioSubPuOffset*/, pu, srcPred0, srcPred1, pcYuvPred, slice.getSPS()->getBitDepths(), slice.clpRngs(), bioApplied, lumaOnly, chromaOnly, yuvPredTmp, pu.cs->mcMask, pcYuvPred.Y().width, pu.cs->mcMaskChroma, pu.chromaFormat == CHROMA_400 ? 0 : pcYuvPred.Cb().width, isOOB); } else { xWeightedAverage(true/*isBdofMvRefine*/, 0/*bioSubPuOffset*/, pu, srcPred0, srcPred1, pcYuvPred, slice.getSPS()->getBitDepths(), slice.clpRngs(), bioApplied, lumaOnly, chromaOnly, yuvPredTmp, pu.cs->mcMask, pcYuvPred.Y().width, pu.cs->mcMaskChroma, pu.chromaFormat == CHROMA_400 ? 
0 : pcYuvPred.Cb().width, isOOB); } #else xWeightedAverage( true/*isBdofMvRefine*/, 0/*bioSubPuOffset*/, pu, srcPred0, srcPred1, pcYuvPred, slice.getSPS()->getBitDepths(), slice.clpRngs(), bioApplied, lumaOnly, chromaOnly, yuvPredTmp ); #endif #else #if JVET_Z0136_OOB bool isOOB[2] = { false,false }; if (pu.interDir == 3) { if (pu.cu->affine && pu.mergeType != MRG_TYPE_SUBPU_ATMVP) // affine { bool *pMcMask0 = pu.cs->mcMask[0]; bool *pMcMask1 = pu.cs->mcMask[1]; for (int h = 0; h < (int)pu.lumaSize().height && (!isOOB[0] || !isOOB[1]); h++) { for (int w = 0; w < (int)pu.lumaSize().width && (!isOOB[0] || !isOOB[1]); w++) { isOOB[0] |= pMcMask0[w]; isOOB[1] |= pMcMask1[w]; } pMcMask0 += (int)pu.lumaSize().width; pMcMask1 += (int)pu.lumaSize().width; } } else { #if JVET_AA0146_WRAP_AROUND_FIX if (pu.cu->slice->getRefPic(REF_PIC_LIST_0, pu.refIdx[0])->isWrapAroundEnabled( pu.cs->pps )) { isOOB[0] = false; } else #endif isOOB[0] = isMvOOB(pu.mv[0], pu.Y().topLeft(), pu.lumaSize(), pu.cu->slice->getSPS(), pu.cu->slice->getPPS(), pu.cs->mcMask[0], pu.cs->mcMaskChroma[0]); #if JVET_AA0146_WRAP_AROUND_FIX if (pu.cu->slice->getRefPic(REF_PIC_LIST_1, pu.refIdx[1])->isWrapAroundEnabled( pu.cs->pps )) { isOOB[1] = false; } else #endif isOOB[1] = isMvOOB(pu.mv[1], pu.Y().topLeft(), pu.lumaSize(), pu.cu->slice->getSPS(), pu.cu->slice->getPPS(), pu.cs->mcMask[1], pu.cs->mcMaskChroma[1]); } xWeightedAverage(pu, srcPred0, srcPred1, pcYuvPred, slice.getSPS()->getBitDepths(), slice.clpRngs(), bioApplied, lumaOnly, chromaOnly, yuvPredTmp, pu.cs->mcMask, pcYuvPred.Y().width, pu.cs->mcMaskChroma, pu.chromaFormat == CHROMA_400 ? 0 : pcYuvPred.Cb().width, isOOB); } else { xWeightedAverage(pu, srcPred0, srcPred1, pcYuvPred, slice.getSPS()->getBitDepths(), slice.clpRngs(), bioApplied, lumaOnly, chromaOnly, yuvPredTmp, pu.cs->mcMask, pcYuvPred.Y().width, pu.cs->mcMaskChroma, pu.chromaFormat == CHROMA_400 ? 
0 : pcYuvPred.Cb().width, isOOB);
      }
#else
      xWeightedAverage( pu, srcPred0, srcPred1, pcYuvPred, slice.getSPS()->getBitDepths(), slice.clpRngs(), bioApplied, lumaOnly, chromaOnly, yuvPredTmp );
#endif
#endif
    }
  }
}

/**
 * \brief Interpolates one colour component of a prediction unit from a single reference picture.
 *
 * The motion vector is split into an integer sample offset, which locates the reference area
 * inside refPic, and a fractional phase (xFrac / yFrac), which is handed to the interpolation
 * filter m_if. Depending on which phase is zero the block is produced by a horizontal-only
 * pass, a vertical-only pass, or a horizontal pass into m_filteredBlockTmp[0] followed by a
 * vertical pass. When the SPS enables resampling and xPredInterBlkRPR() returns true, none of
 * that filtering is executed here. With INTER_LIC, the destination buffer is post-processed
 * by xLocalIlluComp() at the end of the function when the LIC conditions checked there hold.
 *
 * \param compID        colour component to predict
 * \param pu            prediction unit; supplies chroma format, block geometry, SPS/PPS and CU flags
 * \param refPic        reference picture the samples are read from
 * \param _mv           motion vector; copied into a local that may be passed to wrapClipMv()
 * \param dstPic        destination; only dstPic.bufs[compID] is written
 * \param bi            rndRes is set to !bi and forwarded to the filters
 * \param clpRng        clipping range forwarded to the filters
 * \param bioApplied    for luma, redirects the filter output into m_filteredBlockTmp[2 + m_iRefListIdx]
 *                      with an enlarged block size
 * \param isIBC         forces both fractional phases to zero and bypasses wrap-around and the RPR call
 * \param scalingRatio  forwarded to xPredInterBlkRPR()
 * \param dmvrWidth     when non-zero, dmvrWidth x dmvrHeight replaces the PU block size
 * \param dmvrHeight    see dmvrWidth
 * \param bilinearMC    forwarded to the filters; also forces filterIdx to 1 where filterIdx exists
 * \param srcPadBuf     when non-NULL, replaces the reference buffer pointer (see the condition below)
 * \param srcPadStride  stride used together with srcPadBuf
 * \param isAML         only with JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING
 * \param doLic         only with the above and INTER_LIC; allows LIC even when pu.ciipFlag is set
 * \param mvCurr        only with the above and INTER_LIC; MV used for LIC when isAML is set
 * \param fastOBMC      only with JVET_Z0061_TM_OBMC; forces filterIdx to 1
 */
void InterPrediction::xPredInterBlk ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv& _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng
                                     , const bool& bioApplied
                                     , bool isIBC
                                     , const std::pair<int, int> scalingRatio
                                     , SizeType dmvrWidth
                                     , SizeType dmvrHeight
                                     , bool bilinearMC
                                     , Pel *srcPadBuf
                                     , int32_t srcPadStride
#if JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING
                                     , bool isAML
#if INTER_LIC
                                     , bool doLic
                                     , Mv mvCurr
#endif
#endif
#if JVET_Z0061_TM_OBMC
                                     , bool fastOBMC
#endif
                                    )
{
#if JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING || JVET_Z0061_TM_OBMC
  // filterIdx is forwarded to m_if; the value 1 also switches the luma vertical tap count to
  // NTAPS_BILINEAR further down (presumably it selects the bilinear filter set - confirm in m_if).
  int filterIdx = 0;
#if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED
  if (isAML)
  {
    // For template-cost evaluation (isAML) the filter is switched for MMVD merge, and
    // optionally for TM merge, subject to the SPS tool flags below.
    bool changeIF = pu.mmvdMergeFlag;
#if JVET_AA0132_CONFIGURABLE_TM_TOOLS
    changeIF &= pu.cs->sps->getUseTMMMVD();
#endif
#if TM_MRG && JVET_AA0093_REFINED_MOTION_FOR_ARMC
    changeIF |= (pu.tmMergeFlag && !pu.ciipFlag && !pu.cu->geoFlag
#if JVET_AA0132_CONFIGURABLE_TM_TOOLS
      && pu.cs->sps->getUseTMMrgMode()
#endif
      );
#endif
    filterIdx = changeIF ? 1 : filterIdx;
  }
#endif
  if (bilinearMC)
  {
    filterIdx = 1;
  }
#if JVET_Z0061_TM_OBMC
  if (fastOBMC)
  {
    filterIdx = 1;
  }
#endif
#endif
  JVET_J0090_SET_REF_PICTURE( refPic, compID );
  const ChromaFormat chFmt = pu.chromaFormat;
  const bool rndRes = !bi;

  // Number of fractional MV bits in this component's sample grid: the low shiftHor/shiftVer
  // bits of the MV form the phase, the remaining bits the integer sample offset.
  int shiftHor = MV_FRACTIONAL_BITS_INTERNAL + ::getComponentScaleX(compID, chFmt);
  int shiftVer = MV_FRACTIONAL_BITS_INTERNAL + ::getComponentScaleY(compID, chFmt);

  bool wrapRef = false;
  Mv mv(_mv);
  if( !isIBC && refPic->isWrapAroundEnabled( pu.cs->pps ) )
  {
    // Uses the luma block (blocks[0]) regardless of compID; mv is passed by name and
    // presumably adjusted in place - confirm against wrapClipMv().
    wrapRef = wrapClipMv( mv, pu.blocks[0].pos(), pu.blocks[0].size(), pu.cs->sps, pu.cs->pps );
  }

  bool useAltHpelIf = pu.cu->imv == IMV_HPEL;

  const bool isResamplingPossible = pu.cs->sps->getRprEnabledFlag();

  if( isResamplingPossible && !isIBC && xPredInterBlkRPR( scalingRatio, *pu.cs->pps, CompArea( compID, chFmt, pu.blocks[compID], Size( dstPic.bufs[compID].width, dstPic.bufs[compID].height ) ), refPic, mv, dstPic.bufs[compID].buf, dstPic.bufs[compID].stride, bi, wrapRef, clpRng, 0, useAltHpelIf ) )
  {
    // xPredInterBlkRPR() returned true: the regular interpolation below is skipped, and the
    // tools that are incompatible with a scaled reference must be off.
    CHECK( bilinearMC, "DMVR should be disabled with RPR" );
    CHECK( bioApplied, "BDOF should be disabled with RPR" );
  }
  else
  {
    int xFrac = mv.hor & ((1 << shiftHor) - 1);
    int yFrac = mv.ver & ((1 << shiftVer) - 1);
    if (isIBC)
    {
      xFrac = yFrac = 0;
      JVET_J0090_SET_CACHE_ENABLE(false);
    }

    // dstBuf is a reference into dstPic: every change to buf/stride below is visible to the
    // caller unless it is undone, hence the backup/restore of pointer and stride.
    PelBuf & dstBuf = dstPic.bufs[compID];
    unsigned width = dstBuf.width;
    unsigned height = dstBuf.height;

    CPelBuf refBuf;
    {
      Position offset = pu.blocks[compID].pos().offset(mv.getHor() >> shiftHor, mv.getVer() >> shiftVer);
#if MULTI_PASS_DMVR || SAMPLE_BASED_BDOF
      // With BDOF on luma the reference area grows by (BIO_EXTEND_SIZE + 1) samples on every
      // side: the origin moves up/left by that amount and the size grows by twice that amount.
      int refBufExtendSize = 0;
      if (bioApplied && compID == COMPONENT_Y)
      {
        refBufExtendSize = ((BIO_EXTEND_SIZE + 1) << 1); // trick to use SIMD filter
        offset.x -= (BIO_EXTEND_SIZE + 1);
        offset.y -= (BIO_EXTEND_SIZE + 1);
      }

      if (dmvrWidth)
      {
        refBuf = refPic->getRecoBuf(CompArea(compID, chFmt, offset, Size(dmvrWidth + refBufExtendSize, dmvrHeight + refBufExtendSize)), wrapRef);
      }
      else
      {
        refBuf = refPic->getRecoBuf( CompArea( compID, chFmt, offset, Size(pu.blocks[compID].width + refBufExtendSize, pu.blocks[compID].height + refBufExtendSize) ), wrapRef);
      }
#else
      if (dmvrWidth)
      {
        refBuf = refPic->getRecoBuf(CompArea(compID, chFmt, offset, Size(dmvrWidth, dmvrHeight)), wrapRef);
      }
      else
      {
        refBuf = refPic->getRecoBuf(CompArea(compID, chFmt, offset, pu.blocks[compID].size()), wrapRef);
      }
#endif
    }

    // A caller-provided padded buffer replaces the reference pointer and stride; in the
    // multi-pass DMVR / sample-based BDOF builds it is ignored while BDOF is applied.
#if MULTI_PASS_DMVR || SAMPLE_BASED_BDOF
    if (NULL != srcPadBuf && bioApplied == false)
#else
    if (NULL != srcPadBuf)
#endif
    {
      refBuf.buf = srcPadBuf;
      refBuf.stride = srcPadStride;
    }
    if (dmvrWidth)
    {
      width = dmvrWidth;
      height = dmvrHeight;
    }
    // backup data
    // Note: backupWidth/backupHeight are the dimensions actually handed to the filters below.
    int backupWidth = width;
    int backupHeight = height;
    Pel *backupDstBufPtr = dstBuf.buf;
    int backupDstBufStride = dstBuf.stride;

    if (bioApplied && compID == COMPONENT_Y)
    {
#if MULTI_PASS_DMVR || SAMPLE_BASED_BDOF
      // Filter the whole extended block straight into the per-list BDOF scratch buffer.
      backupWidth += ((BIO_EXTEND_SIZE + 1) << 1);
      backupHeight += ((BIO_EXTEND_SIZE + 1) << 1);
      dstBuf.stride = backupWidth;
      dstBuf.buf = m_filteredBlockTmp[2 + m_iRefListIdx][compID];
#else
      // Here the filters still run on the unextended size (backupWidth/backupHeight); the
      // output lands 2 rows / 2 columns into the enlarged scratch buffer and the border is
      // filled separately after filtering.
      width = width + 2 * BIO_EXTEND_SIZE + 2;
      height = height + 2 * BIO_EXTEND_SIZE + 2;

      // change MC output
      dstBuf.stride = width;
      dstBuf.buf = m_filteredBlockTmp[2 + m_iRefListIdx][compID] + 2 * dstBuf.stride + 2;
#endif
    }

    if( yFrac == 0 )
    {
      // No vertical phase: a single horizontal pass writes the destination directly.
#if JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING || JVET_Z0061_TM_OBMC
      m_if.filterHor( compID, (Pel*)refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, xFrac, rndRes, chFmt, clpRng, filterIdx, bilinearMC, useAltHpelIf );
#else
      m_if.filterHor( compID, ( Pel* ) refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, xFrac, rndRes, chFmt, clpRng, bilinearMC, bilinearMC, useAltHpelIf);
#endif
    }
    else if( xFrac == 0 )
    {
      // No horizontal phase: a single vertical pass writes the destination directly.
#if JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING || JVET_Z0061_TM_OBMC
      m_if.filterVer( compID, (Pel*)refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, true, rndRes, chFmt, clpRng, filterIdx, bilinearMC, useAltHpelIf );
#else
      m_if.filterVer( compID, ( Pel* ) refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, true, rndRes, chFmt, clpRng, bilinearMC, bilinearMC, useAltHpelIf);
#endif
    }
    else
    {
      // Both phases non-zero: separable filtering through an intermediate buffer.
#if SIMD_4x4_12 && defined(TARGET_SIMD_X86)
      //use 4x4 if possible
      if( compID == COMPONENT_Y && backupWidth == 4 && backupHeight == 4
        && !( (xFrac == 8 || yFrac == 8) && useAltHpelIf ) //to avoid (8,12 or 12,8 passes)
        && dmvrWidth == 0 //seems to conflict with DMVR, not sure //kolya
        )
        m_if.filter4x4(clpRng, (Pel*)refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, xFrac, yFrac, rndRes);
      else
      {
#endif
      PelBuf tmpBuf = dmvrWidth ? PelBuf( m_filteredBlockTmp[0][compID], Size( dmvrWidth, dmvrHeight ) ) : PelBuf( m_filteredBlockTmp[0][compID], pu.blocks[compID] );
      if( dmvrWidth == 0 )
      {
        tmpBuf.stride = dstBuf.stride;
      }
#if MULTI_PASS_DMVR || SAMPLE_BASED_BDOF
      if (bioApplied && compID == COMPONENT_Y)
      {
        // NOTE(review): the height argument is backupWidth, not backupHeight. Only tmpBuf.buf
        // and tmpBuf.stride are read below, so this looks harmless - confirm the intent.
        tmpBuf = PelBuf(m_filteredBlockTmp[0][compID], Size(backupWidth, backupWidth));
        tmpBuf.stride = dstBuf.stride;
      }
#endif

#if IF_12TAP
      int vFilterSize = isLuma( compID ) ? NTAPS_LUMA( 0 ) : NTAPS_CHROMA;
#else
      int vFilterSize = isLuma( compID ) ? NTAPS_LUMA : NTAPS_CHROMA;
#endif
#if JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING || JVET_Z0061_TM_OBMC
      if (isLuma(compID) && filterIdx == 1)
#else
      if (bilinearMC)
#endif
      {
        vFilterSize = NTAPS_BILINEAR;
      }
      // The horizontal pass starts (vFilterSize / 2 - 1) rows above the block and produces
      // vFilterSize - 1 extra rows, so the vertical pass has its full tap support in tmpBuf.
#if JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING || JVET_Z0061_TM_OBMC
      m_if.filterHor(compID, (Pel*)refBuf.buf - ((vFilterSize >> 1) - 1) * refBuf.stride, refBuf.stride, tmpBuf.buf, tmpBuf.stride, backupWidth, backupHeight + vFilterSize - 1, xFrac, false, chFmt, clpRng, filterIdx, bilinearMC, useAltHpelIf);
      JVET_J0090_SET_CACHE_ENABLE(false);
      m_if.filterVer(compID, (Pel*)tmpBuf.buf + ((vFilterSize >> 1) - 1) * tmpBuf.stride, tmpBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, false, rndRes, chFmt, clpRng, filterIdx, bilinearMC, useAltHpelIf);
#else
      m_if.filterHor( compID, ( Pel* ) refBuf.buf - ( ( vFilterSize >> 1 ) - 1 ) * refBuf.stride, refBuf.stride, tmpBuf.buf, tmpBuf.stride, backupWidth, backupHeight + vFilterSize - 1, xFrac, false, chFmt, clpRng, bilinearMC, bilinearMC, useAltHpelIf);
      JVET_J0090_SET_CACHE_ENABLE( false );
      m_if.filterVer( compID, ( Pel* ) tmpBuf.buf + ( ( vFilterSize >> 1 ) - 1 ) * tmpBuf.stride, tmpBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, false, rndRes, chFmt, clpRng, bilinearMC, bilinearMC, useAltHpelIf);
#endif
#if SIMD_4x4_12 && defined(TARGET_SIMD_X86)
      }
#endif
    }
    JVET_J0090_SET_CACHE_ENABLE( ( srcPadStride == 0 ) && ( bioApplied == false ) ); // Enabled only in non-DMVR-non-BDOF process, In DMVR process, srcPadStride is always non-zero

    if( bioApplied && compID == COMPONENT_Y )
    {
#if !MULTI_PASS_DMVR && !SAMPLE_BASED_BDOF
      // Fill the one-sample border around the filtered block in the BDOF scratch buffer
      // from unfiltered reference samples, scaled by 'shift' and offset by IF_INTERNAL_OFFS.
      // xOffset/yOffset pick the reference sample on the side indicated by the phase
      // (phase < 8 -> one sample further left/up).
#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
      const int shift = IF_INTERNAL_FRAC_BITS( clpRng.bd );
#else
      const int shift = std::max<int>( 2, ( IF_INTERNAL_PREC - clpRng.bd ) );
#endif
      int xOffset = ( xFrac < 8 ) ? 1 : 0;
      int yOffset = ( yFrac < 8 ) ? 1 : 0;
      // top border row
      const Pel* refPel = refBuf.buf - yOffset * refBuf.stride - xOffset;
      Pel* dstPel = m_filteredBlockTmp[2 + m_iRefListIdx][compID] + dstBuf.stride + 1;
      for( int w = 0; w < ( width - 2 * BIO_EXTEND_SIZE ); w++ )
      {
        Pel val = leftShift_round( refPel[w], shift );
        dstPel[w] = val - ( Pel ) IF_INTERNAL_OFFS;
      }

      // left and right border columns
      refPel = refBuf.buf + ( 1 - yOffset )*refBuf.stride - xOffset;
      dstPel = m_filteredBlockTmp[2 + m_iRefListIdx][compID] + 2 * dstBuf.stride + 1;
      for( int h = 0; h < ( height - 2 * BIO_EXTEND_SIZE - 2 ); h++ )
      {
        Pel val = leftShift_round( refPel[0], shift );
        dstPel[0] = val - ( Pel ) IF_INTERNAL_OFFS;

        val = leftShift_round( refPel[width - 3], shift );
        dstPel[width - 3] = val - ( Pel ) IF_INTERNAL_OFFS;

        refPel += refBuf.stride;
        dstPel += dstBuf.stride;
      }

      // bottom border row
      refPel = refBuf.buf + ( height - 2 * BIO_EXTEND_SIZE - 2 + 1 - yOffset )*refBuf.stride - xOffset;
      dstPel = m_filteredBlockTmp[2 + m_iRefListIdx][compID] + ( height - 2 * BIO_EXTEND_SIZE )*dstBuf.stride + 1;
      for( int w = 0; w < ( width - 2 * BIO_EXTEND_SIZE ); w++ )
      {
        Pel val = leftShift_round( refPel[w], shift );
        dstPel[w] = val - ( Pel ) IF_INTERNAL_OFFS;
      }

      // restore data
      width = backupWidth;
      height = backupHeight;
#endif
      // Undo the redirection of the caller's buffer descriptor made before filtering.
      dstBuf.buf = backupDstBufPtr;
      dstBuf.stride = backupDstBufStride;
    }
  // With RPR_ENABLE the 'else' branch is closed here, so the LIC stage below also runs after
  // the RPR path; otherwise the branch stays open and is closed after the LIC stage.
#if RPR_ENABLE
  }
#endif
#if INTER_LIC
#if RPR_ENABLE
  PelBuf& dstBuf = dstPic.bufs[compID];
#endif
  if( m_storeBeforeLIC )
  {
    // Keep a copy of the prediction as it is before illumination compensation.
    m_predictionBeforeLIC.bufs[compID].copyFrom( dstBuf );
  }

#if JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING
  if (pu.cu->LICFlag && (!pu.ciipFlag || doLic))
#else
  if( pu.cu->LICFlag && !pu.ciipFlag )
#endif
  {
    CHECK( pu.cu->geoFlag, "Geometric mode is not used with LIC" );
    CHECK( CU::isIBC( *pu.cu ), "IBC mode is not used with LIC" );
    CHECK( pu.interDir == 3, "Bi-prediction is not used with LIC" );
#if !JVET_W0090_ARMC_TM && !JVET_Z0056_GPM_SPLIT_MODE_REORDERING
    CHECK( pu.ciipFlag, "CIIP mode is not used with LIC" );
#endif
#if RPR_ENABLE
    if (PU::checkRprLicCondition(pu))
    {
#endif
#if 
JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING if( isAML ) { #if JVET_AA0146_WRAP_AROUND_FIX bool wrapRef = false; Mv mvCurrClip(mvCurr); if (refPic->isWrapAroundEnabled( pu.cs->pps )) { wrapRef = wrapClipMv(mvCurrClip, pu.lumaPos(), pu.lumaSize(), pu.cs->sps, pu.cs->pps); } else { clipMv(mvCurrClip, pu.lumaPos(), pu.lumaSize(), *pu.cu->cs->sps, *pu.cu->cs->pps); } xLocalIlluComp(pu, compID, *refPic, mvCurrClip, bi, dstBuf, wrapRef); #else xLocalIlluComp(pu, compID, *refPic, mvCurr, bi, dstBuf); #endif } else #endif #if JVET_AA0146_WRAP_AROUND_FIX { bool wrapRef = false; Mv mvClip(mv); if (refPic->isWrapAroundEnabled( pu.cs->pps )) { wrapRef = wrapClipMv(mvClip, pu.lumaPos(), pu.lumaSize(), pu.cs->sps, pu.cs->pps); } else { clipMv(mvClip, pu.lumaPos(), pu.lumaSize(), *pu.cu->cs->sps, *pu.cu->cs->pps); } xLocalIlluComp(pu, compID, *refPic, mvClip, bi, dstBuf, wrapRef); } #else xLocalIlluComp( pu, compID, *refPic, _mv, bi, dstBuf ); #endif #if RPR_ENABLE } #endif } #endif #if !RPR_ENABLE } #endif } #if !AFFINE_RM_CONSTRAINTS_AND_OPT bool InterPrediction::isSubblockVectorSpreadOverLimit( int a, int b, int c, int d, int predType ) { int s4 = ( 4 << 11 ); int filterTap = 6; if ( predType == 3 ) { int refBlkWidth = std::max( std::max( 0, 4 * a + s4 ), std::max( 4 * c, 4 * a + 4 * c + s4 ) ) - std::min( std::min( 0, 4 * a + s4 ), std::min( 4 * c, 4 * a + 4 * c + s4 ) ); int refBlkHeight = std::max( std::max( 0, 4 * b ), std::max( 4 * d + s4, 4 * b + 4 * d + s4 ) ) - std::min( std::min( 0, 4 * b ), std::min( 4 * d + s4, 4 * b + 4 * d + s4 ) ); refBlkWidth = ( refBlkWidth >> 11 ) + filterTap + 3; refBlkHeight = ( refBlkHeight >> 11 ) + filterTap + 3; if ( refBlkWidth * refBlkHeight > ( filterTap + 9 ) * ( filterTap + 9 ) ) { return true; } } else { int refBlkWidth = std::max( 0, 4 * a + s4 ) - std::min( 0, 4 * a + s4 ); int refBlkHeight = std::max( 0, 4 * b ) - std::min( 0, 4 * b ); refBlkWidth = ( refBlkWidth >> 11 ) + filterTap + 3; refBlkHeight = ( 
refBlkHeight >> 11 ) + filterTap + 3; if ( refBlkWidth * refBlkHeight > ( filterTap + 9 ) * ( filterTap + 5 ) ) { return true; } refBlkWidth = std::max( 0, 4 * c ) - std::min( 0, 4 * c ); refBlkHeight = std::max( 0, 4 * d + s4 ) - std::min( 0, 4 * d + s4 ); refBlkWidth = ( refBlkWidth >> 11 ) + filterTap + 3; refBlkHeight = ( refBlkHeight >> 11 ) + filterTap + 3; if ( refBlkWidth * refBlkHeight > ( filterTap + 5 ) * ( filterTap + 9 ) ) { return true; } } return false; } #endif #if AFFINE_ENC_OPT #if JVET_Z0136_OOB void InterPrediction::xPredAffineBlk(const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv* _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng, RefPicList eRefPicList, const bool genChromaMv, const std::pair<int, int> scalingRatio, const bool calGradient) #else void InterPrediction::xPredAffineBlk(const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv* _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng, const bool genChromaMv, const std::pair<int, int> scalingRatio, const bool calGradient) #endif #else #if JVET_Z0136_OOB void InterPrediction::xPredAffineBlk(const ComponentID &compID, const PredictionUnit &pu, const Picture *refPic, const Mv *_mv, PelUnitBuf &dstPic, const bool &bi, const ClpRng &clpRng, RefPicList eRefPicList, bool genChromaMv, const std::pair<int, int> scalingRatio) #else void InterPrediction::xPredAffineBlk(const ComponentID &compID, const PredictionUnit &pu, const Picture *refPic, const Mv *_mv, PelUnitBuf &dstPic, const bool &bi, const ClpRng &clpRng, bool genChromaMv, const std::pair<int, int> scalingRatio) #endif #endif { JVET_J0090_SET_REF_PICTURE( refPic, compID ); const ChromaFormat chFmt = pu.chromaFormat; int iScaleX = ::getComponentScaleX( compID, chFmt ); int iScaleY = ::getComponentScaleY( compID, chFmt ); Mv mvLT =_mv[0]; Mv mvRT =_mv[1]; Mv mvLB =_mv[2]; #if INTER_LIC Pel* refLeftTemplate = m_pcLICRefLeftTemplate; Pel* refAboveTemplate = 
m_pcLICRefAboveTemplate; Pel* recLeftTemplate = m_pcLICRecLeftTemplate; Pel* recAboveTemplate = m_pcLICRecAboveTemplate; int numTemplate[2] = { 0 , 0 }; // 0:Above, 1:Left #endif // get affine sub-block width and height const int width = pu.Y().width; const int height = pu.Y().height; int blockWidth = AFFINE_MIN_BLOCK_SIZE; int blockHeight = AFFINE_MIN_BLOCK_SIZE; CHECK(blockWidth > (width >> iScaleX ), "Sub Block width > Block width"); CHECK(blockHeight > (height >> iScaleY), "Sub Block height > Block height"); #if !AFFINE_RM_CONSTRAINTS_AND_OPT const int MVBUFFER_SIZE = MAX_CU_SIZE / MIN_PU_SIZE; #endif const int cxWidth = width >> iScaleX; const int cxHeight = height >> iScaleY; #if !AFFINE_RM_CONSTRAINTS_AND_OPT const int iHalfBW = blockWidth >> 1; const int iHalfBH = blockHeight >> 1; #endif const int iBit = MAX_CU_DEPTH; int iDMvHorX, iDMvHorY, iDMvVerX, iDMvVerY; #if AFFINE_RM_CONSTRAINTS_AND_OPT iDMvHorX = (mvRT - mvLT).getHor() << (iBit - floorLog2(width)); iDMvHorY = (mvRT - mvLT).getVer() << (iBit - floorLog2(width)); if ( pu.cu->affineType == AFFINEMODEL_6PARAM ) { iDMvVerX = (mvLB - mvLT).getHor() << (iBit - floorLog2(height)); iDMvVerY = (mvLB - mvLT).getVer() << (iBit - floorLog2(height)); } #else iDMvHorX = (mvRT - mvLT).getHor() << (iBit - floorLog2(cxWidth)); iDMvHorY = (mvRT - mvLT).getVer() << (iBit - floorLog2(cxWidth)); if ( pu.cu->affineType == AFFINEMODEL_6PARAM ) { iDMvVerX = (mvLB - mvLT).getHor() << (iBit - floorLog2(cxHeight)); iDMvVerY = (mvLB - mvLT).getVer() << (iBit - floorLog2(cxHeight)); } #endif else { iDMvVerX = -iDMvHorY; iDMvVerY = iDMvHorX; } int iMvScaleHor = mvLT.getHor() << iBit; int iMvScaleVer = mvLT.getVer() << iBit; const SPS &sps = *pu.cs->sps; #if IF_12TAP const int vFilterSize = isLuma(compID) ? NTAPS_LUMA( 0 ) : NTAPS_CHROMA; #else const int vFilterSize = isLuma(compID) ? 
NTAPS_LUMA : NTAPS_CHROMA; #endif const int shift = iBit - 4 + MV_FRACTIONAL_BITS_INTERNAL; bool wrapRef = false; #if !AFFINE_RM_CONSTRAINTS_AND_OPT const bool subblkMVSpreadOverLimit = isSubblockVectorSpreadOverLimit( iDMvHorX, iDMvHorY, iDMvVerX, iDMvVerY, pu.interDir ); #endif bool enablePROF = (sps.getUsePROF()) && (!m_skipPROF) && (compID == COMPONENT_Y); enablePROF &= (!pu.cs->picHeader->getDisProfFlag()); enablePROF &= !((pu.cu->affineType == AFFINEMODEL_6PARAM && _mv[0] == _mv[1] && _mv[0] == _mv[2]) || (pu.cu->affineType == AFFINEMODEL_4PARAM && _mv[0] == _mv[1])); #if !AFFINE_RM_CONSTRAINTS_AND_OPT enablePROF &= !subblkMVSpreadOverLimit; #endif const int profThres = 1 << (iBit + (m_isBi ? 1 : 0)); enablePROF &= !m_encOnly || pu.cu->slice->getCheckLDC() || iDMvHorX > profThres || iDMvHorY > profThres || iDMvVerX > profThres || iDMvVerY > profThres || iDMvHorX < -profThres || iDMvHorY < -profThres || iDMvVerX < -profThres || iDMvVerY < -profThres; enablePROF &= (refPic->isRefScaled( pu.cs->pps ) == false); #if AFFINE_MMVD enablePROF &= ((pu.mmvdEncOptMode & 3) != 3); // encoder-only #endif #if AFFINE_ENC_OPT bool isLast = (enablePROF || calGradient) ? false : !bi; #else bool isLast = enablePROF ? 
false : !bi; #endif #if AFFINE_RM_CONSTRAINTS_AND_OPT const int cuExtW = width + PROF_BORDER_EXT_W * 2; const int cuExtH = height + PROF_BORDER_EXT_H * 2; #else const int cuExtW = AFFINE_MIN_BLOCK_SIZE + PROF_BORDER_EXT_W * 2; const int cuExtH = AFFINE_MIN_BLOCK_SIZE + PROF_BORDER_EXT_H * 2; #endif PelBuf gradXExt(m_gradBuf[0], cuExtW, cuExtH); PelBuf gradYExt(m_gradBuf[1], cuExtW, cuExtH); #if IF_12TAP const int MAX_FILTER_SIZE = std::max<int>(NTAPS_LUMA(0), NTAPS_CHROMA); #else const int MAX_FILTER_SIZE = std::max<int>(NTAPS_LUMA, NTAPS_CHROMA); #endif #if AFFINE_RM_CONSTRAINTS_AND_OPT const int dstExtW = ((width + PROF_BORDER_EXT_W * 2 + 7) >> 3) << 3; const int dstExtH = cuExtH; PelBuf dstExtBuf(m_filteredBlockTmp[1][compID], cuExtW, cuExtH); #else const int dstExtW = ((blockWidth + PROF_BORDER_EXT_W * 2 + 7) >> 3) << 3; const int dstExtH = blockHeight + PROF_BORDER_EXT_H * 2; PelBuf dstExtBuf(m_filteredBlockTmp[1][compID], dstExtW, dstExtH); #endif const int refExtH = dstExtH + MAX_FILTER_SIZE - 1; PelBuf tmpBuf = PelBuf(m_filteredBlockTmp[0][compID], dstExtW, refExtH); PelBuf &dstBuf = dstPic.bufs[compID]; int *dMvScaleHor = m_dMvBuf[m_iRefListIdx]; int *dMvScaleVer = m_dMvBuf[m_iRefListIdx] + 16; if (enablePROF) { int* dMvH = dMvScaleHor; int* dMvV = dMvScaleVer; int quadHorX = iDMvHorX << 2; int quadHorY = iDMvHorY << 2; int quadVerX = iDMvVerX << 2; int quadVerY = iDMvVerY << 2; dMvH[0] = ((iDMvHorX + iDMvVerX) << 1) - ((quadHorX + quadVerX) << 1); dMvV[0] = ((iDMvHorY + iDMvVerY) << 1) - ((quadHorY + quadVerY) << 1); for (int w = 1; w < blockWidth; w++) { dMvH[w] = dMvH[w - 1] + quadHorX; dMvV[w] = dMvV[w - 1] + quadHorY; } dMvH += blockWidth; dMvV += blockWidth; for (int h = 1; h < blockHeight; h++) { for (int w = 0; w < blockWidth; w++) { dMvH[w] = dMvH[w - blockWidth] + quadVerX; dMvV[w] = dMvV[w - blockWidth] + quadVerY; } dMvH += blockWidth; dMvV += blockWidth; } #if CTU_256 const int mvShift = MAX_CU_DEPTH + 1; const int dmvLimit = (1 << 5) - 1; // 
this means the maximum magnitude of dmv is half pel. The target MV precision is 1/64, thus the bit shift is 5 #else const int mvShift = 8; const int dmvLimit = ( 1 << 5 ) - 1; #endif if (!g_pelBufOP.roundIntVector) { for (int idx = 0; idx < blockWidth * blockHeight; idx++) { roundAffineMv(dMvScaleHor[idx], dMvScaleVer[idx], mvShift); dMvScaleHor[idx] = Clip3( -dmvLimit, dmvLimit, dMvScaleHor[idx] ); dMvScaleVer[idx] = Clip3( -dmvLimit, dmvLimit, dMvScaleVer[idx] ); } } else { int sz = blockWidth * blockHeight; g_pelBufOP.roundIntVector(dMvScaleHor, sz, mvShift, dmvLimit); g_pelBufOP.roundIntVector(dMvScaleVer, sz, mvShift, dmvLimit); } } #if AFFINE_ENC_OPT else if (calGradient) { ::memset(m_dMvBuf, 0, sizeof(m_dMvBuf)); } #endif #if AFFINE_RM_CONSTRAINTS_AND_OPT if (compID == COMPONENT_Y) { if (iDMvHorX == 0 && iDMvHorY == 0) blockWidth = width; else { int maxDmv = std::max(abs(iDMvHorX), abs(iDMvHorY)) * blockWidth; int TH = 1 << (iBit - 1); // Half pel while (maxDmv < TH && blockWidth < width) { blockWidth <<= 1; maxDmv <<= 1; } } if (iDMvVerX == 0 && iDMvVerY == 0) blockHeight = height; else { int maxDmv = std::max(abs(iDMvVerX), abs(iDMvVerY)) * blockHeight; int TH = 1 << (iBit - 1); // Half pel while (maxDmv < TH && blockHeight < height) { blockHeight <<= 1; maxDmv <<= 1; } } } CMotionBuf mb = pu.getMotionBuf(); const MotionInfo *miLine = mb.buf; const MotionInfo *miLine2 = mb.buf + iScaleX + iScaleY * mb.stride; int stride = ((blockHeight << iScaleY) >> 2) * mb.stride; int iMvScaleTmpHor0 = iMvScaleHor + ((iDMvHorX * blockWidth + iDMvVerX * blockHeight) >> 1); int iMvScaleTmpVer0 = iMvScaleVer + ((iDMvHorY * blockWidth + iDMvVerY * blockHeight) >> 1); #endif #if !AFFINE_RM_CONSTRAINTS_AND_OPT int scaleXLuma = ::getComponentScaleX(COMPONENT_Y, chFmt); int scaleYLuma = ::getComponentScaleY(COMPONENT_Y, chFmt); if (genChromaMv && pu.chromaFormat != CHROMA_444) { CHECK(compID == COMPONENT_Y, "Chroma only subblock MV calculation should not apply to Luma"); int 
lumaBlockWidth = AFFINE_MIN_BLOCK_SIZE; int lumaBlockHeight = AFFINE_MIN_BLOCK_SIZE; CHECK(lumaBlockWidth > (width >> scaleXLuma), "Sub Block width > Block width"); CHECK(lumaBlockHeight > (height >> scaleYLuma), "Sub Block height > Block height"); const int cxWidthLuma = width >> scaleXLuma; const int cxHeightLuma = height >> scaleYLuma; #if !AFFINE_RM_CONSTRAINTS_AND_OPT const int halfBWLuma = lumaBlockWidth >> 1; const int halfBHLuma = lumaBlockHeight >> 1; int dMvHorXLuma, dMvHorYLuma, dMvVerXLuma, dMvVerYLuma; dMvHorXLuma = (mvRT - mvLT).getHor() << (iBit - floorLog2(cxWidthLuma)); dMvHorYLuma = (mvRT - mvLT).getVer() << (iBit - floorLog2(cxWidthLuma)); if (pu.cu->affineType == AFFINEMODEL_6PARAM) { dMvVerXLuma = (mvLB - mvLT).getHor() << (iBit - floorLog2(cxHeightLuma)); dMvVerYLuma = (mvLB - mvLT).getVer() << (iBit - floorLog2(cxHeightLuma)); } else { dMvVerXLuma = -dMvHorYLuma; dMvVerYLuma = dMvHorXLuma; } #endif #if !AFFINE_RM_CONSTRAINTS_AND_OPT const bool subblkMVSpreadOverLimitLuma = isSubblockVectorSpreadOverLimit(dMvHorXLuma, dMvHorYLuma, dMvVerXLuma, dMvVerYLuma, pu.interDir); #endif // get luma MV block by block for (int h = 0; h < cxHeightLuma; h += lumaBlockHeight) { for (int w = 0; w < cxWidthLuma; w += lumaBlockWidth) { int mvScaleTmpHor, mvScaleTmpVer; #if !AFFINE_RM_CONSTRAINTS_AND_OPT if (!subblkMVSpreadOverLimitLuma) #endif { #if AFFINE_RM_CONSTRAINTS_AND_OPT mvScaleTmpHor = iMvScaleTmpHor0 + iDMvHorX * w + iDMvVerX * h; mvScaleTmpVer = iMvScaleTmpVer0 + iDMvHorY * w + iDMvVerY * h; #else mvScaleTmpHor = iMvScaleHor + dMvHorXLuma * (halfBWLuma + w) + dMvVerXLuma * (halfBHLuma + h); mvScaleTmpVer = iMvScaleVer + dMvHorYLuma * (halfBWLuma + w) + dMvVerYLuma * (halfBHLuma + h); #endif } #if !AFFINE_RM_CONSTRAINTS_AND_OPT else { mvScaleTmpHor = iMvScaleHor + dMvHorXLuma * (cxWidthLuma >> 1) + dMvVerXLuma * (cxHeightLuma >> 1); mvScaleTmpVer = iMvScaleVer + dMvHorYLuma * (cxWidthLuma >> 1) + dMvVerYLuma * (cxHeightLuma >> 1); } #endif 
roundAffineMv(mvScaleTmpHor, mvScaleTmpVer, shift); Mv tmpMv(mvScaleTmpHor, mvScaleTmpVer); tmpMv.clipToStorageBitDepth(); mvScaleTmpHor = tmpMv.getHor(); mvScaleTmpVer = tmpMv.getVer(); m_storedMv[h / AFFINE_MIN_BLOCK_SIZE * MVBUFFER_SIZE + w / AFFINE_MIN_BLOCK_SIZE].set(mvScaleTmpHor, mvScaleTmpVer); } } } #endif #if AFFINE_ENC_OPT int gradLineOffset = 0, gradOffset = 0; int gradSubBlkStride = blockHeight * width; #elif AFFINE_RM_CONSTRAINTS_AND_OPT int gradLineOffset = 0, gradOffset = 0; int gradSubBlkStride = blockHeight * cuExtW; #endif #if JVET_Z0136_OOB if (compID == COMPONENT_Y && pu.interDir == 3) { bool *pMcMask = pu.cs->mcMask[int(eRefPicList)]; memset(pMcMask, false, cxWidth * cxHeight); bool *pMcMaskChroma = pu.cs->mcMaskChroma[int(eRefPicList)]; int chromaScale = getComponentScaleX(COMPONENT_Cb, m_currChromaFormat); int cxWidthChroma = cxWidth >> chromaScale; int cxHeightChroma = cxHeight >> chromaScale; memset(pMcMaskChroma, false, cxWidthChroma * cxHeightChroma); } #endif // get prediction block by block for ( int h = 0; h < cxHeight; h += blockHeight ) { for ( int w = 0; w < cxWidth; w += blockWidth ) { int iMvScaleTmpHor, iMvScaleTmpVer; if (compID == COMPONENT_Y || pu.chromaFormat == CHROMA_444) { #if !AFFINE_RM_CONSTRAINTS_AND_OPT if ( !subblkMVSpreadOverLimit ) #endif { #if AFFINE_RM_CONSTRAINTS_AND_OPT iMvScaleTmpHor = iMvScaleTmpHor0 + iDMvHorX * w + iDMvVerX * h; iMvScaleTmpVer = iMvScaleTmpVer0 + iDMvHorY * w + iDMvVerY * h; #else iMvScaleTmpHor = iMvScaleHor + iDMvHorX * (iHalfBW + w) + iDMvVerX * (iHalfBH + h); iMvScaleTmpVer = iMvScaleVer + iDMvHorY * (iHalfBW + w) + iDMvVerY * (iHalfBH + h); #endif } #if !AFFINE_RM_CONSTRAINTS_AND_OPT else { iMvScaleTmpHor = iMvScaleHor + iDMvHorX * ( cxWidth >> 1 ) + iDMvVerX * ( cxHeight >> 1 ); iMvScaleTmpVer = iMvScaleVer + iDMvHorY * ( cxWidth >> 1 ) + iDMvVerY * ( cxHeight >> 1 ); } #endif roundAffineMv(iMvScaleTmpHor, iMvScaleTmpVer, shift); Mv tmpMv(iMvScaleTmpHor, iMvScaleTmpVer); 
tmpMv.clipToStorageBitDepth(); iMvScaleTmpHor = tmpMv.getHor(); iMvScaleTmpVer = tmpMv.getVer(); // clip and scale if ( refPic->isWrapAroundEnabled( pu.cs->pps ) ) { #if !AFFINE_RM_CONSTRAINTS_AND_OPT m_storedMv[h / AFFINE_MIN_BLOCK_SIZE * MVBUFFER_SIZE + w / AFFINE_MIN_BLOCK_SIZE].set(iMvScaleTmpHor, iMvScaleTmpVer); #endif Mv tmpMv(iMvScaleTmpHor, iMvScaleTmpVer); wrapRef = wrapClipMv( tmpMv, Position( pu.Y().x + w, pu.Y().y + h ), Size( blockWidth, blockHeight ), &sps, pu.cs->pps ); iMvScaleTmpHor = tmpMv.getHor(); iMvScaleTmpVer = tmpMv.getVer(); } else { wrapRef = false; #if !AFFINE_RM_CONSTRAINTS_AND_OPT m_storedMv[h / AFFINE_MIN_BLOCK_SIZE * MVBUFFER_SIZE + w / AFFINE_MIN_BLOCK_SIZE].set(iMvScaleTmpHor, iMvScaleTmpVer); #endif if (refPic->isRefScaled(pu.cs->pps) == false) { clipMv(tmpMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps); iMvScaleTmpHor = tmpMv.getHor(); iMvScaleTmpVer = tmpMv.getVer(); } } #if JVET_Z0136_OOB if (compID == COMPONENT_Y && pu.interDir == 3) { int chromaScale = getComponentScaleX(COMPONENT_Cb, m_currChromaFormat); bool *pMcMask = pu.cs->mcMask[int(eRefPicList)] + w + h * cxWidth; bool *pMcMaskChroma = pu.cs->mcMaskChroma[int(eRefPicList)] + (w >> chromaScale) + (h >> chromaScale) * (cxWidth >> chromaScale); int cxWidthChroma = cxWidth >> chromaScale; #if JVET_AA0146_WRAP_AROUND_FIX if ( refPic->isWrapAroundEnabled( pu.cs->pps ) ) { bool *pMcMask0 = pMcMask; for (int y = 0; y < blockHeight; y++) { memset(pMcMask0, false, blockWidth); pMcMask0 += cxWidth; } bool *pMcMaskChroma0 = pMcMaskChroma; int widthChroma = blockWidth >> chromaScale; int heightChroma = blockHeight >> chromaScale; for (int y = 0; y < heightChroma; y++) { memset(pMcMaskChroma0, false, widthChroma); pMcMaskChroma0 += cxWidthChroma; } } else #endif isMvOOBSubBlk(tmpMv, Position(pu.Y().x + w, pu.Y().y + h), Size(blockWidth, blockHeight), pu.cu->slice->getSPS(), pu.cu->slice->getPPS(), pMcMask, cxWidth, pMcMaskChroma, cxWidthChroma); } #endif } else { #if 
AFFINE_RM_CONSTRAINTS_AND_OPT Mv curMv = miLine[(w << iScaleX) >> 2].mv[m_iRefListIdx] + miLine2[(w << iScaleX) >> 2].mv[m_iRefListIdx]; #else Mv curMv = m_storedMv[((h << iScaleY) / AFFINE_MIN_BLOCK_SIZE) * MVBUFFER_SIZE + ((w << iScaleX) / AFFINE_MIN_BLOCK_SIZE)] + m_storedMv[((h << iScaleY) / AFFINE_MIN_BLOCK_SIZE + iScaleY)* MVBUFFER_SIZE + ((w << iScaleX) / AFFINE_MIN_BLOCK_SIZE + iScaleX)]; #endif roundAffineMv(curMv.hor, curMv.ver, 1); if ( refPic->isWrapAroundEnabled( pu.cs->pps ) ) { wrapRef = wrapClipMv( curMv, Position( pu.Y().x + ( w << iScaleX ), pu.Y().y + ( h << iScaleY ) ), Size( blockWidth << iScaleX, blockHeight << iScaleY ), &sps, pu.cs->pps ); } else { wrapRef = false; if (refPic->isRefScaled(pu.cs->pps) == false) { clipMv(curMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps); } } iMvScaleTmpHor = curMv.hor; iMvScaleTmpVer = curMv.ver; } if( xPredInterBlkRPR( scalingRatio, *pu.cs->pps, CompArea( compID, chFmt, pu.blocks[compID].offset( w, h ), Size( blockWidth, blockHeight ) ), refPic, Mv( iMvScaleTmpHor, iMvScaleTmpVer ), dstBuf.buf + w + h * dstBuf.stride, dstBuf.stride, bi, wrapRef, clpRng, 2 ) ) { CHECK( enablePROF, "PROF should be disabled with RPR" ); } else { #if INTER_LIC if (pu.cu->LICFlag && (w == 0 || h == 0)) { #if JVET_AA0146_WRAP_AROUND_FIX xGetSublkTemplate(*pu.cu, compID, *refPic, Mv(iMvScaleTmpHor, iMvScaleTmpVer), blockWidth, blockHeight, w, h, numTemplate, refLeftTemplate, refAboveTemplate, recLeftTemplate, recAboveTemplate, wrapRef); #else xGetSublkTemplate(*pu.cu, compID, *refPic, Mv(iMvScaleTmpHor, iMvScaleTmpVer), blockWidth, blockHeight, w, h, numTemplate, refLeftTemplate, refAboveTemplate, recLeftTemplate, recAboveTemplate); #endif } #endif // get the MV in high precision int xFrac, yFrac, xInt, yInt; if (!iScaleX) { xInt = iMvScaleTmpHor >> 4; xFrac = iMvScaleTmpHor & 15; } else { xInt = iMvScaleTmpHor >> 5; xFrac = iMvScaleTmpHor & 31; } if (!iScaleY) { yInt = iMvScaleTmpVer >> 4; yFrac = iMvScaleTmpVer & 15; } 
else { yInt = iMvScaleTmpVer >> 5; yFrac = iMvScaleTmpVer & 31; } const CPelBuf refBuf = refPic->getRecoBuf( CompArea(compID, chFmt, pu.blocks[compID].offset(xInt + w, yInt + h), pu.blocks[compID]), wrapRef); Pel *ref = (Pel *) refBuf.buf; Pel *dst = dstBuf.buf + w + h * dstBuf.stride; int refStride = refBuf.stride; int dstStride = dstBuf.stride; int bw = blockWidth; int bh = blockHeight; #if AFFINE_ENC_OPT if (enablePROF || calGradient) #else if (enablePROF) #endif { dst = dstExtBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H); dstStride = dstExtBuf.stride; } #if IF_12TAP if (yFrac == 0) { m_if.filterHor(compID, (Pel*)ref, refStride, dst, dstStride, bw, bh, xFrac, isLast, chFmt, clpRng , 0, false, false); } else if (xFrac == 0) { m_if.filterVer(compID, (Pel*)ref, refStride, dst, dstStride, bw, bh, yFrac, true, isLast, chFmt, clpRng , 0, false, false); } else { #if SIMD_4x4_12 && defined(TARGET_SIMD_X86) #if AFFINE_RM_CONSTRAINTS_AND_OPT if (compID == COMPONENT_Y && bw == 4 && bh == 4) #else if (compID == COMPONENT_Y) #endif m_if.filter4x4(clpRng, (Pel*)ref, refStride, dst, dstStride, xFrac, yFrac, isLast); else { #endif m_if.filterHor(compID, (Pel*)ref - ((vFilterSize >> 1) - 1)*refStride, refStride, tmpBuf.buf, tmpBuf.stride, bw, bh + vFilterSize - 1, xFrac, false, chFmt, clpRng , 0, false, false); JVET_J0090_SET_CACHE_ENABLE(false); m_if.filterVer(compID, tmpBuf.buf + ((vFilterSize >> 1) - 1)*tmpBuf.stride, tmpBuf.stride, dst, dstStride, bw, bh, yFrac, false, isLast, chFmt, clpRng , 0, false, false); JVET_J0090_SET_CACHE_ENABLE(true); #if SIMD_4x4_12 && defined(TARGET_SIMD_X86) } #endif } #else if (yFrac == 0) { m_if.filterHor(compID, (Pel *) ref, refStride, dst, dstStride, bw, bh, xFrac, isLast, chFmt, clpRng); } else if (xFrac == 0) { m_if.filterVer(compID, (Pel *) ref, refStride, dst, dstStride, bw, bh, yFrac, true, isLast, chFmt, clpRng); } else { m_if.filterHor(compID, (Pel *) ref - ((vFilterSize >> 1) - 1) * refStride, refStride, tmpBuf.buf, tmpBuf.stride, 
bw, bh + vFilterSize - 1, xFrac, false, chFmt, clpRng); JVET_J0090_SET_CACHE_ENABLE(false); m_if.filterVer(compID, tmpBuf.buf + ((vFilterSize >> 1) - 1) * tmpBuf.stride, tmpBuf.stride, dst, dstStride, bw, bh, yFrac, false, isLast, chFmt, clpRng); JVET_J0090_SET_CACHE_ENABLE(true); } #endif #if AFFINE_ENC_OPT if (enablePROF || calGradient) #else if (enablePROF) #endif { #if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT const int shift = IF_INTERNAL_FRAC_BITS(clpRng.bd); #else const int shift = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd)); #endif const int xOffset = xFrac >> 3; const int yOffset = yFrac >> 3; const int refOffset = (blockHeight + 1) * refStride; const int dstOffset = (blockHeight + 1) * dstStride; const Pel *refPel = ref - (1 - yOffset) * refStride + xOffset - 1; Pel * dstPel = dst - dstStride - 1; for (int pw = 0; pw < blockWidth + 2; pw++) { dstPel[pw] = leftShift_round(refPel[pw], shift) - (Pel) IF_INTERNAL_OFFS; dstPel[pw + dstOffset] = leftShift_round(refPel[pw + refOffset], shift) - (Pel) IF_INTERNAL_OFFS; } refPel = ref + yOffset * refBuf.stride + xOffset; dstPel = dst; for (int ph = 0; ph < blockHeight; ph++, refPel += refStride, dstPel += dstStride) { dstPel[-1] = leftShift_round(refPel[-1], shift) - (Pel) IF_INTERNAL_OFFS; dstPel[blockWidth] = leftShift_round(refPel[blockWidth], shift) - (Pel) IF_INTERNAL_OFFS; } #if AFFINE_RM_CONSTRAINTS_AND_OPT gradOffset = gradLineOffset + w; #if AFFINE_ENC_OPT g_pelBufOP.profGradFilter(dstExtBuf.buf, dstExtBuf.stride, blockWidth + 2, blockHeight + 2, width, m_gradX0 + gradOffset, m_gradY0 + gradOffset, clpRng.bd); #else g_pelBufOP.profGradFilter(dstExtBuf.buf, dstExtBuf.stride, blockWidth + 2, blockHeight + 2, cuExtW, m_gradX0 + gradOffset, m_gradY0 + gradOffset, clpRng.bd); #endif #else #if AFFINE_ENC_OPT gradOffset = gradLineOffset + w; g_pelBufOP.profGradFilter(dstExtBuf.buf, dstExtBuf.stride, blockWidth + 2, blockHeight + 2, width, m_gradX0 + gradOffset, m_gradY0 + gradOffset, clpRng.bd); #else PelBuf 
gradXBuf = gradXExt.subBuf(0, 0, blockWidth + 2, blockHeight + 2); PelBuf gradYBuf = gradYExt.subBuf(0, 0, blockWidth + 2, blockHeight + 2); g_pelBufOP.profGradFilter(dstExtBuf.buf, dstExtBuf.stride, blockWidth + 2, blockHeight + 2, gradXBuf.stride, gradXBuf.buf, gradYBuf.buf, clpRng.bd); #endif #endif #if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT const Pel offset = (1 << (shift - 1)) + IF_INTERNAL_OFFS; #else const int shiftNum = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd)); const Pel offset = (1 << (shiftNum - 1)) + IF_INTERNAL_OFFS; #endif #if AFFINE_RM_CONSTRAINTS_AND_OPT Pel* src = dst; #if AFFINE_ENC_OPT Pel* gX = m_gradX0 + gradOffset + width + 1; Pel* gY = m_gradY0 + gradOffset + width + 1; #else Pel* gX = m_gradX0 + gradOffset + cuExtW + 1; Pel* gY = m_gradY0 + gradOffset + cuExtW + 1; #endif Pel * dstY = dstBuf.buf + w + h * dstBuf.stride; for (int sh = 0; sh < blockHeight; sh += AFFINE_MIN_BLOCK_SIZE) { for (int sw = 0; sw < blockWidth; sw += AFFINE_MIN_BLOCK_SIZE) { #if AFFINE_ENC_OPT g_pelBufOP.applyPROF(dstY + sw, dstBuf.stride, src + sw, dstExtBuf.stride, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE, gX + sw, gY + sw, width, dMvScaleHor, dMvScaleVer, AFFINE_MIN_BLOCK_SIZE, bi, shift, offset, clpRng); #else g_pelBufOP.applyPROF(dstY + sw, dstBuf.stride, src + sw, dstExtBuf.stride, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE, gX + sw, gY + sw, cuExtW, dMvScaleHor, dMvScaleVer, AFFINE_MIN_BLOCK_SIZE, bi, shift, offset, clpRng); #endif } src += (dstStride << 2); #if AFFINE_ENC_OPT gX += (width << 2); gY += (width << 2); #else gX += (cuExtW << 2); gY += (cuExtW << 2); #endif dstY += (dstBuf.stride << 2); } #else Pel *src = dstExtBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H); #if AFFINE_ENC_OPT Pel* gX = m_gradX0 + gradOffset + width + 1; Pel* gY = m_gradY0 + gradOffset + width + 1; Pel * dstY = dstBuf.bufAt(w, h); g_pelBufOP.applyPROF(dstY, dstBuf.stride, src, dstExtBuf.stride, blockWidth, blockHeight, gX, gY, width, dMvScaleHor, dMvScaleVer, blockWidth, 
bi, shift, offset, clpRng);
#else
          Pel *gX = gradXBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H);
          Pel *gY = gradYBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H);
          Pel *dstY = dstBuf.bufAt(w, h);
#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
          g_pelBufOP.applyPROF(dstY, dstBuf.stride, src, dstExtBuf.stride, blockWidth, blockHeight, gX, gY, gradXBuf.stride, dMvScaleHor, dMvScaleVer, blockWidth, bi, shift, offset, clpRng);
#else
          g_pelBufOP.applyPROF(dstY, dstBuf.stride, src, dstExtBuf.stride, blockWidth, blockHeight, gX, gY, gradXBuf.stride, dMvScaleHor, dMvScaleVer, blockWidth, bi, shiftNum, offset, clpRng);
#endif
#endif
#endif
        }
      }
    }
    // One row of sub-blocks is done: step the per-row offsets to the next row.
#if AFFINE_RM_CONSTRAINTS_AND_OPT || AFFINE_ENC_OPT
    gradLineOffset += gradSubBlkStride;
#endif
#if AFFINE_RM_CONSTRAINTS_AND_OPT
    miLine += stride;
    miLine2 += stride;
#endif
  }
#if INTER_LIC
  // Keep a copy of this component's prediction as it is before the LIC transform below.
  if (m_storeBeforeLIC)
  {
    m_predictionBeforeLIC.bufs[compID].copyFrom(dstBuf);
  }
#if RPR_ENABLE
  if( pu.cu->LICFlag && PU::checkRprLicCondition( pu ) )
#else
  if (pu.cu->LICFlag)
#endif
  {
    // Derive (shift, scale, offset) from the collected templates and apply them
    // as a clipped linear transform to the whole component buffer.
    PelBuf &dstBuf = dstPic.bufs[compID];
    int LICshift = 0, scale = 0, offset = 0;
    xGetLICParamGeneral(*pu.cu, compID, numTemplate, refLeftTemplate, refAboveTemplate, recLeftTemplate, recAboveTemplate, LICshift, scale, offset);
    const ClpRng& clpRng = pu.cu->cs->slice->clpRng(compID);
    dstBuf.linearTransform(scale, LICshift, offset, true, clpRng);
  }
#endif
}

/**
 * Bi-directional optical flow (BIO / BDOF) on the luma block described by yuvDst.Y().
 *
 * The two luma predictions are NOT taken from yuvSrc0 / yuvSrc1: they are read from
 * m_filteredBlockTmp[2][COMPONENT_Y] and m_filteredBlockTmp[3][COMPONENT_Y], using a
 * row pitch of (width + 2 * BIO_EXTEND_SIZE + 2). Gradients are written to
 * m_gradX0 / m_gradX1 / m_gradY0 / m_gradY1 and the refined samples to yuvDst.Y().
 *
 * Three build-time variants share this body:
 *  - MULTI_PASS_DMVR with isBdofMvRefine == true: per sub-block, either write a plain
 *    average / BDOF result or only record a motion offset in m_bdofSubPuMvOffset, then return.
 *  - MULTI_PASS_DMVR || SAMPLE_BASED_BDOF: run subBlockBiOptFlow() on every sub-block, then return.
 *  - otherwise: the legacy path that works on 4x4 units via calcBIOSums / xAddBIOAvg4.
 *
 * \param isBdofMvRefine   (MULTI_PASS_DMVR only) selects the motion-offset derivation branch
 * \param bdofBlockOffset  (MULTI_PASS_DMVR only) base index into m_bdofSubPuMvOffset
 * \param pu               supplies the luma clipping range, pu.bdmvrRefine and, with
 *                         JVET_Z0136_OOB, the masks pu.cs->mcMask[0..1]
 * \param yuvSrc0          not referenced in this body
 * \param yuvSrc1          not referenced in this body
 * \param refIdx0          not referenced in this body
 * \param refIdx1          not referenced in this body
 * \param yuvDst           destination; only the Y plane is written, its size defines width/height
 * \param clipBitDepths    provides the luma reconstruction bit depth
 * \param mcMask           (JVET_Z0136_OOB only) not referenced in this body; the masks are read
 *                         from pu.cs->mcMask instead. NOTE(review): confirm both always alias.
 * \param mcMaskChroma     (JVET_Z0136_OOB only) not referenced in this body
 * \param isOOB            (JVET_Z0136_OOB only) two flags, dereferenced unconditionally (must be
 *                         non-null); the per-sub-block mask scan runs only if one of them is set
 */
#if MULTI_PASS_DMVR
#if JVET_Z0136_OOB
void InterPrediction::applyBiOptFlow(const bool isBdofMvRefine, const int bdofBlockOffset, const PredictionUnit &pu, const CPelUnitBuf &yuvSrc0, const CPelUnitBuf &yuvSrc1, const int &refIdx0, const int &refIdx1, PelUnitBuf &yuvDst, const BitDepths &clipBitDepths, bool *mcMask[2], bool *mcMaskChroma[2], bool *isOOB)
#else
void InterPrediction::applyBiOptFlow(const bool isBdofMvRefine, const int bdofBlockOffset, const PredictionUnit &pu, const CPelUnitBuf &yuvSrc0, const CPelUnitBuf &yuvSrc1, const int &refIdx0, const int &refIdx1, PelUnitBuf &yuvDst, const BitDepths &clipBitDepths)
#endif
#else
#if JVET_Z0136_OOB
void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf &yuvSrc0, const CPelUnitBuf &yuvSrc1, const int &refIdx0, const int &refIdx1, PelUnitBuf &yuvDst, const BitDepths &clipBitDepths, bool *mcMask[2], bool *mcMaskChroma[2], bool *isOOB)
#else
void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf &yuvSrc0, const CPelUnitBuf &yuvSrc1, const int &refIdx0, const int &refIdx1, PelUnitBuf &yuvDst, const BitDepths &clipBitDepths)
#endif
#endif
{
  const int height = yuvDst.Y().height;
  const int width = yuvDst.Y().width;
  // Gradient planes cover the block plus BIO_EXTEND_SIZE samples on every side.
  int heightG = height + 2 * BIO_EXTEND_SIZE;
  int widthG = width + 2 * BIO_EXTEND_SIZE;
  // Offset of the first in-block sample inside a gradient plane (legacy path only).
  int offsetPos = widthG*BIO_EXTEND_SIZE + BIO_EXTEND_SIZE;

  Pel* gradX0 = m_gradX0;
  Pel* gradX1 = m_gradX1;
  Pel* gradY0 = m_gradY0;
  Pel* gradY1 = m_gradY1;

  // Row pitch of the prediction buffers: two samples wider than the gradient planes.
  int stridePredMC = widthG + 2;
  // Skip one row and one column of the prediction buffers.
  const Pel* srcY0 = m_filteredBlockTmp[2][COMPONENT_Y] + stridePredMC + 1;
  const Pel* srcY1 = m_filteredBlockTmp[3][COMPONENT_Y] + stridePredMC + 1;
  const int src0Stride = stridePredMC;
  const int src1Stride = stridePredMC;

  Pel* dstY = yuvDst.Y().buf;
  const int dstStride = yuvDst.Y().stride;
  const Pel* srcY0Temp = srcY0;
  const Pel* srcY1Temp = srcY1;

  // Compute horizontal / vertical gradients of each of the two predictions.
  for (int refList = 0; refList < NUM_REF_PIC_LIST_01; refList++)
  {
    Pel* dstTempPtr = m_filteredBlockTmp[2 + refList][COMPONENT_Y] + stridePredMC + 1;
    Pel* gradY = (refList == 0) ? m_gradY0 : m_gradY1;
    Pel* gradX = (refList == 0) ? m_gradX0 : m_gradX1;

    xBioGradFilter(dstTempPtr, stridePredMC, widthG, heightG, widthG, gradX, gradY, clipBitDepths.recon[toChannelType(COMPONENT_Y)]);
#if !MULTI_PASS_DMVR && !SAMPLE_BASED_BDOF
    // Legacy build only: replicate the block's edge samples into the one-sample border
    // (left/right columns first, then the rows above and below).
    Pel* padStr = m_filteredBlockTmp[2 + refList][COMPONENT_Y] + 2 * stridePredMC + 2;
    for (int y = 0; y< height; y++)
    {
      padStr[-1] = padStr[0];
      padStr[width] = padStr[width - 1];
      padStr += stridePredMC;
    }

    padStr = m_filteredBlockTmp[2 + refList][COMPONENT_Y] + 2 * stridePredMC + 1;
    ::memcpy(padStr - stridePredMC, padStr, sizeof(Pel)*(widthG));
    ::memcpy(padStr + height*stridePredMC, padStr + (height - 1)*stridePredMC, sizeof(Pel)*(widthG));
#endif
  }

  const ClpRng& clpRng = pu.cu->cs->slice->clpRng(COMPONENT_Y);
  const int bitDepth = clipBitDepths.recon[toChannelType(COMPONENT_Y)];
#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
  const int shiftNum = IF_INTERNAL_FRAC_BITS(bitDepth) + 1;
#else
  const int shiftNum = IF_INTERNAL_PREC + 1 - bitDepth;
#endif
  // Rounding term for the final right shift plus the internal offset of both predictions.
  const int offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
  // Clipping bound (+/-15) handed to the refinement derivation.
  const int limit = ( 1 << 4 ) - 1;

#if MULTI_PASS_DMVR || SAMPLE_BASED_BDOF
  // Running offsets of the current sub-block in the prediction, parameter and
  // destination buffers. They advance by bioDx per sub-block and by the
  // *IncrementY values at the end of each sub-block row.
  int srcBlockOffset = (stridePredMC + 1) * BIO_EXTEND_SIZE;
  int bioBlockParamOffset = (widthG + 1);
  int dstBlockOffset = 0;
  // Sub-block size: BDOF_SUBPU_DIM, or the whole block dimension if that is smaller.
  const int bioDx = (width < BDOF_SUBPU_DIM) ? width : BDOF_SUBPU_DIM;
  const int bioDy = (height < BDOF_SUBPU_DIM) ? height : BDOF_SUBPU_DIM;
  // After a row of sub-blocks the offsets have advanced by 'width'; these move them
  // back to column 0 and down by (1 << BDOF_SUBPU_DIM_LOG2) rows.
  const int srcBlockOffsetIncrementY = (stridePredMC << BDOF_SUBPU_DIM_LOG2) - width;
  const int dstBlockOffsetIncrementY = (dstStride << BDOF_SUBPU_DIM_LOG2) - width;
  const int bioBlockParamOffsetIncrementY = (widthG << BDOF_SUBPU_DIM_LOG2) - width;
#endif

#if MULTI_PASS_DMVR
  if (isBdofMvRefine)
  {
    // Same parameter computation as the plain path, but additionally fills m_dI.
    g_pelBufOP.calcBIOParameter(srcY0, srcY1, gradX0, gradX1, gradY0, gradY1, widthG, heightG, src0Stride, src1Stride, widthG, bitDepth, m_absGx, m_absGy, m_dIx, m_dIy, m_signGxGy, m_dI);
    m_bdofMvRefined = true;

    int bioSubPuMvIndex = 0;
    // m_bdofSubPuMvOffset is indexed with a row pitch of BDOF_SUBPU_STRIDE entries.
    const int bioSubPuMvIndexIncrementY = BDOF_SUBPU_STRIDE - std::max(1, (width >> BDOF_SUBPU_DIM_LOG2));
    const int bioBlockDistTh = (bioDx * bioDy) << (5 - 4); //4 is to compensate the shift4 of dI in calcBIOParameter
    Pel* dI = m_dI + 2 + 2 * widthG;

    for (int yBlock = 0; yBlock < height; yBlock += bioDy)
    {
      for (int xBlock = 0; xBlock < width; xBlock += bioDx)
      {
        srcY0Temp = srcY0 + srcBlockOffset;
        srcY1Temp = srcY1 + srcBlockOffset;

        // Case 1: the absolute sum of dI over the sub-block is below the threshold:
        // zero motion offset and a plain (non-BDOF) average of the two predictions.
        int costSubblockSAD = 0;
        Pel* tmp = dI + bioBlockParamOffset;
        g_pelBufOP.calAbsSum(tmp, widthG, bioDx, bioDy, &costSubblockSAD);
        if (costSubblockSAD < bioBlockDistTh)
        {
#if JVET_Z0136_OOB
          int maskOffset = yBlock * width + xBlock;
          bool *pSubMcMask[2] = { pu.cs->mcMask[0] + maskOffset, pu.cs->mcMask[1] + maskOffset };
          bool isOOBTmp[2] = { false, false };
          if (isOOB[0] || isOOB[1])
          {
            // isOOBTmp[dir] becomes true as soon as any mask entry inside this
            // sub-block is set; the scan for that direction stops at the first hit.
            for (int dir = 0; dir < 2; dir++)
            {
              bool *pMcMask = (dir == 0) ? pSubMcMask[0] : pSubMcMask[1];
              for (int y = 0; y < bioDy && !isOOBTmp[dir]; y++)
              {
                for (int x = 0; x < bioDx && !isOOBTmp[dir]; x++)
                {
                  isOOBTmp[dir] |= pMcMask[x];
                }
                pMcMask += width;
              }
            }
          }
          m_bdofSubPuMvOffset[bdofBlockOffset + bioSubPuMvIndex].setZero();
          if (bioDx == 4)
          {
            g_pelBufOP.addAvg4(srcY0Temp, src0Stride, srcY1Temp, src1Stride, dstY + dstBlockOffset, dstStride, bioDx, bioDy, shiftNum, offset, clpRng, pSubMcMask, width, isOOBTmp);
          }
          else
          {
            g_pelBufOP.addAvg8(srcY0Temp, src0Stride, srcY1Temp, src1Stride, dstY + dstBlockOffset, dstStride, bioDx, bioDy, shiftNum, offset, clpRng, pSubMcMask, width, isOOBTmp);
          }
#else
          m_bdofSubPuMvOffset[bdofBlockOffset + bioSubPuMvIndex].setZero();
          if (bioDx == 4)
          {
            g_pelBufOP.addAvg4(srcY0Temp, src0Stride, srcY1Temp, src1Stride, dstY + dstBlockOffset, dstStride, bioDx, bioDy, shiftNum, offset, clpRng);
          }
          else
          {
            g_pelBufOP.addAvg8(srcY0Temp, src0Stride, srcY1Temp, src1Stride, dstY + dstBlockOffset, dstStride, bioDx, bioDy, shiftNum, offset, clpRng);
          }
#endif
          srcBlockOffset += bioDx;
          dstBlockOffset += bioDx;
          bioBlockParamOffset += bioDx;
          bioSubPuMvIndex += 1;
          continue;
        }

        // Case 2: the PU does not use BDMVR refinement: zero motion offset and
        // regular BDOF sample refinement for this sub-block.
        if (!pu.bdmvrRefine)
        {
          m_bdofSubPuMvOffset[bdofBlockOffset + bioSubPuMvIndex].setZero();
#if JVET_Z0136_OOB
          int maskOffset = yBlock * width + xBlock;
          bool *pSubMcMask[2] = { pu.cs->mcMask[0] + maskOffset, pu.cs->mcMask[1] + maskOffset };
          bool isOOBTmp[2] = { false, false };
          if (isOOB[0] || isOOB[1])
          {
            // Same early-exit mask scan as in case 1.
            for (int dir = 0; dir < 2; dir++)
            {
              bool *pMcMask = (dir == 0) ? pSubMcMask[0] : pSubMcMask[1];
              for (int y = 0; y < bioDy && !isOOBTmp[dir]; y++)
              {
                for (int x = 0; x < bioDx && !isOOBTmp[dir]; x++)
                {
                  isOOBTmp[dir] |= pMcMask[x];
                }
                pMcMask += width;
              }
            }
          }
          subBlockBiOptFlow(dstY + dstBlockOffset, dstStride, srcY0Temp, src0Stride, srcY1Temp, src1Stride, bioBlockParamOffset, widthG, bioDx, bioDy, clpRng, shiftNum, offset, limit, pSubMcMask, width, isOOBTmp);
#else
          subBlockBiOptFlow(dstY + dstBlockOffset, dstStride, srcY0Temp, src0Stride, srcY1Temp, src1Stride, bioBlockParamOffset, widthG, bioDx, bioDy, clpRng, shiftNum, offset, limit);
#endif
          srcBlockOffset += bioDx;
          dstBlockOffset += bioDx;
          bioBlockParamOffset += bioDx;
          bioSubPuMvIndex += 1;
          continue;
        }

        // Case 3: derive one motion offset for the whole sub-block from the parameter
        // sums taken over a (bioDx + 4) x (bioDy + 4) window.
        int sumAbsGX_block = 0, sumAbsGY_block = 0, sumDIX_block = 0, sumDIY_block = 0, sumSignGY_GX_block = 0;
        g_pelBufOP.calcBIOParamSum4(m_absGx + bioBlockParamOffset, m_absGy + bioBlockParamOffset, m_dIx + bioBlockParamOffset, m_dIy + bioBlockParamOffset, m_signGxGy + bioBlockParamOffset, bioDx + 4, bioDy + 4, widthG, &sumAbsGX_block, &sumAbsGY_block, &sumDIX_block, &sumDIY_block, &sumSignGY_GX_block);
        // A zero gradient sum yields a zero component instead of a division.
        int tmpx_block = (sumAbsGX_block == 0 ? 0 : rightShiftMSB(sumDIX_block << 3, sumAbsGX_block));
        int tmpData_block = ((tmpx_block * sumSignGY_GX_block) >> 1);
        int tmpy_block = (sumAbsGY_block == 0 ? 0 : rightShiftMSB(((sumDIY_block << 3) - tmpData_block), sumAbsGY_block));
        tmpx_block = Clip3(-256, 256, tmpx_block);
        tmpy_block = Clip3(-256, 256, tmpy_block);

        // Divide by 8, rounding half away from zero (the magnitude is rounded, then the
        // sign restored); with the clip above each component ends up within [-32, 32].
        Mv bioMv;
        if (tmpx_block >= 0)
          bioMv.hor = ((tmpx_block + 4) >> 3);
        else
          bioMv.hor = (-1) * ((((-1) * tmpx_block) + 4) >> 3);
        if (tmpy_block >= 0)
          bioMv.ver = ((tmpy_block + 4) >> 3);
        else
          bioMv.ver = (-1) * ((((-1) * tmpy_block) + 4) >> 3);
        m_bdofSubPuMvOffset[bdofBlockOffset + bioSubPuMvIndex] = bioMv;

        // With a non-zero offset nothing is written to dstY for this sub-block here;
        // presumably the caller redoes the prediction with the stored offset -- confirm.
        if (bioMv.hor == 0 && bioMv.ver == 0)
        {
          // by doing this, we do not need to do second LUMA MC
#if JVET_Z0136_OOB
          int maskOffset = yBlock * width + xBlock;
          bool *pSubMcMask[2] = { pu.cs->mcMask[0] + maskOffset, pu.cs->mcMask[1] + maskOffset };
          bool isOOBTmp[2] = { false, false };
          if (isOOB[0] || isOOB[1])
          {
            // Same early-exit mask scan as in case 1.
            for (int dir = 0; dir < 2; dir++)
            {
              bool *pMcMask = (dir == 0) ? pSubMcMask[0] : pSubMcMask[1];
              for (int y = 0; y < bioDy && !isOOBTmp[dir]; y++)
              {
                for (int x = 0; x < bioDx && !isOOBTmp[dir]; x++)
                {
                  isOOBTmp[dir] |= pMcMask[x];
                }
                pMcMask += width;
              }
            }
          }
          subBlockBiOptFlow(dstY + dstBlockOffset, dstStride, srcY0Temp, src0Stride, srcY1Temp, src1Stride, bioBlockParamOffset, widthG, bioDx, bioDy, clpRng, shiftNum, offset, limit, pSubMcMask, width, isOOBTmp);
#else
          subBlockBiOptFlow(dstY + dstBlockOffset, dstStride, srcY0Temp, src0Stride, srcY1Temp, src1Stride, bioBlockParamOffset, widthG, bioDx, bioDy, clpRng, shiftNum, offset, limit);
#endif
        }
        srcBlockOffset += bioDx;
        dstBlockOffset += bioDx;
        bioBlockParamOffset += bioDx;
        bioSubPuMvIndex += 1;
      }
      // Next row of sub-blocks.
      srcBlockOffset += srcBlockOffsetIncrementY;
      dstBlockOffset += dstBlockOffsetIncrementY;
      bioBlockParamOffset += bioBlockParamOffsetIncrementY;
      bioSubPuMvIndex += bioSubPuMvIndexIncrementY;
    }
    return;
  }
#endif

#if MULTI_PASS_DMVR || SAMPLE_BASED_BDOF
  // Plain sub-block path: same parameters as above but without the dI plane (nullptr),
  // then BDOF sample refinement on every sub-block.
  g_pelBufOP.calcBIOParameter(srcY0, srcY1, gradX0, gradX1, gradY0, gradY1, widthG, heightG, src0Stride, src1Stride, widthG, bitDepth, m_absGx, m_absGy, m_dIx, m_dIy, m_signGxGy, nullptr);
  for (int yBlock = 0; yBlock < height; yBlock += bioDy)
  {
    for (int xBlock = 0; xBlock < width; xBlock += bioDx)
    {
      srcY0Temp = srcY0 + srcBlockOffset;
      srcY1Temp = srcY1 + srcBlockOffset;
#if JVET_Z0136_OOB
      int maskOffset = yBlock * width + xBlock;
      bool *pSubMcMask[2] = { pu.cs->mcMask[0] + maskOffset, pu.cs->mcMask[1] + maskOffset };
      bool isOOBTmp[2] = { false, false };
      if (isOOB[0] || isOOB[1])
      {
        // Flag a direction as soon as one mask entry of the sub-block is set.
        for (int dir = 0; dir < 2; dir++)
        {
          bool *pMcMask = (dir == 0) ? pSubMcMask[0] : pSubMcMask[1];
          for (int y = 0; y < bioDy && !isOOBTmp[dir]; y++)
          {
            for (int x = 0; x < bioDx && !isOOBTmp[dir]; x++)
            {
              isOOBTmp[dir] |= pMcMask[x];
            }
            pMcMask += width;
          }
        }
      }
      subBlockBiOptFlow(dstY + dstBlockOffset, dstStride, srcY0Temp, src0Stride, srcY1Temp, src1Stride, bioBlockParamOffset, widthG, bioDx, bioDy, clpRng, shiftNum, offset, limit, pSubMcMask, width, isOOBTmp);
#else
      subBlockBiOptFlow(dstY + dstBlockOffset, dstStride, srcY0Temp, src0Stride, srcY1Temp, src1Stride, bioBlockParamOffset, widthG, bioDx, bioDy, clpRng, shiftNum, offset, limit);
#endif
      srcBlockOffset += bioDx;
      dstBlockOffset += bioDx;
      bioBlockParamOffset += bioDx;
    }
    srcBlockOffset += srcBlockOffsetIncrementY;
    dstBlockOffset += dstBlockOffsetIncrementY;
    bioBlockParamOffset += bioBlockParamOffsetIncrementY;
  }
  return;
#endif

  // Legacy path on 4x4 units. It is reached only when neither MULTI_PASS_DMVR nor
  // SAMPLE_BASED_BDOF is enabled, because the block above returns unconditionally.
  int xUnit = (width >> 2);
  int yUnit = (height >> 2);

  Pel *dstY0 = dstY;
  gradX0 = m_gradX0;
  gradX1 = m_gradX1;
  gradY0 = m_gradY0;
  gradY1 = m_gradY1;

  Pel *pGradX0Tmp, *pGradX1Tmp, *pGradY0Tmp, *pGradY1Tmp;
  const Pel *SrcY0Tmp, *SrcY1Tmp;
  int tmpx = 0, tmpy = 0;
  int sumAbsGX = 0, sumAbsGY = 0, sumDIX = 0, sumDIY = 0, sumSignGY_GX = 0;
  // Offsets of the current 4x4 unit (…Ofst) and of the current unit row (…LineOfst).
  int gradOfst, srcOfst, dstOfst, gradLineOfst = 0, srcLineOfst = 0, dstLineOfst = 0;

  for (int yu = 0; yu < yUnit; yu++)
  {
    gradOfst = gradLineOfst;
    srcOfst = srcLineOfst;
    dstOfst = dstLineOfst;
    for (int xu = 0; xu < xUnit; xu++)
    {
      sumAbsGX = 0;
      sumAbsGY = 0;
      sumDIX = 0;
      sumDIY = 0, sumSignGY_GX = 0;

      pGradX0Tmp = m_gradX0 + gradOfst;
      pGradX1Tmp = m_gradX1 + gradOfst;
      pGradY0Tmp = m_gradY0 + gradOfst;
pGradY1Tmp = m_gradY1 + gradOfst;
      SrcY1Tmp = srcY1 + srcOfst;
      SrcY0Tmp = srcY0 + srcOfst;

      // Accumulate the gradient / difference sums for this 4x4 unit.
      g_pelBufOP.calcBIOSums(SrcY0Tmp, SrcY1Tmp, pGradX0Tmp, pGradX1Tmp, pGradY0Tmp, pGradY1Tmp, xu, yu, src0Stride, src1Stride, widthG, bitDepth, &sumAbsGX, &sumAbsGY, &sumDIX, &sumDIY, &sumSignGY_GX);
      // Horizontal refinement; a zero gradient sum yields zero instead of a division.
      tmpx = (sumAbsGX == 0 ? 0 : rightShiftMSB(sumDIX << 2, sumAbsGX));
      tmpx = Clip3(-limit, limit, tmpx);

      // tmpx * sumSignGY_GX, evaluated as high part and low 12 bits separately, then halved.
      int mainsGxGy = sumSignGY_GX >> 12;
      int secsGxGy = sumSignGY_GX & ((1 << 12) - 1);
      int tmpData = tmpx * mainsGxGy;
      tmpData = ((tmpData << 12) + tmpx*secsGxGy) >> 1;
      // Vertical refinement, corrected by the cross term computed above.
      tmpy = (sumAbsGY == 0 ? 0 : rightShiftMSB(((sumDIY << 2) - tmpData), sumAbsGY));
      tmpy = Clip3(-limit, limit, tmpy);

      // Move from the unit's extended origin to its first in-block sample.
      srcY0Temp = SrcY0Tmp + ( stridePredMC + 1 );
      srcY1Temp = SrcY1Tmp + ( stridePredMC + 1 );
      gradX0 = pGradX0Tmp + offsetPos;
      gradX1 = pGradX1Tmp + offsetPos;
      gradY0 = pGradY0Tmp + offsetPos;
      gradY1 = pGradY1Tmp + offsetPos;

      dstY0 = dstY + dstOfst;
      gradOfst += 4;
      srcOfst += 4;
      dstOfst += 4;

      xAddBIOAvg4(srcY0Temp, src0Stride, srcY1Temp, src1Stride, dstY0, dstStride, gradX0, gradX1, gradY0, gradY1, widthG, (1 << 2), (1 << 2), (int)tmpx, (int)tmpy, shiftNum, offset, clpRng);
    } // xu
    // Next row of 4x4 units.
    gradLineOfst += ( widthG << 2 );
    srcLineOfst += ( src0Stride << 2 );
    dstLineOfst += ( dstStride << 2 );
  } // yu
}

#if MULTI_PASS_DMVR || SAMPLE_BASED_BDOF
/**
 * BDOF sample refinement for one width x height sub-block.
 *
 * Fills m_tmpx_pixel_32bit / m_tmpy_pixel_32bit with one (x, y) refinement value per
 * sample (row pitch = width) and then lets g_pelBufOP.addBIOAvgN combine src0, src1 and
 * the gradients m_gradX0/1, m_gradY0/1 into dstY.
 *  - SAMPLE_BASED_BDOF: the values come from per-sample sums (calcBIOParamSum5 followed
 *    by calcBIOClippedVxVy).
 *  - otherwise: one value pair per 4x4 unit, derived from a window extended by one
 *    sample on each side and replicated to the unit's 16 samples.
 *
 * \param dstY            destination samples of the sub-block
 * \param dstStride       row pitch of dstY
 * \param src0            first prediction of the sub-block
 * \param src0Stride      row pitch of src0
 * \param src1            second prediction of the sub-block
 * \param src1Stride      row pitch of src1
 * \param bioParamOffset  offset of the sub-block inside the m_absGx / m_absGy / m_dIx /
 *                        m_dIy / m_signGxGy planes; advanced locally by two rows and two
 *                        columns before it is used to address the gradient planes
 * \param bioParamStride  row pitch of the parameter and gradient planes
 * \param width           sub-block width
 * \param height          sub-block height
 * \param clpRng          clipping range forwarded to addBIOAvgN
 * \param shiftNum        final right shift forwarded to addBIOAvgN
 * \param offset          rounding offset forwarded to addBIOAvgN
 * \param limit           symmetric clipping bound of the refinement values
 * \param mcMask          (JVET_Z0136_OOB only) masks forwarded to addBIOAvgN
 * \param mcStride        (JVET_Z0136_OOB only) row pitch of the masks
 * \param isOOB           (JVET_Z0136_OOB only) flags forwarded to addBIOAvgN
 */
#if JVET_Z0136_OOB
void InterPrediction::subBlockBiOptFlow(Pel* dstY, const int dstStride, const Pel* src0, const int src0Stride, const Pel* src1, const int src1Stride, int bioParamOffset, const int bioParamStride, int width, int height, const ClpRng& clpRng, const int shiftNum, const int offset, const int limit, bool *mcMask[2], int mcStride, bool *isOOB)
#else
void InterPrediction::subBlockBiOptFlow(Pel* dstY, const int dstStride, const Pel* src0, const int src0Stride, const Pel* src1, const int src1Stride, int bioParamOffset, const int bioParamStride, int width, int height, const ClpRng& clpRng, const int shiftNum, const int offset, const int limit)
#endif
{
#if SAMPLE_BASED_BDOF
  g_pelBufOP.calcBIOParamSum5(m_absGx + bioParamOffset, m_absGy + bioParamOffset, m_dIx + bioParamOffset, m_dIy + bioParamOffset, m_signGxGy + bioParamOffset, bioParamStride, width, height, m_sumAbsGX_pixel_32bit, m_sumAbsGY_pixel_32bit, m_sumDIX_pixel_32bit, m_sumDIY_pixel_32bit, m_sumSignGY_GX_pixel_32bit);
  // sumDIX and sumDIY left shift by 2 is calculated in previous step
  const int bioSubblockSize = width * height;
  // Replace each gradient sum by floorLog2 of itself. A zero sum instead zeroes the
  // matching numerator (and, for Y, the cross term) and stores 32 in place of the log.
  for (int pixel_index = 0; pixel_index < bioSubblockSize; pixel_index++)
  {
    if (m_sumAbsGX_pixel_32bit[pixel_index] == 0)
    {
      m_sumDIX_pixel_32bit[pixel_index] = 0;
      m_sumAbsGX_pixel_32bit[pixel_index] = 32;
    }
    else
    {
      m_sumAbsGX_pixel_32bit[pixel_index] = floorLog2(m_sumAbsGX_pixel_32bit[pixel_index]);
    }
    if (m_sumAbsGY_pixel_32bit[pixel_index] == 0)
    {
      m_sumDIY_pixel_32bit[pixel_index] = 0;
      m_sumSignGY_GX_pixel_32bit[pixel_index] = 0;
      m_sumAbsGY_pixel_32bit[pixel_index] = 32;
    }
    else
    {
      m_sumAbsGY_pixel_32bit[pixel_index] = floorLog2(m_sumAbsGY_pixel_32bit[pixel_index]);
    }
  }
  g_pelBufOP.calcBIOClippedVxVy(m_sumDIX_pixel_32bit, m_sumAbsGX_pixel_32bit, m_sumDIY_pixel_32bit, m_sumAbsGY_pixel_32bit, m_sumSignGY_GX_pixel_32bit, limit, bioSubblockSize, m_tmpx_pixel_32bit, m_tmpy_pixel_32bit);
  // Advance by two rows and two columns; used below to address the gradient planes.
  bioParamOffset += ((bioParamStride + 1) << 1);
#else
  // Advance by two rows and two columns before the unit windows are addressed.
  bioParamOffset += ((bioParamStride + 1) << 1);
  int unitSize = 4, extendSize = 1; // unitSize = 1, extendSize = 2 gives same results as per-pixel BDOF
  for (int yUnit = 0; yUnit < height; yUnit += unitSize)
  {
    for (int xUnit = 0; xUnit < width; xUnit += unitSize)
    {
      int subTmpx = 0, subTmpy = 0;
      int subSumGx = 0, subSumGy = 0, subSumDIX = 0, subSumDIY = 0, subSumSignGY_GX = 0;
      // Sum the parameters over the unit extended by extendSize samples on every side.
      int subBioParamOffset = bioParamOffset + (yUnit - extendSize) * bioParamStride + xUnit;
      for (int ySub = -extendSize; ySub < (extendSize + unitSize); ySub++)
      {
        for (int xSub = -extendSize; xSub < (extendSize + unitSize); xSub++)
        {
          subSumGx += m_absGx[subBioParamOffset + xSub];
          subSumGy += m_absGy[subBioParamOffset + xSub];
          subSumDIX += m_dIx[subBioParamOffset + xSub];
          subSumDIY += m_dIy[subBioParamOffset + xSub];
          subSumSignGY_GX += m_signGxGy[subBioParamOffset + xSub];
        }
        subBioParamOffset += bioParamStride;
      }
      // Horizontal refinement; a zero gradient sum yields zero instead of a division.
      subTmpx = (subSumGx == 0 ? 0 : rightShiftMSB(subSumDIX << 2, subSumGx));
      subTmpx = Clip3(-limit, limit, subTmpx);
      // subTmpx * subSumSignGY_GX, evaluated as high part and low 12 bits, then halved.
      int mainsGxGy = subSumSignGY_GX >> 12;
      int secsGxGy = subSumSignGY_GX & ((1 << 12) - 1);
      int tmpData = subTmpx * mainsGxGy;
      tmpData = ((tmpData << 12) + subTmpx*secsGxGy) >> 1;
      subTmpy = (subSumGy == 0 ? 0 : rightShiftMSB(((subSumDIY << 2) - tmpData), subSumGy));
      subTmpy = Clip3(-limit, limit, subTmpy);
      // Replicate the unit's value pair to each of its samples.
      int curSubIdx = yUnit * width + xUnit;
      for (int ySub = 0; ySub < unitSize; ySub++)
      {
        for (int xSub = 0; xSub < unitSize; xSub++)
        {
          m_tmpx_pixel_32bit[curSubIdx + xSub] = subTmpx;
          m_tmpy_pixel_32bit[curSubIdx + xSub] = subTmpy;
        }
        curSubIdx += width;
      }
    }
  }
#endif
#if JVET_Z0136_OOB
  g_pelBufOP.addBIOAvgN(src0, src0Stride, src1, src1Stride, dstY, dstStride, m_gradX0 + bioParamOffset, m_gradX1 + bioParamOffset, m_gradY0 + bioParamOffset, m_gradY1 + bioParamOffset, bioParamStride, width, height, m_tmpx_pixel_32bit, m_tmpy_pixel_32bit, shiftNum, offset, clpRng, mcMask, mcStride, isOOB);
#else
  g_pelBufOP.addBIOAvgN(src0, src0Stride, src1, src1Stride, dstY, dstStride, m_gradX0 + bioParamOffset, m_gradX1 + bioParamOffset, m_gradY0 + bioParamOffset, m_gradY1 + bioParamOffset, bioParamStride, width, height, m_tmpx_pixel_32bit, m_tmpy_pixel_32bit, shiftNum, offset, clpRng);
#endif
}
#endif

/**
 * Forwards all arguments unchanged to g_pelBufOP.addBIOAvg4; it performs no work of its own.
 * In the visible code it is called with width = height = 4 and one (tmpx, tmpy) pair per unit.
 */
void InterPrediction::xAddBIOAvg4(const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng)
{
  g_pelBufOP.addBIOAvg4(src0, src0Stride, src1, src1Stride, dst, dstStride, gradX0, gradX1, gradY0, gradY1, gradStride, width, height, tmpx, tmpy, shift, offset, clpRng);
}

/**
 * Forwards all arguments unchanged to g_pelBufOP.bioGradFilter; gradX and gradY are the
 * output planes as used by applyBiOptFlow().
 */
void InterPrediction::xBioGradFilter(Pel* pSrc, int
srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, int bitDepth)
{
  // Dispatch through the g_pelBufOP function table; no work is done here.
  g_pelBufOP.bioGradFilter(pSrc, srcStride, width, height, gradStride, gradX, gradY, bitDepth);
}

/**
 * Forwards all arguments unchanged, in the same order, to g_pelBufOP.calcBIOPar.
 * No call to this wrapper is visible in this part of the file.
 */
void InterPrediction::xCalcBIOPar(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, int bitDepth)
{
  g_pelBufOP.calcBIOPar(srcY0Temp, srcY1Temp, gradX0, gradX1, gradY0, gradY1, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, src0Stride, src1Stride, gradStride, widthG, heightG, bitDepth);
}

/**
 * Forwards all arguments unchanged, in the same order, to g_pelBufOP.calcBlkGradient.
 * sGx2, sGy2, sGxGy, sGxdI and sGydI are passed on by non-const reference.
 * No call to this wrapper is visible in this part of the file.
 */
void InterPrediction::xCalcBlkGradient(int sx, int sy, int *arraysGx2, int *arraysGxGy, int *arraysGxdI, int *arraysGy2, int *arraysGydI, int &sGx2, int &sGy2, int &sGxGy, int &sGxdI, int &sGydI, int width, int height, int unitSize)
{
  g_pelBufOP.calcBlkGradient(sx, sy, arraysGx2, arraysGxGy, arraysGxdI, arraysGy2, arraysGydI, sGx2, sGy2, sGxGy, sGxdI, sGydI, width, height, unitSize);
}

#if MULTI_PASS_DMVR
void InterPrediction::xWeightedAverage(const bool isBdofMvRefine, const int bdofBlockOffset,
#else
void InterPrediction::xWeightedAverage(
#endif
  const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths,
#if JVET_Z0136_OOB
  const ClpRngs& clpRngs, const bool& bioApplied, bool lumaOnly, bool chromaOnly, PelUnitBuf* yuvDstTmp /*= NULL*/, bool *mcMask[2], int mcStride, bool *mcMaskChroma[2], int mcCStride, bool *isOOB)
#else
  const ClpRngs& clpRngs, const bool& bioApplied, bool lumaOnly, bool chromaOnly, PelUnitBuf* yuvDstTmp /*= NULL*/)
#endif
{
  // lumaOnly and chromaOnly are mutually exclusive.
  CHECK( (chromaOnly && lumaOnly), "should not happen" );

  const int iRefIdx0 = pu.refIdx[0];
  const int iRefIdx1 = pu.refIdx[1];

  // Both reference indices valid: combine the two predictions.
  if( iRefIdx0 >= 0 && iRefIdx1 >= 0 )
  {
if( pu.cu->BcwIdx != BCW_DEFAULT && (yuvDstTmp || !pu.ciipFlag) ) { CHECK(bioApplied, "Bcw is disallowed with BIO"); #if JVET_Z0136_OOB pcYuvDst.addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, pu.cu->BcwIdx, chromaOnly, lumaOnly, mcMask, mcStride, mcMaskChroma, mcCStride, isOOB); #else pcYuvDst.addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, pu.cu->BcwIdx, chromaOnly, lumaOnly); #endif #if JVET_Z0136_OOB if (yuvDstTmp) { yuvDstTmp->addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, BCW_DEFAULT, chromaOnly, lumaOnly, mcMask, mcStride, mcMaskChroma, mcCStride, isOOB); } #else if (yuvDstTmp) yuvDstTmp->addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, chromaOnly, lumaOnly); #endif return; } if (bioApplied) { #if !JVET_Z0136_OOB const int src0Stride = pu.lwidth() + 2 * BIO_EXTEND_SIZE + 2; const int src1Stride = pu.lwidth() + 2 * BIO_EXTEND_SIZE + 2; #if MULTI_PASS_DMVR || SAMPLE_BASED_BDOF const Pel* pSrcY0 = m_filteredBlockTmp[2][COMPONENT_Y] + (1 + BIO_EXTEND_SIZE) * (src0Stride + 1); const Pel* pSrcY1 = m_filteredBlockTmp[3][COMPONENT_Y] + (1 + BIO_EXTEND_SIZE) * (src1Stride + 1); #else const Pel* pSrcY0 = m_filteredBlockTmp[2][COMPONENT_Y] + 2 * src0Stride + 2; const Pel* pSrcY1 = m_filteredBlockTmp[3][COMPONENT_Y] + 2 * src1Stride + 2; #endif #endif bool bioEnabled = true; if (bioEnabled) { #if MULTI_PASS_DMVR #if JVET_Z0136_OOB applyBiOptFlow(isBdofMvRefine, bdofBlockOffset, pu, pcYuvSrc0, pcYuvSrc1, iRefIdx0, iRefIdx1, pcYuvDst, clipBitDepths, mcMask, mcMaskChroma, isOOB); #else applyBiOptFlow(isBdofMvRefine, bdofBlockOffset, pu, pcYuvSrc0, pcYuvSrc1, iRefIdx0, iRefIdx1, pcYuvDst, clipBitDepths); #endif #else #if JVET_Z0136_OOB applyBiOptFlow(pu, pcYuvSrc0, pcYuvSrc1, iRefIdx0, iRefIdx1, pcYuvDst, clipBitDepths, mcMask, mcMaskChroma, isOOB); #else applyBiOptFlow(pu, pcYuvSrc0, pcYuvSrc1, iRefIdx0, iRefIdx1, pcYuvDst, clipBitDepths); #endif #endif #if JVET_Z0136_OOB if (yuvDstTmp) { yuvDstTmp->addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, pu.cu->BcwIdx, false, true, mcMask, 
mcStride, mcMaskChroma, mcCStride, isOOB); } #else if (yuvDstTmp) yuvDstTmp->bufs[0].addAvg(CPelBuf(pSrcY0, src0Stride, pu.lumaSize()), CPelBuf(pSrcY1, src1Stride, pu.lumaSize()), clpRngs.comp[0]); #endif } else { #if JVET_Z0136_OOB pcYuvDst.addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, BCW_DEFAULT, chromaOnly, lumaOnly, mcMask, mcStride, mcMaskChroma, mcCStride, isOOB); if (yuvDstTmp) { yuvDstTmp->bufs[0].copyFrom(pcYuvDst.bufs[0]); } #else pcYuvDst.bufs[0].addAvg(CPelBuf(pSrcY0, src0Stride, pu.lumaSize()), CPelBuf(pSrcY1, src1Stride, pu.lumaSize()), clpRngs.comp[0]); if (yuvDstTmp) yuvDstTmp->bufs[0].copyFrom(pcYuvDst.bufs[0]); #endif } } if (!bioApplied && (lumaOnly || chromaOnly)) { #if JVET_Z0136_OOB pcYuvDst.addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, BCW_DEFAULT, chromaOnly, lumaOnly, mcMask, mcStride, mcMaskChroma, mcCStride, isOOB); #else pcYuvDst.addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, chromaOnly, lumaOnly); #endif } #if MULTI_PASS_DMVR // this part is to derive the chroma dst pred else if (!isBdofMvRefine || !bioApplied || yuvDstTmp != NULL) #else else #endif { #if JVET_Z0136_OOB if (bioApplied) { pcYuvDst.addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, BCW_DEFAULT, true, false, mcMask, mcStride, mcMaskChroma, mcCStride, isOOB); } else { pcYuvDst.addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, BCW_DEFAULT, chromaOnly, lumaOnly, mcMask, mcStride, mcMaskChroma, mcCStride, isOOB); } #else pcYuvDst.addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, bioApplied); #endif } if (yuvDstTmp) { if (bioApplied) { if (isChromaEnabled(yuvDstTmp->chromaFormat)) { yuvDstTmp->bufs[1].copyFrom(pcYuvDst.bufs[1]); yuvDstTmp->bufs[2].copyFrom(pcYuvDst.bufs[2]); } } else { yuvDstTmp->copyFrom(pcYuvDst, lumaOnly, chromaOnly); } } } else if( iRefIdx0 >= 0 && iRefIdx1 < 0 ) { if( pu.cu->geoFlag ) { #if JVET_W0097_GPM_MMVD_TM pcYuvDst.copyFrom(pcYuvSrc0, lumaOnly, chromaOnly); #else pcYuvDst.copyFrom( pcYuvSrc0 ); #endif } else { pcYuvDst.copyClip( pcYuvSrc0, clpRngs, lumaOnly, chromaOnly ); } if 
(yuvDstTmp) { yuvDstTmp->copyFrom( pcYuvDst, lumaOnly, chromaOnly ); } } else if( iRefIdx0 < 0 && iRefIdx1 >= 0 ) { if( pu.cu->geoFlag ) { #if JVET_W0097_GPM_MMVD_TM pcYuvDst.copyFrom(pcYuvSrc1, lumaOnly, chromaOnly); #else pcYuvDst.copyFrom( pcYuvSrc1 ); #endif } else { pcYuvDst.copyClip( pcYuvSrc1, clpRngs, lumaOnly, chromaOnly ); } if (yuvDstTmp) { yuvDstTmp->copyFrom(pcYuvDst, lumaOnly, chromaOnly); } } } #if JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING #if !INTER_LIC /** Interpolates a one-sample-thick template strip of tplSize samples from refBuf at the position displaced by mv. trueAfalseL == true: one row (bw = tplSize, bh = 1) read one row above offset (posW, posH) and written to predBlkTpl + posW. trueAfalseL == false: one column (bw = 1, bh = tplSize) read one column to the left and written to predBlkTpl + posH with destination stride 1. mv is split into integer and fractional parts using the component scale of compID. Only filterHor runs when yFrac == 0, only filterVer when xFrac == 0; otherwise a horizontal pass into m_filteredBlockTmp[0][compID] is followed by a vertical pass. */ template <bool trueAfalseL> void InterPrediction::xGetPredBlkTpl(const CodingUnit& cu, const ComponentID compID, const CPelBuf& refBuf, const Mv& mv, const int posW, const int posH, const int tplSize, Pel* predBlkTpl #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED , bool AML #endif ) { const int lumaShift = 2 + MV_FRACTIONAL_BITS_DIFF; const int horShift = (lumaShift + ::getComponentScaleX(compID, cu.chromaFormat)); const int verShift = (lumaShift + ::getComponentScaleY(compID, cu.chromaFormat)); const int xInt = mv.getHor() >> horShift; const int yInt = mv.getVer() >> verShift; const int xFrac = mv.getHor() & ((1 << horShift) - 1); const int yFrac = mv.getVer() & ((1 << verShift) - 1); const Pel* ref; Pel* dst; int refStride, dstStride, bw, bh; if (trueAfalseL) { ref = refBuf.bufAt(cu.blocks[compID].pos().offset(xInt + posW, yInt + posH - 1)); dst = predBlkTpl + posW; refStride = refBuf.stride; dstStride = tplSize; bw = tplSize; bh = 1; } else { ref = refBuf.bufAt(cu.blocks[compID].pos().offset(xInt + posW - 1, yInt + posH)); dst = predBlkTpl + posH; refStride = refBuf.stride; dstStride = 1; bw = 1; bh = tplSize; } #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED int nFilterIdx = AML ?
1 : 0; #else const int nFilterIdx = 0; #endif const bool useAltHpelIf = false; if (yFrac == 0) { m_if.filterHor(compID, (Pel*)ref, refStride, dst, dstStride, bw, bh, xFrac, true, cu.chromaFormat, cu.slice->clpRng(compID), nFilterIdx, false, useAltHpelIf); } else if (xFrac == 0) { m_if.filterVer(compID, (Pel*)ref, refStride, dst, dstStride, bw, bh, yFrac, true, true, cu.chromaFormat, cu.slice->clpRng(compID), nFilterIdx, false, useAltHpelIf); } else { #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED #if IF_12TAP int vFilterSize = isLuma(compID) ? NTAPS_LUMA(0) : NTAPS_CHROMA; #else int vFilterSize = isLuma(compID) ? NTAPS_LUMA : NTAPS_CHROMA; #endif if (isLuma(compID) && nFilterIdx == 1) { vFilterSize = NTAPS_BILINEAR; } #else #if IF_12TAP const int vFilterSize = isLuma(compID) ? NTAPS_LUMA(0) : NTAPS_CHROMA; #else const int vFilterSize = isLuma(compID) ? NTAPS_LUMA : NTAPS_CHROMA; #endif #endif /* the horizontal pass covers bh + vFilterSize - 1 rows, starting (vFilterSize >> 1) - 1 rows above ref, so the vertical pass has its full support */ PelBuf tmpBuf = PelBuf(m_filteredBlockTmp[0][compID], Size(bw, bh + vFilterSize - 1)); m_if.filterHor(compID, (Pel*)ref - ((vFilterSize >> 1) - 1)*refStride, refStride, tmpBuf.buf, tmpBuf.stride, bw, bh + vFilterSize - 1, xFrac, false, cu.chromaFormat, cu.slice->clpRng(compID), nFilterIdx, false, useAltHpelIf); JVET_J0090_SET_CACHE_ENABLE(false); m_if.filterVer(compID, tmpBuf.buf + ((vFilterSize >> 1) - 1)*tmpBuf.stride, tmpBuf.stride, dst, dstStride, bw, bh, yFrac, false, true, cu.chromaFormat, cu.slice->clpRng(compID), nFilterIdx, false, useAltHpelIf); JVET_J0090_SET_CACHE_ENABLE(true); } } #endif /** Luma-only combination of pcYuvSrc0 and pcYuvSrc1 into pcYuvDst. Both pu.refIdx >= 0: addWeightedAvg with pu.cu->BcwIdx when it differs from BCW_DEFAULT, addAvg otherwise, both called with false, true in the argument positions that xWeightedAverage fills with chromaOnly, lumaOnly. Exactly one index >= 0: copyClip of that source with the luma-only argument set to true. clipBitDepths is not referenced in the body. */ void InterPrediction::xWeightedAverageY(const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs) { const int iRefIdx0 = pu.refIdx[0]; const int iRefIdx1 = pu.refIdx[1]; if (iRefIdx0 >= 0 && iRefIdx1 >= 0) { if (pu.cu->BcwIdx != BCW_DEFAULT) { #if JVET_Z0136_OOB bool isOOB[2] = { false,false }; pcYuvDst.addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, pu.cu->BcwIdx, false, true,
pu.cs->mcMask, -1, pu.cs->mcMaskChroma, -1, isOOB); #else pcYuvDst.addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, pu.cu->BcwIdx, false, true); #endif } else { pcYuvDst.addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, false, true); } } else if (iRefIdx0 >= 0 && iRefIdx1 < 0) { pcYuvDst.copyClip(pcYuvSrc0, clpRngs, true); } else if (iRefIdx0 < 0 && iRefIdx1 >= 0) { pcYuvDst.copyClip(pcYuvSrc1, clpRngs, true); } } #endif #if JVET_W0090_ARMC_TM /** Derives per-sub-block affine motion vectors for list eRefPicList from the control-point vectors pu.mvAffi[eRefPicList][0..2] and, for every sub-block in the first row or first column of the PU, calls xGetSublkAMLTemplate to fill refLeftTemplate and refAboveTemplate from the unscaled reference picture. pu.refIdx[eRefPicList] must be >= 0 (CHECKed). numTemplate is only passed through to xGetSublkAMLTemplate. */ void InterPrediction::xPredAffineTpl(const PredictionUnit &pu, const RefPicList &eRefPicList, int* numTemplate, Pel* refLeftTemplate, Pel* refAboveTemplate) { int iRefIdx = pu.refIdx[eRefPicList]; CHECK(iRefIdx < 0, "iRefIdx incorrect."); const Picture* refPic = pu.cu->slice->getRefPic(eRefPicList, iRefIdx)->unscaledPic; Mv mvLT = pu.mvAffi[eRefPicList][0]; Mv mvRT = pu.mvAffi[eRefPicList][1]; Mv mvLB = pu.mvAffi[eRefPicList][2]; // get affine sub-block width and height const int width = pu.Y().width; const int height = pu.Y().height; int blockWidth = AFFINE_MIN_BLOCK_SIZE; int blockHeight = AFFINE_MIN_BLOCK_SIZE; CHECK(blockWidth > width, "Sub Block width > Block width"); CHECK(blockHeight > height, "Sub Block height > Block height"); const int cxWidth = width; const int cxHeight = height; const int iBit = MAX_CU_DEPTH; int iDMvHorX, iDMvHorY, iDMvVerX, iDMvVerY; iDMvHorX = (mvRT - mvLT).getHor() << (iBit - floorLog2(width)); iDMvHorY = (mvRT - mvLT).getVer() << (iBit - floorLog2(width)); if (pu.cu->affineType == AFFINEMODEL_6PARAM) { iDMvVerX = (mvLB - mvLT).getHor() << (iBit - floorLog2(height)); iDMvVerY = (mvLB - mvLT).getVer() << (iBit - floorLog2(height)); } else { /* not the 6-parameter model: the vertical deltas are derived from the horizontal ones */ iDMvVerX = -iDMvHorY; iDMvVerY = iDMvHorX; } int iMvScaleHor = mvLT.getHor() << iBit; int iMvScaleVer = mvLT.getVer() << iBit; const int shift = iBit - 4 + MV_FRACTIONAL_BITS_INTERNAL; #if !AFFINE_RM_CONSTRAINTS_AND_OPT const bool subblkMVSpreadOverLimit = isSubblockVectorSpreadOverLimit(iDMvHorX, iDMvHorY, iDMvVerX, iDMvVerY, pu.interDir); #endif #if
AFFINE_RM_CONSTRAINTS_AND_OPT /* the sub-block size is doubled while the largest MV delta accumulated across it stays below TH and the sub-block is smaller than the PU; a zero delta makes the sub-block span the whole PU in that direction */ if (iDMvHorX == 0 && iDMvHorY == 0) blockWidth = width; else { int maxDmv = std::max(abs(iDMvHorX), abs(iDMvHorY)) * blockWidth; int TH = 1 << (iBit - 1); // Half pel while (maxDmv < TH && blockWidth < width) { blockWidth <<= 1; maxDmv <<= 1; } } if (iDMvVerX == 0 && iDMvVerY == 0) blockHeight = height; else { int maxDmv = std::max(abs(iDMvVerX), abs(iDMvVerY)) * blockHeight; int TH = 1 << (iBit - 1); // Half pel while (maxDmv < TH && blockHeight < height) { blockHeight <<= 1; maxDmv <<= 1; } } #endif int iMvScaleTmpHor0 = iMvScaleHor + ((iDMvHorX * blockWidth + iDMvVerX * blockHeight) >> 1); int iMvScaleTmpVer0 = iMvScaleVer + ((iDMvHorY * blockWidth + iDMvVerY * blockHeight) >> 1); #if JVET_Z0139_NA_AFF const CodingUnit *const cuAbove = pu.cu->cs->getCU(pu.cu->blocks[COMPONENT_Y].pos().offset(0, -1), toChannelType(COMPONENT_Y)); const CodingUnit *const cuLeft = pu.cu->cs->getCU(pu.cu->blocks[COMPONENT_Y].pos().offset(-1, 0), toChannelType(COMPONENT_Y)); // get prediction block by block for (int h = 0; (cuLeft && h < cxHeight) || h < 1; h += blockHeight) { for (int w = 0; (cuAbove && w < cxWidth) || w < 1; w += blockWidth) #else for (int h = 0; h < cxHeight; h += blockHeight) { for (int w = 0; w < cxWidth; w += blockWidth) #endif { /* only sub-blocks in the first column or first row are processed */ if (w == 0 || h == 0) { int iMvScaleTmpHor, iMvScaleTmpVer; #if !AFFINE_RM_CONSTRAINTS_AND_OPT if (!subblkMVSpreadOverLimit) #endif { iMvScaleTmpHor = iMvScaleTmpHor0 + iDMvHorX * w + iDMvVerX * h; iMvScaleTmpVer = iMvScaleTmpVer0 + iDMvHorY * w + iDMvVerY * h; } #if !AFFINE_RM_CONSTRAINTS_AND_OPT else { iMvScaleTmpHor = iMvScaleHor + iDMvHorX * (cxWidth >> 1) + iDMvVerX * (cxHeight >> 1); iMvScaleTmpVer = iMvScaleVer + iDMvHorY * (cxWidth >> 1) + iDMvVerY * (cxHeight >> 1); } #endif roundAffineMv(iMvScaleTmpHor, iMvScaleTmpVer, shift); Mv tmpMv(iMvScaleTmpHor, iMvScaleTmpVer); tmpMv.clipToStorageBitDepth(); iMvScaleTmpHor = tmpMv.getHor(); iMvScaleTmpVer = tmpMv.getVer(); // clip and scale #if
JVET_AA0146_WRAP_AROUND_FIX bool wrapRef = false; if ( refPic->isWrapAroundEnabled( pu.cs->pps ) ) { Mv tmpMv(iMvScaleTmpHor, iMvScaleTmpVer); wrapRef = wrapClipMv( tmpMv, pu.lumaPos(), pu.lumaSize(), pu.cs->sps, pu.cs->pps ); iMvScaleTmpHor = tmpMv.getHor(); iMvScaleTmpVer = tmpMv.getVer(); } else { wrapRef = false; #endif if (refPic->isRefScaled(pu.cs->pps) == false) { clipMv(tmpMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps); iMvScaleTmpHor = tmpMv.getHor(); iMvScaleTmpVer = tmpMv.getVer(); } #if JVET_AA0146_WRAP_AROUND_FIX } #endif xGetSublkAMLTemplate(*pu.cu, COMPONENT_Y, *refPic, Mv(iMvScaleTmpHor, iMvScaleTmpVer), blockWidth, blockHeight, w, h, numTemplate, refLeftTemplate, refAboveTemplate #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED , pu.afMmvdFlag #if JVET_AA0132_CONFIGURABLE_TM_TOOLS && pu.cs->sps->getUseTMMMVD() #endif #endif #if JVET_AA0146_WRAP_AROUND_FIX , wrapRef #endif ); } } } } #endif /** Inter prediction of one PU into predBuf. Dispatch in the order written below: PUs with additional hypotheses (MULTI_HYP_PRED) go to xAddHypMC; on the decoder IBC CUs go to xIntraBlockCopy; a single-channel request with REF_PIC_LIST_0 goes to xPredInterUni; an explicit list (eRefPicList != REF_PIC_LIST_X) goes to xPredInterUni, followed by xWeightedPredictionUni when explicit weighted prediction applies; for REF_PIC_LIST_X the choice is xSubPuBio, xSubPuMC, xPredInterUni on identical motion, or xPredInterBi. predBufWOBIO must be null when pu.ciipFlag is set or an explicit list is given (CHECKed). */ void InterPrediction::motionCompensation( PredictionUnit &pu, PelUnitBuf &predBuf, const RefPicList &eRefPicList , const bool luma, const bool chroma , PelUnitBuf* predBufWOBIO /*= NULL*/ ) { // Note: there appears to be an interaction with weighted prediction that // makes the code follow different paths if chroma is on or off (in the encoder). // Therefore for 4:0:0, "chroma" is not changed to false.
#if MULTI_HYP_PRED if (!pu.addHypData.empty()) { CHECK(eRefPicList != REF_PIC_LIST_X, "Multi Hyp: eRefPicList != REF_PIC_LIST_X"); CHECK(!luma, "Multi Hyp: !luma"); xAddHypMC(pu, predBuf, predBufWOBIO, !chroma); return; } #endif CHECK(predBufWOBIO && pu.ciipFlag, "the case should not happen!"); if (!pu.cs->pcv->isEncoder) { if (CU::isIBC(*pu.cu)) { CHECK(!luma, "IBC only for Chroma is not allowed."); xIntraBlockCopy(pu, predBuf, COMPONENT_Y); if (chroma && isChromaEnabled(pu.chromaFormat)) { xIntraBlockCopy(pu, predBuf, COMPONENT_Cb); xIntraBlockCopy(pu, predBuf, COMPONENT_Cr); } return; } } // dual tree handling for IBC as the only ref if ((!luma || !chroma) && eRefPicList == REF_PIC_LIST_0) { xPredInterUni(pu, eRefPicList, predBuf, false, false, luma, chroma); return; } // else, go with regular MC below CodingStructure &cs = *pu.cs; const PPS &pps = *cs.pps; const SliceType sliceType = cs.slice->getSliceType(); if( eRefPicList != REF_PIC_LIST_X ) { CHECK(predBufWOBIO != NULL, "the case should not happen!"); if ((CU::isIBC(*pu.cu) == false) && ((sliceType == P_SLICE && pps.getUseWP()) || (sliceType == B_SLICE && pps.getWPBiPred())) #if INTER_LIC && !pu.cu->LICFlag #endif ) { xPredInterUni(pu, eRefPicList, predBuf, true, false, luma, chroma); xWeightedPredictionUni(pu, predBuf, eRefPicList, predBuf, -1, m_maxCompIDToPred, (luma && !chroma), (!luma && chroma)); } else { xPredInterUni(pu, eRefPicList, predBuf, false, false, luma, chroma); } } else { #if !INTER_RM_SIZE_CONSTRAINTS #if ENABLE_OBMC if (pu.cu->isobmcMC == false) #endif CHECK( !pu.cu->affine && pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0 && ( pu.lwidth() + pu.lheight() == 12 ), "invalid 4x8/8x4 bi-predicted blocks" ); #endif #if !BDOF_RM_CONSTRAINTS int refIdx0 = pu.refIdx[REF_PIC_LIST_0]; int refIdx1 = pu.refIdx[REF_PIC_LIST_1]; const WPScalingParam *wp0 = pu.cs->slice->getWpScaling(REF_PIC_LIST_0, refIdx0); const WPScalingParam *wp1 = pu.cs->slice->getWpScaling(REF_PIC_LIST_1, refIdx1); bool bioApplied =
false; const Slice &slice = *pu.cs->slice; if (pu.cs->sps->getBDOFEnabledFlag() && (!pu.cs->picHeader->getDisBdofFlag())) { if (pu.cu->affine || m_subPuMC) { bioApplied = false; } else { const bool biocheck0 = !((WPScalingParam::isWeighted(wp0) || WPScalingParam::isWeighted(wp1)) && slice.getSliceType() == B_SLICE); const bool biocheck1 = !(pps.getUseWP() && slice.getSliceType() == P_SLICE); if (biocheck0 && biocheck1 && PU::isBiPredFromDifferentDirEqDistPoc(pu) && (pu.Y().height >= 8) && (pu.Y().width >= 8) && ((pu.Y().height * pu.Y().width) >= 128) ) { bioApplied = true; } } if (bioApplied && pu.ciipFlag) { bioApplied = false; } if (bioApplied && pu.cu->smvdMode) { bioApplied = false; } if (pu.cu->cs->sps->getUseBcw() && bioApplied && pu.cu->BcwIdx != BCW_DEFAULT) { bioApplied = false; } if (pu.mmvdEncOptMode == 2 && pu.mmvdMergeFlag) { bioApplied = false; } } #if ENABLE_OBMC if (pu.cu->isobmcMC) { bioApplied = false; } #endif /* BDOF is switched off when either used reference picture is scaled relative to the current PPS */ bool refIsScaled = ( refIdx0 < 0 ? false : pu.cu->slice->getRefPic( REF_PIC_LIST_0, refIdx0 )->isRefScaled( pu.cs->pps ) ) || ( refIdx1 < 0 ? false : pu.cu->slice->getRefPic( REF_PIC_LIST_1, refIdx1 )->isRefScaled( pu.cs->pps ) ); bioApplied = refIsScaled ? false : bioApplied; bool dmvrApplied = false; dmvrApplied = (pu.mvRefine) && PU::checkDMVRCondition(pu); #if MULTI_PASS_DMVR if ((pu.lumaSize().width > MAX_BDOF_APPLICATION_REGION || pu.lumaSize().height > MAX_BDOF_APPLICATION_REGION) && pu.mergeType != MRG_TYPE_SUBPU_ATMVP && (bioApplied && !dmvrApplied && !pu.bdmvrRefine)) #else if ((pu.lumaSize().width > MAX_BDOF_APPLICATION_REGION || pu.lumaSize().height > MAX_BDOF_APPLICATION_REGION) && pu.mergeType != MRG_TYPE_SUBPU_ATMVP && (bioApplied && !dmvrApplied)) #endif { xSubPuBio(pu, predBuf, eRefPicList, predBufWOBIO); } else #endif if( pu.mergeType != MRG_TYPE_DEFAULT_N && pu.mergeType != MRG_TYPE_IBC ) { CHECK( predBufWOBIO != NULL, "the case should not happen!"
); xSubPuMC( pu, predBuf, eRefPicList, luma, chroma ); } else if( xCheckIdenticalMotion( pu ) ) { xPredInterUni( pu, REF_PIC_LIST_0, predBuf, false, false, luma, chroma ); if( predBufWOBIO ) { predBufWOBIO->copyFrom( predBuf, ( luma && !chroma ), ( chroma && !luma ) ); } } else { #if MULTI_PASS_DMVR m_bdofMvRefined = false; #if !BDOF_RM_CONSTRAINTS if (pu.bdmvrRefine && !bioApplied) { for (int bdofSubPuIdx = 0; bdofSubPuIdx < BDOF_SUBPU_MAX_NUM; bdofSubPuIdx++) { m_bdofSubPuMvOffset[bdofSubPuIdx].setZero(); } } #endif xPredInterBi(pu, predBuf, luma, chroma, predBufWOBIO); if (m_bdofMvRefined) { xPredInterBiSubPuBDOF(pu, predBuf, luma, chroma); // do not change the predBufWOBIO m_bdofMvRefined = false; } #else xPredInterBi( pu, predBuf, luma, chroma, predBufWOBIO ); #endif } } return; } /** Runs motionCompensation on every PU of cu (CU::traversePUs), each into the buffer returned by cu.cs->getPredBuf(pu). pu.mvRefine is set to true for the duration of each call and reset to false afterwards. */ void InterPrediction::motionCompensation( CodingUnit &cu, const RefPicList &eRefPicList , const bool luma, const bool chroma ) { for( auto &pu : CU::traversePUs( cu ) ) { PelUnitBuf predBuf = cu.cs->getPredBuf( pu ); pu.mvRefine = true; motionCompensation(pu, predBuf, eRefPicList, luma, chroma); pu.mvRefine = false; } } /** Convenience overload: predicts pu into the buffer returned by pu.cs->getPredBuf(pu). */ void InterPrediction::motionCompensation( PredictionUnit &pu, const RefPicList &eRefPicList /*= REF_PIC_LIST_X*/ , const bool luma, const bool chroma ) { PelUnitBuf predBuf = pu.cs->getPredBuf( pu ); motionCompensation(pu, predBuf, eRefPicList, luma, chroma); } #if ENABLE_OBMC /** Function for sub-block based Overlapped Block Motion Compensation (OBMC). * * This function can: * 1. Perform sub-block OBMC for a CU. * 2. Before motion estimation, subtract (scaled) predictors generated by applying neighboring motions to current CU/PU from the original signal of current CU/PU, * to make the motion estimation biased to OBMC.
*/ /** \param pu PU to process; the function returns at once when OBMC is disabled in the SPS, pu.cu->obmcFlag is false, (INTER_LIC) pu.cu->LICFlag is set, or the luma area is below 32 samples. \param pDst destination prediction buffer; when it is nullptr the buffer from pu.cs->getPredBuf(pu) is used. A first pass walks the top (0) and left (1) PU boundary in units of minCUWidth; a second pass over every sub-block runs only when the PU has sub-block motion (affine or bdmvrRefine / DMVR condition). NOTE(review): subPu is a copy of pu, so writes through subPu.cu presumably modify the same CU object as pu.cu, which would be why BcwIdx, affine and geoFlag are saved first and restored on both exit paths - confirm. */ void InterPrediction::subBlockOBMC(PredictionUnit &pu, PelUnitBuf* pDst) { if ( pu.cs->sps->getUseOBMC() == false || pu.cu->obmcFlag == false #if INTER_LIC || pu.cu->LICFlag #endif || pu.lwidth() * pu.lheight() < 32 ) { return; } const UnitArea orgPuArea = pu; PredictionUnit subPu = pu; const uint32_t uiWidth = pu.lwidth(); const uint32_t uiHeight = pu.lheight(); const uint32_t uiMinCUW = pu.cs->pcv->minCUWidth; const uint32_t uiHeightInBlock = uiHeight / uiMinCUW; const uint32_t uiWidthInBlock = uiWidth / uiMinCUW; #if MULTI_PASS_DMVR const bool bSubMotion = pu.cu->affine || pu.bdmvrRefine; #else const bool bSubMotion = pu.cu->affine || PU::checkDMVRCondition(pu); #endif MotionInfo NeighMi = MotionInfo(); int BcwIdx = pu.cu->BcwIdx; bool affine = pu.cu->affine; bool geo = pu.cu->geoFlag; subPu.cu->affine = false; subPu.cu->BcwIdx = BCW_DEFAULT; subPu.cu->geoFlag = false; #if INTER_LIC subPu.cu->LICFlag = false; #endif subPu.ciipFlag = false; #if TM_MRG || (JVET_Z0084_IBC_TM && IBC_TM_MRG) subPu.tmMergeFlag = false; #endif #if MULTI_PASS_DMVR subPu.bdmvrRefine = false; #endif subPu.mvRefine = false; subPu.mmvdMergeFlag = false; PelUnitBuf pcYuvPred = pDst == nullptr ? pu.cs->getPredBuf(pu) : *pDst; PelUnitBuf pcYuvTmpPredL0 = m_tmpObmcBufL0.subBuf(UnitAreaRelative(*pu.cu, pu)); PelUnitBuf pcYuvTmpPredT0 = m_tmpObmcBufT0.subBuf(UnitAreaRelative(*pu.cu, pu)); for (int iBlkBoundary = 0; iBlkBoundary < 2; iBlkBoundary++) // 0 - top; 1 - left { unsigned int uiLengthInBlock = ((iBlkBoundary == 0) ? uiWidthInBlock : uiHeightInBlock); int iSub = 0, iState = 0; while (iSub < uiLengthInBlock) { int iLength = 0; Position curOffset = (iBlkBoundary == 0) ?
Position(iSub * uiMinCUW, 0) : Position(0, iSub * uiMinCUW); iState = PU::getSameNeigMotion(pu, NeighMi, curOffset, iBlkBoundary, iLength, uiLengthInBlock - iSub); if (iState == 2) // do OBMC { subPu = NeighMi; if (iBlkBoundary == 0) { subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(orgPuArea.lumaPos().offset(iSub * uiMinCUW, 0), Size{ iLength*uiMinCUW, uiMinCUW }))); } else { subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(orgPuArea.lumaPos().offset(0, iSub * uiMinCUW), Size{ uiMinCUW, iLength*uiMinCUW }))); } const UnitArea predArea = UnitAreaRelative(orgPuArea, subPu); PelUnitBuf cPred = pcYuvPred.subBuf(predArea); PelUnitBuf cTmp1; if (iBlkBoundary == 0)//above { cTmp1 = pcYuvTmpPredT0.subBuf(predArea); } else//left { cTmp1 = pcYuvTmpPredL0.subBuf(predArea); } #if JVET_Z0061_TM_OBMC bool isAbove = (iBlkBoundary == 0) ? 1 : 0; int iOBMCmode = selectOBMCmode(pu, subPu, isAbove, iLength, uiMinCUW, curOffset); #if JVET_AA0132_CONFIGURABLE_TM_TOOLS if (iOBMCmode == -1) { xSubBlockMotionCompensation(subPu, cTmp1); for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++) { xSubblockOBMC(ComponentID(compID), subPu, cPred, cTmp1, iBlkBoundary); } iSub += iLength; } else #endif if (iOBMCmode == 1) // 1: current; { iSub += iLength; } else if (iOBMCmode == 2) // 2: neighbour; { xSubBlockMotionCompensation(subPu, cTmp1); for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++) { xSubblockTMOBMC(ComponentID(compID), subPu, cPred, cTmp1, iBlkBoundary, iOBMCmode); } iSub += iLength; } else // 3: blend (OBMC) or default 0: best has not been found; { xSubBlockMotionCompensation(subPu, cTmp1); for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++) { xSubblockTMOBMC(ComponentID(compID), subPu, cPred, cTmp1, iBlkBoundary, iOBMCmode); } iSub += iLength; } #else xSubBlockMotionCompensation(subPu, cTmp1); for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++) { xSubblockOBMC(ComponentID(compID), subPu, cPred, cTmp1, iBlkBoundary); } iSub += iLength;
#endif } else if (iState == 1 || iState == 3) // consecutive intra neighbors or skip OBMC based on MV similarity { iSub += iLength; } else // unavailable neighbors { iSub += uiLengthInBlock; break; } } CHECK(iSub != uiLengthInBlock, "not all sub-blocks are merged"); } /* without sub-block motion only the boundary pass above is performed: restore the saved CU fields and leave */ if (!bSubMotion) { pu.cu->BcwIdx = BcwIdx; pu.cu->affine = affine; pu.cu->geoFlag = geo; return; } /* five scratch areas at luma x offsets 0, 4, 8, 12 and 16 of m_tmpSubObmcBuf: cTmp1..cTmp4 receive the above, left, below and right neighbour predictions, zero stands in for an unavailable neighbour */ PelUnitBuf pcYuvTmpPred = m_tmpSubObmcBuf; PelUnitBuf cTmp1 = pcYuvTmpPred.subBuf(UnitArea(pu.chromaFormat, Area(0, 0, uiMinCUW, uiMinCUW))); PelUnitBuf cTmp2 = pcYuvTmpPred.subBuf(UnitArea(pu.chromaFormat, Area(4, 0, uiMinCUW, uiMinCUW))); PelUnitBuf cTmp3 = pcYuvTmpPred.subBuf(UnitArea(pu.chromaFormat, Area(8, 0, uiMinCUW, uiMinCUW))); PelUnitBuf cTmp4 = pcYuvTmpPred.subBuf(UnitArea(pu.chromaFormat, Area(12, 0, uiMinCUW, uiMinCUW))); PelUnitBuf zero = pcYuvTmpPred.subBuf(UnitArea(pu.chromaFormat, Area(16, 0, uiMinCUW, uiMinCUW))); for (int iSubX = 0; iSubX < uiWidthInBlock; iSubX += 1) { for (int iSubY = 0; iSubY < uiHeightInBlock; iSubY += 1) { bool bCURBoundary = (iSubX == uiWidthInBlock - 1); bool bCUBBoundary = (iSubY == uiHeightInBlock - 1); subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(orgPuArea.lumaPos().offset(iSubX * uiMinCUW, iSubY * uiMinCUW), Size{ uiMinCUW, uiMinCUW }))); const UnitArea predArea = UnitAreaRelative(orgPuArea, subPu); PelUnitBuf cPred = pcYuvPred.subBuf(predArea); bool isAboveAvail = false, isLeftAvail = false, isBelowAvail = false, isRightAvail = false; // above if (iSubY) { isAboveAvail = PU::getNeighborMotion(pu, NeighMi, Position(iSubX * uiMinCUW, iSubY * uiMinCUW), Size(uiMinCUW, uiMinCUW), 0); if (isAboveAvail) { subPu = NeighMi; xSubBlockMotionCompensation(subPu, cTmp1); } } // left if (iSubX) { isLeftAvail = PU::getNeighborMotion(pu, NeighMi, Position(iSubX * uiMinCUW, iSubY * uiMinCUW), Size(uiMinCUW, uiMinCUW), 1); if (isLeftAvail) { subPu = NeighMi; xSubBlockMotionCompensation(subPu, cTmp2); } } // below if (!bCUBBoundary) { isBelowAvail =
PU::getNeighborMotion(pu, NeighMi, Position(iSubX * uiMinCUW, iSubY * uiMinCUW), Size(uiMinCUW, uiMinCUW), 2); if (isBelowAvail) { subPu = NeighMi; xSubBlockMotionCompensation(subPu, cTmp3); } } // right if (!bCURBoundary) { isRightAvail = PU::getNeighborMotion(pu, NeighMi, Position(iSubX * uiMinCUW, iSubY * uiMinCUW), Size(uiMinCUW, uiMinCUW), 3); if (isRightAvail) { subPu = NeighMi; xSubBlockMotionCompensation(subPu, cTmp4); } } if( isAboveAvail || isLeftAvail || isBelowAvail || isRightAvail ) { for( int compID = 0; compID < MAX_NUM_COMPONENT; compID++ ) { xSubblockOBMCBlending( ComponentID( compID ), subPu, cPred, isAboveAvail ? cTmp1: zero, isLeftAvail ? cTmp2: zero, isBelowAvail ? cTmp3: zero, isRightAvail ? cTmp4: zero, isAboveAvail, isLeftAvail, isBelowAvail, isRightAvail, true ); } } } } pu.cu->BcwIdx = BcwIdx; pu.cu->affine = affine; pu.cu->geoFlag = geo; return; } // Function for (weighted) averaging predictors of current block and predictors generated by applying neighboring motions to current block. /** Blends pcYuvPredSrc into pcYuvPredDst in place for component eComp along one edge of pu.blocks[eComp]; iDir selects the edge: 0 above, 1 left, 2 below, 3 right. The line nearest the edge is weighted (3*dst + src + 2) >> 2 when bSubMotion is true and (26*dst + 6*src + 16) >> 5 otherwise for iDir 0 and 1; iDir 2 and 3 always use the 3:1 form. For COMPONENT_Y the next two lines inwards use 7:1 and 15:1, and for iDir 0 and 1 with bSubMotion false a fourth line uses 31:1. Returns without action when the block width or height is 0. */ void InterPrediction::xSubblockOBMC(const ComponentID eComp, PredictionUnit &pu, PelUnitBuf &pcYuvPredDst, PelUnitBuf &pcYuvPredSrc, int iDir, bool bSubMotion) { int iWidth = pu.blocks[eComp].width; int iHeight = pu.blocks[eComp].height; if (iWidth == 0 || iHeight == 0) { return; } Pel* pOrgDst = pcYuvPredDst.bufs[eComp].buf; Pel* pOrgSrc = pcYuvPredSrc.bufs[eComp].buf; const int strideDst = pcYuvPredDst.bufs[eComp].stride; const int strideSrc = pcYuvPredSrc.bufs[eComp].stride; if (iDir == 0) //above { for (int i = 0; i < iWidth; i++) { Pel* pDst = pOrgDst; Pel* pSrc = pOrgSrc; pDst[i] = bSubMotion ?
(3 * pDst[i] + pSrc[i] + 2) >> 2 : (26 * pDst[i] + 6 * pSrc[i] + 16) >> 5; if (eComp == COMPONENT_Y) { pDst += strideDst; pSrc += strideSrc; pDst[i] = (7 * pDst[i] + pSrc[i] + 4) >> 3; pDst += strideDst; pSrc += strideSrc; pDst[i] = (15 * pDst[i] + pSrc[i] + 8) >> 4; if (!bSubMotion) { pDst += strideDst; pSrc += strideSrc; pDst[i] = (31 * pDst[i] + pSrc[i] + 16) >> 5; } } } } if (iDir == 1) //left { Pel* pDst = pOrgDst; Pel* pSrc = pOrgSrc; for (int i = 0; i < iHeight; i++) { pDst[0] = bSubMotion ? (3 * pDst[0] + pSrc[0] + 2) >> 2 : (26 * pDst[0] + 6 * pSrc[0] + 16) >> 5; if (eComp == COMPONENT_Y) { pDst[1] = (7 * pDst[1] + pSrc[1] + 4) >> 3; pDst[2] = (15 * pDst[2] + pSrc[2] + 8) >> 4; if (!bSubMotion) { pDst[3] = (31 * pDst[3] + pSrc[3] + 16) >> 5; } } pDst += strideDst; pSrc += strideSrc; } } if (iDir == 2) //below { for (int i = 0; i < iWidth; i++) { Pel* pDst = pOrgDst + (iHeight - 1) * strideDst; Pel* pSrc = pOrgSrc + (iHeight - 1) * strideSrc; pDst[i] = (3 * pDst[i] + pSrc[i] + 2) >> 2; if (eComp == COMPONENT_Y) { pDst -= strideDst; pSrc -= strideSrc; pDst[i] = (7 * pDst[i] + pSrc[i] + 4) >> 3; pDst -= strideDst; pSrc -= strideSrc; pDst[i] = (15 * pDst[i] + pSrc[i] + 8) >> 4; } } } if (iDir == 3) //right { /* the pointers are offset by iWidth - 4 so that index 3 addresses the last column of the block */ Pel* pDst = pOrgDst + (iWidth - 4); Pel* pSrc = pOrgSrc + (iWidth - 4); for (int i = 0; i < iHeight; i++) { pDst[3] = (3 * pDst[3] + pSrc[3] + 2) >> 2; if (eComp == COMPONENT_Y) { pDst[2] = (7 * pDst[2] + pSrc[2] + 4) >> 3; pDst[1] = (15 * pDst[1] + pSrc[1] + 8) >> 4; } pDst += strideDst; pSrc += strideSrc; } } } #if ENABLE_OBMC /** Blends up to four neighbour-motion predictions into pcYuvPredDst in place for component eComp: pcYuvPredSrc1 is paired with the above weights, pcYuvPredSrc2 with left, pcYuvPredSrc3 with below and pcYuvPredSrc4 with right. Weights sum to 1 << 7 with a rounding add of 1 << 6. For an available side the four weights are copied from defaultWeight[isChroma]; an unavailable side gets zeros. Luma: each sample uses the weight indexed by its distance to each edge (j, i, iHeight - 1 - j, iWidth - 1 - i), and samples whose neighbour weights sum to 0 are left unchanged; the 4-entry tables imply a block of at most 4x4 samples - TODO confirm against callers. Non-luma: exactly 2x2 samples are written, each using entry 0 of its two adjacent sides. All source buffers are stepped with the stride of pcYuvPredSrc1. bSubMotion is not referenced in the body. */ void InterPrediction::xSubblockOBMCBlending(const ComponentID eComp, PredictionUnit &pu, PelUnitBuf &pcYuvPredDst, PelUnitBuf &pcYuvPredSrc1, PelUnitBuf &pcYuvPredSrc2, PelUnitBuf &pcYuvPredSrc3, PelUnitBuf &pcYuvPredSrc4, bool isAboveAvail, bool isLeftAvail, bool isBelowAvail, bool isRightAvail, bool bSubMotion) { int iWidth = pu.blocks[eComp].width; int iHeight = pu.blocks[eComp].height; if (iWidth == 0 || iHeight == 0) { return;
} Pel* pOrgDst = pcYuvPredDst.bufs[eComp].buf; Pel* pOrgSrc1 = pcYuvPredSrc1.bufs[eComp].buf; Pel* pOrgSrc2 = pcYuvPredSrc2.bufs[eComp].buf; Pel* pOrgSrc3 = pcYuvPredSrc3.bufs[eComp].buf; Pel* pOrgSrc4 = pcYuvPredSrc4.bufs[eComp].buf; const int strideDst = pcYuvPredDst.bufs[eComp].stride; const int strideSrc = pcYuvPredSrc1.bufs[eComp].stride; unsigned int isChroma = !isLuma( eComp ); unsigned int aboveWeight[4], leftWeight[4], belowWeight[4], rightWeight[4]; if( isAboveAvail ) { memcpy( aboveWeight, defaultWeight[isChroma], sizeof( aboveWeight ) ); } else { memset( aboveWeight, 0, sizeof( aboveWeight ) ); } if( isLeftAvail ) { memcpy( leftWeight, defaultWeight[isChroma], sizeof( leftWeight ) ); } else { memset( leftWeight, 0, sizeof( leftWeight ) ); } if( isBelowAvail ) { memcpy( belowWeight, defaultWeight[isChroma], sizeof( belowWeight ) ); } else { memset( belowWeight, 0, sizeof( belowWeight ) ); } if( isRightAvail ) { memcpy( rightWeight, defaultWeight[isChroma], sizeof( rightWeight ) ); } else { memset( rightWeight, 0, sizeof( rightWeight ) ); } unsigned int shift = 7; unsigned int sumWeight = 1 << shift; unsigned int add = 1 << (shift - 1); Pel* pDst = pOrgDst; Pel* pSrc1 = pOrgSrc1; Pel* pSrc2 = pOrgSrc2; Pel* pSrc3 = pOrgSrc3; Pel* pSrc4 = pOrgSrc4; if( isLuma( eComp ) ) { for( int j = 0; j < iHeight; j++ ) { unsigned int idx_h = iHeight - 1 - j; for( int i = 0; i < iWidth; i++ ) { unsigned int idx_w = iWidth - 1 - i; unsigned int sumOBMCWeight = aboveWeight[j] + leftWeight[i] + belowWeight[idx_h] + rightWeight[idx_w]; /* no neighbour contributes to this sample: leave it untouched */ if( sumOBMCWeight == 0 ) { continue; } unsigned int currentWeight = sumWeight - sumOBMCWeight; pDst[i] = (currentWeight * pDst[i] + aboveWeight[j] * pSrc1[i] + leftWeight[i] * pSrc2[i] + belowWeight[idx_h] * pSrc3[i] + rightWeight[idx_w] * pSrc4[i] + add) >> shift; } pDst += strideDst; pSrc1 += strideSrc; pSrc2 += strideSrc; pSrc3 += strideSrc; pSrc4 += strideSrc; } } else { pDst[0] = ((sumWeight - aboveWeight[0] - leftWeight[0]) * pDst[0] +
aboveWeight[0] * pSrc1[0] + leftWeight[0] * pSrc2[0] + add) >> shift; pDst[1] = ((sumWeight - aboveWeight[0] - rightWeight[0]) * pDst[1] + aboveWeight[0] * pSrc1[1] + rightWeight[0] * pSrc4[1] + add) >> shift; pDst += strideDst; pSrc2 += strideSrc; pSrc3 += strideSrc; pSrc4 += strideSrc; pDst[0] = ((sumWeight - leftWeight[0] - belowWeight[0]) * pDst[0] + leftWeight[0] * pSrc2[0] + belowWeight[0] * pSrc3[0] + add) >> shift; pDst[1] = ((sumWeight - belowWeight[0] - rightWeight[0]) * pDst[1] + belowWeight[0] * pSrc3[1] + rightWeight[0] * pSrc4[1] + add) >> shift; } } #endif /** Predicts pu into pcYuvPred: xPredInterUni from REF_PIC_LIST_0 (with both trailing channel arguments true) when xCheckIdenticalMotion(pu) holds, otherwise xPredInterBi(pu, pcYuvPred, true). */ void InterPrediction::xSubBlockMotionCompensation(PredictionUnit &pu, PelUnitBuf &pcYuvPred) { if (xCheckIdenticalMotion(pu)) { xPredInterUni(pu, REF_PIC_LIST_0, pcYuvPred, false, false, true, true); } else { xPredInterBi(pu, pcYuvPred, true); } } #endif /** Returns numer shifted right by floorLog2(denom). NOTE(review): floorLog2 is defined elsewhere; presumably this approximates numer / denom by the largest power of two not above denom, and denom must be such that floorLog2 yields a valid non-negative shift count - confirm. */ int InterPrediction::rightShiftMSB(int numer, int denom) { return numer >> floorLog2(denom); } #if JVET_Z0056_GPM_SPLIT_MODE_REORDERING /** Fills m_tplWeightTblDict and m_tplColWeightTblDict for every (hIdx, wIdx, splitDir) once; later calls return immediately because m_tplWeightTblInitialized is set. Each row-table entry is a pointer into g_globalGeoWeightsTpl, displaced by g_weightOffset with the offset mirrored according to g_angle2mirror; the column table stores one weight[0] per row while stepping the pointer by plus or minus GEO_WEIGHT_MASK_SIZE_EXT. m_tplWeightTbl is pointed at the current dictionary entry during the fill, and both m_tplWeightTbl and m_tplColWeightTbl are reset to nullptr at the end. */ void InterPrediction::initTplWeightTable() { if (m_tplWeightTblInitialized) { return; } m_tplWeightTblInitialized = true; for (int hIdx = 0; hIdx < GEO_NUM_CU_SIZE; hIdx++) { int height = 1 << ( hIdx + GEO_MIN_CU_LOG2); for (int wIdx = 0; wIdx < GEO_NUM_CU_SIZE; wIdx++) { for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; ++splitDir) { int16_t(&offset)[2] = g_weightOffset[splitDir][hIdx][wIdx]; int16_t angle = g_GeoParams[splitDir][0]; Pel* weight = &g_globalGeoWeightsTpl[g_angle2mask[angle]][GEO_TM_ADDED_WEIGHT_MASK_SIZE * GEO_WEIGHT_MASK_SIZE_EXT + GEO_TM_ADDED_WEIGHT_MASK_SIZE]; if (g_angle2mirror[angle] == 2) { weight += ((GEO_WEIGHT_MASK_SIZE - 1 - offset[1]) * GEO_WEIGHT_MASK_SIZE_EXT + offset[0]); } else if (g_angle2mirror[angle] == 1) { weight += (offset[1] * GEO_WEIGHT_MASK_SIZE_EXT + (GEO_WEIGHT_MASK_SIZE - 1 - offset[0])); } else { weight += (offset[1] * GEO_WEIGHT_MASK_SIZE_EXT + offset[0]); } m_tplWeightTblDict[hIdx][wIdx][splitDir] = weight; m_tplWeightTbl = m_tplWeightTblDict[hIdx][wIdx]; //
Transpose weights for left template weight = getTplWeightTableCU<false, 1>(splitDir); int verticalOffset = g_angle2mirror[angle] == 2 ? -GEO_WEIGHT_MASK_SIZE_EXT : GEO_WEIGHT_MASK_SIZE_EXT; for (int h = 0; h < height; ++h) { m_tplColWeightTblDict[hIdx][wIdx][splitDir][h] = weight[0]; weight += verticalOffset; } } } } m_tplWeightTbl = nullptr; m_tplColWeightTbl = nullptr; } #endif #if JVET_Z0056_GPM_SPLIT_MODE_REORDERING void InterPrediction::deriveGpmSplitMode(PredictionUnit& pu, MergeCtx &geoMrgCtx #if JVET_W0097_GPM_MMVD_TM && TM_MRG , MergeCtx(&geoTmMrgCtx)[GEO_NUM_TM_MV_CAND] #endif #if JVET_Y0065_GPM_INTRA , IntraPrediction* pcIntraPred #endif ) { if ( pu.cu->cs->pcv->isEncoder || !pu.cs->slice->getSPS()->getUseAltGPMSplitModeCode()) { return; } uint8_t numValidInList = 0; uint8_t modeList[GEO_NUM_SIG_PARTMODE]; bool refinedSplitMode = !PU::checkRprRefExistingInGpm(pu, geoMrgCtx, pu.geoMergeIdx0, geoMrgCtx, pu.geoMergeIdx1) && xAMLGetCurBlkTemplate(pu, pu.lwidth(), pu.lheight()); if (refinedSplitMode) { #if JVET_W0097_GPM_MMVD_TM && TM_MRG if (pu.tmMergeFlag) { Pel* pRefTopPart0 [GEO_NUM_TM_MV_CAND] = {nullptr, m_acYuvRefAMLTemplatePart0[0], m_acYuvRefAMLTemplatePart0[2], nullptr }; // For mergeCtx[GEO_TM_SHAPE_AL] and mergeCtx[GEO_TM_SHAPE_A] Pel* pRefLeftPart0[GEO_NUM_TM_MV_CAND] = {nullptr, m_acYuvRefAMLTemplatePart0[1], m_acYuvRefAMLTemplatePart0[3], nullptr }; // For mergeCtx[GEO_TM_SHAPE_AL] and mergeCtx[GEO_TM_SHAPE_A] Pel* pRefTopPart1 [GEO_NUM_TM_MV_CAND] = {nullptr, m_acYuvRefAMLTemplatePart1[0], nullptr, m_acYuvRefAMLTemplatePart1[2]}; // For mergeCtx[GEO_TM_SHAPE_AL] and mergeCtx[GEO_TM_SHAPE_L] Pel* pRefLeftPart1[GEO_NUM_TM_MV_CAND] = {nullptr, m_acYuvRefAMLTemplatePart1[1], nullptr, m_acYuvRefAMLTemplatePart1[3]}; // For mergeCtx[GEO_TM_SHAPE_AL] and mergeCtx[GEO_TM_SHAPE_L] fillPartGPMRefTemplate<0, false>(pu, geoTmMrgCtx[GEO_TM_SHAPE_AL], pu.geoMergeIdx0, -1, pRefTopPart0[GEO_TM_SHAPE_AL], pRefLeftPart0[GEO_TM_SHAPE_AL]); 
// (continuation of InterPrediction::deriveGpmSplitMode(): template-matching merge branch of `if (refinedSplitMode)`)
      fillPartGPMRefTemplate<0, false>(pu, geoTmMrgCtx[GEO_TM_SHAPE_A ], pu.geoMergeIdx0, -1, pRefTopPart0[GEO_TM_SHAPE_A ], pRefLeftPart0[GEO_TM_SHAPE_A ]);
      fillPartGPMRefTemplate<1, false>(pu, geoTmMrgCtx[GEO_TM_SHAPE_AL], pu.geoMergeIdx1, -1, pRefTopPart1[GEO_TM_SHAPE_AL], pRefLeftPart1[GEO_TM_SHAPE_AL]);
      fillPartGPMRefTemplate<1, false>(pu, geoTmMrgCtx[GEO_TM_SHAPE_L ], pu.geoMergeIdx1, -1, pRefTopPart1[GEO_TM_SHAPE_L ], pRefLeftPart1[GEO_TM_SHAPE_L ]);

      // Update split mode
      getBestGeoTMModeList(pu, numValidInList, modeList, pRefTopPart0, pRefLeftPart0, pRefTopPart1, pRefLeftPart1);
      // geoSyntaxMode indexes the ranked list, so it must address one of its numValidInList entries.
      CHECK(pu.geoSyntaxMode < 0 || pu.geoSyntaxMode >= GEO_NUM_SIG_PARTMODE || pu.geoSyntaxMode >= numValidInList, "Invalid GEO split direction!");
      CHECK(numValidInList <= 0 || numValidInList > GEO_NUM_SIG_PARTMODE, "Error occurs");
      pu.geoSplitDir = modeList[pu.geoSyntaxMode];
      return;
    }
    else
#endif
    {
      // Regular (non-TM) candidates: an MMVD index of -1 is passed when the partition carries no MMVD offset.
#if JVET_W0097_GPM_MMVD_TM
      int geoMmvdIdx0 = (pu.geoMMVDFlag0 ? pu.geoMMVDIdx0 : -1);
      int geoMmvdIdx1 = (pu.geoMMVDFlag1 ? pu.geoMMVDIdx1 : -1);
#else
      int geoMmvdIdx0 = -1;
      int geoMmvdIdx1 = -1;
#endif
      fillPartGPMRefTemplate<0>(pu, geoMrgCtx, pu.geoMergeIdx0, geoMmvdIdx0);
      fillPartGPMRefTemplate<1>(pu, geoMrgCtx, pu.geoMergeIdx1, geoMmvdIdx1);
#if JVET_Y0065_GPM_INTRA
      if (pu.gpmIntraFlag)
      {
        // The inverse reshaper LUT is handed over only when slice reshaping and the CTU flag are both on.
        // NOTE(review): m_pcReshape is dereferenced without a null check here - confirm it is always set on this path.
        std::vector<Pel>* LUT = m_pcReshape->getSliceReshaperInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() ? &m_pcReshape->getInvLUT() : nullptr;
        pcIntraPred->clearPrefilledIntraGPMRefTemplate();
        pcIntraPred->fillIntraGPMRefTemplateAll(pu, m_bAMLTemplateAvailabe[0], m_bAMLTemplateAvailabe[1], false, true, true, LUT, pu.geoMergeIdx0, pu.geoMergeIdx1);
      }
#endif
    }
  }
  else
  {
    // No refinement possible: with both flags cleared getBestGeoModeList() returns the identity ordering.
    m_bAMLTemplateAvailabe[0] = false;
    m_bAMLTemplateAvailabe[1] = false;
  }

  // Update split mode
#if JVET_Y0065_GPM_INTRA
  bool isIintra[2] = {false, false};
  // Only the rows of partitions flagged in isIintra[] are filled below; the other rows stay
  // uninitialised and are replaced by nullptr in the getBestGeoModeList() call.
  Pel* pIntraRefTop [2][GEO_NUM_PARTITION_MODE];
  Pel* pIntraRefLeft[2][GEO_NUM_PARTITION_MODE];
  if (refinedSplitMode && pu.gpmIntraFlag)
  {
    // Merge indices at or above GEO_MAX_NUM_UNI_CANDS denote intra candidates.
    isIintra[0] = pu.geoMergeIdx0 >= GEO_MAX_NUM_UNI_CANDS;
    isIintra[1] = pu.geoMergeIdx1 >= GEO_MAX_NUM_UNI_CANDS;
    for (uint8_t partIdx = 0; partIdx < 2; ++partIdx)
    {
      if (isIintra[partIdx])
      {
        uint8_t realCandIdx = (partIdx == 0 ? pu.geoMergeIdx0 : pu.geoMergeIdx1) - GEO_MAX_NUM_UNI_CANDS;
        for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; ++splitDir)
        {
          // last argument: 0 is stored as the top template, 1 as the left template
          pIntraRefTop [partIdx][splitDir] = pcIntraPred->getPrefilledIntraGPMRefTemplate(partIdx, splitDir, realCandIdx, 0);
          pIntraRefLeft[partIdx][splitDir] = pcIntraPred->getPrefilledIntraGPMRefTemplate(partIdx, splitDir, realCandIdx, 1);
        }
      }
    }
  }
#endif
  getBestGeoModeList(pu, numValidInList, modeList, m_acYuvRefAMLTemplatePart0[0], m_acYuvRefAMLTemplatePart0[1], m_acYuvRefAMLTemplatePart1[0], m_acYuvRefAMLTemplatePart1[1]
#if JVET_Y0065_GPM_INTRA
                   , isIintra[0] ? pIntraRefTop [0] : nullptr
                   , isIintra[0] ? pIntraRefLeft[0] : nullptr
                   , isIintra[1] ? pIntraRefTop [1] : nullptr
                   , isIintra[1] ? pIntraRefLeft[1] : nullptr
#endif
  );
  CHECK(pu.geoSyntaxMode < 0 || pu.geoSyntaxMode >= GEO_NUM_SIG_PARTMODE || pu.geoSyntaxMode >= numValidInList, "Invalid GEO split direction!");
  CHECK(numValidInList <= 0 || numValidInList > GEO_NUM_SIG_PARTMODE, "Error occurs");
  pu.geoSplitDir = modeList[pu.geoSyntaxMode];
}
#endif

// GPM motion compensation / blending for one coding unit.
// Exactly one of the signatures below is compiled, selected by the build flags:
//  - with JVET_Z0056_GPM_SPLIT_MODE_REORDERING the TM merge contexts arrive as an array indexed by shape
//    and the split direction is derived first (deriveGpmSplitMode());
//  - otherwise the two TM merge contexts (if any) are passed individually.
// pcIntraPred / reshapeLUT exist only in JVET_Y0065_GPM_INTRA builds.
#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
void InterPrediction::motionCompensationGeo( CodingUnit &cu, MergeCtx &geoMrgCtx
#if JVET_W0097_GPM_MMVD_TM && TM_MRG
                                           , MergeCtx(&geoTmMrgCtx)[GEO_NUM_TM_MV_CAND]
#endif
#if JVET_Y0065_GPM_INTRA
                                           , IntraPrediction* pcIntraPred, std::vector<Pel>* reshapeLUT
#endif
)
#else
#if JVET_W0097_GPM_MMVD_TM && TM_MRG
#if JVET_Y0065_GPM_INTRA
void InterPrediction::motionCompensationGeo( CodingUnit &cu, MergeCtx &geoMrgCtx, MergeCtx &geoTmMrgCtx0, MergeCtx &geoTmMrgCtx1, IntraPrediction* pcIntraPred, std::vector<Pel>* reshapeLUT )
#else
void InterPrediction::motionCompensationGeo(CodingUnit &cu, MergeCtx &geoMrgCtx, MergeCtx &geoTmMrgCtx0, MergeCtx &geoTmMrgCtx1)
#endif
#else
#if JVET_Y0065_GPM_INTRA
void InterPrediction::motionCompensationGeo( CodingUnit &cu, MergeCtx &geoMrgCtx, IntraPrediction* pcIntraPred, std::vector<Pel>* reshapeLUT )
#else
void InterPrediction::motionCompensationGeo( CodingUnit &cu, MergeCtx &geoMrgCtx )
#endif
#endif
#endif
{
#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
  deriveGpmSplitMode(*cu.firstPU, geoMrgCtx
#if JVET_W0097_GPM_MMVD_TM && TM_MRG
                   , geoTmMrgCtx
#endif
#if JVET_Y0065_GPM_INTRA
                   , pcIntraPred
#endif
  );
#if JVET_W0097_GPM_MMVD_TM && TM_MRG
  // Pick, per partition, the TM merge context whose shape matches the angle of the derived split direction.
  MergeCtx& geoTmMrgCtx0 = geoTmMrgCtx[g_geoTmShape[0][g_GeoParams[cu.firstPU->geoSplitDir][0]]];
  MergeCtx& geoTmMrgCtx1 = geoTmMrgCtx[g_geoTmShape[1][g_GeoParams[cu.firstPU->geoSplitDir][0]]];
#endif
#endif

  // Snapshot the GPM syntax of the first PU; it is used for every PU traversed below.
  const uint8_t splitDir = cu.firstPU->geoSplitDir;
  const uint8_t candIdx0 = cu.firstPU->geoMergeIdx0;
  const uint8_t candIdx1 = cu.firstPU->geoMergeIdx1;
#if JVET_W0097_GPM_MMVD_TM
  const bool geoMMVDFlag0 = cu.firstPU->geoMMVDFlag0;
  const uint8_t
// (continuation of InterPrediction::motionCompensationGeo(): this line completes a `const uint8_t` declaration)
                geoMMVDIdx0 = cu.firstPU->geoMMVDIdx0;
  const bool geoMMVDFlag1 = cu.firstPU->geoMMVDFlag1;
  const uint8_t geoMMVDIdx1 = cu.firstPU->geoMMVDIdx1;
#if TM_MRG
  const bool geoTmFlag0 = cu.firstPU->geoTmFlag0;
  const bool geoTmFlag1 = cu.firstPU->geoTmFlag1;
#endif
#endif
#if JVET_AA0058_GPM_ADP_BLD
  const uint8_t bldIdx = cu.firstPU->geoBldIdx;
#endif

  for( auto &pu : CU::traversePUs( cu ) )
  {
    // Each partition is predicted into its own temporary buffer; predBuf receives the blended result.
    const UnitArea localUnitArea( cu.cs->area.chromaFormat, Area( 0, 0, pu.lwidth(), pu.lheight() ) );
    PelUnitBuf tmpGeoBuf0 = m_geoPartBuf[0].getBuf( localUnitArea );
    PelUnitBuf tmpGeoBuf1 = m_geoPartBuf[1].getBuf( localUnitArea );
    PelUnitBuf predBuf    = cu.cs->getPredBuf( pu );

    // ---------------------------------------------------------------- partition 0
#if JVET_Y0065_GPM_INTRA
    // Candidate indices at or above GEO_MAX_NUM_UNI_CANDS select an intra mode from the GPM intra MPM list.
    bool isIntra0 = candIdx0 >= GEO_MAX_NUM_UNI_CANDS;
    bool isIntra1 = candIdx1 >= GEO_MAX_NUM_UNI_CANDS;
    if (isIntra0)
    {
      PU::getGeoIntraMPMs(pu, pu.intraMPM, splitDir, g_geoTmShape[0][g_GeoParams[pu.geoSplitDir][0]]);
      pu.intraDir[0] = pu.intraMPM[candIdx0 - GEO_MAX_NUM_UNI_CANDS];
      pcIntraPred->initIntraPatternChType(cu, pu.Y());
      pcIntraPred->predIntraAng(COMPONENT_Y, tmpGeoBuf0.Y(), pu);
      if (isChromaEnabled(pu.chromaFormat))
      {
        // chroma uses the same intra direction as luma
        pu.intraDir[1] = pu.intraDir[0];
        pcIntraPred->initIntraPatternChType(cu, pu.Cb());
        pcIntraPred->predIntraAng(COMPONENT_Cb, tmpGeoBuf0.Cb(), pu);
        pcIntraPred->initIntraPatternChType(cu, pu.Cr());
        pcIntraPred->predIntraAng(COMPONENT_Cr, tmpGeoBuf0.Cr(), pu);
      }
    }
    else
    {
#endif
      // Load the motion of partition 0 into pu: TM candidate, MMVD candidate or plain merge candidate.
#if JVET_W0097_GPM_MMVD_TM
#if TM_MRG
      if (geoTmFlag0)
      {
        geoTmMrgCtx0.setMergeInfo(pu, candIdx0);
      }
      else
#endif
      if (geoMMVDFlag0)
      {
        geoMrgCtx.setGeoMmvdMergeInfo(pu, candIdx0, geoMMVDIdx0);
      }
      else
#endif
      geoMrgCtx.setMergeInfo( pu, candIdx0 );

      motionCompensation(pu, tmpGeoBuf0, REF_PIC_LIST_X, true, isChromaEnabled(pu.chromaFormat)); // TODO: check 4:0:0 interaction with weighted prediction.
      if( g_mctsDecCheckEnabled && !MCTSHelper::checkMvBufferForMCTSConstraint( pu, true ) )
      {
        printf( "DECODER_GEO_PU: pu motion vector across tile boundaries (%d,%d,%d,%d)\n", pu.lx(), pu.ly(), pu.lwidth(), pu.lheight() );
      }
#if JVET_Y0065_GPM_INTRA
      if (isIntra1)
      {
        // Inter partition combined with an intra partition: round to output bit depth now
        // (presumably because weightedGeoBlkRounded() below expects output-precision samples - confirm).
        tmpGeoBuf0.roundToOutputBitdepth(tmpGeoBuf0, cu.slice->clpRngs());
#if ENABLE_OBMC
#if JVET_W0123_TIMD_FUSION
        PU::spanMotionInfo2(pu);
#else
        PU::spanMotionInfo(pu);
#endif
        // isobmcMC is raised only for the duration of the OBMC call
        cu.isobmcMC = true;
        subBlockOBMC(pu, &tmpGeoBuf0);
        cu.isobmcMC = false;
#endif
      }
    }

    // ---------------------------------------------------------------- partition 1
    if (isIntra1)
    {
      // the MPMs of partition 1 are stored GEO_MAX_NUM_INTRA_CANDS entries into pu.intraMPM
      PU::getGeoIntraMPMs(pu, pu.intraMPM+GEO_MAX_NUM_INTRA_CANDS, splitDir, g_geoTmShape[1][g_GeoParams[pu.geoSplitDir][0]]);
      pu.intraDir[0] = pu.intraMPM[candIdx1 - GEO_MAX_NUM_UNI_CANDS + GEO_MAX_NUM_INTRA_CANDS];
      pcIntraPred->initIntraPatternChType(cu, pu.Y());
      pcIntraPred->predIntraAng(COMPONENT_Y, tmpGeoBuf1.Y(), pu);
      if (isChromaEnabled(pu.chromaFormat))
      {
        pu.intraDir[1] = pu.intraDir[0];
        pcIntraPred->initIntraPatternChType(cu, pu.Cb());
        pcIntraPred->predIntraAng(COMPONENT_Cb, tmpGeoBuf1.Cb(), pu);
        pcIntraPred->initIntraPatternChType(cu, pu.Cr());
        pcIntraPred->predIntraAng(COMPONENT_Cr, tmpGeoBuf1.Cr(), pu);
      }
    }
    else
    {
#endif
      // Load the motion of partition 1 into pu (same candidate selection as for partition 0).
#if JVET_W0097_GPM_MMVD_TM
#if TM_MRG
      if (geoTmFlag1)
      {
        geoTmMrgCtx1.setMergeInfo(pu, candIdx1);
      }
      else
#endif
      if (geoMMVDFlag1)
      {
        geoMrgCtx.setGeoMmvdMergeInfo(pu, candIdx1, geoMMVDIdx1);
      }
      else
#endif
      geoMrgCtx.setMergeInfo( pu, candIdx1 );

      motionCompensation(pu, tmpGeoBuf1, REF_PIC_LIST_X, true, isChromaEnabled(pu.chromaFormat)); // TODO: check 4:0:0 interaction with weighted prediction.
      if( g_mctsDecCheckEnabled && !MCTSHelper::checkMvBufferForMCTSConstraint( pu, true ) )
      {
        printf( "DECODER_GEO_PU: pu motion vector across tile boundaries (%d,%d,%d,%d)\n", pu.lx(), pu.ly(), pu.lwidth(), pu.lheight() );
      }
#if JVET_Y0065_GPM_INTRA
      if (isIntra0)
      {
        // mirror of the partition-0 handling above, for the case partition 0 is the intra one
        tmpGeoBuf1.roundToOutputBitdepth(tmpGeoBuf1, cu.slice->clpRngs());
#if ENABLE_OBMC
#if JVET_W0123_TIMD_FUSION
        PU::spanMotionInfo2(pu);
#else
        PU::spanMotionInfo(pu);
#endif
        cu.isobmcMC = true;
        subBlockOBMC(pu, &tmpGeoBuf1);
        cu.isobmcMC = false;
#endif
      }
    }

    // ---------------------------------------------------------------- blending
    if (pu.gpmIntraFlag)
    {
      if (reshapeLUT)
      {
        // Apply the LUT to the luma of the inter partition only; when both partitions are intra nothing is reshaped.
        if (!isIntra1)
        {
          tmpGeoBuf1.Y().rspSignal(*reshapeLUT);
        }
        else if (!isIntra0)
        {
          tmpGeoBuf0.Y().rspSignal(*reshapeLUT);
        }
      }
#if JVET_AA0058_GPM_ADP_BLD
      weightedGeoBlkRounded(pu, splitDir, bldIdx, isChromaEnabled(pu.chromaFormat) ? MAX_NUM_CHANNEL_TYPE : CHANNEL_TYPE_LUMA, predBuf, tmpGeoBuf0, tmpGeoBuf1);
#else
      weightedGeoBlkRounded(pu, splitDir, isChromaEnabled(pu.chromaFormat)? MAX_NUM_CHANNEL_TYPE : CHANNEL_TYPE_LUMA, predBuf, tmpGeoBuf0, tmpGeoBuf1);
#endif
    }
    else
#endif
#if JVET_AA0058_GPM_ADP_BLD
    weightedGeoBlk(pu, splitDir, bldIdx, isChromaEnabled(pu.chromaFormat) ? MAX_NUM_CHANNEL_TYPE : CHANNEL_TYPE_LUMA, predBuf, tmpGeoBuf0, tmpGeoBuf1);
#else
    weightedGeoBlk(pu, splitDir, isChromaEnabled(pu.chromaFormat)? MAX_NUM_CHANNEL_TYPE : CHANNEL_TYPE_LUMA, predBuf, tmpGeoBuf0, tmpGeoBuf1);
#endif
  }
}

#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
#if JVET_W0097_GPM_MMVD_TM && TM_MRG
// Rank the GPM split directions for a template-matching merge PU.
// For every split direction the two per-shape reference templates are blended into
// m_acYuvRefAMLTemplate and compared against the current template m_acYuvCurAMLTemplate;
// the cost-sorted list is returned through modeList / numValidInList.
// pRef*Part0/1 are indexed by TM shape; the nullptr slots are the shapes a partition never uses.
void InterPrediction::getBestGeoTMModeList(PredictionUnit &pu, uint8_t& numValidInList, uint8_t(&modeList)[GEO_NUM_SIG_PARTMODE], Pel* (&pRefTopPart0)[GEO_NUM_TM_MV_CAND], Pel* (&pRefLeftPart0)[GEO_NUM_TM_MV_CAND], Pel* (&pRefTopPart1)[GEO_NUM_TM_MV_CAND], Pel* (&pRefLeftPart1)[GEO_NUM_TM_MV_CAND])
{
  // Neither template available: fall back to the identity ordering.
  if (!m_bAMLTemplateAvailabe[0] && !m_bAMLTemplateAvailabe[1])
  {
    for (int i = 0; i < GEO_NUM_SIG_PARTMODE; ++i)
    {
      modeList[i] = i;
    }
    numValidInList = GEO_NUM_SIG_PARTMODE;
    return;
  }

  // Check split mode cost
  uint32_t uiCost[GEO_NUM_PARTITION_MODE] = { 0, };
  if (m_bAMLTemplateAvailabe[0])
  {
    // Top template: szPerLine (= PU width) samples per row, GEO_MODE_SEL_TM_SIZE rows.
    SizeType   szPerLine       = pu.lwidth();
    PelUnitBuf pcBufPredCurTop = PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[0][0], szPerLine, GEO_MODE_SEL_TM_SIZE));
    PelUnitBuf pcBufPredRefTop = PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[0][0], szPerLine, GEO_MODE_SEL_TM_SIZE));
    PelUnitBuf pcBufPredRefTopPart0[GEO_NUM_TM_MV_CAND] = {PelUnitBuf(),
                                                           PelUnitBuf(pu.chromaFormat, PelBuf(pRefTopPart0[GEO_TM_SHAPE_AL], szPerLine, GEO_MODE_SEL_TM_SIZE)),
                                                           PelUnitBuf(pu.chromaFormat, PelBuf(pRefTopPart0[GEO_TM_SHAPE_A ], szPerLine, GEO_MODE_SEL_TM_SIZE)),
                                                           PelUnitBuf()};
    PelUnitBuf pcBufPredRefTopPart1[GEO_NUM_TM_MV_CAND] = {PelUnitBuf(),
                                                           PelUnitBuf(pu.chromaFormat, PelBuf(pRefTopPart1[GEO_TM_SHAPE_AL], szPerLine, GEO_MODE_SEL_TM_SIZE)),
                                                           PelUnitBuf(),
                                                           PelUnitBuf(pu.chromaFormat, PelBuf(pRefTopPart1[GEO_TM_SHAPE_L ], szPerLine, GEO_MODE_SEL_TM_SIZE))};

    // The distortion parameters are bound once; pcBufPredRefTop is rewritten for every split direction below.
    DistParam cDistParam;
    cDistParam.applyWeight = false;
    m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false);

    for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; ++splitDir)
    {
      uint8_t shapeIdx0 = g_geoTmShape[0][g_GeoParams[splitDir][0]];
      uint8_t shapeIdx1 =
// (continuation of InterPrediction::getBestGeoTMModeList(): completes the `uint8_t shapeIdx1 =` declaration
//  inside the per-split-direction loop over the top template)
                          g_geoTmShape[1][g_GeoParams[splitDir][0]];
      // blend the two per-shape reference templates into pcBufPredRefTop, then measure it against the current template
      weightedGeoTpl<true>(pu, splitDir, pcBufPredRefTop, pcBufPredRefTopPart0[shapeIdx0], pcBufPredRefTopPart1[shapeIdx1]);
      uint32_t tempDist = (uint32_t)cDistParam.distFunc(cDistParam);
      uiCost[splitDir] += tempDist;
    }
  }

  if (m_bAMLTemplateAvailabe[1])
  {
    // Left template. The same memory is viewed with two geometries: the "...Tr" buffers
    // (szPerLine x GEO_MODE_SEL_TM_SIZE) feed the cost function, while the plain buffers
    // (GEO_MODE_SEL_TM_SIZE x szPerLine) are what weightedGeoTpl<false>() writes and reads.
    SizeType   szPerLine          = pu.lheight();
    PelUnitBuf pcBufPredCurLeftTr = PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[1][0], szPerLine, GEO_MODE_SEL_TM_SIZE)); // To enable SIMD for cost computation
    PelUnitBuf pcBufPredRefLeftTr = PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[1][0], szPerLine, GEO_MODE_SEL_TM_SIZE)); // To enable SIMD for cost computation
    PelUnitBuf pcBufPredRefLeft   = PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[1][0], GEO_MODE_SEL_TM_SIZE, szPerLine));
    PelUnitBuf pcBufPredRefLeftPart0[GEO_NUM_TM_MV_CAND] = {PelUnitBuf(),
                                                            PelUnitBuf(pu.chromaFormat, PelBuf(pRefLeftPart0[GEO_TM_SHAPE_AL], GEO_MODE_SEL_TM_SIZE, szPerLine)),
                                                            PelUnitBuf(pu.chromaFormat, PelBuf(pRefLeftPart0[GEO_TM_SHAPE_A ], GEO_MODE_SEL_TM_SIZE, szPerLine)),
                                                            PelUnitBuf()};
    PelUnitBuf pcBufPredRefLeftPart1[GEO_NUM_TM_MV_CAND] = {PelUnitBuf(),
                                                            PelUnitBuf(pu.chromaFormat, PelBuf(pRefLeftPart1[GEO_TM_SHAPE_AL], GEO_MODE_SEL_TM_SIZE, szPerLine)),
                                                            PelUnitBuf(),
                                                            PelUnitBuf(pu.chromaFormat, PelBuf(pRefLeftPart1[GEO_TM_SHAPE_L ], GEO_MODE_SEL_TM_SIZE, szPerLine))};

    DistParam cDistParam;
    cDistParam.applyWeight = false;
    m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeftTr.Y(), pcBufPredRefLeftTr.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); // To enable SIMD for cost computation

    for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; ++splitDir)
    {
      uint8_t shapeIdx0 = g_geoTmShape[0][g_GeoParams[splitDir][0]];
      uint8_t shapeIdx1 = g_geoTmShape[1][g_GeoParams[splitDir][0]];
      weightedGeoTpl<false>(pu, splitDir, pcBufPredRefLeft, pcBufPredRefLeftPart0[shapeIdx0], pcBufPredRefLeftPart1[shapeIdx1]);
      uint32_t tempDist = (uint32_t)cDistParam.distFunc(cDistParam);
      // top and left costs accumulate in the same per-direction slot
      uiCost[splitDir] += tempDist;
    }
  }

  // Find best N candidates
  numValidInList = (uint8_t)getIndexMappingTableToSortedArray1D<uint32_t, GEO_NUM_PARTITION_MODE, uint8_t, GEO_NUM_SIG_PARTMODE>(uiCost, modeList);
}
#endif

// Rank the GPM split directions for a regular (non-TM) GPM PU.
// pRef*Part0/1 are the inter reference templates of the two partitions. In JVET_Y0065_GPM_INTRA
// builds a non-null pIntraRef*PartN overrides the template of partition N with a
// per-split-direction intra template (indexed by splitDir).
// Results are returned through modeList / numValidInList.
void InterPrediction::getBestGeoModeList(PredictionUnit &pu, uint8_t& numValidInList, uint8_t(&modeList)[GEO_NUM_SIG_PARTMODE], Pel* pRefTopPart0, Pel* pRefLeftPart0, Pel* pRefTopPart1, Pel* pRefLeftPart1
#if JVET_Y0065_GPM_INTRA
                                       , Pel** pIntraRefTopPart0, Pel** pIntraRefLeftPart0, Pel** pIntraRefTopPart1, Pel** pIntraRefLeftPart1
#endif
)
{
  // Neither template available: fall back to the identity ordering.
  if (!m_bAMLTemplateAvailabe[0] && !m_bAMLTemplateAvailabe[1])
  {
    for (int i = 0; i < GEO_NUM_SIG_PARTMODE; ++i)
    {
      modeList[i] = i;
    }
    numValidInList = GEO_NUM_SIG_PARTMODE;
    return;
  }

  // Check split mode cost
  uint32_t uiCost[GEO_NUM_PARTITION_MODE] = { 0, };
  if (m_bAMLTemplateAvailabe[0])
  {
    SizeType   szPerLine            = pu.lwidth();
    PelUnitBuf pcBufPredCurTop      = PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[0][0], szPerLine, GEO_MODE_SEL_TM_SIZE));
    PelUnitBuf pcBufPredRefTop      = PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[0][0], szPerLine, GEO_MODE_SEL_TM_SIZE));
    PelUnitBuf pcBufPredRefTopPart0 = PelUnitBuf(pu.chromaFormat, PelBuf(pRefTopPart0,                szPerLine, GEO_MODE_SEL_TM_SIZE));
    PelUnitBuf pcBufPredRefTopPart1 = PelUnitBuf(pu.chromaFormat, PelBuf(pRefTopPart1,                szPerLine, GEO_MODE_SEL_TM_SIZE));

    DistParam cDistParam;
    cDistParam.applyWeight = false;
    m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false);

    for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; ++splitDir)
    {
#if JVET_Y0065_GPM_INTRA
      // Intra partitions swap in the template prefilled for this split direction; inter partitions keep their buffer.
      pcBufPredRefTopPart0.Y().buf = pIntraRefTopPart0 == nullptr ? pcBufPredRefTopPart0.Y().buf : pIntraRefTopPart0[splitDir];
      pcBufPredRefTopPart1.Y().buf = pIntraRefTopPart1 == nullptr ?
pcBufPredRefTopPart1.Y().buf : pIntraRefTopPart1[splitDir]; #endif weightedGeoTpl<true>(pu, splitDir, pcBufPredRefTop, pcBufPredRefTopPart0, pcBufPredRefTopPart1); uint32_t tempDist = (uint32_t)cDistParam.distFunc(cDistParam); uiCost[splitDir] += tempDist; } } if (m_bAMLTemplateAvailabe[1]) { SizeType szPerLine = pu.lheight(); PelUnitBuf pcBufPredCurLeftTr = PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[1][0], szPerLine, GEO_MODE_SEL_TM_SIZE)); // To enable SIMD for cost computation PelUnitBuf pcBufPredRefLeftTr = PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[1][0], szPerLine, GEO_MODE_SEL_TM_SIZE)); // To enable SIMD for cost computation PelUnitBuf pcBufPredRefLeft = PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[1][0], GEO_MODE_SEL_TM_SIZE, szPerLine)); PelUnitBuf pcBufPredRefLeftPart0 = PelUnitBuf(pu.chromaFormat, PelBuf(pRefLeftPart0, GEO_MODE_SEL_TM_SIZE, szPerLine)); PelUnitBuf pcBufPredRefLeftPart1 = PelUnitBuf(pu.chromaFormat, PelBuf(pRefLeftPart1, GEO_MODE_SEL_TM_SIZE, szPerLine)); DistParam cDistParam; cDistParam.applyWeight = false; m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeftTr.Y(), pcBufPredRefLeftTr.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); // To enable SIMD for cost computation for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; ++splitDir) { #if JVET_Y0065_GPM_INTRA pcBufPredRefLeftPart0.Y().buf = pIntraRefLeftPart0 == nullptr ? pcBufPredRefLeftPart0.Y().buf : pIntraRefLeftPart0[splitDir]; pcBufPredRefLeftPart1.Y().buf = pIntraRefLeftPart1 == nullptr ? 
pcBufPredRefLeftPart1.Y().buf : pIntraRefLeftPart1[splitDir]; #endif weightedGeoTpl<false>(pu, splitDir, pcBufPredRefLeft, pcBufPredRefLeftPart0, pcBufPredRefLeftPart1); uint32_t tempDist = (uint32_t)cDistParam.distFunc(cDistParam); uiCost[splitDir] += tempDist; } } // Find best N candidates numValidInList = (uint8_t)getIndexMappingTableToSortedArray1D<uint32_t, GEO_NUM_PARTITION_MODE, uint8_t, GEO_NUM_SIG_PARTMODE>(uiCost, modeList); } template <bool trueTFalseL> void InterPrediction::weightedGeoTpl( PredictionUnit &pu, const uint8_t splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1) { m_if.weightedGeoTpl<trueTFalseL>( pu, splitDir, predDst, predSrc0, predSrc1 ); } #endif #if JVET_AA0058_GPM_ADP_BLD void InterPrediction::weightedGeoBlk(PredictionUnit &pu, const uint8_t splitDir, const uint8_t bldIdx, int32_t channel, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1) { if (channel == CHANNEL_TYPE_LUMA) { m_if.weightedGeoBlk(pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, bldIdx, predDst, predSrc0, predSrc1); } else if (channel == CHANNEL_TYPE_CHROMA) { m_if.weightedGeoBlk(pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, bldIdx, predDst, predSrc0, predSrc1); m_if.weightedGeoBlk(pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, bldIdx, predDst, predSrc0, predSrc1); } else { m_if.weightedGeoBlk(pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, bldIdx, predDst, predSrc0, predSrc1); if (isChromaEnabled(pu.chromaFormat)) { m_if.weightedGeoBlk(pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, bldIdx, predDst, predSrc0, predSrc1); m_if.weightedGeoBlk(pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, bldIdx, predDst, predSrc0, predSrc1); } } } #if JVET_Y0065_GPM_INTRA void InterPrediction::weightedGeoBlkRounded(PredictionUnit &pu, const uint8_t splitDir, const uint8_t bldIdx, int32_t channel, 
PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1) { if (channel == CHANNEL_TYPE_LUMA) { m_if.weightedGeoBlkRounded(pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, bldIdx, predDst, predSrc0, predSrc1); } else if (channel == CHANNEL_TYPE_CHROMA) { m_if.weightedGeoBlkRounded(pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, bldIdx, predDst, predSrc0, predSrc1); m_if.weightedGeoBlkRounded(pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, bldIdx, predDst, predSrc0, predSrc1); } else { m_if.weightedGeoBlkRounded(pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, bldIdx, predDst, predSrc0, predSrc1); if (isChromaEnabled(pu.chromaFormat)) { m_if.weightedGeoBlkRounded(pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, bldIdx, predDst, predSrc0, predSrc1); m_if.weightedGeoBlkRounded(pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, bldIdx, predDst, predSrc0, predSrc1); } } } #endif #else void InterPrediction::weightedGeoBlk( PredictionUnit &pu, const uint8_t splitDir, int32_t channel, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1) { if( channel == CHANNEL_TYPE_LUMA ) { m_if.weightedGeoBlk( pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, predDst, predSrc0, predSrc1 ); } else if( channel == CHANNEL_TYPE_CHROMA ) { m_if.weightedGeoBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 ); m_if.weightedGeoBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 ); } else { m_if.weightedGeoBlk( pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, predDst, predSrc0, predSrc1 ); if (isChromaEnabled(pu.chromaFormat)) { m_if.weightedGeoBlk(pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1); m_if.weightedGeoBlk(pu, 
pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1); } } } #if JVET_Y0065_GPM_INTRA void InterPrediction::weightedGeoBlkRounded( PredictionUnit &pu, const uint8_t splitDir, int32_t channel, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1) { if( channel == CHANNEL_TYPE_LUMA ) { m_if.weightedGeoBlkRounded( pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, predDst, predSrc0, predSrc1 ); } else if( channel == CHANNEL_TYPE_CHROMA ) { m_if.weightedGeoBlkRounded( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 ); m_if.weightedGeoBlkRounded( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 ); } else { m_if.weightedGeoBlkRounded( pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, predDst, predSrc0, predSrc1 ); if (isChromaEnabled(pu.chromaFormat)) { m_if.weightedGeoBlkRounded(pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1); m_if.weightedGeoBlkRounded(pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1); } } } #endif #endif void InterPrediction::xPrefetch(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId, bool forLuma) { int offset, width, height; Mv cMv; const Picture* refPic = pu.cu->slice->getRefPic( refId, pu.refIdx[refId] )->unscaledPic; int mvShift = (MV_FRACTIONAL_BITS_INTERNAL); int start = 0; int end = MAX_NUM_COMPONENT; start = forLuma ? 0 : 1; end = forLuma ? 1 : MAX_NUM_COMPONENT; for (int compID = start; compID < end; compID++) { cMv = Mv(pu.mv[refId].getHor(), pu.mv[refId].getVer()); #if IF_12TAP pcPad.bufs[compID].stride = (pcPad.bufs[compID].width + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA(0)); int filtersize = (compID == (COMPONENT_Y)) ? 
NTAPS_LUMA(0) : NTAPS_CHROMA; #else pcPad.bufs[compID].stride = (pcPad.bufs[compID].width + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA); int filtersize = (compID == (COMPONENT_Y)) ? NTAPS_LUMA : NTAPS_CHROMA; #endif width = pcPad.bufs[compID].width; height = pcPad.bufs[compID].height; offset = (DMVR_NUM_ITERATION) * (pcPad.bufs[compID].stride + 1); int mvshiftTempHor = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat); int mvshiftTempVer = mvShift + getComponentScaleY((ComponentID)compID, pu.chromaFormat); width += (filtersize - 1); height += (filtersize - 1); cMv += Mv(-(((filtersize >> 1) - 1) << mvshiftTempHor), -(((filtersize >> 1) - 1) << mvshiftTempVer)); bool wrapRef = false; if( refPic->isWrapAroundEnabled( pu.cs->pps ) ) { wrapRef = wrapClipMv( cMv, pu.blocks[0].pos(), pu.blocks[0].size(), pu.cs->sps, pu.cs->pps ); } else { clipMv( cMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps ); } /* Pre-fetch similar to HEVC*/ { CPelBuf refBuf; Position Rec_offset = pu.blocks[compID].pos().offset(cMv.getHor() >> mvshiftTempHor, cMv.getVer() >> mvshiftTempVer); refBuf = refPic->getRecoBuf(CompArea((ComponentID)compID, pu.chromaFormat, Rec_offset, pu.blocks[compID].size()), wrapRef); PelBuf &dstBuf = pcPad.bufs[compID]; g_pelBufOP.copyBuffer((Pel *)refBuf.buf, refBuf.stride, ((Pel *)dstBuf.buf) + offset, dstBuf.stride, width, height); } } } void InterPrediction::xPad(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId) { int offset = 0, width, height; int padsize; Mv cMv; for (int compID = 0; compID < getNumberValidComponents(pu.chromaFormat); compID++) { #if IF_12TAP int filtersize = (compID == (COMPONENT_Y)) ? NTAPS_LUMA(0) : NTAPS_CHROMA; #else int filtersize = (compID == (COMPONENT_Y)) ? 
NTAPS_LUMA : NTAPS_CHROMA; #endif width = pcPad.bufs[compID].width; height = pcPad.bufs[compID].height; offset = (DMVR_NUM_ITERATION) * (pcPad.bufs[compID].stride + 1); /*using the larger padsize for 422*/ padsize = (DMVR_NUM_ITERATION) >> getComponentScaleY((ComponentID)compID, pu.chromaFormat); width += (filtersize - 1); height += (filtersize - 1); /*padding on all side of size DMVR_PAD_LENGTH*/ g_pelBufOP.padding(pcPad.bufs[compID].buf + offset, pcPad.bufs[compID].stride, width, height, padsize); } } inline int32_t div_for_maxq7(int64_t N, int64_t D) { int32_t sign, q; sign = 0; if (N < 0) { sign = 1; N = -N; } q = 0; D = (D << 3); if (N >= D) { N -= D; q++; } q = (q << 1); D = (D >> 1); if (N >= D) { N -= D; q++; } q = (q << 1); if (N >= (D >> 1)) { q++; } if (sign) { return (-q); } return(q); } void xSubPelErrorSrfc(uint64_t *sadBuffer, int32_t *deltaMv) { int64_t numerator, denominator; int32_t mvDeltaSubPel; int32_t mvSubPelLvl = 4;/*1: half pel, 2: Qpel, 3:1/8, 4: 1/16*/ /*horizontal*/ numerator = (int64_t)((sadBuffer[1] - sadBuffer[3]) << mvSubPelLvl); denominator = (int64_t)((sadBuffer[1] + sadBuffer[3] - (sadBuffer[0] << 1))); #if MULTI_PASS_DMVR if (denominator > 0) { if ((sadBuffer[1] != sadBuffer[0]) && (sadBuffer[3] != sadBuffer[0])) { mvDeltaSubPel = div_for_maxq7(numerator, denominator); deltaMv[0] = (mvDeltaSubPel); } else { if (sadBuffer[1] == sadBuffer[0]) { deltaMv[0] = -8;// half pel } else { deltaMv[0] = 8;// half pel } } } else { if (sadBuffer[1] < sadBuffer[3]) { deltaMv[0] = -8; } else if (sadBuffer[1] == sadBuffer[3]) { deltaMv[0] = 0; } else { deltaMv[0] = 8; } } #else if (0 != denominator) { if ((sadBuffer[1] != sadBuffer[0]) && (sadBuffer[3] != sadBuffer[0])) { mvDeltaSubPel = div_for_maxq7(numerator, denominator); deltaMv[0] = (mvDeltaSubPel); } else { if (sadBuffer[1] == sadBuffer[0]) { deltaMv[0] = -8;// half pel } else { deltaMv[0] = 8;// half pel } } } #endif /*vertical*/ numerator = (int64_t)((sadBuffer[2] - sadBuffer[4]) << 
mvSubPelLvl); denominator = (int64_t)((sadBuffer[2] + sadBuffer[4] - (sadBuffer[0] << 1))); #if MULTI_PASS_DMVR if (denominator > 0) { if ((sadBuffer[2] != sadBuffer[0]) && (sadBuffer[4] != sadBuffer[0])) { mvDeltaSubPel = div_for_maxq7(numerator, denominator); deltaMv[1] = (mvDeltaSubPel); } else { if (sadBuffer[2] == sadBuffer[0]) { deltaMv[1] = -8;// half pel } else { deltaMv[1] = 8;// half pel } } } else { if (sadBuffer[2] < sadBuffer[4]) { deltaMv[1] = -8; } else if (sadBuffer[2] == sadBuffer[4]) { deltaMv[1] = 0; } else { deltaMv[1] = 8; } } #else if (0 != denominator) { if ((sadBuffer[2] != sadBuffer[0]) && (sadBuffer[4] != sadBuffer[0])) { mvDeltaSubPel = div_for_maxq7(numerator, denominator); deltaMv[1] = (mvDeltaSubPel); } else { if (sadBuffer[2] == sadBuffer[0]) { deltaMv[1] = -8;// half pel } else { deltaMv[1] = 8;// half pel } } } #endif return; } void InterPrediction::xBIPMVRefine(int bd, Pel *pRefL0, Pel *pRefL1, uint64_t& minCost, int16_t *deltaMV, uint64_t *pSADsArray, int width, int height) { const int32_t refStrideL0 = m_biLinearBufStride; const int32_t refStrideL1 = m_biLinearBufStride; Pel *pRefL0Orig = pRefL0; Pel *pRefL1Orig = pRefL1; for (int nIdx = 0; (nIdx < 25); ++nIdx) { int32_t sadOffset = ((m_pSearchOffset[nIdx].getVer() * ((2 * DMVR_NUM_ITERATION) + 1)) + m_pSearchOffset[nIdx].getHor()); pRefL0 = pRefL0Orig + m_pSearchOffset[nIdx].hor + (m_pSearchOffset[nIdx].ver * refStrideL0); pRefL1 = pRefL1Orig - m_pSearchOffset[nIdx].hor - (m_pSearchOffset[nIdx].ver * refStrideL1); if (*(pSADsArray + sadOffset) == MAX_UINT64) { const uint64_t cost = xDMVRCost(bd, pRefL0, refStrideL0, pRefL1, refStrideL1, width, height); *(pSADsArray + sadOffset) = cost; } if (*(pSADsArray + sadOffset) < minCost) { minCost = *(pSADsArray + sadOffset); deltaMV[0] = m_pSearchOffset[nIdx].getHor(); deltaMV[1] = m_pSearchOffset[nIdx].getVer(); } } } void InterPrediction::xFinalPaddedMCForDMVR(PredictionUnit &pu, PelUnitBuf &pcYuvSrc0, PelUnitBuf &pcYuvSrc1, PelUnitBuf 
&pcPad0, PelUnitBuf &pcPad1, const bool bioApplied, const Mv mergeMV[NUM_REF_PIC_LIST_01], bool blockMoved) { int offset, deltaIntMvX, deltaIntMvY; PelUnitBuf pcYUVTemp = pcYuvSrc0; PelUnitBuf pcPadTemp = pcPad0; /*always high precision MVs are used*/ int mvShift = MV_FRACTIONAL_BITS_INTERNAL; for (int k = 0; k < NUM_REF_PIC_LIST_01; k++) { RefPicList refId = (RefPicList)k; Mv cMv = pu.mv[refId]; m_iRefListIdx = refId; const Picture* refPic = pu.cu->slice->getRefPic( refId, pu.refIdx[refId] )->unscaledPic; Mv cMvClipped = cMv; if( !pu.cs->pps->getWrapAroundEnabledFlag() ) { clipMv( cMvClipped, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps ); } Mv startMv = mergeMV[refId]; if( g_mctsDecCheckEnabled && !MCTSHelper::checkMvForMCTSConstraint( pu, startMv, MV_PRECISION_INTERNAL ) ) { const Area& tileArea = pu.cs->picture->mctsInfo.getTileArea(); printf( "Attempt an access over tile boundary at block %d,%d %d,%d with MV %d,%d (in Tile TL: %d,%d BR: %d,%d)\n", pu.lx(), pu.ly(), pu.lwidth(), pu.lheight(), startMv.getHor(), startMv.getVer(), tileArea.topLeft().x, tileArea.topLeft().y, tileArea.bottomRight().x, tileArea.bottomRight().y ); THROW( "MCTS constraint failed!" 
); } for (int compID = 0; compID < getNumberValidComponents(pu.chromaFormat); compID++) { Pel *srcBufPelPtr = NULL; int pcPadstride = 0; if (blockMoved || (compID == 0)) { pcPadstride = pcPadTemp.bufs[compID].stride; int mvshiftTempHor = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat); int mvshiftTempVer = mvShift + getComponentScaleY((ComponentID)compID, pu.chromaFormat); int leftPixelExtra; if (compID == COMPONENT_Y) { #if IF_12TAP leftPixelExtra = (NTAPS_LUMA(0) >> 1) - 1; #else leftPixelExtra = (NTAPS_LUMA >> 1) - 1; #endif } else { leftPixelExtra = (NTAPS_CHROMA >> 1) - 1; } PelBuf &srcBuf = pcPadTemp.bufs[compID]; deltaIntMvX = (cMv.getHor() >> mvshiftTempHor) - (startMv.getHor() >> mvshiftTempHor); deltaIntMvY = (cMv.getVer() >> mvshiftTempVer) - (startMv.getVer() >> mvshiftTempVer); CHECK((abs(deltaIntMvX) > DMVR_NUM_ITERATION) || (abs(deltaIntMvY) > DMVR_NUM_ITERATION), "not expected DMVR movement"); offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (pcPadTemp.bufs[compID].stride + 1); offset += (deltaIntMvY)* pcPadTemp.bufs[compID].stride; offset += (deltaIntMvX); srcBufPelPtr = (srcBuf.buf + offset); } JVET_J0090_SET_CACHE_ENABLE(false); xPredInterBlk((ComponentID) compID, pu, refPic, cMvClipped, pcYUVTemp, true, pu.cs->slice->getClpRngs().comp[compID], bioApplied, false, pu.cu->slice->getScalingRatio(refId, pu.refIdx[refId]), 0, 0, 0, srcBufPelPtr, pcPadstride); JVET_J0090_SET_CACHE_ENABLE(false); } pcYUVTemp = pcYuvSrc1; pcPadTemp = pcPad1; } } uint64_t InterPrediction::xDMVRCost(int bitDepth, Pel* pOrg, uint32_t refStride, const Pel* pRef, uint32_t orgStride, int width, int height) { DistParam cDistParam; cDistParam.applyWeight = false; cDistParam.useMR = false; m_pcRdCost->setDistParam(cDistParam, pOrg, pRef, orgStride, refStride, bitDepth, COMPONENT_Y, width, height, 1); uint64_t uiCost = cDistParam.distFunc(cDistParam); return uiCost>>1; } void xDMVRSubPixelErrorSurface(bool notZeroCost, int16_t *totalDeltaMV, int16_t *deltaMV, 
uint64_t *pSADsArray) { int sadStride = (((2 * DMVR_NUM_ITERATION) + 1)); uint64_t sadbuffer[5]; if (notZeroCost && (abs(totalDeltaMV[0]) != (2 << MV_FRACTIONAL_BITS_INTERNAL)) && (abs(totalDeltaMV[1]) != (2 << MV_FRACTIONAL_BITS_INTERNAL))) { int32_t tempDeltaMv[2] = { 0,0 }; sadbuffer[0] = pSADsArray[0]; sadbuffer[1] = pSADsArray[-1]; sadbuffer[2] = pSADsArray[-sadStride]; sadbuffer[3] = pSADsArray[1]; sadbuffer[4] = pSADsArray[sadStride]; xSubPelErrorSrfc(sadbuffer, tempDeltaMv); totalDeltaMV[0] += tempDeltaMv[0]; totalDeltaMV[1] += tempDeltaMv[1]; } } void InterPrediction::xinitMC(PredictionUnit& pu, const ClpRngs &clpRngs) { const int refIdx0 = pu.refIdx[0]; const int refIdx1 = pu.refIdx[1]; /*use merge MV as starting MV*/ Mv mergeMVL0(pu.mv[REF_PIC_LIST_0]); Mv mergeMVL1(pu.mv[REF_PIC_LIST_1]); /*Clip the starting MVs*/ if( !pu.cs->pps->getWrapAroundEnabledFlag() ) { clipMv( mergeMVL0, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps ); clipMv( mergeMVL1, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps ); } /*L0 MC for refinement*/ { int offset; #if IF_12TAP int leftPixelExtra = (NTAPS_LUMA(0) >> 1) - 1; #else int leftPixelExtra = (NTAPS_LUMA >> 1) - 1; #endif offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (m_cYuvRefBuffDMVRL0.bufs[COMPONENT_Y].stride + 1); offset += (-(int)DMVR_NUM_ITERATION)* (int)m_cYuvRefBuffDMVRL0.bufs[COMPONENT_Y].stride; offset += (-(int)DMVR_NUM_ITERATION); PelBuf srcBuf = m_cYuvRefBuffDMVRL0.bufs[COMPONENT_Y]; PelUnitBuf yuvPredTempL0 = PelUnitBuf(pu.chromaFormat, PelBuf(m_cYuvPredTempDMVRL0, m_biLinearBufStride , pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION))); xPredInterBlk( COMPONENT_Y, pu, pu.cu->slice->getRefPic( REF_PIC_LIST_0, refIdx0 )->unscaledPic, mergeMVL0, yuvPredTempL0, true, clpRngs.comp[COMPONENT_Y], false, false, pu.cu->slice->getScalingRatio( REF_PIC_LIST_0, refIdx0 ), pu.lwidth() + ( 2 * DMVR_NUM_ITERATION ), pu.lheight() + ( 2 * DMVR_NUM_ITERATION ), true, ( (Pel 
*)srcBuf.buf ) + offset, srcBuf.stride ); } /*L1 MC for refinement*/ { int offset; #if IF_12TAP int leftPixelExtra = (NTAPS_LUMA(0) >> 1) - 1; #else int leftPixelExtra = (NTAPS_LUMA >> 1) - 1; #endif offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (m_cYuvRefBuffDMVRL1.bufs[COMPONENT_Y].stride + 1); offset += (-(int)DMVR_NUM_ITERATION)* (int)m_cYuvRefBuffDMVRL1.bufs[COMPONENT_Y].stride; offset += (-(int)DMVR_NUM_ITERATION); PelBuf srcBuf = m_cYuvRefBuffDMVRL1.bufs[COMPONENT_Y]; PelUnitBuf yuvPredTempL1 = PelUnitBuf(pu.chromaFormat, PelBuf(m_cYuvPredTempDMVRL1, m_biLinearBufStride , pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION))); xPredInterBlk( COMPONENT_Y, pu, pu.cu->slice->getRefPic( REF_PIC_LIST_1, refIdx1 )->unscaledPic, mergeMVL1, yuvPredTempL1, true, clpRngs.comp[COMPONENT_Y], false, false, pu.cu->slice->getScalingRatio( REF_PIC_LIST_1, refIdx1 ), pu.lwidth() + ( 2 * DMVR_NUM_ITERATION ), pu.lheight() + ( 2 * DMVR_NUM_ITERATION ), true, ( (Pel *)srcBuf.buf ) + offset, srcBuf.stride ); } } void InterPrediction::xProcessDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvDst, const ClpRngs &clpRngs, const bool bioApplied) { #if MULTI_PASS_DMVR CHECK( true, "DMVR is removed when MULTI_PASS_DMVR is turned on." 
);
#else
  // Body of xProcessDMVR: splits the PU into sub-blocks of at most
  // DMVR_SUBCU_WIDTH x DMVR_SUBCU_HEIGHT luma samples, searches a mirrored MV
  // offset for each sub-block and writes the final bi-prediction into pcYuvDst.
  int iterationCount = 1;
  /*Always High Precision*/
  int mvShift = MV_FRACTIONAL_BITS_INTERNAL;

  /*use merge MV as starting MV*/
  Mv mergeMv[] = { pu.mv[REF_PIC_LIST_0] , pu.mv[REF_PIC_LIST_1] };

  m_biLinearBufStride = (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION));

  int dy = std::min<int>(pu.lumaSize().height, DMVR_SUBCU_HEIGHT);
  int dx = std::min<int>(pu.lumaSize().width, DMVR_SUBCU_WIDTH);

  Position puPos = pu.lumaPos();

  int bd = pu.cs->slice->getClpRngs().comp[COMPONENT_Y].bd;

  // Sub-blocks whose best cost falls below this threshold get BIO switched off.
  int bioEnabledThres = 2 * dy * dx;
  bool bioAppliedType[MAX_NUM_SUBCU_DMVR];

#if JVET_J0090_MEMORY_BANDWITH_MEASURE
  // Measurement only: report every reference sample covered by the interpolation
  // footprint of the un-refined MVs to the cache model.
  JVET_J0090_SET_CACHE_ENABLE(true);
  for (int k = 0; k < NUM_REF_PIC_LIST_01; k++)
  {
    RefPicList refId = (RefPicList)k;
    const Picture* refPic = pu.cu->slice->getRefPic(refId, pu.refIdx[refId]);
    for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++)
    {
      Mv cMv = pu.mv[refId];
      int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat);
      // NOTE(review): other code in this file spells the luma tap count NTAPS_LUMA(0)
      // when IF_12TAP is set; confirm this path still builds in that configuration.
      int filtersize = (compID == (COMPONENT_Y)) ? NTAPS_LUMA : NTAPS_CHROMA;
      cMv += Mv(-(((filtersize >> 1) - 1) << mvshiftTemp), -(((filtersize >> 1) - 1) << mvshiftTemp));
      bool wrapRef = false;
      if ( pu.cs->pps->getWrapAroundEnabledFlag() )
      {
        wrapRef = wrapClipMv(cMv, pu.blocks[0].pos(), pu.blocks[0].size(), pu.cs->sps, pu.cs->pps);
      }
      else
      {
        clipMv(cMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps);
      }

      int width = pcYuvDst.bufs[compID].width + (filtersize - 1);
      int height = pcYuvDst.bufs[compID].height + (filtersize - 1);

      CPelBuf refBuf;
      Position recOffset = pu.blocks[compID].pos().offset(cMv.getHor() >> mvshiftTemp, cMv.getVer() >> mvshiftTemp);
      refBuf = refPic->getRecoBuf(CompArea((ComponentID)compID, pu.chromaFormat, recOffset, pu.blocks[compID].size()), wrapRef);

      JVET_J0090_SET_REF_PICTURE(refPic, (ComponentID)compID);
      for (int row = 0; row < height; row++)
      {
        for (int col = 0; col < width; col++)
        {
          JVET_J0090_CACHE_ACCESS(((Pel *)refBuf.buf) + row * refBuf.stride + col, __FILE__, __LINE__);
        }
      }
    }
  }
  JVET_J0090_SET_CACHE_ENABLE(false);
#endif

  {
    // Running index of the current sub-block (indexes bioAppliedType and pu.mvdL0SubPu).
    int num = 0;

    int scaleX = getComponentScaleX(COMPONENT_Cb, pu.chromaFormat);
    int scaleY = getComponentScaleY(COMPONENT_Cb, pu.chromaFormat);
    m_biLinearBufStride = (dx + (2 * DMVR_NUM_ITERATION));
    // Point the MC buffers at the centre sample of the search window so that each
    // candidate offset is reached by plain pointer addition.
    Pel *biLinearPredL0 = m_cYuvPredTempDMVRL0 + (DMVR_NUM_ITERATION * m_biLinearBufStride) + DMVR_NUM_ITERATION;
    Pel *biLinearPredL1 = m_cYuvPredTempDMVRL1 + (DMVR_NUM_ITERATION * m_biLinearBufStride) + DMVR_NUM_ITERATION;

    PredictionUnit subPu = pu;
    subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(puPos.x, puPos.y, dx, dy)));

    // Reference sample buffers (luma only for CHROMA_400), restricted to one sub-block.
    m_cYuvRefBuffDMVRL0 = (pu.chromaFormat == CHROMA_400 ?
      PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y())) :
      PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y()),
        PelBuf(m_cRefSamplesDMVRL0[1], pcYuvDst.Cb()), PelBuf(m_cRefSamplesDMVRL0[2], pcYuvDst.Cr())));
    m_cYuvRefBuffDMVRL0 = m_cYuvRefBuffDMVRL0.subBuf(UnitAreaRelative(pu, subPu));

    m_cYuvRefBuffDMVRL1 = (pu.chromaFormat == CHROMA_400 ?
      PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y())) :
      PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y()),
        PelBuf(m_cRefSamplesDMVRL1[1], pcYuvDst.Cb()), PelBuf(m_cRefSamplesDMVRL1[2], pcYuvDst.Cr())));
    m_cYuvRefBuffDMVRL1 = m_cYuvRefBuffDMVRL1.subBuf(UnitAreaRelative(pu, subPu));

    // Per-list prediction buffers handed to the final MC and the weighted average.
    PelUnitBuf srcPred0 = (pu.chromaFormat == CHROMA_400 ?
      PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvDst.Y())) :
      PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvDst.Y()),
        PelBuf(m_acYuvPred[0][1], pcYuvDst.Cb()), PelBuf(m_acYuvPred[0][2], pcYuvDst.Cr())));
    PelUnitBuf srcPred1 = (pu.chromaFormat == CHROMA_400 ?
      PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvDst.Y())) :
      PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvDst.Y()),
        PelBuf(m_acYuvPred[1][1], pcYuvDst.Cb()), PelBuf(m_acYuvPred[1][2], pcYuvDst.Cr())));

    srcPred0 = srcPred0.subBuf(UnitAreaRelative(pu, subPu));
    srcPred1 = srcPred1.subBuf(UnitAreaRelative(pu, subPu));

    // (x, y) are picture positions, (xStart, yStart) the offsets inside the PU.
    int yStart = 0;
    for (int y = puPos.y; y < (puPos.y + pu.lumaSize().height); y = y + dy, yStart = yStart + dy)
    {
      for (int x = puPos.x, xStart = 0; x < (puPos.x + pu.lumaSize().width); x = x + dx, xStart = xStart + dx)
      {
        subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(x, y, dx, dy)));
        subPu.mv[0] = mergeMv[0];
        subPu.mv[1] = mergeMv[1];
        xPrefetch(subPu, m_cYuvRefBuffDMVRL0, REF_PIC_LIST_0, 1);
        xPrefetch(subPu, m_cYuvRefBuffDMVRL1, REF_PIC_LIST_1, 1);

        xinitMC(subPu, clpRngs);

        uint64_t minCost = MAX_UINT64;
        bool notZeroCost = true;
        int16_t totalDeltaMV[2] = { 0,0 };
        int16_t deltaMV[2] = { 0, 0 };
        uint64_t *pSADsArray;
        for (int i = 0; i < (((2 * DMVR_NUM_ITERATION) + 1) * ((2 * DMVR_NUM_ITERATION) + 1)); i++)
        {
          m_SADsArray[i] = MAX_UINT64;
        }
        // Start at the centre element of the square cost grid.
        pSADsArray = &m_SADsArray[(((2 * DMVR_NUM_ITERATION) + 1) * ((2 * DMVR_NUM_ITERATION) + 1)) >> 1];
        for (int i = 0; i < iterationCount; i++)
        {
          deltaMV[0] = 0;
          deltaMV[1] = 0;
          // The L1 candidate is displaced by the negated L0 offset.
          Pel *addrL0 = biLinearPredL0 + totalDeltaMV[0] + (totalDeltaMV[1] * m_biLinearBufStride);
          Pel *addrL1 = biLinearPredL1 - totalDeltaMV[0] - (totalDeltaMV[1] * m_biLinearBufStride);
          if (i == 0)
          {
            minCost = xDMVRCost(clpRngs.comp[COMPONENT_Y].bd, addrL0, m_biLinearBufStride, addrL1, m_biLinearBufStride, dx, dy);
            // The starting position's cost is reduced by a quarter before it is stored.
            minCost -= (minCost >>2);
            pSADsArray[0] = minCost;
            // Early exit: cost below the sub-block sample count.
            if (minCost < (dx * dy))
            {
              notZeroCost = false;
              break;
            }
          }
          if (!minCost)
          {
            notZeroCost = false;
            break;
          }

          xBIPMVRefine(bd, addrL0, addrL1, minCost, deltaMV, pSADsArray, dx, dy);

          if (deltaMV[0] == 0 && deltaMV[1] == 0)
          {
            break;
          }
          totalDeltaMV[0] += deltaMV[0];
          totalDeltaMV[1] += deltaMV[1];
          // Keep pSADsArray centred on the new best position.
          pSADsArray += ((deltaMV[1] * (((2 * DMVR_NUM_ITERATION) + 1))) + deltaMV[0]);
        }

        bioAppliedType[num] = (minCost < bioEnabledThres) ? false : bioApplied;
        // Convert the integer offset to internal MV precision before sub-pel refinement.
        totalDeltaMV[0] = (totalDeltaMV[0] << mvShift);
        totalDeltaMV[1] = (totalDeltaMV[1] << mvShift);
        xDMVRSubPixelErrorSurface(notZeroCost, totalDeltaMV, deltaMV, pSADsArray);

        pu.mvdL0SubPu[num] = Mv(totalDeltaMV[0], totalDeltaMV[1]);
        PelUnitBuf subPredBuf = pcYuvDst.subBuf(UnitAreaRelative(pu, subPu));

        bool blockMoved = false;
        if (pu.mvdL0SubPu[num] != Mv(0, 0))
        {
          blockMoved = true;
          if (isChromaEnabled(pu.chromaFormat))
          {
            xPrefetch(subPu, m_cYuvRefBuffDMVRL0, REF_PIC_LIST_0, 0);
            xPrefetch(subPu, m_cYuvRefBuffDMVRL1, REF_PIC_LIST_1, 0);
          }
          xPad(subPu, m_cYuvRefBuffDMVRL0, REF_PIC_LIST_0);
          xPad(subPu, m_cYuvRefBuffDMVRL1, REF_PIC_LIST_1);
        }

        int dstStride[MAX_NUM_COMPONENT] = { pcYuvDst.bufs[COMPONENT_Y].stride,
                                             isChromaEnabled(pu.chromaFormat) ? pcYuvDst.bufs[COMPONENT_Cb].stride : 0,
                                             isChromaEnabled(pu.chromaFormat) ? pcYuvDst.bufs[COMPONENT_Cr].stride : 0};
        // Refined MVs: the offset is added for list 0 and subtracted for list 1.
        subPu.mv[0] = mergeMv[REF_PIC_LIST_0] + pu.mvdL0SubPu[num];
        subPu.mv[1] = mergeMv[REF_PIC_LIST_1] - pu.mvdL0SubPu[num];

        subPu.mv[0].clipToStorageBitDepth();
        subPu.mv[1].clipToStorageBitDepth();

        xFinalPaddedMCForDMVR(subPu, srcPred0, srcPred1, m_cYuvRefBuffDMVRL0, m_cYuvRefBuffDMVRL1, bioAppliedType[num], mergeMv, blockMoved);

        // Re-point the destination view at this sub-block inside pcYuvDst.
        subPredBuf.bufs[COMPONENT_Y].buf = pcYuvDst.bufs[COMPONENT_Y].buf + xStart + yStart * dstStride[COMPONENT_Y];

        if (isChromaEnabled(pu.chromaFormat))
        {
          subPredBuf.bufs[COMPONENT_Cb].buf = pcYuvDst.bufs[COMPONENT_Cb].buf + (xStart >> scaleX) + ((yStart >> scaleY) * dstStride[COMPONENT_Cb]);
          subPredBuf.bufs[COMPONENT_Cr].buf = pcYuvDst.bufs[COMPONENT_Cr].buf + (xStart >> scaleX) + ((yStart >> scaleY) * dstStride[COMPONENT_Cr]);
        }
        xWeightedAverage(subPu, srcPred0, srcPred1, subPredBuf, subPu.cu->slice->getSPS()->getBitDepths(), subPu.cu->slice->clpRngs(), bioAppliedType[num]);
        num++;
      }
    }
  }
  JVET_J0090_SET_CACHE_ENABLE(true);
#endif
}

#if JVET_AA0061_IBC_MBVD
void InterPrediction::sortIbcMergeMbvdCandidates(PredictionUnit &pu,
MergeCtx& mrgCtx, uint32_t * ibcMbvdLUT,uint32_t * ibcMbvdValidNum, int ibcMbvdIdx)
{
  // Orders IBC MBVD candidates by template cost.  ibcMbvdLUT is first set to the
  // identity mapping; candidates with a valid block vector are then inserted, by
  // increasing cost, into the first encGrpSize slots of their group, and
  // ibcMbvdValidNum[base] is incremented for each of them.  With ibcMbvdIdx == -1
  // all candidates are evaluated, otherwise only the group containing ibcMbvdIdx.
  const int tempNum = (const int) (std::min<int>(IBC_MBVD_BASE_NUM, mrgCtx.numValidMergeCand) * IBC_MBVD_MAX_REFINE_NUM);
  const int groupSize = std::min<int>(tempNum, ADAPTIVE_SUB_GROUP_SIZE_IBC_MBVD);
  Distortion candCostList[IBC_MBVD_BASE_NUM* IBC_MBVD_MAX_REFINE_NUM];

  for (uint32_t i = 0; i < tempNum; i++)
  {
    ibcMbvdLUT[i] = i;
    candCostList[i] = MAX_UINT;
  }

  Distortion uiCost;
  DistParam cDistParam;
  cDistParam.applyWeight = false;

  int nWidth = pu.lumaSize().width;
  int nHeight = pu.lumaSize().height;

  // No usable template around the current block: leave the LUT as identity.
  if (!xAMLIBCGetCurBlkTemplate(pu, nWidth, nHeight))
  {
    return;
  }

  int startMMVDIdx = 0;
  int endMMVDIdx = tempNum;
  if(ibcMbvdIdx!= -1)
  {
    uint32_t gpId = ibcMbvdIdx/groupSize;
    startMMVDIdx = gpId * groupSize;
    endMMVDIdx = (gpId+1) * groupSize;
  }

  // Number of leading slots per group that are kept sorted.
  int encGrpSize = IBC_MBVD_SIZE_ENC;
  int baseIdx = 0;

  const int cuPelX = pu.Y().x;
  const int cuPelY = pu.Y().y;
  int roiWidth = pu.lwidth();
  int roiHeight = pu.lheight();
  const int picWidth = pu.cs->slice->getPPS()->getPicWidthInLumaSamples();
  const int picHeight = pu.cs->slice->getPPS()->getPicHeightInLumaSamples();
  const unsigned int lcuWidth = pu.cs->slice->getSPS()->getMaxCUWidth();

  for (int mmvdMergeCand = startMMVDIdx; mmvdMergeCand < endMMVDIdx; mmvdMergeCand++)
  {
    // Loads the candidate into pu; a true return value means the candidate is skipped.
    bool mbvdCandMisAlign = mrgCtx.setIbcMbvdMergeCandiInfo(pu, mmvdMergeCand, mmvdMergeCand);
    if (mbvdCandMisAlign)
    {
      continue;
    }

    int xPred = pu.bv.getHor();
    int yPred = pu.bv.getVer();

    if (!PU::searchBv(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, xPred, yPred, lcuWidth)) // not valid bv derived
    {
      continue;
    }

    baseIdx = mmvdMergeCand / IBC_MBVD_MAX_REFINE_NUM;
    ibcMbvdValidNum[baseIdx]++;

    uiCost = 0;

    // Views onto the member template storage: reference vs. current, top vs. left.
    PelUnitBuf pcBufPredRefTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE)));
    PelUnitBuf pcBufPredCurTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE)));
    PelUnitBuf pcBufPredRefLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight)));
    PelUnitBuf pcBufPredCurLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight)));

    getIBCAMLRefTemplate(pu, nWidth, nHeight);

    if (m_bAMLTemplateAvailabe[0])
    {
      m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false);

      uiCost += cDistParam.distFunc(cDistParam);
    }

    if (m_bAMLTemplateAvailabe[1])
    {
      m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false);

      uiCost += cDistParam.distFunc(cDistParam);
    }

    // update part
    // Insertion into the sorted head of the group: count how many of the current
    // entries are more expensive, shift those down by one and place the candidate.
    uint32_t i;
    uint32_t shift = 0;
    uint32_t gpIdx = mmvdMergeCand/groupSize;
    uint32_t endIdx = gpIdx * groupSize + encGrpSize;
    while (shift < encGrpSize && uiCost < candCostList[endIdx - 1 - shift])
    {
      shift++;
    }
    if (shift != 0)
    {
      for (i = 1; i < shift; i++)
      {
        ibcMbvdLUT[endIdx - i] = ibcMbvdLUT[endIdx - 1 - i];
        candCostList[endIdx - i] = candCostList[endIdx - 1 - i];
      }
      ibcMbvdLUT[endIdx - shift] = mmvdMergeCand;
      candCostList[endIdx - shift] = uiCost;
    }
  }
}
#endif

#if JVET_AA0061_IBC_MBVD || (JVET_W0090_ARMC_TM && JVET_Y0058_IBC_LIST_MODIFY)
// Copies the reconstructed luma samples above and to the left of the current block
// into m_acYuvCurAMLTemplate.  Updates m_bAMLTemplateAvailabe[0] (top) and [1] (left)
// and returns false when neither template is available.
bool InterPrediction::xAMLIBCGetCurBlkTemplate(PredictionUnit& pu, int nCurBlkWidth, int nCurBlkHeight)
{
  m_bAMLTemplateAvailabe[0] = xAMLIsTopTempAvailable(pu);
  m_bAMLTemplateAvailabe[1] = xAMLIsLeftTempAvailable(pu);

  if (!m_bAMLTemplateAvailabe[0] && !m_bAMLTemplateAvailabe[1])
  {
    return false;
  }

  const Picture& currPic = *pu.cs->picture;
  const CPelBuf recBuf = currPic.getRecoBuf(pu.cs->picture->blocks[COMPONENT_Y]);
  /* std::vector<Pel>& invLUT = m_pcReshape->getInvLUT();*/

  if (m_bAMLTemplateAvailabe[0])
  {
    // Top template: AML_MERGE_TEMPLATE_SIZE rows directly above the block.
    const Pel* rec = recBuf.bufAt(pu.blocks[COMPONENT_Y].pos().offset(0, -AML_MERGE_TEMPLATE_SIZE));
    PelBuf pcYBuf = PelBuf(m_acYuvCurAMLTemplate[0][0], nCurBlkWidth,
AML_MERGE_TEMPLATE_SIZE); Pel* pcY = pcYBuf.bufAt(0, 0); for (int k = 0; k < nCurBlkWidth; k++) { for (int l = 0; l < AML_MERGE_TEMPLATE_SIZE; l++) { int recVal = rec[k + l * recBuf.stride]; pcY[k + l * nCurBlkWidth] = recVal; } } } if (m_bAMLTemplateAvailabe[1]) { PelBuf pcYBuf = PelBuf(m_acYuvCurAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nCurBlkHeight); Pel* pcY = pcYBuf.bufAt(0, 0); const Pel* rec = recBuf.bufAt(pu.blocks[COMPONENT_Y].pos().offset(-AML_MERGE_TEMPLATE_SIZE, 0)); for (int k = 0; k < nCurBlkHeight; k++) { for (int l = 0; l < AML_MERGE_TEMPLATE_SIZE; l++) { int recVal = rec[recBuf.stride * k + l]; pcY[AML_MERGE_TEMPLATE_SIZE * k + l] = recVal; } } } return true; } void InterPrediction::getIBCAMLRefTemplate(PredictionUnit &pu, int nCurBlkWidth, int nCurBlkHeight) { Mv mvCurr; mvCurr = pu.bv; const int lumaShift = 2 + MV_FRACTIONAL_BITS_DIFF; const int horShift = (lumaShift + ::getComponentScaleX(COMPONENT_Y, pu.chromaFormat)); const int verShift = (lumaShift + ::getComponentScaleY(COMPONENT_Y, pu.chromaFormat)); const Picture& currPic = *pu.cs->picture; const CPelBuf recBuf = currPic.getRecoBuf(pu.cs->picture->blocks[COMPONENT_Y]); /* std::vector<Pel>& invLUT = m_pcReshape->getInvLUT();*/ if (m_bAMLTemplateAvailabe[0]) { Mv mvTop(0, -AML_MERGE_TEMPLATE_SIZE); #if JVET_AA0070_RRIBC if (pu.cu->rribcFlipType == 2) { mvTop.setVer(nCurBlkHeight); } #endif mvTop += mvCurr; MotionInfo miTop; miTop.mv[0] = Mv(mvTop.hor <<horShift , mvTop.ver<< verShift); miTop.refIdx[0] = MAX_NUM_REF; #if JVET_AA0070_RRIBC if (pu.cu->rribcFlipType == 2) { if (!PU::checkIsIBCCandidateValid(pu, miTop, true, true)) { mvTop.setVer(mvCurr.getVer() + nCurBlkHeight - AML_MERGE_TEMPLATE_SIZE); } } else #endif if (!PU::checkIsIBCCandidateValid(pu, miTop)) { mvTop = mvCurr; } const Pel* rec = recBuf.bufAt(pu.blocks[COMPONENT_Y].pos().offset(mvTop.hor, mvTop.ver)); PelBuf pcYBuf = PelBuf(m_acYuvRefAMLTemplate[0][0], nCurBlkWidth, AML_MERGE_TEMPLATE_SIZE); Pel* pcY = pcYBuf.bufAt(0, 
0); for (int k = 0; k < nCurBlkWidth; k++) { for (int l = 0; l < AML_MERGE_TEMPLATE_SIZE; l++) { #if JVET_AA0070_RRIBC int recVal; if (pu.cu->rribcFlipType == 0) { recVal = rec[k + l * recBuf.stride]; } else if (pu.cu->rribcFlipType == 1) { recVal = rec[nCurBlkWidth - 1 - k + l * recBuf.stride]; } else { recVal = rec[k + (AML_MERGE_TEMPLATE_SIZE - 1 - l) * recBuf.stride]; } #else int recVal = rec[k + l * recBuf.stride]; #endif pcY[k + l * nCurBlkWidth] = recVal; } } } if (m_bAMLTemplateAvailabe[1]) { Mv mvLeft(-AML_MERGE_TEMPLATE_SIZE, 0); #if JVET_AA0070_RRIBC if (pu.cu->rribcFlipType == 1) { mvLeft.setHor(nCurBlkWidth); } #endif mvLeft += mvCurr; MotionInfo miLeft; miLeft.mv[0] = Mv(mvLeft.hor <<horShift , mvLeft.ver<< verShift); miLeft.refIdx[0] = MAX_NUM_REF; #if JVET_AA0070_RRIBC if (pu.cu->rribcFlipType == 1) { if (!PU::checkIsIBCCandidateValid(pu, miLeft, true, false)) { mvLeft.setHor(mvCurr.getHor() + nCurBlkWidth - AML_MERGE_TEMPLATE_SIZE); } } else #endif if (!PU::checkIsIBCCandidateValid(pu, miLeft)) { mvLeft = mvCurr; } PelBuf pcYBuf = PelBuf(m_acYuvRefAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nCurBlkHeight); Pel* pcY = pcYBuf.bufAt(0, 0); const Pel* rec = recBuf.bufAt(pu.blocks[COMPONENT_Y].pos().offset( mvLeft.hor, mvLeft.ver)); for (int k = 0; k < nCurBlkHeight; k++) { for (int l = 0; l < AML_MERGE_TEMPLATE_SIZE; l++) { #if JVET_AA0070_RRIBC int recVal; if (pu.cu->rribcFlipType == 0) { recVal = rec[recBuf.stride * k + l]; } else if (pu.cu->rribcFlipType == 1) { recVal = rec[recBuf.stride * k + AML_MERGE_TEMPLATE_SIZE - 1 - l]; } else { recVal = rec[recBuf.stride * (nCurBlkHeight - 1 - k) + l]; } #else int recVal = rec[recBuf.stride * k + l]; #endif pcY[AML_MERGE_TEMPLATE_SIZE * k + l] = recVal; } } } } #endif #if JVET_J0090_MEMORY_BANDWITH_MEASURE void InterPrediction::cacheAssign( CacheModel *cache ) { m_cacheModel = cache; m_if.cacheAssign( cache ); m_if.initInterpolationFilter( !cache->isCacheEnable() ); } #endif #if 
JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED #if JVET_AA0093_ENHANCED_MMVD_EXTENSION void InterPrediction::sortInterMergeMMVDCandidates(PredictionUnit &pu, MergeCtx& mrgCtx, uint32_t * mmvdLUT, int16_t MMVDIdx) { const int tempNum = (const int) (std::min<int>(MMVD_BASE_MV_NUM, mrgCtx.numValidMergeCand) * MMVD_MAX_REFINE_NUM); const int groupSize = std::min<int>(tempNum, ADAPTIVE_SUB_GROUP_SIZE_MMVD); #if _WINDOWS Distortion candCostList[MMVD_BASE_MV_NUM* MMVD_MAX_REFINE_NUM]; #else Distortion candCostList[tempNum] ; #endif for (uint32_t i = 0; i < tempNum; i++) { mmvdLUT[i] = i; candCostList[i] = MAX_UINT; } Distortion uiCost; DistParam cDistParam; cDistParam.applyWeight = false; int nWidth = pu.lumaSize().width; int nHeight = pu.lumaSize().height; if (!xAMLGetCurBlkTemplate(pu, nWidth, nHeight) #if JVET_AA0132_CONFIGURABLE_TM_TOOLS || !pu.cs->sps->getUseTMMMVD() #endif ) { return; } // Store int8_t posList0 = 0; int8_t posList1 = 0; bool load0 = false; bool load1 = false; int startMMVDIdx = 0; int endMMVDIdx = tempNum; if(MMVDIdx != -1) { uint32_t gpId = MMVDIdx/groupSize; startMMVDIdx = gpId * groupSize; endMMVDIdx = (gpId+1) * groupSize; } int shiftEnc = MMVD_SIZE_SHIFT; int encGrpSize = groupSize >> shiftEnc; encGrpSize /= MMVD_BI_DIR; #if !JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC uint8_t curIdx = 0; double threshold = 1.02; #endif encGrpSize <<= MMVD_SIZE_SHIFT; // Joint for all const Slice &slice = *pu.cs->slice; const int mvShift = MV_FRACTIONAL_BITS_DIFF; const int refMvdCands[] = { 1 << mvShift , 2 << mvShift , 4 << mvShift , 8 << mvShift , 16 << mvShift , 32 << mvShift }; const int xDir[] = {1, -1, 0, 0, 1, -1, 1, -1, 2, -2, -2, 2, 1, -1, -1, 1}; const int yDir[] = {0, 0, 1, -1, 1, -1, -1, 1, 1, -1, 1, -1, 2, -2, 2, -2}; pu.mmvdMergeFlag = true; pu.mergeFlag = true; pu.regularMergeFlag = true; pu.mergeType = MRG_TYPE_DEFAULT_N; pu.bdmvrRefine = false; pu.mvd[REF_PIC_LIST_0] = Mv(); pu.mvd[REF_PIC_LIST_1] = Mv(); pu.mvpIdx[REF_PIC_LIST_0] = NOT_VALID; 
pu.mvpIdx[REF_PIC_LIST_1] = NOT_VALID; pu.mvpNum[REF_PIC_LIST_0] = NOT_VALID; pu.mvpNum[REF_PIC_LIST_1] = NOT_VALID; #if MULTI_HYP_PRED pu.addHypData.clear(); pu.numMergedAddHyps = 0; #endif // Joint for all ends for (int fPosBaseIdx = startMMVDIdx/MMVD_MAX_REFINE_NUM; fPosBaseIdx < endMMVDIdx/MMVD_MAX_REFINE_NUM; fPosBaseIdx++) { // joint for each base const int refList0 = mrgCtx.mmvdBaseMv[fPosBaseIdx][0].refIdx; const int refList1 = mrgCtx.mmvdBaseMv[fPosBaseIdx][1].refIdx; pu.cu->imv = mrgCtx.mmvdUseAltHpelIf[fPosBaseIdx] ? IMV_HPEL : 0; #if INTER_LIC pu.cu->LICFlag = mrgCtx.LICFlags[fPosBaseIdx]; #endif pu.cu->BcwIdx = (mrgCtx.interDirNeighbours[fPosBaseIdx] == 3) ? mrgCtx.BcwIdx[fPosBaseIdx] : BCW_DEFAULT; pu.refIdx[REF_PIC_LIST_0] = refList0; pu.refIdx[REF_PIC_LIST_1] = refList1; // joint for each base ends //Store posList0 = 0; posList1 = 0; load0 = false; load1 = false; for (int tempIdx = 0; tempIdx < MMVD_MAX_REFINE_NUM; tempIdx++) { // special for each case int fPosStep = 0; int fPosPosition = 0; Mv tempMv[2]; fPosStep = tempIdx / MMVD_MAX_DIR; fPosPosition = tempIdx - fPosStep * MMVD_MAX_DIR; // special for each case /* Skipping dir 16 to 47 for uni */ if ((fPosPosition >= MMVD_MAX_DIR_UNI) && ((refList0 == -1) || (refList1 == -1))) { continue; } int candIdx = fPosBaseIdx * MMVD_MAX_REFINE_NUM + tempIdx; pu.mmvdMergeIdx = candIdx; pu.mergeIdx = candIdx; int offset = refMvdCands[fPosStep]; if ( pu.cu->slice->getPicHeader()->getDisFracMMVD() ) { offset <<= 2; } if ((refList0 != -1) && (refList1 != -1)) { tempMv[0] = Mv(0,0); tempMv[1] = Mv(0,0); int fPosPosition1 = fPosPosition % MMVD_MAX_DIR_UNI; int fPosPosition2 = fPosPosition / MMVD_MAX_DIR_UNI; if (fPosPosition2 == 0) { tempMv[0] = Mv(xDir[fPosPosition1] * offset, yDir[fPosPosition1] * offset); //Store posList0 = fPosPosition1 + 1; posList1 = 0; load0 = false; load1 = (fPosPosition1 != 0) || (fPosStep != 0); } else if (fPosPosition2 == 1) { tempMv[1] = Mv(xDir[fPosPosition1] * offset, 
yDir[fPosPosition1] * offset); //Store posList0 = 0; posList1 = fPosPosition1 + 1; load0 = (fPosPosition1 != 0) || (fPosStep != 0); load1 = false; } else { const int poc0 = slice.getRefPOC(REF_PIC_LIST_0, refList0); const int poc1 = slice.getRefPOC(REF_PIC_LIST_1, refList1); const int currPoc = slice.getPOC(); tempMv[0] = Mv(xDir[fPosPosition1] * offset, yDir[fPosPosition1] * offset); if ((poc1 - currPoc)*(poc0 - currPoc) > 0) { tempMv[1] = tempMv[0]; //Store posList0 = fPosPosition1 + 1; posList1 = fPosPosition1 + 1; load0 = true; load1 = true; } else { tempMv[1].set(-1 * tempMv[0].getHor(), -1 * tempMv[0].getVer()); //Store posList0 = fPosPosition1 + 1; posList1 = (fPosPosition1 ^ 1) + 1; load0 = true; load1 = true; } } pu.interDir = 3; pu.mv[REF_PIC_LIST_0] = mrgCtx.mmvdBaseMv[fPosBaseIdx][0].mv + tempMv[0]; pu.mv[REF_PIC_LIST_1] = mrgCtx.mmvdBaseMv[fPosBaseIdx][1].mv + tempMv[1]; pu.mv[0].clipToStorageBitDepth(); pu.mv[1].clipToStorageBitDepth(); } else if (refList0 != -1) { tempMv[0] = Mv(xDir[fPosPosition] * offset, yDir[fPosPosition] * offset); pu.interDir = 1; pu.mv[REF_PIC_LIST_0] = mrgCtx.mmvdBaseMv[fPosBaseIdx][0].mv + tempMv[0]; pu.mv[REF_PIC_LIST_1] = Mv(0, 0); pu.mv[0].clipToStorageBitDepth(); } else { tempMv[1] = Mv(xDir[fPosPosition] * offset, yDir[fPosPosition] * offset); pu.interDir = 2; pu.mv[REF_PIC_LIST_0] = Mv(0, 0); pu.mv[REF_PIC_LIST_1] = mrgCtx.mmvdBaseMv[fPosBaseIdx][1].mv + tempMv[1]; pu.mv[1].clipToStorageBitDepth(); } // Derive prediction for (int refList = 0; refList < 2; refList++) { if (pu.refIdx[refList] >= 0) { pu.mv[refList].roundToPrecision(MV_PRECISION_QUARTER, MV_PRECISION_INT); } } uiCost = 0; PelUnitBuf pcBufPredRefTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE))); PelUnitBuf pcBufPredCurTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE))); PelUnitBuf pcBufPredRefLeft = (PelUnitBuf(pu.chromaFormat, 
PelBuf(m_acYuvRefAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight))); PelUnitBuf pcBufPredCurLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight))); //Store if ((refList0 != -1) && (refList1 != -1)) { getBlkAMLRefTemplate(pu, pcBufPredRefTop, pcBufPredRefLeft, posList0, posList1, load0, load1); } else { getBlkAMLRefTemplate(pu, pcBufPredRefTop, pcBufPredRefLeft); } if (m_bAMLTemplateAvailabe[0]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[1]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } #if JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC if (m_bAMLTemplateAvailabe[0] && !m_bAMLTemplateAvailabe[1]) { uiCost += (uiCost * nHeight) / nWidth; } if (!m_bAMLTemplateAvailabe[0] && m_bAMLTemplateAvailabe[1]) { uiCost += (uiCost * nWidth) / nHeight; } #endif // update part uint32_t i; uint32_t shift = 0; uint32_t gpIdx = candIdx/groupSize; uint32_t endIdx = gpIdx * groupSize + encGrpSize; while (shift < encGrpSize && uiCost < candCostList[endIdx - 1 - shift]) { shift++; } if (shift != 0) { for (i = 1; i < shift; i++) { mmvdLUT[endIdx - i] = mmvdLUT[endIdx - 1 - i]; candCostList[endIdx - i] = candCostList[endIdx - 1 - i]; } mmvdLUT[endIdx - shift] = candIdx; candCostList[endIdx - shift] = uiCost; } } #if !JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC int startPoint = fPosBaseIdx * MMVD_MAX_REFINE_NUM; for (int i = 1; i < encGrpSize; i++) { if(((double)candCostList[startPoint+i]/(double)candCostList[startPoint+curIdx]) > threshold) { mmvdLUT[startPoint+curIdx+1] = mmvdLUT[startPoint+i]; candCostList[startPoint+curIdx+1] = candCostList[startPoint+i]; curIdx++; if (curIdx >= (encGrpSize >> MMVD_SIZE_SHIFT) -1) { break; 
} } } curIdx = 0; #endif } #if JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC Distortion lambdaTh = pu.cs->slice->getCostForARMC(); startMMVDIdx = 0; endMMVDIdx = MMVD_BASE_MV_NUM; if (MMVDIdx != -1) { uint32_t gpId = MMVDIdx / groupSize; startMMVDIdx = gpId; endMMVDIdx = (gpId + 1); } for (int mmvdBaseIdx = startMMVDIdx; mmvdBaseIdx < endMMVDIdx; mmvdBaseIdx++) { uint32_t startIdx = ( mmvdBaseIdx ) * groupSize; int maxCand = (MMVD_MAX_REFINE_NUM >> MMVD_SIZE_SHIFT)/MMVD_BI_DIR; bool enoughDiverse = false; uint32_t diverseCand = 0; uint32_t posToBeInserted = 0; for (uint32_t uiCand = startIdx; uiCand < startIdx + maxCand - 1; ++uiCand) { for (uint32_t uiMergeCand = uiCand + 1; uiMergeCand < startIdx + groupSize - 2; ++uiMergeCand) { if (lambdaTh <= abs((int)(candCostList[uiMergeCand] - candCostList[uiCand]))) { enoughDiverse = true; for (uint32_t uiMergeCand3 = startIdx; uiMergeCand3 < uiCand; ++uiMergeCand3) { if (lambdaTh > abs((int)(candCostList[uiMergeCand] - candCostList[uiMergeCand3]))) { enoughDiverse = false; break; } } if (enoughDiverse) { diverseCand = uiMergeCand; posToBeInserted = uiCand + 1; break; } } } if (enoughDiverse) { Distortion saveCandCost = candCostList[diverseCand]; uint32_t mmvdLUTSave = mmvdLUT[diverseCand]; for (int i = diverseCand; i > posToBeInserted - 1; i--) { candCostList[i] = candCostList[i - 1]; mmvdLUT[i] = mmvdLUT[i - 1]; } candCostList[posToBeInserted] = saveCandCost; mmvdLUT[posToBeInserted] = mmvdLUTSave; } } } #endif } #else void InterPrediction::sortInterMergeMMVDCandidates(PredictionUnit& pu, MergeCtx& mrgCtx, uint32_t* mmvdLUT, uint32_t MMVDIdx) { const int tempNum = (const int)(std::min<int>(MMVD_BASE_MV_NUM, mrgCtx.numValidMergeCand) * MMVD_MAX_REFINE_NUM); const int groupSize = std::min<int>(tempNum, ADAPTIVE_SUB_GROUP_SIZE_MMVD); #if _WINDOWS Distortion candCostList[MMVD_BASE_MV_NUM * MMVD_MAX_REFINE_NUM]; #else Distortion candCostList[tempNum]; #endif for (uint32_t i = 0; i < tempNum; i++) { mmvdLUT[i] = i; candCostList[i] 
= MAX_UINT;
  }

  // Remainder of the sortInterMergeMMVDCandidates variant used when
  // JVET_AA0093_ENHANCED_MMVD_EXTENSION is off: every candidate of the selected
  // range is loaded into pu through the merge context, its template cost is
  // measured and its index is insertion-sorted into the head of its group.
  Distortion uiCost;
  DistParam cDistParam;
  cDistParam.applyWeight = false;

  int nWidth = pu.lumaSize().width;
  int nHeight = pu.lumaSize().height;

  // Without a usable template (or with the tool disabled in the SPS) the identity
  // LUT written above is the result.
  if (!xAMLGetCurBlkTemplate(pu, nWidth, nHeight)
#if JVET_AA0132_CONFIGURABLE_TM_TOOLS
    || !pu.cs->sps->getUseTMMMVD()
#endif
    )
  {
    return;
  }

  int startMMVDIdx = 0;
  int endMMVDIdx = tempNum;
  // MMVDIdx is unsigned here, so the comparison matches the all-ones value a caller
  // obtains by passing -1; any other value restricts the work to one group.
  if (MMVDIdx != -1)
  {
    uint32_t gpId = MMVDIdx / groupSize;
    startMMVDIdx = gpId * groupSize;
    endMMVDIdx = (gpId + 1) * groupSize;
  }

  // Number of leading slots per group that are kept sorted.
#if JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC
  int encGrpSize = groupSize;
#else
  int shiftEnc = MMVD_SIZE_SHIFT;
  int encGrpSize = groupSize >> shiftEnc;
#endif

  for (int mmvdMergeCand = startMMVDIdx; mmvdMergeCand < endMMVDIdx; mmvdMergeCand++)
  {
    mrgCtx.setMmvdMergeCandiInfo(pu, mmvdMergeCand, mmvdMergeCand);
    for (int refList = 0; refList < 2; refList++)
    {
      if (pu.refIdx[refList] >= 0)
      {
        pu.mv[refList].roundToPrecision(MV_PRECISION_QUARTER, MV_PRECISION_INT);
      }
    }

    uiCost = 0;

    // Views onto the member template storage: reference vs. current, top vs. left.
    PelUnitBuf pcBufPredRefTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE)));
    PelUnitBuf pcBufPredCurTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE)));
    PelUnitBuf pcBufPredRefLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight)));
    PelUnitBuf pcBufPredCurLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight)));

    getBlkAMLRefTemplate(pu, pcBufPredRefTop, pcBufPredRefLeft);

    if (m_bAMLTemplateAvailabe[0])
    {
      m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false);

      uiCost += cDistParam.distFunc(cDistParam);
    }

    if (m_bAMLTemplateAvailabe[1])
    {
      m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false);

      uiCost += cDistParam.distFunc(cDistParam);
    }
#if JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC
    // Only one template available: scale the cost up by the ratio of the missing
    // side to the available side.
    if (m_bAMLTemplateAvailabe[0] && !m_bAMLTemplateAvailabe[1])
    {
      uiCost += (uiCost * nHeight) / nWidth;
    }
    if (!m_bAMLTemplateAvailabe[0] && m_bAMLTemplateAvailabe[1])
    {
      uiCost += (uiCost * nWidth) / nHeight;
    }
#endif

    // update part
    // Insertion into the sorted head of the group: count how many of the current
    // entries are more expensive, shift those down by one and place the candidate.
    uint32_t i;
    uint32_t shift = 0;
    uint32_t gpIdx = mmvdMergeCand/groupSize;
    uint32_t endIdx = gpIdx * groupSize + encGrpSize;
    while (shift < encGrpSize && uiCost < candCostList[endIdx - 1 - shift])
    {
      shift++;
    }
    if (shift != 0)
    {
      for (i = 1; i < shift; i++)
      {
        mmvdLUT[endIdx - i] = mmvdLUT[endIdx - 1 - i];
        candCostList[endIdx - i] = candCostList[endIdx - 1 - i];
      }
      mmvdLUT[endIdx - shift] = mmvdMergeCand;
      candCostList[endIdx - shift] = uiCost;
    }
  }
#if JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC
  // Diversity pass: within each group, move forward a later candidate whose cost
  // differs from the already accepted ones by at least lambdaTh.
  // NOTE(review): with MMVDIdx == -1 this loop always visits MMVD_BASE_MV_NUM groups,
  // while only tempNum cost entries were initialised - confirm callers guarantee
  // enough valid base candidates.
  Distortion lambdaTh = pu.cs->slice->getCostForARMC();
  startMMVDIdx = 0;
  endMMVDIdx = MMVD_BASE_MV_NUM;
  if (MMVDIdx != -1)
  {
    uint32_t gpId = MMVDIdx / groupSize;
    startMMVDIdx = gpId;
    endMMVDIdx = (gpId + 1);
  }
  for (int mmvdBaseIdx = startMMVDIdx; mmvdBaseIdx < endMMVDIdx; mmvdBaseIdx++)
  {
    uint32_t startIdx = (mmvdBaseIdx)*groupSize;
    int maxCand = (MMVD_MAX_REFINE_NUM >> MMVD_SIZE_SHIFT);
    bool enoughDiverse = false;
    uint32_t diverseCand = 0;
    uint32_t posToBeInserted = 0;
    for (uint32_t uiCand = startIdx; uiCand < startIdx + maxCand - 1; ++uiCand)
    {
      for (uint32_t uiMergeCand = uiCand + 1; uiMergeCand < startIdx + groupSize - 2; ++uiMergeCand)
      {
        if (lambdaTh <= abs((int)(candCostList[uiMergeCand] - candCostList[uiCand])))
        {
          enoughDiverse = true;
          for (uint32_t uiMergeCand3 = startIdx; uiMergeCand3 < uiCand; ++uiMergeCand3)
          {
            if (lambdaTh > abs((int)(candCostList[uiMergeCand] - candCostList[uiMergeCand3])))
            {
              enoughDiverse = false;
              break;
            }
          }
          if (enoughDiverse)
          {
            diverseCand = uiMergeCand;
            posToBeInserted = uiCand + 1;
            break;
          }
        }
      }
      if (enoughDiverse)
      {
        // Rotate the chosen candidate into posToBeInserted.
        Distortion saveCandCost = candCostList[diverseCand];
        uint32_t mmvdLUTSave = mmvdLUT[diverseCand];
        for (int i = diverseCand; i > posToBeInserted - 1; i--)
        {
          candCostList[i] = candCostList[i - 1];
          mmvdLUT[i] = mmvdLUT[i - 1];
        }
        candCostList[posToBeInserted] = saveCandCost;
        mmvdLUT[posToBeInserted] = mmvdLUTSave;
      }
    }
  }
#endif
}
#endif
#endif

#if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED
#if JVET_AA0093_ENHANCED_MMVD_EXTENSION
// Affine counterpart of the MMVD candidate sort: affMmvdLUT is set to the identity
// mapping and candidates are then ordered by template cost.  Note that pu is taken
// by value, so the scratch modifications made below do not reach the caller.
void InterPrediction::sortAffineMergeCandidates(PredictionUnit pu, AffineMergeCtx& affMrgCtx, uint32_t * affMmvdLUT, int16_t afMMVDIdx, bool fromStart)
{
  int baseIdxToMergeIdxOffset = (int)PU::getMergeIdxFromAfMmvdBaseIdx(affMrgCtx, 0);
  int baseCount = std::min<int>((int)AF_MMVD_BASE_NUM, affMrgCtx.numValidMergeCand - baseIdxToMergeIdxOffset);
  const int tempNum = baseCount * AF_MMVD_MAX_REFINE_NUM;
  const int groupSize = std::min<int>(tempNum, ADAPTIVE_SUB_GROUP_SIZE_MMVD_AFF);
  // NOTE(review): outside _WINDOWS this is a variable-length array (a compiler
  // extension), and it is declared before the baseCount < 1 check below, so its
  // size may be zero or negative - confirm.
#if _WINDOWS
  Distortion candCostList[AF_MMVD_BASE_NUM * AF_MMVD_MAX_REFINE_NUM];
#else
  Distortion candCostList[tempNum];
#endif
  for (uint32_t i = 0; i < tempNum; i++)
  {
    affMmvdLUT[i] = i;
    candCostList[i] = MAX_UINT;
  }
  if (baseCount < 1
#if JVET_AA0132_CONFIGURABLE_TM_TOOLS
    || !pu.cs->sps->getUseTMMMVD()
#endif
    )
  {
    return;
  }

  Distortion uiCost;
  DistParam cDistParam;
  cDistParam.applyWeight = false;

  int nWidth = pu.lumaSize().width;
  int nHeight = pu.lumaSize().height;
  if (!xAMLGetCurBlkTemplate(pu, nWidth, nHeight))
  {
    return;
  }

  int startMMVDIdx = 0;
  int endMMVDIdx = tempNum;
  int8_t gpId = -1;
  int stepCutoff = AF_MMVD_STEP_NUM >> 1;
  if(afMMVDIdx != -1)
  {
    gpId = afMMVDIdx/groupSize;
    startMMVDIdx = fromStart ?
0 : gpId * groupSize; endMMVDIdx = (gpId + 2) * groupSize; gpId += (afMMVDIdx % 2); } int shiftEnc = AFFINE_MMVD_SIZE_SHIFT; int encGrpSize = groupSize >> shiftEnc; encGrpSize /= AFFINE_BI_DIR; //changed pu.cu->affine = true; pu.cu->imv = IMV_OFF; pu.cu->mmvdSkip = false; pu.regularMergeFlag = false; pu.mmvdMergeFlag = false; pu.mergeFlag = true; pu.afMmvdFlag = true; pu.cu->LICFlag = false; pu.ciipFlag = false; //Store int8_t posList0 = 0; int8_t posList1 = 0; bool load0 = false; bool load1 = false; for (int fPosBaseIdx = startMMVDIdx/AF_MMVD_MAX_REFINE_NUM; fPosBaseIdx < endMMVDIdx/AF_MMVD_MAX_REFINE_NUM; fPosBaseIdx++) { int baseIdx = fPosBaseIdx; pu.afMmvdBaseIdx = (uint8_t)baseIdx; pu.mergeIdx = (uint8_t)(baseIdxToMergeIdxOffset + baseIdx); pu.mergeType = affMrgCtx.mergeType[pu.mergeIdx]; pu.interDir = affMrgCtx.interDirNeighbours[pu.mergeIdx]; pu.cu->affineType = affMrgCtx.affineType[pu.mergeIdx]; pu.cu->BcwIdx = affMrgCtx.BcwIdx[pu.mergeIdx]; //Store posList0 = 0; posList1 = 0; load0 = false; load1 = false; int startSub = 0; int endSub = AF_MMVD_MAX_REFINE_NUM; if (!fromStart) { if (gpId % 2) { startSub = groupSize; } else { endSub = groupSize; } } for (int tempIdx = startSub; tempIdx < endSub; tempIdx++) { uint16_t mmvdMergeCand = fPosBaseIdx * AF_MMVD_MAX_REFINE_NUM + tempIdx; pu.afMmvdMergeIdx = (uint16_t)mmvdMergeCand; int stepIdx = tempIdx; int dirIdx = stepIdx % AF_MMVD_OFFSET_DIR; int fPosPosition1 = dirIdx % 8; stepIdx = stepIdx / AF_MMVD_OFFSET_DIR; pu.afMmvdDir = (uint8_t)dirIdx; pu.afMmvdStep = (uint8_t)stepIdx; //changed if ((pu.afMmvdDir >= 8) && (pu.interDir < 3)) { continue; } MvField mvfMmvd[2][3]; PU::getAfMmvdMvf(pu, affMrgCtx, mvfMmvd, pu.mergeIdx, pu.afMmvdStep, pu.afMmvdDir); #if JVET_Z0067_RPR_ENABLE bool bIsRefScaled = false; #endif for (int i = 0; i < 2; i++) { if( pu.cs->slice->getNumRefIdx( RefPicList( i ) ) > 0 ) { pu.mvpIdx[i] = 0; pu.mvpNum[i] = 0; pu.mvd[i] = Mv(); pu.refIdx[i] = mvfMmvd[i][0].refIdx; pu.mvAffi[i][0] = 
mvfMmvd[i][0].mv; pu.mvAffi[i][1] = mvfMmvd[i][1].mv; pu.mvAffi[i][2] = mvfMmvd[i][2].mv; } #if JVET_Z0067_RPR_ENABLE if ( !bIsRefScaled && pu.refIdx[i]>=0 && pu.cu->slice->getRefPic(i ? REF_PIC_LIST_1 : REF_PIC_LIST_0, pu.refIdx[i])->isRefScaled(pu.cs->pps) ) { bIsRefScaled = true; } #endif } //Store if (pu.interDir == 3) { if (dirIdx < 8) { posList0 = fPosPosition1 + 1; posList1 = 0; load0 = false; load1 = (fPosPosition1 != 0) || (stepIdx != 0 && stepIdx != stepCutoff ); } else if (dirIdx < 16) { posList0 = 0; posList1 = fPosPosition1 + 1; load0 = (fPosPosition1 != 0) || (stepIdx != 0 && stepIdx != stepCutoff ); load1 = false; } else { const int poc0 = pu.cu->slice->getRefPOC(REF_PIC_LIST_0, mvfMmvd[0][0].refIdx); const int poc1 = pu.cu->slice->getRefPOC(REF_PIC_LIST_1, mvfMmvd[0][0].refIdx); const int currPoc = pu.cu->slice->getPOC(); if ((poc1 - currPoc)*(poc0 - currPoc) > 0) { posList0 = fPosPosition1 + 1; posList1 = fPosPosition1 + 1; load0 = true; load1 = true; } else { posList0 = fPosPosition1 + 1; posList1 = (fPosPosition1 ^ 1) + 1; load0 = true; load1 = true; } } } uiCost = 0; #if JVET_Z0067_RPR_ENABLE if ( bIsRefScaled ) { uiCost = std::numeric_limits<Distortion>::max(); } else { #endif PelUnitBuf pcBufPredRefTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE))); PelUnitBuf pcBufPredCurTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE))); PelUnitBuf pcBufPredRefLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight))); PelUnitBuf pcBufPredCurLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight))); //Store if (pu.interDir == 3) { getAffAMLRefTemplate(pu, pcBufPredRefTop, pcBufPredRefLeft, posList0, posList1, load0, load1); } else { getAffAMLRefTemplate(pu, pcBufPredRefTop, pcBufPredRefLeft); } if (m_bAMLTemplateAvailabe[0]) { 
m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[1]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } #if JVET_Z0067_RPR_ENABLE } #endif // update part uint32_t i; uint32_t shift = 0; uint32_t gpIdx = mmvdMergeCand/groupSize; uint32_t endIdx = gpIdx * groupSize + encGrpSize; while (shift < encGrpSize && uiCost < candCostList[endIdx - 1 - shift]) { shift++; } if (shift != 0) { for (i = 1; i < shift; i++) { affMmvdLUT[endIdx - i] = affMmvdLUT[endIdx - 1 - i]; candCostList[endIdx - i] = candCostList[endIdx - 1 - i]; } affMmvdLUT[endIdx - shift] = mmvdMergeCand; candCostList[endIdx - shift] = uiCost; } } for (int j = encGrpSize - 1; j >= 0 ; j--) { affMmvdLUT[fPosBaseIdx * AF_MMVD_MAX_REFINE_NUM + 2*j] = affMmvdLUT[fPosBaseIdx * AF_MMVD_MAX_REFINE_NUM + j]; } for (int j = 0; j < encGrpSize ; j++) { affMmvdLUT[fPosBaseIdx * AF_MMVD_MAX_REFINE_NUM + 2*j + 1] = affMmvdLUT[fPosBaseIdx * AF_MMVD_MAX_REFINE_NUM + groupSize + j]; } } } #else void InterPrediction::sortAffineMergeCandidates(PredictionUnit pu, AffineMergeCtx& affMrgCtx, uint32_t * affMmvdLUT, uint32_t afMMVDIdx) { const int tempNum = AF_MMVD_NUM; int baseIdxToMergeIdxOffset = (int)PU::getMergeIdxFromAfMmvdBaseIdx(affMrgCtx, 0); int baseCount = std::min<int>((int)AF_MMVD_BASE_NUM, affMrgCtx.numValidMergeCand - baseIdxToMergeIdxOffset); const int groupSize = std::min<int>(tempNum, ADAPTIVE_SUB_GROUP_SIZE_MMVD_AFF); Distortion candCostList[tempNum]; for (uint32_t i = 0; i < tempNum; i++) { affMmvdLUT[i] = i; candCostList[i] = MAX_UINT; } if (baseCount < 1 #if JVET_AA0132_CONFIGURABLE_TM_TOOLS || !pu.cs->sps->getUseTMMMVD() #endif ) { return; } Distortion uiCost; DistParam cDistParam; cDistParam.applyWeight = 
false; int nWidth = pu.lumaSize().width; int nHeight = pu.lumaSize().height; if (!xAMLGetCurBlkTemplate(pu, nWidth, nHeight)) { return; } int startMMVDIdx = 0; int endMMVDIdx = tempNum; if(afMMVDIdx != -1) { uint32_t gpId = afMMVDIdx/groupSize; startMMVDIdx = gpId * groupSize; endMMVDIdx = (gpId+1) * groupSize; } int shiftEnc = AFFINE_MMVD_SIZE_SHIFT; int encGrpSize = groupSize >> shiftEnc; for (int mmvdMergeCand = startMMVDIdx; mmvdMergeCand < endMMVDIdx; mmvdMergeCand++) { pu.afMmvdMergeIdx = (uint8_t)mmvdMergeCand; int baseIdx = (int)mmvdMergeCand / AF_MMVD_MAX_REFINE_NUM; int stepIdx = (int)mmvdMergeCand - baseIdx * AF_MMVD_MAX_REFINE_NUM; int dirIdx = stepIdx % AF_MMVD_OFFSET_DIR; stepIdx = stepIdx / AF_MMVD_OFFSET_DIR; pu.cu->affine = true; pu.cu->imv = IMV_OFF; pu.cu->mmvdSkip = false; pu.regularMergeFlag = false; pu.mmvdMergeFlag = false; pu.mergeFlag = true; pu.afMmvdFlag = true; pu.afMmvdBaseIdx = (uint8_t)baseIdx; pu.afMmvdDir = (uint8_t)dirIdx; pu.afMmvdStep = (uint8_t)stepIdx; pu.mergeIdx = (uint8_t)(baseIdxToMergeIdxOffset + baseIdx); pu.mergeType = affMrgCtx.mergeType[pu.mergeIdx]; #if INTER_LIC pu.cu->LICFlag = affMrgCtx.LICFlags[pu.mergeIdx]; pu.cu->LICFlag = false; #endif pu.interDir = affMrgCtx.interDirNeighbours[pu.mergeIdx]; pu.cu->affineType = affMrgCtx.affineType[pu.mergeIdx]; pu.cu->BcwIdx = affMrgCtx.BcwIdx[pu.mergeIdx]; pu.ciipFlag = false; MvField mvfMmvd[2][3]; PU::getAfMmvdMvf(pu, affMrgCtx, mvfMmvd, pu.mergeIdx, pu.afMmvdStep, pu.afMmvdDir); #if JVET_Z0067_RPR_ENABLE bool bIsRefScaled = false; #endif for (int i = 0; i < 2; i++) { if( pu.cs->slice->getNumRefIdx( RefPicList( i ) ) > 0 ) { pu.mvpIdx[i] = 0; pu.mvpNum[i] = 0; pu.mvd[i] = Mv(); pu.refIdx[i] = mvfMmvd[i][0].refIdx; pu.mvAffi[i][0] = mvfMmvd[i][0].mv; pu.mvAffi[i][1] = mvfMmvd[i][1].mv; pu.mvAffi[i][2] = mvfMmvd[i][2].mv; } #if JVET_Z0067_RPR_ENABLE if ( !bIsRefScaled && pu.refIdx[i]>=0 && pu.cu->slice->getRefPic(i ? 
REF_PIC_LIST_1 : REF_PIC_LIST_0, pu.refIdx[i])->isRefScaled(pu.cs->pps) ) { bIsRefScaled = true; } #endif } uiCost = 0; #if JVET_Z0067_RPR_ENABLE if ( bIsRefScaled ) { uiCost = std::numeric_limits<Distortion>::max(); } else { #endif PelUnitBuf pcBufPredRefTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE))); PelUnitBuf pcBufPredCurTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE))); PelUnitBuf pcBufPredRefLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight))); PelUnitBuf pcBufPredCurLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight))); getAffAMLRefTemplate(pu, pcBufPredRefTop, pcBufPredRefLeft); if (m_bAMLTemplateAvailabe[0]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[1]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } #if JVET_Z0067_RPR_ENABLE } #endif // update part uint32_t i; uint32_t shift = 0; uint32_t gpIdx = mmvdMergeCand/groupSize; uint32_t endIdx = gpIdx * groupSize + encGrpSize; while (shift < encGrpSize && uiCost < candCostList[endIdx - 1 - shift]) { shift++; } if (shift != 0) { for (i = 1; i < shift; i++) { affMmvdLUT[endIdx - i] = affMmvdLUT[endIdx - 1 - i]; candCostList[endIdx - i] = candCostList[endIdx - 1 - i]; } affMmvdLUT[endIdx - shift] = mmvdMergeCand; candCostList[endIdx - shift] = uiCost; } } } #endif #endif #if JVET_W0090_ARMC_TM #if JVET_Y0134_TMVP_NAMVP_CAND_REORDERING void InterPrediction::adjustMergeCandidatesInOneCandidateGroup(PredictionUnit &pu, MergeCtx& mvpMergeCandCtx, int numRetrievedMergeCand, int 
mrgCandIdx) { if (mvpMergeCandCtx.numValidMergeCand <= 1) { return; } const int numCandInCategory = std::min(numRetrievedMergeCand, mvpMergeCandCtx.numValidMergeCand); uint32_t RdCandList[MRG_MAX_NUM_CANDS]; Distortion candCostList[MRG_MAX_NUM_CANDS]; for (uint32_t j = 0; j < MRG_MAX_NUM_CANDS; j++) { RdCandList[j] = j; candCostList[j] = MAX_UINT; } Distortion uiCost; DistParam cDistParam; cDistParam.applyWeight = false; int nWidth = pu.lumaSize().width; int nHeight = pu.lumaSize().height; auto origMergeIdx = pu.mergeIdx; for (uint32_t uiMergeCand = 0; uiMergeCand < mvpMergeCandCtx.numValidMergeCand; uiMergeCand++) { if (mvpMergeCandCtx.candCost[uiMergeCand] == MAX_UINT64) { uiCost = 0; mvpMergeCandCtx.setMergeInfo(pu, uiMergeCand); PelUnitBuf pcBufPredRefTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE))); PelUnitBuf pcBufPredCurTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE))); PelUnitBuf pcBufPredRefLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight))); PelUnitBuf pcBufPredCurLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight))); getBlkAMLRefTemplate(pu, pcBufPredRefTop, pcBufPredRefLeft); if (m_bAMLTemplateAvailabe[0]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[1]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } } else { uiCost = mvpMergeCandCtx.candCost[uiMergeCand]; } updateCandList(uiMergeCand, uiCost, numCandInCategory, RdCandList, candCostList); } pu.mergeIdx = origMergeIdx; updateCandInOneCandidateGroup(mvpMergeCandCtx, 
RdCandList, numCandInCategory); mvpMergeCandCtx.numValidMergeCand = numCandInCategory; for (int idx = 0; idx < numCandInCategory; idx++) { mvpMergeCandCtx.candCost[idx] = candCostList[idx]; } } #if JVET_AB0079_TM_BCW_MRG void InterPrediction::adjustMergeCandidatesBcwIdx(PredictionUnit& pu, MergeCtx& mrgCtx, const int mergeIdx) { if (pu.ciipFlag || pu.cu->geoFlag || !pu.cs->sps->getUseAML()) { return; } int nWidth = pu.lumaSize().width; int nHeight = pu.lumaSize().height; if (!xAMLGetCurBlkTemplate(pu, nWidth, nHeight)) { return; } PelUnitBuf pcBufPredCurTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE))); PelUnitBuf pcBufPredCurLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight))); PelUnitBuf pcBufPredRefTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE))); PelUnitBuf pcBufPredRefLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight))); Mv mvCurr; const int lumaShift = 2 + MV_FRACTIONAL_BITS_DIFF; const int horShift = (lumaShift + ::getComponentScaleX(COMPONENT_Y, pu.chromaFormat)); const int verShift = (lumaShift + ::getComponentScaleY(COMPONENT_Y, pu.chromaFormat)); DistParam cDistParam; cDistParam.applyWeight = false; int origBcwIdx = 0, bestBcwIdx = -1; Distortion uiBestCost = MAX_UINT64; Distortion uiCost = 0; for (uint32_t uiMergeCand = 0; uiMergeCand < mrgCtx.numValidMergeCand; uiMergeCand++) { if (mrgCtx.interDirNeighbours[uiMergeCand] != 3) { continue; } if (mergeIdx != -1 && uiMergeCand != mergeIdx) { continue; } mrgCtx.setMergeInfo(pu, uiMergeCand); #if JVET_Z0067_RPR_ENABLE bool bRefIsRescaled = false; for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) { const RefPicList eRefPicList = refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0; bRefIsRescaled |= (pu.refIdx[refList] >= 0) ? 
pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[refList])->isRefScaled(pu.cs->pps) : false; } if (bRefIsRescaled) { continue; } #endif if (xCheckIdenticalMotion(pu)) { continue; } // perform interpolation for template for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) { RefPicList eRefPicList = (refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0); CHECK(pu.refIdx[refList] >= pu.cu->slice->getNumRefIdx(eRefPicList), "Invalid reference index"); m_iRefListIdx = refList; mvCurr = pu.mv[refList]; Mv subPelMv = mvCurr; clipMv(mvCurr, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps); if (m_bAMLTemplateAvailabe[0]) { Mv mvTop(0, -(AML_MERGE_TEMPLATE_SIZE << verShift)); mvTop += subPelMv; clipMv(mvTop, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps); PelUnitBuf pcMbBuf = PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAboveTemplate[refList][0], pcBufPredRefTop.Y())); #if RPR_ENABLE const Picture* picRef = pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[refList])->unscaledPic; const std::pair<int, int>& scalingRatio = pu.cu->slice->getScalingRatio(eRefPicList, pu.refIdx[refList]); #if INTER_LIC xPredInterBlk(COMPONENT_Y, pu, picRef, mvTop, pcMbBuf, true, pu.cu->slice->clpRng(COMPONENT_Y), false, false, scalingRatio, 0, 0, false, NULL, 0, true, true, mvCurr); #else xPredInterBlk(COMPONENT_Y, pu, picRef, mvTop, pcMbBuf, true, pu.cu->slice->clpRng(COMPONENT_Y), false, false, scalingRatio, 0, 0, false, NULL, 0, true); #endif #else #if INTER_LIC xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[refList]), mvTop, pcMbBuf, true, pu.cu->slice->clpRng(COMPONENT_Y), false, false, SCALE_1X, 0, 0, false, NULL, 0, true, true, mvCurr); #else xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[refList]), mvTop, pcMbBuf, true, pu.cu->slice->clpRng(COMPONENT_Y), false, false, SCALE_1X, 0, 0, false, NULL, 0, true); #endif #endif } if (m_bAMLTemplateAvailabe[1]) { Mv mvLeft(-(AML_MERGE_TEMPLATE_SIZE << horShift), 0); mvLeft 
+= subPelMv; clipMv(mvLeft, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps); PelUnitBuf pcMbBuf = PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefLeftTemplate[refList][0], pcBufPredRefLeft.Y())); #if RPR_ENABLE const Picture* picRef = pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[refList])->unscaledPic; const std::pair<int, int>& scalingRatio = pu.cu->slice->getScalingRatio(eRefPicList, pu.refIdx[refList]); #if INTER_LIC xPredInterBlk(COMPONENT_Y, pu, picRef, mvLeft, pcMbBuf, true, pu.cu->slice->clpRng(COMPONENT_Y), false, false, scalingRatio, 0, 0, false, NULL, 0, true, true, mvCurr); #else xPredInterBlk(COMPONENT_Y, pu, picRef, mvLeft, pcMbBuf, true, pu.cu->slice->clpRng(COMPONENT_Y), false, false, scalingRatio, 0, 0, false, NULL, 0, true); #endif #else #if INTER_LIC xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[refList]), mvLeft, pcMbBuf, true, pu.cu->slice->clpRng(COMPONENT_Y), false, false, SCALE_1X, 0, 0, false, NULL, 0, true, true, mvCurr); #else xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[refList]), mvLeft, pcMbBuf, true, pu.cu->slice->clpRng(COMPONENT_Y), false, false, SCALE_1X, 0, 0, false, NULL, 0, true); #endif #endif } } // perform bi-prediction with different BCW index origBcwIdx = mrgCtx.BcwIdx[uiMergeCand]; bestBcwIdx = -1; uiBestCost = MAX_UINT64; uiCost = 0; const int origWeight = getBcwWeight(origBcwIdx, REF_PIC_LIST_0); bool bioApplied = pu.cs->sps->getBDOFEnabledFlag() && !pu.cs->picHeader->getDisBdofFlag() && PU::isBiPredFromDifferentDirEqDistPoc(pu) ? 
true : false; for (int idx = 0; idx < BCW_MRG_NUM; idx++) { pu.cu->BcwIdx = g_BcwMrgParsingOrder[idx]; if (abs(origWeight - getBcwWeight(pu.cu->BcwIdx, REF_PIC_LIST_0)) > 1) { continue; } uiCost = 0; if (m_bAMLTemplateAvailabe[0]) { CPelUnitBuf srcPred0 = CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAboveTemplate[0][0], pcBufPredRefTop.Y())); CPelUnitBuf srcPred1 = CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAboveTemplate[1][0], pcBufPredRefTop.Y())); xWeightedAverageY(pu, srcPred0, srcPred1, pcBufPredRefTop, pu.cu->slice->getSPS()->getBitDepths(), pu.cu->slice->clpRngs()); m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[1]) { CPelUnitBuf srcPred0 = CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefLeftTemplate[0][0], pcBufPredRefLeft.Y())); CPelUnitBuf srcPred1 = CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefLeftTemplate[1][0], pcBufPredRefLeft.Y())); xWeightedAverageY(pu, srcPred0, srcPred1, pcBufPredRefLeft, pu.cu->slice->getSPS()->getBitDepths(), pu.cu->slice->clpRngs()); m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } if (origBcwIdx == pu.cu->BcwIdx && bioApplied && pu.cu->BcwIdx == BCW_DEFAULT) { uiCost = uiCost - (uiCost >> 3) - (uiCost >> 4); } else if (origBcwIdx == pu.cu->BcwIdx) { uiCost = uiCost - (uiCost >> 4) - (uiCost >> 5); } else if (bioApplied && pu.cu->BcwIdx == BCW_DEFAULT) { uiCost = uiCost - (uiCost >> 4) - (uiCost >> 5); } if (uiCost < uiBestCost) { uiBestCost = uiCost; bestBcwIdx = pu.cu->BcwIdx; } } // for (int idx = 0; idx < loopBcw; idx++) pu.cu->BcwIdx = bestBcwIdx; mrgCtx.BcwIdx[uiMergeCand] = bestBcwIdx; } // for (uint32_t uiMergeCand = 0; uiMergeCand < mrgCtx.numValidMergeCand; uiMergeCand++) pu.mergeIdx = mergeIdx; } #endif #if 
JVET_Z0102_NO_ARMC_FOR_ZERO_CAND void InterPrediction::adjustMergeCandidates(PredictionUnit& pu, MergeCtx& mvpMergeCandCtx, int numRetrievedMergeCand) { if (mvpMergeCandCtx.numValidMergeCand <= 1) { return; } const int numCandInCategory = std::min(numRetrievedMergeCand, mvpMergeCandCtx.numValidMergeCand); #if JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC uint32_t rdCandList[NUM_MERGE_CANDS + MRG_MAX_NUM_CANDS]; Distortion candCostList[NUM_MERGE_CANDS + MRG_MAX_NUM_CANDS]; for (uint32_t j = 0; j < NUM_MERGE_CANDS + MRG_MAX_NUM_CANDS; j++) #else uint32_t rdCandList[MRG_MAX_NUM_CANDS]; Distortion candCostList[MRG_MAX_NUM_CANDS]; for (uint32_t j = 0; j < MRG_MAX_NUM_CANDS; j++) #endif { rdCandList[j] = j; candCostList[j] = MAX_UINT64; } Distortion uiCost; DistParam cDistParam; cDistParam.applyWeight = false; int nWidth = pu.lumaSize().width; int nHeight = pu.lumaSize().height; auto origMergeIdx = pu.mergeIdx; PelUnitBuf pcBufPredCurTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE))); PelUnitBuf pcBufPredCurLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight))); for (uint32_t uiMergeCand = 0; uiMergeCand < mvpMergeCandCtx.numValidMergeCand; uiMergeCand++) { if (mvpMergeCandCtx.numCandToTestEnc != mvpMergeCandCtx.numValidMergeCand) { if (uiMergeCand >= mvpMergeCandCtx.numCandToTestEnc) { mvpMergeCandCtx.candCost[uiMergeCand] = MAX_UINT64 - 1; } } if (mvpMergeCandCtx.candCost[uiMergeCand] == MAX_UINT64) { uiCost = 0; mvpMergeCandCtx.setMergeInfo(pu, uiMergeCand); PelUnitBuf pcBufPredRefTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE))); PelUnitBuf pcBufPredRefLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight))); #if JVET_Z0067_RPR_ENABLE bool bRefIsRescaled = false; for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) { const RefPicList eRefPicList 
= refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0; bRefIsRescaled |= (pu.refIdx[refList] >= 0) ? pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[refList])->isRefScaled(pu.cs->pps) : false; } if (bRefIsRescaled) { uiCost = std::numeric_limits<Distortion>::max(); } else { #endif getBlkAMLRefTemplate(pu, pcBufPredRefTop, pcBufPredRefLeft); if (m_bAMLTemplateAvailabe[0]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[1]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } #if JVET_Z0067_RPR_ENABLE } #endif #if JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC if (m_bAMLTemplateAvailabe[0] && !m_bAMLTemplateAvailabe[1]) { uiCost += (uiCost * nHeight) / nWidth; } if (!m_bAMLTemplateAvailabe[0] && m_bAMLTemplateAvailabe[1]) { uiCost += (uiCost * nWidth) / nHeight; } #endif } else { uiCost = mvpMergeCandCtx.candCost[uiMergeCand]; } updateCandList(uiMergeCand, uiCost, mvpMergeCandCtx.numValidMergeCand, rdCandList, candCostList); } pu.mergeIdx = origMergeIdx; #if JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC MergeCtx pairMergeCand; pairMergeCand.numValidMergeCand = 0; pairMergeCand.numCandToTestEnc = 0; bool pairAdded = false; if (mvpMergeCandCtx.numCandToTestEnc > 1 ) { int cnt = 0; int maxPairToBeAdded = std::min(mvpMergeCandCtx.numCandToTestEnc, numCandInCategory); #if TM_MRG if (pu.tmMergeFlag #if JVET_AA0132_CONFIGURABLE_TM_TOOLS && pu.cs->sps->getUseTMMrgMode() #endif ) { maxPairToBeAdded = std::min(mvpMergeCandCtx.numCandToTestEnc, TM_MRG_MAX_NUM_INIT_CANDS); } #endif int cand1 = 0; cnt = 0; for (int cand2 = 1; cand2 < std::min(maxPairToBeAdded, MRG_MAX_NUM_CANDS); cand2++) { if (cand1 == cand2) { continue; } pairMergeCand.mvFieldNeighbours[cnt * 2].setMvField(Mv(0, 0), NOT_VALID); 
pairMergeCand.mvFieldNeighbours[cnt * 2 + 1].setMvField(Mv(0, 0), NOT_VALID); #if INTER_LIC pairMergeCand.LICFlags[cnt] = mvpMergeCandCtx.LICFlags[rdCandList[0]]; #endif pairMergeCand.BcwIdx[cnt] = mvpMergeCandCtx.BcwIdx[rdCandList[0]]; pairMergeCand.useAltHpelIf[cnt] = mvpMergeCandCtx.useAltHpelIf[rdCandList[0]]; pairMergeCand.candCost[cnt] = MAX_UINT64; #if MULTI_HYP_PRED pairMergeCand.addHypNeighbours[cnt].clear(); #endif // calculate average MV for L0 and L1 seperately unsigned char interDir = 0; for (int refListId = 0; refListId < (pu.cu->slice->isInterB() ? 2 : 1); refListId++) { const short refIdxI = mvpMergeCandCtx.mvFieldNeighbours[rdCandList[cand1] * 2 + refListId].refIdx; const short refIdxJ = mvpMergeCandCtx.mvFieldNeighbours[rdCandList[cand2] * 2 + refListId].refIdx; // both MVs are invalid, skip if ((refIdxI == NOT_VALID) && (refIdxJ == NOT_VALID)) { continue; } interDir += 1 << refListId; // both MVs are valid, average these two MVs if ((refIdxI != NOT_VALID) && (refIdxJ != NOT_VALID) && refIdxI == refIdxJ) { const Mv& mvI = mvpMergeCandCtx.mvFieldNeighbours[rdCandList[cand1] * 2 + refListId].mv; const Mv& mvJ = mvpMergeCandCtx.mvFieldNeighbours[rdCandList[cand2] * 2 + refListId].mv; // average two MVs Mv avgMv = mvI; avgMv += mvJ; roundAffineMv(avgMv.hor, avgMv.ver, 1); pairMergeCand.mvFieldNeighbours[cnt * 2 + refListId].setMvField(avgMv, refIdxI); } // only one MV is valid, take the only one MV else if (refIdxI != NOT_VALID) { Mv singleMv = mvpMergeCandCtx.mvFieldNeighbours[rdCandList[cand1] * 2 + refListId].mv; pairMergeCand.mvFieldNeighbours[cnt * 2 + refListId].setMvField(singleMv, refIdxI); } else if (refIdxJ != NOT_VALID) { Mv singleMv = mvpMergeCandCtx.mvFieldNeighbours[rdCandList[cand2] * 2 + refListId].mv; pairMergeCand.mvFieldNeighbours[cnt * 2 + refListId].setMvField(singleMv, refIdxJ); } } pairMergeCand.interDirNeighbours[cnt] = interDir; if (interDir > 0) { #if INTER_LIC if (interDir == 3) { pairMergeCand.LICFlags[cnt] = false; } 
#endif uint32_t mvdSimilarityThresh = 1; #if TM_MRG mvdSimilarityThresh = #if JVET_AA0132_CONFIGURABLE_TM_TOOLS pu.cs->sps->getUseTMMrgMode() && #endif pu.tmMergeFlag ? PU::getTMMvdThreshold(pu) : mvdSimilarityThresh; #endif if (!pairMergeCand.xCheckSimilarMotion(cnt, mvdSimilarityThresh)) { if (!mvpMergeCandCtx.xCheckSimilarMotion2Lists(cnt, &pairMergeCand, mvdSimilarityThresh)) { pairAdded = true; cnt++; pairMergeCand.numValidMergeCand++; pairMergeCand.numCandToTestEnc++; if (pairMergeCand.numValidMergeCand == MAX_PAIR_CANDS) { break; } } } } } if (pairAdded) { for (uint32_t uiMergeCand = 0; uiMergeCand < pairMergeCand.numValidMergeCand; uiMergeCand++) { if (pairMergeCand.candCost[uiMergeCand] == MAX_UINT64) { uiCost = 0; pairMergeCand.setMergeInfo(pu, uiMergeCand); PelUnitBuf pcBufPredRefTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE))); PelUnitBuf pcBufPredRefLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight))); getBlkAMLRefTemplate(pu, pcBufPredRefTop, pcBufPredRefLeft); if (m_bAMLTemplateAvailabe[0]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[1]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[0] && !m_bAMLTemplateAvailabe[1]) { uiCost += (uiCost * nHeight) / nWidth; } if (!m_bAMLTemplateAvailabe[0] && m_bAMLTemplateAvailabe[1]) { uiCost += (uiCost * nWidth) / nHeight; } } else { uiCost = pairMergeCand.candCost[uiMergeCand]; } updateCandList((uiMergeCand+ mvpMergeCandCtx.numValidMergeCand), uiCost, (mvpMergeCandCtx.numValidMergeCand + pairMergeCand.numValidMergeCand), rdCandList, candCostList); } pu.mergeIdx = 
origMergeIdx; } Distortion cost = pu.cs->slice->getCostForARMC(); uint32_t candToBeRemoved = NUM_MERGE_CANDS + MRG_MAX_NUM_CANDS; Distortion min = MAX_UINT64; for (int sizeCandList = mvpMergeCandCtx.numCandToTestEnc+ pairMergeCand.numValidMergeCand; sizeCandList > 1; sizeCandList--) { min = MAX_UINT64; #if TM_MRG if(pu.tmMergeFlag #if JVET_AA0132_CONFIGURABLE_TM_TOOLS && pu.cs->sps->getUseTMMrgMode() #endif ) { candToBeRemoved = 0; min = candCostList[0]; } #endif for (uint32_t uiMergeCand = 0; uiMergeCand < sizeCandList - 1; ++uiMergeCand) { if (min > abs((int)(candCostList[uiMergeCand + 1] - candCostList[uiMergeCand]))) { min = abs((int)(candCostList[uiMergeCand + 1] - candCostList[uiMergeCand])); candToBeRemoved = uiMergeCand + 1; } } if (min < cost) { uint32_t candToBeReplaced = sizeCandList - 1; for (uint32_t uiMergeCand = candToBeRemoved + 1; uiMergeCand < sizeCandList; ++uiMergeCand) { if (cost < abs((int)((int)candCostList[uiMergeCand] - (int)candCostList[candToBeRemoved]))) { candToBeReplaced = uiMergeCand; break; } } if (sizeCandList > numCandInCategory) { candToBeReplaced = sizeCandList - 1; } uint32_t rdCand = rdCandList[candToBeRemoved]; Distortion candCost = candCostList[(candToBeRemoved)]; for (int ui = candToBeRemoved; ui < (candToBeReplaced > sizeCandList - 1 ? 
sizeCandList - 1 : candToBeReplaced); ui++) { candCostList[ui] = candCostList[(ui + 1)]; rdCandList[ui] = rdCandList[(ui + 1)]; } candCostList[candToBeReplaced] = candCost; rdCandList[candToBeReplaced] = rdCand; } else { break; } } } updateCandInTwoCandidateGroups(mvpMergeCandCtx, rdCandList, mvpMergeCandCtx.numValidMergeCand, pairMergeCand); #else updateCandInOneCandidateGroup(mvpMergeCandCtx, rdCandList, mvpMergeCandCtx.numValidMergeCand); #endif for (int idx = 0; idx < mvpMergeCandCtx.numValidMergeCand; idx++) { mvpMergeCandCtx.candCost[idx] = candCostList[idx]; } mvpMergeCandCtx.numValidMergeCand = numCandInCategory; #if !JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC for (int idx = 0; idx < numCandInCategory; idx++) { mvpMergeCandCtx.candCost[idx] = candCostList[idx]; } #endif } #endif #if JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC void InterPrediction::updateCandInTwoCandidateGroups(MergeCtx& mrgCtx, uint32_t* rdCandList, int numCandInCategory, MergeCtx mrgCtx2) { MergeCtx mrgCtxTmp; for (uint32_t uiMergeCand = 0; uiMergeCand < mrgCtx.numValidMergeCand; uiMergeCand++) { mrgCtxTmp.BcwIdx[uiMergeCand] = mrgCtx.BcwIdx[uiMergeCand]; mrgCtxTmp.interDirNeighbours[uiMergeCand] = mrgCtx.interDirNeighbours[uiMergeCand]; mrgCtxTmp.mvFieldNeighbours[(uiMergeCand << 1)] = mrgCtx.mvFieldNeighbours[(uiMergeCand << 1)]; mrgCtxTmp.mvFieldNeighbours[(uiMergeCand << 1) + 1] = mrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1]; mrgCtxTmp.useAltHpelIf[uiMergeCand] = mrgCtx.useAltHpelIf[uiMergeCand]; #if INTER_LIC mrgCtxTmp.LICFlags[uiMergeCand] = mrgCtx.LICFlags[uiMergeCand]; #endif #if MULTI_HYP_PRED mrgCtxTmp.addHypNeighbours[uiMergeCand] = mrgCtx.addHypNeighbours[uiMergeCand]; #endif } //update for (uint32_t uiMergeCand = 0; uiMergeCand < numCandInCategory; uiMergeCand++) { if (rdCandList[uiMergeCand] >= mrgCtx.numValidMergeCand) { mrgCtx.BcwIdx[uiMergeCand] = mrgCtx2.BcwIdx[rdCandList[uiMergeCand] - mrgCtx.numValidMergeCand]; mrgCtx.interDirNeighbours[uiMergeCand] = 
mrgCtx2.interDirNeighbours[rdCandList[uiMergeCand] - mrgCtx.numValidMergeCand]; mrgCtx.mvFieldNeighbours[(uiMergeCand << 1)] = mrgCtx2.mvFieldNeighbours[((rdCandList[uiMergeCand] -mrgCtx.numValidMergeCand) << 1)]; mrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1] = mrgCtx2.mvFieldNeighbours[((rdCandList[uiMergeCand] - mrgCtx.numValidMergeCand) << 1) + 1]; mrgCtx.useAltHpelIf[uiMergeCand] = mrgCtx2.useAltHpelIf[rdCandList[uiMergeCand] - mrgCtx.numValidMergeCand]; #if INTER_LIC mrgCtx.LICFlags[uiMergeCand] = mrgCtx2.LICFlags[rdCandList[uiMergeCand] - mrgCtx.numValidMergeCand]; #endif #if MULTI_HYP_PRED mrgCtx.addHypNeighbours[uiMergeCand] = mrgCtx2.addHypNeighbours[rdCandList[uiMergeCand] - mrgCtx.numValidMergeCand]; #endif } else { mrgCtx.BcwIdx[uiMergeCand] = mrgCtxTmp.BcwIdx[rdCandList[uiMergeCand]]; mrgCtx.interDirNeighbours[uiMergeCand] = mrgCtxTmp.interDirNeighbours[rdCandList[uiMergeCand]]; mrgCtx.mvFieldNeighbours[(uiMergeCand << 1)] = mrgCtxTmp.mvFieldNeighbours[(rdCandList[uiMergeCand] << 1)]; mrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1] = mrgCtxTmp.mvFieldNeighbours[(rdCandList[uiMergeCand] << 1) + 1]; mrgCtx.useAltHpelIf[uiMergeCand] = mrgCtxTmp.useAltHpelIf[rdCandList[uiMergeCand]]; #if INTER_LIC mrgCtx.LICFlags[uiMergeCand] = mrgCtxTmp.LICFlags[rdCandList[uiMergeCand]]; #endif #if MULTI_HYP_PRED mrgCtx.addHypNeighbours[uiMergeCand] = mrgCtxTmp.addHypNeighbours[rdCandList[uiMergeCand]]; #endif } } } #endif void InterPrediction::updateCandInOneCandidateGroup(MergeCtx& mrgCtx, uint32_t* rdCandList, int numCandInCategory) { MergeCtx mrgCtxTmp; for (uint32_t uiMergeCand = 0; uiMergeCand < mrgCtx.numValidMergeCand; uiMergeCand++) { mrgCtxTmp.BcwIdx[uiMergeCand] = mrgCtx.BcwIdx[uiMergeCand]; mrgCtxTmp.interDirNeighbours[uiMergeCand] = mrgCtx.interDirNeighbours[uiMergeCand]; mrgCtxTmp.mvFieldNeighbours[(uiMergeCand << 1)] = mrgCtx.mvFieldNeighbours[(uiMergeCand << 1)]; mrgCtxTmp.mvFieldNeighbours[(uiMergeCand << 1) + 1] = 
mrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1]; mrgCtxTmp.useAltHpelIf[uiMergeCand] = mrgCtx.useAltHpelIf[uiMergeCand]; #if INTER_LIC mrgCtxTmp.LICFlags[uiMergeCand] = mrgCtx.LICFlags[uiMergeCand]; #endif #if MULTI_HYP_PRED mrgCtxTmp.addHypNeighbours[uiMergeCand] = mrgCtx.addHypNeighbours[uiMergeCand]; #endif } //update for (uint32_t uiMergeCand = 0; uiMergeCand < numCandInCategory; uiMergeCand++) { mrgCtx.BcwIdx[uiMergeCand] = mrgCtxTmp.BcwIdx[rdCandList[uiMergeCand]]; mrgCtx.interDirNeighbours[uiMergeCand] = mrgCtxTmp.interDirNeighbours[rdCandList[uiMergeCand]]; mrgCtx.mvFieldNeighbours[(uiMergeCand << 1)] = mrgCtxTmp.mvFieldNeighbours[(rdCandList[uiMergeCand] << 1)]; mrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1] = mrgCtxTmp.mvFieldNeighbours[(rdCandList[uiMergeCand] << 1) + 1]; mrgCtx.useAltHpelIf[uiMergeCand] = mrgCtxTmp.useAltHpelIf[rdCandList[uiMergeCand]]; #if INTER_LIC mrgCtx.LICFlags[uiMergeCand] = mrgCtxTmp.LICFlags[rdCandList[uiMergeCand]]; #endif #if MULTI_HYP_PRED mrgCtx.addHypNeighbours[uiMergeCand] = mrgCtxTmp.addHypNeighbours[rdCandList[uiMergeCand]]; #endif } } #endif void InterPrediction::adjustInterMergeCandidates(PredictionUnit &pu, MergeCtx& mrgCtx, int mrgCandIdx) { uint32_t RdCandList[MRG_MAX_NUM_CANDS][MRG_MAX_NUM_CANDS]; Distortion candCostList[MRG_MAX_NUM_CANDS][MRG_MAX_NUM_CANDS]; for (uint32_t i = 0; i < MRG_MAX_NUM_CANDS; i++) { for (uint32_t j = 0; j < MRG_MAX_NUM_CANDS; j++) { RdCandList[i][j] = j; candCostList[i][j] = MAX_UINT; } } Distortion uiCost; DistParam cDistParam; cDistParam.applyWeight = false; /*const SPS &sps = *pu.cs->sps; Position puPos = pu.lumaPos();*/ int nWidth = pu.lumaSize().width; int nHeight = pu.lumaSize().height; if (!xAMLGetCurBlkTemplate(pu, nWidth, nHeight)) { return; } #if JVET_X0049_ADAPT_DMVR #if JVET_AA0093_ENHANCED_MMVD_EXTENSION uint16_t origMergeIdx = pu.mergeIdx; #else uint8_t origMergeIdx = pu.mergeIdx; #endif #endif for (uint32_t uiMergeCand = ((mrgCandIdx < 0) ? 
0 : (mrgCandIdx / ADAPTIVE_SUB_GROUP_SIZE)*ADAPTIVE_SUB_GROUP_SIZE); uiMergeCand < (((mrgCandIdx < 0) || ((mrgCandIdx / ADAPTIVE_SUB_GROUP_SIZE + 1)*ADAPTIVE_SUB_GROUP_SIZE > mrgCtx.numValidMergeCand)) ? mrgCtx.numValidMergeCand : ((mrgCandIdx / ADAPTIVE_SUB_GROUP_SIZE + 1)*ADAPTIVE_SUB_GROUP_SIZE)); ++uiMergeCand) { bool firstGroup = (uiMergeCand / ADAPTIVE_SUB_GROUP_SIZE) == 0 ? true : false; bool lastGroup = ((uiMergeCand / ADAPTIVE_SUB_GROUP_SIZE + 1)*ADAPTIVE_SUB_GROUP_SIZE >= mrgCtx.numValidMergeCand) ? true : false; if (lastGroup && !firstGroup) { break; } uiCost = 0; mrgCtx.setMergeInfo(pu, uiMergeCand); PU::spanMotionInfo(pu, mrgCtx); PelUnitBuf pcBufPredRefTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE))); PelUnitBuf pcBufPredCurTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE))); PelUnitBuf pcBufPredRefLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight))); PelUnitBuf pcBufPredCurLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight))); #if JVET_Y0128_NON_CTC bool bRefIsRescaled = false; for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) { const RefPicList eRefPicList = refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0; bRefIsRescaled |= (pu.refIdx[refList] >= 0) ? 
pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[refList])->isRefScaled(pu.cs->pps) : false; } if ( !bRefIsRescaled ) { #endif getBlkAMLRefTemplate(pu, pcBufPredRefTop, pcBufPredRefLeft); if (m_bAMLTemplateAvailabe[0]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[1]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } #if JVET_Y0128_NON_CTC } #endif updateCandList(uiMergeCand, uiCost, ADAPTIVE_SUB_GROUP_SIZE, RdCandList[uiMergeCand / ADAPTIVE_SUB_GROUP_SIZE], candCostList[uiMergeCand / ADAPTIVE_SUB_GROUP_SIZE]); } #if JVET_X0049_ADAPT_DMVR pu.mergeIdx = origMergeIdx; #else pu.mergeIdx = mrgCandIdx; //restore the merge index #endif updateCandInfo(mrgCtx, RdCandList , mrgCandIdx ); } #endif #if JVET_W0090_ARMC_TM #if JVET_AA0093_REFINED_MOTION_FOR_ARMC void InterPrediction::adjustMergeCandidatesInOneCandidateGroup(PredictionUnit &pu, MergeCtx& mvpMergeCandCtx, bool* applyBDMVR, Mv** mvBufBDMVR, Mv** mvBufBDMVRTmp, int numRetrievedMergeCand, bool subRefineList[][2], bool subRefineListTmp[][2], int mrgCandIdx) { if (mvpMergeCandCtx.numValidMergeCand <= 1) { return; } if (!xAMLIsTopTempAvailable(pu) && !xAMLIsLeftTempAvailable(pu)) { return; } const int numCandInCategory = std::min(numRetrievedMergeCand, mvpMergeCandCtx.numValidMergeCand); uint32_t rdCandList[10]; Distortion candCostList[10]; for (uint32_t j = 0; j < 10; j++) { rdCandList[j] = j; candCostList[j] = MAX_UINT; } Distortion uiCost; DistParam cDistParam; cDistParam.applyWeight = false; auto origMergeIdx = pu.mergeIdx; for (uint32_t uiMergeCand = 0; uiMergeCand < mvpMergeCandCtx.numValidMergeCand; uiMergeCand++) { if (mvpMergeCandCtx.numCandToTestEnc != mvpMergeCandCtx.numValidMergeCand) { if 
(uiMergeCand > mvpMergeCandCtx.numCandToTestEnc) { mvpMergeCandCtx.candCost[uiMergeCand] = MAX_UINT64 - 1; } } #if TM_MRG if (( #if JVET_AA0132_CONFIGURABLE_TM_TOOLS pu.cs->sps->getUseTMMrgMode() && #endif pu.tmMergeFlag) || (mvpMergeCandCtx.numCandToTestEnc != mvpMergeCandCtx.numValidMergeCand && uiMergeCand > mvpMergeCandCtx.numCandToTestEnc )) #else if ((mvpMergeCandCtx.numCandToTestEnc != mvpMergeCandCtx.numValidMergeCand && uiMergeCand > mvpMergeCandCtx.numCandToTestEnc )) #endif { uiCost = mvpMergeCandCtx.candCost[uiMergeCand]; } else { uiCost = 0; mvpMergeCandCtx.setMergeInfo(pu, uiMergeCand); PelUnitBuf pcBufPredRefTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[0][0], pu.lwidth(), AML_MERGE_TEMPLATE_SIZE))); PelUnitBuf pcBufPredCurTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[0][0], pu.lwidth(), AML_MERGE_TEMPLATE_SIZE))); PelUnitBuf pcBufPredRefLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, pu.lheight()))); PelUnitBuf pcBufPredCurLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, pu.lheight()))); #if JVET_Z0067_RPR_ENABLE bool bRefIsRescaled = false; for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) { const RefPicList eRefPicList = refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0; bRefIsRescaled |= (pu.refIdx[refList] >= 0) ? 
pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[refList])->isRefScaled(pu.cs->pps) : false; } if (bRefIsRescaled) { uiCost = std::numeric_limits<Distortion>::max(); } else { #endif getBlkAMLRefTemplate(pu, pcBufPredRefTop, pcBufPredRefLeft); if (m_bAMLTemplateAvailabe[0]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[1]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } #if JVET_Z0067_RPR_ENABLE } #endif } updateCandList(uiMergeCand, uiCost, numCandInCategory, rdCandList, candCostList); } pu.mergeIdx = origMergeIdx; updateCandInOneCandidateGroup(mvpMergeCandCtx, rdCandList, applyBDMVR, mvBufBDMVR, mvBufBDMVRTmp, subRefineList, subRefineListTmp, numCandInCategory); } void InterPrediction::updateCandInOneCandidateGroup(MergeCtx& mrgCtx, uint32_t* rdCandList, bool* applyBDMVR, Mv** mvBufBDMVR, Mv** mvBufBDMVRTmp, bool subRefineList[][2], bool subRefineListTmp[][2],int numCandInCategory) { bool applyBDMVRTmp[10]; MergeCtx mrgCtxTmp; for (uint32_t uiMergeCand = 0; uiMergeCand < mrgCtx.numValidMergeCand; uiMergeCand++) { mrgCtxTmp.BcwIdx[uiMergeCand] = mrgCtx.BcwIdx[uiMergeCand]; mrgCtxTmp.interDirNeighbours[uiMergeCand] = mrgCtx.interDirNeighbours[uiMergeCand]; mrgCtxTmp.mvFieldNeighbours[(uiMergeCand << 1)] = mrgCtx.mvFieldNeighbours[(uiMergeCand << 1)]; mrgCtxTmp.mvFieldNeighbours[(uiMergeCand << 1) + 1] = mrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1]; mrgCtxTmp.useAltHpelIf[uiMergeCand] = mrgCtx.useAltHpelIf[uiMergeCand]; #if INTER_LIC mrgCtxTmp.LICFlags[uiMergeCand] = mrgCtx.LICFlags[uiMergeCand]; #endif #if MULTI_HYP_PRED mrgCtxTmp.addHypNeighbours[uiMergeCand] = mrgCtx.addHypNeighbours[uiMergeCand]; #endif if (applyBDMVR) { applyBDMVRTmp[uiMergeCand] = 
applyBDMVR[uiMergeCand]; } } //update for (uint32_t uiMergeCand = 0; uiMergeCand < numCandInCategory; uiMergeCand++) { mrgCtx.BcwIdx[uiMergeCand] = mrgCtxTmp.BcwIdx[rdCandList[uiMergeCand]]; mrgCtx.interDirNeighbours[uiMergeCand] = mrgCtxTmp.interDirNeighbours[rdCandList[uiMergeCand]]; mrgCtx.mvFieldNeighbours[(uiMergeCand << 1)] = mrgCtxTmp.mvFieldNeighbours[(rdCandList[uiMergeCand] << 1)]; mrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1] = mrgCtxTmp.mvFieldNeighbours[(rdCandList[uiMergeCand] << 1) + 1]; mrgCtx.useAltHpelIf[uiMergeCand] = mrgCtxTmp.useAltHpelIf[rdCandList[uiMergeCand]]; #if INTER_LIC mrgCtx.LICFlags[uiMergeCand] = mrgCtxTmp.LICFlags[rdCandList[uiMergeCand]]; #endif #if MULTI_HYP_PRED mrgCtx.addHypNeighbours[uiMergeCand] = mrgCtxTmp.addHypNeighbours[rdCandList[uiMergeCand]]; #endif if (applyBDMVR) { applyBDMVR[uiMergeCand] = applyBDMVRTmp[rdCandList[uiMergeCand]]; } if (mvBufBDMVR && mvBufBDMVRTmp) { mvBufBDMVRTmp[(uiMergeCand << 1)] = mvBufBDMVR[(rdCandList[uiMergeCand] << 1)]; mvBufBDMVRTmp[(uiMergeCand << 1) + 1] = mvBufBDMVR[(rdCandList[uiMergeCand] << 1) + 1]; } if (subRefineList && subRefineListTmp) { subRefineList[uiMergeCand][0] = subRefineListTmp[rdCandList[uiMergeCand]][0]; subRefineList[uiMergeCand][1] = subRefineListTmp[rdCandList[uiMergeCand]][1]; } } if (mvBufBDMVR && mvBufBDMVRTmp) { for (uint32_t uiMergeCand = 0; uiMergeCand < numCandInCategory; uiMergeCand++) { mvBufBDMVR[(uiMergeCand << 1)] = mvBufBDMVRTmp[(uiMergeCand << 1)]; mvBufBDMVR[(uiMergeCand << 1) + 1] = mvBufBDMVRTmp[(uiMergeCand << 1) + 1]; } } } #endif #endif #if JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING || JVET_Z0061_TM_OBMC || JVET_AA0061_IBC_MBVD || JVET_Y0058_IBC_LIST_MODIFY bool InterPrediction::xAMLGetCurBlkTemplate(PredictionUnit& pu, int nCurBlkWidth, int nCurBlkHeight) { m_bAMLTemplateAvailabe[0] = xAMLIsTopTempAvailable(pu); m_bAMLTemplateAvailabe[1] = xAMLIsLeftTempAvailable(pu); if (!m_bAMLTemplateAvailabe[0] && !m_bAMLTemplateAvailabe[1]) 
{ return false; } /* const int lumaShift = 2 + MV_FRACTIONAL_BITS_DIFF; const int horShift = (lumaShift + ::getComponentScaleX(COMPONENT_Y, pu.chromaFormat)); const int verShift = (lumaShift + ::getComponentScaleY(COMPONENT_Y, pu.chromaFormat));*/ const Picture& currPic = *pu.cs->picture; const CPelBuf recBuf = currPic.getRecoBuf(pu.cs->picture->blocks[COMPONENT_Y]); std::vector<Pel>& invLUT = m_pcReshape->getInvLUT(); #if JVET_Z0054_BLK_REF_PIC_REORDER if(!m_fillCurTplAboveARMC) #endif if (m_bAMLTemplateAvailabe[0]) { const Pel* rec = recBuf.bufAt(pu.blocks[COMPONENT_Y].pos().offset(0, -AML_MERGE_TEMPLATE_SIZE)); PelBuf pcYBuf = PelBuf(m_acYuvCurAMLTemplate[0][0], nCurBlkWidth, AML_MERGE_TEMPLATE_SIZE); Pel* pcY = pcYBuf.bufAt(0, 0); for (int k = 0; k < nCurBlkWidth; k++) { for (int l = 0; l < AML_MERGE_TEMPLATE_SIZE; l++) { int recVal = rec[k + l * recBuf.stride]; if (m_pcReshape->getSliceReshaperInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag()) { recVal = invLUT[recVal]; } pcY[k + l * nCurBlkWidth] = recVal; } } #if JVET_Z0054_BLK_REF_PIC_REORDER m_fillCurTplAboveARMC = true; #endif } #if JVET_Z0054_BLK_REF_PIC_REORDER if(!m_fillCurTplLeftARMC) #endif if (m_bAMLTemplateAvailabe[1]) { PelBuf pcYBuf = PelBuf(m_acYuvCurAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nCurBlkHeight); Pel* pcY = pcYBuf.bufAt(0, 0); const Pel* rec = recBuf.bufAt(pu.blocks[COMPONENT_Y].pos().offset(-AML_MERGE_TEMPLATE_SIZE, 0)); for (int k = 0; k < nCurBlkHeight; k++) { for (int l = 0; l < AML_MERGE_TEMPLATE_SIZE; l++) { int recVal = rec[recBuf.stride * k + l]; if (m_pcReshape->getSliceReshaperInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag()) { recVal = invLUT[recVal]; } pcY[AML_MERGE_TEMPLATE_SIZE * k + l] = recVal; } } #if JVET_Z0054_BLK_REF_PIC_REORDER m_fillCurTplLeftARMC = true; #endif } return true; } bool InterPrediction::xAMLIsTopTempAvailable(PredictionUnit& pu) { const CodingStructure &cs = *pu.cs; Position posRT = pu.Y().topRight(); const PredictionUnit *puAbove 
= cs.getPURestricted(posRT.offset(0, -1), pu, pu.chType); return (puAbove && pu.cu != puAbove->cu); } bool InterPrediction::xAMLIsLeftTempAvailable(PredictionUnit& pu) { const CodingStructure &cs = *pu.cs; Position posLB = pu.Y().bottomLeft(); const PredictionUnit *puLeft = cs.getPURestricted(posLB.offset(-1, 0), pu, pu.chType); return (puLeft && pu.cu != puLeft->cu); } #endif #if JVET_W0090_ARMC_TM void InterPrediction::updateCandList(uint32_t uiCand, Distortion uiCost, uint32_t uiMrgCandNum, uint32_t* RdCandList, Distortion* CandCostList) { uint32_t i; uint32_t shift = 0; while (shift < uiMrgCandNum && uiCost < CandCostList[uiMrgCandNum - 1 - shift]) { shift++; } if (shift != 0) { for (i = 1; i < shift; i++) { RdCandList[uiMrgCandNum - i] = RdCandList[uiMrgCandNum - 1 - i]; CandCostList[uiMrgCandNum - i] = CandCostList[uiMrgCandNum - 1 - i]; } RdCandList[uiMrgCandNum - shift] = uiCand; CandCostList[uiMrgCandNum - shift] = uiCost; } } void InterPrediction::updateCandInfo(MergeCtx& mrgCtx, uint32_t(*RdCandList)[MRG_MAX_NUM_CANDS], int mrgCandIdx) { MergeCtx mrgCtxTmp; for (uint32_t ui = 0; ui < MRG_MAX_NUM_CANDS; ++ui) { mrgCtxTmp.BcwIdx[ui] = BCW_DEFAULT; mrgCtxTmp.interDirNeighbours[ui] = 0; mrgCtxTmp.mvFieldNeighbours[(ui << 1)].refIdx = NOT_VALID; mrgCtxTmp.mvFieldNeighbours[(ui << 1) + 1].refIdx = NOT_VALID; mrgCtxTmp.useAltHpelIf[ui] = false; #if INTER_LIC mrgCtxTmp.LICFlags[ui] = false; #endif #if MULTI_HYP_PRED mrgCtxTmp.addHypNeighbours[ui].clear(); #endif } for (uint32_t uiMergeCand = ((mrgCandIdx < 0) ? 0 : (mrgCandIdx / ADAPTIVE_SUB_GROUP_SIZE)*ADAPTIVE_SUB_GROUP_SIZE); uiMergeCand < (((mrgCandIdx < 0) || ((mrgCandIdx / ADAPTIVE_SUB_GROUP_SIZE + 1)*ADAPTIVE_SUB_GROUP_SIZE > mrgCtx.numValidMergeCand)) ? mrgCtx.numValidMergeCand : ((mrgCandIdx / ADAPTIVE_SUB_GROUP_SIZE + 1)*ADAPTIVE_SUB_GROUP_SIZE)); ++uiMergeCand) { bool firstGroup = (uiMergeCand / ADAPTIVE_SUB_GROUP_SIZE) == 0 ? 
true : false; bool lastGroup = ((uiMergeCand / ADAPTIVE_SUB_GROUP_SIZE + 1)*ADAPTIVE_SUB_GROUP_SIZE >= mrgCtx.numValidMergeCand) ? true : false; if (lastGroup && !firstGroup) { break; } mrgCtxTmp.BcwIdx[uiMergeCand] = mrgCtx.BcwIdx[uiMergeCand]; mrgCtxTmp.interDirNeighbours[uiMergeCand] = mrgCtx.interDirNeighbours[uiMergeCand]; mrgCtxTmp.mvFieldNeighbours[(uiMergeCand << 1)] = mrgCtx.mvFieldNeighbours[(uiMergeCand << 1)]; mrgCtxTmp.mvFieldNeighbours[(uiMergeCand << 1) + 1] = mrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1]; mrgCtxTmp.useAltHpelIf[uiMergeCand] = mrgCtx.useAltHpelIf[uiMergeCand]; #if INTER_LIC mrgCtxTmp.LICFlags[uiMergeCand] = mrgCtx.LICFlags[uiMergeCand]; #endif #if MULTI_HYP_PRED mrgCtxTmp.addHypNeighbours[uiMergeCand] = mrgCtx.addHypNeighbours[uiMergeCand]; #endif } //update for (uint32_t uiMergeCand = ((mrgCandIdx < 0) ? 0 : (mrgCandIdx / ADAPTIVE_SUB_GROUP_SIZE)*ADAPTIVE_SUB_GROUP_SIZE); uiMergeCand < (((mrgCandIdx < 0) || ((mrgCandIdx / ADAPTIVE_SUB_GROUP_SIZE + 1)*ADAPTIVE_SUB_GROUP_SIZE > mrgCtx.numValidMergeCand)) ? mrgCtx.numValidMergeCand : ((mrgCandIdx / ADAPTIVE_SUB_GROUP_SIZE + 1)*ADAPTIVE_SUB_GROUP_SIZE)); ++uiMergeCand) { bool firstGroup = (uiMergeCand / ADAPTIVE_SUB_GROUP_SIZE) == 0 ? true : false; bool lastGroup = ((uiMergeCand / ADAPTIVE_SUB_GROUP_SIZE + 1)*ADAPTIVE_SUB_GROUP_SIZE >= mrgCtx.numValidMergeCand) ? 
true : false; if (lastGroup && !firstGroup) { break; } mrgCtx.BcwIdx[uiMergeCand] = mrgCtxTmp.BcwIdx[RdCandList[uiMergeCand / ADAPTIVE_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_SUB_GROUP_SIZE]]; mrgCtx.interDirNeighbours[uiMergeCand] = mrgCtxTmp.interDirNeighbours[RdCandList[uiMergeCand / ADAPTIVE_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_SUB_GROUP_SIZE]]; mrgCtx.mvFieldNeighbours[(uiMergeCand << 1)] = mrgCtxTmp.mvFieldNeighbours[(RdCandList[uiMergeCand / ADAPTIVE_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_SUB_GROUP_SIZE] << 1)]; mrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1] = mrgCtxTmp.mvFieldNeighbours[(RdCandList[uiMergeCand / ADAPTIVE_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_SUB_GROUP_SIZE] << 1) + 1]; mrgCtx.useAltHpelIf[uiMergeCand] = mrgCtxTmp.useAltHpelIf[RdCandList[uiMergeCand / ADAPTIVE_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_SUB_GROUP_SIZE]]; #if INTER_LIC mrgCtx.LICFlags[uiMergeCand] = mrgCtxTmp.LICFlags[RdCandList[uiMergeCand / ADAPTIVE_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_SUB_GROUP_SIZE]]; #endif #if MULTI_HYP_PRED mrgCtx.addHypNeighbours[uiMergeCand] = mrgCtxTmp.addHypNeighbours[RdCandList[uiMergeCand / ADAPTIVE_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_SUB_GROUP_SIZE]]; #endif } } #endif #if JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING #if JVET_AA0093_ENHANCED_MMVD_EXTENSION void InterPrediction::getBlkAMLRefTemplate(PredictionUnit &pu, PelUnitBuf &pcBufPredRefTop, PelUnitBuf &pcBufPredRefLeft, int8_t posList0, int8_t posList1, bool load0, bool load1) #else void InterPrediction::getBlkAMLRefTemplate(PredictionUnit &pu, PelUnitBuf &pcBufPredRefTop, PelUnitBuf &pcBufPredRefLeft) #endif { #if JVET_AA0093_ENHANCED_MMVD_EXTENSION bool bLoadSave = (posList0 != -1); #endif Mv mvCurr; const int lumaShift = 2 + MV_FRACTIONAL_BITS_DIFF; const int horShift = (lumaShift + ::getComponentScaleX(COMPONENT_Y, pu.chromaFormat)); const int verShift = (lumaShift + ::getComponentScaleY(COMPONENT_Y, pu.chromaFormat)); #if JVET_AA0093_ENHANCED_MMVD_EXTENSION if (!bLoadSave && 
xCheckIdenticalMotion(pu)) #else if (xCheckIdenticalMotion(pu)) #endif { mvCurr = pu.mv[0]; /*const int horIntMv = (mvCurr.getHor() + ((1 << horShift) >> 1)) >> horShift; const int verIntMv = (mvCurr.getVer() + ((1 << verShift) >> 1)) >> verShift; Mv subPelMv(horIntMv << horShift, verIntMv << verShift);*/ Mv subPelMv = mvCurr; clipMv(mvCurr, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps); CHECK(pu.refIdx[0] < 0, "invalid ref idx"); if (m_bAMLTemplateAvailabe[0]) { Mv mvTop(0, -(AML_MERGE_TEMPLATE_SIZE << verShift)); mvTop += subPelMv; clipMv(mvTop, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps); #if RPR_ENABLE const Picture* picRef = pu.cu->slice->getRefPic(REF_PIC_LIST_0, pu.refIdx[0])->unscaledPic; const std::pair<int, int>& scalingRatio = pu.cu->slice->getScalingRatio(REF_PIC_LIST_0, pu.refIdx[0]); #if INTER_LIC xPredInterBlk( COMPONENT_Y, pu, picRef, mvTop, pcBufPredRefTop, false, pu.cu->slice->clpRng( COMPONENT_Y ), false, false, scalingRatio, 0, 0, false, NULL, 0, true, true, mvCurr ); #else xPredInterBlk( COMPONENT_Y, pu, picRef, mvTop, pcBufPredRefTop, false, pu.cu->slice->clpRng( COMPONENT_Y ), false, false, scalingRatio, 0, 0, false, NULL, 0, true ); #endif #else #if INTER_LIC xPredInterBlk( COMPONENT_Y, pu, pu.cu->slice->getRefPic( REF_PIC_LIST_0, pu.refIdx[0] ), mvTop, pcBufPredRefTop, false, pu.cu->slice->clpRng( COMPONENT_Y ), false, false, SCALE_1X, 0, 0, false, NULL, 0, true, true, mvCurr ); #else xPredInterBlk( COMPONENT_Y, pu, pu.cu->slice->getRefPic( REF_PIC_LIST_0, pu.refIdx[0] ), mvTop, pcBufPredRefTop, false, pu.cu->slice->clpRng( COMPONENT_Y ), false, false, SCALE_1X, 0, 0, false, NULL, 0, true ); #endif #endif } if (m_bAMLTemplateAvailabe[1]) { Mv mvLeft(-(AML_MERGE_TEMPLATE_SIZE << horShift), 0); mvLeft += subPelMv; clipMv(mvLeft, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps); #if RPR_ENABLE const Picture* picRef = pu.cu->slice->getRefPic(REF_PIC_LIST_0, pu.refIdx[0])->unscaledPic; const std::pair<int, int>& 
scalingRatio = pu.cu->slice->getScalingRatio(REF_PIC_LIST_0, pu.refIdx[0]); #if INTER_LIC xPredInterBlk( COMPONENT_Y, pu, picRef, mvLeft, pcBufPredRefLeft, false, pu.cu->slice->clpRng( COMPONENT_Y ), false, false, scalingRatio, 0, 0, false, NULL, 0, true, true, mvCurr ); #else xPredInterBlk( COMPONENT_Y, pu, picRef, mvLeft, pcBufPredRefLeft, false, pu.cu->slice->clpRng( COMPONENT_Y ), false, false, scalingRatio, 0, 0, false, NULL, 0, true ); #endif #else #if INTER_LIC xPredInterBlk( COMPONENT_Y, pu, pu.cu->slice->getRefPic( REF_PIC_LIST_0, pu.refIdx[0] ), mvLeft, pcBufPredRefLeft, false, pu.cu->slice->clpRng( COMPONENT_Y ), false, false, SCALE_1X, 0, 0, false, NULL, 0, true, true, mvCurr ); #else xPredInterBlk( COMPONENT_Y, pu, pu.cu->slice->getRefPic( REF_PIC_LIST_0, pu.refIdx[0] ), mvLeft, pcBufPredRefLeft, false, pu.cu->slice->clpRng( COMPONENT_Y ), false, false, SCALE_1X, 0, 0, false, NULL, 0, true ); #endif #endif } } else { for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) { if (pu.refIdx[refList] < 0) { continue; } #if JVET_AA0093_ENHANCED_MMVD_EXTENSION if (bLoadSave && (((refList == 0) && load0) || ((refList == 1) && load1)) )// changed { continue; } #endif RefPicList eRefPicList = (refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0); CHECK(pu.refIdx[refList] >= pu.cu->slice->getNumRefIdx(eRefPicList), "Invalid reference index"); m_iRefListIdx = refList; mvCurr = pu.mv[refList]; /*const int horIntMv = (mvCurr.getHor() + ((1 << horShift) >> 1)) >> horShift; const int verIntMv = (mvCurr.getVer() + ((1 << verShift) >> 1)) >> verShift; Mv subPelMv(horIntMv << horShift, verIntMv << verShift);*/ Mv subPelMv = mvCurr; clipMv(mvCurr, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps); if (m_bAMLTemplateAvailabe[0]) { Mv mvTop(0, -(AML_MERGE_TEMPLATE_SIZE << verShift)); mvTop += subPelMv; clipMv(mvTop, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps); #if JVET_AA0093_ENHANCED_MMVD_EXTENSION int offsetA = bLoadSave ? (refList == 0 ? 
posList0 : posList1) * MAX_CU_SIZE : 0; PelUnitBuf pcMbBuf = PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAboveTemplate[refList][0] + offsetA, pcBufPredRefTop.Y())); // changed #else PelUnitBuf pcMbBuf = PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAboveTemplate[refList][0], pcBufPredRefTop.Y())); #endif if (pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0) { #if RPR_ENABLE const Picture* picRef = pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[refList])->unscaledPic; const std::pair<int, int>& scalingRatio = pu.cu->slice->getScalingRatio(eRefPicList, pu.refIdx[refList]); #if INTER_LIC xPredInterBlk( COMPONENT_Y, pu, picRef, mvTop, pcMbBuf, true, pu.cu->slice->clpRng( COMPONENT_Y ), false, false, scalingRatio, 0, 0, false, NULL, 0, true, true, mvCurr ); #else xPredInterBlk( COMPONENT_Y, pu, picRef, mvTop, pcMbBuf, true, pu.cu->slice->clpRng( COMPONENT_Y ), false, false, scalingRatio, 0, 0, false, NULL, 0, true ); #endif #else #if INTER_LIC xPredInterBlk( COMPONENT_Y, pu, pu.cu->slice->getRefPic( eRefPicList, pu.refIdx[refList] ), mvTop, pcMbBuf, true, pu.cu->slice->clpRng( COMPONENT_Y ), false, false, SCALE_1X, 0, 0, false, NULL, 0, true, true, mvCurr ); #else xPredInterBlk( COMPONENT_Y, pu, pu.cu->slice->getRefPic( eRefPicList, pu.refIdx[refList] ), mvTop, pcMbBuf, true, pu.cu->slice->clpRng( COMPONENT_Y ), false, false, SCALE_1X, 0, 0, false, NULL, 0, true ); #endif #endif } else { #if RPR_ENABLE const Picture* picRef = pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[refList])->unscaledPic; const std::pair<int, int>& scalingRatio = pu.cu->slice->getScalingRatio(eRefPicList, pu.refIdx[refList]); #if INTER_LIC xPredInterBlk( COMPONENT_Y, pu, picRef, mvTop, pcMbBuf, false, pu.cu->slice->clpRng( COMPONENT_Y ), false, false, scalingRatio, 0, 0, false, NULL, 0, true, true, mvCurr ); #else xPredInterBlk( COMPONENT_Y, pu, picRef, mvTop, pcMbBuf, false, pu.cu->slice->clpRng( COMPONENT_Y ), false, false, scalingRatio, 0, 0, false, NULL, 0, true ); #endif #else #if INTER_LIC 
xPredInterBlk( COMPONENT_Y, pu, pu.cu->slice->getRefPic( eRefPicList, pu.refIdx[refList] ), mvTop, pcMbBuf, false, pu.cu->slice->clpRng( COMPONENT_Y ), false, false, SCALE_1X, 0, 0, false, NULL, 0, true, true, mvCurr ); #else xPredInterBlk( COMPONENT_Y, pu, pu.cu->slice->getRefPic( eRefPicList, pu.refIdx[refList] ), mvTop, pcMbBuf, false, pu.cu->slice->clpRng( COMPONENT_Y ), false, false, SCALE_1X, 0, 0, false, NULL, 0, true ); #endif #endif } } if (m_bAMLTemplateAvailabe[1]) { Mv mvLeft(-(AML_MERGE_TEMPLATE_SIZE << horShift), 0); mvLeft += subPelMv; clipMv(mvLeft, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps); #if JVET_AA0093_ENHANCED_MMVD_EXTENSION int offsetA = bLoadSave ? (refList == 0 ? posList0 : posList1) * MAX_CU_SIZE : 0; PelUnitBuf pcMbBuf = PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefLeftTemplate[refList][0] + offsetA, pcBufPredRefLeft.Y())); // changed #else PelUnitBuf pcMbBuf = PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefLeftTemplate[refList][0], pcBufPredRefLeft.Y())); #endif if (pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0) { #if RPR_ENABLE const Picture* picRef = pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[refList])->unscaledPic; const std::pair<int, int>& scalingRatio = pu.cu->slice->getScalingRatio( eRefPicList, pu.refIdx[refList] ); #if INTER_LIC xPredInterBlk( COMPONENT_Y, pu, picRef, mvLeft, pcMbBuf, true, pu.cu->slice->clpRng( COMPONENT_Y ), false, false, scalingRatio, 0, 0, false, NULL, 0, true, true, mvCurr ); #else xPredInterBlk( COMPONENT_Y, pu, picRef, mvLeft, pcMbBuf, true, pu.cu->slice->clpRng( COMPONENT_Y ), false, false, scalingRatio, 0, 0, false, NULL, 0, true ); #endif #else #if INTER_LIC xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[refList]), mvLeft, pcMbBuf, true, pu.cu->slice->clpRng(COMPONENT_Y), false, false, SCALE_1X, 0, 0, false, NULL, 0, true, true, mvCurr); #else xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[refList]), mvLeft, pcMbBuf, true, 
pu.cu->slice->clpRng(COMPONENT_Y), false, false, SCALE_1X, 0, 0, false, NULL, 0, true); #endif #endif } else { #if RPR_ENABLE const Picture* picRef = pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[refList])->unscaledPic; const std::pair<int, int>& scalingRatio = pu.cu->slice->getScalingRatio(eRefPicList, pu.refIdx[refList]); #if INTER_LIC xPredInterBlk( COMPONENT_Y, pu, picRef, mvLeft, pcMbBuf, false, pu.cu->slice->clpRng( COMPONENT_Y ), false, false, scalingRatio, 0, 0, false, NULL, 0, true, true, mvCurr ); #else xPredInterBlk( COMPONENT_Y, pu, picRef, mvLeft, pcMbBuf, false, pu.cu->slice->clpRng( COMPONENT_Y ), false, false, scalingRatio, 0, 0, false, NULL, 0, true ); #endif #else #if INTER_LIC xPredInterBlk( COMPONENT_Y, pu, pu.cu->slice->getRefPic( eRefPicList, pu.refIdx[refList] ), mvLeft, pcMbBuf, false, pu.cu->slice->clpRng( COMPONENT_Y ), false, false, SCALE_1X, 0, 0, false, NULL, 0, true, true, mvCurr ); #else xPredInterBlk( COMPONENT_Y, pu, pu.cu->slice->getRefPic( eRefPicList, pu.refIdx[refList] ), mvLeft, pcMbBuf, false, pu.cu->slice->clpRng( COMPONENT_Y ), false, false, SCALE_1X, 0, 0, false, NULL, 0, true ); #endif #endif } } } #if JVET_AA0093_ENHANCED_MMVD_EXTENSION int offset0 = 0; int offset1 = 0; if (bLoadSave) { offset0 = posList0 * MAX_CU_SIZE; offset1 = posList1 * MAX_CU_SIZE; } if (m_bAMLTemplateAvailabe[0]) { CPelUnitBuf srcPred0 = CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAboveTemplate[0][0] + offset0, pcBufPredRefTop.Y())); // changed CPelUnitBuf srcPred1 = CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAboveTemplate[1][0] + offset1, pcBufPredRefTop.Y())); // changed xWeightedAverageY(pu, srcPred0, srcPred1, pcBufPredRefTop, pu.cu->slice->getSPS()->getBitDepths(), pu.cu->slice->clpRngs()); } if (m_bAMLTemplateAvailabe[1]) { CPelUnitBuf srcPred0 = CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefLeftTemplate[0][0] + offset0, pcBufPredRefLeft.Y())); // changed CPelUnitBuf srcPred1 = CPelUnitBuf(pu.chromaFormat, 
PelBuf(m_acYuvRefLeftTemplate[1][0] + offset1, pcBufPredRefLeft.Y())); // changed xWeightedAverageY(pu, srcPred0, srcPred1, pcBufPredRefLeft, pu.cu->slice->getSPS()->getBitDepths(), pu.cu->slice->clpRngs()); } #else if (m_bAMLTemplateAvailabe[0]) { CPelUnitBuf srcPred0 = CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAboveTemplate[0][0], pcBufPredRefTop.Y())); CPelUnitBuf srcPred1 = CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAboveTemplate[1][0], pcBufPredRefTop.Y())); xWeightedAverageY(pu, srcPred0, srcPred1, pcBufPredRefTop, pu.cu->slice->getSPS()->getBitDepths(), pu.cu->slice->clpRngs()); } if (m_bAMLTemplateAvailabe[1]) { CPelUnitBuf srcPred0 = CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefLeftTemplate[0][0], pcBufPredRefLeft.Y())); CPelUnitBuf srcPred1 = CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefLeftTemplate[1][0], pcBufPredRefLeft.Y())); xWeightedAverageY(pu, srcPred0, srcPred1, pcBufPredRefLeft, pu.cu->slice->getSPS()->getBitDepths(), pu.cu->slice->clpRngs()); } #endif } } #endif #if JVET_W0090_ARMC_TM void InterPrediction::adjustAffineMergeCandidates(PredictionUnit &pu, AffineMergeCtx& affMrgCtx, int mrgCandIdx #if JVET_Z0139_NA_AFF , int sortedCandNum #endif ) { #if JVET_Z0139_NA_AFF const uint32_t maxNumAffineMergeCand = (sortedCandNum > 0) ? 
sortedCandNum: pu.cs->slice->getPicHeader()->getMaxNumAffineMergeCand(); #endif #if JVET_AA0107_RMVF_AFFINE_MERGE_DERIVATION uint32_t rdCandList[RMVF_AFFINE_MRG_MAX_CAND_LIST_SIZE][RMVF_AFFINE_MRG_MAX_CAND_LIST_SIZE]; Distortion candCostList[RMVF_AFFINE_MRG_MAX_CAND_LIST_SIZE][RMVF_AFFINE_MRG_MAX_CAND_LIST_SIZE]; #else uint32_t rdCandList[AFFINE_MRG_MAX_NUM_CANDS][AFFINE_MRG_MAX_NUM_CANDS]; Distortion candCostList[AFFINE_MRG_MAX_NUM_CANDS][AFFINE_MRG_MAX_NUM_CANDS]; #endif #if JVET_AA0107_RMVF_AFFINE_MERGE_DERIVATION for (uint32_t i = 0; i < RMVF_AFFINE_MRG_MAX_CAND_LIST_SIZE; i++) { for (uint32_t j = 0; j < RMVF_AFFINE_MRG_MAX_CAND_LIST_SIZE; j++) #else for (uint32_t i = 0; i < AFFINE_MRG_MAX_NUM_CANDS; i++) { for (uint32_t j = 0; j < AFFINE_MRG_MAX_NUM_CANDS; j++) #endif { rdCandList[i][j] = j; candCostList[i][j] = MAX_UINT; } } Distortion uiCost; DistParam cDistParam; cDistParam.applyWeight = false; int nWidth = pu.lumaSize().width; int nHeight = pu.lumaSize().height; if (!xAMLGetCurBlkTemplate(pu, nWidth, nHeight)) { return; } for (uint32_t uiMergeCand = ((mrgCandIdx < 0) ? 0 : (mrgCandIdx / ADAPTIVE_AFFINE_SUB_GROUP_SIZE)*ADAPTIVE_AFFINE_SUB_GROUP_SIZE); uiMergeCand < (((mrgCandIdx < 0) || ((mrgCandIdx / ADAPTIVE_AFFINE_SUB_GROUP_SIZE + 1)*ADAPTIVE_AFFINE_SUB_GROUP_SIZE > affMrgCtx.maxNumMergeCand)) ? affMrgCtx.maxNumMergeCand : ((mrgCandIdx / ADAPTIVE_AFFINE_SUB_GROUP_SIZE + 1)*ADAPTIVE_AFFINE_SUB_GROUP_SIZE)); ++uiMergeCand) { bool firstGroup = (uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE) == 0 ? true : false; bool lastGroup = ((uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE + 1)*ADAPTIVE_AFFINE_SUB_GROUP_SIZE >= affMrgCtx.maxNumMergeCand) ? 
true : false; if (lastGroup && !firstGroup) { break; } #if JVET_AA0107_RMVF_AFFINE_MERGE_DERIVATION bool lowPrio = false; if (uiMergeCand >= affMrgCtx.numAffCandToTestEnc) { affMrgCtx.candCost[uiMergeCand] = MAX_UINT64 >> 1; uiCost = MAX_UINT >> 1; lowPrio = true; } if (affMrgCtx.candCost[uiMergeCand] == MAX_UINT64) { #endif uiCost = 0; // set merge information pu.interDir = affMrgCtx.interDirNeighbours[uiMergeCand]; pu.mergeFlag = true; pu.regularMergeFlag = false; pu.mergeIdx = uiMergeCand; pu.cu->affine = true; pu.cu->affineType = affMrgCtx.affineType[uiMergeCand]; #if AFFINE_MMVD pu.afMmvdFlag = false; #endif pu.cu->BcwIdx = affMrgCtx.BcwIdx[uiMergeCand]; #if INTER_LIC pu.cu->LICFlag = affMrgCtx.LICFlags[uiMergeCand]; #endif pu.mergeType = affMrgCtx.mergeType[uiMergeCand]; #if JVET_Z0139_NA_AFF if (pu.mergeType == MRG_TYPE_DEFAULT_N) #else if (pu.mergeType == MRG_TYPE_SUBPU_ATMVP) { pu.refIdx[0] = affMrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 0][0].refIdx; pu.refIdx[1] = affMrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1][0].refIdx; PU::spanMotionInfo(pu, *affMrgCtx.mrgCtx); } else #endif { for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) { for (int i = 0; i < 3; i++) { pu.mvAffi[refList][i] = affMrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + refList][i].mv; } pu.refIdx[refList] = affMrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + refList][0].refIdx; } PelUnitBuf pcBufPredRefTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE))); PelUnitBuf pcBufPredCurTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE))); PelUnitBuf pcBufPredRefLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight))); PelUnitBuf pcBufPredCurLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight))); #if RPR_ENABLE bool bRefIsRescaled = false; for (uint32_t refList = 0; 
refList < NUM_REF_PIC_LIST_01; refList++) { const RefPicList eRefPicList = refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0; #if JVET_Z0118_GDR Picture *refPic = pu.refIdx[refList] >= 0 ? pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[refList]) : nullptr; if (refPic) { bRefIsRescaled |= pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[refList])->isRefScaled(pu.cs->pps); } #else bRefIsRescaled |= (pu.refIdx[refList] >= 0) ? pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[refList])->isRefScaled(pu.cs->pps) : false; #endif } if ( !bRefIsRescaled ) { #endif getAffAMLRefTemplate(pu, pcBufPredRefTop, pcBufPredRefLeft); if (m_bAMLTemplateAvailabe[0]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[1]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } #if JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC if (m_bAMLTemplateAvailabe[0] && !m_bAMLTemplateAvailabe[1]) { uiCost += (uiCost * nHeight) / nWidth; } if (!m_bAMLTemplateAvailabe[0] && m_bAMLTemplateAvailabe[1]) { uiCost += (uiCost * nWidth) / nHeight; } #endif #if RPR_ENABLE } #endif } #if JVET_AA0107_RMVF_AFFINE_MERGE_DERIVATION } else if (!lowPrio) { uiCost = affMrgCtx.candCost[uiMergeCand]; } #endif #if JVET_Z0139_NA_AFF updateCandList(uiMergeCand, uiCost, maxNumAffineMergeCand, rdCandList[uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE], candCostList[uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE]); #else updateCandList(uiMergeCand, uiCost, ADAPTIVE_AFFINE_SUB_GROUP_SIZE, RdCandList[uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE], candCostList[uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE]); #endif } pu.mergeIdx = mrgCandIdx; //restore the merge index #if JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC if (maxNumAffineMergeCand > 2) { 
Distortion cost = pu.cs->slice->getCostForARMC(); uint32_t candToBeRemoved = AFFINE_MRG_MAX_NUM_CANDS - 1; Distortion min = MAX_UINT64; for (int sizeCandList = maxNumAffineMergeCand; sizeCandList > 1; sizeCandList--) { min = MAX_UINT64; for (uint32_t uiMergeCand = 0; uiMergeCand < sizeCandList - 1; ++uiMergeCand) { if (min > abs((int)(candCostList[0][uiMergeCand + 1] - candCostList[0][uiMergeCand]))) { min = abs((int)(candCostList[0][uiMergeCand + 1] - candCostList[0][uiMergeCand])); candToBeRemoved = uiMergeCand + 1; } } if (candToBeRemoved > AFFINE_MRG_MAX_NUM_CANDS - 2) { continue; } if (min < cost) { uint32_t candToBeReplaced = sizeCandList - 1; for (uint32_t uiMergeCand = std::min( (int) (candToBeRemoved + 1), (int) (AFFINE_MRG_MAX_NUM_CANDS - 1) ); uiMergeCand < std::min((int)sizeCandList, (int)(AFFINE_MRG_MAX_NUM_CANDS - 1)); ++uiMergeCand) { if (cost < abs((int)(candCostList[0][uiMergeCand] - candCostList[0][candToBeRemoved]))) { candToBeReplaced = uiMergeCand; break; } } if (sizeCandList > maxNumAffineMergeCand) { candToBeReplaced = sizeCandList - 1; } uint32_t rdCand = rdCandList[0][candToBeRemoved]; Distortion candCost = candCostList[0][(candToBeRemoved)]; for (int ui = candToBeRemoved; ui < (candToBeReplaced > sizeCandList - 1 ? 
// ---------------------------------------------------------------------------------------------
// InterPrediction::updateAffineCandInfo
// (its definition begins on the next source line, after the tail of the preceding function)
//
// Rewrites the entries of affMrgCtx in the order given by RdCandList.
//   pu         - only used here to reach the picture header (pu.cs->slice->getPicHeader()).
//   affMrgCtx  - affine merge candidate list; read first, then overwritten in place.
//   RdCandList - candidate indices; row = uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE,
//                column = uiMergeCand % ADAPTIVE_AFFINE_SUB_GROUP_SIZE.
//   mrgCandIdx - < 0: process candidates [0, affMrgCtx.maxNumMergeCand); otherwise only the
//                sub-group containing mrgCandIdx, clipped to affMrgCtx.maxNumMergeCand.
// Steps:
//   1) the first maxNumAffineMergeCand entries of a temporary AffineMergeCtx are reset
//      (setMvField(Mv(), -1) for all three MVs of both lists, interDir 0, AFFINEMODEL_4PARAM,
//      MRG_TYPE_DEFAULT_N, BCW_DEFAULT and, with INTER_LIC, LIC flag false);
//   2) the processed candidates are copied from affMrgCtx into the temporary;
//   3) each processed slot of affMrgCtx is refilled from the temporary entry selected by
//      RdCandList.
// Both candidate loops break when they reach a sub-group that is the last one but not the first
// one. With JVET_AA0107_RMVF_AFFINE_MERGE_DERIVATION the reset covers
// getMaxNumAffineMergeCand() + ADDITIONAL_AFFINE_CAND_NUM entries and the write-back in step 3
// additionally stops at outputListSize (= getMaxNumAffineMergeCand()).
// ---------------------------------------------------------------------------------------------
sizeCandList - 1 : candToBeReplaced); ui++) { candCostList[0][ui] = candCostList[0][(ui + 1)]; rdCandList[0][ui] = rdCandList[0][(ui + 1)]; } candCostList[0][candToBeReplaced] = candCost; rdCandList[0][candToBeReplaced] = rdCand; } else { break; } } } #endif updateAffineCandInfo(pu, affMrgCtx, rdCandList , mrgCandIdx ); } void InterPrediction::updateAffineCandInfo(PredictionUnit &pu, AffineMergeCtx& affMrgCtx, #if JVET_AA0107_RMVF_AFFINE_MERGE_DERIVATION uint32_t(*RdCandList)[RMVF_AFFINE_MRG_MAX_CAND_LIST_SIZE], #else uint32_t(*RdCandList)[AFFINE_MRG_MAX_NUM_CANDS], #endif int mrgCandIdx) { AffineMergeCtx affMrgCtxTmp; #if JVET_AA0107_RMVF_AFFINE_MERGE_DERIVATION const uint32_t maxNumAffineMergeCand = pu.cs->slice->getPicHeader()->getMaxNumAffineMergeCand() + ADDITIONAL_AFFINE_CAND_NUM; const uint32_t outputListSize = pu.cs->slice->getPicHeader()->getMaxNumAffineMergeCand(); #else const uint32_t maxNumAffineMergeCand = pu.cs->slice->getPicHeader()->getMaxNumAffineMergeCand(); #endif for (int i = 0; i < maxNumAffineMergeCand; i++) { for (int mvNum = 0; mvNum < 3; mvNum++) { affMrgCtxTmp.mvFieldNeighbours[(i << 1) + 0][mvNum].setMvField(Mv(), -1); affMrgCtxTmp.mvFieldNeighbours[(i << 1) + 1][mvNum].setMvField(Mv(), -1); } affMrgCtxTmp.interDirNeighbours[i] = 0; affMrgCtxTmp.affineType[i] = AFFINEMODEL_4PARAM; affMrgCtxTmp.mergeType[i] = MRG_TYPE_DEFAULT_N; affMrgCtxTmp.BcwIdx[i] = BCW_DEFAULT; #if INTER_LIC affMrgCtxTmp.LICFlags[i] = false; #endif } for (uint32_t uiMergeCand = ((mrgCandIdx < 0) ? 0 : (mrgCandIdx / ADAPTIVE_AFFINE_SUB_GROUP_SIZE)*ADAPTIVE_AFFINE_SUB_GROUP_SIZE); uiMergeCand < (((mrgCandIdx < 0) || ((mrgCandIdx / ADAPTIVE_AFFINE_SUB_GROUP_SIZE + 1)*ADAPTIVE_AFFINE_SUB_GROUP_SIZE > affMrgCtx.maxNumMergeCand)) ? affMrgCtx.maxNumMergeCand : ((mrgCandIdx / ADAPTIVE_AFFINE_SUB_GROUP_SIZE + 1)*ADAPTIVE_AFFINE_SUB_GROUP_SIZE)); ++uiMergeCand) { bool firstGroup = (uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE) == 0 ? 
true : false; bool lastGroup = ((uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE + 1)*ADAPTIVE_AFFINE_SUB_GROUP_SIZE >= affMrgCtx.maxNumMergeCand) ? true : false; if (lastGroup && !firstGroup) { break; } for (int mvNum = 0; mvNum < 3; mvNum++) { affMrgCtxTmp.mvFieldNeighbours[(uiMergeCand << 1) + 0][mvNum] = affMrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 0][mvNum]; affMrgCtxTmp.mvFieldNeighbours[(uiMergeCand << 1) + 1][mvNum] = affMrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1][mvNum]; } affMrgCtxTmp.interDirNeighbours[uiMergeCand] = affMrgCtx.interDirNeighbours[uiMergeCand]; affMrgCtxTmp.affineType[uiMergeCand] = affMrgCtx.affineType[uiMergeCand]; affMrgCtxTmp.mergeType[uiMergeCand] = affMrgCtx.mergeType[uiMergeCand]; affMrgCtxTmp.BcwIdx[uiMergeCand] = affMrgCtx.BcwIdx[uiMergeCand]; #if INTER_LIC affMrgCtxTmp.LICFlags[uiMergeCand] = affMrgCtx.LICFlags[uiMergeCand]; #endif } //update for (uint32_t uiMergeCand = ((mrgCandIdx < 0) ? 0 : (mrgCandIdx / ADAPTIVE_AFFINE_SUB_GROUP_SIZE)*ADAPTIVE_AFFINE_SUB_GROUP_SIZE); uiMergeCand < (((mrgCandIdx < 0) || ((mrgCandIdx / ADAPTIVE_AFFINE_SUB_GROUP_SIZE + 1)*ADAPTIVE_AFFINE_SUB_GROUP_SIZE > affMrgCtx.maxNumMergeCand)) ? affMrgCtx.maxNumMergeCand : ((mrgCandIdx / ADAPTIVE_AFFINE_SUB_GROUP_SIZE + 1)*ADAPTIVE_AFFINE_SUB_GROUP_SIZE)); ++uiMergeCand) { bool firstGroup = (uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE) == 0 ? true : false; bool lastGroup = ((uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE + 1)*ADAPTIVE_AFFINE_SUB_GROUP_SIZE >= affMrgCtx.maxNumMergeCand) ? 
// ---------------------------------------------------------------------------------------------
// InterPrediction::xGetSublkAMLTemplate
// (its definition begins on the next source line, after the tail of updateAffineCandInfo)
//
// Fills the reference template samples of one sub-block located at offset (posW, posH).
//   - cuAbove / cuLeft are looked up one sample above / left of cu.blocks[compID]; the
//     reference reconstruction buffer is only fetched when at least one of them exists,
//     otherwise an empty CPelBuf is used.
//   - Above: handled only if cuAbove exists and posH == 0. xGetPredBlkTpl<true> is called with
//     refAboveTemplate, then entries [posW, posW + sublkWidth) are right-shifted by
//     precShift = max(0, bitDepth - 12) and numTemplate[0] is incremented once per entry.
//   - Left: handled only if cuLeft exists and posW == 0; same treatment of refLeftTemplate
//     entries [posH, posH + sublkHeight) with numTemplate[1].
// numTemplate is only incremented here, never reset.
// afMMVD (JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED) is forwarded to xGetPredBlkTpl; wrapRef
// (JVET_AA0146_WRAP_AROUND_FIX) is forwarded to refPic.getRecoBuf.
// ---------------------------------------------------------------------------------------------
true : false; if (lastGroup && !firstGroup) { break; } #if JVET_AA0107_RMVF_AFFINE_MERGE_DERIVATION if (uiMergeCand == outputListSize) { break; } #endif for (int mvNum = 0; mvNum < 3; mvNum++) { affMrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 0][mvNum] = affMrgCtxTmp.mvFieldNeighbours[(RdCandList[uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_AFFINE_SUB_GROUP_SIZE] << 1) + 0][mvNum]; affMrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1][mvNum] = affMrgCtxTmp.mvFieldNeighbours[(RdCandList[uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_AFFINE_SUB_GROUP_SIZE] << 1) + 1][mvNum]; } affMrgCtx.interDirNeighbours[uiMergeCand] = affMrgCtxTmp.interDirNeighbours[RdCandList[uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_AFFINE_SUB_GROUP_SIZE]]; affMrgCtx.affineType[uiMergeCand] = affMrgCtxTmp.affineType[RdCandList[uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_AFFINE_SUB_GROUP_SIZE]]; affMrgCtx.mergeType[uiMergeCand] = affMrgCtxTmp.mergeType[RdCandList[uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_AFFINE_SUB_GROUP_SIZE]]; affMrgCtx.BcwIdx[uiMergeCand] = affMrgCtxTmp.BcwIdx[RdCandList[uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_AFFINE_SUB_GROUP_SIZE]]; #if INTER_LIC affMrgCtx.LICFlags[uiMergeCand] = affMrgCtxTmp.LICFlags[RdCandList[uiMergeCand / ADAPTIVE_AFFINE_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_AFFINE_SUB_GROUP_SIZE]]; #endif } } void InterPrediction::xGetSublkAMLTemplate(const CodingUnit& cu, const ComponentID compID, const Picture& refPic, const Mv& mv, const int sublkWidth, const int sublkHeight, const int posW, const int posH, int* numTemplate, Pel* refLeftTemplate, Pel* refAboveTemplate #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED , bool afMMVD #endif #if JVET_AA0146_WRAP_AROUND_FIX , bool wrapRef #endif ) { const int bitDepth = cu.cs->sps->getBitDepth(toChannelType(compID)); const int precShift = std::max(0, bitDepth - 12); const CodingUnit* const 
// ---------------------------------------------------------------------------------------------
// InterPrediction::getAffAMLRefTemplate
// (its definition begins on the next source line, after the tail of xGetSublkAMLTemplate)
//
// Produces the affine reference templates of pu into pcBufPredRefTop / pcBufPredRefLeft.
//
// With JVET_AA0093_ENHANCED_MMVD_EXTENSION the signature has four extra parameters and
// bLoadSave = (posList0 != -1) selects a load/save mode in which
//   - the per-list buffers are offset by posList0 / posList1 * MAX_CU_SIZE,
//   - a list whose load flag (load0 / load1) is set skips xPredAffineTpl and the
//     bi-prediction linearTransform (NOTE(review): presumably because that slot already holds
//     a prepared prediction - confirm with the callers),
//   - the identical-motion path and the LIC transform of the per-list buffers are skipped.
//
// Path 1, xCheckIdenticalMotion(pu) is true: xPredAffineTpl is run once for REF_PIC_LIST_0 on
// m_acYuvRefAMLTemplate[0][0] (above) and m_acYuvRefAMLTemplate[1][0] (left). With INTER_LIC
// and LICFlag set, parameters from xGetLICParamGeneral are applied through linearTransform to
// bufs[0] of pcBufPredRefTop / pcBufPredRefLeft for each available template.
//
// Path 2, otherwise: for every list with refIdx >= 0, xPredAffineTpl writes into
// m_acYuvRefAboveTemplate[refList][0] / m_acYuvRefLeftTemplate[refList][0] and, with LICFlag,
// per-list LIC parameters are derived. Then, separately for the above template
// (m_bAMLTemplateAvailabe[0]) and the left template (m_bAMLTemplateAvailabe[1]):
//   - the two per-list buffers are wrapped using the geometry of the output buffer's Y plane,
//   - LIC is applied to each list with refIdx >= 0,
//   - if both lists are valid, each buffer goes through
//     linearTransform(1, -(IF_INTERNAL_PREC - bitDepth), -IF_INTERNAL_OFFS, false, dummy range),
//   - xWeightedAverageY combines the two buffers into the output buffer.
//
// With JVET_Z0067_RPR_ENABLE a CHECK on isRefScaled() of the used reference picture carries the
// message "getAffAMLRefTemplate not supported with ref scaled.".
// ---------------------------------------------------------------------------------------------
cuAbove = cu.cs->getCU(cu.blocks[compID].pos().offset(0, -1), toChannelType(compID)); const CodingUnit* const cuLeft = cu.cs->getCU(cu.blocks[compID].pos().offset(-1, 0), toChannelType(compID)); #if JVET_AA0146_WRAP_AROUND_FIX const CPelBuf refBuf = cuAbove || cuLeft ? refPic.getRecoBuf(refPic.blocks[compID], wrapRef) : CPelBuf(); #else const CPelBuf refBuf = cuAbove || cuLeft ? refPic.getRecoBuf(refPic.blocks[compID]) : CPelBuf(); #endif // above if (cuAbove && posH == 0) { #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED xGetPredBlkTpl<true>(cu, compID, refBuf, mv, posW, posH, sublkWidth, refAboveTemplate, afMMVD); #else xGetPredBlkTpl<true>(cu, compID, refBuf, mv, posW, posH, sublkWidth, refAboveTemplate); #endif for (int k = posW; k < posW + sublkWidth; k++) { int refVal = refAboveTemplate[k]; refVal >>= precShift; refAboveTemplate[k] = refVal; numTemplate[0]++; } } // left if (cuLeft && posW == 0) { #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED xGetPredBlkTpl<false>(cu, compID, refBuf, mv, posW, posH, sublkHeight, refLeftTemplate, afMMVD); #else xGetPredBlkTpl<false>(cu, compID, refBuf, mv, posW, posH, sublkHeight, refLeftTemplate); #endif for (int k = posH; k < posH + sublkHeight; k++) { int refVal = refLeftTemplate[k]; refVal >>= precShift; refLeftTemplate[k] = refVal; numTemplate[1]++; } } } #if JVET_AA0093_ENHANCED_MMVD_EXTENSION void InterPrediction::getAffAMLRefTemplate(PredictionUnit &pu, PelUnitBuf &pcBufPredRefTop, PelUnitBuf &pcBufPredRefLeft, int8_t posList0, int8_t posList1, bool load0, bool load1) #else void InterPrediction::getAffAMLRefTemplate(PredictionUnit &pu, PelUnitBuf &pcBufPredRefTop, PelUnitBuf &pcBufPredRefLeft) #endif { #if JVET_AA0093_ENHANCED_MMVD_EXTENSION bool bLoadSave = (posList0 != -1); #endif #if INTER_LIC int LICshift[2] = { 0 }; int scale[2] = { 0 }; int offset[2] = { 0 }; #endif const int bitDepth = pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA); #if JVET_AA0093_ENHANCED_MMVD_EXTENSION if (!bLoadSave && xCheckIdenticalMotion(pu)) #else 
if (xCheckIdenticalMotion(pu)) #endif { Pel * refLeftTemplate = m_acYuvRefAMLTemplate[1][0]; Pel * refAboveTemplate = m_acYuvRefAMLTemplate[0][0]; int numTemplate[2] = { 0, 0 }; // 0:Above, 1:Left const RefPicList eRefPicList = REF_PIC_LIST_0; #if JVET_Z0067_RPR_ENABLE CHECK(pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[eRefPicList])->isRefScaled(pu.cs->pps), "getAffAMLRefTemplate not supported with ref scaled."); #endif xPredAffineTpl(pu, eRefPicList, numTemplate, refLeftTemplate, refAboveTemplate); #if INTER_LIC if (pu.cu->LICFlag) { Pel *recLeftTemplate = m_acYuvCurAMLTemplate[1][0]; Pel *recAboveTemplate = m_acYuvCurAMLTemplate[0][0]; xGetLICParamGeneral(*pu.cu, COMPONENT_Y, numTemplate, refLeftTemplate, refAboveTemplate, recLeftTemplate, recAboveTemplate, LICshift[0], scale[0], offset[0]); if (m_bAMLTemplateAvailabe[0]) { PelBuf & dstBuf = pcBufPredRefTop.bufs[0]; const ClpRng &clpRng = pu.cu->cs->slice->clpRng(COMPONENT_Y); dstBuf.linearTransform(scale[0], LICshift[0], offset[0], true, clpRng); } if (m_bAMLTemplateAvailabe[1]) { PelBuf & dstBuf = pcBufPredRefLeft.bufs[0]; const ClpRng &clpRng = pu.cu->cs->slice->clpRng(COMPONENT_Y); dstBuf.linearTransform(scale[0], LICshift[0], offset[0], true, clpRng); } } #endif } else { for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) { if (pu.refIdx[refList] < 0) { continue; } #if JVET_AA0093_ENHANCED_MMVD_EXTENSION if (bLoadSave && (((refList == 0) && load0) || ((refList == 1) && load1)) )// changed { continue; } #endif RefPicList eRefPicList = (refList ? 
REF_PIC_LIST_1 : REF_PIC_LIST_0); CHECK(pu.refIdx[refList] >= pu.cu->slice->getNumRefIdx(eRefPicList), "Invalid reference index"); #if JVET_Z0067_RPR_ENABLE CHECK(pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[eRefPicList])->isRefScaled(pu.cs->pps), "getAffAMLRefTemplate not supported with ref scaled."); #endif Pel *refLeftTemplate = m_acYuvRefLeftTemplate[refList][0]; Pel *refAboveTemplate = m_acYuvRefAboveTemplate[refList][0]; #if JVET_AA0093_ENHANCED_MMVD_EXTENSION if (bLoadSave) { refLeftTemplate += (refList == 0 ? posList0 : posList1) * MAX_CU_SIZE; refAboveTemplate += (refList == 0 ? posList0 : posList1) * MAX_CU_SIZE;// changed } #endif int numTemplate[2] = { 0, 0 }; // 0:Above, 1:Left xPredAffineTpl(pu, eRefPicList, numTemplate, refLeftTemplate, refAboveTemplate); #if INTER_LIC if (pu.cu->LICFlag) { Pel *recLeftTemplate = m_acYuvCurAMLTemplate[1][0]; Pel *recAboveTemplate = m_acYuvCurAMLTemplate[0][0]; xGetLICParamGeneral(*pu.cu, COMPONENT_Y, numTemplate, refLeftTemplate, refAboveTemplate, recLeftTemplate, recAboveTemplate, LICshift[refList], scale[refList], offset[refList]); } #endif } if (m_bAMLTemplateAvailabe[0]) { PelUnitBuf srcPred[2]; #if JVET_AA0093_ENHANCED_MMVD_EXTENSION int offset0 = bLoadSave ? posList0 * MAX_CU_SIZE : 0; int offset1 = bLoadSave ? 
posList1 * MAX_CU_SIZE : 0; srcPred[0] = PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAboveTemplate[0][0] + offset0, pcBufPredRefTop.Y())); //changed srcPred[1] = PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAboveTemplate[1][0] + offset1, pcBufPredRefTop.Y())); //changed #else srcPred[0] = PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAboveTemplate[0][0], pcBufPredRefTop.Y())); srcPred[1] = PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAboveTemplate[1][0], pcBufPredRefTop.Y())); #endif #if INTER_LIC #if JVET_AA0093_ENHANCED_MMVD_EXTENSION if (!bLoadSave && pu.cu->LICFlag) #else if (pu.cu->LICFlag) #endif { for (int i = 0; i < 2; i++) { if (pu.refIdx[i] >= 0) { PelBuf & dstBuf = srcPred[i].bufs[0]; const ClpRng &clpRng = pu.cu->cs->slice->clpRng(COMPONENT_Y); dstBuf.linearTransform(scale[i], LICshift[i], offset[i], true, clpRng); } } } #endif const int iRefIdx0 = pu.refIdx[0]; const int iRefIdx1 = pu.refIdx[1]; if (iRefIdx0 >= 0 && iRefIdx1 >= 0) { for (int i = 0; i < 2; i++) { #if JVET_AA0093_ENHANCED_MMVD_EXTENSION if (bLoadSave && (((i == 0) && load0) || ((i == 1) && load1)))// changed { continue; } #endif PelBuf & dstBuf = srcPred[i].bufs[0]; const int biShift = IF_INTERNAL_PREC - bitDepth; const Pel biOffset = -IF_INTERNAL_OFFS; ClpRng clpRngDummy; dstBuf.linearTransform(1, -biShift, biOffset, false, clpRngDummy); } } xWeightedAverageY(pu, srcPred[0], srcPred[1], pcBufPredRefTop, pu.cu->slice->getSPS()->getBitDepths(), pu.cu->slice->clpRngs()); } if (m_bAMLTemplateAvailabe[1]) { PelUnitBuf srcPred[2]; #if JVET_AA0093_ENHANCED_MMVD_EXTENSION int offset0 = bLoadSave ? posList0 * MAX_CU_SIZE : 0; int offset1 = bLoadSave ? 
// ---------------------------------------------------------------------------------------------
// InterPrediction::adjustIBCMergeCandidates - (pu, mrgCtx, mrgCandIdx) overload, compiled under
// JVET_Y0058_IBC_LIST_MODIFY
// (its definition begins on the next source line, after the tail of getAffAMLRefTemplate)
//
// Computes a template cost for each IBC merge candidate in the processed range and hands the
// resulting candidate order to updateIBCCandInfo.
//   - Under JVET_Z0084_IBC_TM it returns immediately when mrgCtx.numValidMergeCand <= 1.
//   - It returns without touching mrgCtx when xAMLIBCGetCurBlkTemplate(pu, nWidth, nHeight)
//     is false.
//   - Range: all valid candidates if mrgCandIdx < 0, otherwise the ADAPTIVE_IBC_SUB_GROUP_SIZE
//     sub-group containing mrgCandIdx, clipped to numValidMergeCand. The loop breaks at a
//     sub-group that is the last one but not the first one, and at the first candidate whose
//     pu.bv equals Mv(0, 0) after mrgCtx.setMergeInfo.
//   - Cost: sum of cDistParam.distFunc over the above and/or left template, each term only if
//     the matching m_bAMLTemplateAvailabe flag is set; the cost is passed to updateCandList
//     together with the RdCandList / candCostList row of the candidate's sub-group.
//   - pu is modified through mrgCtx.setMergeInfo; pu.mergeIdx is set back to mrgCandIdx at the
//     end.
// ---------------------------------------------------------------------------------------------
posList1 * MAX_CU_SIZE : 0; srcPred[0] = PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefLeftTemplate[0][0] + offset0, pcBufPredRefLeft.Y())); //changed srcPred[1] = PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefLeftTemplate[1][0] + offset1, pcBufPredRefLeft.Y())); //changed #else srcPred[0] = PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefLeftTemplate[0][0], pcBufPredRefLeft.Y())); srcPred[1] = PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefLeftTemplate[1][0], pcBufPredRefLeft.Y())); #endif #if INTER_LIC #if JVET_AA0093_ENHANCED_MMVD_EXTENSION if (!bLoadSave && pu.cu->LICFlag) #else if (pu.cu->LICFlag) #endif { for (int i = 0; i < 2; i++) { if (pu.refIdx[i] >= 0) { PelBuf & dstBuf = srcPred[i].bufs[0]; const ClpRng &clpRng = pu.cu->cs->slice->clpRng(COMPONENT_Y); dstBuf.linearTransform(scale[i], LICshift[i], offset[i], true, clpRng); } } } #endif const int iRefIdx0 = pu.refIdx[0]; const int iRefIdx1 = pu.refIdx[1]; if (iRefIdx0 >= 0 && iRefIdx1 >= 0) { for (int i = 0; i < 2; i++) { #if JVET_AA0093_ENHANCED_MMVD_EXTENSION if (bLoadSave && (((i == 0) && load0) || ((i == 1) && load1)))// changed { continue; } #endif PelBuf & dstBuf = srcPred[i].bufs[0]; const int biShift = IF_INTERNAL_PREC - bitDepth; const Pel biOffset = -IF_INTERNAL_OFFS; ClpRng clpRngDummy; dstBuf.linearTransform(1, -biShift, biOffset, false, clpRngDummy); } } xWeightedAverageY(pu, srcPred[0], srcPred[1], pcBufPredRefLeft, pu.cu->slice->getSPS()->getBitDepths(), pu.cu->slice->clpRngs()); } } } #if JVET_Y0058_IBC_LIST_MODIFY void InterPrediction::adjustIBCMergeCandidates(PredictionUnit &pu, MergeCtx& mrgCtx, int mrgCandIdx) { #if JVET_Z0084_IBC_TM if (mrgCtx.numValidMergeCand <= 1) { return; } #endif uint32_t RdCandList[IBC_MRG_MAX_NUM_CANDS][IBC_MRG_MAX_NUM_CANDS]; Distortion candCostList[IBC_MRG_MAX_NUM_CANDS][IBC_MRG_MAX_NUM_CANDS]; for (uint32_t i = 0; i < IBC_MRG_MAX_NUM_CANDS; i++) { for (uint32_t j = 0; j < IBC_MRG_MAX_NUM_CANDS; j++) { RdCandList[i][j] = j; candCostList[i][j] = MAX_UINT; } } 
Distortion uiCost; DistParam cDistParam; cDistParam.applyWeight = false; /*const SPS &sps = *pu.cs->sps; Position puPos = pu.lumaPos();*/ int nWidth = pu.lumaSize().width; int nHeight = pu.lumaSize().height; if (!xAMLIBCGetCurBlkTemplate(pu, nWidth, nHeight)) { return; } for (uint32_t uiMergeCand = ((mrgCandIdx < 0) ? 0 : (mrgCandIdx / ADAPTIVE_IBC_SUB_GROUP_SIZE)*ADAPTIVE_IBC_SUB_GROUP_SIZE); uiMergeCand < (((mrgCandIdx < 0) || ((mrgCandIdx / ADAPTIVE_IBC_SUB_GROUP_SIZE + 1)*ADAPTIVE_IBC_SUB_GROUP_SIZE > mrgCtx.numValidMergeCand)) ? mrgCtx.numValidMergeCand : ((mrgCandIdx / ADAPTIVE_IBC_SUB_GROUP_SIZE + 1)*ADAPTIVE_IBC_SUB_GROUP_SIZE)); ++uiMergeCand) { bool firstGroup = (uiMergeCand / ADAPTIVE_IBC_SUB_GROUP_SIZE) == 0 ? true : false; bool lastGroup = ((uiMergeCand / ADAPTIVE_IBC_SUB_GROUP_SIZE + 1)*ADAPTIVE_IBC_SUB_GROUP_SIZE >= mrgCtx.numValidMergeCand) ? true : false; if (lastGroup && !firstGroup) { break; } uiCost = 0; mrgCtx.setMergeInfo(pu, uiMergeCand); if (pu.bv == Mv(0, 0)) { break; } PelBuf pcBufPredRefTop = PelBuf(m_acYuvRefAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE); PelBuf pcBufPredCurTop = PelBuf(m_acYuvCurAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE); PelBuf pcBufPredRefLeft = PelBuf(m_acYuvRefAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight); PelBuf pcBufPredCurLeft = PelBuf(m_acYuvCurAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight); getIBCAMLRefTemplate(pu, nWidth, nHeight); if (m_bAMLTemplateAvailabe[0]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop, pcBufPredRefTop, pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[1]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft, pcBufPredRefLeft, pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } updateCandList(uiMergeCand, uiCost, ADAPTIVE_IBC_SUB_GROUP_SIZE, RdCandList[uiMergeCand / ADAPTIVE_IBC_SUB_GROUP_SIZE], 
// ---------------------------------------------------------------------------------------------
// InterPrediction::updateIBCCandInfo - (pu, mrgCtx, RdCandList, mrgCandIdx) overload
// (its definition begins on the next source line, after the tail of adjustIBCMergeCandidates)
//
// Rewrites the entries of mrgCtx in the order given by
// RdCandList[uiMergeCand / ADAPTIVE_IBC_SUB_GROUP_SIZE][uiMergeCand % ADAPTIVE_IBC_SUB_GROUP_SIZE].
//   1) A temporary MergeCtx has its first IBC_MRG_MAX_NUM_CANDS entries reset (BCW_DEFAULT,
//      interDir 0, refIdx NOT_VALID for both MV fields, useAltHpelIf false and, where compiled
//      in, LIC flag false and rribcFlipTypes 0).
//   2) The processed candidates are copied from mrgCtx into the temporary.
//   3) Each processed slot of mrgCtx is refilled from the temporary entry selected by
//      RdCandList.
// Range: all valid candidates if mrgCandIdx < 0, otherwise the sub-group containing mrgCandIdx,
// clipped to mrgCtx.numValidMergeCand. Both loops break at a sub-group that is the last one but
// not the first one, and at the first candidate for which pu.bv == Mv(0, 0) after
// mrgCtx.setMergeInfo(pu, uiMergeCand); pu is therefore modified as a side effect.
// ---------------------------------------------------------------------------------------------
candCostList[uiMergeCand / ADAPTIVE_IBC_SUB_GROUP_SIZE]); } updateIBCCandInfo(pu, mrgCtx, RdCandList , mrgCandIdx ); pu.mergeIdx = mrgCandIdx; //restore the merge index } void InterPrediction::updateIBCCandInfo(PredictionUnit &pu, MergeCtx& mrgCtx, uint32_t(*RdCandList)[IBC_MRG_MAX_NUM_CANDS], int mrgCandIdx) { MergeCtx mrgCtxTmp; for (uint32_t ui = 0; ui < IBC_MRG_MAX_NUM_CANDS; ++ui) { mrgCtxTmp.BcwIdx[ui] = BCW_DEFAULT; mrgCtxTmp.interDirNeighbours[ui] = 0; mrgCtxTmp.mvFieldNeighbours[(ui << 1)].refIdx = NOT_VALID; mrgCtxTmp.mvFieldNeighbours[(ui << 1) + 1].refIdx = NOT_VALID; mrgCtxTmp.useAltHpelIf[ui] = false; #if INTER_LIC mrgCtxTmp.LICFlags[ui] = false; #endif #if JVET_AA0070_RRIBC mrgCtxTmp.rribcFlipTypes[ui] = 0; #endif } for (uint32_t uiMergeCand = ((mrgCandIdx < 0) ? 0 : (mrgCandIdx / ADAPTIVE_IBC_SUB_GROUP_SIZE)*ADAPTIVE_IBC_SUB_GROUP_SIZE); uiMergeCand < (((mrgCandIdx < 0) || ((mrgCandIdx / ADAPTIVE_IBC_SUB_GROUP_SIZE + 1)*ADAPTIVE_IBC_SUB_GROUP_SIZE > mrgCtx.numValidMergeCand)) ? mrgCtx.numValidMergeCand : ((mrgCandIdx / ADAPTIVE_IBC_SUB_GROUP_SIZE + 1)*ADAPTIVE_IBC_SUB_GROUP_SIZE)); ++uiMergeCand) { bool firstGroup = (uiMergeCand / ADAPTIVE_IBC_SUB_GROUP_SIZE) == 0 ? true : false; bool lastGroup = ((uiMergeCand / ADAPTIVE_IBC_SUB_GROUP_SIZE + 1)*ADAPTIVE_IBC_SUB_GROUP_SIZE >= mrgCtx.numValidMergeCand) ? 
true : false; if (lastGroup && !firstGroup) { break; } mrgCtx.setMergeInfo(pu, uiMergeCand); if (pu.bv == Mv(0, 0)) { break; } mrgCtxTmp.BcwIdx[uiMergeCand] = mrgCtx.BcwIdx[uiMergeCand]; mrgCtxTmp.interDirNeighbours[uiMergeCand] = mrgCtx.interDirNeighbours[uiMergeCand]; mrgCtxTmp.mvFieldNeighbours[(uiMergeCand << 1)] = mrgCtx.mvFieldNeighbours[(uiMergeCand << 1)]; mrgCtxTmp.mvFieldNeighbours[(uiMergeCand << 1) + 1] = mrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1]; mrgCtxTmp.useAltHpelIf[uiMergeCand] = mrgCtx.useAltHpelIf[uiMergeCand]; #if INTER_LIC mrgCtxTmp.LICFlags[uiMergeCand] = mrgCtx.LICFlags[uiMergeCand]; #endif #if JVET_AA0070_RRIBC mrgCtxTmp.rribcFlipTypes[uiMergeCand] = mrgCtx.rribcFlipTypes[uiMergeCand]; #endif } //update for (uint32_t uiMergeCand = ((mrgCandIdx < 0) ? 0 : (mrgCandIdx / ADAPTIVE_IBC_SUB_GROUP_SIZE)*ADAPTIVE_IBC_SUB_GROUP_SIZE); uiMergeCand < (((mrgCandIdx < 0) || ((mrgCandIdx / ADAPTIVE_IBC_SUB_GROUP_SIZE + 1)*ADAPTIVE_IBC_SUB_GROUP_SIZE > mrgCtx.numValidMergeCand)) ? mrgCtx.numValidMergeCand : ((mrgCandIdx / ADAPTIVE_IBC_SUB_GROUP_SIZE + 1)*ADAPTIVE_IBC_SUB_GROUP_SIZE)); ++uiMergeCand) { bool firstGroup = (uiMergeCand / ADAPTIVE_IBC_SUB_GROUP_SIZE) == 0 ? true : false; bool lastGroup = ((uiMergeCand / ADAPTIVE_IBC_SUB_GROUP_SIZE + 1)*ADAPTIVE_IBC_SUB_GROUP_SIZE >= mrgCtx.numValidMergeCand) ? 
// ---------------------------------------------------------------------------------------------
// InterPrediction::adjustIBCMergeCandidates - (pu, mrgCtx, startPos, endPos) overload, compiled
// under JVET_Z0075_IBC_HMVP_ENLARGE
// (its definition begins on the next source line, after the tail of updateIBCCandInfo)
//
// Template-cost evaluation for the candidates [startPos, endPos) using one flat RdCandList /
// candCostList pair of IBC_MRG_MAX_NUM_CANDS_MEM entries (initialised to identity / MAX_UINT).
//   - Under JVET_Z0084_IBC_TM it returns immediately when mrgCtx.numValidMergeCand <= 1.
//   - It returns without touching mrgCtx when xAMLIBCGetCurBlkTemplate is false.
//   - The loop stops at the first candidate whose pu.bv equals Mv(0, 0) after setMergeInfo.
//   - The cost is the sum of cDistParam.distFunc over the available above / left templates and
//     is passed to updateCandList with list size IBC_MRG_MAX_NUM_CANDS_MEM.
//   - Finally updateIBCCandInfo(pu, mrgCtx, RdCandList, startPos, endPos) is called.
// No pu field is restored in this function after the setMergeInfo calls.
// ---------------------------------------------------------------------------------------------
true : false; if (lastGroup && !firstGroup) { break; } mrgCtx.setMergeInfo(pu, uiMergeCand); if (pu.bv == Mv(0, 0)) { break; } mrgCtx.BcwIdx[uiMergeCand] = mrgCtxTmp.BcwIdx[RdCandList[uiMergeCand / ADAPTIVE_IBC_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_IBC_SUB_GROUP_SIZE]]; mrgCtx.interDirNeighbours[uiMergeCand] = mrgCtxTmp.interDirNeighbours[RdCandList[uiMergeCand / ADAPTIVE_IBC_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_IBC_SUB_GROUP_SIZE]]; mrgCtx.mvFieldNeighbours[(uiMergeCand << 1)] = mrgCtxTmp.mvFieldNeighbours[(RdCandList[uiMergeCand / ADAPTIVE_IBC_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_IBC_SUB_GROUP_SIZE] << 1)]; mrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1] = mrgCtxTmp.mvFieldNeighbours[(RdCandList[uiMergeCand / ADAPTIVE_IBC_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_IBC_SUB_GROUP_SIZE] << 1) + 1]; mrgCtx.useAltHpelIf[uiMergeCand] = mrgCtxTmp.useAltHpelIf[RdCandList[uiMergeCand / ADAPTIVE_IBC_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_IBC_SUB_GROUP_SIZE]]; #if INTER_LIC mrgCtx.LICFlags[uiMergeCand] = mrgCtxTmp.LICFlags[RdCandList[uiMergeCand / ADAPTIVE_IBC_SUB_GROUP_SIZE][uiMergeCand%ADAPTIVE_IBC_SUB_GROUP_SIZE]]; #endif #if JVET_AA0070_RRIBC mrgCtx.rribcFlipTypes[uiMergeCand] = mrgCtxTmp.rribcFlipTypes[RdCandList[uiMergeCand / ADAPTIVE_IBC_SUB_GROUP_SIZE][uiMergeCand % ADAPTIVE_IBC_SUB_GROUP_SIZE]]; #endif } } #endif #if JVET_Z0075_IBC_HMVP_ENLARGE void InterPrediction::adjustIBCMergeCandidates(PredictionUnit &pu, MergeCtx& mrgCtx,uint32_t startPos,uint32_t endPos) { #if JVET_Z0084_IBC_TM if (mrgCtx.numValidMergeCand <= 1) { return; } #endif uint32_t RdCandList[IBC_MRG_MAX_NUM_CANDS_MEM]; Distortion candCostList[IBC_MRG_MAX_NUM_CANDS_MEM]; for (uint32_t i = 0; i < IBC_MRG_MAX_NUM_CANDS_MEM; i++) { RdCandList[i] = i; candCostList[i] = MAX_UINT; } Distortion uiCost; DistParam cDistParam; cDistParam.applyWeight = false; /*const SPS &sps = *pu.cs->sps; Position puPos = pu.lumaPos();*/ int nWidth = pu.lumaSize().width; int nHeight = pu.lumaSize().height; if 
// ---------------------------------------------------------------------------------------------
// InterPrediction::updateIBCCandInfo - (pu, mrgCtx, RdCandList, startPos, endPos) overload
// (its definition begins on the next source line, after the tail of adjustIBCMergeCandidates)
//
// Rewrites the mrgCtx entries [startPos, endPos) in the order given by the flat RdCandList:
// slot uiMergeCand receives the temporary entry RdCandList[uiMergeCand - startPos].
//   1) A temporary MergeCtx has its first IBC_MRG_MAX_NUM_CANDS_MEM entries reset (BCW_DEFAULT,
//      interDir 0, refIdx NOT_VALID for both MV fields, useAltHpelIf false and, where compiled
//      in, LIC flag false and rribcFlipTypes 0).
//   2) The candidates of the range are copied from mrgCtx into the temporary.
//   3) The slots of the range are refilled from the temporary.
// Loops 2 and 3 both call mrgCtx.setMergeInfo(pu, uiMergeCand) and stop at the first candidate
// for which pu.bv == Mv(0, 0); pu is therefore modified as a side effect.
// ---------------------------------------------------------------------------------------------
(!xAMLIBCGetCurBlkTemplate(pu, nWidth, nHeight)) { return; } for (uint32_t uiMergeCand = startPos; uiMergeCand < endPos; ++uiMergeCand) { uiCost = 0; mrgCtx.setMergeInfo(pu, uiMergeCand); if (pu.bv == Mv(0, 0)) { break; } PelBuf pcBufPredRefTop = PelBuf(m_acYuvRefAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE); PelBuf pcBufPredCurTop = PelBuf(m_acYuvCurAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE); PelBuf pcBufPredRefLeft = PelBuf(m_acYuvRefAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight); PelBuf pcBufPredCurLeft = PelBuf(m_acYuvCurAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight); getIBCAMLRefTemplate(pu, nWidth, nHeight); if (m_bAMLTemplateAvailabe[0]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop, pcBufPredRefTop, pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[1]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft, pcBufPredRefLeft, pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } updateCandList(uiMergeCand, uiCost, IBC_MRG_MAX_NUM_CANDS_MEM, RdCandList, candCostList); } updateIBCCandInfo(pu, mrgCtx, RdCandList, startPos, endPos); } void InterPrediction::updateIBCCandInfo(PredictionUnit &pu, MergeCtx& mrgCtx, uint32_t* RdCandList,uint32_t startPos,uint32_t endPos) { MergeCtx mrgCtxTmp; for (uint32_t ui = 0; ui < IBC_MRG_MAX_NUM_CANDS_MEM; ++ui) { mrgCtxTmp.BcwIdx[ui] = BCW_DEFAULT; mrgCtxTmp.interDirNeighbours[ui] = 0; mrgCtxTmp.mvFieldNeighbours[(ui << 1)].refIdx = NOT_VALID; mrgCtxTmp.mvFieldNeighbours[(ui << 1) + 1].refIdx = NOT_VALID; mrgCtxTmp.useAltHpelIf[ui] = false; #if INTER_LIC mrgCtxTmp.LICFlags[ui] = false; #endif #if JVET_AA0070_RRIBC mrgCtxTmp.rribcFlipTypes[ui] = 0; #endif } for (uint32_t uiMergeCand = startPos; uiMergeCand < endPos; ++uiMergeCand) { mrgCtx.setMergeInfo(pu, uiMergeCand); if (pu.bv == Mv(0, 0)) { break; } mrgCtxTmp.BcwIdx[uiMergeCand] = 
// ---------------------------------------------------------------------------------------------
// InterPrediction::adjustAffineMergeCandidatesOneGroup, compiled under
// JVET_AA0107_RMVF_AFFINE_MERGE_DERIVATION
// (its definition begins on the next source line, after the tail of updateIBCCandInfo)
//
// Evaluates a template cost for the first `listsize` affine merge candidates as a single group,
// reorders affMrgCtx through updateAffineCandInfo2 and stores the costs in affMrgCtx.candCost.
//   - Returns early, leaving affMrgCtx untouched, when xAMLGetCurBlkTemplate is false.
//   - For each candidate the merge-related fields of pu / pu.cu are overwritten (interDir,
//     mergeFlag, regularMergeFlag, mergeIdx, affine, affineType, BcwIdx, mergeType and, where
//     compiled in, afMmvdFlag and LICFlag).
//   - With JVET_Z0139_NA_AFF the template cost is computed only for MRG_TYPE_DEFAULT_N
//     candidates; in the #else variant MRG_TYPE_SUBPU_ATMVP candidates instead call
//     PU::spanMotionInfo. In both cases a candidate that skips the template path keeps cost 0.
//   - With RPR_ENABLE the cost also stays 0 when any used reference picture isRefScaled().
//   - The cost is the sum of cDistParam.distFunc over the available above / left templates.
//   - After the loop pu.mergeIdx is set to mrgCandIdx, updateAffineCandInfo2 is called, and
//     candCostList[0][idx] is copied to affMrgCtx.candCost[idx] for idx < listsize.
// ---------------------------------------------------------------------------------------------
mrgCtx.BcwIdx[uiMergeCand]; mrgCtxTmp.interDirNeighbours[uiMergeCand] = mrgCtx.interDirNeighbours[uiMergeCand]; mrgCtxTmp.mvFieldNeighbours[(uiMergeCand << 1)] = mrgCtx.mvFieldNeighbours[(uiMergeCand << 1)]; mrgCtxTmp.mvFieldNeighbours[(uiMergeCand << 1) + 1] = mrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1]; mrgCtxTmp.useAltHpelIf[uiMergeCand] = mrgCtx.useAltHpelIf[uiMergeCand]; #if INTER_LIC mrgCtxTmp.LICFlags[uiMergeCand] = mrgCtx.LICFlags[uiMergeCand]; #endif #if JVET_AA0070_RRIBC mrgCtxTmp.rribcFlipTypes[uiMergeCand] = mrgCtx.rribcFlipTypes[uiMergeCand]; #endif } //update for (uint32_t uiMergeCand = startPos; uiMergeCand < endPos; ++uiMergeCand) { mrgCtx.setMergeInfo(pu, uiMergeCand); if (pu.bv == Mv(0, 0)) { break; } mrgCtx.BcwIdx[uiMergeCand] = mrgCtxTmp.BcwIdx[RdCandList[uiMergeCand -startPos]]; mrgCtx.interDirNeighbours[uiMergeCand] = mrgCtxTmp.interDirNeighbours[RdCandList[uiMergeCand -startPos]]; mrgCtx.mvFieldNeighbours[(uiMergeCand << 1)] = mrgCtxTmp.mvFieldNeighbours[RdCandList[uiMergeCand -startPos] << 1]; mrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1] = mrgCtxTmp.mvFieldNeighbours[(RdCandList[uiMergeCand -startPos] << 1) + 1]; mrgCtx.useAltHpelIf[uiMergeCand] = mrgCtxTmp.useAltHpelIf[RdCandList[uiMergeCand -startPos]]; #if INTER_LIC mrgCtx.LICFlags[uiMergeCand] = mrgCtxTmp.LICFlags[RdCandList[uiMergeCand -startPos]]; #endif #if JVET_AA0070_RRIBC mrgCtx.rribcFlipTypes[uiMergeCand] = mrgCtxTmp.rribcFlipTypes[RdCandList[uiMergeCand -startPos]]; #endif } } #endif #if JVET_AA0107_RMVF_AFFINE_MERGE_DERIVATION void InterPrediction::adjustAffineMergeCandidatesOneGroup(PredictionUnit &pu, AffineMergeCtx& affMrgCtx, int listsize, int mrgCandIdx) { uint32_t rdCandList[RMVF_AFFINE_MRG_MAX_CAND_LIST_SIZE][RMVF_AFFINE_MRG_MAX_CAND_LIST_SIZE]; Distortion candCostList[RMVF_AFFINE_MRG_MAX_CAND_LIST_SIZE][RMVF_AFFINE_MRG_MAX_CAND_LIST_SIZE]; const uint32_t maxNumAffineMergeCand = listsize; for (uint32_t i = 0; i < maxNumAffineMergeCand; i++) { for (uint32_t j = 0; 
j < maxNumAffineMergeCand; j++) { rdCandList[i][j] = j; candCostList[i][j] = MAX_UINT; } } Distortion uiCost; DistParam cDistParam; cDistParam.applyWeight = false; int nWidth = pu.lumaSize().width; int nHeight = pu.lumaSize().height; if (!xAMLGetCurBlkTemplate(pu, nWidth, nHeight)) { return; } for (uint32_t uiMergeCand = 0; uiMergeCand < listsize; ++uiMergeCand) { uiCost = 0; // set merge information pu.interDir = affMrgCtx.interDirNeighbours[uiMergeCand]; pu.mergeFlag = true; pu.regularMergeFlag = false; pu.mergeIdx = uiMergeCand; pu.cu->affine = true; pu.cu->affineType = affMrgCtx.affineType[uiMergeCand]; #if AFFINE_MMVD pu.afMmvdFlag = false; #endif pu.cu->BcwIdx = affMrgCtx.BcwIdx[uiMergeCand]; #if INTER_LIC pu.cu->LICFlag = affMrgCtx.LICFlags[uiMergeCand]; #endif pu.mergeType = affMrgCtx.mergeType[uiMergeCand]; #if JVET_Z0139_NA_AFF if (pu.mergeType == MRG_TYPE_DEFAULT_N) #else if (pu.mergeType == MRG_TYPE_SUBPU_ATMVP) { pu.refIdx[0] = affMrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 0][0].refIdx; pu.refIdx[1] = affMrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1][0].refIdx; PU::spanMotionInfo(pu, *affMrgCtx.mrgCtx); } else #endif { for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) { for (int i = 0; i < 3; i++) { pu.mvAffi[refList][i] = affMrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + refList][i].mv; } pu.refIdx[refList] = affMrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + refList][0].refIdx; } PelUnitBuf pcBufPredRefTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE))); PelUnitBuf pcBufPredCurTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE))); PelUnitBuf pcBufPredRefLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight))); PelUnitBuf pcBufPredCurLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight))); #if RPR_ENABLE bool bRefIsRescaled = 
// ---------------------------------------------------------------------------------------------
// InterPrediction::updateAffineCandInfo2, compiled under JVET_AA0107_RMVF_AFFINE_MERGE_DERIVATION
// (its definition begins on the next source line, after the tail of
// adjustAffineMergeCandidatesOneGroup)
//
// Reorders the first `listsize` entries of affMrgCtx according to rdCandList.
//   1) Entries [0, listsize) are copied into a temporary AffineMergeCtx (three MV fields per
//      list, interDir, affineType, mergeType, BcwIdx and, with INTER_LIC, the LIC flag).
//   2) Slot uiMergeCand of affMrgCtx is refilled from the temporary entry
//      rdCandList[uiMergeCand / listsize][uiMergeCand % listsize]. Because the loop runs with
//      uiMergeCand < listsize, this always addresses row 0, column uiMergeCand.
// The parameters pu and mrgCandIdx are not referenced in the body.
// ---------------------------------------------------------------------------------------------
false; for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) { const RefPicList eRefPicList = refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0; bRefIsRescaled |= (pu.refIdx[refList] >= 0) ? pu.cu->slice->getRefPic(eRefPicList, pu.refIdx[refList])->isRefScaled(pu.cs->pps) : false; } if (!bRefIsRescaled) { #endif getAffAMLRefTemplate(pu, pcBufPredRefTop, pcBufPredRefLeft); if (m_bAMLTemplateAvailabe[0]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[1]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } #if RPR_ENABLE } #endif } updateCandList(uiMergeCand, uiCost, maxNumAffineMergeCand, rdCandList[uiMergeCand / RMVF_AFFINE_MRG_MAX_CAND_LIST_SIZE], candCostList[uiMergeCand / RMVF_AFFINE_MRG_MAX_CAND_LIST_SIZE]); } pu.mergeIdx = mrgCandIdx; //restore the merge index updateAffineCandInfo2(pu, affMrgCtx, rdCandList, listsize, mrgCandIdx ); for (int idx = 0; idx < listsize; idx++) { affMrgCtx.candCost[idx] = candCostList[0][idx]; } } void InterPrediction::updateAffineCandInfo2(PredictionUnit &pu, AffineMergeCtx& affMrgCtx, uint32_t(*rdCandList)[RMVF_AFFINE_MRG_MAX_CAND_LIST_SIZE], int listsize, int mrgCandIdx) { AffineMergeCtx affMrgCtxTmp; for (uint32_t uiMergeCand = 0; uiMergeCand < listsize; ++uiMergeCand) { for (int mvNum = 0; mvNum < 3; mvNum++) { affMrgCtxTmp.mvFieldNeighbours[(uiMergeCand << 1) + 0][mvNum] = affMrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 0][mvNum]; affMrgCtxTmp.mvFieldNeighbours[(uiMergeCand << 1) + 1][mvNum] = affMrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1][mvNum]; } affMrgCtxTmp.interDirNeighbours[uiMergeCand] = affMrgCtx.interDirNeighbours[uiMergeCand]; affMrgCtxTmp.affineType[uiMergeCand] = 
// ---------------------------------------------------------------------------------------------
// InterPrediction::xOBMCWeightedAverageY, compiled under JVET_Z0061_TM_OBMC
// (its definition begins on the next source line, after the tail of updateAffineCandInfo2)
//
// Writes pcYuvDst from the two source predictions depending on the reference indices of
// currMi (not of pu):
//   - both refIdx >= 0 and pu.cu->BcwIdx != BCW_DEFAULT: pcYuvDst.addWeightedAvg with the CU's
//     BcwIdx (with JVET_Z0136_OOB the call also receives pu.cs->mcMask / mcMaskChroma with
//     stride arguments -1 and an isOOB pair initialised to false);
//   - both refIdx >= 0 and default BCW: pcYuvDst.addAvg;
//   - only list 0 valid: pcYuvDst.copyClip(pcYuvSrc0, ...);
//   - only list 1 valid: pcYuvDst.copyClip(pcYuvSrc1, ...);
//   - neither valid: pcYuvDst is left untouched.
// clipBitDepths is not referenced in the body.
// ---------------------------------------------------------------------------------------------
affMrgCtx.affineType[uiMergeCand]; affMrgCtxTmp.mergeType[uiMergeCand] = affMrgCtx.mergeType[uiMergeCand]; affMrgCtxTmp.BcwIdx[uiMergeCand] = affMrgCtx.BcwIdx[uiMergeCand]; #if INTER_LIC affMrgCtxTmp.LICFlags[uiMergeCand] = affMrgCtx.LICFlags[uiMergeCand]; #endif } //update for (uint32_t uiMergeCand = 0; uiMergeCand < listsize; ++uiMergeCand) { for (int mvNum = 0; mvNum < 3; mvNum++) { affMrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 0][mvNum] = affMrgCtxTmp.mvFieldNeighbours[(rdCandList[uiMergeCand / listsize][uiMergeCand%listsize] << 1) + 0][mvNum]; affMrgCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1][mvNum] = affMrgCtxTmp.mvFieldNeighbours[(rdCandList[uiMergeCand / listsize][uiMergeCand%listsize] << 1) + 1][mvNum]; } affMrgCtx.interDirNeighbours[uiMergeCand] = affMrgCtxTmp.interDirNeighbours[rdCandList[uiMergeCand / listsize][uiMergeCand%listsize]]; affMrgCtx.affineType[uiMergeCand] = affMrgCtxTmp.affineType[rdCandList[uiMergeCand / listsize][uiMergeCand%listsize]]; affMrgCtx.mergeType[uiMergeCand] = affMrgCtxTmp.mergeType[rdCandList[uiMergeCand / listsize][uiMergeCand%listsize]]; affMrgCtx.BcwIdx[uiMergeCand] = affMrgCtxTmp.BcwIdx[rdCandList[uiMergeCand / listsize][uiMergeCand%listsize]]; #if INTER_LIC affMrgCtx.LICFlags[uiMergeCand] = affMrgCtxTmp.LICFlags[rdCandList[uiMergeCand / listsize][uiMergeCand%listsize]]; #endif } } #endif #endif #if JVET_Z0061_TM_OBMC void InterPrediction::xOBMCWeightedAverageY(const PredictionUnit &pu, const CPelUnitBuf &pcYuvSrc0, const CPelUnitBuf &pcYuvSrc1, PelUnitBuf &pcYuvDst, const BitDepths &clipBitDepths, const ClpRngs &clpRngs, MotionInfo currMi) { const int iRefIdx0 = currMi.refIdx[0]; const int iRefIdx1 = currMi.refIdx[1]; if (iRefIdx0 >= 0 && iRefIdx1 >= 0) { if (pu.cu->BcwIdx != BCW_DEFAULT) { #if JVET_Z0136_OOB bool isOOB[2] = { false, false }; pcYuvDst.addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, pu.cu->BcwIdx, false, true, pu.cs->mcMask, -1, pu.cs->mcMaskChroma, -1, isOOB); #else pcYuvDst.addWeightedAvg(pcYuvSrc0, 
pcYuvSrc1, clpRngs, pu.cu->BcwIdx, false, true); #endif } else { pcYuvDst.addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, false, true); } } else if (iRefIdx0 >= 0 && iRefIdx1 < 0) { pcYuvDst.copyClip(pcYuvSrc0, clpRngs, true); } else if (iRefIdx0 < 0 && iRefIdx1 >= 0) { pcYuvDst.copyClip(pcYuvSrc1, clpRngs, true); } } int InterPrediction::selectOBMCmode(PredictionUnit &pu, PredictionUnit &subblockPu, bool isAbove, int iLength, uint32_t uiMinCUW, Position off) { #if JVET_AA0132_CONFIGURABLE_TM_TOOLS && JVET_Z0061_TM_OBMC && ENABLE_OBMC if (!pu.cs->sps->getUseOBMCTMMode()) { return -1; } #endif const Position posSubBlock(pu.lumaPos().offset(off)); Position posNeighborMotion = Position(0, 0); if (isAbove) { posNeighborMotion = posSubBlock.offset(0, -1); } else if (!isAbove) { posNeighborMotion = posSubBlock.offset(-1, 0); } PredictionUnit *tmpPu = nullptr; tmpPu = pu.cs->getPU(posNeighborMotion, pu.chType); if (!tmpPu) { return 0; } MotionInfo neigMi = tmpPu->getMotionInfo(posNeighborMotion); MotionInfo currMi = pu.getMotionInfo(posSubBlock); Distortion candCostList[3]; for (uint32_t i = 0; i < 3; i++) { candCostList[i] = MAX_UINT; } Distortion uiCost; DistParam cDistParam; cDistParam.applyWeight = false; int nWidth = isAbove ? (iLength * uiMinCUW) : uiMinCUW; int nHeight = isAbove ? 
uiMinCUW : (iLength * uiMinCUW); if (!xAMLGetCurBlkTemplate(pu, pu.lumaSize().width, pu.lumaSize().height)) { return 0; } // Process above boundary PelUnitBuf pcBufPredCurTopTmp = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[0][0], pu.lumaSize().width, TM_OBMC_TEMPLATE_SIZE))); PelUnitBuf pcBufPredCurTop = pcBufPredCurTopTmp.subBuf(UnitArea(pu.chromaFormat, Area(off.x, 0, nWidth, TM_OBMC_TEMPLATE_SIZE))); PelUnitBuf pcBufPredRefTop0 = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAboveTemplateOBMC[0][0], nWidth, TM_OBMC_TEMPLATE_SIZE))); PelUnitBuf pcBufPredRefTop1 = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAboveTemplateOBMC[1][0], nWidth, TM_OBMC_TEMPLATE_SIZE))); PelUnitBuf pcBufBlendDstAbove = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvBlendTemplateOBMC[0][0], nWidth, TM_OBMC_TEMPLATE_SIZE))); if ((isAbove) && (m_bAMLTemplateAvailabe[0])) { // 0: use current mv to do MC for template; getBlkOBMCRefTemplate(subblockPu, pcBufPredRefTop0, isAbove, currMi); uiCost = 0; m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop0.Y(), subblockPu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost = cDistParam.distFunc(cDistParam); candCostList[0] = uiCost; // 1: use neighbour mv to do MC for template; getBlkOBMCRefTemplate(subblockPu, pcBufPredRefTop1, isAbove, neigMi); uiCost = 0; m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop1.Y(), subblockPu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost = cDistParam.distFunc(cDistParam); candCostList[1] = uiCost; // 2: Now calculate the blending template uiCost = 0; CPelUnitBuf srcPred0 = CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAboveTemplateOBMC[0][0], pcBufPredCurTop.Y())); CPelUnitBuf srcPred1 = CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAboveTemplateOBMC[1][0], pcBufPredCurTop.Y())); for (int i = 0; i < nWidth; i++) { pcBufBlendDstAbove.Y().buf[i] = (26 * srcPred0.Y().buf[i] + 6 * srcPred1.Y().buf[i] + 16) >> 5; } 
m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufBlendDstAbove.Y(), subblockPu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost = cDistParam.distFunc(cDistParam); candCostList[2] = uiCost; int bestOBMCmode = 0; if ((candCostList[0] < candCostList[1]) && (candCostList[0] < candCostList[2])) { bestOBMCmode = 1; } else { candCostList[0] = candCostList[0] << 3; candCostList[1] = candCostList[1] << 3; if ((candCostList[1] + (candCostList[1] >> 2) + (candCostList[1] >> 3)) <= candCostList[0]) { bestOBMCmode = 2; } else if (candCostList[0] <= candCostList[1]) { bestOBMCmode = 3; } else if (candCostList[1] <= candCostList[0]) { bestOBMCmode = 4; } } return bestOBMCmode; } // Process left boundary PelUnitBuf pcBufPredCurLeftTmp = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[1][0], TM_OBMC_TEMPLATE_SIZE, pu.lumaSize().height))); PelUnitBuf pcBufPredCurLeft = pcBufPredCurLeftTmp.subBuf(UnitArea(pu.chromaFormat, Area(0, off.y, TM_OBMC_TEMPLATE_SIZE, nHeight))); PelUnitBuf pcBufPredRefLeft0 = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefLeftTemplateOBMC[0][0], TM_OBMC_TEMPLATE_SIZE, nHeight))); PelUnitBuf pcBufPredRefLeft1 = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefLeftTemplateOBMC[1][0], TM_OBMC_TEMPLATE_SIZE, nHeight))); PelUnitBuf pcBufBlendDstLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvBlendTemplateOBMC[1][0], TM_OBMC_TEMPLATE_SIZE, nHeight))); if ((!isAbove) && (m_bAMLTemplateAvailabe[1])) { // 0: use current mv to do MC for template; getBlkOBMCRefTemplate(subblockPu, pcBufPredRefLeft0, isAbove, currMi); uiCost = 0; m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft0.Y(), subblockPu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost = cDistParam.distFunc(cDistParam); candCostList[0] = uiCost; // 1: use neighbour mv to do MC for template; getBlkOBMCRefTemplate(subblockPu, pcBufPredRefLeft1, isAbove, neigMi); uiCost = 0; m_pcRdCost->setDistParam(cDistParam, 
pcBufPredCurLeft.Y(), pcBufPredRefLeft1.Y(), subblockPu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost = cDistParam.distFunc(cDistParam); candCostList[1] = uiCost; // 2: Now calculate the blending template uiCost = 0; CPelUnitBuf srcPred0 = CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefLeftTemplateOBMC[0][0], pcBufPredCurLeft.Y())); CPelUnitBuf srcPred1 = CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefLeftTemplateOBMC[1][0], pcBufPredCurLeft.Y())); int idx = 0; for (int i = 0; i < nHeight; i++) { pcBufBlendDstLeft.Y().buf[idx] = (26 * srcPred0.Y().buf[idx] + 6 * srcPred1.Y().buf[idx] + 16) >> 5; idx += pcBufBlendDstLeft.bufs[COMPONENT_Y].stride; } m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufBlendDstLeft.Y(), subblockPu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost = cDistParam.distFunc(cDistParam); candCostList[2] = uiCost; int bestOBMCmode = 0; if ((candCostList[0] < candCostList[1]) && (candCostList[0] < candCostList[2])) { bestOBMCmode = 1; } else { candCostList[0] = candCostList[0] << 3; candCostList[1] = candCostList[1] << 3; if ((candCostList[1] + (candCostList[1] >> 2) + (candCostList[1] >> 3)) <= candCostList[0]) { bestOBMCmode = 2; } else if (candCostList[0] <= candCostList[1]) { bestOBMCmode = 3; } else if (candCostList[1] <= candCostList[0]) { bestOBMCmode = 4; } } return bestOBMCmode; } else { return 0; } } bool InterPrediction::xCheckIdenticalMotionOBMC(PredictionUnit &pu, MotionInfo tryMi) { const Slice &slice = *pu.cs->slice; if (slice.isInterB() && !pu.cs->pps->getWPBiPred()) { if (tryMi.refIdx[0] >= 0 && tryMi.refIdx[1] >= 0) { int RefPOCL0 = slice.getRefPic(REF_PIC_LIST_0, tryMi.refIdx[0])->getPOC(); int RefPOCL1 = slice.getRefPic(REF_PIC_LIST_1, tryMi.refIdx[1])->getPOC(); if (RefPOCL0 == RefPOCL1) { if (!pu.cu->affine) { if (tryMi.mv[0] == tryMi.mv[1]) { return true; } } else { if ((pu.cu->affineType == AFFINEMODEL_4PARAM && (pu.mvAffi[0][0] == pu.mvAffi[1][0]) && (pu.mvAffi[0][1] 
== pu.mvAffi[1][1])) || (pu.cu->affineType == AFFINEMODEL_6PARAM && (pu.mvAffi[0][0] == pu.mvAffi[1][0]) && (pu.mvAffi[0][1] == pu.mvAffi[1][1]) && (pu.mvAffi[0][2] == pu.mvAffi[1][2]))) { return true; } } } } } return false; } void InterPrediction::getBlkOBMCRefTemplate(PredictionUnit &subblockPu, PelUnitBuf &pcBufPredRef, bool isAbove, MotionInfo tryMi) { Mv mvCurr; const int lumaShift = 2 + MV_FRACTIONAL_BITS_DIFF; const int horShift = (lumaShift + ::getComponentScaleX(COMPONENT_Y, subblockPu.chromaFormat)); const int verShift = (lumaShift + ::getComponentScaleY(COMPONENT_Y, subblockPu.chromaFormat)); if (xCheckIdenticalMotionOBMC(subblockPu, tryMi)) { mvCurr = tryMi.mv[0]; Mv subPelMv = mvCurr; clipMv(mvCurr, subblockPu.lumaPos(), subblockPu.lumaSize(), *subblockPu.cs->sps, *subblockPu.cs->pps); CHECK(tryMi.refIdx[0] < 0, "invalid ref idx"); if ((isAbove) && (m_bAMLTemplateAvailabe[0])) { Mv mvTop(0, -(TM_OBMC_TEMPLATE_SIZE << verShift)); mvTop += subPelMv; clipMv(mvTop, subblockPu.lumaPos(), subblockPu.lumaSize(), *subblockPu.cs->sps, *subblockPu.cs->pps); #if RPR_ENABLE const Picture * picRef = subblockPu.cu->slice->getRefPic(REF_PIC_LIST_0, tryMi.refIdx[0])->unscaledPic; const std::pair<int, int> &scalingRatio = subblockPu.cu->slice->getScalingRatio(REF_PIC_LIST_0, tryMi.refIdx[0]); xPredInterBlk(COMPONENT_Y, subblockPu, picRef, mvTop, pcBufPredRef, false, subblockPu.cu->slice->clpRng(COMPONENT_Y), false, false, scalingRatio, 0, 0, false, NULL, 0 #if JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING , false #if INTER_LIC , false, Mv(0, 0) #endif #endif , true); #else xPredInterBlk(COMPONENT_Y, subblockPu, subblockPu.cu->slice->getRefPic(REF_PIC_LIST_0, tryMi.refIdx[0]), mvTop, pcBufPredRef, false, subblockPu.cu->slice->clpRng(COMPONENT_Y), false, false, SCALE_1X, 0, 0, false, NULL, 0 #if JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING , false #if INTER_LIC , false, Mv(0, 0) #endif #endif , true); #endif } if ((!isAbove) && 
(m_bAMLTemplateAvailabe[1])) { Mv mvLeft(-(TM_OBMC_TEMPLATE_SIZE << horShift), 0); mvLeft += subPelMv; clipMv(mvLeft, subblockPu.lumaPos(), subblockPu.lumaSize(), *subblockPu.cs->sps, *subblockPu.cs->pps); #if RPR_ENABLE const Picture * picRef = subblockPu.cu->slice->getRefPic(REF_PIC_LIST_0, tryMi.refIdx[0])->unscaledPic; const std::pair<int, int> &scalingRatio = subblockPu.cu->slice->getScalingRatio(REF_PIC_LIST_0, tryMi.refIdx[0]); xPredInterBlk(COMPONENT_Y, subblockPu, picRef, mvLeft, pcBufPredRef, false, subblockPu.cu->slice->clpRng(COMPONENT_Y), false, false, scalingRatio, 0, 0, false, NULL, 0 #if JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING , false #if INTER_LIC , false, Mv(0, 0) #endif #endif , true); #else xPredInterBlk(COMPONENT_Y, subblockPu, subblockPu.cu->slice->getRefPic(REF_PIC_LIST_0, tryMi.refIdx[0]), mvLeft, pcBufPredRef, false, subblockPu.cu->slice->clpRng(COMPONENT_Y), false, false, SCALE_1X, 0, 0, false, NULL, 0 #if JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING , false #if INTER_LIC , false, Mv(0, 0) #endif #endif , true); #endif } } else { for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) { if (tryMi.refIdx[refList] < 0) { continue; } RefPicList eRefPicList = (refList ? 
REF_PIC_LIST_1 : REF_PIC_LIST_0); CHECK(tryMi.refIdx[refList] >= subblockPu.cu->slice->getNumRefIdx(eRefPicList), "Invalid reference index"); m_iRefListIdx = refList; mvCurr = tryMi.mv[refList]; Mv subPelMv = mvCurr; clipMv(mvCurr, subblockPu.lumaPos(), subblockPu.lumaSize(), *subblockPu.cs->sps, *subblockPu.cs->pps); if ((isAbove) && (m_bAMLTemplateAvailabe[0])) { Mv mvTop(0, -(TM_OBMC_TEMPLATE_SIZE << verShift)); mvTop += subPelMv; clipMv(mvTop, subblockPu.lumaPos(), subblockPu.lumaSize(), *subblockPu.cs->sps, *subblockPu.cs->pps); PelUnitBuf pcMbBuf = PelUnitBuf(subblockPu.chromaFormat, PelBuf(m_acYuvRefAboveTemplate[refList][0], pcBufPredRef.Y())); if (tryMi.refIdx[0] >= 0 && tryMi.refIdx[1] >= 0) { #if RPR_ENABLE const Picture *picRef = subblockPu.cu->slice->getRefPic(eRefPicList, tryMi.refIdx[refList])->unscaledPic; const std::pair<int, int> &scalingRatio = subblockPu.cu->slice->getScalingRatio(eRefPicList, tryMi.refIdx[refList]); xPredInterBlk(COMPONENT_Y, subblockPu, picRef, mvTop, pcMbBuf, true, subblockPu.cu->slice->clpRng(COMPONENT_Y), false, false, scalingRatio, 0, 0, false, NULL, 0 #if JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING , false #if INTER_LIC , false, Mv(0, 0) #endif #endif , true); #else xPredInterBlk(COMPONENT_Y, subblockPu, subblockPu.cu->slice->getRefPic(eRefPicList, tryMi.refIdx[refList]), mvTop, pcMbBuf, true, subblockPu.cu->slice->clpRng(COMPONENT_Y), false, false, SCALE_1X, 0, 0, false, NULL, 0 #if JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING , false #if INTER_LIC , false, Mv(0, 0) #endif #endif , true); #endif if (refList == 1) { CPelUnitBuf srcPred0 = CPelUnitBuf(subblockPu.chromaFormat, PelBuf(m_acYuvRefAboveTemplate[0][0], pcBufPredRef.Y())); CPelUnitBuf srcPred1 = CPelUnitBuf(subblockPu.chromaFormat, PelBuf(m_acYuvRefAboveTemplate[1][0], pcBufPredRef.Y())); xOBMCWeightedAverageY(subblockPu, srcPred0, srcPred1, pcBufPredRef, subblockPu.cu->slice->getSPS()->getBitDepths(), subblockPu.cu->slice->clpRngs(), 
tryMi); } } else { #if RPR_ENABLE const Picture *picRef = subblockPu.cu->slice->getRefPic(eRefPicList, tryMi.refIdx[refList])->unscaledPic; const std::pair<int, int> &scalingRatio = subblockPu.cu->slice->getScalingRatio(eRefPicList, tryMi.refIdx[refList]); xPredInterBlk(COMPONENT_Y, subblockPu, picRef, mvTop, pcBufPredRef, false, subblockPu.cu->slice->clpRng(COMPONENT_Y), false, false, scalingRatio, 0, 0, false, NULL, 0 #if JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING , false #if INTER_LIC , false, Mv(0, 0) #endif #endif , true); #else xPredInterBlk(COMPONENT_Y, subblockPu, subblockPu.cu->slice->getRefPic(eRefPicList, tryMi.refIdx[refList]), mvTop, pcBufPredRef, false, subblockPu.cu->slice->clpRng(COMPONENT_Y), false, false, SCALE_1X, 0, 0, false, NULL, 0 #if JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING , false #if INTER_LIC , false, Mv(0, 0) #endif #endif , true); #endif } } if ((!isAbove) && (m_bAMLTemplateAvailabe[1])) { Mv mvLeft(-(TM_OBMC_TEMPLATE_SIZE << horShift), 0); mvLeft += subPelMv; clipMv(mvLeft, subblockPu.lumaPos(), subblockPu.lumaSize(), *subblockPu.cs->sps, *subblockPu.cs->pps); PelUnitBuf pcMbBuf = PelUnitBuf(subblockPu.chromaFormat, PelBuf(m_acYuvRefLeftTemplate[refList][0], pcBufPredRef.Y())); if (tryMi.refIdx[0] >= 0 && tryMi.refIdx[1] >= 0) { #if RPR_ENABLE const Picture *picRef = subblockPu.cu->slice->getRefPic(eRefPicList, tryMi.refIdx[refList])->unscaledPic; const std::pair<int, int> &scalingRatio = subblockPu.cu->slice->getScalingRatio(eRefPicList, tryMi.refIdx[refList]); xPredInterBlk(COMPONENT_Y, subblockPu, picRef, mvLeft, pcMbBuf, true, subblockPu.cu->slice->clpRng(COMPONENT_Y), false, false, scalingRatio, 0, 0, false, NULL, 0 #if JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING , false #if INTER_LIC , false, Mv(0, 0) #endif #endif , true); #else xPredInterBlk(COMPONENT_Y, subblockPu, subblockPu.cu->slice->getRefPic(eRefPicList, tryMi.refIdx[refList]), mvLeft, pcMbBuf, true, 
subblockPu.cu->slice->clpRng(COMPONENT_Y), false, false, SCALE_1X, 0, 0, false, NULL, 0 #if JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING , false #if INTER_LIC , false, Mv(0, 0) #endif #endif , true); #endif if (refList == 1) { CPelUnitBuf srcPred0 = CPelUnitBuf(subblockPu.chromaFormat, PelBuf(m_acYuvRefLeftTemplate[0][0], pcBufPredRef.Y())); CPelUnitBuf srcPred1 = CPelUnitBuf(subblockPu.chromaFormat, PelBuf(m_acYuvRefLeftTemplate[1][0], pcBufPredRef.Y())); xOBMCWeightedAverageY(subblockPu, srcPred0, srcPred1, pcBufPredRef, subblockPu.cu->slice->getSPS()->getBitDepths(), subblockPu.cu->slice->clpRngs(), tryMi); } } else { #if RPR_ENABLE const Picture *picRef = subblockPu.cu->slice->getRefPic(eRefPicList, tryMi.refIdx[refList])->unscaledPic; const std::pair<int, int> &scalingRatio = subblockPu.cu->slice->getScalingRatio(eRefPicList, tryMi.refIdx[refList]); xPredInterBlk(COMPONENT_Y, subblockPu, picRef, mvLeft, pcBufPredRef, false, subblockPu.cu->slice->clpRng(COMPONENT_Y), false, false, scalingRatio, 0, 0, false, NULL, 0 #if JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING , false #if INTER_LIC , false, Mv(0, 0) #endif #endif , true); #else xPredInterBlk(COMPONENT_Y, subblockPu, subblockPu.cu->slice->getRefPic(eRefPicList, tryMi.refIdx[refList]), mvLeft, pcBufPredRef, false, subblockPu.cu->slice->clpRng(COMPONENT_Y), false, false, SCALE_1X, 0, 0, false, NULL, 0 #if JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING , false #if INTER_LIC , false, Mv(0, 0) #endif #endif , true); #endif } } } } } void InterPrediction::xSubblockOBMCCopy(const ComponentID eComp, PredictionUnit &pu, PelUnitBuf &pcYuvPredDst, PelUnitBuf &pcYuvPredSrc, int iDir) { int iWidth = pu.blocks[eComp].width; int iHeight = pu.blocks[eComp].height; if (iWidth == 0 || iHeight == 0) { return; } Pel * pOrgDst = pcYuvPredDst.bufs[eComp].buf; Pel * pOrgSrc = pcYuvPredSrc.bufs[eComp].buf; const int strideDst = pcYuvPredDst.bufs[eComp].stride; const int strideSrc = 
pcYuvPredSrc.bufs[eComp].stride; if (iDir == 0) // above { for (int i = 0; i < iWidth; i++) { Pel *pDst = pOrgDst; Pel *pSrc = pOrgSrc; pDst[i] = pSrc[i]; } } if (iDir == 1) // left { Pel *pDst = pOrgDst; Pel *pSrc = pOrgSrc; for (int i = 0; i < iHeight; i++) { pDst[0] = pSrc[0]; pDst += strideDst; pSrc += strideSrc; } } } void InterPrediction::xSubblockTMOBMC(const ComponentID eComp, PredictionUnit &pu, PelUnitBuf &pcYuvPredDst, PelUnitBuf &pcYuvPredSrc, int iDir, int iOBMCmode) { int iWidth = pu.blocks[eComp].width; int iHeight = pu.blocks[eComp].height; if (iWidth == 0 || iHeight == 0) { return; } Pel * pOrgDst = pcYuvPredDst.bufs[eComp].buf; Pel * pOrgSrc = pcYuvPredSrc.bufs[eComp].buf; const int strideDst = pcYuvPredDst.bufs[eComp].stride; const int strideSrc = pcYuvPredSrc.bufs[eComp].stride; if (iDir == 0) // above { for (int i = 0; i < iWidth; i++) { Pel *pDst = pOrgDst; Pel *pSrc = pOrgSrc; if (iOBMCmode == 2) // neighbor is best { pDst[i] = (26 * pDst[i] + 6 * pSrc[i] + 16) >> 5; if (eComp == COMPONENT_Y) { pDst += strideDst; pSrc += strideSrc; pDst[i] = (7 * pDst[i] + pSrc[i] + 4) >> 3; pDst += strideDst; pSrc += strideSrc; pDst[i] = (15 * pDst[i] + pSrc[i] + 8) >> 4; pDst += strideDst; pSrc += strideSrc; pDst[i] = (31 * pDst[i] + pSrc[i] + 16) >> 5; } } else if (iOBMCmode == 4) { pDst[i] = (7 * pDst[i] + pSrc[i] + 4) >> 3; if (eComp == COMPONENT_Y) { pDst += strideDst; pSrc += strideSrc; pDst[i] = (15 * pDst[i] + pSrc[i] + 8) >> 4; pDst += strideDst; pSrc += strideSrc; pDst[i] = (31 * pDst[i] + pSrc[i] + 16) >> 5; } } else // blending is best { pDst[i] = (15 * pDst[i] + pSrc[i] + 8) >> 4; // luma blend 3 lines if (eComp == COMPONENT_Y) { pDst += strideDst; pSrc += strideSrc; pDst[i] = (31 * pDst[i] + pSrc[i] + 16) >> 5; } } } } if (iDir == 1) // left { Pel *pDst = pOrgDst; Pel *pSrc = pOrgSrc; for (int i = 0; i < iHeight; i++) { if (iOBMCmode == 2) // neighbor is best { pDst[0] = (26 * pDst[0] + 6 * pSrc[0] + 16) >> 5; if (eComp == COMPONENT_Y) { 
pDst[1] = (7 * pDst[1] + pSrc[1] + 4) >> 3; pDst[2] = (15 * pDst[2] + pSrc[2] + 8) >> 4; pDst[3] = (31 * pDst[3] + pSrc[3] + 16) >> 5; } } else if (iOBMCmode == 4) // neighbor is best { pDst[0] = (7 * pDst[0] + pSrc[0] + 4) >> 3; if (eComp == COMPONENT_Y) { pDst[1] = (15 * pDst[1] + pSrc[1] + 8) >> 4; pDst[2] = (31 * pDst[2] + pSrc[2] + 16) >> 5; } } else // blending is best { pDst[0] = (15 * pDst[0] + pSrc[0] + 8) >> 4; // luma blend 3 lines if (eComp == COMPONENT_Y) { pDst[1] = (31 * pDst[1] + pSrc[1] + 16) >> 5; } } pDst += strideDst; pSrc += strideSrc; } } } #endif void InterPrediction::xFillIBCBuffer(CodingUnit &cu) { #if JVET_Z0118_GDR bool isCleanCu = cu.cs->isClean(cu); bool useCleanIBCBuffer = cu.cs->isInGdrIntervalOrRecoveryPoc() && isCleanCu; #endif for (auto &currPU : CU::traverseTUs(cu)) { for (const CompArea &area : currPU.blocks) { if (!area.valid()) { continue; } #if JVET_Z0153_IBC_EXT_REF const int shiftSampleHor = ::getComponentScaleX(area.compID, cu.chromaFormat); const int shiftSampleVer = ::getComponentScaleY(area.compID, cu.chromaFormat); const int pux = area.x % (m_ibcBufferWidth >> shiftSampleHor); const int puy = area.y % (m_ibcBufferHeight >> shiftSampleVer); #else const unsigned int lcuWidth = cu.cs->slice->getSPS()->getMaxCUWidth(); const int shiftSampleHor = ::getComponentScaleX(area.compID, cu.chromaFormat); const int shiftSampleVer = ::getComponentScaleY(area.compID, cu.chromaFormat); const int ctuSizeLog2Ver = floorLog2(lcuWidth) - shiftSampleVer; const int pux = area.x & ((m_ibcBufferWidth >> shiftSampleHor) - 1); const int puy = area.y & (( 1 << ctuSizeLog2Ver ) - 1); #endif const CompArea dstArea = CompArea(area.compID, cu.chromaFormat, Position(pux, puy), Size(area.width, area.height)); CPelBuf srcBuf = cu.cs->getRecoBuf(area); #if JVET_Z0118_GDR PelBuf dstBuf; // 1. copy to Dirty IBC Buffer dstBuf = m_ibcBuffer0.getBuf(dstArea); dstBuf.copyFrom(srcBuf); // 2. 
copy to Clean IBC Buffer if (useCleanIBCBuffer) { dstBuf = m_ibcBuffer1.getBuf(dstArea); dstBuf.copyFrom(srcBuf); } #else PelBuf dstBuf = m_ibcBuffer.getBuf(dstArea); dstBuf.copyFrom(srcBuf); #endif } } } void InterPrediction::xIntraBlockCopy(PredictionUnit &pu, PelUnitBuf &predBuf, const ComponentID compID) { #if JVET_Z0118_GDR bool isCleanCu = pu.cs->isClean(pu); bool useCleanIBCBuffer = pu.cs->isInGdrIntervalOrRecoveryPoc() && isCleanCu; #endif #if JVET_Z0153_IBC_EXT_REF const int shiftSampleHor = ::getComponentScaleX(compID, pu.chromaFormat); const int shiftSampleVer = ::getComponentScaleY(compID, pu.chromaFormat); #else const unsigned int lcuWidth = pu.cs->slice->getSPS()->getMaxCUWidth(); const int shiftSampleHor = ::getComponentScaleX(compID, pu.chromaFormat); const int shiftSampleVer = ::getComponentScaleY(compID, pu.chromaFormat); const int ctuSizeLog2Ver = floorLog2(lcuWidth) - shiftSampleVer; #endif pu.bv = pu.mv[REF_PIC_LIST_0]; pu.bv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT); int refx, refy; if (compID == COMPONENT_Y) { refx = pu.Y().x + pu.bv.hor; refy = pu.Y().y + pu.bv.ver; } else // compID == COMPONENT_Cb, COMPONENT_Cr { refx = pu.Cb().x + (pu.bv.hor >> shiftSampleHor); refy = pu.Cb().y + (pu.bv.ver >> shiftSampleVer); } #if JVET_Z0153_IBC_EXT_REF refx = refx % (m_ibcBufferWidth >> shiftSampleHor); refy = refy % (m_ibcBufferHeight >> shiftSampleVer); refx += (refx < 0) ? (m_ibcBufferWidth >> shiftSampleHor) : 0; refy += (refy < 0) ? 
(m_ibcBufferHeight >> shiftSampleVer) : 0; #else refx &= ((m_ibcBufferWidth >> shiftSampleHor) - 1); refy &= ((1 << ctuSizeLog2Ver) - 1); #endif #if JVET_Z0153_IBC_EXT_REF if (refy + predBuf.bufs[compID].height <= (m_ibcBufferHeight >> shiftSampleVer)) #else if (refx + predBuf.bufs[compID].width <= (m_ibcBufferWidth >> shiftSampleHor)) #endif { const CompArea srcArea = CompArea(compID, pu.chromaFormat, Position(refx, refy), Size(predBuf.bufs[compID].width, predBuf.bufs[compID].height)); #if JVET_Z0118_GDR if (useCleanIBCBuffer) { const CPelBuf refBuf = m_ibcBuffer1.getBuf(srcArea); predBuf.bufs[compID].copyFrom(refBuf); } else { const CPelBuf refBuf = m_ibcBuffer0.getBuf(srcArea); predBuf.bufs[compID].copyFrom(refBuf); } #else const CPelBuf refBuf = m_ibcBuffer.getBuf(srcArea); predBuf.bufs[compID].copyFrom(refBuf); #endif } else { //wrap around #if JVET_Z0153_IBC_EXT_REF #if JVET_Z0118_GDR if (useCleanIBCBuffer) { int height = (m_ibcBufferHeight >> shiftSampleVer) - refy; CompArea srcArea = CompArea(compID, pu.chromaFormat, Position(refx, refy), Size(predBuf.bufs[compID].width, height)); CPelBuf srcBuf = m_ibcBuffer1.getBuf(srcArea); PelBuf dstBuf = PelBuf(predBuf.bufs[compID].bufAt(Position(0, 0)), predBuf.bufs[compID].stride, Size(predBuf.bufs[compID].width, height)); dstBuf.copyFrom(srcBuf); height = refy + predBuf.bufs[compID].height - (m_ibcBufferHeight >> shiftSampleVer); srcArea = CompArea(compID, pu.chromaFormat, Position(refx, 0), Size(predBuf.bufs[compID].width, height)); srcBuf = m_ibcBuffer1.getBuf(srcArea); dstBuf = PelBuf(predBuf.bufs[compID].bufAt(Position(0, (m_ibcBufferHeight >> shiftSampleVer) - refy)), predBuf.bufs[compID].stride, Size(predBuf.bufs[compID].width, height)); dstBuf.copyFrom(srcBuf); } else { int height = (m_ibcBufferHeight >> shiftSampleVer) - refy; CompArea srcArea = CompArea(compID, pu.chromaFormat, Position(refx, refy), Size(predBuf.bufs[compID].width, height)); CPelBuf srcBuf = m_ibcBuffer0.getBuf(srcArea); PelBuf dstBuf = 
PelBuf(predBuf.bufs[compID].bufAt(Position(0, 0)), predBuf.bufs[compID].stride, Size(predBuf.bufs[compID].width, height)); dstBuf.copyFrom(srcBuf); height = refy + predBuf.bufs[compID].height - (m_ibcBufferHeight >> shiftSampleVer); srcArea = CompArea(compID, pu.chromaFormat, Position(refx, 0), Size(predBuf.bufs[compID].width, height)); srcBuf = m_ibcBuffer0.getBuf(srcArea); dstBuf = PelBuf(predBuf.bufs[compID].bufAt(Position(0, (m_ibcBufferHeight >> shiftSampleVer) - refy)), predBuf.bufs[compID].stride, Size(predBuf.bufs[compID].width, height)); dstBuf.copyFrom(srcBuf); } #else int height = (m_ibcBufferHeight >> shiftSampleVer) - refy; CompArea srcArea = CompArea(compID, pu.chromaFormat, Position(refx, refy), Size(predBuf.bufs[compID].width, height)); CPelBuf srcBuf = m_ibcBuffer.getBuf(srcArea); PelBuf dstBuf = PelBuf(predBuf.bufs[compID].bufAt(Position(0, 0)), predBuf.bufs[compID].stride, Size(predBuf.bufs[compID].width, height)); dstBuf.copyFrom(srcBuf); height = refy + predBuf.bufs[compID].height - (m_ibcBufferHeight >> shiftSampleVer); srcArea = CompArea(compID, pu.chromaFormat, Position(refx, 0), Size(predBuf.bufs[compID].width, height)); srcBuf = m_ibcBuffer.getBuf(srcArea); dstBuf = PelBuf(predBuf.bufs[compID].bufAt(Position(0, (m_ibcBufferHeight >> shiftSampleVer) - refy)), predBuf.bufs[compID].stride, Size(predBuf.bufs[compID].width, height)); dstBuf.copyFrom(srcBuf); #endif #else int width = (m_ibcBufferWidth >> shiftSampleHor) - refx; CompArea srcArea = CompArea(compID, pu.chromaFormat, Position(refx, refy), Size(width, predBuf.bufs[compID].height)); #if JVET_Z0118_GDR if (useCleanIBCBuffer) { CPelBuf srcBuf = m_ibcBuffer1.getBuf(srcArea); PelBuf dstBuf = PelBuf(predBuf.bufs[compID].bufAt(Position(0, 0)), predBuf.bufs[compID].stride, Size(width, predBuf.bufs[compID].height)); dstBuf.copyFrom(srcBuf); width = refx + predBuf.bufs[compID].width - (m_ibcBufferWidth >> shiftSampleHor); srcArea = CompArea(compID, pu.chromaFormat, Position(0, refy), 
Size(width, predBuf.bufs[compID].height)); srcBuf = m_ibcBuffer1.getBuf(srcArea); dstBuf = PelBuf(predBuf.bufs[compID].bufAt(Position((m_ibcBufferWidth >> shiftSampleHor) - refx, 0)), predBuf.bufs[compID].stride, Size(width, predBuf.bufs[compID].height)); dstBuf.copyFrom(srcBuf); } else { CPelBuf srcBuf = m_ibcBuffer0.getBuf(srcArea); PelBuf dstBuf = PelBuf(predBuf.bufs[compID].bufAt(Position(0, 0)), predBuf.bufs[compID].stride, Size(width, predBuf.bufs[compID].height)); dstBuf.copyFrom(srcBuf); width = refx + predBuf.bufs[compID].width - (m_ibcBufferWidth >> shiftSampleHor); srcArea = CompArea(compID, pu.chromaFormat, Position(0, refy), Size(width, predBuf.bufs[compID].height)); srcBuf = m_ibcBuffer0.getBuf(srcArea); dstBuf = PelBuf(predBuf.bufs[compID].bufAt(Position((m_ibcBufferWidth >> shiftSampleHor) - refx, 0)), predBuf.bufs[compID].stride, Size(width, predBuf.bufs[compID].height)); dstBuf.copyFrom(srcBuf); } #else CPelBuf srcBuf = m_ibcBuffer.getBuf(srcArea); PelBuf dstBuf = PelBuf(predBuf.bufs[compID].bufAt(Position(0, 0)), predBuf.bufs[compID].stride, Size(width, predBuf.bufs[compID].height)); dstBuf.copyFrom(srcBuf); width = refx + predBuf.bufs[compID].width - (m_ibcBufferWidth >> shiftSampleHor); srcArea = CompArea(compID, pu.chromaFormat, Position(0, refy), Size(width, predBuf.bufs[compID].height)); srcBuf = m_ibcBuffer.getBuf(srcArea); dstBuf = PelBuf(predBuf.bufs[compID].bufAt(Position((m_ibcBufferWidth >> shiftSampleHor) - refx, 0)), predBuf.bufs[compID].stride, Size(width, predBuf.bufs[compID].height)); dstBuf.copyFrom(srcBuf); #endif #endif } } void InterPrediction::resetIBCBuffer(const ChromaFormat chromaFormatIDC, const int ctuSize) { #if JVET_Z0153_IBC_EXT_REF const UnitArea area = UnitArea(chromaFormatIDC, Area(0, 0, m_ibcBufferWidth, m_ibcBufferHeight)); #else const UnitArea area = UnitArea(chromaFormatIDC, Area(0, 0, m_ibcBufferWidth, ctuSize)); #endif #if JVET_Z0118_GDR m_ibcBuffer0.getBuf(area).fill(-1); #else 
m_ibcBuffer.getBuf(area).fill(-1); #endif } #if JVET_Z0118_GDR void InterPrediction::resetCurIBCBuffer(const ChromaFormat chromaFormatIDC, const Area ctuArea, const int ctuSize, const Pel dirtyPel) { #if JVET_Z0153_IBC_EXT_REF const int shiftSampleHor = ::getComponentScaleX(COMPONENT_Y, chromaFormatIDC); const int shiftSampleVer = ::getComponentScaleY(COMPONENT_Y, chromaFormatIDC); const int pux = ctuArea.x % (m_ibcBufferWidth >> shiftSampleHor); const int puy = ctuArea.y % (m_ibcBufferHeight >> shiftSampleVer); #else const int shiftSampleHor = ::getComponentScaleX(COMPONENT_Y, chromaFormatIDC); const int shiftSampleVer = ::getComponentScaleY(COMPONENT_Y, chromaFormatIDC); const int ctuSizeLog2Ver = floorLog2(ctuSize) - shiftSampleVer; const int pux = ctuArea.x & ((m_ibcBufferWidth >> shiftSampleHor) - 1); const int puy = ctuArea.y & ((1 << ctuSizeLog2Ver) - 1); #endif const UnitArea area = UnitArea(chromaFormatIDC, Area(pux, puy, ctuSize, ctuSize)); m_ibcBuffer1.getBuf(area).fill(dirtyPel); } #endif void InterPrediction::resetVPDUforIBC(const ChromaFormat chromaFormatIDC, const int ctuSize, const int vSize, const int xPos, const int yPos) { #if JVET_Z0153_IBC_EXT_REF if(xPos == 0) { const UnitArea area = UnitArea(chromaFormatIDC, Area(0, yPos % m_ibcBufferHeight, m_ibcBufferWidth, ctuSize)); #if JVET_Z0118_GDR m_ibcBuffer0.getBuf(area).fill(-1); #else m_ibcBuffer.getBuf(area).fill(-1); #endif } #if JVET_AA0106_IBCBUF_CTU256 if(256 == ctuSize) { if(xPos - 2 * ctuSize >= 0) { const UnitArea area = UnitArea(chromaFormatIDC, Area((xPos - 2 * ctuSize) % m_ibcBufferWidth, (yPos + ctuSize) % m_ibcBufferHeight, ctuSize, ctuSize)); #if JVET_Z0118_GDR m_ibcBuffer0.getBuf(area).fill(-1); #else m_ibcBuffer.getBuf(area).fill(-1); #endif } } else #endif if(xPos - 3 * ctuSize >= 0) { const UnitArea area = UnitArea(chromaFormatIDC, Area((xPos - 3 * ctuSize) % m_ibcBufferWidth, (yPos + ctuSize) % m_ibcBufferHeight, ctuSize, ctuSize)); #if JVET_Z0118_GDR 
m_ibcBuffer0.getBuf(area).fill(-1); #else m_ibcBuffer.getBuf(area).fill(-1); #endif } #else const UnitArea area = UnitArea(chromaFormatIDC, Area(xPos & (m_ibcBufferWidth - 1), yPos & (ctuSize - 1), vSize, vSize)); #if JVET_Z0118_GDR m_ibcBuffer0.getBuf(area).fill(-1); #else m_ibcBuffer.getBuf(area).fill(-1); #endif #endif } bool InterPrediction::isLumaBvValid(const int ctuSize, const int xCb, const int yCb, const int width, const int height, const int xBv, const int yBv) { #if JVET_Z0153_IBC_EXT_REF int refTLx = xCb + xBv; int refTLy = yCb + yBv; #else if(((yCb + yBv) & (ctuSize - 1)) + height > ctuSize) { return false; } int refTLx = xCb + xBv; int refTLy = (yCb + yBv) & (ctuSize - 1); #endif #if JVET_Z0118_GDR PelBuf buf = m_ibcBuffer0.Y(); #else PelBuf buf = m_ibcBuffer.Y(); #endif for(int x = 0; x < width; x += 4) { for(int y = 0; y < height; y += 4) { #if JVET_Z0153_IBC_EXT_REF if(buf.at((x + refTLx) % m_ibcBufferWidth, (y + refTLy) % m_ibcBufferHeight) == -1) { return false; } if(buf.at((x + 3 + refTLx) % m_ibcBufferWidth, (y + refTLy) % m_ibcBufferHeight) == -1) { return false; } if(buf.at((x + refTLx) % m_ibcBufferWidth, (y + 3 + refTLy) % m_ibcBufferHeight) == -1) { return false; } if(buf.at((x + 3 + refTLx) % m_ibcBufferWidth, (y + 3 + refTLy) % m_ibcBufferHeight) == -1) { return false; } #else if(buf.at((x + refTLx) & (m_ibcBufferWidth - 1), y + refTLy) == -1) return false; if(buf.at((x + 3 + refTLx) & (m_ibcBufferWidth - 1), y + refTLy) == -1) return false; if(buf.at((x + refTLx) & (m_ibcBufferWidth - 1), y + 3 + refTLy) == -1) return false; if(buf.at((x + 3 + refTLx) & (m_ibcBufferWidth - 1), y + 3 + refTLy) == -1) return false; #endif } } return true; } bool InterPrediction::xPredInterBlkRPR( const std::pair<int, int>& scalingRatio, const PPS& pps, const CompArea &blk, const Picture* refPic, const Mv& mv, Pel* dst, const int dstStride, const bool bi, const bool wrapRef, const ClpRng& clpRng, const int filterIndex, const bool useAltHpelIf ) { const 
ChromaFormat chFmt = blk.chromaFormat; const ComponentID compID = blk.compID; const bool rndRes = !bi; int shiftHor = MV_FRACTIONAL_BITS_INTERNAL + ::getComponentScaleX( compID, chFmt ); int shiftVer = MV_FRACTIONAL_BITS_INTERNAL + ::getComponentScaleY( compID, chFmt ); int width = blk.width; int height = blk.height; CPelBuf refBuf; const bool scaled = refPic->isRefScaled( &pps ); if( scaled ) { int row, col; int refPicWidth = refPic->getPicWidthInLumaSamples(); int refPicHeight = refPic->getPicHeightInLumaSamples(); int xFilter = filterIndex; int yFilter = filterIndex; const int rprThreshold1 = ( 1 << SCALE_RATIO_BITS ) * 5 / 4; const int rprThreshold2 = ( 1 << SCALE_RATIO_BITS ) * 7 / 4; if( filterIndex == 0 ) { if( scalingRatio.first > rprThreshold2 ) { xFilter = 4; } else if( scalingRatio.first > rprThreshold1 ) { xFilter = 3; } if( scalingRatio.second > rprThreshold2 ) { yFilter = 4; } else if( scalingRatio.second > rprThreshold1 ) { yFilter = 3; } } if (filterIndex == 2) { if (isLuma(compID)) { if (scalingRatio.first > rprThreshold2) { xFilter = 6; } else if (scalingRatio.first > rprThreshold1) { xFilter = 5; } if (scalingRatio.second > rprThreshold2) { yFilter = 6; } else if (scalingRatio.second > rprThreshold1) { yFilter = 5; } } else { if (scalingRatio.first > rprThreshold2) { xFilter = 4; } else if (scalingRatio.first > rprThreshold1) { xFilter = 3; } if (scalingRatio.second > rprThreshold2) { yFilter = 4; } else if (scalingRatio.second > rprThreshold1) { yFilter = 3; } } } const int posShift = SCALE_RATIO_BITS - 4; int stepX = ( scalingRatio.first + 8 ) >> 4; int stepY = ( scalingRatio.second + 8 ) >> 4; int64_t x0Int; int64_t y0Int; int offX = 1 << ( posShift - shiftHor - 1 ); int offY = 1 << ( posShift - shiftVer - 1 ); const int64_t posX = ( ( blk.pos().x << ::getComponentScaleX( compID, chFmt ) ) - ( pps.getScalingWindow().getWindowLeftOffset() * SPS::getWinUnitX( chFmt ) ) ) >> ::getComponentScaleX( compID, chFmt ); const int64_t posY = ( ( 
blk.pos().y << ::getComponentScaleY( compID, chFmt ) ) - ( pps.getScalingWindow().getWindowTopOffset() * SPS::getWinUnitY( chFmt ) ) ) >> ::getComponentScaleY( compID, chFmt ); int addX = isLuma( compID ) ? 0 : int( 1 - refPic->cs->sps->getHorCollocatedChromaFlag() ) * 8 * ( scalingRatio.first - SCALE_1X.first ); int addY = isLuma( compID ) ? 0 : int( 1 - refPic->cs->sps->getVerCollocatedChromaFlag() ) * 8 * ( scalingRatio.second - SCALE_1X.second ); x0Int = ( ( posX << ( 4 + ::getComponentScaleX( compID, chFmt ) ) ) + mv.getHor() ) * (int64_t)scalingRatio.first + addX; x0Int = SIGN( x0Int ) * ( ( llabs( x0Int ) + ( (long long)1 << ( 7 + ::getComponentScaleX( compID, chFmt ) ) ) ) >> ( 8 + ::getComponentScaleX( compID, chFmt ) ) ) + ( ( refPic->getScalingWindow().getWindowLeftOffset() * SPS::getWinUnitX( chFmt ) ) << ( ( posShift - ::getComponentScaleX( compID, chFmt ) ) ) ); y0Int = ( ( posY << ( 4 + ::getComponentScaleY( compID, chFmt ) ) ) + mv.getVer() ) * (int64_t)scalingRatio.second + addY; y0Int = SIGN( y0Int ) * ( ( llabs( y0Int ) + ( (long long)1 << ( 7 + ::getComponentScaleY( compID, chFmt ) ) ) ) >> ( 8 + ::getComponentScaleY( compID, chFmt ) ) ) + ( ( refPic->getScalingWindow().getWindowTopOffset() * SPS::getWinUnitY( chFmt ) ) << ( ( posShift - ::getComponentScaleY( compID, chFmt ) ) ) ); const int extSize = isLuma( compID ) ? 1 : 2; #if IF_12TAP #if RPR_ENABLE const int iTap = 0; #else const int iTap = 1; #endif int vFilterSize = isLuma(compID) ? NTAPS_LUMA(iTap) : NTAPS_CHROMA; #else int vFilterSize = isLuma( compID ) ? 
NTAPS_LUMA : NTAPS_CHROMA; #endif int yInt0 = ( (int32_t)y0Int + offY ) >> posShift; #if IF_12TAP yInt0 = std::min(std::max(-(NTAPS_LUMA(iTap) / 2), yInt0), (refPicHeight >> ::getComponentScaleY(compID, chFmt)) + (NTAPS_LUMA(iTap) / 2)); #else yInt0 = std::min( std::max( -(NTAPS_LUMA / 2), yInt0 ), ( refPicHeight >> ::getComponentScaleY( compID, chFmt ) ) + (NTAPS_LUMA / 2) ); #endif int xInt0 = ( (int32_t)x0Int + offX ) >> posShift; #if IF_12TAP xInt0 = std::min(std::max(-(NTAPS_LUMA(iTap) / 2), xInt0), (refPicWidth >> ::getComponentScaleX(compID, chFmt)) + (NTAPS_LUMA(iTap) / 2)); #else xInt0 = std::min( std::max( -(NTAPS_LUMA / 2), xInt0 ), ( refPicWidth >> ::getComponentScaleX( compID, chFmt ) ) + (NTAPS_LUMA / 2) ); #endif int refHeight = ((((int32_t)y0Int + (height-1) * stepY) + offY ) >> posShift) - ((((int32_t)y0Int + 0 * stepY) + offY ) >> posShift) + 1; refHeight = std::max<int>( 1, refHeight ); CHECK( MAX_CU_SIZE * MAX_SCALING_RATIO + 16 < refHeight + vFilterSize - 1 + extSize, "Buffer is not large enough, increase MAX_SCALING_RATIO" ); Pel buffer[( MAX_CU_SIZE + 16 ) * ( MAX_CU_SIZE * MAX_SCALING_RATIO + 16 )]; int tmpStride = width; int xInt = 0, yInt = 0; for( col = 0; col < width; col++ ) { int posX = (int32_t)x0Int + col * stepX; xInt = ( posX + offX ) >> posShift; #if IF_12TAP xInt = std::min(std::max(-(NTAPS_LUMA(iTap) / 2), xInt), (refPicWidth >> ::getComponentScaleX(compID, chFmt)) + (NTAPS_LUMA(iTap) / 2)); #else xInt = std::min( std::max( -(NTAPS_LUMA / 2), xInt ), ( refPicWidth >> ::getComponentScaleX( compID, chFmt ) ) + (NTAPS_LUMA / 2) ); #endif int xFrac = ( ( posX + offX ) >> ( posShift - shiftHor ) ) & ( ( 1 << shiftHor ) - 1 ); CHECK( xInt0 > xInt, "Wrong horizontal starting point" ); Position offset = Position( xInt, yInt0 ); refBuf = refPic->getRecoBuf( CompArea( compID, chFmt, offset, Size( 1, refHeight ) ), wrapRef ); Pel* tempBuf = buffer + col; m_if.filterHor( compID, (Pel*)refBuf.buf - ( ( vFilterSize >> 1 ) - 1 ) * 
refBuf.stride, refBuf.stride, tempBuf, tmpStride, 1, refHeight + vFilterSize - 1 + extSize, xFrac, false, chFmt, clpRng, xFilter, false, useAltHpelIf && scalingRatio.first == 1 << SCALE_RATIO_BITS ); } for( row = 0; row < height; row++ ) { int posY = (int32_t)y0Int + row * stepY; yInt = ( posY + offY ) >> posShift; #if IF_12TAP yInt = std::min(std::max(-(NTAPS_LUMA(iTap) / 2), yInt), (refPicHeight >> ::getComponentScaleY(compID, chFmt)) + (NTAPS_LUMA(iTap) / 2)); #else yInt = std::min( std::max( -(NTAPS_LUMA / 2), yInt ), ( refPicHeight >> ::getComponentScaleY( compID, chFmt ) ) + (NTAPS_LUMA / 2) ); #endif int yFrac = ( ( posY + offY ) >> ( posShift - shiftVer ) ) & ( ( 1 << shiftVer ) - 1 ); CHECK( yInt0 > yInt, "Wrong vertical starting point" ); Pel* tempBuf = buffer + ( yInt - yInt0 ) * tmpStride; JVET_J0090_SET_CACHE_ENABLE( false ); m_if.filterVer( compID, tempBuf + ( ( vFilterSize >> 1 ) - 1 ) * tmpStride, tmpStride, dst + row * dstStride, dstStride, width, 1, yFrac, false, rndRes, chFmt, clpRng, yFilter, false, useAltHpelIf && scalingRatio.second == 1 << SCALE_RATIO_BITS ); JVET_J0090_SET_CACHE_ENABLE( true ); } } return scaled; } #if INTER_LIC void InterPrediction::xLocalIlluComp(const PredictionUnit& pu, const ComponentID compID, const Picture& refPic, const Mv& mv, const bool biPred, PelBuf& dstBuf #if JVET_AA0146_WRAP_AROUND_FIX , bool wrapRef #endif ) { Pel* refLeftTemplate = m_pcLICRefLeftTemplate; Pel* refAboveTemplate = m_pcLICRefAboveTemplate; Pel* recLeftTemplate = m_pcLICRecLeftTemplate; Pel* recAboveTemplate = m_pcLICRecAboveTemplate; int numTemplate[2] = { 0 , 0 }; // 0:Above, 1:Left #if JVET_AA0146_WRAP_AROUND_FIX xGetSublkTemplate(*pu.cu, compID, refPic, mv, pu.blocks[compID].width, pu.blocks[compID].height, 0, 0, numTemplate, refLeftTemplate, refAboveTemplate, recLeftTemplate, recAboveTemplate, wrapRef); #else xGetSublkTemplate(*pu.cu, compID, refPic, mv, pu.blocks[compID].width, pu.blocks[compID].height, 0, 0, numTemplate, refLeftTemplate, 
refAboveTemplate, recLeftTemplate, recAboveTemplate); #endif int shift = 0, scale = 0, offset = 0; xGetLICParamGeneral(*pu.cu, compID, numTemplate, refLeftTemplate, refAboveTemplate, recLeftTemplate, recAboveTemplate, shift, scale, offset); const ClpRng& clpRng = pu.cu->cs->slice->clpRng(compID); dstBuf.linearTransform(scale, shift, offset, true, clpRng); } void InterPrediction::xGetSublkTemplate(const CodingUnit& cu, const ComponentID compID, const Picture& refPic, const Mv& mv, const int sublkWidth, const int sublkHeight, const int posW, const int posH, int* numTemplate, Pel* refLeftTemplate, Pel* refAboveTemplate, Pel* recLeftTemplate, Pel* recAboveTemplate #if JVET_AA0146_WRAP_AROUND_FIX , bool wrapRef #endif ) { const int bitDepth = cu.cs->sps->getBitDepth(toChannelType(compID)); const int precShift = std::max(0, bitDepth - 12); const Picture& currPic = *cu.cs->picture; const CodingUnit* const cuAbove = cu.cs->getCU(cu.blocks[compID].pos().offset(0, -1), toChannelType(compID)); const CodingUnit* const cuLeft = cu.cs->getCU(cu.blocks[compID].pos().offset(-1, 0), toChannelType(compID)); const CPelBuf recBuf = cuAbove || cuLeft ? currPic.getRecoBuf(cu.cs->picture->blocks[compID]) : CPelBuf(); #if JVET_AA0146_WRAP_AROUND_FIX const CPelBuf refBuf = cuAbove || cuLeft ? refPic.getRecoBuf(refPic.blocks[compID], wrapRef) : CPelBuf(); #else const CPelBuf refBuf = cuAbove || cuLeft ? 
refPic.getRecoBuf(refPic.blocks[compID]) : CPelBuf(); #endif std::vector<Pel>& invLUT = m_pcReshape->getInvLUT(); // above if (cuAbove && posH == 0) { xGetPredBlkTpl<true>(cu, compID, refBuf, mv, posW, posH, sublkWidth, refAboveTemplate); const Pel* rec = recBuf.bufAt(cu.blocks[compID].pos().offset(0, -1)); for (int k = posW; k < posW + sublkWidth; k++) { int refVal = refAboveTemplate[k]; int recVal = rec[k]; if (isLuma(compID) && cu.cs->picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()) { recVal = invLUT[recVal]; } recVal >>= precShift; refVal >>= precShift; refAboveTemplate[k] = refVal; recAboveTemplate[k] = recVal; numTemplate[0]++; } } // left if (cuLeft && posW == 0) { xGetPredBlkTpl<false>(cu, compID, refBuf, mv, posW, posH, sublkHeight, refLeftTemplate); const Pel* rec = recBuf.bufAt(cu.blocks[compID].pos().offset(-1, 0)); for (int k = posH; k < posH + sublkHeight; k++) { int refVal = refLeftTemplate[k]; int recVal = rec[recBuf.stride * k]; if (isLuma(compID) && cu.cs->picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()) { recVal = invLUT[recVal]; } recVal >>= precShift; refVal >>= precShift; refLeftTemplate[k] = refVal; recLeftTemplate[k] = recVal; numTemplate[1]++; } } } void InterPrediction::xGetLICParamGeneral(const CodingUnit& cu, const ComponentID compID, int* numTemplate, Pel* refLeftTemplate, Pel* refAboveTemplate, Pel* recLeftTemplate, Pel* recAboveTemplate, int& shift, int& scale, int& offset ) { const int cuWidth = cu.blocks[compID].width; const int cuHeight = cu.blocks[compID].height; const int bitDepth = cu.cs->sps->getBitDepth(toChannelType(compID)); const int precShift = std::max(0, bitDepth - 12); const int maxNumMinus1 = 30 - 2 * std::min(bitDepth, 12) - 1; const int minDimBit = floorLog2(std::min(cuHeight, cuWidth)); const int minDim = 1 << minDimBit; int minStepBit = minDim > 8 ? 
1 : 0; while (minDimBit > minStepBit + maxNumMinus1) { minStepBit++; } //make sure log2(2*minDim/tmpStep) + 2*min(bitDepth,12) <= 30 const int numSteps = minDim >> minStepBit; const int dimShift = minDimBit - minStepBit; //----- get correlation data ----- int x = 0, y = 0, xx = 0, xy = 0, cntShift = 0; // above if (numTemplate[0] != 0) { for (int k = 0; k < numSteps; k++) { CHECK(((k * cuWidth) >> dimShift) >= cuWidth, "Out of range"); int refVal = refAboveTemplate[((k * cuWidth) >> dimShift)]; int recVal = recAboveTemplate[((k * cuWidth) >> dimShift)]; x += refVal; y += recVal; xx += refVal * refVal; xy += refVal * recVal; } cntShift = dimShift; } // left if (numTemplate[1] != 0) { for (int k = 0; k < numSteps; k++) { CHECK(((k * cuHeight) >> dimShift) >= cuHeight, "Out of range"); int refVal = refLeftTemplate[((k * cuHeight) >> dimShift)]; int recVal = recLeftTemplate[((k * cuHeight) >> dimShift)]; x += refVal; y += recVal; xx += refVal * refVal; xy += refVal * recVal; } cntShift += (cntShift ? 
1 : dimShift); } //----- determine scale and offset ----- shift = m_LICShift; if (cntShift == 0) { scale = (1 << shift); offset = 0; return; } const int cropShift = std::max(0, bitDepth - precShift + cntShift - 15); const int xzOffset = (xx >> m_LICRegShift); const int sumX = x << precShift; const int sumY = y << precShift; const int sumXX = ((xx + xzOffset) >> (cropShift << 1)) << cntShift; const int sumXY = ((xy + xzOffset) >> (cropShift << 1)) << cntShift; const int sumXsumX = (x >> cropShift) * (x >> cropShift); const int sumXsumY = (x >> cropShift) * (y >> cropShift); int a1 = sumXY - sumXsumY; int a2 = sumXX - sumXsumX; int scaleShiftA2 = getMSB(abs(a2)) - 6; int scaleShiftA1 = scaleShiftA2 - m_LICShiftDiff; scaleShiftA2 = std::max(0, scaleShiftA2); scaleShiftA1 = std::max(0, scaleShiftA1); const int scaleShiftA = scaleShiftA2 + 15 - shift - scaleShiftA1; a1 = a1 >> scaleShiftA1; a2 = Clip3(0, 63, a2 >> scaleShiftA2); scale = int((int64_t(a1) * int64_t(m_LICMultApprox[a2])) >> scaleShiftA); scale = Clip3(0, 1 << (shift + 2), scale); const int maxOffset = (1 << (bitDepth - 1)) - 1; const int minOffset = -1 - maxOffset; offset = (sumY - ((scale * sumX) >> shift) + ((1 << (cntShift)) >> 1)) >> cntShift; offset = Clip3(minOffset, maxOffset, offset); } template <bool trueAfalseL> void InterPrediction::xGetPredBlkTpl(const CodingUnit& cu, const ComponentID compID, const CPelBuf& refBuf, const Mv& mv, const int posW, const int posH, const int tplSize, Pel* predBlkTpl #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED , bool AML #endif ) { const int lumaShift = 2 + MV_FRACTIONAL_BITS_DIFF; const int horShift = (lumaShift + ::getComponentScaleX(compID, cu.chromaFormat)); const int verShift = (lumaShift + ::getComponentScaleY(compID, cu.chromaFormat)); const int xInt = mv.getHor() >> horShift; const int yInt = mv.getVer() >> verShift; const int xFrac = mv.getHor() & ((1 << horShift) - 1); const int yFrac = mv.getVer() & ((1 << verShift) - 1); const Pel* ref; Pel* dst; int 
refStride, dstStride, bw, bh; if( trueAfalseL ) { ref = refBuf.bufAt(cu.blocks[compID].pos().offset(xInt + posW, yInt + posH - 1)); dst = predBlkTpl + posW; refStride = refBuf.stride; dstStride = tplSize; bw = tplSize; bh = 1; } else { ref = refBuf.bufAt(cu.blocks[compID].pos().offset(xInt + posW - 1, yInt + posH)); dst = predBlkTpl + posH; refStride = refBuf.stride; dstStride = 1; bw = 1; bh = tplSize; } #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED int nFilterIdx = AML ? 1 : 0; #else const int nFilterIdx = 0; #endif const bool useAltHpelIf = false; if ( yFrac == 0 ) { m_if.filterHor( compID, (Pel*) ref, refStride, dst, dstStride, bw, bh, xFrac, true, cu.chromaFormat, cu.slice->clpRng(compID), nFilterIdx, false, useAltHpelIf); } else if ( xFrac == 0 ) { m_if.filterVer( compID, (Pel*) ref, refStride, dst, dstStride, bw, bh, yFrac, true, true, cu.chromaFormat, cu.slice->clpRng(compID), nFilterIdx, false, useAltHpelIf); } else { #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED #if IF_12TAP int vFilterSize = isLuma(compID) ? NTAPS_LUMA(0) : NTAPS_CHROMA; #else int vFilterSize = isLuma(compID) ? NTAPS_LUMA : NTAPS_CHROMA; #endif if (isLuma(compID) && nFilterIdx == 1) { vFilterSize = NTAPS_BILINEAR; } #else #if IF_12TAP const int vFilterSize = isLuma(compID) ? NTAPS_LUMA(0) : NTAPS_CHROMA; #else const int vFilterSize = isLuma(compID) ? 
NTAPS_LUMA : NTAPS_CHROMA; #endif #endif PelBuf tmpBuf = PelBuf(m_filteredBlockTmp[0][compID], Size(bw, bh+vFilterSize-1)); m_if.filterHor( compID, (Pel*)ref - ((vFilterSize>>1) -1)*refStride, refStride, tmpBuf.buf, tmpBuf.stride, bw, bh+vFilterSize-1, xFrac, false, cu.chromaFormat, cu.slice->clpRng(compID), nFilterIdx, false, useAltHpelIf); JVET_J0090_SET_CACHE_ENABLE( false ); m_if.filterVer( compID, tmpBuf.buf + ((vFilterSize>>1) -1)*tmpBuf.stride, tmpBuf.stride, dst, dstStride, bw, bh, yFrac, false, true, cu.chromaFormat, cu.slice->clpRng(compID), nFilterIdx, false, useAltHpelIf); JVET_J0090_SET_CACHE_ENABLE( true ); } } #endif // INTER_LIC #if TM_AMVP || TM_MRG || JVET_Z0084_IBC_TM Distortion InterPrediction::deriveTMMv(const PredictionUnit& pu, bool fillCurTpl, Distortion curBestCost, RefPicList eRefList, int refIdx, int maxSearchRounds, Mv& mv, const MvField* otherMvf) { CHECK(refIdx < 0, "Invalid reference index for TM"); const CodingUnit& cu = *pu.cu; #if JVET_Z0084_IBC_TM #if JVET_Y0128_NON_CTC if ( !CU::isIBC(cu) && cu.slice->getRefPic(eRefList, refIdx)->isRefScaled( pu.cs->pps ) ) { return std::numeric_limits<Distortion>::max(); } #endif CHECK(CU::isIBC(cu) && otherMvf != nullptr, "IBC TM for bidir is not allowed."); const Picture& refPic = CU::isIBC(cu) ? *cu.slice->getPic() : *cu.slice->getRefPic(eRefList, refIdx)->unscaledPic; #else #if JVET_Y0128_NON_CTC if ( cu.slice->getRefPic(eRefList, refIdx)->isRefScaled( pu.cs->pps ) ) { return std::numeric_limits<Distortion>::max(); } #endif const Picture& refPic = *cu.slice->getRefPic(eRefList, refIdx)->unscaledPic; #endif bool doSimilarityCheck = otherMvf == nullptr ? 
false : cu.slice->getRefPOC((RefPicList)eRefList, refIdx) == cu.slice->getRefPOC((RefPicList)(1 - eRefList), otherMvf->refIdx); InterPredResources interRes(m_pcReshape, m_pcRdCost, m_if, m_filteredBlockTmp[0][COMPONENT_Y] , m_filteredBlock[3][1][0], m_filteredBlock[3][0][0] ); TplMatchingCtrl tplCtrl(pu, interRes, refPic, fillCurTpl, COMPONENT_Y, true, maxSearchRounds, m_pcCurTplAbove, m_pcCurTplLeft, m_pcRefTplAbove, m_pcRefTplLeft, mv, (doSimilarityCheck ? &(otherMvf->mv) : nullptr), curBestCost); if (!tplCtrl.getTemplatePresentFlag()) { return std::numeric_limits<Distortion>::max(); } if (otherMvf == nullptr) // uni prediction { #if TM_MRG && JVET_AA0093_REFINED_MOTION_FOR_ARMC if (pu.reduceTplSize && pu.tmMergeFlag #if JVET_AA0132_CONFIGURABLE_TM_TOOLS && pu.cs->sps->getUseTMMrgMode() #endif ) { tplCtrl.deriveMvUni<1>(); } else { #endif tplCtrl.deriveMvUni<TM_TPL_SIZE>(); #if TM_MRG && JVET_AA0093_REFINED_MOTION_FOR_ARMC } #endif mv = tplCtrl.getFinalMv(); return tplCtrl.getMinCost(); } else // bi prediction { #if JVET_Y0128_NON_CTC if ( cu.slice->getRefPic((RefPicList)(1 - eRefList), otherMvf->refIdx)->isRefScaled(pu.cs->pps) ) { return std::numeric_limits<Distortion>::max(); } #endif const Picture& otherRefPic = *cu.slice->getRefPic((RefPicList)(1-eRefList), otherMvf->refIdx)->unscaledPic; #if TM_MRG && JVET_AA0093_REFINED_MOTION_FOR_ARMC if (pu.reduceTplSize && pu.tmMergeFlag #if JVET_AA0132_CONFIGURABLE_TM_TOOLS && pu.cs->sps->getUseTMMrgMode() #endif ) { tplCtrl.removeHighFreq<1>(otherRefPic, otherMvf->mv, getBcwWeight(cu.BcwIdx, eRefList)); tplCtrl.deriveMvUni<1>(); } else { #endif tplCtrl.removeHighFreq<TM_TPL_SIZE>(otherRefPic, otherMvf->mv, getBcwWeight(cu.BcwIdx, eRefList)); tplCtrl.deriveMvUni<TM_TPL_SIZE>(); #if TM_MRG && JVET_AA0093_REFINED_MOTION_FOR_ARMC } #endif mv = tplCtrl.getFinalMv(); int8_t intWeight = getBcwWeight(cu.BcwIdx, eRefList); return (tplCtrl.getMinCost() * intWeight + (g_BcwWeightBase >> 1)) >> g_BcwLog2WeightBase; } } #if TM_MRG 
|| (JVET_Z0084_IBC_TM && IBC_TM_MRG) #if JVET_AA0093_REFINED_MOTION_FOR_ARMC void InterPrediction::deriveTMMv(PredictionUnit& pu, Distortion* tmCost) #else void InterPrediction::deriveTMMv(PredictionUnit& pu) #endif { if( !pu.tmMergeFlag ) { return; } Distortion minCostUni[NUM_REF_PIC_LIST_01] = { std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max() }; for (int iRefList = 0; iRefList < ( pu.cu->slice->isInterB() ? NUM_REF_PIC_LIST_01 : 1 ) ; ++iRefList) { if (pu.interDir & (iRefList + 1)) { minCostUni[iRefList] = deriveTMMv(pu, true, std::numeric_limits<Distortion>::max(), (RefPicList)iRefList, pu.refIdx[iRefList], TM_MAX_NUM_OF_ITERATIONS, pu.mv[iRefList]); } } #if JVET_AA0093_REFINED_MOTION_FOR_ARMC if (tmCost) { *tmCost = minCostUni[0] <= minCostUni[1] ? minCostUni[0] : minCostUni[1]; } #endif if (pu.cu->slice->isInterB() && pu.interDir == 3 #if MULTI_PASS_DMVR && !PU::checkBDMVRCondition(pu) #endif ) { if (minCostUni[0] == std::numeric_limits<Distortion>::max() || minCostUni[1] == std::numeric_limits<Distortion>::max()) { return; } RefPicList eTargetPicList = (minCostUni[0] <= minCostUni[1]) ? 
REF_PIC_LIST_1 : REF_PIC_LIST_0; MvField mvfBetterUni(pu.mv[1 - eTargetPicList], pu.refIdx[1 - eTargetPicList]); Distortion minCostBi = deriveTMMv(pu, true, std::numeric_limits<Distortion>::max(), eTargetPicList, pu.refIdx[eTargetPicList], TM_MAX_NUM_OF_ITERATIONS, pu.mv[eTargetPicList], &mvfBetterUni); if (minCostBi > (minCostUni[1 - eTargetPicList] + (minCostUni[1 - eTargetPicList] >> 3))) { pu.interDir = 1 + (1 - eTargetPicList); pu.mv [eTargetPicList] = Mv(); pu.refIdx[eTargetPicList] = NOT_VALID; } #if JVET_AA0093_REFINED_MOTION_FOR_ARMC else if (tmCost) { *tmCost = minCostBi; } #endif } } #endif // TM_MRG || (JVET_Z0084_IBC_TM && IBC_TM_MRG) #endif // TM_AMVP || TM_MRG || JVET_Z0084_IBC_TM #if TM_AMVP || TM_MRG || JVET_Z0084_IBC_TM TplMatchingCtrl::TplMatchingCtrl( const PredictionUnit& pu, InterPredResources& interRes, const Picture& refPic, const bool fillCurTpl, const ComponentID compID, const bool useWeight, const int maxSearchRounds, Pel* curTplAbove, Pel* curTplLeft, Pel* refTplAbove, Pel* refTplLeft, const Mv& mvStart, const Mv* otherRefListMv, const Distortion curBestCost ) : m_cu (*pu.cu) , m_pu (pu) , m_interRes (interRes) , m_refPic (refPic) , m_mvStart (mvStart) , m_mvFinal (mvStart) , m_otherRefListMv (otherRefListMv) , m_minCost (curBestCost) , m_useWeight (useWeight) , m_maxSearchRounds (maxSearchRounds) , m_compID (compID) { #if JVET_Z0067_RPR_ENABLE if ( refPic.isRefScaled(pu.cs->pps) ) { return; } #endif // Initialization #if TM_MRG && JVET_AA0093_REFINED_MOTION_FOR_ARMC if (m_pu.reduceTplSize && pu.tmMergeFlag #if JVET_AA0132_CONFIGURABLE_TM_TOOLS && m_pu.cs->sps->getUseTMMrgMode() #endif ) { bool tplAvalableAbove = xFillCurTemplate<1, true >((fillCurTpl ? curTplAbove : nullptr)); bool tplAvalableLeft = xFillCurTemplate<1, false>((fillCurTpl ? 
curTplLeft : nullptr)); tplAvalableAbove &= !(tplAvalableLeft && pu.lwidth()*2 < pu.lheight()); tplAvalableLeft &= !(tplAvalableAbove && pu.lheight()*2 < pu.lwidth()); m_curTplAbove = tplAvalableAbove ? PelBuf(curTplAbove, pu.lwidth(), 1 ) : PelBuf(); m_curTplLeft = tplAvalableLeft ? PelBuf(curTplLeft , 1, pu.lheight()) : PelBuf(); m_refTplAbove = tplAvalableAbove ? PelBuf(refTplAbove, m_curTplAbove ) : PelBuf(); m_refTplLeft = tplAvalableLeft ? PelBuf(refTplLeft , m_curTplLeft ) : PelBuf(); #if JVET_X0056_DMVD_EARLY_TERMINATION m_earlyTerminateTh = ((tplAvalableAbove ? m_pu.lwidth() : 0) + (tplAvalableLeft ? m_pu.lheight() : 0)); #endif // Pre-interpolate samples on search area #if JVET_Z0084_IBC_TM m_refSrAbove = tplAvalableAbove && maxSearchRounds > 0 && !CU::isIBC(m_cu) ? PelBuf(interRes.m_preFillBufA, m_curTplAbove.width + 2 * TM_SEARCH_RANGE, m_curTplAbove.height + 2 * TM_SEARCH_RANGE) : PelBuf(); #else m_refSrAbove = tplAvalableAbove && maxSearchRounds > 0 ? PelBuf(interRes.m_preFillBufA, m_curTplAbove.width + 2 * TM_SEARCH_RANGE, m_curTplAbove.height + 2 * TM_SEARCH_RANGE) : PelBuf(); #endif if (m_refSrAbove.buf != nullptr) { m_refSrAbove = xGetRefTemplate<1, true, TM_SEARCH_RANGE>(m_pu, m_refPic, mvStart, m_refSrAbove); m_refSrAbove = m_refSrAbove.subBuf(Position(TM_SEARCH_RANGE, TM_SEARCH_RANGE), m_curTplAbove); } #if JVET_Z0084_IBC_TM m_refSrLeft = tplAvalableLeft && maxSearchRounds > 0 && !CU::isIBC(m_cu) ? PelBuf(interRes.m_preFillBufL, m_curTplLeft .width + 2 * TM_SEARCH_RANGE, m_curTplLeft .height + 2 * TM_SEARCH_RANGE) : PelBuf(); #else m_refSrLeft = tplAvalableLeft && maxSearchRounds > 0 ? 
PelBuf(interRes.m_preFillBufL, m_curTplLeft .width + 2 * TM_SEARCH_RANGE, m_curTplLeft .height + 2 * TM_SEARCH_RANGE) : PelBuf(); #endif if (m_refSrLeft.buf != nullptr) { m_refSrLeft = xGetRefTemplate<1, false, TM_SEARCH_RANGE>(m_pu, m_refPic, mvStart, m_refSrLeft); m_refSrLeft = m_refSrLeft.subBuf(Position(TM_SEARCH_RANGE, TM_SEARCH_RANGE), m_curTplLeft); } } else { bool tplAvalableAbove = xFillCurTemplate<TM_TPL_SIZE, true >((fillCurTpl ? curTplAbove : nullptr)); bool tplAvalableLeft = xFillCurTemplate<TM_TPL_SIZE, false>((fillCurTpl ? curTplLeft : nullptr)); if (pu.reduceTplSize) { tplAvalableAbove &= !(tplAvalableLeft && pu.lwidth()*2 < pu.lheight()); tplAvalableLeft &= !(tplAvalableAbove && pu.lheight()*2 < pu.lwidth()); } #else const bool tplAvalableAbove = xFillCurTemplate<TM_TPL_SIZE, true >((fillCurTpl ? curTplAbove : nullptr)); const bool tplAvalableLeft = xFillCurTemplate<TM_TPL_SIZE, false>((fillCurTpl ? curTplLeft : nullptr)); #endif m_curTplAbove = tplAvalableAbove ? PelBuf(curTplAbove, pu.lwidth(), TM_TPL_SIZE ) : PelBuf(); m_curTplLeft = tplAvalableLeft ? PelBuf(curTplLeft , TM_TPL_SIZE, pu.lheight()) : PelBuf(); m_refTplAbove = tplAvalableAbove ? PelBuf(refTplAbove, m_curTplAbove ) : PelBuf(); m_refTplLeft = tplAvalableLeft ? PelBuf(refTplLeft , m_curTplLeft ) : PelBuf(); #if JVET_X0056_DMVD_EARLY_TERMINATION m_earlyTerminateTh = TM_TPL_SIZE * ((tplAvalableAbove ? m_pu.lwidth() : 0) + (tplAvalableLeft ? m_pu.lheight() : 0)); #endif // Pre-interpolate samples on search area #if JVET_Z0084_IBC_TM m_refSrAbove = tplAvalableAbove && maxSearchRounds > 0 && !CU::isIBC(m_cu) ? PelBuf(interRes.m_preFillBufA, m_curTplAbove.width + 2 * TM_SEARCH_RANGE, m_curTplAbove.height + 2 * TM_SEARCH_RANGE) : PelBuf(); #else m_refSrAbove = tplAvalableAbove && maxSearchRounds > 0 ? 
PelBuf(interRes.m_preFillBufA, m_curTplAbove.width + 2 * TM_SEARCH_RANGE, m_curTplAbove.height + 2 * TM_SEARCH_RANGE) : PelBuf(); #endif if (m_refSrAbove.buf != nullptr) { m_refSrAbove = xGetRefTemplate<TM_TPL_SIZE, true, TM_SEARCH_RANGE>(m_pu, m_refPic, mvStart, m_refSrAbove); m_refSrAbove = m_refSrAbove.subBuf(Position(TM_SEARCH_RANGE, TM_SEARCH_RANGE), m_curTplAbove); } #if JVET_Z0084_IBC_TM m_refSrLeft = tplAvalableLeft && maxSearchRounds > 0 && !CU::isIBC(m_cu) ? PelBuf(interRes.m_preFillBufL, m_curTplLeft .width + 2 * TM_SEARCH_RANGE, m_curTplLeft .height + 2 * TM_SEARCH_RANGE) : PelBuf(); #else m_refSrLeft = tplAvalableLeft && maxSearchRounds > 0 ? PelBuf(interRes.m_preFillBufL, m_curTplLeft .width + 2 * TM_SEARCH_RANGE, m_curTplLeft .height + 2 * TM_SEARCH_RANGE) : PelBuf(); #endif if (m_refSrLeft.buf != nullptr) { m_refSrLeft = xGetRefTemplate<TM_TPL_SIZE, false, TM_SEARCH_RANGE>(m_pu, m_refPic, mvStart, m_refSrLeft); m_refSrLeft = m_refSrLeft.subBuf(Position(TM_SEARCH_RANGE, TM_SEARCH_RANGE), m_curTplLeft); } #if TM_MRG && JVET_AA0093_REFINED_MOTION_FOR_ARMC } #endif } int TplMatchingCtrl::getDeltaMean(const PelBuf& bufCur, const PelBuf& bufRef, const int rowSubShift, const int bd) { int64_t deltaSum = g_pelBufOP.getSumOfDifference(bufCur.buf, bufCur.stride, bufRef.buf, bufRef.stride, bufCur.width, bufCur.height, rowSubShift, bd); return int(deltaSum / (int64_t)bufCur.area()); } template <int tplSize> void TplMatchingCtrl::deriveMvUni() { if (m_minCost == std::numeric_limits<Distortion>::max()) { m_minCost = xGetTempMatchError<tplSize>(m_mvStart); } if (m_maxSearchRounds <= 0) { return; } int searchStepShift = (m_cu.imv == IMV_4PEL ? 
MV_FRACTIONAL_BITS_INTERNAL + 2 : MV_FRACTIONAL_BITS_INTERNAL); xRefineMvSearch<tplSize, TplMatchingCtrl::TMSEARCH_DIAMOND>(m_maxSearchRounds, searchStepShift); xRefineMvSearch<tplSize, TplMatchingCtrl::TMSEARCH_CROSS >( 1, searchStepShift); xRefineMvSearch<tplSize, TplMatchingCtrl::TMSEARCH_CROSS >( 1, searchStepShift - 1); #if MULTI_PASS_DMVR if (!m_pu.bdmvrRefine) { #endif xRefineMvSearch<tplSize, TplMatchingCtrl::TMSEARCH_CROSS >( 1, searchStepShift - 2); xRefineMvSearch<tplSize, TplMatchingCtrl::TMSEARCH_CROSS >( 1, searchStepShift - 3); #if JVET_AA0093_REFINED_MOTION_FOR_ARMC xRefineMvSearch<tplSize, TplMatchingCtrl::TMSEARCH_CROSS >( 1, searchStepShift - 4); #endif #if MULTI_PASS_DMVR } else { xDeriveCostBasedMv<TplMatchingCtrl::TMSEARCH_CROSS>(); } #endif } template <int tplSize> void TplMatchingCtrl::removeHighFreq(const Picture& otherRefPic, const Mv& otherRefMv, const uint8_t curRefBcwWeight) { xRemoveHighFreq<tplSize, true>(otherRefPic, otherRefMv, curRefBcwWeight); xRemoveHighFreq<tplSize, false>(otherRefPic, otherRefMv, curRefBcwWeight); } template <int tplSize, bool trueAfalseL> bool TplMatchingCtrl::xFillCurTemplate(Pel* tpl) { const Position posOffset = trueAfalseL ? Position(0, -tplSize) : Position(-tplSize, 0); const CodingUnit* const cuNeigh = m_cu.cs->getCU(m_pu.blocks[m_compID].pos().offset(posOffset), toChannelType(m_compID)); if (cuNeigh == nullptr) { return false; } if (tpl == nullptr) { return true; } #if JVET_Z0084_IBC_TM // Stay in reference region for IBC if( CU::isIBC(m_cu) ) { const int cuPelX = m_pu.lx(); const int cuPelY = m_pu.ly(); const int roiWidth = trueAfalseL ? m_pu.lwidth() : tplSize; const int roiHeight = trueAfalseL ? tplSize : m_pu.lheight(); const int picWidth = m_pu.cs->slice->getPPS()->getPicWidthInLumaSamples(); const int picHeight = m_pu.cs->slice->getPPS()->getPicHeightInLumaSamples(); const uint32_t ctuSize = m_pu.cs->slice->getSPS()->getMaxCUWidth(); const Mv tempBv = trueAfalseL ? 
Mv(0, -tplSize) : Mv(-tplSize, 0); if (!PU::searchBv(m_pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, tempBv.getHor(), tempBv.getVer(), ctuSize)) { return false; } } #endif const Picture& currPic = *m_cu.cs->picture; const CPelBuf recBuf = currPic.getRecoBuf(m_cu.cs->picture->blocks[m_compID]); std::vector<Pel>& invLUT = m_interRes.m_pcReshape->getInvLUT(); const bool useLUT = isLuma(m_compID) && m_cu.cs->picHeader->getLmcsEnabledFlag() && m_interRes.m_pcReshape->getCTUFlag(); #if JVET_W0097_GPM_MMVD_TM & TM_MRG if (m_cu.geoFlag) { CHECK(m_pu.geoTmType == GEO_TM_OFF, "invalid geo template type value"); if (m_pu.geoTmType == GEO_TM_SHAPE_A) { if (trueAfalseL == 0) { return false; } } if (m_pu.geoTmType == GEO_TM_SHAPE_L) { if (trueAfalseL == 1) { return false; } } } #endif const Size dstSize = (trueAfalseL ? Size(m_pu.lwidth(), tplSize) : Size(tplSize, m_pu.lheight())); for (int h = 0; h < (int)dstSize.height; h++) { const Position recPos = trueAfalseL ? Position(0, -tplSize + h) : Position(-tplSize, h); const Pel* rec = recBuf.bufAt(m_pu.blocks[m_compID].pos().offset(recPos)); Pel* dst = tpl + h * dstSize.width; for (int w = 0; w < (int)dstSize.width; w++) { int recVal = rec[w]; dst[w] = useLUT ? invLUT[recVal] : recVal; } } return true; } template <int tplSize, bool trueAfalseL, int sr> PelBuf TplMatchingCtrl::xGetRefTemplate(const PredictionUnit& curPu, const Picture& refPic, const Mv& _mv, PelBuf& dstBuf) { #if JVET_Z0084_IBC_TM // Stay in reference region for IBC if( CU::isIBC(m_cu) ) { const int cuPelX = m_pu.lx(); const int cuPelY = m_pu.ly(); const int roiWidth = trueAfalseL ? m_pu.lwidth() : tplSize; const int roiHeight = trueAfalseL ? 
tplSize : m_pu.lheight(); const int picWidth = m_pu.cs->slice->getPPS()->getPicWidthInLumaSamples(); const int picHeight = m_pu.cs->slice->getPPS()->getPicHeightInLumaSamples(); const uint32_t ctuSize = m_pu.cs->slice->getSPS()->getMaxCUWidth(); Mv tempBv = _mv; tempBv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT); tempBv += trueAfalseL ? Mv(0, -tplSize) : Mv(-tplSize, 0); if (!PU::searchBv(m_pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, tempBv.getHor(), tempBv.getVer(), ctuSize)) { return PelBuf(); } } #endif // read from pre-interpolated buffer PelBuf& refSrBuf = trueAfalseL ? m_refSrAbove : m_refSrLeft; #if JVET_Z0084_IBC_TM if (!CU::isIBC(m_cu) && sr == 0 && refPic.getPOC() == m_refPic.getPOC() && refSrBuf.buf != nullptr) #else if (sr == 0 && refPic.getPOC() == m_refPic.getPOC() && refSrBuf.buf != nullptr) #endif { Mv mvDiff = _mv - m_mvStart; if ((mvDiff.getAbsHor() & ((1 << MV_FRACTIONAL_BITS_INTERNAL) - 1)) == 0 && (mvDiff.getAbsVer() & ((1 << MV_FRACTIONAL_BITS_INTERNAL) - 1)) == 0) { mvDiff >>= MV_FRACTIONAL_BITS_INTERNAL; if (mvDiff.getAbsHor() <= TM_SEARCH_RANGE && mvDiff.getAbsVer() <= TM_SEARCH_RANGE) { return refSrBuf.subBuf(Position(mvDiff.getHor(), mvDiff.getVer()), dstBuf); } } } // Do interpolation on the fly Position blkPos = ( trueAfalseL ? 
Position(curPu.lx(), curPu.ly() - tplSize) : Position(curPu.lx() - tplSize, curPu.ly()) ); Size blkSize = Size(dstBuf.width, dstBuf.height); Mv mv = _mv - Mv(sr << MV_FRACTIONAL_BITS_INTERNAL, sr << MV_FRACTIONAL_BITS_INTERNAL); #if JVET_AA0146_WRAP_AROUND_FIX bool wrapRef = false; #if JVET_Z0084_IBC_TM if( !CU::isIBC(m_cu) ) #endif { if ( m_cu.cs->pps->getWrapAroundEnabledFlag() ) { wrapRef = wrapClipMv(mv, blkPos, blkSize, m_cu.cs->sps, m_cu.cs->pps); } else { clipMv( mv, blkPos, blkSize, *m_cu.cs->sps, *m_cu.cs->pps ); } } #else #if JVET_Z0084_IBC_TM if( !CU::isIBC(m_cu) ) #endif clipMv( mv, blkPos, blkSize, *m_cu.cs->sps, *m_cu.cs->pps ); #endif const int lumaShift = 2 + MV_FRACTIONAL_BITS_DIFF; const int horShift = (lumaShift + ::getComponentScaleX(m_compID, m_cu.chromaFormat)); const int verShift = (lumaShift + ::getComponentScaleY(m_compID, m_cu.chromaFormat)); const int xInt = mv.getHor() >> horShift; const int yInt = mv.getVer() >> verShift; const int xFrac = mv.getHor() & ((1 << horShift) - 1); const int yFrac = mv.getVer() & ((1 << verShift) - 1); #if JVET_AA0146_WRAP_AROUND_FIX const CPelBuf refBuf = refPic.getRecoBuf(refPic.blocks[m_compID], wrapRef); #else const CPelBuf refBuf = refPic.getRecoBuf(refPic.blocks[m_compID]); #endif const Pel* ref = refBuf.bufAt(blkPos.offset(xInt, yInt)); Pel* dst = dstBuf.buf; int refStride = refBuf.stride; int dstStride = dstBuf.stride; int bw = (int)blkSize.width; int bh = (int)blkSize.height; const int nFilterIdx = 1; const bool useAltHpelIf = false; const bool biMCForDMVR = false; if ( yFrac == 0 ) { m_interRes.m_if.filterHor( m_compID, (Pel*) ref, refStride, dst, dstStride, bw, bh, xFrac, true, m_cu.chromaFormat, m_cu.slice->clpRng(m_compID), nFilterIdx, biMCForDMVR, useAltHpelIf ); } else if ( xFrac == 0 ) { m_interRes.m_if.filterVer( m_compID, (Pel*) ref, refStride, dst, dstStride, bw, bh, yFrac, true, true, m_cu.chromaFormat, m_cu.slice->clpRng(m_compID), nFilterIdx, biMCForDMVR, useAltHpelIf ); } else { const 
int vFilterSize = isLuma(m_compID) ? NTAPS_BILINEAR : NTAPS_CHROMA;
    PelBuf tmpBuf = PelBuf(m_interRes.m_ifBuf, Size(bw, bh+vFilterSize-1));
    // Two-pass interpolation: the horizontal pass writes bh+vFilterSize-1 rows into tmpBuf,
    // the vertical pass then reads them back and writes the bw x bh result into dst.
    m_interRes.m_if.filterHor( m_compID, (Pel*)ref - ((vFilterSize>>1) -1)*refStride, refStride, tmpBuf.buf, tmpBuf.stride, bw, bh+vFilterSize-1, xFrac, false, m_cu.chromaFormat, m_cu.slice->clpRng(m_compID), nFilterIdx, biMCForDMVR, useAltHpelIf );
    JVET_J0090_SET_CACHE_ENABLE( false );
    m_interRes.m_if.filterVer( m_compID, tmpBuf.buf + ((vFilterSize>>1) -1)*tmpBuf.stride, tmpBuf.stride, dst, dstStride, bw, bh, yFrac, false, true, m_cu.chromaFormat, m_cu.slice->clpRng(m_compID), nFilterIdx, biMCForDMVR, useAltHpelIf );
    JVET_J0090_SET_CACHE_ENABLE( true );
  }

  return dstBuf;
}

// Fetches the reference template addressed by (otherRefPic, otherRefMv) and hands it to
// removeHighFreq() / removeWeightHighFreq() of the current template buffer.
//   tplSize         : template size forwarded to xGetRefTemplate
//   trueAfalseL     : true selects the above template buffers, false the left ones
//   curRefBcwWeight : compared against g_BcwWeights[BCW_DEFAULT] to choose the weighted variant
// Nothing is done when the selected current template buffer is absent (buf == nullptr).
template <int tplSize, bool trueAfalseL>
void TplMatchingCtrl::xRemoveHighFreq(const Picture& otherRefPic, const Mv& otherRefMv, const uint8_t curRefBcwWeight)
{
  PelBuf& curTplBuf = trueAfalseL ? m_curTplAbove : m_curTplLeft;
  PelBuf  refTplBuf = trueAfalseL ? m_refTplAbove : m_refTplLeft;

  if (curTplBuf.buf != nullptr)
  {
    // refTplBuf is re-assigned: xGetRefTemplate may return a view different from the buffer passed in
    refTplBuf = xGetRefTemplate<tplSize, trueAfalseL, 0>(m_pu, otherRefPic, otherRefMv, refTplBuf);
    if (curRefBcwWeight != g_BcwWeights[BCW_DEFAULT])
    {
      curTplBuf.removeWeightHighFreq(refTplBuf, false, m_cu.slice->clpRng(m_compID), curRefBcwWeight);
    }
    else
    {
      curTplBuf.removeHighFreq(refTplBuf, false, m_cu.slice->clpRng(m_compID));
    }
  }
}

// Iterative pattern search around m_mvFinal that updates m_mvFinal / m_minCost in place.
//   tplSize         : template size used for the matching cost and the IBC validity checks
//   searchPattern   : TMSEARCH_CROSS (4 offsets) or TMSEARCH_DIAMOND (8 offsets); anything else hits CHECK
//   maxSearchRounds : upper bound on the number of rounds
//   searchStepShift : left shift applied to the unit pattern offsets (search step size)
// The function returns without searching when the step is finer than the precision table entry
// for m_cu.imv allows (see the first guard), or, for IBC, when the step is sub-integer.
template <int tplSize, int searchPattern>
void TplMatchingCtrl::xRefineMvSearch(int maxSearchRounds, int searchStepShift)
{
  // Indexed by m_cu.imv
  static const int finestMvdPrec[NUM_IMV_MODES] = { MV_FRACTIONAL_BITS_INTERNAL - 2, MV_FRACTIONAL_BITS_INTERNAL, MV_FRACTIONAL_BITS_INTERNAL + 2, MV_FRACTIONAL_BITS_INTERNAL - 1 };
  if (searchStepShift < finestMvdPrec[m_cu.imv] && (!m_pu.mergeFlag || m_cu.imv == IMV_HPEL))
  {
    return;
  }
#if JVET_Z0084_IBC_TM
  // Limit to integer pel search for IBC
  if( CU::isIBC(m_cu) && (searchStepShift < MV_FRACTIONAL_BITS_INTERNAL) )
  {
    return;
  }
#endif

  // Search pattern configuration
  static const Mv patternCross  [4] = { Mv(0, 1), Mv(1, 0), Mv(0, -1), Mv(-1, 0) };
  static const Mv patternDiamond[8] = { Mv(0, 2), Mv(1, 1), Mv(2, 0), Mv(1, -1), Mv(0, -2), Mv(-1, -1), Mv(-2, 0), Mv(-1, 1) };
  // directStart..directEnd is the window of direction indices tested in a round;
  // directRounding/directMask wrap an index (possibly negative after narrowing) into the pattern table.
  int directStart = 0, directEnd = 0, directRounding = 0, directMask = 0;
  const Mv *pSearchOffset = nullptr;
#if MULTI_PASS_DMVR
  Distortion *costArray = nullptr;
#endif
  if (searchPattern == TMSEARCH_CROSS)
  {
    directEnd      = 3;
    directRounding = 4;
    directMask     = 0x03;
    pSearchOffset  = patternCross;
#if MULTI_PASS_DMVR
    // memset with -1 sets every byte to 0xFF; slot 4 then receives the cost of the current centre
    memset(m_tmCostArrayCross, -1, sizeof(m_tmCostArrayCross));
    costArray = m_tmCostArrayCross;
    costArray[4] = m_minCost;
#endif
  }
  else if (searchPattern == TMSEARCH_DIAMOND)
  {
    directEnd      = 7;
    directRounding = 8;
    directMask     = 0x07;
    pSearchOffset  = patternDiamond;
#if MULTI_PASS_DMVR
    // Same layout as the cross case, with the centre cost kept in slot 8
    memset(m_tmCostArrayDiamond, -1, sizeof(m_tmCostArrayDiamond));
    costArray = m_tmCostArrayDiamond;
    costArray[8] = m_minCost;
#endif
  }
  else
  {
    CHECK(true, "Unknown search method for TM");
  }

#if JVET_Z0084_IBC_TM
  // Geometry passed to PU::searchBv() for the IBC validity checks inside the loop
  const int      cuPelX    = m_pu.lx();
  const int      cuPelY    = m_pu.ly();
  const int      roiWidth  = m_pu.lwidth();
  const int      roiHeight = m_pu.lheight();
  const int      picWidth  = m_pu.cs->slice->getPPS()->getPicWidthInLumaSamples();
  const int      picHeight = m_pu.cs->slice->getPPS()->getPicHeightInLumaSamples();
  const uint32_t ctuSize   = m_pu.cs->slice->getSPS()->getMaxCUWidth();
#endif

  // Iterative search
  for (int uiRound = 0; uiRound < maxSearchRounds; uiRound++)
  {
    int directBest = -1;          // stays -1 when no candidate of this round improves m_minCost
    Mv  mvCurCenter(m_mvFinal);   // centre is frozen for the round; m_mvFinal may change inside it
#if JVET_X0056_DMVD_EARLY_TERMINATION
    Distortion prevMinCost = m_minCost;
#endif

    for (int nIdx = directStart; nIdx <= directEnd; nIdx++)
    {
      int nDirect = (nIdx + directRounding) & directMask;

      Mv mvOffset = pSearchOffset[nDirect];
      mvOffset <<= searchStepShift;
      Mv mvCand = mvCurCenter + mvOffset;

#if JVET_Z0084_IBC_TM
      // Stay in reference region for IBC
      if( CU::isIBC(m_cu) )
      {
        Mv tempBv = mvCand;
        tempBv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT);
        // The candidate is skipped if the block itself, or an available above/left template
        // shifted by tplSize, fails PU::searchBv().
        if (!PU::searchBv(m_pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, tempBv.getHor(), tempBv.getVer(), ctuSize)
          || (m_curTplAbove.buf != nullptr && !PU::searchBv(m_pu, cuPelX, cuPelY, roiWidth, tplSize, picWidth, picHeight, tempBv.getHor(), tempBv.getVer()-tplSize, ctuSize))
          || (m_curTplLeft.buf != nullptr && !PU::searchBv(m_pu, cuPelX, cuPelY, tplSize, roiHeight, picWidth, picHeight, tempBv.getHor()-tplSize, tempBv.getVer(), ctuSize)))
        {
          continue;
        }
      }
#endif

      Distortion cost = InterPrediction::getDecoderSideDerivedMvCost(m_mvStart, mvCand, TM_SEARCH_RANGE, DECODER_SIDE_MV_WEIGHT); // MV cost is used just for skipping search
      // Also skip a candidate identical to the MV of the other reference list, when one is registered
      if (cost >= m_minCost || (m_otherRefListMv != nullptr && *m_otherRefListMv == mvCand))
      {
        continue;
      }

      // From here on 'cost' is the template matching error, not the MV cost
      cost = xGetTempMatchError<tplSize>(mvCand);
#if MULTI_PASS_DMVR
      costArray[nDirect] = cost;
#endif
      if (cost < m_minCost)
      {
        m_minCost  = cost;
        m_mvFinal  = mvCand;
        directBest = nDirect;
      }
    }

    if (directBest == -1)
    {
      break;
    }
#if JVET_X0056_DMVD_EARLY_TERMINATION
    // Stop once the gain of a round (after the first) is below m_earlyTerminateTh
    if (uiRound > 0 && prevMinCost < m_minCost + m_earlyTerminateTh)
    {
      break;
    }
#endif

    // Next round only tests directions around the best one found in this round
    int nStep = searchPattern == TMSEARCH_DIAMOND ? (2 - (directBest & 0x01)) : 1;
    directStart = directBest - nStep;
    directEnd   = directBest + nStep;
#if MULTI_PASS_DMVR
    if ((uiRound + 1) < maxSearchRounds)
    {
      xNextTmCostAarray<searchPattern>(directBest);
    }
#endif
  }
}

#if MULTI_PASS_DMVR
// Updates the logged pattern costs after the search centre moved to direction bestDirect.
template <int searchPattern>
void TplMatchingCtrl::xNextTmCostAarray(int bestDirect)
{
  Distortion *costLog = searchPattern == TMSEARCH_CROSS ? m_tmCostArrayCross : (searchPattern == TMSEARCH_DIAMOND ?
m_tmCostArrayDiamond : nullptr); if (searchPattern == TMSEARCH_CROSS) { CHECK(bestDirect < 0 || bestDirect > 3, "Error: Unknown bestDirect"); int prevCenter = (bestDirect + 2) & 0x3; costLog[prevCenter] = costLog[4]; costLog[4] = costLog[bestDirect]; for (int offset = 1; offset < 4; ++offset) { costLog[(prevCenter + offset + 4) & 0x3] = std::numeric_limits<Distortion>::max(); } } else if (searchPattern == TMSEARCH_DIAMOND) { } else { CHECK(true, "Unknown search method for TM"); } } template <int searchPattern> void TplMatchingCtrl::xDeriveCostBasedMv() { if (m_minCost == 0) { return; } if (searchPattern == TMSEARCH_CROSS) { xDeriveCostBasedOffset<true >(m_tmCostArrayCross[3], m_tmCostArrayCross[4], m_tmCostArrayCross[1], 0); xDeriveCostBasedOffset<false>(m_tmCostArrayCross[2], m_tmCostArrayCross[4], m_tmCostArrayCross[0], 0); } else { CHECK(true, "Unknown search method for TM"); } } template <bool TrueX_FalseY> void TplMatchingCtrl::xDeriveCostBasedOffset(Distortion costLorA, Distortion costCenter, Distortion costRorB, int log2StepSize) { if (!m_pu.mergeFlag || m_cu.imv != IMV_OFF) { return; } if (costLorA == std::numeric_limits<Distortion>::max() || costRorB == std::numeric_limits<Distortion>::max() || (costCenter > costLorA || costCenter > costRorB)) { return; } const int extraMvFracBit = MV_FRACTIONAL_BITS_INTERNAL - 1; int& mvComp = TrueX_FalseY ? m_mvFinal.hor : m_mvFinal.ver; int64_t numerator = (int64_t)(costLorA - costRorB); int64_t denominator = (int64_t)((costLorA + costRorB - (costCenter << 1)) << 1); if (denominator != 0) { if (costCenter != costLorA && costCenter != costRorB) { if (extraMvFracBit > 1 || log2StepSize > 1) { mvComp += xBinaryDivision(numerator, denominator, extraMvFracBit + log2StepSize); } } else { const int off = 1 << (extraMvFracBit - 1); mvComp += ((costCenter == costLorA ? 
-off : off) << log2StepSize); } } } int TplMatchingCtrl::xBinaryDivision(int64_t numerator, int64_t denominator, int fracBits) { if (fracBits < 2) // Because the result of division is assumed to be less than 0.5 { return 0; } int sign = 0; if (numerator < 0) { sign = 1; numerator = -numerator; } numerator <<= fracBits; denominator <<= (fracBits - 2); // This "-2" is by the assumption that the result of division is always less than 0.5 int quotient = 0; for (int binIdx = 0; binIdx < fracBits - 2; ++binIdx) { if (numerator >= denominator) { numerator -= denominator; ++quotient; } quotient <<= 1; denominator >>= 1; } if (numerator >= denominator) { ++quotient; } return sign ? -quotient : quotient; } #endif template <int tplSize> Distortion TplMatchingCtrl::xGetTempMatchError(const Mv& mv) { if (!getTemplatePresentFlag()) { return std::numeric_limits<Distortion>::max(); } Distortion sum = 0; sum += xGetTempMatchError<tplSize, true >(mv); sum += xGetTempMatchError<tplSize, false>(mv); return sum; } template <int tplSize, bool trueAfalseL> Distortion TplMatchingCtrl::xGetTempMatchError(const Mv& mv) { PelBuf& curTplBuf = trueAfalseL ? m_curTplAbove : m_curTplLeft; PelBuf refTplBuf = trueAfalseL ? m_refTplAbove : m_refTplLeft; if (curTplBuf.buf == nullptr) { return 0; } const int rowSubShift = 0; const int bitDepth = m_cu.slice->clpRng(m_compID).bd; // fetch reference template block refTplBuf = xGetRefTemplate<tplSize, trueAfalseL, 0>(m_pu, m_refPic, mv, refTplBuf); #if JVET_Z0084_IBC_TM if (refTplBuf.buf == nullptr) { return std::numeric_limits<Distortion>::max(); } #endif // compute matching cost Distortion partSum = 0; if (m_useWeight) { DistParam cDistParam; cDistParam.applyWeight = false; #if INTER_LIC cDistParam.useMR = m_cu.LICFlag; #endif int tmWeightIdx = (m_pu.lwidth() >= TM_MIN_CU_SIZE_FOR_ALT_WEIGHTED_COST && m_pu.lheight() >= TM_MIN_CU_SIZE_FOR_ALT_WEIGHTED_COST ? 
1 : 0); m_interRes.m_pcRdCost->setDistParam( cDistParam, curTplBuf, refTplBuf, bitDepth, trueAfalseL, tmWeightIdx, rowSubShift, m_compID ); #if !JVET_AA0093_REFINED_MOTION_FOR_ARMC CHECK(TM_TPL_SIZE != 4, "The distortion function of template matching is implemetned currently only for size=4."); #endif partSum = cDistParam.distFunc( cDistParam ); } else { DistParam cDistParam; cDistParam.applyWeight = false; #if INTER_LIC cDistParam.useMR = m_cu.LICFlag; #endif m_interRes.m_pcRdCost->setDistParam( cDistParam, curTplBuf, refTplBuf, bitDepth, m_compID, false ); cDistParam.subShift = rowSubShift; partSum = cDistParam.distFunc( cDistParam ); #if FULL_NBIT partSum >>= (bitDepth > 8 ? bitDepth - 8 : 0); #endif } return partSum; } #endif // TM_AMVP || TM_MRG || JVET_Z0084_IBC_TM #if TM_AMVP || TM_MRG || JVET_Z0084_IBC_TM || MULTI_PASS_DMVR Distortion InterPrediction::getDecoderSideDerivedMvCost(const Mv& mvStart, const Mv& mvCur, int searchRangeInFullPel, int weight) { int searchRange = searchRangeInFullPel << MV_FRACTIONAL_BITS_INTERNAL; Mv mvDist = mvStart - mvCur; Distortion cost = std::numeric_limits<Distortion>::max(); if (mvDist.getAbsHor() <= searchRange && mvDist.getAbsVer() <= searchRange) { cost = (mvDist.getAbsHor() + mvDist.getAbsVer()) * weight; cost >>= MV_FRACTIONAL_BITS_DIFF; } return cost; } #if MULTI_PASS_DMVR void InterPrediction::xBDMVRUpdateSquareSearchCostLog( Distortion* costLog, int bestDirect ) { CHECK(bestDirect < 0 || bestDirect > 7, "Error: Unknown bestDirect"); int prevCenter = ( bestDirect + 4 ) & 0x7; costLog[prevCenter] = costLog[8]; costLog[8] = costLog[bestDirect]; if( prevCenter & 0x1 ) { costLog[( prevCenter - 1 + 8 ) & 0x7] = costLog[( prevCenter - 2 + 8 ) & 0x7]; costLog[( prevCenter + 1 + 8 ) & 0x7] = costLog[( prevCenter + 2 + 8 ) & 0x7]; costLog[( prevCenter - 2 + 8 ) & 0x7] = costLog[( prevCenter - 3 + 8 ) & 0x7]; costLog[( prevCenter + 2 + 8 ) & 0x7] = costLog[( prevCenter + 3 + 8 ) & 0x7]; for( int offset = 3 ; offset < 6 ; ++ 
offset ) { costLog[( prevCenter + offset + 8 ) & 0x7] = std::numeric_limits<Distortion>::max(); } } else { costLog[( prevCenter - 1 + 8 ) & 0x7] = costLog[( prevCenter - 3 + 8 ) & 0x7]; costLog[( prevCenter + 1 + 8 ) & 0x7] = costLog[( prevCenter + 3 + 8 ) & 0x7]; for( int offset = 2 ; offset < 7 ; ++ offset ) { costLog[( prevCenter + offset + 8 ) & 0x7] = std::numeric_limits<Distortion>::max(); } } } #endif #endif #if TM_AMVP void InterPrediction::clearTplAmvpBuffer() { for (int imv = 0; imv < NUM_IMV_MODES; ++imv) { for (int refIdx = 0; refIdx < MAX_NUM_REF; ++refIdx) { m_tplAmvpInfo [imv][0][refIdx] = AMVPInfo(); m_tplAmvpInfo [imv][1][refIdx] = AMVPInfo(); #if INTER_LIC m_tplAmvpInfoLIC[imv][0][refIdx] = AMVPInfo(); m_tplAmvpInfoLIC[imv][1][refIdx] = AMVPInfo(); #endif } } } void InterPrediction::writeTplAmvpBuffer(const AMVPInfo& src, const CodingUnit& cu, RefPicList eRefList, int refIdx) { #if INTER_LIC AMVPInfo& dst = (cu.LICFlag ? m_tplAmvpInfoLIC : m_tplAmvpInfo)[cu.imv][eRefList][refIdx]; #else AMVPInfo& dst = m_tplAmvpInfo[cu.imv][eRefList][refIdx]; #endif dst = src; } bool InterPrediction::readTplAmvpBuffer(AMVPInfo& dst, const CodingUnit& cu, RefPicList eRefList, int refIdx) { #if INTER_LIC AMVPInfo& src = (cu.LICFlag ? 
m_tplAmvpInfoLIC : m_tplAmvpInfo)[cu.imv][eRefList][refIdx]; #else AMVPInfo& src = m_tplAmvpInfo[cu.imv][eRefList][refIdx]; #endif if (src.numCand > 0) { dst = src; return true; } return false; } #endif #if MULTI_PASS_DMVR #if JVET_X0049_ADAPT_DMVR bool InterPrediction::processBDMVRPU2Dir(PredictionUnit& pu, bool subPURefine[2], Mv(&finalMvDir)[2]) { const int lumaArea = pu.lumaSize().area(); bool bUseMR = lumaArea > 64; #if JVET_Y0089_DMVR_BCW bUseMR |= (pu.cu->BcwIdx != BCW_DEFAULT); #endif subPURefine[0] = subPURefine[1] = true; Distortion minCost = std::numeric_limits<Distortion>::max(); Mv mvInitial_PU[2] = { pu.mv[0], pu.mv[1] }; Mv mvFinal[2] = { pu.mv[0], pu.mv[1] }; Distortion initCost = xBDMVRGetMatchingError(pu, mvInitial_PU, bUseMR, false); if (initCost < lumaArea) { subPURefine[0] = false; subPURefine[1] = false; finalMvDir[0] = mvFinal[0]; finalMvDir[1] = mvFinal[1]; return false; } #if JVET_AA0093_REFINED_MOTION_FOR_ARMC if (pu.cu->cs->pcv->isEncoder || (!pu.cu->cs->pcv->isEncoder && pu.bmDir == 1)) { #endif minCost = xBDMVRMvOneTemplateHPelSquareSearch<1>(mvFinal, initCost, pu, mvInitial_PU, 2, MV_FRACTIONAL_BITS_INTERNAL - 1, bUseMR, false); subPURefine[0] = minCost >= lumaArea; finalMvDir[0] = mvFinal[0]; #if JVET_AA0093_REFINED_MOTION_FOR_ARMC } #endif #if JVET_AA0093_REFINED_MOTION_FOR_ARMC if (pu.cu->cs->pcv->isEncoder || (!pu.cu->cs->pcv->isEncoder && pu.bmDir == 2)) { #endif mvFinal[0] = mvInitial_PU[0]; mvFinal[1] = mvInitial_PU[1]; minCost = xBDMVRMvOneTemplateHPelSquareSearch<2>(mvFinal, initCost, pu, mvInitial_PU, 2, MV_FRACTIONAL_BITS_INTERNAL - 1, bUseMR, false); subPURefine[1] = minCost >= lumaArea; finalMvDir[1] = mvFinal[1]; #if JVET_AA0093_REFINED_MOTION_FOR_ARMC } #endif return true; } void InterPrediction::processBDMVRSubPU(PredictionUnit& pu, bool subPURefine) { if (!subPURefine) { // span motion to subPU const int dy = std::min<int>(pu.lumaSize().height, DMVR_SUBCU_HEIGHT); const int dx = std::min<int>(pu.lumaSize().width, 
DMVR_SUBCU_WIDTH);

    Position puPos = pu.lumaPos();

    int subPuIdx = 0;
    // Added after every sub-PU row so that one row advances subPuIdx by DMVR_SUBPU_STRIDE in total
    const int dmvrSubPuStrideIncr = DMVR_SUBPU_STRIDE - std::max(1, (int)(pu.lumaSize().width >> DMVR_SUBCU_WIDTH_LOG2));

    // NOTE(review): yStart / xStart are advanced but never read inside the loops
    for (int y = puPos.y, yStart = 0; y < (puPos.y + pu.lumaSize().height); y = y + dy, yStart = yStart + dy)
    {
      for (int x = puPos.x, xStart = 0; x < (puPos.x + pu.lumaSize().width); x = x + dx, xStart = xStart + dx)
      {
        m_bdmvrSubPuMvBuf[REF_PIC_LIST_0][subPuIdx] = pu.mv[0];
        m_bdmvrSubPuMvBuf[REF_PIC_LIST_1][subPuIdx] = pu.mv[1];
        subPuIdx++;
      }
      subPuIdx += dmvrSubPuStrideIncr;
    }
    return;
  }

  // Sub-PU size is capped at DMVR_SUBCU_WIDTH x DMVR_SUBCU_HEIGHT
  const int dy = std::min<int>(pu.lumaSize().height, DMVR_SUBCU_HEIGHT);
  const int dx = std::min<int>(pu.lumaSize().width, DMVR_SUBCU_WIDTH);

  Position puPos = pu.lumaPos();
  PredictionUnit subPu = pu;   // working copy whose area is moved to each sub-PU in turn

  int subPuIdx = 0;
  // Added after every sub-PU row so that one row advances subPuIdx by DMVR_SUBPU_STRIDE in total
  const int dmvrSubPuStrideIncr = DMVR_SUBPU_STRIDE - std::max(1, (int)(pu.lumaSize().width >> DMVR_SUBCU_WIDTH_LOG2));

  Distortion minCost = std::numeric_limits<Distortion>::max();
  const Mv mvInitial[2] = { pu.mv[0], pu.mv[1] };
  Mv mvFinal[2] = { pu.mv[0], pu.mv[1] };
  Mv mvOffset;   // output of the integer search (shifted to internal precision below)
  const Distortion earlyTerminateTh = dx * dy;
  // The integer search range is limited to half the sub-PU dimension
  const int adaptiveSearchRangeHor = (dx >> 1) < BDMVR_INTME_RANGE ? (dx >> 1) : BDMVR_INTME_RANGE;
  const int adaptiveSearchRangeVer = (dy >> 1) < BDMVR_INTME_RANGE ? (dy >> 1) : BDMVR_INTME_RANGE;
  const bool adaptRange = (adaptiveSearchRangeHor != BDMVR_INTME_RANGE || adaptiveSearchRangeVer != BDMVR_INTME_RANGE);
  const int maxSearchRound = pu.bmMergeFlag ? BM_MRG_SUB_PU_INT_MAX_SRCH_ROUND : BDMVR_INTME_FULL_SEARCH_MAX_NUM_ITERATIONS;

  // prepare cDistParam for cost calculation
  DistParam cDistParam;
  cDistParam.applyWeight = false;
  cDistParam.useMR = false;
#if JVET_Y0089_DMVR_BCW
  cDistParam.useMR |= (pu.cu->BcwIdx != BCW_DEFAULT);
#endif
  // pelBuffer[] point BDMVR_CENTER_POSITION samples into the pre-interpolated blocks filled below
  Pel* pelBuffer[2] = { nullptr, nullptr };
  pelBuffer[0] = m_filteredBlock[3][REF_PIC_LIST_0][0] + BDMVR_CENTER_POSITION;
  pelBuffer[1] = m_filteredBlock[3][REF_PIC_LIST_1][0] + BDMVR_CENTER_POSITION;
  PelUnitBuf predBuf[2] = { PelUnitBuf(pu.chromaFormat, PelBuf(pelBuffer[REF_PIC_LIST_0], BDMVR_BUF_STRIDE, dx, dy)),
                            PelUnitBuf(pu.chromaFormat, PelBuf(pelBuffer[REF_PIC_LIST_1], BDMVR_BUF_STRIDE, dx, dy)) };
  bool useHadamard = true; // STAD cost function
  m_pcRdCost->setDistParam(cDistParam, predBuf[0].Y(), predBuf[1].Y(), pu.cu->slice->clpRng(COMPONENT_Y).bd, COMPONENT_Y, useHadamard);

  // prepare buffer for pre-interpolation
  const Picture& refPic0 = *pu.cu->slice->getRefPic(REF_PIC_LIST_0, pu.refIdx[REF_PIC_LIST_0])->unscaledPic;
  const Picture& refPic1 = *pu.cu->slice->getRefPic(REF_PIC_LIST_1, pu.refIdx[REF_PIC_LIST_1])->unscaledPic;
  // The extended block covers the sub-PU plus BDMVR_INTME_RANGE samples on every side
  int iWidthExt = dx + (BDMVR_INTME_RANGE << 1);
  int iHeightExt = dy + (BDMVR_INTME_RANGE << 1);
  int iWidthOffset = BDMVR_SIMD_IF_FACTOR - (iWidthExt & (BDMVR_SIMD_IF_FACTOR - 1));
  iWidthOffset &= (BDMVR_SIMD_IF_FACTOR - 1);
  iWidthExt += iWidthOffset; // This ensures that iWidthExt is a factor-of-n number, assuming BDMVR_SIMD_IF_FACTOR is equal to n
  PelUnitBuf predBufExt[2] = { (PelUnitBuf(pu.chromaFormat, PelBuf(m_filteredBlock[3][REF_PIC_LIST_0][0], BDMVR_BUF_STRIDE, iWidthExt, iHeightExt))),
                               (PelUnitBuf(pu.chromaFormat, PelBuf(m_filteredBlock[3][REF_PIC_LIST_1][0], BDMVR_BUF_STRIDE, iWidthExt, iHeightExt))) };
  // MVs addressing the top-left corner of the extended (search range) block
  Mv mvTopLeft[2] = { mvInitial[0] - Mv((BDMVR_INTME_RANGE << MV_FRACTIONAL_BITS_INTERNAL), (BDMVR_INTME_RANGE << MV_FRACTIONAL_BITS_INTERNAL)),
                      mvInitial[1] - Mv((BDMVR_INTME_RANGE << MV_FRACTIONAL_BITS_INTERNAL), (BDMVR_INTME_RANGE << MV_FRACTIONAL_BITS_INTERNAL)) };

  // NOTE(review): yStart / xStart are advanced but never read inside the loops
  for (int y = puPos.y, yStart = 0; y < (puPos.y + pu.lumaSize().height); y = y + dy, yStart = yStart + dy)
  {
    for (int x = puPos.x, xStart = 0; x < (puPos.x + pu.lumaSize().width); x = x + dx, xStart = xStart + dx)
    {
      subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(x, y, dx, dy)));
      minCost = std::numeric_limits<Distortion>::max();

      // Pre-interpolation
      xBDMVRFillBlkPredPelBuffer(subPu, refPic0, mvTopLeft[0], predBufExt[0], pu.cs->slice->clpRng(COMPONENT_Y));
      xBDMVRFillBlkPredPelBuffer(subPu, refPic1, mvTopLeft[1], predBufExt[1], pu.cs->slice->clpRng(COMPONENT_Y));

      // Integer-pel search; the first template argument selects the adaptive-range variant
      if (adaptRange)
      {
        minCost = xBDMVRMvIntPelFullSearch<true, true>(mvOffset, minCost, mvInitial, maxSearchRound, adaptiveSearchRangeHor, adaptiveSearchRangeVer, pu.bmMergeFlag, earlyTerminateTh, cDistParam, pelBuffer, BDMVR_BUF_STRIDE);
      }
      else
      {
        minCost = xBDMVRMvIntPelFullSearch<false, true>(mvOffset, minCost, mvInitial, maxSearchRound, adaptiveSearchRangeHor, adaptiveSearchRangeVer, pu.bmMergeFlag, earlyTerminateTh, cDistParam, pelBuffer, BDMVR_BUF_STRIDE);
      }

      if (minCost >= earlyTerminateTh)
      {
        // Index of the best integer offset in m_sadEnlargeArrayBilMrg (row-major, BDMVR_INTME_STRIDE per row);
        // must be computed before mvOffset is shifted to internal precision
        int bestOffsetIdx = (mvOffset.getVer() + BDMVR_INTME_RANGE) * BDMVR_INTME_STRIDE + (mvOffset.getHor() + BDMVR_INTME_RANGE);
        mvOffset <<= MV_FRACTIONAL_BITS_INTERNAL;
        // The offset is applied with opposite signs to the two lists
        mvFinal[0] = mvInitial[0] + mvOffset;
        mvFinal[1] = mvInitial[1] - mvOffset;
        minCost = m_sadEnlargeArrayBilMrg[bestOffsetIdx];
        Distortion tmpCost = getDecoderSideDerivedMvCost(mvInitial[0], mvFinal[0], BDMVR_INTME_RANGE + 1, DECODER_SIDE_MV_WEIGHT);
        // Fractional refinement (step shift MV_FRACTIONAL_BITS_INTERNAL - 1) only when the stored cost is not below the MV cost
        if (minCost >= tmpCost)
        {
          minCost += tmpCost;
          minCost = xBDMVRMvSquareSearch<true>(mvFinal, minCost/*std::numeric_limits<Distortion>::max()*/, subPu, mvInitial, 2, MV_FRACTIONAL_BITS_INTERNAL - 1, false, true);
        }
      }
      else
      {
        // Cost already below the early-termination threshold: keep the integer result
        mvOffset <<= MV_FRACTIONAL_BITS_INTERNAL;
        mvFinal[0] = mvInitial[0] + mvOffset;
        mvFinal[1] = mvInitial[1] - mvOffset;
      }

      m_bdmvrSubPuMvBuf[REF_PIC_LIST_0][subPuIdx] = mvFinal[0];
      m_bdmvrSubPuMvBuf[REF_PIC_LIST_1][subPuIdx] = mvFinal[1];
      subPuIdx++;
    }
    subPuIdx += dmvrSubPuStrideIncr;
  }
}
#endif

#if JVET_AB0112_AFFINE_DMVR
// Bilateral refinement for an affine merge PU.
// Returns false without doing anything when DMVD mode is disabled in the SPS or the slice is
// not a B slice; otherwise runs the two search stages and returns true.
// The search starts from zero vectors and the result is written to entry 0 of
// m_bdmvrSubPuMvBuf for both lists (presumably an MV offset rather than an absolute MV — TODO confirm).
bool InterPrediction::processBDMVR4Affine(PredictionUnit& pu)
{
  if (!pu.cs->slice->getSPS()->getUseDMVDMode() || !pu.cs->slice->isInterB())
  {
    return false;
  }
  CHECK(!pu.mergeFlag, "Merge mode must be used here");
  CHECK(pu.refIdx[0] < 0 || pu.refIdx[1] < 0, "Bilateral DMVR is performed for bi-prediction");
  const int lumaArea = pu.lumaSize().area();
  Mv mvFinal_PU[2];
  Mv mvInitial_PU[2];
  mvFinal_PU[0].setZero();
  mvFinal_PU[1].setZero();
  mvInitial_PU[0].setZero();
  mvInitial_PU[1].setZero();
  {
    Distortion minCost = std::numeric_limits<Distortion>::max();
    bool bUseMR = lumaArea > 64;
#if JVET_Y0089_DMVR_BCW
    bUseMR |= (pu.cu->BcwIdx != BCW_DEFAULT);
#endif
    // Stage 1: step shift MV_FRACTIONAL_BITS_INTERNAL, up to AFFINE_DMVR_MAX_NUM_ITERATIONS rounds
    minCost = xBDMVRMvSquareSearchAffine<false>(mvFinal_PU, minCost, pu, mvInitial_PU, AFFINE_DMVR_MAX_NUM_ITERATIONS, MV_FRACTIONAL_BITS_INTERNAL, bUseMR, false);
    // Stage 2: one bit finer step, skipped when stage 1 already reached zero cost
    if (minCost > 0)
    {
      minCost = xBDMVRMvSquareSearchAffine<true>(mvFinal_PU, minCost, pu, mvInitial_PU, 2, MV_FRACTIONAL_BITS_INTERNAL - 1, bUseMR, false);
    }
  }
  // span motion to subPU
  m_bdmvrSubPuMvBuf[REF_PIC_LIST_0][0] = mvFinal_PU[0];
  m_bdmvrSubPuMvBuf[REF_PIC_LIST_1][0] = mvFinal_PU[1];
  return true;
}
#endif

// Bilateral DMVR for a bi-predicted merge PU: a PU-level search followed by
// per-sub-PU refinement; the per-sub-PU MVs are stored in m_bdmvrSubPuMvBuf.
// Returns false when DMVD mode is disabled in the SPS or the slice is not a B slice.
#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
bool InterPrediction::processBDMVR(PredictionUnit& pu, int step, Distortion* tmCost)
#else
bool InterPrediction::processBDMVR(PredictionUnit& pu)
#endif
{
  if( !pu.cs->slice->getSPS()->getUseDMVDMode() || !pu.cs->slice->isInterB() )
  {
    return false;
  }

  CHECK( !pu.mergeFlag, "Merge mode must be used here" );
  CHECK( pu.refIdx[0] < 0 || pu.refIdx[1] < 0, "Bilateral DMVR is performed for bi-prediction" );

  const int lumaArea = pu.lumaSize().area();
  bool subPURefine = true;
  Mv puOrgMv[2] = { pu.mv[0], pu.mv[1] };   // kept so that pu.mv can be restored later in the function
  {
    Distortion minCost = std::numeric_limits<Distortion>::max();
    bool bUseMR = lumaArea > 64;
#if JVET_Y0089_DMVR_BCW
    bUseMR |= (pu.cu->BcwIdx != BCW_DEFAULT);
#endif

    Mv   mvFinal_PU[2]   = { pu.mv[0], pu.mv[1] };
    Mv   mvInitial_PU[2] = { pu.mv[0], pu.mv[1]
};

    // ---- PU-level search ----
#if JVET_X0049_BDMVR_SW_OPT
#if JVET_X0049_ADAPT_DMVR
    // bmDir 1 / 2: one-sided search, run only when the initial matching error is not below the luma area
    if (pu.bmDir == 1)
    {
      minCost = xBDMVRGetMatchingError(pu, mvInitial_PU, bUseMR, false);
      if (minCost >= lumaArea)
      {
        minCost = xBDMVRMvOneTemplateHPelSquareSearch<1>(mvFinal_PU, minCost, pu, mvInitial_PU, 2, MV_FRACTIONAL_BITS_INTERNAL - 1, bUseMR, false);
      }
    }
    else if (pu.bmDir == 2)
    {
      minCost = xBDMVRGetMatchingError(pu, mvInitial_PU, bUseMR, false);
      if (minCost >= lumaArea)
      {
        minCost = xBDMVRMvOneTemplateHPelSquareSearch<2>(mvFinal_PU, minCost, pu, mvInitial_PU, 2, MV_FRACTIONAL_BITS_INTERNAL - 1, bUseMR, false);
      }
    }
    else
#endif
    {
      // Regular case: search with step shift MV_FRACTIONAL_BITS_INTERNAL, then one bit finer unless the cost is already zero
      minCost = xBDMVRMvSquareSearch<false>( mvFinal_PU, minCost, pu, mvInitial_PU, BDMVR_INTME_SQUARE_SEARCH_MAX_NUM_ITERATIONS, MV_FRACTIONAL_BITS_INTERNAL,     bUseMR, false );
      if (minCost > 0)
      {
        minCost = xBDMVRMvSquareSearch<true>(mvFinal_PU, minCost, pu, mvInitial_PU, 2, MV_FRACTIONAL_BITS_INTERNAL - 1, bUseMR, false);
      }
    }
#else
    minCost = xBDMVRMvSquareSearch( mvFinal_PU, minCost, pu, mvInitial_PU, BDMVR_INTME_SQUARE_SEARCH_MAX_NUM_ITERATIONS, MV_FRACTIONAL_BITS_INTERNAL,     bUseMR, false );
    minCost = xBDMVRMvSquareSearch( mvFinal_PU, minCost, pu, mvInitial_PU, 2,                                            MV_FRACTIONAL_BITS_INTERNAL - 1, bUseMR, false );
#endif

    // Sub-PU refinement is only attempted when the PU-level cost is still >= the luma area
    subPURefine = minCost >= lumaArea;
    pu.mv[REF_PIC_LIST_0] = mvFinal_PU[0];
    pu.mv[REF_PIC_LIST_1] = mvFinal_PU[1];
  }

#if TM_MRG
  if (pu.tmMergeFlag
#if JVET_AA0132_CONFIGURABLE_TM_TOOLS
    && pu.cs->sps->getUseTMMrgMode()
#endif
    )
  {
#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
    deriveTMMv(pu, tmCost);
#else
    deriveTMMv(pu);
#endif
    // Bail out if the PU is no longer bi-predicted (interDir != 3) after deriveTMMv()
    if (pu.interDir != 3)
    {
      return false;
    }
  }
#endif

#if JVET_AA0093_REFINED_MOTION_FOR_ARMC
  // step == 1: stop after the PU-level stage; pu.mv keeps the refined MVs in this case
  if (step == 1)
  {
    return true;
  }
#endif

  if (!subPURefine)
  {
    // span motion to subPU
    const int dy = std::min<int>(pu.lumaSize().height, DMVR_SUBCU_HEIGHT);
    const int dx = std::min<int>(pu.lumaSize().width,  DMVR_SUBCU_WIDTH);

    Position puPos = pu.lumaPos();

    int subPuIdx = 0;
    // Added after every sub-PU row so that one row advances subPuIdx by DMVR_SUBPU_STRIDE in total
    const int dmvrSubPuStrideIncr = DMVR_SUBPU_STRIDE - std::max(1, (int)(pu.lumaSize().width >> DMVR_SUBCU_WIDTH_LOG2));

    // NOTE(review): yStart / xStart are advanced but never read inside the loops
    for (int y = puPos.y, yStart = 0; y < (puPos.y + pu.lumaSize().height); y = y + dy, yStart = yStart + dy)
    {
      for (int x = puPos.x, xStart = 0; x < (puPos.x + pu.lumaSize().width); x = x + dx, xStart = xStart + dx)
      {
        m_bdmvrSubPuMvBuf[REF_PIC_LIST_0][subPuIdx] = pu.mv[0];
        m_bdmvrSubPuMvBuf[REF_PIC_LIST_1][subPuIdx] = pu.mv[1];
        subPuIdx++;
      }
      subPuIdx += dmvrSubPuStrideIncr;
    }

    // The refined MVs live only in m_bdmvrSubPuMvBuf; the PU itself gets its original MVs back
    pu.mv[0] = puOrgMv[0];
    pu.mv[1] = puOrgMv[1];
    return true;
  }

  // ---- Sub-PU refinement ----
  const int dy = std::min<int>(pu.lumaSize().height, DMVR_SUBCU_HEIGHT);
  const int dx = std::min<int>(pu.lumaSize().width,  DMVR_SUBCU_WIDTH);

  Position puPos = pu.lumaPos();
  PredictionUnit subPu = pu;   // working copy whose area is moved to each sub-PU in turn

  int subPuIdx = 0;
  // Added after every sub-PU row so that one row advances subPuIdx by DMVR_SUBPU_STRIDE in total
  const int dmvrSubPuStrideIncr = DMVR_SUBPU_STRIDE - std::max(1, (int)(pu.lumaSize().width >> DMVR_SUBCU_WIDTH_LOG2));

#if JVET_X0049_BDMVR_SW_OPT
  Distortion minCost = std::numeric_limits<Distortion>::max();
  const Mv mvInitial[2] = { pu.mv[0], pu.mv[1] };   // PU-level refined MVs are the starting point
  Mv mvFinal[2] = { pu.mv[0], pu.mv[1] };
  Mv mvOffset;   // output of the integer search (shifted to internal precision below)
  const Distortion earlyTerminateTh = dx * dy;
  // The integer search range is limited to half the sub-PU dimension
  const int adaptiveSearchRangeHor = (dx >> 1) < BDMVR_INTME_RANGE ? (dx >> 1) : BDMVR_INTME_RANGE;
  const int adaptiveSearchRangeVer = (dy >> 1) < BDMVR_INTME_RANGE ? (dy >> 1) : BDMVR_INTME_RANGE;
  const bool adaptRange = (adaptiveSearchRangeHor != BDMVR_INTME_RANGE || adaptiveSearchRangeVer != BDMVR_INTME_RANGE);
#if JVET_X0049_ADAPT_DMVR
  const int maxSearchRound = pu.bmMergeFlag ? BM_MRG_SUB_PU_INT_MAX_SRCH_ROUND : BDMVR_INTME_FULL_SEARCH_MAX_NUM_ITERATIONS;
#else
  const int maxSearchRound = BDMVR_INTME_FULL_SEARCH_MAX_NUM_ITERATIONS;
#endif

  // prepare cDistParam for cost calculation
  DistParam cDistParam;
  cDistParam.applyWeight = false;
  cDistParam.useMR = false;
#if JVET_Y0089_DMVR_BCW
  cDistParam.useMR |= (pu.cu->BcwIdx != BCW_DEFAULT);
#endif
  // pelBuffer[] point BDMVR_CENTER_POSITION samples into the pre-interpolated blocks filled below
  Pel* pelBuffer[2] = { nullptr, nullptr };
  pelBuffer[0] = m_filteredBlock[3][REF_PIC_LIST_0][0] + BDMVR_CENTER_POSITION;
  pelBuffer[1] = m_filteredBlock[3][REF_PIC_LIST_1][0] + BDMVR_CENTER_POSITION;
  PelUnitBuf predBuf[2] = { PelUnitBuf(pu.chromaFormat, PelBuf(pelBuffer[REF_PIC_LIST_0], BDMVR_BUF_STRIDE, dx, dy)),
                            PelUnitBuf(pu.chromaFormat, PelBuf(pelBuffer[REF_PIC_LIST_1], BDMVR_BUF_STRIDE, dx, dy)) };
  bool useHadamard = true; // STAD cost function
  m_pcRdCost->setDistParam(cDistParam, predBuf[0].Y(), predBuf[1].Y(), pu.cu->slice->clpRng(COMPONENT_Y).bd, COMPONENT_Y, useHadamard);

  // prepare buffer for pre-interpolation
  const Picture& refPic0 = *pu.cu->slice->getRefPic(REF_PIC_LIST_0, pu.refIdx[REF_PIC_LIST_0])->unscaledPic;
  const Picture& refPic1 = *pu.cu->slice->getRefPic(REF_PIC_LIST_1, pu.refIdx[REF_PIC_LIST_1])->unscaledPic;
  // The extended block covers the sub-PU plus BDMVR_INTME_RANGE samples on every side
  int iWidthExt = dx + (BDMVR_INTME_RANGE << 1);
  int iHeightExt = dy + (BDMVR_INTME_RANGE << 1);
  int iWidthOffset = BDMVR_SIMD_IF_FACTOR - (iWidthExt & (BDMVR_SIMD_IF_FACTOR - 1));
  iWidthOffset &= (BDMVR_SIMD_IF_FACTOR - 1);
  iWidthExt += iWidthOffset; // This ensures that iWidthExt is a factor-of-n number, assuming BDMVR_SIMD_IF_FACTOR is equal to n
  PelUnitBuf predBufExt[2] = { (PelUnitBuf(pu.chromaFormat, PelBuf(m_filteredBlock[3][REF_PIC_LIST_0][0], BDMVR_BUF_STRIDE, iWidthExt, iHeightExt))),
                               (PelUnitBuf(pu.chromaFormat, PelBuf(m_filteredBlock[3][REF_PIC_LIST_1][0], BDMVR_BUF_STRIDE, iWidthExt, iHeightExt))) };
  // MVs addressing the top-left corner of the extended (search range) block
  Mv mvTopLeft[2] = { mvInitial[0] - Mv((BDMVR_INTME_RANGE << MV_FRACTIONAL_BITS_INTERNAL), (BDMVR_INTME_RANGE << MV_FRACTIONAL_BITS_INTERNAL)),
                      mvInitial[1] - Mv((BDMVR_INTME_RANGE << MV_FRACTIONAL_BITS_INTERNAL), (BDMVR_INTME_RANGE << MV_FRACTIONAL_BITS_INTERNAL)) };
#endif

  // NOTE(review): yStart / xStart are advanced but never read inside the loops
  for (int y = puPos.y, yStart = 0; y < (puPos.y + pu.lumaSize().height); y = y + dy, yStart = yStart + dy)
  {
    for (int x = puPos.x, xStart = 0; x < (puPos.x + pu.lumaSize().width); x = x + dx, xStart = xStart + dx)
    {
#if JVET_X0049_BDMVR_SW_OPT
      subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(x, y, dx, dy)));
      minCost = std::numeric_limits<Distortion>::max();

      // Pre-interpolation
      xBDMVRFillBlkPredPelBuffer(subPu, refPic0, mvTopLeft[0], predBufExt[0], pu.cs->slice->clpRng(COMPONENT_Y));
      xBDMVRFillBlkPredPelBuffer(subPu, refPic1, mvTopLeft[1], predBufExt[1], pu.cs->slice->clpRng(COMPONENT_Y));

      // Integer-pel search; the first template argument selects the adaptive-range variant.
      // The flag argument chosen by the preprocessor is: always true with JVET_X0056,
      // pu.bmMergeFlag with only JVET_X0049_ADAPT_DMVR, false otherwise.
      if (adaptRange)
      {
        minCost = xBDMVRMvIntPelFullSearch<true, true>(mvOffset, minCost, mvInitial, maxSearchRound, adaptiveSearchRangeHor, adaptiveSearchRangeVer,
#if JVET_X0056_DMVD_EARLY_TERMINATION
          true,
#elif JVET_X0049_ADAPT_DMVR
          pu.bmMergeFlag,
#else
          false,
#endif
          earlyTerminateTh, cDistParam, pelBuffer, BDMVR_BUF_STRIDE);
      }
      else
      {
        minCost = xBDMVRMvIntPelFullSearch<false, true>(mvOffset, minCost, mvInitial, maxSearchRound, adaptiveSearchRangeHor, adaptiveSearchRangeVer,
#if JVET_X0056_DMVD_EARLY_TERMINATION
          true,
#elif JVET_X0049_ADAPT_DMVR
          pu.bmMergeFlag,
#else
          false,
#endif
          earlyTerminateTh, cDistParam, pelBuffer, BDMVR_BUF_STRIDE);
      }

      if (minCost >= earlyTerminateTh)
      {
        // Index of the best integer offset in m_sadEnlargeArrayBilMrg (row-major, BDMVR_INTME_STRIDE per row);
        // must be computed before mvOffset is shifted to internal precision
        int bestOffsetIdx = (mvOffset.getVer() + BDMVR_INTME_RANGE) * BDMVR_INTME_STRIDE + (mvOffset.getHor() + BDMVR_INTME_RANGE);
        mvOffset <<= MV_FRACTIONAL_BITS_INTERNAL;
        // The offset is applied with opposite signs to the two lists
        mvFinal[0] = mvInitial[0] + mvOffset;
        mvFinal[1] = mvInitial[1] - mvOffset;
        minCost = m_sadEnlargeArrayBilMrg[bestOffsetIdx];
        Distortion tmpCost = getDecoderSideDerivedMvCost(mvInitial[0], mvFinal[0], BDMVR_INTME_RANGE + 1, DECODER_SIDE_MV_WEIGHT);
        // Fractional refinement (step shift MV_FRACTIONAL_BITS_INTERNAL - 1) only when the stored cost is not below the MV cost
        if (minCost >= tmpCost)
        {
          minCost += tmpCost;
          minCost = xBDMVRMvSquareSearch<true>(mvFinal, minCost/*std::numeric_limits<Distortion>::max()*/, subPu, mvInitial, 2, MV_FRACTIONAL_BITS_INTERNAL - 1, false, true);
        }
      }
      else
      {
        // Cost already below the early-termination threshold: keep the integer result
        mvOffset <<= MV_FRACTIONAL_BITS_INTERNAL;
        mvFinal[0] = mvInitial[0] + mvOffset;
        mvFinal[1] = mvInitial[1] - mvOffset;
      }
#else
      subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(x, y, dx, dy)));
      Distortion minCost = std::numeric_limits<Distortion>::max();
      bool bUseMR = subPu.lumaSize().area() > 64;

      Mv mvInitial[2] = { pu.mv[0], pu.mv[1] };
      Mv mvFinal[2]   = { pu.mv[0], pu.mv[1] };
      const int subPuBufOffset = 0; // will do interpolation inside search
      minCost = xBDMVRMvIntPelFullSearch( mvFinal, minCost, subPu, mvInitial, BDMVR_INTME_SQUARE_SEARCH_MAX_NUM_ITERATIONS, MV_FRACTIONAL_BITS_INTERNAL, bUseMR, subPuBufOffset );
      // Collapse the integer-search cost to 0 (below dx*dy) or max before the second search
      minCost = (minCost < dx * dy) ? 0 : std::numeric_limits<Distortion>::max();
      minCost = xBDMVRMvSquareSearch( mvFinal, minCost, subPu, mvInitial, 2, MV_FRACTIONAL_BITS_INTERNAL - 1, false, true);
#endif

      m_bdmvrSubPuMvBuf[REF_PIC_LIST_0][subPuIdx] = mvFinal[0];
      m_bdmvrSubPuMvBuf[REF_PIC_LIST_1][subPuIdx] = mvFinal[1];
      subPuIdx++;
    }
    subPuIdx += dmvrSubPuStrideIncr;
  }

  // The refined MVs live only in m_bdmvrSubPuMvBuf; the PU itself gets its original MVs back
  pu.mv[0] = puOrgMv[0];
  pu.mv[1] = puOrgMv[1];
  return true;
}

#if JVET_AB0112_AFFINE_DMVR
// Fills the luma plane of dstUnitBuf with an affine prediction of pu from refPic.
//   _mv    : control-point MVs, read as left-top, right-top and left-bottom
//   clpRng : clipping range forwarded to the interpolation filter
// Interpolation uses filter index 1 with biMCForDMVR set (see the filterHor/filterVer calls).
void InterPrediction::xBDMVRFillBlkPredPelBufferAffine(const PredictionUnit& pu, const Picture& refPic, const Mv(&_mv)[3], PelUnitBuf& dstUnitBuf, const ClpRng& clpRng)
{
  const ComponentID compID = COMPONENT_Y;
  const bool bi = true;

  JVET_J0090_SET_REF_PICTURE(refPic, compID);
  const ChromaFormat chFmt = pu.chromaFormat;

  Mv mvLT = _mv[0];
  Mv mvRT = _mv[1];
  Mv mvLB = _mv[2];

  const int width = pu.Y().width;
  const int height = pu.Y().height;

  // Per-sample MV deltas, kept with iBit extra bits of precision
  const int iBit = MAX_CU_DEPTH;
  int iDMvHorX, iDMvHorY, iDMvVerX, iDMvVerY;
  iDMvHorX = (mvRT - mvLT).getHor() << (iBit - floorLog2(width));
  iDMvHorY = (mvRT - mvLT).getVer() << (iBit - floorLog2(width));
  if (pu.cu->affineType == AFFINEMODEL_6PARAM)
  {
    iDMvVerX = (mvLB - mvLT).getHor() << (iBit - floorLog2(height));
    iDMvVerY = (mvLB - mvLT).getVer() << (iBit - floorLog2(height));
  }
  else
  {
    // Non-6-parameter model: the vertical deltas are derived from the horizontal ones
    iDMvVerX = -iDMvHorY;
iDMvVerY = iDMvHorX;
  }

  const SPS &sps = *pu.cs->sps;
  // Right shift passed to roundAffineMv() for the iBit-scaled MVs
  const int shift = iBit - 4 + MV_FRACTIONAL_BITS_INTERNAL;
  bool wrapRef = false;
  bool isLast = !bi;   // bi is always true here, so isLast is always false

  const int MAX_FILTER_SIZE = NTAPS_BILINEAR;
  const int dstExtW = ((width + 7) >> 3) << 3;   // width rounded up to a multiple of 8
  const int refExtH = height + MAX_FILTER_SIZE - 1;
  PelBuf tmpBuf = PelBuf(m_filteredBlockTmp[0][compID], dstExtW, refExtH);
  PelBuf &dstBuf = dstUnitBuf.bufs[compID];

  // Sub-block size: start from AFFINE_MIN_BLOCK_SIZE and double each dimension while the
  // largest MV delta across the sub-block stays below TH (or use the full dimension when the
  // delta along it is zero).
  int blockWidth = AFFINE_MIN_BLOCK_SIZE;
  int blockHeight = AFFINE_MIN_BLOCK_SIZE;
  if (iDMvHorX == 0 && iDMvHorY == 0)
  {
    blockWidth = width;
  }
  else
  {
    int maxDmv = std::max(abs(iDMvHorX), abs(iDMvHorY)) * blockWidth;
    int TH = 1 << (iBit - 1); // Half pel
    while (maxDmv < TH && blockWidth < width)
    {
      blockWidth <<= 1;
      maxDmv <<= 1;
    }
  }
  if (iDMvVerX == 0 && iDMvVerY == 0)
  {
    blockHeight = height;
  }
  else
  {
    int maxDmv = std::max(abs(iDMvVerX), abs(iDMvVerY)) * blockHeight;
    int TH = 1 << (iBit - 1); // Half pel
    while (maxDmv < TH && blockHeight < height)
    {
      blockHeight <<= 1;
      maxDmv <<= 1;
    }
  }

  int iMvScaleHor = mvLT.getHor() << iBit;
  int iMvScaleVer = mvLT.getVer() << iBit;
  // MV at the centre of the first sub-block (half a sub-block away from the left-top corner)
  int iMvScaleTmpHor0 = iMvScaleHor + ((iDMvHorX * blockWidth + iDMvVerX * blockHeight) >> 1);
  int iMvScaleTmpVer0 = iMvScaleVer + ((iDMvHorY * blockWidth + iDMvVerY * blockHeight) >> 1);

  const bool biMCForDMVR = true;
  const int nFilterIdx = 1;
  const int vFilterSize = NTAPS_BILINEAR;

  for (int h = 0; h < height; h += blockHeight)
  {
    for (int w = 0; w < width; w += blockWidth)
    {
      int iMvScaleTmpHor, iMvScaleTmpVer;
      {
        iMvScaleTmpHor = iMvScaleTmpHor0 + iDMvHorX * w + iDMvVerX * h;
        iMvScaleTmpVer = iMvScaleTmpVer0 + iDMvHorY * w + iDMvVerY * h;
      }
      roundAffineMv(iMvScaleTmpHor, iMvScaleTmpVer, shift);
      Mv tmpMv(iMvScaleTmpHor, iMvScaleTmpVer);
      tmpMv.clipToStorageBitDepth();
      iMvScaleTmpHor = tmpMv.getHor();
      iMvScaleTmpVer = tmpMv.getVer();

      // clip and scale
      if (refPic.isWrapAroundEnabled(pu.cs->pps))
      {
        // NOTE(review): this tmpMv shadows the outer one; it is built from the same two values
        Mv tmpMv(iMvScaleTmpHor, iMvScaleTmpVer);
        wrapRef = wrapClipMv(tmpMv, Position(pu.Y().x + w, pu.Y().y + h), Size(blockWidth, blockHeight), &sps, pu.cs->pps);
        iMvScaleTmpHor = tmpMv.getHor();
        iMvScaleTmpVer = tmpMv.getVer();
      }
      else
      {
        wrapRef = false;
        // Clipping uses the whole PU position/size, not the sub-block
        if (refPic.isRefScaled(pu.cs->pps) == false)
        {
          clipMv(tmpMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps);
          iMvScaleTmpHor = tmpMv.getHor();
          iMvScaleTmpVer = tmpMv.getVer();
        }
      }

      // get the MV in high precision
      // (low 4 bits are the fractional phase, the remaining bits the integer displacement)
      int xFrac, yFrac, xInt, yInt;
      xInt = iMvScaleTmpHor >> 4;
      xFrac = iMvScaleTmpHor & 15;
      yInt = iMvScaleTmpVer >> 4;
      yFrac = iMvScaleTmpVer & 15;

      const CPelBuf refBuf = refPic.getRecoBuf( CompArea(compID, chFmt, pu.blocks[compID].offset(xInt + w, yInt + h), pu.blocks[compID]), wrapRef);
      Pel *ref = (Pel *)refBuf.buf;
      Pel *dst = dstBuf.buf + w + h * dstBuf.stride;

      int refStride = refBuf.stride;
      int dstStride = dstBuf.stride;

      int bw = blockWidth;
      int bh = blockHeight;

      // Single-pass filtering when one phase is zero, otherwise horizontal pass into tmpBuf followed by a vertical pass
      if (yFrac == 0)
      {
        m_if.filterHor(compID, (Pel*)ref, refStride, dst, dstStride, bw, bh, xFrac, isLast, chFmt, clpRng, nFilterIdx, biMCForDMVR, false);
      }
      else if (xFrac == 0)
      {
        m_if.filterVer(compID, (Pel*)ref, refStride, dst, dstStride, bw, bh, yFrac, true, isLast, chFmt, clpRng, nFilterIdx, biMCForDMVR, false);
      }
      else
      {
        m_if.filterHor(compID, (Pel*)ref - ((vFilterSize >> 1) - 1)*refStride, refStride, tmpBuf.buf, tmpBuf.stride, bw, bh + vFilterSize - 1, xFrac, false, chFmt, clpRng, nFilterIdx, biMCForDMVR, false);
        JVET_J0090_SET_CACHE_ENABLE(false);
        m_if.filterVer(compID, tmpBuf.buf + ((vFilterSize >> 1) - 1)*tmpBuf.stride, tmpBuf.stride, dst, dstStride, bw, bh, yFrac, false, isLast, chFmt, clpRng, nFilterIdx, biMCForDMVR, false);
        JVET_J0090_SET_CACHE_ENABLE(true);
      }
    }
  }
}

void InterPrediction::xBDMVRFillBlkPredPelBufferAffineOPT(const PredictionUnit& pu, const Picture& refPic, const RefPicList eRefPicList,
  const Mv(&_mv)[3], const Mv mvCur, const Mv mvCenter, const bool doInterpolation, PelUnitBuf& dstUnitBuf, const ClpRng& clpRng
  , const bool profTh
  , const int blockWidth
  , const int blockHeight
  , const int memBlockWidthExt
  , const int memBlockHeight
, const int memHeight , const int memStride ) { const ComponentID compID = COMPONENT_Y; const bool bi = true; JVET_J0090_SET_REF_PICTURE(refPic, compID); const ChromaFormat chFmt = pu.chromaFormat; Mv mvOffset = mvCur - mvCenter; CHECK(mvCenter.hor != 0 || mvCenter.ver != 0, ""); // get affine sub-block width and height const int width = pu.Y().width; const int height = pu.Y().height; bool wrapRef = false; bool isLast = !bi; const int MAX_FILTER_SIZE = NTAPS_BILINEAR; PelBuf &dstBuf = dstUnitBuf.bufs[compID]; PelBuf memBuf = PelBuf(m_affineDmvrBlockTmp[eRefPicList], memStride, memHeight); const int refExtH = memBlockHeight + MAX_FILTER_SIZE - 1; PelBuf tmpBuf = PelBuf(m_filteredBlockTmp[0][compID], memBlockWidthExt, refExtH); // get prediction block by block const bool biMCForDMVR = true; const int nFilterIdx = 1; const int vFilterSize = NTAPS_BILINEAR; for (int h = 0, memH = 0, hn = 0; h < height; h += blockHeight, memH += memBlockHeight, hn++) { for (int w = 0, memW = 0, wn = 0; w < width; w += blockWidth, memW += memBlockWidthExt, wn++) { int xOrgFrac, yOrgFrac, xOrgInt, yOrgInt; xOrgInt = m_affineSbMvIntX[eRefPicList][wn][hn]; yOrgInt = m_affineSbMvIntY[eRefPicList][wn][hn]; xOrgFrac = m_affineSbMvFracX[eRefPicList][wn][hn]; yOrgFrac = m_affineSbMvFracY[eRefPicList][wn][hn]; if (doInterpolation) { const CPelBuf refBuf = refPic.getRecoBuf(CompArea(compID, chFmt, pu.blocks[compID].offset(xOrgInt + w, yOrgInt + h), pu.blocks[compID]), wrapRef); Pel *ref = (Pel *)refBuf.buf; Pel *dst = memBuf.buf + memW + memH * memBuf.stride; int refStride = refBuf.stride; int dstStride = memBuf.stride; int bw = memBlockWidthExt; int bh = memBlockHeight; if (yOrgFrac == 0) { m_if.filterHor(compID, (Pel*)ref, refStride, dst, dstStride, bw, bh, xOrgFrac, isLast, chFmt, clpRng, nFilterIdx, biMCForDMVR, false); } else if (xOrgFrac == 0) { m_if.filterVer(compID, (Pel*)ref, refStride, dst, dstStride, bw, bh, yOrgFrac, true, isLast, chFmt, clpRng, nFilterIdx, biMCForDMVR, false); } else 
{ m_if.filterHor(compID, (Pel*)ref - ((vFilterSize >> 1) - 1)*refStride, refStride, tmpBuf.buf, tmpBuf.stride, bw, bh + vFilterSize - 1, xOrgFrac, false, chFmt, clpRng, nFilterIdx, biMCForDMVR, false); JVET_J0090_SET_CACHE_ENABLE(false); m_if.filterVer(compID, tmpBuf.buf + ((vFilterSize >> 1) - 1)*tmpBuf.stride, tmpBuf.stride, dst, dstStride, bw, bh, yOrgFrac, false, isLast, chFmt, clpRng, nFilterIdx, biMCForDMVR, false); JVET_J0090_SET_CACHE_ENABLE(true); } } int xTmpInt, yTmpInt; xTmpInt = xOrgInt + AFFINE_DMVR_SEARCH_RANGE + (mvOffset.hor >> MV_FRACTIONAL_BITS_INTERNAL); yTmpInt = yOrgInt + AFFINE_DMVR_SEARCH_RANGE + (mvOffset.ver >> MV_FRACTIONAL_BITS_INTERNAL); Pel *dst = dstBuf.buf + w + h * dstBuf.stride; Pel *src = memBuf.buf + (memW + xTmpInt - xOrgInt) + (memH + yTmpInt - yOrgInt) * memBuf.stride; int dstStride = dstBuf.stride; int srcStride = memBuf.stride; for (unsigned y = 0; y < blockHeight; y++) { memcpy(dst, src, blockWidth * sizeof(Pel)); dst += dstStride; src += srcStride; } } } } void InterPrediction::xCalculteAffineParameters(const PredictionUnit& pu, const Picture& refPic, const Mv(&_mv)[3], int reflist, bool& profTH, int& blockWidth, int& blockHeight, int& memBlockWidthExt, int& memBlockHeight, int& memHeight, int& memStride) { Mv mvLT = _mv[0]; Mv mvRT = _mv[1]; Mv mvLB = _mv[2]; const int width = pu.Y().width; const int height = pu.Y().height; const int iBit = MAX_CU_DEPTH; const int shift = iBit - 4 + MV_FRACTIONAL_BITS_INTERNAL; int iDMvHorX, iDMvHorY, iDMvVerX, iDMvVerY; iDMvHorX = (mvRT - mvLT).getHor() << (iBit - floorLog2(width)); iDMvHorY = (mvRT - mvLT).getVer() << (iBit - floorLog2(width)); if (pu.cu->affineType == AFFINEMODEL_6PARAM) { iDMvVerX = (mvLB - mvLT).getHor() << (iBit - floorLog2(height)); iDMvVerY = (mvLB - mvLT).getVer() << (iBit - floorLog2(height)); } else { iDMvVerX = -iDMvHorY; iDMvVerY = iDMvHorX; } blockWidth = AFFINE_MIN_BLOCK_SIZE; blockHeight = AFFINE_MIN_BLOCK_SIZE; if (iDMvHorX == 0 && iDMvHorY == 0) { 
blockWidth = width; } else { int maxDmv = std::max(abs(iDMvHorX), abs(iDMvHorY)) * blockWidth; int TH = 1 << (iBit - 1); // Half pel while (maxDmv < TH && blockWidth < width) { blockWidth <<= 1; maxDmv <<= 1; } } if (iDMvVerX == 0 && iDMvVerY == 0) { blockHeight = height; } else { int maxDmv = std::max(abs(iDMvVerX), abs(iDMvVerY)) * blockHeight; int TH = 1 << (iBit - 1); // Half pel while (maxDmv < TH && blockHeight < height) { blockHeight <<= 1; maxDmv <<= 1; } } memBlockHeight = blockHeight + (AFFINE_DMVR_SEARCH_RANGE << 1); int memBlockWidth = blockWidth + (AFFINE_DMVR_SEARCH_RANGE << 1); int memBlockWidthOffset = BDMVR_SIMD_IF_FACTOR - (memBlockWidth & (BDMVR_SIMD_IF_FACTOR - 1)); memBlockWidthOffset &= (BDMVR_SIMD_IF_FACTOR - 1); memBlockWidthExt = memBlockWidth + memBlockWidthOffset; // This ensures that iWidthExt is a factor-of-n number, assuming BDMVR_SIMD_IF_FACTOR is equal to n memStride = memBlockWidthExt * (width / blockWidth); memHeight = memBlockHeight * (height / blockHeight); int iMvScaleHor, iMvScaleVer, iMvScaleTmpHor0, iMvScaleTmpVer0; iMvScaleHor = mvLT.getHor() << iBit; iMvScaleVer = mvLT.getVer() << iBit; iMvScaleTmpHor0 = iMvScaleHor + ((iDMvHorX * blockWidth + iDMvVerX * blockHeight) >> 1); iMvScaleTmpVer0 = iMvScaleVer + ((iDMvHorY * blockWidth + iDMvVerY * blockHeight) >> 1); for (int h = 0, hn = 0; h < height; h += blockHeight, hn++) { for (int w = 0, wn = 0; w < width; w += blockWidth, wn++) { int iMvScaleOrgHor, iMvScaleOrgVer; iMvScaleOrgHor = iMvScaleTmpHor0 + iDMvHorX * w + iDMvVerX * h - (AFFINE_DMVR_SEARCH_RANGE << (MV_FRACTIONAL_BITS_INTERNAL + iBit)); iMvScaleOrgVer = iMvScaleTmpVer0 + iDMvHorY * w + iDMvVerY * h - (AFFINE_DMVR_SEARCH_RANGE << (MV_FRACTIONAL_BITS_INTERNAL + iBit)); roundAffineMv(iMvScaleOrgHor, iMvScaleOrgVer, shift); Mv tmpOrgMv(iMvScaleOrgHor, iMvScaleOrgVer); tmpOrgMv.clipToStorageBitDepth(); iMvScaleOrgHor = tmpOrgMv.getHor(); iMvScaleOrgVer = tmpOrgMv.getVer(); // clip and scale if 
(refPic.isWrapAroundEnabled(pu.cs->pps)) { Mv tmpOrgMv(iMvScaleOrgHor, iMvScaleOrgVer); iMvScaleOrgHor = tmpOrgMv.getHor(); iMvScaleOrgVer = tmpOrgMv.getVer(); } else { if (refPic.isRefScaled(pu.cs->pps) == false) { clipMv(tmpOrgMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps); iMvScaleOrgHor = tmpOrgMv.getHor(); iMvScaleOrgVer = tmpOrgMv.getVer(); } } int xOrgFrac, yOrgFrac, xOrgInt, yOrgInt; xOrgInt = iMvScaleOrgHor >> 4; xOrgFrac = iMvScaleOrgHor & 15; yOrgInt = iMvScaleOrgVer >> 4; yOrgFrac = iMvScaleOrgVer & 15; m_affineSbMvIntX[reflist][wn][hn] = xOrgInt; m_affineSbMvIntY[reflist][wn][hn] = yOrgInt; m_affineSbMvFracX[reflist][wn][hn] = xOrgFrac; m_affineSbMvFracY[reflist][wn][hn] = yOrgFrac; } } } #endif void InterPrediction::xBDMVRFillBlkPredPelBuffer(const PredictionUnit& pu, const Picture& refPic, const Mv &_mv, PelUnitBuf &dstBuf, const ClpRng& clpRng) { const ComponentID compID = COMPONENT_Y; #if JVET_AA0146_WRAP_AROUND_FIX bool wrapRef = false; Mv mv(_mv); if (refPic.isWrapAroundEnabled( pu.cs->pps )) { wrapRef = wrapClipMv(mv, pu.lumaPos(), pu.lumaSize(), pu.cs->sps, pu.cs->pps); } else { clipMv(mv, pu.lumaPos(), pu.lumaSize(), *pu.cu->cs->sps, *pu.cu->cs->pps); } const CPelBuf refBuf = refPic.getRecoBuf(refPic.blocks[compID], wrapRef); #else const CPelBuf refBuf = refPic.getRecoBuf(refPic.blocks[compID]); #endif const int lumaShift = 2 + MV_FRACTIONAL_BITS_DIFF; const int horShift = (lumaShift + ::getComponentScaleX(compID, pu.chromaFormat)); const int verShift = (lumaShift + ::getComponentScaleY(compID, pu.chromaFormat)); #if !JVET_AA0146_WRAP_AROUND_FIX Mv mv(_mv); clipMv(mv, pu.lumaPos(), pu.lumaSize(), *pu.cu->cs->sps, *pu.cu->cs->pps); #endif const int xInt = mv.getHor() >> horShift; const int yInt = mv.getVer() >> verShift; const int xFrac = mv.getHor() & ((1 << horShift) - 1); const int yFrac = mv.getVer() & ((1 << verShift) - 1); const Pel* ref = refBuf.bufAt(pu.blocks[compID].pos().offset(xInt, yInt)); Pel* dst = 
dstBuf.bufs[compID].buf; int refStride = refBuf.stride; int dstStride = dstBuf.bufs[compID].stride; int bw = (int)dstBuf.bufs[compID].width; int bh = (int)dstBuf.bufs[compID].height; const int nFilterIdx = 0; const bool useAltHpelIf = pu.cu->imv == IMV_HPEL; const bool biMCForDMVR = true; if ( yFrac == 0 ) { m_if.filterHor( compID, (Pel*) ref, refStride, dst, dstStride, bw, bh, xFrac, false/*rndRes=!bi*/, pu.chromaFormat, pu.cu->slice->clpRng(compID), biMCForDMVR, biMCForDMVR, useAltHpelIf ); } else if ( xFrac == 0 ) { m_if.filterVer( compID, (Pel*) ref, refStride, dst, dstStride, bw, bh, yFrac, true, false/*rndRes=!bi*/, pu.chromaFormat, pu.cu->slice->clpRng(compID), biMCForDMVR, biMCForDMVR, useAltHpelIf ); } else { #if IF_12TAP int vFilterSize = isLuma(compID) ? (nFilterIdx == 1 ? NTAPS_BILINEAR : NTAPS_LUMA(0)) : NTAPS_CHROMA; #else int vFilterSize = isLuma(compID) ? (nFilterIdx == 1 ? NTAPS_BILINEAR : NTAPS_LUMA) : NTAPS_CHROMA; #endif if (biMCForDMVR) { vFilterSize = NTAPS_BILINEAR; } PelBuf tmpBuf = PelBuf(m_filteredBlockTmp[0][compID], Size(bw + 2 * BDMVR_INTME_RANGE, bh + 2 * BDMVR_INTME_RANGE)); m_if.filterHor( compID, (Pel*)ref - ((vFilterSize>>1) -1)*refStride, refStride, tmpBuf.buf, tmpBuf.stride, bw, bh+vFilterSize-1, xFrac, false, pu.chromaFormat, pu.cu->slice->clpRng(compID), biMCForDMVR, biMCForDMVR, useAltHpelIf ); JVET_J0090_SET_CACHE_ENABLE( false ); m_if.filterVer( compID, tmpBuf.buf + ((vFilterSize>>1) -1)*tmpBuf.stride, tmpBuf.stride, dst, dstStride, bw, bh, yFrac, false, false/*rndRes=!bi*/, pu.chromaFormat, pu.cu->slice->clpRng(compID), biMCForDMVR, biMCForDMVR, useAltHpelIf ); JVET_J0090_SET_CACHE_ENABLE( true ); } } #if JVET_X0049_ADAPT_DMVR template <uint8_t dir> #endif void InterPrediction::xBDMVRPreInterpolation(const PredictionUnit& pu, const Mv (&mvCenter)[2], bool doPreInterpolationFP, bool doPreInterpolationHP) { if (doPreInterpolationFP) { for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) { #if 
JVET_X0049_ADAPT_DMVR if (!(dir & (1 << refList))) { continue; } #endif const Picture& refPic = *pu.cu->slice->getRefPic((RefPicList)refList, pu.refIdx[refList])->unscaledPic; int dstStride = MAX_CU_SIZE + ( BDMVR_INTME_RANGE << 1 ) + ( BDMVR_SIMD_IF_FACTOR - 2 ); int iWidthExt = (int)pu.lwidth () + ( BDMVR_INTME_RANGE << 1 ); int iHeightExt = (int)pu.lheight() + ( BDMVR_INTME_RANGE << 1 ); int iWidthOffset = BDMVR_SIMD_IF_FACTOR - ( iWidthExt & ( BDMVR_SIMD_IF_FACTOR - 1 ) ); iWidthOffset &= ( BDMVR_SIMD_IF_FACTOR - 1 ); iWidthExt += iWidthOffset; // This ensures that iWidthExt is a factor-of-n number, assuming BDMVR_SIMD_IF_FACTOR is equal to n Mv mv = mvCenter[refList] - Mv((BDMVR_INTME_RANGE << MV_FRACTIONAL_BITS_INTERNAL), (BDMVR_INTME_RANGE << MV_FRACTIONAL_BITS_INTERNAL)); PelUnitBuf predBuf = ( PelUnitBuf(pu.chromaFormat, PelBuf(m_filteredBlock[3][refList][0], dstStride, iWidthExt, iHeightExt ) ) ); xBDMVRFillBlkPredPelBuffer(pu, refPic, mv, predBuf, pu.cs->slice->clpRng(COMPONENT_Y)); } } if (doPreInterpolationHP) { const int offset = 0 - ( 1 << ( MV_FRACTIONAL_BITS_INTERNAL - 1 ) ); const Mv cPhaseOffset[3] = { Mv( offset , 0 ), Mv( offset, offset ), Mv( 0 , offset ) }; for (int refList = 0; refList < NUM_REF_PIC_LIST_01 ; refList++) { #if JVET_X0049_ADAPT_DMVR if (!(dir & (1 << refList))) { continue; } #endif const Picture& refPic = *pu.cu->slice->getRefPic((RefPicList)refList, pu.refIdx[refList])->unscaledPic; for (int phaseIdx = 0 ; phaseIdx < 3 ; phaseIdx++) { int iRefStride = MAX_CU_SIZE + ( BDMVR_INTME_RANGE << 1 ) + ( BDMVR_SIMD_IF_FACTOR - 2 ); int iWidthExt = (int)pu.lwidth () + 1 - ( phaseIdx >> 1); int iHeightExt = (int)pu.lheight() + 1 - ((2-phaseIdx) >> 1); int iWidthOffset = BDMVR_SIMD_IF_FACTOR - ( iWidthExt & ( BDMVR_SIMD_IF_FACTOR - 1 ) ); iWidthOffset &= ( BDMVR_SIMD_IF_FACTOR - 1 ); iWidthExt += iWidthOffset; // This ensures that iWidthExt is a factor-of-n number, assuming BDMVR_SIMD_IF_FACTOR is equal to n Mv mv = mvCenter[refList] + 
cPhaseOffset[phaseIdx]; PelUnitBuf predBuf = PelUnitBuf( pu.chromaFormat, PelBuf( m_filteredBlock[phaseIdx][refList][0], iRefStride, iWidthExt, iHeightExt ) ); xBDMVRFillBlkPredPelBuffer( pu, refPic, mv, predBuf, pu.cs->slice->clpRng(COMPONENT_Y) ); } } } } #if JVET_X0049_BDMVR_SW_OPT template <bool adaptRange, bool useHadamard> Distortion InterPrediction::xBDMVRMvIntPelFullSearch(Mv&mvOffset, Distortion curBestCost, const Mv(&initialMv)[2], const int32_t maxSearchRounds, const int maxHorOffset, const int maxVerOffset, const bool earlySkip, const Distortion earlyTerminateTh, DistParam &cDistParam, Pel* pelBuffer[2], const int stride) { // check initial cost mvOffset.setZero(); cDistParam.org.buf = pelBuffer[0]; cDistParam.cur.buf = pelBuffer[1]; #if FULL_NBIT if (useHadamard) { curBestCost = cDistParam.distFunc(cDistParam) >> 1; // magic shift, benefit for early terminate } else { int32_t precisionAdj = cDistParam.bitDepth > 8 ? cDistParam.bitDepth - 8 : 0; curBestCost = cDistParam.distFunc(cDistParam) >> precisionAdj; } #else curBestCost = cDistParam.distFunc(cDistParam); #endif m_sadEnlargeArrayBilMrg[BDMVR_INTME_CENTER] = curBestCost; curBestCost = curBestCost - (curBestCost >> 2); // cost tuning if (curBestCost < earlyTerminateTh) { return curBestCost; } Distortion tmCost = MAX_UINT64; Distortion prevMinCost = MAX_UINT64; for (int searchPrio = 1; searchPrio < maxSearchRounds; searchPrio++) { prevMinCost = curBestCost; for (int currIdx = 0; currIdx < m_searchEnlargeOffsetNum[searchPrio]; currIdx++) { tmCost = 0; int horOffset = m_searchEnlargeOffsetBilMrg[searchPrio][currIdx].getHor(); int verOffset = m_searchEnlargeOffsetBilMrg[searchPrio][currIdx].getVer(); int searchOffsetIdx = m_searchEnlargeOffsetToIdx[searchPrio][currIdx]; if (adaptRange) { if (abs(horOffset) > maxHorOffset || abs(verOffset) > maxVerOffset) { continue; } } int bufOffset = verOffset * stride + horOffset; cDistParam.org.buf = pelBuffer[0] + bufOffset; cDistParam.cur.buf = pelBuffer[1] - 
bufOffset; #if FULL_NBIT if (useHadamard) { m_sadEnlargeArrayBilMrg[searchOffsetIdx] = cDistParam.distFunc(cDistParam) >> 1; // magic shift, benefit for early terminate } else { int32_t precisionAdj = cDistParam.bitDepth > 8 ? cDistParam.bitDepth - 8 : 0; m_sadEnlargeArrayBilMrg[searchOffsetIdx] = cDistParam.distFunc(cDistParam) >> precisionAdj; } #else m_sadEnlargeArrayBilMrg[searchOffsetIdx] = cDistParam.distFunc(cDistParam); #endif tmCost += m_sadEnlargeArrayBilMrg[searchOffsetIdx]; tmCost += (m_sadEnlargeArrayBilMrg[searchOffsetIdx] >> m_costShiftBilMrg1[searchOffsetIdx]); tmCost += (m_sadEnlargeArrayBilMrg[searchOffsetIdx] >> m_costShiftBilMrg2[searchOffsetIdx]); if (tmCost < curBestCost) { mvOffset = Mv(horOffset, verOffset); curBestCost = tmCost; } } if (curBestCost < earlyTerminateTh) { break; } if (earlySkip && searchPrio > 1 && prevMinCost - curBestCost < earlyTerminateTh) { break; } } return curBestCost; } #else Distortion InterPrediction::xBDMVRMvIntPelFullSearch(Mv(&curBestMv)[2], Distortion curBestCost, PredictionUnit& pu, const Mv(&initialMv)[2], int32_t maxSearchRounds, int32_t searchStepShift, bool useMR, const int subPuBufOffset) { bool doPreInterpolation = true; bool useHadamard = true; // STAD cost function useMR = false; // STAD cost function const int adaptiveSearchRangeHor = (pu.lwidth() >> 1) < BDMVR_INTME_RANGE ? (pu.lwidth() >> 1) : BDMVR_INTME_RANGE; const int adaptiveSearchRangeVer = (pu.lheight() >> 1) < BDMVR_INTME_RANGE ? 
(pu.lheight() >> 1) : BDMVR_INTME_RANGE; // Calculate TM cost of initial MVs, if it is not set if (curBestCost == std::numeric_limits<Distortion>::max()) { curBestCost = xBDMVRGetMatchingError( pu, curBestMv, subPuBufOffset, useHadamard, useMR, doPreInterpolation, searchStepShift, curBestMv, initialMv, -1 ); } for (int i = 0; i < BDMVR_INTME_AREA; i++) { m_sadEnlargeArrayBilMrg[i] = MAX_UINT64; } m_sadEnlargeArrayBilMrg[BDMVR_INTME_CENTER] = curBestCost; curBestCost = curBestCost - (curBestCost >> 2); // cost tuning const Distortion earlyTerminateTh = pu.lumaSize().area(); Distortion tmCost = MAX_UINT64; #if JVET_X0056_DMVD_EARLY_TERMINATION Distortion prevMinCost = MAX_UINT64; #endif for( int searchPrio = 0 ; searchPrio < BDMVR_INTME_FULL_SEARCH_MAX_NUM_ITERATIONS; searchPrio++ ) { if( curBestCost < earlyTerminateTh ) { break; } #if JVET_X0056_DMVD_EARLY_TERMINATION prevMinCost = curBestCost; #endif for (int searchOffsetIdx = 0; searchOffsetIdx < BDMVR_INTME_AREA; searchOffsetIdx++) { tmCost = 0; if( m_searchPriorityBilMrg[searchOffsetIdx] != searchPrio ) { continue; } // adaptive search area base on block dimension if( m_searchEnlargeOffsetBilMrg[searchOffsetIdx].getAbsVer() > adaptiveSearchRangeVer ) { continue; } if( m_searchEnlargeOffsetBilMrg[searchOffsetIdx].getAbsHor() > adaptiveSearchRangeHor ) { continue; } Mv mvOffset(m_searchEnlargeOffsetBilMrg[searchOffsetIdx].getHor() << searchStepShift, m_searchEnlargeOffsetBilMrg[searchOffsetIdx].getVer() << searchStepShift); Mv mvCand[2] = {initialMv[0] + mvOffset, initialMv[1] - mvOffset}; if ( m_sadEnlargeArrayBilMrg[searchOffsetIdx] == MAX_UINT64 ) { m_sadEnlargeArrayBilMrg[searchOffsetIdx] = xBDMVRGetMatchingError( pu, mvCand, subPuBufOffset, useHadamard, useMR, doPreInterpolation, searchStepShift, initialMv, initialMv, -1 ); } tmCost += m_sadEnlargeArrayBilMrg[searchOffsetIdx]; tmCost += (m_sadEnlargeArrayBilMrg[searchOffsetIdx] >> m_costShiftBilMrg1[searchOffsetIdx]); tmCost += 
(m_sadEnlargeArrayBilMrg[searchOffsetIdx] >> m_costShiftBilMrg2[searchOffsetIdx]); if( tmCost < curBestCost ) { curBestCost = tmCost; curBestMv[0] = mvCand[0]; curBestMv[1] = mvCand[1]; } } #if JVET_X0056_DMVD_EARLY_TERMINATION if (searchPrio > 1 && prevMinCost - curBestCost < earlyTerminateTh) { break; } #endif } return curBestCost; } #endif #if JVET_X0049_BDMVR_SW_OPT template<bool hPel> #endif Distortion InterPrediction::xBDMVRMvSquareSearch(Mv (&curBestMv)[2], Distortion curBestCost, PredictionUnit& pu, const Mv (&initialMv)[2], int32_t maxSearchRounds, int32_t searchStepShift, bool useMR, bool useHadmard) { #if !JVET_X0049_BDMVR_SW_OPT if (curBestCost == 0) { return 0; } #endif static const Mv cSearchOffset[8] = { Mv( -1 , 1 ) , Mv( 0 , 1 ) , Mv( 1 , 1 ) , Mv( 1 , 0 ) , Mv( 1 , -1 ) , Mv( 0 , -1 ) , Mv( -1 , -1 ) , Mv( -1 , 0 ) }; int nDirectStart = 0; int nDirectEnd = 7; const int nDirectRounding = 8; const int nDirectMask = 0x07; bool doPreInterpolation = searchStepShift == MV_FRACTIONAL_BITS_INTERNAL; // Calculate TM cost of initial MVs, if it is not set if (curBestCost == std::numeric_limits<Distortion>::max()) { CHECK(searchStepShift < MV_FRACTIONAL_BITS_INTERNAL - 1, "this is not possible"); #if JVET_X0049_BDMVR_SW_OPT if (hPel) { doPreInterpolation = true; Distortion tmCost = getDecoderSideDerivedMvCost(initialMv[0], curBestMv[0], BDMVR_INTME_RANGE + (MV_FRACTIONAL_BITS_INTERNAL - searchStepShift), DECODER_SIDE_MV_WEIGHT); curBestCost = xBDMVRGetMatchingError(pu, curBestMv, useMR, useHadmard); #else if (searchStepShift == MV_FRACTIONAL_BITS_INTERNAL - 1) { doPreInterpolation = true; Distortion tmCost = getDecoderSideDerivedMvCost(initialMv[0], curBestMv[0], BDMVR_INTME_RANGE + (MV_FRACTIONAL_BITS_INTERNAL - searchStepShift), DECODER_SIDE_MV_WEIGHT); curBestCost = xBDMVRGetMatchingError( pu, curBestMv, 0/*subPuOffset*/, useHadmard, useMR, doPreInterpolation, MV_FRACTIONAL_BITS_INTERNAL, curBestMv, curBestMv, -1 ); #endif if( curBestCost < tmCost ) { 
return curBestCost; } curBestCost += tmCost; } else { #if JVET_X0049_ADAPT_DMVR curBestCost = xBDMVRGetMatchingError<3>(pu, curBestMv, 0/*subPuOffset*/, useHadmard, useMR, doPreInterpolation, searchStepShift, curBestMv, initialMv, -1); #else curBestCost = xBDMVRGetMatchingError( pu, curBestMv, 0/*subPuOffset*/, useHadmard, useMR, doPreInterpolation, searchStepShift, curBestMv, initialMv, -1 ); #endif } } Distortion localCostArray[9] = { std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max(), curBestCost }; // Iterative search process for( uint32_t uiRound = 0 ; uiRound < maxSearchRounds ; uiRound++ ) { int nBestDirect = -1; Mv mvCurCenter[2] = {curBestMv[0], curBestMv[1]}; doPreInterpolation |= (searchStepShift == MV_FRACTIONAL_BITS_INTERNAL - 1); for( int nIdx = nDirectStart ; nIdx <= nDirectEnd ; nIdx++ ) { int nDirect = ( nIdx + nDirectRounding ) & nDirectMask; Mv mvOffset(cSearchOffset[nDirect].getHor() << searchStepShift, cSearchOffset[nDirect].getVer() << searchStepShift); #if JVET_X0049_BDMVR_SW_OPT if(hPel && uiRound > 0) #else if (searchStepShift == MV_FRACTIONAL_BITS_INTERNAL - 1 && uiRound > 0) #endif { if( ( nDirect % 2 ) == 0 ) { continue; } } Mv mvCand[2] = {mvCurCenter[0] + mvOffset, mvCurCenter[1] - mvOffset}; #if JVET_X0049_BDMVR_SW_OPT if(!hPel) #else if (searchStepShift == MV_FRACTIONAL_BITS_INTERNAL) #endif { int currentIdx = BDMVR_INTME_CENTER + ((mvCand[0] -initialMv[0]).hor >> searchStepShift) + ((mvCand[0] -initialMv[0]).ver >> searchStepShift) * BDMVR_INTME_STRIDE; if( currentIdx < 0 || currentIdx >= BDMVR_INTME_AREA ) { continue; } } Distortion tmCost = getDecoderSideDerivedMvCost(initialMv[0], mvCand[0], BDMVR_INTME_RANGE + (MV_FRACTIONAL_BITS_INTERNAL - searchStepShift), 
DECODER_SIDE_MV_WEIGHT); if (tmCost > curBestCost) { localCostArray[nDirect] = 2 * tmCost; continue; } #if JVET_X0049_ADAPT_DMVR tmCost += xBDMVRGetMatchingError<3>(pu, mvCand, 0/*subPuOffset*/, useHadmard, useMR, doPreInterpolation, searchStepShift, mvCurCenter, initialMv, nDirect); #else tmCost += xBDMVRGetMatchingError( pu, mvCand, 0/*subPuOffset*/, useHadmard, useMR, doPreInterpolation, searchStepShift, mvCurCenter, initialMv, nDirect ); #endif localCostArray[nDirect] = tmCost; #if JVET_X0049_BDMVR_SW_OPT if(hPel && uiRound > 0) #else if (searchStepShift == MV_FRACTIONAL_BITS_INTERNAL - 1 && uiRound > 0) #endif { continue; } if( tmCost < curBestCost ) { nBestDirect = nDirect; curBestCost = tmCost; curBestMv[0] = mvCand[0]; curBestMv[1] = mvCand[1]; } } if( nBestDirect == -1 ) { break; } int nStep = 2 - ( nBestDirect & 0x01 ); nDirectStart = nBestDirect - nStep; nDirectEnd = nBestDirect + nStep; if ((uiRound + 1) < maxSearchRounds) { xBDMVRUpdateSquareSearchCostLog(localCostArray, nBestDirect); } } #if JVET_X0049_BDMVR_SW_OPT if(!hPel) #else if (searchStepShift == MV_FRACTIONAL_BITS_INTERNAL) #endif { return curBestCost; } // Model-based fractional MVD optimization Mv mvDiff = curBestMv[0] - initialMv[0]; if (localCostArray[8] > 0 && localCostArray[8] == curBestCost && mvDiff.getAbsHor() != (BDMVR_INTME_RANGE << MV_FRACTIONAL_BITS_INTERNAL) && mvDiff.getAbsVer() != (BDMVR_INTME_RANGE << MV_FRACTIONAL_BITS_INTERNAL)) { uint64_t sadbuffer[5]; sadbuffer[0] = (uint64_t)localCostArray[8]; // center sadbuffer[1] = (uint64_t)localCostArray[7]; // left sadbuffer[2] = (uint64_t)localCostArray[5]; // above sadbuffer[3] = (uint64_t)localCostArray[3]; // right sadbuffer[4] = (uint64_t)localCostArray[1]; // bottom int32_t tempDeltaMv[2] = {0, 0}; xSubPelErrorSrfc(sadbuffer, tempDeltaMv); curBestMv[0] += Mv(tempDeltaMv[0], tempDeltaMv[1]); curBestMv[1] -= Mv(tempDeltaMv[0], tempDeltaMv[1]); } return curBestCost; } #if JVET_AB0112_AFFINE_DMVR template<bool hPel> Distortion 
InterPrediction::xBDMVRMvSquareSearchAffine(Mv(&curBestMv)[2], Distortion curBestCost, PredictionUnit& pu, const Mv(&initialMv)[2], int32_t maxSearchRounds, int32_t searchStepShift, bool useMR, bool useHadmard) { #if !JVET_X0049_BDMVR_SW_OPT if (curBestCost == 0) { return 0; } #endif static const Mv cSearchOffset[8] = { Mv(-1 , 1) , Mv(0 , 1) , Mv(1 , 1) , Mv(1 , 0) , Mv(1 , -1) , Mv(0 , -1) , Mv(-1 , -1) , Mv(-1 , 0) }; int nDirectStart = 0; int nDirectEnd = 7; const int nDirectRounding = 8; const int nDirectMask = 0x07; bool doPreInterpolation = searchStepShift == MV_FRACTIONAL_BITS_INTERNAL; bool profTh[2] = { true,true }; int blockWidth[2] = { 0,0 }, blockHeight[2] = { 0,0 }, memBlockWidthExt[2] = { 0,0 }, memBlockHeight[2] = { 0,0 }, memHeight[2] = { 0,0 }, memStride[2] = { 0,0 }; // Calculate TM cost of initial MVs, if it is not set if (curBestCost == std::numeric_limits<Distortion>::max()) { CHECK(searchStepShift < MV_FRACTIONAL_BITS_INTERNAL - 1, "this is not possible"); Mv cpMV[2][3] = { { pu.mvAffi[0][0] , pu.mvAffi[0][1] , pu.mvAffi[0][2] },{ pu.mvAffi[1][0] , pu.mvAffi[1][1] , pu.mvAffi[1][2] } }; if (hPel) { Distortion tmCost = getDecoderSideDerivedMvCost(initialMv[0], curBestMv[0], AFFINE_DMVR_SEARCH_RANGE + (MV_FRACTIONAL_BITS_INTERNAL - searchStepShift), DECODER_SIDE_MV_WEIGHT); for (int i = 0; i < 2; i++) { for (int j = 0; j < 3; j++) { cpMV[i][j] += curBestMv[i]; } } curBestCost = xBDMVRGetMatchingErrorAffine(pu, cpMV, curBestMv,initialMv,doPreInterpolation,hPel,useMR, useHadmard,profTh, blockWidth, blockHeight, memBlockWidthExt, memBlockHeight, memHeight, memStride); if (curBestCost < tmCost) { return curBestCost; } curBestCost += tmCost; } else { CHECK(doPreInterpolation != true, ""); const Picture& refPic0 = *pu.cu->slice->getRefPic(REF_PIC_LIST_0, pu.refIdx[0])->unscaledPic; xCalculteAffineParameters(pu, refPic0, cpMV[0],0,profTh[0], blockWidth[0], blockHeight[0], memBlockWidthExt[0], memBlockHeight[0], memHeight[0], memStride[0]); const 
Picture& refPic1 = *pu.cu->slice->getRefPic(REF_PIC_LIST_1, pu.refIdx[1])->unscaledPic; xCalculteAffineParameters(pu, refPic1,cpMV[1],1,profTh[1], blockWidth[1], blockHeight[1], memBlockWidthExt[1], memBlockHeight[1], memHeight[1], memStride[1] ); curBestCost = xBDMVRGetMatchingErrorAffine(pu, cpMV,curBestMv,initialMv,doPreInterpolation,hPel,useMR, useHadmard, profTh, blockWidth, blockHeight, memBlockWidthExt, memBlockHeight, memHeight, memStride); } } Distortion localCostArray[9] = { std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max(), curBestCost }; // Iterative search process for (uint32_t uiRound = 0; uiRound < maxSearchRounds; uiRound++) { int nBestDirect = -1; Mv mvCurCenter[2] = { curBestMv[0], curBestMv[1] }; for (int nIdx = nDirectStart; nIdx <= nDirectEnd; nIdx++) { int nDirect = (nIdx + nDirectRounding) & nDirectMask; Mv mvOffset(cSearchOffset[nDirect].getHor() << searchStepShift, cSearchOffset[nDirect].getVer() << searchStepShift); #if JVET_X0049_BDMVR_SW_OPT if (hPel && uiRound > 0) #else if (searchStepShift == MV_FRACTIONAL_BITS_INTERNAL - 1 && uiRound > 0) #endif { if ((nDirect % 2) == 0) { continue; } } Mv mvCand[2] = { mvCurCenter[0] + mvOffset, mvCurCenter[1] - mvOffset }; Mv cpMV[2][3] = { { pu.mvAffi[0][0] , pu.mvAffi[0][1], pu.mvAffi[0][2] },{ pu.mvAffi[1][0], pu.mvAffi[1][1], pu.mvAffi[1][2]} }; #if JVET_X0049_BDMVR_SW_OPT if (!hPel) #else if (searchStepShift == MV_FRACTIONAL_BITS_INTERNAL) #endif { int currentIdx = (AFFINE_DMVR_SEARCH_RANGE*2+1)*AFFINE_DMVR_SEARCH_RANGE+ AFFINE_DMVR_SEARCH_RANGE + ((mvCand[0] - initialMv[0]).hor >> searchStepShift) + ((mvCand[0] - initialMv[0]).ver >> searchStepShift) * (2* AFFINE_DMVR_SEARCH_RANGE+1); if (currentIdx < 0 || currentIdx >= 
(2*AFFINE_DMVR_SEARCH_RANGE+1)*(2 * AFFINE_DMVR_SEARCH_RANGE + 1)) { continue; } } Distortion tmCost = getDecoderSideDerivedMvCost(initialMv[0], mvCand[0], AFFINE_DMVR_SEARCH_RANGE + (MV_FRACTIONAL_BITS_INTERNAL - searchStepShift), DECODER_SIDE_MV_WEIGHT); if (tmCost > curBestCost) { localCostArray[nDirect] = 2 * tmCost; continue; } if (hPel) { for (int i = 0; i < 2; i++) { for (int j = 0; j < 3; j++) { cpMV[i][j] += mvCand[i]; } } } tmCost += xBDMVRGetMatchingErrorAffine(pu, cpMV, mvCand,initialMv,doPreInterpolation,hPel,useMR, useHadmard, profTh , blockWidth, blockHeight, memBlockWidthExt, memBlockHeight, memHeight, memStride); localCostArray[nDirect] = tmCost; if (hPel && uiRound > 0) { continue; } if (tmCost < curBestCost) { nBestDirect = nDirect; curBestCost = tmCost; curBestMv[0] = mvCand[0]; curBestMv[1] = mvCand[1]; } } if (nBestDirect == -1) { break; } int nStep = 2 - (nBestDirect & 0x01); nDirectStart = nBestDirect - nStep; nDirectEnd = nBestDirect + nStep; if ((uiRound + 1) < maxSearchRounds) { xBDMVRUpdateSquareSearchCostLog(localCostArray, nBestDirect); } } if (!hPel) { return curBestCost; } // Model-based fractional MVD optimization Mv mvDiff = curBestMv[0] - initialMv[0]; if (localCostArray[8] > 0 && localCostArray[8] == curBestCost && mvDiff.getAbsHor() != (AFFINE_DMVR_SEARCH_RANGE << MV_FRACTIONAL_BITS_INTERNAL) && mvDiff.getAbsVer() != (AFFINE_DMVR_SEARCH_RANGE << MV_FRACTIONAL_BITS_INTERNAL)) { uint64_t sadbuffer[5]; sadbuffer[0] = (uint64_t)localCostArray[8]; // center sadbuffer[1] = (uint64_t)localCostArray[7]; // left sadbuffer[2] = (uint64_t)localCostArray[5]; // above sadbuffer[3] = (uint64_t)localCostArray[3]; // right sadbuffer[4] = (uint64_t)localCostArray[1]; // bottom int32_t tempDeltaMv[2] = { 0, 0 }; xSubPelErrorSrfc(sadbuffer, tempDeltaMv); curBestMv[0] += Mv(tempDeltaMv[0], tempDeltaMv[1]); curBestMv[1] -= Mv(tempDeltaMv[0], tempDeltaMv[1]); } return curBestCost; } #endif #if JVET_X0049_ADAPT_DMVR template <uint8_t dir> Distortion 
InterPrediction::xBDMVRMvOneTemplateHPelSquareSearch(Mv(&curBestMv)[2], Distortion curBestCost, PredictionUnit& pu, const Mv(&initialMv)[2], int32_t maxSearchRounds, int32_t searchStepShift, bool useMR, bool useHadmard)
{
  // Square-pattern MV search in which only one reference list is moved:
  //  - curRefList (= dir >> 1) is the list whose MV is searched,
  //  - templateRefList (the other list) is pinned to initialMv for every candidate.
  // curBestMv is updated in place and the best cost found is returned by the caller-visible
  // return statement that follows this block. A cost of 0 on entry short-circuits the search.
  if (curBestCost == 0)
  {
    return 0;
  }

  // Eight neighbours of the search centre; odd indices are the pure horizontal/vertical offsets.
  static const Mv cSearchOffset[8] = { Mv(-1 , 1) , Mv(0 , 1) , Mv(1 , 1) , Mv(1 , 0) , Mv(1 , -1) , Mv(0 , -1) , Mv(-1 , -1) , Mv(-1 , 0) };
  int nDirectStart = 0;
  int nDirectEnd = 7;
  const int nDirectRounding = 8;
  const int nDirectMask = 0x07;
  bool doPreInterpolation = searchStepShift == MV_FRACTIONAL_BITS_INTERNAL;
  const int curRefList = (dir >> 1);
  const int templateRefList = 1 - curRefList;

  // Calculate TM cost of initial MVs, if it is not set
  if (curBestCost == std::numeric_limits<Distortion>::max())
  {
    CHECK(searchStepShift < MV_FRACTIONAL_BITS_INTERNAL - 1, "this is not possible");
    Distortion tmCost = getDecoderSideDerivedMvCost(initialMv[curRefList], curBestMv[curRefList], BDMVR_INTME_RANGE + (MV_FRACTIONAL_BITS_INTERNAL - searchStepShift), DECODER_SIDE_MV_WEIGHT);
    curBestCost = xBDMVRGetMatchingError(pu, curBestMv, useMR, useHadmard);
    // Early exit: the matching error alone is already below the MV cost term.
    if (curBestCost < tmCost)
    {
      return curBestCost;
    }
    curBestCost += tmCost;
  }

  // Slots 0..7 hold the cost of each neighbour, slot 8 the cost of the current centre.
  Distortion localCostArray[9] = { std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max(),
                                   std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max(),
                                   std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max(),
                                   std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max(),
                                   curBestCost };

  // Iterative search process
  for (uint32_t uiRound = 0; uiRound < maxSearchRounds; uiRound++)
  {
    int nBestDirect = -1;
    Mv mvCurCenter[2] = { curBestMv[0], curBestMv[1] };
    doPreInterpolation |= (searchStepShift == MV_FRACTIONAL_BITS_INTERNAL - 1);

    for (int nIdx = nDirectStart; nIdx <= nDirectEnd; nIdx++)
    {
      // Wrap the (possibly negative or >7) running index back into 0..7.
      int nDirect = (nIdx + nDirectRounding) & nDirectMask;
      Mv mvOffset(cSearchOffset[nDirect].getHor() << searchStepShift, cSearchOffset[nDirect].getVer() << searchStepShift);
      // After the first round only the odd-indexed (horizontal/vertical) neighbours are evaluated.
      if (uiRound > 0)
      {
        if ((nDirect % 2) == 0)
        {
          continue;
        }
      }

      Mv mvCand[2] = { mvCurCenter[0] + mvOffset, mvCurCenter[1] - mvOffset };
      mvCand[templateRefList] = initialMv[templateRefList]; // the template list never moves
      Distortion tmCost = getDecoderSideDerivedMvCost(initialMv[curRefList], mvCand[curRefList], BDMVR_INTME_RANGE + (MV_FRACTIONAL_BITS_INTERNAL - searchStepShift), DECODER_SIDE_MV_WEIGHT);
      // MV cost alone already exceeds the best total cost: record a doubled cost and skip the matching error.
      if (tmCost > curBestCost)
      {
        localCostArray[nDirect] = 2 * tmCost;
        continue;
      }

      tmCost += xBDMVRGetMatchingError<dir>(pu, mvCand, 0/*subPuOffset*/, useHadmard, useMR, doPreInterpolation, searchStepShift, mvCurCenter, initialMv, nDirect);
      localCostArray[nDirect] = tmCost;
      // Rounds after the first only record costs; they never move the best MV.
      if (uiRound > 0)
      {
        continue;
      }

      if (tmCost < curBestCost)
      {
        nBestDirect = nDirect;
        curBestCost = tmCost;
        curBestMv[0] = mvCand[0];
        curBestMv[1] = mvCand[1];
      }
    }

    // No neighbour improved on the centre: stop searching.
    if (nBestDirect == -1)
    {
      break;
    }

    // Narrow the next round to the directions around the winner (step 1 for odd, 2 for even indices).
    int nStep = 2 - (nBestDirect & 0x01);
    nDirectStart = nBestDirect - nStep;
    nDirectEnd = nBestDirect + nStep;
    if ((uiRound + 1) < maxSearchRounds)
    {
      xBDMVRUpdateSquareSearchCostLog(localCostArray, nBestDirect);
    }
  }

  CHECK(curBestMv[templateRefList] != initialMv[templateRefList], "this is not possible");

  // Model-based fractional MVD optimization
  // Only applied when the centre cost is the (non-zero) best cost and the refined MV is not on
  // the border of the search range in either direction.
  Mv mvDiff = curBestMv[curRefList] - initialMv[curRefList];
  if (localCostArray[8] > 0 && localCostArray[8] == curBestCost && mvDiff.getAbsHor() != (BDMVR_INTME_RANGE << MV_FRACTIONAL_BITS_INTERNAL) && mvDiff.getAbsVer() != (BDMVR_INTME_RANGE << MV_FRACTIONAL_BITS_INTERNAL))
  {
    uint64_t sadbuffer[5];
    sadbuffer[0] = (uint64_t)localCostArray[8]; // center
    sadbuffer[1] = (uint64_t)localCostArray[7]; // left
    sadbuffer[2] = (uint64_t)localCostArray[5]; // above
    sadbuffer[3] = (uint64_t)localCostArray[3]; // right
    sadbuffer[4] = (uint64_t)localCostArray[1]; // bottom

    int32_t tempDeltaMv[2] = { 0, 0 };
    xSubPelErrorSrfc(sadbuffer, tempDeltaMv);
    // The sub-pel delta is added to list 0 (dir == 1) or subtracted from list 1 (otherwise).
    if (dir == 1) curBestMv[0] += Mv(tempDeltaMv[0], tempDeltaMv[1]);
    else curBestMv[1] -= Mv(tempDeltaMv[0], tempDeltaMv[1]);
  }
return curBestCost;
}
#endif

#if JVET_AB0112_AFFINE_DMVR
// Bilateral matching error for affine DMVR.
// Fills the L0 and L1 luma prediction blocks into m_filteredBlock[3][list][0] and returns the
// distortion between them as computed by the RdCost distortion function.
//  - hPel == false: blocks are produced by xBDMVRFillBlkPredPelBufferAffineOPT (uses mvOffset,
//    initialMv, doInterpolation and the per-list block/memory geometry arguments);
//  - hPel == true : blocks are produced by xBDMVRFillBlkPredPelBufferAffine from mv only.
// doInterpolation is an in/out flag and is always reset to false before returning.
Distortion InterPrediction::xBDMVRGetMatchingErrorAffine(const PredictionUnit& pu, Mv(&mv)[2][3] ,Mv(&mvOffset)[2] ,const Mv(&initialMv)[2] ,bool& doInterpolation ,bool hPel ,bool useMR, bool useHadmard , const bool (&profTh)[2] , const int(&blockWidth)[2], const int(&blockHeight)[2], const int(&memBlockWidthExt)[2], const int (&memBlockHeight)[2], const int (&memHeight)[2], const int(&memStride)[2] )
{
  // Fill L0's and L1's prediction blocks
#if JVET_X0049_BDMVR_SW_OPT
  Pel* pelBuffer[2] = { m_filteredBlock[3][REF_PIC_LIST_0][0] + BDMVR_CENTER_POSITION, m_filteredBlock[3][REF_PIC_LIST_1][0] + BDMVR_CENTER_POSITION };
  const SizeType stride = BDMVR_BUF_STRIDE;
#else
  Pel* pelBuffer[2] = { m_filteredBlock[3][REF_PIC_LIST_0][0], m_filteredBlock[3][REF_PIC_LIST_1][0] };
  const SizeType stride = pu.lwidth();
#endif
  PelUnitBuf predBuf[2] = { PelUnitBuf(pu.chromaFormat, PelBuf(pelBuffer[REF_PIC_LIST_0], stride, pu.lwidth(), pu.lheight())), PelUnitBuf(pu.chromaFormat, PelBuf(pelBuffer[REF_PIC_LIST_1], stride, pu.lwidth(), pu.lheight())) };

  for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++)
  {
    const Picture& refPic = *pu.cu->slice->getRefPic((RefPicList)refList, pu.refIdx[refList])->unscaledPic;
    if (!hPel)
    {
      xBDMVRFillBlkPredPelBufferAffineOPT(pu, refPic, (RefPicList)refList, mv[refList], mvOffset[refList], initialMv[refList], doInterpolation, predBuf[refList], pu.cs->slice->clpRng(COMPONENT_Y), profTh[refList] , blockWidth[refList], blockHeight[refList], memBlockWidthExt[refList],memBlockHeight[refList], memHeight[refList], memStride[refList]);
    }
    else
    {
      xBDMVRFillBlkPredPelBufferAffine(pu, refPic, mv[refList], predBuf[refList], pu.cs->slice->clpRng(COMPONENT_Y));
    }
  }
  doInterpolation = false;

  // Compute distortion between L0's and L1's prediction blocks
  DistParam cDistParam;
  cDistParam.applyWeight = false;
  cDistParam.useMR = useMR;
#if JVET_X0049_BDMVR_SW_OPT
  m_pcRdCost->setDistParam(cDistParam, predBuf[0].Y(), predBuf[1].Y(), pu.cu->slice->clpRng(COMPONENT_Y).bd, COMPONENT_Y, useHadmard);
#if FULL_NBIT
  if (useHadmard)
  {
    return cDistParam.distFunc(cDistParam) >> 1; // magic shift, benefit for early terminate
  }
  else
  {
    // Scale the distortion back by the number of bits above 8-bit depth.
    int32_t precisionAdj = cDistParam.bitDepth > 8 ? cDistParam.bitDepth - 8 : 0;
    return cDistParam.distFunc(cDistParam) >> precisionAdj;
  }
#else
  return cDistParam.distFunc(cDistParam);
#endif
#else
  m_pcRdCost->setDistParam(cDistParam, predBuf[0].Y(), predBuf[1].Y(), pu.cu->slice->clpRng(COMPONENT_Y).bd, COMPONENT_Y, false);
#if FULL_NBIT
  int32_t precisionAdj = cDistParam.bitDepth > 8 ? cDistParam.bitDepth - 8 : 0;
  return cDistParam.distFunc(cDistParam) >> precisionAdj;
#else
  return cDistParam.distFunc(cDistParam);
#endif
#endif
}
#endif

// Bilateral matching error for a translational PU.
// Fills the L0 and L1 luma prediction blocks for mv[0] / mv[1] into m_filteredBlock[3][list][0]
// and returns the distortion between them. With JVET_X0083_BM_AMVP_MERGE_MODE, the list whose
// opposite list has amvpMergeModeFlag set is not refilled here.
// NOTE(review): that skipped list looks like the AMVP list pre-filled by
// getAmvpMergeModeMergeList() -- confirm against the callers.
#if JVET_X0049_BDMVR_SW_OPT
Distortion InterPrediction::xBDMVRGetMatchingError(const PredictionUnit& pu, const Mv(&mv)[2], bool useMR, bool useHadmard)
#else
Distortion InterPrediction::xBDMVRGetMatchingError(const PredictionUnit& pu, const Mv(&mv)[2], bool useMR)
#endif
{
  // Fill L0's and L1's prediction blocks
#if JVET_X0049_BDMVR_SW_OPT
  Pel* pelBuffer[2] = { m_filteredBlock[3][REF_PIC_LIST_0][0] + BDMVR_CENTER_POSITION, m_filteredBlock[3][REF_PIC_LIST_1][0] + BDMVR_CENTER_POSITION };
  const SizeType stride = BDMVR_BUF_STRIDE;
#else
  Pel* pelBuffer[2] = { m_filteredBlock[3][REF_PIC_LIST_0][0], m_filteredBlock[3][REF_PIC_LIST_1][0] };
  const SizeType stride = pu.lwidth();
#endif
  PelUnitBuf predBuf[2] = { PelUnitBuf(pu.chromaFormat, PelBuf(pelBuffer[REF_PIC_LIST_0], stride, pu.lwidth(), pu.lheight())), PelUnitBuf(pu.chromaFormat, PelBuf(pelBuffer[REF_PIC_LIST_1], stride, pu.lwidth(), pu.lheight())) };

  for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++)
  {
#if JVET_X0083_BM_AMVP_MERGE_MODE
    if (pu.amvpMergeModeFlag[1 - refList])
    {
      continue;
    }
#endif
    const Picture& refPic = *pu.cu->slice->getRefPic((RefPicList)refList, pu.refIdx[refList])->unscaledPic;
    xBDMVRFillBlkPredPelBuffer( pu, refPic, mv[refList] , predBuf[refList], pu.cs->slice->clpRng(COMPONENT_Y) );
  }

  // Compute distortion between L0's and L1's prediction blocks
  DistParam cDistParam;
  cDistParam.applyWeight = false;
  cDistParam.useMR = useMR;
#if JVET_X0049_BDMVR_SW_OPT
  m_pcRdCost->setDistParam(cDistParam, predBuf[0].Y(), predBuf[1].Y(), pu.cu->slice->clpRng(COMPONENT_Y).bd, COMPONENT_Y, useHadmard);
#if FULL_NBIT
  if (useHadmard)
  {
    return cDistParam.distFunc(cDistParam) >> 1; // magic shift, benefit for early terminate
  }
  else
  {
    int32_t precisionAdj = cDistParam.bitDepth > 8 ? cDistParam.bitDepth - 8 : 0;
    return cDistParam.distFunc(cDistParam) >> precisionAdj;
  }
#else
  return cDistParam.distFunc(cDistParam);
#endif
#else
  m_pcRdCost->setDistParam( cDistParam, predBuf[0].Y(), predBuf[1].Y(), pu.cu->slice->clpRng(COMPONENT_Y).bd, COMPONENT_Y, false );
#if FULL_NBIT
  int32_t precisionAdj = cDistParam.bitDepth > 8 ? cDistParam.bitDepth - 8 : 0;
  return cDistParam.distFunc( cDistParam ) >> precisionAdj;
#else
  return cDistParam.distFunc( cDistParam );
#endif
#endif
}

#if MULTI_PASS_DMVR
#if JVET_X0049_ADAPT_DMVR
template <uint8_t dir>
#endif
Distortion InterPrediction::xBDMVRGetMatchingError(const PredictionUnit& pu, const Mv (&mv)[2], const int subPuBufOffset, bool useHadmard, bool useMR , bool& doPreInterpolation, int32_t searchStepShift, const Mv (&mvCenter)[2] , const Mv (&mvInitial)[2] , int nDirect )
{
  // Pre-interpolation
  if (doPreInterpolation)
  {
#if JVET_X0049_ADAPT_DMVR
    xBDMVRPreInterpolation<dir>(pu, mvCenter, searchStepShift == MV_FRACTIONAL_BITS_INTERNAL, searchStepShift == MV_FRACTIONAL_BITS_INTERNAL - 1);
#else
    xBDMVRPreInterpolation( pu, mvCenter, searchStepShift == MV_FRACTIONAL_BITS_INTERNAL, searchStepShift == MV_FRACTIONAL_BITS_INTERNAL - 1 );
#endif
    doPreInterpolation = false;
  }

  // Locate L0's and L1's prediction blocks in pre-interpolation buffer
#if JVET_X0049_BDMVR_SW_OPT
  const int32_t stride = BDMVR_BUF_STRIDE;
#else
  const int32_t stride = MAX_CU_SIZE + ( BDMVR_INTME_RANGE << 1 ) + (
BDMVR_SIMD_IF_FACTOR - 2 );
#endif
  Pel* pelBuffer[2] = { nullptr, nullptr };
  if (searchStepShift == MV_FRACTIONAL_BITS_INTERNAL)
  {
    // Full-pel step: address the candidate inside the pre-interpolated buffer by its
    // integer displacement from the initial MV.
    Mv mvDiff[2] = { mv[0] - mvInitial[0], mv[1] - mvInitial[1] };
    mvDiff[0] >>= MV_FRACTIONAL_BITS_INTERNAL;
    mvDiff[1] >>= MV_FRACTIONAL_BITS_INTERNAL;
#if JVET_X0049_ADAPT_DMVR
    if (dir == 1)
    {
      // fix template at refList1
      CHECK(subPuBufOffset != 0, "this is not possible");
      pelBuffer[0] = m_filteredBlock[3][REF_PIC_LIST_0][0] + subPuBufOffset + BDMVR_CENTER_POSITION + mvDiff[0].getVer() * stride + mvDiff[0].getHor();
      pelBuffer[1] = m_filteredBlock[3][REF_PIC_LIST_1][0] + BDMVR_CENTER_POSITION;
    }
    else if (dir == 2)
    {
      // Mirror case: list 0 stays at the centre position, list 1 follows the candidate.
      CHECK(subPuBufOffset != 0, "this is not possible");
      pelBuffer[0] = m_filteredBlock[3][REF_PIC_LIST_0][0] + BDMVR_CENTER_POSITION;
      pelBuffer[1] = m_filteredBlock[3][REF_PIC_LIST_1][0] + subPuBufOffset + BDMVR_CENTER_POSITION + mvDiff[1].getVer() * stride + mvDiff[1].getHor();
    }
    else
    {
      // Both lists follow their candidate displacement.
      pelBuffer[0] = m_filteredBlock[3][REF_PIC_LIST_0][0] + subPuBufOffset + BDMVR_CENTER_POSITION + mvDiff[0].getVer() * stride + mvDiff[0].getHor();
      pelBuffer[1] = m_filteredBlock[3][REF_PIC_LIST_1][0] + subPuBufOffset + BDMVR_CENTER_POSITION + mvDiff[1].getVer() * stride + mvDiff[1].getHor();
    }
#else
    pelBuffer[0] = m_filteredBlock[3][REF_PIC_LIST_0][0] + subPuBufOffset + ( BDMVR_INTME_RANGE + mvDiff[0].getVer() ) * stride + BDMVR_INTME_RANGE + mvDiff[0].getHor();
    pelBuffer[1] = m_filteredBlock[3][REF_PIC_LIST_1][0] + subPuBufOffset + ( BDMVR_INTME_RANGE + mvDiff[1].getVer() ) * stride + BDMVR_INTME_RANGE + mvDiff[1].getHor();
#endif
  }
  else if (searchStepShift == MV_FRACTIONAL_BITS_INTERNAL - 1)
  {
    // Half-pel step: pick the pre-interpolated phase buffer and the sample offset inside it
    // from the search direction index; list 1 uses the opposite direction (nDirect + 4).
    const int32_t cFracBufOffset[8] = { stride, stride, stride + 1, 1, 1, 0, 0, 0 };
    static const uint32_t phaseIdxList[4] = { 1, 2, 1, 0 };
    int phaseIdx = phaseIdxList[ nDirect & 0x3 ];
#if JVET_X0049_ADAPT_DMVR
    if (dir == 3)
    {
      pelBuffer[0] = m_filteredBlock[phaseIdx][REF_PIC_LIST_0][0] + cFracBufOffset[nDirect];
      pelBuffer[1] = m_filteredBlock[phaseIdx][REF_PIC_LIST_1][0] + cFracBufOffset[(nDirect + 4) & 0x7];
    }
    else if (dir == 1)
    {
      pelBuffer[0] = m_filteredBlock[phaseIdx][REF_PIC_LIST_0][0] + cFracBufOffset[nDirect];
      pelBuffer[1] = m_filteredBlock[3][REF_PIC_LIST_1][0] + BDMVR_CENTER_POSITION;
    }
    else
    {
      pelBuffer[0] = m_filteredBlock[3][REF_PIC_LIST_0][0] + BDMVR_CENTER_POSITION;
      pelBuffer[1] = m_filteredBlock[phaseIdx][REF_PIC_LIST_1][0] + cFracBufOffset[(nDirect + 4) & 0x7];
    }
#else
    pelBuffer[0] = m_filteredBlock[phaseIdx][REF_PIC_LIST_0][0] + cFracBufOffset[ nDirect ];
    pelBuffer[1] = m_filteredBlock[phaseIdx][REF_PIC_LIST_1][0] + cFracBufOffset[( nDirect + 4 ) & 0x7];
#endif
  }
  else
  {
    // Any other step size: no pre-interpolated data, fall back to on-the-fly prediction.
    // NOTE(review): useHadmard is not forwarded on this path -- confirm this is intended.
    return xBDMVRGetMatchingError(pu, mv, useMR);
  }

  PelUnitBuf predBuf[2] = { PelUnitBuf(pu.chromaFormat, PelBuf(pelBuffer[REF_PIC_LIST_0], stride, pu.lwidth(), pu.lheight())), PelUnitBuf(pu.chromaFormat, PelBuf(pelBuffer[REF_PIC_LIST_1], stride, pu.lwidth(), pu.lheight())) };

  // Compute distortion between L0's and L1's prediction blocks
  DistParam cDistParam;
  cDistParam.applyWeight = false;
  cDistParam.useMR = useMR;
  m_pcRdCost->setDistParam( cDistParam, predBuf[0].Y(), predBuf[1].Y(), pu.cu->slice->clpRng(COMPONENT_Y).bd, COMPONENT_Y, useHadmard );
#if FULL_NBIT
  if (useHadmard)
  {
    return cDistParam.distFunc( cDistParam ) >> 1; // magic shift, benefit for early terminate
  }
  else
  {
    int32_t precisionAdj = cDistParam.bitDepth > 8 ?
cDistParam.bitDepth - 8 : 0; return cDistParam.distFunc( cDistParam ) >> precisionAdj; } #else return cDistParam.distFunc( cDistParam ); #endif } #endif #endif #if MULTI_HYP_PRED void InterPrediction::xAddHypMC(PredictionUnit& pu, PelUnitBuf& predBuf, PelUnitBuf* predBufWOBIO, const bool lumaOnly) { CHECK(pu.Y().area() <= MULTI_HYP_PRED_RESTRICT_BLOCK_SIZE, "Multi Hyp: Block too small!"); CHECK(pu.cu->geoFlag, "multi-hyp does not work with geo"); CHECK(pu.ciipFlag, "multi-hyp does not work with intra/inter"); CHECK(!pu.mergeFlag && pu.interDir != 3, "multihyp selected for AMVP uni prediction"); // get prediction for current additional hypothesis const UnitArea unitAreaFromPredBuf(predBuf.chromaFormat, Area(Position(0, 0), predBuf.Y())); PelUnitBuf tempBuf = m_additionalHypothesisStorage.getBuf(unitAreaFromPredBuf); const auto savedAffine = pu.cu->affine; const auto savedIMV = pu.cu->imv; #if INTER_LIC auto savedLICFlag = pu.cu->LICFlag; #endif MultiHypVec savedHypVec = pu.addHypData; pu.addHypData.clear(); pu.mvRefine = true; motionCompensation(pu, predBuf, REF_PIC_LIST_X, true, !lumaOnly, predBufWOBIO); pu.mvRefine = false; #if INTER_LIC m_storeBeforeLIC = false; #endif PredictionUnit fakePredData = pu; fakePredData.cu->affine = false; fakePredData.mergeFlag = false; fakePredData.mergeType = MRG_TYPE_DEFAULT_N; fakePredData.mmvdMergeFlag = false; fakePredData.ciipFlag = false; #if MULTI_PASS_DMVR fakePredData.bdmvrRefine = false; #endif for (int i = 0; i < savedHypVec.size(); i++) { const MultiHypPredictionData mhData = savedHypVec[i]; // get legacy ref list and ref idx const auto &MHRefPics = pu.cs->slice->getMultiHypRefPicList(); CHECK(mhData.refIdx < 0, "Multi Hyp: mhData.refIdx < 0"); const int iRefPicList = mhData.isMrg ? mhData.refList : MHRefPics[mhData.refIdx].refList; const int iRefIdx = mhData.isMrg ? 
mhData.refIdx : MHRefPics[mhData.refIdx].refIdx; // construct fake object using legacy indexing fakePredData.interDir = iRefPicList + 1; fakePredData.mv[iRefPicList] = mhData.mv; fakePredData.refIdx[iRefPicList] = iRefIdx; fakePredData.refIdx[1 - iRefPicList] = -1; #if INTER_LIC fakePredData.cu->LICFlag = mhData.LICFlag; #endif fakePredData.cu->imv = mhData.imv; fakePredData.mvRefine = true; motionCompensation(fakePredData, tempBuf, REF_PIC_LIST_X, true, !lumaOnly); fakePredData.mvRefine = false; CHECK(mhData.weightIdx < 0, "Multi Hyp: mhData.weightIdx < 0"); CHECK(mhData.weightIdx >= MULTI_HYP_PRED_NUM_WEIGHTS, "Multi Hyp: mhData.weightIdx >= MULTI_HYP_PRED_NUM_WEIGHTS"); predBuf.addHypothesisAndClip(tempBuf, g_addHypWeight[mhData.weightIdx], pu.cs->slice->clpRngs(), lumaOnly); } #if INTER_LIC pu.cu->LICFlag = savedLICFlag; #endif pu.cu->imv = savedIMV; pu.cu->affine = savedAffine; pu.addHypData = savedHypVec; } #endif #if JVET_X0083_BM_AMVP_MERGE_MODE void InterPrediction::getAmvpMergeModeMergeList(PredictionUnit& pu, MvField* mvFieldAmListCommon, const int decAmvpRefIdx) { RefPicList refListMerge = pu.amvpMergeModeFlag[0] ? 
REF_PIC_LIST_0 : REF_PIC_LIST_1;
  RefPicList refListAmvp = RefPicList(1 - refListMerge);

  // Reset the output table. The first MAX_NUM_AMVP_CANDS_MAX_REF entries hold the merge-side
  // motion, the entries after that offset hold the AMVP-side motion for the same index.
#if JVET_Y0129_MVD_SIGNAL_AMVP_MERGE_MODE
  for (int idx = 0; idx < pu.cu->slice->getNumRefIdx(refListAmvp) * AMVP_MAX_NUM_CANDS_MEM; idx++)
#else
  for (int idx = 0; idx < pu.cu->slice->getNumRefIdx(refListAmvp) * AMVP_MAX_NUM_CANDS; idx++)
#endif
  {
    mvFieldAmListCommon[idx] = MvField();
    mvFieldAmListCommon[MAX_NUM_AMVP_CANDS_MAX_REF + idx] = MvField();
  }

  // decAmvpRefIdx >= 0 restricts the derivation to that single reference index and to the
  // MVP index already stored in pu; otherwise every reference index / MVP candidate is visited.
  int amvpRefIdxStart = 0;
  int amvpRefIdxEnd = pu.cu->slice->getNumRefIdx(refListAmvp);
  int decAmvpMvpIdx = -1;
  if (decAmvpRefIdx >= 0)
  {
    amvpRefIdxStart = decAmvpRefIdx;
    amvpRefIdxEnd = decAmvpRefIdx + 1;
    decAmvpMvpIdx = pu.mvpIdx[refListAmvp];
  }
#if !JVET_Y0128_NON_CTC
  const int curPoc = pu.cu->slice->getPOC();
#endif
  const bool useMR = pu.lumaSize().area() > 64;

  for (int refIdxAmvp = amvpRefIdxStart; refIdxAmvp < amvpRefIdxEnd; refIdxAmvp++)
  {
#if JVET_Y0128_NON_CTC
    if (pu.cu->slice->getAmvpMergeModeValidRefIdx(refListAmvp, refIdxAmvp) == false)
    {
      continue;
    }
#if JVET_AA0124_AMVPMERGE_DMVD_OFF_RPR_ON
    if (pu.cu->slice->getSPS()->getUseDMVDMode() == true)
    {
#endif
      CHECK(pu.cu->slice->getRefPic(refListAmvp, refIdxAmvp)->isRefScaled(pu.cu->cs->pps), "this is not possible");
#if JVET_AA0124_AMVPMERGE_DMVD_OFF_RPR_ON
    }
#endif
#else
    // Skip AMVP reference pictures for which no merge-list reference lies on the opposite
    // temporal side of the current picture.
    const int amvpRefPoc = pu.cu->slice->getRefPOC(refListAmvp, refIdxAmvp);
    bool findValidMergeRefPic = false;
    for (int refIdxCandMerge = 0; refIdxCandMerge < pu.cu->slice->getNumRefIdx(refListMerge); refIdxCandMerge++)
    {
      const int candMergePoc = pu.cu->slice->getRefPOC(refListMerge, refIdxCandMerge);
      if ((amvpRefPoc - curPoc) * (candMergePoc - curPoc) < 0)
      {
        findValidMergeRefPic = true;
        break;
      }
    }
    if (findValidMergeRefPic == false)
    {
      continue;
    }
#endif
    pu.refIdx[refListAmvp] = refIdxAmvp;
#if JVET_AB0078_AMVPMERGE_LDB
    const int pocAmvp = pu.cu->slice->getRefPOC(refListAmvp, pu.refIdx[refListAmvp]);
    const int curPoc = pu.cu->slice->getPOC();
#endif
    AMVPInfo amvpInfo;
    PU::fillMvpCand( pu, refListAmvp, refIdxAmvp, amvpInfo
#if TM_AMVP
      , this
#endif
    );
    MergeCtx bmMergeCtx;
    PU::getInterMergeCandidates(pu, bmMergeCtx, 0, AMVP_MERGE_MODE_MERGE_LIST_MAX_CANDS - 1);

    int bestMvpIdxLoopStart = 0;
    int bestMvpIdxLoopEnd = amvpInfo.numCand;
    if (decAmvpRefIdx >= 0)
    {
      bestMvpIdxLoopStart = decAmvpMvpIdx;
      bestMvpIdxLoopEnd = bestMvpIdxLoopStart + 1;
    }

    for (int bestMvpIdxToTest = bestMvpIdxLoopStart; bestMvpIdxToTest < bestMvpIdxLoopEnd; bestMvpIdxToTest++)
    {
#if JVET_Y0129_MVD_SIGNAL_AMVP_MERGE_MODE
      const int mvFieldMergeIdx = refIdxAmvp * AMVP_MAX_NUM_CANDS_MEM + bestMvpIdxToTest;
#else
      const int mvFieldMergeIdx = refIdxAmvp * AMVP_MAX_NUM_CANDS + bestMvpIdxToTest;
#endif
      const int mvFieldAmvpIdx = MAX_NUM_AMVP_CANDS_MAX_REF + mvFieldMergeIdx;
      pu.mv[refListAmvp] = amvpInfo.mvCand[bestMvpIdxToTest];

      // BM select merge candidate
      struct bmCostSort
      {
        int mergeIdx;
        Distortion bmCost;
      };
      bmCostSort temp;
      const auto CostIncSort = [](const bmCostSort &x, const bmCostSort &y) { return x.bmCost < y.bmCost; };
      std::vector<bmCostSort> input;
      // here to select the merge cand which has minimum BM cost, at each cand, the cost is derived by minBMcost(mvpIdx0, mvpIdx1)
      if (bmMergeCtx.numValidMergeCand > 1)
      {
        // pre Fill AMVP prediction blocks
#if JVET_X0049_BDMVR_SW_OPT
        Pel* pelBufferAmvp = m_filteredBlock[3][refListAmvp][0] + BDMVR_CENTER_POSITION;
        const SizeType stride = BDMVR_BUF_STRIDE;
#else
        Pel* pelBufferAmvp = m_filteredBlock[3][refListAmvp][0];
        const SizeType stride = pu.lwidth();
#endif
        PelUnitBuf predBufAmvp = PelUnitBuf(pu.chromaFormat, PelBuf(pelBufferAmvp, stride, pu.lwidth(), pu.lheight()));
        const Picture& refPicAmvp = *pu.cu->slice->getRefPic((RefPicList)refListAmvp, pu.refIdx[refListAmvp])->unscaledPic;
        xBDMVRFillBlkPredPelBuffer( pu, refPicAmvp, pu.mv[refListAmvp] , predBufAmvp, pu.cs->slice->clpRng(COMPONENT_Y) );

        Mv mvAmBdmvr[2/*refListId*/];
        for (int mergeIdx = 0; mergeIdx < bmMergeCtx.numValidMergeCand; mergeIdx++)
        {
          pu.refIdx[refListMerge] = bmMergeCtx.mvFieldNeighbours[(mergeIdx << 1) + refListMerge].refIdx;
          mvAmBdmvr[refListAmvp] = amvpInfo.mvCand[bestMvpIdxToTest];
          mvAmBdmvr[refListMerge] = bmMergeCtx.mvFieldNeighbours[(mergeIdx << 1) + refListMerge].mv;
#if JVET_Y0128_NON_CTC
#if JVET_AA0124_AMVPMERGE_DMVD_OFF_RPR_ON
#if JVET_AB0078_AMVPMERGE_LDB
          if (pu.cu->slice->getSPS()->getUseDMVDMode() == true && !pu.cu->slice->getCheckLDC())
#else
          if (pu.cu->slice->getSPS()->getUseDMVDMode() == true)
#endif
          {
#endif
            CHECK(pu.cu->slice->getRefPic((RefPicList)refListMerge, pu.refIdx[refListMerge])->isRefScaled(pu.cs->pps), "this is not possible");
#if JVET_AA0124_AMVPMERGE_DMVD_OFF_RPR_ON
          }
#endif
#endif
#if JVET_Z0085_AMVPMERGE_DMVD_OFF
#if JVET_AB0078_AMVPMERGE_LDB
          const int pocMerge = pu.cu->slice->getRefPOC(refListMerge, pu.refIdx[refListMerge]);
          if (pu.cu->cs->sps->getUseDMVDMode() && ((pocAmvp - curPoc) * (pocMerge - curPoc) < 0))
#else
          if (pu.cu->cs->sps->getUseDMVDMode())
#endif
          {
#endif
            Distortion tmpBmCost = xBDMVRGetMatchingError(pu, mvAmBdmvr, useMR);
            temp.mergeIdx = mergeIdx;
            temp.bmCost = tmpBmCost;
#if JVET_Z0085_AMVPMERGE_DMVD_OFF
          }
          else
          {
            // Matching is not evaluated for this candidate: give it the worst possible cost
            // so that the stable sort keeps such candidates in their original order at the end.
            temp.mergeIdx = mergeIdx;
            temp.bmCost = std::numeric_limits<Distortion>::max();
          }
#endif
          input.push_back(temp);
        }
        stable_sort(input.begin(), input.end(), CostIncSort);
      }
#if JVET_Y0129_MVD_SIGNAL_AMVP_MERGE_MODE
      else
      {
        temp.mergeIdx = 0;
        temp.bmCost = 0;
        input.push_back(temp);
      }
#else
      if (bmMergeCtx.numValidMergeCand == 1)
      {
        pu.mv[refListMerge] = bmMergeCtx.mvFieldNeighbours[refListMerge].mv;
        pu.refIdx[refListMerge] = bmMergeCtx.mvFieldNeighbours[refListMerge].refIdx;
      }
      else
#endif
      {
        // NOTE(review): input[0] requires a non-empty list; in the #else configuration above
        // that presumes numValidMergeCand is never 0 here -- confirm.
        pu.mv[refListMerge] = bmMergeCtx.mvFieldNeighbours[(input[0].mergeIdx << 1) + refListMerge].mv;
        pu.refIdx[refListMerge] = bmMergeCtx.mvFieldNeighbours[(input[0].mergeIdx << 1) + refListMerge].refIdx;
      }
#if JVET_Y0129_MVD_SIGNAL_AMVP_MERGE_MODE
      // MVP indices 0 and 2 use the best merge candidate; the remaining index uses the
      // second-best one (or the only one, without refinement, when a single candidate exists).
      if (bestMvpIdxToTest == 0 || bestMvpIdxToTest == 2)
      {
#endif
        amvpMergeModeMvRefinement(pu, mvFieldAmListCommon, mvFieldMergeIdx, mvFieldAmvpIdx);
#if JVET_Y0129_MVD_SIGNAL_AMVP_MERGE_MODE
      }
      else if (bmMergeCtx.numValidMergeCand == 1)
      {
        mvFieldAmListCommon[mvFieldMergeIdx].refIdx = bmMergeCtx.mvFieldNeighbours[(input[0].mergeIdx << 1) + refListMerge].refIdx;
        mvFieldAmListCommon[mvFieldMergeIdx].mv = bmMergeCtx.mvFieldNeighbours[(input[0].mergeIdx << 1) + refListMerge].mv;
        mvFieldAmListCommon[mvFieldAmvpIdx].refIdx = refIdxAmvp;
        mvFieldAmListCommon[mvFieldAmvpIdx].mv = amvpInfo.mvCand[bestMvpIdxToTest];
      }
      else
      {
        // NOTE(review): input[1] is only valid when numValidMergeCand >= 2; this branch is also
        // reached if numValidMergeCand were 0 -- presumably that never happens, confirm.
        pu.mv[refListAmvp] = amvpInfo.mvCand[bestMvpIdxToTest];
        pu.refIdx[refListAmvp] = refIdxAmvp;
        pu.mv[refListMerge] = bmMergeCtx.mvFieldNeighbours[(input[1].mergeIdx << 1) + refListMerge].mv;
        pu.refIdx[refListMerge] = bmMergeCtx.mvFieldNeighbours[(input[1].mergeIdx << 1) + refListMerge].refIdx;
        amvpMergeModeMvRefinement(pu, mvFieldAmListCommon, mvFieldMergeIdx, mvFieldAmvpIdx);
      }
      if (bestMvpIdxToTest == 2)
      {
        mvFieldAmListCommon[mvFieldAmvpIdx].mv.roundTransPrecInternal2Amvr(pu.cu->imv);
      }
#endif
    } // bestMvpIdxLoop
  } // refIdxAmvp loop
}

void InterPrediction::amvpMergeModeMvRefinement(PredictionUnit& pu, MvField* mvFieldAmListCommon, const int mvFieldMergeIdx, const int mvFieldAmvpIdx)
{
  const RefPicList refListMerge = pu.amvpMergeModeFlag[0] ?
REF_PIC_LIST_0 : REF_PIC_LIST_1; const RefPicList refListAmvp = RefPicList(1 - refListMerge); const int curPoc = pu.cu->slice->getPOC(); const int mergeRefPoc = pu.cu->slice->getRefPOC(refListMerge, pu.refIdx[refListMerge]); const bool useMR = pu.lumaSize().area() > 64; const int amvpRefPoc = pu.cu->slice->getRefPOC(refListAmvp, pu.refIdx[refListAmvp]); #if JVET_Y0128_NON_CTC #if JVET_AA0124_AMVPMERGE_DMVD_OFF_RPR_ON if (pu.cu->slice->getSPS()->getUseDMVDMode() == true) { #endif CHECK(pu.cu->slice->getRefPic(REF_PIC_LIST_0, pu.refIdx[0])->isRefScaled(pu.cs->pps), "this is not possible"); CHECK(pu.cu->slice->getRefPic(REF_PIC_LIST_1, pu.refIdx[1])->isRefScaled(pu.cs->pps), "this is not possible"); #if JVET_AA0124_AMVPMERGE_DMVD_OFF_RPR_ON } #endif #endif #if JVET_Z0085_AMVPMERGE_DMVD_OFF if (pu.cu->cs->sps->getUseDMVDMode()) { #endif if ((mergeRefPoc - curPoc) == (curPoc - amvpRefPoc)) { Mv mvInitial[2]; mvInitial[refListAmvp] = pu.mv[refListAmvp];; mvInitial[refListMerge] = pu.mv[refListMerge]; Mv mvFinal[2] = { mvInitial[0], mvInitial[1] }; Distortion curBmCost = std::numeric_limits<Distortion>::max(); #if JVET_X0049_BDMVR_SW_OPT curBmCost = xBDMVRMvSquareSearch<false>(mvFinal, curBmCost, pu, mvInitial, AMVP_MERGE_MODE_REDUCED_MV_REFINE_SEARCH_ROUND, MV_FRACTIONAL_BITS_INTERNAL, useMR, false); curBmCost = xBDMVRMvSquareSearch<true>(mvFinal, curBmCost, pu, mvInitial, 2, MV_FRACTIONAL_BITS_INTERNAL - 1, useMR, false); #else curBmCost = xBDMVRMvSquareSearch( mvFinal, curBmCost, pu, mvInitial, AMVP_MERGE_MODE_REDUCED_MV_REFINE_SEARCH_ROUND, MV_FRACTIONAL_BITS_INTERNAL, useMR, false ); curBmCost = xBDMVRMvSquareSearch( mvFinal, curBmCost, pu, mvInitial, 2, MV_FRACTIONAL_BITS_INTERNAL - 1, useMR, false ); #endif pu.mv[refListMerge] = mvFinal[refListMerge]; pu.mv[refListAmvp] = mvFinal[refListAmvp]; } #if TM_AMVP || TM_MRG else #if TM_AMVP && TM_MRG #if JVET_AA0132_CONFIGURABLE_TM_TOOLS if (pu.cu->cs->sps->getUseTMAmvpMode() || pu.cu->cs->sps->getUseTMMrgMode()) #endif 
#elif TM_AMVP #if JVET_AA0132_CONFIGURABLE_TM_TOOLS if (pu.cu->cs->sps->getUseTMAmvpMode()) #endif #elif TM_MRG #if JVET_AA0132_CONFIGURABLE_TM_TOOLS if (pu.cu->cs->sps->getUseTMMrgMode()) #endif #endif { Distortion tmCost[2]; tmCost[refListMerge] = deriveTMMv(pu, true, std::numeric_limits<Distortion>::max(), refListMerge, pu.refIdx[refListMerge], 0, pu.mv[refListMerge]); tmCost[refListAmvp] = deriveTMMv(pu, true, std::numeric_limits<Distortion>::max(), refListAmvp, pu.refIdx[refListAmvp], 0, pu.mv[refListAmvp]); RefPicList refListToBeRefined = (tmCost[refListMerge] < tmCost[refListAmvp]) ? refListAmvp : refListMerge; MvField mvfBetterUni(pu.mv[1 - refListToBeRefined], pu.refIdx[1 - refListToBeRefined]); deriveTMMv(pu, true, std::numeric_limits<Distortion>::max(), refListToBeRefined, pu.refIdx[refListToBeRefined], AMVP_MERGE_MODE_REDUCED_MV_REFINE_SEARCH_ROUND, pu.mv[refListToBeRefined], &mvfBetterUni); } #endif #if JVET_Z0085_AMVPMERGE_DMVD_OFF } #endif #if !JVET_Y0129_MVD_SIGNAL_AMVP_MERGE_MODE pu.mv[refListAmvp].roundTransPrecInternal2Amvr(pu.cu->imv); #endif mvFieldAmListCommon[mvFieldMergeIdx].refIdx = pu.refIdx[refListMerge]; mvFieldAmListCommon[mvFieldMergeIdx].mv = pu.mv[refListMerge]; mvFieldAmListCommon[mvFieldAmvpIdx].refIdx = pu.refIdx[refListAmvp]; mvFieldAmListCommon[mvFieldAmvpIdx].mv = pu.mv[refListAmvp]; } #endif #if JVET_Z0054_BLK_REF_PIC_REORDER #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED void InterPrediction::deriveMVDCandVecFromMotionInforPredGeneral(const PredictionUnit &pu, std::vector<MotionInfoPred> &miPredList, RefPicList eRefPicList, std::vector<Mv> &cMvdDerivedVec) { cMvdDerivedVec.clear(); deriveMVDcand(pu, eRefPicList, cMvdDerivedVec); if (!cMvdDerivedVec.empty() && !miPredList.empty()) { std::stable_sort(miPredList.begin(), miPredList.end(), [](const MotionInfoPred & l, const MotionInfoPred & r) {return l.cost < r.cost; }); std::vector<MotionInfoPred> miPredListSub; miPredListSub.clear(); { for (std::vector<MotionInfoPred>::iterator it 
= miPredList.begin(); it != miPredList.end(); ++it)
      {
        if (it->interDir == pu.interDir && it->refIdx[eRefPicList] == pu.refIdx[eRefPicList])
        {
          // For bi-prediction on list 1, only keep entries whose list-0 MVD equals the PU's.
          if (it->interDir == 3 && eRefPicList == REF_PIC_LIST_1 && (it->mvd[0] != pu.mvd[0]))
          {
            continue;
          }
          // Keep only the first (lowest-cost) entry for each distinct MVD of this list.
          bool add = true;
          for (std::vector<MotionInfoPred>::iterator itSub = miPredListSub.begin(); itSub != miPredListSub.end(); ++itSub)
          {
            if (it->mvd[eRefPicList] == itSub->mvd[eRefPicList])
            {
              add = false;
              break;
            }
          }
          if (add)
          {
            miPredListSub.push_back(*it);
          }
        }
      }
      CHECK(!miPredListSub.empty() && cMvdDerivedVec.size() != miPredListSub.size(), "cMvdDerivedVec.size() != miPredListSub.size()"); // when template doesn't exist, miPredListSub is empty but cMvdDerivedVec might not be.
      for (size_t i = 0; i < miPredListSub.size(); i++)
      {
        cMvdDerivedVec[i] = (miPredListSub[i].mvd[eRefPicList]);
      }
    }
  }
}

// Affine counterpart of deriveMVDCandVecFromMotionInforPredGeneral.
// Derives the MVD candidates of the three control points with deriveMVDcandAffine(), then, if
// both the derived candidates and miPredList are non-empty, stable-sorts miPredList by cost
// (in place) and overwrites the candidates with the MVDs of the matching, de-duplicated entries
// in cost order. cMvdDerivedVec[0..2] hold control points 0..2.
void InterPrediction::deriveAffineMVDCandVecFromMotionInforPredGeneral(const PredictionUnit &pu, std::vector<MotionInfoPred> &miPredList, RefPicList eRefPicList, std::vector<Mv> cMvdDerivedVec[3])
{
  cMvdDerivedVec[0].clear();
  cMvdDerivedVec[1].clear();
  cMvdDerivedVec[2].clear();
  deriveMVDcandAffine(pu, eRefPicList, cMvdDerivedVec);
  if (!cMvdDerivedVec[0].empty() && !miPredList.empty())
  {
    std::stable_sort(miPredList.begin(), miPredList.end(), [](const MotionInfoPred & l, const MotionInfoPred & r) {return l.cost < r.cost; });
    std::vector<MotionInfoPred> miPredListSub;
    miPredListSub.clear();
    for (std::vector<MotionInfoPred>::iterator it = miPredList.begin(); it != miPredList.end(); ++it)
    {
      if (it->interDir == pu.interDir && it->refIdx[eRefPicList] == pu.refIdx[eRefPicList])
      {
        // For bi-prediction on list 1, only keep entries whose list-0 control-point MVDs equal
        // the PU's (the third control point is compared for the 6-parameter model only).
        if (it->interDir == 3 && eRefPicList == REF_PIC_LIST_1 && (it->mvdAffi[0][0] != pu.mvdAffi[0][0] || it->mvdAffi[0][1] != pu.mvdAffi[0][1] || (pu.cu->affineType == AFFINEMODEL_6PARAM && it->mvdAffi[0][2] != pu.mvdAffi[0][2])))
        {
          continue;
        }
        // Drop entries whose control-point MVDs duplicate an already accepted entry.
        bool add = true;
        for (std::vector<MotionInfoPred>::iterator itSub = miPredListSub.begin(); itSub != miPredListSub.end(); ++itSub)
        {
          if (it->mvdAffi[eRefPicList][0] == itSub->mvdAffi[eRefPicList][0] && it->mvdAffi[eRefPicList][1] == itSub->mvdAffi[eRefPicList][1] && (pu.cu->affineType == AFFINEMODEL_4PARAM || it->mvdAffi[eRefPicList][2] == itSub->mvdAffi[eRefPicList][2]))
          {
            add = false;
            break;
          }
        }
        if (add)
        {
          miPredListSub.push_back(*it);
        }
      }
    }
    // NOTE(review): unlike the sibling functions, this CHECK has no "!miPredListSub.empty() &&"
    // guard, so an empty filtered list trips it -- confirm this is intended.
    CHECK(cMvdDerivedVec[0].size() != miPredListSub.size(), "cMvdDerivedVec[0].size() != miPredListSub.size()");
    // NOTE(review): assumes cMvdDerivedVec[1] and [2] are at least as long as [0] -- confirm.
    for (size_t i = 0; i < miPredListSub.size(); i++)
    {
      cMvdDerivedVec[0][i] = (miPredListSub[i].mvdAffi[eRefPicList][0]);
      cMvdDerivedVec[1][i] = (miPredListSub[i].mvdAffi[eRefPicList][1]);
      cMvdDerivedVec[2][i] = (miPredListSub[i].mvdAffi[eRefPicList][2]);
    }
  }
}

// Derives the MVD candidates with deriveMVDcand() and, when miPredList contains entries with
// the PU's inter direction and reference index for eRefPicList, replaces the candidates with
// the MVDs of those entries in ascending cost order. miPredList itself is not reordered.
void InterPrediction::deriveMVDCandVecFromMotionInforPred(const PredictionUnit &pu, std::vector<MotionInfoPred> &miPredList, RefPicList eRefPicList, std::vector<Mv> &cMvdDerivedVec)
{
  cMvdDerivedVec.clear();
  deriveMVDcand(pu, eRefPicList, cMvdDerivedVec);
  if (!cMvdDerivedVec.empty())
  {
    std::vector<MotionInfoPred> miPredListSub;
    miPredListSub.clear();
    if (!miPredList.empty())
    {
      for (std::vector<MotionInfoPred>::iterator it = miPredList.begin(); it != miPredList.end(); ++it)
      {
        if (it->interDir == pu.interDir && it->refIdx[eRefPicList] == pu.refIdx[eRefPicList])
        {
          miPredListSub.push_back(*it);
        }
      }
      std::stable_sort(miPredListSub.begin(), miPredListSub.end(), [](const MotionInfoPred & l, const MotionInfoPred & r) {return l.cost < r.cost; });
    }
    CHECK(!miPredListSub.empty() && cMvdDerivedVec.size() != miPredListSub.size(), "cMvdDerivedVec.size() != miPredListSub.size()"); // when template doesn't exist, miPredListSub is empty but cMvdDerivedVec might not be.
for (size_t i = 0; i < miPredListSub.size(); i++) { cMvdDerivedVec[i] = (miPredListSub[i].mvd[eRefPicList]); } } } void InterPrediction::deriveAffineMVDCandVecFromMotionInforPred(const PredictionUnit &pu, std::vector<MotionInfoPred> &miPredList, RefPicList eRefPicList, std::vector<Mv> cMvdDerivedVec[3]) { cMvdDerivedVec[0].clear(); cMvdDerivedVec[1].clear(); cMvdDerivedVec[2].clear(); deriveMVDcandAffine(pu, eRefPicList, cMvdDerivedVec); if (!cMvdDerivedVec[0].empty()) { std::vector<MotionInfoPred> miPredListSub; miPredListSub.clear(); if (!miPredList.empty()) { for (std::vector<MotionInfoPred>::iterator it = miPredList.begin(); it != miPredList.end(); ++it) { if (it->interDir == pu.interDir && it->refIdx[eRefPicList] == pu.refIdx[eRefPicList]) { miPredListSub.push_back(*it); } } std::stable_sort(miPredListSub.begin(), miPredListSub.end(), [](const MotionInfoPred & l, const MotionInfoPred & r) {return l.cost < r.cost; }); } CHECK(!miPredListSub.empty() && cMvdDerivedVec[0].size() != miPredListSub.size(), "cMvdDerivedVec[0].size() != miPredListSub.size()"); for (size_t i = 0; i < miPredListSub.size(); i++) { cMvdDerivedVec[0][i] = (miPredListSub[i].mvdAffi[eRefPicList][0]); cMvdDerivedVec[1][i] = (miPredListSub[i].mvdAffi[eRefPicList][1]); cMvdDerivedVec[2][i] = (miPredListSub[i].mvdAffi[eRefPicList][2]); } } } #endif void InterPrediction::reorderRefCombList(PredictionUnit &pu, std::vector<RefListAndRefIdx> &refListComb , RefPicList currRefList , std::vector<MotionInfoPred> &miPredList ) { int nWidth = pu.lumaSize().width; int nHeight = pu.lumaSize().height; #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED if (!xAMLGetCurBlkTemplate(pu, nWidth, nHeight) && !pu.isMvsdApplicable()) #else if (refListComb.size() < 2 || !xAMLGetCurBlkTemplate(pu, nWidth, nHeight)) #endif { return; } PelUnitBuf pcBufPredRefTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE))); PelUnitBuf pcBufPredCurTop = (PelUnitBuf(pu.chromaFormat, 
PelBuf(m_acYuvCurAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE))); PelUnitBuf pcBufPredRefLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight))); PelUnitBuf pcBufPredCurLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight))); PredictionUnit tmpPU = pu; DistParam cDistParam; cDistParam.applyWeight = false; Distortion uiCost; if (pu.cu->affine) { #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED std::vector<Mv> cMvdCandList[3]; cMvdCandList[0].push_back(tmpPU.mvdAffi[currRefList][0]); cMvdCandList[1].push_back(tmpPU.mvdAffi[currRefList][1]); cMvdCandList[2].push_back(tmpPU.mvdAffi[currRefList][2]); if (pu.isMvsdApplicable()) { deriveMVDcandAffine(tmpPU, currRefList, cMvdCandList); } #endif for (int idx = 0; idx < refListComb.size(); idx++) { RefPicList eRefList = refListComb[idx].refList; int refIdx = refListComb[idx].refIdx; tmpPU.interDir = 1 << eRefList; tmpPU.refIdx[1 - eRefList] = -1; tmpPU.refIdx[eRefList] = refIdx; tmpPU.mvpIdx[eRefList] = pu.mvpIdx[currRefList]; tmpPU.mvdAffi[eRefList][0] = tmpPU.mvdAffi[currRefList][0]; tmpPU.mvdAffi[eRefList][1] = tmpPU.mvdAffi[currRefList][1]; tmpPU.mvdAffi[eRefList][2] = tmpPU.mvdAffi[currRefList][2]; AffineAMVPInfo affineAMVPInfo; PU::fillAffineMvpCand(tmpPU, eRefList, tmpPU.refIdx[eRefList], affineAMVPInfo); const unsigned mvp_idx = tmpPU.mvpIdx[eRefList]; #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED refListComb[idx].cost = std::numeric_limits<Distortion>::max(); for (size_t i = 0; i < cMvdCandList[0].size(); i++) { Mv mvLT = affineAMVPInfo.mvCandLT[mvp_idx] + cMvdCandList[0][i]; Mv mvRT = affineAMVPInfo.mvCandRT[mvp_idx] + cMvdCandList[1][i]; mvRT += cMvdCandList[0][i]; Mv mvLB; if (tmpPU.cu->affineType == AFFINEMODEL_6PARAM) { mvLB = affineAMVPInfo.mvCandLB[mvp_idx] + cMvdCandList[2][i]; mvLB += cMvdCandList[0][i]; } #else Mv mvLT = affineAMVPInfo.mvCandLT[mvp_idx] + tmpPU.mvdAffi[eRefList][0]; Mv mvRT = 
affineAMVPInfo.mvCandRT[mvp_idx] + tmpPU.mvdAffi[eRefList][1]; mvRT += tmpPU.mvdAffi[eRefList][0]; Mv mvLB; if (tmpPU.cu->affineType == AFFINEMODEL_6PARAM) { mvLB = affineAMVPInfo.mvCandLB[mvp_idx] + tmpPU.mvdAffi[eRefList][2]; mvLB += tmpPU.mvdAffi[eRefList][0]; } #endif tmpPU.mvAffi[eRefList][0] = mvLT; tmpPU.mvAffi[eRefList][1] = mvRT; tmpPU.mvAffi[eRefList][2] = mvLB; #if !JVET_Z0067_RPR_ENABLE getAffAMLRefTemplate(tmpPU, pcBufPredRefTop, pcBufPredRefLeft); #endif uiCost = 0; bool bRefIsRescaled = (tmpPU.refIdx[eRefList] >= 0) ? tmpPU.cu->slice->getRefPic(eRefList, tmpPU.refIdx[eRefList])->isRefScaled(pu.cs->pps) : false; if (bRefIsRescaled) { uiCost = std::numeric_limits<Distortion>::max(); } else { #if JVET_Z0067_RPR_ENABLE getAffAMLRefTemplate(tmpPU, pcBufPredRefTop, pcBufPredRefLeft); #endif if (m_bAMLTemplateAvailabe[0]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[1]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } } #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED MotionInfoPred miPred; miPred.interDir = 1 << eRefList; miPred.refIdx[eRefList] = refIdx; miPred.mvdAffi[eRefList][0] = cMvdCandList[0][i]; miPred.mvdAffi[eRefList][1] = cMvdCandList[1][i]; miPred.mvdAffi[eRefList][2] = cMvdCandList[2][i]; miPred.mvAffi[eRefList][0] = tmpPU.mvAffi[eRefList][0]; miPred.mvAffi[eRefList][1] = tmpPU.mvAffi[eRefList][1]; miPred.mvAffi[eRefList][2] = tmpPU.mvAffi[eRefList][2]; miPred.cost = uiCost; miPredList.push_back(miPred); if (uiCost < refListComb[idx].cost) { refListComb[idx].cost = uiCost; } } #else refListComb[idx].cost = uiCost; #endif } } else { #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED std::vector<Mv> cMvdCandList; 
cMvdCandList.push_back(tmpPU.mvd[currRefList]); if (pu.isMvsdApplicable()) { deriveMVDcand(tmpPU, currRefList, cMvdCandList); } #endif for (int idx = 0; idx < refListComb.size(); idx++) { RefPicList eRefList = refListComb[idx].refList; int refIdx = refListComb[idx].refIdx; tmpPU.interDir = 1 << eRefList; tmpPU.refIdx[1 - eRefList] = -1; tmpPU.refIdx[eRefList] = refIdx; tmpPU.mvpIdx[eRefList] = tmpPU.mvpIdx[currRefList]; AMVPInfo amvpInfo; PU::fillMvpCand(tmpPU, eRefList, tmpPU.refIdx[eRefList], amvpInfo #if TM_AMVP , this #endif ); #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED refListComb[idx].cost = std::numeric_limits<Distortion>::max(); for (std::vector<Mv>::iterator it = cMvdCandList.begin(); it != cMvdCandList.end(); ++it) { tmpPU.mvd[eRefList] = *it; #else tmpPU.mvd[eRefList] = tmpPU.mvd[currRefList]; #endif tmpPU.mv[eRefList] = amvpInfo.mvCand[tmpPU.mvpIdx[eRefList]] + tmpPU.mvd[eRefList]; tmpPU.mv[eRefList].mvCliptoStorageBitDepth(); uiCost = 0; bool bRefIsRescaled = (tmpPU.refIdx[eRefList] >= 0) ? 
tmpPU.cu->slice->getRefPic(eRefList, tmpPU.refIdx[eRefList])->isRefScaled(pu.cs->pps) : false; if (bRefIsRescaled) { uiCost = std::numeric_limits<Distortion>::max(); } else { getBlkAMLRefTemplate(tmpPU, pcBufPredRefTop, pcBufPredRefLeft); if (m_bAMLTemplateAvailabe[0]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[1]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } } #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED MotionInfoPred miPred; miPred.interDir = 1 << eRefList; miPred.refIdx[eRefList] = refIdx; miPred.mvd[eRefList] = tmpPU.mvd[eRefList]; miPred.mv[eRefList] = tmpPU.mv[eRefList]; miPred.cost = uiCost; miPredList.push_back(miPred); if (uiCost < refListComb[idx].cost) { refListComb[idx].cost = uiCost; } } #else refListComb[idx].cost = uiCost; #endif } } std::stable_sort(refListComb.begin(), refListComb.end(), [](const RefListAndRefIdx & l, const RefListAndRefIdx & r) {return l.cost < r.cost; }); } void InterPrediction::setUniRefIdxLC(PredictionUnit &pu) { RefPicList eRefPicList; std::vector<RefListAndRefIdx> refListComb; #if JVET_X0083_BM_AMVP_MERGE_MODE if (pu.amvpMergeModeFlag[0] || pu.amvpMergeModeFlag[1]) { eRefPicList = pu.amvpMergeModeFlag[0] ? REF_PIC_LIST_1 : REF_PIC_LIST_0; refListComb = pu.cs->slice->getRefPicCombinedListAmvpMerge(); } else #endif { eRefPicList = pu.interDir == 1 ? 
REF_PIC_LIST_0 : REF_PIC_LIST_1; refListComb = pu.cs->slice->getRefPicCombinedList(); std::vector<MotionInfoPred> miPredList; miPredList.clear(); reorderRefCombList(pu, refListComb, RefPicList(pu.interDir >> 1), miPredList); #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED if (pu.isMvsdApplicable()) { if (pu.cu->affine) { std::vector<Mv> cMvdDerivedVec[3]; deriveAffineMVDCandVecFromMotionInforPred(pu, miPredList, (pu.interDir == 1 ? REF_PIC_LIST_0 : REF_PIC_LIST_1), cMvdDerivedVec); pu.mvsdIdx[eRefPicList] = deriveMVSDIdxFromMVDAffine(pu, eRefPicList, cMvdDerivedVec[0], cMvdDerivedVec[1], cMvdDerivedVec[2]); } else { std::vector<Mv> cMvdDerivedVec; deriveMVDCandVecFromMotionInforPred(pu, miPredList, eRefPicList, cMvdDerivedVec); pu.mvsdIdx[eRefPicList] = deriveMVSDIdxFromMVDTrans(pu.mvd[eRefPicList], cMvdDerivedVec); } } #endif } int refIdx = pu.refIdx[eRefPicList]; for (int8_t idx = 0; idx < refListComb.size(); idx++) { if (refListComb[idx].refList == eRefPicList && refListComb[idx].refIdx == refIdx) { pu.refIdxLC = idx; break; } } } void InterPrediction::setUniRefListAndIdx(PredictionUnit &pu) { RefPicList eRefList; std::vector<RefListAndRefIdx> refListComb; std::vector<MotionInfoPred> miPredList; miPredList.clear(); #if JVET_X0083_BM_AMVP_MERGE_MODE if (pu.amvpMergeModeFlag[0] || pu.amvpMergeModeFlag[1]) { refListComb = pu.cs->slice->getRefPicCombinedListAmvpMerge(); eRefList = refListComb[pu.refIdxLC].refList; pu.refIdx[eRefList] = refListComb[pu.refIdxLC].refIdx; pu.interDir = 3; pu.amvpMergeModeFlag[0] = (eRefList == REF_PIC_LIST_0 ? false : true); pu.amvpMergeModeFlag[1] = (eRefList == REF_PIC_LIST_0 ? 
true : false); } else #endif { refListComb = pu.cs->slice->getRefPicCombinedList(); reorderRefCombList(pu, refListComb, RefPicList(pu.interDir >> 1), miPredList); eRefList = refListComb[pu.refIdxLC].refList; pu.interDir = 1 << eRefList; pu.refIdx[eRefList] = refListComb[pu.refIdxLC].refIdx; pu.refIdx[1 - eRefList] = -1; // some code relies on whether refIdx equals to -1 instead of interDir to determine inter prediction direction } //move other motion informations from temporally locations to the correct ones if (pu.cu->affine) { pu.mvdAffi[eRefList][0] = pu.mvdAffi[0][0]; pu.mvdAffi[eRefList][1] = pu.mvdAffi[0][1]; pu.mvdAffi[eRefList][2] = pu.mvdAffi[0][2]; } else { pu.mvd[eRefList] = pu.mvd[0]; } pu.mvpIdx[eRefList] = pu.mvpIdx[0]; #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED pu.mvsdIdx[eRefList] = pu.mvsdIdx[0]; #endif #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED #if JVET_X0083_BM_AMVP_MERGE_MODE if (pu.amvpMergeModeFlag[0] || pu.amvpMergeModeFlag[1]) { return; } #endif if (pu.isMvsdApplicable()) { if (pu.cu->affine) { std::vector<Mv> cMvdDerivedVec[3]; deriveAffineMVDCandVecFromMotionInforPred(pu, miPredList, (pu.interDir == 1 ? 
REF_PIC_LIST_0 : REF_PIC_LIST_1), cMvdDerivedVec); deriveMVDFromMVSDIdxAffine(pu, eRefList, cMvdDerivedVec[0], cMvdDerivedVec[1], cMvdDerivedVec[2]); } else { std::vector<Mv> cMvdDerivedVec; deriveMVDCandVecFromMotionInforPred(pu, miPredList, eRefList, cMvdDerivedVec); if (!cMvdDerivedVec.empty()) { int mvsdIdx = pu.mvsdIdx[eRefList]; pu.mvd[eRefList] = deriveMVDFromMVSDIdxTrans(mvsdIdx, cMvdDerivedVec); } } } #endif } void InterPrediction::setBiRefPairIdx(PredictionUnit &pu) { std::vector<RefPicPair> refPicPairList = pu.cs->slice->getRefPicPairList(); std::vector<MotionInfoPred> miPredList; miPredList.clear(); reorderRefPairList(pu, refPicPairList, miPredList); #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED if (pu.isMvsdApplicable()) { uint8_t numCandL0 = 0; for (int8_t refList = 0; refList < 2; refList++) { RefPicList eRefPicList = (RefPicList)refList; if (refList && numCandL0 >= 2) { continue; } if (pu.cu->affine) { std::vector<Mv> cMvdDerivedVec[3]; deriveAffineMVDCandVecFromMotionInforPredGeneral(pu, miPredList, eRefPicList, cMvdDerivedVec); numCandL0 = (uint8_t)cMvdDerivedVec[0].size(); pu.mvsdIdx[eRefPicList] = deriveMVSDIdxFromMVDAffine(pu, eRefPicList, cMvdDerivedVec[0], cMvdDerivedVec[1], cMvdDerivedVec[2]); } else { std::vector<Mv> cMvdDerivedVec; deriveMVDCandVecFromMotionInforPredGeneral(pu, miPredList, eRefPicList, cMvdDerivedVec); numCandL0 = (uint8_t)cMvdDerivedVec.size(); pu.mvsdIdx[eRefPicList] = deriveMVSDIdxFromMVDTrans(pu.mvd[eRefPicList], cMvdDerivedVec); } } } #endif for (int8_t idx = 0; idx < refPicPairList.size(); idx++) { if (refPicPairList[idx].refIdx[0] == pu.refIdx[0] && refPicPairList[idx].refIdx[1] == pu.refIdx[1]) { pu.refPairIdx = idx; break; } } CHECK(pu.refPairIdx < 0, ""); } void InterPrediction::setBiRefIdx(PredictionUnit &pu) { std::vector<RefPicPair> refPicPairList = pu.cs->slice->getRefPicPairList(); std::vector<MotionInfoPred> miPredList; miPredList.clear(); reorderRefPairList(pu, refPicPairList, miPredList); pu.refIdx[0] = 
refPicPairList[pu.refPairIdx].refIdx[0]; pu.refIdx[1] = refPicPairList[pu.refPairIdx].refIdx[1]; #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED if (pu.isMvsdApplicable()) { uint8_t numCandL0 = 0; for (int8_t refList = 0; refList < 2; refList++) { RefPicList eRefList = (RefPicList)refList; if (refList && numCandL0 >= 2) { continue; } if (pu.cu->affine) { std::vector<Mv> cMvdDerivedVec[3]; deriveAffineMVDCandVecFromMotionInforPredGeneral(pu, miPredList, eRefList, cMvdDerivedVec); numCandL0 = (uint8_t)cMvdDerivedVec[0].size(); deriveMVDFromMVSDIdxAffine(pu, eRefList, cMvdDerivedVec[0], cMvdDerivedVec[1], cMvdDerivedVec[2]); } else { std::vector<Mv> cMvdDerivedVec; deriveMVDCandVecFromMotionInforPredGeneral(pu, miPredList, eRefList, cMvdDerivedVec); numCandL0 = (uint8_t)cMvdDerivedVec.size(); if (!cMvdDerivedVec.empty()) { int mvsdIdx = pu.mvsdIdx[eRefList]; pu.mvd[eRefList] = deriveMVDFromMVSDIdxTrans(mvsdIdx, cMvdDerivedVec); } } } } #endif } void InterPrediction::reorderRefPairList(PredictionUnit &pu, std::vector<RefPicPair> &refPairList , std::vector<MotionInfoPred> &miPredList ) { int nWidth = pu.lumaSize().width; int nHeight = pu.lumaSize().height; if (refPairList.size() < 2 || !xAMLGetCurBlkTemplate(pu, nWidth, nHeight)) { return; } PelUnitBuf pcBufPredRefTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE))); PelUnitBuf pcBufPredCurTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE))); PelUnitBuf pcBufPredRefLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight))); PelUnitBuf pcBufPredCurLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight))); PredictionUnit tmpPU = pu; DistParam cDistParam; cDistParam.applyWeight = false; Distortion uiCost; if (pu.cu->affine) { #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED std::vector<Mv> cMvdCandList[2][3]; for (int refList = 0; 
refList < 2; refList++) { cMvdCandList[refList][0].push_back(tmpPU.mvdAffi[refList][0]); cMvdCandList[refList][1].push_back(tmpPU.mvdAffi[refList][1]); cMvdCandList[refList][2].push_back(tmpPU.mvdAffi[refList][2]); if (pu.isMvsdApplicable() && (!refList || cMvdCandList[0][0].size() < 2)) { deriveMVDcandAffine(tmpPU, RefPicList(refList), cMvdCandList[refList]); } } #endif for (int idx = 0; idx < refPairList.size(); idx++) { tmpPU.refIdx[0] = refPairList[idx].refIdx[0]; tmpPU.refIdx[1] = refPairList[idx].refIdx[1]; AffineAMVPInfo affineAMVPInfo[2]; for (int refList = 0; refList < 2; refList++) { RefPicList eRefList = (RefPicList)refList; PU::fillAffineMvpCand(tmpPU, eRefList, tmpPU.refIdx[eRefList], affineAMVPInfo[eRefList]); #if !JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED const unsigned mvp_idx = tmpPU.mvpIdx[eRefList]; Mv mvLT = affineAMVPInfo[eRefList].mvCandLT[mvp_idx] + tmpPU.mvdAffi[eRefList][0]; Mv mvRT = affineAMVPInfo[eRefList].mvCandRT[mvp_idx] + tmpPU.mvdAffi[eRefList][1]; mvRT += tmpPU.mvdAffi[eRefList][0]; Mv mvLB; if (tmpPU.cu->affineType == AFFINEMODEL_6PARAM) { mvLB = affineAMVPInfo[eRefList].mvCandLB[mvp_idx] + tmpPU.mvdAffi[eRefList][2]; mvLB += tmpPU.mvdAffi[eRefList][0]; } tmpPU.mvAffi[eRefList][0] = mvLT; tmpPU.mvAffi[eRefList][1] = mvRT; tmpPU.mvAffi[eRefList][2] = mvLB; #endif } #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED refPairList[idx].cost = std::numeric_limits<Distortion>::max(); for (size_t i = 0; i < cMvdCandList[0][0].size(); i++) { for (size_t j = 0; j < cMvdCandList[1][0].size(); j++) { MotionInfoPred miPred; miPred.interDir = 3; int refList = 0; unsigned mvp_idx = tmpPU.mvpIdx[refList]; Mv mvLT = affineAMVPInfo[refList].mvCandLT[mvp_idx] + cMvdCandList[refList][0][i]; Mv mvRT = affineAMVPInfo[refList].mvCandRT[mvp_idx] + cMvdCandList[refList][1][i]; mvRT += cMvdCandList[refList][0][i]; Mv mvLB; if (tmpPU.cu->affineType == AFFINEMODEL_6PARAM) { mvLB = affineAMVPInfo[refList].mvCandLB[mvp_idx] + cMvdCandList[refList][2][i]; mvLB += 
cMvdCandList[refList][0][i]; } tmpPU.mvAffi[refList][0] = mvLT; tmpPU.mvAffi[refList][1] = mvRT; tmpPU.mvAffi[refList][2] = mvLB; miPred.refIdx[refList] = tmpPU.refIdx[refList]; miPred.mvdAffi[refList][0] = cMvdCandList[refList][0][i]; miPred.mvdAffi[refList][1] = cMvdCandList[refList][1][i]; miPred.mvdAffi[refList][2] = cMvdCandList[refList][2][i]; miPred.mvAffi[refList][0] = tmpPU.mvAffi[refList][0]; miPred.mvAffi[refList][1] = tmpPU.mvAffi[refList][1]; miPred.mvAffi[refList][2] = tmpPU.mvAffi[refList][2]; refList = 1; mvp_idx = tmpPU.mvpIdx[refList]; mvLT = affineAMVPInfo[refList].mvCandLT[mvp_idx] + cMvdCandList[refList][0][j]; mvRT = affineAMVPInfo[refList].mvCandRT[mvp_idx] + cMvdCandList[refList][1][j]; mvRT += cMvdCandList[refList][0][j]; if (tmpPU.cu->affineType == AFFINEMODEL_6PARAM) { mvLB = affineAMVPInfo[refList].mvCandLB[mvp_idx] + cMvdCandList[refList][2][j]; mvLB += cMvdCandList[refList][0][j]; } tmpPU.mvAffi[refList][0] = mvLT; tmpPU.mvAffi[refList][1] = mvRT; tmpPU.mvAffi[refList][2] = mvLB; miPred.refIdx[refList] = tmpPU.refIdx[refList]; miPred.mvdAffi[refList][0] = cMvdCandList[refList][0][j]; miPred.mvdAffi[refList][1] = cMvdCandList[refList][1][j]; miPred.mvdAffi[refList][2] = cMvdCandList[refList][2][j]; miPred.mvAffi[refList][0] = tmpPU.mvAffi[refList][0]; miPred.mvAffi[refList][1] = tmpPU.mvAffi[refList][1]; miPred.mvAffi[refList][2] = tmpPU.mvAffi[refList][2]; #endif #if !JVET_Z0067_RPR_ENABLE getAffAMLRefTemplate(tmpPU, pcBufPredRefTop, pcBufPredRefLeft); #endif uiCost = 0; bool bRefIsRescaled = false; for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) { const RefPicList eRefPicList = refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0; bRefIsRescaled |= (tmpPU.refIdx[refList] >= 0) ? 
tmpPU.cu->slice->getRefPic(eRefPicList, tmpPU.refIdx[refList])->isRefScaled(tmpPU.cs->pps) : false; } if (bRefIsRescaled) { uiCost = std::numeric_limits<Distortion>::max(); } else { #if JVET_Z0067_RPR_ENABLE getAffAMLRefTemplate(tmpPU, pcBufPredRefTop, pcBufPredRefLeft); #endif if (m_bAMLTemplateAvailabe[0]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[1]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } } #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED miPred.cost = uiCost; miPredList.push_back(miPred); if (uiCost < refPairList[idx].cost) { refPairList[idx].cost = uiCost; } } } #else refPairList[idx].cost = uiCost; #endif } } else { #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED std::vector<Mv> cMvdCandList[2]; for (int refList = 0; refList < 2; refList++) { cMvdCandList[refList].push_back(tmpPU.mvd[refList]); if (pu.isMvsdApplicable() && (!refList || cMvdCandList[0].size() < 2)) { deriveMVDcand(tmpPU, RefPicList(refList), cMvdCandList[refList]); } } #endif for (int idx = 0; idx < refPairList.size(); idx++) { tmpPU.refIdx[0] = refPairList[idx].refIdx[0]; tmpPU.refIdx[1] = refPairList[idx].refIdx[1]; AMVPInfo amvpInfo[2]; for (int refList = 0; refList < 2; refList++) { RefPicList eRefList = (RefPicList)refList; { PU::fillMvpCand(tmpPU, eRefList, tmpPU.refIdx[eRefList], amvpInfo[eRefList] #if TM_AMVP , this #endif ); #if !JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED tmpPU.mv[eRefList] = amvpInfo[eRefList].mvCand[tmpPU.mvpIdx[eRefList]] + tmpPU.mvd[eRefList]; tmpPU.mv[eRefList].mvCliptoStorageBitDepth(); #endif } } #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED refPairList[idx].cost = MAX_UINT; for (size_t i = 0; i < cMvdCandList[0].size(); i++) { for (size_t j = 0; j < 
cMvdCandList[1].size(); j++) { MotionInfoPred miPred; miPred.interDir = 3; tmpPU.mv[REF_PIC_LIST_0] = amvpInfo[REF_PIC_LIST_0].mvCand[tmpPU.mvpIdx[REF_PIC_LIST_0]] + cMvdCandList[REF_PIC_LIST_0][i]; tmpPU.mv[REF_PIC_LIST_0].mvCliptoStorageBitDepth(); miPred.refIdx[REF_PIC_LIST_0] = tmpPU.refIdx[REF_PIC_LIST_0]; miPred.mvd[REF_PIC_LIST_0] = cMvdCandList[REF_PIC_LIST_0][i]; miPred.mv[REF_PIC_LIST_0] = tmpPU.mv[REF_PIC_LIST_0]; tmpPU.mv[REF_PIC_LIST_1] = amvpInfo[REF_PIC_LIST_1].mvCand[tmpPU.mvpIdx[REF_PIC_LIST_1]] + cMvdCandList[REF_PIC_LIST_1][j]; tmpPU.mv[REF_PIC_LIST_1].mvCliptoStorageBitDepth(); miPred.refIdx[REF_PIC_LIST_1] = tmpPU.refIdx[REF_PIC_LIST_1]; miPred.mvd[REF_PIC_LIST_1] = cMvdCandList[REF_PIC_LIST_1][j]; miPred.mv[REF_PIC_LIST_1] = tmpPU.mv[REF_PIC_LIST_1]; #endif uiCost = 0; bool bRefIsRescaled = false; for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) { const RefPicList eRefPicList = refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0; bRefIsRescaled |= (tmpPU.refIdx[refList] >= 0) ? 
tmpPU.cu->slice->getRefPic(eRefPicList, tmpPU.refIdx[refList])->isRefScaled(tmpPU.cs->pps) : false; } if (bRefIsRescaled) { uiCost = std::numeric_limits<Distortion>::max(); } else { getBlkAMLRefTemplate(tmpPU, pcBufPredRefTop, pcBufPredRefLeft); if (m_bAMLTemplateAvailabe[0]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[1]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } } #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED miPred.cost = uiCost; miPredList.push_back(miPred); if (uiCost < refPairList[idx].cost) { refPairList[idx].cost = uiCost; } } } #else refPairList[idx].cost = uiCost; #endif } } std::stable_sort(refPairList.begin(), refPairList.end(), [](const RefPicPair & l, const RefPicPair & r) {return l.cost < r.cost; }); } #endif #if JVET_Y0067_ENHANCED_MMVD_MVD_SIGN_PRED #if JVET_Z0054_BLK_REF_PIC_REORDER void InterPrediction::deriveMVDcand(const PredictionUnit& pu, RefPicList eRefPicList, std::vector<Mv>& cMvdCandList) { Mv cMvdKnownAtDecoder; cMvdKnownAtDecoder.set(pu.mvd[eRefPicList].getAbsHor(), pu.mvd[eRefPicList].getAbsVer()); const static int patternsX[4][2] = { { +1, +1 }, { +1, -1 }, }; const static int patternsY[4][2] = { { +1, +1 }, { -1, +1 }, }; const static int patternsXY[4][2] = { { +1, +1 }, { +1, -1 }, { -1, +1 }, { -1, -1 }, }; typedef int Int2[2]; const Int2* patterns = 0; uint16_t patternsNum = 0; if (cMvdKnownAtDecoder.getHor() == 0 && cMvdKnownAtDecoder.getVer() == 0) { return; } if (cMvdKnownAtDecoder.getHor() == 0) { patterns = patternsX; patternsNum = 2; } else if (cMvdKnownAtDecoder.getVer() == 0) { patterns = patternsY; patternsNum = 2; } else { patterns = patternsXY; patternsNum = 4; } cMvdCandList.clear(); for (int n = 0; n < 
patternsNum; ++n) { auto sign = patterns[n]; auto cMv = Mv(sign[0] * cMvdKnownAtDecoder.getHor(), sign[1] * cMvdKnownAtDecoder.getVer()); cMvdCandList.push_back(cMv); } } void InterPrediction::deriveMVDcandAffine(const PredictionUnit& pu, RefPicList eRefPicList, std::vector<Mv> cMvdCandList[3]) { Mv cMvdKnownAtDecoder[3]; for (int i = 0; i < 3; i++) { cMvdKnownAtDecoder[i].set(pu.mvdAffi[eRefPicList][i].getAbsHor(), pu.mvdAffi[eRefPicList][i].getAbsVer()); } const static int patterns2[2][1] = { { +1 }, { -1 }, }; const static int patterns4[4][2] = { { +1, +1 }, { +1, -1 }, { -1, +1 }, { -1, -1 }, }; const static int patterns8[8][3] = { { +1, +1, +1 }, { +1, +1, -1 }, { +1, -1, +1 }, { +1, -1, -1 }, { -1, +1, +1 }, { -1, +1, -1 }, { -1, -1, +1 }, { -1, -1, -1 }, }; const static int patterns16[16][4] = { { +1, +1, +1, +1 }, { +1, +1, +1, -1 }, { +1, +1, -1, +1 }, { +1, +1, -1, -1 }, { +1, -1, +1, +1 }, { +1, -1, +1, -1 }, { +1, -1, -1, +1 }, { +1, -1, -1, -1 }, { -1, +1, +1, +1 }, { -1, +1, +1, -1 }, { -1, +1, -1, +1 }, { -1, +1, -1, -1 }, { -1, -1, +1, +1 }, { -1, -1, +1, -1 }, { -1, -1, -1, +1 }, { -1, -1, -1, -1 }, }; const static int patterns32[32][5] = { { +1, +1, +1, +1, +1 }, { +1, +1, +1, +1, -1 }, { +1, +1, +1, -1, +1 }, { +1, +1, +1, -1, -1 }, { +1, +1, -1, +1, +1 }, { +1, +1, -1, +1, -1 }, { +1, +1, -1, -1, +1 }, { +1, +1, -1, -1, -1 }, { +1, -1, +1, +1, +1 }, { +1, -1, +1, +1, -1 }, { +1, -1, +1, -1, +1 }, { +1, -1, +1, -1, -1 }, { +1, -1, -1, +1, +1 }, { +1, -1, -1, +1, -1 }, { +1, -1, -1, -1, +1 }, { +1, -1, -1, -1, -1 }, { -1, +1, +1, +1, +1 }, { -1, +1, +1, +1, -1 }, { -1, +1, +1, -1, +1 }, { -1, +1, +1, -1, -1 }, { -1, +1, -1, +1, +1 }, { -1, +1, -1, +1, -1 }, { -1, +1, -1, -1, +1 }, { -1, +1, -1, -1, -1 }, { -1, -1, +1, +1, +1 }, { -1, -1, +1, +1, -1 }, { -1, -1, +1, -1, +1 }, { -1, -1, +1, -1, -1 }, { -1, -1, -1, +1, +1 }, { -1, -1, -1, +1, -1 }, { -1, -1, -1, -1, +1 }, { -1, -1, -1, -1, -1 }, }; const static int patterns64[64][6] = { { +1, +1, +1, 
+1, +1, +1 }, { +1, +1, +1, +1, +1, -1 }, { +1, +1, +1, +1, -1, +1 }, { +1, +1, +1, +1, -1, -1 }, { +1, +1, +1, -1, +1, +1 }, { +1, +1, +1, -1, +1, -1 }, { +1, +1, +1, -1, -1, +1 }, { +1, +1, +1, -1, -1, -1 }, { +1, +1, -1, +1, +1, +1 }, { +1, +1, -1, +1, +1, -1 }, { +1, +1, -1, +1, -1, +1 }, { +1, +1, -1, +1, -1, -1 }, { +1, +1, -1, -1, +1, +1 }, { +1, +1, -1, -1, +1, -1 }, { +1, +1, -1, -1, -1, +1 }, { +1, +1, -1, -1, -1, -1 }, { +1, -1, +1, +1, +1, +1 }, { +1, -1, +1, +1, +1, -1 }, { +1, -1, +1, +1, -1, +1 }, { +1, -1, +1, +1, -1, -1 }, { +1, -1, +1, -1, +1, +1 }, { +1, -1, +1, -1, +1, -1 }, { +1, -1, +1, -1, -1, +1 }, { +1, -1, +1, -1, -1, -1 }, { +1, -1, -1, +1, +1, +1 }, { +1, -1, -1, +1, +1, -1 }, { +1, -1, -1, +1, -1, +1 }, { +1, -1, -1, +1, -1, -1 }, { +1, -1, -1, -1, +1, +1 }, { +1, -1, -1, -1, +1, -1 }, { +1, -1, -1, -1, -1, +1 }, { +1, -1, -1, -1, -1, -1 }, { -1, +1, +1, +1, +1, +1 }, { -1, +1, +1, +1, +1, -1 }, { -1, +1, +1, +1, -1, +1 }, { -1, +1, +1, +1, -1, -1 }, { -1, +1, +1, -1, +1, +1 }, { -1, +1, +1, -1, +1, -1 }, { -1, +1, +1, -1, -1, +1 }, { -1, +1, +1, -1, -1, -1 }, { -1, +1, -1, +1, +1, +1 }, { -1, +1, -1, +1, +1, -1 }, { -1, +1, -1, +1, -1, +1 }, { -1, +1, -1, +1, -1, -1 }, { -1, +1, -1, -1, +1, +1 }, { -1, +1, -1, -1, +1, -1 }, { -1, +1, -1, -1, -1, +1 }, { -1, +1, -1, -1, -1, -1 }, { -1, -1, +1, +1, +1, +1 }, { -1, -1, +1, +1, +1, -1 }, { -1, -1, +1, +1, -1, +1 }, { -1, -1, +1, +1, -1, -1 }, { -1, -1, +1, -1, +1, +1 }, { -1, -1, +1, -1, +1, -1 }, { -1, -1, +1, -1, -1, +1 }, { -1, -1, +1, -1, -1, -1 }, { -1, -1, -1, +1, +1, +1 }, { -1, -1, -1, +1, +1, -1 }, { -1, -1, -1, +1, -1, +1 }, { -1, -1, -1, +1, -1, -1 }, { -1, -1, -1, -1, +1, +1 }, { -1, -1, -1, -1, +1, -1 }, { -1, -1, -1, -1, -1, +1 }, { -1, -1, -1, -1, -1, -1 }, }; std::vector<int> isZeroComp(6, 0); if (cMvdKnownAtDecoder[0].getHor() == 0) { isZeroComp[0] = 1; } if (cMvdKnownAtDecoder[0].getVer() == 0) { isZeroComp[1] = 1; } if (cMvdKnownAtDecoder[1].getHor() == 0) { 
isZeroComp[2] = 1; } if (cMvdKnownAtDecoder[1].getVer() == 0) { isZeroComp[3] = 1; } if (cMvdKnownAtDecoder[2].getHor() == 0) { isZeroComp[4] = 1; } if (cMvdKnownAtDecoder[2].getVer() == 0) { isZeroComp[5] = 1; } int nZeroComp = isZeroComp[0] + isZeroComp[1] + isZeroComp[2] + isZeroComp[3] + isZeroComp[4] + isZeroComp[5]; if (nZeroComp == 6) { return; } uint16_t patternsNum = 0; if (nZeroComp == 0) { patternsNum = 64; cMvdCandList[0].resize(patternsNum); cMvdCandList[1].resize(patternsNum); cMvdCandList[2].resize(patternsNum); for (int n = 0; n < patternsNum; ++n) { auto sign = patterns64[n]; cMvdCandList[0][n] = Mv(sign[0] * cMvdKnownAtDecoder[0].getHor(), sign[1] * cMvdKnownAtDecoder[0].getVer()); cMvdCandList[1][n] = Mv(sign[2] * cMvdKnownAtDecoder[1].getHor(), sign[3] * cMvdKnownAtDecoder[1].getVer()); cMvdCandList[2][n] = Mv(sign[4] * cMvdKnownAtDecoder[2].getHor(), sign[5] * cMvdKnownAtDecoder[2].getVer()); } } else if (nZeroComp == 1) { patternsNum = 32; cMvdCandList[0].resize(patternsNum); cMvdCandList[1].resize(patternsNum); cMvdCandList[2].resize(patternsNum); for (int n = 0; n < patternsNum; ++n) { auto signR = patterns32[n]; int sign[6]; int k = 0; for (int i = 0; i < 6; i++) { if (isZeroComp[i]) { sign[i] = +1; } else { sign[i] = signR[k]; k++; } } cMvdCandList[0][n] = Mv(sign[0] * cMvdKnownAtDecoder[0].getHor(), sign[1] * cMvdKnownAtDecoder[0].getVer()); cMvdCandList[1][n] = Mv(sign[2] * cMvdKnownAtDecoder[1].getHor(), sign[3] * cMvdKnownAtDecoder[1].getVer()); cMvdCandList[2][n] = Mv(sign[4] * cMvdKnownAtDecoder[2].getHor(), sign[5] * cMvdKnownAtDecoder[2].getVer()); } } else if (nZeroComp == 2) { patternsNum = 16; cMvdCandList[0].resize(patternsNum); cMvdCandList[1].resize(patternsNum); cMvdCandList[2].resize(patternsNum); for (int n = 0; n < patternsNum; ++n) { auto signR = patterns16[n]; int sign[6]; int k = 0; for (int i = 0; i < 6; i++) { if (isZeroComp[i]) { sign[i] = +1; } else { sign[i] = signR[k]; k++; } } cMvdCandList[0][n] = Mv(sign[0] * 
cMvdKnownAtDecoder[0].getHor(), sign[1] * cMvdKnownAtDecoder[0].getVer()); cMvdCandList[1][n] = Mv(sign[2] * cMvdKnownAtDecoder[1].getHor(), sign[3] * cMvdKnownAtDecoder[1].getVer()); cMvdCandList[2][n] = Mv(sign[4] * cMvdKnownAtDecoder[2].getHor(), sign[5] * cMvdKnownAtDecoder[2].getVer()); } } else if (nZeroComp == 3) { patternsNum = 8; cMvdCandList[0].resize(patternsNum); cMvdCandList[1].resize(patternsNum); cMvdCandList[2].resize(patternsNum); for (int n = 0; n < patternsNum; ++n) { auto signR = patterns8[n]; int sign[6]; int k = 0; for (int i = 0; i < 6; i++) { if (isZeroComp[i]) { sign[i] = +1; } else { sign[i] = signR[k]; k++; } } cMvdCandList[0][n] = Mv(sign[0] * cMvdKnownAtDecoder[0].getHor(), sign[1] * cMvdKnownAtDecoder[0].getVer()); cMvdCandList[1][n] = Mv(sign[2] * cMvdKnownAtDecoder[1].getHor(), sign[3] * cMvdKnownAtDecoder[1].getVer()); cMvdCandList[2][n] = Mv(sign[4] * cMvdKnownAtDecoder[2].getHor(), sign[5] * cMvdKnownAtDecoder[2].getVer()); } } else if (nZeroComp == 4) { patternsNum = 4; cMvdCandList[0].resize(patternsNum); cMvdCandList[1].resize(patternsNum); cMvdCandList[2].resize(patternsNum); for (int n = 0; n < patternsNum; ++n) { auto signR = patterns4[n]; int sign[6]; int k = 0; for (int i = 0; i < 6; i++) { if (isZeroComp[i]) { sign[i] = +1; } else { sign[i] = signR[k]; k++; } } cMvdCandList[0][n] = Mv(sign[0] * cMvdKnownAtDecoder[0].getHor(), sign[1] * cMvdKnownAtDecoder[0].getVer()); cMvdCandList[1][n] = Mv(sign[2] * cMvdKnownAtDecoder[1].getHor(), sign[3] * cMvdKnownAtDecoder[1].getVer()); cMvdCandList[2][n] = Mv(sign[4] * cMvdKnownAtDecoder[2].getHor(), sign[5] * cMvdKnownAtDecoder[2].getVer()); } } else { patternsNum = 2; cMvdCandList[0].resize(patternsNum); cMvdCandList[1].resize(patternsNum); cMvdCandList[2].resize(patternsNum); for (int n = 0; n < patternsNum; ++n) { auto signR = patterns2[n]; int sign[6]; int k = 0; for (int i = 0; i < 6; i++) { if (isZeroComp[i]) { sign[i] = +1; } else { sign[i] = signR[k]; k++; } } 
cMvdCandList[0][n] = Mv(sign[0] * cMvdKnownAtDecoder[0].getHor(), sign[1] * cMvdKnownAtDecoder[0].getVer()); cMvdCandList[1][n] = Mv(sign[2] * cMvdKnownAtDecoder[1].getHor(), sign[3] * cMvdKnownAtDecoder[1].getVer()); cMvdCandList[2][n] = Mv(sign[4] * cMvdKnownAtDecoder[2].getHor(), sign[5] * cMvdKnownAtDecoder[2].getVer()); } } } #endif void InterPrediction::deriveMvdSign(const Mv& cMvPred, const Mv& cMvdKnownAtDecoder, PredictionUnit& pu, RefPicList eRefList, int refIdx, std::vector<Mv>& cMvdDerived) { #if JVET_Z0054_BLK_REF_PIC_REORDER deriveMVDcand(pu, eRefList, cMvdDerived); #else const static int patternsX[4][2] = { { +1, +1 }, { +1, -1 }, }; const static int patternsY[4][2] = { { +1, +1 }, { -1, +1 }, }; const static int patternsXY[4][2] = { { +1, +1 }, { +1, -1 }, { -1, +1 }, { -1, -1 }, }; typedef int Int2[2]; const Int2* patterns = 0; uint16_t patternsNum = 0; if (cMvdKnownAtDecoder.getHor() == 0) { patterns = patternsX; patternsNum = 2; } else if (cMvdKnownAtDecoder.getVer() == 0) { patterns = patternsY; patternsNum = 2; } else { patterns = patternsXY; patternsNum = 4; } cMvdDerived.resize(patternsNum); for (int n = 0; n < patternsNum; ++n) { auto sign = patterns[n]; auto cMv = Mv(sign[0] * cMvdKnownAtDecoder.getHor(), sign[1] * cMvdKnownAtDecoder.getVer()); cMvdDerived[n] = cMv; } #endif if (!pu.lumaPos().x && !pu.lumaPos().y) { return; } CHECK(refIdx < 0, "Invalid reference index for FRUC"); const Picture& refPic = *pu.cu->slice->getRefPic(eRefList, refIdx)->unscaledPic; InterPredResources interRes(m_pcReshape, m_pcRdCost, m_if, m_filteredBlockTmp[0][COMPONENT_Y] , m_filteredBlock[3][1][0], m_filteredBlock[3][0][0] ); TplMatchingCtrl tplCtrl(pu, interRes, refPic, true, COMPONENT_Y, true, 0, m_pcCurTplAbove, m_pcCurTplLeft, m_pcRefTplAbove, m_pcRefTplLeft, Mv(0, 0), nullptr, 0); #if JVET_Z0054_BLK_REF_PIC_REORDER size_t patternsNum = cMvdDerived.size(); #endif std::vector<std::pair<Mv, Distortion>> aMvCostVec(patternsNum); #if JVET_Z0067_RPR_ENABLE bool 
bIsRefScaled = pu.cu->slice->getRefPic(eRefList, refIdx)->isRefScaled( pu.cs->pps ); for (int n = 0; n < patternsNum; ++n) { auto cMvdTest = cMvdDerived[n]; Mv cMvTest = cMvPred + cMvdTest; Distortion uiCost = bIsRefScaled ? std::numeric_limits<Distortion>::max() : tplCtrl.xGetTempMatchError<TM_TPL_SIZE>(cMvTest); aMvCostVec[n] = { cMvdTest, uiCost }; } #else for (int n = 0; n < patternsNum; ++n) { auto cMvdTest = cMvdDerived[n]; Mv cMvTest = cMvPred + cMvdTest; Distortion uiCost = tplCtrl.xGetTempMatchError<TM_TPL_SIZE>(cMvTest); aMvCostVec[n] = { cMvdTest, uiCost }; } #endif std::stable_sort(aMvCostVec.begin(), aMvCostVec.end(), [](const std::pair<Mv, Distortion> & l, const std::pair<Mv, Distortion> & r) {return l.second < r.second; }); #if JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC int sizeCandList = (int) distance(aMvCostVec.begin(), aMvCostVec.end()); if (sizeCandList > 2) { Distortion lambdaTh = pu.cs->slice->getCostForARMC(); bool enoughDiverse = false; uint32_t diverseCand = 0; uint32_t posToBeInserted = 0; for (uint32_t uiCand = 0; uiCand < sizeCandList - 1; ++uiCand) { for (uint32_t uiMergeCand = uiCand + 1; uiMergeCand < sizeCandList; ++uiMergeCand) { if (lambdaTh <= abs((int)(aMvCostVec[uiMergeCand].second - aMvCostVec[uiCand].second))) { enoughDiverse = true; for (uint32_t uiMergeCand3 = 0; uiMergeCand3 < uiCand; ++uiMergeCand3) { if (lambdaTh > abs((int)(aMvCostVec[uiMergeCand].second - aMvCostVec[uiMergeCand3].second))) { enoughDiverse = false; break; } } if (enoughDiverse) { diverseCand = uiMergeCand; posToBeInserted = uiCand + 1; break; } } } if (enoughDiverse) { Distortion saveCandCost = aMvCostVec[diverseCand].second; Mv mvSave = aMvCostVec[diverseCand].first; for (int i = diverseCand; i > posToBeInserted - 1; i--) { aMvCostVec[i].second = aMvCostVec[i - 1].second; aMvCostVec[i].first = aMvCostVec[i - 1].first; } aMvCostVec[posToBeInserted].second = saveCandCost; aMvCostVec[posToBeInserted].first = mvSave; } } } #endif for (int n = 0; n < 
patternsNum; ++n) { cMvdDerived[n] = aMvCostVec[n].first; } } /* deriveMvdSignSMVD: SMVD counterpart of deriveMvdSign; reorders the candidate MVDs in cMvdDerived by ascending cost. With JVET_Z0054_BLK_REF_PIC_REORDER the candidates come from deriveMVDcand(pu, REF_PIC_LIST_0, ...) and the function returns without reordering when fewer than two candidates exist or xAMLGetCurBlkTemplate() fails. The cost is then the distortion between the current and reference AML templates of the bi-predicted block (L0 MV = predictor + mvd, L1 MV = predictor - mvd) or, when JVET_AA0132_CONFIGURABLE_TM_TOOLS is set and sps->getUseARL() is false, the L0 template-matching error of cMvPred + mvd. Without JVET_Z0054 the candidates are the sign variants of cMvdKnownAtDecoder scored by the L0 template-matching error. cMvPred2 is not read anywhere in this function. */ void InterPrediction::deriveMvdSignSMVD(const Mv& cMvPred, const Mv& cMvPred2, const Mv& cMvdKnownAtDecoder, PredictionUnit& pu, std::vector<Mv>& cMvdDerived) { #if JVET_Z0054_BLK_REF_PIC_REORDER deriveMVDcand(pu, REF_PIC_LIST_0, cMvdDerived); int nWidth = pu.lumaSize().width; int nHeight = pu.lumaSize().height; if (cMvdDerived.size() < 2 || !xAMLGetCurBlkTemplate(pu, nWidth, nHeight)) { return; } std::vector<std::pair<Mv, Distortion>> aMvCostVec; #if JVET_AA0132_CONFIGURABLE_TM_TOOLS if (pu.cs->sps->getUseARL()) { #endif /* Views onto the member AML template buffers: a top strip of nWidth x AML_MERGE_TEMPLATE_SIZE and a left strip of AML_MERGE_TEMPLATE_SIZE x nHeight. */ PelUnitBuf pcBufPredRefTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE))); PelUnitBuf pcBufPredCurTop = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[0][0], nWidth, AML_MERGE_TEMPLATE_SIZE))); PelUnitBuf pcBufPredRefLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvRefAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight))); PelUnitBuf pcBufPredCurLeft = (PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvCurAMLTemplate[1][0], AML_MERGE_TEMPLATE_SIZE, nHeight))); /* Work on a copy of the PU so the trial MVs do not modify the caller's PU. */ PredictionUnit tmpPU = pu; AMVPInfo amvpInfo; PU::fillMvpCand(tmpPU, REF_PIC_LIST_0, tmpPU.refIdx[REF_PIC_LIST_0], amvpInfo #if TM_AMVP , this #endif ); AMVPInfo amvpInfo1; PU::fillMvpCand(tmpPU, REF_PIC_LIST_1, tmpPU.refIdx[REF_PIC_LIST_1], amvpInfo1 #if TM_AMVP , this #endif ); DistParam cDistParam; cDistParam.applyWeight = false; Distortion uiCost; for (std::vector<Mv>::iterator it = cMvdDerived.begin(); it != cMvdDerived.end(); ++it) { tmpPU.mvd[0] = *it; tmpPU.mv[0] = amvpInfo.mvCand[tmpPU.mvpIdx[0]] + tmpPU.mvd[0]; tmpPU.mv[0].mvCliptoStorageBitDepth(); tmpPU.mv[1] = amvpInfo1.mvCand[tmpPU.mvpIdx[1]] - tmpPU.mvd[0]; tmpPU.mv[1].mvCliptoStorageBitDepth(); getBlkAMLRefTemplate(tmpPU, pcBufPredRefTop, pcBufPredRefLeft); uiCost = 0; /* Only the templates flagged available in m_bAMLTemplateAvailabe contribute to the cost. */ if (m_bAMLTemplateAvailabe[0]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), 
pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[1]) { m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); uiCost += cDistParam.distFunc(cDistParam); } aMvCostVec.push_back(std::pair<Mv, Distortion>(*it, uiCost)); } #if JVET_AA0132_CONFIGURABLE_TM_TOOLS } else { /* getUseARL() is false: score each candidate with the L0 template-matching error instead. */ InterPredResources interRes(m_pcReshape, m_pcRdCost, m_if, m_filteredBlockTmp[0][COMPONENT_Y], m_filteredBlock[3][1][0], m_filteredBlock[3][0][0]); // For L0 int refIdx = pu.cs->slice->getSymRefIdx(REF_PIC_LIST_0); CHECK(refIdx < 0, "Invalid reference index for SMVD L0"); const Picture& refPic = *pu.cu->slice->getRefPic(REF_PIC_LIST_0, refIdx)->unscaledPic; TplMatchingCtrl tplCtrl(pu, interRes, refPic, true, COMPONENT_Y, true, 0, m_pcCurTplAbove, m_pcCurTplLeft, m_pcRefTplAbove, m_pcRefTplLeft, Mv(0, 0), nullptr, 0); #if JVET_Z0067_RPR_ENABLE bool bIsRefScaled = pu.cu->slice->getRefPic(REF_PIC_LIST_0, refIdx)->isRefScaled(pu.cs->pps); #endif for (std::vector<Mv>::iterator it = cMvdDerived.begin(); it != cMvdDerived.end(); ++it) { Mv cMvTest = cMvPred + *it; Distortion uiCost = #if JVET_Z0067_RPR_ENABLE bIsRefScaled ? 
std::numeric_limits<Distortion>::max() : #endif tplCtrl.xGetTempMatchError<TM_TPL_SIZE>(cMvTest); aMvCostVec.push_back(std::pair<Mv, Distortion>(*it, uiCost)); } } #endif /* Ascending cost; the stable sort keeps construction order among equal costs. */ std::stable_sort(aMvCostVec.begin(), aMvCostVec.end(), [](const std::pair<Mv, Distortion> & l, const std::pair<Mv, Distortion> & r) {return l.second < r.second; }); #if JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC /* Diversity step (lists of 3 or more): for each position uiCand, take the first later candidate whose cost differs by at least lambdaTh from candidate uiCand and from every candidate before uiCand, and move it to position uiCand + 1, shifting the entries in between one slot towards the end. NOTE(review): cost differences are narrowed to int before abs(), and enoughDiverse is not reset per uiCand, so a hit from an earlier iteration repeats the move with its stale indices - confirm against the intended normative behaviour before changing anything here. */ int sizeCandList = (int) distance( aMvCostVec.begin(), aMvCostVec.end()); if (sizeCandList > 2) { Distortion lambdaTh = pu.cs->slice->getCostForARMC(); bool enoughDiverse = false; uint32_t diverseCand = 0; uint32_t posToBeInserted = 0; for (uint32_t uiCand = 0; uiCand < sizeCandList - 1; ++uiCand) { for (uint32_t uiMergeCand = uiCand + 1; uiMergeCand < sizeCandList; ++uiMergeCand) { if (lambdaTh <= abs((int)(aMvCostVec[uiMergeCand].second - aMvCostVec[uiCand].second))) { enoughDiverse = true; for (uint32_t uiMergeCand3 = 0; uiMergeCand3 < uiCand; ++uiMergeCand3) { if (lambdaTh > abs((int)(aMvCostVec[uiMergeCand].second - aMvCostVec[uiMergeCand3].second))) { enoughDiverse = false; break; } } if (enoughDiverse) { diverseCand = uiMergeCand; posToBeInserted = uiCand + 1; break; } } } if (enoughDiverse) { Distortion saveCandCost = aMvCostVec[diverseCand].second; Mv mvSave = aMvCostVec[diverseCand].first; for (int i = diverseCand; i > posToBeInserted - 1; i--) { aMvCostVec[i].second = aMvCostVec[i - 1].second; aMvCostVec[i].first = aMvCostVec[i - 1].first; } aMvCostVec[posToBeInserted].second = saveCandCost; aMvCostVec[posToBeInserted].first = mvSave; } } } #endif /* aMvCostVec received one entry per candidate above, so it has the same size as cMvdDerived. */ for (size_t n = 0; n < cMvdDerived.size(); ++n) { cMvdDerived[n] = aMvCostVec[n].first; } #else /* Fallback without JVET_Z0054: enumerate the sign variants of cMvdKnownAtDecoder locally (2 when a component is zero, else 4). */ const static int patternsX[4][2] = { { +1, +1 }, { +1, -1 }, }; const static int patternsY[4][2] = { { +1, +1 }, { -1, +1 }, }; const static int patternsXY[4][2] = { { +1, +1 }, { +1, -1 }, { -1, +1 }, { -1, -1 }, }; typedef int Int2[2]; const Int2* patterns = 0; uint16_t patternsNum = 0; if (cMvdKnownAtDecoder.getHor() == 0) { patterns = 
patternsX; patternsNum = 2; } else if (cMvdKnownAtDecoder.getVer() == 0) { patterns = patternsY; patternsNum = 2; } else { patterns = patternsXY; patternsNum = 4; } cMvdDerived.resize(patternsNum); for (int n = 0; n < patternsNum; ++n) { auto sign = patterns[n]; auto cMv = Mv(sign[0] * cMvdKnownAtDecoder.getHor(), sign[1] * cMvdKnownAtDecoder.getVer()); cMvdDerived[n] = cMv; } /* PU at luma position (0,0): keep construction order, no cost-based reordering. */ if (!pu.lumaPos().x && !pu.lumaPos().y) { return; } std::vector<std::pair<Mv, Distortion>> aMvCostVec(patternsNum); InterPredResources interRes(m_pcReshape, m_pcRdCost, m_if, m_filteredBlockTmp[0][COMPONENT_Y] , m_filteredBlock[3][1][0], m_filteredBlock[3][0][0] ); // For L0 int refIdx = pu.cs->slice->getSymRefIdx(REF_PIC_LIST_0); CHECK(refIdx < 0, "Invalid reference index for SMVD L0"); const Picture& refPic = *pu.cu->slice->getRefPic(REF_PIC_LIST_0, refIdx)->unscaledPic; TplMatchingCtrl tplCtrl(pu, interRes, refPic, true, COMPONENT_Y, true, 0, m_pcCurTplAbove, m_pcCurTplLeft, m_pcRefTplAbove, m_pcRefTplLeft, Mv(0, 0), nullptr, 0); #if JVET_Z0067_RPR_ENABLE bool bIsRefScaled = pu.cu->slice->getRefPic(REF_PIC_LIST_0, refIdx)->isRefScaled(pu.cs->pps); for (int n = 0; n < patternsNum; ++n) { auto cMvdTest = cMvdDerived[n]; Mv cMvTest = cMvPred + cMvdTest; Distortion uiCost = bIsRefScaled ? 
std::numeric_limits<Distortion>::max() : tplCtrl.xGetTempMatchError<TM_TPL_SIZE>(cMvTest); aMvCostVec[n] = { cMvdTest, uiCost }; } #else for (int n = 0; n < patternsNum; ++n) { auto cMvdTest = cMvdDerived[n]; Mv cMvTest = cMvPred + cMvdTest; Distortion uiCost = tplCtrl.xGetTempMatchError<TM_TPL_SIZE>(cMvTest); aMvCostVec[n] = { cMvdTest, uiCost }; } #endif std::stable_sort(aMvCostVec.begin(), aMvCostVec.end(), [](const std::pair<Mv, Distortion> & l, const std::pair<Mv, Distortion> & r) {return l.second < r.second; }); #if JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC /* Same diversity step as in the JVET_Z0054 branch of this function. */ int sizeCandList = (int) distance(aMvCostVec.begin(), aMvCostVec.end()); if (sizeCandList > 2) { Distortion lambdaTh = pu.cs->slice->getCostForARMC(); bool enoughDiverse = false; uint32_t diverseCand = 0; uint32_t posToBeInserted = 0; for (uint32_t uiCand = 0; uiCand < sizeCandList - 1; ++uiCand) { for (uint32_t uiMergeCand = uiCand + 1; uiMergeCand < sizeCandList; ++uiMergeCand) { if (lambdaTh <= abs((int)(aMvCostVec[uiMergeCand].second - aMvCostVec[uiCand].second))) { enoughDiverse = true; for (uint32_t uiMergeCand3 = 0; uiMergeCand3 < uiCand; ++uiMergeCand3) { if (lambdaTh > abs((int)(aMvCostVec[uiMergeCand].second - aMvCostVec[uiMergeCand3].second))) { enoughDiverse = false; break; } } if (enoughDiverse) { diverseCand = uiMergeCand; posToBeInserted = uiCand + 1; break; } } } if (enoughDiverse) { Distortion saveCandCost = aMvCostVec[diverseCand].second; Mv mvSave = aMvCostVec[diverseCand].first; for (int i = diverseCand; i > posToBeInserted - 1; i--) { aMvCostVec[i].second = aMvCostVec[i - 1].second; aMvCostVec[i].first = aMvCostVec[i - 1].first; } aMvCostVec[posToBeInserted].second = saveCandCost; aMvCostVec[posToBeInserted].first = mvSave; } } } #endif for (int n = 0; n < patternsNum; ++n) { cMvdDerived[n] = aMvCostVec[n].first; } #endif } /* deriveMvdSignAffine: affine counterpart of deriveMvdSign. Builds one candidate MVD list per control point and returns them in cMvdDerived, cMvdDerived2 and cMvdDerived3, jointly ordered by ascending template cost; entry n of the three lists belongs to the same sign combination. */ void InterPrediction::deriveMvdSignAffine(const Mv& cMvPred, const Mv& cMvPred2, const Mv& cMvPred3, #if JVET_Z0054_BLK_REF_PIC_REORDER const Mv 
cMvdKnownAtDecoder[3], #else const Mv& cMvdKnownAtDecoder, const Mv& cMvdKnownAtDecoder2, const Mv& cMvdKnownAtDecoder3, #endif PredictionUnit& pu, RefPicList eRefList, int refIdx, std::vector<Mv>& cMvdDerived, std::vector<Mv>& cMvdDerived2, std::vector<Mv>& cMvdDerived3) { #if JVET_Z0054_BLK_REF_PIC_REORDER std::vector<Mv> MvdCand[3]; deriveMVDcandAffine(pu, eRefList, MvdCand); size_t patternsNum = MvdCand[0].size(); #else int patterns2[2][1] = { { +1 }, { -1 }, }; int patterns4[4][2] = { { +1, +1 }, { +1, -1 }, { -1, +1 }, { -1, -1 }, }; int patterns8[8][3] = { {+1, +1, +1 }, {+1, +1, -1 }, {+1, -1, +1 }, {+1, -1, -1 }, {-1, +1, +1 }, {-1, +1, -1 }, {-1, -1, +1 }, {-1, -1, -1 }, }; int patterns16[16][4] = { {+1, +1, +1, +1 }, {+1, +1, +1, -1 }, {+1, +1, -1, +1 }, {+1, +1, -1, -1 }, {+1, -1, +1, +1 }, {+1, -1, +1, -1 }, {+1, -1, -1, +1 }, {+1, -1, -1, -1 }, {-1, +1, +1, +1 }, {-1, +1, +1, -1 }, {-1, +1, -1, +1 }, {-1, +1, -1, -1 }, {-1, -1, +1, +1 }, {-1, -1, +1, -1 }, {-1, -1, -1, +1 }, {-1, -1, -1, -1 }, }; int patterns32[32][5] = { {+1, +1, +1, +1, +1 }, {+1, +1, +1, +1, -1 }, {+1, +1, +1, -1, +1 }, {+1, +1, +1, -1, -1 }, {+1, +1, -1, +1, +1 }, {+1, +1, -1, +1, -1 }, {+1, +1, -1, -1, +1 }, {+1, +1, -1, -1, -1 }, {+1, -1, +1, +1, +1 }, {+1, -1, +1, +1, -1 }, {+1, -1, +1, -1, +1 }, {+1, -1, +1, -1, -1 }, {+1, -1, -1, +1, +1 }, {+1, -1, -1, +1, -1 }, {+1, -1, -1, -1, +1 }, {+1, -1, -1, -1, -1 }, {-1, +1, +1, +1, +1 }, {-1, +1, +1, +1, -1 }, {-1, +1, +1, -1, +1 }, {-1, +1, +1, -1, -1 }, {-1, +1, -1, +1, +1 }, {-1, +1, -1, +1, -1 }, {-1, +1, -1, -1, +1 }, {-1, +1, -1, -1, -1 }, {-1, -1, +1, +1, +1 }, {-1, -1, +1, +1, -1 }, {-1, -1, +1, -1, +1 }, {-1, -1, +1, -1, -1 }, {-1, -1, -1, +1, +1 }, {-1, -1, -1, +1, -1 }, {-1, -1, -1, -1, +1 }, {-1, -1, -1, -1, -1 }, }; int patterns64[64][6] = { {+1, +1, +1, +1, +1, +1 }, {+1, +1, +1, +1, +1, -1 }, {+1, +1, +1, +1, -1, +1 }, {+1, +1, +1, +1, -1, -1 }, {+1, +1, +1, -1, +1, +1 }, {+1, +1, +1, -1, +1, -1 }, {+1, +1, +1, -1, -1, 
+1 }, {+1, +1, +1, -1, -1, -1 }, {+1, +1, -1, +1, +1, +1 }, {+1, +1, -1, +1, +1, -1 }, {+1, +1, -1, +1, -1, +1 }, {+1, +1, -1, +1, -1, -1 }, {+1, +1, -1, -1, +1, +1 }, {+1, +1, -1, -1, +1, -1 }, {+1, +1, -1, -1, -1, +1 }, {+1, +1, -1, -1, -1, -1 }, {+1, -1, +1, +1, +1, +1 }, {+1, -1, +1, +1, +1, -1 }, {+1, -1, +1, +1, -1, +1 }, {+1, -1, +1, +1, -1, -1 }, {+1, -1, +1, -1, +1, +1 }, {+1, -1, +1, -1, +1, -1 }, {+1, -1, +1, -1, -1, +1 }, {+1, -1, +1, -1, -1, -1 }, {+1, -1, -1, +1, +1, +1 }, {+1, -1, -1, +1, +1, -1 }, {+1, -1, -1, +1, -1, +1 }, {+1, -1, -1, +1, -1, -1 }, {+1, -1, -1, -1, +1, +1 }, {+1, -1, -1, -1, +1, -1 }, {+1, -1, -1, -1, -1, +1 }, {+1, -1, -1, -1, -1, -1 }, {-1, +1, +1, +1, +1, +1 }, {-1, +1, +1, +1, +1, -1 }, {-1, +1, +1, +1, -1, +1 }, {-1, +1, +1, +1, -1, -1 }, {-1, +1, +1, -1, +1, +1 }, {-1, +1, +1, -1, +1, -1 }, {-1, +1, +1, -1, -1, +1 }, {-1, +1, +1, -1, -1, -1 }, {-1, +1, -1, +1, +1, +1 }, {-1, +1, -1, +1, +1, -1 }, {-1, +1, -1, +1, -1, +1 }, {-1, +1, -1, +1, -1, -1 }, {-1, +1, -1, -1, +1, +1 }, {-1, +1, -1, -1, +1, -1 }, {-1, +1, -1, -1, -1, +1 }, {-1, +1, -1, -1, -1, -1 }, {-1, -1, +1, +1, +1, +1 }, {-1, -1, +1, +1, +1, -1 }, {-1, -1, +1, +1, -1, +1 }, {-1, -1, +1, +1, -1, -1 }, {-1, -1, +1, -1, +1, +1 }, {-1, -1, +1, -1, +1, -1 }, {-1, -1, +1, -1, -1, +1 }, {-1, -1, +1, -1, -1, -1 }, {-1, -1, -1, +1, +1, +1 }, {-1, -1, -1, +1, +1, -1 }, {-1, -1, -1, +1, -1, +1 }, {-1, -1, -1, +1, -1, -1 }, {-1, -1, -1, -1, +1, +1 }, {-1, -1, -1, -1, +1, -1 }, {-1, -1, -1, -1, -1, +1 }, {-1, -1, -1, -1, -1, -1 }, }; std::vector<int> isZeroComp(6, 0); if (cMvdKnownAtDecoder.getHor() == 0) { isZeroComp[0] = 1; } if (cMvdKnownAtDecoder.getVer() == 0) { isZeroComp[1] = 1; } if (cMvdKnownAtDecoder2.getHor() == 0) { isZeroComp[2] = 1; } if (cMvdKnownAtDecoder2.getVer() == 0) { isZeroComp[3] = 1; } if (cMvdKnownAtDecoder3.getHor() == 0) { isZeroComp[4] = 1; } if (cMvdKnownAtDecoder3.getVer() == 0) { isZeroComp[5] = 1; } int nZeroComp = isZeroComp[0] + isZeroComp[1] 
+ isZeroComp[2] + isZeroComp[3] + isZeroComp[4] + isZeroComp[5]; CHECK(nZeroComp == 6, "nnZeroComp == 6"); uint16_t patternsNum = 0; std::vector<Mv> MvdCand[3]; if (nZeroComp == 0) { patternsNum = 64; MvdCand[0].resize(patternsNum); MvdCand[1].resize(patternsNum); MvdCand[2].resize(patternsNum); for (int n = 0; n < patternsNum; ++n) { auto sign = patterns64[n]; MvdCand[0][n] = Mv(sign[0] * cMvdKnownAtDecoder.getHor(), sign[1] * cMvdKnownAtDecoder.getVer()); MvdCand[1][n] = Mv(sign[2] * cMvdKnownAtDecoder2.getHor(), sign[3] * cMvdKnownAtDecoder2.getVer()); MvdCand[2][n] = Mv(sign[4] * cMvdKnownAtDecoder3.getHor(), sign[5] * cMvdKnownAtDecoder3.getVer()); } } else if (nZeroComp == 1) { patternsNum = 32; MvdCand[0].resize(patternsNum); MvdCand[1].resize(patternsNum); MvdCand[2].resize(patternsNum); for (int n = 0; n < patternsNum; ++n) { auto signR = patterns32[n]; int sign[6]; int k = 0; for (int i = 0; i < 6; i++) { if (isZeroComp[i]) { sign[i] = +1; } else { sign[i] = signR[k]; k++; } } MvdCand[0][n] = Mv(sign[0] * cMvdKnownAtDecoder.getHor(), sign[1] * cMvdKnownAtDecoder.getVer()); MvdCand[1][n] = Mv(sign[2] * cMvdKnownAtDecoder2.getHor(), sign[3] * cMvdKnownAtDecoder2.getVer()); MvdCand[2][n] = Mv(sign[4] * cMvdKnownAtDecoder3.getHor(), sign[5] * cMvdKnownAtDecoder3.getVer()); } } else if (nZeroComp == 2) { patternsNum = 16; MvdCand[0].resize(patternsNum); MvdCand[1].resize(patternsNum); MvdCand[2].resize(patternsNum); for (int n = 0; n < patternsNum; ++n) { auto signR = patterns16[n]; int sign[6]; int k = 0; for (int i = 0; i < 6; i++) { if (isZeroComp[i]) { sign[i] = +1; } else { sign[i] = signR[k]; k++; } } MvdCand[0][n] = Mv(sign[0] * cMvdKnownAtDecoder.getHor(), sign[1] * cMvdKnownAtDecoder.getVer()); MvdCand[1][n] = Mv(sign[2] * cMvdKnownAtDecoder2.getHor(), sign[3] * cMvdKnownAtDecoder2.getVer()); MvdCand[2][n] = Mv(sign[4] * cMvdKnownAtDecoder3.getHor(), sign[5] * cMvdKnownAtDecoder3.getVer()); } } else if (nZeroComp == 3) { patternsNum = 8; 
MvdCand[0].resize(patternsNum); MvdCand[1].resize(patternsNum); MvdCand[2].resize(patternsNum); for (int n = 0; n < patternsNum; ++n) { auto signR = patterns8[n]; int sign[6]; int k = 0; for (int i = 0; i < 6; i++) { if (isZeroComp[i]) { sign[i] = +1; } else { sign[i] = signR[k]; k++; } } MvdCand[0][n] = Mv(sign[0] * cMvdKnownAtDecoder.getHor(), sign[1] * cMvdKnownAtDecoder.getVer()); MvdCand[1][n] = Mv(sign[2] * cMvdKnownAtDecoder2.getHor(), sign[3] * cMvdKnownAtDecoder2.getVer()); MvdCand[2][n] = Mv(sign[4] * cMvdKnownAtDecoder3.getHor(), sign[5] * cMvdKnownAtDecoder3.getVer()); } } else if (nZeroComp == 4) { patternsNum = 4; MvdCand[0].resize(patternsNum); MvdCand[1].resize(patternsNum); MvdCand[2].resize(patternsNum); for (int n = 0; n < patternsNum; ++n) { auto signR = patterns4[n]; int sign[6]; int k = 0; for (int i = 0; i < 6; i++) { if (isZeroComp[i]) { sign[i] = +1; } else { sign[i] = signR[k]; k++; } } MvdCand[0][n] = Mv(sign[0] * cMvdKnownAtDecoder.getHor(), sign[1] * cMvdKnownAtDecoder.getVer()); MvdCand[1][n] = Mv(sign[2] * cMvdKnownAtDecoder2.getHor(), sign[3] * cMvdKnownAtDecoder2.getVer()); MvdCand[2][n] = Mv(sign[4] * cMvdKnownAtDecoder3.getHor(), sign[5] * cMvdKnownAtDecoder3.getVer()); } } else { patternsNum = 2; MvdCand[0].resize(patternsNum); MvdCand[1].resize(patternsNum); MvdCand[2].resize(patternsNum); for (int n = 0; n < patternsNum; ++n) { auto signR = patterns2[n]; int sign[6]; int k = 0; for (int i = 0; i < 6; i++) { if (isZeroComp[i]) { sign[i] = +1; } else { sign[i] = signR[k]; k++; } } MvdCand[0][n] = Mv(sign[0] * cMvdKnownAtDecoder.getHor(), sign[1] * cMvdKnownAtDecoder.getVer()); MvdCand[1][n] = Mv(sign[2] * cMvdKnownAtDecoder2.getHor(), sign[3] * cMvdKnownAtDecoder2.getVer()); MvdCand[2][n] = Mv(sign[4] * cMvdKnownAtDecoder3.getHor(), sign[5] * cMvdKnownAtDecoder3.getVer()); } } #endif cMvdDerived.resize(patternsNum); cMvdDerived2.resize(patternsNum); cMvdDerived3.resize(patternsNum); for (int n = 0; n < patternsNum; ++n) { 
cMvdDerived[n] = MvdCand[0][n]; cMvdDerived2[n] = MvdCand[1][n]; cMvdDerived3[n] = MvdCand[2][n]; } if (!pu.lumaPos().x && !pu.lumaPos().y) { for (int n = 0; n < patternsNum; ++n) { cMvdDerived[n] = MvdCand[0][n]; cMvdDerived2[n] = MvdCand[1][n]; cMvdDerived3[n] = MvdCand[2][n]; } return; } ///////////////////////////////////////////////////////////////// Pel* refLeftTemplate = m_pcLICRefLeftTemplate; Pel* refAboveTemplate = m_pcLICRefAboveTemplate; Pel* recLeftTemplate = m_pcLICRecLeftTemplate; Pel* recAboveTemplate = m_pcLICRecAboveTemplate; int numTemplate[2] = { 0 , 0 }; // 0:Above, 1:Left const int width = pu.Y().width; const int height = pu.Y().height; int blockWidth = AFFINE_MIN_BLOCK_SIZE; int blockHeight = AFFINE_MIN_BLOCK_SIZE; const int iHalfBW = blockWidth >> 1; const int iHalfBH = blockHeight >> 1; const int iBit = MAX_CU_DEPTH; const int shift = iBit - 4 + MV_FRACTIONAL_BITS_INTERNAL; int iDMvHorX, iDMvHorY, iDMvVerX, iDMvVerY; CHECK(refIdx < 0, "Invalid reference index for FRUC"); const Picture& refPic = *pu.cu->slice->getRefPic(eRefList, refIdx)->unscaledPic; std::vector<std::pair<int, Distortion>> aMvCostVec(patternsNum); Distortion uiCost = 0; #if JVET_Z0067_RPR_ENABLE bool bIsRefScaled = pu.cu->slice->getRefPic(eRefList, refIdx)->isRefScaled( pu.cs->pps ); if ( bIsRefScaled ) { for (int n = 0; n < patternsNum; ++n) { aMvCostVec[n] = { n, uiCost }; } } else { #endif for (int n = 0; n < patternsNum; ++n) { uiCost = 0; //--------------------- (derive CPMVs)----------------------------------------------// Mv mvLT = cMvPred + MvdCand[0][n]; Mv mvRT = cMvPred2 + MvdCand[1][n]; mvRT += MvdCand[0][n]; Mv mvLB; if (pu.cu->affineType == AFFINEMODEL_6PARAM) { mvLB = cMvPred3 + MvdCand[2][n]; mvLB += MvdCand[0][n]; } //--------------- Calculate dMVs ------------------------------------------// iDMvHorX = (mvRT - mvLT).getHor() << (iBit - floorLog2(width)); iDMvHorY = (mvRT - mvLT).getVer() << (iBit - floorLog2(width)); if (pu.cu->affineType == 
AFFINEMODEL_6PARAM) { iDMvVerX = (mvLB - mvLT).getHor() << (iBit - floorLog2(height)); iDMvVerY = (mvLB - mvLT).getVer() << (iBit - floorLog2(height)); } else { iDMvVerX = -iDMvHorY; iDMvVerY = iDMvHorX; } int iMvScaleHor = mvLT.getHor() << iBit; int iMvScaleVer = mvLT.getVer() << iBit; int mvScaleHorLine = iMvScaleHor + iDMvHorX * iHalfBW + iDMvVerX * iHalfBH; int mvScaleVerLine = iMvScaleVer + iDMvHorY * iHalfBW + iDMvVerY * iHalfBH; int deltaMvHorXBlk = iDMvHorX * blockWidth; int deltaMvHorYBlk = iDMvHorY * blockWidth; // get prediction block by block for (int h = 0; h < height; h += blockHeight) { int mvScaleHorBlk = mvScaleHorLine; int mvScaleVerBlk = mvScaleVerLine; for (int w = 0; w < width; w += blockWidth) { if (w != 0 && h != 0) continue; //applies only on boundary subblocks. int iMvScaleTmpHor, iMvScaleTmpVer; iMvScaleTmpHor = mvScaleHorBlk; iMvScaleTmpVer = mvScaleVerBlk; mvScaleHorBlk += deltaMvHorXBlk; mvScaleVerBlk += deltaMvHorYBlk; roundAffineMv(iMvScaleTmpHor, iMvScaleTmpVer, shift); Mv tmpMv(iMvScaleTmpHor, iMvScaleTmpVer); tmpMv.clipToStorageBitDepth(); //clip #if JVET_AA0146_WRAP_AROUND_FIX bool wrapRef = false; if (refPic.isWrapAroundEnabled( pu.cs->pps )) { wrapRef = wrapClipMv(tmpMv, pu.lumaPos(), pu.lumaSize(), pu.cs->sps, pu.cs->pps); } else #endif clipMv(tmpMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps); iMvScaleTmpHor = tmpMv.getHor(); iMvScaleTmpVer = tmpMv.getVer(); #if JVET_AA0146_WRAP_AROUND_FIX uiCost += xGetSublkTemplateCost(*pu.cu, COMPONENT_Y, refPic, Mv(iMvScaleTmpHor, iMvScaleTmpVer), blockWidth, blockHeight, w, h, numTemplate, refLeftTemplate, refAboveTemplate, recLeftTemplate, recAboveTemplate, wrapRef); #else uiCost += xGetSublkTemplateCost(*pu.cu, COMPONENT_Y, refPic, Mv(iMvScaleTmpHor, iMvScaleTmpVer), blockWidth, blockHeight, w, h, numTemplate, refLeftTemplate, refAboveTemplate, recLeftTemplate, recAboveTemplate); #endif } } aMvCostVec[n] = { n, uiCost }; } #if JVET_Z0067_RPR_ENABLE } #endif 
//--------------------------------------------------------------------------------// ///////////////////////////////////////////////////////////////// std::stable_sort(aMvCostVec.begin(), aMvCostVec.end(), [](const std::pair<int, Distortion> & l, const std::pair<int, Distortion> & r) {return l.second < r.second; }); #if JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC int sizeCandList = (int) distance(aMvCostVec.begin(), aMvCostVec.end()); if (sizeCandList > 2) { Distortion lambdaTh = pu.cs->slice->getCostForARMC(); bool enoughDiverse = false; uint32_t diverseCand = 0; uint32_t posToBeInserted = 0; for (uint32_t uiCand = 0; uiCand < sizeCandList - 1; ++uiCand) { for (uint32_t uiMergeCand = uiCand + 1; uiMergeCand < sizeCandList; ++uiMergeCand) { if (lambdaTh <= abs((int)(aMvCostVec[uiMergeCand].second - aMvCostVec[uiCand].second))) { enoughDiverse = true; for (uint32_t uiMergeCand3 = 0; uiMergeCand3 < uiCand; ++uiMergeCand3) { if (lambdaTh > abs((int)(aMvCostVec[uiMergeCand].second - aMvCostVec[uiMergeCand3].second))) { enoughDiverse = false; break; } } if (enoughDiverse) { diverseCand = uiMergeCand; posToBeInserted = uiCand + 1; break; } } } if (enoughDiverse) { Distortion saveCandCost = aMvCostVec[diverseCand].second; int mvSave = aMvCostVec[diverseCand].first; for (int i = diverseCand; i > posToBeInserted - 1; i--) { aMvCostVec[i].second = aMvCostVec[i - 1].second; aMvCostVec[i].first = aMvCostVec[i - 1].first; } aMvCostVec[posToBeInserted].second = saveCandCost; aMvCostVec[posToBeInserted].first = mvSave; } } } #endif for (int n = 0; n < patternsNum; ++n) { int index = aMvCostVec[n].first; cMvdDerived[n] = MvdCand[0][index]; cMvdDerived2[n] = MvdCand[1][index]; cMvdDerived3[n] = MvdCand[2][index]; } } Distortion InterPrediction::xGetSublkTemplateCost(const CodingUnit& cu, const ComponentID compID, const Picture& refPic, const Mv& mv, const int sublkWidth, const int sublkHeight, const int posW, const int posH, int* numTemplate, Pel* refLeftTemplate, Pel* 
refAboveTemplate, Pel* recLeftTemplate, Pel* recAboveTemplate #if JVET_AA0146_WRAP_AROUND_FIX , bool wrapRef #endif ) { /* xGetSublkTemplateCost: returns the sum of squared differences between the motion-compensated reference template and the reconstructed neighbouring samples for one sub-block at offset (posW, posH) inside cu. The row above is used when posH == 0 and an above CU exists, the column to the left when posW == 0 and a left CU exists; otherwise the result is 0. Both sample sets are right-shifted by max(0, bitDepth - 12) before differencing; luma reconstruction is first mapped through the inverse LUT of m_pcReshape when LMCS is enabled in the picture header and m_pcReshape->getCTUFlag() is set. Side effects: the shifted samples are written to the reference and reconstruction template arrays at indices [posW, posW + sublkWidth) or [posH, posH + sublkHeight), and numTemplate[0] (above) or numTemplate[1] (left) is incremented once per sample. m_pcReshape is dereferenced unconditionally, so it must be non-null. */ #if JVET_Z0067_RPR_ENABLE CHECK(refPic.isRefScaled(cu.cs->pps), "xGetSublkTemplateCost ref Scaled not supported"); #endif const int bitDepth = cu.cs->sps->getBitDepth(toChannelType(compID)); const int precShift = std::max(0, bitDepth - 12); Distortion cost = 0; const Picture& currPic = *cu.cs->picture; const CodingUnit* const cuAbove = cu.cs->getCU(cu.blocks[compID].pos().offset(0, -1), toChannelType(compID)); const CodingUnit* const cuLeft = cu.cs->getCU(cu.blocks[compID].pos().offset(-1, 0), toChannelType(compID)); /* The picture buffers are only fetched when at least one neighbouring CU exists. */ const CPelBuf recBuf = cuAbove || cuLeft ? currPic.getRecoBuf(cu.cs->picture->blocks[compID]) : CPelBuf(); #if JVET_AA0146_WRAP_AROUND_FIX const CPelBuf refBuf = cuAbove || cuLeft ? refPic.getRecoBuf(refPic.blocks[compID], wrapRef) : CPelBuf(); #else const CPelBuf refBuf = cuAbove || cuLeft ? refPic.getRecoBuf(refPic.blocks[compID]) : CPelBuf(); #endif std::vector<Pel>& invLUT = m_pcReshape->getInvLUT(); // above if (cuAbove && posH == 0) { xGetPredBlkTpl<true>(cu, compID, refBuf, mv, posW, posH, sublkWidth, refAboveTemplate); const Pel* rec = recBuf.bufAt(cu.blocks[compID].pos().offset(0, -1)); for (int k = posW; k < posW + sublkWidth; k++) { int refVal = refAboveTemplate[k]; int recVal = rec[k]; if (isLuma(compID) && cu.cs->picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()) { recVal = invLUT[recVal]; } recVal >>= precShift; refVal >>= precShift; refAboveTemplate[k] = refVal; recAboveTemplate[k] = recVal; numTemplate[0]++; cost += (Distortion)(refVal - recVal) * (refVal - recVal); } } // left if (cuLeft && posW == 0) { xGetPredBlkTpl<false>(cu, compID, refBuf, mv, posW, posH, sublkHeight, refLeftTemplate); const Pel* rec = recBuf.bufAt(cu.blocks[compID].pos().offset(-1, 0)); for (int k = posH; k < posH + sublkHeight; k++) { int refVal = refLeftTemplate[k]; int recVal = rec[recBuf.stride * k]; if (isLuma(compID) && 
cu.cs->picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()) { recVal = invLUT[recVal]; } recVal >>= precShift; refVal >>= precShift; refLeftTemplate[k] = refVal; recLeftTemplate[k] = recVal; numTemplate[1]++; cost += (Distortion)(refVal - recVal) * (refVal - recVal); } } return cost; } /* deriveMVSDIdxFromMVDAffine: returns the sign-prediction index for the affine MVDs in pu.mvdAffi[eRefList], given the sorted candidate lists. One bit is produced per non-zero MVD component, least significant bit first, in the order hor/ver of entry [0], hor/ver of entry [1] and, for AFFINEMODEL_6PARAM, hor/ver of entry [2]. Each bit is 0 when the component equals that of the first candidate agreeing with all components handled before it, else 1. After the first component, a bit is only appended when such a candidate is found. cMvdDerived[0] is read without an emptiness check when the first component is non-zero. */ int InterPrediction::deriveMVSDIdxFromMVDAffine(PredictionUnit& pu, RefPicList eRefList, std::vector<Mv>& cMvdDerived, std::vector<Mv>& cMvdDerived2, std::vector<Mv>& cMvdDerived3) { int mvsdIdx = 0; int shift = 0; int bin = 0; if (pu.mvdAffi[eRefList][0].getHor()) { bin = (cMvdDerived[0].getHor() == pu.mvdAffi[eRefList][0].getHor()) ? 0 : 1; mvsdIdx += bin << shift; shift++; } if (pu.mvdAffi[eRefList][0].getVer()) { for (int i = 0; i < (int)cMvdDerived.size(); i++) { if (cMvdDerived[i].getHor() == pu.mvdAffi[eRefList][0].getHor()) { bin = (cMvdDerived[i].getVer() == pu.mvdAffi[eRefList][0].getVer()) ? 0 : 1; mvsdIdx += bin << shift; shift++; break; } } } if (pu.mvdAffi[eRefList][1].getHor()) { for (int i = 0; i < (int)cMvdDerived.size(); i++) { if (cMvdDerived[i] == pu.mvdAffi[eRefList][0]) { bin = (cMvdDerived2[i].getHor() == pu.mvdAffi[eRefList][1].getHor()) ? 0 : 1; mvsdIdx += bin << shift; shift++; break; } } } if (pu.mvdAffi[eRefList][1].getVer()) { for (int i = 0; i < (int)cMvdDerived.size(); i++) { if (cMvdDerived[i] == pu.mvdAffi[eRefList][0] && cMvdDerived2[i].getHor() == pu.mvdAffi[eRefList][1].getHor()) { bin = (cMvdDerived2[i].getVer() == pu.mvdAffi[eRefList][1].getVer()) ? 0 : 1; mvsdIdx += bin << shift; shift++; break; } } } if (pu.cu->affineType == AFFINEMODEL_6PARAM) { if (pu.mvdAffi[eRefList][2].getHor()) { for (int i = 0; i < (int)cMvdDerived.size(); i++) { if (cMvdDerived[i] == pu.mvdAffi[eRefList][0] && cMvdDerived2[i] == pu.mvdAffi[eRefList][1]) { bin = (cMvdDerived3[i].getHor() == pu.mvdAffi[eRefList][2].getHor()) ? 
0 : 1; mvsdIdx += bin << shift; shift++; break; } } } if (pu.mvdAffi[eRefList][2].getVer()) { for (int i = 0; i < (int)cMvdDerived.size(); i++) { if (cMvdDerived[i] == pu.mvdAffi[eRefList][0] && cMvdDerived2[i] == pu.mvdAffi[eRefList][1] && cMvdDerived3[i].getHor() == pu.mvdAffi[eRefList][2].getHor()) { bin = (cMvdDerived3[i].getVer() == pu.mvdAffi[eRefList][2].getVer()) ? 0 : 1; mvsdIdx += bin << shift; shift++; break; } } } } return mvsdIdx; } /* deriveMVDFromMVSDIdxAffine: inverse of deriveMVSDIdxFromMVDAffine. Consumes pu.mvsdIdx[eRefList] least significant bit first and overwrites, in place, each non-zero component of pu.mvdAffi[eRefList][0..2] (entry [2] only for AFFINEMODEL_6PARAM) with the matching component of the first candidate that agrees with the components already restored, negated when the bit is 1. Components that are zero on entry consume no bit and stay zero. cMvdDerived[0] is read without an emptiness check when the first component is non-zero. */ void InterPrediction::deriveMVDFromMVSDIdxAffine(PredictionUnit& pu, RefPicList eRefList, std::vector<Mv>& cMvdDerived, std::vector<Mv>& cMvdDerived2, std::vector<Mv>& cMvdDerived3) { int mvsdIdx = pu.mvsdIdx[eRefList]; int bin = 0; if (pu.mvdAffi[eRefList][0].getHor()) { bin = mvsdIdx & 1; int val = bin ? -cMvdDerived[0].getHor() : cMvdDerived[0].getHor(); pu.mvdAffi[eRefList][0].setHor(val); mvsdIdx >>= 1; } if (pu.mvdAffi[eRefList][0].getVer()) { for (int i = 0; i < (int)cMvdDerived.size(); i++) { if (cMvdDerived[i].getHor() == pu.mvdAffi[eRefList][0].getHor()) { bin = mvsdIdx & 1; int val = bin ? -cMvdDerived[i].getVer() : cMvdDerived[i].getVer(); pu.mvdAffi[eRefList][0].setVer(val); mvsdIdx >>= 1; break; } } } if (pu.mvdAffi[eRefList][1].getHor()) { for (int i = 0; i < (int)cMvdDerived.size(); i++) { if (cMvdDerived[i] == pu.mvdAffi[eRefList][0]) { bin = mvsdIdx & 1; int val = bin ? -cMvdDerived2[i].getHor() : cMvdDerived2[i].getHor(); pu.mvdAffi[eRefList][1].setHor(val); mvsdIdx >>= 1; break; } } } if (pu.mvdAffi[eRefList][1].getVer()) { for (int i = 0; i < (int)cMvdDerived.size(); i++) { if (cMvdDerived[i] == pu.mvdAffi[eRefList][0] && cMvdDerived2[i].getHor() == pu.mvdAffi[eRefList][1].getHor()) { bin = mvsdIdx & 1; int val = bin ? 
-cMvdDerived2[i].getVer() : cMvdDerived2[i].getVer(); pu.mvdAffi[eRefList][1].setVer(val); mvsdIdx >>= 1; break; } } } if (pu.cu->affineType == AFFINEMODEL_6PARAM) { if (pu.mvdAffi[eRefList][2].getHor()) { for (int i = 0; i < (int)cMvdDerived.size(); i++) { if (cMvdDerived[i] == pu.mvdAffi[eRefList][0] && cMvdDerived2[i] == pu.mvdAffi[eRefList][1]) { bin = mvsdIdx & 1; int val = bin ? -cMvdDerived3[i].getHor() : cMvdDerived3[i].getHor(); pu.mvdAffi[eRefList][2].setHor(val); mvsdIdx >>= 1; break; } } } if (pu.mvdAffi[eRefList][2].getVer()) { for (int i = 0; i < (int)cMvdDerived.size(); i++) { if (cMvdDerived[i] == pu.mvdAffi[eRefList][0] && cMvdDerived2[i] == pu.mvdAffi[eRefList][1] && cMvdDerived3[i].getHor() == pu.mvdAffi[eRefList][2].getHor()) { bin = mvsdIdx & 1; int val = bin ? -cMvdDerived3[i].getVer() : cMvdDerived3[i].getVer(); pu.mvdAffi[eRefList][2].setVer(val); mvsdIdx >>= 1; break; } } } } } /* deriveMVSDIdxFromMVDTrans: translational version of the index derivation. Bit 0 (only when cMvd has a horizontal part) is 0 if that part equals the horizontal part of cMvdDerived[0], else 1. The next bit (only when cMvd has a vertical part) is 0 if the vertical part equals that of the first candidate with the same horizontal part, else 1. cMvdDerived[0] is read without an emptiness check when cMvd.getHor() is non-zero. */ int InterPrediction::deriveMVSDIdxFromMVDTrans(Mv cMvd, std::vector<Mv>& cMvdDerived) { int mvsdIdx = 0; int shift = 0; int bin = 0; if (cMvd.getHor()) { bin = (cMvdDerived[0].getHor() == cMvd.getHor()) ? 0 : 1; mvsdIdx += bin << shift; shift++; } if (cMvd.getVer()) { for (int i = 0; i < (int)cMvdDerived.size(); i++) { if (cMvdDerived[i].getHor() == cMvd.getHor()) { bin = (cMvdDerived[i].getVer() == cMvd.getVer()) ? 0 : 1; mvsdIdx += bin << shift; shift++; break; } } } return mvsdIdx; } /* deriveMVDFromMVSDIdxTrans: inverse mapping for the translational case. Rebuilds the signed MVD from mvsdIdx (least significant bit first): the horizontal part comes from cMvdDerived[0], the vertical part from the first candidate whose horizontal part equals the restored one; a set bit negates the candidate value. cMvdDerived[0] is read unconditionally, so the list must not be empty. */ Mv InterPrediction::deriveMVDFromMVSDIdxTrans(int mvsdIdx, std::vector<Mv>& cMvdDerived) { int bin = 0; Mv cMvd = Mv(0, 0); if (cMvdDerived[0].getHor()) { bin = mvsdIdx & 1; int val = bin ? -cMvdDerived[0].getHor() : cMvdDerived[0].getHor(); cMvd.setHor(val); mvsdIdx >>= 1; } if (cMvdDerived[0].getVer()) { for (int i = 0; i < (int)cMvdDerived.size(); i++) { if (cMvdDerived[i].getHor() == cMvd.getHor()) { bin = mvsdIdx & 1; int val = bin ? 
-cMvdDerived[i].getVer() : cMvdDerived[i].getVer(); cMvd.setVer(val); mvsdIdx >>= 1; break; } } } return cMvd; } #endif #if JVET_AA0096_MC_BOUNDARY_PADDING void InterPrediction::mcFramePad(Picture *pcCurPic, Slice &slice) { const Size blkSizeBuff = Size(slice.getSPS()->getMaxCUWidth(), slice.getSPS()->getMaxCUWidth()); const Area blkAreaBuff = Area(Position(), blkSizeBuff); const UnitArea blkUnitAreaBuff(slice.getSPS()->getChromaFormatIdc(), blkAreaBuff); const Size blkSizeCurBuff = Size(4, 4); const Area blkAreaCurBuff = Area(Position(), blkSizeCurBuff); const UnitArea blkUnitCurAreaBuff(slice.getSPS()->getChromaFormatIdc(), blkAreaCurBuff); const Size blkSizeConBuff = Size(MC_PAD_SIZE, MC_PAD_SIZE); const Area blkAreaConBuff = Area(Position(), blkSizeConBuff); const UnitArea blkUnitConAreaBuff(slice.getSPS()->getChromaFormatIdc(), blkAreaConBuff); PelStorage *pPadBuffYUV = new PelStorage; pPadBuffYUV->create(blkUnitAreaBuff); PelStorage *pPadYUVContainerDyn = new PelStorage; pPadYUVContainerDyn->create(blkUnitConAreaBuff); PelStorage *pCurBuffYUV = new PelStorage; pCurBuffYUV->create(blkUnitCurAreaBuff); PredictionUnit blkDataTmp(blkUnitAreaBuff); CodingUnit blkCUTmp(blkUnitAreaBuff); blkDataTmp.cs = pcCurPic->cs; blkDataTmp.cu = &blkCUTmp; blkDataTmp.cu->BcwIdx = BCW_DEFAULT; blkDataTmp.cu->LICFlag = false; blkDataTmp.cu->affine = false; blkDataTmp.cu->geoFlag = false; blkDataTmp.cu->imv = IMV_OFF; blkDataTmp.cu->slice = &slice; blkDataTmp.cu->cs = pcCurPic->cs; // four directions MC padding #if JVET_Z0118_GDR int numPt = pcCurPic->cs->isGdrEnabled() ? 2 : 1; for (int i = 0; i < numPt; i++) { PictureType pt = (i == 0) ? 
PIC_RECONSTRUCTION_0 : PIC_RECONSTRUCTION_1; mcFramePadOneSide(pcCurPic, slice, PAD_TOP, pPadBuffYUV, &blkDataTmp, pPadYUVContainerDyn, blkUnitAreaBuff, pCurBuffYUV, pt); mcFramePadOneSide(pcCurPic, slice, PAD_BOTTEM, pPadBuffYUV, &blkDataTmp, pPadYUVContainerDyn, blkUnitAreaBuff, pCurBuffYUV, pt); mcFramePadOneSide(pcCurPic, slice, PAD_LEFT, pPadBuffYUV, &blkDataTmp, pPadYUVContainerDyn, blkUnitAreaBuff, pCurBuffYUV, pt); mcFramePadOneSide(pcCurPic, slice, PAD_RIGHT, pPadBuffYUV, &blkDataTmp, pPadYUVContainerDyn, blkUnitAreaBuff, pCurBuffYUV, pt); // repetitive padding for the extend padding area mcFramePadRepExt(pcCurPic, slice, pt); } #else mcFramePadOneSide(pcCurPic, slice, PAD_TOP, pPadBuffYUV, &blkDataTmp, pPadYUVContainerDyn, blkUnitAreaBuff, pCurBuffYUV); mcFramePadOneSide(pcCurPic, slice, PAD_BOTTEM, pPadBuffYUV, &blkDataTmp, pPadYUVContainerDyn, blkUnitAreaBuff, pCurBuffYUV); mcFramePadOneSide(pcCurPic, slice, PAD_LEFT, pPadBuffYUV, &blkDataTmp, pPadYUVContainerDyn, blkUnitAreaBuff, pCurBuffYUV); mcFramePadOneSide(pcCurPic, slice, PAD_RIGHT, pPadBuffYUV, &blkDataTmp, pPadYUVContainerDyn, blkUnitAreaBuff, pCurBuffYUV); // repetitive padding for the extend padding area mcFramePadRepExt(pcCurPic, slice); #endif pPadBuffYUV->destroy(); delete pPadBuffYUV; pPadYUVContainerDyn->destroy(); delete pPadYUVContainerDyn; pCurBuffYUV->destroy(); delete pCurBuffYUV; } #if JVET_Z0118_GDR void InterPrediction::mcFramePadOneSide(Picture *pcCurPic, Slice &slice, PadDirection padDir, PelStorage *pPadBuffYUV, PredictionUnit *blkDataTmp, PelStorage *pPadYUVContainerDyn, const UnitArea blkUnitAreaBuff, PelStorage *pCurBuffYUV, PictureType pt) #else void InterPrediction::mcFramePadOneSide(Picture *pcCurPic, Slice &slice, PadDirection padDir, PelStorage *pPadBuffYUV, PredictionUnit *blkDataTmp, PelStorage *pPadYUVContainerDyn, const UnitArea blkUnitAreaBuff, PelStorage *pCurBuffYUV) #endif { const int ctuSize = slice.getSPS()->getMaxCUWidth(); const int iWidthFrm = 
slice.getPPS()->getPicWidthInLumaSamples(); const int iHeightFrm = slice.getPPS()->getPicHeightInLumaSamples(); const int numCtuInWidth = iWidthFrm / ctuSize + (iWidthFrm % ctuSize != 0); const int numCtuInHeight = iHeightFrm / ctuSize + (iHeightFrm % ctuSize != 0); const int xBlkBoundIdx = (iWidthFrm % ctuSize) == 0 ? (ctuSize / 4 - 1) : ((iWidthFrm % ctuSize) / 4) - 1; const int yBlkBoundIdx = (iHeightFrm % ctuSize) == 0 ? (ctuSize / 4 - 1) : ((iHeightFrm % ctuSize) / 4) - 1; const int maxCtuIdx = (padDir == PAD_TOP || padDir == PAD_BOTTEM) ? numCtuInWidth : numCtuInHeight; int maxCh = pcCurPic->chromaFormat == CHROMA_400 ? 0 : 2; #if JVET_Z0118_GDR if (pcCurPic->cs->isGdrEnabled() && pcCurPic->cs->isInGdrIntervalOrRecoveryPoc()) { // switch recon based on picture type pcCurPic->cs->setReconBuf(pt); for (int rlist = REF_PIC_LIST_0; rlist < NUM_REF_PIC_LIST_01; rlist++) { int n = slice.getNumRefIdx((RefPicList)rlist); for (int idx = 0; idx < n; idx++) { Picture *refPic = slice.getReferencePicture((RefPicList)rlist, idx); if (refPic) { refPic->setCleanDirty((bool)pt); } } } } #endif for (int ctuIdx = 0; ctuIdx < maxCtuIdx; ctuIdx++) { Position ctuPos; if (padDir == PAD_TOP) { ctuPos = ctuPos.offset(ctuSize * ctuIdx, 0); } else if (padDir == PAD_BOTTEM) { ctuPos = ctuPos.offset(ctuSize * ctuIdx, ctuSize * (numCtuInHeight - 1)); } else if (padDir == PAD_LEFT) { ctuPos = ctuPos.offset(0, ctuSize * ctuIdx); } else { ctuPos = ctuPos.offset(ctuSize * (numCtuInWidth - 1), ctuSize * ctuIdx); } int maxIdxSubBlkPlus1; if (padDir == PAD_TOP || padDir == PAD_BOTTEM) { maxIdxSubBlkPlus1 = (ctuIdx == (numCtuInWidth - 1)) ? (xBlkBoundIdx + 1) : (ctuSize / 4); } else { maxIdxSubBlkPlus1 = (ctuIdx == (numCtuInHeight - 1)) ? 
(yBlkBoundIdx + 1) : (ctuSize / 4); } // MC for (int subBlkIdx = 0; subBlkIdx < maxIdxSubBlkPlus1; subBlkIdx++) { blkDataTmp->cu->BcwIdx = BCW_DEFAULT; Position subBlkPos = ctuPos; if (padDir == PAD_TOP || padDir == PAD_BOTTEM) { subBlkPos = subBlkPos.offset(subBlkIdx * 4, 0); } else { subBlkPos = subBlkPos.offset(0, subBlkIdx * 4); } Position subBlkMvPos = subBlkPos; if (padDir == PAD_BOTTEM) { subBlkMvPos = subBlkMvPos.offset(0, yBlkBoundIdx * 4); } else if (padDir == PAD_RIGHT) { subBlkMvPos = subBlkMvPos.offset(xBlkBoundIdx * 4, 0); } short reflistIdx[2] = { -1, -1 }; Mv subBlkMv[2]; if (pcCurPic->cs->getMotionInfo(subBlkMvPos).isInter && !pcCurPic->cs->getMotionInfo(subBlkMvPos).isIBCmot) { reflistIdx[0] = pcCurPic->cs->getMotionInfo(subBlkMvPos).refIdx[REF_PIC_LIST_0]; reflistIdx[1] = pcCurPic->cs->getMotionInfo(subBlkMvPos).refIdx[REF_PIC_LIST_1]; subBlkMv[0] = pcCurPic->cs->getMotionInfo(subBlkMvPos).mv[REF_PIC_LIST_0]; subBlkMv[1] = pcCurPic->cs->getMotionInfo(subBlkMvPos).mv[REF_PIC_LIST_1]; } int useList = -1; if (reflistIdx[0] >= 0 && reflistIdx[1] >= 0) { CHECK(CU::isIBC(*blkDataTmp->cu), "this is not possible"); if (padDir == PAD_TOP) { useList = (subBlkMv[0].getVer() > subBlkMv[1].getVer()) ? 0 : 1; } else if (padDir == PAD_BOTTEM) { useList = (subBlkMv[0].getVer() <= subBlkMv[1].getVer()) ? 0 : 1; } else if (padDir == PAD_LEFT) { useList = (subBlkMv[0].getHor() > subBlkMv[1].getHor()) ? 0 : 1; } else { useList = (subBlkMv[0].getHor() <= subBlkMv[1].getHor()) ? 0 : 1; } } else { useList = (reflistIdx[0] >= 0) ? 
0 : 1; } reflistIdx[1 - useList] = -1; int validPadSize = 0; if (reflistIdx[useList] >= 0) { int iMVBitShift = MV_FRACTIONAL_BITS_INTERNAL; MvField tempBiMvFieldAddOffset[2]; Mv mvAddOffset; Position subBlkMCPos = subBlkPos, mcBlksize; if (padDir == PAD_TOP) { mvAddOffset.set(0, -ctuSize << iMVBitShift); validPadSize = (((subBlkMv[useList].getVer() >> iMVBitShift) + 3) >> 2) << 2; if (subBlkMv[useList].getVer() > 0 && !slice.getRefPic((useList == 1) ? REF_PIC_LIST_1 : REF_PIC_LIST_0, reflistIdx[useList])->unscaledPic->cs->slice->isIntra() && slice.getTLayer() >= PAD_MORE_TL) { validPadSize = std::max(validPadSize, 4); } validPadSize = std::max(validPadSize, 0); validPadSize = std::min(validPadSize, MC_PAD_SIZE); mcBlksize = mcBlksize.offset(4, validPadSize); subBlkMCPos = subBlkMCPos.offset(0, (ctuSize - validPadSize)); } else if (padDir == PAD_BOTTEM) { mvAddOffset.set(0, ((yBlkBoundIdx + 1) * 4) << iMVBitShift); validPadSize = (((-subBlkMv[useList].getVer() >> iMVBitShift) + 3) >> 2) << 2; if (subBlkMv[useList].getVer() < 0 && !slice.getRefPic((useList == 1) ? REF_PIC_LIST_1 : REF_PIC_LIST_0, reflistIdx[useList])->unscaledPic->cs->slice->isIntra() && slice.getTLayer() >= PAD_MORE_TL) { validPadSize = std::max(validPadSize, 4); } validPadSize = std::max(validPadSize, 0); validPadSize = std::min(validPadSize, MC_PAD_SIZE); mcBlksize = mcBlksize.offset(4, validPadSize); } else if (padDir == PAD_LEFT) { mvAddOffset.set(-ctuSize << iMVBitShift, 0); validPadSize = (((subBlkMv[useList].getHor() >> iMVBitShift) + 3) >> 2) << 2; if (subBlkMv[useList].getHor() > 0 && !slice.getRefPic((useList == 1) ? 
REF_PIC_LIST_1 : REF_PIC_LIST_0, reflistIdx[useList])->unscaledPic->cs->slice->isIntra() && slice.getTLayer() >= PAD_MORE_TL) { validPadSize = std::max(validPadSize, 4); } validPadSize = std::max(validPadSize, 0); validPadSize = std::min(validPadSize, MC_PAD_SIZE); mcBlksize = mcBlksize.offset(validPadSize, 4); subBlkMCPos = subBlkMCPos.offset((ctuSize - validPadSize), 0); } else { mvAddOffset.set(((xBlkBoundIdx + 1) * 4) << iMVBitShift, 0); validPadSize = (((-subBlkMv[useList].getHor() >> iMVBitShift) + 3) >> 2) << 2; if (subBlkMv[useList].getHor() < 0 && !slice.getRefPic((useList == 1) ? REF_PIC_LIST_1 : REF_PIC_LIST_0, reflistIdx[useList])->unscaledPic->cs->slice->isIntra() && slice.getTLayer() >= PAD_MORE_TL) { validPadSize = std::max(validPadSize, 4); } validPadSize = std::max(validPadSize, 0); validPadSize = std::min(validPadSize, MC_PAD_SIZE); mcBlksize = mcBlksize.offset(validPadSize, 4); } tempBiMvFieldAddOffset[useList].mv = mvAddOffset + subBlkMv[useList]; tempBiMvFieldAddOffset[useList].refIdx = (int8_t) (reflistIdx[useList]); if (reflistIdx[1 - useList] >= 0) { tempBiMvFieldAddOffset[1 - useList].mv = mvAddOffset + subBlkMv[1 - useList]; tempBiMvFieldAddOffset[1 - useList].refIdx = (int8_t) reflistIdx[1 - useList]; } if (validPadSize > 0) { // start to predict the DC compensate area const Size blkSizeCurBuff = Size(4, 4); const Area blkAreaCurBuff = Area(Position(), blkSizeCurBuff); const UnitArea blkUnitCurAreaBuff(slice.getSPS()->getChromaFormatIdc(), blkAreaCurBuff); int CompDiff[3] = { 0, 0, 0 }; for (int chan = 0; chan <= maxCh; chan++) { Position curposition(subBlkMvPos.getX() >> getComponentScaleX(ComponentID(chan), CHROMA_420), subBlkMvPos.getY() >> getComponentScaleY(ComponentID(chan), CHROMA_420)); blkDataTmp->blocks[chan].pos().repositionTo(curposition); blkDataTmp->cu->blocks[chan].pos().repositionTo(curposition); } PelUnitBuf pcYuvPred = pCurBuffYUV->getBuf(blkUnitCurAreaBuff); PredictionUnit resizePu4X4 = *blkDataTmp; 
CHECK(pcYuvPred.Y().width != 4, "this is not possible"); CHECK(pcYuvPred.Y().height != 4, "this is not possible"); resizePu4X4.UnitArea::operator=( UnitArea(blkDataTmp->chromaFormat, Area(blkDataTmp->lumaPos().x, blkDataTmp->lumaPos().y, 4, 4))); blkDataTmp->refIdx[useList] = (int8_t) reflistIdx[useList]; blkDataTmp->mv[useList] = subBlkMv[useList]; blkDataTmp->refIdx[1 - useList] = (int8_t) reflistIdx[1 - useList]; blkDataTmp->mv[1 - useList] = subBlkMv[1 - useList]; blkDataTmp->interDir = useList + 1; resizePu4X4 = *blkDataTmp; xPredInterUni(resizePu4X4, RefPicList(useList), pcYuvPred, false, false, true, true); for (int chan = 0; chan <= maxCh; chan++) { const ComponentID ch = ComponentID(chan); #if JVET_Z0118_GDR Pel *piTxtRec = pcCurPic->getBuf(ch, pt) .bufAt(subBlkMvPos.getX() >> getComponentScaleX(ComponentID(chan), CHROMA_420), subBlkMvPos.getY() >> getComponentScaleY(ComponentID(chan), CHROMA_420)); const int iStrideRec = pcCurPic->getBuf(ch, pt).stride; #else Pel *piTxtRec = pcCurPic->getBuf(ch, PIC_RECONSTRUCTION) .bufAt(subBlkMvPos.getX() >> getComponentScaleX(ComponentID(chan), CHROMA_420), subBlkMvPos.getY() >> getComponentScaleY(ComponentID(chan), CHROMA_420)); const int iStrideRec = pcCurPic->getBuf(ch, PIC_RECONSTRUCTION).stride; #endif Pel * piTxtBuff = pCurBuffYUV->getBuf(blkUnitCurAreaBuff).bufs[ch].bufAt(0, 0); const int iStrideBuff = pCurBuffYUV->getBuf(blkUnitCurAreaBuff).bufs[ch].stride; for (int idy = 0; idy < (4 >> getComponentScaleY(ComponentID(chan), CHROMA_420)); idy++) { for (int idx = 0; idx < (4 >> getComponentScaleX(ComponentID(chan), CHROMA_420)); idx++) { CompDiff[chan] += (piTxtRec[idx] - piTxtBuff[idx]); } piTxtRec += iStrideRec; piTxtBuff += iStrideBuff; } CompDiff[chan] /= 16 >> getComponentScaleX(ComponentID(chan), CHROMA_420) >> getComponentScaleY(ComponentID(chan), CHROMA_420); } // start to predict the padding area for (int chan = 0; chan <= maxCh; chan++) { Position curposition(subBlkMCPos.getX() >> 
getComponentScaleX(ComponentID(chan), CHROMA_420), subBlkMCPos.getY() >> getComponentScaleY(ComponentID(chan), CHROMA_420)); blkDataTmp->blocks[chan].pos().repositionTo(curposition); blkDataTmp->cu->blocks[chan].pos().repositionTo(curposition); } Size blkSizeConBuff = Size(mcBlksize.getX(), mcBlksize.getY()); Area blkAreaConBuff = Area(Position(), blkSizeConBuff); UnitArea blkUnitConAreaBuff(slice.getSPS()->getChromaFormatIdc(), blkAreaConBuff); PelUnitBuf pcYuvPad = pPadYUVContainerDyn->getBuf(blkUnitConAreaBuff); PredictionUnit resizePuPad = *blkDataTmp; CHECK(pcYuvPad.Y().width != mcBlksize.getX(), "this is not possible"); CHECK(pcYuvPad.Y().height != mcBlksize.getY(), "this is not possible"); resizePuPad.UnitArea::operator=( UnitArea(blkDataTmp->chromaFormat, Area(blkDataTmp->lumaPos().x, blkDataTmp->lumaPos().y, mcBlksize.getX(), mcBlksize.getY()))); blkDataTmp->refIdx[useList] = tempBiMvFieldAddOffset[useList].refIdx; blkDataTmp->mv[useList] = tempBiMvFieldAddOffset[useList].mv; blkDataTmp->refIdx[1 - useList] = tempBiMvFieldAddOffset[1 - useList].refIdx; blkDataTmp->mv[1 - useList] = tempBiMvFieldAddOffset[1 - useList].mv; blkDataTmp->interDir = useList + 1; resizePuPad = *blkDataTmp; xPredInterUni(resizePuPad, RefPicList(useList), pcYuvPad, false, false, true, true); for (int chan = 0; chan <= maxCh; chan++) { const ComponentID ch = ComponentID(chan); Pel *piTxtBuff = pPadBuffYUV->getBuf(blkUnitAreaBuff) .bufs[ch] .bufAt((subBlkMCPos - ctuPos).getX() >> getComponentScaleX(ComponentID(chan), CHROMA_420), (subBlkMCPos - ctuPos).getY() >> getComponentScaleY(ComponentID(chan), CHROMA_420)); const int iStrideBuff = pPadBuffYUV->getBuf(blkUnitAreaBuff).bufs[ch].stride; Pel * piTmpBuff = pPadYUVContainerDyn->getBuf(blkUnitConAreaBuff).bufs[ch].bufAt(0, 0); const int iStrideTmp = pPadYUVContainerDyn->getBuf(blkUnitConAreaBuff).bufs[ch].stride; for (int idy = 0; idy < mcBlksize.getY() >> getComponentScaleY(ComponentID(chan), CHROMA_420); idy++) { for (int idx = 0; 
idx < mcBlksize.getX() >> getComponentScaleX(ComponentID(chan), CHROMA_420); idx++) { piTxtBuff[idx] = piTmpBuff[idx]; piTxtBuff[idx] += CompDiff[chan]; piTxtBuff[idx] = (piTxtBuff[idx] < 0) ? 0 : piTxtBuff[idx]; piTxtBuff[idx] = (piTxtBuff[idx] > 1023) ? 1023 : piTxtBuff[idx]; } piTxtBuff += iStrideBuff; piTmpBuff += iStrideTmp; } } } } for (int chan = 0; chan <= maxCh; chan++) { const ComponentID ch = ComponentID(chan); Position subBlkRepSrcPos; Position subBlkRepPos; Position repBlkSize; const int iStrideBuff = pPadBuffYUV->getBuf(blkUnitAreaBuff).bufs[ch].stride; int iStrideSrc; if (padDir == PAD_TOP) { repBlkSize = repBlkSize.offset(4 >> getComponentScaleX(ch, CHROMA_420), (ctuSize - validPadSize) >> getComponentScaleY(ch, CHROMA_420)); subBlkRepPos = subBlkRepPos.offset(subBlkIdx * 4, 0); Pel *piTxtBuff = pPadBuffYUV->getBuf(blkUnitAreaBuff) .bufs[ch] .bufAt(subBlkRepPos.getX() >> getComponentScaleX(ch, CHROMA_420), subBlkRepPos.getY() >> getComponentScaleY(ch, CHROMA_420)); Pel *piTxtSrc; if (validPadSize == 0) { subBlkRepSrcPos = subBlkRepSrcPos.offset(subBlkPos.getX(), subBlkPos.getY()); #if JVET_Z0118_GDR piTxtSrc = pcCurPic->getBuf(ch, pt) .bufAt(subBlkRepSrcPos.getX() >> getComponentScaleX(ch, CHROMA_420), subBlkRepSrcPos.getY() >> getComponentScaleY(ch, CHROMA_420)); #else piTxtSrc = pcCurPic->getBuf(ch, PIC_RECONSTRUCTION) .bufAt(subBlkRepSrcPos.getX() >> getComponentScaleX(ch, CHROMA_420), subBlkRepSrcPos.getY() >> getComponentScaleY(ch, CHROMA_420)); #endif } else { subBlkRepSrcPos = subBlkRepSrcPos.offset(subBlkIdx * 4, (ctuSize - validPadSize)); piTxtSrc = pPadBuffYUV->getBuf(blkUnitAreaBuff) .bufs[ch] .bufAt(subBlkRepSrcPos.getX() >> getComponentScaleX(ch, CHROMA_420), subBlkRepSrcPos.getY() >> getComponentScaleY(ch, CHROMA_420)); } for (int idy = 0; idy < repBlkSize.getY(); idy++) { memcpy(piTxtBuff, piTxtSrc, sizeof(Pel) * repBlkSize.getX()); piTxtBuff += iStrideBuff; } } else if (padDir == PAD_BOTTEM) { repBlkSize = repBlkSize.offset(4 >> 
getComponentScaleX(ch, CHROMA_420), (ctuSize - validPadSize) >> getComponentScaleY(ch, CHROMA_420)); subBlkRepPos = subBlkRepPos.offset(subBlkIdx * 4, validPadSize); Pel *piTxtBuff = pPadBuffYUV->getBuf(blkUnitAreaBuff) .bufs[ch] .bufAt(subBlkRepPos.getX() >> getComponentScaleX(ch, CHROMA_420), subBlkRepPos.getY() >> getComponentScaleY(ch, CHROMA_420)); Pel *piTxtSrc; if (validPadSize == 0) { subBlkRepSrcPos = subBlkRepSrcPos.offset(subBlkPos.getX(), iHeightFrm); #if JVET_Z0118_GDR piTxtSrc = pcCurPic->getBuf(ch, pt) .bufAt(subBlkRepSrcPos.getX() >> getComponentScaleX(ch, CHROMA_420), (subBlkRepSrcPos.getY() >> getComponentScaleY(ch, CHROMA_420)) - 1); #else piTxtSrc = pcCurPic->getBuf(ch, PIC_RECONSTRUCTION) .bufAt(subBlkRepSrcPos.getX() >> getComponentScaleX(ch, CHROMA_420), (subBlkRepSrcPos.getY() >> getComponentScaleY(ch, CHROMA_420)) - 1); #endif } else { subBlkRepSrcPos = subBlkRepSrcPos.offset(subBlkIdx * 4, validPadSize); piTxtSrc = pPadBuffYUV->getBuf(blkUnitAreaBuff) .bufs[ch] .bufAt(subBlkRepSrcPos.getX() >> getComponentScaleX(ch, CHROMA_420), (subBlkRepSrcPos.getY() >> getComponentScaleY(ch, CHROMA_420)) - 1); } for (int idy = 0; idy < repBlkSize.getY(); idy++) { memcpy(piTxtBuff, piTxtSrc, sizeof(Pel) * repBlkSize.getX()); piTxtBuff += iStrideBuff; } } else if (padDir == PAD_LEFT) { repBlkSize = repBlkSize.offset((ctuSize - validPadSize) >> getComponentScaleX(ch, CHROMA_420), 4 >> getComponentScaleY(ch, CHROMA_420)); subBlkRepPos = subBlkRepPos.offset(0, subBlkIdx * 4); Pel *piTxtBuff = pPadBuffYUV->getBuf(blkUnitAreaBuff) .bufs[ch] .bufAt(subBlkRepPos.getX() >> getComponentScaleX(ch, CHROMA_420), subBlkRepPos.getY() >> getComponentScaleY(ch, CHROMA_420)); Pel *piTxtSrc; if (validPadSize == 0) { subBlkRepSrcPos = subBlkRepSrcPos.offset(subBlkPos.getX(), subBlkPos.getY()); #if JVET_Z0118_GDR piTxtSrc = pcCurPic->getBuf(ch, pt) .bufAt(subBlkRepSrcPos.getX() >> getComponentScaleX(ch, CHROMA_420), subBlkRepSrcPos.getY() >> getComponentScaleY(ch, 
CHROMA_420)); iStrideSrc = pcCurPic->getBuf(ch, pt).stride; #else piTxtSrc = pcCurPic->getBuf(ch, PIC_RECONSTRUCTION) .bufAt(subBlkRepSrcPos.getX() >> getComponentScaleX(ch, CHROMA_420), subBlkRepSrcPos.getY() >> getComponentScaleY(ch, CHROMA_420)); iStrideSrc = pcCurPic->getBuf(ch, PIC_RECONSTRUCTION).stride; #endif } else { subBlkRepSrcPos = subBlkRepSrcPos.offset((ctuSize - validPadSize), subBlkIdx * 4); piTxtSrc = pPadBuffYUV->getBuf(blkUnitAreaBuff) .bufs[ch] .bufAt(subBlkRepSrcPos.getX() >> getComponentScaleX(ch, CHROMA_420), subBlkRepSrcPos.getY() >> getComponentScaleY(ch, CHROMA_420)); iStrideSrc = pPadBuffYUV->getBuf(blkUnitAreaBuff).bufs[ch].stride; } for (int idy = 0; idy < repBlkSize.getY(); idy++) { for (int idx = 0; idx < repBlkSize.getX(); idx++) { piTxtBuff[idx] = piTxtSrc[0]; } piTxtBuff += iStrideBuff; piTxtSrc += iStrideSrc; } } else { subBlkRepPos = subBlkRepPos.offset(validPadSize, subBlkIdx * 4); Pel *piTxtBuff = pPadBuffYUV->getBuf(blkUnitAreaBuff) .bufs[ch] .bufAt(subBlkRepPos.getX() >> getComponentScaleX(ch, CHROMA_420), subBlkRepPos.getY() >> getComponentScaleY(ch, CHROMA_420)); repBlkSize = repBlkSize.offset((ctuSize - validPadSize) >> getComponentScaleX(ch, CHROMA_420), 4 >> getComponentScaleY(ch, CHROMA_420)); Pel *piTxtSrc; if (validPadSize == 0) { subBlkRepSrcPos = subBlkRepSrcPos.offset(iWidthFrm, subBlkPos.getY()); #if JVET_Z0118_GDR piTxtSrc = pcCurPic->getBuf(ch, pt) .bufAt((subBlkRepSrcPos.getX() >> getComponentScaleX(ch, CHROMA_420)) - 1, subBlkRepSrcPos.getY() >> getComponentScaleY(ch, CHROMA_420)); iStrideSrc = pcCurPic->getBuf(ch, pt).stride; #else piTxtSrc = pcCurPic->getBuf(ch, PIC_RECONSTRUCTION) .bufAt((subBlkRepSrcPos.getX() >> getComponentScaleX(ch, CHROMA_420)) - 1, subBlkRepSrcPos.getY() >> getComponentScaleY(ch, CHROMA_420)); iStrideSrc = pcCurPic->getBuf(ch, PIC_RECONSTRUCTION).stride; #endif } else { subBlkRepSrcPos = subBlkRepSrcPos.offset(validPadSize, subBlkIdx * 4); piTxtSrc = 
pPadBuffYUV->getBuf(blkUnitAreaBuff) .bufs[ch] .bufAt((subBlkRepSrcPos.getX() >> getComponentScaleX(ch, CHROMA_420)) - 1, subBlkRepSrcPos.getY() >> getComponentScaleY(ch, CHROMA_420)); iStrideSrc = pPadBuffYUV->getBuf(blkUnitAreaBuff).bufs[ch].stride; } for (int idy = 0; idy < repBlkSize.getY(); idy++) { for (int idx = 0; idx < repBlkSize.getX(); idx++) { piTxtBuff[idx] = piTxtSrc[0]; } piTxtBuff += iStrideBuff; piTxtSrc += iStrideSrc; } } } } // Copy MC results from Buffer to Rec for (int chan = 0; chan <= maxCh; chan++) { const ComponentID ch = ComponentID(chan); #if JVET_Z0118_GDR Pel * piTxtRec = pcCurPic->getBuf(ch, pt) .bufAt(ctuPos.getX() >> getComponentScaleX(ch, CHROMA_420), ctuPos.getY() >> getComponentScaleY(ch, CHROMA_420)); const int iStrideRec = pcCurPic->getBuf(ch, pt).stride; #else Pel * piTxtRec = pcCurPic->getBuf(ch, PIC_RECONSTRUCTION) .bufAt(ctuPos.getX() >> getComponentScaleX(ch, CHROMA_420), ctuPos.getY() >> getComponentScaleY(ch, CHROMA_420)); const int iStrideRec = pcCurPic->getBuf(ch, PIC_RECONSTRUCTION).stride; #endif Pel * piTxtBuff = pPadBuffYUV->getBuf(blkUnitAreaBuff).bufs[ch].bufAt(0, 0); const int iStrideBuff = pPadBuffYUV->getBuf(blkUnitAreaBuff).bufs[ch].stride; int iWidthBuff, iHeightBuff; if (padDir == PAD_TOP || padDir == PAD_BOTTEM) { iWidthBuff = (maxIdxSubBlkPlus1 * 4) >> getComponentScaleX(ch, CHROMA_420); iHeightBuff = ctuSize >> getComponentScaleY(ch, CHROMA_420); } else { iWidthBuff = ctuSize >> getComponentScaleX(ch, CHROMA_420); iHeightBuff = (maxIdxSubBlkPlus1 * 4) >> getComponentScaleY(ch, CHROMA_420); } const int blkSize = (4 >> getComponentScaleX(ch, CHROMA_420)); if (padDir == PAD_TOP) { piTxtRec -= iStrideRec * iHeightBuff; } else if (padDir == PAD_BOTTEM) { piTxtRec += iStrideRec * (yBlkBoundIdx + 1) * blkSize; } else if (padDir == PAD_LEFT) { piTxtRec -= iWidthBuff; } else { piTxtRec += (xBlkBoundIdx + 1) * blkSize; } for (int idy = -iHeightBuff; idy < 0; idy++) { memcpy(piTxtRec, piTxtBuff, sizeof(Pel) * 
iWidthBuff); piTxtBuff += iStrideBuff; piTxtRec += iStrideRec; } } } } #if JVET_Z0118_GDR void InterPrediction::mcFramePadRepExt(Picture *pcCurPic, Slice &slice, PictureType pt) #else void InterPrediction::mcFramePadRepExt(Picture *pcCurPic, Slice &slice) #endif { int maxCh = pcCurPic->chromaFormat == CHROMA_400 ? 0 : 2; for (int chan = 0; chan <= maxCh; chan++) { const ComponentID ch = ComponentID(chan); #if JVET_Z0118_GDR Pel * piTxtRec = pcCurPic->getBuf(ch, pt).bufAt(0, 0); const int iStrideRec = pcCurPic->getBuf(ch, pt).stride; #else Pel * piTxtRec = pcCurPic->getBuf(ch, PIC_RECONSTRUCTION).bufAt(0, 0); const int iStrideRec = pcCurPic->getBuf(ch, PIC_RECONSTRUCTION).stride; #endif const int iWidthFrm = slice.getPPS()->getPicWidthInLumaSamples() >> getComponentScaleX(ch, CHROMA_420); const int iHeightFrm = slice.getPPS()->getPicHeightInLumaSamples() >> getComponentScaleY(ch, CHROMA_420); int ctuSize = slice.getSPS()->getMaxCUWidth() >> getComponentScaleX(ch, CHROMA_420); int extPadSizeX = (16 + MC_PAD_SIZE) >> getComponentScaleX(ch, CHROMA_420); int extPadSizeY = (16 + MC_PAD_SIZE) >> getComponentScaleY(ch, CHROMA_420); // left and right piTxtRec -= ctuSize * iStrideRec; for (int idy = -ctuSize; idy < iHeightFrm + ctuSize; idy++) { for (int idx = -(ctuSize + extPadSizeX); idx < -ctuSize; idx++) { piTxtRec[idx] = piTxtRec[-ctuSize]; } for (int idx = iWidthFrm + ctuSize; idx < iWidthFrm + ctuSize + extPadSizeX; idx++) { piTxtRec[idx] = piTxtRec[iWidthFrm + ctuSize - 1]; } piTxtRec += iStrideRec; } // Top Pel *piTxtRecSrc; #if JVET_Z0118_GDR piTxtRec = pcCurPic->getBuf(ch, pt).bufAt(0, 0); #else piTxtRec = pcCurPic->getBuf(ch, PIC_RECONSTRUCTION).bufAt(0, 0); #endif piTxtRec -= (ctuSize + extPadSizeX); piTxtRecSrc = piTxtRec; for (int idy = 0; idy < ctuSize; idy++) { piTxtRec -= iStrideRec; memcpy(piTxtRec, piTxtRecSrc, sizeof(Pel) * (ctuSize + extPadSizeX)); memcpy(piTxtRec + (ctuSize + extPadSizeX + iWidthFrm), piTxtRecSrc + (ctuSize + extPadSizeX + iWidthFrm), 
sizeof(Pel) * (ctuSize + extPadSizeX)); } piTxtRecSrc -= ctuSize * iStrideRec; for (int idy = 0; idy < extPadSizeY; idy++) { piTxtRec -= iStrideRec; memcpy(piTxtRec, piTxtRecSrc, sizeof(Pel) * (((ctuSize + extPadSizeX) << 1) + iWidthFrm)); } // Bottem #if JVET_Z0118_GDR piTxtRec = pcCurPic->getBuf(ch, pt).bufAt(0, 0); #else piTxtRec = pcCurPic->getBuf(ch, PIC_RECONSTRUCTION).bufAt(0, 0); #endif piTxtRec -= (ctuSize + extPadSizeX); piTxtRec += (iHeightFrm - 1) * iStrideRec; piTxtRecSrc = piTxtRec; for (int idy = 0; idy < ctuSize; idy++) { piTxtRec += iStrideRec; memcpy(piTxtRec, piTxtRecSrc, sizeof(Pel) * (ctuSize + extPadSizeX)); memcpy(piTxtRec + (ctuSize + extPadSizeX + iWidthFrm), piTxtRecSrc + (ctuSize + extPadSizeX + iWidthFrm), sizeof(Pel) * (ctuSize + extPadSizeX)); } piTxtRecSrc += ctuSize * iStrideRec; for (int idy = 0; idy < extPadSizeY; idy++) { piTxtRec += iStrideRec; memcpy(piTxtRec, piTxtRecSrc, sizeof(Pel) * (((ctuSize + extPadSizeX) << 1) + iWidthFrm)); } } } #endif