/* The copyright in this software is being made available under the BSD
* License, included below. This software may be subject to other third party
* and contributor rights, including patent rights, and no such rights are
* granted under this license.
*
* Copyright (c) 2010-2019, ITU/ISO/IEC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
/** \file IntraSearch.cpp
* \brief encoder intra search class
*/
#include "IntraSearch.h"
#include "EncModeCtrl.h"
#include "CommonLib/CommonDef.h"
#include "CommonLib/Rom.h"
#include "CommonLib/Picture.h"
#include "CommonLib/UnitTools.h"
#include "CommonLib/dtrace_next.h"
#include "CommonLib/dtrace_buffer.h"
#include <math.h>
#include <limits>
//! \ingroup EncoderLib
//! \{
#define PLTCtx(c) SubCtx( Ctx::Palette, c )

/** Constructs an intra search object in the uninitialized state.
 *
 *  Collaborator pointers and the coding-structure tables are set to null and
 *  m_isInitialized is false; init() is what allocates the tables and flips the flag.
 */
IntraSearch::IntraSearch()
  : m_pSplitCS      (nullptr)
  , m_pFullCS       (nullptr)
  , m_pBestCS       (nullptr)
  , m_pcEncCfg      (nullptr)
  , m_pcTrQuant     (nullptr)
  , m_pcRdCost      (nullptr)
  , m_CABACEstimator(nullptr)
  , m_CtxCache      (nullptr)
  , m_isInitialized (false)
{
  // destroy() resets these two alongside m_pSplitCS/m_pFullCS/m_pBestCS, so give them
  // the same defined starting value. They are assigned here instead of in the
  // initializer list because the member declaration order is not visible from this
  // file and a mismatched list order would trigger -Wreorder.
  m_pTempCS = nullptr;
  m_pSaveCS = nullptr;

  // Per-component scratch buffers are allocated in init(); start them out null so
  // that delete[] on them is always safe.
  for( uint32_t ch = 0; ch < MAX_NUM_TBLOCKS; ch++ )
  {
    m_pSharedPredTransformSkip[ch] = nullptr;
  }
}
void IntraSearch::destroy()
{
CHECK( !m_isInitialized, "Not initialized" );
if( m_pcEncCfg )
{
const uint32_t uiNumLayersToAllocateSplit = 1;
const uint32_t uiNumLayersToAllocateFull = 1;

Karsten Suehring
committed
const int uiNumSaveLayersToAllocate = 2;
for( uint32_t layer = 0; layer < uiNumSaveLayersToAllocate; layer++ )
{
m_pSaveCS[layer]->destroy();
delete m_pSaveCS[layer];
}
uint32_t numWidths = gp_sizeIdxInfo->numWidths();
uint32_t numHeights = gp_sizeIdxInfo->numHeights();
for( uint32_t width = 0; width < numWidths; width++ )
{
for( uint32_t height = 0; height < numHeights; height++ )
{
if( gp_sizeIdxInfo->isCuSize( gp_sizeIdxInfo->sizeFrom( width ) ) && gp_sizeIdxInfo->isCuSize( gp_sizeIdxInfo->sizeFrom( height ) ) )

Karsten Suehring
committed
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
{
for( uint32_t layer = 0; layer < uiNumLayersToAllocateSplit; layer++ )
{
m_pSplitCS[width][height][layer]->destroy();
delete m_pSplitCS[width][height][layer];
}
for( uint32_t layer = 0; layer < uiNumLayersToAllocateFull; layer++ )
{
m_pFullCS[width][height][layer]->destroy();
delete m_pFullCS[width][height][layer];
}
delete[] m_pSplitCS[width][height];
delete[] m_pFullCS [width][height];
m_pBestCS[width][height]->destroy();
m_pTempCS[width][height]->destroy();
delete m_pTempCS[width][height];
delete m_pBestCS[width][height];
}
}
delete[] m_pSplitCS[width];
delete[] m_pFullCS [width];
delete[] m_pTempCS[width];
delete[] m_pBestCS[width];
}
delete[] m_pSplitCS;
delete[] m_pFullCS;
delete[] m_pBestCS;
delete[] m_pTempCS;
delete[] m_pSaveCS;
}
m_pSplitCS = m_pFullCS = nullptr;
m_pBestCS = m_pTempCS = nullptr;
m_pSaveCS = nullptr;
for( uint32_t ch = 0; ch < MAX_NUM_TBLOCKS; ch++ )
{
delete[] m_pSharedPredTransformSkip[ch];
m_pSharedPredTransformSkip[ch] = nullptr;
}

Karsten Suehring
committed
m_isInitialized = false;
}
/** Destructor: releases the init()-time allocations if the object still holds them. */
IntraSearch::~IntraSearch()
{
  // destroy() CHECKs that the object is initialized, so it must not be called otherwise.
  if( !m_isInitialized )
  {
    return;
  }

  destroy();
}
/** Binds the collaborating encoder objects and allocates all working buffers.
 *
 *  \param pcEncCfg         encoder configuration; stored and queried for chroma format and luma bit depth
 *  \param pcTrQuant        transform/quantization object; stored for later use
 *  \param pcRdCost         RD cost object; stored for later use
 *  \param CABACEstimator   CABAC writer used for bit estimation; stored for later use
 *  \param ctxCache         context cache; stored for later use
 *  \param maxCUWidth       width of the area the save coding structures are created for
 *  \param maxCUHeight      height of the area the save coding structures are created for
 *  \param maxTotalCUDepth  not used by this function
 *
 *  Calling this on an already initialized object trips the CHECK below.
 *  Everything allocated here is released by destroy().
 */
void IntraSearch::init( EncCfg*        pcEncCfg,
                        TrQuant*       pcTrQuant,
                        RdCost*        pcRdCost,
                        CABACWriter*   CABACEstimator,
                        CtxCache*      ctxCache,
                        const uint32_t maxCUWidth,
                        const uint32_t maxCUHeight,
                        const uint32_t maxTotalCUDepth
)
{
  CHECK(m_isInitialized, "Already initialized");

  m_pcEncCfg       = pcEncCfg;
  m_pcTrQuant      = pcTrQuant;
  m_pcRdCost       = pcRdCost;
  m_CABACEstimator = CABACEstimator;
  m_CtxCache       = ctxCache;
  // NOTE(review): m_pcReshape is dereferenced later in this file (estIntraPredLumaQT),
  // but no assignment to it is visible in this function - confirm where it is set.

  const ChromaFormat cform = pcEncCfg->getChromaFormatIdc();

  IntraPrediction::init( cform, pcEncCfg->getBitDepth( CHANNEL_TYPE_LUMA ) );
  m_tmpStorageLCU.create(UnitArea(cform, Area(0, 0, MAX_CU_SIZE, MAX_CU_SIZE)));

  for( uint32_t ch = 0; ch < MAX_NUM_TBLOCKS; ch++ )
  {
    m_pSharedPredTransformSkip[ch] = new Pel[MAX_CU_SIZE * MAX_CU_SIZE];
  }

  const uint32_t numWidths  = gp_sizeIdxInfo->numWidths();
  const uint32_t numHeights = gp_sizeIdxInfo->numHeights();

  // destroy() uses the same layer counts when freeing; keep them in sync.
  const uint32_t uiNumLayersToAllocateSplit = 1;
  const uint32_t uiNumLayersToAllocateFull  = 1;

  m_pBestCS  = new CodingStructure**[numWidths];
  m_pTempCS  = new CodingStructure**[numWidths];
  m_pFullCS  = new CodingStructure***[numWidths];
  m_pSplitCS = new CodingStructure***[numWidths];

  for( uint32_t width = 0; width < numWidths; width++ )
  {
    m_pBestCS [width] = new CodingStructure*[numHeights];
    m_pTempCS [width] = new CodingStructure*[numHeights];
    m_pFullCS [width] = new CodingStructure**[numHeights];
    m_pSplitCS[width] = new CodingStructure**[numHeights];

    for( uint32_t height = 0; height < numHeights; height++ )
    {
      if( gp_sizeIdxInfo->isCuSize( gp_sizeIdxInfo->sizeFrom( width ) ) && gp_sizeIdxInfo->isCuSize( gp_sizeIdxInfo->sizeFrom( height ) ) )
      {
        // Every coding structure of this table entry covers the same area.
        const Area cuArea( 0, 0, gp_sizeIdxInfo->sizeFrom( width ), gp_sizeIdxInfo->sizeFrom( height ) );

        m_pBestCS[width][height] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache );
        m_pTempCS[width][height] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache );

        m_pBestCS[width][height]->create( cform, cuArea, false );
        m_pTempCS[width][height]->create( cform, cuArea, false );

        m_pFullCS [width][height] = new CodingStructure*[uiNumLayersToAllocateFull];
        m_pSplitCS[width][height] = new CodingStructure*[uiNumLayersToAllocateSplit];

        for( uint32_t layer = 0; layer < uiNumLayersToAllocateFull; layer++ )
        {
          m_pFullCS[width][height][layer] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache );
          m_pFullCS[width][height][layer]->create( cform, cuArea, false );
        }

        for( uint32_t layer = 0; layer < uiNumLayersToAllocateSplit; layer++ )
        {
          m_pSplitCS[width][height][layer] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache );
          m_pSplitCS[width][height][layer]->create( cform, cuArea, false );
        }
      }
      else
      {
        // Not a valid CU size: leave the slots empty so destroy() can skip them.
        m_pBestCS [width][height] = nullptr;
        m_pTempCS [width][height] = nullptr;
        m_pFullCS [width][height] = nullptr;
        m_pSplitCS[width][height] = nullptr;
      }
    }
  }

  const uint32_t uiNumSaveLayersToAllocate = 2;

  m_pSaveCS = new CodingStructure*[uiNumSaveLayersToAllocate];

  for( uint32_t depth = 0; depth < uiNumSaveLayersToAllocate; depth++ )
  {
    m_pSaveCS[depth] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache );
    m_pSaveCS[depth]->create( UnitArea( cform, Area( 0, 0, maxCUWidth, maxCUHeight ) ), false );
  }

  m_isInitialized = true;
}
//////////////////////////////////////////////////////////////////////////
// INTRA PREDICTION
//////////////////////////////////////////////////////////////////////////
// Sentinel returned by findInterCUCost() when no stored cost applies.
static constexpr double COST_UNKNOWN = -65536.0;

/** Looks up the stored cost of the CU that covers the same luma area as \p cu.
 *
 *  The lookup is only attempted when cu.isConsIntra() holds and the slice of
 *  \p cu is not an intra slice.
 *
 *  \param cu  coding unit whose luma position and size are matched against the stored areas
 *  \return    the stored cost of the first entry with identical luma position and size,
 *             or COST_UNKNOWN if the lookup does not apply or nothing matches
 */
double IntraSearch::findInterCUCost( CodingUnit &cu )
{
  const bool lookupApplies = cu.isConsIntra() && !cu.slice->isIntra();
  if( !lookupApplies )
  {
    return COST_UNKNOWN;
  }

  // search corresponding inter CU cost
  for( int entry = 0; entry < m_numCuInSCIPU; ++entry )
  {
    const auto &storedArea = m_cuAreaInSCIPU[entry];
    const bool  samePos    = cu.lumaPos() == storedArea.pos();
    if( samePos && cu.lumaSize() == storedArea.size() )
    {
      return m_cuCostInSCIPU[entry];
    }
  }

  return COST_UNKNOWN;
}

Karsten Suehring
committed
bool IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner, const double bestCostSoFar, bool mtsCheckRangeFlag, int mtsFirstCheckId, int mtsLastCheckId, bool moreProbMTSIdxFirst )

Karsten Suehring
committed
{
CodingStructure &cs = *cu.cs;
const SPS &sps = *cs.sps;
const uint32_t uiWidthBit = floorLog2(partitioner.currArea().lwidth() );
const uint32_t uiHeightBit = floorLog2(partitioner.currArea().lheight());

Karsten Suehring
committed
// Lambda calculation at equivalent Qp of 4 is recommended because at that Qp, the quantization divisor is 1.
const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda(cu.transQuantBypass) * FRAC_BITS_SCALE;

Karsten Suehring
committed
//===== loop over partitions =====
const TempCtx ctxStart ( m_CtxCache, m_CABACEstimator->getCtx() );
const TempCtx ctxStartMipFlag ( m_CtxCache, SubCtx( Ctx::MipFlag, m_CABACEstimator->getCtx() ) );
const TempCtx ctxStartIspMode ( m_CtxCache, SubCtx( Ctx::ISPMode, m_CABACEstimator->getCtx() ) );
const TempCtx ctxStartPlanarFlag ( m_CtxCache, SubCtx( Ctx::IntraLumaPlanarFlag, m_CABACEstimator->getCtx() ) );
const TempCtx ctxStartIntraMode(m_CtxCache, SubCtx(Ctx::IntraLumaMpmFlag, m_CABACEstimator->getCtx()));
const TempCtx ctxStartMrlIdx ( m_CtxCache, SubCtx( Ctx::MultiRefLineIdx, m_CABACEstimator->getCtx() ) );

Karsten Suehring
committed
CHECK( !cu.firstPU, "CU has no PUs" );
const bool keepResi = cs.pps->getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() || KEEP_PRED_AND_RESI_SIGNALS;
// variables for saving fast intra modes scan results across multiple LFNST passes
bool LFNSTLoadFlag = sps.getUseLFNST() && cu.lfnstIdx != 0;
bool LFNSTSaveFlag = sps.getUseLFNST() && cu.lfnstIdx == 0;
LFNSTSaveFlag &= sps.getUseIntraMTS() ? cu.mtsFlag == 0 : true;
const uint32_t lfnstIdx = cu.lfnstIdx;
double costInterCU = findInterCUCost( cu );

Karsten Suehring
committed
const int width = partitioner.currArea().lwidth();
const int height = partitioner.currArea().lheight();
// Marking MTS usage for faster MTS
// 0: MTS is either not applicable for current CU (cuWidth > MTS_INTRA_MAX_CU_SIZE or cuHeight > MTS_INTRA_MAX_CU_SIZE), not active in the config file or the fast decision algorithm is not used in this case
// 1: MTS fast algorithm can be applied for the current CU, and the DCT2 is being checked
// 2: MTS is being checked for current CU. Stored results of DCT2 can be utilized for speedup
uint8_t mtsUsageFlag = 0;
const int maxSizeEMT = MTS_INTRA_MAX_CU_SIZE;
if( width <= maxSizeEMT && height <= maxSizeEMT && sps.getUseIntraMTS() )
{
mtsUsageFlag = ( sps.getUseLFNST() && cu.mtsFlag == 1 ) ? 2 : 1;
}
if( width * height < 64 && !m_pcEncCfg->getUseFastLFNST() )
{
mtsUsageFlag = 0;
}
Santiago de Luxán Hernández
committed
double bestCurrentCost = bestCostSoFar;
bool testISP = sps.getUseISP() && cu.mtsFlag == 0 && cu.lfnstIdx == 0 && CU::canUseISP( width, height, cu.cs->sps->getMaxTbSize() );
if( testISP )
Yin Zhao
committed
//reset the variables used for the tests
m_ispCandListHor.clear();
m_ispCandListVer.clear();
m_regIntraRDListWithCosts.clear();
m_ispTestedModes.clear();
//save the number of subpartitions
m_ispTestedModes.numTotalParts[0] = (int)height >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_HORZ_SPLIT));
m_ispTestedModes.numTotalParts[1] = (int)width >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_VERT_SPLIT));

Karsten Suehring
committed
const bool testBDPCM = sps.getBDPCMEnabledFlag() && CU::bdpcmAllowed( cu, ComponentID( partitioner.chType ) ) && cu.mtsFlag == 0 && cu.lfnstIdx == 0;
static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> uiHadModeList;

Karsten Suehring
committed
static_vector<double, FAST_UDI_MAX_RDMODE_NUM> CandCostList;
static_vector<double, FAST_UDI_MAX_RDMODE_NUM> CandHadList;

Karsten Suehring
committed
auto &pu = *cu.firstPU;
bool validReturn = false;

Karsten Suehring
committed
{
CandHadList.clear();
CandCostList.clear();
uiHadModeList.clear();
CHECK(pu.cu != &cu, "PU is not contained in the CU");
//===== determine set of modes to be tested (using prediction signal only) =====
int numModesAvailable = NUM_LUMA_MODE; // total number of Intra modes
const bool fastMip = sps.getUseMIP() && m_pcEncCfg->getUseFastMIP();
const bool mipAllowed = sps.getUseMIP() && isLuma(partitioner.chType) && pu.lwidth() <= cu.cs->sps->getMaxTbSize() && pu.lheight() <= cu.cs->sps->getMaxTbSize() && ((cu.lfnstIdx == 0) || allowLfnstWithMip(cu.firstPU->lumaSize()));
const bool testMip = mipAllowed && mipModesAvailable(pu.Y());
static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> uiRdModeList;

Karsten Suehring
committed
int numModesForFullRD = 3;
numModesForFullRD = g_aucIntraModeNumFast_UseMPM_2D[uiWidthBit - MIN_CU_LOG2][uiHeightBit - MIN_CU_LOG2];

Karsten Suehring
committed
#if INTRA_FULL_SEARCH
numModesForFullRD = numModesAvailable;
#endif
if( mtsUsageFlag != 2 )

Karsten Suehring
committed
{
// this should always be true
CHECK( !pu.Y().valid(), "PU is not valid" );
bool isFirstLineOfCtu = (((pu.block(COMPONENT_Y).y)&((pu.cs->sps)->getMaxCUWidth() - 1)) == 0);
int numOfPassesExtendRef = (isFirstLineOfCtu ? 1 : MRL_NUM_REF_LINES);
pu.multiRefIdx = 0;

Karsten Suehring
committed
if( numModesForFullRD != numModesAvailable )
{
CHECK( numModesForFullRD >= numModesAvailable, "Too many modes for full RD search" );
const CompArea &area = pu.Y();
PelBuf piOrg = cs.getOrgBuf(area);
PelBuf piPred = cs.getPredBuf(area);
DistParam distParamSad;
DistParam distParamHad;
if (cu.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
{
CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
PelBuf tmpOrg = m_tmpStorageLCU.getBuf(tmpArea);
tmpOrg.copyFrom(piOrg);
tmpOrg.rspSignal(m_pcReshape->getFwdLUT());
m_pcRdCost->setDistParam(distParamSad, tmpOrg, piPred, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); // Use SAD cost
m_pcRdCost->setDistParam(distParamHad, tmpOrg, piPred, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); // Use HAD (SATD) cost
{
m_pcRdCost->setDistParam(distParamSad, piOrg, piPred, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); // Use SAD cost
m_pcRdCost->setDistParam(distParamHad, piOrg, piPred, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); // Use HAD (SATD) cost
}

Karsten Suehring
committed
distParamSad.applyWeight = false;
distParamHad.applyWeight = false;

Karsten Suehring
committed
numModesForFullRD += fastMip? std::max(numModesForFullRD, floorLog2(std::min(pu.lwidth(), pu.lheight())) - 1) : numModesForFullRD;
}
const int numHadCand = (testMip ? 2 : 1) * 3;
//*** Derive (regular) candidates using Hadamard
cu.mipFlag = false;
//===== init pattern for luma prediction =====
initIntraPatternChType(cu, pu.Y(), true);

Karsten Suehring
committed
bool bSatdChecked[NUM_INTRA_MODE];
memset( bSatdChecked, 0, sizeof( bSatdChecked ) );
if( !LFNSTLoadFlag )

Karsten Suehring
committed
{
for( int modeIdx = 0; modeIdx < numModesAvailable; modeIdx++ )
{
uint32_t uiMode = modeIdx;
Distortion minSadHad = 0;

Karsten Suehring
committed
// Skip checking extended Angular modes in the first round of SATD
if( uiMode > DC_IDX && ( uiMode & 1 ) )
{
continue;
}
bSatdChecked[uiMode] = true;
pu.intraDir[0] = modeIdx;
Alexey Filippov
committed
initPredIntraParams(pu, pu.Y(), sps);

Karsten Suehring
committed
if( useDPCMForFirstPassIntraEstimation( pu, uiMode ) )
{
encPredIntraDPCM( COMPONENT_Y, piOrg, piPred, uiMode );
}
else
{
Alexey Filippov
committed
predIntraAng( COMPONENT_Y, piPred, pu);

Karsten Suehring
committed
}
// Use the min between SAD and HAD as the cost criterion
// SAD is scaled by 2 to align with the scaling of HAD
minSadHad += std::min(distParamSad.distFunc(distParamSad)*2, distParamHad.distFunc(distParamHad));

Karsten Suehring
committed
// NB xFracModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated.
m_CABACEstimator->getCtx() = SubCtx( Ctx::MipFlag, ctxStartMipFlag );
m_CABACEstimator->getCtx() = SubCtx( Ctx::ISPMode, ctxStartIspMode );
m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaPlanarFlag, ctxStartPlanarFlag);
m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMpmFlag, ctxStartIntraMode);
m_CABACEstimator->getCtx() = SubCtx( Ctx::MultiRefLineIdx, ctxStartMrlIdx );

Karsten Suehring
committed
uint64_t fracModeBits = xFracModeBitsIntra(pu, uiMode, CHANNEL_TYPE_LUMA);
double cost = ( double ) minSadHad + (double)fracModeBits * sqrtLambdaForFirstPass;
DTRACE(g_trace_ctx, D_INTRA_COST, "IntraHAD: %u, %llu, %f (%d)\n", minSadHad, fracModeBits, cost, uiMode);

Karsten Suehring
committed
updateCandList( ModeInfo(false, 0, NOT_INTRA_SUBPARTITIONS, uiMode), cost, uiRdModeList, CandCostList, numModesForFullRD );
updateCandList( ModeInfo(false, 0, NOT_INTRA_SUBPARTITIONS, uiMode), (double)minSadHad, uiHadModeList, CandHadList, numHadCand );

Karsten Suehring
committed
}
if( !sps.getUseMIP() && LFNSTSaveFlag )
{
// save found best modes
m_uiSavedNumRdModesLFNST = numModesForFullRD;
m_uiSavedRdModeListLFNST = uiRdModeList;
m_dSavedModeCostLFNST = CandCostList;
// PBINTRA fast
m_uiSavedHadModeListLFNST = uiHadModeList;
m_dSavedHadListLFNST = CandHadList;
LFNSTSaveFlag = false;
}

Karsten Suehring
committed
} // NSSTFlag
if( !sps.getUseMIP() && LFNSTLoadFlag )
{
// restore saved modes
numModesForFullRD = m_uiSavedNumRdModesLFNST;
uiRdModeList = m_uiSavedRdModeListLFNST;
CandCostList = m_dSavedModeCostLFNST;
// PBINTRA fast
uiHadModeList = m_uiSavedHadModeListLFNST;
CandHadList = m_dSavedHadListLFNST;
} // !LFNSTFlag

Karsten Suehring
committed
if (!(sps.getUseMIP() && LFNSTLoadFlag))
{
static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> parentCandList = uiRdModeList;

Karsten Suehring
committed
// Second round of SATD for extended Angular modes
for (int modeIdx = 0; modeIdx < numModesForFullRD; modeIdx++)
{
unsigned parentMode = parentCandList[modeIdx].modeId;

Karsten Suehring
committed
if (parentMode > (DC_IDX + 1) && parentMode < (NUM_LUMA_MODE - 1))
{
for (int subModeIdx = -1; subModeIdx <= 1; subModeIdx += 2)
{
unsigned mode = parentMode + subModeIdx;
if (!bSatdChecked[mode])
{
pu.intraDir[0] = mode;
Alexey Filippov
committed
initPredIntraParams(pu, pu.Y(), sps);

Karsten Suehring
committed
if (useDPCMForFirstPassIntraEstimation(pu, mode))
{
encPredIntraDPCM(COMPONENT_Y, piOrg, piPred, mode);
}
else
{
Alexey Filippov
committed
predIntraAng(COMPONENT_Y, piPred, pu );

Karsten Suehring
committed
}
// Use the min between SAD and SATD as the cost criterion
// SAD is scaled by 2 to align with the scaling of HAD
Distortion minSadHad = std::min(distParamSad.distFunc(distParamSad)*2, distParamHad.distFunc(distParamHad));

Karsten Suehring
committed
// NB xFracModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated.
m_CABACEstimator->getCtx() = SubCtx( Ctx::MipFlag, ctxStartMipFlag );
m_CABACEstimator->getCtx() = SubCtx( Ctx::ISPMode, ctxStartIspMode );
m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaPlanarFlag, ctxStartPlanarFlag);
m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMpmFlag, ctxStartIntraMode);
m_CABACEstimator->getCtx() = SubCtx( Ctx::MultiRefLineIdx, ctxStartMrlIdx );

Karsten Suehring
committed
uint64_t fracModeBits = xFracModeBitsIntra(pu, mode, CHANNEL_TYPE_LUMA);
double cost = (double) minSadHad + (double) fracModeBits * sqrtLambdaForFirstPass;

Karsten Suehring
committed
updateCandList( ModeInfo( false, 0, NOT_INTRA_SUBPARTITIONS, mode ), cost, uiRdModeList, CandCostList, numModesForFullRD );
updateCandList( ModeInfo( false, 0, NOT_INTRA_SUBPARTITIONS, mode ), (double)minSadHad, uiHadModeList, CandHadList, numHadCand );

Karsten Suehring
committed
bSatdChecked[mode] = true;
}
}
}
}
Santiago de Luxán Hernández
committed
if ( testISP )
Yin Zhao
committed
// we save the regular intra modes list
const int numMPMs = NUM_MOST_PROBABLE_MODES;
unsigned multiRefMPM [numMPMs];
PU::getIntraMPMs(pu, multiRefMPM);
for (int mRefNum = 1; mRefNum < numOfPassesExtendRef; mRefNum++)
{
int multiRefIdx = MULTI_REF_LINE_IDX[mRefNum];
pu.multiRefIdx = multiRefIdx;
{
Alexey Filippov
committed
initIntraPatternChType(cu, pu.Y(), true);
for (int x = 1; x < numMPMs; x++)
{
uint32_t mode = multiRefMPM[x];
{
pu.intraDir[0] = mode;
Alexey Filippov
committed
initPredIntraParams(pu, pu.Y(), sps);
if (useDPCMForFirstPassIntraEstimation(pu, mode))
{
encPredIntraDPCM(COMPONENT_Y, piOrg, piPred, mode);
}
else
{
Alexey Filippov
committed
predIntraAng(COMPONENT_Y, piPred, pu);
// Use the min between SAD and SATD as the cost criterion
// SAD is scaled by 2 to align with the scaling of HAD
Distortion minSadHad = std::min(distParamSad.distFunc(distParamSad)*2, distParamHad.distFunc(distParamHad));
// NB xFracModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated.
m_CABACEstimator->getCtx() = SubCtx( Ctx::MipFlag, ctxStartMipFlag );
m_CABACEstimator->getCtx() = SubCtx( Ctx::ISPMode, ctxStartIspMode );
m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaPlanarFlag, ctxStartPlanarFlag);
m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMpmFlag, ctxStartIntraMode);
m_CABACEstimator->getCtx() = SubCtx( Ctx::MultiRefLineIdx, ctxStartMrlIdx );
uint64_t fracModeBits = xFracModeBitsIntra(pu, mode, CHANNEL_TYPE_LUMA);
double cost = (double)minSadHad + (double)fracModeBits * sqrtLambdaForFirstPass;
updateCandList( ModeInfo( false, multiRefIdx, NOT_INTRA_SUBPARTITIONS, mode ), cost, uiRdModeList, CandCostList, numModesForFullRD );
updateCandList( ModeInfo( false, multiRefIdx, NOT_INTRA_SUBPARTITIONS, mode ), (double)minSadHad, uiHadModeList, CandHadList, numHadCand );
CHECKD( uiRdModeList.size() != numModesForFullRD, "Error: RD mode list size" );
if (LFNSTSaveFlag && testMip && !allowLfnstWithMip(cu.firstPU->lumaSize())) // save a different set for the next run
{
Yin Zhao
committed
// save found best modes
m_uiSavedRdModeListLFNST = uiRdModeList;
m_dSavedModeCostLFNST = CandCostList;
// PBINTRA fast
m_uiSavedHadModeListLFNST = uiHadModeList;
m_dSavedHadListLFNST = CandHadList;
m_uiSavedNumRdModesLFNST = g_aucIntraModeNumFast_UseMPM_2D[uiWidthBit - MIN_CU_LOG2][uiHeightBit - MIN_CU_LOG2];
m_uiSavedRdModeListLFNST.resize(m_uiSavedNumRdModesLFNST);
m_dSavedModeCostLFNST.resize(m_uiSavedNumRdModesLFNST);
// PBINTRA fast
m_uiSavedHadModeListLFNST.resize(3);
m_dSavedHadListLFNST.resize(3);
LFNSTSaveFlag = false;
}
//*** Derive MIP candidates using Hadamard
if (testMip)
{
cu.mipFlag = true;
pu.multiRefIdx = 0;
double mipHadCost[MAX_NUM_MIP_MODE] = { MAX_DOUBLE };
initIntraPatternChType(cu, pu.Y());
initIntraMip( pu );
for (uint32_t uiMode = 0; uiMode < getNumModesMip(pu.Y()); uiMode++)
{
pu.intraDir[CHANNEL_TYPE_LUMA] = uiMode;
predIntraMip(COMPONENT_Y, piPred, pu);
// Use the min between SAD and HAD as the cost criterion
// SAD is scaled by 2 to align with the scaling of HAD
Distortion minSadHad = std::min(distParamSad.distFunc(distParamSad)*2, distParamHad.distFunc(distParamHad));
m_CABACEstimator->getCtx() = SubCtx( Ctx::MipFlag, ctxStartMipFlag );
uint64_t fracModeBits = xFracModeBitsIntra(pu, uiMode, CHANNEL_TYPE_LUMA);
double cost = double(minSadHad) + double(fracModeBits) * sqrtLambdaForFirstPass;
mipHadCost[uiMode] = cost;
DTRACE(g_trace_ctx, D_INTRA_COST, "IntraMIP: %u, %llu, %f (%d)\n", minSadHad, fracModeBits, cost, uiMode);
updateCandList(ModeInfo(true, 0, NOT_INTRA_SUBPARTITIONS, uiMode), cost, uiRdModeList, CandCostList, numModesForFullRD + 1);
updateCandList(ModeInfo(true, 0, NOT_INTRA_SUBPARTITIONS, uiMode), 0.8*double(minSadHad), uiHadModeList, CandHadList, numHadCand);
}
const double thresholdHadCost = 1.0 + 1.4 / sqrt((double)(pu.lwidth()*pu.lheight()));
reduceHadCandList(uiRdModeList, CandCostList, numModesForFullRD, thresholdHadCost, mipHadCost, pu, fastMip);
if ( sps.getUseMIP() && LFNSTSaveFlag)
{
// save found best modes
m_uiSavedNumRdModesLFNST = numModesForFullRD;
m_uiSavedRdModeListLFNST = uiRdModeList;
m_dSavedModeCostLFNST = CandCostList;
// PBINTRA fast
m_uiSavedHadModeListLFNST = uiHadModeList;
m_dSavedHadListLFNST = CandHadList;
LFNSTSaveFlag = false;
}
}
else //if( sps.getUseMIP() && LFNSTLoadFlag)
{
// restore saved modes
numModesForFullRD = m_uiSavedNumRdModesLFNST;
uiRdModeList = m_uiSavedRdModeListLFNST;
CandCostList = m_dSavedModeCostLFNST;
// PBINTRA fast
uiHadModeList = m_uiSavedHadModeListLFNST;
CandHadList = m_dSavedHadListLFNST;
}

Karsten Suehring
committed
if( m_pcEncCfg->getFastUDIUseMPMEnabled() )
{
const int numMPMs = NUM_MOST_PROBABLE_MODES;
unsigned uiPreds[numMPMs];

Karsten Suehring
committed

Karsten Suehring
committed
const int numCand = PU::getIntraMPMs( pu, uiPreds );
for( int j = 0; j < numCand; j++ )
{
bool mostProbableModeIncluded = false;
ModeInfo mostProbableMode( false, 0, NOT_INTRA_SUBPARTITIONS, uiPreds[j] );

Karsten Suehring
committed
for( int i = 0; i < numModesForFullRD; i++ )
{
mostProbableModeIncluded |= ( mostProbableMode == uiRdModeList[i] );

Karsten Suehring
committed
}
if( !mostProbableModeIncluded )
{
numModesForFullRD++;
uiRdModeList.push_back( mostProbableMode );
CandCostList.push_back(0);

Karsten Suehring
committed
}
}
Santiago de Luxán Hernández
committed
if ( testISP )
Yin Zhao
committed
// we add the MPMs to the list that contains only regular intra modes
for (int j = 0; j < numCand; j++)
{
bool mostProbableModeIncluded = false;
ModeInfo mostProbableMode(false, 0, NOT_INTRA_SUBPARTITIONS, uiPreds[j]);
for (int i = 0; i < m_ispCandListHor.size(); i++)
{
mostProbableModeIncluded |= (mostProbableMode == m_ispCandListHor[i]);
}
if (!mostProbableModeIncluded)
{
m_ispCandListHor.push_back(mostProbableMode);
}
}

Karsten Suehring
committed
}
else
{
THROW( "Full search not supported for MIP" );
}
if( sps.getUseLFNST() && mtsUsageFlag == 1 )
{
// Store the modes to be checked with RD
m_savedNumRdModes[ lfnstIdx ] = numModesForFullRD;
std::copy_n( uiRdModeList.begin(), numModesForFullRD, m_savedRdModeList[ lfnstIdx ] );
}
}
else //mtsUsage = 2 (here we potentially reduce the number of modes that will be full-RD checked)
{
if( ( m_pcEncCfg->getUseFastLFNST() || !cu.slice->isIntra() ) && m_bestModeCostValid[ lfnstIdx ] )
{
numModesForFullRD = 0;
double thresholdSkipMode = 1.0 + ( ( cu.lfnstIdx > 0 ) ? 0.1 : 1.0 ) * ( 1.4 / sqrt( ( double ) ( width*height ) ) );
// Skip checking the modes with much larger R-D cost than the best mode
for( int i = 0; i < m_savedNumRdModes[ lfnstIdx ]; i++ )
{
if( m_modeCostStore[ lfnstIdx ][ i ] <= thresholdSkipMode * m_bestModeCostStore[ lfnstIdx ] )
{
uiRdModeList.push_back( m_savedRdModeList[ lfnstIdx ][ i ] );
numModesForFullRD++;
}
}
}
else //this is necessary because we skip the candidates list calculation, since it was already obtained for the DCT-II. Now we load it
{
// Restore the modes to be checked with RD
numModesForFullRD = m_savedNumRdModes[ lfnstIdx ];
uiRdModeList.resize( numModesForFullRD );
std::copy_n( m_savedRdModeList[ lfnstIdx ], m_savedNumRdModes[ lfnstIdx ], uiRdModeList.begin() );
CandCostList.resize( numModesForFullRD );

Karsten Suehring
committed
}
}

Karsten Suehring
committed
CHECK( numModesForFullRD != uiRdModeList.size(), "Inconsistent state!" );
// after this point, don't use numModesForFullRD
// PBINTRA fast
if( m_pcEncCfg->getUsePbIntraFast() && !cs.slice->isIntra() && uiRdModeList.size() < numModesAvailable && !cs.slice->getDisableSATDForRD() && ( mtsUsageFlag != 2 || lfnstIdx > 0 ) )

Karsten Suehring
committed
{
Philipp Merkle
committed
double pbintraRatio = (lfnstIdx > 0) ? 1.25 : PBINTRA_RATIO;
ModeInfo bestMipMode;
int bestMipIdx = -1;
for( int idx = 0; idx < uiRdModeList.size(); idx++ )
{
if( uiRdModeList[idx].mipFlg )
{
bestMipMode = uiRdModeList[idx];
bestMipIdx = idx;
break;
}
}
const int numHadCand = 3;
Philipp Merkle
committed
for (int k = numHadCand - 1; k >= 0; k--)
Philipp Merkle
committed
if (CandHadList.size() < (k + 1) || CandHadList[k] > cs.interHad * pbintraRatio) { maxSize = k; }
}
if (maxSize > 0)
{
uiRdModeList.resize(std::min<size_t>(uiRdModeList.size(), maxSize));
if( bestMipIdx >= 0 )
{
if( uiRdModeList.size() <= bestMipIdx )
{
uiRdModeList.push_back(bestMipMode);
}
}
Santiago de Luxán Hernández
committed
if ( testISP )
m_ispCandListHor.resize(std::min<size_t>(m_ispCandListHor.size(), maxSize));
}
}
if (maxSize == 0)
{
cs.dist = std::numeric_limits<Distortion>::max();
cs.interHad = 0;
//===== reset context models =====
m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag);
m_CABACEstimator->getCtx() = SubCtx(Ctx::ISPMode, ctxStartIspMode);
m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaPlanarFlag, ctxStartPlanarFlag);
m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMpmFlag, ctxStartIntraMode);
m_CABACEstimator->getCtx() = SubCtx(Ctx::MultiRefLineIdx, ctxStartMrlIdx);

Karsten Suehring
committed
}
}
int numNonISPModes = (int)uiRdModeList.size();
Santiago de Luxán Hernández
committed
if ( testISP )
Santiago de Luxán Hernández
committed
{
// we reserve positions for ISP in the common full RD list
const int maxNumRDModesISP = 16;
for (int i = 0; i < maxNumRDModesISP; i++)
uiRdModeList.push_back(ModeInfo(false, 0, INTRA_SUBPARTITIONS_RESERVED, 0));
Santiago de Luxán Hernández
committed
}

Karsten Suehring
committed
//===== check modes (using r-d costs) =====
int bestBDPCMMode = 0;
double bestCostNonBDPCM = MAX_DOUBLE;

Karsten Suehring
committed
CodingStructure *csTemp = m_pTempCS[gp_sizeIdxInfo->idxFrom( cu.lwidth() )][gp_sizeIdxInfo->idxFrom( cu.lheight() )];
CodingStructure *csBest = m_pBestCS[gp_sizeIdxInfo->idxFrom( cu.lwidth() )][gp_sizeIdxInfo->idxFrom( cu.lheight() )];
csTemp->slice = cs.slice;
csBest->slice = cs.slice;
csTemp->initStructData();
csBest->initStructData();
csTemp->picture = cs.picture;
csBest->picture = cs.picture;

Karsten Suehring
committed
static_vector<int, FAST_UDI_MAX_RDMODE_NUM> rdModeIdxList;
Philipp Merkle
committed
if (testMip)
{
static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> rdModeListTemp;
for( int i = 0; i < uiRdModeList.size(); i++)
if( !uiRdModeList[i].mipFlg && uiRdModeList[i].ispMod==NOT_INTRA_SUBPARTITIONS )
rdModeIdxList.push_back( i );
for( int i = 0; i < uiRdModeList.size(); i++)
if( uiRdModeList[i].mipFlg || uiRdModeList[i].ispMod!=NOT_INTRA_SUBPARTITIONS )
rdModeIdxList.push_back( i );
for( int i = 0; i < uiRdModeList.size(); i++)
Philipp Merkle
committed
}
static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> rdModeListTemp;
for( int i = 0; i < uiRdModeList.size(); i++ )
{
if( !uiRdModeList[i].mipFlg )
{
uiRdModeList.resize(rdModeListTemp.size());
for( int i = 0; i < rdModeListTemp.size(); i++ )

Karsten Suehring
committed
// just to be sure
numModesForFullRD = ( int ) uiRdModeList.size();
TUIntraSubPartitioner subTuPartitioner( partitioner );
if( !cu.ispMode && !cu.mtsFlag )
{
m_modeCtrl->setMtsFirstPassNoIspCost( MAX_DOUBLE );
}
for (int mode = -2 * int(testBDPCM); mode < (int)uiRdModeList.size(); mode++)
{
// set CU/PU to luma prediction mode
ModeInfo uiOrgMode;
cu.bdpcmMode = -mode;
Weijia
committed
uiOrgMode = ModeInfo(false, 0, NOT_INTRA_SUBPARTITIONS, cu.bdpcmMode == 2 ? VER_IDX : HOR_IDX);
cu.mipFlag = uiOrgMode.mipFlg;
cu.ispMode = uiOrgMode.ispMod;
pu.multiRefIdx = uiOrgMode.mRefId;
pu.intraDir[CHANNEL_TYPE_LUMA] = uiOrgMode.modeId;
}
else
{
cu.bdpcmMode = 0;
if (uiRdModeList[mode].ispMod == INTRA_SUBPARTITIONS_RESERVED)
{
if (mode == numNonISPModes) // the list needs to be sorted only once
{
xSortISPCandList(bestCurrentCost, csBest->cost);
}
xGetNextISPMode(uiRdModeList[mode], (mode > 0 ? &uiRdModeList[mode - 1] : nullptr), Size(width, height));
if (uiRdModeList[mode].ispMod == INTRA_SUBPARTITIONS_RESERVED)
continue;
}
uiOrgMode = uiRdModeList[mode];
cu.mipFlag = uiOrgMode.mipFlg;
cu.ispMode = uiOrgMode.ispMod;
pu.multiRefIdx = uiOrgMode.mRefId;
pu.intraDir[CHANNEL_TYPE_LUMA] = uiOrgMode.modeId;
CHECK(cu.mipFlag && pu.multiRefIdx, "Error: combination of MIP and MRL not supported");
CHECK(pu.multiRefIdx && (pu.intraDir[0] == PLANAR_IDX), "Error: combination of MRL and Planar mode not supported");
CHECK(cu.ispMode && cu.mipFlag, "Error: combination of ISP and MIP not supported");
CHECK(cu.ispMode && pu.multiRefIdx, "Error: combination of ISP and MRL not supported");

Karsten Suehring
committed
// set context models
m_CABACEstimator->getCtx() = ctxStart;
// determine residual for partition
cs.initSubStructure( *csTemp, partitioner.chType, cs.area, true );
bool tmpValidReturn = false;
if( cu.ispMode )
{
tmpValidReturn = xIntraCodingLumaISP(*csTemp, subTuPartitioner, bestCurrentCost);
if (csTemp->tus.size() == 0)
{
// no TUs were coded
csTemp->cost = MAX_DOUBLE;
continue;
}
if (!cu.mtsFlag && !cu.lfnstIdx)
{
// we save the data for future tests
m_ispTestedModes.setModeResults((ISPType)cu.ispMode, (int)uiOrgMode.modeId, (int)csTemp->tus.size(), csTemp->cus[0]->firstTU->cbf[COMPONENT_Y] ? csTemp->cost : MAX_DOUBLE, csBest->cost);
}
}
else
{
tmpValidReturn = xRecurIntraCodingLumaQT( *csTemp, partitioner, uiBestPUMode.ispMod ? bestCurrentCost : MAX_DOUBLE, -1, TU_NO_ISP, uiBestPUMode.ispMod,
mtsCheckRangeFlag, mtsFirstCheckId, mtsLastCheckId, moreProbMTSIdxFirst );
if (!cu.ispMode && !cu.mtsFlag && !cu.lfnstIdx && !cu.bdpcmMode && !pu.multiRefIdx && !cu.mipFlag && testISP)
{
m_regIntraRDListWithCosts.push_back(ModeInfoWithCost(cu.mipFlag, pu.multiRefIdx, cu.ispMode, uiOrgMode.modeId, csTemp->cost));
}
if( cu.ispMode && !csTemp->cus[0]->firstTU->cbf[COMPONENT_Y] )
{
csTemp->cost = MAX_DOUBLE;
tmpValidReturn = false;
validReturn |= tmpValidReturn;
if( sps.getUseLFNST() && mtsUsageFlag == 1 && !cu.ispMode && mode >= 0 )
{
m_modeCostStore[ lfnstIdx ][ testMip ? rdModeIdxList[ mode ] : mode ] = tmpValidReturn ? csTemp->cost : ( MAX_DOUBLE / 2.0 ); //(MAX_DOUBLE / 2.0) ??
DTRACE(g_trace_ctx, D_INTRA_COST, "IntraCost T [x=%d,y=%d,w=%d,h=%d] %f (%d,%d,%d,%d,%d,%d) \n", cu.blocks[0].x,
cu.blocks[0].y, (int)width, (int)height, csTemp->cost, uiOrgMode.modeId, uiOrgMode.ispMod,
pu.multiRefIdx, cu.mipFlag, cu.lfnstIdx, cu.mtsFlag);

Karsten Suehring
committed
if( tmpValidReturn )

Karsten Suehring
committed
{
// check r-d cost
if( csTemp->cost < csBest->cost )
{
std::swap( csTemp, csBest );

Karsten Suehring
committed
uiBestPUMode = uiOrgMode;
bestBDPCMMode = cu.bdpcmMode;
if( sps.getUseLFNST() && mtsUsageFlag == 1 && !cu.ispMode )
{
m_bestModeCostStore[ lfnstIdx ] = csBest->cost; //cs.cost;