Speed-Up for ISP when JVET_M0464_UNI_MTS is enabled. It can be...

Speed-up for ISP when JVET_M0464_UNI_MTS is enabled. It can be enabled or disabled with the configuration-file parameter ISPFast.

When enabled, it:
- merges all full-RD intra mode lists into one,
- tests fewer non-DCT-II transforms if ISP is likely to become the best mode, and
- stops testing intra modes for an ISP split if all sub-partitions obtained a zero CBF.

Results in CTC:

AI -> 0.04% loss, EncT = 97%, DecT = 100%
RA -> 0.01% loss, EncT = 99%, DecT = 100%
parent ed4a934b
......@@ -117,6 +117,7 @@ LumaReshapeEnable : 1 # luma reshaping. 0: disable 1:enable
# Fast tools
PBIntraFast : 1
ISPFast : 1
FastMrg : 1
AMaxBT : 1
......
......@@ -153,6 +153,7 @@ DMVR : 1
# Fast tools
PBIntraFast : 1
ISPFast : 1
FastMrg : 1
AMaxBT : 1
......
......@@ -326,6 +326,9 @@ void EncApp::xInitLibCfg()
m_cEncLib.setUseBLambdaForNonKeyLowDelayPictures ( m_bUseBLambdaForNonKeyLowDelayPictures );
m_cEncLib.setPCMLog2MinSize ( m_uiPCMLog2MinSize);
m_cEncLib.setUsePCM ( m_usePCM );
#if JVET_M0102_INTRA_SUBPARTITIONS
m_cEncLib.setUseFastISP ( m_useFastISP );
#endif
// set internal bit-depth and constants
for (uint32_t channelType = 0; channelType < MAX_NUM_CHANNEL_TYPE; channelType++)
......
......@@ -1027,6 +1027,9 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
#else
("TransformSkipFast", m_useTransformSkipFast, false, "Fast intra transform skipping")
("TransformSkipLog2MaxSize", m_log2MaxTransformSkipBlockSize, 2U, "Specify transform-skip maximum size. Minimum 2. (not valid in V1 profiles)")
#endif
#if JVET_M0102_INTRA_SUBPARTITIONS
("ISPFast", m_useFastISP, false, "Fast encoder search for ISP")
#endif
("ImplicitResidualDPCM", m_rdpcmEnabledFlag[RDPCM_SIGNAL_IMPLICIT], false, "Enable implicitly signalled residual DPCM for intra (also known as sample-adaptive intra predict) (not valid in V1 profiles)")
("ExplicitResidualDPCM", m_rdpcmEnabledFlag[RDPCM_SIGNAL_EXPLICIT], false, "Enable explicitly signalled residual DPCM for inter (not valid in V1 profiles)")
......@@ -3235,6 +3238,9 @@ void EncAppCfg::xPrintParameter()
if( m_MTS ) msg( VERBOSE, "MTSMaxCand: %1d(intra) %1d(inter) ", m_MTSIntraMaxCand, m_MTSInterMaxCand );
#else
if( m_EMT ) msg( VERBOSE, "EMTFast: %1d(intra) %1d(inter) ", ( m_FastEMT & m_EMT & 1 ), ( m_FastEMT >> 1 ) & ( m_EMT >> 1 ) & 1 );
#endif
#if JVET_M0102_INTRA_SUBPARTITIONS
msg( VERBOSE, "ISPFast:%d ", m_useFastISP );
#endif
msg( VERBOSE, "AMaxBT:%d ", m_useAMaxBT );
msg( VERBOSE, "E0023FastEnc:%d ", m_e0023FastEnc );
......
......@@ -150,6 +150,9 @@ protected:
bool m_rdpcmEnabledFlag[NUMBER_OF_RDPCM_SIGNALLING_MODES];///< control flags for residual DPCM
bool m_persistentRiceAdaptationEnabledFlag; ///< control flag for Golomb-Rice parameter adaptation over each slice
bool m_cabacBypassAlignmentEnabledFlag;
#if JVET_M0102_INTRA_SUBPARTITIONS
bool m_useFastISP; ///< flag for enabling fast methods for ISP
#endif
// coding quality
#if QP_SWITCHING_FOR_PARALLEL
......
......@@ -1386,6 +1386,16 @@ public:
iterator it = const_cast<iterator>( _pos ); _size += numEl;
while( first != last ) *it++ = *first++;
return const_cast<iterator>( _pos ); }
#if JVET_M0102_INTRA_SUBPARTITIONS && JVET_M0464_UNI_MTS
/// Fill-insert: writes numEl copies of val starting at _pos, moving the
/// elements from _pos onwards numEl slots towards the back.
/// \param _pos   position inside this container at which the first copy is written
/// \param numEl  number of copies to insert
/// \param val    value that is copied into each new slot
/// \return       iterator to the first inserted element (the same position as _pos)
iterator insert( const_iterator _pos, size_t numEl, const T& val )
{
  // CHECKD is handed the condition (_size + numEl >= N), i.e. the size after insertion is expected to stay below N.
  CHECKD( _size + numEl >= N, "capacity exceeded" );
  // Shift the tail back-to-front so that no element is overwritten before it has been moved.
  for( difference_type i = _size - 1; i >= _pos - _arr; i-- ) _arr[i + numEl] = _arr[i];
  iterator it = const_cast<iterator>( _pos );
  _size += numEl;
  // Unsigned counter: numEl is a size_t, so an int counter would mix signed and unsigned in the comparison.
  for( size_t k = 0; k < numEl; k++ ) *it++ = val;
  return const_cast<iterator>( _pos );
}
#endif
};
......
......@@ -390,6 +390,9 @@ protected:
int* m_aidQP;
uint32_t m_uiDeltaQpRD;
bool m_bFastDeltaQP;
#if JVET_M0102_INTRA_SUBPARTITIONS
bool m_useFastISP;
#endif
bool m_bUseConstrainedIntraPred;
bool m_bFastUDIUseMPMEnabled;
......@@ -1104,6 +1107,10 @@ public:
void setLog2MaxTransformSkipBlockSize ( uint32_t u ) { m_log2MaxTransformSkipBlockSize = u; }
bool getIntraSmoothingDisabledFlag () const { return m_intraSmoothingDisabledFlag; }
void setIntraSmoothingDisabledFlag (bool bValue) { m_intraSmoothingDisabledFlag=bValue; }
#if JVET_M0102_INTRA_SUBPARTITIONS
bool getUseFastISP () { return m_useFastISP; }
// Stores the fast-ISP flag that getUseFastISP() reports.
void setUseFastISP ( bool b )
{
  m_useFastISP = b;
}
#endif
const int* getdQPs () const { return m_aidQP; }
uint32_t getDeltaQpRD () const { return m_uiDeltaQpRD; }
......
......@@ -811,6 +811,45 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner )
}
}
#if JVET_M0102_INTRA_SUBPARTITIONS && JVET_M0464_UNI_MTS
if ( nOptionsForISP > 1 )
{
//we create a single full RD list that includes all intra modes using regular intra, MRL and ISP
auto* firstIspList = ispOptions[1] == HOR_INTRA_SUBPARTITIONS ? &m_rdModeListWithoutMrlHor : &m_rdModeListWithoutMrlVer;
auto* secondIspList = ispOptions[1] == HOR_INTRA_SUBPARTITIONS ? &m_rdModeListWithoutMrlVer : &m_rdModeListWithoutMrlHor;
if ( m_pcEncCfg->getUseFastISP() )
{
// find the first non-MRL mode
size_t indexFirstMode = std::find( extendRefList.begin(), extendRefList.end(), 0 ) - extendRefList.begin();
// if not found, just take the last mode
if( indexFirstMode >= extendRefList.size() ) indexFirstMode = extendRefList.size() - 1;
// move the mode indicated by indexFirstMode to the beginning
for( int idx = ((int)indexFirstMode) - 1; idx >= 0; idx-- )
{
std::swap( extendRefList[idx], extendRefList[idx + 1] );
std::swap( uiRdModeList [idx], uiRdModeList [idx + 1] );
}
//insert all ISP modes after the first non-mrl mode
uiRdModeList.insert( uiRdModeList.begin() + 1, secondIspList->begin(), secondIspList->end() );
uiRdModeList.insert( uiRdModeList.begin() + 1, firstIspList->begin() , firstIspList->end() );
extendRefList.insert( extendRefList.begin() + 1, secondIspList->size(), MRL_NUM_REF_LINES + ispOptions[2] );
extendRefList.insert( extendRefList.begin() + 1, firstIspList->size() , MRL_NUM_REF_LINES + ispOptions[1] );
}
else
{
//insert all ISP modes at the end of the current list
uiRdModeList.insert( uiRdModeList.end(), secondIspList->begin(), secondIspList->end() );
uiRdModeList.insert( uiRdModeList.end(), firstIspList->begin() , firstIspList->end() );
extendRefList.insert( extendRefList.end(), secondIspList->size(), MRL_NUM_REF_LINES + ispOptions[2] );
extendRefList.insert( extendRefList.end(), firstIspList->size() , MRL_NUM_REF_LINES + ispOptions[1] );
}
}
CHECKD(uiRdModeList.size() != extendRefList.size(),"uiRdModeList and extendRefList do not have the same size!");
#endif
//===== check modes (using r-d costs) =====
uint32_t uiBestPUMode = 0;
int bestExtendRef = 0;
......@@ -830,12 +869,21 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner )
int bestNormalIntraModeIndex = -1;
uint8_t bestIspOption = NOT_INTRA_SUBPARTITIONS;
TUIntraSubPartitioner subTuPartitioner( partitioner );
#if !JVET_M0464_UNI_MTS
#if JVET_M0464_UNI_MTS
bool ispHorAllZeroCbfs = false, ispVerAllZeroCbfs = false;
for (uint32_t uiMode = 0; uiMode < numModesForFullRD; uiMode++)
{
// set luma prediction mode
uint32_t uiOrgMode = uiRdModeList[uiMode];
cu.ispMode = extendRefList[uiMode] > MRL_NUM_REF_LINES ? extendRefList[uiMode] - MRL_NUM_REF_LINES : NOT_INTRA_SUBPARTITIONS;
#else
if ( !cu.ispMode && !cu.emtFlag )
{
m_modeCtrl->setEmtFirstPassNoIspCost( MAX_DOUBLE );
}
#endif
for( uint32_t ispOptionIdx = 0; ispOptionIdx < nOptionsForISP; ispOptionIdx++ )
{
cu.ispMode = ispOptions[ispOptionIdx];
......@@ -844,7 +892,7 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner )
{
// set luma prediction mode
uint32_t uiOrgMode = cu.ispMode == NOT_INTRA_SUBPARTITIONS ? uiRdModeList[uiMode] : cu.ispMode == HOR_INTRA_SUBPARTITIONS ? m_rdModeListWithoutMrlHor[uiMode] : m_rdModeListWithoutMrlVer[uiMode];
#endif
pu.intraDir[0] = uiOrgMode;
int multiRefIdx = 0;
......@@ -853,6 +901,12 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner )
{
intraSubPartitionsProcOrder = CU::getISPType( cu, COMPONENT_Y );
bool tuIsDividedInRows = CU::divideTuInRows( cu );
#if JVET_M0464_UNI_MTS
if ( ( tuIsDividedInRows && ispHorAllZeroCbfs ) || ( !tuIsDividedInRows && ispVerAllZeroCbfs ) )
{
continue;
}
#endif
if( m_intraModeDiagRatio.at( bestNormalIntraModeIndex ) > 1.25 )
{
continue;
......@@ -894,11 +948,25 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner )
}
else
{
#if JVET_M0464_UNI_MTS
xRecurIntraCodingLumaQT( *csTemp, partitioner, bestIspOption ? bestCurrentCost : MAX_DOUBLE, -1, TU_NO_ISP, bestIspOption );
#else
xRecurIntraCodingLumaQT( *csTemp, partitioner, MAX_DOUBLE, -1 );
#endif
}
if( cu.ispMode && !csTemp->cus[0]->firstTU->cbf[COMPONENT_Y] )
{
#if JVET_M0464_UNI_MTS
if ( cu.ispMode == HOR_INTRA_SUBPARTITIONS )
{
ispHorAllZeroCbfs |= ( m_pcEncCfg->getUseFastISP() && csTemp->tus[0]->lheight() > 2 && csTemp->cost >= bestCurrentCost );
}
else
{
ispVerAllZeroCbfs |= ( m_pcEncCfg->getUseFastISP() && csTemp->tus[0]->lwidth() > 2 && csTemp->cost >= bestCurrentCost );
}
#endif
csTemp->cost = MAX_DOUBLE;
}
#else
......@@ -959,8 +1027,8 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner )
{
m_modeCtrl->setEmtFirstPassNoIspCost(csBest->cost);
}
#endif
}
#endif
cu.ispMode = bestIspOption;
#endif
......@@ -1980,7 +2048,11 @@ void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID &comp
}
#if JVET_M0102_INTRA_SUBPARTITIONS
#if JVET_M0464_UNI_MTS
void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &partitioner, const double bestCostSoFar, const int subTuIdx, const PartSplit ispType, const bool ispIsCurrentWinnder )
#else
void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &partitioner, const double bestCostSoFar, const int subTuIdx, const PartSplit ispType )
#endif
{
int subTuCounter = subTuIdx;
const UnitArea &currArea = partitioner.currArea();
......@@ -2122,6 +2194,10 @@ void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par
#endif
#if JVET_M0464_UNI_MTS
#if JVET_M0102_INTRA_SUBPARTITIONS
double bestDCT2cost = MAX_DOUBLE;
double threshold = m_pcEncCfg->getUseFastISP() && !cu.ispMode && ispIsCurrentWinnder && nNumTransformCands > 1 ? 1 + 1.4 / sqrt( cu.lwidth() * cu.lheight() ) : 1;
#endif
for( int modeId = firstCheckId; modeId < nNumTransformCands; modeId++ )
{
if( !cbfDCT2 || ( m_pcEncCfg->getUseTransformSkipFast() && bestModeId[COMPONENT_Y] == 1 ) )
......@@ -2132,6 +2208,13 @@ void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par
{
continue;
}
#if JVET_M0102_INTRA_SUBPARTITIONS
//we compare the DCT-II cost against the best ISP cost so far (except for TS)
if ( m_pcEncCfg->getUseFastISP() && !cu.ispMode && ispIsCurrentWinnder && trModes[modeId].first != 0 && ( trModes[modeId].first != 1 || !tsAllowed ) && bestDCT2cost > bestCostSoFar * threshold )
{
continue;
}
#endif
tu.mtsIdx = trModes[modeId].first;
#else
for( int modeId = firstCheckId; modeId <= lastCheckId; modeId++ )
......@@ -2248,6 +2331,13 @@ void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par
singleCostTmp = m_pcRdCost->calcRdCost( singleTmpFracBits, singleDistTmpLuma );
}
#if JVET_M0102_INTRA_SUBPARTITIONS && JVET_M0464_UNI_MTS
if ( !cu.ispMode && nNumTransformCands > 1 && modeId == firstCheckId )
{
bestDCT2cost = singleCostTmp;
}
#endif
if (singleCostTmp < dSingleCost)
{
dSingleCost = singleCostTmp;
......
......@@ -195,7 +195,11 @@ protected:
#if JVET_M0102_INTRA_SUBPARTITIONS
ChromaCbfs xRecurIntraChromaCodingQT( CodingStructure &cs, Partitioner& pm, const double bestCostSoFar = MAX_DOUBLE, const PartSplit ispType = TU_NO_ISP );
#if JVET_M0464_UNI_MTS
void xRecurIntraCodingLumaQT ( CodingStructure &cs, Partitioner& pm, const double bestCostSoFar = MAX_DOUBLE, const int subTuIdx = -1, const PartSplit ispType = TU_NO_ISP, const bool ispIsCurrentWinnder = false );
#else
void xRecurIntraCodingLumaQT ( CodingStructure &cs, Partitioner& pm, const double bestCostSoFar = MAX_DOUBLE, const int subTuIdx = -1, const PartSplit ispType = TU_NO_ISP );
#endif
#else
ChromaCbfs xRecurIntraChromaCodingQT (CodingStructure &cs, Partitioner& pm);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment