From 8e16ada0855a08e590ceb5770c2915a01bd7ae84 Mon Sep 17 00:00:00 2001 From: Frank Bossen <fbossen@gmail.com> Date: Sun, 18 Aug 2019 11:47:06 -0400 Subject: [PATCH] Clean up: reduce memory reallocation in CodingStructure::createCoeffs Use vectors and resize them if needed instead of deleting and reallocating memory for every picture. This appears to have a significant impact on decoder run time. Use PLTRunMode type where appropriate instead of bool since using vector<bool> creates a number of issues --- source/Lib/CommonLib/Buffer.h | 4 +-- source/Lib/CommonLib/CodingStructure.cpp | 40 +++++++----------------- source/Lib/CommonLib/CodingStructure.h | 8 ++--- source/Lib/CommonLib/TypeDef.h | 6 +++- source/Lib/CommonLib/Unit.cpp | 7 +++-- source/Lib/CommonLib/Unit.h | 6 ++-- source/Lib/DecoderLib/CABACReader.cpp | 13 +++++--- source/Lib/EncoderLib/CABACWriter.cpp | 7 +++-- source/Lib/EncoderLib/EncModeCtrl.cpp | 10 +++--- source/Lib/EncoderLib/EncModeCtrl.h | 2 +- source/Lib/EncoderLib/IntraSearch.cpp | 6 ++-- 11 files changed, 54 insertions(+), 55 deletions(-) diff --git a/source/Lib/CommonLib/Buffer.h b/source/Lib/CommonLib/Buffer.h index e76cdd7a5..895ad0023 100644 --- a/source/Lib/CommonLib/Buffer.h +++ b/source/Lib/CommonLib/Buffer.h @@ -177,8 +177,8 @@ typedef AreaBuf<const MotionInfo> CMotionBuf; typedef AreaBuf< TCoeff> PLTescapeBuf; typedef AreaBuf<const TCoeff> CPLTescapeBuf; -typedef AreaBuf< bool> PLTtypeBuf; -typedef AreaBuf<const bool> CPLTtypeBuf; +typedef AreaBuf<PLTRunMode> PLTtypeBuf; +typedef AreaBuf<const PLTRunMode> CPLTtypeBuf; #endif #define SIZE_AWARE_PER_EL_OP( OP, INC ) \ diff --git a/source/Lib/CommonLib/CodingStructure.cpp b/source/Lib/CommonLib/CodingStructure.cpp index d8814defc..9f350676e 100644 --- a/source/Lib/CommonLib/CodingStructure.cpp +++ b/source/Lib/CommonLib/CodingStructure.cpp @@ -76,13 +76,6 @@ CodingStructure::CodingStructure(CUCache& cuCache, PUCache& puCache, TUCache& tu { for( uint32_t i = 0; i < MAX_NUM_COMPONENT; i++ ) { - m_coeffs[ i ] = nullptr; - m_pcmbuf[ i ] = nullptr; -#if JVET_O0119_BASE_PALETTE_444 - m_runType[i] = nullptr; - m_runLength[i] = nullptr; -#endif - m_offsets[ i ] = 0; } @@ -640,7 +633,7 @@ TransformUnit& CodingStructure::addTU( const UnitArea &unit, const ChannelType c TCoeff *coeffs[5] = { nullptr, nullptr, nullptr, nullptr, nullptr }; Pel *pcmbuf[5] = { nullptr, nullptr, nullptr, nullptr, nullptr }; #if JVET_O0119_BASE_PALETTE_444 - bool *runType[5] = { nullptr, nullptr, nullptr, nullptr, nullptr }; + PLTRunMode *runType[5] = { nullptr, nullptr, nullptr, nullptr, nullptr }; Pel *runLength[5] = { nullptr, nullptr, nullptr, nullptr, nullptr }; #endif @@ -677,11 +670,11 @@ TransformUnit& CodingStructure::addTU( const UnitArea &unit, const ChannelType c } } - coeffs[i] = m_coeffs[i] + m_offsets[i]; - pcmbuf[i] = m_pcmbuf[i] + m_offsets[i]; + coeffs[i] = m_coeffs[i].data() + m_offsets[i]; + pcmbuf[i] = m_pcmbuf[i].data() + m_offsets[i]; #if JVET_O0119_BASE_PALETTE_444 - runType[i] = m_runType[i] + m_offsets[i]; - runLength[i] = m_runLength[i] + m_offsets[i]; + runType[i] = m_runType[i].data() + m_offsets[i]; + runLength[i] = m_runLength[i].data() + m_offsets[i]; #endif unsigned areaSize = tu->blocks[i].area(); @@ -969,32 +962,23 @@ void CodingStructure::rebindPicBufs() void CodingStructure::createCoeffs() { - const unsigned numCh = getNumberValidComponents( area.chromaFormat ); + const size_t numCh = getNumberValidComponents(area.chromaFormat); - for( unsigned i = 0; i < numCh; i++ ) + for (size_t i = 0; i < numCh; i++) { - unsigned _area = area.blocks[i].area(); + size_t _area = area.blocks[i].area(); - m_coeffs[i] = _area > 0 ? ( TCoeff* ) xMalloc( TCoeff, _area ) : nullptr; - m_pcmbuf[i] = _area > 0 ? ( Pel* ) xMalloc( Pel, _area ) : nullptr; + m_coeffs[i].resize(_area); + m_pcmbuf[i].resize(_area); #if JVET_O0119_BASE_PALETTE_444 - m_runType[i] = _area > 0 ? ( bool* ) xMalloc( bool, _area ) : nullptr; - m_runLength[i] = _area > 0 ? ( Pel* ) xMalloc( Pel, _area ) : nullptr; + m_runType[i].resize(_area); + m_runLength[i].resize(_area); #endif } } void CodingStructure::destroyCoeffs() { - for( uint32_t i = 0; i < MAX_NUM_COMPONENT; i++ ) - { - if( m_coeffs[i] ) { xFree( m_coeffs[i] ); m_coeffs[i] = nullptr; } - if( m_pcmbuf[i] ) { xFree( m_pcmbuf[i] ); m_pcmbuf[i] = nullptr; } -#if JVET_O0119_BASE_PALETTE_444 - if (m_runType[i]) { xFree(m_runType[i]); m_runType[i] = nullptr; } - if (m_runLength[i]) { xFree(m_runLength[i]); m_runLength[i] = nullptr; } -#endif - } } void CodingStructure::initSubStructure( CodingStructure& subStruct, const ChannelType _chType, const UnitArea &subArea, const bool &isTuEnc ) diff --git a/source/Lib/CommonLib/CodingStructure.h b/source/Lib/CommonLib/CodingStructure.h index 8a11a3bef..5ee873eba 100644 --- a/source/Lib/CommonLib/CodingStructure.h +++ b/source/Lib/CommonLib/CodingStructure.h @@ -247,11 +247,11 @@ private: PelStorage m_reco; PelStorage m_orgr; - TCoeff *m_coeffs [ MAX_NUM_COMPONENT ]; - Pel *m_pcmbuf [ MAX_NUM_COMPONENT ]; + std::vector<TCoeff> m_coeffs[MAX_NUM_COMPONENT]; + std::vector<Pel> m_pcmbuf[MAX_NUM_COMPONENT]; #if JVET_O0119_BASE_PALETTE_444 - bool *m_runType [MAX_NUM_COMPONENT]; - Pel *m_runLength[MAX_NUM_COMPONENT]; + std::vector<PLTRunMode> m_runType[MAX_NUM_COMPONENT]; + std::vector<Pel> m_runLength[MAX_NUM_COMPONENT]; #endif int m_offsets[ MAX_NUM_COMPONENT ]; diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 2a72e25f2..1665bc551 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -1131,7 +1131,11 @@ enum PLTRunMode PLT_RUN_INDEX = 0, PLT_RUN_COPY = 1, NUM_PLT_RUN = 2 -}; +} +#if __GNUC__ +__attribute__((packed)) +#endif +; #endif /// parameters for deblocking filter struct LFCUParam diff --git a/source/Lib/CommonLib/Unit.cpp b/source/Lib/CommonLib/Unit.cpp index 05d854815..c2c18d717 100644 --- a/source/Lib/CommonLib/Unit.cpp +++ b/source/Lib/CommonLib/Unit.cpp @@ -755,7 +755,7 @@ void TransformUnit::initData() } #if JVET_O0119_BASE_PALETTE_444 -void TransformUnit::init(TCoeff **coeffs, Pel **pcmbuf, Pel **runLength, bool **runType) +void TransformUnit::init(TCoeff **coeffs, Pel **pcmbuf, Pel **runLength, PLTRunMode **runType) #else void TransformUnit::init(TCoeff **coeffs, Pel **pcmbuf) #endif @@ -848,7 +848,10 @@ const CPLTescapeBuf TransformUnit::getescapeValue(const ComponentID id) const { Pel* TransformUnit::getPLTIndex (const ComponentID id) { return m_pcmbuf[id]; } Pel* TransformUnit::getRunLens (const ComponentID id) { return m_runLength[id]; } - bool* TransformUnit::getRunTypes (const ComponentID id) { return m_runType[id]; } + PLTRunMode * TransformUnit::getRunTypes(const ComponentID id) + { + return m_runType[id]; + } #endif void TransformUnit::checkTuNoResidual( unsigned idx ) diff --git a/source/Lib/CommonLib/Unit.h b/source/Lib/CommonLib/Unit.h index f5c92b9d8..8a8deffeb 100644 --- a/source/Lib/CommonLib/Unit.h +++ b/source/Lib/CommonLib/Unit.h @@ -486,7 +486,7 @@ struct TransformUnit : public UnitArea TransformUnit *prev; #if JVET_O0119_BASE_PALETTE_444 - void init(TCoeff **coeffs, Pel **pcmbuf, Pel **runLength, bool **runType); + void init(TCoeff **coeffs, Pel **pcmbuf, Pel **runLength, PLTRunMode **runType); #else void init(TCoeff **coeffs, Pel **pcmbuf); #endif @@ -515,7 +515,7 @@ struct TransformUnit : public UnitArea const CPLTescapeBuf getescapeValue(const ComponentID id) const; Pel* getPLTIndex(const ComponentID id); Pel* getRunLens(const ComponentID id); - bool* getRunTypes(const ComponentID id); + PLTRunMode * getRunTypes(const ComponentID id); #endif #if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM @@ -527,7 +527,7 @@ private: TCoeff *m_coeffs[ MAX_NUM_TBLOCKS ]; Pel *m_pcmbuf[ MAX_NUM_TBLOCKS ]; #if JVET_O0119_BASE_PALETTE_444 - bool *m_runType[MAX_NUM_TBLOCKS]; + PLTRunMode *m_runType[MAX_NUM_TBLOCKS]; Pel *m_runLength[MAX_NUM_TBLOCKS]; #endif }; diff --git a/source/Lib/DecoderLib/CABACReader.cpp b/source/Lib/DecoderLib/CABACReader.cpp index 78e8ff012..41aedf216 100644 --- a/source/Lib/DecoderLib/CABACReader.cpp +++ b/source/Lib/DecoderLib/CABACReader.cpp @@ -1923,7 +1923,7 @@ void CABACReader::cu_palette_info(CodingUnit& cu, ComponentID compBegin, uint32_ uint32_t width = cu.block(compBegin).width; int numCopyIndexRuns = -1; - bool lastRunType = 0; + PLTRunMode lastRunType = PLT_RUN_INDEX; uint32_t numIndices = 0; uint32_t adjust = 0; uint32_t symbol = 0; @@ -1940,7 +1940,7 @@ void CABACReader::cu_palette_info(CodingUnit& cu, ComponentID compBegin, uint32_ adjust = 1; parsedIdxList.push_back(symbol); } - lastRunType = m_BinDecoder.decodeBin(Ctx::RunTypeFlag()); + lastRunType = m_BinDecoder.decodeBin(Ctx::RunTypeFlag()) ? PLT_RUN_COPY : PLT_RUN_INDEX; parseScanRotationModeFlag(cu, compBegin); adjust = 0; } @@ -2002,9 +2002,11 @@ void CABACReader::cu_palette_info(CodingUnit& cu, ComponentID compBegin, uint32_ } else { - if (numCopyIndexRuns && strPos < endPos - 1) // JC: if numIndices (decoder will know this value) == 0 - > only CopyAbove, if strPos == endPos - 1, the last RunType was already coded + if (numCopyIndexRuns + && strPos < endPos - 1) // JC: if numIndices (decoder will know this value) == 0 - > only CopyAbove, if + // strPos == endPos - 1, the last PLTRunMode was already coded { - runType.at(posx, posy) = (m_BinDecoder.decodeBin(Ctx::RunTypeFlag())); + runType.at(posx, posy) = m_BinDecoder.decodeBin(Ctx::RunTypeFlag()) ? PLT_RUN_COPY : PLT_RUN_INDEX; } else { @@ -2046,7 +2048,8 @@ void CABACReader::cu_palette_info(CodingUnit& cu, ComponentID compBegin, uint32_ lastRun = numCopyIndexRuns == 0 && runType.at(posx, posy) == lastRunType; if (!lastRun) { - runLength.at(posx, posy) = cu_run_val((PLTRunMode)runType.at(posx, posy), curLevel, endPos - strPos - numCopyIndexRuns - 1 - lastRunType) + 1; + runLength.at(posx, posy) = + cu_run_val(runType.at(posx, posy), curLevel, endPos - strPos - numCopyIndexRuns - 1 - lastRunType) + 1; } else { diff --git a/source/Lib/EncoderLib/CABACWriter.cpp b/source/Lib/EncoderLib/CABACWriter.cpp index 8156fa3a1..82558c058 100644 --- a/source/Lib/EncoderLib/CABACWriter.cpp +++ b/source/Lib/EncoderLib/CABACWriter.cpp @@ -1821,7 +1821,9 @@ void CABACWriter::cu_palette_info(const CodingUnit& cu, ComponentID compBegin, u } else { - if (numIndices && strPos < endPos - 1) // if numIndices (decoder will know this value) == 0 - > only CopyAbove, if strPos == endPos - 1, the last RunType was already coded + if (numIndices + && strPos < endPos - 1) // if numIndices (decoder will know this value) == 0 - > only CopyAbove, if strPos + // == endPos - 1, the last PLTRunMode was already coded { m_BinEncoder.encodeBin((runType.at(posx, posy)), Ctx::RunTypeFlag()); } @@ -1846,7 +1848,8 @@ void CABACWriter::cu_palette_info(const CodingUnit& cu, ComponentID compBegin, u if (lastRunPos != strPos) { numIndices -= (runType.at(posx, posy) == PLT_RUN_INDEX); - cu_run_val(runLength.at(posx, posy) - 1, (PLTRunMode)runType.at(posx, posy), curLevel, endPos - strPos - numIndices - 1 - lastRunType); + cu_run_val(runLength.at(posx, posy) - 1, runType.at(posx, posy), curLevel, + endPos - strPos - numIndices - 1 - lastRunType); } } diff --git a/source/Lib/EncoderLib/EncModeCtrl.cpp b/source/Lib/EncoderLib/EncModeCtrl.cpp index fc7a30a4d..2de1f8c89 100644 --- a/source/Lib/EncoderLib/EncModeCtrl.cpp +++ b/source/Lib/EncoderLib/EncModeCtrl.cpp @@ -783,14 +783,14 @@ void BestEncInfoCache::init( const Slice &slice ) m_pCoeff = new TCoeff[numCoeff*MAX_NUM_TUS]; m_pPcmBuf = new Pel [numCoeff*MAX_NUM_TUS]; #if JVET_O0119_BASE_PALETTE_444 - m_runType = new bool[numCoeff*MAX_NUM_TUS]; + m_runType = new PLTRunMode[numCoeff * MAX_NUM_TUS]; m_runLength = new Pel [numCoeff*MAX_NUM_TUS]; #endif #else m_pCoeff = new TCoeff[numCoeff]; m_pPcmBuf = new Pel [numCoeff]; #if JVET_O0119_BASE_PALETTE_444 - m_runType = new bool[numCoeff]; + m_runType = new PLTRunMode[numCoeff]; m_runLength = new Pel [numCoeff]; #endif #endif @@ -798,7 +798,7 @@ void BestEncInfoCache::init( const Slice &slice ) TCoeff *coeffPtr = m_pCoeff; Pel *pcmPtr = m_pPcmBuf; #if JVET_O0119_BASE_PALETTE_444 - bool *runTypePtr = m_runType; + PLTRunMode *runTypePtr = m_runType; Pel *runLengthPtr = m_runLength; #endif @@ -817,7 +817,9 @@ void BestEncInfoCache::init( const Slice &slice ) TCoeff *coeff[MAX_NUM_TBLOCKS] = { 0, }; Pel *pcmbf[MAX_NUM_TBLOCKS] = { 0, }; #if JVET_O0119_BASE_PALETTE_444 - bool *runType[MAX_NUM_TBLOCKS] = { 0, }; + PLTRunMode *runType[MAX_NUM_TBLOCKS] = { + 0, + }; Pel *runLength[MAX_NUM_TBLOCKS] = { 0, }; #endif diff --git a/source/Lib/EncoderLib/EncModeCtrl.h b/source/Lib/EncoderLib/EncModeCtrl.h index 32dabb5d2..6bc5de1f9 100644 --- a/source/Lib/EncoderLib/EncModeCtrl.h +++ b/source/Lib/EncoderLib/EncModeCtrl.h @@ -480,7 +480,7 @@ private: TCoeff *m_pCoeff; Pel *m_pPcmBuf; #if JVET_O0119_BASE_PALETTE_444 - bool *m_runType; + PLTRunMode * m_runType; Pel *m_runLength; #endif CodingStructure m_dummyCS; diff --git a/source/Lib/EncoderLib/IntraSearch.cpp b/source/Lib/EncoderLib/IntraSearch.cpp index d9c2b821a..654fedfed 100644 --- a/source/Lib/EncoderLib/IntraSearch.cpp +++ b/source/Lib/EncoderLib/IntraSearch.cpp @@ -1780,7 +1780,7 @@ void IntraSearch::PLTSearch(CodingStructure &cs, Partitioner& partitioner, Compo } Pel *runLength = tu.getRunLens (compBegin); - bool *runType = tu.getRunTypes(compBegin); + PLTRunMode *runType = tu.getRunTypes(compBegin); cu.lastPLTSize[compBegin] = cs.prevPLT.curPLTSize[compBegin]; //derive palette derivePLTLossy(cs, partitioner, compBegin, numComp); @@ -1796,7 +1796,7 @@ void IntraSearch::PLTSearch(CodingStructure &cs, Partitioner& partitioner, Compo deriveRunAndCalcBits(cs, partitioner, compBegin, numComp, PLT_SCAN_VERTRAV, bits); } cu.useRotation[compBegin] = m_bestScanRotationMode; - memcpy(runType, m_runTypeRD, sizeof(bool)*width*height); + memcpy(runType, m_runTypeRD, sizeof(PLTRunMode) * width * height); memcpy(runLength, m_runLengthRD, sizeof(Pel)*width*height); //reconstruct pixel PelBuf curPLTIdx = tu.getcurPLTIdx(compBegin); @@ -1878,7 +1878,7 @@ void IntraSearch::deriveRunAndCalcBits(CodingStructure& cs, Partitioner& partiti uint32_t height = cu.block(compBegin).height; uint32_t width = cu.block(compBegin).width; Pel *runLength = tu.getRunLens (compBegin); - bool *runType = tu.getRunTypes(compBegin); + PLTRunMode * runType = tu.getRunTypes(compBegin); cu.useRotation[compBegin] = (pltScanMode == PLT_SCAN_VERTRAV); m_scanOrder = g_scanOrder[SCAN_UNGROUPED][(cu.useRotation[compBegin]) ? SCAN_TRAV_VER : SCAN_TRAV_HOR][gp_sizeIdxInfo->idxFrom(width)][gp_sizeIdxInfo->idxFrom(height)]; -- GitLab