From 4b0cf65bc282cc45e2630af35ccaf919456a8f82 Mon Sep 17 00:00:00 2001 From: Karsten Suehring <karsten.suehring@hhi.fraunhofer.de> Date: Fri, 23 Aug 2019 17:33:47 +0200 Subject: [PATCH] replace ceil(log2()) double arithmetic with integer - create ceilLog2() function based on floorLog2 - use intrinsics for MSVC in floorLog2 - replace all ceil(log2()) calls in HLS with ceilLog2() --- source/Lib/CommonLib/CommonDef.h | 14 ++++++++++++++ source/Lib/DecoderLib/VLCReader.cpp | 14 +++++++------- source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp | 2 +- source/Lib/EncoderLib/VLCWriter.cpp | 10 +++++----- 4 files changed, 27 insertions(+), 13 deletions(-) diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h index 600cf39626..d46f0610e2 100644 --- a/source/Lib/CommonLib/CommonDef.h +++ b/source/Lib/CommonLib/CommonDef.h @@ -58,6 +58,8 @@ #if _MSC_VER < 1900 #error "MS Visual Studio version not supported. Please upgrade to Visual Studio 2015 or higher (or use other compilers)" #endif + +#include <intrin.h> #endif //! 
\ingroup CommonLib @@ -687,6 +689,11 @@ static inline int floorLog2(uint32_t x) } #ifdef __GNUC__ return 31 - __builtin_clz(x); +#else +#ifdef _MSC_VER + unsigned long r = 0; + _BitScanReverse(&r, x); + return r; #else int result = 0; if (x & 0xffff0000) @@ -716,8 +723,15 @@ static inline int floorLog2(uint32_t x) } return result; #endif +#endif } +static inline int ceilLog2(uint32_t x) /* integer stand-in for (int)ceil(log2(x)); x == 0 has no logarithm and yields -1 */ +{ + return (x == 0) ? -1 : floorLog2(x - 1) + 1; /* test x == 0 first: x is unsigned, so x - 1 would wrap to 0xffffffff */ +} + + //CASE-BREAK for breakpoints #if defined ( _MSC_VER ) && defined ( _DEBUG ) #define _CASE(_x) if(_x) diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp index 1fe4f55691..ac9997d2ee 100644 --- a/source/Lib/DecoderLib/VLCReader.cpp +++ b/source/Lib/DecoderLib/VLCReader.cpp @@ -546,7 +546,7 @@ void HLSyntaxReader::parsePPS( PPS* pcPPS, ParameterSetManager *parameterSetMana const uint32_t tileRowsMinus1 = pcPPS->getNumTileRowsMinus1(); const uint32_t numSlicesInPic = pcPPS->getNumSlicesInPicMinus1() + 1; const uint32_t numTilesInPic = (tileColumnsMinus1 + 1) * (tileRowsMinus1 + 1); - int codeLength = (int)ceil(log2(numTilesInPic)); + int codeLength = ceilLog2(numTilesInPic); int codeLength2 = codeLength; if (numSlicesInPic > 0) { @@ -559,7 +559,7 @@ void HLSyntaxReader::parsePPS( PPS* pcPPS, ParameterSetManager *parameterSetMana { READ_CODE( codeLength, uiCode, "top_left_brick_idx" ); topLeft[i] = uiCode; - codeLength2 = (int)ceil(log2((numTilesInPic - topLeft[i] < 2) ? 2 : numTilesInPic - topLeft[i])); //Bugfix + codeLength2 = ceilLog2((numTilesInPic - topLeft[i] < 2) ?
2 : numTilesInPic - topLeft[i]); } READ_CODE( codeLength2, uiCode, "bottom_right_brick_idx_delta"); bottomRight[i] = topLeft[i] + uiCode; @@ -652,7 +652,7 @@ void HLSyntaxReader::parsePPS( PPS* pcPPS, ParameterSetManager *parameterSetMana #else uint32_t picWidth = parameterSetManager->getSPS( pcPPS->getSPSId() )->getPicWidthInLumaSamples(); // pcPPS->getPicWidthInLumaSamples(); #endif - int numBits = (int)ceil(log2(picWidth) - 3); + int numBits = ceilLog2(picWidth) - 3; for( unsigned i = 0; i < pcPPS->getNumVerVirtualBoundaries(); i++ ) { READ_CODE( numBits, uiCode, "pps_virtual_boundaries_pos_x" ); pcPPS->setVirtualBoundariesPosX( uiCode << 3, i ); @@ -663,7 +663,7 @@ void HLSyntaxReader::parsePPS( PPS* pcPPS, ParameterSetManager *parameterSetMana #else uint32_t picHeight = parameterSetManager->getSPS( pcPPS->getSPSId() )->getPicHeightInLumaSamples(); // pcPPS->getPicHeightInLumaSamples(); #endif - numBits = (int)ceil(log2(picHeight) - 3); + numBits = ceilLog2(picHeight) - 3; for( unsigned i = 0; i < pcPPS->getNumHorVirtualBoundaries(); i++ ) { READ_CODE( numBits, uiCode, "pps_virtual_boundaries_pos_y" ); pcPPS->setVirtualBoundariesPosY( uiCode << 3, i ); @@ -855,7 +855,7 @@ void HLSyntaxReader::parseAlfAps( APS* aps ) if (param.numLumaFilters > 1) { #if JVET_O0491_HLS_CLEANUP - const int length = (int)ceil(log2(param.numLumaFilters)); + const int length = ceilLog2(param.numLumaFilters); #endif for (int i = 0; i < MAX_NUM_ALF_CLASSES; i++) { @@ -1790,7 +1790,7 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para { if (sps->getNumRPL0() > 1) { - int numBits = (int)ceil(log2(sps->getNumRPL0())); + int numBits = ceilLog2(sps->getNumRPL0()); READ_CODE(numBits, uiCode, "ref_pic_list_idx[0]"); pcSlice->setRPL0idx(uiCode); pcSlice->setRPL0(sps->getRPLList0()->getReferencePictureList(uiCode)); @@ -1845,7 +1845,7 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para { if (sps->getNumRPL1() > 1) { - int numBits = 
(int)ceil(log2(sps->getNumRPL1())); + int numBits = ceilLog2(sps->getNumRPL1()); READ_CODE(numBits, uiCode, "ref_pic_list_idx[1]"); pcSlice->setRPL1idx(uiCode); pcSlice->setRPL1(sps->getRPLList1()->getReferencePictureList(uiCode)); diff --git a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp index afa62f1e25..671d5a9fbf 100644 --- a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp +++ b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp @@ -1620,7 +1620,7 @@ int EncAdaptiveLoopFilter::getNonFilterCoeffRate( AlfParam& alfParam ) if( alfParam.numLumaFilters > 1 ) { #if JVET_O0491_HLS_CLEANUP - const int coeffLength = (int)ceil(log2(alfParam.numLumaFilters)); + const int coeffLength = ceilLog2(alfParam.numLumaFilters); #endif for( int i = 0; i < MAX_NUM_ALF_CLASSES; i++ ) { diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp index 327f3fa486..b234dfca15 100644 --- a/source/Lib/EncoderLib/VLCWriter.cpp +++ b/source/Lib/EncoderLib/VLCWriter.cpp @@ -322,14 +322,14 @@ void HLSWriter::codePPS( const PPS* pcPPS ) WRITE_UVLC( pcPPS->getNumSlicesInPicMinus1(), "num_slices_in_pic_minus1" ); int numSlicesInPic = pcPPS->getNumSlicesInPicMinus1() + 1; int numTilesInPic = (pcPPS->getNumTileColumnsMinus1() + 1) * (pcPPS->getNumTileRowsMinus1() + 1); - int codeLength = (int)ceil(log2(numTilesInPic)); + int codeLength = ceilLog2(numTilesInPic); int codeLength2 = codeLength; for (int i = 0; i < numSlicesInPic; ++i) { if (i > 0) { WRITE_CODE(pcPPS->getTopLeftBrickIdx(i), codeLength, "top_left_brick_idx "); - codeLength2 = (int)ceil(log2((numTilesInPic - pcPPS->getTopLeftBrickIdx(i) < 2) ? 2 : numTilesInPic - pcPPS->getTopLeftBrickIdx(i))); + codeLength2 = ceilLog2((numTilesInPic - pcPPS->getTopLeftBrickIdx(i) < 2) ? 
2 : numTilesInPic - pcPPS->getTopLeftBrickIdx(i)); } WRITE_CODE(pcPPS->getBottomRightBrickIdx(i) - pcPPS->getTopLeftBrickIdx(i), codeLength2, "bottom_right_brick_idx_delta"); } @@ -383,13 +383,13 @@ void HLSWriter::codePPS( const PPS* pcPPS ) if( pcPPS->getLoopFilterAcrossVirtualBoundariesDisabledFlag() ) { WRITE_CODE( pcPPS->getNumVerVirtualBoundaries(), 2, "pps_num_ver_virtual_boundaries"); - int numBits = (int)ceil(log2(pcPPS->pcv->lumaWidth) - 3); + int numBits = ceilLog2(pcPPS->pcv->lumaWidth) - 3; for( unsigned i = 0; i < pcPPS->getNumVerVirtualBoundaries(); i++ ) { WRITE_CODE( pcPPS->getVirtualBoundariesPosX( i ) >> 3, numBits, "pps_virtual_boundaries_pos_x" ); } WRITE_CODE( pcPPS->getNumHorVirtualBoundaries(), 2, "pps_num_hor_virtual_boundaries"); - numBits = (int)ceil(log2(pcPPS->pcv->lumaHeight) - 3); + numBits = ceilLog2(pcPPS->pcv->lumaHeight) - 3; for( unsigned i = 0; i < pcPPS->getNumHorVirtualBoundaries(); i++ ) { WRITE_CODE( pcPPS->getVirtualBoundariesPosY( i ) >> 3, numBits, "pps_virtual_boundaries_pos_y" ); @@ -535,7 +535,7 @@ void HLSWriter::codeAlfAps( APS* pcAPS ) if (param.numLumaFilters > 1) { #if JVET_O0491_HLS_CLEANUP - const int length = (int)ceil(log2( param.numLumaFilters)); + const int length = ceilLog2( param.numLumaFilters); #endif for (int i = 0; i < MAX_NUM_ALF_CLASSES; i++) { -- GitLab