From 4b0cf65bc282cc45e2630af35ccaf919456a8f82 Mon Sep 17 00:00:00 2001
From: Karsten Suehring <karsten.suehring@hhi.fraunhofer.de>
Date: Fri, 23 Aug 2019 17:33:47 +0200
Subject: [PATCH] replace ceil(log2()) double arithmetic with integer

- create ceilLog2() function based on floorLog2
- use intrinsics for MSVC in floorLog2
- replace all ceil(log2()) calls in HLS with ceilLog2()
---
 source/Lib/CommonLib/CommonDef.h                | 14 ++++++++++++++
 source/Lib/DecoderLib/VLCReader.cpp             | 14 +++++++-------
 source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp |  2 +-
 source/Lib/EncoderLib/VLCWriter.cpp             | 10 +++++-----
 4 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h
index 600cf39626..d46f0610e2 100644
--- a/source/Lib/CommonLib/CommonDef.h
+++ b/source/Lib/CommonLib/CommonDef.h
@@ -58,6 +58,8 @@
 #if _MSC_VER < 1900
 #error "MS Visual Studio version not supported. Please upgrade to Visual Studio 2015 or higher (or use other compilers)"
 #endif
+
+#include <intrin.h>
 #endif
 
 //! \ingroup CommonLib
@@ -687,6 +689,11 @@ static inline int floorLog2(uint32_t x)
   }
 #ifdef __GNUC__
   return 31 - __builtin_clz(x);
+#else
+#ifdef _MSC_VER
+  unsigned long r = 0;
+  _BitScanReverse(&r, x);
+  return r;
 #else
   int result = 0;
   if (x & 0xffff0000)
@@ -716,8 +723,15 @@ static inline int floorLog2(uint32_t x)
   }
   return result;
 #endif
+#endif
 }
 
+//! integer ceil(log2(x)); yields -1 for x == 0 instead of letting the unsigned x - 1 wrap to 0xffffffff (which gave 32)
+static inline int ceilLog2(uint32_t x)
+{
+  return (x == 0) ? -1 : floorLog2(x - 1) + 1; // NOTE(review): x == 1 gives 0 only if floorLog2(0) returns -1 (guard not visible in this hunk) - confirm
+}
+
 //CASE-BREAK for breakpoints
 #if defined ( _MSC_VER ) && defined ( _DEBUG )
 #define _CASE(_x) if(_x)
diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp
index 1fe4f55691..ac9997d2ee 100644
--- a/source/Lib/DecoderLib/VLCReader.cpp
+++ b/source/Lib/DecoderLib/VLCReader.cpp
@@ -546,7 +546,7 @@ void HLSyntaxReader::parsePPS( PPS* pcPPS, ParameterSetManager *parameterSetMana
       const uint32_t tileRowsMinus1 = pcPPS->getNumTileRowsMinus1();
       const uint32_t numSlicesInPic = pcPPS->getNumSlicesInPicMinus1() + 1;
       const uint32_t numTilesInPic = (tileColumnsMinus1 + 1) * (tileRowsMinus1 + 1);
-      int codeLength = (int)ceil(log2(numTilesInPic));
+      int codeLength = ceilLog2(numTilesInPic);
       int codeLength2 = codeLength;
       if (numSlicesInPic > 0)
       {
@@ -559,7 +559,7 @@ void HLSyntaxReader::parsePPS( PPS* pcPPS, ParameterSetManager *parameterSetMana
           {
             READ_CODE( codeLength, uiCode, "top_left_brick_idx" );
             topLeft[i] = uiCode;
-            codeLength2 = (int)ceil(log2((numTilesInPic - topLeft[i] < 2) ? 2 : numTilesInPic - topLeft[i]));  //Bugfix
+            codeLength2 = ceilLog2((numTilesInPic - topLeft[i] < 2) ? 2 : numTilesInPic - topLeft[i]);
           }
           READ_CODE( codeLength2, uiCode, "bottom_right_brick_idx_delta");
           bottomRight[i] = topLeft[i] + uiCode;
@@ -652,7 +652,7 @@ void HLSyntaxReader::parsePPS( PPS* pcPPS, ParameterSetManager *parameterSetMana
 #else
     uint32_t picWidth = parameterSetManager->getSPS( pcPPS->getSPSId() )->getPicWidthInLumaSamples(); // pcPPS->getPicWidthInLumaSamples();
 #endif
-    int numBits = (int)ceil(log2(picWidth) - 3);
+    int numBits = ceilLog2(picWidth) - 3;
     for( unsigned i = 0; i < pcPPS->getNumVerVirtualBoundaries(); i++ )
     {
       READ_CODE( numBits, uiCode, "pps_virtual_boundaries_pos_x" ); pcPPS->setVirtualBoundariesPosX( uiCode << 3, i );
@@ -663,7 +663,7 @@ void HLSyntaxReader::parsePPS( PPS* pcPPS, ParameterSetManager *parameterSetMana
 #else
     uint32_t picHeight = parameterSetManager->getSPS( pcPPS->getSPSId() )->getPicHeightInLumaSamples(); // pcPPS->getPicHeightInLumaSamples();
 #endif
-    numBits = (int)ceil(log2(picHeight) - 3);
+    numBits = ceilLog2(picHeight) - 3;
     for( unsigned i = 0; i < pcPPS->getNumHorVirtualBoundaries(); i++ )
     {
       READ_CODE( numBits, uiCode, "pps_virtual_boundaries_pos_y" ); pcPPS->setVirtualBoundariesPosY( uiCode << 3, i );
@@ -855,7 +855,7 @@ void HLSyntaxReader::parseAlfAps( APS* aps )
     if (param.numLumaFilters > 1)
     {
 #if JVET_O0491_HLS_CLEANUP
-      const int length =  (int)ceil(log2(param.numLumaFilters));
+      const int length =  ceilLog2(param.numLumaFilters);
 #endif
       for (int i = 0; i < MAX_NUM_ALF_CLASSES; i++)
       {
@@ -1790,7 +1790,7 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para
       {
         if (sps->getNumRPL0() > 1)
         {
-          int numBits = (int)ceil(log2(sps->getNumRPL0()));
+          int numBits = ceilLog2(sps->getNumRPL0());
           READ_CODE(numBits, uiCode, "ref_pic_list_idx[0]");
           pcSlice->setRPL0idx(uiCode);
           pcSlice->setRPL0(sps->getRPLList0()->getReferencePictureList(uiCode));
@@ -1845,7 +1845,7 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para
         {
           if (sps->getNumRPL1() > 1)
           {
-            int numBits = (int)ceil(log2(sps->getNumRPL1()));
+            int numBits = ceilLog2(sps->getNumRPL1());
             READ_CODE(numBits, uiCode, "ref_pic_list_idx[1]");
             pcSlice->setRPL1idx(uiCode);
             pcSlice->setRPL1(sps->getRPLList1()->getReferencePictureList(uiCode));
diff --git a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp
index afa62f1e25..671d5a9fbf 100644
--- a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp
+++ b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp
@@ -1620,7 +1620,7 @@ int EncAdaptiveLoopFilter::getNonFilterCoeffRate( AlfParam& alfParam )
   if( alfParam.numLumaFilters > 1 )
   {
 #if JVET_O0491_HLS_CLEANUP
-    const int coeffLength = (int)ceil(log2(alfParam.numLumaFilters));
+    const int coeffLength = ceilLog2(alfParam.numLumaFilters);
 #endif
     for( int i = 0; i < MAX_NUM_ALF_CLASSES; i++ )
     {
diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp
index 327f3fa486..b234dfca15 100644
--- a/source/Lib/EncoderLib/VLCWriter.cpp
+++ b/source/Lib/EncoderLib/VLCWriter.cpp
@@ -322,14 +322,14 @@ void HLSWriter::codePPS( const PPS* pcPPS )
       WRITE_UVLC( pcPPS->getNumSlicesInPicMinus1(), "num_slices_in_pic_minus1" );
       int numSlicesInPic = pcPPS->getNumSlicesInPicMinus1() + 1;
       int numTilesInPic = (pcPPS->getNumTileColumnsMinus1() + 1) * (pcPPS->getNumTileRowsMinus1() + 1);
-      int codeLength = (int)ceil(log2(numTilesInPic));
+      int codeLength = ceilLog2(numTilesInPic);
       int codeLength2 = codeLength;
       for (int i = 0; i < numSlicesInPic; ++i)
       {
         if (i > 0)
         {
           WRITE_CODE(pcPPS->getTopLeftBrickIdx(i), codeLength, "top_left_brick_idx ");
-          codeLength2 = (int)ceil(log2((numTilesInPic - pcPPS->getTopLeftBrickIdx(i) < 2) ? 2 : numTilesInPic - pcPPS->getTopLeftBrickIdx(i)));
+          codeLength2 = ceilLog2((numTilesInPic - pcPPS->getTopLeftBrickIdx(i) < 2) ? 2 : numTilesInPic - pcPPS->getTopLeftBrickIdx(i));
         }
         WRITE_CODE(pcPPS->getBottomRightBrickIdx(i) - pcPPS->getTopLeftBrickIdx(i), codeLength2, "bottom_right_brick_idx_delta");
       }
@@ -383,13 +383,13 @@ void HLSWriter::codePPS( const PPS* pcPPS )
   if( pcPPS->getLoopFilterAcrossVirtualBoundariesDisabledFlag() )
   {
     WRITE_CODE( pcPPS->getNumVerVirtualBoundaries(), 2,                              "pps_num_ver_virtual_boundaries");
-    int numBits = (int)ceil(log2(pcPPS->pcv->lumaWidth) - 3);
+    int numBits = ceilLog2(pcPPS->pcv->lumaWidth) - 3;
     for( unsigned i = 0; i < pcPPS->getNumVerVirtualBoundaries(); i++ )
     {
       WRITE_CODE( pcPPS->getVirtualBoundariesPosX( i ) >> 3, numBits,                "pps_virtual_boundaries_pos_x" );
     }
     WRITE_CODE( pcPPS->getNumHorVirtualBoundaries(), 2,                              "pps_num_hor_virtual_boundaries");
-    numBits = (int)ceil(log2(pcPPS->pcv->lumaHeight) - 3);
+    numBits = ceilLog2(pcPPS->pcv->lumaHeight) - 3;
     for( unsigned i = 0; i < pcPPS->getNumHorVirtualBoundaries(); i++ )
     {
       WRITE_CODE( pcPPS->getVirtualBoundariesPosY( i ) >> 3, numBits,                "pps_virtual_boundaries_pos_y" );
@@ -535,7 +535,7 @@ void HLSWriter::codeAlfAps( APS* pcAPS )
     if (param.numLumaFilters > 1)
     {
 #if JVET_O0491_HLS_CLEANUP
-      const int length =  (int)ceil(log2( param.numLumaFilters));
+      const int length =  ceilLog2( param.numLumaFilters);
 #endif
       for (int i = 0; i < MAX_NUM_ALF_CLASSES; i++)
       {
-- 
GitLab