From 3a056bde4fbe821e3bc0f3d50e9576e76ba8b2e5 Mon Sep 17 00:00:00 2001 From: Adam Wieckowski <adam.wieckowski@hhi.fraunhofer.de> Date: Fri, 12 Mar 2021 11:05:51 +0100 Subject: [PATCH] removed split parallelism --- CMakeLists.txt | 18 +- .../App/BitstreamExtractorApp/CMakeLists.txt | 20 -- source/App/DecoderAnalyserApp/CMakeLists.txt | 20 -- source/App/DecoderApp/CMakeLists.txt | 20 -- source/App/EncoderApp/CMakeLists.txt | 20 -- source/App/EncoderApp/EncApp.cpp | 4 - source/App/EncoderApp/EncAppCfg.cpp | 24 +- source/App/EncoderApp/EncAppCfg.h | 7 - source/App/EncoderApp/encmain.cpp | 10 - source/App/SEIRemovalApp/CMakeLists.txt | 20 -- source/App/StreamMergeApp/CMakeLists.txt | 20 -- source/App/SubpicMergeApp/CMakeLists.txt | 20 -- source/Lib/CommonAnalyserLib/CMakeLists.txt | 22 -- source/Lib/CommonLib/CMakeLists.txt | 22 -- source/Lib/CommonLib/CodingStructure.cpp | 25 -- source/Lib/CommonLib/CommonDef.h | 10 - source/Lib/CommonLib/Contexts.h | 6 - source/Lib/CommonLib/Picture.cpp | 240 ++------------ source/Lib/CommonLib/Picture.h | 50 --- source/Lib/CommonLib/Quant.cpp | 8 - source/Lib/CommonLib/Quant.h | 4 - source/Lib/CommonLib/RdCost.cpp | 20 -- source/Lib/CommonLib/RdCost.h | 4 - source/Lib/CommonLib/Reshape.h | 4 - source/Lib/CommonLib/TrQuant.cpp | 7 - source/Lib/CommonLib/TrQuant.h | 4 - source/Lib/CommonLib/TypeDef.h | 49 --- source/Lib/CommonLib/Unit.h | 15 - source/Lib/DecoderAnalyserLib/CMakeLists.txt | 20 -- source/Lib/DecoderLib/CMakeLists.txt | 20 -- source/Lib/EncoderLib/CABACWriter.cpp | 4 - source/Lib/EncoderLib/CMakeLists.txt | 20 -- source/Lib/EncoderLib/EncCfg.h | 11 - source/Lib/EncoderLib/EncCu.cpp | 254 +-------------- source/Lib/EncoderLib/EncCu.h | 13 +- source/Lib/EncoderLib/EncGOP.cpp | 7 +- source/Lib/EncoderLib/EncLib.cpp | 149 --------- source/Lib/EncoderLib/EncLib.h | 57 ---- source/Lib/EncoderLib/EncModeCtrl.cpp | 297 +----------------- source/Lib/EncoderLib/EncModeCtrl.h | 69 ---- source/Lib/EncoderLib/EncReshape.cpp | 40 --- source/Lib/EncoderLib/EncReshape.h | 4 - source/Lib/EncoderLib/EncSlice.cpp | 37 +-- source/Lib/EncoderLib/InterSearch.cpp | 7 - source/Lib/EncoderLib/InterSearch.h | 3 - source/Lib/Utilities/CMakeLists.txt | 20 -- 46 files changed, 46 insertions(+), 1679 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 10a86aa00..8728b0903 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,22 +52,6 @@ set_property( GLOBAL PROPERTY USE_FOLDERS ON ) # Include a utility module providing functions, macros, and settings include( ${CMAKE_SOURCE_DIR}/cmake/CMakeBuild/cmake/modules/BBuildEnv.cmake ) -# Enable multithreading -bb_multithreading() - -find_package(OpenMP) - -if( OpenMP_FOUND ) - set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}" ) - set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}" ) - set( CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}" ) - - set( SET_ENABLE_SPLIT_PARALLELISM OFF CACHE BOOL "Set ENABLE_SPLIT_PARALLELISM as a compiler flag" ) - set( ENABLE_SPLIT_PARALLELISM OFF CACHE BOOL "If SET_ENABLE_SPLIT_PARALLELISM is on, it will be set to this value" ) - set( SET_ENABLE_WPP_PARALLELISM OFF CACHE BOOL "Set ENABLE_WPP_PARALLELISM as a compiler flag" ) - set( ENABLE_WPP_PARALLELISM OFF CACHE BOOL "If SET_ENABLE_WPP_PARALLELISM is on, it will be set to this value" ) -endif() - # Enable warnings for some generators and toolsets. # bb_enable_warnings( gcc warnings-as-errors -Wno-sign-compare ) # bb_enable_warnings( gcc -Wno-unused-variable ) @@ -75,6 +59,8 @@ endif() # for gcc 8.2: bb_enable_warnings( gcc warnings-as-errors -Wno-sign-compare -Wno-class-memaccess) +bb_multithreading() + if( XCODE ) bb_enable_warnings( clang warnings-as-errors -Wno-deprecated-declarations diff --git a/source/App/BitstreamExtractorApp/CMakeLists.txt b/source/App/BitstreamExtractorApp/CMakeLists.txt index a299cf5cc..09f7da6cf 100644 --- a/source/App/BitstreamExtractorApp/CMakeLists.txt +++ b/source/App/BitstreamExtractorApp/CMakeLists.txt @@ -33,26 +33,6 @@ if( SET_ENABLE_TRACING ) endif() endif() -if( OpenMP_FOUND ) - if( SET_ENABLE_SPLIT_PARALLELISM ) - if( ENABLE_SPLIT_PARALLELISM ) - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=1 ) - else() - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 ) - endif() - endif() - if( SET_ENABLE_WPP_PARALLELISM ) - if( ENABLE_WPP_PARALLELISM ) - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=1 ) - else() - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 ) - endif() - endif() -else() - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 ) - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 ) -endif() - if( CMAKE_COMPILER_IS_GNUCC AND BUILD_STATIC ) set( ADDITIONAL_LIBS ${ADDITIONAL_LIBS} -static -static-libgcc -static-libstdc++ ) target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_STATIC_LINK=1 ) diff --git a/source/App/DecoderAnalyserApp/CMakeLists.txt b/source/App/DecoderAnalyserApp/CMakeLists.txt index ad272ca1f..968a70c86 100644 --- a/source/App/DecoderAnalyserApp/CMakeLists.txt +++ b/source/App/DecoderAnalyserApp/CMakeLists.txt @@ -35,26 +35,6 @@ if( SET_ENABLE_TRACING ) endif() endif() -if( OpenMP_FOUND ) - if( SET_ENABLE_SPLIT_PARALLELISM ) - if( ENABLE_SPLIT_PARALLELISM ) - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=1 ) - else() - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 ) - endif() - endif() - if( SET_ENABLE_WPP_PARALLELISM ) - if( ENABLE_WPP_PARALLELISM ) - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=1 ) - else() - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 ) - endif() - endif() -else() - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 ) - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 ) -endif() - if( CMAKE_COMPILER_IS_GNUCC AND BUILD_STATIC ) set( ADDITIONAL_LIBS ${ADDITIONAL_LIBS} -static -static-libgcc -static-libstdc++ ) target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_STATIC_LINK=1 ) diff --git a/source/App/DecoderApp/CMakeLists.txt b/source/App/DecoderApp/CMakeLists.txt index 4e71c5c1e..a3db4ff0d 100644 --- a/source/App/DecoderApp/CMakeLists.txt +++ b/source/App/DecoderApp/CMakeLists.txt @@ -33,26 +33,6 @@ if( SET_ENABLE_TRACING ) endif() endif() -if( OpenMP_FOUND ) - if( SET_ENABLE_SPLIT_PARALLELISM ) - if( ENABLE_SPLIT_PARALLELISM ) - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=1 ) - else() - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 ) - endif() - endif() - if( SET_ENABLE_WPP_PARALLELISM ) - if( ENABLE_WPP_PARALLELISM ) - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=1 ) - else() - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 ) - endif() - endif() -else() - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 ) - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 ) -endif() - if( CMAKE_COMPILER_IS_GNUCC AND BUILD_STATIC ) set( ADDITIONAL_LIBS ${ADDITIONAL_LIBS} -static -static-libgcc -static-libstdc++ ) target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_STATIC_LINK=1 ) diff --git a/source/App/EncoderApp/CMakeLists.txt b/source/App/EncoderApp/CMakeLists.txt index dd87e52d1..b9621bc37 100644 --- a/source/App/EncoderApp/CMakeLists.txt +++ b/source/App/EncoderApp/CMakeLists.txt @@ -35,26 +35,6 @@ if( SET_ENABLE_TRACING ) endif() endif() -if( OpenMP_FOUND ) - if( SET_ENABLE_SPLIT_PARALLELISM ) - if( ENABLE_SPLIT_PARALLELISM ) - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=1 ) - else() - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 ) - endif() - endif() - if( SET_ENABLE_WPP_PARALLELISM ) - if( ENABLE_WPP_PARALLELISM ) - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=1 ) - else() - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 ) - endif() - endif() -else() - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 ) - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 ) -endif() - if( CMAKE_COMPILER_IS_GNUCC AND BUILD_STATIC ) set( ADDITIONAL_LIBS ${ADDITIONAL_LIBS} -static -static-libgcc -static-libstdc++ ) target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_STATIC_LINK=1 ) diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp index e9a2ca876..bd0cf1bb8 100644 --- a/source/App/EncoderApp/EncApp.cpp +++ b/source/App/EncoderApp/EncApp.cpp @@ -1041,10 +1041,6 @@ void EncApp::xInitLibCfg() m_cEncLib.setStopAfterFFtoPOC ( m_stopAfterFFtoPOC ); m_cEncLib.setBs2ModPOCAndType ( m_bs2ModPOCAndType ); m_cEncLib.setDebugCTU ( m_debugCTU ); -#if ENABLE_SPLIT_PARALLELISM - m_cEncLib.setNumSplitThreads ( m_numSplitThreads ); - m_cEncLib.setForceSingleSplitThread ( m_forceSplitSequential ); -#endif m_cEncLib.setUseALF ( m_alf ); #if JVET_U0081 m_cEncLib.setALFStrengthLuma (m_alfStrengthLuma); diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp index 24b14a55d..7b87ededf 100644 --- a/source/App/EncoderApp/EncAppCfg.cpp +++ b/source/App/EncoderApp/EncAppCfg.cpp @@ -1392,12 +1392,8 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) ("StopAfterFFtoPOC", m_stopAfterFFtoPOC, false, "If using fast forward to POC, after the POC of interest has been hit, stop further encoding.") ("ForceDecodeBitstream1", m_forceDecodeBitstream1, false, "force decoding of bitstream 1 - use this only if you are realy sure about what you are doing ") ("DecodeBitstream2ModPOCAndType", m_bs2ModPOCAndType, false, "Modify POC and NALU-type of second input bitstream, to use second BS as closing I-slice") - ("NumSplitThreads", m_numSplitThreads, 1, "Number of threads used to parallelize splitting") - ("ForceSingleSplitThread", m_forceSplitSequential, false, "Force single thread execution even if taking the parallelized path") - ("NumWppThreads", m_numWppThreads, 1, "Number of threads used to run WPP-style parallelization") - ("NumWppExtraLines", m_numWppExtraLines, 0, "Number of additional wpp lines to switch when threads are blocked") + ("DebugCTU", m_debugCTU, -1, "If DebugBitstream is present, load frames up to this POC from this bitstream. Starting with DebugPOC-frame at CTUline containin debug CTU.") - ("EnsureWppBitEqual", m_ensureWppBitEqual, false, "Ensure the results are equal to results with WPP-style parallelism, even if WPP is off") ( "ALF", m_alf, true, "Adaptive Loop Filter\n" ) #if JVET_U0081 ("ALFStrengthLuma", m_alfStrengthLuma, 1.0, "Adaptive Loop Filter strength for luma. The parameter scales the magnitudes of the ALF filter coefficients for luma. Valid range is 0.0 <= ALFStrengthLuma <= 1.0") @@ -2588,16 +2584,6 @@ bool EncAppCfg::xCheckParameter() xConfirmPara( m_wrapAroundOffset % minCUSize != 0, "Wrap-around offset must be an integer multiple of the specified minimum CU size" ); } -#if ENABLE_SPLIT_PARALLELISM - xConfirmPara( m_numSplitThreads < 1, "Number of used threads cannot be smaller than 1" ); - xConfirmPara( m_numSplitThreads > PARL_SPLIT_MAX_NUM_THREADS, "Number of used threads cannot be higher than the number of actual jobs" ); -#else - xConfirmPara( m_numSplitThreads != 1, "ENABLE_SPLIT_PARALLELISM is disabled, numSplitThreads has to be 1" ); -#endif - - xConfirmPara( m_numWppThreads != 1, "ENABLE_WPP_PARALLELISM is disabled, numWppThreads has to be 1" ); - xConfirmPara( m_ensureWppBitEqual, "ENABLE_WPP_PARALLELISM is disabled, cannot ensure being WPP bit-equal" ); - #if SHARP_LUMA_DELTA_QP && ENABLE_QPA xConfirmPara( m_bUsePerceptQPA && m_lumaLevelToDeltaQPMapping.mode >= 2, "QPA and SharpDeltaQP mode 2 cannot be used together" ); @@ -4123,14 +4109,6 @@ void EncAppCfg::xPrintParameter() if( m_MIP ) msg(VERBOSE, "FastMIP:%d ", m_useFastMIP); msg( VERBOSE, "FastLocalDualTree:%d ", m_fastLocalDualTreeMode ); - msg( VERBOSE, "NumSplitThreads:%d ", m_numSplitThreads ); - if( m_numSplitThreads > 1 ) - { - msg( VERBOSE, "ForceSingleSplitThread:%d ", m_forceSplitSequential ); - } - msg( VERBOSE, "NumWppThreads:%d+%d ", m_numWppThreads, m_numWppExtraLines ); - msg( VERBOSE, "EnsureWppBitEqual:%d ", m_ensureWppBitEqual ); - if (m_resChangeInClvsEnabled) { msg( VERBOSE, "RPR:(%1.2lfx, %1.2lfx)|%d ", m_scalingRatioHor, m_scalingRatioVer, m_switchPocPeriod ); diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h index fb76daaae..3b97a4ee1 100644 --- a/source/App/EncoderApp/EncAppCfg.h +++ b/source/App/EncoderApp/EncAppCfg.h @@ -398,13 +398,6 @@ protected: bool m_useFastMIP; int m_fastLocalDualTreeMode; - - int m_numSplitThreads; - bool m_forceSplitSequential; - int m_numWppThreads; - int m_numWppExtraLines; - bool m_ensureWppBitEqual; - int m_log2MaxTbSize; // coding tools (bit-depth) int m_inputBitDepth [MAX_NUM_CHANNEL_TYPE]; ///< bit-depth of input file diff --git a/source/App/EncoderApp/encmain.cpp b/source/App/EncoderApp/encmain.cpp index 69b324878..74ef03119 100644 --- a/source/App/EncoderApp/encmain.cpp +++ b/source/App/EncoderApp/encmain.cpp @@ -101,16 +101,6 @@ int main(int argc, char* argv[]) #endif #if ENABLE_TRACING fprintf( stdout, "[ENABLE_TRACING] " ); -#endif -#if ENABLE_SPLIT_PARALLELISM - fprintf( stdout, "[SPLIT_PARALLEL (%d jobs)]", PARL_SPLIT_MAX_NUM_JOBS ); -#endif -#if ENABLE_SPLIT_PARALLELISM - const char* waitPolicy = getenv( "OMP_WAIT_POLICY" ); - const char* maxThLim = getenv( "OMP_THREAD_LIMIT" ); - fprintf( stdout, waitPolicy ? "[OMP: WAIT_POLICY=%s," : "[OMP: WAIT_POLICY=,", waitPolicy ); - fprintf( stdout, maxThLim ? "THREAD_LIMIT=%s" : "THREAD_LIMIT=", maxThLim ); - fprintf( stdout, "]" ); #endif fprintf( stdout, "\n" ); diff --git a/source/App/SEIRemovalApp/CMakeLists.txt b/source/App/SEIRemovalApp/CMakeLists.txt index cb783adff..ec566ecc0 100644 --- a/source/App/SEIRemovalApp/CMakeLists.txt +++ b/source/App/SEIRemovalApp/CMakeLists.txt @@ -33,26 +33,6 @@ if( SET_ENABLE_TRACING ) endif() endif() -if( OpenMP_FOUND ) - if( SET_ENABLE_SPLIT_PARALLELISM ) - if( ENABLE_SPLIT_PARALLELISM ) - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=1 ) - else() - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 ) - endif() - endif() - if( SET_ENABLE_WPP_PARALLELISM ) - if( ENABLE_WPP_PARALLELISM ) - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=1 ) - else() - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 ) - endif() - endif() -else() - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 ) - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 ) -endif() - if( CMAKE_COMPILER_IS_GNUCC AND BUILD_STATIC ) set( ADDITIONAL_LIBS ${ADDITIONAL_LIBS} -static -static-libgcc -static-libstdc++ ) target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_STATIC_LINK=1 ) diff --git a/source/App/StreamMergeApp/CMakeLists.txt b/source/App/StreamMergeApp/CMakeLists.txt index 77c53ece6..84ba9a57f 100644 --- a/source/App/StreamMergeApp/CMakeLists.txt +++ b/source/App/StreamMergeApp/CMakeLists.txt @@ -33,26 +33,6 @@ if( SET_ENABLE_TRACING ) endif() endif() -if( OpenMP_FOUND ) - if( SET_ENABLE_SPLIT_PARALLELISM ) - if( ENABLE_SPLIT_PARALLELISM ) - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=1 ) - else() - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 ) - endif() - endif() - if( SET_ENABLE_WPP_PARALLELISM ) - if( ENABLE_WPP_PARALLELISM ) - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=1 ) - else() - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 ) - endif() - endif() -else() - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 ) - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 ) -endif() - if( CMAKE_COMPILER_IS_GNUCC AND BUILD_STATIC ) set( ADDITIONAL_LIBS ${ADDITIONAL_LIBS} -static -static-libgcc -static-libstdc++ ) target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_STATIC_LINK=1 ) diff --git a/source/App/SubpicMergeApp/CMakeLists.txt b/source/App/SubpicMergeApp/CMakeLists.txt index dd8fac45f..8418bf9af 100644 --- a/source/App/SubpicMergeApp/CMakeLists.txt +++ b/source/App/SubpicMergeApp/CMakeLists.txt @@ -33,26 +33,6 @@ if( SET_ENABLE_TRACING ) endif() endif() -if( OpenMP_FOUND ) - if( SET_ENABLE_SPLIT_PARALLELISM ) - if( ENABLE_SPLIT_PARALLELISM ) - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=1 ) - else() - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 ) - endif() - endif() - if( SET_ENABLE_WPP_PARALLELISM ) - if( ENABLE_WPP_PARALLELISM ) - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=1 ) - else() - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 ) - endif() - endif() -else() - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 ) - target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 ) -endif() - if( CMAKE_COMPILER_IS_GNUCC AND BUILD_STATIC ) set( ADDITIONAL_LIBS ${ADDITIONAL_LIBS} -static -static-libgcc -static-libstdc++ ) target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_STATIC_LINK=1 ) diff --git a/source/Lib/CommonAnalyserLib/CMakeLists.txt b/source/Lib/CommonAnalyserLib/CMakeLists.txt index e915720f2..84ca09922 100644 --- a/source/Lib/CommonAnalyserLib/CMakeLists.txt +++ b/source/Lib/CommonAnalyserLib/CMakeLists.txt @@ -64,28 +64,6 @@ if( SET_ENABLE_TRACING ) target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_TRACING=0 ) endif() endif() - -if( OpenMP_FOUND ) - if( SET_ENABLE_SPLIT_PARALLELISM ) - if( ENABLE_SPLIT_PARALLELISM ) - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=1 ) - else() - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 ) - endif() - endif() - if( SET_ENABLE_WPP_PARALLELISM ) - if( ENABLE_WPP_PARALLELISM ) - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=1 ) - else() - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 ) - endif() - endif() - target_include_directories( ${LIB_NAME} PUBLIC ${OpenMP_CXX_INCLUDE_DIRS} ) - target_link_libraries( ${LIB_NAME} ${OpenMP_CXX_LIBRARIES} ) -else() - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 ) - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 ) -endif() target_include_directories( ${LIB_NAME} PUBLIC ../CommonLib/. ../CommonLib/.. ../CommonLib/x86 ../libmd5 ) target_link_libraries( ${LIB_NAME} Threads::Threads ) diff --git a/source/Lib/CommonLib/CMakeLists.txt b/source/Lib/CommonLib/CMakeLists.txt index 6ae75c82b..6e110a0b6 100644 --- a/source/Lib/CommonLib/CMakeLists.txt +++ b/source/Lib/CommonLib/CMakeLists.txt @@ -62,28 +62,6 @@ if( SET_ENABLE_TRACING ) target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_TRACING=0 ) endif() endif() - -if( OpenMP_FOUND ) - if( SET_ENABLE_SPLIT_PARALLELISM ) - if( ENABLE_SPLIT_PARALLELISM ) - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=1 ) - else() - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 ) - endif() - endif() - if( SET_ENABLE_WPP_PARALLELISM ) - if( ENABLE_WPP_PARALLELISM ) - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=1 ) - else() - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 ) - endif() - endif() - target_include_directories( ${LIB_NAME} PUBLIC ${OpenMP_CXX_INCLUDE_DIRS} ) - target_link_libraries( ${LIB_NAME} ${OpenMP_CXX_LIBRARIES} ) -else() - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 ) - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 ) -endif() target_include_directories( ${LIB_NAME} PUBLIC . .. ./x86 ../libmd5 ) target_link_libraries( ${LIB_NAME} Threads::Threads ) diff --git a/source/Lib/CommonLib/CodingStructure.cpp b/source/Lib/CommonLib/CodingStructure.cpp index a62d548ee..dbfdc3ee3 100644 --- a/source/Lib/CommonLib/CodingStructure.cpp +++ b/source/Lib/CommonLib/CodingStructure.cpp @@ -549,10 +549,6 @@ CodingUnit& CodingStructure::addCU( const UnitArea &unit, const ChannelType chTy if( prevCU ) { prevCU->next = cu; -#if ENABLE_SPLIT_PARALLELISM - - CHECK( prevCU->cacheId != cu->cacheId, "Inconsintent cacheId between previous and current CU" ); -#endif } cus.push_back( cu ); @@ -593,21 +589,12 @@ PredictionUnit& CodingStructure::addPU( const UnitArea &unit, const ChannelType pu->cs = this; pu->cu = m_isTuEnc ? cus[0] : getCU( unit.blocks[chType].pos(), chType ); pu->chType = chType; -#if ENABLE_SPLIT_PARALLELISM - - CHECK( pu->cacheId != pu->cu->cacheId, "Inconsintent cacheId between the PU and assigned CU" ); - CHECK( pu->cu->firstPU != nullptr, "Without an RQT the firstPU should be null" ); -#endif PredictionUnit *prevPU = m_numPUs > 0 ? pus.back() : nullptr; if( prevPU && prevPU->cu == pu->cu ) { prevPU->next = pu; -#if ENABLE_SPLIT_PARALLELISM - - CHECK( prevPU->cacheId != pu->cacheId, "Inconsintent cacheId between previous and current PU" ); -#endif } pus.push_back( pu ); @@ -654,14 +641,6 @@ TransformUnit& CodingStructure::addTU( const UnitArea &unit, const ChannelType c tu->cs = this; tu->cu = m_isTuEnc ? cus[0] : getCU( unit.blocks[chType].pos(), chType ); tu->chType = chType; -#if ENABLE_SPLIT_PARALLELISM - - if( tu->cu ) - { - CHECK(tu->cacheId != tu->cu->cacheId, "Inconsintent cacheId between the TU and assigned CU"); - } -#endif - TransformUnit *prevTU = m_numTUs > 0 ? tus.back() : nullptr; @@ -669,10 +648,6 @@ TransformUnit& CodingStructure::addTU( const UnitArea &unit, const ChannelType c { prevTU->next = tu; tu->prev = prevTU; -#if ENABLE_SPLIT_PARALLELISM - - CHECK( prevTU->cacheId != tu->cacheId, "Inconsintent cacheId between previous and current TU" ); -#endif } tus.push_back( tu ); diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h index 8f0c64f42..5304de1a0 100644 --- a/source/Lib/CommonLib/CommonDef.h +++ b/source/Lib/CommonLib/CommonDef.h @@ -680,16 +680,6 @@ static inline int ceilLog2(uint32_t x) #define _UNIT_AREA_AT(_a,_x,_y,_w,_h) #endif -#if ENABLE_SPLIT_PARALLELISM -#include <omp.h> - -#define PARL_PARAM(DEF) , DEF -#define PARL_PARAM0(DEF) DEF -#else -#define PARL_PARAM(DEF) -#define PARL_PARAM0(DEF) -#endif - static const uint32_t CCALF_CANDS_COEFF_NR = 8; static const int CCALF_SMALL_TAB[CCALF_CANDS_COEFF_NR] = { 0, 1, 2, 4, 8, 16, 32, 64 }; diff --git a/source/Lib/CommonLib/Contexts.h b/source/Lib/CommonLib/Contexts.h index 06ee292cb..a5e89abf8 100644 --- a/source/Lib/CommonLib/Contexts.h +++ b/source/Lib/CommonLib/Contexts.h @@ -438,12 +438,6 @@ private: CtxStore<BinProbModel_Std> m_CtxStore_Std; protected: unsigned m_GRAdaptStats[RExt__GOLOMB_RICE_ADAPTATION_STATISTICS_SETS]; -#if ENABLE_SPLIT_PARALLELISM - -public: - int64_t cacheId; - bool cacheUsed; -#endif }; diff --git a/source/Lib/CommonLib/Picture.cpp b/source/Lib/CommonLib/Picture.cpp index df80b30d9..3fc433bef 100644 --- a/source/Lib/CommonLib/Picture.cpp +++ b/source/Lib/CommonLib/Picture.cpp @@ -40,130 +40,6 @@ #include "ChromaFormat.h" #include "CommonLib/InterpolationFilter.h" - -#if ENABLE_SPLIT_PARALLELISM - -int g_wppThreadId( 0 ); -#pragma omp threadprivate(g_wppThreadId) - -#if ENABLE_SPLIT_PARALLELISM -int g_splitThreadId( 0 ); -#pragma omp threadprivate(g_splitThreadId) - -int g_splitJobId( 0 ); -#pragma omp threadprivate(g_splitJobId) -#endif - -Scheduler::Scheduler() : -#if ENABLE_SPLIT_PARALLELISM - m_numSplitThreads( 1 ) -#endif -{ -} - -Scheduler::~Scheduler() -{ -} - -#if ENABLE_SPLIT_PARALLELISM -unsigned Scheduler::getSplitDataId( int jobId ) const -{ - if( m_numSplitThreads > 1 && m_hasParallelBuffer ) - { - int splitJobId = jobId == CURR_THREAD_ID ? g_splitJobId : jobId; - - return ( g_wppThreadId * NUM_RESERVERD_SPLIT_JOBS ) + splitJobId; - } - else - { - return 0; - } -} - -unsigned Scheduler::getSplitPicId( int tId /*= CURR_THREAD_ID */ ) const -{ - if( m_numSplitThreads > 1 && m_hasParallelBuffer ) - { - int threadId = tId == CURR_THREAD_ID ? g_splitThreadId : tId; - - return ( g_wppThreadId * m_numSplitThreads ) + threadId; - } - else - { - return 0; - } -} - -unsigned Scheduler::getSplitJobId() const -{ - if( m_numSplitThreads > 1 ) - { - return g_splitJobId; - } - else - { - return 0; - } -} - -void Scheduler::setSplitJobId( const int jobId ) -{ - CHECK( g_splitJobId != 0 && jobId != 0, "Need to reset the jobId after usage!" ); - g_splitJobId = jobId; -} - -void Scheduler::startParallel() -{ - m_hasParallelBuffer = true; -} - -void Scheduler::finishParallel() -{ - m_hasParallelBuffer = false; -} - -void Scheduler::setSplitThreadId( const int tId ) -{ - g_splitThreadId = tId == CURR_THREAD_ID ? omp_get_thread_num() : tId; -} - -#endif - - - -unsigned Scheduler::getDataId() const -{ -#if ENABLE_SPLIT_PARALLELISM - if( m_numSplitThreads > 1 ) - { - return getSplitDataId(); - } -#endif - return 0; -} - -bool Scheduler::init( const int ctuYsize, const int ctuXsize, const int numWppThreadsRunning, const int numWppExtraLines, const int numSplitThreads ) -{ -#if ENABLE_SPLIT_PARALLELISM - m_numSplitThreads = numSplitThreads; -#endif - - return true; -} - - -int Scheduler::getNumPicInstances() const -{ -#if !ENABLE_SPLIT_PARALLELISM - return 1; -#else - return ( m_numSplitThreads > 1 ? m_numSplitThreads : 1 ); -#endif -} - -#endif - - // --------------------------------------------------------------------------- // picture methods // --------------------------------------------------------------------------- @@ -225,39 +101,34 @@ void Picture::create( const ChromaFormat &_chromaFormat, const Size &size, const void Picture::destroy() { -#if ENABLE_SPLIT_PARALLELISM - for( int jId = 0; jId < PARL_SPLIT_MAX_NUM_THREADS; jId++ ) -#endif + for (uint32_t t = 0; t < NUM_PIC_TYPES; t++) { - for (uint32_t t = 0; t < NUM_PIC_TYPES; t++) - { - M_BUFS(jId, t).destroy(); - } - m_hashMap.clearAll(); - if (cs) - { - cs->destroy(); - delete cs; - cs = nullptr; - } + M_BUFS(jId, t).destroy(); + } + m_hashMap.clearAll(); + if (cs) + { + cs->destroy(); + delete cs; + cs = nullptr; + } - for (auto &ps: slices) - { - delete ps; - } - slices.clear(); + for (auto &ps: slices) + { + delete ps; + } + slices.clear(); - for (auto &psei: SEIs) - { - delete psei; - } - SEIs.clear(); + for (auto &psei: SEIs) + { + delete psei; + } + SEIs.clear(); - if (m_spliceIdx) - { - delete[] m_spliceIdx; - m_spliceIdx = NULL; - } + if (m_spliceIdx) + { + delete[] m_spliceIdx; + m_spliceIdx = NULL; } } @@ -269,21 +140,8 @@ void Picture::createTempBuffers( const unsigned _maxCUSize ) const Area a = m_ctuArea.Y(); #endif -#if ENABLE_SPLIT_PARALLELISM - scheduler.startParallel(); - - for( int jId = 0; jId < scheduler.getNumPicInstances(); jId++ ) -#endif - { - M_BUFS( jId, PIC_PREDICTION ).create( chromaFormat, a, _maxCUSize ); - M_BUFS( jId, PIC_RESIDUAL ).create( chromaFormat, a, _maxCUSize ); -#if ENABLE_SPLIT_PARALLELISM - if (jId > 0) - { - M_BUFS(jId, PIC_RECONSTRUCTION).create(chromaFormat, Y(), _maxCUSize, margin, MEMORY_ALIGN_DEF_SIZE); - } -#endif - } + M_BUFS( jId, PIC_PREDICTION ).create( chromaFormat, a, _maxCUSize ); + M_BUFS( jId, PIC_RESIDUAL ).create( chromaFormat, a, _maxCUSize ); if (cs) { @@ -293,24 +151,11 @@ void Picture::createTempBuffers( const unsigned _maxCUSize ) void Picture::destroyTempBuffers() { -#if ENABLE_SPLIT_PARALLELISM - scheduler.finishParallel(); - - for( int jId = 0; jId < scheduler.getNumPicInstances(); jId++ ) -#endif + for (uint32_t t = 0; t < NUM_PIC_TYPES; t++) { - for (uint32_t t = 0; t < NUM_PIC_TYPES; t++) + if (t == PIC_RESIDUAL || t == PIC_PREDICTION) { - if (t == PIC_RESIDUAL || t == PIC_PREDICTION) - { - M_BUFS(jId, t).destroy(); - } -#if ENABLE_SPLIT_PARALLELISM - if (t == PIC_RECONSTRUCTION && jId > 0) - { - M_BUFS(jId, t).destroy(); - } -#endif + M_BUFS(0, t).destroy(); } } @@ -470,23 +315,6 @@ void Picture::clearSliceBuffer() slices.clear(); } -#if ENABLE_SPLIT_PARALLELISM -void Picture::finishParallelPart( const UnitArea& area ) -{ - const UnitArea clipdArea = clipArea( area, *this ); - const int sourceID = scheduler.getSplitPicId( 0 ); - CHECK( scheduler.getSplitJobId() > 0, "Finish-CU cannot be called from within a mode- or split-parallelized block!" ); - - // distribute the reconstruction across all of the parallel workers - for( int tId = 1; tId < scheduler.getNumSplitThreads(); tId++ ) - { - const int destID = scheduler.getSplitPicId( tId ); - - M_BUFS( destID, PIC_RECONSTRUCTION ).subBuf( clipdArea ).copyFrom( M_BUFS( sourceID, PIC_RECONSTRUCTION ).subBuf( clipdArea ) ); - } -} -#endif - const TFilterCoeff DownsamplingFilterSRC[8][16][12] = { { // D = 1 @@ -1240,9 +1068,6 @@ PelBuf Picture::getBuf( const CompArea &blk, const PictureType &type ) return PelBuf(); } -#if ENABLE_SPLIT_PARALLELISM - const int jId = ( type == PIC_ORIGINAL || type == PIC_TRUE_ORIGINAL || type == PIC_ORIGINAL_INPUT || type == PIC_TRUE_ORIGINAL_INPUT ) ? 0 : scheduler.getSplitPicId(); -#endif #if !KEEP_PRED_AND_RESI_SIGNALS if( type == PIC_RESIDUAL || type == PIC_PREDICTION ) { @@ -1264,10 +1089,6 @@ const CPelBuf Picture::getBuf( const CompArea &blk, const PictureType &type ) co return PelBuf(); } -#if ENABLE_SPLIT_PARALLELISM - const int jId = ( type == PIC_ORIGINAL || type == PIC_TRUE_ORIGINAL ) ? 0 : scheduler.getSplitPicId(); - -#endif #if !KEEP_PRED_AND_RESI_SIGNALS if( type == PIC_RESIDUAL || type == PIC_PREDICTION ) { @@ -1308,9 +1129,6 @@ const CPelUnitBuf Picture::getBuf( const UnitArea &unit, const PictureType &type Pel* Picture::getOrigin( const PictureType &type, const ComponentID compID ) const { -#if ENABLE_SPLIT_PARALLELISM - const int jId = ( type == PIC_ORIGINAL || type == PIC_TRUE_ORIGINAL ) ? 0 : scheduler.getSplitPicId(); -#endif return M_BUFS( jId, type ).getOrigin( compID ); } diff --git a/source/Lib/CommonLib/Picture.h b/source/Lib/CommonLib/Picture.h index 5506da1b4..2f985828e 100644 --- a/source/Lib/CommonLib/Picture.h +++ b/source/Lib/CommonLib/Picture.h @@ -50,49 +50,13 @@ #include "MCTS.h" #include <deque> -#if ENABLE_SPLIT_PARALLELISM - -#define CURR_THREAD_ID -1 - -class Scheduler -{ -public: - Scheduler(); - ~Scheduler(); - -#if ENABLE_SPLIT_PARALLELISM - unsigned getSplitDataId( int jobId = CURR_THREAD_ID ) const; - unsigned getSplitPicId ( int tId = CURR_THREAD_ID ) const; - unsigned getSplitJobId () const; - void setSplitJobId ( const int jobId ); - void startParallel (); - void finishParallel(); - void setSplitThreadId( const int tId = CURR_THREAD_ID ); - unsigned getNumSplitThreads() const { return m_numSplitThreads; }; -#endif - unsigned getDataId () const; - bool init ( const int ctuYsize, const int ctuXsize, const int numWppThreadsRunning, const int numWppExtraLines, const int numSplitThreads ); - int getNumPicInstances() const; -#if ENABLE_SPLIT_PARALLELISM - - int m_numSplitThreads; - bool m_hasParallelBuffer; -#endif -}; -#endif class SEI; class AQpLayer; typedef std::list<SEI*> SEIMessages; - - -#if ENABLE_SPLIT_PARALLELISM -#define M_BUFS(JID,PID) m_bufs[JID][PID] -#else #define M_BUFS(JID,PID) m_bufs[PID] -#endif struct Picture : public UnitArea { @@ -237,12 +201,7 @@ public: bool interLayerRefPicFlag; bool mixedNaluTypesInPicFlag; - -#if ENABLE_SPLIT_PARALLELISM - PelStorage m_bufs[PARL_SPLIT_MAX_NUM_JOBS][NUM_PIC_TYPES]; -#else PelStorage m_bufs[NUM_PIC_TYPES]; -#endif const Picture* unscaledPic; TComHash m_hashMap; @@ -280,15 +239,6 @@ private: UnitArea m_ctuArea; #endif -#if ENABLE_SPLIT_PARALLELISM -public: - void finishParallelPart ( const UnitArea& ctuArea ); -#endif -#if ENABLE_SPLIT_PARALLELISM -public: - Scheduler scheduler; -#endif - public: SAOBlkParam *getSAO(int id = 0) { return &m_sao[id][0]; }; void resizeSAO(unsigned numEntries, int dstid) { m_sao[dstid].resize(numEntries); } diff --git a/source/Lib/CommonLib/Quant.cpp b/source/Lib/CommonLib/Quant.cpp index 1f7ae1f97..ce9f46528 100644 --- a/source/Lib/CommonLib/Quant.cpp +++ b/source/Lib/CommonLib/Quant.cpp @@ -501,14 +501,6 @@ void Quant::init( uint32_t uiMaxTrSize, m_resetStore = true; } -#if ENABLE_SPLIT_PARALLELISM -void Quant::copyState( const Quant& other ) -{ - m_dLambda = other.m_dLambda; - memcpy( m_lambdas, other.m_lambdas, sizeof( m_lambdas ) ); -} -#endif - /** set quantized matrix coefficient for encode * \param scalingList quantized matrix address * \param format chroma format diff --git a/source/Lib/CommonLib/Quant.h b/source/Lib/CommonLib/Quant.h index 7d7f51d94..b9b0d56c2 100644 --- a/source/Lib/CommonLib/Quant.h +++ b/source/Lib/CommonLib/Quant.h @@ -140,10 +140,6 @@ public: // de-quantization virtual void dequant ( const TransformUnit &tu, CoeffBuf &dstCoeff, const ComponentID &compID, const QpParam &cQP ); -#if ENABLE_SPLIT_PARALLELISM - virtual void copyState ( const Quant& other ); -#endif - protected: #if T0196_SELECTIVE_RDOQ diff --git a/source/Lib/CommonLib/RdCost.cpp b/source/Lib/CommonLib/RdCost.cpp index 4392eae24..996c01935 100644 --- a/source/Lib/CommonLib/RdCost.cpp +++ b/source/Lib/CommonLib/RdCost.cpp @@ -222,26 +222,6 @@ void RdCost::init() m_pairCheck = 0; } - -#if ENABLE_SPLIT_PARALLELISM - -void RdCost::copyState( const RdCost& other ) -{ - m_costMode = other.m_costMode; - m_dLambda = other.m_dLambda; - m_DistScale = other.m_DistScale; - memcpy( m_distortionWeight, other.m_distortionWeight, sizeof( m_distortionWeight ) ); - m_mvPredictor = other.m_mvPredictor; - m_motionLambda = other.m_motionLambda; - m_iCostScale = other.m_iCostScale; - m_dLambdaMotionSAD = other.m_dLambdaMotionSAD; -#if WCG_EXT - m_dLambda_unadjusted = other.m_dLambda_unadjusted ; - m_DistScaleUnadjusted = other.m_DistScaleUnadjusted; -#endif -} -#endif - void RdCost::setDistParam( DistParam &rcDP, const CPelBuf &org, const Pel* piRefY, int iRefStride, int bitDepth, ComponentID compID, int subShiftMode, int step, bool useHadamard ) { rcDP.bitDepth = bitDepth; diff --git a/source/Lib/CommonLib/RdCost.h b/source/Lib/CommonLib/RdCost.h index 3f82dc0ee..e3ad4f06e 100644 --- a/source/Lib/CommonLib/RdCost.h +++ b/source/Lib/CommonLib/RdCost.h @@ -293,10 +293,6 @@ public: return length; } -#if ENABLE_SPLIT_PARALLELISM - void copyState( const RdCost& other ); -#endif - // for motion cost static uint32_t xGetExpGolombNumberOfBits( int iVal ) { diff --git a/source/Lib/CommonLib/Reshape.h b/source/Lib/CommonLib/Reshape.h index e3b9c9f8f..2d28d9719 100644 --- a/source/Lib/CommonLib/Reshape.h +++ b/source/Lib/CommonLib/Reshape.h @@ -74,11 +74,7 @@ protected: int m_vpduY; public: Reshape(); -#if ENABLE_SPLIT_PARALLELISM - virtual ~Reshape(); -#else ~Reshape(); -#endif void createDec(int bitDepth); void destroy(); diff --git a/source/Lib/CommonLib/TrQuant.cpp b/source/Lib/CommonLib/TrQuant.cpp index 006862181..59d4c22de 100644 --- a/source/Lib/CommonLib/TrQuant.cpp +++ b/source/Lib/CommonLib/TrQuant.cpp @@ -208,13 +208,6 @@ TrQuant::~TrQuant() } } -#if ENABLE_SPLIT_PARALLELISM -void TrQuant::copyState( const TrQuant& other ) -{ - m_quant->copyState( *other.m_quant ); -} -#endif - void TrQuant::xDeQuant(const TransformUnit &tu, CoeffBuf &dstCoeff, const ComponentID &compID, diff --git a/source/Lib/CommonLib/TrQuant.h b/source/Lib/CommonLib/TrQuant.h index 6fcafb91e..f93ab67de 100644 --- a/source/Lib/CommonLib/TrQuant.h +++ b/source/Lib/CommonLib/TrQuant.h @@ -115,10 +115,6 @@ public: void lambdaAdjustColorTrans(bool forward) { m_quant->lambdaAdjustColorTrans(forward); } void resetStore() { m_quant->resetStore(); } -#if ENABLE_SPLIT_PARALLELISM - void copyState( const TrQuant& other ); -#endif - protected: TCoeff m_tempCoeff[MAX_TB_SIZEY * MAX_TB_SIZEY]; diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 019a18f6e..67fceef08 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -101,17 +101,6 @@ typedef std::pair<int, int> TrCost; #define JVET_O0756_CALCULATE_HDRMETRICS 1 #endif -#ifndef ENABLE_SPLIT_PARALLELISM -#define ENABLE_SPLIT_PARALLELISM 0 -#endif -#if ENABLE_SPLIT_PARALLELISM -#define PARL_SPLIT_MAX_NUM_JOBS 6 // number of parallel jobs that can be defined and need memory allocated -#define NUM_RESERVERD_SPLIT_JOBS ( PARL_SPLIT_MAX_NUM_JOBS + 1 ) // number of all data structures including the merge thread (0) -#define PARL_SPLIT_MAX_NUM_THREADS PARL_SPLIT_MAX_NUM_JOBS -#define NUM_SPLIT_THREADS_IF_MSVC 4 - -#endif - // clang-format on // ==================================================================================================================== @@ -1218,20 +1207,8 @@ template<typename T> class dynamic_cache { std::vector<T*> m_cache; -#if ENABLE_SPLIT_PARALLELISM - int64_t m_cacheId; -#endif public: - -#if ENABLE_SPLIT_PARALLELISM - dynamic_cache() - { - static int cacheId = 0; - m_cacheId = cacheId++; - } - -#endif ~dynamic_cache() { deleteEntries(); @@ -1256,48 +1233,22 @@ public: { ret = m_cache.back(); m_cache.pop_back(); -#if ENABLE_SPLIT_PARALLELISM - CHECK( ret->cacheId != m_cacheId, "Putting item into wrong cache!" ); - CHECK( !ret->cacheUsed, "Fetched an element that should've been in cache!!" ); -#endif } else { ret = new T; } -#if ENABLE_SPLIT_PARALLELISM - ret->cacheId = m_cacheId; - ret->cacheUsed = false; - -#endif return ret; } void cache( T* el ) { -#if ENABLE_SPLIT_PARALLELISM - CHECK( el->cacheId != m_cacheId, "Putting item into wrong cache!" ); - CHECK( el->cacheUsed, "Putting cached item back into cache!" ); - - el->cacheUsed = true; - -#endif m_cache.push_back( el ); } void cache( std::vector<T*>& vel ) { -#if ENABLE_SPLIT_PARALLELISM - for( auto el : vel ) - { - CHECK( el->cacheId != m_cacheId, "Putting item into wrong cache!" ); - CHECK( el->cacheUsed, "Putting cached item back into cache!" ); - - el->cacheUsed = true; - } - -#endif m_cache.insert( m_cache.end(), vel.begin(), vel.end() ); vel.clear(); } diff --git a/source/Lib/CommonLib/Unit.h b/source/Lib/CommonLib/Unit.h index 62395e292..430f6e8dc 100644 --- a/source/Lib/CommonLib/Unit.h +++ b/source/Lib/CommonLib/Unit.h @@ -350,11 +350,7 @@ struct CodingUnit : public UnitArea TransformUnit *firstTU; TransformUnit *lastTU; -#if ENABLE_SPLIT_PARALLELISM - int64_t cacheId; - bool cacheUsed; -#endif const uint8_t getSbtIdx() const { assert( ( ( sbtInfo >> 0 ) & 0xf ) < NUMBER_SBT_IDX ); return ( sbtInfo >> 0 ) & 0xf; } const uint8_t getSbtPos() const { return ( sbtInfo >> 4 ) & 0x3; } void setSbtIdx( uint8_t idx ) { CHECK( idx >= NUMBER_SBT_IDX, "sbt_idx wrong" ); sbtInfo = ( idx << 0 ) + ( sbtInfo & 0xf0 ); } @@ -434,12 +430,6 @@ struct PredictionUnit : public UnitArea, public IntraPredictionData, public Inte const MotionInfo& getMotionInfo( const Position& pos ) const; MotionBuf getMotionBuf(); CMotionBuf getMotionBuf() const; - -#if ENABLE_SPLIT_PARALLELISM - - int64_t cacheId; - bool cacheUsed; -#endif }; // --------------------------------------------------------------------------- @@ -490,11 +480,6 @@ struct TransformUnit : public UnitArea Pel* getPLTIndex(const ComponentID id); bool* getRunTypes(const ComponentID id); -#if ENABLE_SPLIT_PARALLELISM - int64_t cacheId; - bool cacheUsed; - -#endif private: TCoeff *m_coeffs[ MAX_NUM_TBLOCKS ]; Pel *m_pcmbuf[ MAX_NUM_TBLOCKS ]; diff --git a/source/Lib/DecoderAnalyserLib/CMakeLists.txt b/source/Lib/DecoderAnalyserLib/CMakeLists.txt index 4fbd3463e..dc940061d 100644 --- a/source/Lib/DecoderAnalyserLib/CMakeLists.txt +++ b/source/Lib/DecoderAnalyserLib/CMakeLists.txt @@ -29,26 +29,6 @@ if( SET_ENABLE_TRACING ) endif() endif() -if( OpenMP_FOUND ) - if( SET_ENABLE_SPLIT_PARALLELISM ) - if( ENABLE_SPLIT_PARALLELISM ) - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=1 ) - else() - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 ) - endif() - endif() - if( SET_ENABLE_WPP_PARALLELISM ) - if( ENABLE_WPP_PARALLELISM ) - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=1 ) - else() - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 ) - endif() - endif() -else() - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 ) - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 ) -endif() - target_include_directories( ${LIB_NAME} PUBLIC ../DecoderLib ) target_link_libraries( ${LIB_NAME} CommonAnalyserLib Threads::Threads ) diff --git a/source/Lib/DecoderLib/CMakeLists.txt b/source/Lib/DecoderLib/CMakeLists.txt index 23a3659a1..9c84ac8f0 100644 --- a/source/Lib/DecoderLib/CMakeLists.txt +++ b/source/Lib/DecoderLib/CMakeLists.txt @@ -28,26 +28,6 @@ if( SET_ENABLE_TRACING ) endif() endif() -if( OpenMP_FOUND ) - if( SET_ENABLE_SPLIT_PARALLELISM ) - if( ENABLE_SPLIT_PARALLELISM ) - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=1 ) - else() - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 ) - endif() - endif() - if( SET_ENABLE_WPP_PARALLELISM ) - if( ENABLE_WPP_PARALLELISM ) - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=1 ) - else() - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 ) - endif() - endif() -else() - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 ) - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 ) -endif() - target_include_directories( ${LIB_NAME} PUBLIC . ) target_link_libraries( ${LIB_NAME} CommonLib Threads::Threads ) diff --git a/source/Lib/EncoderLib/CABACWriter.cpp b/source/Lib/EncoderLib/CABACWriter.cpp index ea573ccf1..1b827be85 100644 --- a/source/Lib/EncoderLib/CABACWriter.cpp +++ b/source/Lib/EncoderLib/CABACWriter.cpp @@ -1757,11 +1757,7 @@ Pel CABACWriter::writePLTIndex(const CodingUnit& cu, uint32_t idx, PelBuf& palet void CABACWriter::prediction_unit( const PredictionUnit& pu ) { CHECK( pu.cu->treeType == TREE_C, "cannot be chroma CU" ); -#if ENABLE_SPLIT_PARALLELISM - CHECK( pu.cacheUsed, "Processing a PU that should be in cache!" ); - CHECK( pu.cu->cacheUsed, "Processing a CU that should be in cache!" ); -#endif if( pu.cu->skip ) { CHECK( !pu.mergeFlag, "merge_flag must be true for skipped CUs" ); diff --git a/source/Lib/EncoderLib/CMakeLists.txt b/source/Lib/EncoderLib/CMakeLists.txt index 2a50346f2..c82c024aa 100644 --- a/source/Lib/EncoderLib/CMakeLists.txt +++ b/source/Lib/EncoderLib/CMakeLists.txt @@ -32,26 +32,6 @@ if( SET_ENABLE_TRACING ) endif() endif() -if( OpenMP_FOUND ) - if( SET_ENABLE_SPLIT_PARALLELISM ) - if( ENABLE_SPLIT_PARALLELISM ) - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=1 ) - else() - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 ) - endif() - endif() - if( SET_ENABLE_WPP_PARALLELISM ) - if( ENABLE_WPP_PARALLELISM ) - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=1 ) - else() - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 ) - endif() - endif() -else() - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 ) - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 ) -endif() - target_include_directories( ${LIB_NAME} PUBLIC . ) target_link_libraries( ${LIB_NAME} CommonLib Threads::Threads ) diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h index 1a81a3420..8981657ec 100644 --- a/source/Lib/EncoderLib/EncCfg.h +++ b/source/Lib/EncoderLib/EncCfg.h @@ -736,11 +736,6 @@ protected: CfgVPSParameters m_cfgVPSParameters; -#if ENABLE_SPLIT_PARALLELISM - int m_numSplitThreads; - bool m_forceSingleSplitThread; -#endif - bool m_alf; ///< Adaptive Loop Filter #if JVET_U0081 double m_alfStrengthLuma; @@ -1938,12 +1933,6 @@ public: void setDebugCTU( int i ) { m_debugCTU = i; } int getDebugCTU() const { return m_debugCTU; } -#if ENABLE_SPLIT_PARALLELISM - void setNumSplitThreads( int n ) { m_numSplitThreads = n; } - int getNumSplitThreads() const { return m_numSplitThreads; } - void setForceSingleSplitThread( bool b ) { m_forceSingleSplitThread = b; } - int getForceSingleSplitThread() const { return m_forceSingleSplitThread; } -#endif void setUseALF( bool b ) { m_alf = b; } bool getUseALF() const { return m_alf; } #if JVET_U0081 diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp index 1ae5af099..cac2ed29f 100644 --- a/source/Lib/EncoderLib/EncCu.cpp +++ b/source/Lib/EncoderLib/EncCu.cpp @@ -227,22 +227,18 @@ EncCu::~EncCu() /** \param pcEncLib pointer of encoder class */ -void EncCu::init( EncLib* pcEncLib, const SPS& sps PARL_PARAM( const int tId ) ) +void EncCu::init( EncLib* pcEncLib, const SPS& sps ) { m_pcEncCfg = pcEncLib; - m_pcIntraSearch = pcEncLib->getIntraSearch( PARL_PARAM0( tId ) ); - m_pcInterSearch = pcEncLib->getInterSearch( PARL_PARAM0( tId ) ); - m_pcTrQuant = pcEncLib->getTrQuant( PARL_PARAM0( tId ) ); - m_pcRdCost = pcEncLib->getRdCost ( PARL_PARAM0( tId ) ); - m_CABACEstimator = pcEncLib->getCABACEncoder( PARL_PARAM0( tId ) )->getCABACEstimator( &sps ); + m_pcIntraSearch = pcEncLib->getIntraSearch(); + m_pcInterSearch = pcEncLib->getInterSearch(); + m_pcTrQuant = pcEncLib->getTrQuant(); + m_pcRdCost = pcEncLib->getRdCost (); + m_CABACEstimator = pcEncLib->getCABACEncoder()->getCABACEstimator( &sps ); m_CABACEstimator->setEncCu(this); - m_CtxCache = pcEncLib->getCtxCache( PARL_PARAM0( tId ) ); + m_CtxCache = pcEncLib->getCtxCache(); m_pcRateCtrl = pcEncLib->getRateCtrl(); m_pcSliceEncoder = pcEncLib->getSliceEncoder(); -#if ENABLE_SPLIT_PARALLELISM - m_pcEncLib = pcEncLib; - m_dataId = tId; -#endif m_pcLoopFilter = pcEncLib->getLoopFilter(); m_GeoCostList.init(GEO_NUM_PARTITION_MODE, m_pcEncCfg->getMaxNumGeoCand()); m_AFFBestSATDCost = MAX_DOUBLE; @@ -267,39 +263,6 @@ void EncCu::compressCtu( CodingStructure& cs, const UnitArea& area, const unsign cs.slice->m_mapPltCost[0].clear(); cs.slice->m_mapPltCost[1].clear(); -#if ENABLE_SPLIT_PARALLELISM - if( m_pcEncCfg->getNumSplitThreads() > 1 ) - { - for( int jId = 1; jId < NUM_RESERVERD_SPLIT_JOBS; jId++ ) - { - EncCu* jobEncCu = m_pcEncLib->getCuEncoder( cs.picture->scheduler.getSplitDataId( jId ) ); - CacheBlkInfoCtrl* cacheCtrl = dynamic_cast< CacheBlkInfoCtrl* >( jobEncCu->m_modeCtrl ); -#if REUSE_CU_RESULTS - BestEncInfoCache* bestCache = dynamic_cast< BestEncInfoCache* >( jobEncCu->m_modeCtrl ); -#endif - SaveLoadEncInfoSbt *sbtCache = dynamic_cast< SaveLoadEncInfoSbt* >( jobEncCu->m_modeCtrl ); - if( cacheCtrl ) - { - cacheCtrl->init( *cs.slice ); - } -#if REUSE_CU_RESULTS - if (bestCache) - { - bestCache->init(*cs.slice); - } -#endif - if (sbtCache) - { - sbtCache->init(*cs.slice); - } - } - } - -#if REUSE_CU_RESULTS - if( auto* cacheCtrl = dynamic_cast<BestEncInfoCache*>( m_modeCtrl ) ) { cacheCtrl->tick(); } -#endif - if( auto* cacheCtrl = dynamic_cast<CacheBlkInfoCtrl*>( m_modeCtrl ) ) { cacheCtrl->tick(); } -#endif // init the partitioning manager QTBTPartitioner partitioner; partitioner.initCtu(area, CH_L, *cs.slice); @@ -535,20 +498,7 @@ bool EncCu::xCheckBestMode( CodingStructure *&tempCS, CodingStructure *&bestCS, void EncCu::xCompressCU( CodingStructure*& tempCS, CodingStructure*& bestCS, Partitioner& partitioner, double maxCostAllowed ) { CHECK(maxCostAllowed < 0, "Wrong value of maxCostAllowed!"); -#if ENABLE_SPLIT_PARALLELISM - CHECK( m_dataId != tempCS->picture->scheduler.getDataId(), "Working in the wrong dataId!" ); - - if( m_pcEncCfg->getNumSplitThreads() != 1 && tempCS->picture->scheduler.getSplitJobId() == 0 ) - { - if( m_modeCtrl->isParallelSplit( *tempCS, partitioner ) ) - { - m_modeCtrl->setParallelSplit( true ); - xCompressCUParallel( tempCS, bestCS, partitioner ); - return; - } - } -#endif uint32_t compBegin; uint32_t numComp; bool jointPLT = false; @@ -610,17 +560,6 @@ void EncCu::xCompressCU( CodingStructure*& tempCS, CodingStructure*& bestCS, Par auto slsSbt = dynamic_cast<SaveLoadEncInfoSbt*>( m_modeCtrl ); int maxSLSize = sps.getUseSBT() ? tempCS->slice->getSPS()->getMaxTbSize() : MTS_INTER_MAX_CU_SIZE; slsSbt->resetSaveloadSbt( maxSLSize ); -#if ENABLE_SPLIT_PARALLELISM - CHECK( tempCS->picture->scheduler.getSplitJobId() != 0, "The SBT search reset need to happen in sequential region." ); - if (m_pcEncCfg->getNumSplitThreads() > 1) - { - for (int jId = 1; jId < NUM_RESERVERD_SPLIT_JOBS; jId++) - { - auto slsSbt = dynamic_cast<SaveLoadEncInfoSbt *>(m_pcEncLib->getCuEncoder(jId)->m_modeCtrl); - slsSbt->resetSaveloadSbt(maxSLSize); - } - } -#endif } m_sbtCostSave[0] = m_sbtCostSave[1] = MAX_DOUBLE; @@ -722,9 +661,6 @@ void EncCu::xCompressCU( CodingStructure*& tempCS, CodingStructure*& bestCS, Par #endif )) { -#if ENABLE_SPLIT_PARALLELISM - CHECK( tempCS->picture->scheduler.getSplitJobId() > 0, "Changing lambda is only allowed in the master thread!" ); -#endif if (currTestMode.qp >= 0) { updateLambda (&slice, currTestMode.qp, @@ -915,17 +851,6 @@ void EncCu::xCompressCU( CodingStructure*& tempCS, CodingStructure*& bestCS, Par ////////////////////////////////////////////////////////////////////////// // Finishing CU -#if ENABLE_SPLIT_PARALLELISM - if( bestCS->cus.empty() ) - { - CHECK( bestCS->cost != MAX_DOUBLE, "Cost should be maximal if no encoding found" ); - CHECK( bestCS->picture->scheduler.getSplitJobId() == 0, "Should always get a result in serial case" ); - - m_modeCtrl->finishCULevel( partitioner ); - return; - } - -#endif if( tempCS->cost == MAX_DOUBLE && bestCS->cost == MAX_DOUBLE ) { //although some coding modes were planned to be tried in RDO, no coding mode actually finished encoding due to early termination @@ -968,13 +893,6 @@ void EncCu::xCompressCU( CodingStructure*& tempCS, CodingStructure*& bestCS, Par m_pcIntraSearch->saveCuAreaCostInSCIPU( Area( partitioner.currArea().lumaPos(), partitioner.currArea().lumaSize() ), bestCS->cost ); } -#if ENABLE_SPLIT_PARALLELISM - if( tempCS->picture->scheduler.getSplitJobId() == 0 && m_pcEncCfg->getNumSplitThreads() != 1 ) - { - tempCS->picture->finishParallelPart( currCsArea ); - } - -#endif if (bestCS->cus.size() == 1) // no partition { CHECK(bestCS->cus[0]->tileIdx != bestCS->pps->getTileIdx(bestCS->area.lumaPos()), "Wrong tile index!"); @@ -1063,164 +981,6 @@ void EncCu::updateLambda (Slice* slice, const int dQP, } #endif // SHARP_LUMA_DELTA_QP || ENABLE_QPA_SUB_CTU -#if ENABLE_SPLIT_PARALLELISM -//#undef DEBUG_PARALLEL_TIMINGS -//#define DEBUG_PARALLEL_TIMINGS 1 -void EncCu::xCompressCUParallel( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner ) -{ - const unsigned wIdx = gp_sizeIdxInfo->idxFrom( partitioner.currArea().lwidth() ); - const unsigned hIdx = gp_sizeIdxInfo->idxFrom( partitioner.currArea().lheight() ); - - Picture* picture = tempCS->picture; - - int numJobs = m_modeCtrl->getNumParallelJobs( *bestCS, partitioner ); - - bool jobUsed [NUM_RESERVERD_SPLIT_JOBS]; - std::fill( jobUsed, jobUsed + NUM_RESERVERD_SPLIT_JOBS, false ); - - const UnitArea currArea = CS::getArea( *tempCS, partitioner.currArea(), partitioner.chType ); - const bool doParallel = !m_pcEncCfg->getForceSingleSplitThread(); - omp_set_num_threads( m_pcEncCfg->getNumSplitThreads() ); - -#pragma omp parallel for schedule(dynamic,1) if(doParallel) - for( int jId = 1; jId <= numJobs; jId++ ) - { - // thread start - picture->scheduler.setSplitThreadId(); - picture->scheduler.setSplitJobId( jId ); - - QTBTPartitioner jobPartitioner; - EncCu* jobCuEnc = m_pcEncLib->getCuEncoder( picture->scheduler.getSplitDataId( jId ) ); - auto* jobBlkCache = dynamic_cast<CacheBlkInfoCtrl*>( jobCuEnc->m_modeCtrl ); -#if REUSE_CU_RESULTS - auto* jobBestCache = dynamic_cast<BestEncInfoCache*>( jobCuEnc->m_modeCtrl ); -#endif - - jobPartitioner.copyState( partitioner ); - jobCuEnc ->copyState( this, jobPartitioner, currArea, true ); - - if( jobBlkCache ) { jobBlkCache ->tick(); } -#if REUSE_CU_RESULTS - if( jobBestCache ) { jobBestCache->tick(); } - -#endif - CodingStructure *&jobBest = jobCuEnc->m_pBestCS[wIdx][hIdx]; - CodingStructure *&jobTemp = jobCuEnc->m_pTempCS[wIdx][hIdx]; - - jobUsed[jId] = true; - - jobCuEnc->xCompressCU( jobTemp, jobBest, jobPartitioner ); - - picture->scheduler.setSplitJobId( 0 ); - // thread stop - } - picture->scheduler.setSplitThreadId( 0 ); - - int bestJId = 0; - double bestCost = bestCS->cost; - for( int jId = 1; jId <= numJobs; jId++ ) - { - EncCu* jobCuEnc = m_pcEncLib->getCuEncoder( picture->scheduler.getSplitDataId( jId ) ); - - if( jobUsed[jId] && jobCuEnc->m_pBestCS[wIdx][hIdx]->cost < bestCost ) - { - bestCost = jobCuEnc->m_pBestCS[wIdx][hIdx]->cost; - bestJId = jId; - } - } - - if( bestJId > 0 ) - { - copyState( m_pcEncLib->getCuEncoder( picture->scheduler.getSplitDataId( bestJId ) ), partitioner, currArea, false ); - m_CurrCtx->best = m_CABACEstimator->getCtx(); - - tempCS = m_pTempCS[wIdx][hIdx]; - bestCS = m_pBestCS[wIdx][hIdx]; - } - - const int bitDepthY = tempCS->sps->getBitDepth( CH_L ); - const UnitArea clipdArea = clipArea( currArea, *picture ); - - CHECK( calcCheckSum( picture->getRecoBuf( clipdArea.Y() ), bitDepthY ) != calcCheckSum( bestCS->getRecoBuf( clipdArea.Y() ), bitDepthY ), "Data copied incorrectly!" ); - - picture->finishParallelPart( currArea ); - - if( auto *blkCache = dynamic_cast<CacheBlkInfoCtrl*>( m_modeCtrl ) ) - { - for( int jId = 1; jId <= numJobs; jId++ ) - { - if( !jobUsed[jId] || jId == bestJId ) continue; - - auto *jobBlkCache = dynamic_cast<CacheBlkInfoCtrl*>( m_pcEncLib->getCuEncoder( picture->scheduler.getSplitDataId( jId ) )->m_modeCtrl ); - CHECK( !jobBlkCache, "If own mode controller has blk info cache capability so should all other mode controllers!" ); - blkCache->CacheBlkInfoCtrl::copyState( *jobBlkCache, partitioner.currArea() ); - } - - blkCache->tick(); - } -#if REUSE_CU_RESULTS - - if( auto *blkCache = dynamic_cast<BestEncInfoCache*>( m_modeCtrl ) ) - { - for( int jId = 1; jId <= numJobs; jId++ ) - { - if( !jobUsed[jId] || jId == bestJId ) continue; - - auto *jobBlkCache = dynamic_cast<BestEncInfoCache*>( m_pcEncLib->getCuEncoder( picture->scheduler.getSplitDataId( jId ) )->m_modeCtrl ); - CHECK( !jobBlkCache, "If own mode controller has blk info cache capability so should all other mode controllers!" ); - blkCache->BestEncInfoCache::copyState( *jobBlkCache, partitioner.currArea() ); - } - - blkCache->tick(); - } -#endif -} - -void EncCu::copyState( EncCu* other, Partitioner& partitioner, const UnitArea& currArea, const bool isDist ) -{ - const unsigned wIdx = gp_sizeIdxInfo->idxFrom( partitioner.currArea().lwidth () ); - const unsigned hIdx = gp_sizeIdxInfo->idxFrom( partitioner.currArea().lheight() ); - - if( isDist ) - { - other->m_pBestCS[wIdx][hIdx]->initSubStructure( *m_pBestCS[wIdx][hIdx], partitioner.chType, partitioner.currArea(), false ); - other->m_pTempCS[wIdx][hIdx]->initSubStructure( *m_pTempCS[wIdx][hIdx], partitioner.chType, partitioner.currArea(), false ); - } - else - { - CodingStructure* dst = m_pBestCS[wIdx][hIdx]; - const CodingStructure* src = other->m_pBestCS[wIdx][hIdx]; - bool keepResi = KEEP_PRED_AND_RESI_SIGNALS; - bool keepPred = true; - - dst->useSubStructure( *src, partitioner.chType, currArea, keepPred, true, keepResi, keepResi, true ); - - dst->cost = src->cost; - dst->dist = src->dist; - dst->fracBits = src->fracBits; - dst->features = src->features; - } - - if( isDist ) - { - m_CurrCtx = m_CtxBuffer.data(); - } - - m_pcInterSearch->copyState( *other->m_pcInterSearch ); - m_modeCtrl ->copyState( *other->m_modeCtrl, partitioner.currArea() ); - m_pcRdCost ->copyState( *other->m_pcRdCost ); - m_pcTrQuant ->copyState( *other->m_pcTrQuant ); - if( m_pcEncCfg->getLmcs() ) - { - EncReshape *encReshapeThis = dynamic_cast<EncReshape*>( m_pcReshape); - EncReshape *encReshapeOther = dynamic_cast<EncReshape*>(other->m_pcReshape); - encReshapeThis->copyState( *encReshapeOther ); - } - - m_CABACEstimator->getCtx() = other->m_CABACEstimator->getCtx(); -} -#endif - void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, const ModeType modeTypeParent, bool &skipInterPass ) { const int qp = encTestMode.qp; diff --git a/source/Lib/EncoderLib/EncCu.h b/source/Lib/EncoderLib/EncCu.h index 8ae2aad32..b953c9b2f 100644 --- a/source/Lib/EncoderLib/EncCu.h +++ b/source/Lib/EncoderLib/EncCu.h @@ -161,10 +161,6 @@ private: CtxPair* m_CurrCtx; CtxCache* m_CtxCache; -#if ENABLE_SPLIT_PARALLELISM - int m_dataId; -#endif - // Data : encoder control int m_cuChromaQpOffsetIdxPlus1; // if 0, then cu_chroma_qp_offset_flag will be 0, otherwise cu_chroma_qp_offset_flag will be 1. @@ -199,9 +195,6 @@ private: int m_ctuIbcSearchRangeX; int m_ctuIbcSearchRangeY; -#if ENABLE_SPLIT_PARALLELISM - EncLib* m_pcEncLib; -#endif int m_bestBcwIdx[2]; double m_bestBcwCost[2]; GeoMotionInfo m_GeoModeTest[GEO_MAX_NUM_CANDS]; @@ -215,7 +208,7 @@ private: double m_sbtCostSave[2]; public: /// copy parameters from encoder class - void init ( EncLib* pcEncLib, const SPS& sps PARL_PARAM( const int jId = 0 ) ); + void init ( EncLib* pcEncLib, const SPS& sps ); void setDecCuReshaperInEncCU(EncReshape* pcReshape, ChromaFormat chromaFormatIDC) { initDecCuReshaper((Reshape*) pcReshape, chromaFormatIDC); } /// create internal buffers @@ -248,10 +241,6 @@ protected: Distortion getDistortionDb ( CodingStructure &cs, CPelBuf org, CPelBuf reco, ComponentID compID, const CompArea& compArea, bool afterDb ); void xCompressCU ( CodingStructure*& tempCS, CodingStructure*& bestCS, Partitioner& pm, double maxCostAllowed = MAX_DOUBLE ); -#if ENABLE_SPLIT_PARALLELISM - void xCompressCUParallel ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm ); - void copyState ( EncCu* other, Partitioner& pm, const UnitArea& currArea, const bool isDist ); -#endif bool xCheckBestMode ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestmode ); diff --git a/source/Lib/EncoderLib/EncGOP.cpp b/source/Lib/EncoderLib/EncGOP.cpp index 199d0ec8a..81f350b69 100644 --- a/source/Lib/EncoderLib/EncGOP.cpp +++ b/source/Lib/EncoderLib/EncGOP.cpp @@ -2110,9 +2110,6 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, m_pcSliceEncoder->create( picWidth, picHeight, chromaFormatIDC, maxCUWidth, maxCUHeight, maxTotalCUDepth ); -#if ENABLE_SPLIT_PARALLELISM - pcPic->scheduler.init( pcPic->cs->pcv->heightInCtus, pcPic->cs->pcv->widthInCtus, 1 , 0 , m_pcCfg->getNumSplitThreads() ); -#endif pcPic->createTempBuffers( pcPic->cs->pps->pcv->maxCUWidth ); pcPic->cs->createCoeffs((bool)pcPic->cs->sps->getPLTMode()); @@ -3010,7 +3007,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, m_pcSAO->SAOProcess( cs, sliceEnabled, pcSlice->getLambdas(), #if ENABLE_QPA - (m_pcCfg->getUsePerceptQPA() && !m_pcCfg->getUseRateCtrl() && pcSlice->getPPS()->getUseDQP() ? m_pcEncLib->getRdCost (PARL_PARAM0 (0))->getChromaWeight() : 0.0), + (m_pcCfg->getUsePerceptQPA() && !m_pcCfg->getUseRateCtrl() && pcSlice->getPPS()->getUseDQP() ? m_pcEncLib->getRdCost ()->getChromaWeight() : 0.0), #endif m_pcCfg->getTestSAODisableAtPictureLevel(), m_pcCfg->getSaoEncodingRate(), m_pcCfg->getSaoEncodingRateChroma(), m_pcCfg->getSaoCtuBoundary(), m_pcCfg->getSaoGreedyMergeEnc() ); //assign SAO slice header @@ -3042,7 +3039,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, m_pcALF->initCABACEstimator(m_pcEncLib->getCABACEncoder(), m_pcEncLib->getCtxCache(), pcSlice, m_pcEncLib->getApsMap()); m_pcALF->ALFProcess(cs, pcSlice->getLambdas() #if ENABLE_QPA - , (m_pcCfg->getUsePerceptQPA() && !m_pcCfg->getUseRateCtrl() && pcSlice->getPPS()->getUseDQP() ? m_pcEncLib->getRdCost(PARL_PARAM0(0))->getChromaWeight() : 0.0) + , (m_pcCfg->getUsePerceptQPA() && !m_pcCfg->getUseRateCtrl() && pcSlice->getPPS()->getUseDQP() ? m_pcEncLib->getRdCost()->getChromaWeight() : 0.0) #endif , pcPic, uiNumSliceSegments ); diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp index af03029af..72eb14452 100644 --- a/source/Lib/EncoderLib/EncLib.cpp +++ b/source/Lib/EncoderLib/EncLib.cpp @@ -43,9 +43,6 @@ #include "CommonLib/Picture.h" #include "CommonLib/CommonDef.h" #include "CommonLib/ChromaFormat.h" -#if ENABLE_SPLIT_PARALLELISM -#include <omp.h> -#endif #include "EncLibCommon.h" #include "CommonLib/ProfileLevelTier.h" @@ -102,28 +99,7 @@ void EncLib::create( const int layerId ) m_iPOCLast = m_compositeRefEnabled ? -2 : -1; // create processing unit classes m_cGOPEncoder. create( ); -#if ENABLE_SPLIT_PARALLELISM -#if ENABLE_SPLIT_PARALLELISM - m_numCuEncStacks = m_numSplitThreads == 1 ? 1 : NUM_RESERVERD_SPLIT_JOBS; -#else - m_numCuEncStacks = 1; -#endif - - m_cCuEncoder = new EncCu [m_numCuEncStacks]; - m_cInterSearch = new InterSearch [m_numCuEncStacks]; - m_cIntraSearch = new IntraSearch [m_numCuEncStacks]; - m_cTrQuant = new TrQuant [m_numCuEncStacks]; - m_CABACEncoder = new CABACEncoder [m_numCuEncStacks]; - m_cRdCost = new RdCost [m_numCuEncStacks]; - m_CtxCache = new CtxCache [m_numCuEncStacks]; - - for( int jId = 0; jId < m_numCuEncStacks; jId++ ) - { - m_cCuEncoder[jId]. create( this ); - } -#else m_cCuEncoder. create( this ); -#endif #if JVET_J0090_MEMORY_BANDWITH_MEASURE m_cInterSearch.cacheAssign( &m_cacheModel ); #endif @@ -135,19 +111,9 @@ void EncLib::create( const int layerId ) m_cLoopFilter.initEncPicYuvBuffer(m_chromaFormatIDC, Size(getSourceWidth(), getSourceHeight()), getMaxCUWidth()); } -#if ENABLE_SPLIT_PARALLELISM - m_cReshaper = new EncReshape[m_numCuEncStacks]; -#endif if (m_lmcsEnabled) { -#if ENABLE_SPLIT_PARALLELISM - for (int jId = 0; jId < m_numCuEncStacks; jId++) - { - m_cReshaper[jId].createEnc(getSourceWidth(), getSourceHeight(), m_maxCUWidth, m_maxCUHeight, m_bitDepth[COMPONENT_Y]); - } -#else m_cReshaper.createEnc( getSourceWidth(), getSourceHeight(), m_maxCUWidth, m_maxCUHeight, m_bitDepth[COMPONENT_Y]); -#endif } if ( m_RCEnableRateControl ) { @@ -162,14 +128,7 @@ void EncLib::destroy () // destroy processing unit classes m_cGOPEncoder. destroy(); m_cSliceEncoder. destroy(); -#if ENABLE_SPLIT_PARALLELISM - for( int jId = 0; jId < m_numCuEncStacks; jId++ ) - { - m_cCuEncoder[jId].destroy(); - } -#else m_cCuEncoder. destroy(); -#endif if( m_alf ) { m_cEncALF.destroy(); @@ -178,34 +137,9 @@ void EncLib::destroy () m_cEncSAO. destroy(); m_cLoopFilter. destroy(); m_cRateCtrl. destroy(); -#if ENABLE_SPLIT_PARALLELISM - for (int jId = 0; jId < m_numCuEncStacks; jId++) - { - m_cReshaper[jId]. destroy(); - } -#else m_cReshaper. destroy(); -#endif -#if ENABLE_SPLIT_PARALLELISM - for( int jId = 0; jId < m_numCuEncStacks; jId++ ) - { - m_cInterSearch[jId]. destroy(); - m_cIntraSearch[jId]. destroy(); - } -#else m_cInterSearch. destroy(); m_cIntraSearch. destroy(); -#endif - -#if ENABLE_SPLIT_PARALLELISM - delete[] m_cCuEncoder; - delete[] m_cInterSearch; - delete[] m_cIntraSearch; - delete[] m_cTrQuant; - delete[] m_CABACEncoder; - delete[] m_cRdCost; - delete[] m_CtxCache; -#endif return; } @@ -235,13 +169,6 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf ) xInitVPS( sps0 ); xInitOPI(m_opi); xInitDCI(m_dci, sps0); -#if ENABLE_SPLIT_PARALLELISM - if( omp_get_dynamic() ) - { - omp_set_dynamic( false ); - } - omp_set_nested( true ); -#endif if (getUseCompositeRef() || getDependentRAPIndicationSEIEnabled()) { @@ -254,14 +181,7 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf ) m_cRateCtrl.initHrdParam(sps0.getGeneralHrdParameters(), sps0.getOlsHrdParameters(), m_iFrameRate, m_RCInitialCpbFullness); } #endif -#if ENABLE_SPLIT_PARALLELISM - for( int jId = 0; jId < m_numCuEncStacks; jId++ ) - { - m_cRdCost[jId].setCostMode ( m_costMode ); - } -#else m_cRdCost.setCostMode ( m_costMode ); -#endif // initialize PPS pps0.setPicWidthInLumaSamples( m_iSourceWidth ); @@ -373,54 +293,6 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf ) // initialize processing unit classes m_cGOPEncoder. init( this ); m_cSliceEncoder.init( this, sps0 ); -#if ENABLE_SPLIT_PARALLELISM - for( int jId = 0; jId < m_numCuEncStacks; jId++ ) - { - // precache a few objects - for( int i = 0; i < 10; i++ ) - { - auto x = m_CtxCache[jId].get(); - m_CtxCache[jId].cache( x ); - } - - m_cCuEncoder[jId].init( this, sps0, jId ); - - // initialize transform & quantization class - m_cTrQuant[jId].init( jId == 0 ? nullptr : m_cTrQuant[0].getQuant(), - 1 << m_log2MaxTbSize, - - m_useRDOQ, - m_useRDOQTS, -#if T0196_SELECTIVE_RDOQ - m_useSelectiveRDOQ, -#endif - true - ); - - // initialize encoder search class - CABACWriter* cabacEstimator = m_CABACEncoder[jId].getCABACEstimator( &sps0 ); - m_cIntraSearch[jId].init( this, - &m_cTrQuant[jId], - &m_cRdCost[jId], - cabacEstimator, - getCtxCache( jId ), m_maxCUWidth, m_maxCUHeight, floorLog2(m_maxCUWidth) - m_log2MinCUSize - , &m_cReshaper[jId] - , sps0.getBitDepth(CHANNEL_TYPE_LUMA) - ); - m_cInterSearch[jId].init( this, - &m_cTrQuant[jId], - m_iSearchRange, - m_bipredSearchRange, - m_motionEstimationSearchMethod, - getUseCompositeRef(), - m_maxCUWidth, m_maxCUHeight, floorLog2(m_maxCUWidth) - m_log2MinCUSize, &m_cRdCost[jId], cabacEstimator, getCtxCache( jId ) - , &m_cReshaper[jId] - ); - - // link temporary buffets from intra search with inter search to avoid unnecessary memory overhead - m_cInterSearch[jId].setTempBuffers( m_cIntraSearch[jId].getSplitCSBuf(), m_cIntraSearch[jId].getFullCSBuf(), m_cIntraSearch[jId].getSaveCSBuf() ); - } -#else // ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM m_cCuEncoder. init( this, sps0 ); // initialize transform & quantization class @@ -456,7 +328,6 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf ) // link temporary buffets from intra search with inter search to avoid unneccessary memory overhead m_cInterSearch.setTempBuffers( m_cIntraSearch.getSplitCSBuf(), m_cIntraSearch.getFullCSBuf(), m_cIntraSearch.getSaveCSBuf() ); -#endif // ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM m_iMaxRefPicNum = 0; @@ -507,26 +378,12 @@ void EncLib::xInitScalingLists( SPS &sps, APS &aps ) { quant->setFlatScalingList(maxLog2TrDynamicRange, sps.getBitDepths()); quant->setUseScalingList(false); -#if ENABLE_SPLIT_PARALLELISM - for( int jId = 1; jId < m_numCuEncStacks; jId++ ) - { - getTrQuant( jId )->getQuant()->setFlatScalingList( maxLog2TrDynamicRange, sps.getBitDepths() ); - getTrQuant( jId )->getQuant()->setUseScalingList( false ); - } -#endif } else if(getUseScalingListId() == SCALING_LIST_DEFAULT) { aps.getScalingList().setDefaultScalingList (); quant->setScalingList( &( aps.getScalingList() ), maxLog2TrDynamicRange, sps.getBitDepths() ); quant->setUseScalingList(true); -#if ENABLE_SPLIT_PARALLELISM - for( int jId = 1; jId < m_numCuEncStacks; jId++ ) - { - getTrQuant( jId )->getQuant()->setUseScalingList( true ); - } - sps.setDisableScalingMatrixForLfnstBlks(getDisableScalingMatrixForLfnstBlks()); -#endif } else if(getUseScalingListId() == SCALING_LIST_FILE_READ) { @@ -540,12 +397,6 @@ void EncLib::xInitScalingLists( SPS &sps, APS &aps ) aps.getScalingList().setChromaScalingListPresentFlag((sps.getChromaFormatIdc()!=CHROMA_400)); quant->setScalingList( &( aps.getScalingList() ), maxLog2TrDynamicRange, sps.getBitDepths() ); quant->setUseScalingList(true); -#if ENABLE_SPLIT_PARALLELISM - for( int jId = 1; jId < m_numCuEncStacks; jId++ ) - { - getTrQuant( jId )->getQuant()->setUseScalingList( true ); - } -#endif sps.setDisableScalingMatrixForLfnstBlks(getDisableScalingMatrixForLfnstBlks()); } diff --git a/source/Lib/EncoderLib/EncLib.h b/source/Lib/EncoderLib/EncLib.h index a87bdf133..1c05ed00b 100644 --- a/source/Lib/EncoderLib/EncLib.h +++ b/source/Lib/EncoderLib/EncLib.h @@ -79,65 +79,35 @@ private: int m_layerId; // encoder search -#if ENABLE_SPLIT_PARALLELISM - InterSearch *m_cInterSearch; ///< encoder search class - IntraSearch *m_cIntraSearch; ///< encoder search class -#else InterSearch m_cInterSearch; ///< encoder search class IntraSearch m_cIntraSearch; ///< encoder search class -#endif // coding tool -#if ENABLE_SPLIT_PARALLELISM - TrQuant *m_cTrQuant; ///< transform & quantization class -#else TrQuant m_cTrQuant; ///< transform & quantization class -#endif LoopFilter m_cLoopFilter; ///< deblocking filter class EncSampleAdaptiveOffset m_cEncSAO; ///< sample adaptive offset class EncAdaptiveLoopFilter m_cEncALF; HLSWriter m_HLSWriter; ///< CAVLC encoder -#if ENABLE_SPLIT_PARALLELISM - CABACEncoder *m_CABACEncoder; -#else CABACEncoder m_CABACEncoder; -#endif -#if ENABLE_SPLIT_PARALLELISM - EncReshape *m_cReshaper; ///< reshaper class -#else EncReshape m_cReshaper; ///< reshaper class -#endif // processing unit EncGOP m_cGOPEncoder; ///< GOP encoder EncSlice m_cSliceEncoder; ///< slice encoder -#if ENABLE_SPLIT_PARALLELISM - EncCu *m_cCuEncoder; ///< CU encoder -#else EncCu m_cCuEncoder; ///< CU encoder -#endif // SPS ParameterSetMap<SPS>& m_spsMap; ///< SPS. This is the base value. This is copied to PicSym ParameterSetMap<PPS>& m_ppsMap; ///< PPS. This is the base value. This is copied to PicSym ParameterSetMap<APS>& m_apsMap; ///< APS. This is the base value. This is copied to PicSym PicHeader m_picHeader; ///< picture header // RD cost computation -#if ENABLE_SPLIT_PARALLELISM - RdCost *m_cRdCost; ///< RD cost computation class - CtxCache *m_CtxCache; ///< buffer for temporarily stored context models -#else RdCost m_cRdCost; ///< RD cost computation class CtxCache m_CtxCache; ///< buffer for temporarily stored context models -#endif // quality control RateCtrl m_cRateCtrl; ///< Rate control class AUWriterIf* m_AUWriterIf; -#if ENABLE_SPLIT_PARALLELISM - int m_numCuEncStacks; -#endif - #if JVET_J0090_MEMORY_BANDWITH_MEASURE CacheModel m_cacheModel; #endif @@ -195,40 +165,22 @@ public: AUWriterIf* getAUWriterIf () { return m_AUWriterIf; } PicList* getListPic () { return &m_cListPic; } -#if ENABLE_SPLIT_PARALLELISM - InterSearch* getInterSearch ( int jId = 0 ) { return &m_cInterSearch[jId]; } - IntraSearch* getIntraSearch ( int jId = 0 ) { return &m_cIntraSearch[jId]; } - - TrQuant* getTrQuant ( int jId = 0 ) { return &m_cTrQuant[jId]; } -#else InterSearch* getInterSearch () { return &m_cInterSearch; } IntraSearch* getIntraSearch () { return &m_cIntraSearch; } TrQuant* getTrQuant () { return &m_cTrQuant; } -#endif LoopFilter* getLoopFilter () { return &m_cLoopFilter; } EncSampleAdaptiveOffset* getSAO () { return &m_cEncSAO; } EncAdaptiveLoopFilter* getALF () { return &m_cEncALF; } EncGOP* getGOPEncoder () { return &m_cGOPEncoder; } EncSlice* getSliceEncoder () { return &m_cSliceEncoder; } EncHRD* getHRD () { return &m_encHRD; } -#if ENABLE_SPLIT_PARALLELISM - EncCu* getCuEncoder ( int jId = 0 ) { return &m_cCuEncoder[jId]; } -#else EncCu* getCuEncoder () { return &m_cCuEncoder; } -#endif HLSWriter* getHLSWriter () { return &m_HLSWriter; } -#if ENABLE_SPLIT_PARALLELISM - CABACEncoder* getCABACEncoder ( int jId = 0 ) { return &m_CABACEncoder[jId]; } - - RdCost* getRdCost ( int jId = 0 ) { return &m_cRdCost[jId]; } - CtxCache* getCtxCache ( int jId = 0 ) { return &m_CtxCache[jId]; } -#else CABACEncoder* getCABACEncoder () { return &m_CABACEncoder; } RdCost* getRdCost () { return &m_cRdCost; } CtxCache* getCtxCache () { return &m_CtxCache; } -#endif RateCtrl* getRateCtrl () { return &m_cRateCtrl; } @@ -242,16 +194,7 @@ public: const PPS* getPPS( int Id ) { return m_ppsMap.getPS( Id); } const APS* getAPS(int Id) { return m_apsMap.getPS(Id); } -#if ENABLE_SPLIT_PARALLELISM - void setNumCuEncStacks( int n ) { m_numCuEncStacks = n; } - int getNumCuEncStacks() const { return m_numCuEncStacks; } -#endif - -#if ENABLE_SPLIT_PARALLELISM - EncReshape* getReshaper( int jId = 0 ) { return &m_cReshaper[jId]; } -#else EncReshape* getReshaper() { return &m_cReshaper; } -#endif ParameterSetMap<APS>* getApsMap() { return &m_apsMap; } diff --git a/source/Lib/EncoderLib/EncModeCtrl.cpp b/source/Lib/EncoderLib/EncModeCtrl.cpp index ed4f7116e..cc8335fa1 100644 --- a/source/Lib/EncoderLib/EncModeCtrl.cpp +++ b/source/Lib/EncoderLib/EncModeCtrl.cpp @@ -64,15 +64,6 @@ void EncModeCtrl::init( EncCfg *pCfg, RateCtrl *pRateCtrl, RdCost* pRdCost ) bool EncModeCtrl::tryModeMaster( const EncTestMode& encTestmode, const CodingStructure &cs, Partitioner& partitioner ) { -#if ENABLE_SPLIT_PARALLELISM - if( m_ComprCUCtxList.back().isLevelSplitParallel ) - { - if( !parallelJobSelector( encTestmode, cs, partitioner ) ) - { - return false; - } - } -#endif return tryMode( encTestmode, cs, partitioner ); } @@ -248,18 +239,6 @@ int EncModeCtrl::calculateLumaDQP( const CPelBuf& rcOrg ) } #endif -#if ENABLE_SPLIT_PARALLELISM -void EncModeCtrl::copyState( const EncModeCtrl& other, const UnitArea& area ) -{ - m_slice = other.m_slice; - m_fastDeltaQP = other.m_fastDeltaQP; - m_lumaQPOffset = other.m_lumaQPOffset; - m_runNextInParallel - = other.m_runNextInParallel; - m_ComprCUCtxList = other.m_ComprCUCtxList; -} - -#endif void CacheBlkInfoCtrl::create() { const unsigned numPos = MAX_CU_SIZE >> MIN_CU_LOG2; @@ -354,71 +333,7 @@ void CacheBlkInfoCtrl::init( const Slice &slice ) } m_slice_chblk = &slice; -#if ENABLE_SPLIT_PARALLELISM - - m_currTemporalId = 0; -#endif } -#if ENABLE_SPLIT_PARALLELISM - -void CacheBlkInfoCtrl::touch( const UnitArea& area ) -{ - CodedCUInfo& cuInfo = getBlkInfo( area ); - cuInfo.temporalId = m_currTemporalId; -} - -void CacheBlkInfoCtrl::copyState( const CacheBlkInfoCtrl &other, const UnitArea& area ) -{ - m_slice_chblk = other.m_slice_chblk; - - m_currTemporalId = other.m_currTemporalId; - - if( m_slice_chblk->isIntra() ) return; - - const int cuSizeMask = m_slice_chblk->getSPS()->getMaxCUWidth() - 1; - - const int minPosX = ( area.lx() & cuSizeMask ) >> MIN_CU_LOG2; - const int minPosY = ( area.ly() & cuSizeMask ) >> MIN_CU_LOG2; - const int maxPosX = ( area.Y().bottomRight().x & cuSizeMask ) >> MIN_CU_LOG2; - const int maxPosY = ( area.Y().bottomRight().y & cuSizeMask ) >> MIN_CU_LOG2; - - for( unsigned x = minPosX; x <= maxPosX; x++ ) - { - for( unsigned y = minPosY; y <= maxPosY; y++ ) - { - for( int wIdx = 0; wIdx < gp_sizeIdxInfo->numWidths(); wIdx++ ) - { - const int width = gp_sizeIdxInfo->sizeFrom( wIdx ); - - if( m_codedCUInfo[x][y][wIdx] && width <= area.lwidth() && x + ( width >> MIN_CU_LOG2 ) <= ( maxPosX + 1 ) ) - { - for( int hIdx = 0; hIdx < gp_sizeIdxInfo->numHeights(); hIdx++ ) - { - const int height = gp_sizeIdxInfo->sizeFrom( hIdx ); - - if( gp_sizeIdxInfo->isCuSize( height ) && height <= area.lheight() && y + ( height >> MIN_CU_LOG2 ) <= ( maxPosY + 1 ) ) - { - if( other.m_codedCUInfo[x][y][wIdx][hIdx]->temporalId > m_codedCUInfo[x][y][wIdx][hIdx]->temporalId ) - { - *m_codedCUInfo[x][y][wIdx][hIdx] = *other.m_codedCUInfo[x][y][wIdx][hIdx]; - m_codedCUInfo[x][y][wIdx][hIdx]->temporalId = m_currTemporalId; - } - } - else if( y + ( height >> MIN_CU_LOG2 ) > maxPosY + 1 ) - { - break;; - } - } - } - else if( x + ( width >> MIN_CU_LOG2 ) > maxPosX + 1 ) - { - break; - } - } - } - } -} -#endif CodedCUInfo& CacheBlkInfoCtrl::getBlkInfo( const UnitArea& area ) { @@ -461,10 +376,6 @@ void CacheBlkInfoCtrl::setMv( const UnitArea& area, const RefPicList refPicList, m_codedCUInfo[idx1][idx2][idx3][idx4]->saveMv [refPicList][iRefIdx] = rMv; m_codedCUInfo[idx1][idx2][idx3][idx4]->validMv[refPicList][iRefIdx] = true; -#if ENABLE_SPLIT_PARALLELISM - - touch( area ); -#endif } bool CacheBlkInfoCtrl::getMv( const UnitArea& area, const RefPicList refPicList, const int iRefIdx, Mv& rMv ) const @@ -563,13 +474,6 @@ bool SaveLoadEncInfoSbt::saveBestSbt( const UnitArea& area, const uint32_t curPu return true; } -#if ENABLE_SPLIT_PARALLELISM -void SaveLoadEncInfoSbt::copyState(const SaveLoadEncInfoSbt &other) -{ - m_sliceSbt = other.m_sliceSbt; -} -#endif - void SaveLoadEncInfoSbt::resetSaveloadSbt( int maxSbtSize ) { int numSizeIdx = gp_sizeIdxInfo->idxFrom( maxSbtSize ) - MIN_CU_LOG2 + 1; @@ -852,10 +756,6 @@ void BestEncInfoCache::init( const Slice &slice ) } } } -#if ENABLE_SPLIT_PARALLELISM - - m_currTemporalId = 0; -#endif } bool BestEncInfoCache::setFromCs( const CodingStructure& cs, const Partitioner& partitioner ) @@ -993,75 +893,6 @@ bool BestEncInfoCache::setCsFrom( CodingStructure& cs, EncTestMode& testMode, co return true; } -#if ENABLE_SPLIT_PARALLELISM -void BestEncInfoCache::copyState(const BestEncInfoCache &other, const UnitArea &area) -{ - m_slice_bencinf = other.m_slice_bencinf; - m_currTemporalId = other.m_currTemporalId; - - if( m_slice_bencinf->isIntra() ) return; - - const int cuSizeMask = m_slice_bencinf->getSPS()->getMaxCUWidth() - 1; - - const int minPosX = ( area.lx() & cuSizeMask ) >> MIN_CU_LOG2; - const int minPosY = ( area.ly() & cuSizeMask ) >> MIN_CU_LOG2; - const int maxPosX = ( area.Y().bottomRight().x & cuSizeMask ) >> MIN_CU_LOG2; - const int maxPosY = ( area.Y().bottomRight().y & cuSizeMask ) >> MIN_CU_LOG2; - - for( unsigned x = minPosX; x <= maxPosX; x++ ) - { - for( unsigned y = minPosY; y <= maxPosY; y++ ) - { - for( int wIdx = 0; wIdx < gp_sizeIdxInfo->numWidths(); wIdx++ ) - { - const int width = gp_sizeIdxInfo->sizeFrom( wIdx ); - - if( m_bestEncInfo[x][y][wIdx] && width <= area.lwidth() && x + ( width >> MIN_CU_LOG2 ) <= ( maxPosX + 1 ) ) - { - for( int hIdx = 0; hIdx < gp_sizeIdxInfo->numHeights(); hIdx++ ) - { - const int height = gp_sizeIdxInfo->sizeFrom( hIdx ); - - if( gp_sizeIdxInfo->isCuSize( height ) && height <= area.lheight() && y + ( height >> MIN_CU_LOG2 ) <= ( maxPosY + 1 ) ) - { - if( other.m_bestEncInfo[x][y][wIdx][hIdx]->temporalId > m_bestEncInfo[x][y][wIdx][hIdx]->temporalId ) - { - m_bestEncInfo[x][y][wIdx][hIdx]->cu = other.m_bestEncInfo[x][y][wIdx][hIdx]->cu; - m_bestEncInfo[x][y][wIdx][hIdx]->pu = other.m_bestEncInfo[x][y][wIdx][hIdx]->pu; - m_bestEncInfo[x][y][wIdx][hIdx]->numTus = other.m_bestEncInfo[x][y][wIdx][hIdx]->numTus; - m_bestEncInfo[x][y][wIdx][hIdx]->poc = other.m_bestEncInfo[x][y][wIdx][hIdx]->poc; - m_bestEncInfo[x][y][wIdx][hIdx]->testMode = other.m_bestEncInfo[x][y][wIdx][hIdx]->testMode; - - for( int i = 0; i < m_bestEncInfo[x][y][wIdx][hIdx]->numTus; i++ ) - m_bestEncInfo[x][y][wIdx][hIdx]->tus[i] = other.m_bestEncInfo[x][y][wIdx][hIdx]->tus[i]; - } - } - else if( y + ( height >> MIN_CU_LOG2 ) > maxPosY + 1 ) - { - break;; - } - } - } - else if( x + ( width >> MIN_CU_LOG2 ) > maxPosX + 1 ) - { - break; - } - } - } - } -} - -void BestEncInfoCache::touch(const UnitArea &area) -{ - unsigned idx1, idx2, idx3, idx4; - getAreaIdx(area.Y(), *m_slice_bencinf->getPPS()->pcv, idx1, idx2, idx3, idx4); - BestEncodingInfo &encInfo = *m_bestEncInfo[idx1][idx2][idx3][idx4]; - - encInfo.temporalId = m_currTemporalId; -} - -#endif - #endif static bool interHadActive( const ComprCUCtx& ctx ) @@ -1102,9 +933,6 @@ void EncModeCtrlMTnoRQT::initCTUEncoding( const Slice &slice ) CHECK( !m_ComprCUCtxList.empty(), "Mode list is not empty at the beginning of a CTU" ); m_slice = &slice; -#if ENABLE_SPLIT_PARALLELISM - m_runNextInParallel = false; -#endif if( m_pcEncCfg->getUseE0023FastEnc() ) { @@ -1136,19 +964,6 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru m_ComprCUCtxList.push_back( ComprCUCtx( cs, minDepth, maxDepth, NUM_EXTRA_FEATURES ) ); -#if ENABLE_SPLIT_PARALLELISM - if( m_runNextInParallel ) - { - for( auto &level : m_ComprCUCtxList ) - { - CHECK( level.isLevelSplitParallel, "Tring to parallelize a level within parallel execution!" ); - } - CHECK( cs.picture->scheduler.getSplitJobId() == 0, "Trying to run a parallel level although jobId is 0!" ); - m_runNextInParallel = false; - m_ComprCUCtxList.back().isLevelSplitParallel = true; - } - -#endif const CodingUnit* cuLeft = cs.getCU( cs.area.blocks[partitioner.chType].pos().offset( -1, 0 ), partitioner.chType ); const CodingUnit* cuAbove = cs.getCU( cs.area.blocks[partitioner.chType].pos().offset( 0, -1 ), partitioner.chType ); @@ -1746,9 +1561,6 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt { case CU_QUAD_SPLIT: { -#if ENABLE_SPLIT_PARALLELISM - if( !cuECtx.isLevelSplitParallel ) -#endif if( !cuECtx.get<bool>( QT_BEFORE_BT ) && bestCU ) { unsigned maxBTD = cs.pcv->getMaxBtDepth( slice, partitioner.chType ); @@ -1971,12 +1783,7 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt relatedCU.relatedCuIsValid = true; } } -#if ENABLE_SPLIT_PARALLELISM -#if REUSE_CU_RESULTS - BestEncInfoCache::touch(partitioner.currArea()); -#endif - CacheBlkInfoCtrl::touch(partitioner.currArea()); -#endif + cuECtx.set( IS_BEST_NOSPLIT_SKIP, bestCU->skip ); } } @@ -2117,105 +1924,3 @@ bool EncModeCtrlMTnoRQT::useModeResult( const EncTestMode& encTestmode, CodingSt } } -#if ENABLE_SPLIT_PARALLELISM -void EncModeCtrlMTnoRQT::copyState( const EncModeCtrl& other, const UnitArea& area ) -{ - const EncModeCtrlMTnoRQT* pOther = dynamic_cast<const EncModeCtrlMTnoRQT*>( &other ); - - CHECK( !pOther, "Trying to copy state from a different type of controller" ); - - this->EncModeCtrl ::copyState( *pOther, area ); - this->CacheBlkInfoCtrl ::copyState( *pOther, area ); -#if REUSE_CU_RESULTS - this->BestEncInfoCache ::copyState( *pOther, area ); -#endif - this->SaveLoadEncInfoSbt ::copyState( *pOther ); - - m_skipThreshold = pOther->m_skipThreshold; -} - -int EncModeCtrlMTnoRQT::getNumParallelJobs( const CodingStructure &cs, Partitioner& partitioner ) const -{ - int numJobs = 0; - - if( partitioner.canSplit( CU_TRIH_SPLIT, cs ) ) - { - numJobs = 6; - } - else if( partitioner.canSplit( CU_TRIV_SPLIT, cs ) ) - { - numJobs = 5; - } - else if( partitioner.canSplit( CU_HORZ_SPLIT, cs ) ) - { - numJobs = 4; - } - else if( partitioner.canSplit( CU_VERT_SPLIT, cs ) ) - { - numJobs = 3; - } - else if( partitioner.canSplit( CU_QUAD_SPLIT, cs ) ) - { - numJobs = 2; - } - else if( partitioner.canSplit( CU_DONT_SPLIT, cs ) ) - { - numJobs = 1; - } - - CHECK( numJobs >= NUM_RESERVERD_SPLIT_JOBS, "More jobs specified than allowed" ); - - return numJobs; -} - -bool EncModeCtrlMTnoRQT::isParallelSplit( const CodingStructure &cs, Partitioner& partitioner ) const -{ - if( partitioner.getImplicitSplit( cs ) != CU_DONT_SPLIT || cs.picture->scheduler.getSplitJobId() != 0 ) return false; - if( cs.pps->getUseDQP() && partitioner.currQgEnable() ) return false; - const int numJobs = getNumParallelJobs( cs, partitioner ); - const int numPxl = partitioner.currArea().Y().area(); - const int parlAt = m_pcEncCfg->getNumSplitThreads() <= 3 ? 1024 : 256; - if( cs.slice->isIntra() && numJobs > 2 && ( numPxl == parlAt || !partitioner.canSplit( CU_QUAD_SPLIT, cs ) ) ) return true; - if( !cs.slice->isIntra() && numJobs > 1 && ( numPxl == parlAt || !partitioner.canSplit( CU_QUAD_SPLIT, cs ) ) ) return true; - return false; -} - -bool EncModeCtrlMTnoRQT::parallelJobSelector( const EncTestMode& encTestmode, const CodingStructure &cs, Partitioner& partitioner ) const -{ - // Job descriptors - // - 1: all non-split modes - // - 2: QT-split - // - 3: all vertical modes but TT_V - // - 4: all horizontal modes but TT_H - // - 5: TT_V - // - 6: TT_H - switch( cs.picture->scheduler.getSplitJobId() ) - { - case 1: - // be sure to execute post dont split - return !isModeSplit( encTestmode ); - break; - case 2: - return encTestmode.type == ETM_SPLIT_QT; - break; - case 3: - return encTestmode.type == ETM_SPLIT_BT_V; - break; - case 4: - return encTestmode.type == ETM_SPLIT_BT_H; - break; - case 5: - return encTestmode.type == ETM_SPLIT_TT_V; - break; - case 6: - return encTestmode.type == ETM_SPLIT_TT_H; - break; - default: - THROW( "Unknown job-ID for parallelization of EncModeCtrlMTnoRQT: " << cs.picture->scheduler.getSplitJobId() ); - break; - } -} - -#endif - - diff --git a/source/Lib/EncoderLib/EncModeCtrl.h b/source/Lib/EncoderLib/EncModeCtrl.h index 740f1a5e2..b590cb932 100644 --- a/source/Lib/EncoderLib/EncModeCtrl.h +++ b/source/Lib/EncoderLib/EncModeCtrl.h @@ -194,10 +194,6 @@ struct ComprCUCtx , skipSecondMTSPass ( false ) , interHad (std::numeric_limits<Distortion>::max()) -#if ENABLE_SPLIT_PARALLELISM - , isLevelSplitParallel - ( false ) -#endif , bestCostWithoutSplitFlags( MAX_DOUBLE ) , bestCostMtsFirstPassNoIsp( MAX_DOUBLE ) , bestCostIsp ( MAX_DOUBLE ) @@ -245,9 +241,6 @@ struct ComprCUCtx double bestMtsSize2Nx2N1stPass; bool skipSecondMTSPass; Distortion interHad; -#if ENABLE_SPLIT_PARALLELISM - bool isLevelSplitParallel; -#endif double bestCostWithoutSplitFlags; double bestCostMtsFirstPassNoIsp; double bestCostIsp; @@ -286,9 +279,6 @@ protected: #endif bool m_fastDeltaQP; static_vector<ComprCUCtx, ( MAX_CU_DEPTH << 2 )> m_ComprCUCtxList; -#if ENABLE_SPLIT_PARALLELISM - int m_runNextInParallel; -#endif InterSearch* m_pcInterSearch; bool m_doPlt; @@ -311,13 +301,6 @@ public: virtual bool useModeResult ( const EncTestMode& encTestmode, CodingStructure*& tempCS, Partitioner& partitioner ) = 0; virtual bool checkSkipOtherLfnst ( const EncTestMode& encTestmode, CodingStructure*& tempCS, Partitioner& partitioner ) = 0; -#if ENABLE_SPLIT_PARALLELISM - virtual void copyState ( const EncModeCtrl& other, const UnitArea& area ); - virtual int getNumParallelJobs ( const CodingStructure &cs, Partitioner& partitioner ) const { return 1; } - virtual bool isParallelSplit ( const CodingStructure &cs, Partitioner& partitioner ) const { return false; } - virtual bool parallelJobSelector ( const EncTestMode& encTestmode, const CodingStructure &cs, Partitioner& partitioner ) const { return true; } - void setParallelSplit ( bool val ) { m_runNextInParallel = val; } -#endif void init ( EncCfg *pCfg, RateCtrl *pRateCtrl, RdCost *pRdCost ); bool tryModeMaster ( const EncTestMode& encTestmode, const CodingStructure &cs, Partitioner& partitioner ); @@ -395,13 +378,7 @@ struct SaveLoadStructSbt class SaveLoadEncInfoSbt { protected: -#if ENABLE_SPLIT_PARALLELISM -public: -#endif void init( const Slice &slice ); -#if ENABLE_SPLIT_PARALLELISM -protected: -#endif void create(); void destroy(); @@ -414,9 +391,6 @@ public: void resetSaveloadSbt( int maxSbtSize ); uint16_t findBestSbt( const UnitArea& area, const uint32_t curPuSse ); bool saveBestSbt( const UnitArea& area, const uint32_t curPuSse, const uint8_t curPuSbt, const uint8_t curPuTrs ); -#if ENABLE_SPLIT_PARALLELISM - void copyState(const SaveLoadEncInfoSbt& other); -#endif }; static const int MAX_STORED_CU_INFO_REFS = 4; @@ -439,12 +413,6 @@ struct CodedCUInfo double bestNonDCT2Cost; bool relatedCuIsValid; uint8_t bestISPIntraMode; - -#if ENABLE_SPLIT_PARALLELISM - - uint64_t - temporalId; -#endif }; class CacheBlkInfoCtrl @@ -460,21 +428,7 @@ protected: void create (); void destroy (); -#if ENABLE_SPLIT_PARALLELISM -public: -#endif void init ( const Slice &slice ); -#if ENABLE_SPLIT_PARALLELISM -private: - uint64_t - m_currTemporalId; -public: - void tick () { m_currTemporalId++; CHECK( m_currTemporalId <= 0, "Problem with integer overflow!" ); } - // mark the state of the blk as changed within the current temporal id - void copyState( const CacheBlkInfoCtrl &other, const UnitArea& area ); -protected: - void touch ( const UnitArea& area ); -#endif CodedCUInfo& getBlkInfo( const UnitArea& area ); @@ -508,10 +462,6 @@ struct BestEncodingInfo EncTestMode testMode; int poc; - -#if ENABLE_SPLIT_PARALLELISM - int64_t temporalId; -#endif }; class BestEncInfoCache @@ -526,9 +476,6 @@ private: bool *m_runType; CodingStructure m_dummyCS; XUCache m_dummyCache; -#if ENABLE_SPLIT_PARALLELISM - int64_t m_currTemporalId; -#endif protected: @@ -537,19 +484,10 @@ protected: bool setFromCs( const CodingStructure& cs, const Partitioner& partitioner ); bool isValid ( const CodingStructure &cs, const Partitioner &partitioner, int qp ); - -#if ENABLE_SPLIT_PARALLELISM - void touch ( const UnitArea& area ); -#endif public: BestEncInfoCache() : m_slice_bencinf( nullptr ), m_dummyCS( m_dummyCache.cuCache, m_dummyCache.puCache, m_dummyCache.tuCache ) {} virtual ~BestEncInfoCache() {} - -#if ENABLE_SPLIT_PARALLELISM - void copyState( const BestEncInfoCache &other, const UnitArea &area ); - void tick () { m_currTemporalId++; CHECK( m_currTemporalId <= 0, "Problem with integer overflow!" ); } -#endif void init ( const Slice &slice ); bool setCsFrom( CodingStructure& cs, EncTestMode& testMode, const Partitioner& partitioner ) const; }; @@ -602,13 +540,6 @@ public: virtual bool tryMode ( const EncTestMode& encTestmode, const CodingStructure &cs, Partitioner& partitioner ); virtual bool useModeResult ( const EncTestMode& encTestmode, CodingStructure*& tempCS, Partitioner& partitioner ); -#if ENABLE_SPLIT_PARALLELISM - virtual void copyState ( const EncModeCtrl& other, const UnitArea& area ); - - virtual int getNumParallelJobs ( const CodingStructure &cs, Partitioner& partitioner ) const; - virtual bool isParallelSplit ( const CodingStructure &cs, Partitioner& partitioner ) const; - virtual bool parallelJobSelector( const EncTestMode& encTestmode, const CodingStructure &cs, Partitioner& partitioner ) const; -#endif virtual bool checkSkipOtherLfnst( const EncTestMode& encTestmode, CodingStructure*& tempCS, Partitioner& partitioner ); }; diff --git a/source/Lib/EncoderLib/EncReshape.cpp b/source/Lib/EncoderLib/EncReshape.cpp index a4de78ad2..ac70e08d4 100644 --- a/source/Lib/EncoderLib/EncReshape.cpp +++ b/source/Lib/EncoderLib/EncReshape.cpp @@ -1396,45 +1396,5 @@ void EncReshape::adjustLmcsPivot() } } } - -#if ENABLE_SPLIT_PARALLELISM -void EncReshape::copyState(const EncReshape &other) -{ - m_srcReshaped = other.m_srcReshaped; - m_picWidth = other.m_picWidth; - m_picHeight = other.m_picHeight; - m_maxCUWidth = other.m_maxCUWidth; - m_maxCUHeight = other.m_maxCUHeight; - m_widthInCtus = other.m_widthInCtus; - m_heightInCtus = other.m_heightInCtus; - m_numCtuInFrame = other.m_numCtuInFrame; - m_exceedSTD = other.m_exceedSTD; - m_binImportance = other.m_binImportance; - m_tcase = other.m_tcase; - m_rateAdpMode = other.m_rateAdpMode; - m_useAdpCW = other.m_useAdpCW; - m_initCWAnalyze = other.m_initCWAnalyze; - m_reshapeCW = other.m_reshapeCW; - memcpy( m_cwLumaWeight, other.m_cwLumaWeight, sizeof( m_cwLumaWeight ) ); - m_chromaWeight = other.m_chromaWeight; - m_chromaAdj = other.m_chromaAdj; - - m_sliceReshapeInfo = other.m_sliceReshapeInfo; - m_CTUFlag = other.m_CTUFlag; - m_recReshaped = other.m_recReshaped; - m_invLUT = other.m_invLUT; - m_fwdLUT = other.m_fwdLUT; - m_chromaAdjHelpLUT = other.m_chromaAdjHelpLUT; - m_binCW = other.m_binCW; - m_initCW = other.m_initCW; - m_reshape = other.m_reshape; - m_reshapePivot = other.m_reshapePivot; - m_inputPivot = other.m_inputPivot; - m_fwdScaleCoef = other.m_fwdScaleCoef; - m_invScaleCoef = other.m_invScaleCoef; - m_lumaBD = other.m_lumaBD; - m_reshapeLUTSize = other.m_reshapeLUTSize; -} -#endif // //! \} diff --git a/source/Lib/EncoderLib/EncReshape.h b/source/Lib/EncoderLib/EncReshape.h index a16cd63e6..72efd26fe 100644 --- a/source/Lib/EncoderLib/EncReshape.h +++ b/source/Lib/EncoderLib/EncReshape.h @@ -115,10 +115,6 @@ public: Pel * getWeightTable() { return m_cwLumaWeight; } double getCWeight() { return m_chromaWeight; } void adjustLmcsPivot(); - -#if ENABLE_SPLIT_PARALLELISM - void copyState(const EncReshape& other); -#endif };// END CLASS DEFINITION EncReshape //! \} diff --git a/source/Lib/EncoderLib/EncSlice.cpp b/source/Lib/EncoderLib/EncSlice.cpp index 637c50754..e3d48195d 100644 --- a/source/Lib/EncoderLib/EncSlice.cpp +++ b/source/Lib/EncoderLib/EncSlice.cpp @@ -1323,14 +1323,6 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c m_CABACEstimator->initCtxModels( *pcSlice ); -#if ENABLE_SPLIT_PARALLELISM - for( int jId = 1; jId < m_pcLib->getNumCuEncStacks(); jId++ ) - { - CABACWriter* cw = m_pcLib->getCABACEncoder( jId )->getCABACEstimator( pcSlice->getSPS() ); - cw->initCtxModels( *pcSlice ); - } - -#endif m_pcCuEncoder->getModeCtrl()->setFastDeltaQp(bFastDeltaQP); @@ -1376,13 +1368,6 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c (m_pcCfg->getBaseQP() >= 38) || (m_pcCfg->getSourceWidth() <= 512 && m_pcCfg->getSourceHeight() <= 320), m_adaptedLumaQP)) { m_CABACEstimator->initCtxModels (*pcSlice); -#if ENABLE_SPLIT_PARALLELISM - for (int jId = 1; jId < m_pcLib->getNumCuEncStacks(); jId++) - { - CABACWriter* cw = m_pcLib->getCABACEncoder (jId)->getCABACEstimator (pcSlice->getSPS()); - cw->initCtxModels (*pcSlice); - } -#endif pcPic->m_prevQP[0] = pcPic->m_prevQP[1] = pcSlice->getSliceQp(); if (pcSlice->getFirstCtuRsAddrInSlice() == 0) { @@ -1506,12 +1491,9 @@ void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, cons const int iQPIndex = pcSlice->getSliceQpBase(); #endif -#if ENABLE_SPLIT_PARALLELISM - const int dataId = 0; -#endif - CABACWriter* pCABACWriter = pEncLib->getCABACEncoder( PARL_PARAM0( dataId ) )->getCABACEstimator( pcSlice->getSPS() ); - TrQuant* pTrQuant = pEncLib->getTrQuant( PARL_PARAM0( dataId ) ); - RdCost* pRdCost = pEncLib->getRdCost( PARL_PARAM0( dataId ) ); + CABACWriter* pCABACWriter = pEncLib->getCABACEncoder()->getCABACEstimator( pcSlice->getSPS() ); + TrQuant* pTrQuant = pEncLib->getTrQuant(); + RdCost* pRdCost = pEncLib->getRdCost(); EncCfg* pCfg = pEncLib; RateCtrl* pRateCtrl = pEncLib->getRateCtrl(); pRdCost->setLosslessRDCost(pcSlice->isLossless()); @@ -1703,13 +1685,6 @@ void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, cons if (pcSlice->getSPS()->getUseLmcs()) { m_pcCuEncoder->setDecCuReshaperInEncCU(m_pcLib->getReshaper(), pcSlice->getSPS()->getChromaFormatIdc()); - -#if ENABLE_SPLIT_PARALLELISM - for (int jId = 1; jId < m_pcLib->getNumCuEncStacks(); jId++) - { - m_pcLib->getCuEncoder(jId)->setDecCuReshaperInEncCU(m_pcLib->getReshaper(jId), pcSlice->getSPS()->getChromaFormatIdc()); - } -#endif } if( !cs.slice->isIntra() && pCfg->getMCTSEncConstraint() ) { @@ -1728,13 +1703,7 @@ void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, cons pCABACWriter->coding_tree_unit( cs, ctuArea, prevQP, ctuRsAddr, true, true ); const int numberOfWrittenBits = int( pCABACWriter->getEstFracBits() >> SCALE_BITS ); -#if ENABLE_SPLIT_PARALLELISM -#pragma omp critical -#endif pcSlice->setSliceBits( ( uint32_t ) ( pcSlice->getSliceBits() + numberOfWrittenBits ) ); -#if ENABLE_SPLIT_PARALLELISM -#pragma omp critical -#endif // Store probabilities of first CTU in line into buffer - used only if wavefront-parallel-processing is enabled. if( cs.pps->ctuIsTileColBd( ctuXPosInCtus ) && pEncLib->getEntropyCodingSyncEnabledFlag() ) diff --git a/source/Lib/EncoderLib/InterSearch.cpp b/source/Lib/EncoderLib/InterSearch.cpp index eecee45c2..e625a05db 100644 --- a/source/Lib/EncoderLib/InterSearch.cpp +++ b/source/Lib/EncoderLib/InterSearch.cpp @@ -177,13 +177,6 @@ void InterSearch::setTempBuffers( CodingStructure ****pSplitCS, CodingStructure m_pSaveCS = pSaveCS; } -#if ENABLE_SPLIT_PARALLELISM -void InterSearch::copyState( const InterSearch& other ) -{ - memcpy( m_aaiAdaptSR, other.m_aaiAdaptSR, sizeof( m_aaiAdaptSR ) ); -} -#endif - InterSearch::~InterSearch() { if (m_isInitialized) diff --git a/source/Lib/EncoderLib/InterSearch.h b/source/Lib/EncoderLib/InterSearch.h index 59e96de5f..51800ad05 100644 --- a/source/Lib/EncoderLib/InterSearch.h +++ b/source/Lib/EncoderLib/InterSearch.h @@ -206,9 +206,6 @@ public: void setTempBuffers (CodingStructure ****pSlitCS, CodingStructure ****pFullCS, CodingStructure **pSaveCS ); void resetCtuRecord () { m_ctuRecord.clear(); } -#if ENABLE_SPLIT_PARALLELISM - void copyState ( const InterSearch& other ); -#endif void setAffineModeSelected ( bool flag) { m_affineModeSelected = flag; } void resetAffineMVList() { m_affMVListIdx = 0; m_affMVListSize = 0; } void savePrevAffMVInfo(int idx, AffineMVInfo &tmpMVInfo, bool& isSaved) diff --git a/source/Lib/Utilities/CMakeLists.txt b/source/Lib/Utilities/CMakeLists.txt index 83b05cf5d..5eaaad366 100644 --- a/source/Lib/Utilities/CMakeLists.txt +++ b/source/Lib/Utilities/CMakeLists.txt @@ -28,26 +28,6 @@ if( SET_ENABLE_TRACING ) endif() endif() -if( OpenMP_FOUND ) - if( SET_ENABLE_SPLIT_PARALLELISM ) - if( ENABLE_SPLIT_PARALLELISM ) - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=1 ) - else() - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 ) - endif() - endif() - if( SET_ENABLE_WPP_PARALLELISM ) - if( ENABLE_WPP_PARALLELISM ) - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=1 ) - else() - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 ) - endif() - endif() -else() - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 ) - target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 ) -endif() - target_include_directories( ${LIB_NAME} PUBLIC . .. ) target_link_libraries( ${LIB_NAME} CommonLib Threads::Threads ) -- GitLab