diff --git a/CMakeLists.txt b/CMakeLists.txt index 0157bdb967b7ba9f1b0a7e4bc3262d3d6053db03..f39624da5d8da8693b0a3d7c68a6fb44cdbb75d6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -72,14 +72,16 @@ if( XCODE ) -Wno-deprecated-register -Wno-pessimizing-move -Wno-absolute-value - -Wno-unused-const-variable ) + -Wno-unused-const-variable + -Wno-unused-command-line-argument ) else() bb_enable_warnings( clang warnings-as-errors -Wno-unknown-attributes -Wno-deprecated-register -Wno-pessimizing-move -Wno-absolute-value - -Wno-unused-const-variable ) + -Wno-unused-const-variable + -Wno-unused-command-line-argument ) endif() #bb_enable_warnings( clang warnings-as-errors ) @@ -87,7 +89,7 @@ endif() bb_enable_warnings( msvc warnings-as-errors "/wd4996" "/wd4244" ) # enable sse4.1 build for all source files for gcc and clang -if( UNIX OR MINGW ) +if( (UNIX OR MINGW) AND NOT (CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64") ) add_compile_options( "-msse4.1" ) set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-aliasing" ) endif() diff --git a/cmake/CMakeBuild/cmake/modules/BBuildEnv.cmake b/cmake/CMakeBuild/cmake/modules/BBuildEnv.cmake index 07532ba3ef188edb7a1f52d08434ff82341b0eab..82a7f51ba4c2dbb2e94eeb025d1562a29620373e 100644 --- a/cmake/CMakeBuild/cmake/modules/BBuildEnv.cmake +++ b/cmake/CMakeBuild/cmake/modules/BBuildEnv.cmake @@ -873,6 +873,9 @@ function( _bb_query_system_info system_info_ ) endif() elseif( CMAKE_HOST_APPLE ) set( _system_info "macosx" ) + if( CMAKE_SYSTEM_PROCESSOR MATCHES "arm64" ) + set( _os_arch "arm64" ) + endif() if( NOT CMAKE_VERSION VERSION_LESS 3.10.0 ) # sw_vers -productVersion cmake_host_system_information( RESULT _lsb_distro_version QUERY OS_RELEASE ) @@ -973,7 +976,9 @@ function( _bb_get_platform_dir platform_dir_ ) set( _platform_dir "${CMAKE_SYSTEM_PROCESSOR}" ) endif() else() - if( CMAKE_SIZEOF_VOID_P EQUAL 8 ) + if( CMAKE_SYSTEM_PROCESSOR MATCHES "arm64" ) + set( _platform_dir "arm64" ) + elseif( CMAKE_SIZEOF_VOID_P EQUAL 8 ) set( 
_platform_dir "x86_64" ) else() set( _platform_dir "x86" ) diff --git a/source/App/BitstreamExtractorApp/BitstreamExtractorApp.cpp b/source/App/BitstreamExtractorApp/BitstreamExtractorApp.cpp index 15009db24e9c9fe261458cd3d85f0053b05f4856..7e2d123f78b8c8595f5a91de887f54e32f8d676c 100644 --- a/source/App/BitstreamExtractorApp/BitstreamExtractorApp.cpp +++ b/source/App/BitstreamExtractorApp/BitstreamExtractorApp.cpp @@ -569,7 +569,6 @@ uint32_t BitstreamExtractorApp::decode() bitstreamFileIn.clear(); bitstreamFileIn.seekg( 0, std::ios::beg ); - int unitCnt = 0; bool lastSliceWritten= false; // stores status of previous slice for associated filler data NAL units VPS *vpsIdZero = new VPS(); @@ -865,7 +864,6 @@ uint32_t BitstreamExtractorApp::decode() { m_prevPicPOC = slice.getPOC(); } - unitCnt++; if( writeInpuNalUnitToStream ) { diff --git a/source/Lib/CommonAnalyserLib/CMakeLists.txt b/source/Lib/CommonAnalyserLib/CMakeLists.txt index c2779b4b3b0df6c4b171cd2f3a3d76739fe14fef..754a1053b4eb3d9cd0c22e0cdddfecee24acead1 100644 --- a/source/Lib/CommonAnalyserLib/CMakeLists.txt +++ b/source/Lib/CommonAnalyserLib/CMakeLists.txt @@ -6,29 +6,31 @@ if( MSVC ) file( GLOB NATVIS_FILES "../../VisualStudio/*.natvis" ) endif() -# get source files -file( GLOB BASE_SRC_FILES "../CommonLib/*.cpp" ) +if ( NOT (CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64") ) + # get source files + file( GLOB BASE_SRC_FILES "../CommonLib/*.cpp" ) -# get include files -file( GLOB BASE_INC_FILES "../CommonLib/*.h" ) + # get include files + file( GLOB BASE_INC_FILES "../CommonLib/*.h" ) -# get x86 source files -file( GLOB X86_SRC_FILES "../CommonLib/x86/*.cpp" ) + # get x86 source files + file( GLOB X86_SRC_FILES "../CommonLib/x86/*.cpp" ) -# get x86 include files -file( GLOB X86_INC_FILES "../CommonLib/x86/*.h" ) + # get x86 include files + file( GLOB X86_INC_FILES "../CommonLib/x86/*.h" ) -# get avx source files -file( GLOB AVX_SRC_FILES "../CommonLib/x86/avx/*.cpp" ) + # get avx source files + file( GLOB 
AVX_SRC_FILES "../CommonLib/x86/avx/*.cpp" ) -# get avx2 source files -file( GLOB AVX2_SRC_FILES "../CommonLib/x86/avx2/*.cpp" ) + # get avx2 source files + file( GLOB AVX2_SRC_FILES "../CommonLib/x86/avx2/*.cpp" ) -# get sse4.1 source files -file( GLOB SSE41_SRC_FILES "../CommonLib/x86/sse41/*.cpp" ) + # get sse4.1 source files + file( GLOB SSE41_SRC_FILES "../CommonLib/x86/sse41/*.cpp" ) -# get sse4.2 source files -file( GLOB SSE42_SRC_FILES "../CommonLib/x86/sse42/*.cpp" ) + # get sse4.2 source files + file( GLOB SSE42_SRC_FILES "../CommonLib/x86/sse42/*.cpp" ) +endif() # get libmd5 source files file( GLOB MD5_SRC_FILES "../libmd5/*.cpp" ) @@ -71,32 +73,37 @@ endif() target_include_directories( ${LIB_NAME} PUBLIC ../CommonLib/. ../CommonLib/.. ../CommonLib/x86 ../libmd5 ) target_link_libraries( ${LIB_NAME} ) -# set needed compile definitions -set_property( SOURCE ${SSE41_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_SSE41 ) -set_property( SOURCE ${SSE42_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_SSE42 ) -set_property( SOURCE ${AVX_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_AVX ) -set_property( SOURCE ${AVX2_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_AVX2 ) -# set needed compile flags -if( MSVC ) - set_property( SOURCE ${AVX_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "/arch:AVX" ) - set_property( SOURCE ${AVX2_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2" ) -elseif( UNIX OR MINGW ) - set_property( SOURCE ${SSE41_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "-msse4.1" ) - set_property( SOURCE ${SSE42_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "-msse4.2" ) - set_property( SOURCE ${AVX_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "-mavx" ) - set_property( SOURCE ${AVX2_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "-mavx2" ) -endif() +if ( CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64" ) + set_property( SOURCE NNFilterSet0.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 ") + set_property( SOURCE NNFilterSet1.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 
") +else() + # set needed compile definitions + set_property( SOURCE ${SSE41_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_SSE41 ) + set_property( SOURCE ${SSE42_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_SSE42 ) + set_property( SOURCE ${AVX_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_AVX ) + set_property( SOURCE ${AVX2_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_AVX2 ) + # set needed compile flags + if( MSVC ) + set_property( SOURCE ${AVX_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "/arch:AVX" ) + set_property( SOURCE ${AVX2_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2" ) + elseif( UNIX OR MINGW ) + set_property( SOURCE ${SSE41_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "-msse4.1" ) + set_property( SOURCE ${SSE42_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "-msse4.2" ) + set_property( SOURCE ${AVX_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "-mavx" ) + set_property( SOURCE ${AVX2_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "-mavx2" ) + endif() -if( MSVC ) - set_property( SOURCE NNFilterSet0.cpp APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ") - set_property( SOURCE NNFilterSet1.cpp APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ") -elseif( UNIX OR MINGW ) - if( NNLF_BUILD_WITH_AVX512 STREQUAL "1" ) - set_property( SOURCE NNFilterSet0.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx512f -mavx512bw -mavx512dq ") - set_property( SOURCE NNFilterSet1.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx512f -mavx512bw -mavx512dq ") - else() - set_property( SOURCE NNFilterSet0.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2") - set_property( SOURCE NNFilterSet1.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2") + if( MSVC ) + set_property( SOURCE NNFilterSet0.cpp APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ") + set_property( SOURCE NNFilterSet1.cpp APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ") + elseif( UNIX OR MINGW ) + if( NNLF_BUILD_WITH_AVX512 STREQUAL "1" ) + set_property( SOURCE NNFilterSet0.cpp APPEND 
PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx512f -mavx512bw -mavx512dq ") + set_property( SOURCE NNFilterSet1.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx512f -mavx512bw -mavx512dq ") + else() + set_property( SOURCE NNFilterSet0.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2") + set_property( SOURCE NNFilterSet1.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2") + endif() endif() endif() diff --git a/source/Lib/CommonLib/AdaptiveLoopFilter.cpp b/source/Lib/CommonLib/AdaptiveLoopFilter.cpp index 91b0490bd400d704bedeab8ca3c74178aea977b0..1b3bfce2664648811201095a2c71eaff5f7c5232 100644 --- a/source/Lib/CommonLib/AdaptiveLoopFilter.cpp +++ b/source/Lib/CommonLib/AdaptiveLoopFilter.cpp @@ -307,7 +307,6 @@ void AdaptiveLoopFilter::applyCcAlfFilter(CodingStructure &cs, ComponentID compI int horVirBndryPos[] = { 0, 0, 0 }; int verVirBndryPos[] = { 0, 0, 0 }; - int ctuIdx = 0; for( int yPos = 0; yPos < m_picHeight; yPos += m_maxCUHeight ) { for( int xPos = 0; xPos < m_picWidth; xPos += m_maxCUWidth ) @@ -394,7 +393,6 @@ void AdaptiveLoopFilter::applyCcAlfFilter(CodingStructure &cs, ComponentID compI m_alfVBLumaPos); } } - ctuIdx++; } } } diff --git a/source/Lib/CommonLib/CMakeLists.txt b/source/Lib/CommonLib/CMakeLists.txt index 455667a5371236aab1f7fb52c97c4a2bd8b82e5c..ff02c773dd4a026c2d1a5762b5140b31318d94d6 100644 --- a/source/Lib/CommonLib/CMakeLists.txt +++ b/source/Lib/CommonLib/CMakeLists.txt @@ -12,23 +12,25 @@ file( GLOB BASE_SRC_FILES "*.cpp" ) # get include files file( GLOB BASE_INC_FILES "*.h" ) -# get x86 source files -file( GLOB X86_SRC_FILES "x86/*.cpp" ) +if (NOT (CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64") ) + # get x86 source files + file( GLOB X86_SRC_FILES "x86/*.cpp" ) -# get x86 include files -file( GLOB X86_INC_FILES "x86/*.h" ) + # get x86 include files + file( GLOB X86_INC_FILES "x86/*.h" ) -# get avx source files -file( GLOB AVX_SRC_FILES "x86/avx/*.cpp" ) + # get avx source files + file( GLOB AVX_SRC_FILES "x86/avx/*.cpp" ) -# get avx2 
source files -file( GLOB AVX2_SRC_FILES "x86/avx2/*.cpp" ) + # get avx2 source files + file( GLOB AVX2_SRC_FILES "x86/avx2/*.cpp" ) -# get sse4.2 source files -file( GLOB SSE42_SRC_FILES "x86/sse42/*.cpp" ) + # get sse4.2 source files + file( GLOB SSE42_SRC_FILES "x86/sse42/*.cpp" ) -# get sse4.1 source files -file( GLOB SSE41_SRC_FILES "x86/sse41/*.cpp" ) + # get sse4.1 source files + file( GLOB SSE41_SRC_FILES "x86/sse41/*.cpp" ) +endif() # get libmd5 source files file( GLOB MD5_SRC_FILES "../libmd5/*.cpp" ) @@ -68,53 +70,65 @@ endif() target_include_directories( ${LIB_NAME} PUBLIC . .. ./x86 ../libmd5 ) target_link_libraries( ${LIB_NAME} ) -# set needed compile definitions -set_property( SOURCE ${SSE41_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_SSE41 ) -set_property( SOURCE ${SSE42_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_SSE42 ) -set_property( SOURCE ${AVX_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_AVX ) -set_property( SOURCE ${AVX2_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_AVX2 ) -# set needed compile flags -if( MSVC ) - set_property( SOURCE ${AVX_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "/arch:AVX" ) - set_property( SOURCE ${AVX2_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2" ) -elseif( UNIX OR MINGW ) - set_property( SOURCE ${SSE41_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "-msse4.1" ) - set_property( SOURCE ${SSE42_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "-msse4.2" ) - set_property( SOURCE ${AVX_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "-mavx" ) - set_property( SOURCE ${AVX2_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "-mavx2" ) -endif() +if ( CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64" ) + set_property( SOURCE NNSRUnified.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -ffast-math" ) + set_property( SOURCE NNSuperResolution.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -ffast-math" ) + set_property( SOURCE SEINeuralNetworkPostFiltering.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -ffast-math" ) + set_property( SOURCE 
NNInference.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -ffast-math" ) + set_property( SOURCE NNFilterSet0.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -ffast-math" ) + set_property( SOURCE NNFilterSet1.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -ffast-math" ) + set_property( SOURCE NNFilterSetLC.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -ffast-math" ) + set_property( SOURCE NNFilterUnified.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -ffast-math" ) + set_property( SOURCE intra_pred_sadl.cpp APPEND PROPERTY COMPILE_FLAGS "-ffast-math -DNDEBUG=1 " ) +else() + # set needed compile definitions + set_property( SOURCE ${SSE41_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_SSE41 ) + set_property( SOURCE ${SSE42_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_SSE42 ) + set_property( SOURCE ${AVX_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_AVX ) + set_property( SOURCE ${AVX2_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_AVX2 ) + # set needed compile flags + if( MSVC ) + set_property( SOURCE ${AVX_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "/arch:AVX" ) + set_property( SOURCE ${AVX2_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2" ) + elseif( UNIX OR MINGW ) + set_property( SOURCE ${SSE41_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "-msse4.1" ) + set_property( SOURCE ${SSE42_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "-msse4.2" ) + set_property( SOURCE ${AVX_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "-mavx" ) + set_property( SOURCE ${AVX2_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "-mavx2" ) + endif() -if( MSVC ) - set_property( SOURCE NNSRUnified.cpp APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ") - set_property( SOURCE NNSuperResolution.cpp APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ") - set_property( SOURCE SEINeuralNetworkPostFiltering.cpp APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ") - set_property( SOURCE NNInference.cpp APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ") - set_property( SOURCE NNFilterSet0.cpp 
APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ") - set_property( SOURCE NNFilterSet1.cpp APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ") - set_property( SOURCE NNFilterSetLC.cpp APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ") - set_property( SOURCE NNFilterUnified.cpp.cpp APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ") - set_property( SOURCE intra_pred_sadl.cpp APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ") -elseif( UNIX OR MINGW ) - if( NNLF_BUILD_WITH_AVX512 STREQUAL "1" ) - set_property( SOURCE NNSRUnified.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx512f -mavx512bw -ffast-math") - set_property( SOURCE NNSuperResolution.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx512f -mavx512bw -ffast-math") - set_property( SOURCE SEINeuralNetworkPostFiltering.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx512f -mavx512bw -ffast-math") - set_property( SOURCE NNInference.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx512f -mavx512bw -mavx512dq -ffast-math") - set_property( SOURCE NNFilterSet0.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx512f -mavx512bw -mavx512dq -ffast-math") - set_property( SOURCE NNFilterSet1.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx512f -mavx512bw -mavx512dq -ffast-math") - set_property( SOURCE NNFilterSetLC.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx512f -mavx512bw -mavx512dq -ffast-math") - set_property( SOURCE NNFilterUnified.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx512f -mavx512bw -mavx512dq -ffast-math") - set_property( SOURCE intra_pred_sadl.cpp APPEND PROPERTY COMPILE_FLAGS "-ffast-math -mavx512f -mavx512bw -mavx512dq -DNDEBUG=1 ") - else() - set_property( SOURCE NNSRUnified.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2 -ffast-math") - set_property( SOURCE NNSuperResolution.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2 -ffast-math") - set_property( SOURCE SEINeuralNetworkPostFiltering.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2 
-ffast-math") - set_property( SOURCE NNInference.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2 -ffast-math") - set_property( SOURCE NNFilterSet0.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2 -ffast-math") - set_property( SOURCE NNFilterSet1.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2 -ffast-math") - set_property( SOURCE NNFilterSetLC.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2 -ffast-math") - set_property( SOURCE NNFilterUnified.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2 -ffast-math") - set_property( SOURCE intra_pred_sadl.cpp APPEND PROPERTY COMPILE_FLAGS "-ffast-math -mavx2 -DNDEBUG=1 ") + if( MSVC ) + set_property( SOURCE NNSRUnified.cpp APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ") + set_property( SOURCE NNSuperResolution.cpp APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ") + set_property( SOURCE SEINeuralNetworkPostFiltering.cpp APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ") + set_property( SOURCE NNInference.cpp APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ") + set_property( SOURCE NNFilterSet0.cpp APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ") + set_property( SOURCE NNFilterSet1.cpp APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ") + set_property( SOURCE NNFilterSetLC.cpp APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ") + set_property( SOURCE NNFilterUnified.cpp APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ") + set_property( SOURCE intra_pred_sadl.cpp APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ") + elseif( UNIX OR MINGW ) + if( NNLF_BUILD_WITH_AVX512 STREQUAL "1" ) + set_property( SOURCE NNSRUnified.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx512f -mavx512bw -ffast-math") + set_property( SOURCE NNSuperResolution.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx512f -mavx512bw -ffast-math") + set_property( SOURCE SEINeuralNetworkPostFiltering.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx512f -mavx512bw -ffast-math") + 
set_property( SOURCE NNInference.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx512f -mavx512bw -mavx512dq -ffast-math") + set_property( SOURCE NNFilterSet0.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx512f -mavx512bw -mavx512dq -ffast-math") + set_property( SOURCE NNFilterSet1.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx512f -mavx512bw -mavx512dq -ffast-math") + set_property( SOURCE NNFilterSetLC.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx512f -mavx512bw -mavx512dq -ffast-math") + set_property( SOURCE NNFilterUnified.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx512f -mavx512bw -mavx512dq -ffast-math") + set_property( SOURCE intra_pred_sadl.cpp APPEND PROPERTY COMPILE_FLAGS "-ffast-math -mavx512f -mavx512bw -mavx512dq -DNDEBUG=1 ") + else() + set_property( SOURCE NNSRUnified.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2 -ffast-math") + set_property( SOURCE NNSuperResolution.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2 -ffast-math") + set_property( SOURCE SEINeuralNetworkPostFiltering.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2 -ffast-math") + set_property( SOURCE NNInference.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2 -ffast-math") + set_property( SOURCE NNFilterSet0.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2 -ffast-math") + set_property( SOURCE NNFilterSet1.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2 -ffast-math") + set_property( SOURCE NNFilterSetLC.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2 -ffast-math") + set_property( SOURCE NNFilterUnified.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2 -ffast-math") + set_property( SOURCE intra_pred_sadl.cpp APPEND PROPERTY COMPILE_FLAGS "-ffast-math -mavx2 -DNDEBUG=1 ") + endif() endif() endif() diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h index 6eac650c0bccf8d7ceb431fe8007fff8bae2864e..643c2f264b2dbd765a6d26688edeefd5be32a14f 100644 --- a/source/Lib/CommonLib/CommonDef.h +++ 
b/source/Lib/CommonLib/CommonDef.h @@ -685,7 +685,7 @@ typedef enum{ AVX2, AVX512 } X86_VEXT; -#elif defined (__ARM_NEON__) +#elif defined (__ARM_NEON__) || defined(__ARM_NEON) #define TARGET_SIMD_ARM 1 #else #error no simd target diff --git a/source/Lib/CommonLib/IntraPrediction.cpp b/source/Lib/CommonLib/IntraPrediction.cpp index 5ccb5dcfc3ceeecb4b9de38440d34df6f85913f6..a2115a3cb6f52513c3e69ce1cce26dc1546f3619 100644 --- a/source/Lib/CommonLib/IntraPrediction.cpp +++ b/source/Lib/CommonLib/IntraPrediction.cpp @@ -1082,22 +1082,10 @@ void IntraPrediction::collectContext(const CompArea& area,const CodingUnit& cu,c const int totalAboveUnits{static_cast<int>(pairSizesCtxBorders.second)/unitWidth}; const int totalLeftUnits{static_cast<int>(pairSizesCtxBorders.first)/unitHeight}; const int totalUnits{totalAboveUnits + totalLeftUnits + 1}; - const int numAboveUnits{tuWidth/unitWidth}; - const int numLeftUnits{tuHeight/unitHeight}; - const int numAboveRightUnits{totalAboveUnits - numAboveUnits}; - const int numLeftBelowUnits{totalLeftUnits - numLeftUnits}; const Position posLT(area); - const Position posRT(area.topRight()); - const Position posLB(area.bottomLeft()); bool arrayNeighborFlags[4*MAX_NUM_PART_IDXS_IN_CTU_WIDTH + 1]; - int numIntraNeighbor = 0; memset(arrayNeighborFlags,0,totalUnits); arrayNeighborFlags[totalLeftUnits] = isAboveLeftAvailable(cu,chType,posLT); - numIntraNeighbor += arrayNeighborFlags[totalLeftUnits] ? 
1 : 0; - numIntraNeighbor += isAboveAvailable(cu,chType,posLT,numAboveUnits,unitWidth,arrayNeighborFlags + totalLeftUnits + 1); - numIntraNeighbor += isAboveRightAvailable(cu,chType,posRT,numAboveRightUnits,unitWidth,arrayNeighborFlags + totalLeftUnits + 1 + numAboveUnits); - numIntraNeighbor += isLeftAvailable(cu, chType,posLT,numLeftUnits,unitHeight,arrayNeighborFlags + totalLeftUnits - 1); - numIntraNeighbor += isBelowLeftAvailable(cu,chType,posLB,numLeftBelowUnits,unitHeight,arrayNeighborFlags + totalLeftUnits - 1 - numLeftUnits); int error_code{0}; #if JVET_AB0149_ACTIVATE_WRITER_BLOCK_STATS if (isComputedHeightWidthMasks) diff --git a/source/Lib/CommonLib/NNSRUnified.cpp b/source/Lib/CommonLib/NNSRUnified.cpp index e971055c9fadae6c409fff1f66e47081134f4ed4..4860ff61e9d64e900f4235190a354b2ef7be1e14 100644 --- a/source/Lib/CommonLib/NNSRUnified.cpp +++ b/source/Lib/CommonLib/NNSRUnified.cpp @@ -109,13 +109,11 @@ void NNSRUnified::CNNSRProcess(const CPelUnitBuf &recBuf, const PelUnitBuf &pred int actualInputPatchSizeH = padPatchBR_y - padPatchTL_y + 1; int actualInputPatchSizeW = padPatchBR_x - padPatchTL_x + 1; - - int InputIdx = 0; + for (auto &m: m_Inputs) { sadl::Dimensions dims(std::initializer_list<int>({ 1, actualInputPatchSizeH, actualInputPatchSizeW, 1})); m.resize(dims); - InputIdx++; } if (!m_SRModel->init(m_Inputs)) diff --git a/source/Lib/CommonLib/QuantRDOQ.cpp b/source/Lib/CommonLib/QuantRDOQ.cpp index 9b9eccf286785f4c5758cadfb14fbeb4a38c2b1b..df8e59b34b031abcc175df7d926c845e5df6a207 100644 --- a/source/Lib/CommonLib/QuantRDOQ.cpp +++ b/source/Lib/CommonLib/QuantRDOQ.cpp @@ -1198,7 +1198,6 @@ void QuantRDOQ::xRateDistOptQuantTS( TransformUnit &tu, const ComponentID &compI transformShift = std::max<int>( 0, transformShift ); } - double blockUncodedCost = 0; const uint32_t maxNumCoeff = rect.area(); CHECK( compID >= MAX_NUM_TBLOCKS, "Invalid component ID" ); @@ -1291,7 +1290,6 @@ void QuantRDOQ::xRateDistOptQuantTS( TransformUnit &tu, const 
ComponentID &compI coeffLevelError[0] = dErr * dErr * errorScale; costCoeff0[scanPos] = coeffLevelError[0]; - blockUncodedCost += costCoeff0[ scanPos ]; dstCoeff[blkPos] = coeffLevels[0]; //===== coefficient level estimation ===== @@ -1420,7 +1418,6 @@ void QuantRDOQ::forwardBDPCM(TransformUnit &tu, const ComponentID &compID, const transformShift = std::max<int>(0, transformShift); } - double blockUncodedCost = 0; const uint32_t maxNumCoeff = rect.area(); CHECK(compID >= MAX_NUM_TBLOCKS, "Invalid component ID"); @@ -1510,7 +1507,6 @@ void QuantRDOQ::forwardBDPCM(TransformUnit &tu, const ComponentID &compID, const coeffLevelError[0] = dErr * dErr * errorScale; costCoeff0[scanPos] = coeffLevelError[0]; - blockUncodedCost += costCoeff0[scanPos]; dstCoeff[blkPos] = coeffLevels[0]; //===== coefficient level estimation ===== diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 411b8ff22abb8d69d2e3a3d8de249d5a593756ba..04288d6106ad41145e0a5556679552b887bcd5ce 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -336,7 +336,11 @@ typedef std::pair<int, int> TrCost; // SIMD optimizations #define SIMD_ENABLE 1 -#define ENABLE_SIMD_OPT ( SIMD_ENABLE && !RExt__HIGH_BIT_DEPTH_SUPPORT ) ///< SIMD optimizations, no impact on RD performance +#ifdef TARGET_SIMD_X86 +#define ENABLE_SIMD_OPT ( SIMD_ENABLE && !RExt__HIGH_BIT_DEPTH_SUPPORT ) ///< SIMD optimizations, no impact on RD performance +#else +#define ENABLE_SIMD_OPT 0 ///< SIMD optimizations, no impact on RD performance +#endif #define ENABLE_SIMD_OPT_MCIF ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for the interpolation filter, no impact on RD performance #define ENABLE_SIMD_OPT_BUFFER ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for the buffer operations, no impact on RD performance #define ENABLE_SIMD_OPT_DIST ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for the distortion calculations(SAD,SSE,HADAMARD), no impact on RD performance diff --git a/source/Lib/DecoderLib/VLCReader.cpp 
b/source/Lib/DecoderLib/VLCReader.cpp index fb210055e7865eed0b9b310e2515f741e96eb055..514134b116f05fa807354201763355b358bc9e5f 100644 --- a/source/Lib/DecoderLib/VLCReader.cpp +++ b/source/Lib/DecoderLib/VLCReader.cpp @@ -564,7 +564,7 @@ void HLSyntaxReader::parsePPS( PPS* pcPPS ) { pcPPS->setSingleSlicePerSubPicFlag(0); } - if (pcPPS->getRectSliceFlag() & !(pcPPS->getSingleSlicePerSubPicFlag())) + if (pcPPS->getRectSliceFlag() && !(pcPPS->getSingleSlicePerSubPicFlag())) { int32_t tileIdx = 0; diff --git a/source/Lib/EncoderLib/EncNNFilterSet0.cpp b/source/Lib/EncoderLib/EncNNFilterSet0.cpp index e8c21a09c2a0ff41599ce1f6ebd27cb10685e8f7..9b22b34701df0ae273f9f8419ff034bfb1acce7e 100644 --- a/source/Lib/EncoderLib/EncNNFilterSet0.cpp +++ b/source/Lib/EncoderLib/EncNNFilterSet0.cpp @@ -522,17 +522,13 @@ int EncNNFilterSet0::lengthTruncatedUnary(int symbol, int maxSymbol) } bool codeLast = (maxSymbol > symbol); - int bins = 0; int numBins = 0; while (symbol--) { - bins <<= 1; - bins++; numBins++; } if (codeLast) { - bins <<= 1; numBins++; } diff --git a/source/Lib/EncoderLib/IntraSearch.cpp b/source/Lib/EncoderLib/IntraSearch.cpp index 1af40d9e5850dc932f30eadec4111b3381d8e812..ddbfef0eef2e99ec20a6128c7a5db27fdf026ee0 100644 --- a/source/Lib/EncoderLib/IntraSearch.cpp +++ b/source/Lib/EncoderLib/IntraSearch.cpp @@ -5336,7 +5336,7 @@ bool IntraSearch::xRecurIntraCodingACTQT(CodingStructure &cs, Partitioner &parti piReco.reconstruct(piPred, piResi, cs.slice->clpRng(compID)); if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcEncCfg->getLmcs() - & slice.getLmcsEnabledFlag() && (m_pcReshape->getCTUFlag() || (isChroma(compID) && m_pcEncCfg->getReshapeIntraCMD())))) + && slice.getLmcsEnabledFlag() && (m_pcReshape->getCTUFlag() || (isChroma(compID) && m_pcEncCfg->getReshapeIntraCMD())))) { const CPelBuf orgLuma = csFull->getOrgBuf(csFull->area.blocks[COMPONENT_Y]); if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled())) @@ 
-5449,7 +5449,7 @@ bool IntraSearch::xRecurIntraCodingACTQT(CodingStructure &cs, Partitioner &parti } piReco.reconstruct(piPred, piResi, cs.slice->clpRng(compID)); if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() - || (m_pcEncCfg->getLmcs() & slice.getLmcsEnabledFlag() + || (m_pcEncCfg->getLmcs() && slice.getLmcsEnabledFlag() && (m_pcReshape->getCTUFlag() || (isChroma(compID) && m_pcEncCfg->getReshapeIntraCMD())))) { const CPelBuf orgLuma = csFull->getOrgBuf(csFull->area.blocks[COMPONENT_Y]); diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp index 28b7bae359c5ac16e9c49e0a68dea490c5f138a1..4088ee5e51a93df7f39c6c20d31acd6e6fabadfa 100644 --- a/source/Lib/EncoderLib/VLCWriter.cpp +++ b/source/Lib/EncoderLib/VLCWriter.cpp @@ -369,7 +369,7 @@ void HLSWriter::codePPS( const PPS* pcPPS ) { WRITE_FLAG(pcPPS->getSingleSlicePerSubPicFlag( ) ? 1 : 0, "pps_single_slice_per_subpic_flag"); } - if (pcPPS->getRectSliceFlag() & !(pcPPS->getSingleSlicePerSubPicFlag())) + if (pcPPS->getRectSliceFlag() && !(pcPPS->getSingleSlicePerSubPicFlag())) { WRITE_UVLC( pcPPS->getNumSlicesInPic( ) - 1, "pps_num_slices_in_pic_minus1" ); if ((pcPPS->getNumSlicesInPic() - 1) > 1)